drm/i915: Flush TLBs before releasing backing store (7938d615) · Commits · EulixOS / Software / Kernel

drivers/gpu/drm/i915/gem/i915_gem_object_types.h

+1 −0

Original line number	Diff line number	Diff line
		@@ -311,6 +311,7 @@ struct drm_i915_gem_object {
		#define I915_BO_READONLY BIT(6)
		#define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release! */
		#define I915_BO_PROTECTED BIT(8)
		#define I915_BO_WAS_BOUND_BIT 9
		/**
		* @mem_flags - Mutable placement-related flags
		*

drivers/gpu/drm/i915/gem/i915_gem_pages.c

+10 −0

Original line number	Diff line number	Diff line
		@@ -10,6 +10,8 @@
		#include "i915_gem_lmem.h"
		#include "i915_gem_mman.h"

		#include "gt/intel_gt.h"

		void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
		struct sg_table *pages,
		unsigned int sg_page_sizes)
		@@ -221,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
		__i915_gem_object_reset_page_iter(obj);
		obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;

		if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
		intel_wakeref_t wakeref;

		with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref)
		intel_gt_invalidate_tlbs(to_gt(i915));
		}

		return pages;
		}

drivers/gpu/drm/i915/gt/intel_gt.c

+108 −0

Original line number	Diff line number	Diff line
		@@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915)
		{
		spin_lock_init(&gt->irq_lock);

		mutex_init(&gt->tlb_invalidate_lock);

		INIT_LIST_HEAD(&gt->closed_vma);
		spin_lock_init(&gt->closed_lock);

		@@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info,

		intel_sseu_dump(&info->sseu, p);
		}

		/*
		 * Result of get_reg_and_bit(): the register to poke for one engine's TLB
		 * invalidation together with the bit mask to use on it.
		 */
		struct reg_and_bit {
		/* Register that intel_gt_invalidate_tlbs() writes and then waits on. */
		i915_reg_t reg;
		/* Single-bit mask (built with BIT()) written to @reg and used as the wait mask. */
		u32 bit;
		};

		static struct reg_and_bit
		get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8,
		const i915_reg_t *regs, const unsigned int num)
		{
		const unsigned int class = engine->class;
		struct reg_and_bit rb = { };

		if (drm_WARN_ON_ONCE(&engine->i915->drm,
		class >= num \|\| !regs[class].reg))
		return rb;

		rb.reg = regs[class];
		if (gen8 && class == VIDEO_DECODE_CLASS)
		rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
		else
		rb.bit = engine->instance;

		rb.bit = BIT(rb.bit);

		return rb;
		}

		void intel_gt_invalidate_tlbs(struct intel_gt *gt)
		{
		static const i915_reg_t gen8_regs[] = {
		[RENDER_CLASS] = GEN8_RTCR,
		[VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */
		[VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR,
		[COPY_ENGINE_CLASS] = GEN8_BTCR,
		};
		static const i915_reg_t gen12_regs[] = {
		[RENDER_CLASS] = GEN12_GFX_TLB_INV_CR,
		[VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR,
		[VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR,
		[COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR,
		};
		struct drm_i915_private *i915 = gt->i915;
		struct intel_uncore *uncore = gt->uncore;
		struct intel_engine_cs *engine;
		enum intel_engine_id id;
		const i915_reg_t *regs;
		unsigned int num = 0;

		if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

		if (GRAPHICS_VER(i915) == 12) {
		regs = gen12_regs;
		num = ARRAY_SIZE(gen12_regs);
		} else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) {
		regs = gen8_regs;
		num = ARRAY_SIZE(gen8_regs);
		} else if (GRAPHICS_VER(i915) < 8) {
		return;
		}

		if (drm_WARN_ONCE(&i915->drm, !num,
		"Platform does not implement TLB invalidation!"))
		return;

		GEM_TRACE("\n");

		assert_rpm_wakelock_held(&i915->runtime_pm);

		mutex_lock(&gt->tlb_invalidate_lock);
		intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

		for_each_engine(engine, gt, id) {
		/*
		* HW architecture suggest typical invalidation time at 40us,
		* with pessimistic cases up to 100us and a recommendation to
		* cap at 1ms. We go a bit higher just in case.
		*/
		const unsigned int timeout_us = 100;
		const unsigned int timeout_ms = 4;
		struct reg_and_bit rb;

		rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
		if (!i915_mmio_reg_offset(rb.reg))
		continue;

		intel_uncore_write_fw(uncore, rb.reg, rb.bit);
		if (__intel_wait_for_register_fw(uncore,
		rb.reg, rb.bit, 0,
		timeout_us, timeout_ms,
		NULL))
		drm_err_ratelimited(&gt->i915->drm,
		"%s TLB invalidation did not complete in %ums!\n",
		engine->name, timeout_ms);
		}

		/*
		* Use delayed put since a) we mostly expect a flurry of TLB
		* invalidations so it is good to avoid paying the forcewake cost and
		* b) it works around a bug in Icelake which cannot cope with too rapid
		* transitions.
		*/
		intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
		mutex_unlock(&gt->tlb_invalidate_lock);
		}

drivers/gpu/drm/i915/gt/intel_gt.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -91,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info,

		void intel_gt_watchdog_work(struct work_struct *work);

		void intel_gt_invalidate_tlbs(struct intel_gt *gt);

		#endif /* __INTEL_GT_H__ */

drivers/gpu/drm/i915/gt/intel_gt_types.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -73,6 +73,8 @@ struct intel_gt {

		struct intel_uc uc;

		struct mutex tlb_invalidate_lock;

		struct i915_wa_list wa_list;

		struct intel_gt_timelines {