drm/i915/gt: Move TLB invalidation to its own file (568a2e6f) · Commits · EulixOS / Software / Kernel

drivers/gpu/drm/i915/Makefile

+1 −0

Original line number	Diff line number	Diff line
		@@ -131,6 +131,7 @@ gt-y += \
		gt/intel_sseu.o \
		gt/intel_sseu_debugfs.o \
		gt/intel_timeline.o \
		gt/intel_tlb.o \
		gt/intel_wopcm.o \
		gt/intel_workarounds.o \
		gt/shmem_utils.o \

drivers/gpu/drm/i915/gem/i915_gem_pages.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -7,7 +7,7 @@
		#include <drm/drm_cache.h>

		#include "gt/intel_gt.h"
		#include "gt/intel_gt_pm.h"
		#include "gt/intel_tlb.h"

		#include "i915_drv.h"
		#include "i915_gem_object.h"
		@@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
		if (!obj->mm.tlb)
		return;

		intel_gt_invalidate_tlb(gt, obj->mm.tlb);
		intel_gt_invalidate_tlb_full(gt, obj->mm.tlb);
		obj->mm.tlb = 0;
		}

drivers/gpu/drm/i915/gt/intel_gt.c

+3 −137

Original line number	Diff line number	Diff line
		@@ -33,6 +33,7 @@
		#include "intel_rps.h"
		#include "intel_sa_media.h"
		#include "intel_gt_sysfs.h"
		#include "intel_tlb.h"
		#include "intel_uncore.h"
		#include "shmem_utils.h"

		@@ -50,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
		intel_gt_init_reset(gt);
		intel_gt_init_requests(gt);
		intel_gt_init_timelines(gt);
		mutex_init(&gt->tlb.invalidate_lock);
		seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
		intel_gt_init_tlb(gt);
		intel_gt_pm_init_early(gt);

		intel_wopcm_init_early(&gt->wopcm);
		@@ -846,7 +846,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
		intel_gt_fini_requests(gt);
		intel_gt_fini_reset(gt);
		intel_gt_fini_timelines(gt);
		mutex_destroy(&gt->tlb.invalidate_lock);
		intel_gt_fini_tlb(gt);
		intel_engines_free(gt);
		}
		}
		@@ -1003,137 +1003,3 @@ void intel_gt_info_print(const struct intel_gt_info *info,

		intel_sseu_dump(&info->sseu, p);
		}

		/*
		* HW architecture suggests a typical invalidation time of 40us,
		* with pessimistic cases up to 100us and a recommendation to
		* cap at 1ms. We go a bit higher just in case.
		*
		* Both values are passed to the register-wait helpers called from
		* wait_for_invalidate(); TLB_INVAL_TIMEOUT_MS is also the figure
		* printed by the "did not complete in %ums" error message.
		*/
		#define TLB_INVAL_TIMEOUT_US 100
		#define TLB_INVAL_TIMEOUT_MS 4

		/*
		* On Xe_HP the TLB invalidation registers keep their MMIO offsets but are
		* now treated as MCR registers. Because they sit inside a GAM range, the
		* primary instance of the register rolls up the status from each unit.
		*
		* Waits on @engine's TLB invalidation register, handing the helper
		* engine->tlb_inv.done and 0 (presumably mask and expected value --
		* confirm against the helpers) together with the two TLB_INVAL_TIMEOUT_*
		* limits. The MCR-aware helper is used when engine->tlb_inv.mcr is set,
		* the plain uncore helper otherwise. The helper's result is returned
		* unchanged; the caller treats non-zero as "did not complete".
		*/
		static int wait_for_invalidate(struct intel_engine_cs *engine)
		{
			if (!engine->tlb_inv.mcr) {
				return __intel_wait_for_register_fw(engine->gt->uncore,
								    engine->tlb_inv.reg.reg,
								    engine->tlb_inv.done,
								    0,
								    TLB_INVAL_TIMEOUT_US,
								    TLB_INVAL_TIMEOUT_MS,
								    NULL);
			}

			return intel_gt_mcr_wait_for_reg(engine->gt,
							 engine->tlb_inv.reg.mcr_reg,
							 engine->tlb_inv.done,
							 0,
							 TLB_INVAL_TIMEOUT_US,
							 TLB_INVAL_TIMEOUT_MS);
		}

		/*
		* Issue a TLB invalidation request over MMIO to every awake engine of @gt
		* and wait for each of those requests to complete.
		*
		* Returns immediately when GRAPHICS_VER(i915) < 8. Engines for which
		* intel_engine_pm_is_awake() is false are neither written to nor waited
		* on. The requests are written with FORCEWAKE_ALL held and under both the
		* MCR lock and uncore->lock; both locks are dropped before the waits.
		* A wait that fails is only logged (rate-limited); no status is returned.
		*/
		static void mmio_invalidate_full(struct intel_gt *gt)
		{
			struct drm_i915_private *i915 = gt->i915;
			struct intel_uncore *uncore = gt->uncore;
			struct intel_engine_cs *engine;
			intel_engine_mask_t awake, tmp;
			enum intel_engine_id id;
			unsigned long flags;

			if (GRAPHICS_VER(i915) < 8)
				return;

			intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

			intel_gt_mcr_lock(gt, &flags);
			spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

			/* Collect the mask of engines that were actually sent a request. */
			awake = 0;
			for_each_engine(engine, gt, id) {
				if (!intel_engine_pm_is_awake(engine))
					continue;

				if (engine->tlb_inv.mcr)
					intel_gt_mcr_multicast_write_fw(gt,
									engine->tlb_inv.reg.mcr_reg,
									engine->tlb_inv.request);
				else
					intel_uncore_write_fw(uncore,
							      engine->tlb_inv.reg.reg,
							      engine->tlb_inv.request);

				awake |= engine->mask;
			}

			GT_TRACE(gt, "invalidated engines %08x\n", awake);

			/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
			if (awake &&
			    (IS_TIGERLAKE(i915) ||
			     IS_DG1(i915) ||
			     IS_ROCKETLAKE(i915) ||
			     IS_ALDERLAKE_S(i915) ||
			     IS_ALDERLAKE_P(i915)))
				intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

			spin_unlock(&uncore->lock);
			intel_gt_mcr_unlock(gt, flags);

			/* Wait only on the engines recorded in the awake mask above. */
			for_each_engine_masked(engine, gt, awake, tmp) {
				if (wait_for_invalidate(engine))
					gt_err_ratelimited(gt,
							   "%s TLB invalidation did not complete in %ums!\n",
							   engine->name, TLB_INVAL_TIMEOUT_MS);
			}

			/*
			 * Use delayed put since a) we mostly expect a flurry of TLB
			 * invalidations so it is good to avoid paying the forcewake cost and
			 * b) it works around a bug in Icelake which cannot cope with too rapid
			 * transitions.
			 */
			intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
		}

		/*
		* Tell the caller whether the invalidation identified by @seqno can be
		* skipped because @gt's TLB sequence counter has already moved past it.
		*
		* @seqno is rounded with ALIGN(seqno, 2) first, and the distance to the
		* current counter is evaluated as a signed 32-bit value; the result is
		* true only when that distance is strictly positive.
		*/
		static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
		{
			const u32 now = intel_gt_tlb_seqno(gt);
			const u32 barrier = ALIGN(seqno, 2);

			/* Only skip if a full TLB invalidate barrier has passed */
			return (s32)(now - barrier) > 0;
		}

		/*
		* Run a full MMIO TLB invalidation on @gt unless the counter already shows
		* that @seqno has been covered.
		*
		* Nothing is done when (selftest builds only) gt->awake is -ENODEV, when
		* the GT is wedged, or when tlb_seqno_passed() already holds. Otherwise
		* the work runs inside with_intel_gt_pm_if_awake() with
		* gt->tlb.invalidate_lock held, and gt->tlb.seqno is advanced with
		* write_seqcount_invalidate() after the invalidation.
		*/
		void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
		{
			intel_wakeref_t wakeref;

			if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
				return;

			if (intel_gt_is_wedged(gt) || tlb_seqno_passed(gt, seqno))
				return;

			with_intel_gt_pm_if_awake(gt, wakeref) {
				mutex_lock(&gt->tlb.invalidate_lock);

				/*
				 * Check again now that the lock is held -- presumably another
				 * caller may have finished the invalidation in the meantime.
				 */
				if (!tlb_seqno_passed(gt, seqno)) {
					mmio_invalidate_full(gt);
					write_seqcount_invalidate(&gt->tlb.seqno);
				}

				mutex_unlock(&gt->tlb.invalidate_lock);
			}
		}

		#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
		#include "selftest_tlb.c"
		#endif

drivers/gpu/drm/i915/gt/intel_gt.h

+0 −12

Original line number	Diff line number	Diff line
		@@ -107,16 +107,4 @@ void intel_gt_info_print(const struct intel_gt_info *info,

		void intel_gt_watchdog_work(struct work_struct *work);

		/*
		* Return the current value of @gt's TLB sequence counter, read from
		* gt->tlb.seqno via seqprop_sequence().
		*/
		static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
		{
		return seqprop_sequence(&gt->tlb.seqno);
		}

		/*
		* Return @gt's current TLB sequence number with bit 0 forced on, i.e. the
		* value of intel_gt_tlb_seqno() OR-ed with 1.
		*/
		static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
		{
			return intel_gt_tlb_seqno(gt) | 1;
		}

		void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);

		#endif /* __INTEL_GT_H__ */

drivers/gpu/drm/i915/gt/intel_tlb.c

0 → 100644

+159 −0

Original line number	Diff line number	Diff line
		// SPDX-License-Identifier: MIT
		/*
		* Copyright © 2023 Intel Corporation
		*/

		#include "i915_drv.h"
		#include "i915_perf_oa_regs.h"
		#include "intel_engine_pm.h"
		#include "intel_gt.h"
		#include "intel_gt_mcr.h"
		#include "intel_gt_pm.h"
		#include "intel_gt_print.h"
		#include "intel_gt_regs.h"
		#include "intel_tlb.h"

		/*
		* HW architecture suggests a typical invalidation time of 40us,
		* with pessimistic cases up to 100us and a recommendation to
		* cap at 1ms. We go a bit higher just in case.
		*
		* Both values are passed to the register-wait helpers called from
		* wait_for_invalidate(); TLB_INVAL_TIMEOUT_MS is also the figure
		* printed by the "did not complete in %ums" error message.
		*/
		#define TLB_INVAL_TIMEOUT_US 100
		#define TLB_INVAL_TIMEOUT_MS 4

		/*
		* On Xe_HP the TLB invalidation registers keep their MMIO offsets but are
		* now treated as MCR registers. Because they sit inside a GAM range, the
		* primary instance of the register rolls up the status from each unit.
		*
		* Waits on @engine's TLB invalidation register, handing the helper
		* engine->tlb_inv.done and 0 (presumably mask and expected value --
		* confirm against the helpers) together with the two TLB_INVAL_TIMEOUT_*
		* limits. The MCR-aware helper is used when engine->tlb_inv.mcr is set,
		* the plain uncore helper otherwise. The helper's result is returned
		* unchanged; the caller treats non-zero as "did not complete".
		*/
		static int wait_for_invalidate(struct intel_engine_cs *engine)
		{
			if (!engine->tlb_inv.mcr) {
				return __intel_wait_for_register_fw(engine->gt->uncore,
								    engine->tlb_inv.reg.reg,
								    engine->tlb_inv.done,
								    0,
								    TLB_INVAL_TIMEOUT_US,
								    TLB_INVAL_TIMEOUT_MS,
								    NULL);
			}

			return intel_gt_mcr_wait_for_reg(engine->gt,
							 engine->tlb_inv.reg.mcr_reg,
							 engine->tlb_inv.done,
							 0,
							 TLB_INVAL_TIMEOUT_US,
							 TLB_INVAL_TIMEOUT_MS);
		}

		/*
		* Issue a TLB invalidation request over MMIO to every awake engine of @gt
		* and wait for each of those requests to complete.
		*
		* Returns immediately when GRAPHICS_VER(i915) < 8. Engines for which
		* intel_engine_pm_is_awake() is false are neither written to nor waited
		* on. The requests are written with FORCEWAKE_ALL held and under both the
		* MCR lock and uncore->lock; both locks are dropped before the waits.
		* A wait that fails is only logged (rate-limited); no status is returned.
		*/
		static void mmio_invalidate_full(struct intel_gt *gt)
		{
			struct drm_i915_private *i915 = gt->i915;
			struct intel_uncore *uncore = gt->uncore;
			struct intel_engine_cs *engine;
			intel_engine_mask_t awake, tmp;
			enum intel_engine_id id;
			unsigned long flags;

			if (GRAPHICS_VER(i915) < 8)
				return;

			intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

			intel_gt_mcr_lock(gt, &flags);
			spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

			/* Collect the mask of engines that were actually sent a request. */
			awake = 0;
			for_each_engine(engine, gt, id) {
				if (!intel_engine_pm_is_awake(engine))
					continue;

				if (engine->tlb_inv.mcr)
					intel_gt_mcr_multicast_write_fw(gt,
									engine->tlb_inv.reg.mcr_reg,
									engine->tlb_inv.request);
				else
					intel_uncore_write_fw(uncore,
							      engine->tlb_inv.reg.reg,
							      engine->tlb_inv.request);

				awake |= engine->mask;
			}

			GT_TRACE(gt, "invalidated engines %08x\n", awake);

			/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
			if (awake &&
			    (IS_TIGERLAKE(i915) ||
			     IS_DG1(i915) ||
			     IS_ROCKETLAKE(i915) ||
			     IS_ALDERLAKE_S(i915) ||
			     IS_ALDERLAKE_P(i915)))
				intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

			spin_unlock(&uncore->lock);
			intel_gt_mcr_unlock(gt, flags);

			/* Wait only on the engines recorded in the awake mask above. */
			for_each_engine_masked(engine, gt, awake, tmp) {
				if (wait_for_invalidate(engine))
					gt_err_ratelimited(gt,
							   "%s TLB invalidation did not complete in %ums!\n",
							   engine->name, TLB_INVAL_TIMEOUT_MS);
			}

			/*
			 * Use delayed put since a) we mostly expect a flurry of TLB
			 * invalidations so it is good to avoid paying the forcewake cost and
			 * b) it works around a bug in Icelake which cannot cope with too rapid
			 * transitions.
			 */
			intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
		}

		/*
		* Tell the caller whether the invalidation identified by @seqno can be
		* skipped because @gt's TLB sequence counter has already moved past it.
		*
		* @seqno is rounded with ALIGN(seqno, 2) first, and the distance to the
		* current counter is evaluated as a signed 32-bit value; the result is
		* true only when that distance is strictly positive.
		*/
		static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
		{
			const u32 now = intel_gt_tlb_seqno(gt);
			const u32 barrier = ALIGN(seqno, 2);

			/* Only skip if a full TLB invalidate barrier has passed */
			return (s32)(now - barrier) > 0;
		}

		/*
		* Run a full MMIO TLB invalidation on @gt unless the counter already shows
		* that @seqno has been covered.
		*
		* Nothing is done when (selftest builds only) gt->awake is -ENODEV, when
		* the GT is wedged, or when tlb_seqno_passed() already holds. Otherwise
		* the work runs inside with_intel_gt_pm_if_awake() with
		* gt->tlb.invalidate_lock held, and gt->tlb.seqno is advanced with
		* write_seqcount_invalidate() after the invalidation.
		*/
		void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
		{
			intel_wakeref_t wakeref;

			if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
				return;

			if (intel_gt_is_wedged(gt) || tlb_seqno_passed(gt, seqno))
				return;

			with_intel_gt_pm_if_awake(gt, wakeref) {
				mutex_lock(&gt->tlb.invalidate_lock);

				/*
				 * Check again now that the lock is held -- presumably another
				 * caller may have finished the invalidation in the meantime.
				 */
				if (!tlb_seqno_passed(gt, seqno)) {
					mmio_invalidate_full(gt);
					write_seqcount_invalidate(&gt->tlb.seqno);
				}

				mutex_unlock(&gt->tlb.invalidate_lock);
			}
		}

		/*
		* Initialise @gt's TLB invalidation state: the gt->tlb.invalidate_lock
		* mutex, and the gt->tlb.seqno sequence counter, which is set up with a
		* reference to that same mutex.
		*/
		void intel_gt_init_tlb(struct intel_gt *gt)
		{
		mutex_init(&gt->tlb.invalidate_lock);
		/* The lock must be initialised first: the seqcount is handed a pointer to it. */
		seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
		}

		/*
		* Tear down @gt's TLB invalidation state by destroying the
		* gt->tlb.invalidate_lock mutex set up in intel_gt_init_tlb().
		*/
		void intel_gt_fini_tlb(struct intel_gt *gt)
		{
		mutex_destroy(&gt->tlb.invalidate_lock);
		}

		#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
		#include "selftest_tlb.c"
		#endif