Commit 568a2e6f authored by Chris Wilson's avatar Chris Wilson Committed by Andi Shyti
Browse files

drm/i915/gt: Move TLB invalidation to its own file



Prepare for supporting more TLB invalidation scenarios by moving
the current MMIO invalidation to its own file.

Signed-off-by: default avatarChris Wilson <chris.p.wilson@linux.intel.com>
Signed-off-by: default avatarMauro Carvalho Chehab <mchehab@kernel.org>
Reviewed-by: default avatarAndi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: default avatarAndi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230801141955.383305-2-andi.shyti@linux.intel.com
parent acf228cd
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -131,6 +131,7 @@ gt-y += \
	gt/intel_sseu.o \
	gt/intel_sseu_debugfs.o \
	gt/intel_timeline.o \
	gt/intel_tlb.o \
	gt/intel_wopcm.o \
	gt/intel_workarounds.o \
	gt/shmem_utils.o \
+2 −2
Original line number Diff line number Diff line
@@ -7,7 +7,7 @@
#include <drm/drm_cache.h>

#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_tlb.h"

#include "i915_drv.h"
#include "i915_gem_object.h"
@@ -198,7 +198,7 @@ static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
	if (!obj->mm.tlb)
		return;

	intel_gt_invalidate_tlb(gt, obj->mm.tlb);
	intel_gt_invalidate_tlb_full(gt, obj->mm.tlb);
	obj->mm.tlb = 0;
}

+3 −137
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include "intel_rps.h"
#include "intel_sa_media.h"
#include "intel_gt_sysfs.h"
#include "intel_tlb.h"
#include "intel_uncore.h"
#include "shmem_utils.h"

@@ -50,8 +51,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
	intel_gt_init_reset(gt);
	intel_gt_init_requests(gt);
	intel_gt_init_timelines(gt);
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
	intel_gt_init_tlb(gt);
	intel_gt_pm_init_early(gt);

	intel_wopcm_init_early(&gt->wopcm);
@@ -846,7 +846,7 @@ void intel_gt_driver_late_release_all(struct drm_i915_private *i915)
		intel_gt_fini_requests(gt);
		intel_gt_fini_reset(gt);
		intel_gt_fini_timelines(gt);
		mutex_destroy(&gt->tlb.invalidate_lock);
		intel_gt_fini_tlb(gt);
		intel_engines_free(gt);
	}
}
@@ -1003,137 +1003,3 @@ void intel_gt_info_print(const struct intel_gt_info *info,

	intel_sseu_dump(&info->sseu, p);
}

/*
 * HW architecture suggest typical invalidation time at 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers.  Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif
+0 −12
Original line number Diff line number Diff line
@@ -107,16 +107,4 @@ void intel_gt_info_print(const struct intel_gt_info *info,

void intel_gt_watchdog_work(struct work_struct *work);

static inline u32 intel_gt_tlb_seqno(const struct intel_gt *gt)
{
	return seqprop_sequence(&gt->tlb.seqno);
}

static inline u32 intel_gt_next_invalidate_tlb_full(const struct intel_gt *gt)
{
	return intel_gt_tlb_seqno(gt) | 1;
}

void intel_gt_invalidate_tlb(struct intel_gt *gt, u32 seqno);

#endif /* __INTEL_GT_H__ */
+159 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"

/*
 * HW architecture suggest typical invalidation time at 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers.  Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif
Loading