Commit eb5e7da7 authored by Matthew Brost's avatar Matthew Brost Committed by John Harrison
Browse files

drm/i915/guc: Reset implementation for new GuC interface



Reset implementation for new GuC interface. This is the legacy reset
implementation which is called when the i915 owns the engine hang check.
Future patches will offload the engine hang check to GuC but we will
continue to maintain this legacy path as a fallback and this code path
is also required if the GuC dies.

With the new GuC interface it is not possible to reset individual
engines - it is only possible to reset the GPU entirely. This patch
forces an entire chip reset if any engine hangs.

v2:
 (Michal)
  - Check for -EPIPE rather than -EIO (CT deadlock/corrupt check)
v3:
 (John H)
  - Split into a series of smaller patches
v4:
 (John H)
  - Fix typo
  - Add braces around if statements in reset code
v5:
 (Checkpatch)
  - Fix warnings

Cc: John Harrison <john.c.harrison@intel.com>
Signed-off-by: default avatarMatthew Brost <matthew.brost@intel.com>
Reviewed-by: default avatarJohn Harrison <john.c.harrison@intel.com>
Signed-off-by: default avatarJohn Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-9-matthew.brost@intel.com
parent d1cee2d3
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -170,8 +170,6 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
	if (intel_gt_is_wedged(gt))
		intel_gt_unset_wedged(gt);

	intel_uc_sanitize(&gt->uc);

	for_each_engine(engine, gt, id)
		if (engine->reset.prepare)
			engine->reset.prepare(engine);
@@ -187,6 +185,8 @@ static void gt_sanitize(struct intel_gt *gt, bool force)
			__intel_engine_reset(engine, false);
	}

	intel_uc_reset(&gt->uc, false);

	for_each_engine(engine, gt, id)
		if (engine->reset.finish)
			engine->reset.finish(engine);
@@ -239,6 +239,8 @@ int intel_gt_resume(struct intel_gt *gt)
		goto err_wedged;
	}

	intel_uc_reset_finish(&gt->uc);

	intel_rps_enable(&gt->rps);
	intel_llc_enable(&gt->llc);

+12 −6
Original line number Diff line number Diff line
@@ -832,6 +832,8 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
		__intel_engine_reset(engine, stalled_mask & engine->mask);
	local_bh_enable();

	intel_uc_reset(&gt->uc, true);

	intel_ggtt_restore_fences(gt->ggtt);

	return err;
@@ -856,6 +858,8 @@ static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake)
		if (awake & engine->mask)
			intel_engine_pm_put(engine);
	}

	intel_uc_reset_finish(&gt->uc);
}

static void nop_submit_request(struct i915_request *request)
@@ -909,6 +913,7 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
	for_each_engine(engine, gt, id)
		if (engine->reset.cancel)
			engine->reset.cancel(engine);
	intel_uc_cancel_requests(&gt->uc);
	local_bh_enable();

	reset_finish(gt, awake);
@@ -1197,6 +1202,9 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));

	if (intel_engine_uses_guc(engine))
		return -ENODEV;

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

@@ -1207,13 +1215,10 @@ int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
			   "Resetting %s for %s\n", engine->name, msg);
	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);

	if (intel_engine_uses_guc(engine))
		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
	else
	ret = intel_gt_reset_engine(engine);
	if (ret) {
		/* If we fail here, we expect to fallback to a global reset */
		ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
		ENGINE_TRACE(engine, "Failed to reset %s, err: %d\n", engine->name, ret);
		goto out;
	}

@@ -1347,7 +1352,8 @@ void intel_gt_handle_error(struct intel_gt *gt,
	 * Try engine reset when available. We fall back to full reset if
	 * single reset fails.
	 */
	if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
	if (!intel_uc_uses_guc_submission(&gt->uc) &&
	    intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
		local_bh_disable();
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
+0 −13
Original line number Diff line number Diff line
@@ -572,19 +572,6 @@ int intel_guc_suspend(struct intel_guc *guc)
	return 0;
}

/**
 * intel_guc_reset_engine() - ask GuC to reset an engine
 * @guc:	intel_guc structure
 * @engine:	engine to be reset
 */
int intel_guc_reset_engine(struct intel_guc *guc,
			   struct intel_engine_cs *engine)
{
	/* XXX: to be implemented with submission interface rework */

	return -ENODEV;
}

/**
 * intel_guc_resume() - notify GuC resuming from suspend state
 * @guc:	the guc
+5 −3
Original line number Diff line number Diff line
@@ -249,14 +249,16 @@ static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask)

int intel_guc_wait_for_idle(struct intel_guc *guc, long timeout);

int intel_guc_reset_engine(struct intel_guc *guc,
			   struct intel_engine_cs *engine);

int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
					  const u32 *msg, u32 len);
int intel_guc_sched_done_process_msg(struct intel_guc *guc,
				     const u32 *msg, u32 len);

void intel_guc_submission_reset_prepare(struct intel_guc *guc);
void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
void intel_guc_submission_reset_finish(struct intel_guc *guc);
void intel_guc_submission_cancel_requests(struct intel_guc *guc);

void intel_guc_load_status(struct intel_guc *guc, struct drm_printer *p);

#endif
+455 −107

File changed.

Preview size limit exceeded, changes collapsed.

Loading