Commit 194eb174 authored by Victor Zhao, committed by Alex Deucher
Browse files

drm/amdgpu: reduce reset time



In the multi-container use case, reset time is important, so skip the ring
tests and the CP halt wait while suspending IP blocks for a reset, as they
are going to fail and only add to the reset time.

v2: add a hang flag to indicate that the reset comes from a job timeout;
skip the ring test and CP halt wait in this case

v3: move hang flag to adev

Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
Acked-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 72fadb13
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1065,6 +1065,7 @@ struct amdgpu_device {
	struct work_struct		reset_work;

	uint32_t						amdgpu_reset_level_mask;
	bool                            job_hang;
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+1 −1
Original line number Diff line number Diff line
@@ -477,7 +477,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
					   RESET_QUEUES, 0, 0);

	if (adev->gfx.kiq.ring.sched.ready)
	if (adev->gfx.kiq.ring.sched.ready && !adev->job_hang)
		r = amdgpu_ring_test_helper(kiq_ring);
	spin_unlock(&adev->gfx.kiq.ring_lock);

+2 −0
Original line number Diff line number Diff line
@@ -49,6 +49,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
	}

	memset(&ti, 0, sizeof(struct amdgpu_task_info));
	adev->job_hang = true;

	if (amdgpu_gpu_recovery &&
	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
@@ -83,6 +84,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
	}

exit:
	adev->job_hang = false;
	drm_dev_exit(idx);
	return DRM_GPU_SCHED_STAT_NOMINAL;
}
+7 −2
Original line number Diff line number Diff line
@@ -5971,6 +5971,9 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
		WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp);
	}

	if (adev->job_hang && !enable)
		return 0;

	for (i = 0; i < adev->usec_timeout; i++) {
		if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0)
			break;
@@ -7569,8 +7572,10 @@ static int gfx_v10_0_kiq_disable_kgq(struct amdgpu_device *adev)
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i],
					   PREEMPT_QUEUES, 0, 0);

	if (!adev->job_hang)
		return amdgpu_ring_test_helper(kiq_ring);
	else
		return 0;
}
#endif