Commit c10983e1 authored by Luben Tuikov, committed by Maarten Lankhorst
Browse files

drm/scheduler: Job timeout handler returns status (v3)



This patch does not change current behaviour.

The driver's job timeout handler now returns a
status indicating to the DRM layer whether the
device (GPU) is no longer available, such as
after it's been unplugged, or whether all is
normal, i.e. current behaviour.

All drivers which make use of the
drm_sched_backend_ops' .timedout_job() callback
have been updated accordingly and return the
would've-been default value of
DRM_GPU_SCHED_STAT_NOMINAL to restart the task's
timeout timer--this is the old behaviour, and is
preserved by this patch.

v2: Use enum as the status of a driver's job
    timeout callback method.

v3: Return scheduler/device information, rather
    than task information.

Cc: Alexander Deucher <Alexander.Deucher@amd.com>
Cc: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Russell King <linux+etnaviv@armlinux.org.uk>
Cc: Christian Gmeiner <christian.gmeiner@gmail.com>
Cc: Qiang Yu <yuq825@gmail.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Tomeu Vizoso <tomeu.vizoso@collabora.com>
Cc: Steven Price <steven.price@arm.com>
Cc: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Cc: Eric Anholt <eric@anholt.net>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Steven Price <steven.price@arm.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/415095/


(cherry picked from commit a6a1f036)
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
parent 78e4ba40
Loading
Loading
Loading
Loading
+4 −2
Original line number Diff line number Diff line
@@ -28,7 +28,7 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"

static void amdgpu_job_timedout(struct drm_sched_job *s_job)
static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
	struct amdgpu_job *job = to_amdgpu_job(s_job);
@@ -41,7 +41,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
		DRM_ERROR("ring %s timeout, but soft recovered\n",
			  s_job->sched->name);
		return;
		return DRM_GPU_SCHED_STAT_NOMINAL;
	}

	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
@@ -53,10 +53,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)

	if (amdgpu_device_should_recover_gpu(ring->adev)) {
		amdgpu_device_gpu_recover(ring->adev, job);
		return DRM_GPU_SCHED_STAT_NOMINAL;
	} else {
		drm_sched_suspend_timeout(&ring->sched);
		if (amdgpu_sriov_vf(adev))
			adev->virt.tdr_debug = true;
		return DRM_GPU_SCHED_STAT_NOMINAL;
	}
}

+6 −1
Original line number Diff line number Diff line
@@ -82,7 +82,8 @@ static struct dma_fence *etnaviv_sched_run_job(struct drm_sched_job *sched_job)
	return fence;
}

static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
							  *sched_job)
{
	struct etnaviv_gem_submit *submit = to_etnaviv_submit(sched_job);
	struct etnaviv_gpu *gpu = submit->gpu;
@@ -120,9 +121,13 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)

	drm_sched_resubmit_jobs(&gpu->sched);

	drm_sched_start(&gpu->sched, true);
	return DRM_GPU_SCHED_STAT_NOMINAL;

out_no_timeout:
	/* restart scheduler after GPU is usable again */
	drm_sched_start(&gpu->sched, true);
	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void etnaviv_sched_free_job(struct drm_sched_job *sched_job)
+3 −1
Original line number Diff line number Diff line
@@ -415,7 +415,7 @@ static void lima_sched_build_error_task_list(struct lima_sched_task *task)
	mutex_unlock(&dev->error_task_list_lock);
}

static void lima_sched_timedout_job(struct drm_sched_job *job)
static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job)
{
	struct lima_sched_pipe *pipe = to_lima_pipe(job->sched);
	struct lima_sched_task *task = to_lima_task(job);
@@ -449,6 +449,8 @@ static void lima_sched_timedout_job(struct drm_sched_job *job)

	drm_sched_resubmit_jobs(&pipe->base);
	drm_sched_start(&pipe->base, true);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static void lima_sched_free_job(struct drm_sched_job *job)
+6 −3
Original line number Diff line number Diff line
@@ -432,7 +432,8 @@ static void panfrost_scheduler_start(struct panfrost_queue_state *queue)
	mutex_unlock(&queue->lock);
}

static void panfrost_job_timedout(struct drm_sched_job *sched_job)
static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job
						     *sched_job)
{
	struct panfrost_job *job = to_panfrost_job(sched_job);
	struct panfrost_device *pfdev = job->pfdev;
@@ -443,7 +444,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
	 * spurious. Bail out.
	 */
	if (dma_fence_is_signaled(job->done_fence))
		return;
		return DRM_GPU_SCHED_STAT_NOMINAL;

	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
		js,
@@ -455,11 +456,13 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)

	/* Scheduler is already stopped, nothing to do. */
	if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job))
		return;
		return DRM_GPU_SCHED_STAT_NOMINAL;

	/* Schedule a reset if there's no reset in progress. */
	if (!atomic_xchg(&pfdev->reset.pending, 1))
		schedule_work(&pfdev->reset.work);

	return DRM_GPU_SCHED_STAT_NOMINAL;
}

static const struct drm_sched_backend_ops panfrost_sched_ops = {
+1 −3
Original line number Diff line number Diff line
@@ -527,7 +527,7 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relunch job from pending ring list
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list
 *
 * @sched: scheduler instance
 *
@@ -561,8 +561,6 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
		} else {
			s_job->s_fence->parent = fence;
		}


	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
Loading