Commit 5fd8518d authored by Andrey Grodzovsky's avatar Andrey Grodzovsky
Browse files

drm/amdgpu: Move scheduler init to after XGMI is ready



Before we initialize the schedulers we must know which reset
domain we are in - for a single device there is a single
domain per device, and so a single wq per device. For XGMI
the reset domain spans the entire XGMI hive, and so the
reset wq is per hive.

Signed-off-by: default avatarAndrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/amd-gfx/msg74112.html
parent a4c63caf
Loading
Loading
Loading
Loading
+45 −0
Original line number Diff line number Diff line
@@ -2287,6 +2287,47 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
	return r;
}

/**
 * amdgpu_device_init_schedulers - create a DRM GPU scheduler per ring
 * @adev: amdgpu device pointer
 *
 * Walks every ring on the device and initializes its scheduler with a
 * ring-type specific job timeout, attaching it to the reset domain's
 * workqueue so that resets are serialized per domain.
 *
 * Returns 0 on success, or the negative error code from drm_sched_init().
 */
static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
{
	long sched_timeout;
	int ret, ring_idx;

	for (ring_idx = 0; ring_idx < AMDGPU_MAX_RINGS; ++ring_idx) {
		struct amdgpu_ring *ring = adev->rings[ring_idx];

		/* Skip unused slots and rings that bypass the GPU scheduler */
		if (!ring || ring->no_scheduler)
			continue;

		/* Pick the job timeout matching this ring's engine type;
		 * anything that is not GFX/compute/SDMA uses the video timeout.
		 */
		if (ring->funcs->type == AMDGPU_RING_TYPE_GFX)
			sched_timeout = adev->gfx_timeout;
		else if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			sched_timeout = adev->compute_timeout;
		else if (ring->funcs->type == AMDGPU_RING_TYPE_SDMA)
			sched_timeout = adev->sdma_timeout;
		else
			sched_timeout = adev->video_timeout;

		ret = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
				     ring->num_hw_submission, amdgpu_job_hang_limit,
				     sched_timeout, adev->reset_domain.wq,
				     ring->sched_score, ring->name);
		if (ret) {
			DRM_ERROR("Failed to create scheduler on ring %s.\n",
				  ring->name);
			return ret;
		}
	}

	return 0;
}


/**
 * amdgpu_device_ip_init - run init for hardware IPs
 *
@@ -2419,6 +2460,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
		}
	}

	r = amdgpu_device_init_schedulers(adev);
	if (r)
		goto init_failed;

	/* Don't init kfd if whole hive need to be reset during init */
	if (!adev->gmc.xgmi.pending_reset)
		amdgpu_amdkfd_device_init(adev);
+5 −38
Original line number Diff line number Diff line
@@ -446,24 +446,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
 * for the requested ring.
 *
 * @ring: ring to init the fence driver on
 * @num_hw_submission: number of entries on the hardware queue
 * @sched_score: optional score atomic shared with other schedulers
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for amdgpu_fence_driver_init().
 */
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
				  unsigned num_hw_submission,
				  atomic_t *sched_score)
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	long timeout;
	int r;

	if (!adev)
		return -EINVAL;

	if (!is_power_of_2(num_hw_submission))
	if (!is_power_of_2(ring->num_hw_submission))
		return -EINVAL;

	ring->fence_drv.cpu_addr = NULL;
@@ -474,41 +468,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,

	timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);

	ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
	ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1;
	spin_lock_init(&ring->fence_drv.lock);
	ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
	ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *),
					 GFP_KERNEL);

	if (!ring->fence_drv.fences)
		return -ENOMEM;

	/* No need to setup the GPU scheduler for rings that don't need it */
	if (ring->no_scheduler)
		return 0;

	switch (ring->funcs->type) {
	case AMDGPU_RING_TYPE_GFX:
		timeout = adev->gfx_timeout;
		break;
	case AMDGPU_RING_TYPE_COMPUTE:
		timeout = adev->compute_timeout;
		break;
	case AMDGPU_RING_TYPE_SDMA:
		timeout = adev->sdma_timeout;
		break;
	default:
		timeout = adev->video_timeout;
		break;
	}

	r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
			   num_hw_submission, amdgpu_job_hang_limit,
			   timeout, NULL, sched_score, ring->name);
	if (r) {
		DRM_ERROR("Failed to create scheduler on ring %s.\n",
			  ring->name);
		return r;
	}

	return 0;
}

+3 −2
Original line number Diff line number Diff line
@@ -191,8 +191,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
		ring->adev = adev;
		ring->idx = adev->num_rings++;
		adev->rings[ring->idx] = ring;
		r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission,
						  sched_score);
		ring->num_hw_submission = sched_hw_submission;
		ring->sched_score = sched_score;
		r = amdgpu_fence_driver_init_ring(ring);
		if (r)
			return r;
	}
+3 −3
Original line number Diff line number Diff line
@@ -114,9 +114,7 @@ struct amdgpu_fence_driver {
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);

int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
				  unsigned num_hw_submission,
				  atomic_t *sched_score);
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq_src,
				   unsigned irq_type);
@@ -251,6 +249,8 @@ struct amdgpu_ring {
	bool			has_compute_vm_bug;
	bool			no_scheduler;
	int			hw_prio;
	unsigned 		num_hw_submission;
	atomic_t		*sched_score;
};

#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))