Commit e8573000 authored by Dave Airlie

Merge tag 'amd-drm-next-6.1-2022-09-23' of https://gitlab.freedesktop.org/agd5f/linux into drm-next



amd-drm-next-6.1-2022-09-23:

amdgpu:
- SDMA fix
- Add new firmware types to debugfs/IOCTL version queries
- Misc spelling and grammar fixes
- Misc code cleanups
- DCN 3.2.x fixes
- DCN 3.1.x fixes
- CS cleanup
- Gang submit support (see the sketch after this list)
- Clang fixes
- Non-DC audio fix
- GPUVM locking fixes
- Vega10 PWM fan speed fix
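
Gang submit lets a single CS ioctl carry indirect buffers (IBs) for several engines; the kernel groups them into up to AMDGPU_CS_GANG_SIZE jobs that are armed and pushed together, with the last entity acting as gang leader. Below is a minimal, hypothetical userspace sketch of such a submission: the file descriptor, context id, BO-list handle and the two GPU virtual addresses are assumed to already exist, and only the struct layout follows the amdgpu UAPI header.

	/* Hypothetical sketch: one CS ioctl carrying a GFX IB and a compute IB,
	 * which the kernel turns into a two-member gang. The VAs are assumed
	 * to be mapped in this context's VM. */
	#include <stdint.h>
	#include <string.h>
	#include <xf86drm.h>
	#include <drm/amdgpu_drm.h>

	static int submit_gang(int fd, uint32_t ctx_id, uint32_t bo_list,
			       uint64_t gfx_va, uint32_t gfx_bytes,
			       uint64_t comp_va, uint32_t comp_bytes)
	{
		struct drm_amdgpu_cs_chunk_ib ibs[2];
		struct drm_amdgpu_cs_chunk chunks[2];
		uint64_t chunk_ptrs[2];
		union drm_amdgpu_cs cs;
		int i;

		memset(ibs, 0, sizeof(ibs));
		ibs[0].ip_type  = AMDGPU_HW_IP_GFX;	/* first gang member */
		ibs[0].va_start = gfx_va;
		ibs[0].ib_bytes = gfx_bytes;
		ibs[1].ip_type  = AMDGPU_HW_IP_COMPUTE;	/* second gang member */
		ibs[1].va_start = comp_va;
		ibs[1].ib_bytes = comp_bytes;

		for (i = 0; i < 2; ++i) {
			chunks[i].chunk_id   = AMDGPU_CHUNK_ID_IB;
			chunks[i].length_dw  = sizeof(ibs[i]) / 4;
			chunks[i].chunk_data = (uintptr_t)&ibs[i];
			chunk_ptrs[i] = (uintptr_t)&chunks[i];
		}

		memset(&cs, 0, sizeof(cs));
		cs.in.ctx_id         = ctx_id;
		cs.in.bo_list_handle = bo_list;
		cs.in.num_chunks     = 2;
		cs.in.chunks         = (uintptr_t)chunk_ptrs;

		return drmIoctl(fd, DRM_IOCTL_AMDGPU_CS, &cs);
	}

Each IB chunk that resolves to a different (ip_type, ip_instance, ring) entity becomes its own gang member; IBs targeting the same entity are merged into one job, as amdgpu_cs_job_idx() in the diff below shows.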

amdkfd:
- MQD manager cleanup
- Misc spelling and grammar fixes

UAPI:
- Add new firmware types to the FW version query IOCTL (see the sketch below)
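
A minimal, hypothetical sketch of the FW version query path these new types plug into; AMDGPU_INFO_FW_GFX_ME is only a stand-in fw_type, since the specific enums added by this series are not spelled out in this summary.

	/* Hypothetical sketch: read one firmware version/feature pair via the
	 * AMDGPU info IOCTL. Any AMDGPU_INFO_FW_* type is queried the same way. */
	#include <stdint.h>
	#include <xf86drm.h>
	#include <drm/amdgpu_drm.h>

	static int query_fw_version(int fd, uint32_t *ver, uint32_t *feature)
	{
		struct drm_amdgpu_info_firmware fw = {0};
		struct drm_amdgpu_info request = {0};
		int r;

		request.return_pointer = (uintptr_t)&fw;
		request.return_size = sizeof(fw);
		request.query = AMDGPU_INFO_FW_VERSION;
		request.query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;	/* stand-in type */

		r = drmIoctl(fd, DRM_IOCTL_AMDGPU_INFO, &request);
		if (r)
			return r;

		*ver = fw.ver;
		*feature = fw.feature;
		return 0;
	}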

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220923215729.6061-1-alexander.deucher@amd.com
parents 907cc346 3e43b760
+3 −0
@@ -885,6 +885,7 @@ struct amdgpu_device {
	u64				fence_context;
	unsigned			num_rings;
	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
	struct dma_fence __rcu		*gang_submit;
	bool				ib_pool_ready;
	struct amdgpu_sa_manager	ib_pools[AMDGPU_IB_POOL_MAX];
	struct amdgpu_sched		gpu_sched[AMDGPU_HW_IP_NUM][AMDGPU_RING_PRIO_MAX];
@@ -1294,6 +1295,8 @@ u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
				u32 reg);
void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
				u32 reg, u32 v);
struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
					    struct dma_fence *gang);

/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
+1 −0
@@ -686,6 +686,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
	ib->length_dw = ib_len;
	/* This works for NO_HWS. TODO: need to handle without knowing VMID */
	job->vmid = vmid;
	job->num_ibs = 1;

	ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);

+6 −1
@@ -1674,10 +1674,12 @@ amdgpu_connector_add(struct amdgpu_device *adev,
						   adev->mode_info.dither_property,
						   AMDGPU_FMT_DITHER_DISABLE);

			if (amdgpu_audio != 0)
			if (amdgpu_audio != 0) {
				drm_object_attach_property(&amdgpu_connector->base.base,
							   adev->mode_info.audio_property,
							   AMDGPU_AUDIO_AUTO);
				amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
			}

			subpixel_order = SubPixelHorizontalRGB;
			connector->interlace_allowed = true;
@@ -1799,6 +1801,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
				drm_object_attach_property(&amdgpu_connector->base.base,
							   adev->mode_info.audio_property,
							   AMDGPU_AUDIO_AUTO);
				amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
			}
			drm_object_attach_property(&amdgpu_connector->base.base,
						   adev->mode_info.dither_property,
@@ -1852,6 +1855,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
				drm_object_attach_property(&amdgpu_connector->base.base,
							   adev->mode_info.audio_property,
							   AMDGPU_AUDIO_AUTO);
				amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
			}
			drm_object_attach_property(&amdgpu_connector->base.base,
						   adev->mode_info.dither_property,
@@ -1902,6 +1906,7 @@ amdgpu_connector_add(struct amdgpu_device *adev,
				drm_object_attach_property(&amdgpu_connector->base.base,
							   adev->mode_info.audio_property,
							   AMDGPU_AUDIO_AUTO);
				amdgpu_connector->audio = AMDGPU_AUDIO_AUTO;
			}
			drm_object_attach_property(&amdgpu_connector->base.base,
						   adev->mode_info.dither_property,
+338 −269
@@ -64,11 +64,51 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
	return 0;
}

static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
			     struct drm_amdgpu_cs_chunk_ib *chunk_ib)
{
	struct drm_sched_entity *entity;
	unsigned int i;
	int r;

	r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
				  chunk_ib->ip_instance,
				  chunk_ib->ring, &entity);
	if (r)
		return r;

	/*
	 * Abort if there is no run queue associated with this entity.
	 * Possibly because of disabled HW IP.
	 */
	if (entity->rq == NULL)
		return -EINVAL;

	/* Check if we can add this IB to some existing job */
	for (i = 0; i < p->gang_size; ++i)
		if (p->entities[i] == entity)
			return i;

	/* If not increase the gang size if possible */
	if (i == AMDGPU_CS_GANG_SIZE)
		return -EINVAL;

	p->entities[i] = entity;
	p->gang_size = i + 1;
	return i;
}

static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
			   struct drm_amdgpu_cs_chunk_ib *chunk_ib,
			   unsigned int *num_ibs)
{
	++(*num_ibs);
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

	++(num_ibs[r]);
	return 0;
}

@@ -142,11 +182,12 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			   union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
	struct amdgpu_vm *vm = &fpriv->vm;
	uint64_t *chunk_array_user;
	uint64_t *chunk_array;
	unsigned size, num_ibs = 0;
	uint32_t uf_offset = 0;
	unsigned int size;
	int ret;
	int i;

@@ -209,7 +250,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
			if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
				goto free_partial_kdata;

			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs);
			ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
			if (ret)
				goto free_partial_kdata;
			break;
@@ -246,17 +287,28 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
		}
	}

	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
	if (!p->gang_size)
		return -EINVAL;

	for (i = 0; i < p->gang_size; ++i) {
		ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
		if (ret)
			goto free_all_kdata;

	if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
		ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
					 &fpriv->vm);
		if (ret)
			goto free_all_kdata;
	}
	p->gang_leader = p->jobs[p->gang_size - 1];

	if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
		ret = -ECANCELED;
		goto free_all_kdata;
	}

	if (p->uf_entry.tv.bo)
		p->job->uf_addr = uf_offset;
		p->gang_leader->uf_addr = uf_offset;
	kvfree(chunk_array);

	/* Use this opportunity to fill in task info for the vm */
@@ -278,62 +330,48 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
	return ret;
}

static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
			     struct amdgpu_cs_parser *parser)
static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
			   struct amdgpu_cs_chunk *chunk,
			   unsigned int *ce_preempt,
			   unsigned int *de_preempt)
{
	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
	struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_vm *vm = &fpriv->vm;
	int r, ce_preempt = 0, de_preempt = 0;
	struct amdgpu_ring *ring;
	int i, j;

	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
		struct amdgpu_cs_chunk *chunk;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
		struct drm_sched_entity *entity;
	int r;

	r = amdgpu_cs_job_idx(p, chunk_ib);
	if (r < 0)
		return r;

		chunk = &parser->chunks[i];
		ib = &parser->job->ibs[j];
		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
	job = p->jobs[r];
	ring = amdgpu_job_ring(job);
	ib = &job->ibs[job->num_ibs++];

		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
			continue;
	/* MM engine doesn't support user fences */
	if (p->uf_entry.tv.bo && ring->funcs->no_user_fence)
		return -EINVAL;

	if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
	    chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
		if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
				ce_preempt++;
			(*ce_preempt)++;
		else
				de_preempt++;
			(*de_preempt)++;

			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
			if (ce_preempt > 1 || de_preempt > 1)
		/* Each GFX command submit allows only 1 IB max
		 * preemptible for CE & DE */
		if (*ce_preempt > 1 || *de_preempt > 1)
			return -EINVAL;
	}

		r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
					  chunk_ib->ip_instance, chunk_ib->ring,
					  &entity);
		if (r)
			return r;

	if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
			parser->job->preamble_status |=
				AMDGPU_PREAMBLE_IB_PRESENT;

		if (parser->entity && parser->entity != entity)
			return -EINVAL;
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;

		/* Return if there is no run queue associated with this entity.
		 * Possibly because of disabled HW IP*/
		if (entity->rq == NULL)
			return -EINVAL;

		parser->entity = entity;

		ring = to_amdgpu_ring(entity->rq->sched);
		r =  amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
	r =  amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
			   chunk_ib->ib_bytes : 0,
			   AMDGPU_IB_POOL_DELAYED, ib);
	if (r) {
@@ -344,27 +382,17 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
	ib->gpu_addr = chunk_ib->va_start;
	ib->length_dw = chunk_ib->ib_bytes / 4;
	ib->flags = chunk_ib->flags;

		j++;
	}

	/* MM engine doesn't support user fences */
	ring = to_amdgpu_ring(parser->entity->rq->sched);
	if (parser->job->uf_addr && ring->funcs->no_user_fence)
		return -EINVAL;

	return 0;
}

static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
				     struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	unsigned num_deps;
	int i, r;
	struct drm_amdgpu_cs_chunk_dep *deps;

	deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_dep);

@@ -402,7 +430,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
			dma_fence_put(old);
		}

		r = amdgpu_sync_fence(&p->job->sync, fence);
		r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
		dma_fence_put(fence);
		if (r)
			return r;
@@ -410,7 +438,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
	return 0;
}

static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
					 uint32_t handle, u64 point,
					 u64 flags)
{
@@ -424,25 +452,23 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
		return r;
	}

	r = amdgpu_sync_fence(&p->job->sync, fence);
	r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
	dma_fence_put(fence);

	return r;
}

static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
				   struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned num_deps;
	int i, r;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
							  0, 0);
		r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
		if (r)
			return r;
	}
@@ -450,19 +476,17 @@ static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
	return 0;
}

static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
					      struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned num_deps;
	int i, r;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);
	for (i = 0; i < num_deps; ++i) {
		r = amdgpu_syncobj_lookup_and_add_to_sync(p,
							  syncobj_deps[i].handle,
		r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
						  syncobj_deps[i].point,
						  syncobj_deps[i].flags);
		if (r)
@@ -472,14 +496,13 @@ static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
	return 0;
}

static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
				    struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_sem *deps;
	struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
	unsigned num_deps;
	int i;

	deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_sem);

@@ -507,15 +530,13 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
	return 0;
}


static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
						struct amdgpu_cs_chunk *chunk)
{
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
	struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
	unsigned num_deps;
	int i;

	syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
	num_deps = chunk->length_dw * 4 /
		sizeof(struct drm_amdgpu_cs_chunk_syncobj);

@@ -552,9 +573,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
	return 0;
}

static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
				  struct amdgpu_cs_parser *p)
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
	unsigned int ce_preempt = 0, de_preempt = 0;
	int i, r;

	for (i = 0; i < p->nchunks; ++i) {
@@ -563,29 +584,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
		chunk = &p->chunks[i];

		switch (chunk->chunk_id) {
		case AMDGPU_CHUNK_ID_IB:
			r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_DEPENDENCIES:
		case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
			r = amdgpu_cs_process_fence_dep(p, chunk);
			r = amdgpu_cs_p2_dependencies(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
			r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
			r = amdgpu_cs_p2_syncobj_in(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
			r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
			r = amdgpu_cs_p2_syncobj_out(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
			r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
			r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
			if (r)
				return r;
			break;
		case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
			r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
			r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
			if (r)
				return r;
			break;
@@ -830,6 +856,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct list_head duplicates;
	unsigned int i;
	int r;

	INIT_LIST_HEAD(&p->validated);
@@ -913,16 +940,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
		e->bo_va = amdgpu_vm_bo_find(vm, bo);
	}

	/* Move fence waiting after getting reservation lock of
	 * PD root. Then there is no need on a ctx mutex lock.
	 */
	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
	if (unlikely(r != 0)) {
		if (r != -ERESTARTSYS)
			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
		goto error_validate;
	}

	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
					  &p->bytes_moved_vis_threshold);
	p->bytes_moved = 0;
@@ -943,25 +960,29 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
	if (r)
		goto error_validate;

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	amdgpu_job_set_resources(p->job, p->bo_list->gds_obj,
				 p->bo_list->gws_obj, p->bo_list->oa_obj);

	if (!r && p->uf_entry.tv.bo) {
	if (p->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);

		r = amdgpu_ttm_alloc_gart(&uf->tbo);
		p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
		if (r)
			goto error_validate;

		p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf);
	}

	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
				     p->bytes_moved_vis);

	for (i = 0; i < p->gang_size; ++i)
		amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
					 p->bo_list->gws_obj,
					 p->bo_list->oa_obj);
	return 0;

error_validate:
	if (r)
	ttm_eu_backoff_reservation(&p->ticket, &p->validated);

out_free_user_pages:
	if (r) {
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

@@ -971,61 +992,50 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
		kvfree(e->user_pages);
		e->user_pages = NULL;
	}
		mutex_unlock(&p->bo_list->bo_list_mutex);
	}
	return r;
}

static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
	int i;
	int i, j;

	if (!trace_amdgpu_cs_enabled())
		return;

	for (i = 0; i < parser->job->num_ibs; i++)
		trace_amdgpu_cs(parser, i);
	for (i = 0; i < p->gang_size; ++i) {
		struct amdgpu_job *job = p->jobs[i];

		for (j = 0; j < job->num_ibs; ++j)
			trace_amdgpu_cs(p, job, &job->ibs[j]);
	}
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
			       struct amdgpu_job *job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	struct amdgpu_ring *ring = amdgpu_job_ring(job);
	unsigned int i;
	int r;

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
		unsigned i, j;
	if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
		return 0;

		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct drm_amdgpu_cs_chunk_ib *chunk_ib;
	for (i = 0; i < job->num_ibs; ++i) {
		struct amdgpu_ib *ib = &job->ibs[i];
		struct amdgpu_bo_va_mapping *m;
			struct amdgpu_bo *aobj = NULL;
			struct amdgpu_cs_chunk *chunk;
			uint64_t offset, va_start;
			struct amdgpu_ib *ib;
		struct amdgpu_bo *aobj;
		uint64_t va_start;
		uint8_t *kptr;

			chunk = &p->chunks[i];
			ib = &p->job->ibs[j];
			chunk_ib = chunk->kdata;

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
		va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
		r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
		if (r) {
			DRM_ERROR("IB va_start is invalid\n");
			return r;
		}

			if ((va_start + chunk_ib->ib_bytes) >
		if ((va_start + ib->length_dw * 4) >
		    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
			DRM_ERROR("IB va_start+ib_bytes is invalid\n");
			return -EINVAL;
@@ -1037,30 +1047,51 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
			return r;
		}

			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
			kptr += va_start - offset;
		kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);

		if (ring->funcs->parse_cs) {
				memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
			memcpy(ib->ptr, kptr, ib->length_dw * 4);
			amdgpu_bo_kunmap(aobj);

				r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
			r = amdgpu_ring_parse_cs(ring, p, job, ib);
			if (r)
				return r;
		} else {
			ib->ptr = (uint32_t *)kptr;
				r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
			r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
			amdgpu_bo_kunmap(aobj);
			if (r)
				return r;
		}

			j++;
	}

	return 0;
}

	if (!p->job->vm)
static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
{
	unsigned int i;
	int r;

	for (i = 0; i < p->gang_size; ++i) {
		r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
		if (r)
			return r;
	}
	return 0;
}

static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *job = p->gang_leader;
	struct amdgpu_device *adev = p->adev;
	struct amdgpu_vm *vm = &fpriv->vm;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_bo_va *bo_va;
	struct amdgpu_bo *bo;
	unsigned int i;
	int r;

	r = amdgpu_vm_clear_freed(adev, vm, NULL);
	if (r)
@@ -1070,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
	r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
	if (r)
		return r;

@@ -1081,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}
@@ -1100,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
		if (r)
			return r;

		r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
		if (r)
			return r;
	}
@@ -1113,11 +1144,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
	if (r)
		return r;

	r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
	r = amdgpu_sync_fence(&job->sync, vm->last_update);
	if (r)
		return r;

	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	for (i = 0; i < p->gang_size; ++i) {
		job = p->jobs[i];

		if (!job->vm)
			continue;

		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
	}

	if (amdgpu_vm_debug) {
		/* Invalidate all BOs to test for userspace bugs */
@@ -1138,7 +1176,9 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	unsigned int i;
	int r;

	list_for_each_entry(e, &p->validated, tv.head) {
@@ -1148,12 +1188,23 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)

		sync_mode = amdgpu_bo_explicit_sync(bo) ?
			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
		r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
		r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
				     &fpriv->vm);
		if (r)
			return r;
	}
	return 0;

	for (i = 0; i < p->gang_size - 1; ++i) {
		r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
		if (r)
			return r;
	}

	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
	if (r && r != -ERESTARTSYS)
		DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");

	return r;
}

static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1177,20 +1228,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
			    union drm_amdgpu_cs *cs)
{
	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
	struct drm_sched_entity *entity = p->entity;
	struct amdgpu_job *leader = p->gang_leader;
	struct amdgpu_bo_list_entry *e;
	struct amdgpu_job *job;
	unsigned int i;
	uint64_t seq;
	int r;

	job = p->job;
	p->job = NULL;
	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_arm(&p->jobs[i]->base);

	r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
	for (i = 0; i < (p->gang_size - 1); ++i) {
		struct dma_fence *fence;

		fence = &p->jobs[i]->base.s_fence->scheduled;
		r = amdgpu_sync_fence(&leader->sync, fence);
		if (r)
		goto error_unlock;
			goto error_cleanup;
	}

	drm_sched_job_arm(&job->base);
	if (p->gang_size > 1) {
		for (i = 0; i < p->gang_size; ++i)
			amdgpu_job_set_gang_leader(p->jobs[i], leader);
	}

	/* No memory allocation is allowed while holding the notifier lock.
	 * The lock is held until amdgpu_cs_submit is finished and fence is
@@ -1201,6 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
	 * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
	 */
	r = 0;
	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

@@ -1208,62 +1268,65 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
	}
	if (r) {
		r = -EAGAIN;
		goto error_abort;
		goto error_unlock;
	}

	p->fence = dma_fence_get(&leader->base.s_fence->finished);
	list_for_each_entry(e, &p->validated, tv.head) {

		/* Everybody except for the gang leader uses READ */
		for (i = 0; i < (p->gang_size - 1); ++i) {
			dma_resv_add_fence(e->tv.bo->base.resv,
					   &p->jobs[i]->base.s_fence->finished,
					   DMA_RESV_USAGE_READ);
		}

	p->fence = dma_fence_get(&job->base.s_fence->finished);
		/* The gang leader is remembered as writer */
		e->tv.num_shared = 0;
	}

	seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
				   p->fence);
	amdgpu_cs_post_dependencies(p);

	if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
	    !p->ctx->preamble_presented) {
		job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
		p->ctx->preamble_presented = true;
	}

	cs->out.handle = seq;
	job->uf_sequence = seq;

	amdgpu_job_free_resources(job);
	leader->uf_sequence = seq;

	trace_amdgpu_cs_ioctl(job);
	amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
	drm_sched_entity_push_job(&job->base);
	for (i = 0; i < p->gang_size; ++i) {
		amdgpu_job_free_resources(p->jobs[i]);
		trace_amdgpu_cs_ioctl(p->jobs[i]);
		drm_sched_entity_push_job(&p->jobs[i]->base);
		p->jobs[i] = NULL;
	}

	amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);

	/* Make sure all BOs are remembered as writers */
	amdgpu_bo_list_for_each_entry(e, p->bo_list)
		e->tv.num_shared = 0;

	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);

	mutex_unlock(&p->adev->notifier_lock);
	mutex_unlock(&p->bo_list->bo_list_mutex);

	return 0;

error_abort:
	drm_sched_job_cleanup(&job->base);
error_unlock:
	mutex_unlock(&p->adev->notifier_lock);

error_unlock:
	amdgpu_job_free(job);
error_cleanup:
	for (i = 0; i < p->gang_size; ++i)
		drm_sched_job_cleanup(&p->jobs[i]->base);
	return r;
}

/* Cleanup the parser structure */
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
				  bool backoff)
static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
	unsigned i;

	if (error && backoff) {
		ttm_eu_backoff_reservation(&parser->ticket,
					   &parser->validated);
		mutex_unlock(&parser->bo_list->bo_list_mutex);
	}

	for (i = 0; i < parser->num_post_deps; i++) {
		drm_syncobj_put(parser->post_deps[i].syncobj);
		kfree(parser->post_deps[i].chain);
@@ -1280,8 +1343,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
	for (i = 0; i < parser->nchunks; i++)
		kvfree(parser->chunks[i].kdata);
	kvfree(parser->chunks);
	if (parser->job)
		amdgpu_job_free(parser->job);
	for (i = 0; i < parser->gang_size; ++i) {
		if (parser->jobs[i])
			amdgpu_job_free(parser->jobs[i]);
	}
	if (parser->uf_entry.tv.bo) {
		struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);

@@ -1293,7 +1358,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_cs_parser parser;
	bool reserved_buffers = false;
	int r;

	if (amdgpu_ras_intr_triggered())
@@ -1306,22 +1370,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
	if (r) {
		if (printk_ratelimit())
			DRM_ERROR("Failed to initialize parser %d!\n", r);
		goto out;
		return r;
	}

	r = amdgpu_cs_pass1(&parser, data);
	if (r)
		goto out;
		goto error_fini;

	r = amdgpu_cs_ib_fill(adev, &parser);
	r = amdgpu_cs_pass2(&parser);
	if (r)
		goto out;

	r = amdgpu_cs_dependencies(adev, &parser);
	if (r) {
		DRM_ERROR("Failed in the dependencies handling %d!\n", r);
		goto out;
	}
		goto error_fini;

	r = amdgpu_cs_parser_bos(&parser, data);
	if (r) {
@@ -1329,25 +1387,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
			DRM_ERROR("Not enough memory for command submission!\n");
		else if (r != -ERESTARTSYS && r != -EAGAIN)
			DRM_ERROR("Failed to process the buffer list %d!\n", r);
		goto out;
		goto error_fini;
	}

	reserved_buffers = true;

	trace_amdgpu_cs_ibs(&parser);
	r = amdgpu_cs_patch_jobs(&parser);
	if (r)
		goto error_backoff;

	r = amdgpu_cs_vm_handling(&parser);
	if (r)
		goto out;
		goto error_backoff;

	r = amdgpu_cs_sync_rings(&parser);
	if (r)
		goto out;
		goto error_backoff;

	trace_amdgpu_cs_ibs(&parser);

	r = amdgpu_cs_submit(&parser, data);
out:
	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
	if (r)
		goto error_backoff;

	amdgpu_cs_parser_fini(&parser);
	return 0;

error_backoff:
	ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
	mutex_unlock(&parser.bo_list->bo_list_mutex);

error_fini:
	amdgpu_cs_parser_fini(&parser);
	return r;
}

+7 −3 (diff collapsed: preview size limit exceeded)