Commit 977f7e10 authored by Nirmoy Das, committed by Alex Deucher

drm/amdgpu: allocate entities on demand



Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.

This patch avoids that waste by creating an entity only when it is
needed.

v2: allocate memory for entity and fences together

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 18c6b74e
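
[Editor's note] The v2 line above refers to a single-allocation idiom: by making the fence table a C flexible array member at the end of struct amdgpu_ctx_entity, one kcalloc() covers the struct and all of its fence slots. Below is a minimal, self-contained userspace sketch of that idiom; struct and constant names are hypothetical stand-ins, and only the allocation shape mirrors the patch.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fence;				/* stand-in for struct dma_fence */

enum { SCHED_JOBS = 32 };		/* stand-in for amdgpu_sched_jobs */

struct ctx_entity {
	uint64_t	sequence;
	struct fence	*fences[];	/* flexible array member, sized at allocation */
};

int main(void)
{
	/* One allocation covers the header and the fence slots, the same
	 * shape as kcalloc(1, offsetof(typeof(*entity),
	 * fences[amdgpu_sched_jobs]), GFP_KERNEL) in the patch. */
	struct ctx_entity *entity =
		calloc(1, offsetof(struct ctx_entity, fences[SCHED_JOBS]));

	if (!entity)
		return 1;

	entity->sequence = 1;		/* first valid sequence number, as in the patch */
	printf("%zu bytes: header plus %d fence pointers\n",
	       offsetof(struct ctx_entity, fences[SCHED_JOBS]), (int)SCHED_JOBS);
	free(entity);
	return 0;
}

The old code needed two allocations per entity (the entity array plus a separate fences array); the flexible array removes the second allocation and the extra pointer chase.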
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  +120 −115
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
 	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
 	int r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
-
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
-
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
-
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
+	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+			 GFP_KERNEL);
+	if (!entity)
+		return  -ENOMEM;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
-		}
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-			amdgpu_ctx_num_entities[i - 1];
-
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
-
-		switch (i) {
+	entity->sequence = 1;
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 		case AMDGPU_HW_IP_GFX:
 			sched = &adev->gfx.gfx_ring[0].sched;
 			scheds = &sched;
@@ -168,61 +121,88 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			break;
 	}
 
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
-	}
+	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
 
+	ctx->entities[hw_ip][ring] = entity;
 	return 0;
 
-error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+	kfree(entity);
 
-error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		kfree(entity->fences);
-		entity->fences = NULL;
-	}
-
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
 	return r;
 }
 
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
+
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+	return 0;
+
+}
+
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+	int i;
+
+	if (!entity)
+		return;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
+
+	kfree(entity);
+}
+
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
-
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
 	}
 
-	kfree(ctx->entities[0]);
-
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	*entity = &ctx->entities[hw_ip][ring].entity;
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
 	return 0;
 }
 
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
-
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
+
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,21 +505,25 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
-
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct drm_sched_entity *entity;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			if (!ctx->entities[i][j])
+				continue;
+
+			entity = &ctx->entities[i][j]->entity;
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
 
 int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
 			       struct drm_sched_entity *entity)
@@ -564,32 +557,35 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
 	return timeout;
 }
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }

drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  +4 −2
@@ -29,10 +29,12 @@ struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
 
+#define AMDGPU_MAX_ENTITY_NUM 4
+
 struct amdgpu_ctx_entity {
 	uint64_t		sequence;
-	struct dma_fence	**fences;
 	struct drm_sched_entity	entity;
+	struct dma_fence	*fences[];
 };
 
 struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
 	unsigned			reset_counter_query;
 	uint32_t			vram_lost_counter;
 	spinlock_t			ring_lock;
-	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
+	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
 	bool				preamble_presented;
 	enum drm_sched_priority		init_priority;
 	enum drm_sched_priority		override_priority;
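
[Editor's note] Taken together, the header change and the amdgpu_ctx_get_entity() hunk turn entity lookup into a get-or-create operation over a fixed, NULL-initialized [hw_ip][ring] table. Below is a minimal userspace sketch of that pattern; the types, bounds, and helper names are simplified stand-ins, and only the control flow mirrors the patch.

#include <errno.h>
#include <stdlib.h>

#define HW_IP_NUM	9	/* stand-in for AMDGPU_HW_IP_NUM */
#define MAX_ENTITY_NUM	4	/* mirrors AMDGPU_MAX_ENTITY_NUM */

struct entity { unsigned long sequence; };

struct ctx {
	/* slots stay NULL until first use */
	struct entity *entities[HW_IP_NUM][MAX_ENTITY_NUM];
};

/* create the entity backing one (hw_ip, ring) slot */
static int ctx_init_entity(struct ctx *ctx, unsigned hw_ip, unsigned ring)
{
	struct entity *e = calloc(1, sizeof(*e));

	if (!e)
		return -ENOMEM;
	e->sequence = 1;
	ctx->entities[hw_ip][ring] = e;
	return 0;
}

/* look up an entity, creating it on first use */
static int ctx_get_entity(struct ctx *ctx, unsigned hw_ip, unsigned ring,
			  struct entity **out)
{
	int r;

	if (hw_ip >= HW_IP_NUM || ring >= MAX_ENTITY_NUM)
		return -EINVAL;

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	*out = ctx->entities[hw_ip][ring];
	return 0;
}

int main(void)
{
	struct ctx ctx = {0};
	struct entity *e;

	return ctx_get_entity(&ctx, 0, 0, &e);	/* allocates on first call */
}

The flip side of lazy creation is that teardown must tolerate NULL slots, which is why every loop in the patch either skips !ctx->entities[i][j] or funnels through amdgpu_ctx_fini_entity(), which returns early on NULL.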