Commit 2f77b9a2 authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher
Browse files

drm/amdkfd: Update MQD management on multi XCC setup



Update MQD management for both HIQ and user-mode compute
queues on a multi XCC setup. MQDs need to be allocated,
initialized, loaded and destroyed for each XCC in the KFD
node.

v2: squash in fix "drm/amdkfd: Fix SDMA+HIQ HQD allocation on GFX9.4.3"

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 74c5b85d
Loading
Loading
Loading
Loading
+36 −15
Original line number Diff line number Diff line
@@ -800,6 +800,41 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
	sg_free_table(ttm->sg);
}

/*
 * Bind a GFX9 MQD buffer object into the GART, one XCC slice at a time.
 *
 * The pages of the BO form consecutive, equally sized slices
 * (MQD0+CtrlStack0, MQD1+CtrlStack1, ...), one slice per XCC in the
 * partition; every slice covers ttm->num_pages / num_xcc pages.
 * Inside a slice the first page is the MQD and keeps the caller's
 * flags (default memory type, UC). All remaining pages of the slice
 * are the control stack and get their memory type switched to NC.
 */
static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev,
				struct ttm_tt *ttm, uint64_t flags)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	/* Treat an unset XCC count as a single XCC. */
	int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp);
	uint64_t slice_pages = (uint64_t)ttm->num_pages / num_xcc;
	uint64_t nc_flags = flags & ~AMDGPU_PTE_MTYPE_VG10_MASK;
	uint64_t mqd_page = 0;
	int xcc;

	/* Same PTE flags as the caller's, but with the MTYPE field set to NC. */
	nc_flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);

	for (xcc = 0; xcc < num_xcc; xcc++) {
		uint64_t stack_page = mqd_page + 1;

		/* First page of the slice: the MQD, bound with unmodified flags. */
		amdgpu_gart_bind(adev,
				gtt->offset + (mqd_page << PAGE_SHIFT),
				1, &gtt->ttm.dma_address[mqd_page], flags);

		/* Rest of the slice: the control stack, bound as NC. */
		amdgpu_gart_bind(adev,
				gtt->offset + (stack_page << PAGE_SHIFT),
				slice_pages - 1,
				&gtt->ttm.dma_address[stack_page],
				nc_flags);

		/* Advance to the MQD page of the next XCC's slice. */
		mqd_page += slice_pages;
	}
}

static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
				 struct ttm_buffer_object *tbo,
				 uint64_t flags)
@@ -812,21 +847,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
		flags |= AMDGPU_PTE_TMZ;

	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
		uint64_t page_idx = 1;

		amdgpu_gart_bind(adev, gtt->offset, page_idx,
				 gtt->ttm.dma_address, flags);

		/* The memory type of the first page defaults to UC. Now
		 * modify the memory type to NC from the second page of
		 * the BO onward.
		 */
		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);

		amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT),
				 ttm->num_pages - page_idx,
				 &(gtt->ttm.dma_address[page_idx]), flags);
		amdgpu_ttm_gart_bind_gfx9_mqd(adev, ttm, flags);
	} else {
		amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
				 gtt->ttm.dma_address, flags);
+2 −1
Original line number Diff line number Diff line
@@ -2247,7 +2247,8 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->kfd->device_info.num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
		(dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		dqm->dev->num_xcc_per_node);

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
+27 −1
Original line number Diff line number Diff line
@@ -76,7 +76,8 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_node *dev,
		q->sdma_queue_id) *
		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size;

	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
	offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
		  dev->num_xcc_per_node;

	mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem
				+ offset);
@@ -246,3 +247,28 @@ bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
{
	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);
}

uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev)
{
	return dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
}

/*
 * Fill @mqd_mem_obj so that it describes the HIQ MQD belonging to
 * @virtual_xcc_id inside the node's hiq_sdma_mqd buffer.
 *
 * The GPU address and CPU pointer are the buffer's base values advanced
 * by virtual_xcc_id HIQ MQD strides. The gtt_mem handle is handed out
 * for XCC 0 only; every other XCC gets NULL.
 * NOTE(review): presumably this keeps a single owner for the shared
 * allocation - confirm against the code that frees the MQD.
 */
void kfd_get_hiq_xcc_mqd(struct kfd_node *dev, struct kfd_mem_obj *mqd_mem_obj,
		     uint32_t virtual_xcc_id)
{
	uint64_t offset = kfd_hiq_mqd_stride(dev) * virtual_xcc_id;

	if (virtual_xcc_id == 0)
		mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem;
	else
		mqd_mem_obj->gtt_mem = NULL;

	mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset;

	/* Byte-wise pointer arithmetic done through uintptr_t. */
	mqd_mem_obj->cpu_ptr = (uint32_t *)((uintptr_t)
				dev->dqm->hiq_sdma_mqd.cpu_ptr + offset);
}

/*
 * Default mqd_stride callback: the stride is simply the manager's
 * mqd_size. The queue properties argument is not consulted.
 */
uint64_t kfd_mqd_stride(struct mqd_manager *mm,
			struct queue_properties *q)
{
	uint64_t stride = mm->mqd_size;

	return stride;
}
+8 −0
Original line number Diff line number Diff line
@@ -119,6 +119,8 @@ struct mqd_manager {
	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
#endif
	uint32_t (*read_doorbell_id)(void *mqd);
	uint64_t (*mqd_stride)(struct mqd_manager *mm,
				struct queue_properties *p);

	struct mutex	mqd_mutex;
	struct kfd_node	*dev;
@@ -164,4 +166,10 @@ bool kfd_is_occupied_sdma(struct mqd_manager *mm, void *mqd,
		uint64_t queue_address, uint32_t pipe_id,
		uint32_t queue_id);

/*
 * Fill mqd_mem_obj with the location of the HIQ MQD for virtual_xcc_id
 * inside the node's hiq_sdma_mqd buffer (base + stride * virtual_xcc_id).
 */
void kfd_get_hiq_xcc_mqd(struct kfd_node *dev,
		struct kfd_mem_obj *mqd_mem_obj, uint32_t virtual_xcc_id);

/* Stride between HIQ MQDs: mqd_size of the node's HIQ MQD manager. */
uint64_t kfd_hiq_mqd_stride(struct kfd_node *dev);
/* Generic mqd_stride implementation: returns mm->mqd_size; q is unused. */
uint64_t kfd_mqd_stride(struct mqd_manager *mm,
			struct queue_properties *q);
#endif /* KFD_MQD_MANAGER_H_ */
+3 −0
Original line number Diff line number Diff line
@@ -428,6 +428,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct cik_mqd);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -442,6 +443,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
		mqd->destroy_mqd = kfd_destroy_mqd_cp;
		mqd->is_occupied = kfd_is_occupied_cp;
		mqd->mqd_size = sizeof(struct cik_mqd);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
@@ -457,6 +459,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
		mqd->checkpoint_mqd = checkpoint_mqd_sdma;
		mqd->restore_mqd = restore_mqd_sdma;
		mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers);
		mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
		mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
Loading