Commit 5ac3c3e4 authored by Felix Kuehling, committed by Alex Deucher
Browse files

drm/amdgpu: Add DMA mapping of GTT BOs



Use DMABufs with dynamic attachment to DMA-map GTT BOs on other GPUs.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oak Zeng <Oak.Zeng@amd.com>
Acked-by: Ramesh Errabolu <Ramesh.Errabolu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 9e5d2753
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@ struct amdgpu_device;
/* How a kfd_mem_attachment obtains the BO it maps. */
enum kfd_mem_attachment_type {
	/* Share kgd_mem->bo or another attachment's BO */
	KFD_MEM_ATT_SHARED,
	/* SG BO used to DMA-map the pages of a userptr BO */
	KFD_MEM_ATT_USERPTR,
	/* DMABuf used to DMA-map TTM BOs */
	KFD_MEM_ATT_DMABUF,
};

struct kfd_mem_attachment {
@@ -62,6 +63,7 @@ struct kfd_mem_attachment {
struct kgd_mem {
	struct mutex lock;
	struct amdgpu_bo *bo;
	struct dma_buf *dmabuf;
	struct list_head attachments;
	/* protected by amdkfd_process_info.lock */
	struct ttm_validate_buffer validate_list;
+75 −1
Original line number Diff line number Diff line
@@ -529,6 +529,16 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
	return ret;
}

static int
kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment)
{
	struct ttm_operation_ctx ctx = {.interruptible = true};
	struct amdgpu_bo *bo = attachment->bo_va->base.bo;

	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
	return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}

static int
kfd_mem_dmamap_attachment(struct kgd_mem *mem,
			  struct kfd_mem_attachment *attachment)
@@ -538,6 +548,8 @@ kfd_mem_dmamap_attachment(struct kgd_mem *mem,
		return 0;
	case KFD_MEM_ATT_USERPTR:
		return kfd_mem_dmamap_userptr(mem, attachment);
	case KFD_MEM_ATT_DMABUF:
		return kfd_mem_dmamap_dmabuf(attachment);
	default:
		WARN_ON_ONCE(1);
	}
@@ -567,6 +579,19 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem,
	ttm->sg = NULL;
}

static void
kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment)
{
	struct ttm_operation_ctx ctx = {.interruptible = true};
	struct amdgpu_bo *bo = attachment->bo_va->base.bo;

	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
	ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	/* FIXME: This does not guarantee that amdgpu_ttm_tt_unpopulate is
	 * called
	 */
}

static void
kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
			    struct kfd_mem_attachment *attachment)
@@ -577,6 +602,9 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem,
	case KFD_MEM_ATT_USERPTR:
		kfd_mem_dmaunmap_userptr(mem, attachment);
		break;
	case KFD_MEM_ATT_DMABUF:
		kfd_mem_dmaunmap_dmabuf(attachment);
		break;
	default:
		WARN_ON_ONCE(1);
	}
@@ -610,6 +638,38 @@ kfd_mem_attach_userptr(struct amdgpu_device *adev, struct kgd_mem *mem,
	return 0;
}

/* kfd_mem_attach_dmabuf - Create a DMABuf import of mem->bo for a device
 *
 * @adev: device that imports the DMABuf
 * @mem: memory object whose BO is exported; the export is cached in
 *       mem->dmabuf and reused by later calls
 * @bo: on success, set to the imported BO. Its parent is set to a new
 *      reference on mem->bo.
 *
 * Exports mem->bo as a DMABuf on first use (read-write if the allocation
 * has KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE set), then imports that DMABuf on
 * @adev.
 *
 * Returns 0 on success or a negative error code from the export or import.
 * On failure *bo is left untouched.
 */
static int
kfd_mem_attach_dmabuf(struct amdgpu_device *adev, struct kgd_mem *mem,
		      struct amdgpu_bo **bo)
{
	struct drm_gem_object *gobj;
	int ret;

	if (!mem->dmabuf) {
		mem->dmabuf = amdgpu_gem_prime_export(&mem->bo->tbo.base,
			mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
				DRM_RDWR : 0);
		if (IS_ERR(mem->dmabuf)) {
			/* Fetch the error code before clearing the pointer;
			 * PTR_ERR(NULL) would be 0 and report success.
			 */
			ret = PTR_ERR(mem->dmabuf);
			mem->dmabuf = NULL;
			return ret;
		}
	}

	gobj = amdgpu_gem_prime_import(&adev->ddev, mem->dmabuf);
	if (IS_ERR(gobj))
		return PTR_ERR(gobj);

	/* Import takes an extra reference on the dmabuf. Drop it now to
	 * avoid leaking it. We only need the one reference in
	 * kgd_mem->dmabuf.
	 */
	dma_buf_put(mem->dmabuf);

	*bo = gem_to_amdgpu_bo(gobj);
	(*bo)->parent = amdgpu_bo_ref(mem->bo);

	return 0;
}

/* kfd_mem_attach - Add a BO to a VM
 *
 * Everything that needs to be done only once when a BO is first added
@@ -667,8 +727,20 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
			ret = kfd_mem_attach_userptr(adev, mem, &bo[i]);
			if (ret)
				goto unwind;
		} else if (mem->domain == AMDGPU_GEM_DOMAIN_GTT &&
			   mem->bo->tbo.type != ttm_bo_type_sg) {
			/* GTT BOs use DMA-mapping ability of dynamic-attach
			 * DMA bufs. TODO: The same should work for VRAM on
			 * large-BAR GPUs.
			 */
			attachment[i]->type = KFD_MEM_ATT_DMABUF;
			ret = kfd_mem_attach_dmabuf(adev, mem, &bo[i]);
			if (ret)
				goto unwind;
		} else {
			/* FIXME: Need to DMA-map other BO types */
			/* FIXME: Need to DMA-map other BO types:
			 * large-BAR VRAM, doorbells, MMIO remap
			 */
			attachment[i]->type = KFD_MEM_ATT_SHARED;
			bo[i] = mem->bo;
			drm_gem_object_get(&bo[i]->tbo.base);
@@ -1527,6 +1599,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(

	/* Free the BO*/
	drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
	if (mem->dmabuf)
		dma_buf_put(mem->dmabuf);
	drm_gem_object_put(&mem->bo->tbo.base);
	mutex_destroy(&mem->lock);
	kfree(mem);