Commit c3186665 authored by Shashank Sharma, committed by Alex Deucher
Browse files

drm/amdgpu: use doorbell mgr for kfd kernel doorbells



This patch:
- adds a doorbell BO to the kfd device structure.
- creates a doorbell page for kfd kernel usage.
- updates the get_kernel_doorbell and free_kernel_doorbell functions
  accordingly.

V2: Do not use a wrapper API; call amdgpu_bo_create_kernel() directly (Alex)
V3:
 - Move single variable declaration below (Christian)
 - Add a to-do item to reuse the KGD kernel level doorbells for
   KFD for non-MES cases, instead of reserving one page (Felix)

Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 76bd3478
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -455,8 +455,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	ida_init(&kfd->doorbell_ida);

+33 −76
Original line number Diff line number Diff line
@@ -61,81 +61,46 @@ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
/*
 * kfd_doorbell_init() - set up the kernel doorbell resources of a KFD device.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost,
 * so removed and added lines are interleaved in the body below. Going by the
 * diffstat (+33 -76) and the commit message, the aperture/ioremap
 * calculations look like the removed code, and the bitmap plus doorbell BO
 * allocation look like the added code - confirm against the applied patch.
 *
 * Return values visible below: 0 on success, -ENOSPC, -ENOMEM, or the error
 * code returned by amdgpu_bo_create_kernel().
 */
int kfd_doorbell_init(struct kfd_dev *kfd)
{
	size_t doorbell_start_offset;
	size_t doorbell_aperture_size;
	size_t doorbell_process_limit;
	/* Byte size used both for the doorbell BO and to size the bitmap. */
	int size = PAGE_SIZE;
	int r;

	/*
	 * With MES enabled, just set the doorbell base as it is needed
	 * to calculate doorbell physical address.
	 */
	if (kfd->shared_resources.enable_mes) {
		kfd->doorbell_base =
			kfd->shared_resources.doorbell_physical_address;
		return 0;
	}

	/*
	 * We start with calculations in bytes because the input data might
	 * only be byte-aligned.
	 * Only after we have done the rounding can we assume any alignment.
	 * Todo: KFD kernel level operations need only one doorbell for
	 * ring test/HWS. So instead of reserving a whole page here for
	 * kernel, reserve and consume a doorbell from existing KGD kernel
	 * doorbell page.
	 */

	/* Round the start offset up to a whole process slice. */
	doorbell_start_offset =
			roundup(kfd->shared_resources.doorbell_start_offset,
					kfd_doorbell_process_slice(kfd));

	/* Round the aperture size down to a whole process slice. */
	doorbell_aperture_size =
			rounddown(kfd->shared_resources.doorbell_aperture_size,
					kfd_doorbell_process_slice(kfd));

	/*
	 * Number of process slices between the rounded start offset and the
	 * rounded aperture size; fail with -ENOSPC when the aperture does not
	 * extend past the start offset.
	 */
	if (doorbell_aperture_size > doorbell_start_offset)
		doorbell_process_limit =
			(doorbell_aperture_size - doorbell_start_offset) /
						kfd_doorbell_process_slice(kfd);
	else
		return -ENOSPC;

	/* Use the computed limit when no maximum is set or it is larger. */
	if (!kfd->max_doorbell_slices ||
	    doorbell_process_limit < kfd->max_doorbell_slices)
		kfd->max_doorbell_slices = doorbell_process_limit;

	kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
				doorbell_start_offset;

	/* Same start offset expressed in u32 (dword) units. */
	kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);

	/* Map one process slice starting at doorbell_base for kernel use. */
	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
					   kfd_doorbell_process_slice(kfd));

	/*
	 * NOTE(review): the statement this check originally guarded is not
	 * visible here (diff artifact). As literally written, it would guard
	 * the bitmap_zalloc() call that follows - confirm against the patch.
	 */
	if (!kfd->doorbell_kernel_ptr)
	/* Bitmap to dynamically allocate doorbells from kernel page */
	kfd->doorbell_bitmap = bitmap_zalloc(size / sizeof(u32), GFP_KERNEL);
	if (!kfd->doorbell_bitmap) {
		DRM_ERROR("Failed to allocate kernel doorbell bitmap\n");
		return -ENOMEM;
	}

	pr_debug("Doorbell initialization:\n");
	pr_debug("doorbell base           == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_base);

	pr_debug("doorbell_base_dw_offset      == 0x%08lX\n",
			kfd->doorbell_base_dw_offset);

	pr_debug("doorbell_process_limit  == 0x%08lX\n",
			doorbell_process_limit);

	pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
			(uintptr_t)kfd->doorbell_base);

	pr_debug("doorbell aperture size  == 0x%08lX\n",
			kfd->shared_resources.doorbell_aperture_size);

	pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
	/* Alloc a doorbell page for KFD kernel usages */
	r = amdgpu_bo_create_kernel(kfd->adev,
				    size,
				    PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_DOORBELL,
				    &kfd->doorbells,
				    NULL,
				    (void **)&kfd->doorbell_kernel_ptr);
	if (r) {
		pr_err("failed to allocate kernel doorbells\n");
		/* Release the bitmap allocated above before bailing out. */
		bitmap_free(kfd->doorbell_bitmap);
		return r;
	}

	pr_debug("Doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr);
	return 0;
}

/*
 * kfd_doorbell_fini() - release the kernel doorbell resources of a KFD device.
 *
 * NOTE(review): this listing is a rendered diff whose +/- markers were lost.
 * The iounmap() pair looks like the removed teardown, and the bitmap_free() /
 * amdgpu_bo_free_kernel() calls look like the added one - confirm against the
 * applied patch.
 */
void kfd_doorbell_fini(struct kfd_dev *kfd)
{
	/* Unmap the kernel doorbell mapping only if one was set up. */
	if (kfd->doorbell_kernel_ptr)
		iounmap(kfd->doorbell_kernel_ptr);
	/* Free the kernel doorbell allocation bitmap. */
	bitmap_free(kfd->doorbell_bitmap);
	/* Free the doorbell BO, passing the kernel pointer that maps it. */
	amdgpu_bo_free_kernel(&kfd->doorbells, NULL,
			     (void **)&kfd->doorbell_kernel_ptr);
}

int kfd_doorbell_mmap(struct kfd_node *dev, struct kfd_process *process,
@@ -188,22 +153,15 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
	u32 inx;

	mutex_lock(&kfd->doorbell_mutex);
	inx = find_first_zero_bit(kfd->doorbell_available_index,
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
	inx = find_first_zero_bit(kfd->doorbell_bitmap, PAGE_SIZE / sizeof(u32));

	__set_bit(inx, kfd->doorbell_available_index);
	__set_bit(inx, kfd->doorbell_bitmap);
	mutex_unlock(&kfd->doorbell_mutex);

	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return NULL;

	inx *= kfd->device_info.doorbell_size / sizeof(u32);

	/*
	 * Calculating the kernel doorbell offset using the first
	 * doorbell page.
	 */
	*doorbell_off = kfd->doorbell_base_dw_offset + inx;
	*doorbell_off = amdgpu_doorbell_index_on_bar(kfd->adev, kfd->doorbells, inx);

	pr_debug("Get kernel queue doorbell\n"
			"     doorbell offset   == 0x%08X\n"
@@ -217,11 +175,10 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
	unsigned int inx;

	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
		* sizeof(u32) / kfd->device_info.doorbell_size;
	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);

	mutex_lock(&kfd->doorbell_mutex);
	__clear_bit(inx, kfd->doorbell_available_index);
	__clear_bit(inx, kfd->doorbell_bitmap);
	mutex_unlock(&kfd->doorbell_mutex);
}

+6 −0
Original line number Diff line number Diff line
@@ -385,6 +385,12 @@ struct kfd_dev {
	/* Track per device allocated watch points */
	uint32_t alloc_watch_ids;
	spinlock_t watch_points_lock;

	/* Kernel doorbells for KFD device */
	struct amdgpu_bo *doorbells;

	/* bitmap for dynamic doorbell allocation from doorbell object */
	unsigned long *doorbell_bitmap;
};

enum kfd_mempool {