Commit 8abc1eb2 authored by Christian König's avatar Christian König
Browse files

drm/amdkfd: switch over to using drm_exec v3



Avoids quite a bit of logic and kmalloc overhead.

v2: fix multiple problems pointed out by Felix
v3: two more nit picks from Felix fixed

Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Acked-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230711133122.3710-4-christian.koenig@amd.com
parent 9710631c
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ config DRM_AMDGPU
	select INTERVAL_TREE
	select DRM_BUDDY
	select DRM_SUBALLOC_HELPER
	select DRM_EXEC
	# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
	# ACPI_VIDEO's dependencies must also be selected.
	select INPUT if ACPI
+2 −3
Original line number Diff line number Diff line
@@ -25,6 +25,7 @@
#ifndef AMDGPU_AMDKFD_H_INCLUDED
#define AMDGPU_AMDKFD_H_INCLUDED

#include <linux/list.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/kthread.h>
@@ -32,7 +33,6 @@
#include <linux/mmu_notifier.h>
#include <linux/memremap.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
#include "amdgpu_xcp.h"
@@ -71,8 +71,7 @@ struct kgd_mem {
	struct hmm_range *range;
	struct list_head attachments;
	/* protected by amdkfd_process_info.lock */
	struct ttm_validate_buffer validate_list;
	struct ttm_validate_buffer resv_list;
	struct list_head validate_list;
	uint32_t domain;
	unsigned int mapped_to_gpu_memory;
	uint64_t va;
+115 −184
Original line number Diff line number Diff line
@@ -27,6 +27,8 @@
#include <linux/sched/task.h>
#include <drm/ttm/ttm_tt.h>

#include <drm/drm_exec.h>

#include "amdgpu_object.h"
#include "amdgpu_gem.h"
#include "amdgpu_vm.h"
@@ -964,28 +966,20 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
				struct amdkfd_process_info *process_info,
				bool userptr)
{
	struct ttm_validate_buffer *entry = &mem->validate_list;
	struct amdgpu_bo *bo = mem->bo;

	INIT_LIST_HEAD(&entry->head);
	entry->num_shared = 1;
	entry->bo = &bo->tbo;
	mutex_lock(&process_info->lock);
	if (userptr)
		list_add_tail(&entry->head, &process_info->userptr_valid_list);
		list_add_tail(&mem->validate_list,
			      &process_info->userptr_valid_list);
	else
		list_add_tail(&entry->head, &process_info->kfd_bo_list);
		list_add_tail(&mem->validate_list, &process_info->kfd_bo_list);
	mutex_unlock(&process_info->lock);
}

static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
		struct amdkfd_process_info *process_info)
{
	struct ttm_validate_buffer *bo_list_entry;

	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	list_del(&mem->validate_list);
	mutex_unlock(&process_info->lock);
}

@@ -1072,13 +1066,12 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 * object can track VM updates.
 */
struct bo_vm_reservation_context {
	struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
	unsigned int n_vms;		    /* Number of VMs reserved	    */
	struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries  */
	struct ww_acquire_ctx ticket;	    /* Reservation ticket	    */
	struct list_head list, duplicates;  /* BO lists			    */
	struct amdgpu_sync *sync;	    /* Pointer to sync object	    */
	bool reserved;			    /* Whether BOs are reserved	    */
	/* DRM execution context for the reservation */
	struct drm_exec exec;
	/* Number of VMs reserved */
	unsigned int n_vms;
	/* Pointer to sync object */
	struct amdgpu_sync *sync;
};

enum bo_vm_match {
@@ -1102,37 +1095,28 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,

	WARN_ON(!vm);

	ctx->reserved = false;
	ctx->n_vms = 1;
	ctx->sync = &mem->sync;
	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	drm_exec_until_all_locked(&ctx->exec) {
		ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
		drm_exec_retry_on_contention(&ctx->exec);
		if (unlikely(ret))
			goto error;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
	if (!ctx->vm_pd)
		return -ENOMEM;

	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
		ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base);
		drm_exec_retry_on_contention(&ctx->exec);
		if (unlikely(ret))
			goto error;
	}
	return 0;

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates);
	if (ret) {
error:
	pr_err("Failed to reserve buffers in ttm.\n");
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
	drm_exec_fini(&ctx->exec);
	return ret;
}

	ctx->reserved = true;
	return 0;
}

/**
 * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
 * @mem: KFD BO structure.
@@ -1147,65 +1131,41 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
				struct amdgpu_vm *vm, enum bo_vm_match map_type,
				struct bo_vm_reservation_context *ctx)
{
	struct amdgpu_bo *bo = mem->bo;
	struct kfd_mem_attachment *entry;
	unsigned int i;
	struct amdgpu_bo *bo = mem->bo;
	int ret;

	ctx->reserved = false;
	ctx->n_vms = 0;
	ctx->vm_pd = NULL;
	ctx->sync = &mem->sync;

	INIT_LIST_HEAD(&ctx->list);
	INIT_LIST_HEAD(&ctx->duplicates);

	drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT);
	drm_exec_until_all_locked(&ctx->exec) {
		ctx->n_vms = 0;
		list_for_each_entry(entry, &mem->attachments, list) {
			if ((vm && vm != entry->bo_va->base.vm) ||
				(entry->is_mapped != map_type
				&& map_type != BO_VM_ALL))
				continue;

		ctx->n_vms++;
	}

	if (ctx->n_vms != 0) {
		ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
				     GFP_KERNEL);
		if (!ctx->vm_pd)
			return -ENOMEM;
			ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm,
						&ctx->exec, 2);
			drm_exec_retry_on_contention(&ctx->exec);
			if (unlikely(ret))
				goto error;
			++ctx->n_vms;
		}

	ctx->kfd_bo.priority = 0;
	ctx->kfd_bo.tv.bo = &bo->tbo;
	ctx->kfd_bo.tv.num_shared = 1;
	list_add(&ctx->kfd_bo.tv.head, &ctx->list);

	i = 0;
	list_for_each_entry(entry, &mem->attachments, list) {
		if ((vm && vm != entry->bo_va->base.vm) ||
			(entry->is_mapped != map_type
			&& map_type != BO_VM_ALL))
			continue;

		amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
				&ctx->vm_pd[i]);
		i++;
		ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1);
		drm_exec_retry_on_contention(&ctx->exec);
		if (unlikely(ret))
			goto error;
	}
	return 0;

	ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
				     false, &ctx->duplicates);
	if (ret) {
error:
	pr_err("Failed to reserve buffers in ttm.\n");
		kfree(ctx->vm_pd);
		ctx->vm_pd = NULL;
	drm_exec_fini(&ctx->exec);
	return ret;
}

	ctx->reserved = true;
	return 0;
}

/**
 * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
 * @ctx: Reservation context to unreserve
@@ -1224,15 +1184,8 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
	if (wait)
		ret = amdgpu_sync_wait(ctx->sync, intr);

	if (ctx->reserved)
		ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
	kfree(ctx->vm_pd);

	drm_exec_fini(&ctx->exec);
	ctx->sync = NULL;

	ctx->reserved = false;
	ctx->vm_pd = NULL;

	return ret;
}

@@ -1854,7 +1807,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
	bool use_release_notifier = (mem->bo->kfd_bo == mem);
	struct kfd_mem_attachment *entry, *tmp;
	struct bo_vm_reservation_context ctx;
	struct ttm_validate_buffer *bo_list_entry;
	unsigned int mapped_to_gpu_memory;
	int ret;
	bool is_imported = false;
@@ -1882,9 +1834,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
	}

	/* Make sure restore workers don't access the BO any more */
	bo_list_entry = &mem->validate_list;
	mutex_lock(&process_info->lock);
	list_del(&bo_list_entry->head);
	list_del(&mem->validate_list);
	mutex_unlock(&process_info->lock);

	/* Cleanup user pages and MMU notifiers */
@@ -2451,14 +2402,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
	/* Move all invalidated BOs to the userptr_inval_list */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_valid_list,
				 validate_list.head)
				 validate_list)
		if (mem->invalid)
			list_move_tail(&mem->validate_list.head,
			list_move_tail(&mem->validate_list,
				       &process_info->userptr_inval_list);

	/* Go through userptr_inval_list and update any invalid user_pages */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head) {
			    validate_list) {
		invalid = mem->invalid;
		if (!invalid)
			/* BO hasn't been invalidated since the last
@@ -2538,50 +2489,41 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 */
static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
{
	struct amdgpu_bo_list_entry *pd_bo_list_entries;
	struct list_head resv_list, duplicates;
	struct ww_acquire_ctx ticket;
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_sync sync;
	struct drm_exec exec;

	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem, *tmp_mem;
	struct amdgpu_bo *bo;
	struct ttm_operation_ctx ctx = { false, false };
	int i, ret;

	pd_bo_list_entries = kcalloc(process_info->n_vms,
				     sizeof(struct amdgpu_bo_list_entry),
				     GFP_KERNEL);
	if (!pd_bo_list_entries) {
		pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
		ret = -ENOMEM;
		goto out_no_mem;
	}
	int ret;

	INIT_LIST_HEAD(&resv_list);
	INIT_LIST_HEAD(&duplicates);
	amdgpu_sync_create(&sync);

	/* Get all the page directory BOs that need to be reserved */
	i = 0;
	drm_exec_init(&exec, 0);
	/* Reserve all BOs and page tables for validation */
	drm_exec_until_all_locked(&exec) {
		/* Reserve all the page directories */
		list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
				    &pd_bo_list_entries[i++]);
	/* Add the userptr_inval_list entries to resv_list */
	list_for_each_entry(mem, &process_info->userptr_inval_list,
			    validate_list.head) {
		list_add_tail(&mem->resv_list.head, &resv_list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
				    vm_list_node) {
			ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto unreserve_out;
		}

	/* Reserve all BOs and page tables for validation */
	ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
	WARN(!list_empty(&duplicates), "Duplicates should be empty");
	if (ret)
		goto out_free;
		/* Reserve the userptr_inval_list entries to resv_list */
		list_for_each_entry(mem, &process_info->userptr_inval_list,
				    validate_list) {
			struct drm_gem_object *gobj;

	amdgpu_sync_create(&sync);
			gobj = &mem->bo->tbo.base;
			ret = drm_exec_prepare_obj(&exec, gobj, 1);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto unreserve_out;
		}
	}

	ret = process_validate_vms(process_info);
	if (ret)
@@ -2590,7 +2532,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
	/* Validate BOs and update GPUVM page tables */
	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
				 validate_list) {
		struct kfd_mem_attachment *attachment;

		bo = mem->bo;
@@ -2632,12 +2574,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
	ret = process_update_pds(process_info, &sync);

unreserve_out:
	ttm_eu_backoff_reservation(&ticket, &resv_list);
	drm_exec_fini(&exec);
	amdgpu_sync_wait(&sync, false);
	amdgpu_sync_free(&sync);
out_free:
	kfree(pd_bo_list_entries);
out_no_mem:

	return ret;
}
@@ -2653,7 +2592,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i

	list_for_each_entry_safe(mem, tmp_mem,
				 &process_info->userptr_inval_list,
				 validate_list.head) {
				 validate_list) {
		bool valid;

		/* keep mem without hmm range at userptr_inval_list */
@@ -2677,7 +2616,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i
			continue;
		}

		list_move_tail(&mem->validate_list.head,
		list_move_tail(&mem->validate_list,
			       &process_info->userptr_valid_list);
	}

@@ -2787,51 +2726,45 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 */
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
{
	struct amdgpu_bo_list_entry *pd_bo_list;
	struct amdkfd_process_info *process_info = info;
	struct amdgpu_vm *peer_vm;
	struct kgd_mem *mem;
	struct bo_vm_reservation_context ctx;
	struct amdgpu_amdkfd_fence *new_fence;
	int ret = 0, i;
	struct list_head duplicate_save;
	struct amdgpu_sync sync_obj;
	unsigned long failed_size = 0;
	unsigned long total_size = 0;
	struct drm_exec exec;
	int ret;

	INIT_LIST_HEAD(&duplicate_save);
	INIT_LIST_HEAD(&ctx.list);
	INIT_LIST_HEAD(&ctx.duplicates);

	pd_bo_list = kcalloc(process_info->n_vms,
			     sizeof(struct amdgpu_bo_list_entry),
			     GFP_KERNEL);
	if (!pd_bo_list)
		return -ENOMEM;

	i = 0;
	mutex_lock(&process_info->lock);

	drm_exec_init(&exec, 0);
	drm_exec_until_all_locked(&exec) {
		list_for_each_entry(peer_vm, &process_info->vm_list_head,
			vm_list_node)
		amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
				    vm_list_node) {
			ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto ttm_reserve_fail;
		}

		/* Reserve all BOs and page tables/directory. Add all BOs from
		 * kfd_bo_list to ctx.list
		 */
		list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {

		list_add_tail(&mem->resv_list.head, &ctx.list);
		mem->resv_list.bo = mem->validate_list.bo;
		mem->resv_list.num_shared = mem->validate_list.num_shared;
	}
				    validate_list) {
			struct drm_gem_object *gobj;

	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
				     false, &duplicate_save);
	if (ret) {
		pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
			gobj = &mem->bo->tbo.base;
			ret = drm_exec_prepare_obj(&exec, gobj, 1);
			drm_exec_retry_on_contention(&exec);
			if (unlikely(ret))
				goto ttm_reserve_fail;
		}
	}

	amdgpu_sync_create(&sync_obj);

@@ -2848,7 +2781,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)

	/* Validate BOs and map them to GPUVM (update VM page tables). */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
			    validate_list.head) {
			    validate_list) {

		struct amdgpu_bo *bo = mem->bo;
		uint32_t domain = mem->domain;
@@ -2921,8 +2854,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
	*ef = dma_fence_get(&new_fence->base);

	/* Attach new eviction fence to all BOs except pinned ones */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head) {
	list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) {
		if (mem->bo->tbo.pin_count)
			continue;

@@ -2941,11 +2873,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
	}

validate_map_fail:
	ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
	amdgpu_sync_free(&sync_obj);
ttm_reserve_fail:
	drm_exec_fini(&exec);
	mutex_unlock(&process_info->lock);
	kfree(pd_bo_list);
	return ret;
}

+18 −0
Original line number Diff line number Diff line
@@ -34,6 +34,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
@@ -360,6 +361,23 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
	list_add(&entry->tv.head, validated);
}

/**
 * amdgpu_vm_lock_pd - lock PD in drm_exec
 *
 * @vm: vm providing the BOs
 * @exec: drm execution context
 * @num_fences: number of extra fences to reserve
 *
 * Lock the VM root PD in the DRM execution context.
 */
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
		      unsigned int num_fences)
{
	/* We need at least two fences for the VM PD/PT updates */
	return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base,
				    2 + num_fences);
}

/**
 * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
 *
+4 −0
Original line number Diff line number Diff line
@@ -36,6 +36,8 @@
#include "amdgpu_ring.h"
#include "amdgpu_ids.h"

struct drm_exec;

struct amdgpu_bo_va;
struct amdgpu_job;
struct amdgpu_bo_list_entry;
@@ -399,6 +401,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry);
int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
		      unsigned int num_fences);
bool amdgpu_vm_ready(struct amdgpu_vm *vm);
uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm);
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
Loading