Commit 9527b9ca authored by Philip Yang's avatar Philip Yang Committed by Alex Deucher
Browse files

drm/amdkfd: evict svm bo worker handle error



Migrate vram to ram may fail to find the vma if process is exiting
and vma is removed, evict svm bo worker sets prange->svm_bo to NULL
and warn svm_bo ref count != 1 only if migrating vram to ram
successfully.

Signed-off-by: default avatarPhilip Yang <Philip.Yang@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 4e2f50e2
Loading
Loading
Loading
Loading
+23 −6
Original line number Diff line number Diff line
@@ -638,6 +638,22 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
	return r;
}

/**
 * svm_migrate_vma_to_ram - migrate range inside one vma from device to system
 *
 * @adev: amdgpu device to migrate from
 * @prange: svm range structure
 * @vma: vm_area_struct that range [start, end] belongs to
 * @start: range start virtual address in pages
 * @end: range end virtual address in pages
 *
 * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
 *
 * Return:
 *   0 - success with all pages migrated
 *   negative values - indicate error
 *   positive values - partial migration, number of pages not migrated
 */
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end)
@@ -709,8 +725,6 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
		pdd = svm_range_get_pdd_by_adev(prange, adev);
		if (pdd)
			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);

		return upages;
	}
	return r ? r : upages;
}
@@ -720,7 +734,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 * @prange: range structure
 * @mm: process mm, use current->mm if NULL
 *
 * Context: Process context, caller hold mmap read lock, svms lock, prange lock
 * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
 *
 * Return:
 * 0 - OK, otherwise error code
@@ -759,13 +773,16 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
		unsigned long next;

		vma = find_vma(mm, addr);
		if (!vma || addr < vma->vm_start)
		if (!vma || addr < vma->vm_start) {
			pr_debug("failed to find vma for prange %p\n", prange);
			r = -EFAULT;
			break;
		}

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
		if (r < 0) {
			pr_debug("failed %ld to migrate\n", r);
			pr_debug("failed %ld to migrate prange %p\n", r, prange);
			break;
		} else {
			upages += r;
@@ -773,7 +790,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
		addr = next;
	}

	if (!upages) {
	if (r >= 0 && !upages) {
		svm_range_vram_node_free(prange);
		prange->actual_loc = 0;
	}
+13 −8
Original line number Diff line number Diff line
@@ -3155,6 +3155,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
	struct svm_range_bo *svm_bo;
	struct kfd_process *p;
	struct mm_struct *mm;
	int r = 0;

	svm_bo = container_of(work, struct svm_range_bo, eviction_work);
	if (!svm_bo_ref_unless_zero(svm_bo))
@@ -3170,7 +3171,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)

	mmap_read_lock(mm);
	spin_lock(&svm_bo->list_lock);
	while (!list_empty(&svm_bo->range_list)) {
	while (!list_empty(&svm_bo->range_list) && !r) {
		struct svm_range *prange =
				list_first_entry(&svm_bo->range_list,
						struct svm_range, svm_bo_list);
@@ -3184,15 +3185,18 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)

		mutex_lock(&prange->migrate_mutex);
		do {
			svm_migrate_vram_to_ram(prange,
			r = svm_migrate_vram_to_ram(prange,
						svm_bo->eviction_fence->mm);
		} while (prange->actual_loc && --retries);
		WARN(prange->actual_loc, "Migration failed during eviction");
		} while (!r && prange->actual_loc && --retries);

		if (!r && prange->actual_loc)
			pr_info_once("Migration failed during eviction");

		if (!prange->actual_loc) {
			mutex_lock(&prange->lock);
			prange->svm_bo = NULL;
			mutex_unlock(&prange->lock);

		}
		mutex_unlock(&prange->migrate_mutex);

		spin_lock(&svm_bo->list_lock);
@@ -3201,10 +3205,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
	mmap_read_unlock(mm);

	dma_fence_signal(&svm_bo->eviction_fence->base);

	/* This is the last reference to svm_bo, after svm_range_vram_node_free
	 * has been called in svm_migrate_vram_to_ram
	 */
	WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
	WARN_ONCE(!r && kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
	svm_range_bo_unref(svm_bo);
}