Commit c7f21978 authored by Philip Yang's avatar Philip Yang Committed by Alex Deucher
Browse files

drm/amdkfd: Add user queue eviction restore SMI event



Output user queue eviction and restore event. User queue eviction may be
triggered by svm or userptr MMU notifier, TTM eviction, device suspend
and CRIU checkpoint and restore.

User queue restore may be rescheduled if eviction happens again while
restore.

Signed-off-by: default avatarPhilip Yang <Philip.Yang@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent acac270d
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -336,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
}
#endif
/* KGD2KFD callbacks */
int kgd2kfd_quiesce_mm(struct mm_struct *mm);
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
						struct dma_fence *fence);
+8 −4
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
#include "amdgpu_dma_buf.h"
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
#include "kfd_smi_events.h"

/* Userptr restore delay, just long enough to allow consecutive VM
 * changes to accumulate
@@ -2346,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
	if (evicted_bos == 1) {
		/* First eviction, stop the queues */
		r = kgd2kfd_quiesce_mm(mm);
		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
		if (r)
			pr_err("Failed to quiesce KFD\n");
		schedule_delayed_work(&process_info->restore_userptr_work,
@@ -2620,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)

unlock_out:
	mutex_unlock(&process_info->lock);
	mmput(mm);
	put_task_struct(usertask);

	/* If validation failed, reschedule another attempt */
	if (evicted_bos)
	if (evicted_bos) {
		schedule_delayed_work(&process_info->restore_userptr_work,
			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));

		kfd_smi_event_queue_restore_rescheduled(mm);
	}
	mmput(mm);
	put_task_struct(usertask);
}

/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+2 −2
Original line number Diff line number Diff line
@@ -2434,7 +2434,7 @@ static int criu_restore(struct file *filep,
	 * Set the process to evicted state to avoid running any new queues before all the memory
	 * mappings are ready.
	 */
	ret = kfd_process_evict_queues(p);
	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE);
	if (ret)
		goto exit_unlock;

@@ -2553,7 +2553,7 @@ static int criu_process_info(struct file *filep,
		goto err_unlock;
	}

	ret = kfd_process_evict_queues(p);
	ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT);
	if (ret)
		goto err_unlock;

+2 −2
Original line number Diff line number Diff line
@@ -837,7 +837,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger)
{
	struct kfd_process *p;
	int r;
@@ -851,7 +851,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
		return -ESRCH;

	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	r = kfd_process_evict_queues(p);
	r = kfd_process_evict_queues(p, trigger);

	kfd_unref_process(p);
	return r;
+1 −1
Original line number Diff line number Diff line
@@ -947,7 +947,7 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
}

void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
Loading