Commit fe1f05df authored by Mukul Joshi, committed by Alex Deucher
Browse files

drm/amdkfd: Rework kfd_locked handling



Currently, even if kfd_locked is set, a process is first
created and then removed to work around a race condition
in updating the kfd_locked flag. Rework kfd_locked handling to
ensure no process is created if kfd_locked is set. This
is achieved by updating kfd_locked under kfd_processes_mutex.
With this there is no need for kfd_locked to be an atomic
counter. Instead, it can be a regular integer.

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6b22ef25
Loading
Loading
Loading
Loading
+0 −7
Original line number Diff line number Diff line
@@ -146,13 +146,6 @@ static int kfd_open(struct inode *inode, struct file *filep)
	if (IS_ERR(process))
		return PTR_ERR(process);

	if (kfd_is_locked()) {
		dev_dbg(kfd_device, "kfd is locked!\n"
				"process %d unreferenced", process->pasid);
		kfd_unref_process(process);
		return -EAGAIN;
	}

	/* filep now owns the reference returned by kfd_create_process */
	filep->private_data = process;

+16 −5
Original line number Diff line number Diff line
@@ -42,7 +42,7 @@
 * once locked, kfd driver will stop any further GPU execution.
 * create process (open) will return -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);
static int kfd_locked;

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
@@ -880,7 +880,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
			return ret;
	}

	atomic_dec(&kfd_locked);
	mutex_lock(&kfd_processes_mutex);
	--kfd_locked;
	mutex_unlock(&kfd_processes_mutex);

	for (i = 0; i < kfd->num_nodes; i++) {
		node = kfd->nodes[i];
@@ -893,21 +895,27 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)

bool kfd_is_locked(void)
{
	return  (atomic_read(&kfd_locked) > 0);
	lockdep_assert_held(&kfd_processes_mutex);
	return  (kfd_locked > 0);
}

void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	struct kfd_node *node;
	int i;
	int count;

	if (!kfd->init_complete)
		return;

	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		mutex_lock(&kfd_processes_mutex);
		count = ++kfd_locked;
		mutex_unlock(&kfd_processes_mutex);

		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
		if (count == 1)
			kfd_suspend_all_processes();
	}

@@ -933,7 +941,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		mutex_lock(&kfd_processes_mutex);
		count = --kfd_locked;
		mutex_unlock(&kfd_processes_mutex);

		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
+2 −0
Original line number Diff line number Diff line
@@ -201,6 +201,8 @@ extern int amdgpu_no_queue_eviction_on_vm_fault;
/* Enable eviction debug messages */
extern bool debug_evictions;

extern struct mutex kfd_processes_mutex;

enum cache_policy {
	cache_policy_coherent,
	cache_policy_noncoherent
+7 −1
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ struct mm_struct;
 * Unique/indexed by mm_struct*
 */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_MUTEX(kfd_processes_mutex);

DEFINE_SRCU(kfd_processes_srcu);

@@ -818,6 +818,12 @@ struct kfd_process *kfd_create_process(struct file *filep)
	 */
	mutex_lock(&kfd_processes_mutex);

	if (kfd_is_locked()) {
		mutex_unlock(&kfd_processes_mutex);
		pr_debug("KFD is locked! Cannot create process");
		return ERR_PTR(-EINVAL);
	}

	/* A prior open of /dev/kfd could have already created the process. */
	process = find_process(thread, false);
	if (process) {