Commit 20bc9f76 authored by David Belanger's avatar David Belanger Committed by Alex Deucher
Browse files

drm/amdkfd: Fixed kfd_process cleanup on module exit.



Handle case when module is unloaded (kfd_exit) before a process space
(mm_struct) is released.

v2: Fixed potential race conditions by removing all kfd_process from
the process table first, then working on releasing the resources.

v3: Fixed loop element access / synchronization.  Fixed extra empty lines.

Signed-off-by: default avatarDavid Belanger <david.belanger@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 7304ee97
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@ static int kfd_init(void)

static void kfd_exit(void)
{
	kfd_cleanup_processes();
	kfd_debugfs_fini();
	kfd_process_destroy_wq();
	kfd_procfs_shutdown();
+1 −0
Original line number Diff line number Diff line
@@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);

int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
void kfd_cleanup_processes(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *task);
struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
+60 −7
Original line number Diff line number Diff line
@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
}

static void kfd_process_notifier_release_internal(struct kfd_process *p)
{
	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mmu_notifier_put(&p->mmu_notifier);
}

static void kfd_process_notifier_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
		return;

	mutex_lock(&kfd_processes_mutex);
	/*
	 * Do early return if table is empty.
	 *
	 * This could potentially happen if this function is called concurrently
	 * by mmu_notifier and by kfd_cleanup_pocesses.
	 *
	 */
	if (hash_empty(kfd_processes_table)) {
		mutex_unlock(&kfd_processes_mutex);
		return;
	}
	hash_del_rcu(&p->kfd_processes);
	mutex_unlock(&kfd_processes_mutex);
	synchronize_srcu(&kfd_processes_srcu);

	cancel_delayed_work_sync(&p->eviction_work);
	cancel_delayed_work_sync(&p->restore_work);

	/* Indicate to other users that MM is no longer valid */
	p->mm = NULL;

	mmu_notifier_put(&p->mmu_notifier);
	kfd_process_notifier_release_internal(p);
}

static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
@@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
	.free_notifier = kfd_process_free_notifier,
};

/*
 * This code handles the case when driver is being unloaded before all
 * mm_struct are released.  We need to safely free the kfd_process and
 * avoid race conditions with mmu_notifier that might try to free them.
 *
 */
void kfd_cleanup_processes(void)
{
	struct kfd_process *p;
	struct hlist_node *p_temp;
	unsigned int temp;
	HLIST_HEAD(cleanup_list);

	/*
	 * Move all remaining kfd_process from the process table to a
	 * temp list for processing.   Once done, callback from mmu_notifier
	 * release will not see the kfd_process in the table and do early return,
	 * avoiding double free issues.
	 */
	mutex_lock(&kfd_processes_mutex);
	hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
		hash_del_rcu(&p->kfd_processes);
		synchronize_srcu(&kfd_processes_srcu);
		hlist_add_head(&p->kfd_processes, &cleanup_list);
	}
	mutex_unlock(&kfd_processes_mutex);

	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
		kfd_process_notifier_release_internal(p);

	/*
	 * Ensures that all outstanding free_notifier get called, triggering
	 * the release of the kfd_process struct.
	 */
	mmu_notifier_synchronize();
}

static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
{
	unsigned long  offset;