Commit 6049db43, authored by Dennis Li, committed by Alex Deucher
Browse files

drm/amdgpu: change reset lock from mutex to rw_semaphore



Clients don't need the reset lock to synchronize with each other when
no GPU recovery is in progress.

v2:
Propagate the return value of down_read_killable() to the caller.

v3:
If GPU recovery has already begun, the VF ignores the FLR notification.

Reviewed-by: Monk Liu <monk.liu@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 66b8a9c0
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -951,7 +951,7 @@ struct amdgpu_device {

	atomic_t 			in_gpu_reset;
	enum pp_mp1_state               mp1_state;
	struct mutex  lock_reset;
	struct rw_semaphore reset_sem;
	struct amdgpu_doorbell_index doorbell_index;

	struct mutex			notifier_lock;
+16 −7
Original line number Diff line number Diff line
@@ -101,14 +101,18 @@ static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file)

	file->private_data = adev;

	mutex_lock(&adev->lock_reset);
	ret = down_read_killable(&adev->reset_sem);
	if (ret)
		return ret;

	if (adev->autodump.dumping.done) {
		reinit_completion(&adev->autodump.dumping);
		ret = 0;
	} else {
		ret = -EBUSY;
	}
	mutex_unlock(&adev->lock_reset);

	up_read(&adev->reset_sem);

	return ret;
}
@@ -1242,7 +1246,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
	}

	/* Avoid accidently unparking the sched thread during GPU reset */
	mutex_lock(&adev->lock_reset);
	r = down_read_killable(&adev->reset_sem);
	if (r)
		return r;

	/* hold on the scheduler */
	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -1269,7 +1275,7 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data)
		kthread_unpark(ring->sched.thread);
	}

	mutex_unlock(&adev->lock_reset);
	up_read(&adev->reset_sem);

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);
@@ -1459,7 +1465,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
		return -ENOMEM;

	/* Avoid accidently unparking the sched thread during GPU reset */
	mutex_lock(&adev->lock_reset);
	r = down_read_killable(&adev->reset_sem);
	if (r)
		goto pro_end;

	/* stop the scheduler */
	kthread_park(ring->sched.thread);
@@ -1500,13 +1508,14 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
	/* restart the scheduler */
	kthread_unpark(ring->sched.thread);

	mutex_unlock(&adev->lock_reset);
	up_read(&adev->reset_sem);

	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);

pro_end:
	kfree(fences);

	return 0;
	return r;
}

static int amdgpu_debugfs_sclk_set(void *data, u64 val)
+3 −3
Original line number Diff line number Diff line
@@ -3054,7 +3054,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	mutex_init(&adev->virt.vf_errors.lock);
	hash_init(adev->mn_hash);
	atomic_set(&adev->in_gpu_reset, 0);
	mutex_init(&adev->lock_reset);
	init_rwsem(&adev->reset_sem);
	mutex_init(&adev->psp.mutex);
	mutex_init(&adev->notifier_lock);

@@ -4206,7 +4206,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev)
	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
		return false;

	mutex_lock(&adev->lock_reset);
	down_write(&adev->reset_sem);

	atomic_inc(&adev->gpu_reset_counter);
	switch (amdgpu_asic_reset_method(adev)) {
@@ -4229,7 +4229,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
	amdgpu_vf_error_trans_all(adev);
	adev->mp1_state = PP_MP1_STATE_NONE;
	atomic_set(&adev->in_gpu_reset, 0);
	mutex_unlock(&adev->lock_reset);
	up_write(&adev->reset_sem);
}

static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
+6 −12
Original line number Diff line number Diff line
@@ -238,18 +238,14 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
	int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
	int locked;

	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
	 * otherwise the mailbox msg will be ruined/reseted by
	 * the VF FLR.
	 *
	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
	 * which means host side had finished this VF's FLR.
	 */
	locked = mutex_trylock(&adev->lock_reset);
	if (locked)
	if (!down_read_trylock(&adev->reset_sem))
		return;

	atomic_set(&adev->in_gpu_reset, 1);

	do {
@@ -261,10 +257,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
	} while (timeout > 1);

flr_done:
	if (locked) {
	atomic_set(&adev->in_gpu_reset, 0);
		mutex_unlock(&adev->lock_reset);
	}
	up_read(&adev->reset_sem);

	/* Trigger recovery for world switch failure if no TDR */
	if (amdgpu_device_should_recover_gpu(adev)
+6 −12
Original line number Diff line number Diff line
@@ -259,18 +259,14 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
	int timeout = NV_MAILBOX_POLL_FLR_TIMEDOUT;
	int locked;

	/* block amdgpu_gpu_recover till msg FLR COMPLETE received,
	 * otherwise the mailbox msg will be ruined/reseted by
	 * the VF FLR.
	 *
	 * we can unlock the lock_reset to allow "amdgpu_job_timedout"
	 * to run gpu_recover() after FLR_NOTIFICATION_CMPL received
	 * which means host side had finished this VF's FLR.
	 */
	locked = mutex_trylock(&adev->lock_reset);
	if (locked)
	if (!down_read_trylock(&adev->reset_sem))
		return;

	atomic_set(&adev->in_gpu_reset, 1);

	do {
@@ -282,10 +278,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work)
	} while (timeout > 1);

flr_done:
	if (locked) {
	atomic_set(&adev->in_gpu_reset, 0);
		mutex_unlock(&adev->lock_reset);
	}
	up_read(&adev->reset_sem);

	/* Trigger recovery for world switch failure if no TDR */
	if (amdgpu_device_should_recover_gpu(adev)