Commit 5bf06c4c authored by Dave Airlie
Browse files

Merge tag 'amd-drm-fixes-6.1-2022-11-09' of...

Merge tag 'amd-drm-fixes-6.1-2022-11-09' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.1-2022-11-09:

amdgpu:
- SMU 13.0.4 update
- GPUVM TLB race fix
- DCN 3.1.4 fixes
- DCN 3.2.x fixes
- Vega10 fan fix
- BACO fix for Beige Goby board
- PSR fix
- GPU VM PT locking fixes

amdkfd:
- CRIU fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20221109234554.6028-1-alexander.deucher@amd.com
parents f0c4d9fc 675d8462
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -992,6 +992,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
		kvfree(e->user_pages);
		e->user_pages = NULL;
	}
	mutex_unlock(&p->bo_list->bo_list_mutex);
	return r;
}

+0 −26
Original line number Diff line number Diff line
@@ -143,32 +143,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
	return 0;
}

/*
 * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
 * happens while holding this lock anywhere to prevent deadlocks when
 * an MMU notifier runs in reclaim-FS context.
 *
 * amdgpu_vm_eviction_lock() acquires vm->eviction_lock and then stores the
 * value returned by memalloc_noreclaim_save() in vm->saved_flags. The
 * stored value is what amdgpu_vm_eviction_unlock() later passes to
 * memalloc_noreclaim_restore().
 */
static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
{
	mutex_lock(&vm->eviction_lock);
	/* Written only after the mutex is held, so the field has one writer. */
	vm->saved_flags = memalloc_noreclaim_save();
}

/*
 * Non-blocking variant of amdgpu_vm_eviction_lock().
 *
 * Returns 1 when vm->eviction_lock was acquired; in that case the value
 * returned by memalloc_noreclaim_save() has been stored in vm->saved_flags.
 * Returns 0 when the mutex could not be taken, leaving vm->saved_flags
 * untouched.
 */
static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
{
	/* Lock not obtained: report failure without modifying the vm. */
	if (!mutex_trylock(&vm->eviction_lock))
		return 0;

	vm->saved_flags = memalloc_noreclaim_save();
	return 1;
}

/*
 * Counterpart of amdgpu_vm_eviction_lock()/amdgpu_vm_eviction_trylock():
 * passes vm->saved_flags to memalloc_noreclaim_restore() and then releases
 * vm->eviction_lock.
 */
static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
{
	/*
	 * Read saved_flags while the mutex is still held; the lock helpers
	 * overwrite that field as soon as they acquire the mutex.
	 */
	memalloc_noreclaim_restore(vm->saved_flags);
	mutex_unlock(&vm->eviction_lock);
}

/**
 * amdgpu_vm_bo_evicted - vm_bo is evicted
 *
+41 −0
Original line number Diff line number Diff line
@@ -492,7 +492,48 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
 */
static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
{
	unsigned long flags;
	spinlock_t *lock;

	/*
	 * Workaround to stop racing between the fence signaling and handling
	 * the cb. The lock is static after initially setting it up, just make
	 * sure that the dma_fence structure isn't freed up.
	 */
	/* Only the lock pointer is fetched inside the RCU read-side section. */
	rcu_read_lock();
	lock = vm->last_tlb_flush->lock;
	rcu_read_unlock();

	/*
	 * Take and immediately drop the fence's spinlock. Nothing is done
	 * while it is held; acquiring it simply makes this call wait until
	 * any current holder has released it before tlb_seq is read below.
	 */
	spin_lock_irqsave(lock, flags);
	spin_unlock_irqrestore(lock, flags);

	/* Return the current value of the VM's 64-bit atomic tlb_seq counter. */
	return atomic64_read(&vm->tlb_seq);
}

/*
 * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
 * happens while holding this lock anywhere to prevent deadlocks when
 * an MMU notifier runs in reclaim-FS context.
 *
 * amdgpu_vm_eviction_lock() acquires vm->eviction_lock and then stores the
 * value returned by memalloc_noreclaim_save() in vm->saved_flags. The
 * stored value is what amdgpu_vm_eviction_unlock() later passes to
 * memalloc_noreclaim_restore().
 */
static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
{
	mutex_lock(&vm->eviction_lock);
	/* Written only after the mutex is held, so the field has one writer. */
	vm->saved_flags = memalloc_noreclaim_save();
}

/*
 * Non-blocking variant of amdgpu_vm_eviction_lock().
 *
 * Returns true when vm->eviction_lock was acquired; in that case the value
 * returned by memalloc_noreclaim_save() has been stored in vm->saved_flags.
 * Returns false when the mutex could not be taken, leaving vm->saved_flags
 * untouched.
 */
static inline bool amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
{
	/* Lock not obtained: report failure without modifying the vm. */
	if (!mutex_trylock(&vm->eviction_lock))
		return false;

	vm->saved_flags = memalloc_noreclaim_save();
	return true;
}

/*
 * Counterpart of amdgpu_vm_eviction_lock()/amdgpu_vm_eviction_trylock():
 * passes vm->saved_flags to memalloc_noreclaim_restore() and then releases
 * vm->eviction_lock.
 */
static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
{
	/*
	 * Read saved_flags while the mutex is still held; the lock helpers
	 * overwrite that field as soon as they acquire the mutex.
	 */
	memalloc_noreclaim_restore(vm->saved_flags);
	mutex_unlock(&vm->eviction_lock);
}

#endif
+2 −0
Original line number Diff line number Diff line
@@ -597,7 +597,9 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
	if (entry->bo)
		return 0;

	amdgpu_vm_eviction_unlock(vm);
	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
	amdgpu_vm_eviction_lock(vm);
	if (r)
		return r;

+15 −19
Original line number Diff line number Diff line
@@ -1950,7 +1950,7 @@ static int criu_checkpoint(struct file *filep,
{
	int ret;
	uint32_t num_devices, num_bos, num_objects;
	uint64_t priv_size, priv_offset = 0;
	uint64_t priv_size, priv_offset = 0, bo_priv_offset;

	if (!args->devices || !args->bos || !args->priv_data)
		return -EINVAL;
@@ -1994,38 +1994,34 @@ static int criu_checkpoint(struct file *filep,
	if (ret)
		goto exit_unlock;

	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
			    (uint8_t __user *)args->priv_data, &priv_offset);
	if (ret)
		goto exit_unlock;
	/* Leave room for BOs in the private data. They need to be restored
	 * before events, but we checkpoint them last to simplify the error
	 * handling.
	 */
	bo_priv_offset = priv_offset;
	priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);

	if (num_objects) {
		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
						 &priv_offset);
		if (ret)
			goto close_bo_fds;
			goto exit_unlock;

		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
						 &priv_offset);
		if (ret)
			goto close_bo_fds;
			goto exit_unlock;

		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
		if (ret)
			goto close_bo_fds;
			goto exit_unlock;
	}

close_bo_fds:
	if (ret) {
		/* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */
		uint32_t i;
		struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;

		for (i = 0; i < num_bos; i++) {
			if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
				close_fd(bo_buckets[i].dmabuf_fd);
		}
	}
	/* This must be the last thing in this function that can fail.
	 * Otherwise we leak dmabuf file descriptors.
	 */
	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
			   (uint8_t __user *)args->priv_data, &bo_priv_offset);

exit_unlock:
	mutex_unlock(&p->mutex);
Loading