Commit b41896e3 authored by Felix Kuehling, committed by Alex Deucher

drm/amdkfd: add svm_bo eviction mechanism support



The svm_bo eviction mechanism is different from that of regular BOs.
Every SVM_BO created contains one eviction fence and one worker item
for the eviction process. An SVM_BO can be attached to one or more
pranges. To evict SVM_BOs, TTM calls the enable_signaling callback on
each SVM_BO's eviction fence until enough VRAM space is available.
All the ttm_evict calls here are synchronous, which guarantees that
each eviction has completed and the fence has signaled before the
call returns.
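
For context, the TTM side of this mechanism is not in this diff; it
lands in amdgpu_amdkfd_fence.c in a companion change. A rough sketch
of how the fence's enable_signaling callback dispatches SVM_BO fences
to the helper added below (illustrative, not the authoritative patch):

	static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
	{
		struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);

		if (!fence)
			return false;

		if (dma_fence_is_signaled(f))
			return true;

		if (!fence->svm_bo) {
			/* Process eviction fence: evict the whole process */
			if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
				return true;
		} else {
			/* SVM_BO fence: schedule the per-BO eviction worker */
			if (!svm_range_schedule_evict_svm_bo(fence))
				return true;
		}
		return false;
	}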

Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent f04c79cf
drivers/gpu/drm/amd/amdkfd/kfd_svm.c +159 −42
@@ -35,6 +35,7 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
 				    const struct mmu_notifier_range *range,
@@ -320,7 +321,15 @@ static void svm_range_bo_release(struct kref *kref)
 		spin_lock(&svm_bo->list_lock);
 	}
 	spin_unlock(&svm_bo->list_lock);
 
+	if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
+		/* We're not in the eviction worker.
+		 * Signal the fence and synchronize with any
+		 * pending eviction work.
+		 */
+		dma_fence_signal(&svm_bo->eviction_fence->base);
+		cancel_work_sync(&svm_bo->eviction_work);
+	}
+	dma_fence_put(&svm_bo->eviction_fence->base);
 	amdgpu_bo_unref(&svm_bo->bo);
 	kfree(svm_bo);
 }
@@ -333,6 +342,61 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
 	kref_put(&svm_bo->kref, svm_range_bo_release);
 }
 
+static bool svm_range_validate_svm_bo(struct svm_range *prange)
+{
+	mutex_lock(&prange->lock);
+	if (!prange->svm_bo) {
+		mutex_unlock(&prange->lock);
+		return false;
+	}
+	if (prange->ttm_res) {
+		/* We still have a reference, all is well */
+		mutex_unlock(&prange->lock);
+		return true;
+	}
+	if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+		if (READ_ONCE(prange->svm_bo->evicting)) {
+			struct dma_fence *f;
+			struct svm_range_bo *svm_bo;
+			/* The BO is getting evicted,
+			 * we need to get a new one
+			 */
+			mutex_unlock(&prange->lock);
+			svm_bo = prange->svm_bo;
+			f = dma_fence_get(&svm_bo->eviction_fence->base);
+			svm_range_bo_unref(prange->svm_bo);
+			/* wait for the fence to avoid long spin-loop
+			 * at list_empty_careful
+			 */
+			dma_fence_wait(f, false);
+			dma_fence_put(f);
+		} else {
+			/* The BO was still around and we got
+			 * a new reference to it
+			 */
+			mutex_unlock(&prange->lock);
+			pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+				 prange->svms, prange->start, prange->last);
+
+			prange->ttm_res = &prange->svm_bo->bo->tbo.mem;
+			return true;
+		}
+
+	} else {
+		mutex_unlock(&prange->lock);
+	}
+
+	/* We need a new svm_bo. Spin-loop to wait for concurrent
+	 * svm_range_bo_release to finish removing this range from
+	 * its range list. After this, it is safe to reuse the
+	 * svm_bo pointer and svm_bo_list head.
+	 */
+	while (!list_empty_careful(&prange->svm_bo_list))
+		;
+
+	return false;
+}
+
 static struct svm_range_bo *svm_range_bo_new(void)
 {
 	struct svm_range_bo *svm_bo;
@@ -352,72 +416,56 @@ int
 svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 			bool clear)
 {
-	struct amdkfd_process_info *process_info;
 	struct amdgpu_bo_param bp;
 	struct svm_range_bo *svm_bo;
 	struct amdgpu_bo_user *ubo;
 	struct amdgpu_bo *bo;
 	struct kfd_process *p;
+	struct mm_struct *mm;
 	int r;
 
-	pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last);
-	mutex_lock(&prange->lock);
-	if (prange->svm_bo) {
-		if (prange->ttm_res) {
-			/* We still have a reference, all is well */
-			mutex_unlock(&prange->lock);
-			return 0;
-		}
-		if (svm_bo_ref_unless_zero(prange->svm_bo)) {
-			/* The BO was still around and we got
-			 * a new reference to it
-			 */
-			mutex_unlock(&prange->lock);
-			pr_debug("reuse old bo [0x%lx 0x%lx]\n",
-				 prange->start, prange->last);
+	p = container_of(prange->svms, struct kfd_process, svms);
+	pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms,
+		 prange->start, prange->last);
 
-			prange->ttm_res = &prange->svm_bo->bo->tbo.mem;
-			return 0;
-		}
-
-		mutex_unlock(&prange->lock);
-
-		/* We need a new svm_bo. Spin-loop to wait for concurrent
-		 * svm_range_bo_release to finish removing this range from
-		 * its range list. After this, it is safe to reuse the
-		 * svm_bo pointer and svm_bo_list head.
-		 */
-		while (!list_empty_careful(&prange->svm_bo_list))
-			;
-
-	} else {
-		mutex_unlock(&prange->lock);
-	}
+	if (svm_range_validate_svm_bo(prange))
+		return 0;
 
 	svm_bo = svm_range_bo_new();
 	if (!svm_bo) {
 		pr_debug("failed to alloc svm bo\n");
 		return -ENOMEM;
 	}
 
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		pr_debug("failed to get mm\n");
+		kfree(svm_bo);
+		return -ESRCH;
+	}
+	svm_bo->svms = prange->svms;
+	svm_bo->eviction_fence =
+		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+					   mm,
+					   svm_bo);
+	mmput(mm);
+	INIT_WORK(&svm_bo->eviction_work, svm_range_evict_svm_bo_worker);
+	svm_bo->evicting = 0;
 	memset(&bp, 0, sizeof(bp));
 	bp.size = prange->npages * PAGE_SIZE;
 	bp.byte_align = PAGE_SIZE;
 	bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
 	bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
 	bp.flags |= clear ? AMDGPU_GEM_CREATE_VRAM_CLEARED : 0;
+	bp.flags |= AMDGPU_AMDKFD_CREATE_SVM_BO;
 	bp.type = ttm_bo_type_device;
 	bp.resv = NULL;
 
 	r = amdgpu_bo_create_user(adev, &bp, &ubo);
 	if (r) {
 		pr_debug("failed %d to create bo\n", r);
-		kfree(svm_bo);
-		return r;
+		goto create_bo_failed;
 	}
 	bo = &ubo->bo;
 
-	p = container_of(prange->svms, struct kfd_process, svms);
 	r = amdgpu_bo_reserve(bo, true);
 	if (r) {
 		pr_debug("failed %d to reserve bo\n", r);
@@ -430,8 +478,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 		amdgpu_bo_unreserve(bo);
 		goto reserve_bo_failed;
 	}
-	process_info = p->kgd_process_info;
-	amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+	amdgpu_bo_fence(bo, &svm_bo->eviction_fence->base, true);
 
 	amdgpu_bo_unreserve(bo);
 
@@ -447,8 +494,10 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 	return 0;
 
 reserve_bo_failed:
-	kfree(svm_bo);
 	amdgpu_bo_unref(&bo);
+create_bo_failed:
+	dma_fence_put(&svm_bo->eviction_fence->base);
+	kfree(svm_bo);
 	prange->ttm_res = NULL;
 
 	return r;
@@ -2329,6 +2378,74 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 	return r;
 }
 
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
+{
+	if (!fence)
+		return -EINVAL;
+
+	if (dma_fence_is_signaled(&fence->base))
+		return 0;
+
+	if (fence->svm_bo) {
+		WRITE_ONCE(fence->svm_bo->evicting, 1);
+		schedule_work(&fence->svm_bo->eviction_work);
+	}
+
+	return 0;
+}
+
+static void svm_range_evict_svm_bo_worker(struct work_struct *work)
+{
+	struct svm_range_bo *svm_bo;
+	struct kfd_process *p;
+	struct mm_struct *mm;
+
+	svm_bo = container_of(work, struct svm_range_bo, eviction_work);
+	if (!svm_bo_ref_unless_zero(svm_bo))
+		return; /* svm_bo was freed while eviction was pending */
+
+	/* svm_range_bo_release destroys this worker thread. So during
+	 * the lifetime of this thread, kfd_process and mm will be valid.
+	 */
+	p = container_of(svm_bo->svms, struct kfd_process, svms);
+	mm = p->mm;
+	if (!mm)
+		return;
+
+	mmap_read_lock(mm);
+	spin_lock(&svm_bo->list_lock);
+	while (!list_empty(&svm_bo->range_list)) {
+		struct svm_range *prange =
+				list_first_entry(&svm_bo->range_list,
+						struct svm_range, svm_bo_list);
+		list_del_init(&prange->svm_bo_list);
+		spin_unlock(&svm_bo->list_lock);
+
+		pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+			 prange->start, prange->last);
+
+		mutex_lock(&prange->migrate_mutex);
+		svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
+
+		mutex_lock(&prange->lock);
+		prange->svm_bo = NULL;
+		mutex_unlock(&prange->lock);
+
+		mutex_unlock(&prange->migrate_mutex);
+
+		spin_lock(&svm_bo->list_lock);
+	}
+	spin_unlock(&svm_bo->list_lock);
+	mmap_read_unlock(mm);
+
+	dma_fence_signal(&svm_bo->eviction_fence->base);
+	/* This is the last reference to svm_bo, after svm_range_vram_node_free
+	 * has been called in svm_migrate_vram_to_ram
+	 */
+	WARN_ONCE(kref_read(&svm_bo->kref) != 1, "This was not the last reference\n");
+	svm_range_bo_unref(svm_bo);
+}
+
 static int
 svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
 		   uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
drivers/gpu/drm/amd/amdkfd/kfd_svm.h +9 −4
@@ -38,6 +38,10 @@ struct svm_range_bo {
 	struct kref			kref;
 	struct list_head		range_list; /* all svm ranges shared this bo */
 	spinlock_t			list_lock;
+	struct amdgpu_amdkfd_fence	*eviction_fence;
+	struct work_struct		eviction_work;
+	struct svm_range_list		*svms;
+	uint32_t			evicting;
 };
 
 enum svm_work_list_ops {
@@ -157,6 +161,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
 			       struct svm_range *prange);
 int svm_range_restore_pages(struct amdgpu_device *adev,
 			    unsigned int pasid, uint64_t addr);
+int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence);
 void svm_range_add_list_work(struct svm_range_list *svms,
 			     struct svm_range *prange, struct mm_struct *mm,
 			     enum svm_work_list_ops op);
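
Note: svm_range_validate_svm_bo() and svm_range_evict_svm_bo_worker()
rely on the pre-existing helper svm_bo_ref_unless_zero() in kfd_svm.c,
which only takes a reference while the refcount is still non-zero,
i.e. while svm_range_bo_release() has not started tearing the BO down.
Roughly:

	static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
	{
		/* kref_get_unless_zero() fails once the last reference is
		 * gone, so an svm_bo already in svm_range_bo_release()
		 * cannot be resurrected.
		 */
		if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
			return false;

		return true;
	}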