Commit a1f091f8 authored by Christian König's avatar Christian König
Browse files

drm/ttm: switch to per device LRU lock



Instead of having a global lock for potentially less contention.

Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Tested-by: default avatarNirmoy Das <nirmoy.das@amd.com>
Reviewed-by: default avatarHuang Rui <ray.huang@amd.com>
Reviewed-by: default avatarMatthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/424010/
parent f9e2a03e
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -638,15 +638,15 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
	struct amdgpu_vm_bo_base *bo_base;

	if (vm->bulk_moveable) {
		spin_lock(&ttm_glob.lru_lock);
		spin_lock(&adev->mman.bdev.lru_lock);
		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
		spin_unlock(&ttm_glob.lru_lock);
		spin_unlock(&adev->mman.bdev.lru_lock);
		return;
	}

	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));

	spin_lock(&ttm_glob.lru_lock);
	spin_lock(&adev->mman.bdev.lru_lock);
	list_for_each_entry(bo_base, &vm->idle, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

@@ -660,7 +660,7 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
						&bo->shadow->tbo.mem,
						&vm->lru_bulk_move);
	}
	spin_unlock(&ttm_glob.lru_lock);
	spin_unlock(&adev->mman.bdev.lru_lock);

	vm->bulk_moveable = true;
}
+1 −4
Original line number Diff line number Diff line
@@ -426,16 +426,13 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release)
		       release->id | 0xf0000000, release->base.seqno);
	trace_dma_fence_emit(&release->base);

	spin_lock(&ttm_glob.lru_lock);

	list_for_each_entry(entry, &release->bos, head) {
		bo = entry->bo;

		dma_resv_add_shared_fence(bo->base.resv, &release->base);
		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
		ttm_bo_move_to_lru_tail_unlocked(bo);
		dma_resv_unlock(bo->base.resv);
	}
	spin_unlock(&ttm_glob.lru_lock);
	ww_acquire_fini(&release->ticket);
}
+23 −26
Original line number Diff line number Diff line
@@ -243,9 +243,9 @@ static int ttm_bo_individualize_resv(struct ttm_buffer_object *bo)
		 * reference it any more. The only tricky case is the trylock on
		 * the resv object while holding the lru_lock.
		 */
		spin_lock(&ttm_glob.lru_lock);
		spin_lock(&bo->bdev->lru_lock);
		bo->base.resv = &bo->base._resv;
		spin_unlock(&ttm_glob.lru_lock);
		spin_unlock(&bo->bdev->lru_lock);
	}

	return r;
@@ -304,7 +304,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,

		if (unlock_resv)
			dma_resv_unlock(bo->base.resv);
		spin_unlock(&ttm_glob.lru_lock);
		spin_unlock(&bo->bdev->lru_lock);

		lret = dma_resv_wait_timeout_rcu(resv, true, interruptible,
						 30 * HZ);
@@ -314,7 +314,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
		else if (lret == 0)
			return -EBUSY;

		spin_lock(&ttm_glob.lru_lock);
		spin_lock(&bo->bdev->lru_lock);
		if (unlock_resv && !dma_resv_trylock(bo->base.resv)) {
			/*
			 * We raced, and lost, someone else holds the reservation now,
@@ -324,7 +324,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
			 * delayed destruction would succeed, so just return success
			 * here.
			 */
			spin_unlock(&ttm_glob.lru_lock);
			spin_unlock(&bo->bdev->lru_lock);
			return 0;
		}
		ret = 0;
@@ -333,13 +333,13 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
	if (ret || unlikely(list_empty(&bo->ddestroy))) {
		if (unlock_resv)
			dma_resv_unlock(bo->base.resv);
		spin_unlock(&ttm_glob.lru_lock);
		spin_unlock(&bo->bdev->lru_lock);
		return ret;
	}

	ttm_bo_del_from_lru(bo);
	list_del_init(&bo->ddestroy);
	spin_unlock(&ttm_glob.lru_lock);
	spin_unlock(&bo->bdev->lru_lock);
	ttm_bo_cleanup_memtype_use(bo);

	if (unlock_resv)
@@ -356,13 +356,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 */
bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
{
	struct ttm_global *glob = &ttm_glob;
	struct list_head removed;
	bool empty;

	INIT_LIST_HEAD(&removed);

	spin_lock(&glob->lru_lock);
	spin_lock(&bdev->lru_lock);
	while (!list_empty(&bdev->ddestroy)) {
		struct ttm_buffer_object *bo;

@@ -373,24 +372,24 @@ bool ttm_bo_delayed_delete(struct ttm_device *bdev, bool remove_all)
			continue;

		if (remove_all || bo->base.resv != &bo->base._resv) {
			spin_unlock(&glob->lru_lock);
			spin_unlock(&bdev->lru_lock);
			dma_resv_lock(bo->base.resv, NULL);

			spin_lock(&glob->lru_lock);
			spin_lock(&bdev->lru_lock);
			ttm_bo_cleanup_refs(bo, false, !remove_all, true);

		} else if (dma_resv_trylock(bo->base.resv)) {
			ttm_bo_cleanup_refs(bo, false, !remove_all, true);
		} else {
			spin_unlock(&glob->lru_lock);
			spin_unlock(&bdev->lru_lock);
		}

		ttm_bo_put(bo);
		spin_lock(&glob->lru_lock);
		spin_lock(&bdev->lru_lock);
	}
	list_splice_tail(&removed, &bdev->ddestroy);
	empty = list_empty(&bdev->ddestroy);
	spin_unlock(&glob->lru_lock);
	spin_unlock(&bdev->lru_lock);

	return empty;
}
@@ -425,7 +424,7 @@ static void ttm_bo_release(struct kref *kref)
		ttm_bo_flush_all_fences(bo);
		bo->deleted = true;

		spin_lock(&ttm_glob.lru_lock);
		spin_lock(&bo->bdev->lru_lock);

		/*
		 * Make pinned bos immediately available to
@@ -442,17 +441,17 @@ static void ttm_bo_release(struct kref *kref)

		kref_init(&bo->kref);
		list_add_tail(&bo->ddestroy, &bdev->ddestroy);
		spin_unlock(&ttm_glob.lru_lock);
		spin_unlock(&bo->bdev->lru_lock);

		schedule_delayed_work(&bdev->wq,
				      ((HZ / 100) < 1) ? 1 : HZ / 100);
		return;
	}

	spin_lock(&ttm_glob.lru_lock);
	spin_lock(&bo->bdev->lru_lock);
	ttm_bo_del_from_lru(bo);
	list_del(&bo->ddestroy);
	spin_unlock(&ttm_glob.lru_lock);
	spin_unlock(&bo->bdev->lru_lock);

	ttm_bo_cleanup_memtype_use(bo);
	dma_resv_unlock(bo->base.resv);
@@ -626,7 +625,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
	unsigned i;
	int ret;

	spin_lock(&ttm_glob.lru_lock);
	spin_lock(&bo->bdev->lru_lock);
	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
		list_for_each_entry(bo, &man->lru[i], lru) {
			bool busy;
@@ -663,7 +662,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
	if (!bo) {
		if (busy_bo && !ttm_bo_get_unless_zero(busy_bo))
			busy_bo = NULL;
		spin_unlock(&ttm_glob.lru_lock);
		spin_unlock(&bo->bdev->lru_lock);
		ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket);
		if (busy_bo)
			ttm_bo_put(busy_bo);
@@ -677,7 +676,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev,
		return ret;
	}

	spin_unlock(&ttm_glob.lru_lock);
	spin_unlock(&bo->bdev->lru_lock);

	ret = ttm_bo_evict(bo, ctx);
	if (locked)
@@ -777,10 +776,9 @@ static int ttm_bo_mem_placement(struct ttm_buffer_object *bo,
	mem->mem_type = place->mem_type;
	mem->placement = place->flags;

	spin_lock(&ttm_glob.lru_lock);
	spin_lock(&bo->bdev->lru_lock);
	ttm_bo_move_to_lru_tail(bo, mem, NULL);
	spin_unlock(&ttm_glob.lru_lock);

	spin_unlock(&bo->bdev->lru_lock);
	return 0;
}

@@ -1167,7 +1165,6 @@ EXPORT_SYMBOL(ttm_bo_wait);
int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
		   gfp_t gfp_flags)
{
	struct ttm_global *glob = &ttm_glob;
	bool locked;
	int ret;

@@ -1188,7 +1185,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,

	ttm_bo_del_from_lru(bo);
	/* TODO: Cleanup the locking */
	spin_unlock(&glob->lru_lock);
	spin_unlock(&bo->bdev->lru_lock);

	/*
	 * Move to system cached
+5 −7
Original line number Diff line number Diff line
@@ -81,7 +81,6 @@ static int ttm_global_init(void)
	ttm_pool_mgr_init(num_pages * 50 / 100);
	ttm_tt_mgr_init();

	spin_lock_init(&glob->lru_lock);
	glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32);

	if (unlikely(glob->dummy_read_page == NULL)) {
@@ -125,13 +124,12 @@ EXPORT_SYMBOL(ttm_global_swapout);
int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
		       gfp_t gfp_flags)
{
	struct ttm_global *glob = &ttm_glob;
	struct ttm_resource_manager *man;
	struct ttm_buffer_object *bo;
	unsigned i, j;
	int ret;

	spin_lock(&glob->lru_lock);
	spin_lock(&bdev->lru_lock);
	for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) {
		man = ttm_manager_type(bdev, i);
		if (!man || !man->use_tt)
@@ -156,7 +154,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
			}
		}
	}
	spin_unlock(&glob->lru_lock);
	spin_unlock(&bdev->lru_lock);
	return 0;
}
EXPORT_SYMBOL(ttm_device_swapout);
@@ -223,6 +221,7 @@ int ttm_device_init(struct ttm_device *bdev, struct ttm_device_funcs *funcs,

	bdev->vma_manager = vma_manager;
	INIT_DELAYED_WORK(&bdev->wq, ttm_device_delayed_workqueue);
	spin_lock_init(&bdev->lru_lock);
	INIT_LIST_HEAD(&bdev->ddestroy);
	bdev->dev_mapping = mapping;
	mutex_lock(&ttm_global_mutex);
@@ -235,7 +234,6 @@ EXPORT_SYMBOL(ttm_device_init);

void ttm_device_fini(struct ttm_device *bdev)
{
	struct ttm_global *glob = &ttm_glob;
	struct ttm_resource_manager *man;
	unsigned i;

@@ -252,11 +250,11 @@ void ttm_device_fini(struct ttm_device *bdev)
	if (ttm_bo_delayed_delete(bdev, true))
		pr_debug("Delayed destroy list was clean\n");

	spin_lock(&glob->lru_lock);
	spin_lock(&bdev->lru_lock);
	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i)
		if (list_empty(&man->lru[0]))
			pr_debug("Swap list %d was clean\n", i);
	spin_unlock(&glob->lru_lock);
	spin_unlock(&bdev->lru_lock);

	ttm_pool_fini(&bdev->pool);
	ttm_global_release();
+2 −6
Original line number Diff line number Diff line
@@ -51,14 +51,12 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
	if (list_empty(list))
		return;

	spin_lock(&ttm_glob.lru_lock);
	list_for_each_entry(entry, list, head) {
		struct ttm_buffer_object *bo = entry->bo;

		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
		ttm_bo_move_to_lru_tail_unlocked(bo);
		dma_resv_unlock(bo->base.resv);
	}
	spin_unlock(&ttm_glob.lru_lock);

	if (ticket)
		ww_acquire_fini(ticket);
@@ -154,7 +152,6 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
	if (list_empty(list))
		return;

	spin_lock(&ttm_glob.lru_lock);
	list_for_each_entry(entry, list, head) {
		struct ttm_buffer_object *bo = entry->bo;

@@ -162,10 +159,9 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
			dma_resv_add_shared_fence(bo->base.resv, fence);
		else
			dma_resv_add_excl_fence(bo->base.resv, fence);
		ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
		ttm_bo_move_to_lru_tail_unlocked(bo);
		dma_resv_unlock(bo->base.resv);
	}
	spin_unlock(&ttm_glob.lru_lock);
	if (ticket)
		ww_acquire_fini(ticket);
}
Loading