Commit 1f4b2aca authored by Dave Airlie
Browse files

Merge tag 'drm-intel-gt-next-2020-09-07' of...

Merge tag 'drm-intel-gt-next-2020-09-07' of git://anongit.freedesktop.org/drm/drm-intel

 into drm-next

(Same content as drm-intel-gt-next-2020-09-04-3, S-o-b's added)

UAPI Changes:
(- Potential implicit changes from WW locking refactoring)

Cross-subsystem Changes:
(- WW locking changes should align the i915 locking more with others)

Driver Changes:

- MAJOR: Apply WW locking across the driver (Maarten)

- Reverts for 5 commits to make applying WW locking faster (Maarten)
- Disable preparser around invalidations on Tigerlake for non-RCS engines (Chris)
- Add missing dma_fence_put() for error case of syncobj timeline (Chris)
- Parse command buffer earlier in eb_relocate(slow) to facilitate backoff (Maarten)
- Pin engine before pinning all objects (Maarten)
- Rework intel_context pinning to do everything outside of pin_mutex (Maarten)

- Avoid tracking GEM context until registered (Cc: stable, Chris)
- Provide a fastpath for waiting on vma bindings (Chris)
- Fixes to preempt-to-busy mechanism (Chris)
- Distinguish the virtual breadcrumbs from the irq breadcrumbs (Chris)
- Switch to object allocations for page directories (Chris)
- Hold context/request reference while breadcrumbs are active (Chris)
- Make sure execbuffer always passes ww state to i915_vma_pin (Maarten)

- Code refactoring to facilitate use of WW locking (Maarten)
- Locking refactoring to use more granular locking (Maarten, Chris)
- Support for multiple pinned timelines per engine (Chris)
- Move complication of I915_GEM_THROTTLE to the ioctl from general code (Chris)
- Make active tracking/vma page-directory stash work preallocated (Chris)
- Avoid flushing submission tasklet too often (Chris)
- Reduce context termination list iteration guard to RCU (Chris)
- Reductions to locking contention (Chris)
- Fixes for issues found by CI (Chris)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Joonas Lahtinen <jlahtine@jlahtine-mobl.ger.corp.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200907130039.GA27766@jlahtine-mobl.ger.corp.intel.com
parents 61d98185 e0ee152f
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -2311,7 +2311,7 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb,
void intel_unpin_fb_vma(struct i915_vma *vma, unsigned long flags)
{
	i915_gem_object_lock(vma->obj);
	i915_gem_object_lock(vma->obj, NULL);
	if (flags & PLANE_HAS_FENCE)
		i915_vma_unpin_fence(vma);
	i915_gem_object_unpin_from_display_plane(vma);
@@ -3451,7 +3451,7 @@ initial_plane_vma(struct drm_i915_private *i915,
	if (IS_ERR(vma))
		goto err_obj;
	if (i915_ggtt_pin(vma, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
	if (i915_ggtt_pin(vma, NULL, 0, PIN_MAPPABLE | PIN_OFFSET_FIXED | base))
		goto err_obj;
	if (i915_gem_object_is_tiled(obj) &&
@@ -17194,7 +17194,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
	if (!intel_fb->frontbuffer)
		return -ENOMEM;
	i915_gem_object_lock(obj);
	i915_gem_object_lock(obj, NULL);
	tiling = i915_gem_object_get_tiling(obj);
	stride = i915_gem_object_get_stride(obj);
	i915_gem_object_unlock(obj);
+67 −22
Original line number Diff line number Diff line
@@ -32,12 +32,13 @@ static void vma_clear_pages(struct i915_vma *vma)
	vma->pages = NULL;
}

static int vma_bind(struct i915_address_space *vm,
static void vma_bind(struct i915_address_space *vm,
		     struct i915_vm_pt_stash *stash,
		     struct i915_vma *vma,
		     enum i915_cache_level cache_level,
		     u32 flags)
{
	return vm->vma_ops.bind_vma(vm, vma, cache_level, flags);
	vm->vma_ops.bind_vma(vm, stash, vma, cache_level, flags);
}

static void vma_unbind(struct i915_address_space *vm, struct i915_vma *vma)
@@ -157,6 +158,7 @@ static void clear_pages_worker(struct work_struct *work)
	struct clear_pages_work *w = container_of(work, typeof(*w), work);
	struct drm_i915_gem_object *obj = w->sleeve->vma->obj;
	struct i915_vma *vma = w->sleeve->vma;
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	struct i915_vma *batch;
	int err = w->dma.error;
@@ -172,17 +174,20 @@ static void clear_pages_worker(struct work_struct *work)
	obj->read_domains = I915_GEM_GPU_DOMAINS;
	obj->write_domain = 0;

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
	i915_gem_ww_ctx_init(&ww, false);
	intel_engine_pm_get(w->ce->engine);
retry:
	err = intel_context_pin_ww(w->ce, &ww);
	if (err)
		goto out_signal;

	batch = intel_emit_vma_fill_blt(w->ce, vma, w->value);
	batch = intel_emit_vma_fill_blt(w->ce, vma, &ww, w->value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
		goto out_ctx;
	}

	rq = intel_context_create_request(w->ce);
	rq = i915_request_create(w->ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
@@ -224,9 +229,19 @@ static void clear_pages_worker(struct work_struct *work)
	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(w->ce, batch);
out_unpin:
	i915_vma_unpin(vma);
out_ctx:
	intel_context_unpin(w->ce);
out_signal:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	i915_vma_unpin(w->sleeve->vma);
	intel_engine_pm_put(w->ce->engine);

	if (unlikely(err)) {
		dma_fence_set_error(&w->dma, err);
		dma_fence_signal(&w->dma);
@@ -234,6 +249,44 @@ static void clear_pages_worker(struct work_struct *work)
	}
}

static int pin_wait_clear_pages_work(struct clear_pages_work *w,
				     struct intel_context *ce)
{
	struct i915_vma *vma = w->sleeve->vma;
	struct i915_gem_ww_ctx ww;
	int err;

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err)
		goto out;

	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out;

	err = i915_sw_fence_await_reservation(&w->wait,
					      vma->obj->base.resv, NULL,
					      true, 0, I915_FENCE_GFP);
	if (err)
		goto err_unpin_vma;

	dma_resv_add_excl_fence(vma->obj->base.resv, &w->dma);

err_unpin_vma:
	if (err)
		i915_vma_unpin(vma);
out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	return err;
}

static int __i915_sw_fence_call
clear_pages_work_notify(struct i915_sw_fence *fence,
			enum i915_sw_fence_notify state)
@@ -287,17 +340,9 @@ int i915_gem_schedule_fill_pages_blt(struct drm_i915_gem_object *obj,
	dma_fence_init(&work->dma, &clear_pages_work_ops, &fence_lock, 0, 0);
	i915_sw_fence_init(&work->wait, clear_pages_work_notify);

	i915_gem_object_lock(obj);
	err = i915_sw_fence_await_reservation(&work->wait,
					      obj->base.resv, NULL, true, 0,
					      I915_FENCE_GFP);
	if (err < 0) {
	err = pin_wait_clear_pages_work(work, ce);
	if (err < 0)
		dma_fence_set_error(&work->dma, err);
	} else {
		dma_resv_add_excl_fence(obj->base.resv, &work->dma);
		err = 0;
	}
	i915_gem_object_unlock(obj);

	dma_fence_get(&work->dma);
	i915_sw_fence_commit(&work->wait);
+71 −34
Original line number Diff line number Diff line
@@ -439,29 +439,36 @@ static bool __cancel_engine(struct intel_engine_cs *engine)
	return __reset_engine(engine);
}

static struct intel_engine_cs *__active_engine(struct i915_request *rq)
static bool
__active_engine(struct i915_request *rq, struct intel_engine_cs **active)
{
	struct intel_engine_cs *engine, *locked;
	bool ret = false;

	/*
	 * Serialise with __i915_request_submit() so that it sees
	 * is-banned?, or we know the request is already inflight.
	 *
	 * Note that rq->engine is unstable, and so we double
	 * check that we have acquired the lock on the final engine.
	 */
	locked = READ_ONCE(rq->engine);
	spin_lock_irq(&locked->active.lock);
	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
		spin_unlock(&locked->active.lock);
		spin_lock(&engine->active.lock);
		locked = engine;
		spin_lock(&locked->active.lock);
	}

	engine = NULL;
	if (!i915_request_completed(rq)) {
		if (i915_request_is_active(rq) && rq->fence.error != -EIO)
		engine = rq->engine;
			*active = locked;
		ret = true;
	}

	spin_unlock_irq(&locked->active.lock);

	return engine;
	return ret;
}

static struct intel_engine_cs *active_engine(struct intel_context *ce)
@@ -472,17 +479,16 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
	if (!ce->timeline)
		return NULL;

	mutex_lock(&ce->timeline->mutex);
	list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
		if (i915_request_completed(rq))
			break;
	rcu_read_lock();
	list_for_each_entry_rcu(rq, &ce->timeline->requests, link) {
		if (i915_request_is_active(rq) && i915_request_completed(rq))
			continue;

		/* Check with the backend if the request is inflight */
		engine = __active_engine(rq);
		if (engine)
		if (__active_engine(rq, &engine))
			break;
	}
	mutex_unlock(&ce->timeline->mutex);
	rcu_read_unlock();

	return engine;
}
@@ -713,6 +719,7 @@ __create_context(struct drm_i915_private *i915)
	ctx->i915 = i915;
	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
	mutex_init(&ctx->mutex);
	INIT_LIST_HEAD(&ctx->link);

	spin_lock_init(&ctx->stale.lock);
	INIT_LIST_HEAD(&ctx->stale.engines);
@@ -740,10 +747,6 @@ __create_context(struct drm_i915_private *i915)
	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
		ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;

	spin_lock(&i915->gem.contexts.lock);
	list_add_tail(&ctx->link, &i915->gem.contexts.list);
	spin_unlock(&i915->gem.contexts.lock);

	return ctx;

err_free:
@@ -889,7 +892,7 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
	if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
		struct intel_timeline *timeline;

		timeline = intel_timeline_create(&i915->gt, NULL);
		timeline = intel_timeline_create(&i915->gt);
		if (IS_ERR(timeline)) {
			context_close(ctx);
			return ERR_CAST(timeline);
@@ -931,6 +934,7 @@ static int gem_context_register(struct i915_gem_context *ctx,
				struct drm_i915_file_private *fpriv,
				u32 *id)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	int ret;

@@ -949,8 +953,16 @@ static int gem_context_register(struct i915_gem_context *ctx,
	/* And finally expose ourselves to userspace via the idr */
	ret = xa_alloc(&fpriv->context_xa, id, ctx, xa_limit_32b, GFP_KERNEL);
	if (ret)
		put_pid(fetch_and_zero(&ctx->pid));
		goto err_pid;

	spin_lock(&i915->gem.contexts.lock);
	list_add_tail(&ctx->link, &i915->gem.contexts.list);
	spin_unlock(&i915->gem.contexts.lock);

	return 0;

err_pid:
	put_pid(fetch_and_zero(&ctx->pid));
	return ret;
}

@@ -1094,6 +1106,7 @@ I915_SELFTEST_DECLARE(static intel_engine_mask_t context_barrier_inject_fault);
static int context_barrier_task(struct i915_gem_context *ctx,
				intel_engine_mask_t engines,
				bool (*skip)(struct intel_context *ce, void *data),
				int (*pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data),
				int (*emit)(struct i915_request *rq, void *data),
				void (*task)(void *data),
				void *data)
@@ -1101,6 +1114,7 @@ static int context_barrier_task(struct i915_gem_context *ctx,
	struct context_barrier_task *cb;
	struct i915_gem_engines_iter it;
	struct i915_gem_engines *e;
	struct i915_gem_ww_ctx ww;
	struct intel_context *ce;
	int err = 0;

@@ -1138,10 +1152,21 @@ static int context_barrier_task(struct i915_gem_context *ctx,
		if (skip && skip(ce, data))
			continue;

		rq = intel_context_create_request(ce);
		i915_gem_ww_ctx_init(&ww, true);
retry:
		err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err;

		if (pin)
			err = pin(ce, &ww, data);
		if (err)
			goto err_unpin;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
			goto err_unpin;
		}

		err = 0;
@@ -1151,6 +1176,16 @@ static int context_barrier_task(struct i915_gem_context *ctx,
			err = i915_active_add_request(&cb->base, rq);

		i915_request_add(rq);
err_unpin:
		intel_context_unpin(ce);
err:
		if (err == -EDEADLK) {
			err = i915_gem_ww_ctx_backoff(&ww);
			if (!err)
				goto retry;
		}
		i915_gem_ww_ctx_fini(&ww);

		if (err)
			break;
	}
@@ -1206,6 +1241,17 @@ static void set_ppgtt_barrier(void *data)
	i915_vm_close(old);
}

static int pin_ppgtt_update(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void *data)
{
	struct i915_address_space *vm = ce->vm;

	if (!HAS_LOGICAL_RING_CONTEXTS(vm->i915))
		/* ppGTT is not part of the legacy context image */
		return gen6_ppgtt_pin(i915_vm_to_ppgtt(vm), ww);

	return 0;
}

static int emit_ppgtt_update(struct i915_request *rq, void *data)
{
	struct i915_address_space *vm = rq->context->vm;
@@ -1262,20 +1308,10 @@ static int emit_ppgtt_update(struct i915_request *rq, void *data)

static bool skip_ppgtt_update(struct intel_context *ce, void *data)
{
	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))
		return true;

	if (HAS_LOGICAL_RING_CONTEXTS(ce->engine->i915))
		return false;

	if (!atomic_read(&ce->pin_count))
		return true;

	/* ppGTT is not part of the legacy context image */
	if (gen6_ppgtt_pin(i915_vm_to_ppgtt(ce->vm)))
		return true;

	return false;
		return !ce->state;
	else
		return !atomic_read(&ce->pin_count);
}

static int set_ppgtt(struct drm_i915_file_private *file_priv,
@@ -1326,6 +1362,7 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
	 */
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_ppgtt_update,
				   pin_ppgtt_update,
				   emit_ppgtt_update,
				   set_ppgtt_barrier,
				   old);
+2 −2
Original line number Diff line number Diff line
@@ -128,7 +128,7 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire
	if (err)
		return err;

	err = i915_gem_object_lock_interruptible(obj);
	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

@@ -149,7 +149,7 @@ static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direct
	if (err)
		return err;

	err = i915_gem_object_lock_interruptible(obj);
	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out;

+48 −32
Original line number Diff line number Diff line
@@ -32,11 +32,17 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
	if (!i915_gem_object_is_framebuffer(obj))
		return;

	i915_gem_object_lock(obj);
	i915_gem_object_lock(obj, NULL);
	__i915_gem_object_flush_for_display(obj);
	i915_gem_object_unlock(obj);
}

void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_framebuffer(obj))
		__i915_gem_object_flush_for_display(obj);
}

/**
 * Moves a single object to the WC read, and possibly write domain.
 * @obj: object to act on
@@ -197,18 +203,12 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
	if (ret)
		return ret;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;

	/* Always invalidate stale cachelines */
	if (obj->cache_level != cache_level) {
		i915_gem_object_set_cache_coherency(obj, cache_level);
		obj->cache_dirty = true;
	}

	i915_gem_object_unlock(obj);

	/* The cache-level will be applied when each vma is rebound. */
	return i915_gem_object_unbind(obj,
				      I915_GEM_OBJECT_UNBIND_ACTIVE |
@@ -293,7 +293,12 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
		goto out;
	}

	ret = i915_gem_object_lock_interruptible(obj, NULL);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cache_level(obj, level);
	i915_gem_object_unlock(obj);

out:
	i915_gem_object_put(obj);
@@ -313,6 +318,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     unsigned int flags)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_gem_ww_ctx ww;
	struct i915_vma *vma;
	int ret;

@@ -320,6 +326,11 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
	if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
		return ERR_PTR(-EINVAL);

	i915_gem_ww_ctx_init(&ww, true);
retry:
	ret = i915_gem_object_lock(obj, &ww);
	if (ret)
		goto err;
	/*
	 * The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
@@ -334,7 +345,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
					      HAS_WT(i915) ?
					      I915_CACHE_WT : I915_CACHE_NONE);
	if (ret)
		return ERR_PTR(ret);
		goto err;

	/*
	 * As the user may map the buffer once pinned in the display plane
@@ -347,18 +358,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
	vma = ERR_PTR(-ENOSPC);
	if ((flags & PIN_MAPPABLE) == 0 &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
					       flags |
					       PIN_MAPPABLE |
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0, alignment,
						  flags | PIN_MAPPABLE |
						  PIN_NONBLOCK);
	if (IS_ERR(vma))
		vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags);
	if (IS_ERR(vma))
		return vma;
	if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
		vma = i915_gem_object_ggtt_pin_ww(obj, &ww, view, 0,
						  alignment, flags);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);

	i915_gem_object_flush_if_display(obj);
	i915_gem_object_flush_if_display_locked(obj);

err:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);

	if (ret)
		return ERR_PTR(ret);

	return vma;
}
@@ -536,7 +560,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
	if (err)
		goto out;

	err = i915_gem_object_lock_interruptible(obj);
	err = i915_gem_object_lock_interruptible(obj, NULL);
	if (err)
		goto out_unpin;

@@ -576,19 +600,17 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;
	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -616,8 +638,6 @@ int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}

@@ -630,20 +650,18 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
	if (!i915_gem_object_has_struct_page(obj))
		return -ENODEV;

	ret = i915_gem_object_lock_interruptible(obj);
	if (ret)
		return ret;
	assert_object_held(obj);

	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
				   MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto err_unlock;
		return ret;

	ret = i915_gem_object_pin_pages(obj);
	if (ret)
		goto err_unlock;
		return ret;

	if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
	    !static_cpu_has(X86_FEATURE_CLFLUSH)) {
@@ -680,7 +698,5 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,

err_unpin:
	i915_gem_object_unpin_pages(obj);
err_unlock:
	i915_gem_object_unlock(obj);
	return ret;
}
Loading