Commit c18636f7 authored by Chris Wilson's avatar Chris Wilson Committed by Joonas Lahtinen
Browse files

drm/i915: Remove requirement for holding i915_request.lock for breadcrumbs



Since the breadcrumb enabling/cancelling itself is serialised by the
breadcrumbs.irq_lock, with a bit of care we can remove the outer
serialisation with i915_request.lock for concurrent
dma_fence_enable_signaling(). This has the important side-effect of
eliminating the nested i915_request.lock within request submission.

The challenge in serialisation is around the unsubmission where we take
an active request that wants a breadcrumb on the signaling engine and
put it to sleep. We do not want a concurrent
dma_fence_enable_signaling() to attach a breadcrumb as we unsubmit, so
we must mark the request as no longer active before serialising with the
concurrent enable-signaling.

On retire, we serialise with the concurrent enable-signaling, but
instead of clearing ACTIVE, we mark it as SIGNALED.

Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200731154834.8378-1-chris@chris-wilson.co.uk


Signed-off-by: default avatarRodrigo Vivi <rodrigo.vivi@intel.com>
[Joonas: Rebased and reordered into drm-intel-gt-next branch]
Signed-off-by: default avatarJoonas Lahtinen <joonas.lahtinen@linux.intel.com>
parent af5c6fcf
Loading
Loading
Loading
Loading
+84 −46
Original line number Diff line number Diff line
@@ -220,17 +220,17 @@ static void signal_irq_work(struct irq_work *work)
	}
}

static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	struct intel_engine_cs *engine =
		container_of(b, struct intel_engine_cs, breadcrumbs);

	lockdep_assert_held(&b->irq_lock);
	if (b->irq_armed)
		return true;
		return;

	if (!intel_gt_pm_get_if_awake(engine->gt))
		return false;
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
@@ -250,8 +250,6 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)

	if (!b->irq_enabled++)
		irq_enable(engine);

	return true;
}

void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -310,25 +308,16 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
}

bool i915_request_enable_breadcrumb(struct i915_request *rq)
static void insert_breadcrumb(struct i915_request *rq,
			      struct intel_breadcrumbs *b)
{
	lockdep_assert_held(&rq->lock);

	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
		struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

		spin_lock(&b->irq_lock);

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
			goto unlock;
		return;

		if (!__intel_breadcrumbs_arm_irq(b))
			goto unlock;
	__intel_breadcrumbs_arm_irq(b);

	/*
	 * We keep the seqno in retirement order, so we can break
@@ -357,10 +346,61 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
	GEM_BUG_ON(!check_signal_order(ce, rq));

	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
unlock:
}

bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	/*
	 * rq->engine is locked by rq->engine->active.lock. That however
	 * is not known until after rq->engine has been dereferenced and
	 * the lock acquired. Hence we acquire the lock and then validate
	 * that rq->engine still matches the lock we hold for it.
	 *
	 * Here, we are using the breadcrumb lock as a proxy for the
	 * rq->engine->active.lock, and we know that since the breadcrumb
	 * will be serialised within i915_request_submit/i915_request_unsubmit,
	 * the engine cannot change while active as long as we hold the
	 * breadcrumb lock on that engine.
	 *
	 * From the dma_fence_enable_signaling() path, we are outside of the
	 * request submit/unsubmit path, and so we must be more careful to
	 * acquire the right lock.
	 */
	b = &READ_ONCE(rq->engine)->breadcrumbs;
	spin_lock(&b->irq_lock);
	while (unlikely(b != &READ_ONCE(rq->engine)->breadcrumbs)) {
		spin_unlock(&b->irq_lock);
		b = &READ_ONCE(rq->engine)->breadcrumbs;
		spin_lock(&b->irq_lock);
	}

	/*
	 * Now that we are finally serialised with request submit/unsubmit,
	 * [with b->irq_lock] and with i915_request_retire() [via checking
	 * SIGNALED with rq->lock] confirm the request is indeed active. If
	 * it is no longer active, the breadcrumb will be attached upon
	 * i915_request_submit().
	 */
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq, b);

	spin_unlock(&b->irq_lock);

	return !__request_completed(rq);
}

@@ -368,8 +408,6 @@ void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;

	lockdep_assert_held(&rq->lock);

	/*
	 * We must wait for b->irq_lock so that we know the interrupt handler
	 * has released its reference to the intel_context and has completed
+0 −14
Original line number Diff line number Diff line
@@ -1148,20 +1148,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
		} else {
			struct intel_engine_cs *owner = rq->context->engine;

			/*
			 * Decouple the virtual breadcrumb before moving it
			 * back to the virtual engine -- we don't want the
			 * request to complete in the background and try
			 * and cancel the breadcrumb on the virtual engine
			 * (instead of the old engine where it is linked)!
			 */
			if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
				     &rq->fence.flags)) {
				spin_lock_nested(&rq->lock,
						 SINGLE_DEPTH_NESTING);
				i915_request_cancel_breadcrumb(rq);
				spin_unlock(&rq->lock);
			}
			WRITE_ONCE(rq->engine, owner);
			owner->submit_request(rq);
			active = NULL;
+39 −27
Original line number Diff line number Diff line
@@ -301,14 +301,24 @@ bool i915_request_retire(struct i915_request *rq)
		dma_fence_signal_locked(&rq->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		i915_request_cancel_breadcrumb(rq);
	spin_unlock_irq(&rq->lock);

	if (i915_request_has_waitboost(rq)) {
		GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
		atomic_dec(&rq->engine->gt->rps.num_waiters);
	}
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
		__notify_execute_cb(rq);
	}

	/*
	 * We only loosely track inflight requests across preemption,
	 * and so we may find ourselves attempting to retire a _completed_
	 * request that we have removed from the HW and put back on a run
	 * queue.
	 *
	 * As we set I915_FENCE_FLAG_ACTIVE on the request, this should be
	 * after removing the breadcrumb and signaling it, so that we do not
	 * inadvertently attach the breadcrumb to a completed request.
	 */
	remove_from_engine(rq);
	GEM_BUG_ON(!llist_empty(&rq->execute_cb));
	spin_unlock_irq(&rq->lock);

@@ -547,20 +557,22 @@ bool __i915_request_submit(struct i915_request *request)
		clear_bit(I915_FENCE_FLAG_PQUEUE, &request->fence.flags);
	}

	/* We may be recursing from the signal callback of another i915 fence */
	if (!i915_request_signaled(request)) {
		spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	/*
	 * XXX Rollback bonded-execution on __i915_request_unsubmit()?
	 *
	 * In the future, perhaps when we have an active time-slicing scheduler,
	 * it will be interesting to unsubmit parallel execution and remove
	 * busywaits from the GPU until their master is restarted. This is
	 * quite hairy, we have to carefully rollback the fence and do a
	 * preempt-to-idle cycle on the target engine, all the while the
	 * master execute_cb may refire.
	 */
	__notify_execute_cb(request);
		if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			     &request->fence.flags) &&

	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
	    !i915_request_enable_breadcrumb(request))
		intel_engine_signal_breadcrumbs(engine);

		spin_unlock(&request->lock);
		GEM_BUG_ON(!llist_empty(&request->execute_cb));
	}

	return result;
}

@@ -581,27 +593,27 @@ void __i915_request_unsubmit(struct i915_request *request)
{
	struct intel_engine_cs *engine = request->engine;

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 */
	RQ_TRACE(request, "\n");

	GEM_BUG_ON(!irqs_disabled());
	lockdep_assert_held(&engine->active.lock);

	/*
	 * Only unwind in reverse order, required so that the per-context list
	 * is kept in seqno/ring order.
	 * Before we remove this breadcrumb from the signal list, we have
	 * to ensure that a concurrent dma_fence_enable_signaling() does not
	 * attach itself. We first mark the request as no longer active and
	 * make sure that is visible to other cores, and then remove the
	 * breadcrumb if attached.
	 */

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);

	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit_unlock(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		i915_request_cancel_breadcrumb(request);

	GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
	clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);

	spin_unlock(&request->lock);

	/* We've already spun, don't charge on resubmitting. */
	if (request->sched.semaphores && i915_request_started(request))
		request->sched.semaphores = 0;