Commit 12ca695d authored by Maarten Lankhorst's avatar Maarten Lankhorst Committed by Daniel Vetter
Browse files

drm/i915: Do not share hwsp across contexts any more, v8.



Instead of sharing pages with breadcrumbs, give each timeline a
single page. This allows unrelated timelines not to share locks
any more during command submission.

As an additional benefit, seqno wraparound no longer requires
i915_vma_pin, which means we no longer need to worry about a
potential -EDEADLK at a point where we are ready to submit.

Changes since v1:
- Fix erroneous i915_vma_acquire that should be a i915_vma_release (ickle).
- Extra check for completion in intel_read_hwsp().
Changes since v2:
- Fix inconsistent indent in hwsp_alloc() (kbuild)
- memset entire cacheline to 0.
Changes since v3:
- Do same in intel_timeline_reset_seqno(), and clflush for good measure.
Changes since v4:
- Use refcounting on timeline, instead of relying on i915_active.
- Fix waiting on kernel requests.
Changes since v5:
- Bump amount of slots to maximum (256), for best wraparounds.
- Add hwsp_offset to i915_request to fix potential wraparound hang.
- Ensure timeline wrap test works with the changes.
- Assign hwsp in intel_timeline_read_hwsp() within the rcu lock to
  fix a hang.
Changes since v6:
- Rename i915_request_active_offset to i915_request_active_seqno(),
  and elaborate the function. (tvrtko)
Changes since v7:
- Move hunk to where it belongs. (jekstrand)
- Replace CACHELINE_BYTES with TIMELINE_SEQNO_BYTES. (jekstrand)

Signed-off-by: default avatarMaarten Lankhorst <maarten.lankhorst@linux.intel.com>
Reviewed-by: Thomas Hellström <thomas.hellstrom@intel.com> #v1
Reported-by: default avatarkernel test robot <lkp@intel.com>
Signed-off-by: default avatarDaniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20210323155059.628690-2-maarten.lankhorst@linux.intel.com
parent 547be6a4
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -143,7 +143,7 @@ static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

+4 −4
Original line number Diff line number Diff line
@@ -161,7 +161,7 @@ u32 *gen6_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
		 PIPE_CONTROL_DC_FLUSH_ENABLE |
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_timeline(rq)->hwsp_offset |
	*cs++ = i915_request_active_seqno(rq) |
		PIPE_CONTROL_GLOBAL_GTT;
	*cs++ = rq->fence.seqno;

@@ -359,7 +359,7 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
		 PIPE_CONTROL_QW_WRITE |
		 PIPE_CONTROL_GLOBAL_GTT_IVB |
		 PIPE_CONTROL_CS_STALL);
	*cs++ = i915_request_active_timeline(rq)->hwsp_offset;
	*cs++ = i915_request_active_seqno(rq);
	*cs++ = rq->fence.seqno;

	*cs++ = MI_USER_INTERRUPT;
@@ -374,7 +374,7 @@ u32 *gen7_emit_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
u32 *gen6_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
	*cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
@@ -394,7 +394,7 @@ u32 *gen7_emit_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
	int i;

	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(i915_request_active_timeline(rq)->hwsp_offset) != I915_GEM_HWS_SEQNO_ADDR);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH_DW | MI_INVALIDATE_TLB |
		MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+6 −7
Original line number Diff line number Diff line
@@ -338,15 +338,14 @@ static u32 preempt_address(struct intel_engine_cs *engine)

static u32 hwsp_offset(const struct i915_request *rq)
{
	const struct intel_timeline_cacheline *cl;
	const struct intel_timeline *tl;

	/* Before the request is executed, the timeline/cachline is fixed */
	/* Before the request is executed, the timeline is fixed */
	tl = rcu_dereference_protected(rq->timeline,
				       !i915_request_signaled(rq));

	cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
	if (cl)
		return cl->ggtt_offset;

	return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
	/* See the comment in i915_request_active_seqno(). */
	return page_mask_bits(tl->hwsp_offset) + offset_in_page(rq->hwsp_seqno);
}

int gen8_emit_init_breadcrumb(struct i915_request *rq)
+1 −0
Original line number Diff line number Diff line
@@ -763,6 +763,7 @@ static int measure_breadcrumb_dw(struct intel_context *ce)
	frame->rq.engine = engine;
	frame->rq.context = ce;
	rcu_assign_pointer(frame->rq.timeline, ce->timeline);
	frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno;

	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
+0 −4
Original line number Diff line number Diff line
@@ -39,10 +39,6 @@ struct intel_gt {
	struct intel_gt_timelines {
		spinlock_t lock; /* protects active_list */
		struct list_head active_list;

		/* Pack multiple timelines' seqnos into the same page */
		spinlock_t hwsp_lock;
		struct list_head hwsp_free_list;
	} timelines;

	struct intel_gt_requests {
Loading