Commit bb6287cb authored by Tvrtko Ursulin's avatar Tvrtko Ursulin
Browse files

drm/i915: Track context current active time



Track context active (on hardware) status together with the start
timestamp.

This will be used to provide better granularity of context
runtime reporting in conjunction with already tracked pphwsp accumulated
runtime.

The latter is only updated on context save so does not give us visibility
to any currently executing work.

As part of the patch the existing runtime tracking data is moved under the
new ce->stats member and updated under the seqlock. This provides the
ability to atomically read out accumulated plus active runtime.

v2:
 * Rename and make __intel_context_get_active_time unlocked.

v3:
 * Use GRAPHICS_VER.

Signed-off-by: default avatarTvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Aravind Iddamsetty <aravind.iddamsetty@intel.com> #  v1
Reviewed-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: default avatarUmesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220401142205.3123159-6-tvrtko.ursulin@linux.intel.com
parent 49bd54b3
Loading
Loading
Loading
Loading
+26 −1
Original line number Diff line number Diff line
@@ -386,7 +386,7 @@ intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
	ce->ring = NULL;
	ce->ring_size = SZ_4K;

	ewma_runtime_init(&ce->runtime.avg);
	ewma_runtime_init(&ce->stats.runtime.avg);

	ce->vm = i915_vm_get(engine->gt->vm);

@@ -576,6 +576,31 @@ void intel_context_bind_parent_child(struct intel_context *parent,
	child->parallel.parent = parent;
}

u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
{
	u64 runtime, in_flight;

	/* Accumulated runtime as saved by the HW on each context switch. */
	runtime = ce->stats.runtime.total;
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		runtime *= ce->engine->gt->clock_period_ns;

	/*
	 * If the context is currently on the HW, add the wall-clock time
	 * elapsed since it was scheduled in (stats.active is the start
	 * timestamp, or 0 when idle).
	 */
	in_flight = READ_ONCE(ce->stats.active);
	if (in_flight)
		in_flight = intel_context_clock() - in_flight;

	return runtime + in_flight;
}

u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
{
	/* Exponentially weighted moving average of per-switch runtime. */
	u64 ewma = ewma_runtime_read(&ce->stats.runtime.avg);

	/* Convert CS cycles to nanoseconds where the backend reports cycles. */
	if (ce->ops->flags & COPS_RUNTIME_CYCLES)
		ewma *= ce->engine->gt->clock_period_ns;

	return ewma;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif
+5 −10
Original line number Diff line number Diff line
@@ -351,18 +351,13 @@ intel_context_clear_nopreempt(struct intel_context *ce)
	clear_bit(CONTEXT_NOPREEMPT, &ce->flags);
}

/*
 * Runtime query helpers: moved out of line (see intel_context.c) so they
 * can combine the saved pphwsp runtime with the currently active period
 * under ce->stats.  Only the prototypes remain in the header.
 */
u64 intel_context_get_total_runtime_ns(const struct intel_context *ce);
u64 intel_context_get_avg_runtime_ns(struct intel_context *ce);

static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
/*
 * intel_context_clock - CPU timestamp source for context runtime tracking.
 *
 * As we mix CS cycles with CPU clocks, use the raw monotonic clock: it is
 * not subject to NTP adjustment and is cheap to read from any context.
 * (The rendered diff had fused the removed old body — which referenced an
 * out-of-scope @ce and left an unreachable second return — into this
 * function; this is the post-commit form.)
 */
static inline u64 intel_context_clock(void)
{
	return ktime_get_raw_fast_ns();
}

#endif /* __INTEL_CONTEXT_H__ */
+16 −8
Original line number Diff line number Diff line
@@ -35,6 +35,9 @@ struct intel_context_ops {
#define COPS_HAS_INFLIGHT_BIT 0
#define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)

#define COPS_RUNTIME_CYCLES_BIT 1
#define COPS_RUNTIME_CYCLES BIT(COPS_RUNTIME_CYCLES_BIT)

	int (*alloc)(struct intel_context *ce);

	void (*ban)(struct intel_context *ce, struct i915_request *rq);
@@ -134,6 +137,10 @@ struct intel_context {
	} lrc;
	u32 tag; /* cookie passed to HW to track this context on submission */

	/** stats: Context GPU engine busyness tracking. */
	struct intel_context_stats {
		u64 active;

		/* Time on GPU as tracked by the hw. */
		struct {
			struct ewma_runtime avg;
@@ -142,6 +149,7 @@ struct intel_context {
			I915_SELFTEST_DECLARE(u32 num_underflow);
			I915_SELFTEST_DECLARE(u32 max_underflow);
		} runtime;
	} stats;

	unsigned int active_count; /* protected by timeline->mutex */

+18 −5
Original line number Diff line number Diff line
@@ -624,8 +624,6 @@ static void __execlists_schedule_out(struct i915_request * const rq,
		GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag));
		__set_bit(ccid - 1, &engine->context_tag);
	}

	lrc_update_runtime(ce);
	intel_engine_context_out(engine);
	execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
	if (engine->fw_domain && !--engine->fw_active)
@@ -2004,8 +2002,23 @@ process_csb(struct intel_engine_cs *engine, struct i915_request **inactive)
	 * and merits a fresh timeslice. We reinstall the timer after
	 * inspecting the queue to see if we need to resubmit.
	 */
	if (*prev != *execlists->active) /* elide lite-restores */
	if (*prev != *execlists->active) { /* elide lite-restores */
		/*
		 * Note the inherent discrepancy between the HW runtime,
		 * recorded as part of the context switch, and the CPU
		 * adjustment for active contexts. We have to hope that
		 * the delay in processing the CS event is very small
		 * and consistent. It works to our advantage to have
		 * the CPU adjustment _undershoot_ (i.e. start later than)
		 * the CS timestamp so we never overreport the runtime
		 * and correct ourselves later when updating from HW.
		 */
		if (*prev)
			lrc_runtime_stop((*prev)->context);
		if (*execlists->active)
			lrc_runtime_start((*execlists->active)->context);
		new_timeslice(execlists);
	}

	return inactive;
}
@@ -2637,7 +2650,7 @@ execlists_create_parallel(struct intel_engine_cs **engines,
}

static const struct intel_context_ops execlists_context_ops = {
	.flags = COPS_HAS_INFLIGHT,
	.flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,

	.alloc = execlists_context_alloc,

@@ -3695,7 +3708,7 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
}

static const struct intel_context_ops virtual_context_ops = {
	.flags = COPS_HAS_INFLIGHT,
	.flags = COPS_HAS_INFLIGHT | COPS_RUNTIME_CYCLES,

	.alloc = virtual_context_alloc,

+4 −0
Original line number Diff line number Diff line
@@ -161,6 +161,10 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
	if (gt->clock_frequency)
		gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);

	/* Icelake appears to use another fixed frequency for CTX_TIMESTAMP */
	if (GRAPHICS_VER(gt->i915) == 11)
		gt->clock_period_ns = NSEC_PER_SEC / 13750000;

	GT_TRACE(gt,
		 "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
		 gt->clock_frequency / 1000,
Loading