Commit bc9ffef3 authored by Peter Zijlstra's avatar Peter Zijlstra
Browse files

sched/core: Simplify core-wide task selection



Tao suggested a two-pass task selection to avoid the retry loop.

Not only does it avoid the retry loop, it results in *much* simpler
code.

This also fixes an issue spotted by Josh Don where, for SMT3+, we can
forget to update max on the first pass and get to do an extra round.

Suggested-by: default avatarTao Zhou <tao.zhou@linux.dev>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarJosh Don <joshdon@google.com>
Reviewed-by: default avatarVineeth Pillai (Microsoft) <vineethrp@gmail.com>
Link: https://lkml.kernel.org/r/YSS9+k1teA9oPEKl@hirez.programming.kicks-ass.net
parent c33627e9
Loading
Loading
Loading
Loading
+45 −111
Original line number Diff line number Diff line
@@ -5580,8 +5580,7 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
			return p;
	}

	/* The idle class should always have a runnable task: */
	BUG();
	BUG(); /* The idle class should always have a runnable task. */
}

#ifdef CONFIG_SCHED_CORE
@@ -5603,54 +5602,18 @@ static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
	return a->core_cookie == b->core_cookie;
}

// XXX fairness/fwd progress conditions
/*
 * Returns
 * - NULL if there is no runnable task for this class.
 * - the highest priority task for this runqueue if it matches
 *   rq->core->core_cookie or its priority is greater than max.
 * - Else returns idle_task.
 */
static struct task_struct *
pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max, bool in_fi)
static inline struct task_struct *pick_task(struct rq *rq)
{
	struct task_struct *class_pick, *cookie_pick;
	unsigned long cookie = rq->core->core_cookie;

	class_pick = class->pick_task(rq);
	if (!class_pick)
		return NULL;

	if (!cookie) {
		/*
		 * If class_pick is tagged, return it only if it has
		 * higher priority than max.
		 */
		if (max && class_pick->core_cookie &&
		    prio_less(class_pick, max, in_fi))
			return idle_sched_class.pick_task(rq);
	const struct sched_class *class;
	struct task_struct *p;

		return class_pick;
	for_each_class(class) {
		p = class->pick_task(rq);
		if (p)
			return p;
	}

	/*
	 * If class_pick is idle or matches cookie, return early.
	 */
	if (cookie_equals(class_pick, cookie))
		return class_pick;

	cookie_pick = sched_core_find(rq, cookie);

	/*
	 * If class > max && class > cookie, it is the highest priority task on
	 * the core (so far) and it must be selected, otherwise we must go with
	 * the cookie pick in order to satisfy the constraint.
	 */
	if (prio_less(cookie_pick, class_pick, in_fi) &&
	    (!max || prio_less(max, class_pick, in_fi)))
		return class_pick;

	return cookie_pick;
	BUG(); /* The idle class should always have a runnable task. */
}

extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
@@ -5658,11 +5621,12 @@ extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_f
static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	struct task_struct *next, *max = NULL;
	const struct sched_class *class;
	struct task_struct *next, *p, *max = NULL;
	const struct cpumask *smt_mask;
	bool fi_before = false;
	int i, j, cpu, occ = 0;
	unsigned long cookie;
	int i, cpu, occ = 0;
	struct rq *rq_i;
	bool need_sync;

	if (!sched_core_enabled(rq))
@@ -5735,12 +5699,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
	 * and there are no cookied tasks running on siblings.
	 */
	if (!need_sync) {
		for_each_class(class) {
			next = class->pick_task(rq);
			if (next)
				break;
		}

		next = pick_task(rq);
		if (!next->core_cookie) {
			rq->core_pick = NULL;
			/*
@@ -5753,76 +5712,51 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		}
	}

	for_each_cpu(i, smt_mask) {
		struct rq *rq_i = cpu_rq(i);

		rq_i->core_pick = NULL;
	/*
	 * For each thread: do the regular task pick and find the max prio task
	 * amongst them.
	 *
	 * Tie-break prio towards the current CPU
	 */
	for_each_cpu_wrap(i, smt_mask, cpu) {
		rq_i = cpu_rq(i);

		if (i != cpu)
			update_rq_clock(rq_i);
	}

	/*
	 * Try and select tasks for each sibling in descending sched_class
	 * order.
	 */
	for_each_class(class) {
again:
		for_each_cpu_wrap(i, smt_mask, cpu) {
			struct rq *rq_i = cpu_rq(i);
			struct task_struct *p;
		p = rq_i->core_pick = pick_task(rq_i);
		if (!max || prio_less(max, p, fi_before))
			max = p;
	}

			if (rq_i->core_pick)
				continue;
	cookie = rq->core->core_cookie = max->core_cookie;

	/*
			 * If this sibling doesn't yet have a suitable task to
			 * run; ask for the most eligible task, given the
			 * highest priority task already selected for this
			 * core.
	 * For each thread: try and find a runnable task that matches @max or
	 * force idle.
	 */
			p = pick_task(rq_i, class, max, fi_before);
			if (!p)
				continue;
	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		p = rq_i->core_pick;

			if (!is_task_rq_idle(p))
				occ++;
		if (!cookie_equals(p, cookie)) {
			p = NULL;
			if (cookie)
				p = sched_core_find(rq_i, cookie);
			if (!p)
				p = idle_sched_class.pick_task(rq_i);
		}

		rq_i->core_pick = p;
			if (rq_i->idle == p && rq_i->nr_running) {

		if (p == rq_i->idle) {
			if (rq_i->nr_running) {
				rq->core->core_forceidle = true;
				if (!fi_before)
					rq->core->core_forceidle_seq++;
			}

			/*
			 * If this new candidate is of higher priority than the
			 * previous; and they're incompatible; we need to wipe
			 * the slate and start over. pick_task makes sure that
			 * p's priority is more than max if it doesn't match
			 * max's cookie.
			 *
			 * NOTE: this is a linear max-filter and is thus bounded
			 * in execution time.
			 */
			if (!max || !cookie_match(max, p)) {
				struct task_struct *old_max = max;

				rq->core->core_cookie = p->core_cookie;
				max = p;

				if (old_max) {
					rq->core->core_forceidle = false;
					for_each_cpu(j, smt_mask) {
						if (j == i)
							continue;

						cpu_rq(j)->core_pick = NULL;
					}
					occ = 1;
					goto again;
				}
			}
		} else {
			occ++;
		}
	}

@@ -5842,7 +5776,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
	 * non-matching user state.
	 */
	for_each_cpu(i, smt_mask) {
		struct rq *rq_i = cpu_rq(i);
		rq_i = cpu_rq(i);

		/*
		 * An online sibling might have gone offline before a task