Commit 539f6512 authored by Peter Zijlstra

sched: Add core wide task selection and scheduling



Instead of only selecting a local task, select a task for all SMT
siblings for every reschedule on the core (irrespective which logical
CPU does the reschedule).
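
For readers skimming the diff below, here is a minimal userspace sketch of the constraint the patch enforces: pick the highest-priority runnable task across the core, then let each SMT sibling run only a task whose cookie is compatible with that pick, forcing the sibling idle otherwise. Every name in the sketch (demo_task, core_pick_demo) is invented for illustration; this is not the kernel implementation that follows.

/*
 * Minimal userspace sketch of the core-wide selection rule, not kernel
 * code: every name here (demo_task, core_pick_demo, ...) is made up
 * for illustration only.
 */
#include <stdio.h>
#include <stddef.h>

struct demo_task {
	const char	*name;
	int		prio;		/* higher value == higher priority */
	unsigned long	cookie;		/* 0 == untagged */
};

/*
 * rq[] holds one runnable candidate per SMT sibling (NULL == none).
 * picks[] receives what each sibling may run (NULL == forced idle).
 */
static const struct demo_task *core_pick_demo(const struct demo_task **rq,
					      int nr_siblings,
					      const struct demo_task **picks)
{
	const struct demo_task *max = NULL;
	int i;

	/* Find the highest-priority candidate across the whole core. */
	for (i = 0; i < nr_siblings; i++) {
		if (rq[i] && (!max || rq[i]->prio > max->prio))
			max = rq[i];
	}

	/* A sibling may only run a task whose cookie matches max's. */
	for (i = 0; i < nr_siblings; i++) {
		if (rq[i] && max && rq[i]->cookie == max->cookie)
			picks[i] = rq[i];
		else
			picks[i] = NULL;	/* forced idle */
	}

	return max;
}

int main(void)
{
	struct demo_task a = { "vm-A", 120, 1 };
	struct demo_task b = { "vm-B", 110, 2 };
	const struct demo_task *rq[2] = { &a, &b };
	const struct demo_task *picks[2];
	int i;

	core_pick_demo(rq, 2, picks);
	for (i = 0; i < 2; i++)
		printf("sibling %d runs %s\n", i,
		       picks[i] ? picks[i]->name : "idle");
	return 0;
}

The real code below additionally has to handle preemption, cross-class priority comparison (prio_less()), and siblings going offline mid-selection.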

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.557559654@infradead.org
parent 8a311c74
kernel/sched/core.c  +300 −1
@@ -5282,7 +5282,7 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 * Pick up the highest-prio task:
 */
static inline struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
__pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	const struct sched_class *class;
	struct task_struct *p;
@@ -5323,6 +5323,294 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
}

#ifdef CONFIG_SCHED_CORE
static inline bool is_task_rq_idle(struct task_struct *t)
{
	return (task_rq(t)->idle == t);
}

static inline bool cookie_equals(struct task_struct *a, unsigned long cookie)
{
	return is_task_rq_idle(a) || (a->core_cookie == cookie);
}

static inline bool cookie_match(struct task_struct *a, struct task_struct *b)
{
	if (is_task_rq_idle(a) || is_task_rq_idle(b))
		return true;

	return a->core_cookie == b->core_cookie;
}

// XXX fairness/fwd progress conditions
/*
 * Returns
 * - NULL if there is no runnable task for this class.
 * - the highest priority task for this runqueue if it matches
 *   rq->core->core_cookie or its priority is greater than max.
 * - The idle task otherwise.
 */
static struct task_struct *
pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max)
{
	struct task_struct *class_pick, *cookie_pick;
	unsigned long cookie = rq->core->core_cookie;

	class_pick = class->pick_task(rq);
	if (!class_pick)
		return NULL;

	if (!cookie) {
		/*
		 * If class_pick is tagged, return it only if it has
		 * higher priority than max.
		 */
		if (max && class_pick->core_cookie &&
		    prio_less(class_pick, max))
			return idle_sched_class.pick_task(rq);

		return class_pick;
	}

	/*
	 * If class_pick is idle or matches cookie, return early.
	 */
	if (cookie_equals(class_pick, cookie))
		return class_pick;

	cookie_pick = sched_core_find(rq, cookie);

	/*
	 * If class_pick > max && class_pick > cookie_pick, it is the highest
	 * priority task on the core (so far) and it must be selected, otherwise
	 * we must go with the cookie pick in order to satisfy the constraint.
	 */
	if (prio_less(cookie_pick, class_pick) &&
	    (!max || prio_less(max, class_pick)))
		return class_pick;

	return cookie_pick;
}

static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	struct task_struct *next, *max = NULL;
	const struct sched_class *class;
	const struct cpumask *smt_mask;
	bool need_sync;
	int i, j, cpu;

	if (!sched_core_enabled(rq))
		return __pick_next_task(rq, prev, rf);

	cpu = cpu_of(rq);

	/* Stopper task is switching into idle, no need for core-wide selection. */
	if (cpu_is_offline(cpu)) {
		/*
		 * Reset core_pick so that we don't enter the fastpath when
		 * coming online. The task in core_pick would already have been
		 * migrated to another CPU while this one was offline.
		 */
		rq->core_pick = NULL;
		return __pick_next_task(rq, prev, rf);
	}

	/*
	 * If there were no {en,de}queues since we picked (IOW, the task
	 * pointers are all still valid), and we haven't scheduled the last
	 * pick yet, do so now.
	 *
	 * rq->core_pick can be NULL if no selection was made for a CPU because
	 * it was either offline or went offline during a sibling's core-wide
	 * selection. In this case, do a core-wide selection.
	 */
	if (rq->core->core_pick_seq == rq->core->core_task_seq &&
	    rq->core->core_pick_seq != rq->core_sched_seq &&
	    rq->core_pick) {
		WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq);

		next = rq->core_pick;
		if (next != prev) {
			put_prev_task(rq, prev);
			set_next_task(rq, next);
		}

		rq->core_pick = NULL;
		return next;
	}

	put_prev_task_balance(rq, prev, rf);

	smt_mask = cpu_smt_mask(cpu);

	/*
	 * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq
	 *
	 * @task_seq guards the task state ({en,de}queues)
	 * @pick_seq is the @task_seq we did a selection on
	 * @sched_seq is the @pick_seq we scheduled
	 *
	 * However, preemptions can cause multiple picks on the same task set.
	 * 'Fix' this by also increasing @task_seq for every pick.
	 */
	rq->core->core_task_seq++;
	need_sync = !!rq->core->core_cookie;

	/* reset state */
	rq->core->core_cookie = 0UL;
	for_each_cpu(i, smt_mask) {
		struct rq *rq_i = cpu_rq(i);

		rq_i->core_pick = NULL;

		if (rq_i->core_forceidle) {
			need_sync = true;
			rq_i->core_forceidle = false;
		}

		if (i != cpu)
			update_rq_clock(rq_i);
	}

	/*
	 * Try to select tasks for each sibling in descending sched_class
	 * order.
	 */
	for_each_class(class) {
again:
		for_each_cpu_wrap(i, smt_mask, cpu) {
			struct rq *rq_i = cpu_rq(i);
			struct task_struct *p;

			if (rq_i->core_pick)
				continue;

			/*
			 * If this sibling doesn't yet have a suitable task to
			 * run, ask for the most eligible task, given the
			 * highest priority task already selected for this
			 * core.
			 */
			p = pick_task(rq_i, class, max);
			if (!p) {
				/*
				 * If there are no cookies, we don't need to
				 * bother with the other siblings.
				 * If the rest of the core is not running a tagged
				 * task, i.e. need_sync == 0, and the current CPU
				 * which called into the schedule() loop does not
				 * have any tasks for this class, skip selecting for
				 * other siblings since there's no point. We don't skip
				 * for RT/DL because that could make CFS force-idle RT.
				 */
				if (i == cpu && !need_sync && class == &fair_sched_class)
					goto next_class;

				continue;
			}

			/*
			 * Optimize the 'normal' case where there aren't any
			 * cookies and we don't need to sync up.
			 */
			if (i == cpu && !need_sync && !p->core_cookie) {
				next = p;
				goto done;
			}

			rq_i->core_pick = p;

			/*
			 * If this new candidate is of higher priority than the
			 * previous, and they're incompatible, we need to wipe
			 * the slate and start over. pick_task makes sure that
			 * p's priority is more than max if it doesn't match
			 * max's cookie.
			 *
			 * NOTE: this is a linear max-filter and is thus bounded
			 * in execution time.
			 */
			if (!max || !cookie_match(max, p)) {
				struct task_struct *old_max = max;

				rq->core->core_cookie = p->core_cookie;
				max = p;

				if (old_max) {
					for_each_cpu(j, smt_mask) {
						if (j == i)
							continue;

						cpu_rq(j)->core_pick = NULL;
					}
					goto again;
				} else {
					/*
					 * Once we select a task for a cpu, we
					 * should not be doing an unconstrained
					 * pick because it might starve a task
					 * on a forced idle cpu.
					 */
					need_sync = true;
				}

			}
		}
next_class:;
	}

	rq->core->core_pick_seq = rq->core->core_task_seq;
	next = rq->core_pick;
	rq->core_sched_seq = rq->core->core_pick_seq;

	/* Something should have been selected for the current CPU */
	WARN_ON_ONCE(!next);

	/*
	 * Reschedule siblings
	 *
	 * NOTE: L1TF -- at this point we're no longer running the old task and
	 * sending an IPI (below) ensures the sibling will no longer be running
	 * their task. This ensures there is no inter-sibling overlap between
	 * non-matching user state.
	 */
	for_each_cpu(i, smt_mask) {
		struct rq *rq_i = cpu_rq(i);

		/*
		 * An online sibling might have gone offline before a task
		 * could be picked for it, or it might be offline but later
		 * happen to come online, but it's too late and nothing was
		 * picked for it.  That's OK - it will pick tasks for itself,
		 * so ignore it.
		 */
		if (!rq_i->core_pick)
			continue;

		if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running)
			rq_i->core_forceidle = true;

		if (i == cpu) {
			rq_i->core_pick = NULL;
			continue;
		}

		/* Did we break L1TF mitigation requirements? */
		WARN_ON_ONCE(!cookie_match(next, rq_i->core_pick));

		if (rq_i->curr == rq_i->core_pick) {
			rq_i->core_pick = NULL;
			continue;
		}

		resched_curr(rq_i);
	}

done:
	set_next_task(rq, next);
	return next;
}

static inline void sched_core_cpu_starting(unsigned int cpu)
{
@@ -5354,6 +5642,12 @@ static inline void sched_core_cpu_starting(unsigned int cpu)

static inline void sched_core_cpu_starting(unsigned int cpu) {}

static struct task_struct *
pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
	return __pick_next_task(rq, prev, rf);
}

#endif /* CONFIG_SCHED_CORE */

/*
@@ -8609,7 +8903,12 @@ void __init sched_init(void)

#ifdef CONFIG_SCHED_CORE
		rq->core = NULL;
		rq->core_pick = NULL;
		rq->core_enabled = 0;
		rq->core_tree = RB_ROOT;
		rq->core_forceidle = false;

		rq->core_cookie = 0UL;
#endif
	}

kernel/sched/sched.h  +5 −1
@@ -1079,11 +1079,16 @@ struct rq {
#ifdef CONFIG_SCHED_CORE
	/* per rq */
	struct rq		*core;
	struct task_struct	*core_pick;
	unsigned int		core_enabled;
	unsigned int		core_sched_seq;
	struct rb_root		core_tree;
	unsigned char		core_forceidle;

	/* shared state */
	unsigned int		core_task_seq;
	unsigned int		core_pick_seq;
	unsigned long		core_cookie;
#endif
};

@@ -2060,7 +2065,6 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev)

static inline void set_next_task(struct rq *rq, struct task_struct *next)
{
	WARN_ON_ONCE(rq->curr != next);
	next->sched_class->set_next_task(rq, next, false);
}
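
The three sequence counters added to struct rq above drive the fastpath at the top of pick_next_task(): a sibling may reuse the cached core_pick only while pick_seq still matches task_seq (no enqueue/dequeue has invalidated the selection) and it has not yet consumed that pick (sched_seq lags pick_seq). A rough userspace sketch of that handshake follows; demo_core, demo_rq and can_use_cached_pick() are invented names, not kernel APIs.

/*
 * Userspace sketch of the sequence-counter handshake, with invented
 * names (demo_core, demo_rq, can_use_cached_pick); not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

struct demo_core {
	unsigned int task_seq;	/* bumped on every enqueue/dequeue (and pick) */
	unsigned int pick_seq;	/* task_seq the last core-wide pick was made against */
};

struct demo_rq {
	struct demo_core *core;
	unsigned int sched_seq;	/* pick_seq this CPU last scheduled */
	bool have_cached_pick;	/* stand-in for rq->core_pick != NULL */
};

/* May this sibling reuse the cached pick instead of re-selecting? */
static bool can_use_cached_pick(const struct demo_rq *rq)
{
	return rq->core->pick_seq == rq->core->task_seq &&	/* selection still valid */
	       rq->core->pick_seq != rq->sched_seq &&		/* not consumed yet */
	       rq->have_cached_pick;
}

int main(void)
{
	struct demo_core core = { .task_seq = 7, .pick_seq = 7 };
	struct demo_rq rq = { .core = &core, .sched_seq = 6, .have_cached_pick = true };

	printf("fastpath: %s\n", can_use_cached_pick(&rq) ? "yes" : "no");

	core.task_seq++;	/* an enqueue/dequeue invalidates the pick */
	printf("after enqueue: %s\n", can_use_cached_pick(&rq) ? "yes" : "no");
	return 0;
}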