Commit 6ae71436 authored by Linus Torvalds

Merge tag 'sched_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Borislav Petkov:
 "Mostly minor things this time; some highlights:

   - core-sched: Add 'Forced Idle' accounting; this allows tracking how
     much CPU time is 'lost' due to core scheduling constraints.

   - psi: Fix for MEM_FULL; a task running reclaim would be counted as a
     runnable task and prevent MEM_FULL from being reported.

   - cpuacct: Long-standing fixes for some cgroup accounting issues.

   - rt: The bandwidth timer could, under unusual circumstances, fail to
     be rearmed, leading to indefinite throttling."

[ Description above by Peter Zijlstra ]
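
For the 'Forced Idle' item above: the __sched_core_account_forceidle() hunk
added to kernel/sched/core_sched.c further down charges each cookied task a
share of the idle time, scaled by the number of forced-idle SMT siblings over
the number of running cookied tasks. A minimal user-space sketch of just that
arithmetic (illustrative only, not kernel code; names and values are made up):

	/* Sketch of the forced-idle charge scaling; see __sched_core_account_forceidle(). */
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t forceidle_charge(uint64_t delta_ns, unsigned int nr_forceidle,
					 unsigned int nr_running_cookied)
	{
		/* the kernel WARNs here instead: can't be forced idle without a running task */
		if (!nr_running_cookied)
			return 0;

		/* scale when more than one sibling is idle or more than one cookied task runs */
		if (nr_forceidle > 1 || nr_running_cookied > 1) {
			delta_ns *= nr_forceidle;
			delta_ns /= nr_running_cookied;	/* div_u64() in the kernel */
		}

		/* this amount is added to each cookied task's core_forceidle_sum */
		return delta_ns;
	}

	int main(void)
	{
		/* e.g. 2 of 4 SMT siblings forced idle, 2 cookied tasks running, 1 ms window */
		printf("%llu ns charged per task\n",
		       (unsigned long long)forceidle_charge(1000000, 2, 2));
		return 0;
	}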

* tag 'sched_core_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Replace CFS internal cpu_util() with cpu_util_cfs()
  sched/fair: Cleanup task_util and capacity type
  sched/rt: Try to restart rt period timer when rt runtime exceeded
  sched/fair: Document the slow path and fast path in select_task_rq_fair
  sched/fair: Fix per-CPU kthread and wakee stacking for asym CPU capacity
  sched/fair: Fix detection of per-CPU kthreads waking a task
  sched/cpuacct: Make user/system times in cpuacct.stat more precise
  sched/cpuacct: Fix user/system in shown cpuacct.usage*
  cpuacct: Convert BUG_ON() to WARN_ON_ONCE()
  cputime, cpuacct: Include guest time in user time in cpuacct.stat
  psi: Fix PSI_MEM_FULL state when tasks are in memstall and doing reclaim
  sched/core: Forced idle accounting
  psi: Add a missing SPDX license header
  psi: Remove repeated verbose comment
parents 01367e86 82762d2a
include/linux/psi.h +1 −0
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PSI_H
#define _LINUX_PSI_H

include/linux/psi_types.h +13 −1
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_PSI_TYPES_H
#define _LINUX_PSI_TYPES_H

@@ -21,7 +22,17 @@ enum psi_task_count {
	 * don't have to special case any state tracking for it.
	 */
	NR_ONCPU,
-	NR_PSI_TASK_COUNTS = 4,
+	/*
+	 * For IO and CPU stalls the presence of running/oncpu tasks
+	 * in the domain means a partial rather than a full stall.
+	 * For memory it's not so simple because of page reclaimers:
+	 * they are running/oncpu while representing a stall. To tell
+	 * whether a domain has productivity left or not, we need to
+	 * distinguish between regular running (i.e. productive)
+	 * threads and memstall ones.
+	 */
+	NR_MEMSTALL_RUNNING,
+	NR_PSI_TASK_COUNTS = 5,
};

/* Task state bitmasks */
@@ -29,6 +40,7 @@ enum psi_task_count {
#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
#define TSK_RUNNING	(1 << NR_RUNNING)
#define TSK_ONCPU	(1 << NR_ONCPU)
#define TSK_MEMSTALL_RUNNING	(1 << NR_MEMSTALL_RUNNING)

/* Resources that workloads could be stalled on */
enum psi_res {
include/linux/sched.h +4 −0
@@ -523,7 +523,11 @@ struct sched_statistics {
	u64				nr_wakeups_affine_attempts;
	u64				nr_wakeups_passive;
	u64				nr_wakeups_idle;

#ifdef CONFIG_SCHED_CORE
	u64				core_forceidle_sum;
#endif
#endif /* CONFIG_SCHEDSTATS */
} ____cacheline_aligned;

struct sched_entity {
kernel/sched/core.c +63 −21
@@ -144,7 +144,7 @@ static inline bool __sched_core_less(struct task_struct *a, struct task_struct *
		return false;

	/* flip prio, so high prio is leftmost */
-	if (prio_less(b, a, task_rq(a)->core->core_forceidle))
+	if (prio_less(b, a, !!task_rq(a)->core->core_forceidle_count))
		return true;

	return false;
@@ -181,17 +181,25 @@ void sched_core_enqueue(struct rq *rq, struct task_struct *p)
	rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less);
}

-void sched_core_dequeue(struct rq *rq, struct task_struct *p)
+void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
{
	rq->core->core_task_seq++;

-	if (!sched_core_enqueued(p))
-		return;
-
-	rb_erase(&p->core_node, &rq->core_tree);
-	RB_CLEAR_NODE(&p->core_node);
+	if (sched_core_enqueued(p)) {
+		rb_erase(&p->core_node, &rq->core_tree);
+		RB_CLEAR_NODE(&p->core_node);
+	}
+
+	/*
+	 * Migrating the last task off the cpu, with the cpu in forced idle
+	 * state. Reschedule to create an accounting edge for forced idle,
+	 * and re-examine whether the core is still in forced idle state.
+	 */
+	if (!(flags & DEQUEUE_SAVE) && rq->nr_running == 1 &&
+	    rq->core->core_forceidle_count && rq->curr == rq->idle)
+		resched_curr(rq);
}

/*
 * Find left-most (aka, highest priority) task matching @cookie.
 */
@@ -280,6 +288,8 @@ static void __sched_core_flip(bool enabled)
		for_each_cpu(t, smt_mask)
			cpu_rq(t)->core_enabled = enabled;

		cpu_rq(cpu)->core->core_forceidle_start = 0;

		sched_core_unlock(cpu, &flags);

		cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
@@ -364,7 +374,8 @@ void sched_core_put(void)
#else /* !CONFIG_SCHED_CORE */

static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
-static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { }
+static inline void
+sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }

#endif /* CONFIG_SCHED_CORE */

@@ -2005,7 +2016,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
{
	if (sched_core_enabled(rq))
-		sched_core_dequeue(rq, p);
+		sched_core_dequeue(rq, p, flags);

	if (!(flags & DEQUEUE_NOCLOCK))
		update_rq_clock(rq);
@@ -5244,6 +5255,7 @@ void scheduler_tick(void)
	if (sched_feat(LATENCY_WARN))
		resched_latency = cpu_resched_latency(rq);
	calc_global_load_tick(rq);
	sched_core_tick(rq);

	rq_unlock(rq, &rf);

@@ -5656,6 +5668,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
	struct task_struct *next, *p, *max = NULL;
	const struct cpumask *smt_mask;
	bool fi_before = false;
	bool core_clock_updated = (rq == rq->core);
	unsigned long cookie;
	int i, cpu, occ = 0;
	struct rq *rq_i;
@@ -5708,10 +5721,18 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)

	/* reset state */
	rq->core->core_cookie = 0UL;
-	if (rq->core->core_forceidle) {
+	if (rq->core->core_forceidle_count) {
+		if (!core_clock_updated) {
+			update_rq_clock(rq->core);
+			core_clock_updated = true;
+		}
+		sched_core_account_forceidle(rq);
+		/* reset after accounting force idle */
+		rq->core->core_forceidle_start = 0;
+		rq->core->core_forceidle_count = 0;
+		rq->core->core_forceidle_occupation = 0;
 		need_sync = true;
 		fi_before = true;
-		rq->core->core_forceidle = false;
	}

	/*
@@ -5753,7 +5774,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
	for_each_cpu_wrap(i, smt_mask, cpu) {
		rq_i = cpu_rq(i);

-		if (i != cpu)
+		/*
+		 * Current cpu always has its clock updated on entrance to
+		 * pick_next_task(). If the current cpu is not the core,
+		 * the core may also have been updated above.
+		 */
+		if (i != cpu && (rq_i != rq->core || !core_clock_updated))
			update_rq_clock(rq_i);

		p = rq_i->core_pick = pick_task(rq_i);
@@ -5783,7 +5809,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)

		if (p == rq_i->idle) {
			if (rq_i->nr_running) {
-				rq->core->core_forceidle = true;
+				rq->core->core_forceidle_count++;
				if (!fi_before)
					rq->core->core_forceidle_seq++;
			}
@@ -5792,6 +5818,12 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		}
	}

	if (schedstat_enabled() && rq->core->core_forceidle_count) {
		if (cookie)
			rq->core->core_forceidle_start = rq_clock(rq->core);
		rq->core->core_forceidle_occupation = occ;
	}

	rq->core->core_pick_seq = rq->core->core_task_seq;
	next = rq->core_pick;
	rq->core_sched_seq = rq->core->core_pick_seq;
@@ -5828,8 +5860,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
		 *  1            0       1
		 *  1            1       0
		 */
-		if (!(fi_before && rq->core->core_forceidle))
-			task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle);
+		if (!(fi_before && rq->core->core_forceidle_count))
+			task_vruntime_update(rq_i, rq_i->core_pick, !!rq->core->core_forceidle_count);

		rq_i->core_pick->core_occupation = occ;

@@ -6036,8 +6068,16 @@ static void sched_core_cpu_deactivate(unsigned int cpu)
	core_rq->core_task_seq             = rq->core_task_seq;
	core_rq->core_pick_seq             = rq->core_pick_seq;
	core_rq->core_cookie               = rq->core_cookie;
-	core_rq->core_forceidle            = rq->core_forceidle;
+	core_rq->core_forceidle_count      = rq->core_forceidle_count;
 	core_rq->core_forceidle_seq        = rq->core_forceidle_seq;
+	core_rq->core_forceidle_occupation = rq->core_forceidle_occupation;
+
+	/*
+	 * Accounting edge for forced idle is handled in pick_next_task().
+	 * Don't need another one here, since the hotplug thread shouldn't
+	 * have a cookie.
+	 */
+	core_rq->core_forceidle_start = 0;

	/* install new leader */
	for_each_cpu(t, smt_mask) {
@@ -7126,7 +7166,7 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,

unsigned long sched_cpu_util(int cpu, unsigned long max)
{
-	return effective_cpu_util(cpu, cpu_util_cfs(cpu_rq(cpu)), max,
+	return effective_cpu_util(cpu, cpu_util_cfs(cpu), max,
				  ENERGY_UTIL, NULL);
}
#endif /* CONFIG_SMP */
@@ -9409,7 +9449,9 @@ void __init sched_init(void)
		rq->core_pick = NULL;
		rq->core_enabled = 0;
		rq->core_tree = RB_ROOT;
-		rq->core_forceidle = false;
+		rq->core_forceidle_count = 0;
+		rq->core_forceidle_occupation = 0;
+		rq->core_forceidle_start = 0;

		rq->core_cookie = 0UL;
#endif
kernel/sched/core_sched.c +65 −1
@@ -73,7 +73,7 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,

	enqueued = sched_core_enqueued(p);
	if (enqueued)
-		sched_core_dequeue(rq, p);
+		sched_core_dequeue(rq, p, DEQUEUE_SAVE);

	old_cookie = p->core_cookie;
	p->core_cookie = cookie;
@@ -85,6 +85,10 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,
	 * If task is currently running, it may not be compatible anymore after
	 * the cookie change, so enter the scheduler on its CPU to schedule it
	 * away.
	 *
	 * Note that it is possible that as a result of this cookie change, the
	 * core has now entered/left forced idle state. Defer accounting to the
	 * next scheduling edge, rather than always forcing a reschedule here.
	 */
	if (task_running(rq, p))
		resched_curr(rq);
@@ -232,3 +236,63 @@ int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
	return err;
}

#ifdef CONFIG_SCHEDSTATS

/* REQUIRES: rq->core's clock recently updated. */
void __sched_core_account_forceidle(struct rq *rq)
{
	const struct cpumask *smt_mask = cpu_smt_mask(cpu_of(rq));
	u64 delta, now = rq_clock(rq->core);
	struct rq *rq_i;
	struct task_struct *p;
	int i;

	lockdep_assert_rq_held(rq);

	WARN_ON_ONCE(!rq->core->core_forceidle_count);

	if (rq->core->core_forceidle_start == 0)
		return;

	delta = now - rq->core->core_forceidle_start;
	if (unlikely((s64)delta <= 0))
		return;

	rq->core->core_forceidle_start = now;

	if (WARN_ON_ONCE(!rq->core->core_forceidle_occupation)) {
		/* can't be forced idle without a running task */
	} else if (rq->core->core_forceidle_count > 1 ||
		   rq->core->core_forceidle_occupation > 1) {
		/*
		 * For larger SMT configurations, we need to scale the charged
		 * forced idle amount since there can be more than one forced
		 * idle sibling and more than one running cookied task.
		 */
		delta *= rq->core->core_forceidle_count;
		delta = div_u64(delta, rq->core->core_forceidle_occupation);
	}

	for_each_cpu(i, smt_mask) {
		rq_i = cpu_rq(i);
		p = rq_i->core_pick ?: rq_i->curr;

		if (!p->core_cookie)
			continue;

		__schedstat_add(p->stats.core_forceidle_sum, delta);
	}
}

void __sched_core_tick(struct rq *rq)
{
	if (!rq->core->core_forceidle_count)
		return;

	if (rq != rq->core)
		update_rq_clock(rq->core);

	__sched_core_account_forceidle(rq);
}

#endif /* CONFIG_SCHEDSTATS */