Commit 89bf80a4 authored by Song Zhang

sched: Introduce priority load balance for qos scheduler

euleros inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8MWDD


CVE: NA

-------------------------------------------------

Add a new sysctl interface:
/proc/sys/kernel/sched_prio_load_balance_enabled

0: default behavior (disabled)
1: enable priority load balance for the QoS scheduler
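
For example, on a kernel built with CONFIG_QOS_SCHED_PRIO_LB=y:

  echo 1 > /proc/sys/kernel/sched_prio_load_balance_enabled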

For task co-location with the QoS scheduler, it is reasonable for CFS
load balancing to prefer migrating online (latency-sensitive) tasks.
The CFS load-balance behavior is therefore changed as follows:

The cfs_tasks list now holds only online tasks.
A new cfs_offline_tasks list holds the offline tasks.
Load balancing prefers to migrate online tasks from the cfs_tasks list
to the dst rq, and falls back to offline tasks only afterwards.
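
As a minimal user-space sketch of this pick order (illustrative only,
not the kernel code: the task names, the simple singly linked lists and
the imbalance counter are stand-ins for the rq lists and lb_env), load
balance drains the online list first and touches the offline list only
while imbalance remains, mirroring the loop/goto-again structure in the
detach_tasks() hunk below:

  #include <stdio.h>

  struct task { const char *name; struct task *next; };

  /* Pop the first task off a singly linked list, or return NULL. */
  static struct task *pick_one(struct task **list)
  {
  	struct task *t = *list;

  	if (t)
  		*list = t->next;
  	return t;
  }

  int main(void)
  {
  	struct task off2 = { "offline-2", NULL };
  	struct task off1 = { "offline-1", &off2 };
  	struct task on1  = { "online-1",  NULL };
  	struct task *online = &on1, *offline = &off1;
  	int imbalance = 2;	/* pretend two tasks must be moved */

  	/* Pass 1 scans online tasks; pass 2 runs only if imbalance remains. */
  	for (int pass = 0; pass < 2 && imbalance > 0; pass++) {
  		struct task **tasks = pass ? &offline : &online;
  		struct task *t;

  		while (imbalance > 0 && (t = pick_one(tasks))) {
  			printf("migrate %s\n", t->name);
  			imbalance--;
  		}
  	}
  	return 0;
  }
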
Consider the hyperthread-interference scenario with the SMT expeller
feature enabled: CPU A and CPU B are two hyperthreads of one physical
core, CPU A runs online tasks while CPU B has only offline tasks. The
offline tasks on CPU B are expelled by the online tasks on CPU A and
cannot be scheduled. When load balance is triggered, however, the load
on the two CPUs may already be balanced before CPU B can migrate any
online tasks from CPU A. As a result, CPU B cannot run online tasks,
and online tasks are not evenly distributed among the CPUs. Migrating
online tasks first lets CPU B pull runnable online work instead of
being left with expelled offline tasks.

Signed-off-by: Song Zhang <zhangsong34@huawei.com>
parent f31cd96b
+1 −0
@@ -166,6 +166,7 @@ CONFIG_CGROUP_WRITEBACK=y
CONFIG_CGROUP_V1_WRITEBACK=y
CONFIG_CGROUP_SCHED=y
CONFIG_QOS_SCHED=y
CONFIG_QOS_SCHED_PRIO_LB=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
+1 −0
@@ -188,6 +188,7 @@ CONFIG_CGROUP_WRITEBACK=y
CONFIG_CGROUP_V1_WRITEBACK=y
CONFIG_CGROUP_SCHED=y
CONFIG_QOS_SCHED=y
CONFIG_QOS_SCHED_PRIO_LB=y
CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
+9 −0
@@ -1031,6 +1031,15 @@ config QOS_SCHED

	  If in doubt, say N.

config QOS_SCHED_PRIO_LB
	bool "Priority load balance for Qos scheduler"
	depends on QOS_SCHED
	default n
	help
	  This feature enables priority load balance for the QoS
	  scheduler: when balancing, online tasks are preferred for
	  migration and offline tasks are migrated only afterwards.

config FAIR_GROUP_SCHED
	bool "Group scheduling for SCHED_OTHER"
	depends on CGROUP_SCHED
+3 −0
@@ -10045,6 +10045,9 @@ void __init sched_init(void)
		rq->max_idle_balance_cost = sysctl_sched_migration_cost;

		INIT_LIST_HEAD(&rq->cfs_tasks);
#ifdef CONFIG_QOS_SCHED_PRIO_LB
		INIT_LIST_HEAD(&rq->cfs_offline_tasks);
#endif

		rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ_COMMON
+82 −1
@@ -147,6 +147,10 @@ static int hundred_thousand = 100000;
static int unthrottle_qos_cfs_rqs(int cpu);
#endif

#ifdef CONFIG_QOS_SCHED_PRIO_LB
unsigned int sysctl_sched_prio_load_balance_enabled;
#endif

#ifdef CONFIG_CFS_BANDWIDTH
/*
 * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -234,6 +238,17 @@ static struct ctl_table sched_fair_sysctls[] = {
		.extra1         = SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_HUNDRED,
	},
#endif
#ifdef CONFIG_QOS_SCHED_PRIO_LB
	{
		.procname	= "sched_prio_load_balance_enabled",
		.data		= &sysctl_sched_prio_load_balance_enabled,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif
	{}
};
@@ -3585,6 +3600,21 @@ static inline void update_scan_period(struct task_struct *p, int new_cpu)

#endif /* CONFIG_NUMA_BALANCING */

#ifdef CONFIG_QOS_SCHED_PRIO_LB
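/*
 * Queue the entity on cfs_offline_tasks when priority load balance is
 * enabled and its task group is offline (qos_level == -1); otherwise
 * keep it on the normal cfs_tasks list.
 */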
static void
adjust_rq_cfs_tasks(void (*list_op)(struct list_head *, struct list_head *),
	struct rq *rq,
	struct sched_entity *se)
{
	struct task_group *tg = task_group(task_of(se));

	if (sysctl_sched_prio_load_balance_enabled && tg->qos_level == -1)
		(*list_op)(&se->group_node, &rq->cfs_offline_tasks);
	else
		(*list_op)(&se->group_node, &rq->cfs_tasks);
}
#endif

static void
account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
@@ -3594,7 +3624,11 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
		struct rq *rq = rq_of(cfs_rq);

		account_numa_enqueue(rq, task_of(se));
#ifdef CONFIG_QOS_SCHED_PRIO_LB
		adjust_rq_cfs_tasks(list_add, rq, se);
#else
		list_add(&se->group_node, &rq->cfs_tasks);
#endif
	}
#endif
	cfs_rq->nr_running++;
@@ -8885,7 +8919,11 @@ done: __maybe_unused;
	 * the list, so our cfs_tasks list becomes MRU
	 * one.
	 */
#ifdef CONFIG_QOS_SCHED_PRIO_LB
	adjust_rq_cfs_tasks(list_move, rq, &p->se);
#else
	list_move(&p->se.group_node, &rq->cfs_tasks);
#endif
#endif

	if (hrtick_enabled_fair(rq))
@@ -9233,6 +9271,14 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
	    (&p->se == cfs_rq_of(&p->se)->next))
		return 1;

#ifdef CONFIG_QOS_SCHED_PRIO_LB
	/* Ignore migration cost when the task can preempt a cache-sharing, SCHED_IDLE-only dst CPU */
	if (sysctl_sched_prio_load_balance_enabled &&
	    cpus_share_cache(env->src_cpu, env->dst_cpu) &&
	    sched_idle_cpu(env->dst_cpu))
		return 0;
#endif

	if (sysctl_sched_migration_cost == -1)
		return 1;

@@ -9432,11 +9478,18 @@ static void detach_task(struct task_struct *p, struct lb_env *env)
static struct task_struct *detach_one_task(struct lb_env *env)
{
	struct task_struct *p;
	struct list_head *tasks = &env->src_rq->cfs_tasks;
#ifdef CONFIG_QOS_SCHED_PRIO_LB
	int loop = 0;
#endif

	lockdep_assert_rq_held(env->src_rq);

#ifdef CONFIG_QOS_SCHED_PRIO_LB
again:
#endif
	list_for_each_entry_reverse(p,
			tasks, se.group_node) {
		if (!can_migrate_task(p, env))
			continue;

@@ -9451,6 +9504,15 @@ static struct task_struct *detach_one_task(struct lb_env *env)
		schedstat_inc(env->sd->lb_gained[env->idle]);
		return p;
	}
#ifdef CONFIG_QOS_SCHED_PRIO_LB
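	/* No migratable online task was found: retry once over the offline list */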
	if (sysctl_sched_prio_load_balance_enabled) {
		loop++;
		if (loop == 1) {
			tasks = &env->src_rq->cfs_offline_tasks;
			goto again;
		}
	}
#endif
	return NULL;
}

@@ -9466,6 +9528,9 @@ static int detach_tasks(struct lb_env *env)
	unsigned long util, load;
	struct task_struct *p;
	int detached = 0;
#ifdef CONFIG_QOS_SCHED_PRIO_LB
	int loop = 0;
#endif

	lockdep_assert_rq_held(env->src_rq);

@@ -9481,6 +9546,9 @@ static int detach_tasks(struct lb_env *env)
	if (env->imbalance <= 0)
		return 0;

#ifdef CONFIG_QOS_SCHED_PRIO_LB
again:
#endif
	while (!list_empty(tasks)) {
		/*
		 * We don't want to steal all, otherwise we may be treated likewise,
@@ -9586,6 +9654,15 @@ static int detach_tasks(struct lb_env *env)
		list_move(&p->se.group_node, tasks);
	}

#ifdef CONFIG_QOS_SCHED_PRIO_LB
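	/* The online pass left imbalance: retry once over the offline list */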
	if (sysctl_sched_prio_load_balance_enabled && env->imbalance > 0) {
		loop++;
		if (loop == 1) {
			tasks = &env->src_rq->cfs_offline_tasks;
			goto again;
		}
	}
#endif
	/*
	 * Right now, this is one of only two places we collect this stat
	 * so we can safely collect detach_one_task() stats here rather
@@ -13212,7 +13289,11 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
		 * Move the next running task to the front of the list, so our
		 * cfs_tasks list becomes MRU one.
		 */
#ifdef CONFIG_QOS_SCHED_PRIO_LB
		adjust_rq_cfs_tasks(list_move, rq, se);
#else
		list_move(&se->group_node, &rq->cfs_tasks);
#endif
	}
#endif
