Commit 70a232a5 authored by Hui Tang's avatar Hui Tang Committed by Yongqiang Liu
Browse files

sched: Adjust wakeup cpu range according to CPU util dynamically

hulk inclusion
category: feature
bugzilla: 187173, https://gitee.com/openeuler/kernel/issues/I5G4IH


CVE: NA

--------------------------------

Compare the taskgroup 'util_avg' on the preferred cpus with the capacity of
the preferred cpus, and dynamically adjust the cpu range used by the task
wakeup process.

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
parent 243865da
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -1251,13 +1251,15 @@ struct task_struct {
#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY)
	cpumask_t			*prefer_cpus;
	const cpumask_t			*select_cpus;
#else
	KABI_RESERVE(6)
	KABI_RESERVE(7)
#endif
#else
	KABI_RESERVE(6)
#endif
	KABI_RESERVE(7)
#endif
	KABI_RESERVE(8)

	/* CPU-specific state of this task: */
+4 −0
Original line number Diff line number Diff line
@@ -32,6 +32,10 @@ extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
/* Low-utilization threshold, in percent, used when choosing the wakeup cpu range. */
extern int sysctl_sched_util_low_pct;
#endif

enum sched_tunable_scaling {
	SCHED_TUNABLESCALING_NONE,
	SCHED_TUNABLESCALING_LOG,
+144 −0
Original line number Diff line number Diff line
@@ -1775,6 +1775,9 @@ static void task_numa_compare(struct task_numa_env *env,
		 * can be used from IRQ context.
		 */
		local_irq_disable();
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
		env->p->select_cpus = &env->p->cpus_allowed;
#endif
		env->dst_cpu = select_idle_sibling(env->p, env->src_cpu,
						   env->dst_cpu);
		local_irq_enable();
@@ -5955,8 +5958,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
		int i;

		/* Skip over this group if it has no CPUs allowed */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
		if (!cpumask_intersects(sched_group_span(group),
					p->select_cpus))
#else
		if (!cpumask_intersects(sched_group_span(group),
					&p->cpus_allowed))
#endif
			continue;

		local_group = cpumask_test_cpu(this_cpu,
@@ -6088,7 +6096,11 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
		return cpumask_first(sched_group_span(group));

	/* Traverse only the allowed CPUs */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	for_each_cpu_and(i, sched_group_span(group), p->select_cpus) {
#else
	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
#endif
		if (sched_idle_cpu(i))
			return i;

@@ -6131,7 +6143,11 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
{
	int new_cpu = cpu;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if (!cpumask_intersects(sched_domain_span(sd), p->select_cpus))
#else
	if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
#endif
		return prev_cpu;

	/*
@@ -6248,7 +6264,11 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
	if (!test_idle_cores(target, false))
		return -1;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_and(cpus, sched_domain_span(sd), p->select_cpus);
#else
	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
#endif

	for_each_cpu_wrap(core, cpus, target) {
		bool idle = true;
@@ -6282,8 +6302,13 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
		return -1;

	for_each_cpu(cpu, cpu_smt_mask(target)) {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
		if (!cpumask_test_cpu(cpu, p->select_cpus) ||
		    !cpumask_test_cpu(cpu, sched_domain_span(sd)))
#else
		if (!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
		    !cpumask_test_cpu(cpu, sched_domain_span(sd)))
#endif
			continue;
		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
			return cpu;
@@ -6344,7 +6369,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t

	time = local_clock();

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_and(cpus, sched_domain_span(sd), p->select_cpus);
#else
	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
#endif

	for_each_cpu_wrap(cpu, cpus, target) {
		if (!--nr)
@@ -6383,7 +6412,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	struct sched_domain *sd;
	int i, recent_used_cpu;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
	    cpumask_test_cpu(target, p->select_cpus)) {
#else
	if (available_idle_cpu(target) || sched_idle_cpu(target)) {
#endif
		SET_STAT(found_idle_cpu_easy);
		return target;
	}
@@ -6391,8 +6425,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	/*
	 * If the previous CPU is cache affine and idle, don't be stupid:
	 */
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if (prev != target && cpus_share_cache(prev, target) &&
	    cpumask_test_cpu(prev, p->select_cpus) &&
	    (available_idle_cpu(prev) || sched_idle_cpu(prev))) {
#else
	if (prev != target && cpus_share_cache(prev, target) &&
	    (available_idle_cpu(prev) || sched_idle_cpu(prev))) {
#endif
		SET_STAT(found_idle_cpu_easy);
		return prev;
	}
@@ -6403,7 +6443,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	    recent_used_cpu != target &&
	    cpus_share_cache(recent_used_cpu, target) &&
	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	    cpumask_test_cpu(p->recent_used_cpu, p->select_cpus)) {
#else
	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
#endif
		/*
		 * Replace recent_used_cpu with prev as it is a potential
		 * candidate for the next wake:
@@ -6605,8 +6649,86 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
	sync_entity_load_avg(&p->se);

	return min_cap * 1024 < task_util(p) * capacity_margin;

}

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
/*
 * Low utilization threshold for CPU
 *
 * Units: percentage of CPU capacity (default: 85%). set_task_select_cpus()
 * compares the summed 'util_avg' of the preferred cpus against this share
 * of their summed capacity.
 */
int sysctl_sched_util_low_pct = 85;

static inline bool prefer_cpus_valid(struct task_struct *p)
{
	return p->prefer_cpus &&
	       !cpumask_empty(p->prefer_cpus) &&
	       !cpumask_equal(p->prefer_cpus, &p->cpus_allowed) &&
	       cpumask_subset(p->prefer_cpus, &p->cpus_allowed);
}

/*
 * set_task_select_cpus: select the cpu range for task
 * @p: the task whose available cpu range will be set
 * @idlest_cpu: if non-NULL, receives the idlest cpu found in the preferred
 *              cpus; left untouched when no preferred cpu qualifies
 * @sd_flag: balance flag passed by the caller (not used by this function)
 *
 * p->select_cpus is set to p->prefer_cpus when the preferred cpus are valid
 * (see prefer_cpus_valid()) and either
 *   - one of the preferred cpus is idle, or
 *   - the sum of the task group's 'util_avg' over the preferred cpus is not
 *     higher than 'sysctl_sched_util_low_pct' percent of the summed capacity
 *     of those cpus.
 * Otherwise p->select_cpus is set to p->cpus_allowed.
 *
 * The idlest cpu in the preferred cpus is stored in @idlest_cpu: the first
 * idle cpu found or, failing that, the cpu with the largest difference
 * between its capacity and the task group's 'util_avg' on it.
 * select_task_rq_fair() falls back to that cpu when the cpu it picked is not
 * in p->select_cpus.
 */
static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
				 int sd_flag)
{
	unsigned long util_avg_sum = 0;
	unsigned long tg_capacity = 0;
	long max_spare = INT_MIN;
	struct task_group *tg;
	long spare;
	int cpu;

	/* Default to the full affinity mask unless prefer_cpus qualifies. */
	p->select_cpus = &p->cpus_allowed;
	if (!prefer_cpus_valid(p))
		return;

	rcu_read_lock();
	tg = task_group(p);
	for_each_cpu(cpu, p->prefer_cpus) {
		/* No group entity on this cpu: nothing to account. */
		if (unlikely(!tg->se[cpu]))
			continue;

		/*
		 * An idle preferred cpu settles the decision. Sample the idle
		 * state once so that @idlest_cpu and the early return always
		 * agree with each other.
		 */
		if (available_idle_cpu(cpu)) {
			if (idlest_cpu)
				*idlest_cpu = cpu;
			rcu_read_unlock();
			p->select_cpus = p->prefer_cpus;
			return;
		}

		/* Track the busy cpu with the most spare capacity so far. */
		if (idlest_cpu) {
			spare = (long)(capacity_of(cpu) - tg->se[cpu]->avg.util_avg);
			if (spare > max_spare) {
				max_spare = spare;
				*idlest_cpu = cpu;
			}
		}

		util_avg_sum += tg->se[cpu]->avg.util_avg;
		tg_capacity += capacity_of(cpu);
	}
	rcu_read_unlock();

	/*
	 * NOTE(review): the first test presumably rejects a degenerate
	 * capacity sum (e.g. no cpu was accounted above) - confirm intent.
	 */
	if (tg_capacity > cpumask_weight(p->prefer_cpus) &&
	    util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) {
		p->select_cpus = p->prefer_cpus;
	}
}
#endif

/*
 * select_task_rq_fair: Select target runqueue for the waking task in domains
 * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
@@ -6628,13 +6750,24 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
	int new_cpu = prev_cpu;
	int want_affine = 0;
	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	int idlest_cpu = 0;
#endif

	time = schedstat_start_time();

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	set_task_select_cpus(p, &idlest_cpu, sd_flag);
#endif

	if (sd_flag & SD_BALANCE_WAKE) {
		record_wakee(p);
		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
			      && cpumask_test_cpu(cpu, p->select_cpus);
#else
			      && cpumask_test_cpu(cpu, &p->cpus_allowed);
#endif
	}

	rcu_read_lock();
@@ -6648,7 +6781,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
		 */
		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
			new_cpu = cpu;
			if (cpu != prev_cpu &&
			    cpumask_test_cpu(prev_cpu, p->select_cpus))
#else
			if (cpu != prev_cpu)
#endif
				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);

			sd = NULL; /* Prefer wake_affine over balance flags */
@@ -6673,6 +6812,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
			current->recent_used_cpu = cpu;
	}
	rcu_read_unlock();

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if (!cpumask_test_cpu(new_cpu, p->select_cpus))
		new_cpu = idlest_cpu;
#endif
	schedstat_end_time(cpu_rq(cpu), time);

	return new_cpu;
+11 −0
Original line number Diff line number Diff line
@@ -1325,6 +1325,17 @@ static struct ctl_table kern_table[] = {
		.extra1		= &one_hundred,
		.extra2		= &one_thousand,
	},
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	{
		.procname       = "sched_util_low_pct",
		.data           = &sysctl_sched_util_low_pct,
		.maxlen         = sizeof(sysctl_sched_util_low_pct),
		.mode           = 0644,
		.proc_handler   = proc_dointvec_minmax,
		.extra1         = &zero,
		.extra2		= &one_hundred,
	},
#endif
	{ }
};