Commit e5ed0550, authored by Vincent Guittot, committed by Peter Zijlstra
Browse files

sched/fair: unlink misfit task from cpu overutilized



Once uclamp_min is taken into account, the 1:1 relation between task misfit
and cpu overutilized no longer holds, as a task with a small util_avg may
not fit a high capacity cpu because of its uclamp_min constraint.

Add a new state in util_fits_cpu() to reflect the case where a task would fit
a CPU except for the uclamp_min hint, which is a performance requirement.

Use -1 to reflect that a CPU doesn't fit only because of uclamp_min, so that
we can use this new value to take additional action to select the best CPU
that doesn't match the uclamp_min hint.

When util_fits_cpu() returns -1, we continue to look for a possible CPU
with better performance. This replaces the Capacity Inversion detection:
capacity_orig_of() - thermal_load_avg is now used to detect a capacity
inversion.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-and-tested-by: Qais Yousef <qyousef@layalina.io>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Kajetan Puchalski <kajetan.puchalski@arm.com>
Link: https://lore.kernel.org/r/20230201143628.270912-2-vincent.guittot@linaro.org
parent 443ed4c3
Loading
Loading
Loading
Loading
+82 −23
Original line number Diff line number Diff line
@@ -4561,8 +4561,8 @@ static inline int util_fits_cpu(unsigned long util,
	 * handle the case uclamp_min > uclamp_max.
	 */
	uclamp_min = min(uclamp_min, uclamp_max);
	if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE)
		fits = fits && (uclamp_min <= capacity_orig_thermal);
	if (fits && (util < uclamp_min) && (uclamp_min > capacity_orig_thermal))
		return -1;

	return fits;
}
@@ -4572,7 +4572,11 @@ static inline int task_fits_cpu(struct task_struct *p, int cpu)
	unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN);
	unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX);
	unsigned long util = task_util_est(p);
	return util_fits_cpu(util, uclamp_min, uclamp_max, cpu);
	/*
	 * Return true only if the cpu fully fits the task requirements, which
	 * include the utilization but also the performance hints.
	 */
	return (util_fits_cpu(util, uclamp_min, uclamp_max, cpu) > 0);
}

static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
@@ -6138,6 +6142,7 @@ static inline bool cpu_overutilized(int cpu)
	unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
	unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);

	/* Return true only if the utilization doesn't fit CPU's capacity */
	return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
}

@@ -6931,6 +6936,7 @@ static int
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
{
	unsigned long task_util, util_min, util_max, best_cap = 0;
	int fits, best_fits = 0;
	int cpu, best_cpu = -1;
	struct cpumask *cpus;

@@ -6946,12 +6952,28 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)

		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
			continue;
		if (util_fits_cpu(task_util, util_min, util_max, cpu))

		fits = util_fits_cpu(task_util, util_min, util_max, cpu);

		/* This CPU fits with all requirements */
		if (fits > 0)
			return cpu;
		/*
		 * Only the min performance hint (i.e. uclamp_min) doesn't fit.
		 * Look for the CPU with best capacity.
		 */
		else if (fits < 0)
			cpu_cap = capacity_orig_of(cpu) - thermal_load_avg(cpu_rq(cpu));

		if (cpu_cap > best_cap) {
		/*
		 * First, select CPU which fits better (-1 being better than 0).
		 * Then, select the one with best capacity at same level.
		 */
		if ((fits < best_fits) ||
		    ((fits == best_fits) && (cpu_cap > best_cap))) {
			best_cap = cpu_cap;
			best_cpu = cpu;
			best_fits = fits;
		}
	}

@@ -6964,7 +6986,11 @@ static inline bool asym_fits_cpu(unsigned long util,
				 int cpu)
{
	if (sched_asym_cpucap_active())
		return util_fits_cpu(util, util_min, util_max, cpu);
		/*
		 * Return true only if the cpu fully fits the task requirements
		 * which include the utilization and the performance hints.
		 */
		return (util_fits_cpu(util, util_min, util_max, cpu) > 0);

	return true;
}
@@ -7331,6 +7357,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
	unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
	struct root_domain *rd = this_rq()->rd;
	int cpu, best_energy_cpu, target = -1;
	int prev_fits = -1, best_fits = -1;
	unsigned long best_thermal_cap = 0;
	unsigned long prev_thermal_cap = 0;
	struct sched_domain *sd;
	struct perf_domain *pd;
	struct energy_env eenv;
@@ -7366,6 +7395,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
		unsigned long prev_spare_cap = 0;
		int max_spare_cap_cpu = -1;
		unsigned long base_energy;
		int fits, max_fits = -1;

		cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);

@@ -7415,7 +7445,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
				util_min = max(rq_util_min, p_util_min);
				util_max = max(rq_util_max, p_util_max);
			}
			if (!util_fits_cpu(util, util_min, util_max, cpu))

			fits = util_fits_cpu(util, util_min, util_max, cpu);
			if (!fits)
				continue;

			lsub_positive(&cpu_cap, util);
@@ -7423,7 +7455,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
			if (cpu == prev_cpu) {
				/* Always use prev_cpu as a candidate. */
				prev_spare_cap = cpu_cap;
			} else if (cpu_cap > max_spare_cap) {
				prev_fits = fits;
			} else if ((fits > max_fits) ||
				   ((fits == max_fits) && (cpu_cap > max_spare_cap))) {
				/*
				 * Find the CPU with the maximum spare capacity
				 * among the remaining CPUs in the performance
@@ -7431,6 +7465,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
				 */
				max_spare_cap = cpu_cap;
				max_spare_cap_cpu = cpu;
				max_fits = fits;
			}
		}

@@ -7449,26 +7484,50 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
			if (prev_delta < base_energy)
				goto unlock;
			prev_delta -= base_energy;
			prev_thermal_cap = cpu_thermal_cap;
			best_delta = min(best_delta, prev_delta);
		}

		/* Evaluate the energy impact of using max_spare_cap_cpu. */
		if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
			/* Current best energy cpu fits better */
			if (max_fits < best_fits)
				continue;

			/*
			 * Both don't fit performance hint (i.e. uclamp_min)
			 * but best energy cpu has better capacity.
			 */
			if ((max_fits < 0) &&
			    (cpu_thermal_cap <= best_thermal_cap))
				continue;

			cur_delta = compute_energy(&eenv, pd, cpus, p,
						   max_spare_cap_cpu);
			/* CPU utilization has changed */
			if (cur_delta < base_energy)
				goto unlock;
			cur_delta -= base_energy;
			if (cur_delta < best_delta) {

			/*
			 * Both fit for the task but best energy cpu has lower
			 * energy impact.
			 */
			if ((max_fits > 0) && (best_fits > 0) &&
			    (cur_delta >= best_delta))
				continue;

			best_delta = cur_delta;
			best_energy_cpu = max_spare_cap_cpu;
			}
			best_fits = max_fits;
			best_thermal_cap = cpu_thermal_cap;
		}
	}
	rcu_read_unlock();

	if (best_delta < prev_delta)
	if ((best_fits > prev_fits) ||
	    ((best_fits > 0) && (best_delta < prev_delta)) ||
	    ((best_fits < 0) && (best_thermal_cap > prev_thermal_cap)))
		target = best_energy_cpu;

	return target;
@@ -10271,24 +10330,23 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
	 */
	update_sd_lb_stats(env, &sds);

	if (sched_energy_enabled()) {
		struct root_domain *rd = env->dst_rq->rd;

		if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized))
			goto out_balanced;
	}

	local = &sds.local_stat;
	busiest = &sds.busiest_stat;

	/* There is no busy sibling group to pull tasks from */
	if (!sds.busiest)
		goto out_balanced;

	busiest = &sds.busiest_stat;

	/* Misfit tasks should be dealt with regardless of the avg load */
	if (busiest->group_type == group_misfit_task)
		goto force_balance;

	if (sched_energy_enabled()) {
		struct root_domain *rd = env->dst_rq->rd;

		if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized))
			goto out_balanced;
	}

	/* ASYM feature bypasses nice load balance check */
	if (busiest->group_type == group_asym_packing)
		goto force_balance;
@@ -10301,6 +10359,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
	if (busiest->group_type == group_imbalanced)
		goto force_balance;

	local = &sds.local_stat;
	/*
	 * If the local group is busier than the selected busiest group
	 * don't try and pull any tasks.