Commit 0ed9b4b1 authored by Zhang Qiao

sched/numa: Fix numa imbalance in load_balance()

hulk inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/I9RMHW
CVE: NA

--------------------------------

When performing load balancing, a NUMA imbalance is allowed
if the number of busy CPUs is below a maximum threshold; this
keeps a pair of communicating tasks on the current node when
the destination is lightly loaded.
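
For reference, after this patch the allowance roughly reads as below.
This is a minimal sketch, not the kernel source: only the threshold
comparison appears in the diff, and the early return gating on
dst_running is an assumption inferred from the function's parameters.

static inline long
adjust_numa_imbalance(int imbalance, int dst_running, int imb_numa_nr)
{
	/* Assumed gate: no allowance once the destination is busy enough. */
	if (dst_running > imb_numa_nr)
		return imbalance;

	/*
	 * Allow a small imbalance so a pair of communicating tasks can
	 * remain local while the destination is lightly loaded.
	 */
	if (imbalance <= imb_numa_nr)	/* was NUMA_IMBALANCE_MIN (2) */
		return 0;

	return imbalance;
}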

1. However, calculate_imbalance() passes local->sum_nr_running,
which may be inaccurate: the communicating tasks run in the
busiest group, so busiest->sum_nr_running should be passed instead.
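
As a hypothetical illustration (the numbers are invented, and the
dst_running gate from the sketch above is assumed): with imb_numa_nr = 4,
a local group running 1 task and a busiest group running 6 communicating
tasks, passing local->sum_nr_running + 1 = 2 leaves the allowance active,
so a small imbalance is still ignored; passing busiest->sum_nr_running = 6
exceeds the threshold and the overloaded node is balanced normally.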

2. At the same time, idle CPUs are used to calculate the
imbalance, but group_weight may differ between the local and
busiest groups. In that case, even when both groups are mostly
idle, the calculated imbalance can be very large. Instead,
calculate the imbalance as the difference in busy CPUs
between the groups.
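
A minimal userspace sketch of the arithmetic in point 2 (the group
sizes are hypothetical, chosen so both nodes are almost idle but
differently sized):

#include <stdio.h>

/* Hypothetical per-group statistics, mirroring the sg_lb_stats
 * fields used in the patch (group_weight, idle_cpus). */
struct sg_stats {
	int group_weight;	/* CPUs in the group */
	int idle_cpus;		/* idle CPUs in the group */
};

int main(void)
{
	/* local node: 64 CPUs, 2 busy; busiest node: 32 CPUs, 2 busy */
	struct sg_stats local   = { .group_weight = 64, .idle_cpus = 62 };
	struct sg_stats busiest = { .group_weight = 32, .idle_cpus = 30 };

	/* old formula: difference of idle CPUs -> 32, a huge imbalance
	 * even though both nodes are nearly idle */
	int old_imb = local.idle_cpus - busiest.idle_cpus;

	/* new formula: difference of busy CPUs -> 0, no imbalance */
	int new_imb = (busiest.group_weight - busiest.idle_cpus)
		    - (local.group_weight - local.idle_cpus);

	printf("idle-based: %d, busy-based: %d\n", old_imb, new_imb);
	return 0;
}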

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
parent c564f68d
+8 −6
@@ -1474,7 +1474,6 @@ static inline bool is_core_idle(int cpu)
 }
 
 #ifdef CONFIG_NUMA
-#define NUMA_IMBALANCE_MIN 2
 
 static inline long
 adjust_numa_imbalance(int imbalance, int dst_running, int imb_numa_nr)
@@ -1493,7 +1492,7 @@ adjust_numa_imbalance(int imbalance, int dst_running, int imb_numa_nr)
 	 * Allow a small imbalance based on a simple pair of communicating
 	 * tasks that remain local when the destination is lightly loaded.
 	 */
-	if (imbalance <= NUMA_IMBALANCE_MIN)
+	if (imbalance <= imb_numa_nr)
 		return 0;
 
 	return imbalance;
@@ -12077,6 +12076,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 #ifdef CONFIG_NUMA
 		if (sd->flags & SD_NUMA) {
 			int imb_numa_nr = sd->imb_numa_nr;
+			int local_busy_cpus = local_sgs.group_weight - local_sgs.idle_cpus;
+			int idlest_busy_cpus = idlest_sgs.group_weight - idlest_sgs.idle_cpus;
 #ifdef CONFIG_NUMA_BALANCING
 			int idlest_cpu;
 			/*
@@ -12106,7 +12107,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 				imb_numa_nr = min(cpumask_weight(cpus), sd->imb_numa_nr);
 			}
 
-			imbalance = abs(local_sgs.idle_cpus - idlest_sgs.idle_cpus);
+			imbalance = abs(idlest_busy_cpus - local_busy_cpus);
 			if (!adjust_numa_imbalance(imbalance,
 						   local_sgs.sum_nr_running + 1,
 						   imb_numa_nr)) {
@@ -12382,18 +12383,19 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 
 			/*
 			 * If there is no overload, we just want to even the number of
-			 * idle cpus.
+			 * busy cpus.
 			 */
 			env->migration_type = migrate_task;
 			env->imbalance = max_t(long, 0,
-					       (local->idle_cpus - busiest->idle_cpus));
+						((busiest->group_weight - busiest->idle_cpus)
+						- (local->group_weight - local->idle_cpus)));
 		}
 
 #ifdef CONFIG_NUMA
 		/* Consider allowing a small imbalance between NUMA groups */
 		if (env->sd->flags & SD_NUMA) {
 			env->imbalance = adjust_numa_imbalance(env->imbalance,
-							       local->sum_nr_running + 1,
+							       busiest->sum_nr_running,
 							       env->sd->imb_numa_nr);
 		}
 #endif