Unverified Commit 8edbf4ae authored by openeuler-ci-bot, committed by Gitee

!2774 [OLK-5.10] sched/fair: Scan cluster before scanning LLC in wake-up path

Merge Pull Request from: @liujie-248683921 
 
https://gitee.com/openeuler/kernel/issues/I8E8NN
This is the follow-up work to support the cluster scheduler. Previously
we added a cluster level to the scheduler for both ARM64 [1] and
x86 [2] to support load balancing between clusters, bringing more memory
bandwidth and reducing cache contention. This patchset, on the other
hand, takes care of the wake-up path by giving CPUs within the same
cluster a try before scanning the whole LLC, to benefit tasks that
communicate with each other.

[1] 778c558f ("sched: Add cluster scheduler level in core and related Kconfig for ARM64")
[2] 66558b73 ("sched: Add cluster scheduler level for x86")
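
For illustration, the resulting pick order can be sketched as a small user-space program (this is not the kernel implementation; the topology tables, CPU count, and helper names below are invented for the example): an idle, cache-affine prev CPU is taken immediately only if it also shares the cluster with the target; otherwise the target's cluster is scanned first, then the rest of the LLC, with the affine prev kept as a last-resort fallback.

```c
/*
 * Illustrative user-space sketch of the wake-up search order introduced by
 * this series. Not the kernel code: the topology tables and helper names
 * are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Hypothetical topology: two CPUs per cluster, one LLC covering all CPUs. */
static const int cluster_id[NR_CPUS] = { 0, 0, 1, 1, 2, 2, 3, 3 };
static const int llc_id[NR_CPUS]     = { 0, 0, 0, 0, 0, 0, 0, 0 };
static const bool cpu_idle[NR_CPUS]  = { false, false, false, true,
                                         true,  false, false, false };

static bool share_cluster(int a, int b) { return cluster_id[a] == cluster_id[b]; }
static bool share_llc(int a, int b)     { return llc_id[a] == llc_id[b]; }

static int wakeup_pick_cpu(int target, int prev)
{
	int cpu, prev_aff = -1;

	/* An idle, cache-affine prev is taken right away only if it also
	 * shares the cluster; otherwise remember it as a fallback. */
	if (prev != target && share_llc(prev, target) && cpu_idle[prev]) {
		if (share_cluster(prev, target))
			return prev;
		prev_aff = prev;
	}

	/* Pass 1: scan the target's cluster. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu != target && share_cluster(cpu, target) && cpu_idle[cpu])
			return cpu;

	/* Pass 2: scan the rest of the LLC. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu != target && share_llc(cpu, target) &&
		    !share_cluster(cpu, target) && cpu_idle[cpu])
			return cpu;

	/* Nothing idle found: fall back to the affine prev, then the target. */
	return prev_aff >= 0 ? prev_aff : target;
}

int main(void)
{
	/* target CPU 0 (cluster 0), prev CPU 4 (idle, cluster 2): the LLC
	 * scan finds idle CPU 3 before falling back to prev. */
	printf("picked CPU %d\n", wakeup_pick_cpu(0, 4));
	return 0;
}
```

The patches below implement this ordering in select_idle_cpu()/select_idle_sibling(), using the sched_group flags and the sched_cluster_active static key.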

Barry Song (2):
  sched: Add cpus_share_resources API
  sched/fair: Scan cluster before scanning LLC in wake-up path

Yicong Yang (1):
  sched/fair: Use candidate prev/recent_used CPU if scanning failed for
    cluster wakeup

 include/linux/sched/sd_flags.h |  7 ++++
 include/linux/sched/topology.h |  8 ++++-
 kernel/sched/core.c            | 12 +++++++
 kernel/sched/fair.c            | 58 +++++++++++++++++++++++++++++++---
 kernel/sched/sched.h           |  2 ++
 kernel/sched/topology.c        | 25 +++++++++++++++
 6 files changed, 106 insertions(+), 6 deletions(-)

-- 
2.24.0

|Mainline commit ID|Patch|Conflict on backport|Conflict reason|
|---|---|---|---|
|NA|Revert "sched/fair: Scan cluster before scanning LLC in wake-up path"|Yes|The relative position of some code in kernel/sched/topology.c has changed|
|NA|Revert "sched: Add per_cpu cluster domain info and cpus_share_lowest_cache API"|Yes|The relative position of some code in kernel/sched/core.c has changed|
|16d364ba|sched/topology: Introduce sched_group::flags|Yes|The relative position of some code in kernel/sched/sched.h has changed|
|bf2dc42d|sched/topology: Propagate SMT flags when removing degenerate domain|No|NA|
|4efcc8bc|sched/topology: Align group flags when removing degenerate domain|No|NA|
|b95303e0aeaf|sched: Add cpus_share_resources API|Yes|The relative position of some code in include/linux/sched/topology.h and kernel/sched/core.c has changed|
|8881e1639f1f|sched/fair: Scan cluster before scanning LLC in wake-up path|Yes|The relative position of some code in kernel/sched/fair.c, kernel/sched/sched.h and kernel/sched/topology.c has changed; kernel/sched/fair.c contains four adaptations for 5.10|
|22165f61d0c4|sched/fair: Use candidate prev/recent_used CPU if scanning failed for cluster wakeup|Yes|The relative position of some code in kernel/sched/fair.c has changed|
 
 
Link: https://gitee.com/openeuler/kernel/pulls/2774

 

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents b1d93034 b126dff5
include/linux/sched/topology.h (+2 −2)
@@ -189,7 +189,7 @@ cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);

bool cpus_share_cache(int this_cpu, int that_cpu);
bool cpus_share_lowest_cache(int this_cpu, int that_cpu);
bool cpus_share_resources(int this_cpu, int that_cpu);

typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);
@@ -244,7 +244,7 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
	return true;
}

static inline bool cpus_share_lowest_cache(int this_cpu, int that_cpu)
static inline bool cpus_share_resources(int this_cpu, int that_cpu)
{
	return true;
}
kernel/sched/core.c (+3 −3)
@@ -3022,15 +3022,15 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
}

/*
 * Whether CPUs are share lowest cache, which means LLC on non-cluster
 * Whether CPUs are share cache resources, which means LLC on non-cluster
 * machines and LLC tag or L2 on machines with clusters.
 */
bool cpus_share_lowest_cache(int this_cpu, int that_cpu)
bool cpus_share_resources(int this_cpu, int that_cpu)
{
	if (this_cpu == that_cpu)
		return true;

	return per_cpu(sd_lowest_cache_id, this_cpu) == per_cpu(sd_lowest_cache_id, that_cpu);
	return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu);
}

static inline bool ttwu_queue_cond(int cpu)
kernel/sched/fair.c (+32 −9)
@@ -6664,10 +6664,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
	}

	if (static_branch_unlikely(&sched_cluster_active)) {
		struct sched_domain *sdc = rcu_dereference(per_cpu(sd_cluster, target));
		struct sched_group *sg = sd->groups;

		if (sdc) {
			for_each_cpu_wrap(cpu, sched_domain_span(sdc), target) {
		if (sg->flags & SD_CLUSTER) {
			for_each_cpu_wrap(cpu, sched_group_span(sg), target) {
				if (!cpumask_test_cpu(cpu, cpus))
					continue;

@@ -6683,7 +6683,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
						return idle_cpu;
				}
			}
			cpumask_andnot(cpus, cpus, sched_domain_span(sdc));
			cpumask_andnot(cpus, cpus, sched_group_span(sg));
		}
	}

@@ -6778,7 +6778,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
	struct sched_domain *sd;
	unsigned long task_util;
	int i, recent_used_cpu;
	int i, recent_used_cpu, prev_aff = -1;

	/*
	 * On asymmetric system, update task utilization because we will check
@@ -6806,14 +6806,19 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	/*
	 * If the previous CPU is cache affine and idle, don't be stupid:
	 */
	if (prev != target && cpus_share_lowest_cache(prev, target) &&
	if (prev != target && cpus_share_cache(prev, target) &&
	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	    cpumask_test_cpu(prev, p->select_cpus) &&
#endif
	    asym_fits_capacity(task_util, prev)) {
		SET_STAT(found_idle_cpu_easy);

		if (!static_branch_unlikely(&sched_cluster_active) ||
		    cpus_share_resources(prev, target))
			return prev;

		prev_aff = prev;
	}

	/*
@@ -6837,7 +6842,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	recent_used_cpu = p->recent_used_cpu;
	if (recent_used_cpu != prev &&
	    recent_used_cpu != target &&
	    cpus_share_lowest_cache(recent_used_cpu, target) &&
	    cpus_share_cache(recent_used_cpu, target) &&
	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	    cpumask_test_cpu(p->recent_used_cpu, p->select_cpus) &&
@@ -6851,7 +6856,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
		 */
		SET_STAT(found_idle_cpu_easy);
		p->recent_used_cpu = prev;

		if (!static_branch_unlikely(&sched_cluster_active) ||
		    cpus_share_resources(recent_used_cpu, target))
			return recent_used_cpu;

	} else {
		recent_used_cpu = -1;
	}

	/*
@@ -6888,6 +6899,18 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	}

	SET_STAT(nofound_idle_cpu);

	/*
	 * For cluster machines which have lower sharing cache like L2 or
	 * LLC Tag, we tend to find an idle CPU in the target's cluster
	 * first. But prev_cpu or recent_used_cpu may also be a good candidate,
	 * use them if possible when no idle CPU found in select_idle_cpu().
	 */
	if ((unsigned int)prev_aff < nr_cpumask_bits)
		return prev_aff;
	if ((unsigned int)recent_used_cpu < nr_cpumask_bits)
		return recent_used_cpu;

	return target;
}

kernel/sched/sched.h (+2 −3)
@@ -1844,9 +1844,8 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(int, sd_lowest_cache_id);
DECLARE_PER_CPU(int, sd_share_id);
DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_cluster);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
@@ -1880,7 +1879,7 @@ struct sched_group {
	struct sched_group_capacity *sgc;
	int			asym_prefer_cpu;	/* CPU of highest priority in group */

	KABI_RESERVE(1)
	KABI_USE(1, int flags)
	KABI_RESERVE(2)
	/*
	 * The CPUs this group covers.
kernel/sched/topology.c (+29 −10)
@@ -647,8 +647,7 @@ static void destroy_sched_domains(struct sched_domain *sd)
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id);
DEFINE_PER_CPU(int, sd_lowest_cache_id);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_cluster);
DEFINE_PER_CPU(int, sd_share_id);
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
@@ -689,14 +688,13 @@ static void update_top_cache_domain(int cpu)
	sd = lowest_flag_domain(cpu, SD_CLUSTER);
	if (sd)
		id = cpumask_first(sched_domain_span(sd));
	rcu_assign_pointer(per_cpu(sd_cluster, cpu), sd);

	/*
	 * This assignment should be placed after the sd_llc_id as
	 * we want this id equals to cluster id on cluster machines
	 * but equals to LLC id on non-Cluster machines.
	 */
	per_cpu(sd_lowest_cache_id, cpu) = id;
	per_cpu(sd_share_id, cpu) = id;

	sd = lowest_flag_domain(cpu, SD_NUMA);
	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
@@ -727,8 +725,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)

		if (sd_parent_degenerate(tmp, parent)) {
			tmp->parent = parent->parent;
			if (parent->parent)

			if (parent->parent) {
				parent->parent->child = tmp;
				parent->parent->groups->flags = tmp->flags;
			}

			/*
			 * Transfer SD_PREFER_SIBLING down in case of a
			 * degenerate parent; the spans match for this
@@ -745,9 +747,21 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
		tmp = sd;
		sd = sd->parent;
		destroy_sched_domain(tmp);
		if (sd)
		if (sd) {
			struct sched_group *sg = sd->groups;

			/*
			 * sched groups hold the flags of the child sched
			 * domain for convenience. Clear such flags since
			 * the child is being destroyed.
			 */
			do {
				sg->flags = 0;
			} while (sg != sd->groups);

			sd->child = NULL;
		}
	}

	for (tmp = sd; tmp; tmp = tmp->parent)
		numa_distance += !!(tmp->flags & SD_NUMA);
@@ -945,10 +959,12 @@ build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
		return NULL;

	sg_span = sched_group_span(sg);
	if (sd->child)
	if (sd->child) {
		cpumask_copy(sg_span, sched_domain_span(sd->child));
	else
		sg->flags = sd->child->flags;
	} else {
		cpumask_copy(sg_span, sched_domain_span(sd));
	}

	atomic_inc(&sg->ref);
	return sg;
@@ -1198,6 +1214,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)
	if (child) {
		cpumask_copy(sched_group_span(sg), sched_domain_span(child));
		cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
		sg->flags = child->flags;
	} else {
		cpumask_set_cpu(cpu, sched_group_span(sg));
		cpumask_set_cpu(cpu, group_balance_mask(sg));
@@ -2366,7 +2383,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
			sd = build_sched_domain(tl, cpu_map, attr, sd, i);

			has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
			has_cluster |= sd->flags & SD_CLUSTER;

			if (tl == sched_domain_topology)
				*per_cpu_ptr(d.sd, i) = sd;
@@ -2474,6 +2490,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);

		cpu_attach_domain(sd, d.rd, i);

		if (lowest_flag_domain(i, SD_CLUSTER))
			has_cluster = true;
	}
	rcu_read_unlock();

@@ -2583,7 +2602,7 @@ static void detach_destroy_domains(const struct cpumask *cpu_map)
	if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
		static_branch_dec_cpuslocked(&sched_asym_cpucapacity);

	if (rcu_access_pointer(per_cpu(sd_cluster, cpu)))
	if (static_branch_unlikely(&sched_cluster_active))
		static_branch_dec_cpuslocked(&sched_cluster_active);

	rcu_read_lock();