Unverified Commit 8edbf4ae authored by openeuler-ci-bot, committed by Gitee

!2774 [OLK-5.10] sched/fair: Scan cluster before scanning LLC in wake-up path

Merge Pull Request from: @liujie-248683921 
 
https://gitee.com/openeuler/kernel/issues/I8E8NN
This is the follow-up work to support the cluster scheduler. Previously
we added a cluster level to the scheduler for both ARM64 [1] and
x86 [2] to support load balancing between clusters, bringing more memory
bandwidth and reducing cache contention. This patchset, on the other
hand, takes care of the wake-up path by giving CPUs within the same
cluster a try before scanning the whole LLC, to benefit tasks that
communicate with each other.

[1] 778c558f ("sched: Add cluster scheduler level in core and related Kconfig for ARM64")
[2] 66558b73 ("sched: Add cluster scheduler level for x86")
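
For illustration, the resulting pick order can be sketched as a small user-space program (this is not the kernel implementation; the topology tables, CPU count, and helper names below are invented for the example): an idle, cache-affine prev CPU is taken immediately only if it also shares the cluster with the target; otherwise the target's cluster is scanned first, then the rest of the LLC, with the affine prev kept as a last-resort fallback.

```c
/*
 * Illustrative user-space sketch of the wake-up search order introduced by
 * this series. Not the kernel code: the topology tables and helper names
 * are invented for the example.
 */
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 8

/* Hypothetical topology: two CPUs per cluster, one LLC covering all CPUs. */
static const int cluster_id[NR_CPUS] = { 0, 0, 1, 1, 2, 2, 3, 3 };
static const int llc_id[NR_CPUS]     = { 0, 0, 0, 0, 0, 0, 0, 0 };
static const bool cpu_idle[NR_CPUS]  = { false, false, false, true,
                                         true,  false, false, false };

static bool share_cluster(int a, int b) { return cluster_id[a] == cluster_id[b]; }
static bool share_llc(int a, int b)     { return llc_id[a] == llc_id[b]; }

static int wakeup_pick_cpu(int target, int prev)
{
	int cpu, prev_aff = -1;

	/* An idle, cache-affine prev is taken right away only if it also
	 * shares the cluster; otherwise remember it as a fallback. */
	if (prev != target && share_llc(prev, target) && cpu_idle[prev]) {
		if (share_cluster(prev, target))
			return prev;
		prev_aff = prev;
	}

	/* Pass 1: scan the target's cluster. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu != target && share_cluster(cpu, target) && cpu_idle[cpu])
			return cpu;

	/* Pass 2: scan the rest of the LLC. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu != target && share_llc(cpu, target) &&
		    !share_cluster(cpu, target) && cpu_idle[cpu])
			return cpu;

	/* Nothing idle found: fall back to the affine prev, then the target. */
	return prev_aff >= 0 ? prev_aff : target;
}

int main(void)
{
	/* target CPU 0 (cluster 0), prev CPU 4 (idle, cluster 2): the LLC
	 * scan finds idle CPU 3 before falling back to prev. */
	printf("picked CPU %d\n", wakeup_pick_cpu(0, 4));
	return 0;
}
```

The patches below implement this ordering in select_idle_cpu()/select_idle_sibling(), using the sched_group flags and the sched_cluster_active static key.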

Barry Song (2):
  sched: Add cpus_share_resources API
  sched/fair: Scan cluster before scanning LLC in wake-up path

Yicong Yang (1):
  sched/fair: Use candidate prev/recent_used CPU if scanning failed for
    cluster wakeup

 include/linux/sched/sd_flags.h |  7 ++++
 include/linux/sched/topology.h |  8 ++++-
 kernel/sched/core.c            | 12 +++++++
 kernel/sched/fair.c            | 58 +++++++++++++++++++++++++++++++---
 kernel/sched/sched.h           |  2 ++
 kernel/sched/topology.c        | 25 +++++++++++++++
 6 files changed, 106 insertions(+), 6 deletions(-)

-- 
2.24.0

|Mainline commit ID|Patch|Conflict on backport|Conflict reason|
|---|---|---|---|
|NA|Revert "sched/fair: Scan cluster before scanning LLC in wake-up path"|Yes|The relative position of some code in kernel/sched/topology.c has changed|
|NA|Revert "sched: Add per_cpu cluster domain info and cpus_share_lowest_cache API"|Yes|The relative position of some code in kernel/sched/core.c has changed|
|16d364ba|sched/topology: Introduce sched_group::flags|Yes|The relative position of some code in kernel/sched/sched.h has changed|
|bf2dc42d|sched/topology: Propagate SMT flags when removing degenerate domain|No|NA|
|4efcc8bc|sched/topology: Align group flags when removing degenerate domain|No|NA|
|b95303e0aeaf|sched: Add cpus_share_resources API|Yes|The relative position of some code in include/linux/sched/topology.h and kernel/sched/core.c has changed|
|8881e1639f1f|sched/fair: Scan cluster before scanning LLC in wake-up path|Yes|The relative position of some code in kernel/sched/fair.c, kernel/sched/sched.h and kernel/sched/topology.c has changed; kernel/sched/fair.c contains four adaptations for 5.10|
|22165f61d0c4|sched/fair: Use candidate prev/recent_used CPU if scanning failed for cluster wakeup|Yes|The relative position of some code in kernel/sched/fair.c has changed|
 
 
Link: https://gitee.com/openeuler/kernel/pulls/2774

 

Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents b1d93034 b126dff5
include/linux/sched/topology.h (+2 −2)
@@ -189,7 +189,7 @@ cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);

bool cpus_share_cache(int this_cpu, int that_cpu);
bool cpus_share_lowest_cache(int this_cpu, int that_cpu);
bool cpus_share_resources(int this_cpu, int that_cpu);

typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);
@@ -244,7 +244,7 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
	return true;
}

static inline bool cpus_share_lowest_cache(int this_cpu, int that_cpu)
static inline bool cpus_share_resources(int this_cpu, int that_cpu)
{
	return true;
}
kernel/sched/core.c (+3 −3)
@@ -3022,15 +3022,15 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
}

/*
 * Whether CPUs are share lowest cache, which means LLC on non-cluster
 * Whether CPUs are share cache resources, which means LLC on non-cluster
 * machines and LLC tag or L2 on machines with clusters.
 */
bool cpus_share_lowest_cache(int this_cpu, int that_cpu)
bool cpus_share_resources(int this_cpu, int that_cpu)
{
	if (this_cpu == that_cpu)
		return true;

	return per_cpu(sd_lowest_cache_id, this_cpu) == per_cpu(sd_lowest_cache_id, that_cpu);
	return per_cpu(sd_share_id, this_cpu) == per_cpu(sd_share_id, that_cpu);
}

static inline bool ttwu_queue_cond(int cpu)
kernel/sched/fair.c (+32 −9)
@@ -6664,10 +6664,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
	}

	if (static_branch_unlikely(&sched_cluster_active)) {
		struct sched_domain *sdc = rcu_dereference(per_cpu(sd_cluster, target));
		struct sched_group *sg = sd->groups;

		if (sdc) {
			for_each_cpu_wrap(cpu, sched_domain_span(sdc), target) {
		if (sg->flags & SD_CLUSTER) {
			for_each_cpu_wrap(cpu, sched_group_span(sg), target) {
				if (!cpumask_test_cpu(cpu, cpus))
					continue;

@@ -6683,7 +6683,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
						return idle_cpu;
				}
			}
			cpumask_andnot(cpus, cpus, sched_domain_span(sdc));
			cpumask_andnot(cpus, cpus, sched_group_span(sg));
		}
	}

@@ -6778,7 +6778,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
{
	struct sched_domain *sd;
	unsigned long task_util;
	int i, recent_used_cpu;
	int i, recent_used_cpu, prev_aff = -1;

	/*
	 * On asymmetric system, update task utilization because we will check
@@ -6806,14 +6806,19 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	/*
	 * If the previous CPU is cache affine and idle, don't be stupid:
	 */
	if (prev != target && cpus_share_lowest_cache(prev, target) &&
	if (prev != target && cpus_share_cache(prev, target) &&
	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	    cpumask_test_cpu(prev, p->select_cpus) &&
#endif
	    asym_fits_capacity(task_util, prev)) {
		SET_STAT(found_idle_cpu_easy);

		if (!static_branch_unlikely(&sched_cluster_active) ||
		    cpus_share_resources(prev, target))
			return prev;

		prev_aff = prev;
	}

	/*
@@ -6837,7 +6842,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	recent_used_cpu = p->recent_used_cpu;
	if (recent_used_cpu != prev &&
	    recent_used_cpu != target &&
	    cpus_share_lowest_cache(recent_used_cpu, target) &&
	    cpus_share_cache(recent_used_cpu, target) &&
	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	    cpumask_test_cpu(p->recent_used_cpu, p->select_cpus) &&
@@ -6851,7 +6856,13 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
		 */
		SET_STAT(found_idle_cpu_easy);
		p->recent_used_cpu = prev;

		if (!static_branch_unlikely(&sched_cluster_active) ||
		    cpus_share_resources(recent_used_cpu, target))
			return recent_used_cpu;

	} else {
		recent_used_cpu = -1;
	}

	/*
@@ -6888,6 +6899,18 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
	}

	SET_STAT(nofound_idle_cpu);

	/*
	 * For cluster machines which have lower sharing cache like L2 or
	 * LLC Tag, we tend to find an idle CPU in the target's cluster
	 * first. But prev_cpu or recent_used_cpu may also be a good candidate,
	 * use them if possible when no idle CPU found in select_idle_cpu().
	 */
	if ((unsigned int)prev_aff < nr_cpumask_bits)
		return prev_aff;
	if ((unsigned int)recent_used_cpu < nr_cpumask_bits)
		return recent_used_cpu;

	return target;
}

kernel/sched/sched.h (+2 −3)
@@ -1844,9 +1844,8 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DECLARE_PER_CPU(int, sd_llc_size);
DECLARE_PER_CPU(int, sd_llc_id);
DECLARE_PER_CPU(int, sd_lowest_cache_id);
DECLARE_PER_CPU(int, sd_share_id);
DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_cluster);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
@@ -1880,7 +1879,7 @@ struct sched_group {
	struct sched_group_capacity *sgc;
	int			asym_prefer_cpu;	/* CPU of highest priority in group */

	KABI_RESERVE(1)
	KABI_USE(1, int flags)
	KABI_RESERVE(2)
	/*
	 * The CPUs this group covers.
kernel/sched/topology.c (+29 −10)
@@ -647,8 +647,7 @@ static void destroy_sched_domains(struct sched_domain *sd)
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
DEFINE_PER_CPU(int, sd_llc_id);
DEFINE_PER_CPU(int, sd_lowest_cache_id);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_cluster);
DEFINE_PER_CPU(int, sd_share_id);
DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
@@ -689,14 +688,13 @@ static void update_top_cache_domain(int cpu)
	sd = lowest_flag_domain(cpu, SD_CLUSTER);
	if (sd)
		id = cpumask_first(sched_domain_span(sd));
	rcu_assign_pointer(per_cpu(sd_cluster, cpu), sd);

	/*
	 * This assignment should be placed after the sd_llc_id as
	 * we want this id equals to cluster id on cluster machines
	 * but equals to LLC id on non-Cluster machines.
	 */
	per_cpu(sd_lowest_cache_id, cpu) = id;
	per_cpu(sd_share_id, cpu) = id;

	sd = lowest_flag_domain(cpu, SD_NUMA);
	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
@@ -727,8 +725,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)

		if (sd_parent_degenerate(tmp, parent)) {
			tmp->parent = parent->parent;
			if (parent->parent)

			if (parent->parent) {
				parent->parent->child = tmp;
				parent->parent->groups->flags = tmp->flags;
			}

			/*
			 * Transfer SD_PREFER_SIBLING down in case of a
			 * degenerate parent; the spans match for this
@@ -745,9 +747,21 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
		tmp = sd;
		sd = sd->parent;
		destroy_sched_domain(tmp);
		if (sd)
		if (sd) {
			struct sched_group *sg = sd->groups;

			/*
			 * sched groups hold the flags of the child sched
			 * domain for convenience. Clear such flags since
			 * the child is being destroyed.
			 */
			do {
				sg->flags = 0;
			} while (sg != sd->groups);

			sd->child = NULL;
		}
	}

	for (tmp = sd; tmp; tmp = tmp->parent)
		numa_distance += !!(tmp->flags & SD_NUMA);
@@ -945,10 +959,12 @@ build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
		return NULL;

	sg_span = sched_group_span(sg);
	if (sd->child)
	if (sd->child) {
		cpumask_copy(sg_span, sched_domain_span(sd->child));
	else
		sg->flags = sd->child->flags;
	} else {
		cpumask_copy(sg_span, sched_domain_span(sd));
	}

	atomic_inc(&sg->ref);
	return sg;
@@ -1198,6 +1214,7 @@ static struct sched_group *get_group(int cpu, struct sd_data *sdd)
	if (child) {
		cpumask_copy(sched_group_span(sg), sched_domain_span(child));
		cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
		sg->flags = child->flags;
	} else {
		cpumask_set_cpu(cpu, sched_group_span(sg));
		cpumask_set_cpu(cpu, group_balance_mask(sg));
@@ -2366,7 +2383,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
			sd = build_sched_domain(tl, cpu_map, attr, sd, i);

			has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
			has_cluster |= sd->flags & SD_CLUSTER;

			if (tl == sched_domain_topology)
				*per_cpu_ptr(d.sd, i) = sd;
@@ -2474,6 +2490,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
			WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);

		cpu_attach_domain(sd, d.rd, i);

		if (lowest_flag_domain(i, SD_CLUSTER))
			has_cluster = true;
	}
	rcu_read_unlock();

@@ -2583,7 +2602,7 @@ static void detach_destroy_domains(const struct cpumask *cpu_map)
	if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
		static_branch_dec_cpuslocked(&sched_asym_cpucapacity);

	if (rcu_access_pointer(per_cpu(sd_cluster, cpu)))
	if (static_branch_unlikely(&sched_cluster_active))
		static_branch_dec_cpuslocked(&sched_cluster_active);

	rcu_read_lock();