Commit 0c3a4f98 authored by Barry Song, committed by Jie Liu

sched/fair: Scan cluster before scanning LLC in wake-up path

kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5W44S
CVE: NA

Reference: https://lore.kernel.org/lkml/20220915073423.25535-1-yangyicong@huawei.com/



----------------------------------------------------------------------

For platforms with clusters, such as Kunpeng920, CPUs within the same cluster
have lower latency when synchronizing and accessing shared resources like the
cache. Thus, this patch tries to find an idle CPU within the cluster of the
target CPU before scanning the whole LLC, to gain lower latency.
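
For illustration, here is a minimal user-space sketch of the reordered scan
(illustrative only, not the kernel code; the 4-CPU-cluster / 32-CPU-LLC
layout and the cpu_idle() helper are assumptions made for the example):

#include <stdbool.h>
#include <stdio.h>

#define LLC_CPUS	32
#define CLUSTER_CPUS	4

/* Stand-in for the kernel's idle check; pretend only CPU 7 is idle. */
static bool cpu_idle(int cpu)
{
	return cpu == 7;
}

static int scan_for_idle(int target)
{
	int first = target / CLUSTER_CPUS * CLUSTER_CPUS;
	int cpu;

	/* 1) Scan the target's cluster first: sharing is cheapest there. */
	for (cpu = first; cpu < first + CLUSTER_CPUS; cpu++)
		if (cpu_idle(cpu))
			return cpu;

	/* 2) Then scan the rest of the LLC, skipping the cluster above. */
	for (cpu = 0; cpu < LLC_CPUS; cpu++) {
		if (cpu >= first && cpu < first + CLUSTER_CPUS)
			continue;
		if (cpu_idle(cpu))
			return cpu;
	}

	return -1;	/* nothing idle; the caller falls back to the target */
}

int main(void)
{
	/* target 5 sits in cluster 4-7, so idle CPU 7 is found in step 1 */
	printf("idle CPU near target 5: %d\n", scan_for_idle(5));
	return 0;
}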

Testing has been done on Kunpeng920 by pinning tasks to one NUMA node and to
two NUMA nodes. On Kunpeng920, each NUMA node has 8 clusters and each cluster
has 4 CPUs.

With this patch, we see tbench improvements both within one NUMA node and
across two NUMA nodes.

On NUMA node 0:
                             6.0-rc1                patched
Hmean     1        351.20 (   0.00%)      396.45 *  12.88%*
Hmean     2        700.43 (   0.00%)      793.76 *  13.32%*
Hmean     4       1404.42 (   0.00%)     1583.62 *  12.76%*
Hmean     8       2833.31 (   0.00%)     3147.85 *  11.10%*
Hmean     16      5501.90 (   0.00%)     6089.89 *  10.69%*
Hmean     32     10428.59 (   0.00%)    10619.63 *   1.83%*
Hmean     64      8223.39 (   0.00%)     8306.93 *   1.02%*
Hmean     128     7042.88 (   0.00%)     7068.03 *   0.36%*

On NUMA nodes 0-1:
                             6.0-rc1                patched
Hmean     1        363.06 (   0.00%)      397.13 *   9.38%*
Hmean     2        721.68 (   0.00%)      789.84 *   9.44%*
Hmean     4       1435.15 (   0.00%)     1566.01 *   9.12%*
Hmean     8       2776.17 (   0.00%)     3007.05 *   8.32%*
Hmean     16      5471.71 (   0.00%)     6103.91 *  11.55%*
Hmean     32     10164.98 (   0.00%)    11531.81 *  13.45%*
Hmean     64     17143.28 (   0.00%)    20078.68 *  17.12%*
Hmean     128    14552.70 (   0.00%)    15156.41 *   4.15%*
Hmean     256    12827.37 (   0.00%)    13326.86 *   3.89%*

Note that neither Kunpeng920 nor x86 Jacobsville supports SMT, so the SMT
branch in the code has not been tested, but it is expected to work.
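
The SMT branch simply reuses select_idle_core() inside the cluster loop; a
rough model of that behaviour (a sketch assuming 2-way SMT and a trivial idle
check, not the kernel implementation) is:

#include <stdbool.h>
#include <stdio.h>

#define SMT_SIBLINGS	2

/* Pretend CPUs 2 and 3 are idle; everything else is busy. */
static bool cpu_idle(int cpu)
{
	return cpu == 2 || cpu == 3;
}

/* Return the first CPU of 'core' only if every SMT sibling is idle. */
static int core_fully_idle(int core)
{
	int first = core * SMT_SIBLINGS;
	int i;

	for (i = 0; i < SMT_SIBLINGS; i++)
		if (!cpu_idle(first + i))
			return -1;

	return first;
}

int main(void)
{
	/* core 1 spans CPUs 2-3 and both are idle, so it is reported */
	printf("core 0 -> %d, core 1 -> %d\n",
	       core_fully_idle(0), core_fully_idle(1));
	return 0;
}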

Suggested-by: Peter Zijlstra <peterz@infradead.org>
[https://lore.kernel.org/lkml/Ytfjs+m1kUs0ScSn@worktop.programming.kicks-ass.net]
Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Jie Liu <liujie375@h-partners.com>
parent 53ad6bf7
kernel/sched/fair.c (+27 −3)
@@ -6370,6 +6370,30 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 		}
 	}
 
+	if (static_branch_unlikely(&sched_cluster_active)) {
+		struct sched_domain *sdc = rcu_dereference(per_cpu(sd_cluster, target));
+
+		if (sdc) {
+			for_each_cpu_wrap(cpu, sched_domain_span(sdc), target) {
+				if (!cpumask_test_cpu(cpu, cpus))
+					continue;
+
+				if (smt) {
+					i = select_idle_core(p, cpu, cpus, &idle_cpu);
+					if ((unsigned int)i < nr_cpumask_bits)
+						return i;
+				} else {
+					if (--nr <= 0)
+						return -1;
+					idle_cpu = __select_idle_cpu(cpu, p);
+					if ((unsigned int)idle_cpu < nr_cpumask_bits)
+						return idle_cpu;
+				}
+			}
+			cpumask_andnot(cpus, cpus, sched_domain_span(sdc));
+		}
+	}
+
 	for_each_cpu_wrap(cpu, cpus, target) {
 		if (smt) {
 			i = select_idle_core(p, cpu, cpus, &idle_cpu);
@@ -6377,7 +6401,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 				return i;
 
 		} else {
-			if (!--nr)
+			if (--nr <= 0)
 				return -1;
 			idle_cpu = __select_idle_cpu(cpu, p);
 			if ((unsigned int)idle_cpu < nr_cpumask_bits)
@@ -6487,7 +6511,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	/*
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
-	if (prev != target && cpus_share_cache(prev, target) &&
+	if (prev != target && cpus_share_lowest_cache(prev, target) &&
 	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 	    cpumask_test_cpu(prev, p->select_cpus) &&
@@ -6518,7 +6542,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	recent_used_cpu = p->recent_used_cpu;
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
-	    cpus_share_cache(recent_used_cpu, target) &&
+	    cpus_share_lowest_cache(recent_used_cpu, target) &&
 	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 	    cpumask_test_cpu(p->recent_used_cpu, p->select_cpus) &&
kernel/sched/sched.h (+1 −0)
@@ -1808,6 +1808,7 @@ DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
 DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
 DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
 extern struct static_key_false sched_asym_cpucapacity;
+extern struct static_key_false sched_cluster_active;
 
 struct sched_group_capacity {
 	atomic_t		ref;
kernel/sched/topology.c (+11 −0)
@@ -653,7 +653,9 @@ DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
 DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
+
 DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
+DEFINE_STATIC_KEY_FALSE(sched_cluster_active);
 
 static void update_top_cache_domain(int cpu)
 {
@@ -2200,6 +2202,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	int i, ret = -ENOMEM;
 	struct sched_domain_topology_level *tl_asym;
 	bool has_asym = false;
+	bool has_cluster = false;
 
 	if (WARN_ON(cpumask_empty(cpu_map)))
 		goto error;
@@ -2227,6 +2230,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att

 			sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
 
+			has_cluster |= sd->flags & SD_CLUSTER;
+
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
 			if (tl->flags & SDTL_OVERLAP)
@@ -2286,6 +2291,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	if (has_asym)
 		static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
 
+	if (has_cluster)
+		static_branch_inc_cpuslocked(&sched_cluster_active);
+
 	if (rq && sched_debug_enabled) {
 		pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
 			cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
@@ -2385,6 +2393,9 @@ static void detach_destroy_domains(const struct cpumask *cpu_map)
 	if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
 		static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
 
+	if (rcu_access_pointer(per_cpu(sd_cluster, cpu)))
+		static_branch_dec_cpuslocked(&sched_cluster_active);
+
 	rcu_read_lock();
 	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
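
After booting a kernel with this series, one quick way to confirm the cluster
spans the scheduler will use is to read the cluster topology exported via
sysfs (a sketch, assuming the platform exposes cluster_cpus_list, as recent
kernels do for Kunpeng920):

#include <stdio.h>

int main(void)
{
	char buf[256];
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/topology/cluster_cpus_list", "r");

	if (!f) {
		perror("cluster_cpus_list");
		return 1;
	}

	/* On Kunpeng920 this is expected to list 4 CPUs, e.g. "0-3". */
	if (fgets(buf, sizeof(buf), f))
		printf("CPUs sharing cpu0's cluster: %s", buf);

	fclose(f);
	return 0;
}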