Commit fbd5102b authored by Cheng Jian

sched/fair: introduce SCHED_STEAL

hulk inclusion
category: feature
bugzilla: 38261, https://bugzilla.openeuler.org/show_bug.cgi?id=23


CVE: NA

---------------------------

Introduce CONFIG_SCHED_STEAL to limit the impact of task stealing.

1) If CONFIG_SCHED_STEAL is turned off, none of the changes take
effect: the hooks become empty inline functions, and the compiler
optimizes them away.

2) If CONFIG_SCHED_STEAL is enabled but STEAL and schedstats are
disabled, the schedstat checks introduce a small overhead, but the
effect on performance is negligible. This will be our default choice.
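
The mechanism behind point 1) can be seen in the fair.c hunk below:
when the option is off, the steal hooks collapse into empty static
inlines that the compiler eliminates. In sketch form:

	#ifdef CONFIG_SCHED_STEAL
	static int try_steal(struct rq *this_rq, struct rq_flags *rf);
	#else
	/* Empty stubs: inlined and folded away, so the disabled
	 * configuration carries no steal code at all. */
	static inline int try_steal(struct rq *this_rq, struct rq_flags *rf) { return 0; }
	static inline void overload_clear(struct rq *rq) {}
	static inline void overload_set(struct rq *rq) {}
	#endif

Note that even with CONFIG_SCHED_STEAL=y, stealing stays disabled
until the STEAL scheduler feature is turned on, since features.h
below sets SCHED_FEAT(STEAL, false); with CONFIG_SCHED_DEBUG it can
be toggled at runtime through /sys/kernel/debug/sched_features.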

Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
parent da163c22
+2 −0
@@ -72,7 +72,9 @@ struct sched_domain_shared {
	atomic_t	ref;
	atomic_t	nr_busy_cpus;
	int		has_idle_cores;
#ifdef CONFIG_SCHED_STEAL
	struct sparsemask *cfs_overload_cpus;
#endif
};

struct sched_domain {
+15 −0
@@ -996,6 +996,21 @@ config NET_NS

endif # NAMESPACES

config SCHED_STEAL
	bool "Steal tasks to improve CPU utilization"
	depends on SMP
	default n
	help
	  When a CPU has no more CFS tasks to run, and idle_balance() fails
	  to find a task, then attempt to steal a task from an overloaded
	  CPU in the same LLC. Maintain and use a bitmap of overloaded CPUs
	  to efficiently identify candidates.  To minimize search time, steal
	  the first migratable task that is found when the bitmap is traversed.
	  For fairness, search for migratable tasks on an overloaded CPU in
	  order of next to run.

	  If unsure, say N here.

config CHECKPOINT_RESTORE
	bool "Checkpoint/restore support"
	select PROC_CHILDREN
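
To make the help text above concrete, here is an illustrative sketch
of the steal path it describes, not this commit's implementation:
only try_steal(), steal_enabled(), the sparsemask helpers,
can_migrate_task_llc() and the steal_fail schedstat appear in the
hunks below; the cfs_overload_cpus lookup on the rq, the
sparsemask_for_each() iterator and the steal_from() helper are
assumed names used for illustration only.

	/* Illustrative sketch: when idle_balance() finds nothing, walk
	 * the LLC's overloaded-CPU bitmap and pull the first migratable
	 * task found. */
	static int try_steal(struct rq *dst_rq, struct rq_flags *dst_rf)
	{
		struct sparsemask *overload_cpus;
		int stolen = 0;
		int src_cpu;

		if (!steal_enabled())
			return 0;

		rcu_read_lock();
		overload_cpus = rcu_dereference(dst_rq->cfs_overload_cpus); /* assumed field */
		if (overload_cpus) {
			/* Search overloaded CPUs in this LLC; stop at the
			 * first CPU a migratable task can be pulled from. */
			sparsemask_for_each(overload_cpus, src_cpu) {	/* assumed iterator */
				if (src_cpu == dst_rq->cpu)
					continue;
				stolen = steal_from(dst_rq, dst_rf, src_cpu); /* assumed helper */
				if (stolen)
					break;
			}
		}
		rcu_read_unlock();

		if (!stolen)
			schedstat_inc(dst_rq->steal_fail);
		return stolen;
	}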
+4 −0
@@ -2240,6 +2240,7 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
DEFINE_STATIC_KEY_FALSE(sched_schedstats);
static bool __initdata __sched_schedstats = false;

#ifdef CONFIG_SCHED_STEAL
unsigned long schedstat_skid;

static void compute_skid(void)
@@ -2263,6 +2264,9 @@ static void compute_skid(void)
		schedstat_skid = 0;
	pr_info("schedstat_skid = %lu\n", schedstat_skid);
}
#else
static inline void compute_skid(void) {}
#endif

static void set_schedstats(bool enabled)
{
+25 −6
@@ -21,7 +21,9 @@
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 */
#include "sched.h"
#ifdef CONFIG_SCHED_STEAL
#include "sparsemask.h"
#endif

#include <trace/events/sched.h>

@@ -3821,6 +3823,8 @@ static inline void rq_idle_stamp_clear(struct rq *rq)
	rq->idle_stamp = 0;
}

#ifdef CONFIG_SCHED_STEAL

static inline bool steal_enabled(void)
{
#ifdef CONFIG_NUMA
@@ -3845,7 +3849,7 @@ static void overload_clear(struct rq *rq)
	if (overload_cpus)
		sparsemask_clear_elem(overload_cpus, rq->cpu);
	rcu_read_unlock();
	schedstat_end_time(rq->find_time, time);
	schedstat_end_time(rq, time);
}

static void overload_set(struct rq *rq)
@@ -3862,10 +3866,15 @@ static void overload_set(struct rq *rq)
	if (overload_cpus)
		sparsemask_set_elem(overload_cpus, rq->cpu);
	rcu_read_unlock();
	schedstat_end_time(rq->find_time, time);
	schedstat_end_time(rq, time);
}

static int try_steal(struct rq *this_rq, struct rq_flags *rf);
#else
static inline int try_steal(struct rq *this_rq, struct rq_flags *rf) { return 0; }
static inline void overload_clear(struct rq *rq) {}
static inline void overload_set(struct rq *rq) {}
#endif

#else /* CONFIG_SMP */

@@ -6306,6 +6315,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
	return cpu;
}

#ifdef CONFIG_SCHED_STEAL
#define SET_STAT(STAT)							\
	do {								\
		if (schedstat_enabled()) {				\
@@ -6315,6 +6325,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
				__schedstat_inc(rq->STAT);		\
		}							\
	} while (0)
#else
#define SET_STAT(STAT)
#endif

/*
 * Try and locate an idle core/thread in the LLC cache domain.
@@ -6563,13 +6576,15 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
static int
select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
{
	unsigned long time = schedstat_start_time();
	unsigned long time;
	struct sched_domain *tmp, *sd = NULL;
	int cpu = smp_processor_id();
	int new_cpu = prev_cpu;
	int want_affine = 0;
	int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);

	time = schedstat_start_time();

	if (sd_flag & SD_BALANCE_WAKE) {
		record_wakee(p);
		want_affine = !wake_wide(p) && !wake_cap(p, cpu, prev_cpu)
@@ -6612,7 +6627,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
			current->recent_used_cpu = cpu;
	}
	rcu_read_unlock();
	schedstat_end_time(cpu_rq(cpu)->find_time, time);
	schedstat_end_time(cpu_rq(cpu), time);

	return new_cpu;
}
@@ -6980,14 +6995,14 @@ done: __maybe_unused;
	rq_idle_stamp_update(rq);

	new_tasks = idle_balance(rq, rf);

	if (new_tasks == 0)
		new_tasks = try_steal(rq, rf);
	schedstat_end_time(rq, time);

	if (new_tasks)
		rq_idle_stamp_clear(rq);

	schedstat_end_time(rq->find_time, time);

	/*
	 * Because try_steal() and idle_balance() release (and re-acquire)
	 * rq->lock, it is possible for any higher priority task to appear.
@@ -7398,6 +7413,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
	return 0;
}

#ifdef CONFIG_SCHED_STEAL
/*
 * Return true if task @p can migrate from @rq to @dst_rq in the same LLC.
 * No need to test for co-locality, and no need to test task_hot(), as sharing
@@ -7425,6 +7441,7 @@ can_migrate_task_llc(struct task_struct *p, struct rq *rq, struct rq *dst_rq)

	return true;
}
#endif

/*
 * detach_task() -- detach the task for the migration from @src_rq to @dst_cpu.
@@ -9937,6 +9954,7 @@ void trigger_load_balance(struct rq *rq)
	nohz_balancer_kick(rq);
}

#ifdef CONFIG_SCHED_STEAL
/*
 * Search the runnable tasks in @cfs_rq in order of next to run, and find
 * the first one that can be migrated to @dst_rq.  @cfs_rq is locked on entry.
@@ -10085,6 +10103,7 @@ static int try_steal(struct rq *dst_rq, struct rq_flags *dst_rf)
		schedstat_inc(dst_rq->steal_fail);
	return stolen;
}
#endif

static void rq_online_fair(struct rq *rq)
{
+2 −0
@@ -58,11 +58,13 @@ SCHED_FEAT(TTWU_QUEUE, true)
SCHED_FEAT(SIS_AVG_CPU, false)
SCHED_FEAT(SIS_PROP, true)

#ifdef CONFIG_SCHED_STEAL
/*
 * Steal a CFS task from another CPU when going idle.
 * Improves CPU utilization.
 */
SCHED_FEAT(STEAL, false)
#endif

/*
 * Issue a WARN when we do multiple update_rq_clock() calls