Unverified Commit 084a6771 authored by openeuler-ci-bot, committed by Gitee
Browse files

!12006 v3 add steal_task for cgroup

Merge Pull Request from: @ci-robot 
 
PR sync from: Cheng Yu <serein.chengyu@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/NLB7KTWAVNUSO7Q6CZ27KMAVYOOUBBA2/ 
add steal_task for cgroup

Cheng Yu (6):
  Revert "sched: add mutex lock to protect qos_level"
  sched/fair: Add group_steal in cmdline to enable STEAL for cgroup
  sched/core: Add cpu.steal_task in cgroup v1 cpu subsystem
  sched/topology: Remove SCHED_STEAL_NODE_LIMIT_DEFAULT
  sched/fair: Count the number of tasks marked as steal_task on cfs_rq
  sched/fair: Set the maximum number of steal attempts

Zheng Zucheng (2):
  sched/debug: Add h_nr_running/steal_h_nr_running in sched_debug
  sched/core: Add mutex lock to protect steal_task


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/IAQWPQ
https://gitee.com/openeuler/kernel/issues/IAS45L 
 
Link: https://gitee.com/openeuler/kernel/pulls/12006

 

Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
parents 677c06d9 9c8a6883
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -522,7 +522,11 @@ struct sched_entity {
#else
	KABI_RESERVE(1)
#endif
#ifdef CONFIG_SCHED_STEAL
	KABI_USE(2, int steal_task)
#else
	KABI_RESERVE(2)
#endif
	KABI_RESERVE(3)
	KABI_RESERVE(4)
};
+4 −0
Original line number Diff line number Diff line
@@ -39,6 +39,10 @@ extern int sysctl_sched_util_low_pct;
extern int sysctl_sched_util_ratio;
#endif

#ifdef CONFIG_SCHED_STEAL
extern int sysctl_sched_max_steal_count;
#endif

#ifdef CONFIG_QOS_SCHED_SMART_GRID
extern unsigned int sysctl_smart_grid_strategy_ctrl;
extern int sysctl_affinity_adjust_delay_ms;
+114 −9
Original line number Diff line number Diff line
@@ -8275,6 +8275,9 @@ void __init sched_init(void)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		root_task_group.smt_expell = TG_SMT_EXPELL;
#endif
#ifdef CONFIG_SCHED_STEAL
		root_task_group.steal_task = TG_STEAL_NO;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
		root_task_group.rt_se = (struct sched_rt_entity **)ptr;
		ptr += nr_cpu_ids * sizeof(void **);
@@ -8636,13 +8639,6 @@ static inline int alloc_qos_sched_group(struct task_group *tg,
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	tg->smt_expell = parent->smt_expell;
#endif
	tg->qos_level_mutex = kzalloc(sizeof(struct mutex), GFP_KERNEL);

	if (!tg->qos_level_mutex)
		return 0;

	mutex_init(tg->qos_level_mutex);

	return 1;
}

@@ -8719,6 +8715,20 @@ static void sched_free_group(struct task_group *tg)
	kmem_cache_free(task_group_cache, tg);
}

#ifdef CONFIG_SCHED_STEAL
/*
 * Make @tsk follow the steal_task setting of task group @tg by copying the
 * group's value into the task's embedded scheduling entity.
 */
static void sched_change_steal_group(struct task_struct *tsk, struct task_group *tg)
{
	tsk->se.steal_task = tg->steal_task;
}

/*
 * Initialise the steal_task setting of a task group.
 * @tg:  group being set up
 * @ptg: group whose steal_task value is copied (sched_create_group() passes
 *       the parent group here)
 */
static inline void tg_init_steal(struct task_group *tg, struct task_group *ptg)
{
	tg->steal_task = ptg->steal_task;
}
#endif

#ifdef CONFIG_BPF_SCHED
static inline void tg_init_tag(struct task_group *tg, struct task_group *ptg)
{
@@ -8746,6 +8756,10 @@ struct task_group *sched_create_group(struct task_group *parent)
	if (!alloc_rt_sched_group(tg, parent))
		goto err;

#ifdef CONFIG_SCHED_STEAL
	tg_init_steal(tg, parent);
#endif

#ifdef CONFIG_BPF_SCHED
	tg_init_tag(tg, parent);
#endif
@@ -8821,6 +8835,10 @@ static void sched_change_group(struct task_struct *tsk, int type)
	sched_change_qos_group(tsk, tg);
#endif

#ifdef CONFIG_SCHED_STEAL
	sched_change_steal_group(tsk, tg);
#endif

#ifdef CONFIG_BPF_SCHED
	/*
	 * This function has cleared and restored the task status,
@@ -9727,7 +9745,6 @@ static int tg_change_scheduler(struct task_group *tg, void *data)
	s64 qos_level = *(s64 *)data;
	struct cgroup_subsys_state *css = &tg->css;

	mutex_lock(tg->qos_level_mutex);
	tg->qos_level = qos_level;
	if (is_offline_level(qos_level))
		policy = SCHED_IDLE;
@@ -9745,7 +9762,6 @@ static int tg_change_scheduler(struct task_group *tg, void *data)
		sched_setscheduler(tsk, policy, &param);
	}
	css_task_iter_end(&it);
	mutex_unlock(tg->qos_level_mutex);

	return 0;
}
@@ -9796,6 +9812,87 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
}
#endif

#ifdef CONFIG_SCHED_STEAL
static DEFINE_MUTEX(steal_mutex);

static inline s64 cpu_steal_task_read(struct cgroup_subsys_state *css,
				      struct cftype *cft)
{
	return css_tg(css)->steal_task;
}

/*
 * sched_setsteal - change the steal_task marker of a single task.
 * @tsk:        task whose scheduling entity is updated
 * @steal_task: new value for tsk->se.steal_task
 *
 * If the task is queued it is dequeued before the field changes and
 * re-enqueued afterwards (likewise put_prev_task()/set_next_task() if it is
 * the task currently running on its runqueue), so that the dequeue sees the
 * old value and the enqueue sees the new one.
 *
 * Nothing is done when the task already carries the requested value.
 */
void sched_setsteal(struct task_struct *tsk, s64 steal_task)
{
	struct sched_entity *se = &tsk->se;
	int queued, running, queue_flags =
			DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(tsk, &rf);

	/*
	 * The field is only written below with the rq lock held, so the
	 * "already set" test must be made under the same lock; testing it
	 * before locking could act on a stale value and skip the update.
	 */
	if (se->steal_task == steal_task)
		goto unlock;

	running = task_current(rq, tsk);
	queued = task_on_rq_queued(tsk);

	/* The queue flags carry DEQUEUE_NOCLOCK, so refresh the clock here. */
	update_rq_clock(rq);
	if (queued)
		dequeue_task(rq, tsk, queue_flags);
	if (running)
		put_prev_task(rq, tsk);

	se->steal_task = steal_task;

	if (queued)
		enqueue_task(rq, tsk, queue_flags);
	if (running)
		set_next_task(rq, tsk);

unlock:
	task_rq_unlock(rq, tsk, &rf);
}

/*
 * tg_change_steal - apply a steal_task value to one task group.
 * @tg:   group to update
 * @data: points to the s64 value to apply
 *
 * Records the value in the group and then pushes it to every task returned
 * by the css task iterator for the group's css. Always returns 0; it is
 * passed as a visitor to walk_tg_tree_from() by cpu_steal_task_write().
 */
int tg_change_steal(struct task_group *tg, void *data)
{
	s64 steal_task = *(s64 *)data;
	struct css_task_iter it;
	struct task_struct *tsk;

	tg->steal_task = steal_task;

	css_task_iter_start(&tg->css, 0, &it);
	for (tsk = css_task_iter_next(&it); tsk; tsk = css_task_iter_next(&it))
		sched_setsteal(tsk, steal_task);
	css_task_iter_end(&it);

	return 0;
}

/*
 * write_s64 handler of the cpu cgroup "steal_task" file.
 *
 * Returns -EPERM when group_steal_used() reports that group stealing is not
 * in use, -EINVAL when @steal_task lies outside [TG_STEAL_NO, TG_STEAL],
 * and 0 otherwise. On success the value is applied, via tg_change_steal(),
 * to the group owning @css and to the groups visited by walk_tg_tree_from()
 * starting from it. Writers are serialised by steal_mutex.
 * @cftype is not used.
 */
static int cpu_steal_task_write(struct cgroup_subsys_state *css,
				struct cftype *cftype, s64 steal_task)
{
	struct task_group *tg;

	if (!group_steal_used())
		return -EPERM;

	if (steal_task < TG_STEAL_NO || steal_task > TG_STEAL)
		return -EINVAL;

	tg = css_tg(css);

	mutex_lock(&steal_mutex);

	/* The task-group tree walk is done inside an RCU read-side section. */
	rcu_read_lock();
	walk_tg_tree_from(tg, tg_change_steal, tg_nop, &steal_task);
	rcu_read_unlock();

	mutex_unlock(&steal_mutex);

	return 0;
}
#endif

#ifdef CONFIG_BPF_SCHED
void sched_settag(struct task_struct *tsk, s64 tag)
{
@@ -9966,6 +10063,14 @@ static struct cftype cpu_legacy_files[] = {
		.write_s64 = cpu_smt_expell_write,
	},
#endif
#ifdef CONFIG_SCHED_STEAL
	{
		.name = "steal_task",
		.flags = CFTYPE_NOT_ON_ROOT,
		.read_s64 = cpu_steal_task_read,
		.write_s64 = cpu_steal_task_write,
	},
#endif
#ifdef CONFIG_BPF_SCHED
	{
		.name = "tag",
+5 −0
Original line number Diff line number Diff line
@@ -594,6 +594,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
			cfs_rq->nr_spread_over);
	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
#ifdef CONFIG_SCHED_STEAL
	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
	SEQ_printf(m, "  .%-30s: %ld\n", "steal_h_nr_running",
			cfs_rq->steal_h_nr_running);
#endif
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
+134 −26
Original line number Diff line number Diff line
@@ -4459,6 +4459,14 @@ static inline void rq_idle_stamp_clear(struct rq *rq)
}

#ifdef CONFIG_SCHED_STEAL
DEFINE_STATIC_KEY_FALSE(group_steal);

/*
 * Handler for the "group_steal" kernel command-line option (registered with
 * __setup() right below). Turns on the group_steal static key, which is
 * defined false by default; the option's argument string is ignored.
 * Always returns 1.
 */
static int __init group_steal_setup(char *__unused)
{
	static_branch_enable(&group_steal);
	return 1;
}
__setup("group_steal", group_steal_setup);

static inline bool steal_enabled(void)
{
@@ -4470,14 +4478,30 @@ static inline bool steal_enabled(void)
	return sched_feat(STEAL) && allow;
}

static inline bool group_steal_enabled(int steal_task)
{
	return group_steal_used() && is_tg_steal(steal_task);
}

static void overload_clear(struct rq *rq)
{
	struct sparsemask *overload_cpus;
	unsigned long time;
	bool need_clear = false;

	if (!steal_enabled())
		return;

	if (!group_steal_used() && rq->cfs.h_nr_running >= 2)
		return;

	if (group_steal_used() &&
	    (rq->cfs.h_nr_running < 2 || rq->cfs.steal_h_nr_running == 0))
		need_clear = true;

	if (!need_clear)
		return;

	time = schedstat_start_time();
	rcu_read_lock();
	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
@@ -4495,6 +4519,12 @@ static void overload_set(struct rq *rq)
	if (!steal_enabled())
		return;

	if (rq->cfs.h_nr_running < 2)
		return;

	if (group_steal_used() && rq->cfs.steal_h_nr_running < 1)
		return;

	time = schedstat_start_time();
	rcu_read_lock();
	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
@@ -5278,13 +5308,15 @@ static int tg_throttle_down(struct task_group *tg, void *data)
static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	unsigned int prev_nr = rq->cfs.h_nr_running;
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	long task_delta, idle_task_delta, dequeue = 1;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif

	raw_spin_lock(&cfs_b->lock);
	/* This will start the period timer if necessary */
@@ -5319,6 +5351,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
@@ -5338,6 +5373,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		qcfs_rq->steal_h_nr_running -= steal_delta;
#endif

		if (qcfs_rq->load.weight)
			dequeue = 0;
@@ -5345,8 +5383,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)

	if (!se) {
		sub_nr_running(rq, task_delta);
		if (prev_nr >= 2 && prev_nr - task_delta < 2)
#ifdef CONFIG_SCHED_STEAL
		overload_clear(rq);
#endif
	}

	/*
@@ -5361,13 +5400,15 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	unsigned int prev_nr = rq->cfs.h_nr_running;
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	long task_delta, idle_task_delta;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif

	se = cfs_rq->tg->se[cpu_of(rq)];

@@ -5399,6 +5440,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
@@ -5410,6 +5455,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -5427,6 +5475,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -5442,8 +5493,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

	/* At this point se is NULL and we are at root level*/
	add_nr_running(rq, task_delta);
	if (prev_nr < 2 && prev_nr + task_delta >= 2)
#ifdef CONFIG_SCHED_STEAL
	overload_set(rq);
#endif

unthrottle_throttle:
	/*
@@ -6576,8 +6628,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	int idle_h_nr_running = task_has_idle_policy(p);

	int task_new = !(flags & ENQUEUE_WAKEUP);
	unsigned int prev_nr = rq->cfs.h_nr_running;

#ifdef CONFIG_SCHED_STEAL
	bool tg_steal_enabled = group_steal_enabled(se->steal_task);
#endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	int qos_idle_h_nr_running;

@@ -6612,6 +6665,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running++;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6632,6 +6689,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running++;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6647,8 +6708,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

	/* At this point se is NULL and we are at root level*/
	add_nr_running(rq, 1);
	if (prev_nr == 1)
#ifdef CONFIG_SCHED_STEAL
	overload_set(rq);
#endif

	/*
	 * Since new tasks are assigned an initial util_avg equal to
@@ -6707,9 +6769,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	int task_sleep = flags & DEQUEUE_SLEEP;
	int idle_h_nr_running = task_has_idle_policy(p);

	unsigned int prev_nr = rq->cfs.h_nr_running;
	bool was_sched_idle = sched_idle_rq(rq);

#ifdef CONFIG_SCHED_STEAL
	bool tg_steal_enabled = group_steal_enabled(se->steal_task);
#endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	int qos_idle_h_nr_running = se->qos_idle ? 1 : 0;

@@ -6727,6 +6790,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running--;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6759,6 +6826,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running--;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6768,8 +6839,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)

	/* At this point se is NULL and we are at root level*/
	sub_nr_running(rq, 1);
	if (prev_nr == 2)
#ifdef CONFIG_SCHED_STEAL
	overload_clear(rq);
#endif

	/* balance early to pull high priority tasks */
	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
@@ -8543,10 +8615,12 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	unsigned int prev_nr = cfs_rq->h_nr_running;
	long task_delta, idle_task_delta, dequeue = 1;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif
	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

@@ -8560,6 +8634,10 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
		/* throttled entity or throttle-on-deactivate */
@@ -8578,6 +8656,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		qcfs_rq->steal_h_nr_running -= steal_delta;
#endif

		if (qcfs_rq->load.weight)
			dequeue = 0;
@@ -8585,9 +8666,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)

	if (!se) {
		sub_nr_running(rq, task_delta);
		if (prev_nr >= 2 && prev_nr - task_delta < 2)
#ifdef CONFIG_SCHED_STEAL
		overload_clear(rq);

#endif
	}

	if (!qos_timer_is_activated(cpu_of(rq)))
@@ -8603,11 +8684,13 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	unsigned int prev_nr = cfs_rq->h_nr_running;
	long task_delta, idle_task_delta;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif

	se = cfs_rq->tg->se[cpu_of(rq)];

@@ -8632,6 +8715,10 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
@@ -8644,6 +8731,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		if (cfs_rq_throttled(cfs_rq))
			goto unthrottle_throttle;
@@ -8660,6 +8750,10 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
			goto unthrottle_throttle;
@@ -8673,8 +8767,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
	}

	add_nr_running(rq, task_delta);
	if (prev_nr < 2 && prev_nr + task_delta >= 2)
#ifdef CONFIG_SCHED_STEAL
	overload_set(rq);
#endif

unthrottle_throttle:
	/*
@@ -9842,10 +9937,14 @@ static bool
can_migrate_task_llc(struct task_struct *p, struct rq *rq, struct rq *dst_rq)
{
	int dst_cpu = dst_rq->cpu;
	struct task_group *tg = task_group(p);

	lockdep_assert_rq_held(rq);

	if (throttled_lb_pair(task_group(p), cpu_of(rq), dst_cpu))
	if (group_steal_used() && !is_tg_steal(tg->steal_task))
		return false;

	if (throttled_lb_pair(tg, cpu_of(rq), dst_cpu))
		return false;

	if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) {
@@ -13084,6 +13183,7 @@ void trigger_load_balance(struct rq *rq)
}

#ifdef CONFIG_SCHED_STEAL
int sysctl_sched_max_steal_count = 32;
/*
 * Search the runnable tasks in @cfs_rq in order of next to run, and find
 * the first one that can be migrated to @dst_rq.  @cfs_rq is locked on entry.
@@ -13095,14 +13195,20 @@ detach_next_task(struct cfs_rq *cfs_rq, struct rq *dst_rq)
	int dst_cpu = dst_rq->cpu;
	struct task_struct *p;
	struct rq *rq = rq_of(cfs_rq);
	int count = 1;

	lockdep_assert_rq_held(rq_of(cfs_rq));

	list_for_each_entry_reverse(p, &rq->cfs_tasks, se.group_node) {
		if (count > sysctl_sched_max_steal_count)
			break;

		if (can_migrate_task_llc(p, rq, dst_rq)) {
			detach_task(p, rq, dst_cpu);
			return p;
		}

		count++;
	}
	return NULL;
}
@@ -13122,10 +13228,14 @@ static int steal_from(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
	int stolen = 0;
	int dst_cpu = dst_rq->cpu;
	struct rq *src_rq = cpu_rq(src_cpu);
	bool tg_used = group_steal_used();

	if (dst_cpu == src_cpu || src_rq->cfs.h_nr_running < 2)
		return 0;

	if (tg_used && src_rq->cfs.steal_h_nr_running < 1)
		return 0;

	if (*locked) {
		rq_unpin_lock(dst_rq, dst_rf);
		raw_spin_rq_unlock(dst_rq);
@@ -13134,7 +13244,8 @@ static int steal_from(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
	rq_lock_irqsave(src_rq, &rf);
	update_rq_clock(src_rq);

	if (src_rq->cfs.h_nr_running < 2 || !cpu_active(src_cpu))
	if (!cpu_active(src_cpu) || src_rq->cfs.h_nr_running < 2 ||
	    (tg_used && src_rq->cfs.steal_h_nr_running < 1))
		p = NULL;
	else
		p = detach_next_task(&src_rq->cfs, dst_rq);
@@ -13691,9 +13802,6 @@ void free_fair_sched_group(struct task_group *tg)
			kfree(tg->se[i]);
	}

#ifdef CONFIG_QOS_SCHED
	kfree(tg->qos_level_mutex);
#endif
	kfree(tg->cfs_rq);
	kfree(tg->se);
}
Loading