Commit 433c0b72 authored by Cheng Yu
Browse files

sched/fair: Count the number of tasks marked as steal_task on cfs_rq

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAQWPQ



-----------------------------------------

Assume that we have a cpu cgroup named test whose cpu.steal_task is set
to 1. The tasks in the test cgroup are then referred to as tasks marked
with steal_task.

When there are at least 2 cfs tasks on a cpu's rq, and at least 1 of
them is marked with steal_task, that cpu is considered to be an
overloaded cpu.

Before a cpu enters idle, it tries to pull tasks from a busy cpu through
idle balance. If it fails to pull a task, task stealing is triggered: the
idle cpu pulls a task from an overloaded cpu.

Signed-off-by: Cheng Yu <serein.chengyu@huawei.com>
parent a54dbd9e
Loading
Loading
Loading
Loading
+119 −23
Original line number Diff line number Diff line
@@ -4478,14 +4478,30 @@ static inline bool steal_enabled(void)
	return sched_feat(STEAL) && allow;
}

/*
 * group_steal_enabled - check whether group-based stealing applies.
 * @steal_task: steal_task value to test (callers in this file pass
 *              se->steal_task).
 *
 * Returns true only when group_steal_used() reports true and
 * is_tg_steal() accepts @steal_task. is_tg_steal() is not evaluated
 * when group_steal_used() is false.
 */
static inline bool group_steal_enabled(int steal_task)
{
	if (!group_steal_used())
		return false;

	return is_tg_steal(steal_task) ? true : false;
}

static void overload_clear(struct rq *rq)
{
	struct sparsemask *overload_cpus;
	unsigned long time;
	bool need_clear = false;

	if (!steal_enabled())
		return;

	if (!group_steal_used() && rq->cfs.h_nr_running >= 2)
		return;

	if (group_steal_used() &&
	    (rq->cfs.h_nr_running < 2 || rq->cfs.steal_h_nr_running == 0))
		need_clear = true;

	if (!need_clear)
		return;

	time = schedstat_start_time();
	rcu_read_lock();
	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
@@ -4503,6 +4519,12 @@ static void overload_set(struct rq *rq)
	if (!steal_enabled())
		return;

	if (rq->cfs.h_nr_running < 2)
		return;

	if (group_steal_used() && rq->cfs.steal_h_nr_running < 1)
		return;

	time = schedstat_start_time();
	rcu_read_lock();
	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
@@ -5286,13 +5308,15 @@ static int tg_throttle_down(struct task_group *tg, void *data)
static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	unsigned int prev_nr = rq->cfs.h_nr_running;
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	long task_delta, idle_task_delta, dequeue = 1;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif

	raw_spin_lock(&cfs_b->lock);
	/* This will start the period timer if necessary */
@@ -5327,6 +5351,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
@@ -5346,6 +5373,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		qcfs_rq->steal_h_nr_running -= steal_delta;
#endif

		if (qcfs_rq->load.weight)
			dequeue = 0;
@@ -5353,8 +5383,9 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)

	if (!se) {
		sub_nr_running(rq, task_delta);
		if (prev_nr >= 2 && prev_nr - task_delta < 2)
#ifdef CONFIG_SCHED_STEAL
		overload_clear(rq);
#endif
	}

	/*
@@ -5369,13 +5400,15 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	unsigned int prev_nr = rq->cfs.h_nr_running;
	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
	struct sched_entity *se;
	long task_delta, idle_task_delta;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif

	se = cfs_rq->tg->se[cpu_of(rq)];

@@ -5407,6 +5440,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
@@ -5418,6 +5455,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -5435,6 +5475,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -5450,8 +5493,9 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

	/* At this point se is NULL and we are at root level*/
	add_nr_running(rq, task_delta);
	if (prev_nr < 2 && prev_nr + task_delta >= 2)
#ifdef CONFIG_SCHED_STEAL
	overload_set(rq);
#endif

unthrottle_throttle:
	/*
@@ -6584,8 +6628,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	int idle_h_nr_running = task_has_idle_policy(p);

	int task_new = !(flags & ENQUEUE_WAKEUP);
	unsigned int prev_nr = rq->cfs.h_nr_running;

#ifdef CONFIG_SCHED_STEAL
	bool tg_steal_enabled = group_steal_enabled(se->steal_task);
#endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	int qos_idle_h_nr_running;

@@ -6620,6 +6665,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running++;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6640,6 +6689,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running++;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6655,8 +6708,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)

	/* At this point se is NULL and we are at root level*/
	add_nr_running(rq, 1);
	if (prev_nr == 1)
#ifdef CONFIG_SCHED_STEAL
	overload_set(rq);
#endif

	/*
	 * Since new tasks are assigned an initial util_avg equal to
@@ -6715,9 +6769,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	int task_sleep = flags & DEQUEUE_SLEEP;
	int idle_h_nr_running = task_has_idle_policy(p);

	unsigned int prev_nr = rq->cfs.h_nr_running;
	bool was_sched_idle = sched_idle_rq(rq);

#ifdef CONFIG_SCHED_STEAL
	bool tg_steal_enabled = group_steal_enabled(se->steal_task);
#endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	int qos_idle_h_nr_running = se->qos_idle ? 1 : 0;

@@ -6735,6 +6790,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running--;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6767,6 +6826,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running -= qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
		if (tg_steal_enabled)
			cfs_rq->steal_h_nr_running--;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
@@ -6776,8 +6839,9 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)

	/* At this point se is NULL and we are at root level*/
	sub_nr_running(rq, 1);
	if (prev_nr == 2)
#ifdef CONFIG_SCHED_STEAL
	overload_clear(rq);
#endif

	/* balance early to pull high priority tasks */
	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
@@ -8551,10 +8615,12 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	unsigned int prev_nr = cfs_rq->h_nr_running;
	long task_delta, idle_task_delta, dequeue = 1;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif
	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

@@ -8568,6 +8634,10 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
		/* throttled entity or throttle-on-deactivate */
@@ -8586,6 +8656,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		qcfs_rq->qos_idle_h_nr_running -= qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		qcfs_rq->steal_h_nr_running -= steal_delta;
#endif

		if (qcfs_rq->load.weight)
			dequeue = 0;
@@ -8593,9 +8666,9 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)

	if (!se) {
		sub_nr_running(rq, task_delta);
		if (prev_nr >= 2 && prev_nr - task_delta < 2)
#ifdef CONFIG_SCHED_STEAL
		overload_clear(rq);

#endif
	}

	if (!qos_timer_is_activated(cpu_of(rq)))
@@ -8611,11 +8684,13 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	unsigned int prev_nr = cfs_rq->h_nr_running;
	long task_delta, idle_task_delta;
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	long qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
	long steal_delta;
#endif

	se = cfs_rq->tg->se[cpu_of(rq)];

@@ -8640,6 +8715,10 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
	qos_idle_delta = cfs_rq->qos_idle_h_nr_running;
#endif
#ifdef CONFIG_SCHED_STEAL
	steal_delta = cfs_rq->steal_h_nr_running;
#endif

	for_each_sched_entity(se) {
		if (se->on_rq)
			break;
@@ -8652,6 +8731,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		if (cfs_rq_throttled(cfs_rq))
			goto unthrottle_throttle;
@@ -8668,6 +8750,10 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
#endif
#ifdef CONFIG_SCHED_STEAL
		cfs_rq->steal_h_nr_running += steal_delta;
#endif

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
			goto unthrottle_throttle;
@@ -8681,8 +8767,9 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
	}

	add_nr_running(rq, task_delta);
	if (prev_nr < 2 && prev_nr + task_delta >= 2)
#ifdef CONFIG_SCHED_STEAL
	overload_set(rq);
#endif

unthrottle_throttle:
	/*
@@ -9850,10 +9937,14 @@ static bool
can_migrate_task_llc(struct task_struct *p, struct rq *rq, struct rq *dst_rq)
{
	int dst_cpu = dst_rq->cpu;
	struct task_group *tg = task_group(p);

	lockdep_assert_rq_held(rq);

	if (throttled_lb_pair(task_group(p), cpu_of(rq), dst_cpu))
	if (group_steal_used() && !is_tg_steal(tg->steal_task))
		return false;

	if (throttled_lb_pair(tg, cpu_of(rq), dst_cpu))
		return false;

	if (!cpumask_test_cpu(dst_cpu, p->cpus_ptr)) {
@@ -13130,10 +13221,14 @@ static int steal_from(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
	int stolen = 0;
	int dst_cpu = dst_rq->cpu;
	struct rq *src_rq = cpu_rq(src_cpu);
	bool tg_used = group_steal_used();

	if (dst_cpu == src_cpu || src_rq->cfs.h_nr_running < 2)
		return 0;

	if (tg_used && src_rq->cfs.steal_h_nr_running < 1)
		return 0;

	if (*locked) {
		rq_unpin_lock(dst_rq, dst_rf);
		raw_spin_rq_unlock(dst_rq);
@@ -13142,7 +13237,8 @@ static int steal_from(struct rq *dst_rq, struct rq_flags *dst_rf, bool *locked,
	rq_lock_irqsave(src_rq, &rf);
	update_rq_clock(src_rq);

	if (src_rq->cfs.h_nr_running < 2 || !cpu_active(src_cpu))
	if (!cpu_active(src_cpu) || src_rq->cfs.h_nr_running < 2 ||
	    (tg_used && src_rq->cfs.steal_h_nr_running < 1))
		p = NULL;
	else
		p = detach_next_task(&src_rq->cfs, dst_rq);
+3 −2
Original line number Diff line number Diff line
@@ -723,12 +723,13 @@ struct cfs_rq {
	unsigned int		forceidle_seq;
	KABI_FILL_HOLE(unsigned int kabi_hole)
	u64			min_vruntime_fi;
#elif defined CONFIG_QOS_SCHED_SMT_EXPELLER && !defined(__GENKSYMS__)
#elif (defined(CONFIG_QOS_SCHED_SMT_EXPELLER) || \
		defined(CONFIG_SCHED_STEAL)) && !defined(__GENKSYMS__)
	union {
		unsigned int            qos_idle_h_nr_running; /* qos_level:-1 */
		unsigned long           qos_idle_h_nr_running_padding;
	};
	KABI_FILL_HOLE(unsigned long kabi_hole)
	unsigned long		steal_h_nr_running;
#else
	KABI_RESERVE(3)
	KABI_RESERVE(4)