Commit 926b9b0c authored by Zhang Qiao, committed by Wenyu Huang

sched: Throttle qos cfs_rq when current cpu is running online task

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8MF4R


CVE: NA

--------------------------------

In a co-location scenario, we usually deploy online and offline
task groups on the same server.

The online tasks are more important than the offline tasks. To
prevent the offline tasks from affecting the online tasks, we
throttle the offline task groups when online task groups are
running on the same cpu, and unthrottle the offline task groups
when the cpu is about to enter the idle state.
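
As a minimal sketch of the added decision (the helper names
qos_pick_hook() and qos_idle_hook() are illustrative only; the patch
implements this via check_qos_cfs_rq() on the pick path and
unthrottle_qos_cfs_rqs() on the idle path, see the diff below):

/*
 * Sketch: the picked cfs_rq belongs to an offline group and this cpu
 * still has online (non sched-idle) work, so park the offline group.
 */
static bool qos_pick_hook(struct rq *rq, struct cfs_rq *cfs_rq)
{
	if (cfs_rq->tg->qos_level == -1 && !sched_idle_cpu(cpu_of(rq))) {
		throttle_qos_cfs_rq(cfs_rq);
		return true;	/* pick again without this group */
	}
	return false;
}

/* Sketch: nothing left to run, give parked offline groups the idle time. */
static int qos_idle_hook(struct rq *rq)
{
	return unthrottle_qos_cfs_rqs(cpu_of(rq));
}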

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Wenyu Huang <huangwenyu5@huawei.com>
parent 1621f465
kernel/sched/fair.c +258 −1
@@ -124,6 +124,18 @@ int __weak arch_asym_cpu_priority(int cpu)
#define capacity_greater(cap1, cap2) ((cap1) * 1024 > (cap2) * 1078)
#endif

#ifdef CONFIG_QOS_SCHED

/*
 * To distinguish QoS throttling from CFS bandwidth throttling, mark
 * cfs_rq->throttled with QOS_THROTTLED when a cfs_rq is throttled by
 * QoS (CFS bandwidth throttling marks cfs_rq->throttled as 1).
 */
#define QOS_THROTTLED	2

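/* Per-cpu list of cfs_rqs that are currently throttled by the QoS scheduler. */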
static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
static int unthrottle_qos_cfs_rqs(int cpu);
#endif

#ifdef CONFIG_CFS_BANDWIDTH
/*
 * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -5639,6 +5651,14 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)

	se = cfs_rq->tg->se[cpu_of(rq)];

#ifdef CONFIG_QOS_SCHED
	/*
	 * If this cfs_rq is throttled by QoS, there is no need to
	 * unthrottle it here.
	 */
	if (cfs_rq->throttled == QOS_THROTTLED)
		return;
#endif

	cfs_rq->throttled = 0;

	update_rq_clock(rq);
@@ -5823,7 +5843,20 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
			goto next;
#endif

		/* By the above checks, this should never be true */
		/*
		 * CPU hotplug callbacks race against distribute_cfs_runtime().
		 * When the QOS_SCHED feature is enabled, there may be
		 * situations where runtime_remaining > 0. qos_sched does not
		 * care whether the cfs_rq has runtime left, so do not
		 * allocate runtime to the cfs_rq in this scenario.
		 */
#ifdef CONFIG_QOS_SCHED
		if (cfs_rq->throttled == QOS_THROTTLED &&
			cfs_rq->runtime_remaining > 0)
			goto next;
#endif

		/* By the above check, this should never be true */
		SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);

		raw_spin_lock(&cfs_b->lock);
@@ -6191,6 +6224,9 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
#ifdef CONFIG_SMP
	INIT_LIST_HEAD(&cfs_rq->throttled_csd_list);
#endif
#ifdef CONFIG_QOS_SCHED
	INIT_LIST_HEAD(&cfs_rq->qos_throttled_list);
#endif
}

void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
@@ -6280,6 +6316,9 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
	 * the rq clock again in unthrottle_cfs_rq().
	 */
	rq_clock_start_loop_update(rq);
#ifdef CONFIG_QOS_SCHED
	unthrottle_qos_cfs_rqs(cpu_of(rq));
#endif

	rcu_read_lock();
	list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -6305,6 +6344,9 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
	rcu_read_unlock();

	rq_clock_stop_loop_update(rq);
#ifdef CONFIG_QOS_SCHED
	unthrottle_qos_cfs_rqs(cpu_of(rq));
#endif
}

bool cfs_task_bw_constrained(struct task_struct *p)
@@ -8115,6 +8157,196 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
	resched_curr(rq);
}

#ifdef CONFIG_QOS_SCHED
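/* Offline (best-effort) task groups are marked with qos_level == -1. */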
static inline bool is_offline_task(struct task_struct *p)
{
	return task_group(p)->qos_level == -1;
}

static void start_qos_hrtimer(int cpu);

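/*
 * Throttle an offline group's cfs_rq: dequeue its hierarchy from the rq,
 * mark it QOS_THROTTLED and add it to this cpu's qos_throttled_cfs_rq list.
 * The QoS hrtimer is started when the first cfs_rq is added to the list.
 */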
static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	long task_delta, idle_task_delta;

	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];

	/* freeze hierarchy runnable averages while throttled */
	rcu_read_lock();
	walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
	rcu_read_unlock();

	task_delta = cfs_rq->h_nr_running;
	idle_task_delta = cfs_rq->idle_h_nr_running;
	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
		/* throttled entity or throttle-on-deactivate */
		if (!se->on_rq)
			goto done;

		dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);

		qcfs_rq->h_nr_running -= task_delta;
		qcfs_rq->idle_h_nr_running -= idle_task_delta;

		if (qcfs_rq->load.weight) {
			/* Avoid re-evaluating load for this entity: */
			se = parent_entity(se);
			break;
		}
	}

	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
		/* throttled entity or throttle-on-deactivate */
		if (!se->on_rq)
			goto done;

		update_load_avg(qcfs_rq, se, 0);
		se_update_runnable(se);

		if (cfs_rq_is_idle(group_cfs_rq(se)))
			idle_task_delta = cfs_rq->h_nr_running;

		qcfs_rq->h_nr_running -= task_delta;
		qcfs_rq->idle_h_nr_running -= idle_task_delta;
	}

	/* At this point se is NULL and we are at root level */
	sub_nr_running(rq, task_delta);

done:
	if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
		start_qos_hrtimer(cpu_of(rq));

	cfs_rq->throttled = QOS_THROTTLED;

	list_add(&cfs_rq->qos_throttled_list,
		 &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
}

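/*
 * Undo throttle_qos_cfs_rq(): remove the cfs_rq from the per-cpu list,
 * re-enqueue its hierarchy and, if the cpu was idle, kick it to reschedule.
 */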
static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct sched_entity *se;
	long task_delta, idle_task_delta;

	se = cfs_rq->tg->se[cpu_of(rq)];

	if (cfs_rq->throttled != QOS_THROTTLED)
		return;

	cfs_rq->throttled = 0;

	update_rq_clock(rq);
	list_del_init(&cfs_rq->qos_throttled_list);

	/* update hierarchical throttle state */
	rcu_read_lock();
	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
	rcu_read_unlock();

	if (!cfs_rq->load.weight) {
		if (!cfs_rq->on_list)
			return;
		/*
		 * Nothing to run but something to decay (on_list)?
		 * Complete the branch.
		 */
		for_each_sched_entity(se) {
			if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
				break;
		}
		goto unthrottle_throttle;
	}

	task_delta = cfs_rq->h_nr_running;
	idle_task_delta = cfs_rq->idle_h_nr_running;
	for_each_sched_entity(se) {
		if (se->on_rq)
			break;

		cfs_rq = cfs_rq_of(se);
		enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);

		cfs_rq->h_nr_running += task_delta;
		cfs_rq->idle_h_nr_running += idle_task_delta;

		if (cfs_rq_throttled(cfs_rq))
			goto unthrottle_throttle;
	}

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);

		update_load_avg(cfs_rq, se, UPDATE_TG);
		se_update_runnable(se);

		cfs_rq->h_nr_running += task_delta;
		cfs_rq->idle_h_nr_running += idle_task_delta;

		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
			goto unthrottle_throttle;
	}

	add_nr_running(rq, task_delta);

unthrottle_throttle:

	assert_list_leaf_cfs_rq(rq);

	/* Determine whether we need to wake up potentially idle CPU: */
	if (rq->curr == rq->idle && rq->cfs.nr_running)
		resched_curr(rq);
}

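/*
 * Unthrottle every QoS-throttled cfs_rq on @cpu. Returns the number of
 * cfs_rqs that were unthrottled.
 */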
static int unthrottle_qos_cfs_rqs(int cpu)
{
	struct cfs_rq *cfs_rq, *tmp_rq;
	int res = 0;

	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(qos_throttled_cfs_rq, cpu),
				 qos_throttled_list) {
		if (cfs_rq_throttled(cfs_rq)) {
			unthrottle_qos_cfs_rq(cfs_rq);
			res++;
		}
	}

	return res;
}

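/*
 * Called on the pick_next_task_fair() path: throttle @cfs_rq when it belongs
 * to an offline group, all of its tasks count as sched-idle, the cpu is
 * online and the cpu is not running only sched-idle work. Returns true if
 * the cfs_rq was throttled.
 */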
static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
{
	if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
		     !sched_idle_cpu(smp_processor_id()) &&
		     cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {

		if (!rq_of(cfs_rq)->online)
			return false;

		throttle_qos_cfs_rq(cfs_rq);
		return true;
	}

	return false;
}

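/*
 * Unthrottle an offline group's cfs_rq before the task group is freed, so
 * that a QoS-throttled cfs_rq is never left on the per-cpu list.
 */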
static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	struct rq_flags rf;

	rq_lock_irqsave(rq, &rf);
	if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
		unthrottle_qos_cfs_rq(cfs_rq);
	rq_unlock_irqrestore(rq, &rf);
}
#endif

#ifdef CONFIG_SMP
static struct task_struct *pick_task_fair(struct rq *rq)
{
@@ -8205,6 +8437,16 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf

		se = pick_next_entity(cfs_rq, curr);
		cfs_rq = group_cfs_rq(se);
#ifdef CONFIG_QOS_SCHED
		if (check_qos_cfs_rq(cfs_rq)) {
			cfs_rq = &rq->cfs;
			WARN(cfs_rq->nr_running == 0,
			    "rq->nr_running=%u, cfs_rq->idle_h_nr_running=%u\n",
			    rq->nr_running, cfs_rq->idle_h_nr_running);
			if (unlikely(!cfs_rq->nr_running))
				return NULL;
		}
#endif
	} while (cfs_rq);

	p = task_of(se);
@@ -8284,6 +8526,12 @@ done: __maybe_unused;
	if (new_tasks > 0)
		goto again;

#ifdef CONFIG_QOS_SCHED
	if (unthrottle_qos_cfs_rqs(cpu_of(rq))) {
		rq->idle_stamp = 0;
		goto again;
	}
#endif
	/*
	 * rq is about to be idle, check if we need to update the
	 * lost_idle_time of clock_pelt
@@ -12600,6 +12848,10 @@ void free_fair_sched_group(struct task_group *tg)
	int i;

	for_each_possible_cpu(i) {
#ifdef CONFIG_QOS_SCHED
		if (tg->cfs_rq && tg->cfs_rq[i])
			unthrottle_qos_sched_group(tg->cfs_rq[i]);
#endif
		if (tg->cfs_rq)
			kfree(tg->cfs_rq[i]);
		if (tg->se)
@@ -12989,6 +13241,11 @@ __init void init_sched_fair_class(void)
#endif
	}

#ifdef CONFIG_QOS_SCHED
	for_each_possible_cpu(i)
		INIT_LIST_HEAD(&per_cpu(qos_throttled_cfs_rq, i));
#endif

	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);

#ifdef CONFIG_NO_HZ_COMMON
kernel/sched/sched.h +4 −0
@@ -653,6 +653,10 @@ struct cfs_rq {
#endif
#endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */

#if defined(CONFIG_QOS_SCHED)
	struct list_head	qos_throttled_list;
#endif
};

static inline int rt_bandwidth_enabled(void)