Commit 51bf903b authored by Chengming Zhou's avatar Chengming Zhou Committed by Peter Zijlstra
Browse files

sched/fair: Optimize and simplify rq leaf_cfs_rq_list



We notice the rq leaf_cfs_rq_list has two problems when do bugfix
backports and some test profiling.

1. cfs_rqs under throttled subtree could be added to the list, and
   make their fully decayed ancestors on the list, even though not needed.

2. #1 also make the leaf_cfs_rq_list management complex and error prone,
   this is the list of related bugfix so far:

   commit 31bc6aea ("sched/fair: Optimize update_blocked_averages()")
   commit fe61468b ("sched/fair: Fix enqueue_task_fair warning")
   commit b34cb07d ("sched/fair: Fix enqueue_task_fair() warning some more")
   commit 39f23ce0 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
   commit 0258bdfa ("sched/fair: Fix unfairness caused by missing load decay")
   commit a7b359fc ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
   commit fdaba61e ("sched/fair: Ensure that the CFS parent is added after unthrottling")
   commit 2630cde2 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")

commit 31bc6aea ("sched/fair: Optimize update_blocked_averages()")
delete every cfs_rq under throttled subtree from rq->leaf_cfs_rq_list,
and delete the throttled_hierarchy() test in update_blocked_averages(),
which optimized update_blocked_averages().

But those later bugfix add cfs_rqs under throttled subtree back to
rq->leaf_cfs_rq_list again, with their fully decayed ancestors, for
the integrity of rq->leaf_cfs_rq_list.

This patch takes another method, skip all cfs_rqs under throttled
hierarchy when list_add_leaf_cfs_rq(), to completely make cfs_rqs
under throttled subtree off the leaf_cfs_rq_list.

So we don't need to consider throttled related things in
enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
which simplify the code a lot. Also optimize update_blocked_averages()
since cfs_rqs under throttled hierarchy and their ancestors
won't be on the leaf_cfs_rq_list.

Signed-off-by: default avatarChengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: default avatarVincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20220601021848.76943-1-zhouchengming@bytedance.com
parent f5b2eeb4
Loading
Loading
Loading
Loading
+28 −64
Original line number Diff line number Diff line
@@ -3179,6 +3179,8 @@ void reweight_task(struct task_struct *p, int prio)
	load->inv_weight = sched_prio_to_wmult[prio];
}

static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);

#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
/*
@@ -3289,8 +3291,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
}
#endif /* CONFIG_SMP */

static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);

/*
 * Recomputes the group entity based on the current state of its group
 * runqueue.
@@ -4403,16 +4403,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
		__enqueue_entity(cfs_rq, se);
	se->on_rq = 1;

	/*
	 * When bandwidth control is enabled, cfs might have been removed
	 * because of a parent been throttled but cfs->nr_running > 1. Try to
	 * add it unconditionally.
	 */
	if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
		list_add_leaf_cfs_rq(cfs_rq);

	if (cfs_rq->nr_running == 1)
	if (cfs_rq->nr_running == 1) {
		check_enqueue_throttle(cfs_rq);
		if (!throttled_hierarchy(cfs_rq))
			list_add_leaf_cfs_rq(cfs_rq);
	}
}

static void __clear_buddies_last(struct sched_entity *se)
@@ -5027,11 +5022,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
	/* update hierarchical throttle state */
	walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);

	/* Nothing to run but something to decay (on_list)? Complete the branch */
	if (!cfs_rq->load.weight) {
		if (cfs_rq->on_list)
			goto unthrottle_throttle;
		if (!cfs_rq->on_list)
			return;
		/*
		 * Nothing to run but something to decay (on_list)?
		 * Complete the branch.
		 */
		for_each_sched_entity(se) {
			if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
				break;
		}
		goto unthrottle_throttle;
	}

	task_delta = cfs_rq->h_nr_running;
@@ -5069,31 +5071,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(qcfs_rq))
			goto unthrottle_throttle;

		/*
		 * One parent has been throttled and cfs_rq removed from the
		 * list. Add it back to not break the leaf list.
		 */
		if (throttled_hierarchy(qcfs_rq))
			list_add_leaf_cfs_rq(qcfs_rq);
	}

	/* At this point se is NULL and we are at root level*/
	add_nr_running(rq, task_delta);

unthrottle_throttle:
	/*
	 * The cfs_rq_throttled() breaks in the above iteration can result in
	 * incomplete leaf list maintenance, resulting in triggering the
	 * assertion below.
	 */
	for_each_sched_entity(se) {
		struct cfs_rq *qcfs_rq = cfs_rq_of(se);

		if (list_add_leaf_cfs_rq(qcfs_rq))
			break;
	}

	assert_list_leaf_cfs_rq(rq);

	/* Determine whether we need to wake up potentially idle CPU: */
@@ -5748,13 +5731,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
		/* end evaluation on encountering a throttled cfs_rq */
		if (cfs_rq_throttled(cfs_rq))
			goto enqueue_throttle;

               /*
                * One parent has been throttled and cfs_rq removed from the
                * list. Add it back to not break the leaf list.
                */
               if (throttled_hierarchy(cfs_rq))
                       list_add_leaf_cfs_rq(cfs_rq);
	}

	/* At this point se is NULL and we are at root level*/
@@ -5778,21 +5754,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
		update_overutilized_status(rq);

enqueue_throttle:
	if (cfs_bandwidth_used()) {
		/*
		 * When bandwidth control is enabled; the cfs_rq_throttled()
		 * breaks in the above iteration can result in incomplete
		 * leaf list maintenance, resulting in triggering the assertion
		 * below.
		 */
		for_each_sched_entity(se) {
			cfs_rq = cfs_rq_of(se);

			if (list_add_leaf_cfs_rq(cfs_rq))
				break;
		}
	}

	assert_list_leaf_cfs_rq(rq);

	hrtick_update(rq);
@@ -11316,9 +11277,13 @@ static inline bool vruntime_normalized(struct task_struct *p)
 */
static void propagate_entity_cfs_rq(struct sched_entity *se)
{
	struct cfs_rq *cfs_rq;
	struct cfs_rq *cfs_rq = cfs_rq_of(se);

	list_add_leaf_cfs_rq(cfs_rq_of(se));
	if (cfs_rq_throttled(cfs_rq))
		return;

	if (!throttled_hierarchy(cfs_rq))
		list_add_leaf_cfs_rq(cfs_rq);

	/* Start to propagate at parent */
	se = se->parent;
@@ -11326,14 +11291,13 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);

		if (!cfs_rq_throttled(cfs_rq)){
		update_load_avg(cfs_rq, se, UPDATE_TG);
			list_add_leaf_cfs_rq(cfs_rq);
			continue;
		}

		if (list_add_leaf_cfs_rq(cfs_rq))
		if (cfs_rq_throttled(cfs_rq))
			break;

		if (!throttled_hierarchy(cfs_rq))
			list_add_leaf_cfs_rq(cfs_rq);
	}
}
#else