Commit 18940c88 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched_urgent_for_v6.3_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Borislav Petkov:

 - Fix a corner case where vruntime of a task is not being sanitized

* tag 'sched_urgent_for_v6.3_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Sanitize vruntime of entity being migrated
parents 974fc943 a53ce18c
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -2084,6 +2084,9 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)

void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
	if (task_on_rq_migrating(p))
		flags |= ENQUEUE_MIGRATED;

	enqueue_task(rq, p, flags);

	p->on_rq = TASK_ON_RQ_QUEUED;
+44 −11
Original line number Diff line number Diff line
@@ -4648,11 +4648,33 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
#endif
}

/*
 * Report whether @se has slept so long that its vruntime can no longer be
 * safely compared against cfs_rq->min_vruntime: beyond the cutoff the s64
 * difference between the two may overflow and invert the comparison.
 */
static inline bool entity_is_long_sleeper(struct sched_entity *se)
{
	struct cfs_rq *cfs_rq;
	u64 now;

	/* exec_start == 0 flags a freshly migrated entity, not a sleeper. */
	if (se->exec_start == 0)
		return false;

	cfs_rq = cfs_rq_of(se);
	now = rq_clock_task(rq_of(cfs_rq));

	/*
	 * While migrating, the new rq's task clock may lag behind the one
	 * exec_start was stamped from; treat that as "not a long sleeper".
	 */
	if (now <= se->exec_start)
		return false;

	/*
	 * Cutoff: 2^63 / scale_load_down(NICE_0_LOAD) (~104 days), the
	 * worst-case vruntime advance rate relative to wall time.
	 */
	return (now - se->exec_start) >
	       ((1ULL << 63) / scale_load_down(NICE_0_LOAD));
}

static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
	u64 vruntime = cfs_rq->min_vruntime;
	u64 sleep_time;

	/*
	 * The 'current' period is already promised to the current tasks,
@@ -4684,13 +4706,24 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)

	/*
	 * Pull vruntime of the entity being placed to the base level of
	 * cfs_rq, to prevent boosting it if placed backwards.  If the entity
	 * slept for a long time, don't even try to compare its vruntime with
	 * the base as it may be too far off and the comparison may get
	 * inversed due to s64 overflow.
	 */
	sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
	if ((s64)sleep_time > 60LL * NSEC_PER_SEC)
	 * cfs_rq, to prevent boosting it if placed backwards.
	 * However, min_vruntime can advance much faster than real time, with
	 * the extreme being when an entity with the minimal weight always runs
	 * on the cfs_rq. If the waking entity slept for a long time, its
	 * vruntime difference from min_vruntime may overflow s64 and their
	 * comparison may get inversed, so ignore the entity's original
	 * vruntime in that case.
	 * The maximal vruntime speedup is given by the ratio of normal to
	 * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
	 * When placing a migrated waking entity, its exec_start has been set
	 * from a different rq. In order to take into account a possible
	 * divergence between new and prev rq's clocks task because of irq and
	 * stolen time, we take an additional margin.
	 * So, cutting off on the sleep time of
	 *     2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
	 * should be safe.
	 */
	if (entity_is_long_sleeper(se))
		se->vruntime = vruntime;
	else
		se->vruntime = max_vruntime(se->vruntime, vruntime);
@@ -4770,6 +4803,9 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)

	if (flags & ENQUEUE_WAKEUP)
		place_entity(cfs_rq, se, 0);
	/* Entity has migrated, no longer consider this task hot */
	if (flags & ENQUEUE_MIGRATED)
		se->exec_start = 0;

	check_schedstat_required();
	update_stats_enqueue_fair(cfs_rq, se, flags);
@@ -7657,9 +7693,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
	/* Tell new CPU we are migrated */
	se->avg.last_update_time = 0;

	/* We have migrated, no longer consider this task hot */
	se->exec_start = 0;

	update_scan_period(p, new_cpu);
}