Commit a1ff03cd authored by Frederic Weisbecker
Browse files

tick: Detect and fix jiffies update stall



In some rare cases, the timekeeper CPU may delay its jiffies update
duty for a while. Known causes include:

* The timekeeper is waiting on stop_machine in a MULTI_STOP_DISABLE_IRQ
  or MULTI_STOP_RUN state. Disabled interrupts prevent timekeeping
  updates while it waits for the target CPU to complete its
  stop_machine() callback.

* The timekeeper vcpu has VMEXIT'ed for a long while due to some overload
  on the host.

Detect and fix these situations with emergency timekeeping catchups.

Original-patch-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
parent 58dedf0a
Loading
Loading
Loading
Loading
+17 −0
Original line number Diff line number Diff line
@@ -169,6 +169,8 @@ static ktime_t tick_init_jiffy_update(void)
	return period;
}

/*
 * Number of consecutive ticks on a CPU during which jiffies is observed
 * unchanged before that CPU's tick handler forces a jiffies update itself
 * instead of waiting for the timekeeper CPU. The per-CPU counter is reset
 * as soon as jiffies is seen to advance.
 */
#define MAX_STALLED_JIFFIES 5

static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
	int cpu = smp_processor_id();
@@ -196,6 +198,21 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
	if (tick_do_timer_cpu == cpu)
		tick_do_update_jiffies64(now);

	/*
	 * If jiffies update stalled for too long (timekeeper in stop_machine()
	 * or VMEXIT'ed for several msecs), force an update.
	 */
	if (ts->last_tick_jiffies != jiffies) {
		ts->stalled_jiffies = 0;
		ts->last_tick_jiffies = READ_ONCE(jiffies);
	} else {
		if (++ts->stalled_jiffies == MAX_STALLED_JIFFIES) {
			tick_do_update_jiffies64(now);
			ts->stalled_jiffies = 0;
			ts->last_tick_jiffies = READ_ONCE(jiffies);
		}
	}

	if (ts->inidle)
		ts->got_idle_tick = 1;
}
+4 −0
Original line number Diff line number Diff line
@@ -49,6 +49,8 @@ enum tick_nohz_mode {
 * @timer_expires_base:	Base time clock monotonic for @timer_expires
 * @next_timer:		Expiry time of next expiring timer for debugging purpose only
 * @tick_dep_mask:	Tick dependency mask - is set, if someone needs the tick
 * @last_tick_jiffies:	Value of jiffies seen on last tick
 * @stalled_jiffies:	Number of stalled jiffies detected across ticks
 */
struct tick_sched {
	struct hrtimer			sched_timer;
@@ -77,6 +79,8 @@ struct tick_sched {
	u64				next_timer;
	ktime_t				idle_expires;
	atomic_t			tick_dep_mask;
	unsigned long			last_tick_jiffies;
	unsigned int			stalled_jiffies;
};

extern struct tick_sched *tick_get_tick_sched(int cpu);