Commit ab407a19 authored by Thomas Gleixner
Browse files

Merge tag 'clocksource.2023.02.06b' of...

Merge tag 'clocksource.2023.02.06b' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into timers/core

Pull clocksource watchdog changes from Paul McKenney:

     o	Improvements to clocksource-watchdog console messages.

     o	Loosening of the clocksource-watchdog skew criteria to match
     	those of NTP (500 parts per million, relaxed from 400 parts
     	per million).  If it is good enough for NTP, it is good enough
     	for the clocksource watchdog.

     o	Suspend clocksource-watchdog checking temporarily when high
     	memory latencies are detected.	This avoids the false-positive
     	clock-skew events that have been seen on production systems
     	running memory-intensive workloads.

     o	On systems where the TSC is deemed trustworthy, use it as the
     	watchdog timesource, but only when specifically requested using
     	the tsc=watchdog kernel boot parameter.  This permits clock-skew
     	events to be detected, but avoids forcing workloads to use the
     	slow HPET and ACPI PM timers.  These last two timers are slow
     	enough to cause systems to be needlessly marked bad on the one
     	hand, and real skew does sometimes happen on production systems
     	running production workloads on the other.  And sometimes it is
     	the fault of the TSC, or at least of the firmware that told the
     	kernel to program the TSC with the wrong frequency.

     o	Add a tsc=recalibrate kernel boot parameter to allow the kernel
     	to diagnose cases where the TSC hardware works fine, but was told
     	by firmware to tick at the wrong frequency.  Such cases are rare,
     	but they really have happened on production systems.

Link: https://lore.kernel.org/r/20230210193640.GA3325193@paulmck-ThinkPad-P17-Gen-1
parents 7b0f95f2 0051293c
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -6369,6 +6369,16 @@
			in situations with strict latency requirements (where
			interruptions from clocksource watchdog are not
			acceptable).
			[x86] recalibrate: force recalibration against a HW timer
			(HPET or PM timer) on systems whose TSC frequency was
			obtained from HW or FW using either an MSR or CPUID(0x15).
			Warn if the difference is more than 500 ppm.
			[x86] watchdog: Use TSC as the watchdog clocksource with
			which to check other HW timers (HPET or PM timer), but
			only on systems where TSC has been deemed trustworthy.
			This will be suppressed by an earlier tsc=nowatchdog and
			can be overridden by a later tsc=nowatchdog.  A console
			message will flag any such suppression or overriding.

	tsc_early_khz=  [X86] Skip early TSC calibration and use the given
			value instead. Useful when the early TSC frequency discovery
+1 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
extern void hpet_time_init(void);
extern void time_init(void);
extern bool pit_timer_init(void);
extern bool tsc_clocksource_watchdog_disabled(void);

extern struct clock_event_device *global_clock_event;

+2 −0
Original line number Diff line number Diff line
@@ -1091,6 +1091,8 @@ int __init hpet_enable(void)
	if (!hpet_counting())
		goto out_nohpet;

	if (tsc_clocksource_watchdog_disabled())
		clocksource_hpet.flags |= CLOCK_SOURCE_MUST_VERIFY;
	clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);

	if (id & HPET_ID_LEGSUP) {
+50 −5
Original line number Diff line number Diff line
@@ -48,6 +48,8 @@ static DEFINE_STATIC_KEY_FALSE(__use_tsc);

int tsc_clocksource_reliable;

static int __read_mostly tsc_force_recalibrate;

static u32 art_to_tsc_numerator;
static u32 art_to_tsc_denominator;
static u64 art_to_tsc_offset;
@@ -292,6 +294,7 @@ __setup("notsc", notsc_setup);

static int no_sched_irq_time;
static int no_tsc_watchdog;
static int tsc_as_watchdog;

static int __init tsc_setup(char *str)
{
@@ -301,8 +304,22 @@ static int __init tsc_setup(char *str)
		no_sched_irq_time = 1;
	if (!strcmp(str, "unstable"))
		mark_tsc_unstable("boot parameter");
	if (!strcmp(str, "nowatchdog"))
	if (!strcmp(str, "nowatchdog")) {
		no_tsc_watchdog = 1;
		if (tsc_as_watchdog)
			pr_alert("%s: Overriding earlier tsc=watchdog with tsc=nowatchdog\n",
				 __func__);
		tsc_as_watchdog = 0;
	}
	if (!strcmp(str, "recalibrate"))
		tsc_force_recalibrate = 1;
	if (!strcmp(str, "watchdog")) {
		if (no_tsc_watchdog)
			pr_alert("%s: tsc=watchdog overridden by earlier tsc=nowatchdog\n",
				 __func__);
		else
			tsc_as_watchdog = 1;
	}
	return 1;
}

@@ -1186,6 +1203,12 @@ static void __init tsc_disable_clocksource_watchdog(void)
	clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
}

/*
 * Report whether the TSC is exempt from watchdog verification while also
 * having been requested as the watchdog.
 *
 * Returns true only when all three of the following hold:
 *   - clocksource_tsc does not carry CLOCK_SOURCE_MUST_VERIFY,
 *   - tsc_as_watchdog is set (tsc=watchdog was accepted), and
 *   - no_tsc_watchdog is clear (tsc=nowatchdog was not given).
 * Returns false otherwise.
 */
bool tsc_clocksource_watchdog_disabled(void)
{
	/* The TSC itself is still subject to verification. */
	if (clocksource_tsc.flags & CLOCK_SOURCE_MUST_VERIFY)
		return false;

	/* The TSC was never requested as the watchdog. */
	if (!tsc_as_watchdog)
		return false;

	/* An explicit tsc=nowatchdog request takes precedence. */
	return !no_tsc_watchdog;
}

static void __init check_system_tsc_reliable(void)
{
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
@@ -1374,6 +1397,25 @@ static void tsc_refine_calibration_work(struct work_struct *work)
	else
		freq = calc_pmtimer_ref(delta, ref_start, ref_stop);

	/* Will hit this only if tsc_force_recalibrate has been set */
	if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) {

		/* Warn if the deviation exceeds roughly 500 ppm (tsc_khz >> 11 is 1/2048, about 488 ppm) */
		if (abs(tsc_khz - freq) > (tsc_khz >> 11)) {
			pr_warn("Warning: TSC freq calibrated by CPUID/MSR differs from what is calibrated by HW timer, please check with vendor!!\n");
			pr_info("Previous calibrated TSC freq:\t %lu.%03lu MHz\n",
				(unsigned long)tsc_khz / 1000,
				(unsigned long)tsc_khz % 1000);
		}

		pr_info("TSC freq recalibrated by [%s]:\t %lu.%03lu MHz\n",
			hpet ? "HPET" : "PM_TIMER",
			(unsigned long)freq / 1000,
			(unsigned long)freq % 1000);

		return;
	}

	/* Make sure we're within 1% */
	if (abs(tsc_khz - freq) > tsc_khz/100)
		goto out;
@@ -1407,8 +1449,10 @@ static int __init init_tsc_clocksource(void)
	if (!boot_cpu_has(X86_FEATURE_TSC) || !tsc_khz)
		return 0;

	if (tsc_unstable)
		goto unreg;
	if (tsc_unstable) {
		clocksource_unregister(&clocksource_tsc_early);
		return 0;
	}

	if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
		clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@@ -1421,8 +1465,9 @@ static int __init init_tsc_clocksource(void)
		if (boot_cpu_has(X86_FEATURE_ART))
			art_related_clocksource = &clocksource_tsc;
		clocksource_register_khz(&clocksource_tsc, tsc_khz);
unreg:
		clocksource_unregister(&clocksource_tsc_early);

		if (!tsc_force_recalibrate)
			return 0;
	}

+4 −2
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <asm/io.h>
#include <asm/time.h>

/*
 * The I/O port the PMTMR resides at.
@@ -210,8 +211,9 @@ static int __init init_acpi_pm_clocksource(void)
		return -ENODEV;
	}

	return clocksource_register_hz(&clocksource_acpi_pm,
						PMTMR_TICKS_PER_SEC);
	if (tsc_clocksource_watchdog_disabled())
		clocksource_acpi_pm.flags |= CLOCK_SOURCE_MUST_VERIFY;
	return clocksource_register_hz(&clocksource_acpi_pm, PMTMR_TICKS_PER_SEC);
}

/* We use fs_initcall because we want the PCI fixups to have run
Loading