Commit 2b3be65d authored by David Woodhouse, committed by Peter Zijlstra
Browse files

x86/smpboot: Split up native_cpu_up() into separate phases and document them



There are four logical parts to what native_cpu_up() does on the BSP (or
on the controlling CPU for a later hotplug):

 1) Wake the AP by sending the INIT/SIPI/SIPI sequence.

 2) Wait for the AP to make it as far as wait_for_master_cpu() which
    sets that CPU's bit in cpu_initialized_mask, then sets the bit in
    cpu_callout_mask to let the AP proceed through cpu_init().

 3) Wait for the AP to finish cpu_init() and get as far as the
    smp_callin() call, which sets that CPU's bit in cpu_callin_mask.

 4) Perform the TSC synchronization and wait for the AP to actually
    mark itself online in cpu_online_mask.

In preparation to allow these phases to operate in parallel on multiple
APs, split them out into separate functions and document the interactions
a little more clearly in both the BP and AP code paths.

No functional change intended.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Oleksandr Natalenko <oleksandr@natalenko.name>
Tested-by: Helge Deller <deller@gmx.de> # parisc
Tested-by: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck
Link: https://lore.kernel.org/r/20230512205255.928917242@linutronix.de
parent c7f15dd3
Loading
Loading
Loading
Loading
+119 −65
Original line number Diff line number Diff line
@@ -193,6 +193,10 @@ static void smp_callin(void)

	wmb();

	/*
	 * This runs the AP through all the cpuhp states to its target
	 * state CPUHP_ONLINE.
	 */
	notify_cpu_starting(cpuid);

	/*
@@ -233,12 +237,28 @@ static void notrace start_secondary(void *unused)
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();
#endif
	/*
	 * Sync point with wait_cpu_initialized(). Before proceeding through
	 * cpu_init(), the AP will call wait_for_master_cpu() which sets its
	 * own bit in cpu_initialized_mask and then waits for the BSP to set
	 * its bit in cpu_callout_mask to release it.
	 */
	cpu_init_secondary();
	rcu_cpu_starting(raw_smp_processor_id());
	x86_cpuinit.early_percpu_clock_init();

	/*
	 * Sync point with wait_cpu_callin(). The AP doesn't wait here
	 * but just sets the bit to let the controlling CPU (BSP) know that
	 * it's got this far.
	 */
	smp_callin();

	/* Check TSC synchronization with the control CPU: */
	/*
	 * Check TSC synchronization with the control CPU, which will do
	 * its part of this from wait_cpu_online(), making it an implicit
	 * synchronization point.
	 */
	check_tsc_sync_target();

	/*
@@ -257,6 +277,7 @@ static void notrace start_secondary(void *unused)
	 * half valid vector space.
	 */
	lock_vector_lock();
	/* Sync point with wait_cpu_online() */
	set_cpu_online(smp_processor_id(), true);
	lapic_online();
	unlock_vector_lock();
@@ -979,17 +1000,13 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from
 * Returns zero if startup was successfully sent, else error code from
 * ->wakeup_secondary_cpu.
 */
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
{
	/* start_ip had better be page-aligned! */
	unsigned long start_ip = real_mode_header->trampoline_start;

	unsigned long boot_error = 0;
	unsigned long timeout;

#ifdef CONFIG_X86_64
	/* If 64-bit wakeup method exists, use the 64-bit mode trampoline IP */
	if (apic->wakeup_secondary_cpu_64)
@@ -1046,60 +1063,89 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
	 * - Use an INIT boot APIC message
	 */
	if (apic->wakeup_secondary_cpu_64)
		boot_error = apic->wakeup_secondary_cpu_64(apicid, start_ip);
		return apic->wakeup_secondary_cpu_64(apicid, start_ip);
	else if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
		return apic->wakeup_secondary_cpu(apicid, start_ip);

	return wakeup_secondary_cpu_via_init(apicid, start_ip);
}

static int wait_cpu_cpumask(unsigned int cpu, const struct cpumask *mask)
{
	unsigned long timeout;

	if (!boot_error) {
	/*
		 * Wait 10s total for first sign of life from AP
	 * Wait up to 10s for the CPU to report in.
	 */
		boot_error = -1;
	timeout = jiffies + 10*HZ;
	while (time_before(jiffies, timeout)) {
			if (cpumask_test_cpu(cpu, cpu_initialized_mask)) {
				/*
				 * Tell AP to proceed with initialization
				 */
				cpumask_set_cpu(cpu, cpu_callout_mask);
				boot_error = 0;
				break;
			}
		if (cpumask_test_cpu(cpu, mask))
			return 0;

		schedule();
	}
	return -1;
}

	if (!boot_error) {
/*
		 * Wait till AP completes initial initialization
 * Bringup step two: Wait for the target AP to reach cpu_init_secondary()
 * and thus wait_for_master_cpu(), then set cpu_callout_mask to allow it
 * to proceed.  The AP will then proceed past setting its 'callin' bit
 * and end up waiting in check_tsc_sync_target() until we reach
 * wait_cpu_online() to tend to it.
 */
		while (!cpumask_test_cpu(cpu, cpu_callin_mask)) {
static int wait_cpu_initialized(unsigned int cpu)
{
	/*
			 * Allow other tasks to run while we wait for the
			 * AP to come online. This also gives a chance
			 * for the MTRR work(triggered by the AP coming online)
			 * to be completed in the stop machine context.
	 * Wait for first sign of life from AP.
	 */
			schedule();
		}
	if (wait_cpu_cpumask(cpu, cpu_initialized_mask))
		return -1;

	cpumask_set_cpu(cpu, cpu_callout_mask);
	return 0;
}

	if (x86_platform.legacy.warm_reset) {
/*
		 * Cleanup possible dangling ends...
 * Bringup step three: Wait for the target AP to reach smp_callin().
 * The AP is not waiting for us here so we don't need to parallelise
 * this step. Not entirely clear why we care about this, since we just
 * proceed directly to TSC synchronization which is the next sync
 * point with the AP anyway.
 */
		smpboot_restore_warm_reset_vector();
static void wait_cpu_callin(unsigned int cpu)
{
	/*
	 * Poll until @cpu's bit is set in cpu_callin_mask, calling
	 * schedule() between checks. There is no timeout on this wait:
	 * the loop only ends once the bit is observed.
	 */
	while (!cpumask_test_cpu(cpu, cpu_callin_mask))
		schedule();
}

	return boot_error;
/*
 * Bringup step four: Synchronize the TSC and wait for the target AP
 * to reach set_cpu_online() in start_secondary().
 *
 * @cpu is the AP being brought up. The final wait has no timeout; this
 * function only returns once cpu_online(@cpu) reports true.
 */
static void wait_cpu_online(unsigned int cpu)
{
	unsigned long flags;

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so). The AP performs its side of this from
	 * check_tsc_sync_target() in start_secondary().
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	/*
	 * Wait for the AP to mark itself online, so the core caller
	 * can drop sparse_irq_lock. schedule() is called between polls
	 * rather than spinning.
	 */
	while (!cpu_online(cpu))
		schedule();
}

int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
static int native_kick_ap(unsigned int cpu, struct task_struct *tidle)
{
	int apicid = apic->cpu_present_to_apicid(cpu);
	unsigned long flags;
	int err;

	lockdep_assert_irqs_enabled();
@@ -1140,25 +1186,33 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
		return err;

	err = do_boot_cpu(apicid, cpu, tidle);
	if (err) {
	if (err)
		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);

	return err;
}

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);
int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int ret;

	while (!cpu_online(cpu)) {
		cpu_relax();
		touch_nmi_watchdog();
	}
	ret = native_kick_ap(cpu, tidle);
	if (ret)
		goto out;

	return 0;
	ret = wait_cpu_initialized(cpu);
	if (ret)
		goto out;

	wait_cpu_callin(cpu);
	wait_cpu_online(cpu);

out:
	/* Cleanup possible dangling ends... */
	if (x86_platform.legacy.warm_reset)
		smpboot_restore_warm_reset_vector();

	return ret;
}

/**