Commit e57ef2ed authored by Thomas Gleixner's avatar Thomas Gleixner Committed by Peter Zijlstra
Browse files

x86: Put hot per CPU variables into a struct



The layout of per-cpu variables is at the mercy of the compiler. This
can lead to random performance fluctuations from build to build.

Create a structure to hold some of the hottest per-cpu variables,
starting with current_task.

Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Signed-off-by: default avatarPeter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.179707194@infradead.org
parent fdc9ee7e
Loading
Loading
Loading
Loading
+16 −3
Original line number Diff line number Diff line
@@ -3,16 +3,29 @@
#define _ASM_X86_CURRENT_H

#include <linux/compiler.h>
#include <asm/percpu.h>

#ifndef __ASSEMBLY__

#include <linux/cache.h>
#include <asm/percpu.h>

struct task_struct;

DECLARE_PER_CPU(struct task_struct *, current_task);
struct pcpu_hot {
	union {
		struct {
			struct task_struct	*current_task;
		};
		u8	pad[64];
	};
};
static_assert(sizeof(struct pcpu_hot) == 64);

DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);

static __always_inline struct task_struct *get_current(void)
{
	return this_cpu_read_stable(current_task);
	return this_cpu_read_stable(pcpu_hot.current_task);
}

#define current get_current()
+5 −9
Original line number Diff line number Diff line
@@ -2012,18 +2012,16 @@ static __init int setup_clearcpuid(char *arg)
}
__setup("clearcpuid=", setup_clearcpuid);

DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
	.current_task	= &init_task,
};
EXPORT_PER_CPU_SYMBOL(pcpu_hot);

#ifdef CONFIG_X86_64
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);

/*
 * The following percpu variables are hot.  Align current_task to
 * cacheline size such that they fall in the same cacheline.
 */
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
	&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);

DEFINE_PER_CPU(void *, hardirq_stack_ptr);
DEFINE_PER_CPU(bool, hardirq_stack_inuse);
@@ -2083,8 +2081,6 @@ void syscall_init(void)

#else	/* CONFIG_X86_64 */

DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);

+1 −1
Original line number Diff line number Diff line
@@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	if (prev->gs | next->gs)
		loadsegment(gs, next->gs);

	this_cpu_write(current_task, next_p);
	raw_cpu_write(pcpu_hot.current_task, next_p);

	switch_fpu_finish();

+1 −1
Original line number Diff line number Diff line
@@ -617,7 +617,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	/*
	 * Switch the PDA and FPU contexts.
	 */
	this_cpu_write(current_task, next_p);
	raw_cpu_write(pcpu_hot.current_task, next_p);
	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

	switch_fpu_finish();
+1 −1
Original line number Diff line number Diff line
@@ -1046,7 +1046,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
	/* Just in case we booted with a single CPU. */
	alternatives_enable_smp();

	per_cpu(current_task, cpu) = idle;
	per_cpu(pcpu_hot.current_task, cpu) = idle;
	cpu_init_stack_canary(cpu, idle);

	/* Initialize the interrupt stack(s) */