Commit 635de956 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull x86 tlb updates from Ingo Molnar:
 "The x86 MM changes in this cycle were:

   - Implement concurrent TLB flushes, which overlaps the local TLB
     flush with the remote TLB flush.

     In testing this improved sysbench performance measurably by a
     couple of percentage points, especially if TLB-heavy security
     mitigations are active.

   - Further micro-optimizations to improve the performance of TLB
     flushes"

* tag 'x86-mm-2021-04-29' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  smp: Micro-optimize smp_call_function_many_cond()
  smp: Inline on_each_cpu_cond() and on_each_cpu()
  x86/mm/tlb: Remove unnecessary uses of the inline keyword
  cpumask: Mark functions as pure
  x86/mm/tlb: Do not make is_lazy dirty for no reason
  x86/mm/tlb: Privatize cpu_tlbstate
  x86/mm/tlb: Flush remote and local TLBs concurrently
  x86/mm/tlb: Open-code on_each_cpu_cond_mask() for tlb_is_not_lazy()
  x86/mm/tlb: Unify flush_tlb_func_local() and flush_tlb_func_remote()
  smp: Run functions concurrently in smp_call_function_many_cond()
parents d0cc7eca a500fc91
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
	return gva_n - offset;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
				   const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
@@ -61,7 +61,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
	u64 status;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);
	trace_hyperv_mmu_flush_tlb_multi(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;
@@ -164,7 +164,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
	if (hv_result_success(status))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
	native_flush_tlb_multi(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
@@ -239,6 +239,6 @@ void hyperv_setup_mmu_ops(void)
		return;

	pr_info("Using hypercall for remote TLB flush\n");
	pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
	pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
+3 −3
Original line number Diff line number Diff line
@@ -63,7 +63,7 @@ static inline void slow_down_io(void)
void native_flush_tlb_local(void);
void native_flush_tlb_global(void);
void native_flush_tlb_one_user(unsigned long addr);
void native_flush_tlb_others(const struct cpumask *cpumask,
void native_flush_tlb_multi(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info);

static inline void __flush_tlb_local(void)
@@ -81,10 +81,10 @@ static inline void __flush_tlb_one_user(unsigned long addr)
	PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
}

static inline void __flush_tlb_others(const struct cpumask *cpumask,
static inline void __flush_tlb_multi(const struct cpumask *cpumask,
				      const struct flush_tlb_info *info)
{
	PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
	PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}

static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
+2 −2
Original line number Diff line number Diff line
@@ -161,7 +161,7 @@ struct pv_mmu_ops {
	void (*flush_tlb_user)(void);
	void (*flush_tlb_kernel)(void);
	void (*flush_tlb_one_user)(unsigned long addr);
	void (*flush_tlb_others)(const struct cpumask *cpus,
	void (*flush_tlb_multi)(const struct cpumask *cpus,
				const struct flush_tlb_info *info);

	void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
+26 −22
Original line number Diff line number Diff line
@@ -89,23 +89,6 @@ struct tlb_state {
	u16 loaded_mm_asid;
	u16 next_asid;

	/*
	 * We can be in one of several states:
	 *
	 *  - Actively using an mm.  Our CPU's bit will be set in
	 *    mm_cpumask(loaded_mm) and is_lazy == false;
	 *
	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
	 *
	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
	 *    We're heuristically guessing that the CR3 load we
	 *    skipped more than makes up for the overhead added by
	 *    lazy mode.
	 */
	bool is_lazy;

	/*
	 * If set we changed the page tables in such a way that we
	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
@@ -151,7 +134,27 @@ struct tlb_state {
	 */
	struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);

struct tlb_state_shared {
	/*
	 * We can be in one of several states:
	 *
	 *  - Actively using an mm.  Our CPU's bit will be set in
	 *    mm_cpumask(loaded_mm) and is_lazy == false;
	 *
	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
	 *
	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
	 *    We're heuristically guessing that the CR3 load we
	 *    skipped more than makes up for the overhead added by
	 *    lazy mode.
	 */
	bool is_lazy;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);

bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay
@@ -175,7 +178,7 @@ extern void initialize_tlbstate_and_flush(void);
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
 *  - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
@@ -201,14 +204,15 @@ struct flush_tlb_info {
	unsigned long		start;
	unsigned long		end;
	u64			new_tlb_gen;
	unsigned int		stride_shift;
	bool			freed_tables;
	unsigned int		initiating_cpu;
	u8			stride_shift;
	u8			freed_tables;
};

void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_others(const struct cpumask *cpumask,
void flush_tlb_multi(const struct cpumask *cpumask,
		      const struct flush_tlb_info *info);

#ifdef CONFIG_PARAVIRT
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@

#if IS_ENABLED(CONFIG_HYPERV)

TRACE_EVENT(hyperv_mmu_flush_tlb_others,
TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
	    TP_PROTO(const struct cpumask *cpus,
		     const struct flush_tlb_info *info),
	    TP_ARGS(cpus, info),
Loading