Commit 4ce94eab authored by Nadav Amit, committed by Ingo Molnar
Browse files

x86/mm/tlb: Flush remote and local TLBs concurrently



To improve TLB shootdown performance, flush the remote and local TLBs
concurrently. Introduce flush_tlb_multi() that does so. Introduce
paravirtual versions of flush_tlb_multi() for KVM, Xen and hyper-v (Xen
and hyper-v are only compile-tested).

While the updated SMP infrastructure is capable of running a function on
a single local core, it is not optimized for this case. The multiple
function calls and the indirect branch introduce some overhead, and
might make local TLB flushes slower than they were before the recent
changes.

Before calling the SMP infrastructure, check if only a local TLB flush
is needed to restore the lost performance in this common case. This
requires checking mm_cpumask() one more time, but unless this mask is
updated very frequently, this should not impact performance negatively.

Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Michael Kelley <mikelley@microsoft.com> # Hyper-v parts
Reviewed-by: Juergen Gross <jgross@suse.com> # Xen and paravirt parts
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/r/20210220231712.2475218-5-namit@vmware.com
parent 6035152d
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -52,7 +52,7 @@ static inline int fill_gva_list(u64 gva_list[], int offset,
	return gva_n - offset;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
				   const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
@@ -61,7 +61,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);
	trace_hyperv_mmu_flush_tlb_multi(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;
@@ -164,7 +164,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
	native_flush_tlb_multi(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
@@ -239,6 +239,6 @@ void hyperv_setup_mmu_ops(void)
		return;

	pr_info("Using hypercall for remote TLB flush\n");
	pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others;
	pv_ops.mmu.flush_tlb_multi = hyperv_flush_tlb_multi;
	pv_ops.mmu.tlb_remove_table = tlb_remove_table;
}
+3 −3
Original line number Diff line number Diff line
@@ -50,7 +50,7 @@ static inline void slow_down_io(void)
void native_flush_tlb_local(void);
void native_flush_tlb_global(void);
void native_flush_tlb_one_user(unsigned long addr);
void native_flush_tlb_others(const struct cpumask *cpumask,
void native_flush_tlb_multi(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info);

static inline void __flush_tlb_local(void)
@@ -68,10 +68,10 @@ static inline void __flush_tlb_one_user(unsigned long addr)
	PVOP_VCALL1(mmu.flush_tlb_one_user, addr);
}

static inline void __flush_tlb_others(const struct cpumask *cpumask,
static inline void __flush_tlb_multi(const struct cpumask *cpumask,
				      const struct flush_tlb_info *info)
{
	PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info);
	PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}

static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
+2 −2
Original line number Diff line number Diff line
@@ -188,7 +188,7 @@ struct pv_mmu_ops {
	void (*flush_tlb_user)(void);
	void (*flush_tlb_kernel)(void);
	void (*flush_tlb_one_user)(unsigned long addr);
	void (*flush_tlb_others)(const struct cpumask *cpus,
	void (*flush_tlb_multi)(const struct cpumask *cpus,
				const struct flush_tlb_info *info);

	void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
+2 −2
Original line number Diff line number Diff line
@@ -175,7 +175,7 @@ extern void initialize_tlbstate_and_flush(void);
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_others(cpumask, info) flushes TLBs on other cpus
 *  - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
@@ -209,7 +209,7 @@ struct flush_tlb_info {
void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_others(const struct cpumask *cpumask,
void flush_tlb_multi(const struct cpumask *cpumask,
		      const struct flush_tlb_info *info);

#ifdef CONFIG_PARAVIRT
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@

#if IS_ENABLED(CONFIG_HYPERV)

TRACE_EVENT(hyperv_mmu_flush_tlb_others,
TRACE_EVENT(hyperv_mmu_flush_tlb_multi,
	    TP_PROTO(const struct cpumask *cpus,
		     const struct flush_tlb_info *info),
	    TP_ARGS(cpus, info),
Loading