Commit ebfca9b4 authored by zhaolichang's avatar zhaolichang
Browse files

tlbi: Do not force the broadcasting of TLBI and ICache, and add TLB flush helpers based on IPI.

kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4BLL0



--------------------------------

Add a new boot parameter, 'disable_tlbflush_is', to disable TLB flushes
within the same inner shareable domain, for performance tuning.

When this new parameter is specified, TLB entry is invalidated by
__tlbi(aside1, asid) only on the CPUs specified by mm_cpumask(mm).

By using TLBI-IS, all CPUs within the same inner shareable domain
check whether they hold TLB entries for this ASID. This causes
performance noise, especially in large-scale HPC environments with
more than a thousand nodes and a low-latency interconnect.

Several control bits are added to control different flush path.

mm: use tlb invalidation ipi for flush_tlb_mm
page: use tlb invalidation ipi for flush_tlb_page
switch: don't local_flush_tlb_mm when switch_mm

Signed-off-by: default avatarzhaolichang <zhaolichang@huawei.com>
parent 76dc5383
Loading
Loading
Loading
Loading
+28 −0
Original line number Diff line number Diff line
@@ -2672,6 +2672,34 @@ config ARCH_CPUIDLE_HALTPOLL

endmenu # "Power management options"

menu "TLB options"

config ARM64_TLBI_IPI
	bool "IPI based ARM64 TLB invalidation (EXPERIMENTAL)"
	depends on ARM64
	default n
	help
	  adds new boot parameter 'disable_tlbflush_is' to disable TLB flush
	  within the same inner shareable domain for performance tuning.

	  When this new parameter is specified, TLB entry is invalidated by
	  __tlbi(aside1, asid) only on the CPUs specified by mm_cpumask(mm).

	  By using TLBI-IS, all CPUs within the same inner shareable domain
	  check whether they hold TLB entries for this ASID. This causes
	  performance noise, especially in large-scale HPC environments with
	  more than a thousand nodes and a low-latency interconnect.

	  NOTE (Important):
	  This feature is intended for learning and debugging only. Please
	  do not enable it on production systems. Only enable it if you
	  fully understand its performance and correctness impact.

	  If unsure, say N.

endmenu

menu "CPU Power Management"

source "drivers/cpuidle/Kconfig"
+5 −0
Original line number Diff line number Diff line
@@ -632,6 +632,11 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y
CONFIG_ARCH_CPUIDLE_HALTPOLL=y
# end of Power management options

#
# TLB options
#
# CONFIG_ARM64_TLBI_IPI is not set

#
# CPU Power Management
#
+8 −1
Original line number Diff line number Diff line
@@ -265,8 +265,15 @@ static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
	  struct task_struct *tsk)
{
	if (prev != next)
	if (prev != next) {
		__switch_mm(next);
#ifdef CONFIG_ARM64_TLBI_IPI
		if (unlikely(test_tlbi_ipi_switch())) {
			cpumask_clear_cpu(smp_processor_id(), mm_cpumask(prev));
			local_flush_tlb_mm(prev);
		}
#endif
	}

	/*
	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
+4 −0
Original line number Diff line number Diff line
@@ -946,7 +946,11 @@ static inline int __ptep_clear_flush_young(struct vm_area_struct *vma,
		 * context-switch, which provides a DSB to complete the TLB
		 * invalidation.
		 */
#ifdef CONFIG_ARM64_TLBI_IPI
		flush_tlb_page_nosync_ipi(vma, address);
#else
		flush_tlb_page_nosync(vma, address);
#endif
	}

	return young;
+101 −0
Original line number Diff line number Diff line
@@ -16,6 +16,8 @@
#include <linux/mmu_notifier.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
#include <linux/smp.h>
#include <linux/ctype.h>

/*
 * Raw TLBI operations.
@@ -250,6 +252,100 @@ static inline void flush_tlb_all(void)
	isb();
}

#ifdef CONFIG_ARM64_TLBI_IPI
/*
 * Bitmask selecting which flush paths avoid broadcast TLBI-IS and use
 * IPI/local invalidation instead; set from the "disable_tlbflush_is"
 * boot parameter.
 *
 * NOTE(review): this is 'static' in a header, so each translation unit
 * gets a private copy. The boot-parameter parser (not visible here) can
 * only update the copy in its own TU — confirm the flag is actually
 * observed by all inline users, or turn this into a single extern
 * definition.
 */
static unsigned int disable_tlbflush_is;

#define FLAG_TLBFLUSH_PAGE	0x0002	/* flush_tlb_page() path uses IPIs */
#define FLAG_TLBFLUSH_SWITCH	0x0004	/* switch_mm() does a local flush */
#define FLAG_TLBFLUSH_MM	0x0008	/* flush_tlb_mm() path uses IPIs */

/* Emit a predicate that tests one FLAG_TLBFLUSH_* bit. */
#define TEST_TLBFLUSH_FLAG_EXTERN(flag, FLAG)			\
bool test_tlbi_ipi_##flag(void)					\
{								\
	return !!(disable_tlbflush_is & FLAG_TLBFLUSH_##FLAG);	\
}

/* Instantiate the predicate as a static always-inline helper. */
#define TEST_TLBFLUSH_FLAG(flag, FLAG)				\
static __always_inline TEST_TLBFLUSH_FLAG_EXTERN(flag, FLAG)

TEST_TLBFLUSH_FLAG(mm, MM)
TEST_TLBFLUSH_FLAG(page, PAGE)
TEST_TLBFLUSH_FLAG(switch, SWITCH)

/*
 * Invalidate all TLB entries tagged with @mm's ASID on the local CPU
 * only (non-broadcast aside1). Used by the IPI handler and on the
 * switch_mm() path when IPI-based invalidation is enabled.
 */
static inline void local_flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid = __TLBI_VADDR(0, ASID(mm));

	/* nshst/nsh barriers: order against this CPU only, no broadcast */
	dsb(nshst);
	__tlbi(aside1, asid);
	__tlbi_user(aside1, asid);
	dsb(nsh);
}

/*
 * Invalidate all TLB entries tagged with @mm's ASID across the inner
 * shareable domain using broadcast TLBI (aside1is) — the conventional
 * non-IPI flush path.
 */
static inline void __flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid = __TLBI_VADDR(0, ASID(mm));

	/* ishst/ish barriers: order and complete across the IS domain */
	dsb(ishst);
	__tlbi(aside1is, asid);
	__tlbi_user(aside1is, asid);
	dsb(ish);
}

/*
 * IPI callback: flush the local TLB for the mm passed via @arg.
 * Runs on each CPU selected by on_each_cpu_mask() in flush_tlb_mm().
 */
static inline void ipi_flush_tlb_mm(void *arg)
{
	struct mm_struct *mm = arg;

	local_flush_tlb_mm(mm);
}

/*
 * Flush all TLB entries for @mm. When the 'mm' bit of
 * disable_tlbflush_is is set, send IPIs to the CPUs in mm_cpumask()
 * and flush locally on each (waiting for completion); otherwise use
 * broadcast TLBI-IS.
 */
static inline void flush_tlb_mm(struct mm_struct *mm)
{
	if (unlikely(test_tlbi_ipi_mm()))
		on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm,
				 (void *)mm, true);
	else
		__flush_tlb_mm(mm);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

/*
 * Broadcast (IS) invalidation of one last-level page entry, without the
 * trailing completion barrier — the caller issues dsb(ish) when it
 * needs the flush to have finished.
 */
static inline void __flush_tlb_page_nosync_ipi(unsigned long addr)
{
	dsb(ishst);
	__tlbi(vale1is, addr);
	__tlbi_user(vale1is, addr);
}

/*
 * Local-CPU invalidation of one last-level page entry. Despite the
 * "nosync" name it ends with dsb(nsh): it runs inside the IPI handler,
 * and the local invalidation must be complete before the handler
 * returns and the sender's on_each_cpu_mask() wait is satisfied.
 */
static inline void __local_flush_tlb_page_nosync(unsigned long addr)
{
	dsb(nshst);
	__tlbi(vale1, addr);
	__tlbi_user(vale1, addr);
	dsb(nsh);
}

/*
 * IPI callback: flush one page-sized TLB entry on the local CPU.
 * @arg points at the pre-encoded __TLBI_VADDR value (address + ASID).
 */
static inline void ipi_flush_tlb_page_nosync(void *arg)
{
	unsigned long addr = *(unsigned long *)arg;

	__local_flush_tlb_page_nosync(addr);
}

/*
 * Invalidate the TLB entry for one user page of @vma. When the 'page'
 * bit of disable_tlbflush_is is set, IPI the CPUs in mm_cpumask() to
 * flush locally (waiting for the handlers, so &addr stays valid);
 * otherwise use broadcast TLBI-IS without a trailing completion
 * barrier — the caller is expected to dsb(ish) if required.
 */
static inline void flush_tlb_page_nosync_ipi(struct vm_area_struct *vma,
					     unsigned long uaddr)
{
	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));

	if (unlikely(test_tlbi_ipi_page()))
		on_each_cpu_mask(mm_cpumask(vma->vm_mm),
				 ipi_flush_tlb_page_nosync, &addr, true);
	else
		__flush_tlb_page_nosync_ipi(addr);
	mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm,
						    uaddr & PAGE_MASK,
						    (uaddr & PAGE_MASK) + PAGE_SIZE);
}

#else /* CONFIG_ARM64_TLBI_IPI */

static inline void flush_tlb_mm(struct mm_struct *mm)
{
	unsigned long asid;
@@ -261,6 +357,7 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
	dsb(ish);
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
#endif /* CONFIG_ARM64_TLBI_IPI */

static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
					   unsigned long uaddr)
@@ -284,7 +381,11 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
/*
 * Invalidate the TLB entry for one user page and wait for completion.
 * With CONFIG_ARM64_TLBI_IPI the nosync step may be routed via IPIs to
 * the CPUs in mm_cpumask() instead of broadcast TLBI-IS.
 */
static inline void flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long uaddr)
{
#ifdef CONFIG_ARM64_TLBI_IPI
	flush_tlb_page_nosync_ipi(vma, uaddr);
#else
	flush_tlb_page_nosync(vma, uaddr);
#endif
	/* Complete the invalidation across the inner shareable domain */
	dsb(ish);
}

Loading