Unverified Commit 06bae869 authored by openeuler-ci-bot, committed by Gitee

!1852 arm64: support batched/deferred tlb shootdown during page reclamation/migration

Merge Pull Request from: @ci-robot 
 
PR sync from: Jinjiang Tu <tujinjiang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/EP3I3LIWZJK6QHJPE6SA4KNXJO2ACWCI/ 
Support batched/deferred tlb shootdown during page reclamation/migration
for arm64.
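
For context, a simplified sketch of how the generic batching layer in mm/rmap.c
drives the arch hooks touched by this series (abridged from the upstream code,
not part of this diff; the real set_tlb_ubc_flush_pending() also tracks pte
writability, reclaim only takes this path when arch_tlbbatch_should_defer()
says deferral is worthwhile, and the kabi patches in this PR reach
task_struct->tlb_ubc through a helper macro):

	static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
					      unsigned long uaddr)
	{
		struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

		arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr);
		tlb_ubc->flush_required = true;
	}

	void try_to_unmap_flush(void)
	{
		struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;

		if (!tlb_ubc->flush_required)
			return;

		arch_tlbbatch_flush(&tlb_ubc->arch);
		tlb_ubc->flush_required = false;
	}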

Changelog:
v6:
 - move kabi fix code into a separate patch.

v5:
 - adjust tab num for macro DEFINE_TLB_UBC

v4:
 - define macro DEFINE_TLB_UBC

v3:
 - fix kabi breakage for task_struct->tlb_ubc

v2:
 - fix kabi breakage for mm_struct->tlb_flush_batched

Anshuman Khandual (1):
  mm/tlbbatch: introduce arch_tlbbatch_should_defer()

Barry Song (2):
  mm/tlbbatch: rename and extend some functions
  arm64: support batched/deferred tlb shootdown during page
    reclamation/migration

Jinjiang Tu (1):
  mm/tlbbatch: fix kabi change

Yicong Yang (1):
  mm/tlbbatch: introduce arch_flush_tlb_batched_pending()


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I7U78A 
 
Link: https://gitee.com/openeuler/kernel/pulls/1852

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents 9c8467e9 5237dd97
+1 −1
@@ -9,7 +9,7 @@
     |       alpha: | TODO |
     |         arc: | TODO |
     |         arm: | TODO |
-    |       arm64: | TODO |
+    |       arm64: |  ok  |
     |         c6x: |  ..  |
     |        csky: | TODO |
     |       h8300: |  ..  |
+1 −0
@@ -78,6 +78,7 @@ config ARM64
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
 	select ARCH_SUPPORTS_NUMA_BALANCING
+	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
 	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
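
Selecting ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH is what compiles the generic
batching state into task_struct. For reference, the mainline definition it
enables (from include/linux/mm_types_task.h, shown for context, not part of
this diff; the kabi fix patch in this PR relocates the field):

	#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
	/* Per-task batch of deferred TLB invalidations (current->tlb_ubc). */
	struct tlbflush_unmap_batch {
		struct arch_tlbflush_unmap_batch arch;	/* arch-private state */
		bool flush_required;	/* batch holds unflushed entries */
		bool writable;		/* a writable pte was unmapped */
	};
	#endif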
+12 −0
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_ARM64_TLBBATCH_H
+#define _ARCH_ARM64_TLBBATCH_H
+
+struct arch_tlbflush_unmap_batch {
+	/*
+	 * For arm64, HW can do tlb shootdown, so we don't
+	 * need to record cpumask for sending IPI
+	 */
+};
+
+#endif /* _ARCH_ARM64_TLBBATCH_H */
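
The empty struct is deliberate: arm64 TLBI instructions broadcast to every CPU
in the inner-shareable domain, so the batch needs no record of which CPUs to
interrupt. Compare the x86 definition (from arch/x86/include/asm/tlbflush.h,
unchanged by this PR, quoted for contrast), where the batch must accumulate a
cpumask for the IPI-based flush:

	struct arch_tlbflush_unmap_batch {
		/*
		 * Each bit set is a CPU that potentially has a TLB entry
		 * for one of the PFNs being flushed.
		 */
		struct cpumask cpumask;
	};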
+41 −3
@@ -254,17 +254,23 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	dsb(ish);
 }
 
-static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
-					 unsigned long uaddr)
+static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
+					   unsigned long uaddr)
 {
 	unsigned long addr;
 
 	dsb(ishst);
-	addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
+	addr = __TLBI_VADDR(uaddr, ASID(mm));
 	__tlbi(vale1is, addr);
 	__tlbi_user(vale1is, addr);
 }
 
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+					 unsigned long uaddr)
+{
+	return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
+}
+
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long uaddr)
 {
@@ -272,6 +278,38 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 	dsb(ish);
 }
 
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
+	/*
+	 * TLB flush deferral is not required on systems which are affected by
+	 * ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
+	 * will have two consecutive TLBI instructions with a dsb(ish) in between
+	 * defeating the purpose (i.e save overall 'dsb ish' cost).
+	 */
+	if (unlikely(cpus_have_const_cap(ARM64_WORKAROUND_REPEAT_TLBI)))
+		return false;
+#endif
+	return true;
+}
+
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					     struct mm_struct *mm,
+					     unsigned long uaddr)
+{
+	__flush_tlb_page_nosync(mm, uaddr);
+}
+
+static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	dsb(ish);
+}
+
+static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
+{
+	dsb(ish);
+}
+
 /*
  * This is meant to avoid soft lock-ups on large TLB flushing ranges and not
  * necessarily a performance improvement.
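
The payoff: arch_tlbbatch_add_pending() issues each broadcast TLBI without
waiting, and a single dsb(ish) in arch_tlbbatch_flush() synchronizes the whole
batch. An illustrative view (hypothetical loop, not kernel code) of what
reclaim effectively does when it unmaps nr_pages pages of one mm:

	struct arch_tlbflush_unmap_batch batch = {};
	unsigned long i;

	for (i = 0; i < nr_pages; i++)
		arch_tlbbatch_add_pending(&batch, mm, uaddr[i]);	/* TLBI, no wait */
	arch_tlbbatch_flush(&batch);	/* one dsb(ish) instead of nr_pages of them */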
+20 −2
@@ -239,6 +239,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
 }
 
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+	bool should_defer = false;
+
+	/* If remote CPUs need to be flushed then defer batch the flush */
+	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+		should_defer = true;
+	put_cpu();
+
+	return should_defer;
+}
+
 static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 {
 	/*
@@ -250,13 +262,19 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 	return atomic64_inc_return(&mm->context.tlb_gen);
 }
 
-static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
-					struct mm_struct *mm)
+static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
+					     struct mm_struct *mm,
+					     unsigned long uaddr)
 {
 	inc_mm_tlb_gen(mm);
 	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
 }
 
+static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	flush_tlb_mm(mm);
+}
+
 extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 
 #endif /* !MODULE */
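
arch_flush_tlb_batched_pending() is needed because code such as munmap() or
mprotect() may race with reclaim and must not proceed while batched
invalidations are still outstanding. The generic caller looks roughly like the
sketch below (abridged from upstream mm/rmap.c; the kabi fix patch in this PR
moves the tlb_flush_batched flag, so the exact field access here differs):

	void flush_tlb_batched_pending(struct mm_struct *mm)
	{
		if (data_race(mm->tlb_flush_batched)) {
			arch_flush_tlb_batched_pending(mm);
			/* flush_tlb_mm() on x86; just dsb(ish) on arm64,
			 * since the TLBIs were already broadcast. */
			barrier();
			mm->tlb_flush_batched = false;
		}
	}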