!6588 Add hugetlb MADV_DONTNEED support (d25aa47f) · Commits · EulixOS / Software / Kernel

include/linux/hugetlb.h

+3 −2

Original line number	Diff line number	Diff line
		@@ -142,7 +142,7 @@ void unmap_hugepage_range(struct vm_area_struct *,
		void __unmap_hugepage_range_final(struct mmu_gather *tlb,
		struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		struct page *ref_page);
		struct page *ref_page, zap_flags_t zap_flags);
		void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
		unsigned long start, unsigned long end,
		struct page *ref_page);
		@@ -371,7 +371,8 @@ static inline unsigned long hugetlb_change_protection(

		static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
		struct vm_area_struct *vma, unsigned long start,
		unsigned long end, struct page *ref_page)
		unsigned long end, struct page *ref_page,
		zap_flags_t zap_flags)
		{
		BUG();
		}

include/linux/mm.h

+6 −0

Original line number	Diff line number	Diff line
		@@ -1706,8 +1706,12 @@ struct zap_details {
		pgoff_t first_index; /* Lowest page->index to unmap */
		pgoff_t last_index; /* Highest page->index to unmap */
		struct page *single_page; /* Locked page to be unmapped */
		zap_flags_t zap_flags; /* Extra flags for zapping */
		};

		/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
		#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1))

		struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
		pte_t pte);
		struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
		@@ -1717,6 +1721,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
		unsigned long size);
		void zap_page_range(struct vm_area_struct *vma, unsigned long address,
		unsigned long size);
		void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
		unsigned long size, struct zap_details *details);
		void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
		unsigned long start, unsigned long end);

include/linux/mm_types.h

+2 −0

Original line number	Diff line number	Diff line
		@@ -898,4 +898,6 @@ typedef struct {
		unsigned long val;
		} swp_entry_t;

		typedef unsigned int __bitwise zap_flags_t;

		#endif /* _LINUX_MM_TYPES_H */

mm/hugetlb.c

+16 −12

Original line number	Diff line number	Diff line
		@@ -4512,22 +4512,26 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,

		void __unmap_hugepage_range_final(struct mmu_gather *tlb,
		struct vm_area_struct *vma, unsigned long start,
		unsigned long end, struct page *ref_page)
		unsigned long end, struct page *ref_page,
		zap_flags_t zap_flags)
		{
		__unmap_hugepage_range(tlb, vma, start, end, ref_page);

		if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */
		/*
		* Clear this flag so that x86's huge_pmd_share page_table_shareable
		* test will fail on a vma being torn down, and not grab a page table
		* on its way out. We're lucky that the flag has such an appropriate
		* name, and can in fact be safely cleared here. We could clear it
		* before the __unmap_hugepage_range above, but all that's necessary
		* Clear this flag so that x86's huge_pmd_share
		* page_table_shareable test will fail on a vma being torn
		* down, and not grab a page table on its way out. We're lucky
		* that the flag has such an appropriate name, and can in fact
		* be safely cleared here. We could clear it before the
		* __unmap_hugepage_range above, but all that's necessary
		* is to clear it before releasing the i_mmap_rwsem. This works
		* because in the context this is called, the VMA is about to be
		* destroyed and the i_mmap_rwsem is held.
		* because in the context this is called, the VMA is about to
		* be destroyed and the i_mmap_rwsem is held.
		*/
		vma->vm_flags &= ~VM_MAYSHARE;
		}
		}

		void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
		unsigned long end, struct page *ref_page)

mm/madvise.c

+42 −5

Original line number	Diff line number	Diff line
		@@ -525,6 +525,11 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
		tlb_end_vma(tlb, vma);
		}

		/*
		 * Eligibility check used for vmas that are not hugetlb mappings:
		 * returns true unless the vma has VM_LOCKED or VM_PFNMAP set in
		 * its vm_flags.
		 */
		static inline bool can_madv_lru_non_huge_vma(struct vm_area_struct *vma)
		{
			return !(vma->vm_flags & (VM_LOCKED | VM_PFNMAP));
		}

		static long madvise_cold(struct vm_area_struct *vma,
		struct vm_area_struct **prev,
		unsigned long start_addr, unsigned long end_addr)
		@@ -772,8 +777,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
		* Application no longer needs these pages. If the pages are dirty,
		* it's OK to just throw them away. The app will be more careful about
		* data it wants to keep. Be sure to free swap resources too. The
		* zap_page_range call sets things up for shrink_active_list to actually free
		* these pages later if no one else has touched them in the meantime,
		* zap_page_range_single call sets things up for shrink_active_list to actually
		* free these pages later if no one else has touched them in the meantime,
		* although we could add these pages to a global reuse list for
		* shrink_active_list to pick up before reclaiming other pages.
		*
		@@ -790,10 +795,34 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
		static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
		unsigned long start, unsigned long end)
		{
		zap_page_range(vma, start, end - start);
		zap_page_range_single(vma, start, end - start, NULL);
		return 0;
		}

		/*
		 * Check whether a MADV_DONTNEED/MADV_FREE style request may be
		 * applied to @vma, adjusting the range end for hugetlb mappings.
		 *
		 * @vma:      the vma the request targets
		 * @start:    start address of the request
		 * @end:      in/out end address; for a hugetlb vma it is rounded
		 *            down to a multiple of the vma's huge page size
		 * @behavior: the madvise behavior being requested
		 *
		 * For a non-hugetlb vma the decision is delegated to
		 * can_madv_lru_non_huge_vma() and *@end is left untouched.
		 * For a hugetlb vma only MADV_DONTNEED is accepted, and only when
		 * @start has no bits set outside huge_page_mask() for the vma.
		 *
		 * Returns true if the request is valid for this vma, false otherwise.
		 * After the adjustment *@end may equal @start; the caller is
		 * expected to handle that empty range.
		 */
		static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
							    unsigned long start,
							    unsigned long *end,
							    int behavior)
		{
			if (!is_vm_hugetlb_page(vma))
				return can_madv_lru_non_huge_vma(vma);

			if (behavior != MADV_DONTNEED)
				return false;
			if (start & ~huge_page_mask(hstate_vma(vma)))
				return false;

			/*
			 * Madvise callers expect the length to be rounded up to PAGE_SIZE
			 * boundaries, and may be unaware that this VMA uses huge pages.
			 * Avoid unexpected data loss by rounding down the number of
			 * huge pages freed.
			 *
			 * The update must go through the pointer so that the caller's
			 * end value is the one that gets adjusted.
			 */
			*end = ALIGN_DOWN(*end, huge_page_size(hstate_vma(vma)));

			return true;
		}

		static long madvise_dontneed_free(struct vm_area_struct *vma,
		struct vm_area_struct **prev,
		unsigned long start, unsigned long end,
		@@ -802,9 +831,12 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
		struct mm_struct *mm = vma->vm_mm;

		*prev = vma;
		if (!can_madv_lru_vma(vma))
		if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior))
		return -EINVAL;

		if (start == end)
		return 0;

		if (!userfaultfd_remove(vma, start, end)) {
		*prev = NULL; /* mmap_lock has been dropped, prev is stale */

		@@ -824,7 +856,12 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
		*/
		return -ENOMEM;
		}
		if (!can_madv_lru_vma(vma))
		/*
		* Potential end adjustment for hugetlb vma is OK as
		* the check below keeps end within vma.
		*/
		if (!madvise_dontneed_free_valid_vma(vma, start, &end,
		behavior))
		return -EINVAL;
		if (end > vma->vm_end) {
		/*