Unverified commit d25aa47f, authored by openeuler-ci-bot and committed by Gitee
Browse files

!6588 Add hugetlb MADV_DONTNEED support

Merge Pull Request from: @ci-robot 
 
PR sync from: Ze Zuo <zuoze1@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/YMMAHOTCHL5MMEMR4G2E56QEGR5STD2M/ 
MADV_DONTNEED is currently disabled for hugetlb mappings.  This certainly
makes sense in shared file mappings as the pagecache maintains a reference
to the page and it will never be freed.  However, it could be useful to
unmap and free pages in private mappings.

Mike Kravetz (3):
  mm: enable MADV_DONTNEED for hugetlb mappings
  madvise: use zap_page_range_single for madvise dontneed
  hugetlb: don't delete vma_lock in hugetlb MADV_DONTNEED processing

Rik van Riel (1):
  mm,madvise,hugetlb: fix unexpected data loss with MADV_DONTNEED on
    hugetlbfs


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I9GVYW 
 
Link:https://gitee.com/openeuler/kernel/pulls/6588

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents b8e86304 5d4b821d
Loading
Loading
Loading
Loading
+3 −2
Original line number Diff line number Diff line
@@ -142,7 +142,7 @@ void unmap_hugepage_range(struct vm_area_struct *,
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			  struct vm_area_struct *vma,
			  unsigned long start, unsigned long end,
			  struct page *ref_page);
			  struct page *ref_page, zap_flags_t zap_flags);
void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
				unsigned long start, unsigned long end,
				struct page *ref_page);
@@ -371,7 +371,8 @@ static inline unsigned long hugetlb_change_protection(

/*
 * Inline stub of __unmap_hugepage_range_final().  Its body is a bare BUG(),
 * so reaching it is treated as a kernel bug rather than a valid call.
 * Presumably this is the variant built when hugetlb support is compiled
 * out -- the surrounding #ifdef is outside this hunk, TODO confirm.
 *
 * NOTE(review): this hunk is rendered without +/- markers.  The first
 * "unsigned long end, struct page *ref_page" line looks like the removed
 * line; the two lines after it are its replacement, which appends the
 * zap_flags_t zap_flags parameter.
 */
static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			struct vm_area_struct *vma, unsigned long start,
			unsigned long end, struct page *ref_page)
			unsigned long end, struct page *ref_page,
			zap_flags_t zap_flags)
{
	BUG();
}
+6 −0
Original line number Diff line number Diff line
@@ -1706,8 +1706,12 @@ struct zap_details {
	pgoff_t	first_index;			/* Lowest page->index to unmap */
	pgoff_t last_index;			/* Highest page->index to unmap */
	struct page *single_page;		/* Locked page to be unmapped */
	zap_flags_t zap_flags;			/* Extra flags for zapping */
};

/* Set in unmap_vmas() to indicate a final unmap call.  Only used by hugetlb */
#define  ZAP_FLAG_UNMAP		((__force zap_flags_t) BIT(1))

struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
			     pte_t pte);
struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
@@ -1717,6 +1721,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
		  unsigned long size);
void zap_page_range(struct vm_area_struct *vma, unsigned long address,
		    unsigned long size);
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
		unsigned long size, struct zap_details *details);
void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
		unsigned long start, unsigned long end);

+2 −0
Original line number Diff line number Diff line
@@ -898,4 +898,6 @@ typedef struct {
	unsigned long val;
} swp_entry_t;

typedef unsigned int __bitwise zap_flags_t;

#endif /* _LINUX_MM_TYPES_H */
+16 −12
Original line number Diff line number Diff line
@@ -4512,22 +4512,26 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,

/*
 * Unmap the range start..end of a hugetlb vma by calling
 * __unmap_hugepage_range(), and, when the caller passes ZAP_FLAG_UNMAP in
 * zap_flags, additionally clear VM_MAYSHARE on the vma.
 *
 * NOTE(review): this hunk is rendered without +/- markers.  The first
 * "unsigned long end, struct page *ref_page" line looks like the removed
 * line; the two lines after it are its replacement, which appends the
 * zap_flags parameter.
 */
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			  struct vm_area_struct *vma, unsigned long start,
			  unsigned long end, struct page *ref_page)
			  unsigned long end, struct page *ref_page,
			  zap_flags_t zap_flags)
{
	__unmap_hugepage_range(tlb, vma, start, end, ref_page);

	if (zap_flags & ZAP_FLAG_UNMAP) {	/* final unmap */
		/*
		 * Clear this flag so that x86's huge_pmd_share
		 * page_table_shareable test will fail on a vma being torn
		 * down, and not grab a page table on its way out.  We're lucky
		 * that the flag has such an appropriate name, and can in fact
		 * be safely cleared here. We could clear it before the
		 * __unmap_hugepage_range above, but all that's necessary
		 * is to clear it before releasing the i_mmap_rwsem. This works
		 * because in the context this is called, the VMA is about to
		 * be destroyed and the i_mmap_rwsem is held.
		 */
		vma->vm_flags &= ~VM_MAYSHARE;
	}
}

void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end, struct page *ref_page)
+42 −5
Original line number Diff line number Diff line
@@ -525,6 +525,11 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
	tlb_end_vma(tlb, vma);
}

/*
 * Report whether a non-hugetlb vma qualifies for the madvise paths that
 * call this helper: true only when neither VM_LOCKED nor VM_PFNMAP is set
 * in vma->vm_flags.
 */
static inline bool can_madv_lru_non_huge_vma(struct vm_area_struct *vma)
{
	return (vma->vm_flags & (VM_LOCKED | VM_PFNMAP)) == 0;
}

static long madvise_cold(struct vm_area_struct *vma,
			struct vm_area_struct **prev,
			unsigned long start_addr, unsigned long end_addr)
@@ -772,8 +777,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
 * Application no longer needs these pages.  If the pages are dirty,
 * it's OK to just throw them away.  The app will be more careful about
 * data it wants to keep.  Be sure to free swap resources too.  The
 * zap_page_range call sets things up for shrink_active_list to actually free
 * these pages later if no one else has touched them in the meantime,
 * zap_page_range_single call sets things up for shrink_active_list to actually
 * free these pages later if no one else has touched them in the meantime,
 * although we could add these pages to a global reuse list for
 * shrink_active_list to pick up before reclaiming other pages.
 *
@@ -790,10 +795,34 @@ static int madvise_free_single_vma(struct vm_area_struct *vma,
/*
 * Zap the mappings covering start..end of a single vma, passing
 * (end - start) as the size.  Always returns 0.
 *
 * NOTE(review): this hunk is rendered without +/- markers.  The
 * zap_page_range() call looks like the removed line and the
 * zap_page_range_single(..., NULL) call its replacement.
 */
static long madvise_dontneed_single_vma(struct vm_area_struct *vma,
					unsigned long start, unsigned long end)
{
	zap_page_range(vma, start, end - start);
	zap_page_range_single(vma, start, end - start, NULL);
	return 0;
}

/*
 * Decide whether the requested madvise behavior may be applied to @vma.
 *
 * A non-hugetlb vma defers to can_madv_lru_non_huge_vma().  A hugetlb vma
 * is accepted only for MADV_DONTNEED and only when @start lies on a huge
 * page boundary; in that case *@end is rounded down to a huge page
 * boundary before returning.
 *
 * Returns true when the request is allowed, false otherwise.  *@end is
 * modified only on the accepted hugetlb path.
 */
static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
					    unsigned long start,
					    unsigned long *end,
					    int behavior)
{
	if (is_vm_hugetlb_page(vma)) {
		if (behavior != MADV_DONTNEED ||
		    (start & ~huge_page_mask(hstate_vma(vma))) != 0)
			return false;

		/*
		 * Callers of madvise think in PAGE_SIZE units and may not
		 * know this VMA is backed by huge pages.  Rounding the end
		 * down means a partially covered huge page is left alone
		 * instead of being freed, so no data is lost unexpectedly.
		 */
		*end = ALIGN_DOWN(*end, huge_page_size(hstate_vma(vma)));
		return true;
	}

	return can_madv_lru_non_huge_vma(vma);
}

static long madvise_dontneed_free(struct vm_area_struct *vma,
				  struct vm_area_struct **prev,
				  unsigned long start, unsigned long end,
@@ -802,9 +831,12 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
	struct mm_struct *mm = vma->vm_mm;

	*prev = vma;
	if (!can_madv_lru_vma(vma))
	if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior))
		return -EINVAL;

	if (start == end)
		return 0;

	if (!userfaultfd_remove(vma, start, end)) {
		*prev = NULL; /* mmap_lock has been dropped, prev is stale */

@@ -824,7 +856,12 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
			 */
			return -ENOMEM;
		}
		if (!can_madv_lru_vma(vma))
		/*
		 * Potential end adjustment for hugetlb vma is OK as
		 * the check below keeps end within vma.
		 */
		if (!madvise_dontneed_free_valid_vma(vma, start, &end,
						     behavior))
			return -EINVAL;
		if (end > vma->vm_end) {
			/*
Loading