mm, hugetlb: fix racy resv_huge_pages underflow on UFFDIO_COPY (8cc5fcbb) · Commits · EulixOS / Software / Kernel

include/linux/migrate.h

+4 −0

Original line number	Diff line number	Diff line
		@@ -51,6 +51,7 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page);
		extern int migrate_page_move_mapping(struct address_space *mapping,
		struct page *newpage, struct page *page, int extra_count);
		+extern void copy_huge_page(struct page *dst, struct page *src);
		#else

		static inline void putback_movable_pages(struct list_head *l) {}
		@@ -77,6 +78,9 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping,
		return -ENOSYS;
		}

		+static inline void copy_huge_page(struct page *dst, struct page *src)
		+{
		+}
		#endif /* CONFIG_MIGRATION */

		#ifdef CONFIG_COMPACTION

mm/hugetlb.c

+39 −9

Original line number	Diff line number	Diff line
		@@ -30,6 +30,7 @@
		#include <linux/numa.h>
		#include <linux/llist.h>
		#include <linux/cma.h>
		+#include <linux/migrate.h>

		#include <asm/page.h>
		#include <asm/pgalloc.h>
		@@ -5076,20 +5077,17 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
		struct page **pagep)
		{
		bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
		-struct address_space *mapping;
		-pgoff_t idx;
		+struct hstate *h = hstate_vma(dst_vma);
		+struct address_space *mapping = dst_vma->vm_file->f_mapping;
		+pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
		unsigned long size;
		int vm_shared = dst_vma->vm_flags & VM_SHARED;
		-struct hstate *h = hstate_vma(dst_vma);
		pte_t _dst_pte;
		spinlock_t *ptl;
		-int ret;
		+int ret = -ENOMEM;
		struct page *page;
		int writable;

		-mapping = dst_vma->vm_file->f_mapping;
		-idx = vma_hugecache_offset(h, dst_vma, dst_addr);
		-
		if (is_continue) {
		ret = -EFAULT;
		page = find_lock_page(mapping, idx);
		@@ -5118,12 +5116,44 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
		/* fallback to copy_from_user outside mmap_lock */
		if (unlikely(ret)) {
		ret = -ENOENT;
		+/* Free the allocated page which may have
		+* consumed a reservation.
		+*/
		+restore_reserve_on_error(h, dst_vma, dst_addr, page);
		+put_page(page);
		+
		+/* Allocate a temporary page to hold the copied
		+* contents.
		+*/
		+page = alloc_huge_page_vma(h, dst_vma, dst_addr);
		+if (!page) {
		+ret = -ENOMEM;
		+goto out;
		+}
		*pagep = page;
		-/* don't free the page */
		+/* Set the outparam pagep and return to the caller to
		+* copy the contents outside the lock. Don't free the
		+* page.
		+*/
		goto out;
		}
		} else {
		-page = *pagep;
		+if (vm_shared &&
		+hugetlbfs_pagecache_present(h, dst_vma, dst_addr)) {
		+put_page(*pagep);
		+ret = -EEXIST;
		+*pagep = NULL;
		+goto out;
		+}
		+
		+page = alloc_huge_page(dst_vma, dst_addr, 0);
		+if (IS_ERR(page)) {
		+ret = -ENOMEM;
		+*pagep = NULL;
		+goto out;
		+}
		+copy_huge_page(page, *pagep);
		+put_page(*pagep);
		*pagep = NULL;
		}

mm/migrate.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -553,7 +553,7 @@ static void __copy_gigantic_page(struct page *dst, struct page *src,
		}
		}

		-static void copy_huge_page(struct page *dst, struct page *src)
		+void copy_huge_page(struct page *dst, struct page *src)
		{
		int i;
		int nr_pages;

mm/userfaultfd.c

+1 −49

Original line number	Diff line number	Diff line
		@@ -209,7 +209,6 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
		unsigned long len,
		enum mcopy_atomic_mode mode)
		{
		-int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
		int vm_shared = dst_vma->vm_flags & VM_SHARED;
		ssize_t err;
		pte_t *dst_pte;
		@@ -308,7 +307,6 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,

		mutex_unlock(&hugetlb_fault_mutex_table[hash]);
		i_mmap_unlock_read(mapping);
		-vm_alloc_shared = vm_shared;

		cond_resched();

		@@ -346,54 +344,8 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm,
		out_unlock:
		mmap_read_unlock(dst_mm);
		out:
		-if (page) {
		-/*
		-* We encountered an error and are about to free a newly
		-* allocated huge page.
		-*
		-* Reservation handling is very subtle, and is different for
		-* private and shared mappings. See the routine
		-* restore_reserve_on_error for details. Unfortunately, we
		-* can not call restore_reserve_on_error now as it would
		-* require holding mmap_lock.
		-*
		-* If a reservation for the page existed in the reservation
		-* map of a private mapping, the map was modified to indicate
		-* the reservation was consumed when the page was allocated.
		-* We clear the HPageRestoreReserve flag now so that the global
		-* reserve count will not be incremented in free_huge_page.
		-* The reservation map will still indicate the reservation
		-* was consumed and possibly prevent later page allocation.
		-* This is better than leaking a global reservation. If no
		-* reservation existed, it is still safe to clear
		-* HPageRestoreReserve as no adjustments to reservation counts
		-* were made during allocation.
		-*
		-* The reservation map for shared mappings indicates which
		-* pages have reservations. When a huge page is allocated
		-* for an address with a reservation, no change is made to
		-* the reserve map. In this case HPageRestoreReserve will be
		-* set to indicate that the global reservation count should be
		-* incremented when the page is freed. This is the desired
		-* behavior. However, when a huge page is allocated for an
		-* address without a reservation a reservation entry is added
		-* to the reservation map, and HPageRestoreReserve will not be
		-* set. When the page is freed, the global reserve count will
		-* NOT be incremented and it will appear as though we have
		-* leaked reserved page. In this case, set HPageRestoreReserve
		-* so that the global reserve count will be incremented to
		-* match the reservation map entry which was created.
		-*
		-* Note that vm_alloc_shared is based on the flags of the vma
		-* for which the page was originally allocated. dst_vma could
		-* be different or NULL on error.
		-*/
		-if (vm_alloc_shared)
		-SetHPageRestoreReserve(page);
		-else
		-ClearHPageRestoreReserve(page);
		+if (page)
		put_page(page);
		-}
		BUG_ON(copied < 0);
		BUG_ON(err > 0);
		BUG_ON(!copied && !err);