Commit 34b67923 authored by Hugh Dickins, committed by Matthew Wilcox (Oracle)

mm/munlock: mlock_pte_range() when mlocking or munlocking

Fill in missing pieces: reimplementation of munlock_vma_pages_range(),
required to lower the mlock_counts when munlocking without munmapping;
and its complement, implementation of mlock_vma_pages_range(), required
to raise the mlock_counts on pages already there when a range is mlocked.
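
For orientation, the user-visible operations these counts serve are plain mlock() and
munlock() on an existing mapping, with no munmap() involved.  A minimal user-space
sketch (the 1MB size and MAP_POPULATE are illustrative choices, not taken from the patch):

#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 1UL << 20;		/* 1MB; RLIMIT_MEMLOCK must allow this much */
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	memset(p, 0, len);		/* pages are already present before mlock() */

	if (mlock(p, len))		/* kernel must raise mlock_count on those pages */
		return 1;
	if (munlock(p, len))		/* munlock without munmap: counts must drop again */
		return 1;

	munmap(p, len);
	return 0;
}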

Combine them into just the one function mlock_vma_pages_range(), using
walk_page_range() to run mlock_pte_range().  This approach fixes the
"Very slow unlockall()" of unpopulated PROT_NONE areas, reported in
https://lore.kernel.org/linux-mm/70885d37-62b7-748b-29df-9e94f3291736@gmail.com/
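
The report itself is in the thread above; a rough sketch of the kind of reproducer it
describes might look like the following (the 16GB size, MCL_ONFAULT, and the need for
CAP_IPC_LOCK or a raised RLIMIT_MEMLOCK are assumptions, not details from that thread):

#include <stdio.h>
#include <sys/mman.h>
#include <sys/time.h>

int main(void)
{
	size_t len = 1UL << 34;		/* 16GB of address space, never populated */
	void *p = mmap(NULL, len, PROT_NONE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
	struct timeval t0, t1;

	if (p == MAP_FAILED)
		return 1;
	/* Sets VM_LOCKED on the vma although no page can ever be faulted in */
	if (mlockall(MCL_CURRENT | MCL_ONFAULT))
		return 1;		/* likely needs CAP_IPC_LOCK or a large RLIMIT_MEMLOCK */

	gettimeofday(&t0, NULL);
	munlockall();			/* the operation reported as very slow */
	gettimeofday(&t1, NULL);
	printf("munlockall took %ld us\n",
	       (long)((t1.tv_sec - t0.tv_sec) * 1000000 + (t1.tv_usec - t0.tv_usec)));

	munmap(p, len);
	return 0;
}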



Munlock clears VM_LOCKED at the start, under exclusive mmap_lock; but if
a racing truncate or holepunch (depending on i_mmap_rwsem) gets to the
pte first, it will not try to munlock the page: leaving release_pages()
to correct it when the last reference to the page is gone - that's okay,
a page is not evictable anyway while it is held by an extra reference.

Mlock sets VM_LOCKED at the start, under exclusive mmap_lock; but if
a racing remove_migration_pte() or try_to_unmap_one() (depending on
i_mmap_rwsem) gets to the pte first, it will try to mlock the page,
then mlock_pte_range() will mlock it a second time.  This is harder to
reproduce, but a more serious race because it could leave the page
unevictable indefinitely even though the area is munlocked afterwards.
Guard against it by setting the (inappropriate) VM_IO flag,
and modifying mlock_vma_page() to decline such vmas.

Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
parent 07ca7606
mm/internal.h: +2 −1
@@ -412,7 +412,8 @@ void mlock_page(struct page *page);
 static inline void mlock_vma_page(struct page *page,
 			struct vm_area_struct *vma, bool compound)
 {
-	if (unlikely(vma->vm_flags & VM_LOCKED) &&
+	/* VM_IO check prevents migration from double-counting during mlock */
+	if (unlikely((vma->vm_flags & (VM_LOCKED|VM_IO)) == VM_LOCKED) &&
 	    (compound || !PageTransCompound(page)))
 		mlock_page(page);
 }
mm/mlock.c: +89 −22
@@ -14,6 +14,7 @@
 #include <linux/swapops.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
+#include <linux/pagewalk.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
@@ -127,25 +128,91 @@ void munlock_page(struct page *page)
 	unlock_page_memcg(page);
 }
 
+static int mlock_pte_range(pmd_t *pmd, unsigned long addr,
+			   unsigned long end, struct mm_walk *walk)
+
+{
+	struct vm_area_struct *vma = walk->vma;
+	spinlock_t *ptl;
+	pte_t *start_pte, *pte;
+	struct page *page;
+
+	ptl = pmd_trans_huge_lock(pmd, vma);
+	if (ptl) {
+		if (!pmd_present(*pmd))
+			goto out;
+		if (is_huge_zero_pmd(*pmd))
+			goto out;
+		page = pmd_page(*pmd);
+		if (vma->vm_flags & VM_LOCKED)
+			mlock_page(page);
+		else
+			munlock_page(page);
+		goto out;
+	}
+
+	start_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (pte = start_pte; addr != end; pte++, addr += PAGE_SIZE) {
+		if (!pte_present(*pte))
+			continue;
+		page = vm_normal_page(vma, addr, *pte);
+		if (!page)
+			continue;
+		if (PageTransCompound(page))
+			continue;
+		if (vma->vm_flags & VM_LOCKED)
+			mlock_page(page);
+		else
+			munlock_page(page);
+	}
+	pte_unmap(start_pte);
+out:
+	spin_unlock(ptl);
+	cond_resched();
+	return 0;
+}
+
 /*
- * munlock_vma_pages_range() - munlock all pages in the vma range.'
- * @vma - vma containing range to be munlock()ed.
+ * mlock_vma_pages_range() - mlock any pages already in the range,
+ *                           or munlock all pages in the range.
+ * @vma - vma containing range to be mlock()ed or munlock()ed
  * @start - start address in @vma of the range
- * @end - end of range in @vma.
- *
- *  For mremap(), munmap() and exit().
+ * @end - end of range in @vma
+ * @newflags - the new set of flags for @vma.
  *
- * Called with @vma VM_LOCKED.
- *
- * Returns with VM_LOCKED cleared.  Callers must be prepared to
- * deal with this.
+ * Called for mlock(), mlock2() and mlockall(), to set @vma VM_LOCKED;
+ * called for munlock() and munlockall(), to clear VM_LOCKED from @vma.
  */
-static void munlock_vma_pages_range(struct vm_area_struct *vma,
-				    unsigned long start, unsigned long end)
+static void mlock_vma_pages_range(struct vm_area_struct *vma,
+	unsigned long start, unsigned long end, vm_flags_t newflags)
 {
-	vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
+	static const struct mm_walk_ops mlock_walk_ops = {
+		.pmd_entry = mlock_pte_range,
+	};
 
+	/*
+	 * There is a slight chance that concurrent page migration,
+	 * or page reclaim finding a page of this now-VM_LOCKED vma,
+	 * will call mlock_vma_page() and raise page's mlock_count:
+	 * double counting, leaving the page unevictable indefinitely.
+	 * Communicate this danger to mlock_vma_page() with VM_IO,
+	 * which is a VM_SPECIAL flag not allowed on VM_LOCKED vmas.
+	 * mmap_lock is held in write mode here, so this weird
+	 * combination should not be visible to other mmap_lock users;
+	 * but WRITE_ONCE so rmap walkers must see VM_IO if VM_LOCKED.
+	 */
+	if (newflags & VM_LOCKED)
+		newflags |= VM_IO;
+	WRITE_ONCE(vma->vm_flags, newflags);
+
-	/* Reimplementation to follow in later commit */
+	lru_add_drain();
+	walk_page_range(vma->vm_mm, start, end, &mlock_walk_ops, NULL);
+	lru_add_drain();
+
+	if (newflags & VM_IO) {
+		newflags &= ~VM_IO;
+		WRITE_ONCE(vma->vm_flags, newflags);
+	}
 }
 
 /*
@@ -164,10 +231,9 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	pgoff_t pgoff;
 	int nr_pages;
 	int ret = 0;
-	int lock = !!(newflags & VM_LOCKED);
-	vm_flags_t old_flags = vma->vm_flags;
+	vm_flags_t oldflags = vma->vm_flags;
 
-	if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) ||
+	if (newflags == oldflags || (oldflags & VM_SPECIAL) ||
 	    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) ||
 	    vma_is_dax(vma) || vma_is_secretmem(vma))
 		/* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */
@@ -199,9 +265,9 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	 * Keep track of amount of locked VM.
 	 */
 	nr_pages = (end - start) >> PAGE_SHIFT;
-	if (!lock)
+	if (!(newflags & VM_LOCKED))
 		nr_pages = -nr_pages;
-	else if (old_flags & VM_LOCKED)
+	else if (oldflags & VM_LOCKED)
 		nr_pages = 0;
 	mm->locked_vm += nr_pages;
 
@@ -211,11 +277,12 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	 * set VM_LOCKED, populate_vma_page_range will bring it back.
 	 */
 
-	if (lock)
+	if ((newflags & VM_LOCKED) && (oldflags & VM_LOCKED)) {
+		/* No work to do, and mlocking twice would be wrong */
 		vma->vm_flags = newflags;
-	else
-		munlock_vma_pages_range(vma, start, end);
-
+	} else {
+		mlock_vma_pages_range(vma, start, end, newflags);
+	}
 out:
 	*prev = vma;
 	return ret;