Commit 66850be5 authored by Liam R. Howlett's avatar Liam R. Howlett Committed by Andrew Morton
Browse files

mm/mempolicy: use vma iterator & maple state instead of vma linked list

Reworked the way mbind_range() finds the first VMA to reuse the maple
state and limit the number of tree walks needed.

Note, this drops the VM_BUG_ON(!vma) call, which would catch a start
address higher than the last VMA.  The code was written in a way that
allowed no VMA updates to occur and still return success.  There should be
no functional change to this scenario with the new code.

Link: https://lkml.kernel.org/r/20220906194824.2110408-57-Liam.Howlett@oracle.com


Signed-off-by: default avatarLiam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: default avatarMatthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: default avatarYu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent ba0aff8e
Loading
Loading
Loading
Loading
+32 −24
Original line number Diff line number Diff line
@@ -381,9 +381,10 @@ void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new)
void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_write_lock(mm);
	for (vma = mm->mmap; vma; vma = vma->vm_next)
	for_each_vma(vmi, vma)
		mpol_rebind_policy(vma->vm_policy, new);
	mmap_write_unlock(mm);
}
@@ -654,7 +655,7 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
static int queue_pages_test_walk(unsigned long start, unsigned long end,
				struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct vm_area_struct *next, *vma = walk->vma;
	struct queue_pages *qp = walk->private;
	unsigned long endvma = vma->vm_end;
	unsigned long flags = qp->flags;
@@ -669,9 +670,10 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
			/* hole at head side of range */
			return -EFAULT;
	}
	next = find_vma(vma->vm_mm, vma->vm_end);
	if (!(flags & MPOL_MF_DISCONTIG_OK) &&
		((vma->vm_end < qp->end) &&
		(!vma->vm_next || vma->vm_end < vma->vm_next->vm_start)))
		(!next || vma->vm_end < next->vm_start)))
		/* hole at middle or tail of range */
		return -EFAULT;

@@ -785,26 +787,24 @@ static int vma_replace_policy(struct vm_area_struct *vma,
static int mbind_range(struct mm_struct *mm, unsigned long start,
		       unsigned long end, struct mempolicy *new_pol)
{
	MA_STATE(mas, &mm->mm_mt, start - 1, start - 1);
	struct vm_area_struct *prev;
	struct vm_area_struct *vma;
	int err = 0;
	pgoff_t pgoff;
	unsigned long vmstart;
	unsigned long vmend;

	vma = find_vma(mm, start);
	VM_BUG_ON(!vma);
	prev = mas_find_rev(&mas, 0);
	if (prev && (start < prev->vm_end))
		vma = prev;
	else
		vma = mas_next(&mas, end - 1);

	prev = vma->vm_prev;
	if (start > vma->vm_start)
		prev = vma;

	for (; vma && vma->vm_start < end; prev = vma, vma = vma->vm_next) {
		vmstart = max(start, vma->vm_start);
		vmend   = min(end, vma->vm_end);
	for (; vma; vma = mas_next(&mas, end - 1)) {
		unsigned long vmstart = max(start, vma->vm_start);
		unsigned long vmend = min(end, vma->vm_end);

		if (mpol_equal(vma_policy(vma), new_pol))
			continue;
			goto next;

		pgoff = vma->vm_pgoff +
			((vmstart - vma->vm_start) >> PAGE_SHIFT);
@@ -813,6 +813,8 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
				 new_pol, vma->vm_userfaultfd_ctx,
				 anon_vma_name(vma));
		if (prev) {
			/* vma_merge() invalidated the mas */
			mas_pause(&mas);
			vma = prev;
			goto replace;
		}
@@ -820,16 +822,22 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
			err = split_vma(vma->vm_mm, vma, vmstart, 1);
			if (err)
				goto out;
			/* split_vma() invalidated the mas */
			mas_pause(&mas);
		}
		if (vma->vm_end != vmend) {
			err = split_vma(vma->vm_mm, vma, vmend, 0);
			if (err)
				goto out;
			/* split_vma() invalidated the mas */
			mas_pause(&mas);
		}
replace:
		err = vma_replace_policy(vma, new_pol);
		if (err)
			goto out;
next:
		prev = vma;
	}

out:
@@ -1049,6 +1057,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
			   int flags)
{
	nodemask_t nmask;
	struct vm_area_struct *vma;
	LIST_HEAD(pagelist);
	int err = 0;
	struct migration_target_control mtc = {
@@ -1064,8 +1073,9 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
	 * need migration.  Between passing in the full user address
	 * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
	 */
	vma = find_vma(mm, 0);
	VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
	queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
	queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
			flags | MPOL_MF_DISCONTIG_OK, &pagelist);

	if (!list_empty(&pagelist)) {
@@ -1195,14 +1205,13 @@ static struct page *new_page(struct page *page, unsigned long start)
	struct folio *dst, *src = page_folio(page);
	struct vm_area_struct *vma;
	unsigned long address;
	VMA_ITERATOR(vmi, current->mm, start);
	gfp_t gfp = GFP_HIGHUSER_MOVABLE | __GFP_RETRY_MAYFAIL;

	vma = find_vma(current->mm, start);
	while (vma) {
	for_each_vma(vmi, vma) {
		address = page_address_in_vma(page, vma);
		if (address != -EFAULT)
			break;
		vma = vma->vm_next;
	}

	if (folio_test_hugetlb(src))
@@ -1480,6 +1489,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
	unsigned long vmend;
	unsigned long end;
	int err = -ENOENT;
	VMA_ITERATOR(vmi, mm, start);

	start = untagged_addr(start);
	if (start & ~PAGE_MASK)
@@ -1505,9 +1515,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
	if (end == start)
		return 0;
	mmap_write_lock(mm);
	vma = find_vma(mm, start);
	for (; vma && vma->vm_start < end;  vma = vma->vm_next) {

	for_each_vma_range(vmi, vma, end) {
		vmstart = max(start, vma->vm_start);
		vmend   = min(end, vma->vm_end);
		new = mpol_dup(vma_policy(vma));