Commit 8220543d authored by Matthew Wilcox (Oracle), committed by Andrew Morton

nommu: remove uses of VMA linked list

Use the maple tree or VMA iterator instead.  This is faster and will allow
us to shrink the VMA.

Link: https://lkml.kernel.org/r/20220906194824.2110408-66-Liam.Howlett@oracle.com


Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent f683b9d6
mm/nommu.c +109 −37
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -557,26 +557,14 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
 	mas_store_prealloc(mas, NULL);
 }
 
-/*
- * add a VMA into a process's mm_struct in the appropriate place in the list
- * and tree and add to the address space's page tree also if not an anonymous
- * page
- * - should be called with mm->mmap_lock held writelocked
- */
-static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
+static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
 {
-	struct address_space *mapping;
-	struct vm_area_struct *prev;
-	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);
-
-	BUG_ON(!vma->vm_region);
-
 	mm->map_count++;
 	vma->vm_mm = mm;
 
 	/* add the VMA to the mapping */
 	if (vma->vm_file) {
-		mapping = vma->vm_file->f_mapping;
+		struct address_space *mapping = vma->vm_file->f_mapping;
 
 		i_mmap_lock_write(mapping);
 		flush_dcache_mmap_lock(mapping);
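Note: the old add_vma_to_mm() mixed infallible bookkeeping (map_count, vm_mm, the file mapping's interval tree) with a maple tree store that can allocate and therefore fail. setup_vma_to_mm() keeps only the infallible part, so callers can preallocate tree nodes first and bail out before any mm state has been touched. A minimal sketch of the resulting insertion pattern (kernel context assumed; insert_vma_sketch is an illustrative name, not from this patch):

	static int insert_vma_sketch(struct mm_struct *mm, struct vm_area_struct *vma)
	{
		MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);

		if (mas_preallocate(&mas, vma, GFP_KERNEL))
			return -ENOMEM;		/* nothing to unwind yet */

		setup_vma_to_mm(vma, mm);	/* infallible bookkeeping */
		vma_mas_store(vma, &mas);	/* consumes preallocated nodes */
		return 0;
	}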
@@ -584,21 +572,52 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 		flush_dcache_mmap_unlock(mapping);
 		i_mmap_unlock_write(mapping);
 	}
+}
+
+/*
+ * mas_add_vma_to_mm() - Maple state variant of add_mas_to_mm().
+ * @mas: The maple state with preallocations.
+ * @mm: The mm_struct
+ * @vma: The vma to add
+ *
+ */
+static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm,
+			      struct vm_area_struct *vma)
+{
+	struct vm_area_struct *prev;
+
+	BUG_ON(!vma->vm_region);
+
+	setup_vma_to_mm(vma, mm);
 
-	prev = mas_prev(&mas, 0);
-	mas_reset(&mas);
+	prev = mas_prev(mas, 0);
+	mas_reset(mas);
 	/* add the VMA to the tree */
-	vma_mas_store(vma, &mas);
+	vma_mas_store(vma, mas);
 	__vma_link_list(mm, vma, prev);
 }
 
 /*
- * delete a VMA from its owning mm_struct and address space
+ * add a VMA into a process's mm_struct in the appropriate place in the list
+ * and tree and add to the address space's page tree also if not an anonymous
+ * page
+ * - should be called with mm->mmap_lock held writelocked
  */
-static void delete_vma_from_mm(struct vm_area_struct *vma)
+static int add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);
+	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);
+
+	if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
+		pr_warn("Allocation of vma tree for process %d failed\n",
+		       current->pid);
+		return -ENOMEM;
+	}
+	mas_add_vma_to_mm(&mas, mm, vma);
+	return 0;
+}
 
+static void cleanup_vma_from_mm(struct vm_area_struct *vma)
+{
 	vma->vm_mm->map_count--;
 	/* remove the VMA from the mapping */
 	if (vma->vm_file) {
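Note: mas_add_vma_to_mm() takes the maple state by pointer so that one preallocation covers everything the function does; with the nodes already reserved, the store itself cannot fail. The same state is reused for the mas_prev() lookup that finds the predecessor for the (still present) legacy linked list, with mas_reset() discarding the walk before the store. A rough sketch of that reuse (kernel context; mt, entry, start and last are placeholder names):

	MA_STATE(mas, &mt, start, last);

	if (mas_preallocate(&mas, entry, GFP_KERNEL))
		return -ENOMEM;
	prev = mas_prev(&mas, 0);		/* read-only walk backwards */
	mas_reset(&mas);			/* forget the walk position */
	mas_set_range(&mas, start, last);	/* re-aim at the target range */
	mas_store_prealloc(&mas, entry);	/* guaranteed to succeed */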
@@ -611,10 +630,25 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 		flush_dcache_mmap_unlock(mapping);
 		i_mmap_unlock_write(mapping);
 	}
+}
+/*
+ * delete a VMA from its owning mm_struct and address space
+ */
+static int delete_vma_from_mm(struct vm_area_struct *vma)
+{
+	MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);
+
+	if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
+		pr_warn("Allocation of vma tree for process %d failed\n",
+		       current->pid);
+		return -ENOMEM;
+	}
+	cleanup_vma_from_mm(vma);
 
 	/* remove from the MM's tree and list */
 	vma_mas_remove(vma, &mas);
 	__vma_unlink_list(vma->vm_mm, vma);
+	return 0;
 }
 
 /*
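Note: deletion must preallocate too. Erasing an entry is a store of NULL (vma_mas_remove() above boils down to mas_store_prealloc(mas, NULL)), and writing NULL over part of a node can force a node split, i.e. an allocation. Hence delete_vma_from_mm() now returns int and fails up front, before anything has been unhooked. A sketch of the fallible delete (kernel context):

	MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);

	if (mas_preallocate(&mas, vma, GFP_KERNEL))
		return -ENOMEM;		/* mapping still fully intact */
	cleanup_vma_from_mm(vma);	/* file mapping + map_count teardown */
	vma_mas_remove(vma, &mas);	/* NULL-store over the VMA's range */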
@@ -1024,6 +1058,7 @@ unsigned long do_mmap(struct file *file,
 	vm_flags_t vm_flags;
 	unsigned long capabilities, result;
 	int ret;
+	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
 
 	*populate = 0;
 
@@ -1042,6 +1077,7 @@ unsigned long do_mmap(struct file *file,
 	 * now know into VMA flags */
 	vm_flags = determine_vm_flags(file, prot, flags, capabilities);
 
+
 	/* we're going to need to record the mapping */
 	region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
 	if (!region)
@@ -1051,6 +1087,9 @@ unsigned long do_mmap(struct file *file,
 	if (!vma)
 		goto error_getting_vma;
 
+	if (mas_preallocate(&mas, vma, GFP_KERNEL))
+		goto error_maple_preallocate;
+
 	region->vm_usage = 1;
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
@@ -1191,7 +1230,7 @@ unsigned long do_mmap(struct file *file,
 	current->mm->total_vm += len >> PAGE_SHIFT;
 
 share:
-	add_vma_to_mm(current->mm, vma);
+	mas_add_vma_to_mm(&mas, current->mm, vma);
 
 	/* we flush the region from the icache only when the first executable
 	 * mapping of it is made  */
@@ -1217,6 +1256,7 @@ unsigned long do_mmap(struct file *file,
 
 sharing_violation:
 	up_write(&nommu_region_sem);
+	mas_destroy(&mas);
 	pr_warn("Attempt to share mismatched mappings\n");
 	ret = -EINVAL;
 	goto error;
@@ -1233,6 +1273,14 @@ unsigned long do_mmap(struct file *file,
 			len, current->pid);
 	show_free_areas(0, NULL);
 	return -ENOMEM;
+
+error_maple_preallocate:
+	kmem_cache_free(vm_region_jar, region);
+	vm_area_free(vma);
+	pr_warn("Allocation of vma tree for process %d failed\n", current->pid);
+	show_free_areas(0, NULL);
+	return -ENOMEM;
+
 }
 
 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
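Note: in do_mmap() the preallocation happens as soon as the VMA exists, while unwinding is still trivial, and the reserved nodes then take one of two paths: mas_add_vma_to_mm() consumes them on success, and mas_destroy() hands them back on the sharing_violation path, where the mapping is abandoned after the preallocation. Sketched (kernel context; labels as in the patch):

	MA_STATE(mas, &current->mm->mm_mt, 0, 0);

	if (mas_preallocate(&mas, vma, GFP_KERNEL))
		goto error_maple_preallocate;	/* free region + vma, -ENOMEM */

	/* success path: the reserved nodes are consumed here */
	mas_add_vma_to_mm(&mas, current->mm, vma);

	/* sharing_violation path: nothing was stored, so instead */
	mas_destroy(&mas);			/* give unused nodes back */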
@@ -1298,6 +1346,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *new;
 	struct vm_region *region;
 	unsigned long npages;
+	MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);
 
 	/* we're only permitted to split anonymous regions (these should have
 	 * only a single usage on the region) */
@@ -1312,9 +1361,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	new = vm_area_dup(vma);
-	if (!new) {
-		kmem_cache_free(vm_region_jar, region);
-		return -ENOMEM;
+	if (!new)
+		goto err_vma_dup;
+
+	if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
+		pr_warn("Allocation of vma tree for process %d failed\n",
+			current->pid);
+		goto err_mas_preallocate;
 	}
 
 	/* most fields are the same, copy all, and then fixup */
@@ -1333,7 +1386,6 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
-	delete_vma_from_mm(vma);
 	down_write(&nommu_region_sem);
 	delete_nommu_region(vma->vm_region);
 	if (new_below) {
@@ -1346,9 +1398,19 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	add_nommu_region(vma->vm_region);
 	add_nommu_region(new->vm_region);
 	up_write(&nommu_region_sem);
-	add_vma_to_mm(mm, vma);
-	add_vma_to_mm(mm, new);
+
+	setup_vma_to_mm(vma, mm);
+	setup_vma_to_mm(new, mm);
+	mas_set_range(&mas, vma->vm_start, vma->vm_end - 1);
+	mas_store(&mas, vma);
+	vma_mas_store(new, &mas);
 	return 0;
+
+err_mas_preallocate:
+	vm_area_free(new);
+err_vma_dup:
+	kmem_cache_free(vm_region_jar, region);
+	return -ENOMEM;
 }
 
 /*
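Note: split_vma() no longer deletes and re-adds the VMA around the split; the preallocated state rewrites the tree in place. Maple tree ranges are inclusive of both bounds, which is why the stores end at vm_end - 1. Roughly (kernel context):

	/* shrink the original entry to its new boundaries ... */
	mas_set_range(&mas, vma->vm_start, vma->vm_end - 1);
	mas_store(&mas, vma);
	/* ... then store the second half over its own range;
	 * vma_mas_store(new, &mas) is shorthand for exactly this */
	mas_set_range(&mas, new->vm_start, new->vm_end - 1);
	mas_store(&mas, new);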
@@ -1363,12 +1425,14 @@ static int shrink_vma(struct mm_struct *mm,
 
 	/* adjust the VMA's pointers, which may reposition it in the MM's tree
 	 * and list */
-	delete_vma_from_mm(vma);
+	if (delete_vma_from_mm(vma))
+		return -ENOMEM;
 	if (from > vma->vm_start)
 		vma->vm_end = from;
 	else
 		vma->vm_start = to;
-	add_vma_to_mm(mm, vma);
+	if (add_vma_to_mm(mm, vma))
+		return -ENOMEM;
 
 	/* cut the backing region down to size */
 	region = vma->vm_region;
@@ -1396,9 +1460,10 @@ static int shrink_vma(struct mm_struct *mm,
  */
 int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
 {
+	MA_STATE(mas, &mm->mm_mt, start, start);
 	struct vm_area_struct *vma;
 	unsigned long end;
-	int ret;
+	int ret = 0;
 
 	len = PAGE_ALIGN(len);
 	if (len == 0)
@@ -1407,7 +1472,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
 	end = start + len;
 
 	/* find the first potentially overlapping VMA */
-	vma = find_vma(mm, start);
+	vma = mas_find(&mas, end - 1);
 	if (!vma) {
 		static int limit;
 		if (limit < 5) {
@@ -1426,7 +1491,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
 				return -EINVAL;
 			if (end == vma->vm_end)
 				goto erase_whole_vma;
-			vma = vma->vm_next;
+			vma = mas_next(&mas, end - 1);
 		} while (vma);
 		return -EINVAL;
 	} else {
@@ -1448,9 +1513,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
 	}
 
 erase_whole_vma:
-	delete_vma_from_mm(vma);
+	if (delete_vma_from_mm(vma))
+		ret = -ENOMEM;
 	delete_vma(mm, vma);
-	return 0;
+	return ret;
 }
 
 int vm_munmap(unsigned long addr, size_t len)
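Note: the find_vma() + vm_next walk becomes a single maple state bounded by the end of the unmap range: mas_find() positions the state at the first VMA overlapping the range, and mas_next() continues from that position without rescanning from the root. The general shape (kernel context; do_something() is a placeholder):

	MA_STATE(mas, &mm->mm_mt, start, start);
	struct vm_area_struct *vma;

	/* visit every VMA overlapping [start, end) */
	for (vma = mas_find(&mas, end - 1); vma;
	     vma = mas_next(&mas, end - 1))
		do_something(vma);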
@@ -1475,6 +1541,7 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
  */
 void exit_mmap(struct mm_struct *mm)
 {
+	VMA_ITERATOR(vmi, mm, 0);
 	struct vm_area_struct *vma;
 
 	if (!mm)
@@ -1482,13 +1549,18 @@
 
 	mm->total_vm = 0;
-
-	while ((vma = mm->mmap)) {
-		mm->mmap = vma->vm_next;
-		delete_vma_from_mm(vma);
+	/*
+	 * Lock the mm to avoid assert complaining even though this is the only
+	 * user of the mm
+	 */
+	mmap_write_lock(mm);
+	for_each_vma(vmi, vma) {
+		cleanup_vma_from_mm(vma);
 		delete_vma(mm, vma);
 		cond_resched();
 	}
+	__mt_destroy(&mm->mm_mt);
+	mmap_write_unlock(mm);
 }
 
 int vm_brk(unsigned long addr, unsigned long len)
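Note: exit_mmap() swaps the destructive list pop for the VMA iterator and then frees the whole tree with a single __mt_destroy() rather than erasing entry by entry; the write lock is taken only to satisfy the iterator's locking asserts, since an exiting mm has no other users. for_each_vma(vmi, vma) is roughly a while loop over vma_next(&vmi), so the teardown amounts to (kernel-context sketch):

	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;

	mmap_write_lock(mm);
	while ((vma = vma_next(&vmi)) != NULL) {	/* ~ for_each_vma() */
		cleanup_vma_from_mm(vma);	/* per-entry bookkeeping */
		delete_vma(mm, vma);		/* free region + vma */
	}
	__mt_destroy(&mm->mm_mt);	/* free all maple nodes at once */
	mmap_write_unlock(mm);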