Commit 75e26b59 authored by Lorenzo Stoakes, committed by Wupeng Ma
Browse files

mm: resolve faulty mmap_region() error path behaviour

stable inclusion
from stable-v5.10.231
commit 43323a4e5b3f8ccc08e2f835abfdc7ee9da8f6ed
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB7051
CVE: CVE-2024-53096

Reference: https://lore.kernel.org/linux-mm/99f72d6dc52835126ca6d2e79732d397f6bfa20b.1731670097.git.lorenzo.stoakes@oracle.com/T/

--------------------------------

[ Upstream commit 5de195060b2e251a835f622759550e6202167641 ]

The mmap_region() function is somewhat terrifying, with spaghetti-like
control flow and numerous means by which issues can arise and incomplete
state, memory leaks and other unpleasantness can occur.

A large amount of the complexity arises from trying to handle errors late
in the process of mapping a VMA, which forms the basis of recently
observed issues with resource leaks and observable inconsistent state.

Taking advantage of previous patches in this series we move a number of
checks earlier in the code, simplifying things by moving the core of the
logic into a static internal function __mmap_region().

Doing this allows us to perform a number of checks up front before we do
any real work, and allows us to unwind the writable unmap check
unconditionally as required and to perform a CONFIG_DEBUG_VM_MAPLE_TREE
validation unconditionally also.

We move a number of things here:

1. We preallocate memory for the iterator before we call the file-backed
   memory hook, allowing us to exit early and avoid having to perform
   complicated and error-prone close/free logic. We carefully free
   iterator state on both success and error paths.

2. The enclosing mmap_region() function handles the mapping_map_writable()
   logic early. Previously the logic had the mapping_map_writable() at the
   point of mapping a newly allocated file-backed VMA, and a matching
   mapping_unmap_writable() on success and error paths.

   We now do this unconditionally if this is a file-backed, shared writable
   mapping. A driver may change the flags to eliminate VM_MAYWRITE; however,
   doing so does not invalidate the seal check we just performed, and in any
   case we always decrement the counter in the wrapper.

   We perform a debug assert to ensure a driver does not attempt to do the
   opposite.

3. We also move arch_validate_flags() up into the mmap_region()
   function. This is only relevant on arm64 and sparc64, and the check is
   only meaningful for SPARC with ADI enabled. We explicitly add a warning
   for this arch if a driver invalidates this check, though the code ought
   eventually to be fixed to eliminate the need for this.

With all of these measures in place, we no longer need to explicitly close
the VMA on error paths, as we place all checks which might fail prior to a
call to any driver mmap hook.

This eliminates an entire class of errors, makes the code easier to reason
about and more robust.

Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com


Fixes: deb0f656 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reported-by: Jann Horn <jannh@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Helge Deller <deller@gmx.de>
Cc: James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Will Deacon <will@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Conflicts:
	mm/mmap.c
[Ma Wupeng: conflict with ascend change of __mmap_region]
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
parent 747db89c
Loading
Loading
Loading
Loading
+54 −32
Original line number Diff line number Diff line
@@ -1411,7 +1411,7 @@ static inline bool file_mmap_ok(struct file *file, struct inode *inode,
	return true;
}

static unsigned long __mmap_region(struct mm_struct *mm,
static unsigned long __mmap_region_ext(struct mm_struct *mm,
				   struct file *file, unsigned long addr,
				   unsigned long len, vm_flags_t vm_flags,
				   unsigned long pgoff, struct list_head *uf);
@@ -1606,7 +1606,7 @@ unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file,
	if (flags & MAP_CHECKNODE)
		set_vm_checknode(&vm_flags, flags);

	addr = __mmap_region(mm, file, addr, len, vm_flags, pgoff, uf);
	addr = __mmap_region_ext(mm, file, addr, len, vm_flags, pgoff, uf);
	if (!IS_ERR_VALUE(addr) &&
	    ((vm_flags & VM_LOCKED) ||
	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1852,11 +1852,6 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
			if (error)
				goto free_vma;
		}
		if (vm_flags & VM_SHARED) {
			error = mapping_map_writable(file->f_mapping);
			if (error)
				goto allow_write_and_free_vma;
		}

		/* ->mmap() can change vma->vm_file, but must guarantee that
		 * vma_link() below can deny write-access if VM_DENYWRITE is set
@@ -1866,7 +1861,7 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
		vma->vm_file = get_file(file);
		error = mmap_file(file, vma);
		if (error)
			goto unmap_and_free_vma;
			goto unmap_and_free_file_vma;

		/* Can addr have changed??
		 *
@@ -1877,6 +1872,14 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
		 */
		WARN_ON_ONCE(addr != vma->vm_start);

		/*
		 * Drivers should not permit writability when previously it was
		 * disallowed.
		 */
		VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
				!(vm_flags & VM_MAYWRITE) &&
				(vma->vm_flags & VM_MAYWRITE));

		addr = vma->vm_start;

		/* If vm_flags changed after mmap_file(), we should try merge vma again
@@ -1908,21 +1911,14 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
		vma_set_anonymous(vma);
	}

	/* Allow architectures to sanity-check the vm_flags */
	if (!arch_validate_flags(vma->vm_flags)) {
		error = -EINVAL;
		if (file)
			goto close_and_free_vma;
		else
			goto free_vma;
	}
#ifdef CONFIG_SPARC64
	/* TODO: Fix SPARC ADI! */
	WARN_ON_ONCE(!arch_validate_flags(vm_flags));
#endif

	vma_link(mm, vma, prev, rb_link, rb_parent);
	/* Once vma denies write, undo our temporary denial count */
	if (file) {
unmap_writable:
		if (vm_flags & VM_SHARED)
			mapping_unmap_writable(file->f_mapping);
		if (vm_flags & VM_DENYWRITE)
			allow_write_access(file);
	}
@@ -1957,17 +1953,12 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,

	return addr;

close_and_free_vma:
	vma_close(vma);
unmap_and_free_vma:
unmap_and_free_file_vma:
	vma->vm_file = NULL;
	fput(file);

	/* Undo any partial mapping done by a device driver. */
	unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
	if (vm_flags & VM_SHARED)
		mapping_unmap_writable(file->f_mapping);
allow_write_and_free_vma:
	if (vm_flags & VM_DENYWRITE)
		allow_write_access(file);
free_vma:
@@ -1978,13 +1969,6 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
	return error;
}

/*
 * mmap_region() - map a region on current->mm.
 *
 * Forwards every argument to __mmap_region(), supplying current->mm as the
 * target mm, and hands back its return value unchanged.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *target_mm = current->mm;

	return __mmap_region(target_mm, file, addr, len, vm_flags, pgoff, uf);
}

static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
{
	/*
@@ -3028,6 +3012,44 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
	return __do_munmap(mm, start, len, uf, false);
}

/*
 * __mmap_region_ext() - perform up-front checks, then call __mmap_region().
 *
 * Everything that can fail without involving a driver is done here, before
 * __mmap_region() runs on @mm:
 *
 *  - arch_validate_flags() is consulted first; if it rejects @vm_flags the
 *    function returns -EINVAL without doing anything else.
 *  - For a file-backed VM_SHARED request, mapping_map_writable() is called on
 *    file->f_mapping. A failure is returned to the caller as-is. On success
 *    the matching mapping_unmap_writable() is issued once __mmap_region()
 *    has returned, whether it succeeded or failed.
 *
 * Returns the value produced by __mmap_region(), or one of the errors above.
 */
static unsigned long __mmap_region_ext(struct mm_struct *mm, struct file *file,
				       unsigned long addr, unsigned long len,
				       vm_flags_t vm_flags, unsigned long pgoff,
				       struct list_head *uf)
{
	unsigned long ret;
	bool writable_file_mapping = false;

	/* Allow architectures to sanity-check the vm_flags. */
	if (!arch_validate_flags(vm_flags))
		return -EINVAL;

	/* Map writable and ensure this isn't a sealed memfd. */
	if (file && (vm_flags & VM_SHARED)) {
		int error = mapping_map_writable(file->f_mapping);

		if (error)
			return error;
		writable_file_mapping = true;
	}

	ret = __mmap_region(mm, file, addr, len, vm_flags, pgoff, uf);

	/* Clear our write mapping regardless of error. */
	if (writable_file_mapping)
		mapping_unmap_writable(file->f_mapping);

	/*
	 * @mm is supplied by the caller (see __do_mmap_mm()), so validate the
	 * mm that was actually operated on rather than assuming it is
	 * current->mm.
	 */
	validate_mm(mm);
	return ret;
}

/*
 * mmap_region() - map a region on current->mm.
 *
 * Thin wrapper: passes current->mm plus all of its own arguments to
 * __mmap_region_ext() and returns that function's result unchanged.
 */
unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	struct mm_struct *target_mm = current->mm;

	return __mmap_region_ext(target_mm, file, addr, len, vm_flags,
				 pgoff, uf);
}

static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
{
	int ret;