Commit d4af56c5 authored by Liam R. Howlett, committed by Andrew Morton
Browse files

mm: start tracking VMAs with maple tree

Start tracking the VMAs with the new maple tree structure in parallel with
the rb_tree.  Add debug and trace events for maple tree operations and
duplicate the rb_tree that is created on forks into the maple tree.

The maple tree is added to the mm_struct (including the mm_init struct),
supported in the required mm/mmap functions, tracked in kernel/fork for
process forking, and used to find the unmapped_area, with its result
checked against what the rbtree finds.

This also moves the mmap_lock() in exit_mmap() since the oom reaper call
does walk the VMAs.  Otherwise lockdep will be unhappy if oom happens.

When splitting a vma fails due to allocations of the maple tree nodes,
the error path in __split_vma() calls new->vm_ops->close(new).  The page
accounting for hugetlb is actually in the close() operation, so it
accounts for the removal of half of the VMA, which was not adjusted.  This
results in a negative exit value.  To avoid the negative charge, set
vm_start = vm_end and vm_pgoff = 0.

There is also a potential accounting issue in special mappings from
insert_vm_struct() failing to allocate, so reverse the charge there in
the failure scenario.

Link: https://lkml.kernel.org/r/20220906194824.2110408-9-Liam.Howlett@oracle.com


Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Tested-by: Yu Zhao <yuzhao@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: SeongJae Park <sj@kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent e15e06a8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -96,6 +96,7 @@ void __init tboot_probe(void)
static pgd_t *tboot_pg_dir;
static struct mm_struct tboot_mm = {
	.mm_rb          = RB_ROOT,
	.mm_mt          = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock),
	.pgd            = swapper_pg_dir,
	.mm_users       = ATOMIC_INIT(2),
	.mm_count       = ATOMIC_INIT(1),
+1 −0
Original line number Diff line number Diff line
@@ -58,6 +58,7 @@ static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;

struct mm_struct efi_mm = {
	.mm_rb			= RB_ROOT,
	.mm_mt			= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
	.mm_users		= ATOMIC_INIT(2),
	.mm_count		= ATOMIC_INIT(1),
	.write_protect_seq      = SEQCNT_ZERO(efi_mm.write_protect_seq),
+5 −0
Original line number Diff line number Diff line
@@ -2567,6 +2567,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
/* mmap.c */
void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);

/* interval_tree.c */
void vma_interval_tree_insert(struct vm_area_struct *node,
@@ -2630,6 +2632,9 @@ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
	bool *need_rmap_locks);
extern void exit_mmap(struct mm_struct *);

void vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas);
void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas);

static inline int check_data_rlimit(unsigned long rlim,
				    unsigned long new,
				    unsigned long start,
+3 −0
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/maple_tree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -486,6 +487,7 @@ struct kioctx_table;
struct mm_struct {
	struct {
		struct vm_area_struct *mmap;		/* list of VMAs */
		struct maple_tree mm_mt;
		struct rb_root mm_rb;
		u64 vmacache_seqnum;                   /* per-thread vmacache */
#ifdef CONFIG_MMU
@@ -697,6 +699,7 @@ struct mm_struct {
	unsigned long cpu_bitmap[];
};

/*
 * Maple tree flags used when initialising an mm_struct's mm_mt (passed as the
 * flags argument of MTREE_INIT_EXT() by the static mm_struct initialisers).
 * NOTE(review): the flag names suggest range-allocation tracking and an
 * externally provided lock (the mm's mmap_lock) -- confirm against
 * <linux/maple_tree.h>.
 */
#define MM_MT_FLAGS	(MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
extern struct mm_struct init_mm;

/* Pointer magic because the dynamic array size confuses some compilers. */
+73 −0
Original line number Diff line number Diff line
@@ -42,6 +42,79 @@ TRACE_EVENT(vm_unmapped_area,
		__entry->low_limit, __entry->high_limit, __entry->align_mask,
		__entry->align_offset)
);

/*
 * vma_mas_szero - trace a maple tree modification over a [start, end] pair
 * that has no VMA attached.
 *
 * @mt:    maple tree being modified
 * @start: first value of the range, recorded as passed in
 * @end:   second value of the range, recorded as passed in
 *
 * The event prints "(NULL)" in the slot where vma_store prints the VMA
 * pointer and tags the record "SNULL".
 * NOTE(review): presumably emitted when NULL is stored over the range to
 * clear it -- confirm against the caller in mm/mmap.c.
 */
TRACE_EVENT(vma_mas_szero,
	TP_PROTO(struct maple_tree *mt, unsigned long start,
		 unsigned long end),

	TP_ARGS(mt, start, end),

	TP_STRUCT__entry(
			__field(struct maple_tree *, mt)
			__field(unsigned long, start)
			__field(unsigned long, end)
	),

	TP_fast_assign(
			__entry->mt		= mt;
			__entry->start		= start;
			__entry->end		= end;
	),

	/* Same "mt_mod <tree>, <entry>, <op>, <start>, <end>," layout as vma_store. */
	TP_printk("mt_mod %p, (NULL), SNULL, %lu, %lu,",
		  __entry->mt,
		  (unsigned long) __entry->start,
		  (unsigned long) __entry->end
	)
);

/*
 * vma_store - trace a VMA being stored into a maple tree.
 *
 * @mt:  maple tree receiving the entry
 * @vma: VMA being stored; its pointer and address range are recorded
 *
 * The range is captured at trace time from the VMA itself, so @vma must be
 * valid when the event fires.
 */
TRACE_EVENT(vma_store,
	TP_PROTO(struct maple_tree *mt, struct vm_area_struct *vma),

	TP_ARGS(mt, vma),

	TP_STRUCT__entry(
			__field(struct maple_tree *, mt)
			__field(struct vm_area_struct *, vma)
			__field(unsigned long, vm_start)
			__field(unsigned long, vm_end)
	),

	TP_fast_assign(
			__entry->mt		= mt;
			__entry->vma		= vma;
			__entry->vm_start	= vma->vm_start;
			/* Recorded as vm_end - 1, i.e. the last address rather than one past it. */
			__entry->vm_end		= vma->vm_end - 1;
	),

	TP_printk("mt_mod %p, (%p), STORE, %lu, %lu,",
		  __entry->mt, __entry->vma,
		  (unsigned long) __entry->vm_start,
		  (unsigned long) __entry->vm_end
	)
);


/*
 * exit_mmap - trace the teardown of an mm's maple tree.
 *
 * @mm: address space being torn down
 *
 * Records both the mm pointer and the address of its embedded mm_mt; only
 * the tree pointer is printed, tagged "DESTROY", so the record lines up with
 * the other "mt_mod" events.  The mm field is still kept in the ring-buffer
 * entry so the event's binary layout is unchanged.
 */
TRACE_EVENT(exit_mmap,
	TP_PROTO(struct mm_struct *mm),

	TP_ARGS(mm),

	TP_STRUCT__entry(
			__field(struct mm_struct *, mm)
			__field(struct maple_tree *, mt)
	),

	TP_fast_assign(
			__entry->mm		= mm;
			__entry->mt		= &mm->mm_mt;
	),

	/*
	 * No trailing "\n": the trace output code appends a newline to every
	 * event format, so an explicit one produces a blank line after each
	 * record.
	 */
	TP_printk("mt_mod %p, DESTROY",
		  __entry->mt
	)
);

#endif

/* This part must be outside protection */
Loading