Commit 9b369d35 authored by Mateusz Guzik, committed by Jinjie Ruan
Browse files

mm: batch unlink_file_vma calls in free_pgd_range

mainline inclusion
from mainline-v6.11-rc1
commit 3577dbb192419e37b6f54aced8777b6c81cd03d4
category: performance
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB1S01

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3577dbb192419e37b6f54aced8777b6c81cd03d4

--------------------------------

Execs of dynamically linked binaries at 20-ish cores are bottlenecked on
the i_mmap_rwsem semaphore, while the biggest singular contributor is
free_pgd_range inducing the lock acquire back-to-back for all consecutive
mappings of a given file.

Tracing the count of said acquires while building the kernel shows:
[1, 2)     799579 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
[2, 3)          0 |                                                    |
[3, 4)       3009 |                                                    |
[4, 5)       3009 |                                                    |
[5, 6)     326442 |@@@@@@@@@@@@@@@@@@@@@                               |

So in particular there were 326442 opportunities to coalesce 5 acquires
into 1.

Doing so increases execs per second by 4% (~50k to ~52k) when running
the benchmark linked below.

The lock remains the main bottleneck, I have not looked at other spots
yet.

Bench can be found here:
http://apollo.backplane.com/DFlyMisc/doexec.c

$ cc -O2 -o shared-doexec doexec.c
$ ./shared-doexec $(nproc)

Note this particular test makes sure binaries are separate, but the
loader is shared.

Stats collected on the patched kernel (+ "noinline") with:
bpftrace -e 'kprobe:unlink_file_vma_batch_process
{ @ = lhist(((struct unlink_vma_file_batch *)arg0)->count, 0, 8, 1); }'

Link: https://lkml.kernel.org/r/20240521234321.359501-1-mjguzik@gmail.com


Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Conflicts:
	mm/internal.h
[Context conflict]
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
parent 776e664e
Loading
Loading
Loading
Loading
+10 −0
Original line number Diff line number Diff line
@@ -1446,4 +1446,14 @@ void __meminit __init_single_page(struct page *page, unsigned long pfn,
#ifdef CONFIG_PAGE_CACHE_LIMIT
unsigned long shrink_memory(unsigned long nr_to_reclaim, bool may_swap);
#endif /* CONFIG_PAGE_CACHE_LIMIT */

/*
 * A batch of vmas pending removal from their file's i_mmap tree.
 * All queued vmas share the same vm_file (unlink_file_vma_batch_add
 * flushes the batch before mixing files), so the i_mmap_rwsem is taken
 * once per batch rather than once per vma.
 */
struct unlink_vma_file_batch {
	int count;				/* number of vmas[] slots in use */
	struct vm_area_struct *vmas[8];		/* queued same-file vmas */
};

/* Reset the batch to empty; must be called before any _add. */
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *);
/* Queue a vma; may flush the batch first (file change or array full). */
void unlink_file_vma_batch_add(struct unlink_vma_file_batch *, struct vm_area_struct *);
/* Flush any vmas still queued; call once when done adding. */
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *);

#endif	/* __MM_INTERNAL_H */
+8 −2
Original line number Diff line number Diff line
@@ -368,6 +368,8 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
		   struct vm_area_struct *vma, unsigned long floor,
		   unsigned long ceiling, bool mm_wr_locked)
{
	struct unlink_vma_file_batch vb;

	do {
		unsigned long addr = vma->vm_start;
		struct vm_area_struct *next;
@@ -387,12 +389,15 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
		if (mm_wr_locked)
			vma_start_write(vma);
		unlink_anon_vmas(vma);
		unlink_file_vma(vma);

		if (is_vm_hugetlb_page(vma)) {
			unlink_file_vma(vma);
			hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
				floor, next ? next->vm_start : ceiling);
		} else {
			unlink_file_vma_batch_init(&vb);
			unlink_file_vma_batch_add(&vb, vma);

			/*
			 * Optimization: gather nearby vmas into one call down
			 */
@@ -405,8 +410,9 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
				if (mm_wr_locked)
					vma_start_write(vma);
				unlink_anon_vmas(vma);
				unlink_file_vma(vma);
				unlink_file_vma_batch_add(&vb, vma);
			}
			unlink_file_vma_batch_final(&vb);
			free_pgd_range(tlb, addr, vma->vm_end,
				floor, next ? next->vm_start : ceiling);
		}
+41 −0
Original line number Diff line number Diff line
@@ -132,6 +132,47 @@ void unlink_file_vma(struct vm_area_struct *vma)
	}
}

/*
 * Reset @vb to an empty batch (count == 0; stale vmas[] entries are
 * harmless since only the first @count slots are ever read).
 */
void unlink_file_vma_batch_init(struct unlink_vma_file_batch *vb)
{
	vb->count = 0;
}

static void unlink_file_vma_batch_process(struct unlink_vma_file_batch *vb)
{
	struct address_space *mapping;
	int i;

	mapping = vb->vmas[0]->vm_file->f_mapping;
	i_mmap_lock_write(mapping);
	for (i = 0; i < vb->count; i++) {
		VM_WARN_ON_ONCE(vb->vmas[i]->vm_file->f_mapping != mapping);
		__remove_shared_vm_struct(vb->vmas[i], mapping);
	}
	i_mmap_unlock_write(mapping);

	unlink_file_vma_batch_init(vb);
}

void unlink_file_vma_batch_add(struct unlink_vma_file_batch *vb,
			       struct vm_area_struct *vma)
{
	if (vma->vm_file == NULL)
		return;

	if ((vb->count > 0 && vb->vmas[0]->vm_file != vma->vm_file) ||
	    vb->count == ARRAY_SIZE(vb->vmas))
		unlink_file_vma_batch_process(vb);

	vb->vmas[vb->count] = vma;
	vb->count++;
}

/*
 * Finish batching: unlink whatever vmas remain queued in @vb.
 * Safe to call on an already-empty batch.
 */
void unlink_file_vma_batch_final(struct unlink_vma_file_batch *vb)
{
	if (vb->count != 0)
		unlink_file_vma_batch_process(vb);
}

/*
 * Close a vm structure and free it.
 */