Unverified Commit 22ff6706 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!786 Support userswap feature

Merge Pull Request from: @anred 
 
This patch series optimizes userswap mainly including swap-in and
swap-out.

We tested the concurrent scenario of multi-threaded page fault and
multi-threaded swap-in in the uswap demo; and the remapping in the
swap-out phase and the copy-free function in the swap-in phase were ok.
During the test, related debugging functions including CONFIG_DEBUG_VM,
lockdep, slub debug, kasan and kmemleak are enabled. 
 
Link: https://gitee.com/openeuler/kernel/pulls/786

 

Reviewed-by: default avatarKefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: default avatarJialin Zhang <zhangjialin11@huawei.com>
Signed-off-by: default avatarJialin Zhang <zhangjialin11@huawei.com>
parents dcc34901 d042e603
Loading
Loading
Loading
Loading
+30 −38
Original line number Diff line number Diff line
@@ -27,13 +27,11 @@
#include <linux/ioctl.h>
#include <linux/security.h>
#include <linux/hugetlb.h>
#include <linux/userswap.h>

int sysctl_unprivileged_userfaultfd __read_mostly = 1;

static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
#ifdef CONFIG_USERSWAP
int enable_userswap;
#endif

/*
 * Start with fault_pending_wqh and fault_wqh so they're more likely
@@ -220,6 +218,9 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
	if (features & UFFD_FEATURE_THREAD_ID)
		msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
#ifdef CONFIG_USERSWAP
	uswap_get_cpu_id(reason, &msg);
#endif
	return msg;
}

@@ -334,8 +335,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
	 * changes under us.
	 */
#ifdef CONFIG_USERSWAP
	if ((reason & VM_USWAP) && (!pte_present(*pte)))
		ret = true;
	uswap_must_wait(reason, *pte, &ret);
#endif
	if (pte_none(*pte))
		ret = true;
@@ -408,8 +408,12 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)

	BUG_ON(ctx->mm != mm);

#ifdef CONFIG_USERSWAP
	VM_BUG_ON(uswap_vm_flag_bug_on(reason));
#else
	VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
	VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
#endif

	if (ctx->features & UFFD_FEATURE_SIGBUS)
		goto out;
@@ -483,6 +487,10 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	uwq.wq.private = current;
	uwq.msg = userfault_msg(vmf->address, vmf->flags, reason,
			ctx->features);
#ifdef CONFIG_USERSWAP
	if (reason & VM_USWAP && pte_none(vmf->orig_pte))
		uwq.msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_FPF;
#endif
	uwq.ctx = ctx;
	uwq.waken = false;

@@ -866,8 +874,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
	for (vma = mm->mmap; vma; vma = vma->vm_next) {
		userfault_flags = VM_UFFD_MISSING | VM_UFFD_WP;
#ifdef CONFIG_USERSWAP
		if (enable_userswap)
			userfault_flags |= VM_USWAP;
		uswap_release(&userfault_flags);
#endif
		cond_resched();
		BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
@@ -1275,6 +1282,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
	bool found;
	bool basic_ioctls;
	unsigned long start, end, vma_end;
#ifdef CONFIG_USERSWAP
	bool uswap_mode = false;
#endif

	user_uffdio_register = (struct uffdio_register __user *) arg;

@@ -1288,26 +1298,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
		goto out;
	vm_flags = 0;
#ifdef CONFIG_USERSWAP
	/*
	 * register the whole vma overlapping with the address range to avoid
	 * splitting the vma.
	 */
	if (enable_userswap && (uffdio_register.mode & UFFDIO_REGISTER_MODE_USWAP)) {
		uffdio_register.mode &= ~UFFDIO_REGISTER_MODE_USWAP;
		if (!uffdio_register.mode)
			goto out;
		vm_flags |= VM_USWAP;
		end = uffdio_register.range.start + uffdio_register.range.len - 1;
		vma = find_vma(mm, uffdio_register.range.start);
		if (!vma)
			goto out;
		uffdio_register.range.start = vma->vm_start;

		vma = find_vma(mm, end);
		if (!vma)
	if (!uswap_register(&uffdio_register, &uswap_mode))
		goto out;
		uffdio_register.range.len = vma->vm_end - uffdio_register.range.start;
	}
#endif
	if (uffdio_register.mode & ~(UFFDIO_REGISTER_MODE_MISSING|
				     UFFDIO_REGISTER_MODE_WP))
@@ -1321,7 +1313,13 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
			     uffdio_register.range.len);
	if (ret)
		goto out;

#ifdef CONFIG_USERSWAP
	if (uswap_mode && !uswap_adjust_uffd_range(&uffdio_register,
						   &vm_flags, mm)) {
		ret = -EINVAL;
		goto out;
	}
#endif
	start = uffdio_register.range.start;
	end = start + uffdio_register.range.len;

@@ -1717,7 +1715,10 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
	ret = -EINVAL;
	if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
		goto out;
	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE |
				 UFFDIO_COPY_MODE_WP |
				 IS_ENABLED(CONFIG_USERSWAP) ?
				 UFFDIO_COPY_MODE_DIRECT_MAP : 0))
		goto out;
	if (mmget_not_zero(ctx->mm)) {
		ret = mcopy_atomic(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
@@ -2029,15 +2030,6 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
	return fd;
}

#ifdef CONFIG_USERSWAP
static int __init enable_userswap_setup(char *str)
{
	enable_userswap = true;
	return 1;
}
__setup("enable_userswap", enable_userswap_setup);
#endif

static int __init userfaultfd_init(void)
{
	userfaultfd_ctx_cachep = kmem_cache_create("userfaultfd_ctx_cache",
+0 −3
Original line number Diff line number Diff line
@@ -31,9 +31,6 @@
#define UFFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS)

extern int sysctl_unprivileged_userfaultfd;
#ifdef CONFIG_USERSWAP
extern int enable_userswap;
#endif

extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

+115 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
 */

#ifndef _LINUX_USERSWAP_H
#define _LINUX_USERSWAP_H

#include <linux/mman.h>
#include <linux/userfaultfd.h>

#ifdef CONFIG_USERSWAP

extern struct static_key_false userswap_enabled;

/*
 * In uswap situation, we use the bit 0 of the returned address to indicate
 * whether the pages are dirty.
 */
#define USWAP_PAGES_DIRTY	1

int mfill_atomic_pte_nocopy(struct mm_struct *dst_mm,
			    pmd_t *dst_pmd,
			    struct vm_area_struct *dst_vma,
			    unsigned long dst_addr,
			    unsigned long src_addr);

unsigned long uswap_mremap(unsigned long old_addr, unsigned long old_len,
			   unsigned long new_addr, unsigned long new_len);

bool uswap_register(struct uffdio_register *uffdio_register, bool *uswap_mode);

bool uswap_adjust_uffd_range(struct uffdio_register *uffdio_register,
			     unsigned long *vm_flags, struct mm_struct *mm);

bool do_uswap_page(swp_entry_t entry, struct vm_fault *vmf,
		   struct vm_area_struct *vma, vm_fault_t *ret);

static inline bool uswap_check_copy(struct vm_area_struct *vma,
				    unsigned long src_addr,
				    unsigned long len, __u64 mode)
{
	bool direct_map = mode & UFFDIO_COPY_MODE_DIRECT_MAP;

	/*
	 * DIRECT_MAP mode and userswap VMAs go strictly together: a
	 * non-userswap VMA must not request it, while a userswap VMA
	 * requires it plus a page-aligned source range that lies fully
	 * inside the user address space.
	 */
	if (!(vma->vm_flags & VM_USWAP))
		return !direct_map;

	if (!direct_map)
		return false;
	if (offset_in_page(src_addr))
		return false;
	/* Second test is overflow-safe: catches src_addr + len > TASK_SIZE */
	if (src_addr > TASK_SIZE || src_addr > TASK_SIZE - len)
		return false;
	return true;
}

static inline bool uswap_validate_mremap_flags(unsigned long flags)
{
	unsigned long allowed = MREMAP_FIXED | MREMAP_MAYMOVE |
				MREMAP_DONTUNMAP;

	/* Without userswap enabled, only the vanilla mremap flags are valid. */
	if (!static_branch_unlikely(&userswap_enabled))
		return !(flags & ~allowed);

	/* MREMAP_USWAP_SET_PTE must not be combined with any other flag. */
	if ((flags & MREMAP_USWAP_SET_PTE) && (flags & ~MREMAP_USWAP_SET_PTE))
		return false;

	return !(flags & ~(allowed | MREMAP_USWAP_SET_PTE));
}

/*
 * Sanity-check the userfault @reason flags when CONFIG_USERSWAP=y.
 * VM_USWAP is only valid together with VM_UFFD_MISSING (and nothing
 * else); otherwise exactly one of VM_UFFD_MISSING / VM_UFFD_WP must be
 * set. Returns true when the combination is a bug.
 */
static inline bool uswap_vm_flag_bug_on(unsigned long reason)
{
	if (reason & ~(VM_UFFD_MISSING | VM_UFFD_WP | VM_USWAP))
		return true;
	if (reason & VM_USWAP)
		return (reason & (VM_UFFD_MISSING | VM_UFFD_WP)) !=
		       VM_UFFD_MISSING;
	return !(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP);
}

/* True when @vma is a userswap VMA registered in MISSING mode. */
static inline bool uswap_missing(struct vm_area_struct *vma)
{
	unsigned long need = VM_USWAP | VM_UFFD_MISSING;

	return (vma->vm_flags & need) == need;
}

/* For userswap faults, report the faulting CPU to userspace via reserved3. */
static inline void uswap_get_cpu_id(unsigned long reason, struct uffd_msg *msg)
{
	if (!(reason & VM_USWAP))
		return;
	msg->reserved3 = smp_processor_id();
}

/* On release, also strip VM_USWAP from VMAs when userswap is enabled. */
static inline void uswap_release(unsigned long *userfault_flags)
{
	if (!static_branch_unlikely(&userswap_enabled))
		return;
	*userfault_flags |= VM_USWAP;
}

/* A userswap fault must wait whenever the PTE is not present. */
static inline void uswap_must_wait(unsigned long reason, pte_t pte, bool *ret)
{
	if (!(reason & VM_USWAP))
		return;
	if (!pte_present(pte))
		*ret = true;
}

#endif /* CONFIG_USERSWAP */

#endif /* _LINUX_USERSWAP_H */
+0 −2
Original line number Diff line number Diff line
@@ -30,8 +30,6 @@
#define MAP_SYNC		0x080000 /* perform synchronous page faults for the mapping */
#define MAP_FIXED_NOREPLACE	0x100000	/* MAP_FIXED which doesn't unmap underlying mapping */

#define MAP_REPLACE		0x1000000

#define MAP_UNINITIALIZED 0x4000000	/* For anonymous mmap, memory could be
					 * uninitialized */

+1 −0
Original line number Diff line number Diff line
@@ -8,6 +8,7 @@
#define MREMAP_MAYMOVE		1
#define MREMAP_FIXED		2
#define MREMAP_DONTUNMAP	4
#define MREMAP_USWAP_SET_PTE	64

#define OVERCOMMIT_GUESS		0
#define OVERCOMMIT_ALWAYS		1
Loading