Unverified Commit b0757196 authored by openeuler-ci-bot, committed by Gitee
Browse files

!3352 support userswap feature

Merge Pull Request from: @ci-robot 
 
PR sync from: Peng Zhang <zhangpeng362@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7CGN5HLEJLLCN2XRLF4JUOO2ADKHZASE/ 
From: ZhangPeng <zhangpeng362@huawei.com>

This patch series supports the userswap feature, including registration,
unregistration, swap-out and swap-in.

The userswap feature depends on CONFIG_USERSWAP and can be enabled with
the enable_userswap kernel command-line option.

We tested the concurrent scenario of multi-threaded page faults and
multi-threaded swap-in with the uswap demo; the remapping in the
swap-out phase and the copy-free function in the swap-in phase both worked.
During the test, related debugging functions including CONFIG_DEBUG_VM,
lockdep, slub debug, kasan and kmemleak were enabled.

ChangeLog:
v3->v4:
- define UFFDIO_REGISTER_MODE_USWAP ((__u64)1<<10) rather than
  ((__u64)1<<3)

v2->v3:
- update patch 7 commit

v1->v2:
- enable CONFIG_USERSWAP for openeuler_defconfig

ZhangPeng (7):
  mm/userswap: add VM_USWAP and SWP_USERSWAP_ENTRY
  mm/userswap: add enable_userswap boot option
  mm/userswap: introduce MREMAP_USWAP_SET_PTE
  mm/userswap: support userswap via userfaultfd
  mm/userswap: introduce UFFDIO_COPY_MODE_DIRECT_MAP
  mm/userswap: provide cpu info in userfault msg
  mm/userswap: openeuler_defconfig: enable userswap


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8KESX 
 
Link:https://gitee.com/openeuler/kernel/pulls/3352

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Liu Chao <liuchao173@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents dd04bc62 945d1ffa
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1104,6 +1104,7 @@ CONFIG_SECRETMEM=y
# CONFIG_ANON_VMA_NAME is not set
CONFIG_USERFAULTFD=y
CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y
CONFIG_USERSWAP=y
CONFIG_LRU_GEN=y
# CONFIG_LRU_GEN_ENABLED is not set
# CONFIG_LRU_GEN_STATS is not set
+1 −0
Original line number Diff line number Diff line
@@ -1125,6 +1125,7 @@ CONFIG_USERFAULTFD=y
CONFIG_HAVE_ARCH_USERFAULTFD_WP=y
CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y
CONFIG_PTE_MARKER_UFFD_WP=y
CONFIG_USERSWAP=y
CONFIG_LRU_GEN=y
# CONFIG_LRU_GEN_ENABLED is not set
# CONFIG_LRU_GEN_STATS is not set
+3 −0
Original line number Diff line number Diff line
@@ -700,6 +700,9 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
#ifdef CONFIG_X86_USER_SHADOW_STACK
		[ilog2(VM_SHADOW_STACK)] = "ss",
#endif
#ifdef CONFIG_USERSWAP
		[ilog2(VM_USWAP)]	= "us",
#endif /* CONFIG_USERSWAP */
	};
	size_t i;

+39 −4
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <linux/hugetlb.h>
#include <linux/swapops.h>
#include <linux/miscdevice.h>
#include <linux/userswap.h>

static int sysctl_unprivileged_userfaultfd __read_mostly;

@@ -268,6 +269,9 @@ static inline struct uffd_msg userfault_msg(unsigned long address,
		msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_MINOR;
	if (features & UFFD_FEATURE_THREAD_ID)
		msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
#ifdef CONFIG_USERSWAP
	uswap_get_cpu_id(reason, &msg);
#endif
	return msg;
}

@@ -373,6 +377,9 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
	 * ptes here.
	 */
	ptent = ptep_get(pte);
#ifdef CONFIG_USERSWAP
	uswap_must_wait(reason, ptent, &ret);
#endif
	if (pte_none_mostly(ptent))
		ret = true;
	if (!pte_write(ptent) && (reason & VM_UFFD_WP))
@@ -442,10 +449,14 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	BUG_ON(ctx->mm != mm);

	/* Any unrecognized flag is a bug. */
	VM_BUG_ON(reason & ~__VM_UFFD_FLAGS);
	VM_BUG_ON(reason & ~(__VM_UFFD_FLAGS | VM_USWAP));
	/* 0 or > 1 flags set is a bug; we expect exactly 1. */
	VM_BUG_ON(!reason || (reason & (reason - 1)));

	if (IS_ENABLED(CONFIG_USERSWAP) && (reason == VM_UFFD_MISSING) &&
	    (vma->vm_flags & VM_USWAP))
		reason |= VM_USWAP;

	if (ctx->features & UFFD_FEATURE_SIGBUS)
		goto out;
	if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY))
@@ -520,6 +531,10 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	uwq.wq.private = current;
	uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
				reason, ctx->features);
#ifdef CONFIG_USERSWAP
	if ((reason & VM_USWAP) && pte_none(vmf->orig_pte))
		uwq.msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_FIRST;
#endif
	uwq.ctx = ctx;
	uwq.waken = false;

@@ -921,7 +936,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
			prev = vma;
			continue;
		}
		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
		new_flags = vma->vm_flags & ~(__VM_UFFD_FLAGS | VM_USWAP);
		prev = vma_merge(&vmi, mm, prev, vma->vm_start, vma->vm_end,
				 new_flags, vma->anon_vma,
				 vma->vm_file, vma->vm_pgoff,
@@ -1326,6 +1341,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
	unsigned long start, end, vma_end;
	struct vma_iterator vmi;
	pgoff_t pgoff;
#ifdef CONFIG_USERSWAP
	bool uswap_mode = false;
#endif

	user_uffdio_register = (struct uffdio_register __user *) arg;

@@ -1337,6 +1355,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
	ret = -EINVAL;
	if (!uffdio_register.mode)
		goto out;
#ifdef CONFIG_USERSWAP
	if (!uswap_register(&uffdio_register, &uswap_mode))
		goto out;
#endif
	if (uffdio_register.mode & ~UFFD_API_REGISTER_MODES)
		goto out;
	vm_flags = 0;
@@ -1359,6 +1381,13 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
			     uffdio_register.range.len);
	if (ret)
		goto out;
#ifdef CONFIG_USERSWAP
	if (unlikely(uswap_mode)) {
		ret = -EINVAL;
		if (!uswap_adjust_uffd_range(&uffdio_register, &vm_flags, mm))
			goto out;
	}
#endif

	start = uffdio_register.range.start;
	end = start + uffdio_register.range.len;
@@ -1663,7 +1692,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
		if (userfaultfd_wp(vma))
			uffd_wp_range(vma, start, vma_end - start, false);

		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
		new_flags = vma->vm_flags & ~(__VM_UFFD_FLAGS | VM_USWAP);
		pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
		prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
				 vma->anon_vma, vma->vm_file, pgoff,
@@ -1771,10 +1800,16 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
		goto out;

	ret = -EINVAL;
	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP))
	/*
	 * Reject any mode bit outside the supported set. The conditional
	 * must be parenthesized: '|' binds tighter than '?:', so without
	 * the parentheses the whole OR-expression becomes the condition,
	 * the mask collapses to ~UFFDIO_COPY_MODE_DIRECT_MAP, and valid
	 * DONTWAKE/WP requests are rejected with -EINVAL.
	 */
	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE |
				 UFFDIO_COPY_MODE_WP |
				 (IS_ENABLED(CONFIG_USERSWAP) ?
				  UFFDIO_COPY_MODE_DIRECT_MAP : 0)))
		goto out;
	if (uffdio_copy.mode & UFFDIO_COPY_MODE_WP)
		flags |= MFILL_ATOMIC_WP;
	if (IS_ENABLED(CONFIG_USERSWAP) &&
	    (uffdio_copy.mode & UFFDIO_COPY_MODE_DIRECT_MAP))
		flags |= MFILL_ATOMIC_DIRECT_MAP;
	if (mmget_not_zero(ctx->mm)) {
		ret = mfill_atomic_copy(ctx->mm, uffdio_copy.dst, uffdio_copy.src,
					uffdio_copy.len, &ctx->mmap_changing,
+8 −0
Original line number Diff line number Diff line
@@ -313,6 +313,13 @@ extern unsigned int kobjsize(const void *objp);
#define VM_NOHUGEPAGE	0x40000000	/* MADV_NOHUGEPAGE marked this vma */
#define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */

#ifdef CONFIG_USERSWAP
# define VM_USWAP_BIT	62
#define VM_USWAP	BIT(VM_USWAP_BIT)
#else /* !CONFIG_USERSWAP */
#define VM_USWAP	VM_NONE
#endif /* CONFIG_USERSWAP */

#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
#define VM_HIGH_ARCH_BIT_0	32	/* bit only usable on 64-bit architectures */
#define VM_HIGH_ARCH_BIT_1	33	/* bit only usable on 64-bit architectures */
@@ -2485,6 +2492,7 @@ int set_page_dirty_lock(struct page *page);

int get_cmdline(struct task_struct *task, char *buffer, int buflen);

extern pud_t *get_old_pud(struct mm_struct *mm, unsigned long addr);
extern unsigned long move_page_tables(struct vm_area_struct *vma,
		unsigned long old_addr, struct vm_area_struct *new_vma,
		unsigned long new_addr, unsigned long len,
Loading