Commit b54b55c3 authored by ZhangPeng's avatar ZhangPeng Committed by Peng Zhang
Browse files

mm/userswap: support userswap via userfaultfd

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8KESX


CVE: NA

--------------------------------

This patch modifies userfaultfd to support userswap. VM_USWAP is set
in userfaultfd_register() and cleared in userfaultfd_unregister() and
userfaultfd_release(). do_swap_page() calls do_uswap_page() to handle page
faults on userswap swap entries. uswap_must_wait() is added so that
userfaultfd_must_wait() handles userswap-type userfaults.

Signed-off-by: ZhangPeng <zhangpeng362@huawei.com>
parent 3ef2eace
Loading
Loading
Loading
Loading
+29 −3
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@
#include <linux/hugetlb.h>
#include <linux/swapops.h>
#include <linux/miscdevice.h>
#include <linux/userswap.h>

static int sysctl_unprivileged_userfaultfd __read_mostly;

@@ -373,6 +374,9 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
	 * ptes here.
	 */
	ptent = ptep_get(pte);
#ifdef CONFIG_USERSWAP
	uswap_must_wait(reason, ptent, &ret);
#endif
	if (pte_none_mostly(ptent))
		ret = true;
	if (!pte_write(ptent) && (reason & VM_UFFD_WP))
@@ -442,10 +446,14 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	BUG_ON(ctx->mm != mm);

	/* Any unrecognized flag is a bug. */
	VM_BUG_ON(reason & ~__VM_UFFD_FLAGS);
	VM_BUG_ON(reason & ~(__VM_UFFD_FLAGS | VM_USWAP));
	/* 0 or > 1 flags set is a bug; we expect exactly 1. */
	VM_BUG_ON(!reason || (reason & (reason - 1)));

	if (IS_ENABLED(CONFIG_USERSWAP) && (reason == VM_UFFD_MISSING) &&
	    (vma->vm_flags & VM_USWAP))
		reason |= VM_USWAP;

	if (ctx->features & UFFD_FEATURE_SIGBUS)
		goto out;
	if (!(vmf->flags & FAULT_FLAG_USER) && (ctx->flags & UFFD_USER_MODE_ONLY))
@@ -520,6 +528,10 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	uwq.wq.private = current;
	uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
				reason, ctx->features);
#ifdef CONFIG_USERSWAP
	if ((reason & VM_USWAP) && pte_none(vmf->orig_pte))
		uwq.msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_FIRST;
#endif
	uwq.ctx = ctx;
	uwq.waken = false;

@@ -921,7 +933,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
			prev = vma;
			continue;
		}
		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
		new_flags = vma->vm_flags & ~(__VM_UFFD_FLAGS | VM_USWAP);
		prev = vma_merge(&vmi, mm, prev, vma->vm_start, vma->vm_end,
				 new_flags, vma->anon_vma,
				 vma->vm_file, vma->vm_pgoff,
@@ -1326,6 +1338,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
	unsigned long start, end, vma_end;
	struct vma_iterator vmi;
	pgoff_t pgoff;
#ifdef CONFIG_USERSWAP
	bool uswap_mode = false;
#endif

	user_uffdio_register = (struct uffdio_register __user *) arg;

@@ -1337,6 +1352,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
	ret = -EINVAL;
	if (!uffdio_register.mode)
		goto out;
#ifdef CONFIG_USERSWAP
	if (!uswap_register(&uffdio_register, &uswap_mode))
		goto out;
#endif
	if (uffdio_register.mode & ~UFFD_API_REGISTER_MODES)
		goto out;
	vm_flags = 0;
@@ -1359,6 +1378,13 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
			     uffdio_register.range.len);
	if (ret)
		goto out;
#ifdef CONFIG_USERSWAP
	if (unlikely(uswap_mode)) {
		ret = -EINVAL;
		if (!uswap_adjust_uffd_range(&uffdio_register, &vm_flags, mm))
			goto out;
	}
#endif

	start = uffdio_register.range.start;
	end = start + uffdio_register.range.len;
@@ -1663,7 +1689,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
		if (userfaultfd_wp(vma))
			uffd_wp_range(vma, start, vma_end - start, false);

		new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
		new_flags = vma->vm_flags & ~(__VM_UFFD_FLAGS | VM_USWAP);
		pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
		prev = vma_merge(&vmi, mm, prev, start, vma_end, new_flags,
				 vma->anon_vma, vma->vm_file, pgoff,
+16 −0
Original line number Diff line number Diff line
@@ -21,5 +21,21 @@ extern struct static_key_false userswap_enabled;
unsigned long uswap_mremap(unsigned long old_addr, unsigned long old_len,
			   unsigned long new_addr, unsigned long new_len);

bool uswap_register(struct uffdio_register *uffdio_register, bool *uswap_mode);

bool uswap_adjust_uffd_range(struct uffdio_register *uffdio_register,
			     unsigned long *vm_flags, struct mm_struct *mm);

vm_fault_t do_uswap_page(swp_entry_t entry, struct vm_fault *vmf,
			 struct vm_area_struct *vma);

/*
 * Userswap hook used by userfaultfd_must_wait().
 *
 * Sets *@ret to true when the userswap static key is enabled, @reason
 * carries VM_USWAP and @pte is not present. In every other case *@ret is
 * left exactly as the caller passed it in.
 */
static inline void uswap_must_wait(unsigned long reason, pte_t pte, bool *ret)
{
	if (static_branch_unlikely(&userswap_enabled) &&
	    (reason & VM_USWAP) && !pte_present(pte))
		*ret = true;
}

#endif /* CONFIG_USERSWAP */
#endif /* _LINUX_USERSWAP_H */
+2 −0
Original line number Diff line number Diff line
@@ -152,6 +152,7 @@ struct uffd_msg {
#define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
#define UFFD_PAGEFAULT_FLAG_WP		(1<<1)	/* If reason is VM_UFFD_WP */
#define UFFD_PAGEFAULT_FLAG_MINOR	(1<<2)	/* If reason is VM_UFFD_MINOR */
#define UFFD_PAGEFAULT_FLAG_FIRST	(1<<10) /* USWAP first page fault */

struct uffdio_api {
	/* userland asks for an API number and the features to enable */
@@ -247,6 +248,7 @@ struct uffdio_register {
#define UFFDIO_REGISTER_MODE_MISSING	((__u64)1<<0)
#define UFFDIO_REGISTER_MODE_WP		((__u64)1<<1)
#define UFFDIO_REGISTER_MODE_MINOR	((__u64)1<<2)
#define UFFDIO_REGISTER_MODE_USWAP	((__u64)1<<10)
	__u64 mode;

	/*
+5 −0
Original line number Diff line number Diff line
@@ -77,6 +77,7 @@
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
#include <linux/userswap.h>

#include <trace/events/kmem.h>

@@ -3778,6 +3779,10 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
		goto out;

	entry = pte_to_swp_entry(vmf->orig_pte);
#ifdef CONFIG_USERSWAP
	if (is_userswap_entry(entry))
		return do_uswap_page(entry, vmf, vma);
#endif
	if (unlikely(non_swap_entry(entry))) {
		if (is_migration_entry(entry)) {
			migration_entry_wait(vma->vm_mm, vmf->pmd,
+70 −0
Original line number Diff line number Diff line
@@ -389,6 +389,76 @@ unsigned long uswap_mremap(unsigned long old_addr, unsigned long old_len,
	return ret;
}

/*
 * Recognise and consume the userswap bit of a UFFDIO_REGISTER request.
 *
 * When the userswap static key is off, or the request does not carry
 * UFFDIO_REGISTER_MODE_USWAP, nothing is modified and true is returned.
 * Otherwise the USWAP bit is cleared from @uffdio_register->mode and the
 * remaining mode must be exactly UFFDIO_REGISTER_MODE_MISSING; if so,
 * *@uswap_mode is set to true.
 *
 * Return: false when USWAP was requested together with anything other than
 * plain MISSING mode (the USWAP bit has already been cleared by then),
 * true in all other cases.
 */
bool uswap_register(struct uffdio_register *uffdio_register, bool *uswap_mode)
{
	if (static_branch_unlikely(&userswap_enabled) &&
	    (uffdio_register->mode & UFFDIO_REGISTER_MODE_USWAP)) {
		/* Strip the private bit so later mode validation never sees it. */
		uffdio_register->mode &= ~UFFDIO_REGISTER_MODE_USWAP;

		if (uffdio_register->mode != UFFDIO_REGISTER_MODE_MISSING)
			return false;

		*uswap_mode = true;
	}

	return true;
}

/*
 * Widen a userswap registration range to whole-VMA boundaries, so that
 * registering it does not need to split a VMA (splitting would fragment
 * the address space into more VMAs).
 *
 * @uffdio_register: request whose range.start and range.len are rewritten
 *                   in place on success
 * @vm_flags:        VM_USWAP is OR-ed into *@vm_flags on success
 * @mm:              address space to inspect; its mmap lock is taken for
 *                   read for the duration of the lookup and dropped before
 *                   returning
 *
 * Fails when find_vma() yields no VMA for range.start, when that VMA starts
 * at or beyond the last byte of the requested range, or when any VMA visited
 * by the range iteration is rejected by vma_uswap_compatible().
 *
 * Return: true on success; false on failure, in which case neither the range
 * nor *@vm_flags has been modified.
 */
bool uswap_adjust_uffd_range(struct uffdio_register *uffdio_register,
			     unsigned long *vm_flags, struct mm_struct *mm)
{
	struct vm_area_struct *vma, *cur;
	unsigned long end;
	bool ret = false;

	VMA_ITERATOR(vmi, mm, uffdio_register->range.start);

	/* Address of the last byte of the requested range (inclusive). */
	end = uffdio_register->range.start + uffdio_register->range.len - 1;

	mmap_read_lock(mm);
	vma = find_vma(mm, uffdio_register->range.start);
	if (!vma || vma->vm_start >= end)
		goto out_unlock;
	/* Every VMA the iterator yields up to 'end' must be userswap-capable. */
	for_each_vma_range(vmi, cur, end)
		if (!vma_uswap_compatible(cur))
			goto out_unlock;

	/* Move the start of the range to the start of the first VMA found. */
	uffdio_register->range.start = vma->vm_start;
	vma = find_vma(mm, end);
	/*
	 * If a VMA contains 'end', stretch the length to that VMA's end.
	 * NOTE(review): when no VMA contains 'end', range.len keeps the
	 * caller's value even though range.start was rewritten just above --
	 * confirm that this is the intended result.
	 */
	if (vma && end >= vma->vm_start)
		uffdio_register->range.len = vma->vm_end - uffdio_register->range.start;

	*vm_flags |= VM_USWAP;

	ret = true;
out_unlock:
	mmap_read_unlock(mm);
	return ret;
}

/*
 * Handle a page fault on a userswap swap entry (called from do_swap_page()
 * when is_userswap_entry() matches).
 *
 * @entry: the userswap swap entry that faulted; not used by this function
 * @vmf:   fault descriptor, forwarded to handle_userfault()
 * @vma:   VMA the fault occurred in
 *
 * The fault is rejected with VM_FAULT_SIGBUS, after logging an error, when
 * either:
 *  - the faulting task's comm starts with "uswap" (treated as a nested
 *    fault), or
 *  - @vma does not have VM_UFFD_MISSING set.
 *
 * Return: VM_FAULT_SIGBUS in the cases above, otherwise the result of
 * handle_userfault(vmf, VM_UFFD_MISSING).
 */
vm_fault_t do_uswap_page(swp_entry_t entry, struct vm_fault *vmf,
			 struct vm_area_struct *vma)
{
	const char *process_prefix = "uswap";

	/* print error if we come across a nested fault */
	if (!strncmp(current->comm, process_prefix, strlen(process_prefix))) {
		pr_err("USWAP: fault %lx is triggered by %s\n", vmf->address,
			current->comm);
		return VM_FAULT_SIGBUS;
	}

	if (!(vma->vm_flags & VM_UFFD_MISSING)) {
		/* Terminate the record with '\n', like the message above. */
		pr_err("USWAP: addr %lx flags %lx is not a user swap page\n",
			vmf->address, vma->vm_flags);
		return VM_FAULT_SIGBUS;
	}

	return handle_userfault(vmf, VM_UFFD_MISSING);
}

static int __init enable_userswap_setup(char *str)
{
	static_branch_enable(&userswap_enabled);