Unverified Commit 39622c1a authored by openeuler-ci-bot, committed by Gitee

!3059 Handle more faults under the VMA lock

Merge Pull Request from: @ci-robot 
 
PR sync from: Tong Tiangen <tongtiangen@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/BOA4N5D5I6AZIOQV5BKTK4F2JBETTTR2/ 
At this point, we're handling the majority of file-backed page faults
under the VMA lock, using the ->map_pages entry point.  This patch set
attempts to expand that to the following situations (see the sketch
after this list):

 - We have to do a read.  This could be because we've hit the point in
   the readahead window where we need to kick off the next readahead,
   or because the page is simply not present in cache.
 - We're handling a write fault.  Most applications don't do I/O by
   writing to shared mmaps for very good reasons, but some do, and it'd
   be nice not to make that unnecessarily slow.
 - We're doing a COW of a private mapping (both with the PTE already
   present and with the PTE not present).  These are two different
   codepaths, and I handle both of them in this patch set.
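
To make the retry semantics concrete, here's a rough caller-side sketch
(modelled on x86's do_user_addr_fault(); accounting and error handling
are elided, so treat it as an approximation, not this patch set's code).
Whenever a handler below bails out with VM_FAULT_RETRY after releasing
the VMA lock, the arch code retries the fault under the mmap_lock:

	vma = lock_vma_under_rcu(mm, address);
	if (!vma)
		goto lock_mmap;

	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
	/* The handler drops the VMA lock itself on RETRY/COMPLETED. */
	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);
	if (!(fault & VM_FAULT_RETRY))
		goto done;	/* handled entirely under the VMA lock */

lock_mmap:
	mmap_read_lock(mm);
	/* ... traditional fully-locked fault path ... */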

There is no support in this patch set for drivers to mark themselves
as being VMA lock friendly; they could implement the ->map_pages
vm_operation, but if they do, they would be the first.  This is probably
something we want to change at some point in the future, and I've marked
where to make that change in the code.
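
For illustration only, such an opt-in could look like the sketch below;
the mydrv_* names are hypothetical, and the signal is simply the
presence of ->map_pages, which is what vmf_can_call_fault() in the
diff tests:

	/*
	 * Hypothetical sketch: providing ->map_pages is currently the
	 * only way a driver is treated as "audited, doesn't need the
	 * mmap_lock" by vmf_can_call_fault().
	 */
	static const struct vm_operations_struct mydrv_vm_ops = {
		.fault		= mydrv_fault,
		.map_pages	= mydrv_map_pages,	/* opt in to VMA-lock faults */
	};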

There is very little performance change in the benchmarks we've run,
mostly because the vast majority of page faults are handled through the
other paths.  I still think this patch series is useful for workloads
that may take these paths more often, and just for cleaning up the
fault path in general (it's now clearer why we have to retry in these
cases).

Matthew Wilcox (Oracle) (6):
  mm: make lock_folio_maybe_drop_mmap() VMA lock aware
  mm: call wp_page_copy() under the VMA lock
  mm: handle shared faults under the VMA lock
  mm: handle COW faults under the VMA lock
  mm: handle read faults under the VMA lock
  mm: handle write faults to RO pages under the VMA lock


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8K7C7 
 
Link: https://gitee.com/openeuler/kernel/pulls/3059

 

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents 357e8ab1 00214085
mm/filemap.c (+7 −6)
@@ -3104,7 +3104,7 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,

	/*
	 * NOTE! This will make us return with VM_FAULT_RETRY, but with
-	 * the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
+	 * the fault lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
	 * is supposed to work. We have way too many special cases..
	 */
	if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
@@ -3114,13 +3114,14 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
	if (vmf->flags & FAULT_FLAG_KILLABLE) {
		if (__folio_lock_killable(folio)) {
			/*
-			 * We didn't have the right flags to drop the mmap_lock,
-			 * but all fault_handlers only check for fatal signals
-			 * if we return VM_FAULT_RETRY, so we need to drop the
-			 * mmap_lock here and return 0 if we don't have a fpin.
+			 * We didn't have the right flags to drop the
+			 * fault lock, but all fault_handlers only check
+			 * for fatal signals if we return VM_FAULT_RETRY,
+			 * so we need to drop the fault lock here and
+			 * return 0 if we don't have a fpin.
			 */
			if (*fpin == NULL)
-				mmap_read_unlock(vmf->vma->vm_mm);
+				release_fault_lock(vmf);
			return 0;
		}
	} else
mm/memory.c (+58 −35)
@@ -3044,6 +3044,36 @@ static inline void wp_page_reuse(struct vm_fault *vmf, struct folio *folio)
	count_vm_event(PGREUSE);
}

+/*
+ * We could add a bitflag somewhere, but for now, we know that all
+ * vm_ops that have a ->map_pages have been audited and don't need
+ * the mmap_lock to be held.
+ */
+static inline vm_fault_t vmf_can_call_fault(const struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+
+	if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK))
+		return 0;
+	vma_end_read(vma);
+	return VM_FAULT_RETRY;
+}
+
+static vm_fault_t vmf_anon_prepare(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+
+	if (likely(vma->anon_vma))
+		return 0;
+	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		vma_end_read(vma);
+		return VM_FAULT_RETRY;
+	}
+	if (__anon_vma_prepare(vma))
+		return VM_FAULT_OOM;
+	return 0;
+}
+
/*
 * Handle the case of a page which we actually need to copy to a new page,
 * either due to COW or unsharing.
@@ -3071,27 +3101,29 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
	pte_t entry;
	int page_copied = 0;
	struct mmu_notifier_range range;
-	int ret;
+	vm_fault_t ret;

	delayacct_wpcopy_start();

	if (vmf->page)
		old_folio = page_folio(vmf->page);
-	if (unlikely(anon_vma_prepare(vma)))
-		goto oom;
+	ret = vmf_anon_prepare(vmf);
+	if (unlikely(ret))
+		goto out;

	if (is_zero_pfn(pte_pfn(vmf->orig_pte))) {
		new_folio = vma_alloc_zeroed_movable_folio(vma, vmf->address);
		if (!new_folio)
			goto oom;
	} else {
+		int err;
		new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma,
				vmf->address, false);
		if (!new_folio)
			goto oom;

-		ret = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
-		if (ret) {
+		err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf);
+		if (err) {
			/*
			 * COW failed, if the fault was solved by other,
			 * it's fine. If not, userspace would re-fault on
@@ -3104,7 +3136,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
				folio_put(old_folio);

			delayacct_wpcopy_end();
-			return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
+			return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
		}
		kmsan_copy_page_meta(&new_folio->page, vmf->page);
	}
@@ -3214,11 +3246,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
oom_free_new:
	folio_put(new_folio);
oom:
+	ret = VM_FAULT_OOM;
+out:
	if (old_folio)
		folio_put(old_folio);

	delayacct_wpcopy_end();
-	return VM_FAULT_OOM;
+	return ret;
}

/**
@@ -3270,10 +3304,9 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
		vm_fault_t ret;

		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-			vma_end_read(vmf->vma);
-			return VM_FAULT_RETRY;
-		}
+		ret = vmf_can_call_fault(vmf);
+		if (ret)
+			return ret;

		vmf->flags |= FAULT_FLAG_MKWRITE;
		ret = vma->vm_ops->pfn_mkwrite(vmf);
@@ -3297,10 +3330,10 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf, struct folio *folio)
		vm_fault_t tmp;

		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
+		tmp = vmf_can_call_fault(vmf);
+		if (tmp) {
			folio_put(folio);
-			vma_end_read(vmf->vma);
-			return VM_FAULT_RETRY;
+			return tmp;
		}

		tmp = do_page_mkwrite(vmf, folio);
@@ -3444,12 +3477,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
		return 0;
	}
copy:
-	if ((vmf->flags & FAULT_FLAG_VMA_LOCK) && !vma->anon_vma) {
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		vma_end_read(vmf->vma);
-		return VM_FAULT_RETRY;
-	}
-
	/*
	 * Ok, we need to copy. Oh, well..
	 */
@@ -4575,10 +4602,9 @@ static vm_fault_t do_read_fault(struct vm_fault *vmf)
			return ret;
	}

-	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-		vma_end_read(vmf->vma);
-		return VM_FAULT_RETRY;
-	}
+	ret = vmf_can_call_fault(vmf);
+	if (ret)
+		return ret;

	ret = __do_fault(vmf);
	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
@@ -4597,13 +4623,11 @@ static vm_fault_t do_cow_fault(struct vm_fault *vmf)
	struct vm_area_struct *vma = vmf->vma;
	vm_fault_t ret;

-	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-		vma_end_read(vma);
-		return VM_FAULT_RETRY;
-	}
-
-	if (unlikely(anon_vma_prepare(vma)))
-		return VM_FAULT_OOM;
+	ret = vmf_can_call_fault(vmf);
+	if (!ret)
+		ret = vmf_anon_prepare(vmf);
+	if (ret)
+		return ret;

	vmf->cow_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
	if (!vmf->cow_page)
@@ -4642,10 +4666,9 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf)
	vm_fault_t ret, tmp;
	struct folio *folio;

-	if (vmf->flags & FAULT_FLAG_VMA_LOCK) {
-		vma_end_read(vma);
-		return VM_FAULT_RETRY;
-	}
+	ret = vmf_can_call_fault(vmf);
+	if (ret)
+		return ret;

	ret = __do_fault(vmf);
	if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))