Commit 55f81639 authored by Shiyang Ruan's avatar Shiyang Ruan Committed by Darrick J. Wong
Browse files

fsdax: factor out helpers to simplify the dax fault code



The dax page fault code is too long and a bit difficult to read. And it
is hard to understand when we trying to add new features. Some of the
PTE/PMD codes have similar logic. So, factor out helper functions to
simplify the code.

Signed-off-by: default avatarShiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: default avatarRitesh Harjani <riteshh@linux.ibm.com>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
[hch: minor cleanups]
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
parent b74b1293
Loading
Loading
Loading
Loading
+84 −69
Original line number Diff line number Diff line
@@ -1255,6 +1255,53 @@ static bool dax_fault_is_synchronous(unsigned long flags,
		&& (iomap->flags & IOMAP_F_DIRTY);
}

/*
 * When handling a synchronous page fault and the inode need a fsync, we can
 * insert the PTE/PMD into page tables only after that fsync happened. Skip
 * insertion for now and return the pfn so that caller can insert it after the
 * fsync is done.
 */
static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
{
	if (WARN_ON_ONCE(!pfnp))
		return VM_FAULT_SIGBUS;
	*pfnp = pfn;
	return VM_FAULT_NEEDDSYNC;
}

static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap,
		loff_t pos)
{
	sector_t sector = dax_iomap_sector(iomap, pos);
	unsigned long vaddr = vmf->address;
	vm_fault_t ret;
	int error = 0;

	switch (iomap->type) {
	case IOMAP_HOLE:
	case IOMAP_UNWRITTEN:
		clear_user_highpage(vmf->cow_page, vaddr);
		break;
	case IOMAP_MAPPED:
		error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, sector,
					  vmf->cow_page, vaddr);
		break;
	default:
		WARN_ON_ONCE(1);
		error = -EIO;
		break;
	}

	if (error)
		return dax_fault_return(error);

	__SetPageUptodate(vmf->cow_page);
	ret = finish_fault(vmf);
	if (!ret)
		return VM_FAULT_DONE_COW;
	return ret;
}

static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       int *iomap_errp, const struct iomap_ops *ops)
{
@@ -1323,30 +1370,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
	}

	if (vmf->cow_page) {
		sector_t sector = dax_iomap_sector(&iomap, pos);

		switch (iomap.type) {
		case IOMAP_HOLE:
		case IOMAP_UNWRITTEN:
			clear_user_highpage(vmf->cow_page, vaddr);
			break;
		case IOMAP_MAPPED:
			error = copy_cow_page_dax(iomap.bdev, iomap.dax_dev,
						  sector, vmf->cow_page, vaddr);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EIO;
			break;
		}

		if (error)
			goto error_finish_iomap;

		__SetPageUptodate(vmf->cow_page);
		ret = finish_fault(vmf);
		if (!ret)
			ret = VM_FAULT_DONE_COW;
		ret = dax_fault_cow_page(vmf, &iomap, pos);
		goto finish_iomap;
	}

@@ -1366,19 +1390,8 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						 0, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PTE into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp)) {
				error = -EIO;
				goto error_finish_iomap;
			}
			*pfnp = pfn;
			ret = VM_FAULT_NEEDDSYNC | major;
			ret = dax_fault_synchronous_pfnp(pfnp, pfn);
			goto finish_iomap;
		}
		trace_dax_insert_mapping(inode, vmf, entry);
@@ -1477,13 +1490,45 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
	return VM_FAULT_FALLBACK;
}

static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
		pgoff_t max_pgoff)
{
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	/*
	 * Make sure that the faulting address's PMD offset (color) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		return true;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vmf->vma->vm_flags & VM_SHARED))
		return true;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vmf->vma->vm_start)
		return true;
	if ((pmd_addr + PMD_SIZE) > vmf->vma->vm_end)
		return true;

	/* If the PMD would extend beyond the file size */
	if ((xas->xa_index | PG_PMD_COLOUR) >= max_pgoff)
		return true;

	return false;
}

static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
			       const struct iomap_ops *ops)
{
	struct vm_area_struct *vma = vmf->vma;
	struct address_space *mapping = vma->vm_file->f_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
	unsigned long pmd_addr = vmf->address & PMD_MASK;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
	bool sync;
	unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
@@ -1506,33 +1551,12 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,

	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);

	/*
	 * Make sure that the faulting address's PMD offset (color) matches
	 * the PMD offset from the start of the file.  This is necessary so
	 * that a PMD range in the page table overlaps exactly with a PMD
	 * range in the page cache.
	 */
	if ((vmf->pgoff & PG_PMD_COLOUR) !=
	    ((vmf->address >> PAGE_SHIFT) & PG_PMD_COLOUR))
		goto fallback;

	/* Fall back to PTEs if we're going to COW */
	if (write && !(vma->vm_flags & VM_SHARED))
		goto fallback;

	/* If the PMD would extend outside the VMA */
	if (pmd_addr < vma->vm_start)
		goto fallback;
	if ((pmd_addr + PMD_SIZE) > vma->vm_end)
		goto fallback;

	if (xas.xa_index >= max_pgoff) {
		result = VM_FAULT_SIGBUS;
		goto out;
	}

	/* If the PMD would extend beyond the file size */
	if ((xas.xa_index | PG_PMD_COLOUR) >= max_pgoff)
	if (dax_fault_check_fallback(vmf, &xas, max_pgoff))
		goto fallback;

	/*
@@ -1584,17 +1608,8 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
		entry = dax_insert_entry(&xas, mapping, vmf, entry, pfn,
						DAX_PMD, write && !sync);

		/*
		 * If we are doing synchronous page fault and inode needs fsync,
		 * we can insert PMD into page tables only after that happens.
		 * Skip insertion for now and return the pfn so that caller can
		 * insert it after fsync is done.
		 */
		if (sync) {
			if (WARN_ON_ONCE(!pfnp))
				goto finish_iomap;
			*pfnp = pfn;
			result = VM_FAULT_NEEDDSYNC;
			result = dax_fault_synchronous_pfnp(pfnp, pfn);
			goto finish_iomap;
		}