Commit 65dd814a authored by Christoph Hellwig, committed by Darrick J. Wong

fsdax: switch the fault handlers to use iomap_iter

Avoid the open-coded calls to ->iomap_begin and ->iomap_end and call
iomap_iter instead.
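
The new calling convention, as a minimal sketch (it mirrors the PTE-fault
loop added below; the actual fault handling against iter.iomap is elided):

	struct iomap_iter iter = {
		.inode	= mapping->host,
		.pos	= (loff_t)vmf->pgoff << PAGE_SHIFT,
		.len	= PAGE_SIZE,
		.flags	= IOMAP_FAULT,
	};
	int error;

	/*
	 * iomap_iter() calls ->iomap_begin before handing each mapping to
	 * the loop body, and calls ->iomap_end on the next pass using the
	 * byte count stored in iter.processed; a negative iter.processed
	 * terminates the loop and is returned as the error.
	 */
	while ((error = iomap_iter(&iter, ops)) > 0) {
		/* handle the fault against iter.iomap here */
		iter.processed = PAGE_SIZE;	/* bytes handled this pass */
	}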

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
parent c2436190
fs/dax.c  +75 −118
@@ -1010,7 +1010,7 @@ static sector_t dax_iomap_sector(const struct iomap *iomap, loff_t pos)
 	return (iomap->addr + (pos & PAGE_MASK) - iomap->offset) >> 9;
 }
 
-static int dax_iomap_pfn(struct iomap *iomap, loff_t pos, size_t size,
+static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size,
 			 pfn_t *pfnp)
 {
 	const sector_t sector = dax_iomap_sector(iomap, pos);
@@ -1068,7 +1068,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
 
 #ifdef CONFIG_FS_DAX_PMD
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
-		struct iomap *iomap, void **entry)
+		const struct iomap *iomap, void **entry)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	unsigned long pmd_addr = vmf->address & PMD_MASK;
@@ -1120,7 +1120,7 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #else
 static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
-		struct iomap *iomap, void **entry)
+		const struct iomap *iomap, void **entry)
 {
 	return VM_FAULT_FALLBACK;
 }
@@ -1309,7 +1309,7 @@ static vm_fault_t dax_fault_return(int error)
  * flushed on write-faults (non-cow), but not read-faults.
  */
 static bool dax_fault_is_synchronous(unsigned long flags,
-		struct vm_area_struct *vma, struct iomap *iomap)
+		struct vm_area_struct *vma, const struct iomap *iomap)
 {
 	return (flags & IOMAP_WRITE) && (vma->vm_flags & VM_SYNC)
 		&& (iomap->flags & IOMAP_F_DIRTY);
@@ -1329,22 +1329,22 @@ static vm_fault_t dax_fault_synchronous_pfnp(pfn_t *pfnp, pfn_t pfn)
 	return VM_FAULT_NEEDDSYNC;
 }
 
-static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap,
-		loff_t pos)
+static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf,
+		const struct iomap_iter *iter)
 {
-	sector_t sector = dax_iomap_sector(iomap, pos);
+	sector_t sector = dax_iomap_sector(&iter->iomap, iter->pos);
 	unsigned long vaddr = vmf->address;
 	vm_fault_t ret;
 	int error = 0;
 
-	switch (iomap->type) {
+	switch (iter->iomap.type) {
 	case IOMAP_HOLE:
 	case IOMAP_UNWRITTEN:
 		clear_user_highpage(vmf->cow_page, vaddr);
 		break;
 	case IOMAP_MAPPED:
-		error = copy_cow_page_dax(iomap->bdev, iomap->dax_dev, sector,
-					  vmf->cow_page, vaddr);
+		error = copy_cow_page_dax(iter->iomap.bdev, iter->iomap.dax_dev,
+					  sector, vmf->cow_page, vaddr);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -1363,29 +1363,31 @@ static vm_fault_t dax_fault_cow_page(struct vm_fault *vmf, struct iomap *iomap,
 }
 
 /**
- * dax_fault_actor - Common actor to handle pfn insertion in PTE/PMD fault.
+ * dax_fault_iter - Common actor to handle pfn insertion in PTE/PMD fault.
  * @vmf:	vm fault instance
+ * @iter:	iomap iter
  * @pfnp:	pfn to be returned
  * @xas:	the dax mapping tree of a file
  * @entry:	an unlocked dax entry to be inserted
  * @pmd:	distinguish whether it is a pmd fault
- * @flags:	iomap flags
- * @iomap:	from iomap_begin()
- * @srcmap:	from iomap_begin(), not equal to iomap if it is a CoW
  */
-static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
-		struct xa_state *xas, void **entry, bool pmd,
-		unsigned int flags, struct iomap *iomap, struct iomap *srcmap)
+static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
+		const struct iomap_iter *iter, pfn_t *pfnp,
+		struct xa_state *xas, void **entry, bool pmd)
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+	const struct iomap *iomap = &iter->iomap;
 	size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
 	loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
-	bool sync = dax_fault_is_synchronous(flags, vmf->vma, iomap);
+	bool sync = dax_fault_is_synchronous(iter->flags, vmf->vma, iomap);
 	unsigned long entry_flags = pmd ? DAX_PMD : 0;
 	int err = 0;
 	pfn_t pfn;
 
+	if (!pmd && vmf->cow_page)
+		return dax_fault_cow_page(vmf, iter);
+
 	/* if we are reading UNWRITTEN and HOLE, return a hole. */
 	if (!write &&
 	    (iomap->type == IOMAP_UNWRITTEN || iomap->type == IOMAP_HOLE)) {
@@ -1399,7 +1401,7 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
 		return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS;
 	}
 
-	err = dax_iomap_pfn(iomap, pos, size, &pfn);
+	err = dax_iomap_pfn(&iter->iomap, pos, size, &pfn);
 	if (err)
 		return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
 
@@ -1422,32 +1424,31 @@ static vm_fault_t dax_fault_actor(struct vm_fault *vmf, pfn_t *pfnp,
 static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			       int *iomap_errp, const struct iomap_ops *ops)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	XA_STATE(xas, &mapping->i_pages, vmf->pgoff);
-	struct inode *inode = mapping->host;
-	loff_t pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
-	struct iomap iomap = { .type = IOMAP_HOLE };
-	struct iomap srcmap = { .type = IOMAP_HOLE };
-	unsigned flags = IOMAP_FAULT;
-	int error;
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-	vm_fault_t ret = 0, major = 0;
+	struct iomap_iter iter = {
+		.inode		= mapping->host,
+		.pos		= (loff_t)vmf->pgoff << PAGE_SHIFT,
+		.len		= PAGE_SIZE,
+		.flags		= IOMAP_FAULT,
+	};
+	vm_fault_t ret = 0;
 	void *entry;
+	int error;
 
-	trace_dax_pte_fault(inode, vmf, ret);
+	trace_dax_pte_fault(iter.inode, vmf, ret);
 	/*
 	 * Check whether offset isn't beyond end of file now. Caller is supposed
 	 * to hold locks serializing us with truncate / punch hole so this is
 	 * a reliable test.
 	 */
-	if (pos >= i_size_read(inode)) {
+	if (iter.pos >= i_size_read(iter.inode)) {
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	if (write && !vmf->cow_page)
-		flags |= IOMAP_WRITE;
+	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
+		iter.flags |= IOMAP_WRITE;
 
 	entry = grab_mapping_entry(&xas, mapping, 0);
 	if (xa_is_internal(entry)) {
@@ -1466,59 +1467,34 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		goto unlock_entry;
 	}
 
-	/*
-	 * Note that we don't bother to use iomap_iter here: DAX required
-	 * the file system block size to be equal the page size, which means
-	 * that we never have to deal with more than a single extent here.
-	 */
-	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap, &srcmap);
-	if (iomap_errp)
-		*iomap_errp = error;
-	if (error) {
-		ret = dax_fault_return(error);
-		goto unlock_entry;
-	}
-	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		ret = VM_FAULT_SIGBUS;	/* fs corruption? */
-		goto finish_iomap;
-	}
+	while ((error = iomap_iter(&iter, ops)) > 0) {
+		if (WARN_ON_ONCE(iomap_length(&iter) < PAGE_SIZE)) {
+			iter.processed = -EIO;	/* fs corruption? */
+			continue;
+		}
 
-	if (vmf->cow_page) {
-		ret = dax_fault_cow_page(vmf, &iomap, pos);
-		goto finish_iomap;
-	}
+		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, false);
+		if (ret != VM_FAULT_SIGBUS &&
+		    (iter.iomap.flags & IOMAP_F_NEW)) {
+			count_vm_event(PGMAJFAULT);
+			count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
+			ret |= VM_FAULT_MAJOR;
+		}
 
-	ret = dax_fault_actor(vmf, pfnp, &xas, &entry, false, flags,
-			      &iomap, &srcmap);
-	if (ret == VM_FAULT_SIGBUS)
-		goto finish_iomap;
-
-	/* read/write MAPPED, CoW UNWRITTEN */
-	if (iomap.flags & IOMAP_F_NEW) {
-		count_vm_event(PGMAJFAULT);
-		count_memcg_event_mm(vma->vm_mm, PGMAJFAULT);
-		major = VM_FAULT_MAJOR;
-	}
+		if (!(ret & VM_FAULT_ERROR))
+			iter.processed = PAGE_SIZE;
+	}
 
-finish_iomap:
-	if (ops->iomap_end) {
-		int copied = PAGE_SIZE;
-
-		if (ret & VM_FAULT_ERROR)
-			copied = 0;
-		/*
-		 * The fault is done by now and there's no way back (other
-		 * thread may be already happily using PTE we have installed).
-		 * Just ignore error from ->iomap_end since we cannot do much
-		 * with it.
-		 */
-		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
-	}
+	if (iomap_errp)
+		*iomap_errp = error;
+	if (!ret && error)
+		ret = dax_fault_return(error);
+
 unlock_entry:
 	dax_unlock_entry(&xas, entry);
 out:
-	trace_dax_pte_fault_done(inode, vmf, ret);
-	return ret | major;
+	trace_dax_pte_fault_done(iter.inode, vmf, ret);
+	return ret;
 }
 
 #ifdef CONFIG_FS_DAX_PMD
@@ -1558,28 +1534,29 @@ static bool dax_fault_check_fallback(struct vm_fault *vmf, struct xa_state *xas,
 static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			       const struct iomap_ops *ops)
 {
-	struct vm_area_struct *vma = vmf->vma;
-	struct address_space *mapping = vma->vm_file->f_mapping;
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, PMD_ORDER);
-	bool write = vmf->flags & FAULT_FLAG_WRITE;
-	unsigned int flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
-	struct inode *inode = mapping->host;
+	struct iomap_iter iter = {
+		.inode		= mapping->host,
+		.len		= PMD_SIZE,
+		.flags		= IOMAP_FAULT,
+	};
 	vm_fault_t ret = VM_FAULT_FALLBACK;
-	struct iomap iomap = { .type = IOMAP_HOLE };
-	struct iomap srcmap = { .type = IOMAP_HOLE };
 	pgoff_t max_pgoff;
 	void *entry;
-	loff_t pos;
 	int error;
 
+	if (vmf->flags & FAULT_FLAG_WRITE)
+		iter.flags |= IOMAP_WRITE;
+
 	/*
 	 * Check whether offset isn't beyond end of file now. Caller is
 	 * supposed to hold locks serializing us with truncate / punch hole so
 	 * this is a reliable test.
 	 */
-	max_pgoff = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+	max_pgoff = DIV_ROUND_UP(i_size_read(iter.inode), PAGE_SIZE);
 
-	trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
+	trace_dax_pmd_fault(iter.inode, vmf, max_pgoff, 0);
 
 	if (xas.xa_index >= max_pgoff) {
 		ret = VM_FAULT_SIGBUS;
@@ -1613,45 +1590,25 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		goto unlock_entry;
 	}
 
-	/*
-	 * Note that we don't use iomap_iter here.  We aren't doing I/O, only
-	 * setting up a mapping, so really we're using iomap_begin() as a way
-	 * to look up our filesystem block.
-	 */
-	pos = (loff_t)xas.xa_index << PAGE_SHIFT;
-	error = ops->iomap_begin(inode, pos, PMD_SIZE, flags, &iomap, &srcmap);
-	if (error)
-		goto unlock_entry;
+	iter.pos = (loff_t)xas.xa_index << PAGE_SHIFT;
+	while ((error = iomap_iter(&iter, ops)) > 0) {
+		if (iomap_length(&iter) < PMD_SIZE)
+			continue; /* actually breaks out of the loop */
 
-	if (iomap.offset + iomap.length < pos + PMD_SIZE)
-		goto finish_iomap;
+		ret = dax_fault_iter(vmf, &iter, pfnp, &xas, &entry, true);
+		if (ret != VM_FAULT_FALLBACK)
+			iter.processed = PMD_SIZE;
+	}
 
-	ret = dax_fault_actor(vmf, pfnp, &xas, &entry, true, flags,
-			      &iomap, &srcmap);
-
-finish_iomap:
-	if (ops->iomap_end) {
-		int copied = PMD_SIZE;
-
-		if (ret == VM_FAULT_FALLBACK)
-			copied = 0;
-		/*
-		 * The fault is done by now and there's no way back (other
-		 * thread may be already happily using PMD we have installed).
-		 * Just ignore error from ->iomap_end since we cannot do much
-		 * with it.
-		 */
-		ops->iomap_end(inode, pos, PMD_SIZE, copied, flags, &iomap);
-	}
 unlock_entry:
 	dax_unlock_entry(&xas, entry);
 fallback:
 	if (ret == VM_FAULT_FALLBACK) {
-		split_huge_pmd(vma, vmf->pmd, vmf->address);
+		split_huge_pmd(vmf->vma, vmf->pmd, vmf->address);
 		count_vm_event(THP_FAULT_FALLBACK);
 	}
 out:
-	trace_dax_pmd_fault_done(inode, vmf, max_pgoff, ret);
+	trace_dax_pmd_fault_done(iter.inode, vmf, max_pgoff, ret);
 	return ret;
 }
 #else