Unverified Commit 6f31e5b4 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!12219 v7 xfs: some fix for forcealign

Merge Pull Request from: @ci-robot 
 
PR sync from: Long Li <leo.lilong@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/RKR32TVZ5RP22QGS3G44FORXROPDEUWV/ 
This patch set fixes some bugs in forcealign:

patch 1 ~ 5 : fix tail alignment issues when the filesystem is close to running out of space.
patch 6 : reject forcealign when combined with reflink or a realtime device, as they are not compatible.
patch 7 : only the data fork needs bunmap alignment for forcealign
patch 8 ~ 11 : fix truncate for forcealign

Dave Chinner (3):
  xfs: only allow minlen allocations when near ENOSPC
  xfs: always tail align maxlen allocations
  xfs: align args->minlen for forced allocation alignment

John Garry (1):
  xfs: Don't revert allocated offset for forcealign

Long Li (5):
  xfs: don't attempting non-aligned fallbacks alloc for forcealign
  xfs: simplify extent allocation alignment
  xfs: forcealign not compatible with reflink and realtime device
  xfs: only bunmap align in datafork for forcealign
  xfs: correct the truncate blocksize of forcealign

Zhang Yi (3):
  math64: add rem_u64() to just return the remainder
  iomap: pass blocksize to iomap_truncate_page()
  xfs: refactor the truncating order


-- 
2.39.2
 
https://gitee.com/openeuler/kernel/issues/I9VTE3 
 
Link:https://gitee.com/openeuler/kernel/pulls/12219

 

Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
parents 470ace06 4d559248
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include <linux/bio.h>
#include <linux/sched/signal.h>
#include <linux/migrate.h>
#include <linux/math64.h>
#include "trace.h"

#include "../internal.h"
@@ -1044,11 +1045,10 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
EXPORT_SYMBOL_GPL(iomap_zero_range);

int
iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
		const struct iomap_ops *ops)
iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
		bool *did_zero, const struct iomap_ops *ops)
{
	unsigned int blocksize = i_blocksize(inode);
	unsigned int off = pos & (blocksize - 1);
	unsigned int off = rem_u64(pos, blocksize);

	/* Block boundary? Nothing to do */
	if (!off)
+19 −12
Original line number Diff line number Diff line
@@ -408,20 +408,18 @@ xfs_alloc_compute_diff(
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
static void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
	struct xfs_alloc_arg	*args)
{
	xfs_extlen_t		k;
	xfs_extlen_t	rlen;
	xfs_extlen_t		rlen = args->len;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	if (args->prod <= 1 || rlen < args->mod ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
@@ -2385,14 +2383,23 @@ xfs_alloc_space_available(
	if (available < (int)max(args->total, alloc_len))
		return false;

	if (flags & XFS_ALLOC_FLAG_CHECK)
		return true;

	/*
	 * Clamp maxlen to the amount of free space available for the actual
	 * extent allocation.
	 * If we can't do a maxlen allocation, then we must reduce the size of
	 * the allocation to match the available free space. We know how big
	 * the largest contiguous free space we can allocate is, so that's our
	 * upper bound. However, we don't exactly know what alignment/size
	 * constraints have been placed on the allocation, so we can't
	 * arbitrarily select some new max size. Hence make this a minlen
	 * allocation as we know that will definitely succeed and match the
	 * callers alignment constraints.
	 */
	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
		args->maxlen = available;
	alloc_len = args->maxlen + (args->alignment - 1) + args->minalignslop;
	if (longest < alloc_len) {
		args->maxlen = args->minlen;
		ASSERT(args->maxlen > 0);
		ASSERT(args->maxlen >= args->minlen);
	}

	return true;
+74 −66
Original line number Diff line number Diff line
@@ -3253,33 +3253,52 @@ xfs_bmap_longest_free_extent(
	return error;
}

static void
static int
xfs_bmap_select_minlen(
	struct xfs_bmalloca	*ap,
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		*blen,
	int			notinit)
{
	xfs_extlen_t nlen = 0;

	/* Adjust best length for extent start alignment. */
	if (*blen > args->alignment)
		*blen -= args->alignment;

	if (notinit || *blen < ap->minlen) {
		/*
		 * Since we did a BUF_TRYLOCK above, it is possible that
		 * there is space for this request.
		 */
		args->minlen = ap->minlen;
		nlen = ap->minlen;
	} else if (*blen < args->maxlen) {
		/*
		 * If the best seen length is less than the request length,
		 * use the best as the minimum.
		 */
		args->minlen = *blen;

		nlen = *blen;
	} else {
		/*
		 * Otherwise we've seen an extent as big as maxlen, use that
		 * as the minimum.
		 */
		args->minlen = args->maxlen;
		nlen = args->maxlen;
	}

	if (args->alignment > 1) {
		nlen = rounddown(nlen, args->alignment);
		if (nlen < ap->minlen) {
			if (xfs_inode_forcealign(ap->ip) &&
				(ap->datatype & XFS_ALLOC_USERDATA))
				return -ENOSPC;
			nlen = ap->minlen;
		}
	}
	args->minlen = nlen;
	return 0;
}

STATIC int
xfs_bmap_btalloc_nullfb(
@@ -3311,8 +3330,8 @@ xfs_bmap_btalloc_nullfb(
			break;
	}

	xfs_bmap_select_minlen(ap, args, blen, notinit);
	return 0;
	error = xfs_bmap_select_minlen(ap, args, blen, notinit);
	return error;
}

STATIC int
@@ -3349,7 +3368,9 @@ xfs_bmap_btalloc_filestreams(

	}

	xfs_bmap_select_minlen(ap, args, blen, notinit);
	error = xfs_bmap_select_minlen(ap, args, blen, notinit);
	if (error)
		return error;

	/*
	 * Set the failure fallback case to look in the selected AG as stream
@@ -3419,9 +3440,8 @@ xfs_bmap_btalloc(
	xfs_fileoff_t	orig_offset;
	xfs_extlen_t	orig_length;
	xfs_extlen_t	blen;
	xfs_extlen_t	nextminlen = 0;
	xfs_extlen_t    alignment;
	int		nullfb;		/* true if ap->firstblock isn't set */
	int		isaligned;
	int		tryagain;
	int		error;
	int		stripe_align;
@@ -3480,7 +3500,7 @@ xfs_bmap_btalloc(
	/*
	 * Normal allocation, done through xfs_alloc_vextent.
	 */
	tryagain = isaligned = 0;
	tryagain = 0;
	memset(&args, 0, sizeof(args));
	args.tp = ap->tp;
	args.mp = mp;
@@ -3491,13 +3511,12 @@ xfs_bmap_btalloc(
	 * xfs_get_cowextsz_hint() returns extsz_hint for when forcealign is
	 * set as forcealign and cowextsz_hint are mutually exclusive
	 */
	if (xfs_inode_forcealign(ap->ip) && align) {
	if (xfs_inode_forcealign(ap->ip))
		args.alignment = align;
		if (stripe_align == 0 || stripe_align % align)
			stripe_align = align;
	} else {
	else if (stripe_align)
		args.alignment = stripe_align;
	else
		args.alignment = 1;
	}

	/* Trim the allocation back to the maximum an AG can fit. */
	args.maxlen = min(ap->length, mp->m_ag_max_usable);
@@ -3548,47 +3567,27 @@ xfs_bmap_btalloc(
	 * is only set if the allocation length is >= the stripe unit and the
	 * allocation offset is at the end of file.
	 */
	if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
		if (!ap->offset) {
			args.alignment = stripe_align;
			atype = args.type;
			isaligned = 1;
			/*
			 * Adjust minlen to try and preserve alignment if we
			 * can't guarantee an aligned maxlen extent.
			 */
			if (blen > args.alignment &&
			    blen <= args.maxlen + args.alignment)
				args.minlen = blen - args.alignment;
	args.minalignslop = 0;
		} else {
	if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
		if (args.alignment > 1 && xfs_inode_forcealign(ap->ip)) {
			args.fsbno = NULLFSBLOCK;
			goto alloc_out;
		}
		args.alignment = 1;
	} else if (ap->aeof && ap->offset) {
		/*
		 * First try an exact bno allocation.
		 * If it fails then do a near or start bno
		 * allocation with alignment turned on.
		 */
		alignment = args.alignment;
		atype = args.type;
		tryagain = 1;
		args.type = XFS_ALLOCTYPE_THIS_BNO;
			/*
			 * Compute the minlen+alignment for the
			 * next case.  Set slop so that the value
			 * of minlen+alignment+slop doesn't go up
			 * between the calls.
			 */
			if (blen > stripe_align && blen <= args.maxlen)
				nextminlen = blen - stripe_align;
			else
				nextminlen = args.minlen;
			if (nextminlen + stripe_align > args.minlen + 1)
				args.minalignslop =
					nextminlen + stripe_align -
					args.minlen - 1;
			else
				args.minalignslop = 0;
		}
	} else {
		args.minalignslop = 0;
		args.fsbno = ap->blkno;

		args.alignment = 1;
		args.minalignslop = alignment - args.alignment;
	}
	args.postallocs = 1;
	args.minleft = ap->minleft;
@@ -3607,21 +3606,26 @@ xfs_bmap_btalloc(
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = stripe_align;
		args.minlen = nextminlen;
		args.alignment = alignment;
		args.minalignslop = 0;
		isaligned = 1;
		if ((error = xfs_alloc_vextent(&args)))
			return error;
	}

	if (isaligned && args.fsbno == NULLFSBLOCK &&
		(args.alignment <= 1 || !xfs_inode_forcealign(ap->ip))) {
	if (args.fsbno == NULLFSBLOCK && args.alignment > 1 &&
		xfs_inode_forcealign(ap->ip)) {
		/*
		 * Don't attempt non-aligned fallback allocations
		 * for forcealign
		 */
		goto alloc_out;
	}

	if (args.alignment > 1 && args.fsbno == NULLFSBLOCK) {
		/*
		 * allocation failed, so turn off alignment and
		 * try again.
		 */
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = 0;
		if ((error = xfs_alloc_vextent(&args)))
@@ -3643,6 +3647,8 @@ xfs_bmap_btalloc(
			return error;
		ap->tp->t_flags |= XFS_TRANS_LOWMODE;
	}

alloc_out:
	if (args.fsbno != NULLFSBLOCK) {
		/*
		 * check the allocation happened at the same or higher AG than
@@ -3669,10 +3675,12 @@ xfs_bmap_btalloc(
		 * very fragmented so we're unlikely to be able to satisfy the
		 * hints anyway.
		 */
		if (!(xfs_inode_forcealign(ap->ip) && align)) {
			if (ap->length <= orig_length)
				ap->offset = orig_offset;
			else if (ap->offset + ap->length < orig_offset + orig_length)
				ap->offset = orig_offset + orig_length - ap->length;
		}
		xfs_bmap_btalloc_accounting(ap, &args);
	} else {
		ap->blkno = NULLFSBLOCK;
@@ -5289,7 +5297,7 @@ __xfs_bunmapi(
	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	end = start + len;
	if (xfs_inode_forcealign(ip) && ip->i_d.di_extsize > 1
			&& S_ISREG(VFS_I(ip)->i_mode)) {
		&& S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
		start = roundup_64(start, ip->i_d.di_extsize);
		end = rounddown_64(end, ip->i_d.di_extsize);
		len  = end - start;
+67 −57
Original line number Diff line number Diff line
@@ -769,6 +769,8 @@ xfs_setattr_size(
	int			error;
	uint			lock_flags = 0;
	bool			did_zeroing = false;
	bool                    write_back = false;
	unsigned int            blocksize = 0;

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
@@ -776,6 +778,11 @@ xfs_setattr_size(
	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
		ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0);

	if (xfs_inode_forcealign(ip) && ip->i_d.di_extsize > 1)
		blocksize = ip->i_d.di_extsize << i_blocksize(inode);
	else
		blocksize = i_blocksize(inode);

	oldsize = inode->i_size;
	newsize = iattr->ia_size;

@@ -805,21 +812,8 @@ xfs_setattr_size(
	 */
	inode_dio_wait(inode);

	/*
	 * File data changes must be complete before we start the transaction to
	 * modify the inode.  This needs to be done before joining the inode to
	 * the transaction because the inode cannot be unlocked once it is a
	 * part of the transaction.
	 *
	 * Start with zeroing any data beyond EOF that we may expose on file
	 * extension, or zeroing out the rest of the block on a downward
	 * truncate.
	 */
	if (newsize > oldsize) {
		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
				&did_zeroing, &xfs_buffered_write_iomap_ops);
	} else {
	write_back = newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size;
	if (newsize < oldsize) {
		/*
		 * iomap won't detect a dirty page over an unwritten block (or a
		 * cow block over a hole) and subsequently skips zeroing the
@@ -827,54 +821,70 @@ xfs_setattr_size(
		 * convert the block before the pagecache truncate.
		 */
		error = filemap_write_and_wait_range(inode->i_mapping, newsize,
						     newsize);
					roundup_64(newsize, blocksize) - 1);
		if (error)
			return error;
		error = iomap_truncate_page(inode, newsize, &did_zeroing,
				&xfs_buffered_write_iomap_ops);
	}

		error = iomap_truncate_page(inode, newsize, blocksize,
				&did_zeroing, &xfs_buffered_write_iomap_ops);
		if (error)
			return error;
		/*
		 * We are going to log the inode size change in this transaction
		 * so any previous writes that are beyond the on disk EOF and
		 * the new EOF that have not been written out need to be written
		 * here.  If we do not write the data out, we expose ourselves
		 * to the null files problem. Note that this includes any block
		 * zeroing we did above; otherwise those blocks may not be
		 * zeroed after a crash.
		 */
		if (did_zeroing || write_back) {
			error = filemap_write_and_wait_range(inode->i_mapping,
					min_t(loff_t, ip->i_d.di_size, newsize),
					roundup_64(newsize, blocksize) - 1);
			if (error)
				return error;
		}

		/*
	 * We've already locked out new page faults, so now we can safely remove
	 * pages from the page cache knowing they won't get refaulted until we
	 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
	 * complete. The truncate_setsize() call also cleans partial EOF page
	 * PTEs on extending truncates and hence ensures sub-page block size
	 * filesystems are correctly handled, too.
	 *
	 * We have to do all the page cache truncate work outside the
	 * transaction context as the "lock" order is page lock->log space
	 * reservation as defined by extent allocation in the writeback path.
	 * Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
	 * having already truncated the in-memory version of the file (i.e. made
	 * user visible changes). There's not much we can do about this, except
	 * to hope that the caller sees ENOMEM and retries the truncate
	 * operation.
		 * Update i_size after writeback to make sure the zeroed
		 * blocks have been written out, then drop all of the page
		 * cache beyond the blocksize-aligned new EOF block.
		 *
	 * And we update in-core i_size and truncate page cache beyond newsize
	 * before writeback the [di_size, newsize] range, so we're guaranteed
	 * not to write stale data past the new EOF on truncate down.
		 * We've already locked out new page faults, so now we can
		 * safely remove pages from the page cache knowing they won't
		 * get refaulted until we drop the XFS_MMAP_EXCL lock after the
		 * extent manipulations are complete.
		 */
	truncate_setsize(inode, newsize);
		i_size_write(inode, newsize);
		truncate_pagecache(inode, roundup_64(newsize, blocksize));
	} else {
		/*
		 * Start with zeroing any data beyond EOF that we may expose on
		 * file extension.
		 */
		if (newsize > oldsize) {
			trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
			error = iomap_zero_range(inode, oldsize, newsize - oldsize,
					&did_zeroing, &xfs_buffered_write_iomap_ops);
			if (error)
				return error;
		}

		/*
	 * We are going to log the inode size change in this transaction so
	 * any previous writes that are beyond the on disk EOF and the new
	 * EOF that have not been written out need to be written here.  If we
	 * do not write the data out, we expose ourselves to the null files
	 * problem. Note that this includes any block zeroing we did above;
	 * otherwise those blocks may not be zeroed after a crash.
		 * The truncate_setsize() call also cleans partial EOF page
		 * PTEs on extending truncates and hence ensures sub-page block
		 * size filesystems are correctly handled, too.
		 */
	if (did_zeroing ||
	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
		truncate_setsize(inode, newsize);

		if (did_zeroing || write_back) {
			error = filemap_write_and_wait_range(inode->i_mapping,
					ip->i_d.di_size, newsize - 1);
			if (error)
				return error;
		}
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error)
+18 −1
Original line number Diff line number Diff line
@@ -1658,10 +1658,19 @@ xfs_fc_fill_super(
		}
	}

	if (xfs_has_forcealign(mp))
	if (xfs_has_forcealign(mp)) {
		xfs_warn(mp,
"EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!");

		if (xfs_has_realtime(mp)) {
			xfs_alert(mp,
	"forcealign not supported for realtime device!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}

	}

	if (xfs_has_atomicwrites(mp))
		xfs_warn(mp,
"EXPERIMENTAL atomicwrites feature in use. Use at your own risk!");
@@ -1674,6 +1683,14 @@ xfs_fc_fill_super(
			goto out_filestream_unmount;
		}

		if (xfs_has_forcealign(mp)) {
			xfs_alert(mp,
	"reflink not compatible with forcealign!");
			error = -EINVAL;
			goto out_filestream_unmount;
		}


		if (xfs_globals.always_cow) {
			xfs_info(mp, "using DEBUG-only always_cow mode.");
			mp->m_always_cow = true;
Loading