!12219 v7 xfs: some fix for forcealign (6f31e5b4) · Commits · EulixOS / Software / Kernel

fs/iomap/buffered-io.c

+4 −4

Original line number	Diff line number	Diff line
		@@ -17,6 +17,7 @@
		#include <linux/bio.h>
		#include <linux/sched/signal.h>
		#include <linux/migrate.h>
		#include <linux/math64.h>
		#include "trace.h"

		#include "../internal.h"
		@@ -1044,11 +1045,10 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
		EXPORT_SYMBOL_GPL(iomap_zero_range);

		int
		iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
		const struct iomap_ops *ops)
		iomap_truncate_page(struct inode *inode, loff_t pos, unsigned int blocksize,
		bool *did_zero, const struct iomap_ops *ops)
		{
		unsigned int blocksize = i_blocksize(inode);
		unsigned int off = pos & (blocksize - 1);
		unsigned int off = rem_u64(pos, blocksize);

		/* Block boundary? Nothing to do */
		if (!off)

fs/xfs/libxfs/xfs_alloc.c

+19 −12

Original line number	Diff line number	Diff line
		@@ -408,20 +408,18 @@ xfs_alloc_compute_diff(
		* Fix up the length, based on mod and prod.
		* len should be k * prod + mod for some k.
		* If len is too small it is returned unchanged.
		* If len hits maxlen it is left alone.
		*/
		STATIC void
		static void
		xfs_alloc_fix_len(
		xfs_alloc_arg_t *args) /* allocation argument structure */
		struct xfs_alloc_arg *args)
		{
		xfs_extlen_t k;
		xfs_extlen_t rlen;
		xfs_extlen_t rlen = args->len;

		ASSERT(args->mod < args->prod);
		rlen = args->len;
		ASSERT(rlen >= args->minlen);
		ASSERT(rlen <= args->maxlen);
		if (args->prod <= 1 \|\| rlen < args->mod \|\| rlen == args->maxlen \|\|
		if (args->prod <= 1 \|\| rlen < args->mod \|\|
		(args->mod == 0 && rlen < args->prod))
		return;
		k = rlen % args->prod;
		@@ -2385,14 +2383,23 @@ xfs_alloc_space_available(
		if (available < (int)max(args->total, alloc_len))
		return false;

		if (flags & XFS_ALLOC_FLAG_CHECK)
		return true;

		/*
		* Clamp maxlen to the amount of free space available for the actual
		* extent allocation.
		* If we can't do a maxlen allocation, then we must reduce the size of
		* the allocation to match the available free space. We know how big
		* the largest contiguous free space we can allocate is, so that's our
		* upper bound. However, we don't exactly know what alignment/size
		* constraints have been placed on the allocation, so we can't
		* arbitrarily select some new max size. Hence make this a minlen
		* allocation as we know that will definitely succeed and match the
		* caller's alignment constraints.
		*/
		if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
		args->maxlen = available;
		alloc_len = args->maxlen + (args->alignment - 1) + args->minalignslop;
		if (longest < alloc_len) {
		args->maxlen = args->minlen;
		ASSERT(args->maxlen > 0);
		ASSERT(args->maxlen >= args->minlen);
		}

		return true;

fs/xfs/libxfs/xfs_bmap.c

+74 −66

Original line number	Diff line number	Diff line
		@@ -3253,33 +3253,52 @@ xfs_bmap_longest_free_extent(
		return error;
		}

		static void
		static int
		xfs_bmap_select_minlen(
		struct xfs_bmalloca *ap,
		struct xfs_alloc_arg *args,
		xfs_extlen_t *blen,
		int notinit)
		{
		xfs_extlen_t nlen = 0;

		/* Adjust best length for extent start alignment. */
		if (*blen > args->alignment)
		*blen -= args->alignment;

		if (notinit \|\| *blen < ap->minlen) {
		/*
		* Since we did a BUF_TRYLOCK above, it is possible that
		* there is space for this request.
		*/
		args->minlen = ap->minlen;
		nlen = ap->minlen;
		} else if (*blen < args->maxlen) {
		/*
		* If the best seen length is less than the request length,
		* use the best as the minimum.
		*/
		args->minlen = *blen;

		nlen = *blen;
		} else {
		/*
		* Otherwise we've seen an extent as big as maxlen, use that
		* as the minimum.
		*/
		args->minlen = args->maxlen;
		nlen = args->maxlen;
		}

		if (args->alignment > 1) {
		nlen = rounddown(nlen, args->alignment);
		if (nlen < ap->minlen) {
		if (xfs_inode_forcealign(ap->ip) &&
		(ap->datatype & XFS_ALLOC_USERDATA))
		return -ENOSPC;
		nlen = ap->minlen;
		}
		}
		args->minlen = nlen;
		return 0;
		}

		STATIC int
		xfs_bmap_btalloc_nullfb(
		@@ -3311,8 +3330,8 @@ xfs_bmap_btalloc_nullfb(
		break;
		}

		xfs_bmap_select_minlen(ap, args, blen, notinit);
		return 0;
		error = xfs_bmap_select_minlen(ap, args, blen, notinit);
		return error;
		}

		STATIC int
		@@ -3349,7 +3368,9 @@ xfs_bmap_btalloc_filestreams(

		}

		xfs_bmap_select_minlen(ap, args, blen, notinit);
		error = xfs_bmap_select_minlen(ap, args, blen, notinit);
		if (error)
		return error;

		/*
		* Set the failure fallback case to look in the selected AG as stream
		@@ -3419,9 +3440,8 @@ xfs_bmap_btalloc(
		xfs_fileoff_t orig_offset;
		xfs_extlen_t orig_length;
		xfs_extlen_t blen;
		xfs_extlen_t nextminlen = 0;
		xfs_extlen_t alignment;
		int nullfb; /* true if ap->firstblock isn't set */
		int isaligned;
		int tryagain;
		int error;
		int stripe_align;
		@@ -3480,7 +3500,7 @@ xfs_bmap_btalloc(
		/*
		* Normal allocation, done through xfs_alloc_vextent.
		*/
		tryagain = isaligned = 0;
		tryagain = 0;
		memset(&args, 0, sizeof(args));
		args.tp = ap->tp;
		args.mp = mp;
		@@ -3491,13 +3511,12 @@ xfs_bmap_btalloc(
		* xfs_get_cowextsz_hint() returns extsz_hint when forcealign is
		* set, as forcealign and cowextsz_hint are mutually exclusive
		*/
		if (xfs_inode_forcealign(ap->ip) && align) {
		if (xfs_inode_forcealign(ap->ip))
		args.alignment = align;
		if (stripe_align == 0 \|\| stripe_align % align)
		stripe_align = align;
		} else {
		else if (stripe_align)
		args.alignment = stripe_align;
		else
		args.alignment = 1;
		}

		/* Trim the allocation back to the maximum an AG can fit. */
		args.maxlen = min(ap->length, mp->m_ag_max_usable);
		@@ -3548,47 +3567,27 @@ xfs_bmap_btalloc(
		* is only set if the allocation length is >= the stripe unit and the
		* allocation offset is at the end of file.
		*/
		if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
		if (!ap->offset) {
		args.alignment = stripe_align;
		atype = args.type;
		isaligned = 1;
		/*
		* Adjust minlen to try and preserve alignment if we
		* can't guarantee an aligned maxlen extent.
		*/
		if (blen > args.alignment &&
		blen <= args.maxlen + args.alignment)
		args.minlen = blen - args.alignment;
		args.minalignslop = 0;
		} else {
		if (ap->tp->t_flags & XFS_TRANS_LOWMODE) {
		if (args.alignment > 1 && xfs_inode_forcealign(ap->ip)) {
		args.fsbno = NULLFSBLOCK;
		goto alloc_out;
		}
		args.alignment = 1;
		} else if (ap->aeof && ap->offset) {
		/*
		* First try an exact bno allocation.
		* If it fails then do a near or start bno
		* allocation with alignment turned on.
		*/
		alignment = args.alignment;
		atype = args.type;
		tryagain = 1;
		args.type = XFS_ALLOCTYPE_THIS_BNO;
		/*
		* Compute the minlen+alignment for the
		* next case. Set slop so that the value
		* of minlen+alignment+slop doesn't go up
		* between the calls.
		*/
		if (blen > stripe_align && blen <= args.maxlen)
		nextminlen = blen - stripe_align;
		else
		nextminlen = args.minlen;
		if (nextminlen + stripe_align > args.minlen + 1)
		args.minalignslop =
		nextminlen + stripe_align -
		args.minlen - 1;
		else
		args.minalignslop = 0;
		}
		} else {
		args.minalignslop = 0;
		args.fsbno = ap->blkno;

		args.alignment = 1;
		args.minalignslop = alignment - args.alignment;
		}
		args.postallocs = 1;
		args.minleft = ap->minleft;
		@@ -3607,21 +3606,26 @@ xfs_bmap_btalloc(
		*/
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = stripe_align;
		args.minlen = nextminlen;
		args.alignment = alignment;
		args.minalignslop = 0;
		isaligned = 1;
		if ((error = xfs_alloc_vextent(&args)))
		return error;
		}

		if (isaligned && args.fsbno == NULLFSBLOCK &&
		(args.alignment <= 1 \|\| !xfs_inode_forcealign(ap->ip))) {
		if (args.fsbno == NULLFSBLOCK && args.alignment > 1 &&
		xfs_inode_forcealign(ap->ip)) {
		/*
		* Don't attempt non-aligned fallback allocations
		* for forcealign.
		*/
		goto alloc_out;
		}

		if (args.alignment > 1 && args.fsbno == NULLFSBLOCK) {
		/*
		* allocation failed, so turn off alignment and
		* try again.
		*/
		args.type = atype;
		args.fsbno = ap->blkno;
		args.alignment = 0;
		if ((error = xfs_alloc_vextent(&args)))
		@@ -3643,6 +3647,8 @@ xfs_bmap_btalloc(
		return error;
		ap->tp->t_flags \|= XFS_TRANS_LOWMODE;
		}

		alloc_out:
		if (args.fsbno != NULLFSBLOCK) {
		/*
		* check the allocation happened at the same or higher AG than
		@@ -3669,10 +3675,12 @@ xfs_bmap_btalloc(
		* very fragmented so we're unlikely to be able to satisfy the
		* hints anyway.
		*/
		if (!(xfs_inode_forcealign(ap->ip) && align)) {
		if (ap->length <= orig_length)
		ap->offset = orig_offset;
		else if (ap->offset + ap->length < orig_offset + orig_length)
		ap->offset = orig_offset + orig_length - ap->length;
		}
		xfs_bmap_btalloc_accounting(ap, &args);
		} else {
		ap->blkno = NULLFSBLOCK;
		@@ -5289,7 +5297,7 @@ __xfs_bunmapi(
		isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
		end = start + len;
		if (xfs_inode_forcealign(ip) && ip->i_d.di_extsize > 1
		&& S_ISREG(VFS_I(ip)->i_mode)) {
		&& S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK) {
		start = roundup_64(start, ip->i_d.di_extsize);
		end = rounddown_64(end, ip->i_d.di_extsize);
		len = end - start;

fs/xfs/xfs_iops.c

+67 −57

Original line number	Diff line number	Diff line
		@@ -769,6 +769,8 @@ xfs_setattr_size(
		int error;
		uint lock_flags = 0;
		bool did_zeroing = false;
		bool write_back = false;
		unsigned int blocksize = 0;

		ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
		ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
		@@ -776,6 +778,11 @@ xfs_setattr_size(
		ASSERT((iattr->ia_valid & (ATTR_UID\|ATTR_GID\|ATTR_ATIME\|ATTR_ATIME_SET\|
		ATTR_MTIME_SET\|ATTR_TIMES_SET)) == 0);

		if (xfs_inode_forcealign(ip) && ip->i_d.di_extsize > 1)
		blocksize = ip->i_d.di_extsize << i_blocksize(inode);
		else
		blocksize = i_blocksize(inode);

		oldsize = inode->i_size;
		newsize = iattr->ia_size;

		@@ -805,21 +812,8 @@ xfs_setattr_size(
		*/
		inode_dio_wait(inode);

		/*
		* File data changes must be complete before we start the transaction to
		* modify the inode. This needs to be done before joining the inode to
		* the transaction because the inode cannot be unlocked once it is a
		* part of the transaction.
		*
		* Start with zeroing any data beyond EOF that we may expose on file
		* extension, or zeroing out the rest of the block on a downward
		* truncate.
		*/
		if (newsize > oldsize) {
		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
		&did_zeroing, &xfs_buffered_write_iomap_ops);
		} else {
		write_back = newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size;
		if (newsize < oldsize) {
		/*
		* iomap won't detect a dirty page over an unwritten block (or a
		* cow block over a hole) and subsequently skips zeroing the
		@@ -827,54 +821,70 @@ xfs_setattr_size(
		* convert the block before the pagecache truncate.
		*/
		error = filemap_write_and_wait_range(inode->i_mapping, newsize,
		newsize);
		roundup_64(newsize, blocksize) - 1);
		if (error)
		return error;
		error = iomap_truncate_page(inode, newsize, &did_zeroing,
		&xfs_buffered_write_iomap_ops);
		}

		error = iomap_truncate_page(inode, newsize, blocksize,
		&did_zeroing, &xfs_buffered_write_iomap_ops);
		if (error)
		return error;
		/*
		* We are going to log the inode size change in this transaction
		* so any previous writes that are beyond the on disk EOF and
		* the new EOF that have not been written out need to be written
		* here. If we do not write the data out, we expose ourselves
		* to the null files problem. Note that this includes any block
		* zeroing we did above; otherwise those blocks may not be
		* zeroed after a crash.
		*/
		if (did_zeroing \|\| write_back) {
		error = filemap_write_and_wait_range(inode->i_mapping,
		min_t(loff_t, ip->i_d.di_size, newsize),
		roundup_64(newsize, blocksize) - 1);
		if (error)
		return error;
		}

		/*
		* We've already locked out new page faults, so now we can safely remove
		* pages from the page cache knowing they won't get refaulted until we
		* drop the XFS_MMAP_EXCL lock after the extent manipulations are
		* complete. The truncate_setsize() call also cleans partial EOF page
		* PTEs on extending truncates and hence ensures sub-page block size
		* filesystems are correctly handled, too.
		*
		* We have to do all the page cache truncate work outside the
		* transaction context as the "lock" order is page lock->log space
		* reservation as defined by extent allocation in the writeback path.
		* Hence a truncate can fail with ENOMEM from xfs_trans_alloc(), but
		* having already truncated the in-memory version of the file (i.e. made
		* user visible changes). There's not much we can do about this, except
		* to hope that the caller sees ENOMEM and retries the truncate
		* operation.
		* Update i_size after writing back to make sure the zeroed
		* blocks have been written out, and drop all of the page cache
		* range beyond the blocksize-aligned new EOF block.
		*
		* And we update in-core i_size and truncate page cache beyond newsize
		* before writeback the [di_size, newsize] range, so we're guaranteed
		* not to write stale data past the new EOF on truncate down.
		* We've already locked out new page faults, so now we can
		* safely remove pages from the page cache knowing they won't
		* get refaulted until we drop the XFS_MMAP_EXCL lock after the
		* extent manipulations are complete.
		*/
		truncate_setsize(inode, newsize);
		i_size_write(inode, newsize);
		truncate_pagecache(inode, roundup_64(newsize, blocksize));
		} else {
		/*
		* Start with zeroing any data beyond EOF that we may expose on
		* file extension.
		*/
		if (newsize > oldsize) {
		trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
		error = iomap_zero_range(inode, oldsize, newsize - oldsize,
		&did_zeroing, &xfs_buffered_write_iomap_ops);
		if (error)
		return error;
		}

		/*
		* We are going to log the inode size change in this transaction so
		* any previous writes that are beyond the on disk EOF and the new
		* EOF that have not been written out need to be written here. If we
		* do not write the data out, we expose ourselves to the null files
		* problem. Note that this includes any block zeroing we did above;
		* otherwise those blocks may not be zeroed after a crash.
		* The truncate_setsize() call also cleans partial EOF page
		* PTEs on extending truncates and hence ensures sub-page block
		* size filesystems are correctly handled, too.
		*/
		if (did_zeroing \|\|
		(newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
		truncate_setsize(inode, newsize);

		if (did_zeroing \|\| write_back) {
		error = filemap_write_and_wait_range(inode->i_mapping,
		ip->i_d.di_size, newsize - 1);
		if (error)
		return error;
		}
		}

		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
		if (error)

fs/xfs/xfs_super.c

+18 −1

Original line number	Diff line number	Diff line
		@@ -1658,10 +1658,19 @@ xfs_fc_fill_super(
		}
		}

		if (xfs_has_forcealign(mp))
		if (xfs_has_forcealign(mp)) {
		xfs_warn(mp,
		"EXPERIMENTAL forced data extent alignment feature in use. Use at your own risk!");

		if (xfs_has_realtime(mp)) {
		xfs_alert(mp,
		"forcealign not supported for realtime device!");
		error = -EINVAL;
		goto out_filestream_unmount;
		}

		}

		if (xfs_has_atomicwrites(mp))
		xfs_warn(mp,
		"EXPERIMENTAL atomicwrites feature in use. Use at your own risk!");
		@@ -1674,6 +1683,14 @@ xfs_fc_fill_super(
		goto out_filestream_unmount;
		}

		if (xfs_has_forcealign(mp)) {
		xfs_alert(mp,
		"reflink not compatible with forcealign!");
		error = -EINVAL;
		goto out_filestream_unmount;
		}


		if (xfs_globals.always_cow) {
		xfs_info(mp, "using DEBUG-only always_cow mode.");
		mp->m_always_cow = true;