Commit 220c8d57 authored by Chandan Babu R's avatar Chandan Babu R
Browse files

Merge tag 'scrub-bmap-fixes-6.6_2023-08-10' of...

Merge tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux

 into xfs-6.6-mergeA

xfs: fixes for the block mapping checker

This series amends the file extent map checking code so that nonexistent
cow/attr forks get the ENOENT return they're supposed to; and fixes some
incorrect logic about the presence of a cow fork vs. reflink iflag.

Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Signed-off-by: default avatarChandan Babu R <chandan.babu@oracle.com>

* tag 'scrub-bmap-fixes-6.6_2023-08-10' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: don't check reflink iflag state when checking cow fork
  xfs: simplify returns in xchk_bmap
  xfs: rewrite xchk_inode_is_allocated to work properly
  xfs: hide xfs_inode_is_allocated in scrub common code
parents 939c9de8 e27a1369
Loading
Loading
Loading
Loading
+15 −18
Original line number Diff line number Diff line
@@ -841,7 +841,7 @@ xchk_bmap(

	/* Non-existent forks can be ignored. */
	if (!ifp)
		goto out;
		return -ENOENT;

	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
	info.whichfork = whichfork;
@@ -850,10 +850,10 @@ xchk_bmap(

	switch (whichfork) {
	case XFS_COW_FORK:
		/* No CoW forks on non-reflink inodes/filesystems. */
		if (!xfs_is_reflink_inode(ip)) {
		/* No CoW forks on non-reflink filesystems. */
		if (!xfs_has_reflink(mp)) {
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
			goto out;
			return 0;
		}
		break;
	case XFS_ATTR_FORK:
@@ -873,31 +873,31 @@ xchk_bmap(
		/* No mappings to check. */
		if (whichfork == XFS_COW_FORK)
			xchk_fblock_set_corrupt(sc, whichfork, 0);
		goto out;
		return 0;
	case XFS_DINODE_FMT_EXTENTS:
		break;
	case XFS_DINODE_FMT_BTREE:
		if (whichfork == XFS_COW_FORK) {
			xchk_fblock_set_corrupt(sc, whichfork, 0);
			goto out;
			return 0;
		}

		error = xchk_bmap_btree(sc, whichfork, &info);
		if (error)
			goto out;
			return error;
		break;
	default:
		xchk_fblock_set_corrupt(sc, whichfork, 0);
		goto out;
		return 0;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		goto out;
		return 0;

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		goto out;
		return error;

	/*
	 * Scrub extent records.  We use a special iterator function here that
@@ -910,12 +910,12 @@ xchk_bmap(
	while (xchk_bmap_iext_iter(&info, &irec)) {
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			goto out;
			return 0;

		if (irec.br_startoff >= endoff) {
			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			goto out;
			return 0;
		}

		if (isnullstartblock(irec.br_startblock))
@@ -928,12 +928,12 @@ xchk_bmap(
	if (xchk_bmap_want_check_rmaps(&info)) {
		error = xchk_bmap_check_rmaps(sc, whichfork);
		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
			goto out;
	}
out:
			return error;
	}

	return 0;
}

/* Scrub an inode's data fork. */
int
xchk_bmap_data(
@@ -955,8 +955,5 @@ int
xchk_bmap_cow(
	struct xfs_scrub	*sc)
{
	if (!xfs_is_reflink_inode(sc->ip))
		return -ENOENT;

	return xchk_bmap(sc, XFS_COW_FORK);
}
+152 −0
Original line number Diff line number Diff line
@@ -1230,3 +1230,155 @@ xchk_fsgates_enable(

	sc->flags |= scrub_fsgates;
}

/*
 * Decide if this is a cached inode that's also allocated.  The caller
 * must hold a reference to an AG and the AGI buffer lock to prevent inodes
 * from being allocated or freed.
 *
 * Look up an inode by number in the given file system.  If the inode number
 * is invalid, return -EINVAL.  If the inode is not in cache, return -ENODATA.
 * If the inode is being reclaimed, return -ENODATA because we know the inode
 * cache cannot be updating the ondisk metadata.
 *
 * Otherwise, the incore inode is the one we want, and it is either live,
 * somewhere in the inactivation machinery, or reclaimable.  The inode is
 * allocated if i_mode is nonzero.  In all three cases, the cached inode will
 * be more up to date than the ondisk inode buffer, so we must use the incore
 * i_mode.
 */
int
xchk_inode_is_allocated(
	struct xfs_scrub	*sc,
	xfs_agino_t		agino,
	bool			*inuse)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag = sc->sa.pag;
	xfs_ino_t		ino;
	struct xfs_inode	*ip;
	int			error;

	/* caller must hold perag reference */
	if (pag == NULL) {
		ASSERT(pag != NULL);
		return -EINVAL;
	}

	/* caller must have AGI buffer */
	if (sc->sa.agi_bp == NULL) {
		ASSERT(sc->sa.agi_bp != NULL);
		return -EINVAL;
	}

	/* reject inode numbers outside existing AGs */
	ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
	if (!xfs_verify_ino(mp, ino))
		return -EINVAL;

	/* Assume a cache miss until we find a matching incore inode. */
	error = -ENODATA;
	rcu_read_lock();
	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
	if (!ip) {
		/* cache miss */
		goto out_rcu;
	}

	/*
	 * If the inode number doesn't match, the incore inode got reused
	 * during an RCU grace period and the radix tree hasn't been updated.
	 * This isn't the inode we want.
	 */
	spin_lock(&ip->i_flags_lock);
	if (ip->i_ino != ino)
		goto out_skip;

	trace_xchk_inode_is_allocated(ip);

	/*
	 * We have an incore inode that matches the inode we want, and the
	 * caller holds the perag structure and the AGI buffer.  Let's check
	 * our assumptions below:
	 */

#ifdef DEBUG
	/*
	 * (1) If the incore inode is live (i.e. referenced from the dcache),
	 * it will not be INEW, nor will it be in the inactivation or reclaim
	 * machinery.  The ondisk inode had better be allocated.  This is the
	 * most trivial case.
	 */
	if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
			     XFS_INACTIVATING))) {
		/* live inode */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}

	/*
	 * If the incore inode is INEW, there are several possibilities:
	 *
	 * (2) For a file that is being created, note that we allocate the
	 * ondisk inode before allocating, initializing, and adding the incore
	 * inode to the radix tree.
	 *
	 * (3) If the incore inode is being recycled, the inode has to be
	 * allocated because we don't allow freed inodes to be recycled.
	 * Recycling doesn't touch i_mode.
	 */
	if (ip->i_flags & XFS_INEW) {
		/* created on disk already or recycling */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}

	/*
	 * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
	 * inactivation has not started (!INACTIVATING), it is still allocated.
	 */
	if ((ip->i_flags & XFS_NEED_INACTIVE) &&
	    !(ip->i_flags & XFS_INACTIVATING)) {
		/* definitely before difree */
		ASSERT(VFS_I(ip)->i_mode != 0);
	}
#endif

	/*
	 * If the incore inode is undergoing inactivation (INACTIVATING), there
	 * are two possibilities:
	 *
	 * (5) It is before the point where it would get freed ondisk, in which
	 * case i_mode is still nonzero.
	 *
	 * (6) It has already been freed, in which case i_mode is zero.
	 *
	 * We don't take the ILOCK here, but difree and dialloc update the AGI,
	 * and we've taken the AGI buffer lock, which prevents that from
	 * happening.
	 */

	/*
	 * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
	 * reclaim (IRECLAIMABLE) could be allocated or free.  i_mode still
	 * reflects the ondisk state.
	 */

	/*
	 * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
	 * the flush code uses i_mode to format the ondisk inode.
	 */

	/*
	 * (9) If the inode is in IRECLAIM and was reachable via the radix
	 * tree, it still has the same i_mode as it did before it entered
	 * reclaim.  The inode object is still alive because we hold the RCU
	 * read lock.
	 */

	/* In all remaining cases the incore i_mode is authoritative. */
	*inuse = VFS_I(ip)->i_mode != 0;
	error = 0;

out_skip:
	spin_unlock(&ip->i_flags_lock);
out_rcu:
	rcu_read_unlock();
	return error;
}
+3 −0
Original line number Diff line number Diff line
@@ -203,4 +203,7 @@ static inline bool xchk_need_intent_drain(struct xfs_scrub *sc)

void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);

int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
		bool *inuse);

#endif	/* __XFS_SCRUB_COMMON_H__ */
+1 −2
Original line number Diff line number Diff line
@@ -328,8 +328,7 @@ xchk_iallocbt_check_cluster_ifree(
		goto out;
	}

	error = xfs_icache_inode_is_allocated(mp, bs->cur->bc_tp, fsino,
			&ino_inuse);
	error = xchk_inode_is_allocated(bs->sc, agino, &ino_inuse);
	if (error == -ENODATA) {
		/* Not cached, just read the disk buffer */
		freemask_ok = irec_free ^ !!(dip->di_mode);
+22 −0
Original line number Diff line number Diff line
@@ -640,6 +640,28 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
		  __entry->cluster_ino)
)

/*
 * Record the incore state (inode number, i_flags, i_mode) of an inode that
 * xchk_inode_is_allocated found in the per-AG inode cache.
 */
TRACE_EVENT(xchk_inode_is_allocated,
	TP_PROTO(struct xfs_inode *ip),
	TP_ARGS(ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(unsigned long, iflags)
		__field(umode_t, mode)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->iflags = ip->i_flags;
		__entry->mode = VFS_I(ip)->i_mode;
	),
	TP_printk("dev %d:%d ino 0x%llx iflags 0x%lx mode 0x%x",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->iflags,
		  __entry->mode)
);

TRACE_EVENT(xchk_fscounters_calc,
	TP_PROTO(struct xfs_mount *mp, uint64_t icount, uint64_t ifree,
		 uint64_t fdblocks, uint64_t delalloc),
Loading