Commit ef1616f2 authored by Long Li's avatar Long Li
Browse files

xfs: handle attr node/leaf blocks atomically during inactive

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBALAU
CVE: NA

--------------------------------

When inactiving an unlinked inode and it's attrs, if xlog is shutdown
either during or just after the process of recurse deleting attribute
nodes/leafs in xfs_attr3_root_inactive(), the log will records some
buffer cancel items, but doesn't contain the corresponding extent
entries and inode updates, this is incomplete and inconsistent. Because
of the inactiving process is not completed and the unlinked inode is
still in the agi_unlinked table, it will continue to be inactived after
replaying the log on the next mount, the attr node/leaf blocks' created
record before the cancel items could not be replayed but the inode
does. So we could get corrupted data when reading the canceled blocks.

 XFS (pmem0): Metadata corruption detected at
 xfs_da3_node_read_verify+0x53/0x220, xfs_da3_node block 0x78
 XFS (pmem0): Unmount and run xfs_repair
 XFS (pmem0): First 128 bytes of corrupted metadata buffer:
 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
 XFS (pmem0): metadata I/O error in "xfs_da_read_buf+0x104/0x190" at daddr 0x78 len 8 error 117

To fix this issue, put the index node/leaf block entries and the canceling
of node/leaf blocks in the same transaction. For non-root blocks, it's
straightforward. For root blocks, we need to put block cancel and the
unmap of root block in the same transaction, so we split attr bmap
truncation into two parts.

During attr inactive process, even with any interruption, empty attr node
blocks should never be indexed, as this is a corrupted image for xfs,
while empty attr leaf blocks are allowed. To prevent empty root node
blocks in the attr fork, we convert them to root leaf blocks in such
scenarios.

Fixes: 8fd6df1c ("xfs: atomic drop extent entries when inactiving attr")
Link: https://patchwork.kernel.org/project/xfs/patch/20230613030434.2944173-3-yi.zhang@huaweicloud.com/


Signed-off-by: default avatarLong Li <leo.lilong@huawei.com>
parent b08cfbed
Loading
Loading
Loading
Loading
+51 −43
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ xfs_attr3_node_inactive(
	xfs_daddr_t		parent_blkno, child_blkno;
	struct xfs_buf		*child_bp;
	struct xfs_da3_icnode_hdr ichdr;
	int			error, i;
	int			error;

	/*
	 * Since this code is recursive (gasp!) we must protect ourselves.
@@ -165,7 +165,7 @@ xfs_attr3_node_inactive(
	 * over the leaves removing all of them.  If this is higher up
	 * in the tree, recurse downward.
	 */
	for (i = 0; i < ichdr.count; i++) {
	while (ichdr.count > 0) {
		/*
		 * Read the subsidiary block to see what we have to work with.
		 * Don't do this in a transaction.  This is a depth-first
@@ -215,23 +215,25 @@ xfs_attr3_node_inactive(
		xfs_trans_binval(*trans, child_bp);
		child_bp = NULL;

		/*
		 * If we're not done, re-read the parent to get the next
		 * child block number.
		 */
		if (i + 1 < ichdr.count) {
			struct xfs_da3_icnode_hdr phdr;

		error = xfs_da3_node_read_mapped(*trans, dp,
				parent_blkno, &bp, XFS_ATTR_FORK);
		if (error)
			return error;
			xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr,
						  bp->b_addr);
			child_fsb = be32_to_cpu(phdr.btree[i + 1].before);
			xfs_trans_brelse(*trans, bp);

		/*
		 * Remove entry form parent node, prevents being indexed to.
		 */
		xfs_da3_node_entry_remove(*trans, dp, bp, 0);

		xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr);
		bp = NULL;
		}

		if (ichdr.count > 0) {
			/*
			 * If we're not done, get the next child block number.
			 */
			child_fsb = be32_to_cpu(ichdr.btree[0].before);

			/*
			 * Atomically commit the whole invalidate stuff.
			 */
@@ -239,6 +241,7 @@ xfs_attr3_node_inactive(
			if (error)
				return  error;
		}
	}

	return 0;
}
@@ -254,10 +257,8 @@ xfs_attr3_root_inactive(
	struct xfs_trans	**trans,
	struct xfs_inode	*dp)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_blkinfo	*info;
	struct xfs_buf		*bp;
	xfs_daddr_t		blkno;
	int			error;

	/*
@@ -269,7 +270,6 @@ xfs_attr3_root_inactive(
	error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
	if (error)
		return error;
	blkno = bp->b_bn;

	/*
	 * Invalidate the tree, even if the "tree" is only a single leaf block.
@@ -280,6 +280,16 @@ xfs_attr3_root_inactive(
	case cpu_to_be16(XFS_DA_NODE_MAGIC):
	case cpu_to_be16(XFS_DA3_NODE_MAGIC):
		error = xfs_attr3_node_inactive(trans, dp, bp, 1);
		if (error)
			return error;

		/*
		 * Empty root node blocks are not allowed, convert it to leaf.
		 */
		error = xfs_attr3_leaf_init(*trans, dp, 0);
		if (error)
			return error;
		error = xfs_trans_roll_inode(trans, dp);
		break;
	case cpu_to_be16(XFS_ATTR_LEAF_MAGIC):
	case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
@@ -291,26 +301,6 @@ xfs_attr3_root_inactive(
		xfs_trans_brelse(*trans, bp);
		break;
	}
	if (error)
		return error;

	/*
	 * Invalidate the incore copy of the root block.
	 */
	error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
			XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
	if (error)
		return error;
	error = bp->b_error;
	if (error) {
		xfs_trans_brelse(*trans, bp);
		return error;
	}
	xfs_trans_binval(*trans, bp);	/* remove from cache */
	/*
	 * Commit the invalidate and start the next transaction.
	 */
	error = xfs_trans_roll_inode(trans, dp);

	return error;
}
@@ -329,6 +319,7 @@ xfs_attr_inactive(
{
	struct xfs_trans	*trans;
	struct xfs_mount	*mp;
	struct xfs_buf          *bp;
	int			lock_mode = XFS_ILOCK_SHARED;
	int			error = 0;

@@ -365,10 +356,27 @@ xfs_attr_inactive(
	 * removal below.
	 */
	if (dp->i_af.if_nextents > 0) {
		/*
		 * Invalidate and truncate all blocks but leave the root block.
		 */
		error = xfs_attr3_root_inactive(&trans, dp);
		if (error)
			goto out_shutdown;

		error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK,
				XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount));
		if (error)
			goto out_cancel;

		/*
		 * Invalidate and truncate the root block.
		 */
		error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK);
		if (error)
			goto out_cancel;

		xfs_trans_binval(trans, bp);

		error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
		if (error)
			goto out_cancel;
+2 −1
Original line number Diff line number Diff line
@@ -1530,6 +1530,7 @@ xfs_itruncate_extents_flags(
	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
	ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
	       xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	if (whichfork == XFS_DATA_FORK)
		ASSERT(new_size <= XFS_ISIZE(ip));
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	ASSERT(ip->i_itemp != NULL);