Commit e7cef2fe authored by Dave Chinner; committed by Dave Chinner
Browse files

Merge tag 'scrub-detect-refcount-gaps-6.4_2023-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next

xfs: detect incorrect gaps in refcount btree [v24.5]

The next few patchsets address a deficiency in scrub that I found while
QAing the refcount btree scrubber.  If there's a gap between refcount
records, we need to cross-reference that gap with the reverse mappings
to ensure that there are no overlapping records in the rmap btree.  If
we find any, then the refcount btree is not consistent.  This is not a
property that is specific to the refcount btree; they all need to have
this sort of keyspace scanning logic to detect inconsistencies.

To do this accurately, we need to be able to scan the keyspace of a
btree (which we already do) to be able to tell the caller if the
keyspace is empty, sparse, or fully covered by records.  The first few
patches add the keyspace scanner to the generic btree code, along with
the ability to mask off parts of btree keys because when we scan the
rmapbt, we only care about space usage, not the owners.

The final patch closes the scanning gap in the refcountbt scanner.

v23.1: create helpers for the key extraction and comparison functions,
       improve documentation, and eliminate the ->mask_key indirect
       calls

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parents 6858c887 7ac14fa2
Loading
Loading
Loading
Loading
+7 −4
Original line number Diff line number Diff line
@@ -3745,13 +3745,16 @@ xfs_alloc_query_all(
	return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
}

/* Is there a record covering a given extent? */
/*
 * Scan part of the keyspace of the free space and tell us if the area has no
 * records, is fully mapped by records, or is partially filled.
 */
int
xfs_alloc_has_record(
xfs_alloc_has_records(
	struct xfs_btree_cur	*cur,
	xfs_agblock_t		bno,
	xfs_extlen_t		len,
	bool			*exists)
	enum xbtree_recpacking	*outcome)
{
	union xfs_btree_irec	low;
	union xfs_btree_irec	high;
@@ -3761,7 +3764,7 @@ xfs_alloc_has_record(
	memset(&high, 0xFF, sizeof(high));
	high.a.ar_startblock = bno + len - 1;

	return xfs_btree_has_record(cur, &low, &high, exists);
	return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
}

/*
+2 −2
Original line number Diff line number Diff line
@@ -213,8 +213,8 @@ int xfs_alloc_query_range(struct xfs_btree_cur *cur,
int xfs_alloc_query_all(struct xfs_btree_cur *cur, xfs_alloc_query_range_fn fn,
		void *priv);

int xfs_alloc_has_record(struct xfs_btree_cur *cur, xfs_agblock_t bno,
		xfs_extlen_t len, bool *exist);
int xfs_alloc_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,
		xfs_extlen_t len, enum xbtree_recpacking *outcome);

typedef int (*xfs_agfl_walk_fn)(struct xfs_mount *mp, xfs_agblock_t bno,
		void *priv);
+25 −3
Original line number Diff line number Diff line
@@ -260,8 +260,11 @@ STATIC int64_t
xfs_bnobt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->alloc.ar_startblock);

	return (int64_t)be32_to_cpu(k1->alloc.ar_startblock) -
			be32_to_cpu(k2->alloc.ar_startblock);
}
@@ -270,10 +273,14 @@ STATIC int64_t
xfs_cntbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	int64_t				diff;

	ASSERT(!mask || (mask->alloc.ar_blockcount &&
			 mask->alloc.ar_startblock));

	diff =  be32_to_cpu(k1->alloc.ar_blockcount) -
		be32_to_cpu(k2->alloc.ar_blockcount);
	if (diff)
@@ -423,6 +430,19 @@ xfs_cntbt_recs_inorder(
		 be32_to_cpu(r2->alloc.ar_startblock));
}

STATIC enum xbtree_key_contig
xfs_allocbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->alloc.ar_startblock);

	return xbtree_key_contig(be32_to_cpu(key1->alloc.ar_startblock),
				 be32_to_cpu(key2->alloc.ar_startblock));
}

static const struct xfs_btree_ops xfs_bnobt_ops = {
	.rec_len		= sizeof(xfs_alloc_rec_t),
	.key_len		= sizeof(xfs_alloc_key_t),
@@ -443,6 +463,7 @@ static const struct xfs_btree_ops xfs_bnobt_ops = {
	.diff_two_keys		= xfs_bnobt_diff_two_keys,
	.keys_inorder		= xfs_bnobt_keys_inorder,
	.recs_inorder		= xfs_bnobt_recs_inorder,
	.keys_contiguous	= xfs_allocbt_keys_contiguous,
};

static const struct xfs_btree_ops xfs_cntbt_ops = {
@@ -465,6 +486,7 @@ static const struct xfs_btree_ops xfs_cntbt_ops = {
	.diff_two_keys		= xfs_cntbt_diff_two_keys,
	.keys_inorder		= xfs_cntbt_keys_inorder,
	.recs_inorder		= xfs_cntbt_recs_inorder,
	.keys_contiguous	= NULL, /* not needed right now */
};

/* Allocate most of a new allocation btree cursor. */
+18 −1
Original line number Diff line number Diff line
@@ -382,11 +382,14 @@ STATIC int64_t
xfs_bmbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	uint64_t			a = be64_to_cpu(k1->bmbt.br_startoff);
	uint64_t			b = be64_to_cpu(k2->bmbt.br_startoff);

	ASSERT(!mask || mask->bmbt.br_startoff);

	/*
	 * Note: This routine previously casted a and b to int64 and subtracted
	 * them to generate a result.  This lead to problems if b was the
@@ -500,6 +503,19 @@ xfs_bmbt_recs_inorder(
		xfs_bmbt_disk_get_startoff(&r2->bmbt);
}

STATIC enum xbtree_key_contig
xfs_bmbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->bmbt.br_startoff);

	return xbtree_key_contig(be64_to_cpu(key1->bmbt.br_startoff),
				 be64_to_cpu(key2->bmbt.br_startoff));
}

static const struct xfs_btree_ops xfs_bmbt_ops = {
	.rec_len		= sizeof(xfs_bmbt_rec_t),
	.key_len		= sizeof(xfs_bmbt_key_t),
@@ -520,6 +536,7 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
	.buf_ops		= &xfs_bmbt_buf_ops,
	.keys_inorder		= xfs_bmbt_keys_inorder,
	.recs_inorder		= xfs_bmbt_recs_inorder,
	.keys_contiguous	= xfs_bmbt_keys_contiguous,
};

/*
+150 −54
Original line number Diff line number Diff line
@@ -2067,8 +2067,7 @@ xfs_btree_get_leaf_keys(
		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
			rec = xfs_btree_rec_addr(cur, n, block);
			cur->bc_ops->init_high_key_from_rec(&hkey, rec);
			if (cur->bc_ops->diff_two_keys(cur, &hkey, &max_hkey)
					> 0)
			if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
				max_hkey = hkey;
		}

@@ -2096,7 +2095,7 @@ xfs_btree_get_node_keys(
		max_hkey = xfs_btree_high_key_addr(cur, 1, block);
		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
			hkey = xfs_btree_high_key_addr(cur, n, block);
			if (cur->bc_ops->diff_two_keys(cur, hkey, max_hkey) > 0)
			if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
				max_hkey = hkey;
		}

@@ -2183,8 +2182,8 @@ __xfs_btree_updkeys(
		nlkey = xfs_btree_key_addr(cur, ptr, block);
		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
		if (!force_all &&
		    !(cur->bc_ops->diff_two_keys(cur, nlkey, lkey) != 0 ||
		      cur->bc_ops->diff_two_keys(cur, nhkey, hkey) != 0))
		    xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
		    xfs_btree_keycmp_eq(cur, nhkey, hkey))
			break;
		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
		xfs_btree_log_keys(cur, bp, ptr, ptr);
@@ -4716,7 +4715,6 @@ xfs_btree_simple_query_range(
{
	union xfs_btree_rec		*recp;
	union xfs_btree_key		rec_key;
	int64_t				diff;
	int				stat;
	bool				firstrec = true;
	int				error;
@@ -4746,20 +4744,17 @@ xfs_btree_simple_query_range(
		if (error || !stat)
			break;

		/* Skip if high_key(rec) < low_key. */
		/* Skip if low_key > high_key(rec). */
		if (firstrec) {
			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
			firstrec = false;
			diff = cur->bc_ops->diff_two_keys(cur, low_key,
					&rec_key);
			if (diff > 0)
			if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
				goto advloop;
		}

		/* Stop if high_key < low_key(rec). */
		/* Stop if low_key(rec) > high_key. */
		cur->bc_ops->init_key_from_rec(&rec_key, recp);
		diff = cur->bc_ops->diff_two_keys(cur, &rec_key, high_key);
		if (diff > 0)
		if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
			break;

		/* Callback */
@@ -4813,8 +4808,6 @@ xfs_btree_overlapped_query_range(
	union xfs_btree_key		*hkp;
	union xfs_btree_rec		*recp;
	struct xfs_btree_block		*block;
	int64_t				ldiff;
	int64_t				hdiff;
	int				level;
	struct xfs_buf			*bp;
	int				i;
@@ -4854,25 +4847,23 @@ xfs_btree_overlapped_query_range(
					block);

			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
			ldiff = cur->bc_ops->diff_two_keys(cur, &rec_hkey,
					low_key);

			cur->bc_ops->init_key_from_rec(&rec_key, recp);
			hdiff = cur->bc_ops->diff_two_keys(cur, high_key,
					&rec_key);

			/*
			 * If (query's high key < record's low key), then there
			 * are no more interesting records in this block.  Pop
			 * up to the leaf level to find more record blocks.
			 *
			 * If (record's high key >= query's low key) and
			 *    (query's high key >= record's low key), then
			 * this record overlaps the query range; callback.
			 */
			if (ldiff >= 0 && hdiff >= 0) {
			if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
				goto pop_up;
			if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
				error = fn(cur, recp, priv);
				if (error)
					break;
			} else if (hdiff < 0) {
				/* Record is larger than high key; pop. */
				goto pop_up;
			}
			cur->bc_levels[level].ptr++;
			continue;
@@ -4884,15 +4875,18 @@ xfs_btree_overlapped_query_range(
				block);
		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);

		ldiff = cur->bc_ops->diff_two_keys(cur, hkp, low_key);
		hdiff = cur->bc_ops->diff_two_keys(cur, high_key, lkp);

		/*
		 * If (query's high key < pointer's low key), then there are no
		 * more interesting keys in this block.  Pop up one leaf level
		 * to continue looking for records.
		 *
		 * If (pointer's high key >= query's low key) and
		 *    (query's high key >= pointer's low key), then
		 * this record overlaps the query range; follow pointer.
		 */
		if (ldiff >= 0 && hdiff >= 0) {
		if (xfs_btree_keycmp_lt(cur, high_key, lkp))
			goto pop_up;
		if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
			level--;
			error = xfs_btree_lookup_get_block(cur, level, pp,
					&block);
@@ -4907,9 +4901,6 @@ xfs_btree_overlapped_query_range(
#endif
			cur->bc_levels[level].ptr = 1;
			continue;
		} else if (hdiff < 0) {
			/* The low key is larger than the upper range; pop. */
			goto pop_up;
		}
		cur->bc_levels[level].ptr++;
	}
@@ -4937,6 +4928,19 @@ xfs_btree_overlapped_query_range(
	return error;
}

static inline void
xfs_btree_key_from_irec(
	struct xfs_btree_cur		*cur,
	union xfs_btree_key		*key,
	const union xfs_btree_irec	*irec)
{
	union xfs_btree_rec		rec;

	cur->bc_rec = *irec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(key, &rec);
}

/*
 * Query a btree for all records overlapping a given interval of keys.  The
 * supplied function will be called with each record found; return one of the
@@ -4951,21 +4955,15 @@ xfs_btree_query_range(
	xfs_btree_query_range_fn	fn,
	void				*priv)
{
	union xfs_btree_rec		rec;
	union xfs_btree_key		low_key;
	union xfs_btree_key		high_key;

	/* Find the keys of both ends of the interval. */
	cur->bc_rec = *high_rec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(&high_key, &rec);

	cur->bc_rec = *low_rec;
	cur->bc_ops->init_rec_from_cur(cur, &rec);
	cur->bc_ops->init_key_from_rec(&low_key, &rec);
	xfs_btree_key_from_irec(cur, &high_key, high_rec);
	xfs_btree_key_from_irec(cur, &low_key, low_rec);

	/* Enforce low key < high key. */
	if (cur->bc_ops->diff_two_keys(cur, &low_key, &high_key) > 0)
	/* Enforce low key <= high key. */
	if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
		return -EINVAL;

	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
@@ -5027,34 +5025,132 @@ xfs_btree_diff_two_ptrs(
	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
}

/* If there's an extent, we're done. */
struct xfs_btree_has_records {
	/* Keys for the start and end of the range we want to know about. */
	union xfs_btree_key		start_key;
	union xfs_btree_key		end_key;

	/* Mask for key comparisons, if desired. */
	const union xfs_btree_key	*key_mask;

	/* Highest record key we've seen so far. */
	union xfs_btree_key		high_key;

	enum xbtree_recpacking		outcome;
};

STATIC int
xfs_btree_has_record_helper(
xfs_btree_has_records_helper(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*rec,
	void				*priv)
{
	union xfs_btree_key		rec_key;
	union xfs_btree_key		rec_high_key;
	struct xfs_btree_has_records	*info = priv;
	enum xbtree_key_contig		key_contig;

	cur->bc_ops->init_key_from_rec(&rec_key, rec);

	if (info->outcome == XBTREE_RECPACKING_EMPTY) {
		info->outcome = XBTREE_RECPACKING_SPARSE;

		/*
		 * If the first record we find does not overlap the start key,
		 * then there is a hole at the start of the search range.
		 * Classify this as sparse and stop immediately.
		 */
		if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
					info->key_mask))
			return -ECANCELED;
	} else {
		/*
		 * If a subsequent record does not overlap with the any record
		 * we've seen so far, there is a hole in the middle of the
		 * search range.  Classify this as sparse and stop.
		 * If the keys overlap and this btree does not allow overlap,
		 * signal corruption.
		 */
		key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
					&rec_key, info->key_mask);
		if (key_contig == XBTREE_KEY_OVERLAP &&
				!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
			return -EFSCORRUPTED;
		if (key_contig == XBTREE_KEY_GAP)
			return -ECANCELED;
	}

/* Is there a record covering a given range of keys? */
	/*
	 * If high_key(rec) is larger than any other high key we've seen,
	 * remember it for later.
	 */
	cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
	if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
				info->key_mask))
		info->high_key = rec_high_key; /* struct copy */

	return 0;
}

/*
 * Scan part of the keyspace of a btree and tell us if that keyspace does not
 * map to any records; is fully mapped to records; or is partially mapped to
 * records.  This is the btree record equivalent to determining if a file is
 * sparse.
 *
 * For most btree types, the record scan should use all available btree key
 * fields to compare the keys encountered.  These callers should pass NULL for
 * @mask.  However, some callers (e.g.  scanning physical space in the rmapbt)
 * want to ignore some part of the btree record keyspace when performing the
 * comparison.  These callers should pass in a union xfs_btree_key object with
 * the fields that *should* be a part of the comparison set to any nonzero
 * value, and the rest zeroed.
 */
int
xfs_btree_has_record(
xfs_btree_has_records(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_irec	*low,
	const union xfs_btree_irec	*high,
	bool				*exists)
	const union xfs_btree_key	*mask,
	enum xbtree_recpacking		*outcome)
{
	struct xfs_btree_has_records	info = {
		.outcome		= XBTREE_RECPACKING_EMPTY,
		.key_mask		= mask,
	};
	int				error;

	error = xfs_btree_query_range(cur, low, high,
			&xfs_btree_has_record_helper, NULL);
	if (error == -ECANCELED) {
		*exists = true;
		return 0;
	/* Not all btrees support this operation. */
	if (!cur->bc_ops->keys_contiguous) {
		ASSERT(0);
		return -EOPNOTSUPP;
	}
	*exists = false;

	xfs_btree_key_from_irec(cur, &info.start_key, low);
	xfs_btree_key_from_irec(cur, &info.end_key, high);

	error = xfs_btree_query_range(cur, low, high,
			xfs_btree_has_records_helper, &info);
	if (error == -ECANCELED)
		goto out;
	if (error)
		return error;

	if (info.outcome == XBTREE_RECPACKING_EMPTY)
		goto out;

	/*
	 * If the largest high_key(rec) we saw during the walk is greater than
	 * the end of the search range, classify this as full.  Otherwise,
	 * there is a hole at the end of the search range.
	 */
	if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
				mask))
		info.outcome = XBTREE_RECPACKING_FULL;

out:
	*outcome = info.outcome;
	return 0;
}

/* Are there more records in this btree? */
Loading