Commit b634abac authored by Dave Chinner, committed by Dave Chinner
Browse files

Merge tag 'scrub-drain-intents-6.4_2023-04-11' of...

Merge tag 'scrub-drain-intents-6.4_2023-04-11' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux

 into guilt/xfs-for-next

xfs: drain deferred work items when scrubbing [v24.5]

The design doc for XFS online fsck contains a long discussion of the
eventual consistency models in use for XFS metadata.  In that chapter,
we note that it is possible for scrub to collide with a chain of
deferred space metadata updates, and propose a lightweight solution:
The use of a pending-intents counter so that scrub can wait for the
system to drain all chains.

This patchset implements that scrub drain.  The first patch implements
the basic mechanism, and the subsequent patches reduce the runtime
overhead by converting the implementation to use sloppy counters and
introducing jump labels to avoid walking into scrub hooks when it isn't
running.  This last paradigm repeats elsewhere in this megaseries.

v23.1: make intent items take an active ref to the perag structure and
       document why we bump and drop the intent counts when we do

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parents 793f5c2c 88accf17
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -93,10 +93,15 @@ config XFS_RT

	  If unsure, say N.

config XFS_DRAIN_INTENTS
	bool
	select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL

config XFS_ONLINE_SCRUB
	bool "XFS online metadata check support"
	default n
	depends on XFS_FS
	select XFS_DRAIN_INTENTS
	help
	  If you say Y here you will be able to check metadata on a
	  mounted XFS filesystem.  This feature is intended to reduce
+2 −0
Original line number Diff line number Diff line
@@ -136,6 +136,8 @@ ifeq ($(CONFIG_MEMORY_FAILURE),y)
xfs-$(CONFIG_FS_DAX)		+= xfs_notify_failure.o
endif

xfs-$(CONFIG_XFS_DRAIN_INTENTS)	+= xfs_drain.o

# online scrub/repair
ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)

+4 −0
Original line number Diff line number Diff line
@@ -260,6 +260,7 @@ xfs_free_perag(
		spin_unlock(&mp->m_perag_lock);
		ASSERT(pag);
		XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
		xfs_defer_drain_free(&pag->pag_intents_drain);

		cancel_delayed_work_sync(&pag->pag_blockgc_work);
		xfs_buf_hash_destroy(pag);
@@ -385,6 +386,7 @@ xfs_initialize_perag(
		spin_lock_init(&pag->pag_state_lock);
		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		xfs_defer_drain_init(&pag->pag_intents_drain);
		init_waitqueue_head(&pag->pagb_wait);
		init_waitqueue_head(&pag->pag_active_wq);
		pag->pagb_count = 0;
@@ -421,6 +423,7 @@ xfs_initialize_perag(
	return 0;

out_remove_pag:
	xfs_defer_drain_free(&pag->pag_intents_drain);
	radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
	kmem_free(pag);
@@ -431,6 +434,7 @@ xfs_initialize_perag(
		if (!pag)
			break;
		xfs_buf_hash_destroy(pag);
		xfs_defer_drain_free(&pag->pag_intents_drain);
		kmem_free(pag);
	}
	return error;
+8 −0
Original line number Diff line number Diff line
@@ -101,6 +101,14 @@ struct xfs_perag {
	/* background prealloc block trimming */
	struct delayed_work	pag_blockgc_work;

	/*
	 * We use xfs_drain to track the number of deferred log intent items
	 * that have been queued (but not yet processed) so that waiters (e.g.
	 * scrub) will not lock resources when other threads are in the middle
	 * of processing a chain of intent items only to find momentary
	 * inconsistencies.
	 */
	struct xfs_defer_drain	pag_intents_drain;
#endif /* __KERNEL__ */
};

+4 −2
Original line number Diff line number Diff line
@@ -397,6 +397,7 @@ xfs_defer_cancel_list(
		list_for_each_safe(pwi, n, &dfp->dfp_work) {
			list_del(pwi);
			dfp->dfp_count--;
			trace_xfs_defer_cancel_item(mp, dfp, pwi);
			ops->cancel_item(pwi);
		}
		ASSERT(dfp->dfp_count == 0);
@@ -476,6 +477,7 @@ xfs_defer_finish_one(
	list_for_each_safe(li, n, &dfp->dfp_work) {
		list_del(li);
		dfp->dfp_count--;
		trace_xfs_defer_finish_item(tp->t_mountp, dfp, li);
		error = ops->finish_item(tp, dfp->dfp_done, li, &state);
		if (error == -EAGAIN) {
			int		ret;
@@ -623,7 +625,7 @@ xfs_defer_add(
	struct list_head		*li)
{
	struct xfs_defer_pending	*dfp = NULL;
	const struct xfs_defer_op_type	*ops;
	const struct xfs_defer_op_type	*ops = defer_op_types[type];

	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
	BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
@@ -636,7 +638,6 @@ xfs_defer_add(
	if (!list_empty(&tp->t_dfops)) {
		dfp = list_last_entry(&tp->t_dfops,
				struct xfs_defer_pending, dfp_list);
		ops = defer_op_types[dfp->dfp_type];
		if (dfp->dfp_type != type ||
		    (ops->max_items && dfp->dfp_count >= ops->max_items))
			dfp = NULL;
@@ -653,6 +654,7 @@ xfs_defer_add(
	}

	list_add_tail(li, &dfp->dfp_work);
	trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
	dfp->dfp_count++;
}

Loading