Commit c0927a7a authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'xfs-6.3-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull moar xfs updates from Darrick Wong:
 "This contains a fix for a deadlock in the allocator. It continues the
  slow march towards being able to offline AGs, and it refactors the
  interface to the xfs allocator to be less indirection happy.

  Summary:

   - Fix a deadlock in the free space allocator due to the AG-walking
     algorithm forgetting to follow AG-order locking rules

   - Make the inode allocator prefer existing free inodes instead of
     failing to allocate new inode chunks when free space is low

   - Set minleft correctly when setting allocator parameters for bmap
     changes

   - Fix uninitialized variable access in the getfsmap code

   - Make a distinction between active and passive per-AG structure
     references. For now, active references are taken to perform some
     work in an AG on behalf of a high level operation; passive
     references are used by lower level code to finish operations
     started by other threads. Eventually this will become part of
     online shrink

   - Split out all the different allocator strategies into separate
     functions to move us away from design antipattern of filling out a
     huge structure for various differentish things and issuing a single
     function multiplexing call

   - Various cleanups in the filestreams allocator code, which we might
     very well want to deprecate instead of continuing

   - Fix a bug with the agi rotor code that was introduced earlier in
     this series"

* tag 'xfs-6.3-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (44 commits)
  xfs: restore old agirotor behavior
  xfs: fix uninitialized variable access
  xfs: refactor the filestreams allocator pick functions
  xfs: return a referenced perag from filestreams allocator
  xfs: pass perag to filestreams tracing
  xfs: use for_each_perag_wrap in xfs_filestream_pick_ag
  xfs: track an active perag reference in filestreams
  xfs: factor out MRU hit case in xfs_filestream_select_ag
  xfs: remove xfs_filestream_select_ag() longest extent check
  xfs: merge new filestream AG selection into xfs_filestream_select_ag()
  xfs: merge filestream AG lookup into xfs_filestream_select_ag()
  xfs: move xfs_bmap_btalloc_filestreams() to xfs_filestreams.c
  xfs: use xfs_bmap_longest_free_extent() in filestreams
  xfs: get rid of notinit from xfs_bmap_longest_free_extent
  xfs: factor out filestreams from xfs_bmap_btalloc_nullfb
  xfs: convert trim to use for_each_perag_range
  xfs: convert xfs_alloc_vextent_iterate_ags() to use perag walker
  xfs: move the minimum agno checks into xfs_alloc_vextent_check_args
  xfs: fold xfs_alloc_ag_vextent() into callers
  xfs: move allocation accounting to xfs_alloc_vextent_set_fsbno()
  ...
parents 1899946a 6e2985c9
Loading
Loading
Loading
Loading
+77 −16
Original line number Diff line number Diff line
@@ -44,16 +44,15 @@ xfs_perag_get(
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*pag;
	int			ref = 0;

	rcu_read_lock();
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	if (pag) {
		trace_xfs_perag_get(pag, _RET_IP_);
		ASSERT(atomic_read(&pag->pag_ref) >= 0);
		ref = atomic_inc_return(&pag->pag_ref);
		atomic_inc(&pag->pag_ref);
	}
	rcu_read_unlock();
	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
	return pag;
}

@@ -68,7 +67,6 @@ xfs_perag_get_tag(
{
	struct xfs_perag	*pag;
	int			found;
	int			ref;

	rcu_read_lock();
	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
@@ -77,9 +75,9 @@ xfs_perag_get_tag(
		rcu_read_unlock();
		return NULL;
	}
	ref = atomic_inc_return(&pag->pag_ref);
	trace_xfs_perag_get_tag(pag, _RET_IP_);
	atomic_inc(&pag->pag_ref);
	rcu_read_unlock();
	trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
	return pag;
}

@@ -87,11 +85,68 @@ void
xfs_perag_put(
	struct xfs_perag	*pag)
{
	int	ref;

	trace_xfs_perag_put(pag, _RET_IP_);
	ASSERT(atomic_read(&pag->pag_ref) > 0);
	ref = atomic_dec_return(&pag->pag_ref);
	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
	atomic_dec(&pag->pag_ref);
}

/*
 * Active references for perag structures. This is for short term access to the
 * per ag structures for walking trees or accessing state. If an AG is being
 * shrunk or is offline, then this will fail to find that AG and return NULL
 * instead.
 */
struct xfs_perag *
xfs_perag_grab(
	struct xfs_mount	*mp,
	xfs_agnumber_t		agno)
{
	struct xfs_perag	*pag;

	rcu_read_lock();
	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
	if (pag) {
		trace_xfs_perag_grab(pag, _RET_IP_);
		if (!atomic_inc_not_zero(&pag->pag_active_ref))
			pag = NULL;
	}
	rcu_read_unlock();
	return pag;
}

/*
 * search from @first to find the next perag with the given tag set.
 */
struct xfs_perag *
xfs_perag_grab_tag(
	struct xfs_mount	*mp,
	xfs_agnumber_t		first,
	int			tag)
{
	struct xfs_perag	*pag;
	int			found;

	rcu_read_lock();
	found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
					(void **)&pag, first, 1, tag);
	if (found <= 0) {
		rcu_read_unlock();
		return NULL;
	}
	trace_xfs_perag_grab_tag(pag, _RET_IP_);
	if (!atomic_inc_not_zero(&pag->pag_active_ref))
		pag = NULL;
	rcu_read_unlock();
	return pag;
}

void
xfs_perag_rele(
	struct xfs_perag	*pag)
{
	trace_xfs_perag_rele(pag, _RET_IP_);
	if (atomic_dec_and_test(&pag->pag_active_ref))
		wake_up(&pag->pag_active_wq);
}

/*
@@ -196,6 +251,10 @@ xfs_free_perag(
		cancel_delayed_work_sync(&pag->pag_blockgc_work);
		xfs_buf_hash_destroy(pag);

		/* drop the mount's active reference */
		xfs_perag_rele(pag);
		XFS_IS_CORRUPT(pag->pag_mount,
				atomic_read(&pag->pag_active_ref) != 0);
		call_rcu(&pag->rcu_head, __xfs_free_perag);
	}
}
@@ -314,6 +373,7 @@ xfs_initialize_perag(
		INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
		INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
		init_waitqueue_head(&pag->pagb_wait);
		init_waitqueue_head(&pag->pag_active_wq);
		pag->pagb_count = 0;
		pag->pagb_tree = RB_ROOT;
#endif /* __KERNEL__ */
@@ -322,6 +382,9 @@ xfs_initialize_perag(
		if (error)
			goto out_remove_pag;

		/* Active ref owned by mount indicates AG is online. */
		atomic_set(&pag->pag_active_ref, 1);

		/* first new pag is fully initialized */
		if (first_initialised == NULLAGNUMBER)
			first_initialised = index;
@@ -824,7 +887,7 @@ xfs_ag_shrink_space(
	struct xfs_alloc_arg	args = {
		.tp	= *tpp,
		.mp	= mp,
		.type	= XFS_ALLOCTYPE_THIS_BNO,
		.pag	= pag,
		.minlen = delta,
		.maxlen = delta,
		.oinfo	= XFS_RMAP_OINFO_SKIP_UPDATE,
@@ -856,14 +919,11 @@ xfs_ag_shrink_space(
	if (delta >= aglen)
		return -EINVAL;

	args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta);

	/*
	 * Make sure that the last inode cluster cannot overlap with the new
	 * end of the AG, even if it's sparse.
	 */
	error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp,
			aglen - delta);
	error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta);
	if (error)
		return error;

@@ -876,7 +936,8 @@ xfs_ag_shrink_space(
		return error;

	/* internal log shouldn't also show up in the free space btrees */
	error = xfs_alloc_vextent(&args);
	error = xfs_alloc_vextent_exact_bno(&args,
			XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta));
	if (!error && args.agbno == NULLAGBLOCK)
		error = -ENOSPC;

+96 −15
Original line number Diff line number Diff line
@@ -32,14 +32,12 @@ struct xfs_ag_resv {
struct xfs_perag {
	struct xfs_mount *pag_mount;	/* owner filesystem */
	xfs_agnumber_t	pag_agno;	/* AG this structure belongs to */
	atomic_t	pag_ref;	/* perag reference count */
	char		pagf_init;	/* this agf's entry is initialized */
	char		pagi_init;	/* this agi's entry is initialized */
	char		pagf_metadata;	/* the agf is preferred to be metadata */
	char		pagi_inodeok;	/* The agi is ok for inodes */
	atomic_t	pag_ref;	/* passive reference count */
	atomic_t	pag_active_ref;	/* active reference count */
	wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */
	unsigned long	pag_opstate;
	uint8_t		pagf_levels[XFS_BTNUM_AGF];
					/* # of levels in bno & cnt btree */
	bool		pagf_agflreset; /* agfl requires reset before use */
	uint32_t	pagf_flcount;	/* count of blocks in freelist */
	xfs_extlen_t	pagf_freeblks;	/* total free blocks */
	xfs_extlen_t	pagf_longest;	/* longest free space */
@@ -106,16 +104,44 @@ struct xfs_perag {
#endif /* __KERNEL__ */
};

/*
 * Per-AG operational state. These are atomic flag bits.
 */
#define XFS_AGSTATE_AGF_INIT		0
#define XFS_AGSTATE_AGI_INIT		1
#define XFS_AGSTATE_PREFERS_METADATA	2
#define XFS_AGSTATE_ALLOWS_INODES	3
#define XFS_AGSTATE_AGFL_NEEDS_RESET	4

#define __XFS_AG_OPSTATE(name, NAME) \
static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \
{ \
	return test_bit(XFS_AGSTATE_ ## NAME, &pag->pag_opstate); \
}

__XFS_AG_OPSTATE(initialised_agf, AGF_INIT)
__XFS_AG_OPSTATE(initialised_agi, AGI_INIT)
__XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)

int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount,
			xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi);
int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
void xfs_free_perag(struct xfs_mount *mp);

/* Passive AG references */
struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
		unsigned int tag);
void xfs_perag_put(struct xfs_perag *pag);

/* Active AG references */
struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t,
				   int tag);
void xfs_perag_rele(struct xfs_perag *pag);

/*
 * Per-ag geometry infomation and validation
 */
@@ -193,31 +219,86 @@ xfs_perag_next(
	struct xfs_mount	*mp = pag->pag_mount;

	*agno = pag->pag_agno + 1;
	xfs_perag_put(pag);
	if (*agno > end_agno)
	xfs_perag_rele(pag);
	while (*agno <= end_agno) {
		pag = xfs_perag_grab(mp, *agno);
		if (pag)
			return pag;
		(*agno)++;
	}
	return NULL;
	return xfs_perag_get(mp, *agno);
}

#define for_each_perag_range(mp, agno, end_agno, pag) \
	for ((pag) = xfs_perag_get((mp), (agno)); \
	for ((pag) = xfs_perag_grab((mp), (agno)); \
		(pag) != NULL; \
		(pag) = xfs_perag_next((pag), &(agno), (end_agno)))

#define for_each_perag_from(mp, agno, pag) \
	for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))


#define for_each_perag(mp, agno, pag) \
	(agno) = 0; \
	for_each_perag_from((mp), (agno), (pag))

#define for_each_perag_tag(mp, agno, pag, tag) \
	for ((agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
	for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \
		(pag) != NULL; \
		(agno) = (pag)->pag_agno + 1, \
		xfs_perag_put(pag), \
		(pag) = xfs_perag_get_tag((mp), (agno), (tag)))
		xfs_perag_rele(pag), \
		(pag) = xfs_perag_grab_tag((mp), (agno), (tag)))

static inline struct xfs_perag *
xfs_perag_next_wrap(
	struct xfs_perag	*pag,
	xfs_agnumber_t		*agno,
	xfs_agnumber_t		stop_agno,
	xfs_agnumber_t		restart_agno,
	xfs_agnumber_t		wrap_agno)
{
	struct xfs_mount	*mp = pag->pag_mount;

	*agno = pag->pag_agno + 1;
	xfs_perag_rele(pag);
	while (*agno != stop_agno) {
		if (*agno >= wrap_agno) {
			if (restart_agno >= stop_agno)
				break;
			*agno = restart_agno;
		}

		pag = xfs_perag_grab(mp, *agno);
		if (pag)
			return pag;
		(*agno)++;
	}
	return NULL;
}

/*
 * Iterate all AGs from start_agno through wrap_agno, then restart_agno through
 * (start_agno - 1).
 */
#define for_each_perag_wrap_range(mp, start_agno, restart_agno, wrap_agno, agno, pag) \
	for ((agno) = (start_agno), (pag) = xfs_perag_grab((mp), (agno)); \
		(pag) != NULL; \
		(pag) = xfs_perag_next_wrap((pag), &(agno), (start_agno), \
				(restart_agno), (wrap_agno)))
/*
 * Iterate all AGs from start_agno through wrap_agno, then 0 through
 * (start_agno - 1).
 */
#define for_each_perag_wrap_at(mp, start_agno, wrap_agno, agno, pag) \
	for_each_perag_wrap_range((mp), (start_agno), 0, (wrap_agno), (agno), (pag))

/*
 * Iterate all AGs from start_agno through to the end of the filesystem, then 0
 * through (start_agno - 1).
 */
#define for_each_perag_wrap(mp, start_agno, agno, pag) \
	for_each_perag_wrap_at((mp), (start_agno), (mp)->m_sb.sb_agcount, \
				(agno), (pag))


struct aghdr_init_data {
	/* per ag data */
+1 −1
Original line number Diff line number Diff line
@@ -264,7 +264,7 @@ xfs_ag_resv_init(
		if (error)
			goto out;

		error = xfs_finobt_calc_reserves(mp, tp, pag, &ask, &used);
		error = xfs_finobt_calc_reserves(pag, tp, &ask, &used);
		if (error)
			goto out;

+431 −252

File changed.

Preview size limit exceeded, changes collapsed.

+36 −25
Original line number Diff line number Diff line
@@ -16,25 +16,6 @@ extern struct workqueue_struct *xfs_alloc_wq;

unsigned int xfs_agfl_size(struct xfs_mount *mp);

/*
 * Freespace allocation types.  Argument to xfs_alloc_[v]extent.
 */
#define XFS_ALLOCTYPE_FIRST_AG	0x02	/* ... start at ag 0 */
#define XFS_ALLOCTYPE_THIS_AG	0x08	/* anywhere in this a.g. */
#define XFS_ALLOCTYPE_START_BNO	0x10	/* near this block else anywhere */
#define XFS_ALLOCTYPE_NEAR_BNO	0x20	/* in this a.g. and near this block */
#define XFS_ALLOCTYPE_THIS_BNO	0x40	/* at exactly this block */

/* this should become an enum again when the tracing code is fixed */
typedef unsigned int xfs_alloctype_t;

#define XFS_ALLOC_TYPES \
	{ XFS_ALLOCTYPE_FIRST_AG,	"FIRST_AG" }, \
	{ XFS_ALLOCTYPE_THIS_AG,	"THIS_AG" }, \
	{ XFS_ALLOCTYPE_START_BNO,	"START_BNO" }, \
	{ XFS_ALLOCTYPE_NEAR_BNO,	"NEAR_BNO" }, \
	{ XFS_ALLOCTYPE_THIS_BNO,	"THIS_BNO" }

/*
 * Flags for xfs_alloc_fix_freelist.
 */
@@ -68,8 +49,6 @@ typedef struct xfs_alloc_arg {
	xfs_agblock_t	min_agbno;	/* set an agbno range for NEAR allocs */
	xfs_agblock_t	max_agbno;	/* ... */
	xfs_extlen_t	len;		/* output: actual size of extent */
	xfs_alloctype_t	type;		/* allocation type XFS_ALLOCTYPE_... */
	xfs_alloctype_t	otype;		/* original allocation type */
	int		datatype;	/* mask defining data type treatment */
	char		wasdel;		/* set if allocation was prev delayed */
	char		wasfromfl;	/* set if allocation is from freelist */
@@ -118,11 +97,43 @@ xfs_alloc_log_agf(
	uint32_t	fields);/* mask of fields to be logged (XFS_AGF_...) */

/*
 * Allocate an extent (variable-size).
 * Allocate an extent anywhere in the specific AG given. If there is no
 * space matching the requirements in that AG, then the allocation will fail.
 */
int				/* error */
xfs_alloc_vextent(
	xfs_alloc_arg_t	*args);	/* allocation argument structure */
int xfs_alloc_vextent_this_ag(struct xfs_alloc_arg *args, xfs_agnumber_t agno);

/*
 * Allocate an extent as close to the target as possible. If there are not
 * viable candidates in the AG, then fail the allocation.
 */
int xfs_alloc_vextent_near_bno(struct xfs_alloc_arg *args,
		xfs_fsblock_t target);

/*
 * Allocate an extent exactly at the target given. If this is not possible
 * then the allocation fails.
 */
int xfs_alloc_vextent_exact_bno(struct xfs_alloc_arg *args,
		xfs_fsblock_t target);

/*
 * Best effort full filesystem allocation scan.
 *
 * Locality aware allocation will be attempted in the initial AG, but on failure
 * non-localised attempts will be made. The AGs are constrained by previous
 * allocations in the current transaction. Two passes will be made - the first
 * non-blocking, the second blocking.
 */
int xfs_alloc_vextent_start_ag(struct xfs_alloc_arg *args,
		xfs_fsblock_t target);

/*
 * Iterate from the AG indicated from args->fsbno through to the end of the
 * filesystem attempting blocking allocation. This is for use in last
 * resort allocation attempts when everything else has failed.
 */
int xfs_alloc_vextent_first_ag(struct xfs_alloc_arg *args,
		xfs_fsblock_t target);

/*
 * Free an extent.
Loading