Commit bba7d682 authored by Linus Torvalds

Merge tag 'xfs-5.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "This cycle we've worked on fixing bugs and improving XFS' memory
  footprint.

  The most notable fixes include: fixing a corruption warning (and free
  space accounting skew) if copy on write fails; fixing slab cache
  misuse if SLOB is enabled, which apparently was broken for years
  without anybody noticing; and fixing a potential race with online
  shrinkfs.

  Otherwise, the bulk of the changes here involve setting up separate
  slab caches for frequently used items such as btree cursors and log
  intent items, and compacting the structures to reduce memory usage of
  those items substantially. This also sets us up to support larger
  btrees in future kernels. We also switch parts of online fsck to
  allocate scrub context information from the heap instead of using
  stack space.

  Summary:

   - Bug fixes and cleanups for kernel memory allocation usage, this
     time without touching the mm code.

   - Refactor the log recovery mechanism that preserves held resources
     across a transaction roll so that it uses the exact same mechanism
     that we use for that during regular runtime.

   - Fix bugs and tighten checking around btree heights.

   - Remove more old typedefs.

   - Fix perag reference leaks when racing with growfs.

   - Remove unused fields from xfs_btree_cur.

   - Allocate various scrub structures on the heap to reduce stack
     usage.

   - Pack xfs_btree_cur fields and rearrange to support arbitrary
     heights.

   - Compute maximum possible heights for each btree type, and use that
     to set up slab caches for each btree type (see the cursor-cache
     sketch after this message).

   - Finally remove kmem_zone_t, since these have always been struct
     kmem_cache on Linux.

   - Compact the structures used to coordinate work intent items.

   - Set up slab caches for each work intent item type.

   - Rename the "bmap_add_free" function to "free_extent_later", which
     more accurately describes what it does.

   - Fix corruption warning on unmount when a CoW preallocation covers a
     data fork delalloc reservation but then the CoW fails.

   - Add some more minor code improvements"
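
A minimal sketch of the cursor-cache idea described above (all names here
are illustrative, not the actual XFS symbols): the cursor carries a
flexible array of per-level state, so each btree type can get its own
slab cache sized for that btree's maximum possible height.

	#include <linux/slab.h>
	#include <linux/overflow.h>

	/* Per-level cursor state; one entry per btree level. */
	struct example_cur_level {
		void		*bp;	/* buffer backing this level */
		int		ptr;	/* record slot within that buffer */
	};

	/* Cursor with a flexible array, so object size depends on height. */
	struct example_btree_cur {
		unsigned int			bc_nlevels;
		struct example_cur_level	bc_levels[];
	};

	static struct kmem_cache	*example_cur_cache;

	/* Create one cache per btree type, sized for its maximum height. */
	static int
	example_cur_init_cache(unsigned int maxlevels)
	{
		example_cur_cache = kmem_cache_create("example_btree_cur",
				struct_size((struct example_btree_cur *)NULL,
					    bc_levels, maxlevels),
				0, 0, NULL);
		return example_cur_cache != NULL ? 0 : -ENOMEM;
	}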

* tag 'xfs-5.16-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (45 commits)
  xfs: use swap() to make code cleaner
  xfs: Remove duplicated include in xfs_super
  xfs: punch out data fork delalloc blocks on COW writeback failure
  xfs: remove unused parameter from refcount code
  xfs: reduce the size of struct xfs_extent_free_item
  xfs: rename xfs_bmap_add_free to xfs_free_extent_later
  xfs: create slab caches for frequently-used deferred items
  xfs: compact deferred intent item structures
  xfs: rename _zone variables to _cache
  xfs: remove kmem_zone typedef
  xfs: use separate btree cursor cache for each btree type
  xfs: compute absolute maximum nlevels for each btree type
  xfs: kill XFS_BTREE_MAXLEVELS
  xfs: compute the maximum height of the rmap btree when reflink enabled
  xfs: clean up xfs_btree_{calc_size,compute_maxlevels}
  xfs: compute maximum AG btree height for critical reservation calculation
  xfs: rename m_ag_maxlevels to m_allocbt_maxlevels
  xfs: dynamically allocate cursors based on maxlevels
  xfs: encode the max btree height in the cursor
  xfs: refactor btree cursor allocation function
  ...
parents a64a325b 2a09b575

fs/xfs/kmem.h (+0 −4)
@@ -72,10 +72,6 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
 /*
  * Zone interfaces
  */
-
-#define kmem_zone	kmem_cache
-#define kmem_zone_t	struct kmem_cache
-
 static inline struct page *
 kmem_to_page(void *addr)
 {

fs/xfs/libxfs/xfs_ag.c (+1 −1)
@@ -850,7 +850,7 @@ xfs_ag_shrink_space(
 		if (err2 != -ENOSPC)
 			goto resv_err;
 
-		__xfs_bmap_add_free(*tpp, args.fsbno, delta, NULL, true);
+		__xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, true);
 
 		/*
 		 * Roll the transaction before trying to re-init the per-ag

fs/xfs/libxfs/xfs_ag.h (+21 −15)
@@ -116,23 +116,29 @@ void xfs_perag_put(struct xfs_perag *pag);
 
 /*
  * Perag iteration APIs
- *
- * XXX: for_each_perag_range() usage really needs an iterator to clean up when
- * we terminate at end_agno because we may have taken a reference to the perag
- * beyond end_agno. Right now callers have to be careful to catch and clean that
- * up themselves. This is not necessary for the callers of for_each_perag() and
- * for_each_perag_from() because they terminate at sb_agcount where there are
- * no perag structures in tree beyond end_agno.
  */
-#define for_each_perag_range(mp, next_agno, end_agno, pag) \
-	for ((pag) = xfs_perag_get((mp), (next_agno)); \
-		(pag) != NULL && (next_agno) <= (end_agno); \
-		(next_agno) = (pag)->pag_agno + 1, \
-		xfs_perag_put(pag), \
-		(pag) = xfs_perag_get((mp), (next_agno)))
+static inline struct xfs_perag *
+xfs_perag_next(
+	struct xfs_perag	*pag,
+	xfs_agnumber_t		*agno,
+	xfs_agnumber_t		end_agno)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	*agno = pag->pag_agno + 1;
+	xfs_perag_put(pag);
+	if (*agno > end_agno)
+		return NULL;
+	return xfs_perag_get(mp, *agno);
+}
+
+#define for_each_perag_range(mp, agno, end_agno, pag) \
+	for ((pag) = xfs_perag_get((mp), (agno)); \
+		(pag) != NULL; \
+		(pag) = xfs_perag_next((pag), &(agno), (end_agno)))
 
-#define for_each_perag_from(mp, next_agno, pag) \
-	for_each_perag_range((mp), (next_agno), (mp)->m_sb.sb_agcount, (pag))
+#define for_each_perag_from(mp, agno, pag) \
+	for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
 
 
 #define for_each_perag(mp, agno, pag) \
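
For reference, a typical caller of the reworked iterator looks like the
sketch below (illustrative function, not part of this series). The new
xfs_perag_next() drops the reference on the current perag and grabs the
next one, so no manual xfs_perag_put() is needed when the loop runs to
completion; a caller that breaks out early still owns the reference to
the current pag and must drop it itself.

	/* Illustrative only: sum the cached free block counts of every AG. */
	static unsigned long long
	example_count_free_blocks(
		struct xfs_mount	*mp)
	{
		struct xfs_perag	*pag;
		xfs_agnumber_t		agno = 0;
		unsigned long long	freeblks = 0;

		for_each_perag(mp, agno, pag)
			freeblks += pag->pagf_freeblks;
		return freeblks;
	}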

fs/xfs/libxfs/xfs_ag_resv.c (+2 −1)
@@ -91,7 +91,8 @@ xfs_ag_resv_critical(
 	trace_xfs_ag_resv_critical(pag, type, avail);
 
 	/* Critically low if less than 10% or max btree height remains. */
-	return XFS_TEST_ERROR(avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS,
+	return XFS_TEST_ERROR(avail < orig / 10 ||
+			      avail < pag->pag_mount->m_agbtree_maxlevels,
 			pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
 }
 

fs/xfs/libxfs/xfs_alloc.c (+97 −23)
@@ -27,7 +27,7 @@
 #include "xfs_ag_resv.h"
 #include "xfs_bmap.h"
 
-extern kmem_zone_t	*xfs_bmap_free_item_zone;
+struct kmem_cache	*xfs_extfree_item_cache;
 
 struct workqueue_struct *xfs_alloc_wq;
 
@@ -426,8 +426,8 @@ xfs_alloc_fix_len(
  */
 STATIC int				/* error code */
 xfs_alloc_fixup_trees(
-	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
-	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
+	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
+	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
 	xfs_agblock_t	fbno,		/* starting block of free extent */
 	xfs_extlen_t	flen,		/* length of free extent */
 	xfs_agblock_t	rbno,		/* starting block of returned extent */
@@ -488,8 +488,8 @@ xfs_alloc_fixup_trees(
 		struct xfs_btree_block	*bnoblock;
 		struct xfs_btree_block	*cntblock;
 
-		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
-		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
+		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
+		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);
 
 		if (XFS_IS_CORRUPT(mp,
 				   bnoblock->bb_numrecs !=
@@ -1200,8 +1200,8 @@ xfs_alloc_ag_vextent_exact(
 	xfs_alloc_arg_t	*args)	/* allocation argument structure */
 {
 	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
-	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
-	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
+	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
+	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
 	int		error;
 	xfs_agblock_t	fbno;	/* start block of found extent */
 	xfs_extlen_t	flen;	/* length of found extent */
@@ -1512,7 +1512,7 @@ xfs_alloc_ag_vextent_lastblock(
 	 * than minlen.
 	 */
 	if (*len || args->alignment > 1) {
-		acur->cnt->bc_ptrs[0] = 1;
+		acur->cnt->bc_levels[0].ptr = 1;
 		do {
 			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
 			if (error)
@@ -1658,8 +1658,8 @@ xfs_alloc_ag_vextent_size(
 	xfs_alloc_arg_t	*args)		/* allocation argument structure */
 {
 	struct xfs_agf	*agf = args->agbp->b_addr;
-	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
-	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
+	struct xfs_btree_cur *bno_cur;	/* cursor for bno btree */
+	struct xfs_btree_cur *cnt_cur;	/* cursor for cnt btree */
 	int		error;		/* error result */
 	xfs_agblock_t	fbno;		/* start of found freespace */
 	xfs_extlen_t	flen;		/* length of found freespace */
@@ -2190,14 +2190,15 @@ xfs_free_ag_extent(
  */
 
 /*
- * Compute and fill in value of m_ag_maxlevels.
+ * Compute and fill in value of m_alloc_maxlevels.
  */
 void
 xfs_alloc_compute_maxlevels(
 	xfs_mount_t	*mp)	/* file system mount structure */
 {
-	mp->m_ag_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
+	mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
 			(mp->m_sb.sb_agblocks + 1) / 2);
+	ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
 }
 
 /*
@@ -2255,14 +2256,14 @@ xfs_alloc_min_freelist(
 	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
 	unsigned int		min_free;
 
-	ASSERT(mp->m_ag_maxlevels > 0);
+	ASSERT(mp->m_alloc_maxlevels > 0);
 
 	/* space needed by-bno freespace btree */
 	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
-				       mp->m_ag_maxlevels);
+				       mp->m_alloc_maxlevels);
 	/* space needed by-size freespace btree */
 	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
-				       mp->m_ag_maxlevels);
+				       mp->m_alloc_maxlevels);
 	/* space needed reverse mapping used space btree */
 	if (xfs_has_rmapbt(mp))
 		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
@@ -2439,7 +2440,7 @@ xfs_agfl_reset(
 
 /*
  * Defer an AGFL block free. This is effectively equivalent to
- * xfs_bmap_add_free() with some special handling particular to AGFL blocks.
+ * xfs_free_extent_later() with some special handling particular to AGFL blocks.
  *
  * Deferring AGFL frees helps prevent log reservation overruns due to too many
  * allocation operations in a transaction. AGFL frees are prone to this problem
@@ -2458,21 +2459,74 @@ xfs_defer_agfl_block(
 	struct xfs_mount		*mp = tp->t_mountp;
 	struct xfs_extent_free_item	*new;		/* new element */
 
-	ASSERT(xfs_bmap_free_item_zone != NULL);
+	ASSERT(xfs_extfree_item_cache != NULL);
 	ASSERT(oinfo != NULL);
 
-	new = kmem_cache_alloc(xfs_bmap_free_item_zone,
+	new = kmem_cache_zalloc(xfs_extfree_item_cache,
 			       GFP_KERNEL | __GFP_NOFAIL);
 	new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
 	new->xefi_blockcount = 1;
-	new->xefi_oinfo = *oinfo;
-	new->xefi_skip_discard = false;
+	new->xefi_owner = oinfo->oi_owner;
 
 	trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
 
 	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list);
 }
 
+/*
+ * Add the extent to the list of extents to be free at transaction end.
+ * The list is maintained sorted (by block number).
+ */
+void
+__xfs_free_extent_later(
+	struct xfs_trans		*tp,
+	xfs_fsblock_t			bno,
+	xfs_filblks_t			len,
+	const struct xfs_owner_info	*oinfo,
+	bool				skip_discard)
+{
+	struct xfs_extent_free_item	*new;		/* new element */
+#ifdef DEBUG
+	struct xfs_mount		*mp = tp->t_mountp;
+	xfs_agnumber_t			agno;
+	xfs_agblock_t			agbno;
+
+	ASSERT(bno != NULLFSBLOCK);
+	ASSERT(len > 0);
+	ASSERT(len <= MAXEXTLEN);
+	ASSERT(!isnullstartblock(bno));
+	agno = XFS_FSB_TO_AGNO(mp, bno);
+	agbno = XFS_FSB_TO_AGBNO(mp, bno);
+	ASSERT(agno < mp->m_sb.sb_agcount);
+	ASSERT(agbno < mp->m_sb.sb_agblocks);
+	ASSERT(len < mp->m_sb.sb_agblocks);
+	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
+#endif
+	ASSERT(xfs_extfree_item_cache != NULL);
+
+	new = kmem_cache_zalloc(xfs_extfree_item_cache,
+			       GFP_KERNEL | __GFP_NOFAIL);
+	new->xefi_startblock = bno;
+	new->xefi_blockcount = (xfs_extlen_t)len;
+	if (skip_discard)
+		new->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+	if (oinfo) {
+		ASSERT(oinfo->oi_offset == 0);
+
+		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
+			new->xefi_flags |= XFS_EFI_ATTR_FORK;
+		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
+			new->xefi_flags |= XFS_EFI_BMBT_BLOCK;
+		new->xefi_owner = oinfo->oi_owner;
+	} else {
+		new->xefi_owner = XFS_RMAP_OWN_NULL;
+	}
+	trace_xfs_bmap_free_defer(tp->t_mountp,
+			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
+			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
+	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list);
+}
+
 #ifdef DEBUG
 /*
  * Check if an AGF has a free extent record whose length is equal to
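
As a usage note, a caller that wants a block range returned to the free
space btrees only when its transaction commits would defer the free
roughly as below (illustrative wrapper, not part of this hunk). Passing
a NULL owner records the extent with XFS_RMAP_OWN_NULL, as in the
function above; setting skip_discard keeps online discard away from the
range.

	/* Illustrative only: defer freeing an extent until transaction commit. */
	static void
	example_defer_extent_free(
		struct xfs_trans	*tp,
		xfs_fsblock_t		fsbno,
		xfs_filblks_t		len)
	{
		__xfs_free_extent_later(tp, fsbno, len, NULL, false);
	}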
@@ -2903,13 +2957,16 @@ xfs_agf_verify(
 
 	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
 	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
-	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > mp->m_ag_maxlevels ||
-	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > mp->m_ag_maxlevels)
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
+						mp->m_alloc_maxlevels ||
+	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
+						mp->m_alloc_maxlevels)
 		return __this_address;
 
 	if (xfs_has_rmapbt(mp) &&
 	    (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
-	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > mp->m_rmap_maxlevels))
+	     be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
+						mp->m_rmap_maxlevels))
 		return __this_address;
 
 	if (xfs_has_rmapbt(mp) &&
@@ -3495,3 +3552,20 @@ xfs_agfl_walk(
 
 	return 0;
 }
+
+int __init
+xfs_extfree_intent_init_cache(void)
+{
+	xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
+			sizeof(struct xfs_extent_free_item),
+			0, 0, NULL);
+
+	return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
+}
+
+void
+xfs_extfree_intent_destroy_cache(void)
+{
+	kmem_cache_destroy(xfs_extfree_item_cache);
+	xfs_extfree_item_cache = NULL;
+}
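
These constructors and destructors are meant to be called once from the
filesystem's slab cache setup and teardown paths; a minimal sketch of
that wiring is below (the caller names are illustrative, not the actual
xfs_super.c functions).

	/* Illustrative only: create the intent item caches, unwinding on error. */
	int __init
	example_init_intent_caches(void)
	{
		int	error;

		error = xfs_extfree_intent_init_cache();
		if (error)
			return error;
		/* ... initialize the other deferred intent item caches here ... */
		return 0;
	}

	void
	example_destroy_intent_caches(void)
	{
		/* ... destroy the other deferred intent item caches here ... */
		xfs_extfree_intent_destroy_cache();
	}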