Commit 2229276c authored by Darrick J. Wong's avatar Darrick J. Wong Committed by Dave Chinner
Browse files

xfs: use a separate frextents counter for rt extent reservations



As mentioned in the previous commit, the kernel misuses sb_frextents in
the incore mount to reflect both incore reservations made by running
transactions as well as the actual count of free rt extents on disk.
This results in the superblock being written to the log with an
underestimate of the number of rt extents that are marked free in the
rtbitmap.

Teaching XFS to recompute frextents after log recovery avoids
operational problems in the current mount, but it doesn't solve the
problem of us writing undercounted frextents which are then recovered by
an older kernel that doesn't have that fix.

Create an incore percpu counter to mirror the ondisk frextents.  This
new counter will track transaction reservations and the only time we
will touch the incore super counter (i.e. the one that gets logged) is
when those transactions commit updates to the rt bitmap.  This is in
contrast to the lazysbcount counters (e.g. fdblocks), where we know that
log recovery will always fix any incorrect counter that we log.
As a bonus, we only take m_sb_lock at transaction commit time.

Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarDave Chinner <david@fromorbit.com>
parent 5a605fd6
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -911,6 +911,11 @@ xfs_log_sb(
	 * reservations that have been taken out of percpu counters. If we have an
	 * unclean shutdown, this will be corrected by log recovery rebuilding
	 * the counters from the AGF block counts.
	 *
	 * Do not update sb_frextents here because it is not part of the lazy
	 * sb counters, despite having a percpu counter. It is always kept
	 * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas()
	 * and hence we don't need to update it here.
	 */
	if (xfs_has_lazysbcount(mp)) {
		mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+1 −4
Original line number Diff line number Diff line
@@ -349,10 +349,7 @@ xfs_fs_counts(
	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
						xfs_fdblocks_unavailable(mp);

	spin_lock(&mp->m_sb_lock);
	cnt->freertx = mp->m_sb.sb_frextents;
	spin_unlock(&mp->m_sb_lock);
	cnt->freertx = percpu_counter_read_positive(&mp->m_frextents);
}

/*
+6 −3
Original line number Diff line number Diff line
@@ -1916,13 +1916,16 @@ xfs_inodegc_want_queue_rt_file(
	struct xfs_inode	*ip)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint64_t		freertx;

	if (!XFS_IS_REALTIME_INODE(ip))
		return false;

	freertx = READ_ONCE(mp->m_sb.sb_frextents);
	return freertx < mp->m_low_rtexts[XFS_LOWSP_5_PCNT];
	if (__percpu_counter_compare(&mp->m_frextents,
				mp->m_low_rtexts[XFS_LOWSP_5_PCNT],
				XFS_FDBLOCKS_BATCH) < 0)
		return true;

	return false;
}
#else
# define xfs_inodegc_want_queue_rt_file(ip)	(false)
+21 −29
Original line number Diff line number Diff line
@@ -1110,24 +1110,33 @@ xfs_fs_writable(
	return true;
}

/* Adjust m_fdblocks or m_frextents. */
int
xfs_mod_fdblocks(
xfs_mod_freecounter(
	struct xfs_mount	*mp,
	struct percpu_counter	*counter,
	int64_t			delta,
	bool			rsvd)
{
	int64_t			lcounter;
	long long		res_used;
	uint64_t		set_aside = 0;
	s32			batch;
	uint64_t		set_aside;
	bool			has_resv_pool;

	ASSERT(counter == &mp->m_fdblocks || counter == &mp->m_frextents);
	has_resv_pool = (counter == &mp->m_fdblocks);
	if (rsvd)
		ASSERT(has_resv_pool);

	if (delta > 0) {
		/*
		 * If the reserve pool is depleted, put blocks back into it
		 * first. Most of the time the pool is full.
		 */
		if (likely(mp->m_resblks == mp->m_resblks_avail)) {
			percpu_counter_add(&mp->m_fdblocks, delta);
		if (likely(!has_resv_pool ||
			   mp->m_resblks == mp->m_resblks_avail)) {
			percpu_counter_add(counter, delta);
			return 0;
		}

@@ -1139,7 +1148,7 @@ xfs_mod_fdblocks(
		} else {
			delta -= res_used;
			mp->m_resblks_avail = mp->m_resblks;
			percpu_counter_add(&mp->m_fdblocks, delta);
			percpu_counter_add(counter, delta);
		}
		spin_unlock(&mp->m_sb_lock);
		return 0;
@@ -1153,7 +1162,7 @@ xfs_mod_fdblocks(
	 * then make everything serialise as we are real close to
	 * ENOSPC.
	 */
	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
	if (__percpu_counter_compare(counter, 2 * XFS_FDBLOCKS_BATCH,
				     XFS_FDBLOCKS_BATCH) < 0)
		batch = 1;
	else
@@ -1170,9 +1179,10 @@ xfs_mod_fdblocks(
	 * problems (i.e. transaction abort, pagecache discards, etc.) than
	 * slightly premature -ENOSPC.
	 */
	if (has_resv_pool)
		set_aside = xfs_fdblocks_unavailable(mp);
	percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
	if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
	percpu_counter_add_batch(counter, delta, batch);
	if (__percpu_counter_compare(counter, set_aside,
				     XFS_FDBLOCKS_BATCH) >= 0) {
		/* we had space! */
		return 0;
@@ -1183,8 +1193,8 @@ xfs_mod_fdblocks(
	 * that took us to ENOSPC.
	 */
	spin_lock(&mp->m_sb_lock);
	percpu_counter_add(&mp->m_fdblocks, -delta);
	if (!rsvd)
	percpu_counter_add(counter, -delta);
	if (!has_resv_pool || !rsvd)
		goto fdblocks_enospc;

	lcounter = (long long)mp->m_resblks_avail + delta;
@@ -1201,24 +1211,6 @@ xfs_mod_fdblocks(
	return -ENOSPC;
}

int
xfs_mod_frextents(
	struct xfs_mount	*mp,
	int64_t			delta)
{
	int64_t			lcounter;
	int			ret = 0;

	spin_lock(&mp->m_sb_lock);
	lcounter = mp->m_sb.sb_frextents + delta;
	if (lcounter < 0)
		ret = -ENOSPC;
	else
		mp->m_sb.sb_frextents = lcounter;
	spin_unlock(&mp->m_sb_lock);
	return ret;
}

/*
 * Used to free the superblock along various error paths.
 */
+16 −3
Original line number Diff line number Diff line
@@ -183,6 +183,8 @@ typedef struct xfs_mount {
	struct percpu_counter	m_icount;	/* allocated inodes counter */
	struct percpu_counter	m_ifree;	/* free inodes counter */
	struct percpu_counter	m_fdblocks;	/* free block counter */
	struct percpu_counter	m_frextents;	/* free rt extent counter */

	/*
	 * Count of data device blocks reserved for delayed allocations,
	 * including indlen blocks.  Does not include allocated CoW staging
@@ -494,9 +496,20 @@ xfs_fdblocks_unavailable(
	return mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
}

extern int	xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
				 bool reserved);
extern int	xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
int xfs_mod_freecounter(struct xfs_mount *mp, struct percpu_counter *counter,
		int64_t delta, bool rsvd);

static inline int
xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta, bool reserved)
{
	return xfs_mod_freecounter(mp, &mp->m_fdblocks, delta, reserved);
}

static inline int
xfs_mod_frextents(struct xfs_mount *mp, int64_t delta)
{
	return xfs_mod_freecounter(mp, &mp->m_frextents, delta, false);
}

extern int	xfs_readsb(xfs_mount_t *, int);
extern void	xfs_freesb(xfs_mount_t *);
Loading