Commit f1bc5c56 authored by Darrick J. Wong's avatar Darrick J. Wong
Browse files

xfs: merge xfs_reclaim_inodes_ag into xfs_inode_walk_ag



Merge these two inode walk loops together, since they're pretty similar
now.

Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
parent 9d5ee837
Loading
Loading
Loading
Loading
+48 −114
Original line number Diff line number Diff line
@@ -43,6 +43,7 @@ enum xfs_icwalk_goal {

	/* Goals directly associated with tagged inodes. */
	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
};

#define XFS_ICWALK_NULL_TAG	(-1U)
@@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
#define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
#define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)

/* Stop scanning after icw_scan_limit inodes. */
#define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)

#define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
					 XFS_ICWALK_FLAG_DROP_PDQUOT)
					 XFS_ICWALK_FLAG_DROP_PDQUOT | \
					 XFS_ICWALK_FLAG_SCAN_LIMIT)

/*
 * Allocate and initialise an xfs_inode.
@@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
	return 0;
}

/*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 *
 * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
 * separate reclaim walk functions.
 */
#define XFS_LOOKUP_BATCH	32

#ifdef CONFIG_XFS_QUOTA
/* Decide if we want to grab this inode to drop its dquots. */
static bool
@@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
 * Return true if we grabbed it, false otherwise.
 */
static bool
xfs_reclaim_inode_grab(
xfs_reclaim_igrab(
	struct xfs_inode	*ip)
{
	ASSERT(rcu_read_lock_held());
@@ -990,108 +984,13 @@ xfs_reclaim_inode(
	xfs_iflags_clear(ip, XFS_IRECLAIM);
}

/*
 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
 * corrupted, we still want to try to reclaim all the inodes. If we don't,
 * then a shut down during filesystem unmount reclaim walk leak all the
 * unreclaimed inodes.
 *
 * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
 * so that callers that want to block until all dirty inodes are written back
 * and reclaimed can sanely loop.
 */
static void
xfs_reclaim_inodes_ag(
	struct xfs_mount	*mp,
	int			*nr_to_scan)
{
	struct xfs_perag	*pag;
	xfs_agnumber_t		ag = 0;

	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
		unsigned long	first_index = 0;
		int		done = 0;
		int		nr_found = 0;

		ag = pag->pag_agno + 1;

		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
		do {
			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
			int	i;

			rcu_read_lock();
			nr_found = radix_tree_gang_lookup_tag(
					&pag->pag_ici_root,
					(void **)batch, first_index,
					XFS_LOOKUP_BATCH,
					XFS_ICI_RECLAIM_TAG);
			if (!nr_found) {
				done = 1;
				rcu_read_unlock();
				break;
			}

			/*
			 * Grab the inodes before we drop the lock. if we found
			 * nothing, nr == 0 and the loop will be skipped.
			 */
			for (i = 0; i < nr_found; i++) {
				struct xfs_inode *ip = batch[i];

				if (done || !xfs_reclaim_inode_grab(ip))
					batch[i] = NULL;

				/*
				 * Update the index for the next lookup. Catch
				 * overflows into the next AG range which can
				 * occur if we have inodes in the last block of
				 * the AG and we are currently pointing to the
				 * last inode.
				 *
				 * Because we may see inodes that are from the
				 * wrong AG due to RCU freeing and
				 * reallocation, only update the index if it
				 * lies in this AG. It was a race that lead us
				 * to see this inode, so another lookup from
				 * the same index will not find it again.
				 */
				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
								pag->pag_agno)
					continue;
				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
					done = 1;
			}

			/* unlock now we've grabbed the inodes. */
			rcu_read_unlock();

			for (i = 0; i < nr_found; i++) {
				if (batch[i])
					xfs_reclaim_inode(batch[i], pag);
			}

			*nr_to_scan -= XFS_LOOKUP_BATCH;
			cond_resched();
		} while (nr_found && !done && *nr_to_scan > 0);

		if (done)
			first_index = 0;
		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
		xfs_perag_put(pag);
	}
}

void
xfs_reclaim_inodes(
	struct xfs_mount	*mp)
{
	int		nr_to_scan = INT_MAX;

	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		xfs_ail_push_all_sync(mp->m_ail);
		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
	}
}

@@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	int			nr_to_scan)
{
	struct xfs_eofblocks	eofb = {
		.eof_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
		.icw_scan_limit	= nr_to_scan,
	};

	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
	return 0;
}

@@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
{
	struct xfs_mount *mp = container_of(to_delayed_work(work),
					struct xfs_mount, m_reclaim_work);
	int		nr_to_scan = INT_MAX;

	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
	xfs_reclaim_work_queue(mp);
}

@@ -1693,6 +1596,15 @@ xfs_blockgc_free_quota(

/* XFS Inode Cache Walking Code */

/*
 * The inode lookup is done in batches to keep the amount of lock traffic and
 * radix tree lookups to a minimum. The batch size is a trade off between
 * lookup reduction and stack usage. This is in the reclaim path, so we can't
 * be too greedy.
 */
#define XFS_LOOKUP_BATCH	32


/*
 * Decide if we want to grab this inode in anticipation of doing work towards
 * the goal.
@@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
		return xfs_dqrele_igrab(ip);
	case XFS_ICWALK_BLOCKGC:
		return xfs_blockgc_igrab(ip);
	case XFS_ICWALK_RECLAIM:
		return xfs_reclaim_igrab(ip);
	default:
		return false;
	}
@@ -1720,6 +1634,7 @@ static inline int
xfs_icwalk_process_inode(
	enum xfs_icwalk_goal	goal,
	struct xfs_inode	*ip,
	struct xfs_perag	*pag,
	struct xfs_eofblocks	*eofb)
{
	int			error = 0;
@@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
	case XFS_ICWALK_BLOCKGC:
		error = xfs_blockgc_scan_inode(ip, eofb);
		break;
	case XFS_ICWALK_RECLAIM:
		xfs_reclaim_inode(ip, pag);
		break;
	}
	return error;
}
@@ -1755,6 +1673,9 @@ xfs_icwalk_ag(
restart:
	done = false;
	skipped = 0;
	if (goal == XFS_ICWALK_RECLAIM)
		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
	else
		first_index = 0;
	nr_found = 0;
	do {
@@ -1776,6 +1697,7 @@ xfs_icwalk_ag(
					XFS_LOOKUP_BATCH, tag);

		if (!nr_found) {
			done = true;
			rcu_read_unlock();
			break;
		}
@@ -1815,7 +1737,8 @@ xfs_icwalk_ag(
		for (i = 0; i < nr_found; i++) {
			if (!batch[i])
				continue;
			error = xfs_icwalk_process_inode(goal, batch[i], eofb);
			error = xfs_icwalk_process_inode(goal, batch[i], pag,
					eofb);
			if (error == -EAGAIN) {
				skipped++;
				continue;
@@ -1830,8 +1753,19 @@ xfs_icwalk_ag(

		cond_resched();

		if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
			eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
			if (eofb->icw_scan_limit <= 0)
				break;
		}
	} while (nr_found && !done);

	if (goal == XFS_ICWALK_RECLAIM) {
		if (done)
			first_index = 0;
		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
	}

	if (skipped) {
		delay(1);
		goto restart;
+1 −0
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ struct xfs_eofblocks {
	kgid_t		eof_gid;
	prid_t		eof_prid;
	__u64		eof_min_file_size;
	int		icw_scan_limit;
};

/*
+4 −1
Original line number Diff line number Diff line
@@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
		__field(uint32_t, gid)
		__field(prid_t, prid)
		__field(__u64, min_file_size)
		__field(int, scan_limit)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
@@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
						eofb->eof_gid) : 0;
		__entry->prid = eofb ? eofb->eof_prid : 0;
		__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
		__entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
		__entry->caller_ip = caller_ip;
	),
	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->flags,
		  __entry->uid,
		  __entry->gid,
		  __entry->prid,
		  __entry->min_file_size,
		  __entry->scan_limit,
		  (char *)__entry->caller_ip)
);
#define DEFINE_EOFBLOCKS_EVENT(name)	\