fs/xfs/xfs_icache.c  +48 −114

@@ -43,6 +43,7 @@
 enum xfs_icwalk_goal {
 	/* Goals directly associated with tagged inodes. */
 	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
+	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
 };
 
 #define XFS_ICWALK_NULL_TAG	(-1U)
@@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 #define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
 #define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)
 
+/* Stop scanning after icw_scan_limit inodes. */
+#define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)
+
 #define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
 					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
-					 XFS_ICWALK_FLAG_DROP_PDQUOT)
+					 XFS_ICWALK_FLAG_DROP_PDQUOT | \
+					 XFS_ICWALK_FLAG_SCAN_LIMIT)
 
 /*
  * Allocate and initialise an xfs_inode.
@@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
 	return 0;
 }
 
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- *
- * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
- * separate reclaim walk functions.
- */
-#define XFS_LOOKUP_BATCH	32
-
 #ifdef CONFIG_XFS_QUOTA
 /* Decide if we want to grab this inode to drop its dquots. */
 static bool
@@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
  * Return true if we grabbed it, false otherwise.
  */
 static bool
-xfs_reclaim_inode_grab(
+xfs_reclaim_igrab(
 	struct xfs_inode	*ip)
 {
 	ASSERT(rcu_read_lock_held());
@@ -990,108 +984,13 @@ xfs_reclaim_inode(
 	xfs_iflags_clear(ip, XFS_IRECLAIM);
 }
 
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- *
- * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
- * so that callers that want to block until all dirty inodes are written back
- * and reclaimed can sanely loop.
- */
-static void
-xfs_reclaim_inodes_ag(
-	struct xfs_mount	*mp,
-	int			*nr_to_scan)
-{
-	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag = 0;
-
-	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-		unsigned long	first_index = 0;
-		int		done = 0;
-		int		nr_found = 0;
-
-		ag = pag->pag_agno + 1;
-
-		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
-		do {
-			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-			int	i;
-
-			rcu_read_lock();
-			nr_found = radix_tree_gang_lookup_tag(
-					&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH,
-					XFS_ICI_RECLAIM_TAG);
-			if (!nr_found) {
-				done = 1;
-				rcu_read_unlock();
-				break;
-			}
-
-			/*
-			 * Grab the inodes before we drop the lock. if we found
-			 * nothing, nr == 0 and the loop will be skipped.
-			 */
-			for (i = 0; i < nr_found; i++) {
-				struct xfs_inode *ip = batch[i];
-
-				if (done || !xfs_reclaim_inode_grab(ip))
-					batch[i] = NULL;
-
-				/*
-				 * Update the index for the next lookup. Catch
-				 * overflows into the next AG range which can
-				 * occur if we have inodes in the last block of
-				 * the AG and we are currently pointing to the
-				 * last inode.
-				 *
-				 * Because we may see inodes that are from the
-				 * wrong AG due to RCU freeing and
-				 * reallocation, only update the index if it
-				 * lies in this AG. It was a race that lead us
-				 * to see this inode, so another lookup from
-				 * the same index will not find it again.
-				 */
-				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-								pag->pag_agno)
-					continue;
-				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-					done = 1;
-			}
-
-			/* unlock now we've grabbed the inodes. */
-			rcu_read_unlock();
-
-			for (i = 0; i < nr_found; i++) {
-				if (batch[i])
-					xfs_reclaim_inode(batch[i], pag);
-			}
-
-			*nr_to_scan -= XFS_LOOKUP_BATCH;
-			cond_resched();
-		} while (nr_found && !done && *nr_to_scan > 0);
-
-		if (done)
-			first_index = 0;
-		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
-		xfs_perag_put(pag);
-	}
-}
-
 void
 xfs_reclaim_inodes(
 	struct xfs_mount	*mp)
 {
-	int		nr_to_scan = INT_MAX;
-
 	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
 		xfs_ail_push_all_sync(mp->m_ail);
-		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	}
 }
@@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
 	struct xfs_mount	*mp,
 	int			nr_to_scan)
 {
+	struct xfs_eofblocks	eofb = {
+		.eof_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
+		.icw_scan_limit	= nr_to_scan,
+	};
+
 	/* kick background reclaimer and push the AIL */
 	xfs_reclaim_work_queue(mp);
 	xfs_ail_push_all(mp->m_ail);
 
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
 	return 0;
 }
@@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
 {
 	struct xfs_mount *mp = container_of(to_delayed_work(work),
 					struct xfs_mount, m_reclaim_work);
-	int		nr_to_scan = INT_MAX;
 
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	xfs_reclaim_work_queue(mp);
 }
@@ -1693,6 +1596,15 @@ xfs_blockgc_free_quota(
 
 /* XFS Inode Cache Walking Code */
 
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH	32
+
 /*
  * Decide if we want to grab this inode in anticipation of doing work towards
  * the goal.
@@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
 		return xfs_dqrele_igrab(ip);
 	case XFS_ICWALK_BLOCKGC:
 		return xfs_blockgc_igrab(ip);
+	case XFS_ICWALK_RECLAIM:
+		return xfs_reclaim_igrab(ip);
 	default:
 		return false;
 	}
@@ -1720,6 +1634,7 @@ static inline int
 xfs_icwalk_process_inode(
 	enum xfs_icwalk_goal	goal,
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	struct xfs_eofblocks	*eofb)
 {
 	int			error = 0;
@@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
 	case XFS_ICWALK_BLOCKGC:
 		error = xfs_blockgc_scan_inode(ip, eofb);
 		break;
+	case XFS_ICWALK_RECLAIM:
+		xfs_reclaim_inode(ip, pag);
+		break;
 	}
 	return error;
 }
@@ -1755,6 +1673,9 @@ xfs_icwalk_ag(
 restart:
 	done = false;
 	skipped = 0;
-	first_index = 0;
+	if (goal == XFS_ICWALK_RECLAIM)
+		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
+	else
+		first_index = 0;
 	nr_found = 0;
 	do {
@@ -1776,6 +1697,7 @@ xfs_icwalk_ag(
 				XFS_LOOKUP_BATCH, tag);
 
 		if (!nr_found) {
+			done = true;
 			rcu_read_unlock();
 			break;
 		}
@@ -1815,7 +1737,8 @@ xfs_icwalk_ag(
 		for (i = 0; i < nr_found; i++) {
 			if (!batch[i])
 				continue;
-			error = xfs_icwalk_process_inode(goal, batch[i], eofb);
+			error = xfs_icwalk_process_inode(goal, batch[i], pag,
+					eofb);
 			if (error == -EAGAIN) {
 				skipped++;
 				continue;
@@ -1830,8 +1753,19 @@ xfs_icwalk_ag(
 
 		cond_resched();
 
+		if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
+			eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
+			if (eofb->icw_scan_limit <= 0)
+				break;
+		}
 	} while (nr_found && !done);
 
+	if (goal == XFS_ICWALK_RECLAIM) {
+		if (done)
+			first_index = 0;
+		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
+	}
+
 	if (skipped) {
 		delay(1);
 		goto restart;

fs/xfs/xfs_icache.h  +1 −0

@@ -15,6 +15,7 @@ struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
+	int		icw_scan_limit;
 };
 
 /*

fs/xfs/xfs_trace.h  +4 −1

@@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 		__field(uint32_t, gid)
 		__field(prid_t, prid)
 		__field(__u64, min_file_size)
+		__field(int, scan_limit)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
@@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 				eofb->eof_gid) : 0;
 		__entry->prid = eofb ? eofb->eof_prid : 0;
 		__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
+		__entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
+	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->flags,
 		  __entry->uid,
 		  __entry->gid,
 		  __entry->prid,
 		  __entry->min_file_size,
+		  __entry->scan_limit,
 		  (char *)__entry->caller_ip)
 );
 
 #define DEFINE_EOFBLOCKS_EVENT(name) \
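
Taken together, the xfs_icache.c changes fold the old open-coded reclaim walk into the generic xfs_icwalk machinery: reclaim becomes a walk goal (XFS_ICWALK_RECLAIM), and the old nr_to_scan argument becomes a scan budget (icw_scan_limit) that the walker charges once per batch. Note that the budget is decremented by a full XFS_LOOKUP_BATCH per pass even if the batch came up short, exactly as the removed *nr_to_scan -= XFS_LOOKUP_BATCH did. A minimal userspace sketch of that budgeting behaviour (plain C; the item counts and function names here are invented for illustration, not kernel API):

#include <stdio.h>

#define LOOKUP_BATCH	32	/* mirrors XFS_LOOKUP_BATCH */

/*
 * Walk "nitems" items in batches, charging a scan budget per batch the
 * way xfs_icwalk_ag() charges icw_scan_limit: a whole batch is deducted
 * each pass, so the walk stops promptly once the budget is exhausted.
 */
static int walk_with_budget(int nitems, int scan_limit)
{
	int processed = 0;

	while (processed < nitems) {
		int batch = nitems - processed;

		if (batch > LOOKUP_BATCH)
			batch = LOOKUP_BATCH;

		/* ... process "batch" items here ... */
		processed += batch;

		scan_limit -= LOOKUP_BATCH;	/* charge a full batch */
		if (scan_limit <= 0)
			break;			/* budget exhausted */
	}
	return processed;
}

int main(void)
{
	/* A limit of 100 allows ceil(100/32) = 4 batches, i.e. 128 items. */
	printf("processed %d of 1000\n", walk_with_budget(1000, 100));
	return 0;
}

With a limit of 100 the sketch processes four full batches (128 items), matching the diff's semantics: the limit bounds work at batch granularity rather than per inode, which keeps the accounting out of the inner loop.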
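
The other piece of reclaim-specific state that moves into xfs_icwalk_ag() is the per-AG cursor: a reclaim walk resumes from pag_ici_reclaim_cursor and writes the cursor back when it finishes, resetting it to zero only when the walk ran to completion (done). A rough standalone sketch of that resume/reset contract (plain C; the ag_state struct and its fields are made up for the example):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for per-AG state; "cursor" plays pag_ici_reclaim_cursor. */
struct ag_state {
	unsigned long	cursor;
	unsigned long	nr_inodes;	/* pretend index space [0, nr_inodes) */
};

/*
 * Scan up to "budget" indices starting from the saved cursor.  On a
 * complete pass the cursor resets to 0 so the next scan starts over;
 * on an interrupted pass it is saved so the next scan resumes where
 * this one stopped -- the same contract xfs_icwalk_ag() keeps for the
 * XFS_ICWALK_RECLAIM goal.
 */
static void scan_ag(struct ag_state *ag, unsigned long budget)
{
	unsigned long index = ag->cursor;	/* resume point */
	bool done = false;

	while (budget--) {
		if (index >= ag->nr_inodes) {
			done = true;		/* walked off the end */
			break;
		}
		/* ... try to reclaim the inode at "index" ... */
		index++;
	}

	ag->cursor = done ? 0 : index;		/* reset or save resume point */
}

int main(void)
{
	struct ag_state ag = { .cursor = 0, .nr_inodes = 100 };

	scan_ag(&ag, 60);	/* partial pass: cursor saved at 60 */
	printf("cursor after partial pass: %lu\n", ag.cursor);
	scan_ag(&ag, 60);	/* finishes the AG: cursor resets to 0 */
	printf("cursor after complete pass: %lu\n", ag.cursor);
	return 0;
}

Gating the cursor handling on goal == XFS_ICWALK_RECLAIM means the blockgc and dqrele walks still start every scan from index zero, exactly as they did before this change.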