fs/xfs/xfs_icache.c  +48 −114

@@ -43,6 +43,7 @@
 enum xfs_icwalk_goal {
 	/* Goals directly associated with tagged inodes. */
 	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
+	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
 };
 
 #define XFS_ICWALK_NULL_TAG	(-1U)
@@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 #define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
 #define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)
 
+/* Stop scanning after icw_scan_limit inodes. */
+#define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)
+
 #define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
 					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
-					 XFS_ICWALK_FLAG_DROP_PDQUOT)
+					 XFS_ICWALK_FLAG_DROP_PDQUOT | \
+					 XFS_ICWALK_FLAG_SCAN_LIMIT)
 
 /*
  * Allocate and initialise an xfs_inode.
@@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
 	return 0;
 }
 
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- *
- * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
- * separate reclaim walk functions.
- */
-#define XFS_LOOKUP_BATCH	32
-
 #ifdef CONFIG_XFS_QUOTA
 /* Decide if we want to grab this inode to drop its dquots. */
 static bool
@@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
  * Return true if we grabbed it, false otherwise.
  */
 static bool
-xfs_reclaim_inode_grab(
+xfs_reclaim_igrab(
 	struct xfs_inode	*ip)
 {
 	ASSERT(rcu_read_lock_held());
@@ -990,108 +984,13 @@ xfs_reclaim_inode(
 	xfs_iflags_clear(ip, XFS_IRECLAIM);
 }
 
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- *
- * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
- * so that callers that want to block until all dirty inodes are written back
- * and reclaimed can sanely loop.
- */
-static void
-xfs_reclaim_inodes_ag(
-	struct xfs_mount	*mp,
-	int			*nr_to_scan)
-{
-	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag = 0;
-
-	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-		unsigned long	first_index = 0;
-		int		done = 0;
-		int		nr_found = 0;
-
-		ag = pag->pag_agno + 1;
-
-		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
-		do {
-			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-			int	i;
-
-			rcu_read_lock();
-			nr_found = radix_tree_gang_lookup_tag(
-					&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH,
-					XFS_ICI_RECLAIM_TAG);
-			if (!nr_found) {
-				done = 1;
-				rcu_read_unlock();
-				break;
-			}
-
-			/*
-			 * Grab the inodes before we drop the lock. if we found
-			 * nothing, nr == 0 and the loop will be skipped.
-			 */
-			for (i = 0; i < nr_found; i++) {
-				struct xfs_inode *ip = batch[i];
-
-				if (done || !xfs_reclaim_inode_grab(ip))
-					batch[i] = NULL;
-
-				/*
-				 * Update the index for the next lookup. Catch
-				 * overflows into the next AG range which can
-				 * occur if we have inodes in the last block of
-				 * the AG and we are currently pointing to the
-				 * last inode.
-				 *
-				 * Because we may see inodes that are from the
-				 * wrong AG due to RCU freeing and
-				 * reallocation, only update the index if it
-				 * lies in this AG. It was a race that lead us
-				 * to see this inode, so another lookup from
-				 * the same index will not find it again.
-				 */
-				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-								pag->pag_agno)
-					continue;
-				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-					done = 1;
-			}
-
-			/* unlock now we've grabbed the inodes. */
-			rcu_read_unlock();
-
-			for (i = 0; i < nr_found; i++) {
-				if (batch[i])
-					xfs_reclaim_inode(batch[i], pag);
-			}
-
-			*nr_to_scan -= XFS_LOOKUP_BATCH;
-			cond_resched();
-		} while (nr_found && !done && *nr_to_scan > 0);
-
-		if (done)
-			first_index = 0;
-		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
-		xfs_perag_put(pag);
-	}
-}
-
 void
 xfs_reclaim_inodes(
 	struct xfs_mount	*mp)
 {
-	int		nr_to_scan = INT_MAX;
-
 	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
 		xfs_ail_push_all_sync(mp->m_ail);
-		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	}
 }
@@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
 	struct xfs_mount	*mp,
 	int			nr_to_scan)
 {
+	struct xfs_eofblocks	eofb = {
+		.eof_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
+		.icw_scan_limit	= nr_to_scan,
+	};
+
 	/* kick background reclaimer and push the AIL */
 	xfs_reclaim_work_queue(mp);
 	xfs_ail_push_all(mp->m_ail);
 
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
 	return 0;
 }
@@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
 {
 	struct xfs_mount *mp = container_of(to_delayed_work(work),
 					struct xfs_mount, m_reclaim_work);
-	int		nr_to_scan = INT_MAX;
 
-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	xfs_reclaim_work_queue(mp);
 }
@@ -1693,6 +1596,15 @@ xfs_blockgc_free_quota(
 
 /* XFS Inode Cache Walking Code */
 
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH	32
+
 /*
  * Decide if we want to grab this inode in anticipation of doing work towards
  * the goal.
@@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
 		return xfs_dqrele_igrab(ip);
 	case XFS_ICWALK_BLOCKGC:
 		return xfs_blockgc_igrab(ip);
+	case XFS_ICWALK_RECLAIM:
+		return xfs_reclaim_igrab(ip);
 	default:
 		return false;
 	}
@@ -1720,6 +1634,7 @@ static inline int
 xfs_icwalk_process_inode(
 	enum xfs_icwalk_goal	goal,
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	struct xfs_eofblocks	*eofb)
 {
 	int			error = 0;
@@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
 	case XFS_ICWALK_BLOCKGC:
 		error = xfs_blockgc_scan_inode(ip, eofb);
 		break;
+	case XFS_ICWALK_RECLAIM:
+		xfs_reclaim_inode(ip, pag);
+		break;
 	}
 	return error;
 }
@@ -1755,6 +1673,9 @@ xfs_icwalk_ag(
 restart:
 	done = false;
 	skipped = 0;
-	first_index = 0;
+	if (goal == XFS_ICWALK_RECLAIM)
+		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
+	else
+		first_index = 0;
 	nr_found = 0;
 	do {
@@ -1776,6 +1697,7 @@ xfs_icwalk_ag(
 				XFS_LOOKUP_BATCH, tag);
 
 		if (!nr_found) {
+			done = true;
 			rcu_read_unlock();
 			break;
 		}
@@ -1815,7 +1737,8 @@ xfs_icwalk_ag(
 		for (i = 0; i < nr_found; i++) {
 			if (!batch[i])
 				continue;
-			error = xfs_icwalk_process_inode(goal, batch[i], eofb);
+			error = xfs_icwalk_process_inode(goal, batch[i], pag,
+					eofb);
 			if (error == -EAGAIN) {
 				skipped++;
 				continue;
@@ -1830,8 +1753,19 @@ xfs_icwalk_ag(
 
 		cond_resched();
 
+		if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
+			eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
+			if (eofb->icw_scan_limit <= 0)
+				break;
+		}
 	} while (nr_found && !done);
 
+	if (goal == XFS_ICWALK_RECLAIM) {
+		if (done)
+			first_index = 0;
+		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
+	}
+
 	if (skipped) {
 		delay(1);
 		goto restart;

fs/xfs/xfs_icache.h  +1 −0

@@ -15,6 +15,7 @@ struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
+	int		icw_scan_limit;
 };
 
 /*

fs/xfs/xfs_trace.h  +4 −1

@@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 		__field(uint32_t, gid)
 		__field(prid_t, prid)
 		__field(__u64, min_file_size)
+		__field(int, scan_limit)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
@@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 				eofb->eof_gid) : 0;
 		__entry->prid = eofb ? eofb->eof_prid : 0;
 		__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
+		__entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
+	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->flags,
 		  __entry->uid,
 		  __entry->gid,
 		  __entry->prid,
 		  __entry->min_file_size,
+		  __entry->scan_limit,
 		  (char *)__entry->caller_ip)
 );
 
 #define DEFINE_EOFBLOCKS_EVENT(name) \
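
Taken together, the xfs_icache.c changes fold the old open-coded reclaim walk into the generic xfs_icwalk machinery: reclaim becomes a walk goal (XFS_ICWALK_RECLAIM), and the old nr_to_scan argument becomes a scan budget (icw_scan_limit) that the walker charges once per batch. Note that the budget is decremented by a full XFS_LOOKUP_BATCH per pass even if the batch came up short, exactly as the removed *nr_to_scan -= XFS_LOOKUP_BATCH did. A minimal userspace sketch of that budgeting behaviour (plain C; the item counts and function names here are invented for illustration, not kernel API):

#include <stdio.h>

#define LOOKUP_BATCH	32	/* mirrors XFS_LOOKUP_BATCH */

/*
 * Walk "nitems" items in batches, charging a scan budget per batch the
 * way xfs_icwalk_ag() charges icw_scan_limit: a whole batch is deducted
 * each pass, so the walk stops promptly once the budget is exhausted.
 */
static int walk_with_budget(int nitems, int scan_limit)
{
	int processed = 0;

	while (processed < nitems) {
		int batch = nitems - processed;

		if (batch > LOOKUP_BATCH)
			batch = LOOKUP_BATCH;

		/* ... process "batch" items here ... */
		processed += batch;

		scan_limit -= LOOKUP_BATCH;	/* charge a full batch */
		if (scan_limit <= 0)
			break;			/* budget exhausted */
	}
	return processed;
}

int main(void)
{
	/* A limit of 100 allows ceil(100/32) = 4 batches, i.e. 128 items. */
	printf("processed %d of 1000\n", walk_with_budget(1000, 100));
	return 0;
}

With a limit of 100 the sketch processes four full batches (128 items), matching the diff's semantics: the limit bounds work at batch granularity rather than per inode, which keeps the accounting out of the inner loop.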
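
The other piece of reclaim-specific state that moves into xfs_icwalk_ag() is the per-AG cursor: a reclaim walk resumes from pag_ici_reclaim_cursor and writes the cursor back when it finishes, resetting it to zero only when the walk ran to completion (done). A rough standalone sketch of that resume/reset contract (plain C; the ag_state struct and its fields are made up for the example):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for per-AG state; "cursor" plays pag_ici_reclaim_cursor. */
struct ag_state {
	unsigned long	cursor;
	unsigned long	nr_inodes;	/* pretend index space [0, nr_inodes) */
};

/*
 * Scan up to "budget" indices starting from the saved cursor.  On a
 * complete pass the cursor resets to 0 so the next scan starts over;
 * on an interrupted pass it is saved so the next scan resumes where
 * this one stopped -- the same contract xfs_icwalk_ag() keeps for the
 * XFS_ICWALK_RECLAIM goal.
 */
static void scan_ag(struct ag_state *ag, unsigned long budget)
{
	unsigned long index = ag->cursor;	/* resume point */
	bool done = false;

	while (budget--) {
		if (index >= ag->nr_inodes) {
			done = true;		/* walked off the end */
			break;
		}
		/* ... try to reclaim the inode at "index" ... */
		index++;
	}

	ag->cursor = done ? 0 : index;		/* reset or save resume point */
}

int main(void)
{
	struct ag_state ag = { .cursor = 0, .nr_inodes = 100 };

	scan_ag(&ag, 60);	/* partial pass: cursor saved at 60 */
	printf("cursor after partial pass: %lu\n", ag.cursor);
	scan_ag(&ag, 60);	/* finishes the AG: cursor resets to 0 */
	printf("cursor after complete pass: %lu\n", ag.cursor);
	return 0;
}

Gating the cursor handling on goal == XFS_ICWALK_RECLAIM means the blockgc and dqrele walks still start every scan from index zero, exactly as they did before this change.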