Commit cbfecb92 authored by Lukas Czerner's avatar Lukas Czerner Committed by Theodore Ts'o
Browse files

fs: record I_DIRTY_TIME even if inode already has I_DIRTY_INODE



Currently the I_DIRTY_TIME will never get set if the inode already has
I_DIRTY_INODE with assumption that it supersedes I_DIRTY_TIME.  That's
true, however ext4 will only update the on-disk inode in
->dirty_inode(), not on actual writeback. As a result if the inode
already has I_DIRTY_INODE state by the time we get to
__mark_inode_dirty() only with I_DIRTY_TIME, the time was already filled
into on-disk inode and will not get updated until the next I_DIRTY_INODE
update, which might never come if we crash or get a power failure.

The problem can be reproduced on ext4 by running xfstest generic/622
with -o iversion mount option.

Fix it by allowing I_DIRTY_TIME to be set even if the inode already has
I_DIRTY_INODE. Also make sure that the case is properly handled in
writeback_single_inode() as well. Additionally changes in
xfs_fs_dirty_inode() was made to accommodate for I_DIRTY_TIME in flag.

Thanks Jan Kara for suggestions on how to make this work properly.

Cc: Dave Chinner <david@fromorbit.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: stable@kernel.org
Signed-off-by: default avatarLukas Czerner <lczerner@redhat.com>
Suggested-by: default avatarJan Kara <jack@suse.cz>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20220825100657.44217-1-lczerner@redhat.com


Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 50f094a5
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -274,6 +274,9 @@ or bottom half).
	This is specifically for the inode itself being marked dirty,
	not its data.  If the update needs to be persisted by fdatasync(),
	then I_DIRTY_DATASYNC will be set in the flags argument.
	I_DIRTY_TIME will be set in the flags in case lazytime is enabled
	and struct inode has times updated since the last ->dirty_inode
	call.

``write_inode``
	this method is called when the VFS needs to write an inode to
+25 −12
Original line number Diff line number Diff line
@@ -1718,9 +1718,14 @@ static int writeback_single_inode(struct inode *inode,
	 */
	if (!(inode->i_state & I_DIRTY_ALL))
		inode_cgwb_move_to_attached(inode, wb);
	else if (!(inode->i_state & I_SYNC_QUEUED) &&
		 (inode->i_state & I_DIRTY))
	else if (!(inode->i_state & I_SYNC_QUEUED)) {
		if ((inode->i_state & I_DIRTY))
			redirty_tail_locked(inode, wb);
		else if (inode->i_state & I_DIRTY_TIME) {
			inode->dirtied_when = jiffies;
			inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
		}
	}

	spin_unlock(&wb->list_lock);
	inode_sync_complete(inode);
@@ -2369,6 +2374,20 @@ void __mark_inode_dirty(struct inode *inode, int flags)
	trace_writeback_mark_inode_dirty(inode, flags);

	if (flags & I_DIRTY_INODE) {
		/*
		 * Inode timestamp update will piggback on this dirtying.
		 * We tell ->dirty_inode callback that timestamps need to
		 * be updated by setting I_DIRTY_TIME in flags.
		 */
		if (inode->i_state & I_DIRTY_TIME) {
			spin_lock(&inode->i_lock);
			if (inode->i_state & I_DIRTY_TIME) {
				inode->i_state &= ~I_DIRTY_TIME;
				flags |= I_DIRTY_TIME;
			}
			spin_unlock(&inode->i_lock);
		}

		/*
		 * Notify the filesystem about the inode being dirtied, so that
		 * (if needed) it can update on-disk fields and journal the
@@ -2378,7 +2397,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
		 */
		trace_writeback_dirty_inode_start(inode, flags);
		if (sb->s_op->dirty_inode)
			sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
			sb->s_op->dirty_inode(inode,
				flags & (I_DIRTY_INODE | I_DIRTY_TIME));
		trace_writeback_dirty_inode(inode, flags);

		/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
@@ -2399,21 +2419,15 @@ void __mark_inode_dirty(struct inode *inode, int flags)
	 */
	smp_mb();

	if (((inode->i_state & flags) == flags) ||
	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
	if ((inode->i_state & flags) == flags)
		return;

	spin_lock(&inode->i_lock);
	if (dirtytime && (inode->i_state & I_DIRTY_INODE))
		goto out_unlock_inode;
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		inode_attach_wb(inode, NULL);

		/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
		if (flags & I_DIRTY_INODE)
			inode->i_state &= ~I_DIRTY_TIME;
		inode->i_state |= flags;

		/*
@@ -2486,7 +2500,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
out_unlock:
	if (wb)
		spin_unlock(&wb->list_lock);
out_unlock_inode:
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(__mark_inode_dirty);
+8 −2
Original line number Diff line number Diff line
@@ -653,7 +653,7 @@ xfs_fs_destroy_inode(
static void
xfs_fs_dirty_inode(
	struct inode			*inode,
	int				flag)
	int				flags)
{
	struct xfs_inode		*ip = XFS_I(inode);
	struct xfs_mount		*mp = ip->i_mount;
@@ -661,7 +661,13 @@ xfs_fs_dirty_inode(

	if (!(inode->i_sb->s_flags & SB_LAZYTIME))
		return;
	if (flag != I_DIRTY_SYNC || !(inode->i_state & I_DIRTY_TIME))

	/*
	 * Only do the timestamp update if the inode is dirty (I_DIRTY_SYNC)
	 * and has dirty timestamp (I_DIRTY_TIME). I_DIRTY_TIME can be passed
	 * in flags possibly together with I_DIRTY_SYNC.
	 */
	if ((flags & ~I_DIRTY_TIME) != I_DIRTY_SYNC || !(flags & I_DIRTY_TIME))
		return;

	if (xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp))
+5 −4
Original line number Diff line number Diff line
@@ -2371,13 +2371,14 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
 *			don't have to write inode on fdatasync() when only
 *			e.g. the timestamps have changed.
 * I_DIRTY_PAGES	Inode has dirty pages.  Inode itself may be clean.
 * I_DIRTY_TIME		The inode itself only has dirty timestamps, and the
 * I_DIRTY_TIME		The inode itself has dirty timestamps, and the
 *			lazytime mount option is enabled.  We keep track of this
 *			separately from I_DIRTY_SYNC in order to implement
 *			lazytime.  This gets cleared if I_DIRTY_INODE
 *			(I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set.  I.e.
 *			either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
 *			i_state, but not both.  I_DIRTY_PAGES may still be set.
 *			(I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. But
 *			I_DIRTY_TIME can still be set if I_DIRTY_SYNC is already
 *			in place because writeback might already be in progress
 *			and we don't want to lose the time update
 * I_NEW		Serves as both a mutex and completion notification.
 *			New inodes set I_NEW.  If two processes both create
 *			the same inode, one of them will release its inode and