Commit e85c81ba authored by Xin Yin's avatar Xin Yin Committed by Theodore Ts'o
Browse files

ext4: fast commit may not fallback for ineligible commit



For the following scenario:
1. jbd starts committing transaction n
2. task A gets a new handle for transaction n+1
3. task A does some ineligible actions and marks FC_INELIGIBLE
4. jbd completes transaction n and clears FC_INELIGIBLE
5. task A calls fsync

In this case fast commit will not fall back to a full commit, and
transaction n+1 is also not handled by jbd.

Make ext4_fc_mark_ineligible() also record the transaction tid of the
latest ineligible case. When ext4_fc_cleanup() is called, check the
current transaction tid; if it is smaller than the latest ineligible
tid, do not clear EXT4_MF_FC_INELIGIBLE.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: Ritesh Harjani <riteshh@linux.ibm.com>
Suggested-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com


Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
parent 31a074a0
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -1749,6 +1749,7 @@ struct ext4_sb_info {
	spinlock_t s_fc_lock;
	struct buffer_head *s_fc_bh;
	struct ext4_fc_stats s_fc_stats;
	tid_t s_fc_ineligible_tid;
#ifdef CONFIG_EXT4_DEBUG
	int s_fc_debug_max_replay;
#endif
@@ -2925,7 +2926,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
			    struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode);
+2 −2
Original line number Diff line number Diff line
@@ -5336,7 +5336,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
		ret = PTR_ERR(handle);
		goto out_mmap;
	}
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode, 0);
@@ -5476,7 +5476,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
		ret = PTR_ERR(handle);
		goto out_mmap;
	}
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);

	/* Expand file to avoid data loss if there is error while shifting */
	inode->i_size += len;
+25 −8
Original line number Diff line number Diff line
@@ -300,18 +300,32 @@ void ext4_fc_del(struct inode *inode)
}

/*
 * Mark file system as fast commit ineligible, and record latest
 * ineligible transaction tid. This means until the recorded
 * transaction, commit operation would result in a full jbd2 commit.
 *
 * @sb:     super block of the file system being marked
 * @reason: EXT4_FC_REASON_* value, used as the index into the
 *          per-reason ineligibility statistics
 * @handle: journal handle of the caller; when it is NULL or an ERR_PTR
 *          value the tid of the journal's running transaction is used
 *          instead (0 if no transaction is running)
 *
 * Does nothing when fast commit is not enabled on @sb or when the file
 * system is in fast commit replay.
 */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	tid_t tid;

	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
	    (sbi->s_mount_state & EXT4_FC_REPLAY))
		return;

	ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);

	if (handle && !IS_ERR(handle)) {
		tid = handle->h_transaction->t_tid;
	} else {
		/* No usable handle: fall back to the running transaction. */
		read_lock(&sbi->s_journal->j_state_lock);
		tid = sbi->s_journal->j_running_transaction ?
				sbi->s_journal->j_running_transaction->t_tid : 0;
		read_unlock(&sbi->s_journal->j_state_lock);
	}

	/*
	 * Only ever move the recorded tid forward.
	 * NOTE(review): this is a plain '<' comparison and does not account
	 * for tid wraparound; confirm whether a wrap-safe helper such as
	 * jbd2's tid_gt() should be used here.
	 */
	spin_lock(&sbi->s_fc_lock);
	if (sbi->s_fc_ineligible_tid < tid)
		sbi->s_fc_ineligible_tid = tid;
	spin_unlock(&sbi->s_fc_lock);

	/*
	 * The file system is already marked ineligible at this point; an
	 * out-of-range reason only means we cannot account it, so warn and
	 * skip the counter update rather than write outside the array.
	 */
	if (WARN_ON(reason < 0 || reason >= EXT4_FC_REASON_MAX))
		return;
	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
}
@@ -387,7 +401,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
	mutex_unlock(&ei->i_fc_lock);
	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
	if (!node) {
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
		mutex_lock(&ei->i_fc_lock);
		return -ENOMEM;
	}
@@ -400,7 +414,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
		if (!node->fcd_name.name) {
			kmem_cache_free(ext4_fc_dentry_cachep, node);
			ext4_fc_mark_ineligible(inode->i_sb,
				EXT4_FC_REASON_NOMEM);
				EXT4_FC_REASON_NOMEM, NULL);
			mutex_lock(&ei->i_fc_lock);
			return -ENOMEM;
		}
@@ -502,7 +516,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)

	if (ext4_should_journal_data(inode)) {
		ext4_fc_mark_ineligible(inode->i_sb,
					EXT4_FC_REASON_INODE_JOURNAL_DATA);
					EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
		return;
	}

@@ -1179,7 +1193,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
 * Fast commit cleanup routine. This is called after every fast commit and
 * full commit. full is true if we are called after a full commit.
 */
static void ext4_fc_cleanup(journal_t *journal, int full)
static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1227,7 +1241,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
				&sbi->s_fc_q[FC_Q_MAIN]);

	ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
	if (tid >= sbi->s_fc_ineligible_tid) {
		sbi->s_fc_ineligible_tid = 0;
		ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
	}

	if (full)
		sbi->s_fc_bytes = 0;
+2 −2
Original line number Diff line number Diff line
@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
	return;
no_delete:
	if (!list_empty(&EXT4_I(inode)->i_fc_list))
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
}

@@ -5976,7 +5976,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
		return PTR_ERR(handle);

	ext4_fc_mark_ineligible(inode->i_sb,
		EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
		EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
	err = ext4_mark_inode_dirty(handle, inode);
	ext4_handle_sync(handle);
	ext4_journal_stop(handle);
+2 −2
Original line number Diff line number Diff line
@@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
		err = -EINVAL;
		goto err_out;
	}
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
	ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);

	/* Protect extent tree against block allocations via delalloc */
	ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1373,7 +1373,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)

		err = ext4_resize_fs(sb, n_blocks_count);
		if (EXT4_SB(sb)->s_journal) {
			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
Loading