Commit 01d5d965 authored by Leah Rumancik's avatar Leah Rumancik Committed by Theodore Ts'o
Browse files

ext4: add discard/zeroout flags to journal flush



Add a flags argument to jbd2_journal_flush to enable discarding or
zero-filling the journal blocks while flushing the journal.

Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com>
Link: https://lore.kernel.org/r/20210518151327.130198-1-leah.rumancik@gmail.com


Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 8f6840c4
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -3223,7 +3223,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
		ext4_clear_inode_state(inode, EXT4_STATE_JDATA);
		journal = EXT4_JOURNAL(inode);
		jbd2_journal_lock_updates(journal);
		err = jbd2_journal_flush(journal);
		err = jbd2_journal_flush(journal, 0);
		jbd2_journal_unlock_updates(journal);

		if (err)
@@ -6005,7 +6005,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
	if (val)
		ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
	else {
		err = jbd2_journal_flush(journal);
		err = jbd2_journal_flush(journal, 0);
		if (err < 0) {
			jbd2_journal_unlock_updates(journal);
			percpu_up_write(&sbi->s_writepages_rwsem);
+3 −3
Original line number Diff line number Diff line
@@ -706,7 +706,7 @@ static long ext4_ioctl_group_add(struct file *file,
	err = ext4_group_add(sb, input);
	if (EXT4_SB(sb)->s_journal) {
		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
	}
	if (err == 0)
@@ -884,7 +884,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
		if (EXT4_SB(sb)->s_journal) {
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
@@ -1027,7 +1027,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
		if (EXT4_SB(sb)->s_journal) {
			ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		}
		if (err == 0)
+3 −3
Original line number Diff line number Diff line
@@ -5653,7 +5653,7 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
		return 0;
	}
	jbd2_journal_lock_updates(journal);
	err = jbd2_journal_flush(journal);
	err = jbd2_journal_flush(journal, 0);
	if (err < 0)
		goto out;

@@ -5795,7 +5795,7 @@ static int ext4_freeze(struct super_block *sb)
		 * Don't clear the needs_recovery flag if we failed to
		 * flush the journal.
		 */
		error = jbd2_journal_flush(journal);
		error = jbd2_journal_flush(journal, 0);
		if (error < 0)
			goto out;

@@ -6389,7 +6389,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
		 * otherwise be livelocked...
		 */
		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		if (err)
			return err;
+116 −3
Original line number Diff line number Diff line
@@ -1686,6 +1686,110 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
	write_unlock(&journal->j_state_lock);
}

/**
 * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
 * @journal: The journal to erase.
 * @flags: A discard/zeroout request is sent for each physically contiguous
 *	region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
 *	JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
 *	to perform.
 *
 * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
 * will be explicitly written if no hardware offload is available, see
 * blkdev_issue_zeroout for more details.
 *
 * Return: -EINVAL if @flags is empty, contains bits outside
 * JBD2_JOURNAL_FLUSH_VALID, or requests both discard and zeroout; -ENXIO if
 * the journal device has no request queue; -EOPNOTSUPP if a discard is
 * requested but the queue does not support it; the error reported by
 * jbd2_journal_bmap(), blkdev_issue_discard() or blkdev_issue_zeroout() if
 * one of them fails; otherwise the result of blkdev_issue_flush().
 */
static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
{
	int err = 0;
	unsigned long block, log_offset; /* logical */
	unsigned long long phys_block, block_start, block_stop; /* physical */
	loff_t byte_start, byte_stop, byte_count;
	struct request_queue *q = bdev_get_queue(journal->j_dev);

	/* flags must be set to either discard or zeroout */
	if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
			((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
			(flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
		return -EINVAL;

	if (!q)
		return -ENXIO;

	if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) && !blk_queue_discard(q))
		return -EOPNOTSUPP;

	/*
	 * lookup block mapping and issue discard/zeroout for each
	 * contiguous region
	 */
	log_offset = be32_to_cpu(journal->j_superblock->s_first);
	/* ~0ULL marks "no region currently being accumulated" */
	block_start =  ~0ULL;
	for (block = log_offset; block < journal->j_total_len; block++) {
		err = jbd2_journal_bmap(journal, block, &phys_block);
		if (err) {
			pr_err("JBD2: bad block at offset %lu", block);
			return err;
		}

		/*
		 * start a new region at this block; block_stop is the
		 * inclusive last block of the region, so it begins one
		 * below block_start and is bumped in the contiguous branch
		 */
		if (block_start == ~0ULL) {
			block_start = phys_block;
			block_stop = block_start - 1;
		}

		/*
		 * last block not contiguous with current block,
		 * process last contiguous region and return to this block on
		 * next loop
		 */
		if (phys_block != block_stop + 1) {
			block--;
		} else {
			block_stop++;
			/*
			 * if this isn't the last block of journal,
			 * no need to process now because next block may also
			 * be part of this contiguous region
			 */
			if (block != journal->j_total_len - 1)
				continue;
		}

		/*
		 * end of contiguous region or this is last block of journal,
		 * take care of the region
		 */
		byte_start = block_start * journal->j_blocksize;
		byte_count = (block_stop - block_start + 1) *
				journal->j_blocksize;
		/*
		 * truncate_inode_pages_range() takes an inclusive end offset,
		 * so byte_stop must be the last byte of the last block in the
		 * region, not the first byte of that block; otherwise the
		 * cached pages of the final block are left behind.
		 */
		byte_stop = byte_start + byte_count - 1;

		truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
				byte_start, byte_stop);

		if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
			err = blkdev_issue_discard(journal->j_dev,
					byte_start >> SECTOR_SHIFT,
					byte_count >> SECTOR_SHIFT,
					GFP_NOFS, 0);
		} else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
			err = blkdev_issue_zeroout(journal->j_dev,
					byte_start >> SECTOR_SHIFT,
					byte_count >> SECTOR_SHIFT,
					GFP_NOFS, 0);
		}

		if (unlikely(err != 0)) {
			pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
					err, block_start, block_stop);
			return err;
		}

		/* reset start and stop after processing a region */
		block_start = ~0ULL;
	}

	return blkdev_issue_flush(journal->j_dev);
}

/**
 * jbd2_journal_update_sb_errno() - Update error in the journal.
@@ -2246,13 +2350,18 @@ EXPORT_SYMBOL(jbd2_journal_clear_features);
/**
 * jbd2_journal_flush() - Flush journal
 * @journal: Journal to act on.
 * @flags: optional operation on the journal blocks after the flush (see below)
 *
 * Flush all data for a given journal to disk and empty the journal.
 * Filesystems can use this when remounting readonly to ensure that
 * recovery does not need to happen on remount.
 * recovery does not need to happen on remount. Optionally, a discard or zeroout
 * can be issued on the journal blocks after flushing.
 *
 * flags:
 *	JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
 *	JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
 */

int jbd2_journal_flush(journal_t *journal)
int jbd2_journal_flush(journal_t *journal, unsigned int flags)
{
	int err = 0;
	transaction_t *transaction = NULL;
@@ -2306,6 +2415,10 @@ int jbd2_journal_flush(journal_t *journal)
	 * commits of data to the journal will restore the current
	 * s_start value. */
	jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);

	if (flags)
		err = __jbd2_journal_erase(journal, flags);

	mutex_unlock(&journal->j_checkpoint_mutex);
	write_lock(&journal->j_state_lock);
	J_ASSERT(!journal->j_running_transaction);
+1 −1
Original line number Diff line number Diff line
@@ -6018,7 +6018,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
	 * Then truncate log will be replayed resulting in cluster double free.
	 */
	jbd2_journal_lock_updates(journal->j_journal);
	status = jbd2_journal_flush(journal->j_journal);
	status = jbd2_journal_flush(journal->j_journal, 0);
	jbd2_journal_unlock_updates(journal->j_journal);
	if (status < 0) {
		mlog_errno(status);
Loading