Commit 73a3fcda authored by Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "In this cycle, we've mainly investigated the zoned block device
  support along with patches such as correcting write pointers between
  f2fs and storage, adding asynchronous zone reset flow, and managing
  the number of open zones.

  Other than them, f2fs adds another mount option, "errors=x" to specify
  how to handle when it detects an unexpected behavior at runtime.

  Enhancements:
   - support 'errors=remount-ro|continue|panic' mount option
   - enforce some inode flag policies
   - allow .tmp compression given extensions
   - add some ioctls to manage the f2fs compression
   - improve looped node chain flow
   - avoid issuing small-sized discard commands during checkpoint
   - implement an asynchronous zone reset

  Bug fixes:
   - fix deadlock in xattr and inode page lock
   - fix and add sanity check in some error paths
   - fix to avoid NULL pointer dereference f2fs_write_end_io() along
     with put_super
   - set proper flags to quota files
   - fix potential deadlock due to unpaired node_write lock use
   - fix over-estimating free section during FG GC
   - fix the wrong condition to determine atomic context

  As usual, also there are a number of patches with code refactoring and
  minor clean-ups"

* tag 'f2fs-for-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (46 commits)
  f2fs: fix to do sanity check on direct node in truncate_dnode()
  f2fs: only set release for file that has compressed data
  f2fs: fix compile warning in f2fs_destroy_node_manager()
  f2fs: fix error path handling in truncate_dnode()
  f2fs: fix deadlock in i_xattr_sem and inode page lock
  f2fs: remove unneeded page uptodate check/set
  f2fs: update mtime and ctime in move file range method
  f2fs: compress tmp files given extension
  f2fs: refactor struct f2fs_attr macro
  f2fs: convert to use sbi directly
  f2fs: remove redundant assignment to variable err
  f2fs: do not issue small discard commands during checkpoint
  f2fs: check zone write pointer points to the end of zone
  f2fs: add f2fs_ioc_get_compress_blocks
  f2fs: cleanup MIN_INLINE_XATTR_SIZE
  f2fs: add helper to check compression level
  f2fs: set FMODE_CAN_ODIRECT instead of a dummy direct_IO method
  f2fs: do more sanity check on inode
  f2fs: compress: fix to check validity of i_compress_flag field
  f2fs: add sanity compress level check for compressed file
  ...
parents bb8e7e9f a6ec8378
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -351,6 +351,22 @@ age_extent_cache Enable an age extent cache based on rb-tree. It records
			 data block update frequency of the extent per inode, in
			 order to provide better temperature hints for data block
			 allocation.
errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
			 "panic", "continue" and "remount-ro", respectively, trigger
			 panic immediately, continue without doing anything, and remount
			 the partition in read-only mode. By default it uses "continue"
			 mode.
			 ====================== =============== =============== ========
			 mode			continue	remount-ro	panic
			 ====================== =============== =============== ========
			 access ops		normal		normal		N/A
			 syscall errors		-EIO		-EROFS		N/A
			 mount option		rw		ro		N/A
			 pending dir write	keep		keep		N/A
			 pending non-dir write	drop		keep		N/A
			 pending node write	drop		keep		N/A
			 pending meta write	keep		keep		N/A
			 ====================== =============== =============== ========
======================== ============================================================

Debugfs Entries
+2 −5
Original line number Diff line number Diff line
@@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
						unsigned char reason)
{
	f2fs_build_fault_attr(sbi, 0, 0);
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	if (!end_io) {
	if (!end_io)
		f2fs_flush_merged_writes(sbi);

		f2fs_handle_stop(sbi, reason);
	}
	f2fs_handle_critical_error(sbi, reason, end_io);
}

/*
+34 −7
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@ struct f2fs_compress_ops {
	int (*init_decompress_ctx)(struct decompress_io_ctx *dic);
	void (*destroy_decompress_ctx)(struct decompress_io_ctx *dic);
	int (*decompress_pages)(struct decompress_io_ctx *dic);
	bool (*is_level_valid)(int level);
};

static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index)
@@ -308,17 +309,25 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
	return 0;
}

/*
 * Validate an LZ4 compression level.  Level 0 selects plain LZ4 and is
 * always acceptable; any other level is only valid when LZ4HC support is
 * compiled in and the level lies within the LZ4HC range.
 */
static bool lz4_is_level_valid(int lvl)
{
#ifdef CONFIG_F2FS_FS_LZ4HC
	if (!lvl)
		return true;
	return lvl >= LZ4HC_MIN_CLEVEL && lvl <= LZ4HC_MAX_CLEVEL;
#else
	/* Without LZ4HC only the default (no explicit level) is allowed. */
	return !lvl;
#endif
}

/* Callback table wiring the LZ4 backend into the generic f2fs compress core. */
static const struct f2fs_compress_ops f2fs_lz4_ops = {
	.init_compress_ctx	= lz4_init_compress_ctx,
	.destroy_compress_ctx	= lz4_destroy_compress_ctx,
	.compress_pages		= lz4_compress_pages,
	.decompress_pages	= lz4_decompress_pages,
	.is_level_valid		= lz4_is_level_valid,
};
#endif

#ifdef CONFIG_F2FS_FS_ZSTD
#define F2FS_ZSTD_DEFAULT_CLEVEL	1

static int zstd_init_compress_ctx(struct compress_ctx *cc)
{
	zstd_parameters params;
@@ -327,6 +336,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
	unsigned int workspace_size;
	unsigned char level = F2FS_I(cc->inode)->i_compress_level;

	/* Need to remain this for backward compatibility */
	if (!level)
		level = F2FS_ZSTD_DEFAULT_CLEVEL;

@@ -477,6 +487,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic)
	return 0;
}

/*
 * Validate a zstd compression level: anything within the range the zstd
 * backend advertises is acceptable (zstd's minimum may be negative).
 */
static bool zstd_is_level_valid(int lvl)
{
	if (lvl < zstd_min_clevel())
		return false;
	return lvl <= zstd_max_clevel();
}

static const struct f2fs_compress_ops f2fs_zstd_ops = {
	.init_compress_ctx	= zstd_init_compress_ctx,
	.destroy_compress_ctx	= zstd_destroy_compress_ctx,
@@ -484,6 +499,7 @@ static const struct f2fs_compress_ops f2fs_zstd_ops = {
	.init_decompress_ctx	= zstd_init_decompress_ctx,
	.destroy_decompress_ctx	= zstd_destroy_decompress_ctx,
	.decompress_pages	= zstd_decompress_pages,
	.is_level_valid		= zstd_is_level_valid,
};
#endif

@@ -542,6 +558,16 @@ bool f2fs_is_compress_backend_ready(struct inode *inode)
	return f2fs_cops[F2FS_I(inode)->i_compress_algorithm];
}

/*
 * Check whether @lvl is a valid compression level for algorithm @alg.
 * Backends that support levels supply ->is_level_valid(); for the rest
 * only the default level 0 is acceptable.
 */
bool f2fs_is_compress_level_valid(int alg, int lvl)
{
	const struct f2fs_compress_ops *cops = f2fs_cops[alg];

	return cops->is_level_valid ? cops->is_level_valid(lvl) : lvl == 0;
}

static mempool_t *compress_page_pool;
static int num_compress_pages = 512;
module_param(num_compress_pages, uint, 0444);
@@ -743,8 +769,8 @@ void f2fs_decompress_cluster(struct decompress_io_ctx *dic, bool in_task)
		ret = -EFSCORRUPTED;

		/* Avoid f2fs_commit_super in irq context */
		if (in_task)
			f2fs_save_errors(sbi, ERROR_FAIL_DECOMPRESSION);
		if (!in_task)
			f2fs_handle_error_async(sbi, ERROR_FAIL_DECOMPRESSION);
		else
			f2fs_handle_error(sbi, ERROR_FAIL_DECOMPRESSION);
		goto out_release;
@@ -1215,6 +1241,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
	unsigned int last_index = cc->cluster_size - 1;
	loff_t psize;
	int i, err;
	bool quota_inode = IS_NOQUOTA(inode);

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
@@ -1222,7 +1249,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
		goto out_free;
	}

	if (IS_NOQUOTA(inode)) {
	if (quota_inode) {
		/*
		 * We need to wait for node_write to avoid block allocation during
		 * checkpoint. This can only happen to quota writes which can cause
@@ -1344,7 +1371,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);

	f2fs_put_dnode(&dn);
	if (IS_NOQUOTA(inode))
	if (quota_inode)
		f2fs_up_read(&sbi->node_write);
	else
		f2fs_unlock_op(sbi);
@@ -1370,7 +1397,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
out_put_dnode:
	f2fs_put_dnode(&dn);
out_unlock_op:
	if (IS_NOQUOTA(inode))
	if (quota_inode)
		f2fs_up_read(&sbi->node_write);
	else
		f2fs_unlock_op(sbi);
+66 −5
Original line number Diff line number Diff line
@@ -383,6 +383,17 @@ static void f2fs_write_end_io(struct bio *bio)
	bio_put(bio);
}

#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Write-completion handler installed on the bio that ends exactly at a zone
 * boundary.  The per-log f2fs_bio_info stashed the bio's original
 * ->bi_private in io->bi_private; restore it first so the normal completion
 * path sees the expected value, then wake any writer blocked in
 * f2fs_submit_page_write() waiting for this zone's in-flight bio, and
 * finally run the regular f2fs write end_io processing.
 */
static void f2fs_zone_write_end_io(struct bio *bio)
{
	struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;

	/* Restore the original private data before normal completion runs. */
	bio->bi_private = io->bi_private;
	complete(&io->zone_wait);
	f2fs_write_end_io(bio);
}
#endif

struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
		block_t blk_addr, sector_t *sector)
{
@@ -639,6 +650,11 @@ int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
#ifdef CONFIG_BLK_DEV_ZONED
			init_completion(&sbi->write_io[i][j].zone_wait);
			sbi->write_io[i][j].zone_pending_bio = NULL;
			sbi->write_io[i][j].bi_private = NULL;
#endif
		}
	}

@@ -965,6 +981,26 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
	return 0;
}

#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Return true when @blkaddr is the last block of a sequential-write-required
 * zone on a host-managed zoned device, i.e. a write landing there fills the
 * zone.  On multi-device setups the address is first translated to be
 * relative to the owning device; an out-of-range address is logged and
 * treated as "not a zone end".
 */
static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int dev_idx = 0;

	if (f2fs_is_multi_device(sbi)) {
		dev_idx = f2fs_target_device_index(sbi, blkaddr);
		if (blkaddr < FDEV(dev_idx).start_blk ||
		    blkaddr > FDEV(dev_idx).end_blk) {
			f2fs_err(sbi, "Invalid block %x", blkaddr);
			return false;
		}
		/* Make the address relative to this device. */
		blkaddr -= FDEV(dev_idx).start_blk;
	}

	if (bdev_zoned_model(FDEV(dev_idx).bdev) != BLK_ZONED_HM)
		return false;
	if (!f2fs_blkz_is_seq(sbi, dev_idx, blkaddr))
		return false;
	return blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1;
}
#endif

void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
@@ -975,6 +1011,16 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
	f2fs_bug_on(sbi, is_read_io(fio->op));

	f2fs_down_write(&io->io_rwsem);

#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
		wait_for_completion_io(&io->zone_wait);
		bio_put(io->zone_pending_bio);
		io->zone_pending_bio = NULL;
		io->bi_private = NULL;
	}
#endif

next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
@@ -1038,6 +1084,18 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio)
	if (fio->in_list)
		goto next;
out:
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
		bio_get(io->bio);
		reinit_completion(&io->zone_wait);
		io->bi_private = io->bio->bi_private;
		io->bio->bi_private = io;
		io->bio->bi_end_io = f2fs_zone_write_end_io;
		io->zone_pending_bio = io->bio;
		__submit_merged_bio(io);
	}
#endif
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
@@ -2173,7 +2231,6 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
	f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
							F2FS_BLKSIZE);
	*last_block_in_bio = block_nr;
	goto out;
out:
	*bio_ret = bio;
	return ret;
@@ -2775,6 +2832,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
	loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	bool quota_inode = IS_NOQUOTA(inode);
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
@@ -2807,6 +2865,10 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
		if (S_ISDIR(inode->i_mode) &&
				!is_sbi_flag_set(sbi, SBI_IS_CLOSE))
			goto redirty_out;

		/* keep data pages in remount-ro mode */
		if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
			goto redirty_out;
		goto out;
	}

@@ -2832,19 +2894,19 @@ int f2fs_write_single_data_page(struct page *page, int *submitted,
		goto out;

	/* Dentry/quota blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
	if (S_ISDIR(inode->i_mode) || quota_inode) {
		/*
		 * We need to wait for node_write to avoid block allocation during
		 * checkpoint. This can only happen to quota writes which can cause
		 * the below discard race condition.
		 */
		if (IS_NOQUOTA(inode))
		if (quota_inode)
			f2fs_down_read(&sbi->node_write);

		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);

		if (IS_NOQUOTA(inode))
		if (quota_inode)
			f2fs_up_read(&sbi->node_write);

		goto done;
@@ -4067,7 +4129,6 @@ const struct address_space_operations f2fs_dblock_aops = {
	.migrate_folio	= filemap_migrate_folio,
	.invalidate_folio = f2fs_invalidate_folio,
	.release_folio	= f2fs_release_folio,
	.direct_IO	= noop_direct_IO,
	.bmap		= f2fs_bmap,
	.swap_activate  = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
+8 −1
Original line number Diff line number Diff line
@@ -775,8 +775,15 @@ int f2fs_add_dentry(struct inode *dir, const struct f2fs_filename *fname,
{
	int err = -EAGAIN;

	if (f2fs_has_inline_dentry(dir))
	if (f2fs_has_inline_dentry(dir)) {
		/*
		 * Should get i_xattr_sem to keep the lock order:
		 * i_xattr_sem -> inode_page lock used by f2fs_setxattr.
		 */
		f2fs_down_read(&F2FS_I(dir)->i_xattr_sem);
		err = f2fs_add_inline_entry(dir, fname, inode, ino, mode);
		f2fs_up_read(&F2FS_I(dir)->i_xattr_sem);
	}
	if (err == -EAGAIN)
		err = f2fs_add_regular_entry(dir, fname, inode, ino, mode);

Loading