Commit 5664896b authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull f2fs updates from Jaegeuk Kim:
 "In this cycle, we've applied relatively small number of patches which
  fix subtle corner cases mainly, while introducing a new mount option
  to be able to fragment the disk intentionally for performance tests.

  Enhancements:

   - add a mount option to fragment the on-disk layout to understand the
     performance

   - support direct IO for multi-partitions

   - add a fault injection of dquot_initialize

  Bug fixes:

   - address some lockdep complaints

   - fix a deadlock issue with quota

   - fix a memory tuning condition

   - fix compression condition to improve the ratio

   - fix disabling compression on the non-empty compressed file

   - invalidate cached pages before IPU/DIO writes

  And, we've added some minor clean-ups as usual"

* tag 'f2fs-for-5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs:
  f2fs: fix UAF in f2fs_available_free_memory
  f2fs: invalidate META_MAPPING before IPU/DIO write
  f2fs: support fault injection for dquot_initialize()
  f2fs: fix incorrect return value in f2fs_sanity_check_ckpt()
  f2fs: compress: disallow disabling compress on non-empty compressed file
  f2fs: compress: fix overwrite may reduce compress ratio unproperly
  f2fs: multidevice: support direct IO
  f2fs: introduce fragment allocation mode mount option
  f2fs: replace snprintf in show functions with sysfs_emit
  f2fs: include non-compressed blocks in compr_written_block
  f2fs: fix wrong condition to trigger background checkpoint correctly
  f2fs: fix to use WHINT_MODE
  f2fs: fix up f2fs_lookup tracepoints
  f2fs: set SBI_NEED_FSCK flag when inconsistent node block found
  f2fs: introduce excess_dirty_threshold()
  f2fs: avoid attaching SB_ACTIVE flag during mount
  f2fs: quota: fix potential deadlock
  f2fs: should use GFP_NOFS for directory inodes
parents 0f7ddea6 5429c9db
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -512,3 +512,19 @@ Date: July 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	You can	control the multiplier value of	bdi device readahead window size
		between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.

What:		/sys/fs/f2fs/<disk>/max_fragment_chunk
Date:		August 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	With the "mode=fragment:block" mount option, we can scatter block allocation.
		f2fs will alternately allocate 1..<max_fragment_chunk> blocks in a chunk and
		make a hole of length 1..<max_fragment_hole>. max_fragment_chunk can be set
		between 1..512 and the default value is 4.

What:		/sys/fs/f2fs/<disk>/max_fragment_hole
Date:		August 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	With the "mode=fragment:block" mount option, we can scatter block allocation.
		f2fs will alternately allocate 1..<max_fragment_chunk> blocks in a chunk and
		make a hole of length 1..<max_fragment_hole>. max_fragment_hole can be set
		between 1..512 and the default value is 4.
+19 −0
Original line number Diff line number Diff line
@@ -197,10 +197,29 @@ fault_type=%d Support configuring fault injection type, should be
			 FAULT_DISCARD		  0x000002000
			 FAULT_WRITE_IO		  0x000004000
			 FAULT_SLAB_ALLOC	  0x000008000
			 FAULT_DQUOT_INIT	  0x000010000
			 ===================	  ===========
mode=%s			 Control block allocation mode which supports "adaptive"
			 and "lfs". In "lfs" mode, there should be no random
			 writes towards main area.
			 "fragment:segment" and "fragment:block" are newly added here.
			 These are developer options for experiments to simulate filesystem
			 fragmentation/after-GC situation itself. The developers use these
			 modes to understand filesystem fragmentation/after-GC condition well,
			 and eventually get some insights to handle them better.
			 In "fragment:segment", f2fs allocates a new segment in a random
			 position. With this, we can simulate the after-GC condition.
			 In "fragment:block", we can scatter block allocation with
			 "max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
			 We added some randomness to both chunk and hole size to make
			 it close to realistic IO pattern. So, in this mode, f2fs will allocate
			 1..<max_fragment_chunk> blocks in a chunk and make a hole in the
			 length of 1..<max_fragment_hole> by turns. With this, the newly
			 allocated blocks will be scattered throughout the whole partition.
			 Note that "fragment:block" implicitly enables "fragment:segment"
			 option for more randomness.
			 Please use these options for your experiments, and we strongly
			 recommend re-formatting the filesystem after using these options.
io_bits=%u		 Set the bit size of write IO requests. It should be set
			 with "mode=lfs".
usrquota		 Enable plain user disk quota accounting.
+3 −5
Original line number Diff line number Diff line
@@ -653,7 +653,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
		return PTR_ERR(inode);
	}

	err = dquot_initialize(inode);
	err = f2fs_dquot_initialize(inode);
	if (err) {
		iput(inode);
		goto err_out;
@@ -705,9 +705,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
	}

#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sbi->sb->s_flags |= SB_ACTIVE;

	/*
	 * Turn on quotas which were not enabled for read-only mounts if
	 * filesystem has quota feature, so that they are updated correctly.
@@ -1162,7 +1159,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
	if (!is_journalled_quota(sbi))
		return false;

	down_write(&sbi->quota_sem);
	if (!down_write_trylock(&sbi->quota_sem))
		return true;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
		ret = false;
	} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {
+20 −0
Original line number Diff line number Diff line
@@ -882,6 +882,25 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
	return is_page_in_cluster(cc, index);
}

/*
 * Tell whether the pagevec holds one whole cluster of consecutive pages
 * starting at slot @index.
 *
 * @cc:       compress context; only cc->cluster_size is read here
 * @pvec:     pagevec whose pages[] entries are inspected
 * @index:    slot in @pvec->pages of the first page of the candidate cluster
 * @nr_pages: number of usable entries in @pvec->pages
 *
 * Returns false when fewer than cc->cluster_size entries remain between
 * @index and @nr_pages, or when any following entry's page index is not
 * exactly one greater than its predecessor's; returns true otherwise.
 */
bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
				int index, int nr_pages)
{
	unsigned long first_pgidx;
	int off = 1;

	/* Not enough entries left in the pagevec to cover a whole cluster. */
	if (nr_pages - index < cc->cluster_size)
		return false;

	first_pgidx = pvec->pages[index]->index;

	/* Every later slot must carry the next page index in sequence. */
	while (off < cc->cluster_size) {
		if (pvec->pages[index + off]->index != first_pgidx + off)
			return false;
		off++;
	}

	return true;
}

static bool cluster_has_invalid_data(struct compress_ctx *cc)
{
	loff_t i_size = i_size_read(cc->inode);
@@ -1531,6 +1550,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
	if (cluster_may_compress(cc)) {
		err = f2fs_compress_pages(cc);
		if (err == -EAGAIN) {
			add_compr_block_stat(cc->inode, cc->cluster_size);
			goto write;
		} else if (err) {
			f2fs_put_rpages_wbc(cc, wbc, true, 1);
+74 −21
Original line number Diff line number Diff line
@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	struct extent_info ei = {0, };
	block_t blkaddr;
	unsigned int start_pgofs;
	int bidx = 0;

	if (!maxblocks)
		return 0;

	map->m_bdev = inode->i_sb->s_bdev;
	map->m_multidev_dio =
		f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);

	map->m_len = 0;
	map->m_flags = 0;

@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
		if (flag == F2FS_GET_BLOCK_DIO)
			f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

		if (map->m_multidev_dio) {
			block_t blk_addr = map->m_pblk;

			bidx = f2fs_target_device_index(sbi, map->m_pblk);

			map->m_bdev = FDEV(bidx).bdev;
			map->m_pblk -= FDEV(bidx).start_blk;
			map->m_len = min(map->m_len,
				FDEV(bidx).end_blk + 1 - map->m_pblk);

			if (map->m_may_create)
				f2fs_update_device_state(sbi, inode->i_ino,
							blk_addr, map->m_len);
		}
		goto out;
	}

@@ -1609,6 +1629,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_multidev_dio)
		bidx = f2fs_target_device_index(sbi, blkaddr);

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
@@ -1617,10 +1640,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,

		map->m_pblk = blkaddr;
		map->m_len = 1;

		if (map->m_multidev_dio)
			map->m_bdev = FDEV(bidx).bdev;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
			goto sync_out;
		ofs++;
		map->m_len++;
	} else {
@@ -1673,10 +1701,32 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,

sync_out:

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
		/*
		 * for hardware encryption, but to avoid potential issue
		 * in future
		 */
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);
		invalidate_mapping_pages(META_MAPPING(sbi),
						map->m_pblk, map->m_pblk);

		if (map->m_multidev_dio) {
			block_t blk_addr = map->m_pblk;

			bidx = f2fs_target_device_index(sbi, map->m_pblk);

			map->m_bdev = FDEV(bidx).bdev;
			map->m_pblk -= FDEV(bidx).start_blk;

			if (map->m_may_create)
				f2fs_update_device_state(sbi, inode->i_ino,
							blk_addr, map->m_len);

			f2fs_bug_on(sbi, blk_addr + map->m_len >
						FDEV(bidx).end_blk + 1);
		}
	}

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
@@ -1696,7 +1746,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	trace_f2fs_map_blocks(inode, map, create, flag, err);
	return err;
}

@@ -1755,6 +1805,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = blks_to_bytes(inode, map.m_len);

		if (map.m_multidev_dio)
			bh->b_bdev = map.m_bdev;
	}
	return err;
}
@@ -2989,6 +3042,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
			need_readd = false;
#ifdef CONFIG_F2FS_FS_COMPRESSION
			if (f2fs_compressed_file(inode)) {
				void *fsdata = NULL;
				struct page *pagep;
				int ret2;

				ret = f2fs_init_compress_ctx(&cc);
				if (ret) {
					done = 1;
@@ -3007,10 +3064,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
				if (unlikely(f2fs_cp_error(sbi)))
					goto lock_page;

				if (f2fs_cluster_is_empty(&cc)) {
					void *fsdata = NULL;
					struct page *pagep;
					int ret2;
				if (!f2fs_cluster_is_empty(&cc))
					goto lock_page;

				ret2 = f2fs_prepare_compress_overwrite(
							inode, &pagep,
@@ -3020,15 +3075,13 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
					done = 1;
					break;
				} else if (ret2 &&
						!f2fs_compress_write_end(inode,
								fsdata, page->index,
								1)) {
					(!f2fs_compress_write_end(inode,
						fsdata, page->index, 1) ||
					 !f2fs_all_cluster_page_loaded(&cc,
						&pvec, i, nr_pages))) {
					retry = 1;
					break;
				}
				} else {
					goto lock_page;
				}
			}
#endif
			/* give a priority to WB_SYNC threads */
Loading