Commit ef510682 authored by Linus Torvalds
Pull f2fs updates from Jaegeuk Kim:
 "In this cycle, f2fs has some performance improvements for Android
  workloads such as using read-unfair rwsems and adding some sysfs
  entries to control GCs and discard commands in more details. In
  addtiion, it has some tunings to improve the recovery speed after
  sudden power-cut.

  Enhancement:
   - add reader-unfair rwsems behind F2FS_UNFAIR_RWSEM; to be replaced
     with generic API support
   - adjust the readahead/recovery flow to be more efficient
   - add sysfs entries to control the issue speed of GCs and discard commands
   - enable idmapped mounts

  Bug fix:
   - correct wrong error handling routines
   - fix missing conditions in quota
   - fix a potential deadlock between writeback and block plug routines
   - fix a deadlock between freezefs and evict_inode

  We've added some boundary checks to avoid kernel panics on corrupted
  images, and several minor code clean-ups"

* tag 'f2fs-for-5.18' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (27 commits)
  f2fs: fix to do sanity check on .cp_pack_total_block_count
  f2fs: make gc_urgent and gc_segment_mode sysfs node readable
  f2fs: use aggressive GC policy during f2fs_disable_checkpoint()
  f2fs: fix compressed file start atomic write may cause data corruption
  f2fs: initialize sbi->gc_mode explicitly
  f2fs: introduce gc_urgent_mid mode
  f2fs: compress: fix to print raw data size in error path of lz4 decompression
  f2fs: remove redundant parameter judgment
  f2fs: use spin_lock to avoid hang
  f2fs: don't get FREEZE lock in f2fs_evict_inode in frozen fs
  f2fs: remove unnecessary read for F2FS_FITS_IN_INODE
  f2fs: introduce F2FS_UNFAIR_RWSEM to support unfair rwsem
  f2fs: avoid an infinite loop in f2fs_sync_dirty_inodes
  f2fs: fix to do sanity check on curseg->alloc_type
  f2fs: fix to avoid potential deadlock
  f2fs: quota: fix loop condition at f2fs_quota_sync()
  f2fs: Restore rwsem lockdep support
  f2fs: fix missing free nid in f2fs_handle_failed_inode
  f2fs: support idmapped mounts
  f2fs: add a way to limit roll forward recovery time
  ...
parents aab4ed58 5b5b4f85
Documentation/ABI/testing/sysfs-fs-f2fs  +47 −7
@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy.
		0x04  F2FS_IPU_UTIL
		0x08  F2FS_IPU_SSR_UTIL
		0x10  F2FS_IPU_FSYNC
		0x20  F2FS_IPU_ASYNC,
		0x20  F2FS_IPU_ASYNC
		0x40  F2FS_IPU_NOCACHE
		0x80  F2FS_IPU_HONOR_OPU_WRITE
		====  =================

		Refer to segment.h for details.
@@ -98,6 +99,33 @@ Description: Controls the issue rate of discard commands that consist of small
		checkpoint is triggered, and issued during the checkpoint.
		By default, it is disabled with 0.

What:		/sys/fs/f2fs/<disk>/max_discard_request
Date:		December 2021
Contact:	"Konstantin Vyshetsky" <vkon@google.com>
Description:	Controls the number of discards a thread will issue at a time.
		A higher number allows the discard thread to finish its work
		faster, at the cost of higher latency for incoming I/O.

What:		/sys/fs/f2fs/<disk>/min_discard_issue_time
Date:		December 2021
Contact:	"Konstantin Vyshetsky" <vkon@google.com>
Description:	Controls the interval the discard thread will wait between
		issuing discard requests when there are discards to be issued and
		no I/O aware interruptions occur.

What:		/sys/fs/f2fs/<disk>/mid_discard_issue_time
Date:		December 2021
Contact:	"Konstantin Vyshetsky" <vkon@google.com>
Description:	Controls the interval the discard thread will wait between
		issuing discard requests when there are discards to be issued and
		an I/O aware interruption occurs.

What:		/sys/fs/f2fs/<disk>/max_discard_issue_time
Date:		December 2021
Contact:	"Konstantin Vyshetsky" <vkon@google.com>
Description:	Controls the interval the discard thread will wait when there are
		no discard operations to be issued.

What:		/sys/fs/f2fs/<disk>/discard_granularity
Date:		July 2017
Contact:	"Chao Yu" <yuchao0@huawei.com>
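
The four new discard tunables above work together: max_discard_request bounds
the batch size, while the three issue times choose the wait between batches
depending on whether I/O-aware interruptions occur. A minimal userspace sketch
of driving them through sysfs follows; the helper name, disk name, and values
are illustrative, not defaults from the patch:

/*
 * Illustrative only: tune the f2fs discard thread through sysfs.
 * Build with: cc -o f2fs_tune f2fs_tune.c; run as root.
 */
#include <stdio.h>

static int set_f2fs_tunable(const char *disk, const char *node, long value)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/fs/f2fs/%s/%s", disk, node);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%ld\n", value);
	return fclose(f);
}

int main(void)
{
	const char *disk = "sda1";	/* hypothetical device */

	/* larger batches retire discards faster, at some I/O latency cost */
	set_f2fs_tunable(disk, "max_discard_request", 16);
	/* waits between batches (ms): uninterrupted, interrupted, and idle */
	set_f2fs_tunable(disk, "min_discard_issue_time", 50);
	set_f2fs_tunable(disk, "mid_discard_issue_time", 500);
	set_f2fs_tunable(disk, "max_discard_issue_time", 60000);
	return 0;
}
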
@@ -269,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily
What:		/sys/fs/f2fs/<disk>/gc_urgent
Date:		August 2017
Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
Description:	Do background GC aggressively when set. When gc_urgent = 1,
		background thread starts to do GC by given gc_urgent_sleep_time
		interval. When gc_urgent = 2, F2FS will lower the bar of
		checking idle in order to process outstanding discard commands
		and GC a little bit aggressively. It is set to 0 by default.
Description:	Do background GC aggressively when set. Set to 0 by default.
		gc urgent high(1): does GC forcibly in a period of given
		gc_urgent_sleep_time and ignores I/O idling check. uses greedy
		GC approach and turns SSR mode on.
		gc urgent low(2): lowers the bar of checking I/O idling in
		order to process outstanding discard commands and GC a
		little bit aggressively. uses cost benefit GC approach.
		gc urgent mid(3): does GC forcibly in a period of given
		gc_urgent_sleep_time and executes a mid level of I/O idling check.
		uses cost benefit GC approach.

What:		/sys/fs/f2fs/<disk>/gc_urgent_sleep_time
Date:		August 2017
@@ -430,6 +463,7 @@ Description: Show status of f2fs superblock in real time.
		0x800  SBI_QUOTA_SKIP_FLUSH  skip flushing quota in current CP
		0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
		0x2000 SBI_IS_RESIZEFS       resizefs is in process
		0x4000 SBI_IS_FREEZING       freezefs is in process
		====== ===================== =================================

What:		/sys/fs/f2fs/<disk>/ckpt_thread_ioprio
@@ -503,7 +537,7 @@ Date: July 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	Show how many segments have been reclaimed by GC during a specific
		GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
		3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
		3: GC idle AT, 4: GC urgent high, 5: GC urgent low, 6: GC urgent mid)
		You can re-initialize this value to "0".

What:		/sys/fs/f2fs/<disk>/gc_segment_mode
@@ -540,3 +574,9 @@ Contact: "Daeho Jeong" <daehojeong@google.com>
Description:	You can set the trial count limit for GC urgent high mode with this value.
		If GC thread gets to the limit, the mode will turn back to GC normal mode.
		By default, the value is zero, which means there is no limit like before.

What:		/sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
Date:		January 2022
Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
Description:	Controls max # of node block writes to be used for roll forward
		recovery. This can limit the roll forward recovery time.
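
Since one of the commits in this pull makes gc_urgent readable, a tuning
script can write a mode and read it back to confirm. A small sketch along
the same lines (the disk name is hypothetical; 2 selects gc urgent low per
the table above):

/*
 * Illustrative only: select "gc urgent low" and read the mode back,
 * relying on the gc_urgent sysfs node being readable as of this pull.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/sys/fs/f2fs/sda1/gc_urgent";
	char buf[64];
	FILE *f;

	f = fopen(path, "w");
	if (!f)
		return 1;
	fputs("2\n", f);	/* 2: gc urgent low, cost-benefit GC */
	fclose(f);

	f = fopen(path, "r");
	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("gc_urgent: %s", buf);
	fclose(f);
	return 0;
}
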
fs/f2fs/Kconfig  +7 −0
@@ -143,3 +143,10 @@ config F2FS_IOSTAT
	  Support getting IO statistics through sysfs and printing out periodic
	  IO statistics tracepoint events. You have to turn on "iostat_enable"
	  sysfs node to enable this feature.

config F2FS_UNFAIR_RWSEM
	bool "F2FS unfair rw_semaphore"
	depends on F2FS_FS && BLK_CGROUP
	help
	  Use an unfair rw_semaphore when the system has configured I/O
	  priority through the block cgroup.
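
The help text is terse; the mechanism behind this option, as sketched from
the f2fs_down_read()/f2fs_up_write() call sites converted below, is a wrapper
that lets readers acquire via trylock from a waitqueue so they never queue
behind a waiting writer. A hedged outline — the field names follow the
series, but treat this as a sketch rather than the exact f2fs.h code:

#include <linux/rwsem.h>
#include <linux/wait.h>

struct f2fs_rwsem {
	struct rw_semaphore internal_rwsem;
#ifdef CONFIG_F2FS_UNFAIR_RWSEM
	wait_queue_head_t read_waiters;
#endif
};

static inline void f2fs_down_read(struct f2fs_rwsem *sem)
{
#ifdef CONFIG_F2FS_UNFAIR_RWSEM
	/*
	 * trylock never queues behind a waiting writer, so readers stay
	 * "unfair"; on failure, sleep until a writer releases the lock.
	 */
	wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem));
#else
	down_read(&sem->internal_rwsem);
#endif
}

static inline void f2fs_up_write(struct f2fs_rwsem *sem)
{
	up_write(&sem->internal_rwsem);
#ifdef CONFIG_F2FS_UNFAIR_RWSEM
	wake_up_all(&sem->read_waiters);
#endif
}

This matches the f2fs_down_*/f2fs_up_* calls that replace plain down_*/up_*
throughout checkpoint.c and compress.c below; the stated intent is to keep
readers from stalling behind writers that block cgroups have deprioritized.
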
fs/f2fs/acl.c  +12 −9
@@ -204,7 +204,8 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type, bool rcu)
	return __f2fs_get_acl(inode, type, NULL);
}

static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
static int f2fs_acl_update_mode(struct user_namespace *mnt_userns,
				struct inode *inode, umode_t *mode_p,
				struct posix_acl **acl)
{
	umode_t mode = inode->i_mode;
@@ -218,14 +219,15 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
		return error;
	if (error == 0)
		*acl = NULL;
	if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
	    !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
	if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
	    !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
		mode &= ~S_ISGID;
	*mode_p = mode;
	return 0;
}

static int __f2fs_set_acl(struct inode *inode, int type,
static int __f2fs_set_acl(struct user_namespace *mnt_userns,
			struct inode *inode, int type,
			struct posix_acl *acl, struct page *ipage)
{
	int name_index;
@@ -238,7 +240,8 @@ static int __f2fs_set_acl(struct inode *inode, int type,
	case ACL_TYPE_ACCESS:
		name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
		if (acl && !ipage) {
			error = f2fs_acl_update_mode(inode, &mode, &acl);
			error = f2fs_acl_update_mode(mnt_userns, inode,
								&mode, &acl);
			if (error)
				return error;
			set_acl_inode(inode, mode);
@@ -279,7 +282,7 @@ int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
		return -EIO;

	return __f2fs_set_acl(inode, type, acl, NULL);
	return __f2fs_set_acl(mnt_userns, inode, type, acl, NULL);
}

/*
@@ -419,7 +422,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
	f2fs_mark_inode_dirty_sync(inode, true);

	if (default_acl) {
		error = __f2fs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl,
		error = __f2fs_set_acl(NULL, inode, ACL_TYPE_DEFAULT, default_acl,
				       ipage);
		posix_acl_release(default_acl);
	} else {
@@ -427,7 +430,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
	}
	if (acl) {
		if (!error)
			error = __f2fs_set_acl(inode, ACL_TYPE_ACCESS, acl,
			error = __f2fs_set_acl(NULL, inode, ACL_TYPE_ACCESS, acl,
					       ipage);
		posix_acl_release(acl);
	} else {
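
The conversion above is the standard idmapped-mounts plumbing: VFS-facing
entry points receive the mount's user namespace (mnt_userns) and thread it
down to the ownership checks, while the inode-creation path passes NULL, as
seen in f2fs_init_acl() above. Restating the core check as a standalone
sketch (the helper name is mine, not from the patch):

#include <linux/capability.h>
#include <linux/cred.h>
#include <linux/fs.h>

/*
 * Sketch: clear S_ISGID unless the caller is in the inode's group, or
 * holds CAP_FSETID, as seen through the mount's idmapping. Passing
 * &init_user_ns degenerates to the old, non-idmapped behavior.
 */
static umode_t sketch_strip_sgid(struct user_namespace *mnt_userns,
				 struct inode *inode, umode_t mode)
{
	if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
	    !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
		mode &= ~S_ISGID;
	return mode;
}
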
fs/f2fs/checkpoint.c  +36 −22
@@ -98,6 +98,13 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
	}

	if (unlikely(!PageUptodate(page))) {
		/*
		 * Tolerate repeated EIO on the same meta page up to a limit,
		 * then declare a checkpoint error. A new offset resets the
		 * retry counter.
		 */
		if (page->index == sbi->metapage_eio_ofs) {
			if (sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO)
				set_ckpt_flags(sbi, CP_ERROR_FLAG);
		} else {
			sbi->metapage_eio_ofs = page->index;
			sbi->metapage_eio_cnt = 0;
		}
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
@@ -282,18 +289,22 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
	return blkno - start;
}

void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index,
							unsigned int ra_blocks)
{
	struct page *page;
	bool readahead = false;

	if (ra_blocks == RECOVERY_MIN_RA_BLOCKS)
		return;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true);
		f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true);
}

static int __f2fs_write_meta_page(struct page *page,
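
The new early return makes RECOVERY_MIN_RA_BLOCKS act as an off switch for
readahead during recovery. A plausible companion policy — sketched here as
an assumption, not quoted from the series — grows the window while the node
chain stays contiguous and shrinks it when the chain scatters:

#include <linux/bio.h>
#include <linux/minmax.h>

/* values assumed for illustration; the real constants live in f2fs.h */
#define RECOVERY_MAX_RA_BLOCKS	BIO_MAX_VECS
#define RECOVERY_MIN_RA_BLOCKS	1

/*
 * Hypothetical policy: double the readahead window on contiguous node
 * blocks, halve it on jumps. Once the window hits RECOVERY_MIN_RA_BLOCKS,
 * f2fs_ra_meta_pages_cond() above becomes a no-op and recovery reads one
 * page at a time.
 */
static unsigned int adjust_por_ra_blocks(unsigned int ra_blocks,
					 unsigned int blkaddr,
					 unsigned int next_blkaddr)
{
	if (blkaddr + 1 == next_blkaddr)
		return min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
						ra_blocks * 2);
	return max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, ra_blocks / 2);
}
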
@@ -351,13 +362,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
		goto skip_write;

	/* if the lock failed, cp will flush dirty pages instead */
	if (!down_write_trylock(&sbi->cp_global_sem))
	if (!f2fs_down_write_trylock(&sbi->cp_global_sem))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
	up_write(&sbi->cp_global_sem);
	f2fs_up_write(&sbi->cp_global_sem);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

@@ -864,6 +875,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	unsigned int cp_blocks;
	int err;

	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
@@ -871,15 +883,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
	if (err)
		return NULL;

	if (le32_to_cpu(cp_block->cp_pack_total_block_count) >
					sbi->blocks_per_seg) {
	cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);

	if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) {
		f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u",
			  le32_to_cpu(cp_block->cp_pack_total_block_count));
		goto invalid_cp;
	}
	pre_version = *version;

	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	cp_addr += cp_blocks - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
@@ -1159,7 +1172,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
	if (!is_journalled_quota(sbi))
		return false;

	if (!down_write_trylock(&sbi->quota_sem))
	if (!f2fs_down_write_trylock(&sbi->quota_sem))
		return true;
	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
		ret = false;
@@ -1171,7 +1184,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
	} else if (get_pages(sbi, F2FS_DIRTY_QDATA)) {
		ret = true;
	}
	up_write(&sbi->quota_sem);
	f2fs_up_write(&sbi->quota_sem);
	return ret;
}

@@ -1228,10 +1241,10 @@ static int block_operations(struct f2fs_sb_info *sbi)
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush. inode->i_blocks can be updated.
	 */
	down_write(&sbi->node_change);
	f2fs_down_write(&sbi->node_change);

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		up_write(&sbi->node_change);
		f2fs_up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
@@ -1241,15 +1254,15 @@ static int block_operations(struct f2fs_sb_info *sbi)
	}

retry_flush_nodes:
	down_write(&sbi->node_write);
	f2fs_down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		f2fs_up_write(&sbi->node_write);
		atomic_inc(&sbi->wb_sync_req[NODE]);
		err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
		atomic_dec(&sbi->wb_sync_req[NODE]);
		if (err) {
			up_write(&sbi->node_change);
			f2fs_up_write(&sbi->node_change);
			f2fs_unlock_all(sbi);
			return err;
		}
@@ -1262,13 +1275,13 @@ static int block_operations(struct f2fs_sb_info *sbi)
	 * dirty node blocks and some checkpoint values by block allocation.
	 */
	__prepare_cp_block(sbi);
	up_write(&sbi->node_change);
	f2fs_up_write(&sbi->node_change);
	return err;
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

@@ -1543,6 +1556,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);
	percpu_counter_set(&sbi->rf_node_block_count, 0);

	/* Here, we have one bio having CP pack except cp pack 2 page */
	f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
@@ -1612,7 +1626,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
		f2fs_warn(sbi, "Start checkpoint disabled!");
	}
	if (cpc->reason != CP_RESIZE)
		down_write(&sbi->cp_global_sem);
		f2fs_down_write(&sbi->cp_global_sem);

	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1693,7 +1707,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	if (cpc->reason != CP_RESIZE)
		up_write(&sbi->cp_global_sem);
		f2fs_up_write(&sbi->cp_global_sem);
	return err;
}

@@ -1741,9 +1755,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi)
	struct cp_control cpc = { .reason = CP_SYNC, };
	int err;

	down_write(&sbi->gc_lock);
	f2fs_down_write(&sbi->gc_lock);
	err = f2fs_write_checkpoint(sbi, &cpc);
	up_write(&sbi->gc_lock);
	f2fs_up_write(&sbi->gc_lock);

	return err;
}
@@ -1831,9 +1845,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi)
	if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) {
		int ret;

		down_write(&sbi->gc_lock);
		f2fs_down_write(&sbi->gc_lock);
		ret = f2fs_write_checkpoint(sbi, &cpc);
		up_write(&sbi->gc_lock);
		f2fs_up_write(&sbi->gc_lock);

		return ret;
	}
fs/f2fs/compress.c  +5 −6
@@ -314,10 +314,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic)
	}

	if (ret != PAGE_SIZE << dic->log_cluster_size) {
		printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, "
		printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, "
					"expected:%lu\n", KERN_ERR,
					F2FS_I_SB(dic->inode)->sb->s_id,
					dic->rlen,
					F2FS_I_SB(dic->inode)->sb->s_id, ret,
					PAGE_SIZE << dic->log_cluster_size);
		return -EIO;
	}
@@ -1267,7 +1266,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
		 * checkpoint. This can only happen to quota writes which can cause
		 * the below discard race condition.
		 */
		down_read(&sbi->node_write);
		f2fs_down_read(&sbi->node_write);
	} else if (!f2fs_trylock_op(sbi)) {
		goto out_free;
	}
@@ -1384,7 +1383,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,

	f2fs_put_dnode(&dn);
	if (IS_NOQUOTA(inode))
		up_read(&sbi->node_write);
		f2fs_up_read(&sbi->node_write);
	else
		f2fs_unlock_op(sbi);

@@ -1410,7 +1409,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
	f2fs_put_dnode(&dn);
out_unlock_op:
	if (IS_NOQUOTA(inode))
		up_read(&sbi->node_write);
		f2fs_up_read(&sbi->node_write);
	else
		f2fs_unlock_op(sbi);
out_free:
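
For context on the length check in lz4_decompress_pages() above: LZ4's safe
decoder returns the number of bytes it wrote (negative on malformed input),
so even a "successful" return must match the expected cluster size exactly —
which is why ret, not the constant rlen, is the useful value to log. A
userspace analogue, assuming liblz4:

#include <lz4.h>
#include <stdio.h>

/*
 * Illustrative: decompress one cluster and validate the output length,
 * mirroring the ret/expected check in lz4_decompress_pages().
 */
static int decompress_cluster(const char *src, int src_len,
			      char *dst, int expected_len)
{
	int ret = LZ4_decompress_safe(src, dst, src_len, expected_len);

	if (ret < 0)
		return -1;	/* malformed compressed stream */
	if (ret != expected_len) {
		fprintf(stderr, "lz4 invalid ret:%d, expected:%d\n",
			ret, expected_len);
		return -1;	/* truncated output: corrupted cluster */
	}
	return 0;
}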