Commit 6691d940, authored by Daeho Jeong, committed by Jaegeuk Kim
Browse files

f2fs: introduce fragment allocation mode mount option



Added two options into "mode=" mount option to make it possible for
developers to simulate filesystem fragmentation/after-GC situation
itself. The developers use these modes to understand filesystem
fragmentation/after-GC condition well, and eventually get some
insights to handle them better.

"fragment:segment": f2fs allocates a new segment in random position.
		With this, we can simulate the after-GC condition.
"fragment:block" : We can scatter block allocation with
		"max_fragment_chunk" and "max_fragment_hole" sysfs
		nodes. f2fs will allocate 1..<max_fragment_chunk>
		blocks in a chunk and make a hole in the length of
		1..<max_fragment_hole> by turns in a newly allocated
		free segment. Plus, this mode implicitly enables
		"fragment:segment" option for more randomness.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Daeho Jeong <daehojeong@google.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 84eab2a8
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -512,3 +512,19 @@ Date: July 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	You can	control the multiplier value of	bdi device readahead window size
		between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.

What:		/sys/fs/f2fs/<disk>/max_fragment_chunk
Date:		August 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	With the "mode=fragment:block" mount option, we can scatter block allocation.
		f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
		in the length of 1..<max_fragment_hole> by turns. This value can be set
		between 1..512 and the default value is 4.

What:		/sys/fs/f2fs/<disk>/max_fragment_hole
Date:		August 2021
Contact:	"Daeho Jeong" <daehojeong@google.com>
Description:	With the "mode=fragment:block" mount option, we can scatter block allocation.
		f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
		in the length of 1..<max_fragment_hole> by turns. This value can be set
		between 1..512 and the default value is 4.
+18 −0
Original line number Diff line number Diff line
@@ -201,6 +201,24 @@ fault_type=%d Support configuring fault injection type, should be
mode=%s			 Control block allocation mode which supports "adaptive"
			 and "lfs". In "lfs" mode, there should be no random
			 writes towards main area.
			 "fragment:segment" and "fragment:block" are newly added here.
			 These are developer options for experiments to simulate filesystem
			 fragmentation/after-GC situation itself. The developers use these
			 modes to understand filesystem fragmentation/after-GC condition well,
			 and eventually get some insights to handle them better.
			 In "fragment:segment", f2fs allocates a new segment in random
			 position. With this, we can simulate the after-GC condition.
			 In "fragment:block", we can scatter block allocation with
			 "max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
			 We added some randomness to both chunk and hole size to make
			 it close to realistic IO pattern. So, in this mode, f2fs will allocate
			 1..<max_fragment_chunk> blocks in a chunk and make a hole in the
			 length of 1..<max_fragment_hole> by turns. With this, the newly
			 allocated blocks will be scattered throughout the whole partition.
			 Note that "fragment:block" implicitly enables "fragment:segment"
			 option for more randomness.
			 Please use these options only for experiments; we strongly
			 recommend re-formatting the filesystem after using them.
io_bits=%u		 Set the bit size of write IO requests. It should be set
			 with "mode=lfs".
usrquota		 Enable plain user disk quota accounting.
+17 −2
Original line number Diff line number Diff line
@@ -1289,6 +1289,8 @@ enum {
/*
 * Allocation modes stored in F2FS_OPTION(sbi).fs_mode.
 * NOTE(review): presumably these map one-to-one onto the values accepted by
 * the "mode=" mount option ("adaptive", "lfs", "fragment:segment",
 * "fragment:block") -- confirm against the mount option parser.
 */
enum {
	FS_MODE_ADAPTIVE,		/* use both lfs/ssr allocation */
	FS_MODE_LFS,			/* use lfs allocation only */
	FS_MODE_FRAGMENT_SEG,		/* segment fragmentation mode */
	FS_MODE_FRAGMENT_BLK,		/* block fragmentation mode */
};

enum {
@@ -1759,6 +1761,9 @@ struct f2fs_sb_info {

	unsigned long seq_file_ra_mul;		/* multiplier for ra_pages of seq. files in fadvise */

	int max_fragment_chunk;			/* max chunk size for block fragmentation mode */
	int max_fragment_hole;			/* max hole size for block fragmentation mode */

#ifdef CONFIG_F2FS_FS_COMPRESSION
	struct kmem_cache *page_array_slab;	/* page array entry */
	unsigned int page_array_slab_size;	/* default page array slab size */
@@ -3519,6 +3524,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
			unsigned int segno);

/*
 * Default, lower and upper limits for a fragment size.
 * NOTE(review): these match the documented range (1..512, default 4) of the
 * max_fragment_chunk and max_fragment_hole sysfs nodes; presumably they are
 * used to initialise and validate those two fields -- confirm at the users.
 */
#define DEF_FRAGMENT_SIZE	4
#define MIN_FRAGMENT_SIZE	1
#define MAX_FRAGMENT_SIZE	512

/*
 * Tell whether the current allocation mode asks for a randomly positioned
 * segment: true for both fragmentation modes (segment and block), false for
 * every other fs_mode value.
 */
static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi)
{
	switch (F2FS_OPTION(sbi).fs_mode) {
	case FS_MODE_FRAGMENT_SEG:
	case FS_MODE_FRAGMENT_BLK:
		return true;
	default:
		return false;
	}
}

/*
 * checkpoint.c
 */
+4 −1
Original line number Diff line number Diff line
@@ -14,6 +14,7 @@
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "node.h"
@@ -257,7 +258,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
		p->max_search = sbi->max_victim_search;

	/* let's select beginning hot/small space first in no_heap mode*/
	if (test_opt(sbi, NOHEAP) &&
	if (f2fs_need_rand_seg(sbi))
		p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);
	else if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		p->offset = 0;
	else
+18 −2
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/sched/signal.h>
#include <linux/random.h>

#include "f2fs.h"
#include "segment.h"
@@ -2649,6 +2650,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
	unsigned short seg_type = curseg->seg_type;

	sanity_check_seg_type(sbi, seg_type);
	if (f2fs_need_rand_seg(sbi))
		return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec);

	/* if segs_per_sec is large than 1, we need to keep original policy. */
	if (__is_large_section(sbi))
@@ -2700,6 +2703,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
	if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK)
		curseg->fragment_remained_chunk =
				prandom_u32() % sbi->max_fragment_chunk + 1;
}

static int __next_free_blkoff(struct f2fs_sb_info *sbi,
@@ -2726,12 +2732,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi,
/*
 * Advance seg->next_blkoff to the next block offset to allocate from.
 *
 * For an SSR segment the new offset is whatever __next_free_blkoff() returns
 * when searching from the slot after the current one. Otherwise the offset
 * steps forward by one block and, in FS_MODE_FRAGMENT_BLK mode, additionally
 * jumps over a randomly sized hole each time the current randomly sized chunk
 * has been used up.
 *
 * NOTE(review): this excerpt is a diff rendered without +/- markers, so the
 * pre-change unbraced "if (...)" / "else" lines appear directly above their
 * braced replacements.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
	if (seg->alloc_type == SSR) {
		seg->next_blkoff =
			__next_free_blkoff(sbi, seg->segno,
						seg->next_blkoff + 1);
	else
	} else {
		seg->next_blkoff++;
		if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) {
			/* To allocate block chunks in different sizes, use random number */
			/*
			 * Chunk exhausted: draw the next chunk length from
			 * 1..max_fragment_chunk and skip a hole of
			 * 1..max_fragment_hole blocks.
			 * NOTE(review): both limits are modulo operands and
			 * must be non-zero -- confirm they are validated where
			 * they are set. The skip may also move next_blkoff
			 * past the end of the segment; presumably the caller
			 * checks remaining space -- confirm.
			 */
			if (--seg->fragment_remained_chunk <= 0) {
				seg->fragment_remained_chunk =
				   prandom_u32() % sbi->max_fragment_chunk + 1;
				seg->next_blkoff +=
				   prandom_u32() % sbi->max_fragment_hole + 1;
			}
		}
	}
}

bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
Loading