Commit 7041d5d2 authored by Chao Yu, committed by Jaegeuk Kim
Browse files

f2fs: combine nat_bits and free_nid_bitmap cache



Both nat_bits cache and free_nid_bitmap cache provide same functionality
as an intermediate cache between free nid cache and disk, but with
different granularity of indicating free nid range, and different
persistence policy. nat_bits cache provides better persistence ability,
and free_nid_bitmap provides better granularity.

In this patch we combine the advantages of both caches, so the final policy
of the intermediate cache would be:
- init: load free nid status from nat_bits into free_nid_bitmap
- lookup: scan free_nid_bitmap before loading NAT blocks
- update: update free_nid_bitmap in real-time
- persistence: update and persist nat_bits in checkpoint

This patch also resolves performance regression reported by lkp-robot.

commit:
  4ac91242 ("f2fs: introduce free nid bitmap")
  d00030cf9cd0bb96fdccc41e33d3c91dcbb672ba ("f2fs: use __set{__clear}_bit_le")
  1382c0f3f9d3f936c8bc42ed1591cf7a593ef9f7 ("f2fs: combine nat_bits and free_nid_bitmap cache")

4ac91242 d00030cf9cd0bb96fdccc41e33 1382c0f3f9d3f936c8bc42ed15
---------------- -------------------------- --------------------------
         %stddev     %change         %stddev     %change         %stddev
             \          |                \          |                \
     77863 ±  0%      +2.1%      79485 ±  1%     +50.8%     117404 ±  0%  aim7.jobs-per-min
    231.63 ±  0%      -2.0%     227.01 ±  1%     -33.6%     153.80 ±  0%  aim7.time.elapsed_time
    231.63 ±  0%      -2.0%     227.01 ±  1%     -33.6%     153.80 ±  0%  aim7.time.elapsed_time.max
    896604 ±  0%      -0.8%     889221 ±  3%     -20.2%     715260 ±  1%  aim7.time.involuntary_context_switches
      2394 ±  1%      +4.6%       2503 ±  1%      +3.7%       2481 ±  2%  aim7.time.maximum_resident_set_size
      6240 ±  0%      -1.5%       6145 ±  1%     -14.1%       5360 ±  1%  aim7.time.system_time
   1111357 ±  3%      +1.9%    1132509 ±  2%      -6.2%    1041932 ±  2%  aim7.time.voluntary_context_switches
...

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Tested-by: Xiaolong Ye <xiaolong.ye@intel.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 586d1492
Loading
Loading
Loading
Loading
+47 −78
Original line number Diff line number Diff line
@@ -338,9 +338,6 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
		set_nat_flag(e, IS_CHECKPOINTED, false);
	__set_nat_cache_dirty(nm_i, e);

	if (enabled_nat_bits(sbi, NULL) && new_blkaddr == NEW_ADDR)
		__clear_bit_le(NAT_BLOCK_OFFSET(ni->nid), nm_i->empty_nat_bits);

	/* update fsync_mark if its inode nat entry is still alive */
	if (ni->nid != ni->ino)
		e = __lookup_nat_cache(nm_i, ni->ino);
@@ -1824,7 +1821,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
}

static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
							bool set, bool build)
			bool set, bool build, bool locked)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
@@ -1838,11 +1835,13 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
	else
		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);

	if (!locked)
		spin_lock(&nm_i->free_nid_lock);
	if (set)
		nm_i->free_nid_count[nat_ofs]++;
	else if (!build)
		nm_i->free_nid_count[nat_ofs]--;
	if (!locked)
		spin_unlock(&nm_i->free_nid_lock);
}

@@ -1872,7 +1871,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
		if (blk_addr == NULL_ADDR)
			freed = add_free_nid(sbi, start_nid, true);
		update_free_nid_bitmap(sbi, start_nid, freed, true);
		update_free_nid_bitmap(sbi, start_nid, freed, true, false);
	}
}

@@ -1920,58 +1919,6 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
	up_read(&nm_i->nat_tree_lock);
}

/*
 * Try to refill the free nid list from the nat_bits tables.
 *
 * Pass 1 walks every NAT block flagged in empty_nat_bits and offers each of
 * its nids (below max_nid) to add_free_nid().  If the free nid list is still
 * short of MAX_FREE_NIDS afterwards, pass 2 walks the NAT blocks that are NOT
 * flagged in full_nat_bits and scans their NAT pages one by one.
 *
 * Returns -EAGAIN when nat_bits is not enabled, -EINVAL (after disabling
 * nat_bits) when pass 2 runs out of non-full blocks, and 0 otherwise.
 * nat_tree_lock is held for reading across both passes.
 */
static int scan_nat_bits(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int blk;
	int err = 0;

	if (!enabled_nat_bits(sbi, NULL))
		return -EAGAIN;

	down_read(&nm_i->nat_tree_lock);

	/* pass 1: blocks marked empty */
	for (blk = 0; ; blk++) {
		nid_t nid;

		blk = find_next_bit_le(nm_i->empty_nat_bits,
						nm_i->nat_blocks, blk);
		if (blk >= nm_i->nat_blocks)
			break;

		nid = blk * NAT_ENTRY_PER_BLOCK;
		while (nid < (blk + 1) * NAT_ENTRY_PER_BLOCK) {
			/* the last NAT block may extend past max_nid */
			if (unlikely(nid >= nm_i->max_nid))
				break;
			add_free_nid(sbi, nid, true);
			nid++;
		}

		/* enough candidates collected, no need for pass 2 */
		if (nm_i->nid_cnt[FREE_NID_LIST] >= MAX_FREE_NIDS)
			goto unlock;
	}

	/* pass 2: blocks not marked full, restarting from block 0 */
	blk = 0;
	do {
		struct page *nat_page;
		nid_t first_nid;

		blk = find_next_zero_bit_le(nm_i->full_nat_bits,
						nm_i->nat_blocks, blk);
		if (blk >= nm_i->nat_blocks) {
			/* no non-full block left to scan */
			disable_nat_bits(sbi, true);
			err = -EINVAL;
			break;
		}

		first_nid = blk * NAT_ENTRY_PER_BLOCK;
		nat_page = get_current_nat_page(sbi, first_nid);
		scan_nat_page(sbi, nat_page, first_nid);
		f2fs_put_page(nat_page, 1);

		blk++;
	} while (nm_i->nid_cnt[FREE_NID_LIST] < MAX_FREE_NIDS);

unlock:
	up_read(&nm_i->nat_tree_lock);
	return err;
}

static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
@@ -1993,21 +1940,6 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)

		if (nm_i->nid_cnt[FREE_NID_LIST])
			return;

		/* try to find free nids with nat_bits */
		if (!scan_nat_bits(sbi) && nm_i->nid_cnt[FREE_NID_LIST])
			return;
	}

	/* find next valid candidate */
	if (enabled_nat_bits(sbi, NULL)) {
		int idx = find_next_zero_bit_le(nm_i->full_nat_bits,
					nm_i->nat_blocks, 0);

		if (idx >= nm_i->nat_blocks)
			set_sbi_flag(sbi, SBI_NEED_FSCK);
		else
			nid = idx * NAT_ENTRY_PER_BLOCK;
	}

	/* readahead nat pages to be scanned */
@@ -2094,7 +2026,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
		__insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
		nm_i->available_nids--;

		update_free_nid_bitmap(sbi, *nid, false, false);
		update_free_nid_bitmap(sbi, *nid, false, false, false);

		spin_unlock(&nm_i->nid_list_lock);
		return true;
@@ -2150,7 +2082,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)

	nm_i->available_nids++;

	update_free_nid_bitmap(sbi, nid, true, false);
	update_free_nid_bitmap(sbi, nid, true, false, false);

	spin_unlock(&nm_i->nid_list_lock);

@@ -2480,11 +2412,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
			add_free_nid(sbi, nid, false);
			spin_lock(&NM_I(sbi)->nid_list_lock);
			NM_I(sbi)->available_nids++;
			update_free_nid_bitmap(sbi, nid, true, false);
			update_free_nid_bitmap(sbi, nid, true, false, false);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, nid, false, false);
			update_free_nid_bitmap(sbi, nid, false, false, false);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
	}
@@ -2590,6 +2522,40 @@ static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
	return 0;
}

/*
 * Seed the in-memory free nid bitmap from the nat_bits tables.
 *
 * Does nothing when nat_bits is not enabled.  Otherwise:
 *  - every NAT block flagged in empty_nat_bits is marked in nat_block_bitmap
 *    and each nid of that block is passed to update_free_nid_bitmap() with
 *    set = true, build = true;
 *  - every NAT block flagged in full_nat_bits is marked in nat_block_bitmap
 *    only.
 *
 * The function is file-local, hence static: a plain "inline" definition with
 * external linkage provides no out-of-line symbol under C99/C11 semantics.
 */
static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i;
	nid_t nid, last_nid;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	/* empty NAT blocks: all of their nids are free */
	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		/*
		 * NOTE(review): the block is flagged before its nids are
		 * recorded; presumably update_free_nid_bitmap() only acts on
		 * flagged blocks - confirm against its full body.
		 */
		__set_bit_le(i, nm_i->nat_block_bitmap);

		nid = i * NAT_ENTRY_PER_BLOCK;
		last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK;

		/*
		 * Take free_nid_lock once per block and pass locked = true so
		 * the callee does not take it again for every nid.
		 */
		spin_lock(&nm_i->free_nid_lock);
		for (; nid < last_nid; nid++)
			update_free_nid_bitmap(sbi, nid, true, true, true);
		spin_unlock(&nm_i->free_nid_lock);
	}

	/* full NAT blocks: only mark the block itself in nat_block_bitmap */
	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);
	}
}

static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
@@ -2691,6 +2657,9 @@ int build_node_manager(struct f2fs_sb_info *sbi)
	if (err)
		return err;

	/* load free nid status from nat_bits table */
	load_free_nid_bitmap(sbi);

	build_free_nids(sbi, true, true);
	return 0;
}