Commit 27bc446e authored by brookxu's avatar brookxu Committed by Theodore Ts'o
Browse files

ext4: limit the length of per-inode prealloc list



In the scenario of writing sparse files, the per-inode prealloc list may
be very long, resulting in high overhead for ext4_mb_use_preallocated().
To circumvent this problem, we limit the maximum length of per-inode
prealloc list to 512 and allow users to modify it.

After patching, we observed that the sys ratio of cpu has dropped, and
the system throughput has increased significantly. We created a process
to write the sparse file, and the running time of the process on the
fixed kernel was significantly reduced, as follows:

Running time on unfixed kernel:
[root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat
real    0m2.051s
user    0m0.008s
sys     0m2.026s

Running time on fixed kernel:
[root@TENCENT64 ~]# time taskset 0x01 ./sparse /data1/sparce.dat
real    0m0.471s
user    0m0.004s
sys     0m0.395s

Signed-off-by: default avatarChunguang Xu <brookxu@tencent.com>
Link: https://lore.kernel.org/r/d7a98178-056b-6db5-6bce-4ead23f4a257@gmail.com


Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 66d5e027
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -482,6 +482,9 @@ Files in /sys/fs/ext4/<devname>:
        multiple of this tuning parameter if the stripe size is not set in the
        ext4 superblock

  mb_max_inode_prealloc
        The maximum length of per-inode ext4_prealloc_space list.

  mb_max_to_scan
        The maximum number of extents the multiblock allocator will search to
        find the best extent.
+3 −1
Original line number Diff line number Diff line
@@ -1070,6 +1070,7 @@ struct ext4_inode_info {
	struct timespec64 i_crtime;

	/* mballoc */
	atomic_t i_prealloc_active;
	struct list_head i_prealloc_list;
	spinlock_t i_prealloc_lock;

@@ -1518,6 +1519,7 @@ struct ext4_sb_info {
	unsigned int s_mb_stats;
	unsigned int s_mb_order2_reqs;
	unsigned int s_mb_group_prealloc;
	unsigned int s_mb_max_inode_prealloc;
	unsigned int s_max_dir_size_kb;
	/* where last allocation was done - for stream allocation */
	unsigned long s_mb_last_group;
@@ -2682,7 +2684,7 @@ extern int ext4_mb_release(struct super_block *);
extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
				struct ext4_allocation_request *, int *);
extern int ext4_mb_reserve_blocks(struct super_block *, int);
extern void ext4_discard_preallocations(struct inode *);
extern void ext4_discard_preallocations(struct inode *, unsigned int);
extern int __init ext4_init_mballoc(void);
extern void ext4_exit_mballoc(void);
extern ext4_group_t ext4_mb_prefetch(struct super_block *sb,
+5 −5
Original line number Diff line number Diff line
@@ -100,7 +100,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped)
	 * i_mutex. So we can safely drop the i_data_sem here.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	ext4_discard_preallocations(inode);
	ext4_discard_preallocations(inode, 0);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;
	return 0;
@@ -4266,7 +4266,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			 * not a good idea to call discard here directly,
			 * but otherwise we'd need to call it every free().
			 */
			ext4_discard_preallocations(inode);
			ext4_discard_preallocations(inode, 0);
			if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
				fb_flags = EXT4_FREE_BLOCKS_NO_QUOT_UPDATE;
			ext4_free_blocks(handle, inode, NULL, newblock,
@@ -5293,7 +5293,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
	}

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);
	ext4_discard_preallocations(inode, 0);

	ret = ext4_es_remove_extent(inode, punch_start,
				    EXT_MAX_BLOCKS - punch_start);
@@ -5307,7 +5307,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
		up_write(&EXT4_I(inode)->i_data_sem);
		goto out_stop;
	}
	ext4_discard_preallocations(inode);
	ext4_discard_preallocations(inode, 0);

	ret = ext4_ext_shift_extents(inode, handle, punch_stop,
				     punch_stop - punch_start, SHIFT_LEFT);
@@ -5439,7 +5439,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
		goto out_stop;

	down_write(&EXT4_I(inode)->i_data_sem);
	ext4_discard_preallocations(inode);
	ext4_discard_preallocations(inode, 0);

	path = ext4_find_extent(inode, offset_lblk, NULL, 0);
	if (IS_ERR(path)) {
+1 −1
Original line number Diff line number Diff line
@@ -147,7 +147,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
			(atomic_read(&inode->i_writecount) == 1) &&
			!EXT4_I(inode)->i_reserved_data_blocks) {
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);
		ext4_discard_preallocations(inode, 0);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	if (is_dx(inode) && filp->private_data)
+1 −1
Original line number Diff line number Diff line
@@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
	 * i_mutex. So we can safely drop the i_data_sem here.
	 */
	BUG_ON(EXT4_JOURNAL(inode) == NULL);
	ext4_discard_preallocations(inode);
	ext4_discard_preallocations(inode, 0);
	up_write(&EXT4_I(inode)->i_data_sem);
	*dropped = 1;
	return 0;
Loading