Commit 0cfcde1f authored by Linus Torvalds's avatar Linus Torvalds
Browse files
Pull ext4 updates from Ted Ts'o:
 "There are a number of major cleanups in ext4 this cycle:

   - The data=journal writepath has been significantly cleaned up and
     simplified, and reduces a large number of data=journal special
     cases by Jan Kara.

   - Ojaswin Mujoo has replaced the linked list used to track extents that
     have been used for inode preallocation with a red-black tree in the
     multi-block allocator. This improves performance for workloads
     which do a large number of random allocating writes.

   - Thanks to Kemeng Shi for a lot of cleanup and bug fixes in the
     multi-block allocator.

   - Matthew Wilcox has converted the code paths for reading and writing
     ext4 pages to use folios.

   - Jason Yan has continued to factor out ext4_fill_super() into
     smaller functions to improve ease of maintenance and
     comprehension.

   - Josh Triplett has created a uapi header for ext4 userspace APIs"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (105 commits)
  ext4: Add a uapi header for ext4 userspace APIs
  ext4: remove useless conditional branch code
  ext4: remove unneeded check of nr_to_submit
  ext4: move dax and encrypt checking into ext4_check_feature_compatibility()
  ext4: factor out ext4_block_group_meta_init()
  ext4: move s_reserved_gdt_blocks and addressable checking into ext4_check_geometry()
  ext4: rename two functions with 'check'
  ext4: factor out ext4_flex_groups_free()
  ext4: use ext4_group_desc_free() in ext4_put_super() to save some duplicated code
  ext4: factor out ext4_percpu_param_init() and ext4_percpu_param_destroy()
  ext4: factor out ext4_hash_info_init()
  Revert "ext4: Fix warnings when freezing filesystem with journaled data"
  ext4: Update comment in mpage_prepare_extent_to_map()
  ext4: Simplify handling of journalled data in ext4_bmap()
  ext4: Drop special handling of journalled data from ext4_quota_on()
  ext4: Drop special handling of journalled data from ext4_evict_inode()
  ext4: Fix special handling of journalled data from extent zeroing
  ext4: Drop special handling of journalled data from extent shifting operations
  ext4: Drop special handling of journalled data from ext4_sync_file()
  ext4: Commit transaction before writing back pages in data=journal mode
  ...
parents c3558a6b 519fe1ba
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -489,9 +489,6 @@ Files in /sys/fs/ext4/<devname>:
        multiple of this tuning parameter if the stripe size is not set in the
        ext4 superblock

  mb_max_inode_prealloc
        The maximum length of per-inode ext4_prealloc_space list.

  mb_max_to_scan
        The maximum number of extents the multiblock allocator will search to
        find the best extent.
+1 −0
Original line number Diff line number Diff line
@@ -7745,6 +7745,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
F:	Documentation/filesystems/ext4/
F:	fs/ext4/
F:	include/trace/events/ext4.h
F:	include/uapi/linux/ext4.h
Extended Verification Module (EVM)
M:	Mimi Zohar <zohar@linux.ibm.com>
+1 −0
Original line number Diff line number Diff line
@@ -1159,6 +1159,7 @@ bool bio_add_folio(struct bio *bio, struct folio *folio, size_t len,
		return false;
	return bio_add_page(bio, &folio->page, len, off) > 0;
}
EXPORT_SYMBOL(bio_add_folio);

void __bio_release_pages(struct bio *bio, bool mark_dirty)
{
+62 −62
Original line number Diff line number Diff line
@@ -80,32 +80,56 @@ static inline int ext4_block_in_group(struct super_block *sb,
	return (actual_group == block_group) ? 1 : 0;
}

/* Return the number of clusters used for file system metadata; this
/*
 * Return the number of clusters used for file system metadata; this
 * represents the overhead needed by the file system.
 */
static unsigned ext4_num_overhead_clusters(struct super_block *sb,
					   ext4_group_t block_group,
					   struct ext4_group_desc *gdp)
{
	unsigned num_clusters;
	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
	unsigned base_clusters, num_clusters;
	int block_cluster = -1, inode_cluster;
	int itbl_cluster_start = -1, itbl_cluster_end = -1;
	ext4_fsblk_t start = ext4_group_first_block_no(sb, block_group);
	ext4_fsblk_t itbl_blk;
	ext4_fsblk_t end = start + EXT4_BLOCKS_PER_GROUP(sb) - 1;
	ext4_fsblk_t itbl_blk_start, itbl_blk_end;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* This is the number of clusters used by the superblock,
	 * block group descriptors, and reserved block group
	 * descriptor blocks */
	num_clusters = ext4_num_base_meta_clusters(sb, block_group);
	base_clusters = ext4_num_base_meta_clusters(sb, block_group);
	num_clusters = base_clusters;

	/*
	 * For the allocation bitmaps and inode table, we first need
	 * to check to see if the block is in the block group.  If it
	 * is, then check to see if the cluster is already accounted
	 * for in the clusters used for the base metadata cluster, or
	 * if we can increment the base metadata cluster to include
	 * that block.  Otherwise, we will have to track the cluster
	 * used for the allocation bitmap or inode table explicitly.
	 * Account and record inode table clusters if any cluster
	 * is in the block group, or inode table cluster range is
	 * [-1, -1] and won't overlap with block/inode bitmap cluster
	 * accounted below.
	 */
	itbl_blk_start = ext4_inode_table(sb, gdp);
	itbl_blk_end = itbl_blk_start + sbi->s_itb_per_group - 1;
	if (itbl_blk_start <= end && itbl_blk_end >= start) {
		itbl_blk_start = itbl_blk_start >= start ?
			itbl_blk_start : start;
		itbl_blk_end = itbl_blk_end <= end ?
			itbl_blk_end : end;

		itbl_cluster_start = EXT4_B2C(sbi, itbl_blk_start - start);
		itbl_cluster_end = EXT4_B2C(sbi, itbl_blk_end - start);

		num_clusters += itbl_cluster_end - itbl_cluster_start + 1;
		/* check if border cluster is overlapped */
		if (itbl_cluster_start == base_clusters - 1)
			num_clusters--;
	}

	/*
	 * For the allocation bitmaps, we first need to check to see
	 * if the block is in the block group.  If it is, then check
	 * to see if the cluster is already accounted for in the clusters
	 * used for the base metadata cluster and inode tables cluster.
	 * Normally all of these blocks are contiguous, so the special
	 * case handling shouldn't be necessary except for *very*
	 * unusual file system layouts.
@@ -113,46 +137,26 @@ static unsigned ext4_num_overhead_clusters(struct super_block *sb,
	if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) {
		block_cluster = EXT4_B2C(sbi,
					 ext4_block_bitmap(sb, gdp) - start);
		if (block_cluster < num_clusters)
			block_cluster = -1;
		else if (block_cluster == num_clusters) {
		if (block_cluster >= base_clusters &&
		    (block_cluster < itbl_cluster_start ||
		    block_cluster > itbl_cluster_end))
			num_clusters++;
			block_cluster = -1;
		}
	}

	if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) {
		inode_cluster = EXT4_B2C(sbi,
					 ext4_inode_bitmap(sb, gdp) - start);
		if (inode_cluster < num_clusters)
			inode_cluster = -1;
		else if (inode_cluster == num_clusters) {
		/*
		 * Additional check if inode bitmap is in just accounted
		 * block_cluster
		 */
		if (inode_cluster != block_cluster &&
		    inode_cluster >= base_clusters &&
		    (inode_cluster < itbl_cluster_start ||
		    inode_cluster > itbl_cluster_end))
			num_clusters++;
			inode_cluster = -1;
		}
	}

	itbl_blk = ext4_inode_table(sb, gdp);
	for (i = 0; i < sbi->s_itb_per_group; i++) {
		if (ext4_block_in_group(sb, itbl_blk + i, block_group)) {
			c = EXT4_B2C(sbi, itbl_blk + i - start);
			if ((c < num_clusters) || (c == inode_cluster) ||
			    (c == block_cluster) || (c == itbl_cluster))
				continue;
			if (c == num_clusters) {
				num_clusters++;
				continue;
			}
			num_clusters++;
			itbl_cluster = c;
		}
	}

	if (block_cluster != -1)
		num_clusters++;
	if (inode_cluster != -1)
		num_clusters++;

	return num_clusters;
}

@@ -187,8 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,

	ASSERT(buffer_locked(bh));

	/* If checksum is bad mark all blocks used to prevent allocation
	 * essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
		ext4_mark_group_bitmap_corrupted(sb, block_group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT |
@@ -350,13 +352,13 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
	blk = ext4_inode_table(sb, desc);
	offset = blk - group_first_block;
	if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit ||
	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit)
	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) >= max_bit)
		return blk;
	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
			EXT4_B2C(sbi, offset + sbi->s_itb_per_group),
			EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1,
			EXT4_B2C(sbi, offset));
	if (next_zero_bit <
	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group))
	    EXT4_B2C(sbi, offset + sbi->s_itb_per_group - 1) + 1)
		/* bad bitmap for inode tables */
		return blk;
	return 0;
@@ -383,8 +385,7 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
	ext4_lock_group(sb, block_group);
	if (buffer_verified(bh))
		goto verified;
	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
						    desc, bh) ||
	if (unlikely(!ext4_block_bitmap_csum_verify(sb, desc, bh) ||
		     ext4_simulate_fail(sb, EXT4_SIM_BBITMAP_CRC))) {
		ext4_unlock_group(sb, block_group);
		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
@@ -474,17 +475,19 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group,
			goto out;
		}
		err = ext4_init_block_bitmap(sb, bh, block_group, desc);
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		set_buffer_verified(bh);
		if (err) {
			ext4_unlock_group(sb, block_group);
			unlock_buffer(bh);
		if (err) {
			ext4_error(sb, "Failed to init block bitmap for group "
				   "%u: %d", block_group, err);
			goto out;
		}
		goto verify;
		set_bitmap_uptodate(bh);
		set_buffer_uptodate(bh);
		set_buffer_verified(bh);
		ext4_unlock_group(sb, block_group);
		unlock_buffer(bh);
		return bh;
	}
	ext4_unlock_group(sb, block_group);
	if (buffer_uptodate(bh)) {
@@ -842,9 +845,6 @@ static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
	if (!ext4_bg_has_super(sb, group))
		return 0;

	if (ext4_has_feature_meta_bg(sb))
		return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
	else
	return EXT4_SB(sb)->s_gdb_count;
}

@@ -887,11 +887,11 @@ static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
			  sbi->s_desc_per_block) {
		if (num) {
			num += ext4_bg_num_gdb(sb, block_group);
			num += ext4_bg_num_gdb_nometa(sb, block_group);
			num += le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
		}
	} else { /* For META_BG_BLOCK_GROUPS */
		num += ext4_bg_num_gdb(sb, block_group);
		num += ext4_bg_num_gdb_meta(sb, block_group);
	}
	return EXT4_NUM_B2C(sbi, num);
}
+5 −8
Original line number Diff line number Diff line
@@ -16,7 +16,7 @@ unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
	return numchars * BITS_PER_BYTE - memweight(bitmap, numchars);
}

int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
int ext4_inode_bitmap_csum_verify(struct super_block *sb,
				  struct ext4_group_desc *gdp,
				  struct buffer_head *bh, int sz)
{
@@ -38,7 +38,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
	return provided == calculated;
}

void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
void ext4_inode_bitmap_csum_set(struct super_block *sb,
				struct ext4_group_desc *gdp,
				struct buffer_head *bh, int sz)
{
@@ -54,7 +54,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
		gdp->bg_inode_bitmap_csum_hi = cpu_to_le16(csum >> 16);
}

int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
int ext4_block_bitmap_csum_verify(struct super_block *sb,
				  struct ext4_group_desc *gdp,
				  struct buffer_head *bh)
{
@@ -74,13 +74,10 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
	} else
		calculated &= 0xFFFF;

	if (provided == calculated)
		return 1;

	return 0;
	return provided == calculated;
}

void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
void ext4_block_bitmap_csum_set(struct super_block *sb,
				struct ext4_group_desc *gdp,
				struct buffer_head *bh)
{
Loading