Commit 4ae361af authored by Zhihao Cheng's avatar Zhihao Cheng Committed by openeuler-sync-bot
Browse files

ext4: Validate inode pa before using preallocation blocks

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I97HJA
CVE: NA

--------------------------------

In ext4 "continue" (errors=continue) & no-journal mode, the same physical
blocks can be allocated more than once during preallocation (because
writing the extent entries failed and the extent cache was reclaimed),
which can trigger BUG_ON(pa->pa_free < len) in ext4_mb_use_inode_pa().

 kernel BUG at fs/ext4/mballoc.c:4681!
 invalid opcode: 0000 [#1] PREEMPT SMP
 CPU: 3 PID: 97 Comm: kworker/u8:3 Not tainted 6.8.0-rc7
 RIP: 0010:ext4_mb_use_inode_pa+0x1b6/0x1e0
 Call Trace:
  ext4_mb_use_preallocated.constprop.0+0x19e/0x540
  ext4_mb_new_blocks+0x220/0x1f30
  ext4_ext_map_blocks+0xf3c/0x2900
  ext4_map_blocks+0x264/0xa40
  ext4_do_writepages+0xb15/0x1400
  do_writepages+0x8c/0x260
  writeback_sb_inodes+0x224/0x720
  wb_writeback+0xd8/0x580
  wb_workfn+0x148/0x820

Details are as follows:

0. Given a file with i_size=4096 with one mapped block
1. Write block no 1, blocks 1~3 are preallocated.
   ext4_ext_map_blocks
    ext4_mb_normalize_request
     size = 16 * 1024
     size = end - start // Allocate 3 blocks (bs = 4096)
    ext4_mb_regular_allocator
     ext4_mb_regular_allocator
     ext4_mb_regular_allocator
     ext4_mb_use_inode_pa
      pa->pa_free -= len // 3 - 1 = 2
2. Writing the extent buffer head fails; the es cache and the buffer head
   are reclaimed.
3. Write blocks 1~3
   ext4_ext_map_blocks
    newex.ee_len = 3
    ext4_ext_check_overlap // Find nothing, there should have been block 1
    allocated = map->m_len  // 3
    ext4_mb_new_blocks
     ext4_mb_use_preallocated
      ext4_mb_use_inode_pa
       BUG_ON(pa->pa_free < len) // 2 < 3!

Fix it by adding a validity check for the inode pa. If an invalid pa is
detected, stop using inode preallocation, drop the invalid pa so that it
cannot be used again, and mark the group's block bitmap as corrupted to
avoid allocating from the erroneous group.

A reproducer is available at the Link below.

Cc: stable@vger.kernel.org
Link: https://bugzilla.kernel.org/show_bug.cgi?id=218576


Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
(cherry picked from commit 37f15b21)
parent 231bde25
Loading
Loading
Loading
Loading
+62 −0
Original line number Diff line number Diff line
@@ -351,6 +351,9 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
						ext4_group_t group);
static void ext4_mb_new_preallocation(struct ext4_allocation_context *ac);

/*
 * Forward declaration: ext4_mb_put_pa() is called from
 * ext4_mb_use_preallocated() when an invalid inode pa is dropped.
 * NOTE(review): presumably the definition appears later in this file --
 * confirm before relying on that.
 */
static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
			struct super_block *sb, struct ext4_prealloc_space *pa);

/*
 * The algorithm using this percpu seq counter goes below:
 * 1. We sample the percpu discard_pa_seq counter before trying for block
@@ -3713,6 +3716,47 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
		pa->pa_free += ac->ac_b_ex.fe_len;
}

/*
 * Sanity-check an inode preallocation against the original request before
 * any blocks are taken from it.
 *
 * @ac: allocation context; ac->ac_o_ex supplies the requested logical start
 *      and length, ac->ac_sb the superblock used for logging.
 * @pa: the preallocation that was found for this request.
 *
 * The physical range the request would occupy inside @pa is derived from
 * the offset of the requested logical block within the pa, clamped to the
 * end of the pa.  The range is rejected if it starts before the pa, ends
 * after the pa, needs more clusters than the pa still has free, or is
 * empty/negative in length.  Each rejection is reported via ext4_msg().
 *
 * Return: true if every check passes, false otherwise.
 */
static bool ext4_mb_pa_is_valid(struct ext4_allocation_context *ac,
				struct ext4_prealloc_space *pa)
{
	struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
	/* first physical block past the end of the preallocation */
	ext4_fsblk_t pa_end = pa->pa_pstart + EXT4_C2B(sbi, pa->pa_len);
	/* physical block corresponding to the requested logical block */
	ext4_fsblk_t first = pa->pa_pstart +
			     (ac->ac_o_ex.fe_logical - pa->pa_lstart);
	/* end of the request, clamped so it never runs past the pa */
	ext4_fsblk_t last = min(pa_end,
				first + EXT4_C2B(sbi, ac->ac_o_ex.fe_len));
	/* number of clusters the request would take from the pa */
	int nr = EXT4_NUM_B2C(sbi, last - first);

	if (unlikely(first < pa->pa_pstart))
		ext4_msg(ac->ac_sb, KERN_ERR,
			 "invalid pa, start(%llu) < pa_pstart(%llu)",
			 first, pa->pa_pstart);
	else if (unlikely(last > pa_end))
		ext4_msg(ac->ac_sb, KERN_ERR,
			 "invalid pa, end(%llu) > pa_pstart(%llu) + pa_len(%d)",
			 last, pa->pa_pstart, EXT4_C2B(sbi, pa->pa_len));
	else if (unlikely(pa->pa_free < nr))
		ext4_msg(ac->ac_sb, KERN_ERR,
			 "invalid pa, pa_free(%d) < len(%d)", pa->pa_free, nr);
	else if (unlikely(nr <= 0))
		ext4_msg(ac->ac_sb, KERN_ERR, "invalid pa, len(%d) <= 0", nr);
	else
		return true;

	/* one of the checks above failed and has already been logged */
	return false;
}

/*
 * use blocks preallocated to inode
 */
@@ -3833,6 +3877,23 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)

		/* found preallocated blocks, use them */
		spin_lock(&pa->pa_lock);
		if (unlikely(!ext4_mb_pa_is_valid(ac, pa))) {
			ext4_group_t group;

			pa->pa_free = 0;
			atomic_inc(&pa->pa_count);
			spin_unlock(&pa->pa_lock);
			rcu_read_unlock();
			ext4_mb_put_pa(ac, ac->ac_sb, pa);
			group = ext4_get_group_number(ac->ac_sb, pa->pa_pstart);
			ext4_lock_group(ac->ac_sb, group);
			ext4_mark_group_bitmap_corrupted(ac->ac_sb, group,
					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
			ext4_unlock_group(ac->ac_sb, group);
			ext4_error(ac->ac_sb, "drop pa and mark group %u block bitmap corrupted",
				   group);
			goto try_group_pa;
		}
		if (pa->pa_deleted == 0 && pa->pa_free) {
			atomic_inc(&pa->pa_count);
			ext4_mb_use_inode_pa(ac, pa);
@@ -3845,6 +3906,7 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
	}
	rcu_read_unlock();

try_group_pa:
	/* can we use group allocation? */
	if (!(ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC))
		return false;