Unverified commit dd657781, authored by openeuler-ci-bot, committed by Gitee
Browse files

!10044 btrfs: fix CVE-2024-39496

Merge Pull Request from: @ci-robot 
 
PR sync from: Long Li <leo.lilong@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/JD2IDXBGOETCBVUCXUWXDK223FH226LO/ 
This patch set fixes CVE-2024-39496.

Christoph Hellwig (4):
  btrfs: zoned: introduce a zone_info struct in
    btrfs_load_block_group_zone_info
  btrfs: zoned: factor out per-zone logic from
    btrfs_load_block_group_zone_info
  btrfs: zoned: factor out single bg handling from
    btrfs_load_block_group_zone_info
  btrfs: zoned: factor out DUP bg handling from
    btrfs_load_block_group_zone_info

Filipe Manana (1):
  btrfs: zoned: fix use-after-free due to race with dev replace


-- 
2.39.2
 
https://gitee.com/src-openeuler/kernel/issues/IACZX0 
 
Link:https://gitee.com/openeuler/kernel/pulls/10044

 

Reviewed-by: Zhang Peng <zhangpeng362@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents fa213da1 f456cdb8
Loading
Loading
Loading
Loading
+170 −160
Original line number Diff line number Diff line
@@ -1282,21 +1282,175 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
	return ret;
}

/*
 * Per-stripe zone state collected by btrfs_load_zone_info() and consumed by
 * the per-profile btrfs_load_block_group_*() helpers.
 */
struct zone_info {
	/* Physical address of the stripe, copied from map->stripes[i].physical. */
	u64 physical;
	/*
	 * Zone capacity, converted from the reported zone with SECTOR_SHIFT.
	 * Only written once the zone report succeeded for a sequential zone.
	 */
	u64 capacity;
	/*
	 * Allocation offset inside the zone derived from the write pointer
	 * (0 for an empty zone, ->capacity for a full one), or one of the
	 * WP_MISSING_DEV / WP_CONVENTIONAL markers when no write pointer is
	 * available.
	 */
	u64 alloc_offset;
};

static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
				struct zone_info *info, unsigned long *active,
				struct map_lookup *map)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_device *device;
	int dev_replace_is_ongoing = 0;
	unsigned int nofs_flag;
	struct blk_zone zone;
	int ret;

	info->physical = map->stripes[zone_idx].physical;

	down_read(&dev_replace->rwsem);
	device = map->stripes[zone_idx].dev;

	if (!device->bdev) {
		up_read(&dev_replace->rwsem);
		info->alloc_offset = WP_MISSING_DEV;
		return 0;
	}

	/* Consider a zone as active if we can allow any number of active zones. */
	if (!device->zone_info->max_active_zones)
		__set_bit(zone_idx, active);

	if (!btrfs_dev_is_sequential(device, info->physical)) {
		up_read(&dev_replace->rwsem);
		info->alloc_offset = WP_CONVENTIONAL;
		return 0;
	}

	/* This zone will be used for allocation, so mark this zone non-empty. */
	btrfs_dev_clear_zone_empty(device, info->physical);

	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
		btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);

	/*
	 * The group is mapped to a sequential zone. Get the zone write pointer
	 * to determine the allocation offset within the zone.
	 */
	WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
	nofs_flag = memalloc_nofs_save();
	ret = btrfs_get_dev_zone(device, info->physical, &zone);
	memalloc_nofs_restore(nofs_flag);
	if (ret) {
		up_read(&dev_replace->rwsem);
		if (ret != -EIO && ret != -EOPNOTSUPP)
			return ret;
		info->alloc_offset = WP_MISSING_DEV;
		return 0;
	}

	if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
		btrfs_err_in_rcu(fs_info,
		"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
			zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
			device->devid);
		up_read(&dev_replace->rwsem);
		return -EIO;
	}

	info->capacity = (zone.capacity << SECTOR_SHIFT);

	switch (zone.cond) {
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
		btrfs_err(fs_info,
		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
			  (info->physical >> device->zone_info->zone_size_shift),
			  rcu_str_deref(device->name), device->devid);
		info->alloc_offset = WP_MISSING_DEV;
		break;
	case BLK_ZONE_COND_EMPTY:
		info->alloc_offset = 0;
		break;
	case BLK_ZONE_COND_FULL:
		info->alloc_offset = info->capacity;
		break;
	default:
		/* Partially used zone. */
		info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
		__set_bit(zone_idx, active);
		break;
	}

	up_read(&dev_replace->rwsem);

	return 0;
}

/*
 * Apply the zone state of a single-profile block group.
 *
 * @bg:     block group to update
 * @info:   zone state of the block group's only stripe
 * @active: per-stripe activity bitmap; only bit 0 is consulted
 *
 * Copies the allocation offset and zone capacity from @info into @bg and
 * flags @bg as zone-active when bit 0 of @active is set.
 *
 * Return: 0 on success, -EIO when the write pointer of the zone could not be
 * recovered (@info->alloc_offset is WP_MISSING_DEV).
 */
static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
					 struct zone_info *info,
					 unsigned long *active)
{
	if (info->alloc_offset != WP_MISSING_DEV) {
		bg->zone_capacity = info->capacity;
		bg->alloc_offset = info->alloc_offset;

		if (test_bit(0, active))
			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
				&bg->runtime_flags);

		return 0;
	}

	/* No write pointer available: the block group cannot be loaded. */
	btrfs_err(bg->fs_info,
		"zoned: cannot recover write pointer for zone %llu",
		info->physical);
	return -EIO;
}

/*
 * Apply the zone state of a DUP-profile block group.
 *
 * @bg:        block group to update
 * @map:       chunk mapping of the block group; only its type is inspected
 * @zone_info: zone state of the stripes; entries 0 and 1 are used
 * @active:    per-stripe activity bitmap; bits 0 and 1 are consulted
 *
 * Both zones must have a recoverable write pointer and agree on the
 * allocation offset. If only one of the two zones is marked active the block
 * group is activated through btrfs_zone_activate(); if both are, the block
 * group is simply flagged active.
 *
 * Return: 0 on success, -EINVAL for a data block group (DUP is not supported
 * there), -EIO if a write pointer is missing, the offsets mismatch, or zone
 * activation fails.
 */
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
				      struct map_lookup *map,
				      struct zone_info *zone_info,
				      unsigned long *active)
{
	int i;

	if (map->type & BTRFS_BLOCK_GROUP_DATA) {
		btrfs_err(bg->fs_info,
			  "zoned: profile DUP not yet supported on data bg");
		return -EINVAL;
	}

	/* Check the zones at index 0 and 1, in that order. */
	for (i = 0; i < 2; i++) {
		if (zone_info[i].alloc_offset != WP_MISSING_DEV)
			continue;

		btrfs_err(bg->fs_info,
			  "zoned: cannot recover write pointer for zone %llu",
			  zone_info[i].physical);
		return -EIO;
	}

	if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
		btrfs_err(bg->fs_info,
			  "zoned: write pointer offset mismatch of zones in DUP profile");
		return -EIO;
	}

	if (test_bit(0, active) == test_bit(1, active)) {
		/* Same state on both zones: flag the group only if active. */
		if (test_bit(0, active))
			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
				&bg->runtime_flags);
	} else if (!btrfs_zone_activate(bg)) {
		/* Only one zone was active and activating the group failed. */
		return -EIO;
	}

	bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity);
	bg->alloc_offset = zone_info[0].alloc_offset;
	return 0;
}

int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_device *device;
	u64 logical = cache->start;
	u64 length = cache->length;
	struct zone_info *zone_info = NULL;
	int ret;
	int i;
	unsigned int nofs_flag;
	u64 *alloc_offsets = NULL;
	u64 *caps = NULL;
	u64 *physical = NULL;
	unsigned long *active = NULL;
	u64 last_alloc = 0;
	u32 num_sequential = 0, num_conventional = 0;
@@ -1328,20 +1482,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
		goto out;
	}

	alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
	if (!alloc_offsets) {
		ret = -ENOMEM;
		goto out;
	}

	caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS);
	if (!caps) {
		ret = -ENOMEM;
		goto out;
	}

	physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
	if (!physical) {
	zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS);
	if (!zone_info) {
		ret = -ENOMEM;
		goto out;
	}
@@ -1353,98 +1495,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
	}

	for (i = 0; i < map->num_stripes; i++) {
		bool is_sequential;
		struct blk_zone zone;
		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
		int dev_replace_is_ongoing = 0;

		device = map->stripes[i].dev;
		physical[i] = map->stripes[i].physical;

		if (device->bdev == NULL) {
			alloc_offsets[i] = WP_MISSING_DEV;
			continue;
		}

		is_sequential = btrfs_dev_is_sequential(device, physical[i]);
		if (is_sequential)
			num_sequential++;
		else
			num_conventional++;

		/*
		 * Consider a zone as active if we can allow any number of
		 * active zones.
		 */
		if (!device->zone_info->max_active_zones)
			__set_bit(i, active);

		if (!is_sequential) {
			alloc_offsets[i] = WP_CONVENTIONAL;
			continue;
		}

		/*
		 * This zone will be used for allocation, so mark this zone
		 * non-empty.
		 */
		btrfs_dev_clear_zone_empty(device, physical[i]);

		down_read(&dev_replace->rwsem);
		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
		up_read(&dev_replace->rwsem);

		/*
		 * The group is mapped to a sequential zone. Get the zone write
		 * pointer to determine the allocation offset within the zone.
		 */
		WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
		nofs_flag = memalloc_nofs_save();
		ret = btrfs_get_dev_zone(device, physical[i], &zone);
		memalloc_nofs_restore(nofs_flag);
		if (ret == -EIO || ret == -EOPNOTSUPP) {
			ret = 0;
			alloc_offsets[i] = WP_MISSING_DEV;
			continue;
		} else if (ret) {
			goto out;
		}

		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
			btrfs_err_in_rcu(fs_info,
	"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
				zone.start << SECTOR_SHIFT,
				rcu_str_deref(device->name), device->devid);
			ret = -EIO;
		ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map);
		if (ret)
			goto out;
		}

		caps[i] = (zone.capacity << SECTOR_SHIFT);

		switch (zone.cond) {
		case BLK_ZONE_COND_OFFLINE:
		case BLK_ZONE_COND_READONLY:
			btrfs_err(fs_info,
		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
				  physical[i] >> device->zone_info->zone_size_shift,
				  rcu_str_deref(device->name), device->devid);
			alloc_offsets[i] = WP_MISSING_DEV;
			break;
		case BLK_ZONE_COND_EMPTY:
			alloc_offsets[i] = 0;
			break;
		case BLK_ZONE_COND_FULL:
			alloc_offsets[i] = caps[i];
			break;
		default:
			/* Partially used zone */
			alloc_offsets[i] =
					((zone.wp - zone.start) << SECTOR_SHIFT);
			__set_bit(i, active);
			break;
		}
		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
			num_conventional++;
		else
			num_sequential++;
	}

	if (num_sequential > 0)
@@ -1468,56 +1526,10 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)

	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
	case 0: /* single */
		if (alloc_offsets[0] == WP_MISSING_DEV) {
			btrfs_err(fs_info,
			"zoned: cannot recover write pointer for zone %llu",
				physical[0]);
			ret = -EIO;
			goto out;
		}
		cache->alloc_offset = alloc_offsets[0];
		cache->zone_capacity = caps[0];
		if (test_bit(0, active))
			set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
		ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
		break;
	case BTRFS_BLOCK_GROUP_DUP:
		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
			btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
			ret = -EINVAL;
			goto out;
		}
		if (alloc_offsets[0] == WP_MISSING_DEV) {
			btrfs_err(fs_info,
			"zoned: cannot recover write pointer for zone %llu",
				physical[0]);
			ret = -EIO;
			goto out;
		}
		if (alloc_offsets[1] == WP_MISSING_DEV) {
			btrfs_err(fs_info,
			"zoned: cannot recover write pointer for zone %llu",
				physical[1]);
			ret = -EIO;
			goto out;
		}
		if (alloc_offsets[0] != alloc_offsets[1]) {
			btrfs_err(fs_info,
			"zoned: write pointer offset mismatch of zones in DUP profile");
			ret = -EIO;
			goto out;
		}
		if (test_bit(0, active) != test_bit(1, active)) {
			if (!btrfs_zone_activate(cache)) {
				ret = -EIO;
				goto out;
			}
		} else {
			if (test_bit(0, active))
				set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
					&cache->runtime_flags);
		}
		cache->alloc_offset = alloc_offsets[0];
		cache->zone_capacity = min(caps[0], caps[1]);
		ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
		break;
	case BTRFS_BLOCK_GROUP_RAID1:
	case BTRFS_BLOCK_GROUP_RAID0:
@@ -1570,9 +1582,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
		cache->physical_map = NULL;
	}
	bitmap_free(active);
	kfree(physical);
	kfree(caps);
	kfree(alloc_offsets);
	kfree(zone_info);
	free_extent_map(em);

	return ret;