Commit d3e54e91 authored by Christoph Hellwig, committed by Long Li
Browse files

btrfs: zoned: factor out per-zone logic from btrfs_load_block_group_zone_info

mainline inclusion
from mainline-v6.6-rc5
commit 09a46725cc84165af452d978a3532d6b97a28796
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACZX0
CVE: CVE-2024-39496

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=09a46725cc84165af452d978a3532d6b97a28796



--------------------------------

Split out a helper for the body of the per-zone loop in
btrfs_load_block_group_zone_info to make the function easier to read and
modify.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Long Li <leo.lilong@huawei.com>
parent 39ba1dc8
Loading
Loading
Loading
Loading
+92 −92
Original line number Diff line number Diff line
@@ -1288,19 +1288,103 @@ struct zone_info {
	u64 alloc_offset;
};

static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
				struct zone_info *info, unsigned long *active,
				struct map_lookup *map)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_device *device = map->stripes[zone_idx].dev;
	int dev_replace_is_ongoing = 0;
	unsigned int nofs_flag;
	struct blk_zone zone;
	int ret;

	info->physical = map->stripes[zone_idx].physical;

	if (!device->bdev) {
		info->alloc_offset = WP_MISSING_DEV;
		return 0;
	}

	/* Consider a zone as active if we can allow any number of active zones. */
	if (!device->zone_info->max_active_zones)
		__set_bit(zone_idx, active);

	if (!btrfs_dev_is_sequential(device, info->physical)) {
		info->alloc_offset = WP_CONVENTIONAL;
		return 0;
	}

	/* This zone will be used for allocation, so mark this zone non-empty. */
	btrfs_dev_clear_zone_empty(device, info->physical);

	down_read(&dev_replace->rwsem);
	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
		btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
	up_read(&dev_replace->rwsem);

	/*
	 * The group is mapped to a sequential zone. Get the zone write pointer
	 * to determine the allocation offset within the zone.
	 */
	WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
	nofs_flag = memalloc_nofs_save();
	ret = btrfs_get_dev_zone(device, info->physical, &zone);
	memalloc_nofs_restore(nofs_flag);
	if (ret) {
		if (ret != -EIO && ret != -EOPNOTSUPP)
			return ret;
		info->alloc_offset = WP_MISSING_DEV;
		return 0;
	}

	if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
		btrfs_err_in_rcu(fs_info,
		"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
			zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
			device->devid);
		return -EIO;
	}

	info->capacity = (zone.capacity << SECTOR_SHIFT);

	switch (zone.cond) {
	case BLK_ZONE_COND_OFFLINE:
	case BLK_ZONE_COND_READONLY:
		btrfs_err(fs_info,
		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
			  (info->physical >> device->zone_info->zone_size_shift),
			  rcu_str_deref(device->name), device->devid);
		info->alloc_offset = WP_MISSING_DEV;
		break;
	case BLK_ZONE_COND_EMPTY:
		info->alloc_offset = 0;
		break;
	case BLK_ZONE_COND_FULL:
		info->alloc_offset = info->capacity;
		break;
	default:
		/* Partially used zone. */
		info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
		__set_bit(zone_idx, active);
		break;
	}

	return 0;
}

int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
	struct extent_map *em;
	struct map_lookup *map;
	struct btrfs_device *device;
	u64 logical = cache->start;
	u64 length = cache->length;
	struct zone_info *zone_info = NULL;
	int ret;
	int i;
	unsigned int nofs_flag;
	unsigned long *active = NULL;
	u64 last_alloc = 0;
	u32 num_sequential = 0, num_conventional = 0;
@@ -1345,98 +1429,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
	}

	for (i = 0; i < map->num_stripes; i++) {
		struct zone_info *info = &zone_info[i];
		bool is_sequential;
		struct blk_zone zone;
		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
		int dev_replace_is_ongoing = 0;

		device = map->stripes[i].dev;
		info->physical = map->stripes[i].physical;

		if (device->bdev == NULL) {
			info->alloc_offset = WP_MISSING_DEV;
			continue;
		}

		is_sequential = btrfs_dev_is_sequential(device, info->physical);
		if (is_sequential)
			num_sequential++;
		else
			num_conventional++;

		/*
		 * Consider a zone as active if we can allow any number of
		 * active zones.
		 */
		if (!device->zone_info->max_active_zones)
			__set_bit(i, active);

		if (!is_sequential) {
			info->alloc_offset = WP_CONVENTIONAL;
			continue;
		}

		/*
		 * This zone will be used for allocation, so mark this zone
		 * non-empty.
		 */
		btrfs_dev_clear_zone_empty(device, info->physical);

		down_read(&dev_replace->rwsem);
		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical);
		up_read(&dev_replace->rwsem);

		/*
		 * The group is mapped to a sequential zone. Get the zone write
		 * pointer to determine the allocation offset within the zone.
		 */
		WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size));
		nofs_flag = memalloc_nofs_save();
		ret = btrfs_get_dev_zone(device, info->physical, &zone);
		memalloc_nofs_restore(nofs_flag);
		if (ret == -EIO || ret == -EOPNOTSUPP) {
			ret = 0;
			info->alloc_offset = WP_MISSING_DEV;
			continue;
		} else if (ret) {
			goto out;
		}

		if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
			btrfs_err_in_rcu(fs_info,
	"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
				zone.start << SECTOR_SHIFT,
				rcu_str_deref(device->name), device->devid);
			ret = -EIO;
		ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map);
		if (ret)
			goto out;
		}

		info->capacity = (zone.capacity << SECTOR_SHIFT);

		switch (zone.cond) {
		case BLK_ZONE_COND_OFFLINE:
		case BLK_ZONE_COND_READONLY:
			btrfs_err(fs_info,
		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
				  info->physical >> device->zone_info->zone_size_shift,
				  rcu_str_deref(device->name), device->devid);
			info->alloc_offset = WP_MISSING_DEV;
			break;
		case BLK_ZONE_COND_EMPTY:
			info->alloc_offset = 0;
			break;
		case BLK_ZONE_COND_FULL:
			info->alloc_offset = info->capacity;
			break;
		default:
			/* Partially used zone */
			info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT);
			__set_bit(i, active);
			break;
		}
		if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
			num_conventional++;
		else
			num_sequential++;
	}

	if (num_sequential > 0)