Commit 00965807 authored by Qu Wenruo's avatar Qu Wenruo Committed by David Sterba
Browse files

btrfs: scrub: introduce error reporting functionality for scrub_stripe



The new helper, scrub_stripe_report_errors(), will report the result of
the scrub to system log.

The main reporting is done by introducing a new helper,
scrub_print_common_warning(), which is mostly the same content from
scrub_print_wanring(), but without the need for a scrub_block.

Since we're reporting the errors, it's the perfect time to update the
scrub stats too.

Signed-off-by: default avatarQu Wenruo <wqu@suse.com>
Reviewed-by: default avatarDavid Sterba <dsterba@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 058e09e6
Loading
Loading
Loading
Loading
+157 −11
Original line number Diff line number Diff line
@@ -105,6 +105,7 @@ enum scrub_stripe_flags {
 * Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
 */
struct scrub_stripe {
	struct scrub_ctx *sctx;
	struct btrfs_block_group *bg;

	struct page *pages[SCRUB_STRIPE_PAGES];
@@ -119,6 +120,13 @@ struct scrub_stripe {
	/* Should be BTRFS_STRIPE_LEN / sectorsize. */
	u16 nr_sectors;

	/*
	 * How many data/meta extents are in this stripe.  Only for scrub status
	 * reporting purposes.
	 */
	u16 nr_data_extents;
	u16 nr_meta_extents;

	atomic_t pending_io;
	wait_queue_head_t io_wait;
	wait_queue_head_t repair_wait;
@@ -377,6 +385,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
	kfree(stripe->csums);
	stripe->sectors = NULL;
	stripe->csums = NULL;
	stripe->sctx = NULL;
	stripe->state = 0;
}

@@ -1046,10 +1055,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
	return 0;
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
static void scrub_print_common_warning(const char *errstr, struct btrfs_device *dev,
				       bool is_super, u64 logical, u64 physical)
{
	struct btrfs_device *dev;
	struct btrfs_fs_info *fs_info;
	struct btrfs_fs_info *fs_info = dev->fs_info;
	struct btrfs_path *path;
	struct btrfs_key found_key;
	struct extent_buffer *eb;
@@ -1062,22 +1071,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
	u8 ref_level = 0;
	int ret;

	WARN_ON(sblock->sector_count < 1);
	dev = sblock->dev;
	fs_info = sblock->sctx->fs_info;

	/* Super block error, no need to search extent tree. */
	if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
	if (is_super) {
		btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
			errstr, btrfs_dev_name(dev), sblock->physical);
				  errstr, btrfs_dev_name(dev), physical);
		return;
	}
	path = btrfs_alloc_path();
	if (!path)
		return;

	swarn.physical = sblock->physical;
	swarn.logical = sblock->logical;
	swarn.physical = physical;
	swarn.logical = logical;
	swarn.errstr = errstr;
	swarn.dev = NULL;

@@ -1126,6 +1131,13 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
	btrfs_free_path(path);
}

static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
	scrub_print_common_warning(errstr, sblock->dev,
			sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER,
			sblock->logical, sblock->physical);
}

static inline void scrub_get_recover(struct scrub_recover *recover)
{
	refcount_inc(&recover->refs);
@@ -2453,6 +2465,131 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
	}
}

static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
				       struct scrub_stripe *stripe)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_device *dev = NULL;
	u64 physical = 0;
	int nr_data_sectors = 0;
	int nr_meta_sectors = 0;
	int nr_nodatacsum_sectors = 0;
	int nr_repaired_sectors = 0;
	int sector_nr;

	/*
	 * Init needed infos for error reporting.
	 *
	 * Although our scrub_stripe infrastucture is mostly based on btrfs_submit_bio()
	 * thus no need for dev/physical, error reporting still needs dev and physical.
	 */
	if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) {
		u64 mapped_len = fs_info->sectorsize;
		struct btrfs_io_context *bioc = NULL;
		int stripe_index = stripe->mirror_num - 1;
		int ret;

		/* For scrub, our mirror_num should always start at 1. */
		ASSERT(stripe->mirror_num >= 1);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				       stripe->logical, &mapped_len, &bioc);
		/*
		 * If we failed, dev will be NULL, and later detailed reports
		 * will just be skipped.
		 */
		if (ret < 0)
			goto skip;
		physical = bioc->stripes[stripe_index].physical;
		dev = bioc->stripes[stripe_index].dev;
		btrfs_put_bioc(bioc);
	}

skip:
	for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
		bool repaired = false;

		if (stripe->sectors[sector_nr].is_metadata) {
			nr_meta_sectors++;
		} else {
			nr_data_sectors++;
			if (!stripe->sectors[sector_nr].csum)
				nr_nodatacsum_sectors++;
		}

		if (test_bit(sector_nr, &stripe->init_error_bitmap) &&
		    !test_bit(sector_nr, &stripe->error_bitmap)) {
			nr_repaired_sectors++;
			repaired = true;
		}

		/* Good sector from the beginning, nothing need to be done. */
		if (!test_bit(sector_nr, &stripe->init_error_bitmap))
			continue;

		/*
		 * Report error for the corrupted sectors.  If repaired, just
		 * output the message of repaired message.
		 */
		if (repaired) {
			if (dev) {
				btrfs_err_rl_in_rcu(fs_info,
			"fixed up error at logical %llu on dev %s physical %llu",
					    stripe->logical, btrfs_dev_name(dev),
					    physical);
			} else {
				btrfs_err_rl_in_rcu(fs_info,
			"fixed up error at logical %llu on mirror %u",
					    stripe->logical, stripe->mirror_num);
			}
			continue;
		}

		/* The remaining are all for unrepaired. */
		if (dev) {
			btrfs_err_rl_in_rcu(fs_info,
	"unable to fixup (regular) error at logical %llu on dev %s physical %llu",
					    stripe->logical, btrfs_dev_name(dev),
					    physical);
		} else {
			btrfs_err_rl_in_rcu(fs_info,
	"unable to fixup (regular) error at logical %llu on mirror %u",
					    stripe->logical, stripe->mirror_num);
		}

		if (test_bit(sector_nr, &stripe->io_error_bitmap))
			if (__ratelimit(&rs) && dev)
				scrub_print_common_warning("i/o error", dev, false,
						     stripe->logical, physical);
		if (test_bit(sector_nr, &stripe->csum_error_bitmap))
			if (__ratelimit(&rs) && dev)
				scrub_print_common_warning("checksum error", dev, false,
						     stripe->logical, physical);
		if (test_bit(sector_nr, &stripe->meta_error_bitmap))
			if (__ratelimit(&rs) && dev)
				scrub_print_common_warning("header error", dev, false,
						     stripe->logical, physical);
	}

	spin_lock(&sctx->stat_lock);
	sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
	sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
	sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
	sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
	sctx->stat.no_csum += nr_nodatacsum_sectors;
	sctx->stat.read_errors +=
		bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
	sctx->stat.csum_errors +=
		bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
	sctx->stat.verify_errors +=
		bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
	sctx->stat.uncorrectable_errors +=
		bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
	sctx->stat.corrected_errors += nr_repaired_sectors;
	spin_unlock(&sctx->stat_lock);
}

/*
 * The main entrance for all read related scrub work, including:
 *
@@ -2526,6 +2663,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
			goto out;
	}
out:
	scrub_stripe_report_errors(stripe->sctx, stripe);
	set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
	wake_up(&stripe->repair_wait);
}
@@ -4189,6 +4327,10 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
	if (ret)
		goto out;
	get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
	if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
		stripe->nr_meta_extents++;
	if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
		stripe->nr_data_extents++;
	cur_logical = max(extent_start, cur_logical);

	/*
@@ -4222,6 +4364,10 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
		}
		get_extent_info(&path, &extent_start, &extent_len,
				&extent_flags, &extent_gen);
		if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
			stripe->nr_meta_extents++;
		if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
			stripe->nr_data_extents++;
		fill_one_extent_info(fs_info, stripe, extent_start, extent_len,
				     extent_flags, extent_gen);
		cur_logical = extent_start + extent_len;