btrfs: raid56: switch scrub path to use a single function (6bfd0133) · Commits · EulixOS / Software / Kernel

fs/btrfs/raid56.c

+79 −318

Original line number	Diff line number	Diff line
		@@ -64,7 +64,6 @@ struct sector_ptr {
		unsigned int uptodate:8;
		};

		static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
		static void rmw_rbio_work(struct work_struct *work);
		static void rmw_rbio_work_locked(struct work_struct *work);
		/*
		 * Takes the rbio and the bio by pointer; the bio end_io handlers call
		 * this for bios that completed with an error status.
		 */
		static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
		@@ -72,9 +71,8 @@ static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
		static void index_rbio_pages(struct btrfs_raid_bio *rbio);
		static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);

		static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
		int need_check);
		static void scrub_parity_work(struct work_struct *work);
		static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
		static void scrub_rbio_work_locked(struct work_struct *work);

		static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
		{
		@@ -819,7 +817,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
		start_async_work(next, rmw_rbio_work_locked);
		} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
		steal_rbio(rbio, next);
		start_async_work(next, scrub_parity_work);
		start_async_work(next, scrub_rbio_work_locked);
		}

		goto done_nolock;
		@@ -880,35 +878,6 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
		rbio_endio_bio_list(extra, err);
		}

		/*
		* end io function used by finish_rmw. When we finally
		* get here, we've written a full stripe
		*/
		static void raid_write_end_io(struct bio *bio)
		{
		struct btrfs_raid_bio *rbio = bio->bi_private;
		blk_status_t err = bio->bi_status;
		int max_errors;

		if (err)
		fail_bio_stripe(rbio, bio);

		bio_put(bio);

		if (!atomic_dec_and_test(&rbio->stripes_pending))
		return;

		err = BLK_STS_OK;

		/* OK, we have read all the stripes we need to. */
		max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ?
		0 : rbio->bioc->max_errors;
		if (atomic_read(&rbio->error) > max_errors)
		err = BLK_STS_IOERR;

		rbio_orig_end_io(rbio, err);
		}

		/*
		* Get a sector pointer specified by its @stripe_nr and @sector_nr.
		*
		@@ -1319,87 +1288,6 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
		return -EIO;
		}

		/*
		* This is called from one of two situations: we either have a full
		* stripe from the higher layers, or we've read all the missing bits
		* off disk.
		*
		* This will calculate the parity and then send down any changed blocks.
		*
		* On success every assembled write bio is submitted with
		* raid_write_end_io as its completion handler. If assembling the write
		* bios fails, the rbio is ended with BLK_STS_IOERR and any bios already
		* on the local list are released.
		*/
		static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
		{
		/* Sector number inside a stripe. */
		int sectornr;
		struct bio_list bio_list;
		struct bio *bio;
		int ret;

		bio_list_init(&bio_list);

		/* We should have at least one data sector. */
		ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));

		/*
		* At this point we either have a full stripe, or we've read the
		* full stripe from the drive. Recalculate the parity and write the
		* new results.
		*
		* We're not allowed to add any new bios to the bio list here;
		* anyone else that wants to change this stripe needs to do their
		* own rmw. The flag is set while holding bio_list_lock.
		*/
		spin_lock_irq(&rbio->bio_list_lock);
		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
		spin_unlock_irq(&rbio->bio_list_lock);

		/* Start the write phase with a clean error count. */
		atomic_set(&rbio->error, 0);

		/*
		* Now that we've set rmw_locked, run through the bio list one last
		* time and map the page pointers.
		*
		* We don't cache full rbios because we're assuming the higher
		* layers are unlikely to use this area of the disk again soon. If
		* they do use it again, hopefully they will send another full bio.
		*/
		index_rbio_pages(rbio);
		if (!rbio_is_full(rbio))
		cache_rbio_pages(rbio);
		else
		clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

		/* Generate P/Q for every sector position of the stripe. */
		for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++)
		generate_pq_vertical(rbio, sectornr);

		ret = rmw_assemble_write_bios(rbio, &bio_list);
		if (ret < 0)
		goto cleanup;

		/* One pending count per write bio; an empty list is treated as a bug. */
		atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list));
		BUG_ON(atomic_read(&rbio->stripes_pending) == 0);

		while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_end_io = raid_write_end_io;

		/* Only gather trace info when the tracepoint is enabled. */
		if (trace_raid56_write_stripe_enabled()) {
		struct raid56_bio_trace_info trace_info = { 0 };

		bio_get_trace_info(rbio, bio, &trace_info);
		trace_raid56_write_stripe(rbio, bio, &trace_info);
		}
		submit_bio(bio);
		}
		return;

		cleanup:
		rbio_orig_end_io(rbio, BLK_STS_IOERR);

		/* Drop whatever bios were assembled before the failure. */
		while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);
		}

		/*
		* helper to find the stripe number for a given bio. Used to figure out which
		* stripe has failed. This expects the bio to correspond to a physical disk,
		@@ -1568,22 +1456,6 @@ static void submit_read_bios(struct btrfs_raid_bio *rbio,
		}
		}

		/*
		 * Completion handler for bios whose owning rbio is in bio->bi_private.
		 *
		 * A bio without an error status has its pages marked uptodate; a bio
		 * with an error status is recorded via fail_bio_stripe().  The bio that
		 * drops rbio->stripes_pending to zero queues rbio->end_io_work on the
		 * endio_raid56_workers workqueue.
		 */
		static void raid56_bio_end_io(struct bio *bio)
		{
			struct btrfs_raid_bio *owner = bio->bi_private;

			if (!bio->bi_status)
				set_bio_pages_uptodate(owner, bio);
			else
				fail_bio_stripe(owner, bio);

			bio_put(bio);

			/* Bios are still in flight: the last one will queue the work. */
			if (!atomic_dec_and_test(&owner->stripes_pending))
				return;

			queue_work(owner->bioc->fs_info->endio_raid56_workers,
				   &owner->end_io_work);
		}

		static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
		struct bio_list *bio_list)
		{
		@@ -1968,60 +1840,6 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
		return ret;
		}

		/*
		* all parity reconstruction happens here. We've read in everything
		* we can find from the drives and this does the heavy lifting of
		* sorting the good from the bad.
		*/
		static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
		{
		int ret;

		ret = recover_sectors(rbio);

		/*
		* Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
		* valid rbio which is consistent with ondisk content, thus such a
		* valid rbio can be cached to avoid further disk reads.
		*/
		if (rbio->operation == BTRFS_RBIO_READ_REBUILD \|\|
		rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
		/*
		* - In case of two failures, where rbio->failb != -1:
		*
		* Do not cache this rbio since the above read reconstruction
		* (raid6_datap_recov() or raid6_2data_recov()) may have
		* changed some content of stripes which are not identical to
		* on-disk content any more, otherwise, a later write/recover
		* may steal stripe_pages from this rbio and end up with
		* corruptions or rebuild failures.
		*
		* - In case of single failure, where rbio->failb == -1:
		*
		* Cache this rbio iff the above read reconstruction is
		* executed without problems.
		*/
		if (!ret && rbio->failb < 0)
		cache_rbio_pages(rbio);
		else
		clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);

		rbio_orig_end_io(rbio, errno_to_blk_status(ret));
		} else if (!ret) {
		rbio->faila = -1;
		rbio->failb = -1;

		if (rbio->operation == BTRFS_RBIO_WRITE)
		finish_rmw(rbio);
		else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB)
		finish_parity_scrub(rbio, 0);
		else
		BUG();
		} else {
		rbio_orig_end_io(rbio, errno_to_blk_status(ret));
		}
		}

		static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
		struct bio_list *bio_list)
		{
		@@ -2449,8 +2267,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
		return 0;
		}

		static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
		int need_check)
		static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
		{
		struct btrfs_io_context *bioc = rbio->bioc;
		const u32 sectorsize = bioc->fs_info->sectorsize;
		@@ -2493,7 +2310,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,

		p_sector.page = alloc_page(GFP_NOFS);
		if (!p_sector.page)
		goto cleanup;
		return -ENOMEM;
		p_sector.pgoff = 0;
		p_sector.uptodate = 1;

		@@ -2503,7 +2320,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
		if (!q_sector.page) {
		__free_page(p_sector.page);
		p_sector.page = NULL;
		goto cleanup;
		return -ENOMEM;
		}
		q_sector.pgoff = 0;
		q_sector.uptodate = 1;
		@@ -2590,33 +2407,13 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
		}

		submit_write:
		nr_data = bio_list_size(&bio_list);
		if (!nr_data) {
		/* Every parity is right */
		rbio_orig_end_io(rbio, BLK_STS_OK);
		return;
		}

		atomic_set(&rbio->stripes_pending, nr_data);

		while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_end_io = raid_write_end_io;

		if (trace_raid56_scrub_write_stripe_enabled()) {
		struct raid56_bio_trace_info trace_info = { 0 };

		bio_get_trace_info(rbio, bio, &trace_info);
		trace_raid56_scrub_write_stripe(rbio, bio, &trace_info);
		}
		submit_bio(bio);
		}
		return;
		submit_write_bios(rbio, &bio_list);
		return 0;

		cleanup:
		rbio_orig_end_io(rbio, BLK_STS_IOERR);

		while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);
		return ret;
		}

		static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
		@@ -2626,20 +2423,13 @@ static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
		return 0;
		}

		/*
		* While we're doing the parity check and repair, we could have errors
		* in reading pages off the disk. This checks for errors and if we're
		* not able to read the page it'll trigger parity reconstruction. The
		* parity scrub will be finished after we've reconstructed the failed
		* stripes
		*/
		static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
		static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
		{
		if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
		goto cleanup;

		if (rbio->faila >= 0 || rbio->failb >= 0) {
		int dfail = 0, failp = -1;
		int ret;

		/* No error case should be already handled by the caller. */
		ASSERT(rbio->faila >= 0 || rbio->failb >= 0);

		if (is_data_stripe(rbio, rbio->faila))
		dfail++;
		@@ -2657,16 +2447,14 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
		* (In the case of RAID5, we can not repair anything)
		*/
		if (dfail > rbio->bioc->max_errors - 1)
		goto cleanup;
		return -EIO;

		/*
		* If all data is good and only the parity is bad, just
		* repair the parity.
		*/
		if (dfail == 0) {
		finish_parity_scrub(rbio, 0);
		return;
		}
		if (dfail == 0)
		return 0;

		/*
		* Here means we got one corrupted data stripe and one
		@@ -2675,36 +2463,11 @@ static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio)
		* the data, or we can not repair the data stripe.
		*/
		if (failp != rbio->scrubp)
		goto cleanup;

		__raid_recover_end_io(rbio);
		} else {
		finish_parity_scrub(rbio, 1);
		}
		return;

		cleanup:
		rbio_orig_end_io(rbio, BLK_STS_IOERR);
		}

		/*
		* end io for the read phase of the rmw cycle. All the bios here are physical
		* stripe bios we've read from the disk so we can recalculate the parity of the
		* stripe.
		*
		* This will usually kick off finish_rmw once all the bios are read in, but it
		* may trigger parity reconstruction if we had any errors along the way
		*/
		static void raid56_parity_scrub_end_io_work(struct work_struct *work)
		{
		struct btrfs_raid_bio *rbio =
		container_of(work, struct btrfs_raid_bio, end_io_work);
		return -EIO;

		/*
		* This will normally call finish_rmw to start our write, but if there
		* are any failed stripes we'll reconstruct from parity first
		*/
		validate_rbio_for_parity_scrub(rbio);
		/* We have some corrupted sectors, need to repair them. */
		ret = recover_sectors(rbio);
		return ret;
		}

		static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
		@@ -2756,9 +2519,9 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio,
		return ret;
		}

		static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
		static int scrub_rbio(struct btrfs_raid_bio *rbio)
		{
		int bios_to_read = 0;
		bool need_check = false;
		struct bio_list bio_list;
		int ret;
		struct bio *bio;
		@@ -2774,61 +2537,59 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio)
		if (ret < 0)
		goto cleanup;

		bios_to_read = bio_list_size(&bio_list);
		if (!bios_to_read) {
		submit_read_bios(rbio, &bio_list);
		wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);

		if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
		ret = -EIO;
		goto cleanup;
		}
		/*
		* this can happen if others have merged with
		* us, it means there is nothing left to read.
		* But if there are missing devices it may not be
		* safe to do the full stripe write yet.
		* No error during read: we can finish the scrub, but still need to
		* verify the P/Q sectors.
		*/
		if (atomic_read(&rbio->error) == 0) {
		need_check = true;
		goto finish;
		}

		/* We have some failures, need to recover the failed sectors first. */
		ret = recover_scrub_rbio(rbio);
		if (ret < 0)
		goto cleanup;

		finish:
		/*
		* The bioc may be freed once we submit the last bio. Make sure not to
		* touch it after that.
		* We have every sector properly prepared. Can finish the scrub
		* and writeback the good content.
		*/
		atomic_set(&rbio->stripes_pending, bios_to_read);
		INIT_WORK(&rbio->end_io_work, raid56_parity_scrub_end_io_work);
		while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_end_io = raid56_bio_end_io;

		if (trace_raid56_scrub_read_enabled()) {
		struct raid56_bio_trace_info trace_info = { 0 };

		bio_get_trace_info(rbio, bio, &trace_info);
		trace_raid56_scrub_read(rbio, bio, &trace_info);
		}
		submit_bio(bio);
		}
		/* the actual write will happen once the reads are done */
		return;
		ret = finish_parity_scrub(rbio, need_check);
		wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
		if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
		ret = -EIO;
		return ret;

		cleanup:
		rbio_orig_end_io(rbio, BLK_STS_IOERR);

		while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

		return;

		finish:
		validate_rbio_for_parity_scrub(rbio);
		return ret;
		}

		static void scrub_parity_work(struct work_struct *work)
		static void scrub_rbio_work_locked(struct work_struct *work)
		{
		struct btrfs_raid_bio *rbio;
		int ret;

		rbio = container_of(work, struct btrfs_raid_bio, work);
		raid56_parity_scrub_stripe(rbio);
		ret = scrub_rbio(rbio);
		rbio_orig_end_io(rbio, errno_to_blk_status(ret));
		}

		void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
		{
		if (!lock_stripe_add(rbio))
		start_async_work(rbio, scrub_parity_work);
		start_async_work(rbio, scrub_rbio_work_locked);
		}

		/* The following code is used for dev replace of a missing RAID 5/6 device. */