Commit 9c5ff9b4 authored by Qu Wenruo, committed by David Sterba
Browse files

btrfs: raid56: extract the vertical stripe recovery code into recover_vertical()



This refactor includes the following behavior change first:

- Don't error out if only P/Q is corrupted

  The old code will directly error out if only P/Q is corrupted.
  Although it is a logical error if we go into the rebuild path with
  only P/Q corrupted, there is no need to error out.

  Just skip the rebuild and return the already good data.

Then comes the following refactor which shouldn't cause behavior
changes:

- Introduce a helper to do vertical stripe recovery

  This not only reduces one indent level, but also paves the way for
  later data checksum verification in RMW cycles.

- Sort rbio->faila/b before recovery

  So we don't need to do the same swap for every vertical stripe.

- Replace a BUG_ON() with ASSERT()

  Or checkpatch won't let me pass.

- Mark recovered sectors uptodate after the recover loop

- Do the cleanup for pointers unconditionally

  We only need to initialize @pointers and @unmap_array to NULL, so
  we can safely free them unconditionally.

- Mark the repaired sector uptodate in recover_vertical()

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent ee5f017d
Loading
Loading
Loading
Loading
+149 −136
Original line number Diff line number Diff line
@@ -1887,100 +1887,64 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
}

/*
 * all parity reconstruction happens here.  We've read in everything
 * we can find from the drives and this does the heavy lifting of
 * sorting the good from the bad.
 * Recover a vertical stripe specified by @sector_nr.
 * @*pointers are the pre-allocated pointers by the caller, so we don't
 * need to allocate/free the pointers again and again.
 */
static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
static void recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
			     void **pointers, void **unmap_array)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
	int sectornr, stripe;
	void **pointers;
	void **unmap_array;
	int faila = -1, failb = -1;
	blk_status_t err;
	int i;

	/*
	 * This array stores the pointer for each sector, thus it has the extra
	 * pgoff value added from each sector
	 */
	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!pointers) {
		err = BLK_STS_RESOURCE;
		goto cleanup_io;
	}

	/*
	 * Store copy of pointers that does not get reordered during
	 * reconstruction so that kunmap_local works.
	 */
	unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!unmap_array) {
		err = BLK_STS_RESOURCE;
		goto cleanup_pointers;
	}

	faila = rbio->faila;
	failb = rbio->failb;

	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
		spin_lock_irq(&rbio->bio_list_lock);
		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
		spin_unlock_irq(&rbio->bio_list_lock);
	}

	index_rbio_pages(rbio);

	for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
	struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
	struct sector_ptr *sector;
	const u32 sectorsize = fs_info->sectorsize;
	const int faila = rbio->faila;
	const int failb = rbio->failb;
	int stripe_nr;

	/*
	 * Now we just use bitmap to mark the horizontal stripes in
	 * which we have data when doing parity scrub.
	 */
	if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
		    !test_bit(sectornr, &rbio->dbitmap))
			continue;
	    !test_bit(sector_nr, &rbio->dbitmap))
		return;

	/*
	 * Setup our array of pointers with sectors from each stripe
	 *
	 * NOTE: store a duplicate array of pointers to preserve the
		 * pointer order
	 * pointer order.
	 */
		for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
	for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
		/*
		 * If we're rebuilding a read, we have to use
		 * pages from the bio list
		 */
		if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
		     rbio->operation == BTRFS_RBIO_REBUILD_MISSING) &&
			    (stripe == faila || stripe == failb)) {
				sector = sector_in_rbio(rbio, stripe, sectornr, 0);
		    (stripe_nr == faila || stripe_nr == failb)) {
			sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
		} else {
				sector = rbio_stripe_sector(rbio, stripe, sectornr);
			sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
		}
		ASSERT(sector->page);
			pointers[stripe] = kmap_local_page(sector->page) +
		pointers[stripe_nr] = kmap_local_page(sector->page) +
				   sector->pgoff;
			unmap_array[stripe] = pointers[stripe];
		unmap_array[stripe_nr] = pointers[stripe_nr];
	}

	/* All raid6 handling here */
	if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
		/* Single failure, rebuild from parity raid5 style */
		if (failb < 0) {
				if (faila == rbio->nr_data) {
			if (faila == rbio->nr_data)
				/*
				 * Just the P stripe has failed, without
				 * a bad data or Q stripe.
					 * TODO, we should redo the xor here.
				 * We have nothing to do, just skip the
				 * recovery for this stripe.
				 */
					err = BLK_STS_IOERR;
				goto cleanup;
				}
			/*
			 * a single failure in raid6 is rebuilt
			 * in the pstripe code below
@@ -1988,83 +1952,132 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
			goto pstripe;
		}

			/* make sure our ps and qs are in order */
			if (faila > failb)
				swap(faila, failb);

			/* if the q stripe is failed, do a pstripe reconstruction
			 * from the xors.
		/*
		 * If the q stripe is failed, do a pstripe reconstruction from
		 * the xors.
		 * If both the q stripe and the P stripe are failed, we're
		 * here due to a crc mismatch and we can't give them the
			 * data they want
		 * data they want.
		 */
		if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) {
			if (rbio->bioc->raid_map[faila] ==
				    RAID5_P_STRIPE) {
					err = BLK_STS_IOERR;
			    RAID5_P_STRIPE)
				/*
				 * Only P and Q are corrupted.
				 * We only care about data stripes recovery,
				 * can skip this vertical stripe.
				 */
				goto cleanup;
				}
			/*
				 * otherwise we have one bad data stripe and
			 * Otherwise we have one bad data stripe and
			 * a good P stripe.  raid5!
			 */
			goto pstripe;
		}

		if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) {
				raid6_datap_recov(rbio->real_stripes,
						  sectorsize, faila, pointers);
			raid6_datap_recov(rbio->real_stripes, sectorsize,
					  faila, pointers);
		} else {
				raid6_2data_recov(rbio->real_stripes,
						  sectorsize, faila, failb,
						  pointers);
			raid6_2data_recov(rbio->real_stripes, sectorsize,
					  faila, failb, pointers);
		}
	} else {
		void *p;

			/* rebuild from P stripe here (raid5 or raid6) */
			BUG_ON(failb != -1);
		/* Rebuild from P stripe here (raid5 or raid6). */
		ASSERT(failb == -1);
pstripe:
		/* Copy parity block into failed block to start with */
		memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize);

			/* rearrange the pointer array */
		/* Rearrange the pointer array */
		p = pointers[faila];
			for (stripe = faila; stripe < rbio->nr_data - 1; stripe++)
				pointers[stripe] = pointers[stripe + 1];
		for (stripe_nr = faila; stripe_nr < rbio->nr_data - 1;
		     stripe_nr++)
			pointers[stripe_nr] = pointers[stripe_nr + 1];
		pointers[rbio->nr_data - 1] = p;

			/* xor in the rest */
		/* Xor in the rest */
		run_xor(pointers, rbio->nr_data - 1, sectorsize);

	}

	/*
	 * No matter if this is a RMW or recovery, we should have all
		 * failed sectors repaired, thus they are now uptodate.
	 * failed sectors repaired in the vertical stripe, thus they are now
	 * uptodate.
	 * Especially if we determine to cache the rbio, we need to
	 * have at least all data sectors uptodate.
	 */
		for (i = 0;  i < rbio->stripe_nsectors; i++) {
			if (faila != -1) {
				sector = rbio_stripe_sector(rbio, faila, i);
	if (rbio->faila >= 0) {
		sector = rbio_stripe_sector(rbio, rbio->faila, sector_nr);
		sector->uptodate = 1;
	}
			if (failb != -1) {
				sector = rbio_stripe_sector(rbio, failb, i);
	if (rbio->failb >= 0) {
		sector = rbio_stripe_sector(rbio, rbio->failb, sector_nr);
		sector->uptodate = 1;
	}

cleanup:
	for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
		kunmap_local(unmap_array[stripe_nr]);
}

/*
 * all parity reconstruction happens here.  We've read in everything
 * we can find from the drives and this does the heavy lifting of
 * sorting the good from the bad.
 */
static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
{
	int sectornr;
	void **pointers = NULL;
	void **unmap_array = NULL;
	blk_status_t err;

	/*
	 * This array stores the pointer for each sector, thus it has the extra
	 * pgoff value added from each sector
	 */
	pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!pointers) {
		err = BLK_STS_RESOURCE;
		goto cleanup;
	}

	/*
	 * Store copy of pointers that does not get reordered during
	 * reconstruction so that kunmap_local works.
	 */
	unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
	if (!unmap_array) {
		err = BLK_STS_RESOURCE;
		goto cleanup;
	}
		for (stripe = rbio->real_stripes - 1; stripe >= 0; stripe--)
			kunmap_local(unmap_array[stripe]);

	/* Make sure faila and failb are in order. */
	if (rbio->faila >= 0 && rbio->failb >= 0 && rbio->faila > rbio->failb)
		swap(rbio->faila, rbio->failb);

	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
		spin_lock_irq(&rbio->bio_list_lock);
		set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
		spin_unlock_irq(&rbio->bio_list_lock);
	}

	index_rbio_pages(rbio);

	for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++)
		recover_vertical(rbio, sectornr, pointers, unmap_array);

	err = BLK_STS_OK;

cleanup:
	kfree(unmap_array);
cleanup_pointers:
	kfree(pointers);

cleanup_io:
	/*
	 * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a
	 * valid rbio which is consistent with ondisk content, thus such a