Commit 93723095 authored by Qu Wenruo, committed by David Sterba

btrfs: raid56: switch write path to rmw_rbio()



This includes the following changes:

- Implement a new raid_unplug() function
  Now we don't need a workqueue to run the plug, as all our
  work is just to queue the rmw_rbio_work() call, which can be
  executed without sleeping (see the call-flow sketch after this
  list).

- Implement a rmw_rbio_work_locked() helper
  This is for unlock_stripe(), which is already holding the full stripe
  lock.

- Remove all the old functions
  This should already show how complex the old functions are, as we
  ended up removing the following functions:

  * rmw_work()
  * validate_rbio_for_rmw()
  * raid56_rmw_end_io_work()
  * raid56_rmw_stripe()
  * full_stripe_write()
  * partial_stripe_write()
  * __raid56_parity_write()
  * run_plug()
  * unplug_work()
  * btrfs_raid_unplug()
  * __raid56_parity_recover()
  * raid_recover_end_io_work()

- Unexport rmw_rbio()

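A condensed sketch of the resulting write path (an illustrative summary
of the diff below, not literal code; error handling, tracing and the
scrub path are omitted):

    raid56_parity_write()
      -> full rbio, or no plug active:
             start_async_work(rbio, rmw_rbio_work)
      -> partial write while plugged:
             add rbio to plug->rbio_list, merged at unplug time

    raid_unplug()                    /* blk plug callback */
      -> sort and merge the plugged rbios
      -> start_async_work(rbio, rmw_rbio_work) on each remaining rbio

    rmw_rbio_work()                  /* must take the full stripe lock */
      -> if (lock_stripe_add(rbio) == 0)
             rmw_rbio(rbio)

    rmw_rbio_work_locked()           /* caller already holds the lock */
      -> rmw_rbio(rbio)

unlock_stripe() queues the next pending write rbio with
rmw_rbio_work_locked(), since the full stripe lock is still held at
that point.
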
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 5eb30ee2
fs/btrfs/raid56.c: +42 −308
@@ -64,9 +64,9 @@ struct sector_ptr {
	unsigned int uptodate:8;
};

static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
static void rmw_work(struct work_struct *work);
static void rmw_rbio_work(struct work_struct *work);
static void rmw_rbio_work_locked(struct work_struct *work);
static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
@@ -816,7 +816,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
				start_async_work(next, recover_rbio_work_locked);
			} else if (next->operation == BTRFS_RBIO_WRITE) {
				steal_rbio(rbio, next);
				start_async_work(next, rmw_work);
				start_async_work(next, rmw_rbio_work_locked);
			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
				steal_rbio(rbio, next);
				start_async_work(next, scrub_parity_work);
@@ -1108,23 +1108,6 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
	return 0;
}

/*
 * while we're doing the read/modify/write cycle, we could
 * have errors in reading pages off the disk.  This checks
 * for errors and if we're not able to read the page it'll
 * trigger parity reconstruction.  The rmw will be finished
 * after we've reconstructed the failed stripes
 */
static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
{
	if (rbio->faila >= 0 || rbio->failb >= 0) {
		BUG_ON(rbio->faila == rbio->real_stripes - 1);
		__raid56_parity_recover(rbio);
	} else {
		finish_rmw(rbio);
	}
}

static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
{
	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
@@ -1601,31 +1584,6 @@ static void raid56_bio_end_io(struct bio *bio)
			   &rbio->end_io_work);
}

/*
 * End io handler for the read phase of the RMW cycle.  All the bios here are
 * physical stripe bios we've read from the disk so we can recalculate the
 * parity of the stripe.
 *
 * This will usually kick off finish_rmw once all the bios are read in, but it
 * may trigger parity reconstruction if we had any errors along the way
 */
static void raid56_rmw_end_io_work(struct work_struct *work)
{
	struct btrfs_raid_bio *rbio =
		container_of(work, struct btrfs_raid_bio, end_io_work);

	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
		rbio_orig_end_io(rbio, BLK_STS_IOERR);
		return;
	}

	/*
	 * This will normally call finish_rmw to start our write but if there
	 * are any failed stripes we'll reconstruct from parity first.
	 */
	validate_rbio_for_rmw(rbio);
}

static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
				  struct bio_list *bio_list)
{
@@ -1686,122 +1644,6 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
	return 0;
}

/*
 * the stripe must be locked by the caller.  It will
 * unlock after all the writes are done
 */
static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_pages(rbio);
	if (ret)
		goto cleanup;

	index_rbio_pages(rbio);

	atomic_set(&rbio->error, 0);

	ret = rmw_assemble_read_bios(rbio, &bio_list);
	if (ret < 0)
		goto cleanup;

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * this can happen if others have merged with
		 * us, it means there is nothing left to read.
		 * But if there are missing devices it may not be
		 * safe to do the full stripe write yet.
		 */
		goto finish;
	}

	/*
	 * The bioc may be freed once we submit the last bio. Make sure not to
	 * touch it after that.
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_end_io = raid56_bio_end_io;

		if (trace_raid56_read_partial_enabled()) {
			struct raid56_bio_trace_info trace_info = { 0 };

			bio_get_trace_info(rbio, bio, &trace_info);
			trace_raid56_read_partial(rbio, bio, &trace_info);
		}
		submit_bio(bio);
	}
	/* the actual write will happen once the reads are done */
	return 0;

cleanup:
	rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

	return -EIO;

finish:
	validate_rbio_for_rmw(rbio);
	return 0;
}

/*
 * if the upper layers pass in a full stripe, we thank them by only allocating
 * enough pages to hold the parity, and sending it all down quickly.
 */
static int full_stripe_write(struct btrfs_raid_bio *rbio)
{
	int ret;

	ret = alloc_rbio_parity_pages(rbio);
	if (ret)
		return ret;

	ret = lock_stripe_add(rbio);
	if (ret == 0)
		finish_rmw(rbio);
	return 0;
}

/*
 * partial stripe writes get handed over to async helpers.
 * We're really hoping to merge a few more writes into this
 * rbio before calculating new parity
 */
static int partial_stripe_write(struct btrfs_raid_bio *rbio)
{
	int ret;

	ret = lock_stripe_add(rbio);
	if (ret == 0)
		start_async_work(rbio, rmw_work);
	return 0;
}

/*
 * sometimes while we were reading from the drive to
 * recalculate parity, enough new bios come into create
 * a full stripe.  So we do a check here to see if we can
 * go directly to finish_rmw
 */
static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
{
	/* head off into rmw land if we don't have a full stripe */
	if (!rbio_is_full(rbio))
		return partial_stripe_write(rbio);
	return full_stripe_write(rbio);
}

/*
 * We use plugging call backs to collect full stripes.
 * Any time we get a partial stripe write while plugged
@@ -1836,28 +1678,22 @@ static int plug_cmp(void *priv, const struct list_head *a,
	return 0;
}

static void run_plug(struct btrfs_plug_cb *plug)
static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb);
	struct btrfs_raid_bio *cur;
	struct btrfs_raid_bio *last = NULL;

	/*
	 * sort our plug list then try to merge
	 * everything we can in hopes of creating full
	 * stripes.
	 */
	list_sort(NULL, &plug->rbio_list, plug_cmp);

	while (!list_empty(&plug->rbio_list)) {
		cur = list_entry(plug->rbio_list.next,
				 struct btrfs_raid_bio, plug_list);
		list_del_init(&cur->plug_list);

		if (rbio_is_full(cur)) {
			int ret;

			/* we have a full stripe, send it down */
			ret = full_stripe_write(cur);
			BUG_ON(ret);
			/* We have a full stripe, queue it down. */
			start_async_work(cur, rmw_rbio_work);
			continue;
		}
		if (last) {
@@ -1865,42 +1701,16 @@ static void run_plug(struct btrfs_plug_cb *plug)
				merge_rbio(last, cur);
				free_raid_bio(cur);
				continue;

			}
			__raid56_parity_write(last);
			start_async_work(last, rmw_rbio_work);
		}
		last = cur;
	}
	if (last) {
		__raid56_parity_write(last);
	}
	if (last)
		start_async_work(last, rmw_rbio_work);
	kfree(plug);
}

/*
 * if the unplug comes from schedule, we have to push the
 * work off to a helper thread
 */
static void unplug_work(struct work_struct *work)
{
	struct btrfs_plug_cb *plug;
	plug = container_of(work, struct btrfs_plug_cb, work);
	run_plug(plug);
}

static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct btrfs_plug_cb *plug;
	plug = container_of(cb, struct btrfs_plug_cb, cb);

	if (from_schedule) {
		INIT_WORK(&plug->work, unplug_work);
		queue_work(plug->info->rmw_workers, &plug->work);
		return;
	}
	run_plug(plug);
}

/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
{
@@ -1948,19 +1758,13 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
	rbio_add_bio(rbio, bio);

	/*
	 * don't plug on full rbios, just get them out the door
	 * Don't plug on full rbios, just get them out the door
	 * as quickly as we can
	 */
	if (rbio_is_full(rbio)) {
		ret = full_stripe_write(rbio);
		if (ret) {
			free_raid_bio(rbio);
			goto fail;
		}
		return;
	}
	if (rbio_is_full(rbio))
		goto queue_rbio;

	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
	cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
	if (cb) {
		plug = container_of(cb, struct btrfs_plug_cb, cb);
		if (!plug->info) {
@@ -1968,13 +1772,14 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
			INIT_LIST_HEAD(&plug->rbio_list);
		}
		list_add_tail(&rbio->plug_list, &plug->rbio_list);
	} else {
		ret = __raid56_parity_write(rbio);
		if (ret) {
			free_raid_bio(rbio);
			goto fail;
		}
		return;
	}
queue_rbio:
	/*
	 * Either we don't have any existing plug, or we're doing a full stripe,
	 * can queue the rmw work now.
	 */
	start_async_work(rbio, rmw_rbio_work);

	return;

@@ -2217,21 +2022,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
	}
}

/*
 * This is called only for stripes we've read from disk to reconstruct the
 * parity.
 */
static void raid_recover_end_io_work(struct work_struct *work)
{
	struct btrfs_raid_bio *rbio =
		container_of(work, struct btrfs_raid_bio, end_io_work);

	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
		rbio_orig_end_io(rbio, BLK_STS_IOERR);
	else
		__raid_recover_end_io(rbio);
}

static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
				      struct bio_list *bio_list)
{
@@ -2348,79 +2138,6 @@ static void recover_rbio_work_locked(struct work_struct *work)
	rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}

/*
 * reads everything we need off the disk to reconstruct
 * the parity. endio handlers trigger final reconstruction
 * when the IO is done.
 *
 * This is used both for reads from the higher layers and for
 * parity construction required to finish a rmw cycle.
 */
static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
{
	int bios_to_read = 0;
	struct bio_list bio_list;
	int ret;
	struct bio *bio;

	bio_list_init(&bio_list);

	ret = alloc_rbio_pages(rbio);
	if (ret)
		goto cleanup;

	atomic_set(&rbio->error, 0);

	ret = recover_assemble_read_bios(rbio, &bio_list);
	if (ret < 0)
		goto cleanup;

	bios_to_read = bio_list_size(&bio_list);
	if (!bios_to_read) {
		/*
		 * we might have no bios to read just because the pages
		 * were up to date, or we might have no bios to read because
		 * the devices were gone.
		 */
		if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
			__raid_recover_end_io(rbio);
			return 0;
		} else {
			goto cleanup;
		}
	}

	/*
	 * The bioc may be freed once we submit the last bio. Make sure not to
	 * touch it after that.
	 */
	atomic_set(&rbio->stripes_pending, bios_to_read);
	INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
	while ((bio = bio_list_pop(&bio_list))) {
		bio->bi_end_io = raid56_bio_end_io;

		if (trace_raid56_scrub_read_recover_enabled()) {
			struct raid56_bio_trace_info trace_info = { 0 };

			bio_get_trace_info(rbio, bio, &trace_info);
			trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
		}
		submit_bio(bio);
	}

	return 0;

cleanup:
	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
		rbio_orig_end_io(rbio, BLK_STS_IOERR);

	while ((bio = bio_list_pop(&bio_list)))
		bio_put(bio);

	return -EIO;
}

/*
 * the main entry point for reads from the higher layers.  This
 * is really only called when the normal read path had a failure,
@@ -2529,7 +2246,7 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
	}
}

int rmw_rbio(struct btrfs_raid_bio *rbio)
static int rmw_rbio(struct btrfs_raid_bio *rbio)
{
	struct bio_list bio_list;
	int sectornr;
@@ -2615,12 +2332,29 @@ int rmw_rbio(struct btrfs_raid_bio *rbio)
	return ret;
}

static void rmw_work(struct work_struct *work)
static void rmw_rbio_work(struct work_struct *work)
{
	struct btrfs_raid_bio *rbio;
	int ret;

	rbio = container_of(work, struct btrfs_raid_bio, work);
	raid56_rmw_stripe(rbio);

	ret = lock_stripe_add(rbio);
	if (ret == 0) {
		ret = rmw_rbio(rbio);
		rbio_orig_end_io(rbio, errno_to_blk_status(ret));
	}
}

static void rmw_rbio_work_locked(struct work_struct *work)
{
	struct btrfs_raid_bio *rbio;
	int ret;

	rbio = container_of(work, struct btrfs_raid_bio, work);

	ret = rmw_rbio(rbio);
	rbio_orig_end_io(rbio, errno_to_blk_status(ret));
}

/*
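A note on start_async_work(), which the new write path leans on but
which is not part of this diff: assuming it matches the helper in the
surrounding raid56.c, it is nothing more than an INIT_WORK() plus
queue_work() pair. queue_work() does not sleep, which is what lets
raid_unplug() queue the rmw work directly even when called from
schedule, and why the old unplug_work()/btrfs_raid_unplug()
from_schedule detour could be removed. A sketch, reconstructed as an
assumption rather than quoted from this patch:

	/* Not part of this diff: reconstructed from the surrounding raid56.c. */
	static void start_async_work(struct btrfs_raid_bio *rbio,
				     work_func_t work_func)
	{
		INIT_WORK(&rbio->work, work_func);
		queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
	}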
fs/btrfs/raid56.h: +0 −5
@@ -185,9 +185,4 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);

/*
 * Placeholder definition to avoid warning, will be removed when
 * the full write path is migrated.
 */
int rmw_rbio(struct btrfs_raid_bio *rbio);
#endif