Commit 7d30c933 authored by Yu Kuai, committed by Wentao Guan
Browse files

md/md-bitmap: move bitmap_{start, end}write to md upper layer

stable inclusion
from stable-v6.6.79
commit 96156eb5772629b741c3e1b440aaf66eca742bf6
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBXANC

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=96156eb5772629b741c3e1b440aaf66eca742bf6

--------------------------------

commit cd5fc653381811f1e0ba65f5d169918cab61476f upstream.

There are two bug reports that raid5 hangs in
bitmap_startwrite() ([1], [2]). The root cause is that bitmap start-write
and end-write calls are unbalanced, although it's not quite clear where.
While reviewing the raid5 code, it was also found that bitmap operations
can be optimized. For example, for a 4-disk raid5 with chunksize=8k, if
the user issues an IO (0 + 48k) to the array:

┌────────────────────────────────────────────────────────────┐
│chunk 0                                                     │
│      ┌────────────┬─────────────┬─────────────┬────────────┼
│  sh0 │A0: 0 + 4k  │A1: 8k + 4k  │A2: 16k + 4k │A3: P       │
│      ┼────────────┼─────────────┼─────────────┼────────────┼
│  sh1 │B0: 4k + 4k │B1: 12k + 4k │B2: 20k + 4k │B3: P       │
┼──────┴────────────┴─────────────┴─────────────┴────────────┼
│chunk 1                                                     │
│      ┌────────────┬─────────────┬─────────────┬────────────┤
│  sh2 │C0: 24k + 4k│C1: 32k + 4k │C2: P        │C3: 40k + 4k│
│      ┼────────────┼─────────────┼─────────────┼────────────┼
│  sh3 │D0: 28k + 4k│D1: 36k + 4k │D2: P        │D3: 44k + 4k│
└──────┴────────────┴─────────────┴─────────────┴────────────┘

Before this patch, 4 stripe heads will be used, each sh will attach
bios for 3 disks, and each attached bio will trigger
bitmap_startwrite() once, which means 12 times in total:
 - 3 times (0 + 4k), for (A0, A1 and A2)
 - 3 times (4k + 4k), for (B0, B1 and B2)
 - 3 times (8k + 4k), for (C0, C1 and C3)
 - 3 times (12k + 4k), for (D0, D1 and D3)

After this patch, the md upper layer will calculate that the IO range
(0 + 48k) corresponds to the bitmap range (0 + 16k), and call
bitmap_startwrite() just once.

Note that this patch will align bitmap ranges to chunks. For example,
if the user issues an IO (0 + 4k) to the array:

- Before this patch, 1 time (0 + 4k), for A0;
- After this patch, 1 time (0 + 8k), for chunk 0;

Usually, one bitmap bit represents more than one disk chunk, so this
makes no difference. And even if the user really created an array
in which one chunk contains multiple bits, the only overhead is that more
data will be recovered after a power failure.

Also remove STRIPE_BITMAP_PENDING since it's not used anymore.

[1] https://lore.kernel.org/all/CAJpMwyjmHQLvm6zg1cmQErttNNQPDAAXPKM3xgTjMhbfts986Q@mail.gmail.com/
[2] https://lore.kernel.org/all/ADF7D720-5764-4AF3-B68E-1845988737AA@flyingcircus.io/



Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Link: https://lore.kernel.org/r/20250109015145.158868-6-yukuai1@huaweicloud.com


Signed-off-by: Song Liu <song@kernel.org>
[There is no bitmap_operations, resolve conflicts by replacing
bitmap_ops->{startwrite, endwrite} with md_bitmap_{startwrite, endwrite}]
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 96156eb5772629b741c3e1b440aaf66eca742bf6)
Signed-off-by: Wentao Guan <guanwentao@uniontech.com>
parent 7e4c47c7
Loading
Loading
Loading
Loading
+0 −2
Original line number Diff line number Diff line
@@ -1517,7 +1517,6 @@ int md_bitmap_startwrite(struct bitmap *bitmap, sector_t offset,
	}
	return 0;
}
EXPORT_SYMBOL_GPL(md_bitmap_startwrite);

void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
			unsigned long sectors)
@@ -1564,7 +1563,6 @@ void md_bitmap_endwrite(struct bitmap *bitmap, sector_t offset,
			sectors = 0;
	}
}
EXPORT_SYMBOL_GPL(md_bitmap_endwrite);

static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks,
			       int degraded)
+26 −0
Original line number Diff line number Diff line
@@ -8706,12 +8706,32 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);

/*
 * Call md_bitmap_startwrite() on mddev->bitmap for the range recorded in
 * @md_io_clone (->offset, ->sectors).
 *
 * If the personality provides a ->bitmap_sector() hook, it is first handed
 * pointers to those two fields, so it can rewrite them in place before the
 * range is passed on. The resulting values stay in @md_io_clone.
 *
 * This function does not test mddev->bitmap itself, and the int returned
 * by md_bitmap_startwrite() is not checked.
 */
static void md_bitmap_start(struct mddev *mddev,
			    struct md_io_clone *md_io_clone)
{
	/* Optional per-personality adjustment of the range, done in place. */
	if (mddev->pers->bitmap_sector)
		mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
					   &md_io_clone->sectors);

	md_bitmap_startwrite(mddev->bitmap, md_io_clone->offset,
			     md_io_clone->sectors);
}

/*
 * Call md_bitmap_endwrite() on mddev->bitmap for the range stored in
 * @md_io_clone (->offset, ->sectors).
 *
 * The range is read back from @md_io_clone as-is; ->bitmap_sector() is not
 * invoked here. This function does not test mddev->bitmap itself.
 */
static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
{
	md_bitmap_endwrite(mddev->bitmap, md_io_clone->offset,
			   md_io_clone->sectors);
}

static void md_end_clone_io(struct bio *bio)
{
	struct md_io_clone *md_io_clone = bio->bi_private;
	struct bio *orig_bio = md_io_clone->orig_bio;
	struct mddev *mddev = md_io_clone->mddev;

	if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
		md_bitmap_end(mddev, md_io_clone);

	if (bio->bi_status && !orig_bio->bi_status)
		orig_bio->bi_status = bio->bi_status;

@@ -8736,6 +8756,12 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
	if (blk_queue_io_stat(bdev->bd_disk->queue))
		md_io_clone->start_time = bio_start_io_acct(*bio);

	if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
		md_io_clone->offset = (*bio)->bi_iter.bi_sector;
		md_io_clone->sectors = bio_sectors(*bio);
		md_bitmap_start(mddev, md_io_clone);
	}

	clone->bi_end_io = md_end_clone_io;
	clone->bi_private = md_io_clone;
	*bio = clone;
+2 −0
Original line number Diff line number Diff line
@@ -719,6 +719,8 @@ struct md_io_clone {
	struct mddev	*mddev;
	struct bio	*orig_bio;
	unsigned long	start_time;
	sector_t	offset;
	unsigned long	sectors;
	struct bio	bio_clone;
};

+0 −5
Original line number Diff line number Diff line
@@ -422,9 +422,6 @@ static void close_write(struct r1bio *r1_bio)
	}
	if (test_bit(R1BIO_BehindIO, &r1_bio->state))
		md_bitmap_end_behind_write(r1_bio->mddev);
	/* clear the bitmap if all writes complete successfully */
	md_bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
			   r1_bio->sectors);
	md_write_end(r1_bio->mddev);
}

@@ -1511,8 +1508,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,

			if (test_bit(R1BIO_BehindIO, &r1_bio->state))
				md_bitmap_start_behind_write(mddev);
			md_bitmap_startwrite(bitmap, r1_bio->sector,
					     r1_bio->sectors);
			first_clone = 0;
		}

+0 −4
Original line number Diff line number Diff line
@@ -429,9 +429,6 @@ static void raid10_end_read_request(struct bio *bio)

static void close_write(struct r10bio *r10_bio)
{
	/* clear the bitmap if all writes complete successfully */
	md_bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
			   r10_bio->sectors);
	md_write_end(r10_bio->mddev);
}

@@ -1504,7 +1501,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
	md_account_bio(mddev, &bio);
	r10_bio->master_bio = bio;
	atomic_set(&r10_bio->remaining, 1);
	md_bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors);

	for (i = 0; i < conf->copies; i++) {
		if (r10_bio->devs[i].bio)
Loading