Commit dcf314ad authored by Alex Lyakas, committed by Li Nan
Browse files

md: When assembling the array, consult the superblock of the freshest device

mainline inclusion
from mainline-next-20231220
commit dc1cc22ed58f11d58d8553c5ec5f11cbfc3e3039
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8T02O

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dc1cc22ed58f11d58d8553c5ec5f11cbfc3e3039



--------------------------------

Upon assembling the array, both the kernel and mdadm allow devices to have an
event counter difference of 1 and still consider them up-to-date.
However, a device whose event count is behind by 1 may in fact not be up-to-date,
and an array resync with such a device may cause data corruption.
To avoid this, consult the superblock of the freshest device about the status
of a device whose event counter is behind by 1.

Signed-off-by: Alex Lyakas <alex.lyakas@zadara.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/1702470271-16073-1-git-send-email-alex.lyakas@zadara.com


Signed-off-by: Li Nan <linan122@huawei.com>
parent 5569e4ed
Loading
Loading
Loading
Loading
+44 −10
Original line number Diff line number Diff line
@@ -1213,6 +1213,7 @@ struct super_type {
					  struct md_rdev *refdev,
					  int minor_version);
	int		    (*validate_super)(struct mddev *mddev,
					      struct md_rdev *freshest,
					      struct md_rdev *rdev);
	void		    (*sync_super)(struct mddev *mddev,
					  struct md_rdev *rdev);
@@ -1350,8 +1351,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor

/*
 * validate_super for 0.90.0
 * note: we are not using "freshest" for 0.9 superblock
 */
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
{
	mdp_disk_t *desc;
	mdp_super_t *sb = page_address(rdev->sb_page);
@@ -1863,7 +1865,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
	return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
{
	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
	__u64 ev1 = le64_to_cpu(sb->events);
@@ -1959,13 +1961,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
		}
	} else if (mddev->pers == NULL) {
		/* Insist of good event counter while assembling, except for
		 * spares (which don't need an event count) */
		++ev1;
		 * spares (which don't need an event count).
		 * Similar to mdadm, we allow event counter difference of 1
		 * from the freshest device.
		 */
		if (rdev->desc_nr >= 0 &&
		    rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
		    (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
		     le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
			if (ev1 < mddev->events)
			if (ev1 + 1 < mddev->events)
				return -EINVAL;
	} else if (mddev->bitmap) {
		/* If adding to array with a bitmap, then we can accept an
@@ -1986,8 +1990,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
		    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
			role = MD_DISK_ROLE_SPARE;
			rdev->desc_nr = -1;
		} else
		} else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
			/*
			 * If we are assembling, and our event counter is smaller than the
			 * highest event counter, we cannot trust our superblock about the role.
			 * It could happen that our rdev was marked as Faulty, and all other
			 * superblocks were updated with +1 event counter.
			 * Then, before the next superblock update, which typically happens when
			 * remove_and_add_spares() removes the device from the array, there was
			 * a crash or reboot.
			 * If we allow current rdev without consulting the freshest superblock,
			 * we could cause data corruption.
			 * Note that in this case our event counter is smaller by 1 than the
			 * highest, otherwise, this rdev would not be allowed into array;
			 * both kernel and mdadm allow event counter difference of 1.
			 */
			struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
			u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);

			if (rdev->desc_nr >= freshest_max_dev) {
				/* this is unexpected, better not proceed */
				pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
						mdname(mddev), rdev->bdev, rdev->desc_nr,
						freshest->bdev, freshest_max_dev);
				return -EUCLEAN;
			}

			role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
			pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n",
				     mdname(mddev), rdev->bdev, role, role, freshest->bdev);
		} else {
			role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		}
		switch(role) {
		case MD_DISK_ROLE_SPARE: /* spare */
			break;
@@ -2894,7 +2928,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
		 * and should be added immediately.
		 */
		super_types[mddev->major_version].
			validate_super(mddev, rdev);
			validate_super(mddev, NULL/*freshest*/, rdev);
		err = mddev->pers->hot_add_disk(mddev, rdev);
		if (err) {
			md_kick_rdev_from_array(rdev);
@@ -3831,7 +3865,7 @@ static int analyze_sbs(struct mddev *mddev)
	}

	super_types[mddev->major_version].
		validate_super(mddev, freshest);
		validate_super(mddev, NULL/*freshest*/, freshest);

	i = 0;
	rdev_for_each_safe(rdev, tmp, mddev) {
@@ -3846,7 +3880,7 @@ static int analyze_sbs(struct mddev *mddev)
		}
		if (rdev != freshest) {
			if (super_types[mddev->major_version].
			    validate_super(mddev, rdev)) {
			    validate_super(mddev, freshest, rdev)) {
				pr_warn("md: kicking non-fresh %pg from array!\n",
					rdev->bdev);
				md_kick_rdev_from_array(rdev);
@@ -6840,7 +6874,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
			rdev->saved_raid_disk = rdev->raid_disk;
		} else
			super_types[mddev->major_version].
				validate_super(mddev, rdev);
				validate_super(mddev, NULL/*freshest*/, rdev);
		if ((info->state & (1<<MD_DISK_SYNC)) &&
		     rdev->raid_disk != info->raid_disk) {
			/* This was a hot-add request, but events doesn't