md: Set MD_BROKEN for RAID1 and RAID10 (9631abdb) · Commits · EulixOS / Software / Kernel

drivers/md/md.c

+15 −12

Original line number	Diff line number	Diff line
		@@ -2984,10 +2984,11 @@ state_store(struct md_rdev rdev, const char buf, size_t len)

		if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
		md_error(rdev->mddev, rdev);
		if (test_bit(Faulty, &rdev->flags))
		err = 0;
		else

		if (test_bit(MD_BROKEN, &rdev->mddev->flags))
		err = -EBUSY;
		else
		err = 0;
		} else if (cmd_match(buf, "remove")) {
		if (rdev->mddev->pers) {
		clear_bit(Blocked, &rdev->flags);
		@@ -4353,10 +4354,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO\|S_IWUSR,
		* like active, but no writes have been seen for a while (100msec).
		*
		* broken
		* RAID0/LINEAR-only: same as clean, but array is missing a member.
		* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
		* when a member is gone, so this state will at least alert the
		* user that something is wrong.
		* Array is failed. It's useful because mounted-arrays aren't stopped
		* when array is failed, so this state will at least alert the user that
		* something is wrong.
		*/
		enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		write_pending, active_idle, broken, bad_word};
		@@ -7443,7 +7443,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
		err = -ENODEV;
		else {
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags))
		if (test_bit(MD_BROKEN, &mddev->flags))
		err = -EBUSY;
		}
		rcu_read_unlock();
		@@ -7985,12 +7985,15 @@ void md_error(struct mddev mddev, struct md_rdev rdev)
		if (!mddev->pers \|\| !mddev->pers->error_handler)
		return;
		mddev->pers->error_handler(mddev, rdev);
		if (mddev->degraded)

		if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
		sysfs_notify_dirent_safe(rdev->sysfs_state);
		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
		if (!test_bit(MD_BROKEN, &mddev->flags)) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		}
		if (mddev->event_work.func)
		queue_work(md_misc_wq, &mddev->event_work);
		md_new_event();

drivers/md/md.h

+35 −27

Original line number	Diff line number	Diff line
		@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
		int is_new);
		struct md_cluster_info;

		/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
		enum mddev_flags {
		MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
		MD_CLOSING, /* If set, we are closing the array, do not open
		* it then */
		MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
		MD_HAS_JOURNAL, /* The raid array has journal feature set */
		MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
		* already took resync lock, need to
		* release the lock */
		MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
		* supported as calls to md_error() will
		* never cause the array to become failed.
		*/
		MD_HAS_PPL, /* The raid array has PPL feature set */
		MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
		MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
		* the metadata without taking reconfig_mutex.
		*/
		MD_UPDATING_SB, /* md_check_recovery is updating the metadata
		* without explicitly holding reconfig_mutex.
		*/
		MD_NOT_READY, /* do_md_run() is active, so 'array_state'
		* must not report that array is ready yet
		*/
		MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
		* I/O in case an array member is gone/failed.
		/**
		* enum mddev_flags - md device flags.
		* @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
		* @MD_CLOSING: If set, we are closing the array, do not open it then.
		* @MD_JOURNAL_CLEAN: A raid with journal is already clean.
		* @MD_HAS_JOURNAL: The raid array has journal feature set.
		* @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means the node already
		* took the resync lock and needs to release it.
		* @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
		* calls to md_error() will never cause the array to
		* become failed.
		* @MD_HAS_PPL: The raid array has PPL feature set.
		* @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
		* @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
		* without taking reconfig_mutex.
		* @MD_UPDATING_SB: md_check_recovery is updating the metadata without
		* explicitly holding reconfig_mutex.
		* @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
		* the array is ready yet.
		* @MD_BROKEN: This is used to stop writes and mark array as failed.
		*
		* Change UNSUPPORTED_MDDEV_FLAGS for each array type if a new flag is added.
		*/
		enum mddev_flags {
		MD_ARRAY_FIRST_USE,
		MD_CLOSING,
		MD_JOURNAL_CLEAN,
		MD_HAS_JOURNAL,
		MD_CLUSTER_RESYNC_LOCKED,
		MD_FAILFAST_SUPPORTED,
		MD_HAS_PPL,
		MD_HAS_MULTIPLE_PPLS,
		MD_ALLOW_SB_UPDATE,
		MD_UPDATING_SB,
		MD_NOT_READY,
		MD_BROKEN,
		};

		enum mddev_sb_flags {

drivers/md/raid1.c

+26 −17

Original line number	Diff line number	Diff line
		@@ -1641,31 +1641,40 @@ static void raid1_status(struct seq_file seq, struct mddev mddev)
		seq_printf(seq, "]");
		}

		/**
		* raid1_error() - RAID1 error handler.
		* @mddev: affected md device.
		* @rdev: member device to fail.
		*
		* The routine acknowledges &rdev failure and determines new @mddev state.
		* If it failed, then:
		* - &MD_BROKEN flag is set in &mddev->flags.
		* - recovery is disabled.
		* Otherwise, it must be degraded:
		* - recovery is interrupted.
		* - &mddev->degraded is bumped.
		*
		* @rdev is marked as &Faulty, except when the array is failed and
		* &mddev->fail_last_dev is off.
		*/
		static void raid1_error(struct mddev mddev, struct md_rdev rdev)
		{
		char b[BDEVNAME_SIZE];
		struct r1conf *conf = mddev->private;
		unsigned long flags;

		/*
		* If it is not operational, then we have already marked it as dead
		* else if it is the last working disks with "fail_last_dev == false",
		* ignore the error, let the next level up know.
		* else mark the drive as failed
		*/
		spin_lock_irqsave(&conf->device_lock, flags);
		if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
		&& (conf->raid_disks - mddev->degraded) == 1) {
		/*
		* Don't fail the drive, act as though we were just a
		* normal single drive.
		* However don't try a recovery from this drive as
		* it is very likely to fail.
		*/

		if (test_bit(In_sync, &rdev->flags) &&
		(conf->raid_disks - mddev->degraded) == 1) {
		set_bit(MD_BROKEN, &mddev->flags);

		if (!mddev->fail_last_dev) {
		conf->recovery_disabled = mddev->recovery_disabled;
		spin_unlock_irqrestore(&conf->device_lock, flags);
		return;
		}
		}
		set_bit(Blocked, &rdev->flags);
		if (test_and_clear_bit(In_sync, &rdev->flags))
		mddev->degraded++;

drivers/md/raid10.c

+24 −16

Original line number	Diff line number	Diff line
		@@ -1970,32 +1970,40 @@ static int enough(struct r10conf *conf, int ignore)
		_enough(conf, 1, ignore);
		}

		/**
		* raid10_error() - RAID10 error handler.
		* @mddev: affected md device.
		* @rdev: member device to fail.
		*
		* The routine acknowledges &rdev failure and determines new @mddev state.
		* If it failed, then:
		* - &MD_BROKEN flag is set in &mddev->flags.
		* Otherwise, it must be degraded:
		* - recovery is interrupted.
		* - &mddev->degraded is bumped.
		*
		* @rdev is marked as &Faulty, except when the array is failed and
		* &mddev->fail_last_dev is off.
		*/
		static void raid10_error(struct mddev mddev, struct md_rdev rdev)
		{
		char b[BDEVNAME_SIZE];
		struct r10conf *conf = mddev->private;
		unsigned long flags;

		/*
		* If it is not operational, then we have already marked it as dead
		* else if it is the last working disks with "fail_last_dev == false",
		* ignore the error, let the next level up know.
		* else mark the drive as failed
		*/
		spin_lock_irqsave(&conf->device_lock, flags);
		if (test_bit(In_sync, &rdev->flags) && !mddev->fail_last_dev
		&& !enough(conf, rdev->raid_disk)) {
		/*
		* Don't fail the drive, just return an IO error.
		*/

		if (test_bit(In_sync, &rdev->flags) && !enough(conf, rdev->raid_disk)) {
		set_bit(MD_BROKEN, &mddev->flags);

		if (!mddev->fail_last_dev) {
		spin_unlock_irqrestore(&conf->device_lock, flags);
		return;
		}
		}
		if (test_and_clear_bit(In_sync, &rdev->flags))
		mddev->degraded++;
		/*
		* If recovery is running, make sure it aborts.
		*/

		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
		set_bit(Blocked, &rdev->flags);
		set_bit(Faulty, &rdev->flags);