Commit f01e49fb authored by Jens Axboe's avatar Jens Axboe
Browse files

Merge branch 'md-next' of...

Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.19/drivers

Pull MD updates from Song:

"1. Improve annotation in raid5 code, by Logan Gunthorpe.
 2. Support MD_BROKEN flag in raid-1/5/10, by Mariusz Tkaczyk.
 3. Other small fixes/cleanups."

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Replace role magic numbers with defined constants
  md/raid0: Ignore RAID0 layout if the second zone has only one device
  md/raid5: Annotate functions that hold device_lock with __must_hold
  md/raid5-ppl: Annotate with rcu_dereference_protected()
  md/raid5: Annotate rdev/replacement access when mddev_lock is held
  md/raid5: Annotate rdev/replacement accesses when nr_pending is elevated
  md/raid5: Add __rcu annotation to struct disk_info
  md/raid5: Un-nest struct raid5_percpu definition
  md/raid5: Cleanup setup_conf() error returns
  md: replace deprecated strlcpy & remove duplicated line
  md/bitmap: don't set sb values if can't pass sanity check
  md: fix an incorrect NULL check in md_reload_sb
  md: fix an incorrect NULL check in does_sb_need_changing
  raid5: introduce MD_BROKEN
  md: Set MD_BROKEN for RAID1 and RAID10
parents 8ba816b2 9151ad5d
Loading
Loading
Loading
Loading
+23 −22
Original line number Diff line number Diff line
@@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
	daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
	write_behind = le32_to_cpu(sb->write_behind);
	sectors_reserved = le32_to_cpu(sb->sectors_reserved);
	/* Setup nodes/clustername only if bitmap version is
	 * cluster-compatible
	 */
	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
		nodes = le32_to_cpu(sb->nodes);
		strlcpy(bitmap->mddev->bitmap_info.cluster_name,
				sb->cluster_name, 64);
	}

	/* verify that the bitmap-specific fields are valid */
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
@@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
		goto out;
	}

	/*
	 * Setup nodes/clustername only if bitmap version is
	 * cluster-compatible
	 */
	if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
		nodes = le32_to_cpu(sb->nodes);
		strscpy(bitmap->mddev->bitmap_info.cluster_name,
				sb->cluster_name, 64);
	}

	/* keep the array size field of the bitmap superblock up to date */
	sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

@@ -695,14 +697,13 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
	if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
		set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
	bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
	strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
	err = 0;

out:
	kunmap_atomic(sb);
	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
		/* Assigning chunksize is required for "re_read" */
		bitmap->mddev->bitmap_info.chunksize = chunksize;
	if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
		err = md_setup_cluster(bitmap->mddev, nodes);
		if (err) {
			pr_warn("%s: Could not setup cluster service (%d)\n",
@@ -713,8 +714,8 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
		goto re_read;
	}


out_no_sb:
	if (err == 0) {
		if (test_bit(BITMAP_STALE, &bitmap->flags))
			bitmap->events_cleared = bitmap->mddev->events;
		bitmap->mddev->bitmap_info.chunksize = chunksize;
@@ -724,7 +725,7 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
		if (bitmap->mddev->bitmap_info.space == 0 ||
			bitmap->mddev->bitmap_info.space > sectors_reserved)
			bitmap->mddev->bitmap_info.space = sectors_reserved;
	if (err) {
	} else {
		md_bitmap_print_sb(bitmap);
		if (bitmap->cluster_slot < 0)
			md_cluster_stop(bitmap->mddev);
+1 −1
Original line number Diff line number Diff line
@@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
		pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
		goto out_err;
	}
	strlcpy(res->name, name, namelen + 1);
	strscpy(res->name, name, namelen + 1);
	if (with_lvb) {
		res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
		if (!res->lksb.sb_lvbptr) {
+35 −27
Original line number Diff line number Diff line
@@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)

static bool does_sb_need_changing(struct mddev *mddev)
{
	struct md_rdev *rdev;
	struct md_rdev *rdev = NULL, *iter;
	struct mdp_superblock_1 *sb;
	int role;

	/* Find a good rdev */
	rdev_for_each(rdev, mddev)
		if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
	rdev_for_each(iter, mddev)
		if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
			rdev = iter;
			break;
		}

	/* No good device found. */
	if (!rdev)
@@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
	rdev_for_each(rdev, mddev) {
		role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
		/* Device activated? */
		if (role == 0xffff && rdev->raid_disk >=0 &&
		if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
		    !test_bit(Faulty, &rdev->flags))
			return true;
		/* Device turned faulty? */
		if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
		if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
			return true;
	}

@@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)

	if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
		md_error(rdev->mddev, rdev);
		if (test_bit(Faulty, &rdev->flags))
			err = 0;
		else

		if (test_bit(MD_BROKEN, &rdev->mddev->flags))
			err = -EBUSY;
		else
			err = 0;
	} else if (cmd_match(buf, "remove")) {
		if (rdev->mddev->pers) {
			clear_bit(Blocked, &rdev->flags);
@@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
	oldpriv = mddev->private;
	mddev->pers = pers;
	mddev->private = priv;
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	mddev->level = mddev->new_level;
	mddev->layout = mddev->new_layout;
	mddev->chunk_sectors = mddev->new_chunk_sectors;
@@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
 *     like active, but no writes have been seen for a while (100msec).
 *
 * broken
 *     RAID0/LINEAR-only: same as clean, but array is missing a member.
 *     It's useful because RAID0/LINEAR mounted-arrays aren't stopped
 *     when a member is gone, so this state will at least alert the
 *     user that something is wrong.
*     Array is failed. It's useful because mounted-arrays aren't stopped
*     when array is failed, so this state will at least alert the user that
*     something is wrong.
 */
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
		   write_pending, active_idle, broken, bad_word};
@@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
		len--;
	if (len >= DISK_NAME_LEN)
		return -E2BIG;
	strlcpy(buf, val, len+1);
	strscpy(buf, val, len+1);
	if (strncmp(buf, "md_", 3) == 0)
		return md_alloc(0, buf);
	if (strncmp(buf, "md", 2) == 0 &&
@@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
		mddev->level = pers->level;
		mddev->new_level = pers->level;
	}
	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
	strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

	if (mddev->reshape_position != MaxSector &&
	    pers->start_reshape == NULL) {
@@ -7443,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
		err =  -ENODEV;
	else {
		md_error(mddev, rdev);
		if (!test_bit(Faulty, &rdev->flags))
		if (test_bit(MD_BROKEN, &mddev->flags))
			err = -EBUSY;
	}
	rcu_read_unlock();
@@ -7985,12 +7987,15 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
	if (!mddev->pers || !mddev->pers->error_handler)
		return;
	mddev->pers->error_handler(mddev, rdev);
	if (mddev->degraded)

	if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
		set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
	sysfs_notify_dirent_safe(rdev->sysfs_state);
	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
	if (!test_bit(MD_BROKEN, &mddev->flags)) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}
	if (mddev->event_work.func)
		queue_work(md_misc_wq, &mddev->event_work);
	md_new_event();
@@ -9670,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
		role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

		if (test_bit(Candidate, &rdev2->flags)) {
			if (role == 0xfffe) {
			if (role == MD_DISK_ROLE_FAULTY) {
				pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
				md_kick_rdev_from_array(rdev2);
				continue;
@@ -9683,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
			/*
			 * got activated except reshape is happening.
			 */
			if (rdev2->raid_disk == -1 && role != 0xffff &&
			if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
			    !(le32_to_cpu(sb->feature_map) &
			      MD_FEATURE_RESHAPE_ACTIVE)) {
				rdev2->saved_raid_disk = role;
@@ -9700,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
			 * as faulty. The recovery is performed by the
			 * one who initiated the error.
			 */
			if ((role == 0xfffe) || (role == 0xfffd)) {
			if (role == MD_DISK_ROLE_FAULTY ||
			    role == MD_DISK_ROLE_JOURNAL) {
				md_error(mddev, rdev2);
				clear_bit(Blocked, &rdev2->flags);
			}
@@ -9790,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)

void md_reload_sb(struct mddev *mddev, int nr)
{
	struct md_rdev *rdev;
	struct md_rdev *rdev = NULL, *iter;
	int err;

	/* Find the rdev */
	rdev_for_each_rcu(rdev, mddev) {
		if (rdev->desc_nr == nr)
	rdev_for_each_rcu(iter, mddev) {
		if (iter->desc_nr == nr) {
			rdev = iter;
			break;
		}
	}

	if (!rdev || rdev->desc_nr != nr) {
	if (!rdev) {
		pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
		return;
	}
+35 −27
Original line number Diff line number Diff line
@@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
				int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,	/* First use of array, needs initialization */
	MD_CLOSING,		/* If set, we are closing the array, do not open
				 * it then */
	MD_JOURNAL_CLEAN,	/* A raid with journal is already clean */
	MD_HAS_JOURNAL,		/* The raid array has journal feature set */
	MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
				   * already took resync lock, need to
				   * release the lock */
	MD_FAILFAST_SUPPORTED,	/* Using MD_FAILFAST on metadata writes is
				 * supported as calls to md_error() will
				 * never cause the array to become failed.
				 */
	MD_HAS_PPL,		/* The raid array has PPL feature set */
	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
				 * the metadata without taking reconfig_mutex.
				 */
	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
				 * without explicitly holding reconfig_mutex.
				 */
	MD_NOT_READY,		/* do_md_run() is active, so 'array_state'
				 * must not report that array is ready yet
				 */
	MD_BROKEN,              /* This is used in RAID-0/LINEAR only, to stop
				 * I/O in case an array member is gone/failed.
/**
 * enum mddev_flags - md device flags.
 * @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
 * @MD_CLOSING: If set, we are closing the array, do not open it then.
 * @MD_JOURNAL_CLEAN: A raid with journal is already clean.
 * @MD_HAS_JOURNAL: The raid array has journal feature set.
 * @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node, already took
 *			       resync lock, need to release the lock.
 * @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
 *			    calls to md_error() will never cause the array to
 *			    become failed.
 * @MD_HAS_PPL:  The raid array has PPL feature set.
 * @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
 * @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
 *			 without taking reconfig_mutex.
 * @MD_UPDATING_SB: md_check_recovery is updating the metadata without
 *		     explicitly holding reconfig_mutex.
 * @MD_NOT_READY: do_md_run() is active, so 'array_state', ust not report that
 *		   array is ready yet.
 * @MD_BROKEN: This is used to stop writes and mark array as failed.
 *
 * change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
 */
enum mddev_flags {
	MD_ARRAY_FIRST_USE,
	MD_CLOSING,
	MD_JOURNAL_CLEAN,
	MD_HAS_JOURNAL,
	MD_CLUSTER_RESYNC_LOCKED,
	MD_FAILFAST_SUPPORTED,
	MD_HAS_PPL,
	MD_HAS_MULTIPLE_PPLS,
	MD_ALLOW_SB_UPDATE,
	MD_UPDATING_SB,
	MD_NOT_READY,
	MD_BROKEN,
};

enum mddev_sb_flags {
+16 −15
Original line number Diff line number Diff line
@@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
	pr_debug("md/raid0:%s: FINAL %d zones\n",
		 mdname(mddev), conf->nr_strip_zones);

	if (conf->nr_strip_zones == 1) {
		conf->layout = RAID0_ORIG_LAYOUT;
	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
		conf->layout = mddev->layout;
	} else if (default_layout == RAID0_ORIG_LAYOUT ||
		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
		conf->layout = default_layout;
	} else {
		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
		       mdname(mddev));
		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
		err = -ENOTSUPP;
		goto abort;
	}
	/*
	 * now since we have the hard sector sizes, we can make sure
	 * chunk size is a multiple of that sector size
@@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
			 (unsigned long long)smallest->sectors);
	}

	if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
		conf->layout = RAID0_ORIG_LAYOUT;
	} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
		   mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
		conf->layout = mddev->layout;
	} else if (default_layout == RAID0_ORIG_LAYOUT ||
		   default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
		conf->layout = default_layout;
	} else {
		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
		       mdname(mddev));
		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
		err = -EOPNOTSUPP;
		goto abort;
	}

	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
	*private_conf = conf;

Loading