Commit 60701311, authored by Jens Axboe
Browse files

Merge tag 'md-next-20230613' of...

Merge tag 'md-next-20230613' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.5/block

Pull MD updates from Song:

"The major changes are:

 1. Protect md_thread with rcu, by Yu Kuai;
 2. Various non-urgent raid5 and raid1/10 fixes, by Yu Kuai;
 3. Non-urgent raid10 fixes, by Li Nan."

* tag 'md-next-20230613' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (29 commits)
  md/raid1-10: limit the number of plugged bio
  md/raid1-10: don't handle plugged bio by daemon thread
  md/md-bitmap: add a new helper to unplug bitmap asynchronously
  md/raid1-10: submit write io directly if bitmap is not enabled
  md/raid1-10: factor out a helper to submit normal write
  md/raid1-10: factor out a helper to add bio to plug
  md/raid10: prevent soft lockup while flush writes
  md/raid10: fix io loss while replacement replace rdev
  md/raid10: Do not add spare disk when recovery fails
  md/raid10: clean up md_add_new_disk()
  md/raid10: prioritize adding disk to 'removed' mirror
  md/raid10: improve code of mrdev in raid10_sync_request
  md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request
  md/raid5: don't start reshape when recovery or replace is in progress
  md: protect md_thread with rcu
  md/bitmap: factor out a helper to set timeout
  md/bitmap: always wake up md_thread in timeout_store
  dm-raid: remove useless checking in raid_message()
  md: factor out a helper to wake up md_thread directly
  md: fix duplicate filename for rdev
  ...
parents d44c4042 460af1f9
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -3750,11 +3750,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
		 * canceling read-auto mode
		 */
		mddev->ro = 0;
		if (!mddev->suspended && mddev->sync_thread)
		if (!mddev->suspended)
			md_wakeup_thread(mddev->sync_thread);
	}
	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	if (!mddev->suspended && mddev->thread)
	if (!mddev->suspended)
		md_wakeup_thread(mddev->thread);

	return 0;
+65 −28
Original line number Diff line number Diff line
@@ -54,14 +54,7 @@ __acquires(bitmap->lock)
{
	unsigned char *mappage;

	if (page >= bitmap->pages) {
		/* This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page.
		 * It is harmless.
		 */
		return -EINVAL;
	}

	WARN_ON_ONCE(page >= bitmap->pages);
	if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
		return 0;

@@ -1023,7 +1016,6 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
	return set;
}


/* this gets called when the md device is ready to unplug its underlying
 * (slave) device queues -- before we let any writes go down, we need to
 * sync the dirty pages of the bitmap file to disk */
@@ -1033,8 +1025,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
	int dirty, need_write;
	int writing = 0;

	if (!bitmap || !bitmap->storage.filemap ||
	    test_bit(BITMAP_STALE, &bitmap->flags))
	if (!md_bitmap_enabled(bitmap))
		return;

	/* look at each page to see if there are any set bits that need to be
@@ -1063,6 +1054,35 @@ void md_bitmap_unplug(struct bitmap *bitmap)
}
EXPORT_SYMBOL(md_bitmap_unplug);

struct bitmap_unplug_work {
	struct work_struct work;
	struct bitmap *bitmap;
	struct completion *done;
};

static void md_bitmap_unplug_fn(struct work_struct *work)
{
	struct bitmap_unplug_work *unplug_work =
		container_of(work, struct bitmap_unplug_work, work);

	md_bitmap_unplug(unplug_work->bitmap);
	complete(unplug_work->done);
}

void md_bitmap_unplug_async(struct bitmap *bitmap)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bitmap_unplug_work unplug_work;

	INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
	unplug_work.bitmap = bitmap;
	unplug_work.done = &done;

	queue_work(md_bitmap_wq, &unplug_work.work);
	wait_for_completion(&done);
}
EXPORT_SYMBOL(md_bitmap_unplug_async);

static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
 * the in-memory bitmap from the on-disk bitmap -- also, sets up the
@@ -1241,11 +1261,28 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
					       sector_t offset, sector_t *blocks,
					       int create);

static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
			      bool force)
{
	struct md_thread *thread;

	rcu_read_lock();
	thread = rcu_dereference(mddev->thread);

	if (!thread)
		goto out;

	if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
		thread->timeout = timeout;

out:
	rcu_read_unlock();
}

/*
 * bitmap daemon -- periodically wakes up to clean bits and flush pages
 *			out to disk
 */

void md_bitmap_daemon_work(struct mddev *mddev)
{
	struct bitmap *bitmap;
@@ -1269,7 +1306,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)

	bitmap->daemon_lastrun = jiffies;
	if (bitmap->allclean) {
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
		mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
		goto done;
	}
	bitmap->allclean = 1;
@@ -1366,8 +1403,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)

 done:
	if (bitmap->allclean == 0)
		mddev->thread->timeout =
			mddev->bitmap_info.daemon_sleep;
		mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
	mutex_unlock(&mddev->bitmap_info.mutex);
}

@@ -1387,6 +1423,14 @@ __acquires(bitmap->lock)
	sector_t csize;
	int err;

	if (page >= bitmap->pages) {
		/*
		 * This can happen if bitmap_start_sync goes beyond
		 * End-of-device while looking for a whole page or
		 * user set a huge number to sysfs bitmap_set_bits.
		 */
		return NULL;
	}
	err = md_bitmap_checkpage(bitmap, page, create, 0);

	if (bitmap->bp[page].hijacked ||
@@ -1820,8 +1864,7 @@ void md_bitmap_destroy(struct mddev *mddev)
	mddev->bitmap = NULL; /* disconnect from the md device */
	spin_unlock(&mddev->lock);
	mutex_unlock(&mddev->bitmap_info.mutex);
	if (mddev->thread)
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
	mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);

	md_bitmap_free(bitmap);
}
@@ -1964,7 +2007,7 @@ int md_bitmap_load(struct mddev *mddev)
	/* Kick recovery in case any bits were set */
	set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);

	mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
	mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
	md_wakeup_thread(mddev->thread);

	md_bitmap_update_sb(bitmap);
@@ -2469,17 +2512,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len)
		timeout = MAX_SCHEDULE_TIMEOUT-1;
	if (timeout < 1)
		timeout = 1;

	mddev->bitmap_info.daemon_sleep = timeout;
	if (mddev->thread) {
		/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
		 * the bitmap is all clean and we don't need to
		 * adjust the timeout right now
		 */
		if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
			mddev->thread->timeout = timeout;
	mddev_set_timeout(mddev, timeout, false);
	md_wakeup_thread(mddev->thread);
		}
	}

	return len;
}

+8 −0
Original line number Diff line number Diff line
@@ -264,6 +264,7 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev,
				 sector_t new_lo, sector_t new_hi);

void md_bitmap_unplug(struct bitmap *bitmap);
void md_bitmap_unplug_async(struct bitmap *bitmap);
void md_bitmap_daemon_work(struct mddev *mddev);

int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
@@ -273,6 +274,13 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
			     sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);

static inline bool md_bitmap_enabled(struct bitmap *bitmap)
{
	return bitmap && bitmap->storage.filemap &&
	       !test_bit(BITMAP_STALE, &bitmap->flags);
}

#endif

#endif
+11 −6
Original line number Diff line number Diff line
@@ -75,14 +75,14 @@ struct md_cluster_info {
	sector_t suspend_hi;
	int suspend_from; /* the slot which broadcast suspend_lo/hi */

	struct md_thread *recovery_thread;
	struct md_thread __rcu *recovery_thread;
	unsigned long recovery_map;
	/* communication loc resources */
	struct dlm_lock_resource *ack_lockres;
	struct dlm_lock_resource *message_lockres;
	struct dlm_lock_resource *token_lockres;
	struct dlm_lock_resource *no_new_dev_lockres;
	struct md_thread *recv_thread;
	struct md_thread __rcu *recv_thread;
	struct completion newdisk_completion;
	wait_queue_head_t wait;
	unsigned long state;
@@ -362,8 +362,8 @@ static void __recover_slot(struct mddev *mddev, int slot)

	set_bit(slot, &cinfo->recovery_map);
	if (!cinfo->recovery_thread) {
		cinfo->recovery_thread = md_register_thread(recover_bitmaps,
				mddev, "recover");
		rcu_assign_pointer(cinfo->recovery_thread,
			md_register_thread(recover_bitmaps, mddev, "recover"));
		if (!cinfo->recovery_thread) {
			pr_warn("md-cluster: Could not create recovery thread\n");
			return;
@@ -526,11 +526,15 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
{
	int got_lock = 0;
	struct md_thread *thread;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	mddev->good_device_nr = le32_to_cpu(msg->raid_slot);

	dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
	wait_event(mddev->thread->wqueue,

	/* daemaon thread must exist */
	thread = rcu_dereference_protected(mddev->thread, true);
	wait_event(thread->wqueue,
		   (got_lock = mddev_trylock(mddev)) ||
		    test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state));
	md_reload_sb(mddev, mddev->good_device_nr);
@@ -889,7 +893,8 @@ static int join(struct mddev *mddev, int nodes)
	}
	/* Initiate the communication resources */
	ret = -ENOMEM;
	cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
	rcu_assign_pointer(cinfo->recv_thread,
			md_register_thread(recv_daemon, mddev, "cluster_recv"));
	if (!cinfo->recv_thread) {
		pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
		goto err;
+2 −2
Original line number Diff line number Diff line
@@ -400,8 +400,8 @@ static int multipath_run (struct mddev *mddev)
	if (ret)
		goto out_free_conf;

	mddev->thread = md_register_thread(multipathd, mddev,
					   "multipath");
	rcu_assign_pointer(mddev->thread,
			   md_register_thread(multipathd, mddev, "multipath"));
	if (!mddev->thread)
		goto out_free_conf;

Loading