Commit 4324796e authored by Jens Axboe

Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.1/block

Pull MD updates and fixes from Song:

"1. Various raid5 fix and clean up, by Logan Gunthorpe and David Sloan.
 2. Raid10 performance optimization, by Yu Kuai."

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Fix spelling mistake in comments of r5l_log
  md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d
  md/raid10: convert resync_lock to use seqlock
  md/raid10: fix improper BUG_ON() in raise_barrier()
  md/raid10: prevent unnecessary calls to wake_up() in fast path
  md/raid10: don't modify 'nr_waiting' in wait_barrier() for the case nowait
  md/raid10: factor out code from wait_barrier() to stop_waiting_barrier()
  md: Remove extra mddev_get() in md_seq_start()
  md/raid5: Remove unnecessary bio_put() in raid5_read_one_chunk()
  md/raid5: Ensure stripe_fill happens on non-read IO with journal
  md/raid5: Don't read ->active_stripes if it's not needed
  md/raid5: Cleanup prototype of raid5_get_active_stripe()
  md/raid5: Drop extern on function declarations in raid5.h
  md/raid5: Refactor raid5_get_active_stripe()
  md: Replace snprintf with scnprintf
  md/raid10: fix compile warning
  md/raid5: Fix spelling mistakes in comments
parents 9713a670 65b94b52
drivers/md/md.c +0 −1
@@ -8154,7 +8154,6 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos)
 	list_for_each(tmp,&all_mddevs)
 		if (!l--) {
 			mddev = list_entry(tmp, struct mddev, all_mddevs);
-			mddev_get(mddev);
 			if (!mddev_get(mddev))
 				continue;
 			spin_unlock(&all_mddevs_lock);
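
The fix above matters because mddev_get() can fail once the refcount has already hit zero; taking an extra unchecked reference next to the checked one leaks a reference on every pass. A minimal userspace sketch of the checked-get pattern, with hypothetical obj_get()/obj_put() helpers standing in for mddev_get()/mddev_put():

/* Hypothetical refcount helpers illustrating the md.c fix above:
 * a "get" that can fail must be called once and checked, not paired
 * with an extra unconditional get (which would leak a reference). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	atomic_int refcount;	/* 0 means the object is being torn down */
};

static bool obj_get(struct obj *o)
{
	int old = atomic_load(&o->refcount);

	/* refuse to resurrect an object whose count already hit zero */
	while (old > 0) {
		if (atomic_compare_exchange_weak(&o->refcount, &old, old + 1))
			return true;
	}
	return false;
}

static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		printf("last reference dropped, object can be freed\n");
}

int main(void)
{
	struct obj o = { .refcount = 1 };

	if (obj_get(&o))	/* one checked get, as in the fixed code */
		obj_put(&o);	/* balanced put */
	obj_put(&o);		/* drop the initial reference */
	return 0;
}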
drivers/md/raid0.c +1 −1
@@ -47,7 +47,7 @@ static void dump_zones(struct mddev *mddev)
 		int len = 0;
 
 		for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
-			len += snprintf(line+len, 200-len, "%s%pg", k?"/":"",
+			len += scnprintf(line+len, 200-len, "%s%pg", k?"/":"",
 				conf->devlist[j * raid_disks + k]->bdev);
 		pr_debug("md: zone%d=[%s]\n", j, line);
 
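Why this one-word change matters: snprintf() returns the length the output would have had, so when the string is truncated, len += snprintf(...) advances past the end of the buffer and the next iteration computes an out-of-bounds pointer. scnprintf() returns the number of characters actually stored. A small userspace demonstration; my_scnprintf here is a stand-in for the kernel helper in lib/vsprintf.c, not its actual implementation:

#include <stdarg.h>
#include <stdio.h>

/* Stand-in for the kernel's scnprintf(): clamp the "would-be" length
 * reported by vsnprintf() to the number of characters actually stored. */
static int my_scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int ret;

	if (size == 0)
		return 0;
	va_start(args, fmt);
	ret = vsnprintf(buf, size, fmt, args);
	va_end(args);
	return ret >= (int)size ? (int)size - 1 : ret;
}

int main(void)
{
	char line[8];
	int len = 0;

	/* snprintf() reports the untruncated length: len jumps to 11,
	 * past the 8-byte buffer, so a follow-up write at line+len
	 * would be out of bounds (the bug the hunk above fixes). */
	len += snprintf(line + len, sizeof(line) - len, "%s", "sda/sdb/sdc");
	printf("snprintf:  len=%d, buffer=%zu\n", len, sizeof(line));

	/* scnprintf-style accumulation can never overshoot the buffer. */
	len = 0;
	len += my_scnprintf(line + len, sizeof(line) - len, "%s", "sda/sdb/sdc");
	len += my_scnprintf(line + len, sizeof(line) - len, "%s", "/sdd");
	printf("scnprintf: len=%d, line=\"%s\"\n", len, line);
	return 0;
}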
drivers/md/raid10.c +96 −55
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
@@ -274,6 +289,12 @@ static void put_buf(struct r10bio *r10_bio)
 	lower_barrier(conf);
 }
 
+static void wake_up_barrier(struct r10conf *conf)
+{
+	if (wq_has_sleeper(&conf->wait_barrier))
+		wake_up(&conf->wait_barrier);
+}
+
 static void reschedule_retry(struct r10bio *r10_bio)
 {
 	unsigned long flags;
@@ -930,78 +951,101 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
-	spin_lock_irq(&conf->resync_lock);
 
 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
 
 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
 
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
 
+static bool stop_waiting_barrier(struct r10conf *conf)
+{
+	struct bio_list *bio_list = current->bio_list;
+
+	/* barrier is dropped */
+	if (!conf->barrier)
+		return true;
+
+	/*
+	 * If there are already pending requests (preventing the barrier from
+	 * rising completely), and the pre-process bio queue isn't empty, then
+	 * don't wait, as we need to empty that queue to get the nr_pending
+	 * count down.
+	 */
+	if (atomic_read(&conf->nr_pending) && bio_list &&
+	    (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
+		return true;
+
+	/* move on if recovery thread is blocked by us */
+	if (conf->mddev->thread->tsk == current &&
+	    test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
+	    conf->nr_queued > 0)
+		return true;
+
+	return false;
+}
+
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
-		struct bio_list *bio_list = current->bio_list;
-		conf->nr_waiting++;
-		/* Wait for the barrier to drop.
-		 * However if there are already pending
-		 * requests (preventing the barrier from
-		 * rising completely), and the
-		 * pre-process bio queue isn't empty,
-		 * then don't wait, as we need to empty
-		 * that queue to get the nr_pending
-		 * count down.
-		 */
 		/* Return false when nowait flag is set */
 		if (nowait) {
 			ret = false;
 		} else {
+			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    !conf->barrier ||
-					    (atomic_read(&conf->nr_pending) &&
-					     bio_list &&
-					     (!bio_list_empty(&bio_list[0]) ||
-					      !bio_list_empty(&bio_list[1]))) ||
-					     /* move on if recovery thread is
-					      * blocked by us
-					      */
-					     (conf->mddev->thread->tsk == current &&
-					      test_bit(MD_RECOVERY_RUNNING,
						       &conf->mddev->recovery) &&
-					      conf->nr_queued > 0),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
+			conf->nr_waiting--;
 		}
-		conf->nr_waiting--;
 		if (!conf->nr_waiting)
			wake_up(&conf->wait_barrier);
 	}
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 	return ret;
 }
 
@@ -1009,7 +1053,7 @@ static void allow_barrier(struct r10conf *conf)
 {
 	if ((atomic_dec_and_test(&conf->nr_pending)) ||
 			(conf->array_freeze_pending))
-		wake_up(&conf->wait_barrier);
+		wake_up_barrier(conf);
 }
 
 static void freeze_array(struct r10conf *conf, int extra)
@@ -1026,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
-
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -1885,7 +1926,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	__make_request(mddev, bio, sectors);
 
 	/* In case raid10d snuck in to freeze_array */
-	wake_up(&conf->wait_barrier);
+	wake_up_barrier(conf);
 	return true;
 }
 
@@ -1980,7 +2021,7 @@ static int enough(struct r10conf *conf, int ignore)
  * Otherwise, it must be degraded:
  *	- recovery is interrupted.
  *	- &mddev->degraded is bumped.
-
+ *
  * @rdev is marked as &Faulty excluding case when array is failed and
  * &mddev->fail_last_dev is off.
  */
@@ -4033,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
 
@@ -4352,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 				rdev->new_raid_disk = rdev->raid_disk * 2;
 				rdev->sectors = size;
 			}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}
 
 	return conf;
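
The core of the raid10 optimization above is the seqlock-based fast path in wait_barrier_nolock(): readers sample the sequence counter, speculatively bump nr_pending, and only fall back to the write-side lock if a barrier writer raced with them. A self-contained sketch of the same retry pattern using C11 atomics; this is a toy seqcount for illustration, not the kernel's seqlock_t:

/* Toy seqcount illustrating the wait_barrier_nolock() fast path from
 * the raid10.c diff above: C11 atomics, not the kernel's seqlock_t. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint seq;			/* even = stable, odd = writer active */
static atomic_int barrier_cnt;		/* raised by resync, like conf->barrier */
static atomic_int nr_pending;		/* in-flight normal IO */

static unsigned int read_begin(void)
{
	unsigned int s;

	while ((s = atomic_load(&seq)) & 1)
		;			/* writer in progress, spin */
	return s;
}

static bool read_retry(unsigned int s)
{
	atomic_thread_fence(memory_order_acquire);
	return atomic_load(&seq) != s;	/* true if a writer slipped in */
}

/* Fast path: take a "pending IO" slot without the write-side lock. */
static bool wait_barrier_nolock(void)
{
	unsigned int s = read_begin();

	if (atomic_load(&barrier_cnt))
		return false;		/* barrier up: use the slow path */

	atomic_fetch_add(&nr_pending, 1);
	if (!read_retry(s))
		return true;		/* no writer raced us: success */

	atomic_fetch_sub(&nr_pending, 1); /* writer raced us: undo, slow path */
	return false;
}

static void raise_barrier(void)
{
	atomic_fetch_add(&seq, 1);	/* seq goes odd: readers will retry */
	atomic_fetch_add(&barrier_cnt, 1);
	atomic_fetch_add(&seq, 1);	/* seq even again */
}

int main(void)
{
	printf("fast path with no barrier: %d\n", wait_barrier_nolock());
	raise_barrier();
	printf("fast path with barrier up: %d\n", wait_barrier_nolock());
	return 0;
}

The win is that wait_barrier(), which runs for every request, no longer serializes all submitters on one spinlock when no resync barrier is active.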
drivers/md/raid10.h +1 −1
@@ -76,7 +76,7 @@ struct r10conf {
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
 
-	spinlock_t		resync_lock;
+	seqlock_t		resync_lock;
 	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;
drivers/md/raid5-cache.c +6 −5
@@ -125,7 +125,7 @@ struct r5l_log {
 					 * reclaimed.  if it's 0, reclaim spaces
 					 * used by io_units which are in
 					 * IO_UNIT_STRIPE_END state (eg, reclaim
-					 * dones't wait for specific io_unit
+					 * doesn't wait for specific io_unit
 					 * switching to IO_UNIT_STRIPE_END
 					 * state) */
 	wait_queue_head_t iounit_wait;
@@ -1327,9 +1327,9 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
 	 * superblock is updated to new log tail. Updating superblock (either
 	 * directly call md_update_sb() or depend on md thread) must hold
 	 * reconfig mutex. On the other hand, raid5_quiesce is called with
-	 * reconfig_mutex hold. The first step of raid5_quiesce() is waitting
-	 * for all IO finish, hence waitting for reclaim thread, while reclaim
-	 * thread is calling this function and waitting for reconfig mutex. So
+	 * reconfig_mutex hold. The first step of raid5_quiesce() is waiting
+	 * for all IO finish, hence waiting for reclaim thread, while reclaim
+	 * thread is calling this function and waiting for reconfig mutex. So
 	 * there is a deadlock. We workaround this issue with a trylock.
 	 * FIXME: we could miss discard if we can't take reconfig mutex
 	 */
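
The comment above describes a classic lock-order inversion: the reclaim thread needs reconfig_mutex while raid5_quiesce(), already holding it, waits for reclaim to finish. A minimal pthread sketch of the trylock workaround; the names here are illustrative, not the md code:

/* Illustration of the trylock workaround described in the comment
 * above: if taking the mutex could deadlock (another thread holds it
 * while waiting for us), try once and skip the work instead of
 * blocking. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reconfig_mutex = PTHREAD_MUTEX_INITIALIZER;

static void reclaim_discard_space(void)
{
	if (pthread_mutex_trylock(&reconfig_mutex) != 0) {
		/* Someone (e.g. a quiesce path waiting on us) holds the
		 * mutex: give up rather than deadlock. As the FIXME in
		 * the comment notes, the discard may simply be missed. */
		printf("mutex busy, skipping discard this round\n");
		return;
	}
	printf("mutex taken, issuing discard\n");
	pthread_mutex_unlock(&reconfig_mutex);
}

int main(void)
{
	reclaim_discard_space();	/* uncontended: does the work */

	pthread_mutex_lock(&reconfig_mutex);
	reclaim_discard_space();	/* contended: skips, no deadlock */
	pthread_mutex_unlock(&reconfig_mutex);
	return 0;
}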
@@ -1923,7 +1923,8 @@ r5c_recovery_alloc_stripe(
 {
 	struct stripe_head *sh;
 
-	sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
+	sh = raid5_get_active_stripe(conf, NULL, stripe_sect,
+				     noblock ? R5_GAS_NOBLOCK : 0);
 	if (!sh)
 		return NULL;  /* no more stripe available */
 
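The last hunk reflects the prototype cleanup: raid5_get_active_stripe() now takes a single flags word (R5_GAS_NOBLOCK above) instead of a tail of positional int arguments. A generic C sketch of that refactor; get_resource() and its GR_* flags are made-up illustrations, only the noblock ? R5_GAS_NOBLOCK : 0 call shape mirrors the diff:

/* Sketch of the bool-parameters-to-flags cleanup visible above.
 * Before: get_resource(id, 0, noblock, 0) - callers must remember the
 * position and meaning of each numeric argument.
 * After: one self-describing flags word. */
#include <stdbool.h>
#include <stdio.h>

enum {
	GR_NOBLOCK	= 1 << 0,	/* fail instead of sleeping */
	GR_NOQUIESCE	= 1 << 1,	/* ignore a quiesced device */
};

static int get_resource(int id, unsigned int flags)
{
	if (flags & GR_NOBLOCK)
		printf("resource %d: non-blocking lookup\n", id);
	else
		printf("resource %d: may sleep\n", id);
	return id;
}

int main(void)
{
	bool noblock = true;

	get_resource(42, noblock ? GR_NOBLOCK : 0);
	get_resource(43, 0);
	return 0;
}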