Commit b9b083f9 authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu
Browse files

md/raid10: convert resync_lock to use seqlock



Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
and io can't be dispatched until 'barrier' is dropped.

Since holding the 'barrier' is not common, convert 'resync_lock' to use
seqlock so that holding lock can be avoided in fast path.

Signed-off-by: default avatarYu Kuai <yukuai3@huawei.com>
Reviewed-and-Tested-by: default avatarLogan Gunthorpe <logang@deltatee.com>
Signed-off-by: default avatarSong Liu <song@kernel.org>
parent 4f350284
Loading
Loading
Loading
Loading
+58 −29
Original line number Diff line number Diff line
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);

#include "raid1-10.c"

#define NULL_CMD
#define cmd_before(conf, cmd) \
	do { \
		write_sequnlock_irq(&(conf)->resync_lock); \
		cmd; \
	} while (0)
#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)

#define wait_event_barrier_cmd(conf, cond, cmd) \
	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
		       cmd_after(conf))

#define wait_event_barrier(conf, cond) \
	wait_event_barrier_cmd(conf, cond, NULL_CMD)

/*
 * for resync bio, r10bio pointer can be retrieved from the per-bio
 * 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)

static void raise_barrier(struct r10conf *conf, int force)
{
	spin_lock_irq(&conf->resync_lock);
	write_seqlock_irq(&conf->resync_lock);
	BUG_ON(force && !conf->barrier);

	/* Wait until no block IO is waiting (unless 'force') */
	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
			    conf->resync_lock);
	wait_event_barrier(conf, force || !conf->nr_waiting);

	/* block any new IO from starting */
	conf->barrier++;
	WRITE_ONCE(conf->barrier, conf->barrier + 1);

	/* Now wait for all pending IO to complete */
	wait_event_lock_irq(conf->wait_barrier,
			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
			    conf->resync_lock);
	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
				 conf->barrier < RESYNC_DEPTH);

	spin_unlock_irq(&conf->resync_lock);
	write_sequnlock_irq(&conf->resync_lock);
}

static void lower_barrier(struct r10conf *conf)
{
	unsigned long flags;
	spin_lock_irqsave(&conf->resync_lock, flags);
	conf->barrier--;
	spin_unlock_irqrestore(&conf->resync_lock, flags);

	write_seqlock_irqsave(&conf->resync_lock, flags);
	WRITE_ONCE(conf->barrier, conf->barrier - 1);
	write_sequnlock_irqrestore(&conf->resync_lock, flags);
	wake_up(&conf->wait_barrier);
}

@@ -990,11 +1004,31 @@ static bool stop_waiting_barrier(struct r10conf *conf)
	return false;
}

static bool wait_barrier_nolock(struct r10conf *conf)
{
	unsigned int seq = read_seqbegin(&conf->resync_lock);

	if (READ_ONCE(conf->barrier))
		return false;

	atomic_inc(&conf->nr_pending);
	if (!read_seqretry(&conf->resync_lock, seq))
		return true;

	if (atomic_dec_and_test(&conf->nr_pending))
		wake_up_barrier(conf);

	return false;
}

static bool wait_barrier(struct r10conf *conf, bool nowait)
{
	bool ret = true;

	spin_lock_irq(&conf->resync_lock);
	if (wait_barrier_nolock(conf))
		return true;

	write_seqlock_irq(&conf->resync_lock);
	if (conf->barrier) {
		/* Return false when nowait flag is set */
		if (nowait) {
@@ -1002,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
		} else {
			conf->nr_waiting++;
			raid10_log(conf->mddev, "wait barrier");
			wait_event_lock_irq(conf->wait_barrier,
					    stop_waiting_barrier(conf),
					    conf->resync_lock);
			wait_event_barrier(conf, stop_waiting_barrier(conf));
			conf->nr_waiting--;
		}
		if (!conf->nr_waiting)
@@ -1013,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
	/* Only increment nr_pending when we wait */
	if (ret)
		atomic_inc(&conf->nr_pending);
	spin_unlock_irq(&conf->resync_lock);
	write_sequnlock_irq(&conf->resync_lock);
	return ret;
}

@@ -1038,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
	 * must match the number of pending IOs (nr_pending) before
	 * we continue.
	 */
	spin_lock_irq(&conf->resync_lock);
	write_seqlock_irq(&conf->resync_lock);
	conf->array_freeze_pending++;
	conf->barrier++;
	WRITE_ONCE(conf->barrier, conf->barrier + 1);
	conf->nr_waiting++;
	wait_event_lock_irq_cmd(conf->wait_barrier,
				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
				conf->resync_lock,
				flush_pending_writes(conf));

	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
			conf->nr_queued + extra, flush_pending_writes(conf));
	conf->array_freeze_pending--;
	spin_unlock_irq(&conf->resync_lock);
	write_sequnlock_irq(&conf->resync_lock);
}

static void unfreeze_array(struct r10conf *conf)
{
	/* reverse the effect of the freeze */
	spin_lock_irq(&conf->resync_lock);
	conf->barrier--;
	write_seqlock_irq(&conf->resync_lock);
	WRITE_ONCE(conf->barrier, conf->barrier - 1);
	conf->nr_waiting--;
	wake_up(&conf->wait_barrier);
	spin_unlock_irq(&conf->resync_lock);
	write_sequnlock_irq(&conf->resync_lock);
}

static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4045,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
	INIT_LIST_HEAD(&conf->retry_list);
	INIT_LIST_HEAD(&conf->bio_end_io_list);

	spin_lock_init(&conf->resync_lock);
	seqlock_init(&conf->resync_lock);
	init_waitqueue_head(&conf->wait_barrier);
	atomic_set(&conf->nr_pending, 0);

@@ -4364,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
				rdev->new_raid_disk = rdev->raid_disk * 2;
				rdev->sectors = size;
			}
		conf->barrier = 1;
		WRITE_ONCE(conf->barrier, 1);
	}

	return conf;
+1 −1
Original line number Diff line number Diff line
@@ -76,7 +76,7 @@ struct r10conf {
	/* queue pending writes and submit them on unplug */
	struct bio_list		pending_bio_list;

	spinlock_t		resync_lock;
	seqlock_t		resync_lock;
	atomic_t		nr_pending;
	int			nr_waiting;
	int			nr_queued;