Commit a16578dd authored by Paul E. McKenney

rcu: Rework rcu_barrier() and callback-migration logic



This commit reworks rcu_barrier() and callback-migration logic to allow
rcu_barrier() to run concurrently with CPU-hotplug operations.  The key
trick is for callback migration to check whether an rcu_barrier() is in
flight, and, if so, enqueue the ->barrier_head callback on its behalf.
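
The in-flight check boils down to comparing the global rcu_state.barrier_sequence
against a per-CPU snapshot of it (->barrier_seq_snap in the diff below).  The
stand-alone program that follows is only a toy model of that comparison: the
names seq_state(), seq_ctr(), and need_entrain() are illustrative, and the
kernel's rcu_seq_*() helpers reserve more than the single low-order state bit
assumed here.

#include <stdbool.h>
#include <stdio.h>

/* Toy model: low bit = "barrier in progress", remaining bits = counter. */
static unsigned long seq_state(unsigned long s) { return s & 0x1UL; }
static unsigned long seq_ctr(unsigned long s)   { return s >> 1; }

/*
 * Mirrors the test in rcu_barrier_entrain() below: entrain ->barrier_head
 * only if a barrier is in flight and this CPU's snapshot has not yet
 * caught up with it.
 */
static bool need_entrain(unsigned long gseq, unsigned long lseq)
{
	if (seq_state(lseq) || !seq_state(gseq) || seq_ctr(lseq) != seq_ctr(gseq))
		return false;
	return true;
}

int main(void)
{
	unsigned long gseq = 8;	/* idle: no rcu_barrier() in flight */
	unsigned long snap = 8;	/* per-CPU snapshot from the previous barrier */

	printf("idle:            %d\n", need_entrain(gseq, snap));	/* 0: nothing to do */
	gseq |= 1;	/* rcu_barrier() marks the sequence in progress */
	printf("barrier started: %d\n", need_entrain(gseq, snap));	/* 1: entrain on this CPU's behalf */
	snap = gseq;	/* CPU handled; snapshot advances, preventing double counting */
	printf("already handled: %d\n", need_entrain(gseq, snap));	/* 0: skip */
	return 0;
}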

This commit adds synchronization with RCU's CPU-hotplug notifiers.  Taken
together, these changes will permit a later commit to remove the
cpus_read_lock() and cpus_read_unlock() calls from rcu_barrier().
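
Concretely, the synchronization is a new per-CPU ->barrier_lock (added to
struct rcu_data in the second file below).  The comment block that follows is
a condensed map, drawn from the diff, of where that lock is taken; it is a
reading aid rather than code from the patch.

/*
 * New ->barrier_lock acquisitions:
 *
 *  - rcu_barrier():		the per-CPU pass takes the lock, then either skips an
 *				empty ->cblist, entrains directly for an offline CPU, or
 *				drops the lock and IPIs an online CPU, retrying if the
 *				IPI loses a race with that CPU going offline.
 *  - rcu_barrier_handler():	the IPI handler takes the lock around rcu_barrier_entrain().
 *  - rcu_cpu_starting():	the CPU-online path holds the lock across the
 *				->qsmaskinitnext update.
 *  - rcutree_migrate_callbacks(): the CPU-offline path holds the lock while
 *				entraining and merging the outgoing CPU's callbacks.
 */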

[ paulmck: Updated per kbuild test robot feedback. ]
[ paulmck: Updated per review session with Neeraj, Frederic, Uladzislau, and Boqun. ]

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
parent 0cabb47a
kernel/rcu/tree.c  +61 −16
@@ -3987,13 +3987,16 @@ static void rcu_barrier_callback(struct rcu_head *rhp)
 }
 
 /*
- * Called with preemption disabled, and from cross-cpu IRQ context.
+ * If needed, entrain an rcu_barrier() callback on rdp->cblist.
  */
-static void rcu_barrier_func(void *cpu_in)
+static void rcu_barrier_entrain(struct rcu_data *rdp)
 {
-	uintptr_t cpu = (uintptr_t)cpu_in;
-	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+	unsigned long gseq = READ_ONCE(rcu_state.barrier_sequence);
+	unsigned long lseq = READ_ONCE(rdp->barrier_seq_snap);
 
+	lockdep_assert_held(&rdp->barrier_lock);
+	if (rcu_seq_state(lseq) || !rcu_seq_state(gseq) || rcu_seq_ctr(lseq) != rcu_seq_ctr(gseq))
+		return;
 	rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
 	rdp->barrier_head.func = rcu_barrier_callback;
 	debug_rcu_head_queue(&rdp->barrier_head);
@@ -4003,10 +4006,26 @@ static void rcu_barrier_func(void *cpu_in)
 		atomic_inc(&rcu_state.barrier_cpu_count);
 	} else {
 		debug_rcu_head_unqueue(&rdp->barrier_head);
-		rcu_barrier_trace(TPS("IRQNQ"), -1,
-				  rcu_state.barrier_sequence);
+		rcu_barrier_trace(TPS("IRQNQ"), -1, rcu_state.barrier_sequence);
 	}
 	rcu_nocb_unlock(rdp);
+	smp_store_release(&rdp->barrier_seq_snap, gseq);
 }
 
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_handler(void *cpu_in)
+{
+	uintptr_t cpu = (uintptr_t)cpu_in;
+	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+	lockdep_assert_irqs_disabled();
+	WARN_ON_ONCE(cpu != rdp->cpu);
+	WARN_ON_ONCE(cpu != smp_processor_id());
+	raw_spin_lock(&rdp->barrier_lock);
+	rcu_barrier_entrain(rdp);
+	raw_spin_unlock(&rdp->barrier_lock);
+}
+
 /**
@@ -4020,6 +4039,8 @@ static void rcu_barrier_func(void *cpu_in)
 void rcu_barrier(void)
 {
 	uintptr_t cpu;
+	unsigned long flags;
+	unsigned long gseq;
 	struct rcu_data *rdp;
 	unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
 
@@ -4038,6 +4059,7 @@ void rcu_barrier(void)
 
 	/* Mark the start of the barrier operation. */
 	rcu_seq_start(&rcu_state.barrier_sequence);
+	gseq = rcu_state.barrier_sequence;
 	rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
 
 	/*
@@ -4058,19 +4080,30 @@ void rcu_barrier(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(&rcu_data, cpu);
+retry:
+		if (smp_load_acquire(&rdp->barrier_seq_snap) == gseq)
+			continue;
+		raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
 		if (!rcu_segcblist_n_cbs(&rdp->cblist)) {
+			WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+			raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
 			rcu_barrier_trace(TPS("NQ"), cpu, rcu_state.barrier_sequence);
 			continue;
 		}
-		if (cpu_online(cpu)) {
-			rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
-			smp_call_function_single(cpu, rcu_barrier_func, (void *)cpu, 1);
-		} else {
+		if (!rcu_rdp_cpu_online(rdp)) {
+			rcu_barrier_entrain(rdp);
+			WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+			raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
 			rcu_barrier_trace(TPS("OfflineNoCBQ"), cpu, rcu_state.barrier_sequence);
-			local_irq_disable();
-			rcu_barrier_func((void *)cpu);
-			local_irq_enable();
+			continue;
 		}
+		raw_spin_unlock_irqrestore(&rdp->barrier_lock, flags);
+		if (smp_call_function_single(cpu, rcu_barrier_handler, (void *)cpu, 1)) {
+			schedule_timeout_uninterruptible(1);
+			goto retry;
+		}
+		WARN_ON_ONCE(READ_ONCE(rdp->barrier_seq_snap) != gseq);
+		rcu_barrier_trace(TPS("OnlineQ"), cpu, rcu_state.barrier_sequence);
 	}
 	cpus_read_unlock();
 
@@ -4087,6 +4120,12 @@ void rcu_barrier(void)
 	/* Mark the end of the barrier operation. */
 	rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
 	rcu_seq_end(&rcu_state.barrier_sequence);
+	gseq = rcu_state.barrier_sequence;
+	for_each_possible_cpu(cpu) {
+		rdp = per_cpu_ptr(&rcu_data, cpu);
+
+		WRITE_ONCE(rdp->barrier_seq_snap, gseq);
+	}
 
 	/* Other rcu_barrier() invocations can now safely proceed. */
 	mutex_unlock(&rcu_state.barrier_mutex);
@@ -4134,6 +4173,8 @@ rcu_boot_init_percpu_data(int cpu)
 	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
+	raw_spin_lock_init(&rdp->barrier_lock);
+	rdp->barrier_seq_snap = rcu_state.barrier_sequence;
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
 	rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
 	rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
@@ -4284,8 +4325,10 @@ void rcu_cpu_starting(unsigned int cpu)
 	local_irq_save(flags);
 	arch_spin_lock(&rcu_state.ofl_lock);
 	rcu_dynticks_eqs_online();
+	raw_spin_lock(&rdp->barrier_lock);
 	raw_spin_lock_rcu_node(rnp);
 	WRITE_ONCE(rnp->qsmaskinitnext, rnp->qsmaskinitnext | mask);
+	raw_spin_unlock(&rdp->barrier_lock);
 	newcpu = !(rnp->expmaskinitnext & mask);
 	rnp->expmaskinitnext |= mask;
 	/* Allow lockless access for expedited grace periods. */
@@ -4372,7 +4415,9 @@ void rcutree_migrate_callbacks(int cpu)
 	    rcu_segcblist_empty(&rdp->cblist))
 		return;  /* No callbacks to migrate. */
 
-	local_irq_save(flags);
+	raw_spin_lock_irqsave(&rdp->barrier_lock, flags);
+	WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
+	rcu_barrier_entrain(rdp);
 	my_rdp = this_cpu_ptr(&rcu_data);
 	my_rnp = my_rdp->mynode;
 	rcu_nocb_lock(my_rdp); /* irqs already disabled. */
@@ -4382,10 +4427,10 @@ void rcutree_migrate_callbacks(int cpu)
 	needwake = rcu_advance_cbs(my_rnp, rdp) ||
 		   rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
+	raw_spin_unlock(&rdp->barrier_lock); /* irqs remain disabled. */
 	needwake = needwake || rcu_advance_cbs(my_rnp, my_rdp);
 	rcu_segcblist_disable(&rdp->cblist);
-	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
-		     !rcu_segcblist_n_cbs(&my_rdp->cblist));
+	WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != !rcu_segcblist_n_cbs(&my_rdp->cblist));
 	if (rcu_rdp_is_offloaded(my_rdp)) {
 		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */
 		__call_rcu_nocb_wake(my_rdp, true, flags);
kernel/rcu/tree.h  +2 −0
@@ -188,6 +188,8 @@ struct rcu_data {
 	bool rcu_forced_tick_exp;	/*   ... provide QS to expedited GP. */
 
 	/* 4) rcu_barrier(), OOM callbacks, and expediting. */
+	raw_spinlock_t barrier_lock;	/* Protects ->barrier_seq_snap. */
+	unsigned long barrier_seq_snap;	/* Snap of rcu_state.barrier_sequence. */
 	struct rcu_head barrier_head;
 	int exp_dynticks_snap;		/* Double-check need for IPI. */