Commit bf95b2bc authored by Paul E. McKenney
Browse files

rcu: Switch polled grace-period APIs to ->gp_seq_polled

This commit switches the existing polled grace-period APIs to use a
new ->gp_seq_polled counter in the rcu_state structure.  An additional
->gp_seq_polled_snap counter in that same structure allows the normal
grace period kthread to interact properly with the !SMP !PREEMPT fastpath
through synchronize_rcu().  The first of the two to note the end of a
given grace period will make knowledge of this transition available to
the polled API.

This commit is in preparation for polled expedited grace periods.

[ paulmck: Fix use of rcu_state.gp_seq_polled to start normal grace period. ]

Link: https://lore.kernel.org/all/20220121142454.1994916-1-bfoster@redhat.com/
Link: https://docs.google.com/document/d/1RNKWW9jQyfjxw2E8dsXVTdvZYh0HnYeSHDKog9jhdN8/edit?usp=sharing


Cc: Brian Foster <bfoster@redhat.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Ian Kent <raven@themaw.net>
Co-developed-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
parent d0eac20f
Loading
Loading
Loading
Loading
+94 −4
Original line number Diff line number Diff line
@@ -1775,6 +1775,78 @@ static void rcu_strict_gp_boundary(void *unused)
	invoke_rcu_core();
}

// Report whether rcu_init() has already been invoked, as indicated by a
// nonzero rcu_state.n_online_cpus.  Callers use this (for example) to
// decide whether spinlocks may be acquired safely.
static bool rcu_init_invoked(void)
{
	return rcu_state.n_online_cpus != 0;
}

// Inform the polled grace-period API that a grace period is beginning,
// and pass the resulting ->gp_seq_polled value back through @snap.
// Once rcu_init() has been invoked, the root rcu_node structure's lock
// is expected to be held (checked via lockdep).
static void rcu_poll_gp_seq_start(unsigned long *snap)
{
	struct rcu_node *root = rcu_get_root();

	if (rcu_init_invoked())
		raw_lockdep_assert_held_rcu_node(root);

	// A zero state means RCU was idle from the polled API's
	// viewpoint, so mark the start of a grace period.
	if (rcu_seq_state(rcu_state.gp_seq_polled) == 0)
		rcu_seq_start(&rcu_state.gp_seq_polled);

	// Whether or not a grace period was just started, snapshot
	// the current counter value for the caller.
	*snap = rcu_state.gp_seq_polled;
}

// Inform the polled grace-period API that a grace period is ending.
// @snap is the snapshot recorded when that grace period began.  Once
// rcu_init() has been invoked, the root rcu_node structure's lock is
// expected to be held (checked via lockdep).
static void rcu_poll_gp_seq_end(unsigned long *snap)
{
	struct rcu_node *root = rcu_get_root();

	if (rcu_init_invoked())
		raw_lockdep_assert_held_rcu_node(root);

	// Nothing was noted, or the noted grace period is no longer the
	// one in effect: just zero the caller's snapshot to avoid
	// counter-wrap problems.
	if (!*snap || *snap != rcu_state.gp_seq_polled) {
		*snap = 0;
		return;
	}

	// The previously noted grace period is still in effect, so
	// record its end and zero the saved snapshot.
	rcu_seq_end(&rcu_state.gp_seq_polled);
	rcu_state.gp_seq_polled_snap = 0;
}

// Make the polled API aware of the beginning of a grace period, but
// where caller does not hold the root rcu_node structure's lock.
//
// This is a wrapper around rcu_poll_gp_seq_start() that acquires and
// releases the root rcu_node structure's lock on the caller's behalf.
// @snap is passed through unchanged and receives the snapshot written
// by rcu_poll_gp_seq_start().
static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
{
	struct rcu_node *rnp = rcu_get_root();

	// Take the lock only once rcu_init() has been invoked; before
	// that point the call below runs without locking.
	if (rcu_init_invoked()) {
		// The _irq lock/unlock pair below is used, so the caller
		// is expected to arrive with interrupts enabled.
		lockdep_assert_irqs_enabled();
		raw_spin_lock_irq_rcu_node(rnp);
	}
	rcu_poll_gp_seq_start(snap);
	// Re-check so that the unlock mirrors the conditional lock above.
	if (rcu_init_invoked())
		raw_spin_unlock_irq_rcu_node(rnp);
}

// Make the polled API aware of the end of a grace period, but where
// caller does not hold the root rcu_node structure's lock.
//
// This is a wrapper around rcu_poll_gp_seq_end() that acquires and
// releases the root rcu_node structure's lock on the caller's behalf.
// @snap is passed through unchanged to rcu_poll_gp_seq_end().
static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
{
	struct rcu_node *rnp = rcu_get_root();

	// Take the lock only once rcu_init() has been invoked; before
	// that point the call below runs without locking.
	if (rcu_init_invoked()) {
		// The _irq lock/unlock pair below is used, so the caller
		// is expected to arrive with interrupts enabled.
		lockdep_assert_irqs_enabled();
		raw_spin_lock_irq_rcu_node(rnp);
	}
	rcu_poll_gp_seq_end(snap);
	// Re-check so that the unlock mirrors the conditional lock above.
	if (rcu_init_invoked())
		raw_spin_unlock_irq_rcu_node(rnp);
}

/*
 * Initialize a new grace period.  Return false if no grace period required.
 */
@@ -1810,6 +1882,7 @@ static noinline_for_stack bool rcu_gp_init(void)
	rcu_seq_start(&rcu_state.gp_seq);
	ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
	trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
	rcu_poll_gp_seq_start(&rcu_state.gp_seq_polled_snap);
	raw_spin_unlock_irq_rcu_node(rnp);

	/*
@@ -2069,6 +2142,7 @@ static noinline void rcu_gp_cleanup(void)
	 * safe for us to drop the lock in order to mark the grace
	 * period as completed in all of the rcu_node structures.
	 */
	rcu_poll_gp_seq_end(&rcu_state.gp_seq_polled_snap);
	raw_spin_unlock_irq_rcu_node(rnp);

	/*
@@ -3837,8 +3911,18 @@ void synchronize_rcu(void)
			 lock_is_held(&rcu_lock_map) ||
			 lock_is_held(&rcu_sched_lock_map),
			 "Illegal synchronize_rcu() in RCU read-side critical section");
	if (rcu_blocking_is_gp())
	if (rcu_blocking_is_gp()) {
		// Note well that this code runs with !PREEMPT && !SMP.
		// In addition, all code that advances grace periods runs
		// at process level.  Therefore, this GP overlaps with other
		// GPs only by being fully nested within them, which allows
		// reuse of ->gp_seq_polled_snap.
		rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
		rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
		if (rcu_init_invoked())
			cond_resched_tasks_rcu_qs();
		return;  // Context allows vacuous grace periods.
	}
	if (rcu_gp_is_expedited())
		synchronize_rcu_expedited();
	else
@@ -3860,7 +3944,7 @@ unsigned long get_state_synchronize_rcu(void)
	 * before the load from ->gp_seq.
	 */
	smp_mb();  /* ^^^ */
	return rcu_seq_snap(&rcu_state.gp_seq);
	return rcu_seq_snap(&rcu_state.gp_seq_polled);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);

@@ -3889,7 +3973,13 @@ unsigned long start_poll_synchronize_rcu(void)
	rdp = this_cpu_ptr(&rcu_data);
	rnp = rdp->mynode;
	raw_spin_lock_rcu_node(rnp); // irqs already disabled.
	needwake = rcu_start_this_gp(rnp, rdp, gp_seq);
	// Note it is possible for a grace period to have elapsed between
	// the above call to get_state_synchronize_rcu() and the below call
	// to rcu_seq_snap.  This is OK, the worst that happens is that we
	// get a grace period that no one needed.  These accesses are ordered
	// by smp_mb(), and we are accessing them in the opposite order
	// from which they are updated at grace-period start, as required.
	needwake = rcu_start_this_gp(rnp, rdp, rcu_seq_snap(&rcu_state.gp_seq));
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	if (needwake)
		rcu_gp_kthread_wake();
@@ -3925,7 +4015,7 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
bool poll_state_synchronize_rcu(unsigned long oldstate)
{
	if (oldstate == RCU_GET_STATE_COMPLETED ||
	    rcu_seq_done_exact(&rcu_state.gp_seq, oldstate)) {
	    rcu_seq_done_exact(&rcu_state.gp_seq_polled, oldstate)) {
		smp_mb(); /* Ensure GP ends before subsequent accesses. */
		return true;
	}
+2 −0
Original line number Diff line number Diff line
@@ -323,6 +323,8 @@ struct rcu_state {
	short gp_state;				/* GP kthread sleep state. */
	unsigned long gp_wake_time;		/* Last GP kthread wake. */
	unsigned long gp_wake_seq;		/* ->gp_seq at ^^^. */
	unsigned long gp_seq_polled;		/* GP seq for polled API. */
	unsigned long gp_seq_polled_snap;	/* ->gp_seq_polled at normal GP start. */

	/* End of fields guarded by root rcu_node's lock. */