Commit 6be7436d authored by Paul E. McKenney's avatar Paul E. McKenney
Browse files

rcu: Add rcu_gp_might_be_stalled()



This commit adds rcu_gp_might_be_stalled(), which returns true if there
is some reason to believe that the RCU grace period is stalled.  The use
case is where an RCU free-memory path needs to allocate memory in order
to free it, a situation that should be avoided where possible.

But where it is necessary, there is always the alternative of using
synchronize_rcu() to wait for a grace period in order to avoid the
allocation.  And if the grace period is stalled, allocating memory to
asynchronously wait for it is a bad idea of epic proportions: Far better
to let others use the memory, because these others might actually be
able to free that memory before the grace period ends.

Thus, rcu_gp_might_be_stalled() can be used to help decide whether
allocating memory on an RCU free path is a semi-reasonable course
of action.

Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Uladzislau Rezki <urezki@gmail.com>
Signed-off-by: default avatarPaul E. McKenney <paulmck@kernel.org>
parent a6a82ce1
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
static inline void rcu_momentary_dyntick_idle(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
static inline bool rcu_gp_might_be_stalled(void) { return false; }

/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ void rcu_barrier(void);
bool rcu_eqs_special_set(int cpu);
void rcu_momentary_dyntick_idle(void);
void kfree_rcu_scheduler_running(void);
bool rcu_gp_might_be_stalled(void);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);

+36 −4
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@ int sysctl_panic_on_rcu_stall __read_mostly;
#else
#define RCU_STALL_DELAY_DELTA		0
#endif
#define RCU_STALL_MIGHT_DIV		8
#define RCU_STALL_MIGHT_MIN		(2 * HZ)

/* Limit-check stall timeouts specified at boottime and runtime. */
int rcu_jiffies_till_stall_check(void)
@@ -40,6 +42,36 @@ int rcu_jiffies_till_stall_check(void)
}
EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);

/**
 * rcu_gp_might_be_stalled - Is it likely that the grace period is stalled?
 *
 * Returns @true if the current grace period is sufficiently old that
 * it is reasonable to assume that it might be stalled.  This can be
 * useful when deciding whether to allocate memory to enable RCU-mediated
 * freeing on the one hand or just invoking synchronize_rcu() on the other.
 * The latter is preferable when the grace period is stalled.
 *
 * Note that sampling of the .gp_start and .gp_seq fields must be done
 * carefully to avoid false positives at the beginnings and ends of
 * grace periods.
 */
bool rcu_gp_might_be_stalled(void)
{
	unsigned long d = rcu_jiffies_till_stall_check() / RCU_STALL_MIGHT_DIV;
	unsigned long j = jiffies;

	if (d < RCU_STALL_MIGHT_MIN)
		d = RCU_STALL_MIGHT_MIN;
	smp_mb(); // jiffies before .gp_seq to avoid false positives.
	if (!rcu_gp_in_progress())
		return false;
	// Long delays at this point avoids false positive, but a delay
	// of ULONG_MAX/4 jiffies voids your no-false-positive warranty.
	smp_mb(); // .gp_seq before second .gp_start
	// And ditto here.
	return !time_before(j, READ_ONCE(rcu_state.gp_start) + d);
}

/* Don't do RCU CPU stall warnings during long sysrq printouts. */
void rcu_sysrq_start(void)
{
@@ -104,8 +136,8 @@ static void record_gp_stall_check_time(void)

	WRITE_ONCE(rcu_state.gp_start, j);
	j1 = rcu_jiffies_till_stall_check();
	/* Record ->gp_start before ->jiffies_stall. */
	smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
	smp_mb(); // ->gp_start before ->jiffies_stall and caller's ->gp_seq.
	WRITE_ONCE(rcu_state.jiffies_stall, j + j1);
	rcu_state.jiffies_resched = j + j1 / 2;
	rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
}