Documentation/admin-guide/kernel-parameters.txt (+9 −0)

@@ -4152,6 +4152,15 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcutree.rcu_unlock_delay= [KNL]
+			In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+			this specifies an rcu_read_unlock()-time delay
+			in microseconds.  This defaults to zero.
+			Larger delays increase the probability of
+			catching RCU pointer leaks, that is, buggy use
+			of RCU-protected pointers after the relevant
+			rcu_read_unlock() has completed.
+
 	rcutree.sysrq_rcu= [KNL]
 			Commandeer a sysrq key to dump out Tree RCU's
 			rcu_node tree with an eye towards determining
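This parameter exists to widen the window in which a leaked pointer can be caught. A minimal sketch of the bug class it targets, assuming a hypothetical RCU-protected global gp and structure foo (neither appears in this patch):

	#include <linux/rcupdate.h>
	#include <linux/printk.h>

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;	/* hypothetical RCU-protected pointer */

	static void buggy_reader(void)
	{
		struct foo *p;

		rcu_read_lock();
		p = rcu_dereference(gp);
		rcu_read_unlock();		/* reader section ends here ... */
		pr_info("a = %d\n", p->a);	/* BUG: ... yet p is still used */
	}

Booting with rcutree.rcu_unlock_delay=100 makes each outermost rcu_read_unlock() dawdle for 100 microseconds, giving the (deliberately short) grace period time to complete, and the updater's kfree() time to land, before the buggy dereference, at which point a tool such as KASAN can flag the use-after-free.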
include/linux/rcupdate.h (+7 −0)

@@ -55,6 +55,12 @@ void __rcu_read_unlock(void);
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
+#ifdef CONFIG_TINY_RCU
+#define rcu_read_unlock_strict() do { } while (0)
+#else
+void rcu_read_unlock_strict(void);
+#endif
+
 static inline void __rcu_read_lock(void)
 {
 	preempt_disable();
@@ -63,6 +69,7 @@ static inline void __rcu_read_lock(void)
 static inline void __rcu_read_unlock(void)
 {
 	preempt_enable();
+	rcu_read_unlock_strict();
 }
 
 static inline int rcu_preempt_depth(void)
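Note that the header gains only the declaration; no hunk in this excerpt defines rcu_read_unlock_strict() (upstream, the definition lands in kernel/rcu/tree_plugin.h). A sketch of what it plausibly does, tying together the rcu_unlock_delay parameter and the rcu_report_qs_rdp() changes shown below (an assumption, not the literal upstream body):

	// Sketch only: report a quiescent state at the outermost
	// rcu_read_unlock() of a PREEMPT=n strict-grace-period kernel.
	void rcu_read_unlock_strict(void)
	{
		struct rcu_data *rdp;

		/* A QS may be reported only from a non-atomic context. */
		if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
			return;
		rdp = this_cpu_ptr(&rcu_data);
		rcu_report_qs_rdp(rdp);
		udelay(rcu_unlock_delay);	/* rcutree.rcu_unlock_delay */
	}

Because __rcu_read_unlock() invokes the hook after preempt_enable(), the outermost unlock reaches it with preemption re-enabled, while nested unlocks (and unlocks in IRQ or softirq context) bail out on the irqs_disabled()/preempt_count() checks.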
kernel/rcu/Kconfig (+5 −3)

@@ -135,10 +135,12 @@ config RCU_FANOUT
 
 config RCU_FANOUT_LEAF
 	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
+	range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 3 if RCU_STRICT_GRACE_PERIOD
 	depends on TREE_RCU && RCU_EXPERT
-	default 16
+	default 16 if !RCU_STRICT_GRACE_PERIOD
+	default 2 if RCU_STRICT_GRACE_PERIOD
 	help
 	  This option controls the leaf-level fanout of hierarchical
 	  implementations of RCU, and allows trading off cache misses
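The net effect of the tightened ranges and defaults is to force a deep combining tree even on small machines: with RCU_FANOUT_LEAF=2, a 16-CPU system gets eight leaf rcu_node structures instead of a single 16-CPU leaf, so quiescent-state reports must propagate through multiple tree levels. Presumably that is the point, since it exercises grace-period code paths that a flat tree would never reach.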
kernel/rcu/Kconfig.debug (+15 −0)

@@ -114,4 +114,19 @@ config RCU_EQS_DEBUG
 	  Say N here if you need ultimate kernel/user switch latencies
 	  Say Y if you are unsure
 
+config RCU_STRICT_GRACE_PERIOD
+	bool "Provide debug RCU implementation with short grace periods"
+	depends on DEBUG_KERNEL && RCU_EXPERT
+	default n
+	select PREEMPT_COUNT if PREEMPT=n
+	help
+	  Select this option to build an RCU variant that is strict about
+	  grace periods, making them as short as it can.  This limits
+	  scalability, destroys real-time response, degrades battery
+	  lifetime and kills performance.  Don't try this on large
+	  machines, as in systems with more than about 10 or 20 CPUs.
+	  But in conjunction with tools like KASAN, it can be helpful
+	  when looking for certain types of RCU usage bugs, for example,
+	  too-short RCU read-side critical sections.
+
 endmenu # "RCU Debugging"
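The "select PREEMPT_COUNT if PREEMPT=n" line is load-bearing: in a PREEMPT=n kernel, rcu_read_lock() is just preempt_disable() (see the rcupdate.h hunk above), and preempt_disable() maintains preempt_count() only when CONFIG_PREEMPT_COUNT=y. Without that select, the strict unlock hook sketched earlier could not distinguish an outermost rcu_read_unlock() from a nested one, nor detect that it is running in an atomic context where reporting a quiescent state would be unsafe.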
kernel/rcu/tree.c (+62 −11)

@@ -165,6 +165,12 @@ module_param(gp_init_delay, int, 0444);
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);
 
+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
 /*
  * This rcu parameter is runtime-read-only. It reflects
  * a minimum allowed number of objects which can be cached
@@ -455,24 +461,25 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
 }
 
-#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+				// Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
 static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
 static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.
 static long qlowmark = DEFAULT_RCU_QLOMARK;
 #define DEFAULT_RCU_QOVLD_MULT 2
 #define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1;	  /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1;	  // No pre-initialization lock acquisitions!
 
 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 module_param(qovld, long, 0444);
 
-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
 static int rcu_divisor = 7;
@@ -1571,6 +1578,19 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
 	raw_spin_unlock_rcu_node(rnp);
 }
 
+/*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state.  This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+}
+
 /*
  * Update CPU-local rcu_data state to record the beginnings and ends of
  * grace periods.  The caller must hold the ->lock of the leaf rcu_node
@@ -1641,6 +1661,7 @@ static void note_gp_changes(struct rcu_data *rdp)
 	}
 	needwake = __note_gp_changes(rnp, rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	rcu_strict_gp_check_qs();
 	if (needwake)
 		rcu_gp_kthread_wake();
 }
@@ -1678,6 +1699,15 @@ static void rcu_gp_torture_wait(void)
 	}
 }
 
+/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+	invoke_rcu_core();
+}
+
 /*
  * Initialize a new grace period.  Return false if no grace period required.
  */
@@ -1809,6 +1839,10 @@ static bool rcu_gp_init(void)
 		WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	}
 
+	// If strict, make all CPUs aware of new grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
+
 	return true;
 }
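Two details in the hunks above deserve comment. First, the strict-mode code is guarded by IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) rather than #ifdef, so it is always compiled and type-checked but costs nothing in non-strict builds. A sketch of what the compiler effectively sees with the option disabled:

	/* Effective code when CONFIG_RCU_STRICT_GRACE_PERIOD=n: the
	 * IS_ENABLED() expression is the integer constant 0, the branch
	 * is provably dead, and the function body is eliminated. */
	static void rcu_strict_gp_check_qs(void)
	{
		if (0) {
			rcu_read_lock();
			rcu_read_unlock();
		}
	}

Second, the empty critical section is not a no-op in strict builds: there, __rcu_read_unlock() calls rcu_read_unlock_strict() (see the rcupdate.h hunk), which can report this CPU's quiescent state, so a CPU noticing a new grace period in note_gp_changes() contributes its quiescent state immediately. The on_each_cpu(rcu_strict_gp_boundary, NULL, 0) call at grace-period start (and its twin at grace-period end, below) IPIs every online CPU to raise RCU core processing, the trailing 0 meaning the caller does not wait for the handlers, so all CPUs notice the boundary promptly.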
@@ -2025,6 +2059,10 @@ static void rcu_gp_cleanup(void)
 					    rcu_state.gp_flags & RCU_GP_FLAG_INIT);
 	}
 	raw_spin_unlock_irq_rcu_node(rnp);
+
+	// If strict, make all CPUs aware of the end of the old grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
 }
@@ -2203,7 +2241,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
  * structure.  This must be called from the specified CPU.
  */
 static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
@@ -2212,6 +2250,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		rcu_segcblist_is_offloaded(&rdp->cblist);
 	struct rcu_node *rnp;
 
+	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
@@ -2228,7 +2267,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		return;
 	}
 	mask = rdp->grpmask;
-	if (rdp->cpu == smp_processor_id())
-		rdp->core_needs_qs = false;
+	rdp->core_needs_qs = false;
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2278,7 +2316,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
 	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
 	 * judge of that).
 	 */
-	rcu_report_qs_rdp(rdp->cpu, rdp);
+	rcu_report_qs_rdp(rdp);
 }
@@ -2621,6 +2659,14 @@ void rcu_force_quiescent_state(void)
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
+// Workqueue handler for an RCU reader for kernels enforcing strict RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+	rcu_read_lock();
+	rcu_read_unlock();
+}
+
 /* Perform RCU core processing work for the current CPU. */
 static __latent_entropy void rcu_core(void)
 {
@@ -2665,6 +2711,10 @@ static __latent_entropy void rcu_core(void)
 	/* Do any needed deferred wakeups of rcuo kthreads. */
 	do_nocb_deferred_wakeup(rdp);
 	trace_rcu_utilization(TPS("End RCU core"));
+
+	// If strict GPs, schedule an RCU reader in a clean environment.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
 }
 
 static void rcu_core_si(struct softirq_action *h)
@@ -3862,6 +3912,7 @@ rcu_boot_init_percpu_data(int cpu)
 	/* Set up local state, ensuring consistent view of global state. */
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
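Finally, rcu_core() and rcu_boot_init_percpu_data() reference rdp->strict_work, a field that no hunk in this excerpt declares; the matching kernel/rcu/tree.h change is presumably part of the same series. A sketch of the assumed field:

	/* Assumed companion change in kernel/rcu/tree.h (not shown in
	 * this diff): a per-CPU work item for the strict-mode reader. */
	struct rcu_data {
		/* ... existing fields ... */
		struct work_struct strict_work;	/* Strict-GP RCU reader. */
		/* ... existing fields ... */
	};

The queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work) call is the design point: the work item runs on the very CPU whose quiescent state is wanted, but in workqueue context, with interrupts enabled and preemption possible. That is the "clean environment" the comment refers to, and it is precisely the context in which the strict unlock hook's irqs_disabled()/preempt_count() checks pass, letting the empty reader in strict_work_handler() report a quiescent state that softirq context (rcu_core() itself) could not.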