Documentation/admin-guide/kernel-parameters.txt (+9 −0)

@@ -4152,6 +4152,15 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcutree.rcu_unlock_delay= [KNL]
+			In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels,
+			this specifies an rcu_read_unlock()-time delay
+			in microseconds.  This defaults to zero.
+			Larger delays increase the probability of
+			catching RCU pointer leaks, that is, buggy use
+			of RCU-protected pointers after the relevant
+			rcu_read_unlock() has completed.
+
 	rcutree.sysrq_rcu= [KNL]
 			Commandeer a sysrq key to dump out Tree RCU's
 			rcu_node tree with an eye towards determining
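This parameter exists to widen the window in which a leaked pointer can be caught. A minimal sketch of the bug class it targets, assuming a hypothetical RCU-protected global gp and structure foo (neither appears in this patch):

	#include <linux/rcupdate.h>
	#include <linux/printk.h>

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;	/* hypothetical RCU-protected pointer */

	static void buggy_reader(void)
	{
		struct foo *p;

		rcu_read_lock();
		p = rcu_dereference(gp);
		rcu_read_unlock();		/* reader section ends here ... */
		pr_info("a = %d\n", p->a);	/* BUG: ... yet p is still used */
	}

Booting with rcutree.rcu_unlock_delay=100 makes each outermost rcu_read_unlock() dawdle for 100 microseconds, giving the (deliberately short) grace period time to complete, and the updater's kfree() time to land, before the buggy dereference, at which point a tool such as KASAN can flag the use-after-free.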
include/linux/rcupdate.h (+7 −0)

@@ -55,6 +55,12 @@ void __rcu_read_unlock(void);
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
+#ifdef CONFIG_TINY_RCU
+#define rcu_read_unlock_strict() do { } while (0)
+#else
+void rcu_read_unlock_strict(void);
+#endif
+
 static inline void __rcu_read_lock(void)
 {
 	preempt_disable();
@@ -63,6 +69,7 @@ static inline void __rcu_read_lock(void)
 static inline void __rcu_read_unlock(void)
 {
 	preempt_enable();
+	rcu_read_unlock_strict();
 }
 
 static inline int rcu_preempt_depth(void)
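Note that the header gains only the declaration; no hunk in this excerpt defines rcu_read_unlock_strict() (upstream, the definition lands in kernel/rcu/tree_plugin.h). A sketch of what it plausibly does, tying together the rcu_unlock_delay parameter and the rcu_report_qs_rdp() changes shown below (an assumption, not the literal upstream body):

	// Sketch only: report a quiescent state at the outermost
	// rcu_read_unlock() of a PREEMPT=n strict-grace-period kernel.
	void rcu_read_unlock_strict(void)
	{
		struct rcu_data *rdp;

		/* A QS may be reported only from a non-atomic context. */
		if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
			return;
		rdp = this_cpu_ptr(&rcu_data);
		rcu_report_qs_rdp(rdp);
		udelay(rcu_unlock_delay);	/* rcutree.rcu_unlock_delay */
	}

Because __rcu_read_unlock() invokes the hook after preempt_enable(), the outermost unlock reaches it with preemption re-enabled, while nested unlocks (and unlocks in IRQ or softirq context) bail out on the irqs_disabled()/preempt_count() checks.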
kernel/rcu/Kconfig (+5 −3)

@@ -135,10 +135,12 @@ config RCU_FANOUT
 
 config RCU_FANOUT_LEAF
 	int "Tree-based hierarchical RCU leaf-level fanout value"
-	range 2 64 if 64BIT
-	range 2 32 if !64BIT
+	range 2 64 if 64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 32 if !64BIT && !RCU_STRICT_GRACE_PERIOD
+	range 2 3 if RCU_STRICT_GRACE_PERIOD
 	depends on TREE_RCU && RCU_EXPERT
-	default 16
+	default 16 if !RCU_STRICT_GRACE_PERIOD
+	default 2 if RCU_STRICT_GRACE_PERIOD
 	help
 	  This option controls the leaf-level fanout of hierarchical
 	  implementations of RCU, and allows trading off cache misses
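The net effect of the tightened ranges and defaults is to force a deep combining tree even on small machines: with RCU_FANOUT_LEAF=2, a 16-CPU system gets eight leaf rcu_node structures instead of a single 16-CPU leaf, so quiescent-state reports must propagate through multiple tree levels. Presumably that is the point, since it exercises grace-period code paths that a flat tree would never reach.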
kernel/rcu/Kconfig.debug (+15 −0)

@@ -114,4 +114,19 @@ config RCU_EQS_DEBUG
 	  Say N here if you need ultimate kernel/user switch latencies
 	  Say Y if you are unsure
 
+config RCU_STRICT_GRACE_PERIOD
+	bool "Provide debug RCU implementation with short grace periods"
+	depends on DEBUG_KERNEL && RCU_EXPERT
+	default n
+	select PREEMPT_COUNT if PREEMPT=n
+	help
+	  Select this option to build an RCU variant that is strict about
+	  grace periods, making them as short as it can.  This limits
+	  scalability, destroys real-time response, degrades battery
+	  lifetime and kills performance.  Don't try this on large
+	  machines, as in systems with more than about 10 or 20 CPUs.
+	  But in conjunction with tools like KASAN, it can be helpful
+	  when looking for certain types of RCU usage bugs, for example,
+	  too-short RCU read-side critical sections.
+
 endmenu # "RCU Debugging"
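The "select PREEMPT_COUNT if PREEMPT=n" line is load-bearing: in a PREEMPT=n kernel, rcu_read_lock() is just preempt_disable() (see the rcupdate.h hunk above), and preempt_disable() maintains preempt_count() only when CONFIG_PREEMPT_COUNT=y. Without that select, the strict unlock hook sketched earlier could not distinguish an outermost rcu_read_unlock() from a nested one, nor detect that it is running in an atomic context where reporting a quiescent state would be unsafe.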
kernel/rcu/tree.c (+62 −11)

@@ -165,6 +165,12 @@ module_param(gp_init_delay, int, 0444);
 static int gp_cleanup_delay;
 module_param(gp_cleanup_delay, int, 0444);
 
+// Add delay to rcu_read_unlock() for strict grace periods.
+static int rcu_unlock_delay;
+#ifdef CONFIG_RCU_STRICT_GRACE_PERIOD
+module_param(rcu_unlock_delay, int, 0444);
+#endif
+
 /*
  * This rcu parameter is runtime-read-only. It reflects
  * a minimum allowed number of objects which can be cached
@@ -455,24 +461,25 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 	return __this_cpu_read(rcu_data.dynticks_nesting) == 0;
 }
 
-#define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch ... */
-#define DEFAULT_MAX_RCU_BLIMIT 10000 /* ... even during callback flood. */
+#define DEFAULT_RCU_BLIMIT (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 1000 : 10)
+				// Maximum callbacks per rcu_do_batch ...
+#define DEFAULT_MAX_RCU_BLIMIT 10000 // ... even during callback flood.
 static long blimit = DEFAULT_RCU_BLIMIT;
-#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
+#define DEFAULT_RCU_QHIMARK 10000 // If this many pending, ignore blimit.
 static long qhimark = DEFAULT_RCU_QHIMARK;
-#define DEFAULT_RCU_QLOMARK 100   /* Once only this many pending, use blimit. */
+#define DEFAULT_RCU_QLOMARK 100   // Once only this many pending, use blimit.
 static long qlowmark = DEFAULT_RCU_QLOMARK;
 #define DEFAULT_RCU_QOVLD_MULT 2
 #define DEFAULT_RCU_QOVLD (DEFAULT_RCU_QOVLD_MULT * DEFAULT_RCU_QHIMARK)
-static long qovld = DEFAULT_RCU_QOVLD; /* If this many pending, hammer QS. */
-static long qovld_calc = -1;	  /* No pre-initialization lock acquisitions! */
+static long qovld = DEFAULT_RCU_QOVLD; // If this many pending, hammer QS.
+static long qovld_calc = -1;	  // No pre-initialization lock acquisitions!
 
 module_param(blimit, long, 0444);
 module_param(qhimark, long, 0444);
 module_param(qlowmark, long, 0444);
 module_param(qovld, long, 0444);
 
-static ulong jiffies_till_first_fqs = ULONG_MAX;
+static ulong jiffies_till_first_fqs = IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ? 0 : ULONG_MAX;
 static ulong jiffies_till_next_fqs = ULONG_MAX;
 static bool rcu_kick_kthreads;
 static int rcu_divisor = 7;
@@ -1571,6 +1578,19 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp,
 	raw_spin_unlock_rcu_node(rnp);
 }
 
+/*
+ * In CONFIG_RCU_STRICT_GRACE_PERIOD=y kernels, attempt to generate a
+ * quiescent state.  This is intended to be invoked when the CPU notices
+ * a new grace period.
+ */
+static void rcu_strict_gp_check_qs(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD)) {
+		rcu_read_lock();
+		rcu_read_unlock();
+	}
+}
+
 /*
  * Update CPU-local rcu_data state to record the beginnings and ends of
  * grace periods.  The caller must hold the ->lock of the leaf rcu_node
@@ -1641,6 +1661,7 @@ static void note_gp_changes(struct rcu_data *rdp)
 	}
 	needwake = __note_gp_changes(rnp, rdp);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	rcu_strict_gp_check_qs();
 	if (needwake)
 		rcu_gp_kthread_wake();
 }
@@ -1678,6 +1699,15 @@ static void rcu_gp_torture_wait(void)
 	}
 }
 
+/*
+ * Handler for on_each_cpu() to invoke the target CPU's RCU core
+ * processing.
+ */
+static void rcu_strict_gp_boundary(void *unused)
+{
+	invoke_rcu_core();
+}
+
 /*
  * Initialize a new grace period.  Return false if no grace period required.
  */
@@ -1809,6 +1839,10 @@ static bool rcu_gp_init(void)
 		WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	}
 
+	// If strict, make all CPUs aware of new grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
+
 	return true;
 }
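Two details in the hunks above deserve comment. First, the strict-mode code is guarded by IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) rather than #ifdef, so it is always compiled and type-checked but costs nothing in non-strict builds. A sketch of what the compiler effectively sees with the option disabled:

	/* Effective code when CONFIG_RCU_STRICT_GRACE_PERIOD=n: the
	 * IS_ENABLED() expression is the integer constant 0, the branch
	 * is provably dead, and the function body is eliminated. */
	static void rcu_strict_gp_check_qs(void)
	{
		if (0) {
			rcu_read_lock();
			rcu_read_unlock();
		}
	}

Second, the empty critical section is not a no-op in strict builds: there, __rcu_read_unlock() calls rcu_read_unlock_strict() (see the rcupdate.h hunk), which can report this CPU's quiescent state, so a CPU noticing a new grace period in note_gp_changes() contributes its quiescent state immediately. The on_each_cpu(rcu_strict_gp_boundary, NULL, 0) call at grace-period start (and its twin at grace-period end, below) IPIs every online CPU to raise RCU core processing, the trailing 0 meaning the caller does not wait for the handlers, so all CPUs notice the boundary promptly.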
@@ -2025,6 +2059,10 @@ static void rcu_gp_cleanup(void)
 					    rcu_state.gp_flags & RCU_GP_FLAG_INIT);
 	}
 	raw_spin_unlock_irq_rcu_node(rnp);
+
+	// If strict, make all CPUs aware of the end of the old grace period.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		on_each_cpu(rcu_strict_gp_boundary, NULL, 0);
 }
@@ -2203,7 +2241,7 @@ rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
  * structure.  This must be called from the specified CPU.
  */
 static void
-rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
+rcu_report_qs_rdp(struct rcu_data *rdp)
 {
 	unsigned long flags;
 	unsigned long mask;
@@ -2212,6 +2250,7 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		rcu_segcblist_is_offloaded(&rdp->cblist);
 	struct rcu_node *rnp;
 
+	WARN_ON_ONCE(rdp->cpu != smp_processor_id());
 	rnp = rdp->mynode;
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq ||
@@ -2228,7 +2267,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
 		return;
 	}
 	mask = rdp->grpmask;
-	if (rdp->cpu == smp_processor_id())
-		rdp->core_needs_qs = false;
+	rdp->core_needs_qs = false;
 	if ((rnp->qsmask & mask) == 0) {
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2278,7 +2316,7 @@ rcu_check_quiescent_state(struct rcu_data *rdp)
 	 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
 	 * judge of that).
 	 */
-	rcu_report_qs_rdp(rdp->cpu, rdp);
+	rcu_report_qs_rdp(rdp);
 }
@@ -2621,6 +2659,14 @@ void rcu_force_quiescent_state(void)
 }
 EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
 
+// Workqueue handler for an RCU reader for kernels enforcing strict RCU
+// grace periods.
+static void strict_work_handler(struct work_struct *work)
+{
+	rcu_read_lock();
+	rcu_read_unlock();
+}
+
 /* Perform RCU core processing work for the current CPU. */
 static __latent_entropy void rcu_core(void)
 {
@@ -2665,6 +2711,10 @@ static __latent_entropy void rcu_core(void)
 	/* Do any needed deferred wakeups of rcuo kthreads. */
 	do_nocb_deferred_wakeup(rdp);
 	trace_rcu_utilization(TPS("End RCU core"));
+
+	// If strict GPs, schedule an RCU reader in a clean environment.
+	if (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD))
+		queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work);
 }
 
 static void rcu_core_si(struct softirq_action *h)
@@ -3862,6 +3912,7 @@ rcu_boot_init_percpu_data(int cpu)
 	/* Set up local state, ensuring consistent view of global state. */
 	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
+	INIT_WORK(&rdp->strict_work, strict_work_handler);
 	WARN_ON_ONCE(rdp->dynticks_nesting != 1);
 	WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
 	rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
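Finally, rcu_core() and rcu_boot_init_percpu_data() reference rdp->strict_work, a field that no hunk in this excerpt declares; the matching kernel/rcu/tree.h change is presumably part of the same series. A sketch of the assumed field:

	/* Assumed companion change in kernel/rcu/tree.h (not shown in
	 * this diff): a per-CPU work item for the strict-mode reader. */
	struct rcu_data {
		/* ... existing fields ... */
		struct work_struct strict_work;	/* Strict-GP RCU reader. */
		/* ... existing fields ... */
	};

The queue_work_on(rdp->cpu, rcu_gp_wq, &rdp->strict_work) call is the design point: the work item runs on the very CPU whose quiescent state is wanted, but in workqueue context, with interrupts enabled and preemption possible. That is the "clean environment" the comment refers to, and it is precisely the context in which the strict unlock hook's irqs_disabled()/preempt_count() checks pass, letting the empty reader in strict_work_handler() report a quiescent state that softirq context (rcu_core() itself) could not.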