rcu: Make call_rcu() lazy to save power (3cb278e7) · Commits · EulixOS / Software / Kernel

include/linux/rcupdate.h

+9 −0

Original line number	Diff line number	Diff line
		@@ -108,6 +108,15 @@ static inline int rcu_preempt_depth(void)

		#endif /* #else #ifdef CONFIG_PREEMPT_RCU */

		/*
		 * call_rcu_hurry() is only declared here as an out-of-line function when
		 * CONFIG_RCU_LAZY is set.  Without that option it is an inline wrapper
		 * whose whole body is a call to call_rcu() with the same arguments.
		 */
		#ifdef CONFIG_RCU_LAZY
		void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func);
		#else
		static inline void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
		{
		call_rcu(head, func);
		}
		#endif

		/* Internal to kernel */
		void rcu_init(void);
		extern int rcu_scheduler_active;

kernel/rcu/Kconfig

+8 −0

Original line number	Diff line number	Diff line
		@@ -311,4 +311,12 @@ config TASKS_TRACE_RCU_READ_MB
		Say N here if you hate read-side memory barriers.
		Take the default if you are unsure.

		# Opt-in option (default n); only selectable when RCU_NOCB_CPU is enabled.
		config RCU_LAZY
		bool "RCU callback lazy invocation functionality"
		depends on RCU_NOCB_CPU
		default n
		help
		To save power, batch RCU callbacks and flush after delay, memory
		pressure, or callback list growing too big.

		endmenu # "RCU Subsystem"

kernel/rcu/rcu.h

+8 −0

Original line number	Diff line number	Diff line
		@@ -474,6 +474,14 @@ enum rcutorture_type {
		INVALID_RCU_FLAVOR
		};

		/*
		 * rcu_lazy_get_jiffies_till_flush() / rcu_lazy_set_jiffies_till_flush():
		 * with CONFIG_RCU_LAZY these are only declared here (the definitions are
		 * not in this header).  Without it they collapse to inline stubs: the
		 * getter always returns 0 and the setter ignores its argument.
		 */
		#if defined(CONFIG_RCU_LAZY)
		unsigned long rcu_lazy_get_jiffies_till_flush(void);
		void rcu_lazy_set_jiffies_till_flush(unsigned long j);
		#else
		static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; }
		static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { }
		#endif

		#if defined(CONFIG_TREE_RCU)
		void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
		unsigned long *gp_seq);

kernel/rcu/tiny.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -44,7 +44,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = {

		/*
		 * rcu_barrier() - hand a callback-queueing function to wait_rcu_gp().
		 *
		 * NOTE(review): this hunk is marked "+1 −1", so the two wait_rcu_gp()
		 * lines below look like the removed/added pair of the diff rather than
		 * two successive calls -- presumably only the call_rcu_hurry form is
		 * kept; confirm against the applied file.
		 */
		void rcu_barrier(void)
		{
		wait_rcu_gp(call_rcu);
		wait_rcu_gp(call_rcu_hurry);
		}
		EXPORT_SYMBOL(rcu_barrier);

kernel/rcu/tree.c

+83 −46

Original line number	Diff line number	Diff line
		@@ -2728,47 +2728,8 @@ static void check_cb_ovld(struct rcu_data *rdp)
		raw_spin_unlock_rcu_node(rnp);
		}

		/**
		* call_rcu() - Queue an RCU callback for invocation after a grace period.
		* @head: structure to be used for queueing the RCU updates.
		* @func: actual callback function to be invoked after the grace period
		*
		* The callback function will be invoked some time after a full grace
		* period elapses, in other words after all pre-existing RCU read-side
		* critical sections have completed. However, the callback function
		* might well execute concurrently with RCU read-side critical sections
		* that started after call_rcu() was invoked.
		*
		* RCU read-side critical sections are delimited by rcu_read_lock()
		* and rcu_read_unlock(), and may be nested. In addition, but only in
		* v5.0 and later, regions of code across which interrupts, preemption,
		* or softirqs have been disabled also serve as RCU read-side critical
		* sections. This includes hardware interrupt handlers, softirq handlers,
		* and NMI handlers.
		*
		* Note that all CPUs must agree that the grace period extended beyond
		* all pre-existing RCU read-side critical section. On systems with more
		* than one CPU, this means that when "func()" is invoked, each CPU is
		* guaranteed to have executed a full memory barrier since the end of its
		* last RCU read-side critical section whose beginning preceded the call
		* to call_rcu(). It also means that each CPU executing an RCU read-side
		* critical section that continues beyond the start of "func()" must have
		* executed a memory barrier after the call_rcu() but before the beginning
		* of that RCU read-side critical section. Note that these guarantees
		* include CPUs that are offline, idle, or executing in user mode, as
		* well as CPUs that are executing in the kernel.
		*
		* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
		* resulting RCU callback function "func()", then both CPU A and CPU B are
		* guaranteed to execute a full memory barrier during the time interval
		* between the call to call_rcu() and the invocation of "func()" -- even
		* if CPU A and CPU B are the same CPU (but again only if the system has
		* more than one CPU).
		*
		* Implementation of these memory-ordering guarantees is described here:
		* Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
		*/
		void call_rcu(struct rcu_head *head, rcu_callback_t func)
		static void
		__call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy)
		{
		static atomic_t doublefrees;
		unsigned long flags;
		@@ -2809,7 +2770,7 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
		}

		check_cb_ovld(rdp);
		if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags))
		if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy))
		return; // Enqueued onto ->nocb_bypass, so just leave.
		// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
		rcu_segcblist_enqueue(&rdp->cblist, head);
		@@ -2831,8 +2792,84 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
		local_irq_restore(flags);
		}
		}
		EXPORT_SYMBOL_GPL(call_rcu);

		#ifdef CONFIG_RCU_LAZY
		/**
		 * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and
		 * flush all lazy callbacks (including the new one) to the main ->cblist while
		 * doing so.
		 *
		 * @head: structure to be used for queueing the RCU updates.
		 * @func: actual callback function to be invoked after the grace period
		 *
		 * The callback function will be invoked some time after a full grace
		 * period elapses, in other words after all pre-existing RCU read-side
		 * critical sections have completed.
		 *
		 * Use this API instead of call_rcu() if you don't want the callback to be
		 * invoked after very long periods of time, which can happen on systems without
		 * memory pressure and on systems which are lightly loaded or mostly idle.
		 * This function will cause callbacks to be invoked sooner rather than later,
		 * at the expense of extra power. Other than that, this function is identical
		 * to call_rcu() and reuses its logic. Refer to call_rcu() for more details
		 * about memory ordering and other functionality.
		 */
		void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func)
		{
			/*
			 * Always pass lazy == false.  This is a plain call rather than
			 * "return f();": the helper returns void, and ISO C does not allow
			 * a return statement with an expression in a void function.
			 */
			__call_rcu_common(head, func, false);
		}
		EXPORT_SYMBOL_GPL(call_rcu_hurry);
		#endif

		/**
		* call_rcu() - Queue an RCU callback for invocation after a grace period.
		* By default the callbacks are 'lazy' and are kept hidden from the main
		* ->cblist to prevent starting of grace periods too soon.
		* If you desire grace periods to start very soon, use call_rcu_hurry().
		*
		* @head: structure to be used for queueing the RCU updates.
		* @func: actual callback function to be invoked after the grace period
		*
		* The callback function will be invoked some time after a full grace
		* period elapses, in other words after all pre-existing RCU read-side
		* critical sections have completed. However, the callback function
		* might well execute concurrently with RCU read-side critical sections
		* that started after call_rcu() was invoked.
		*
		* RCU read-side critical sections are delimited by rcu_read_lock()
		* and rcu_read_unlock(), and may be nested. In addition, but only in
		* v5.0 and later, regions of code across which interrupts, preemption,
		* or softirqs have been disabled also serve as RCU read-side critical
		* sections. This includes hardware interrupt handlers, softirq handlers,
		* and NMI handlers.
		*
		* Note that all CPUs must agree that the grace period extended beyond
		* all pre-existing RCU read-side critical sections. On systems with more
		* than one CPU, this means that when "func()" is invoked, each CPU is
		* guaranteed to have executed a full memory barrier since the end of its
		* last RCU read-side critical section whose beginning preceded the call
		* to call_rcu(). It also means that each CPU executing an RCU read-side
		* critical section that continues beyond the start of "func()" must have
		* executed a memory barrier after the call_rcu() but before the beginning
		* of that RCU read-side critical section. Note that these guarantees
		* include CPUs that are offline, idle, or executing in user mode, as
		* well as CPUs that are executing in the kernel.
		*
		* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
		* resulting RCU callback function "func()", then both CPU A and CPU B are
		* guaranteed to execute a full memory barrier during the time interval
		* between the call to call_rcu() and the invocation of "func()" -- even
		* if CPU A and CPU B are the same CPU (but again only if the system has
		* more than one CPU).
		*
		* Implementation of these memory-ordering guarantees is described here:
		* Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.rst.
		*/
		void call_rcu(struct rcu_head *head, rcu_callback_t func)
		{
			/*
			 * The lazy argument is true only when CONFIG_RCU_LAZY is enabled.
			 * This is a plain call rather than "return f();": the helper
			 * returns void, and ISO C does not allow a return statement with
			 * an expression in a void function.
			 */
			__call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY));
		}
		EXPORT_SYMBOL_GPL(call_rcu);

		/* Maximum number of jiffies to wait before draining a batch. */
		#define KFREE_DRAIN_JIFFIES (5 * HZ)
		@@ -3507,7 +3544,7 @@ void synchronize_rcu(void)
		if (rcu_gp_is_expedited())
		synchronize_rcu_expedited();
		else
		wait_rcu_gp(call_rcu);
		wait_rcu_gp(call_rcu_hurry);
		return;
		}

		@@ -3910,7 +3947,7 @@ static void rcu_barrier_entrain(struct rcu_data *rdp)
		* if it's fully lazy.
		*/
		was_alldone = rcu_rdp_is_offloaded(rdp) && !rcu_segcblist_pend_cbs(&rdp->cblist);
		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false));
		wake_nocb = was_alldone && rcu_segcblist_pend_cbs(&rdp->cblist);
		if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head)) {
		atomic_inc(&rcu_state.barrier_cpu_count);
		@@ -4336,7 +4373,7 @@ void rcutree_migrate_callbacks(int cpu)
		my_rdp = this_cpu_ptr(&rcu_data);
		my_rnp = my_rdp->mynode;
		rcu_nocb_lock(my_rdp); /* irqs already disabled. */
		WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies));
		WARN_ON_ONCE(!rcu_nocb_flush_bypass(my_rdp, NULL, jiffies, false));
		raw_spin_lock_rcu_node(my_rnp); /* irqs already disabled. */
		/* Leverage recent GPs and set GP for new callbacks. */
		needwake = rcu_advance_cbs(my_rnp, rdp) \|\|