Documentation/admin-guide/kernel-parameters.txt +5 −0

@@ -3721,6 +3721,11 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcutree.sysrq_rcu= [KNL]
+			Commandeer a sysrq key to dump out Tree RCU's
+			rcu_node tree with an eye towards determining
+			why a new grace period has not yet started.
+
 	rcuperf.gp_async= [KNL]
 			Measure performance of asynchronous
 			grace-period primitives such as call_rcu().

kernel/rcu/tree.c +72 −30

@@ -62,6 +62,7 @@
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
 #include <linux/tick.h>
+#include <linux/sysrq.h>
 
 #include "tree.h"
 #include "rcu.h"

@@ -115,6 +116,9 @@
 int num_rcu_lvl[] = NUM_RCU_LVL_INIT;
 int rcu_num_nodes __read_mostly = NUM_RCU_NODES; /* Total # rcu_nodes in use. */
 /* panic() on RCU Stall sysctl. */
 int sysctl_panic_on_rcu_stall __read_mostly;
+/* Commandeer a sysrq key to dump RCU's tree. */
+static bool sysrq_rcu;
+module_param(sysrq_rcu, bool, 0444);
 
 /*
  * The rcu_scheduler_active variable is initialized to the value

@@ -502,6 +506,14 @@ unsigned long rcu_exp_batches_completed(void)
 }
 EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
 
+/*
+ * Return the root node of the rcu_state structure.
+ */
+static struct rcu_node *rcu_get_root(void)
+{
+	return &rcu_state.node[0];
+}
+
 /*
  * Convert a ->gp_state value to a character string.
  */

@@ -519,19 +531,30 @@ void show_rcu_gp_kthreads(void)
 {
 	int cpu;
 	unsigned long j;
+	unsigned long ja;
+	unsigned long jr;
+	unsigned long jw;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 
-	j = jiffies - READ_ONCE(rcu_state.gp_activity);
-	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n",
+	j = jiffies;
+	ja = j - READ_ONCE(rcu_state.gp_activity);
+	jr = j - READ_ONCE(rcu_state.gp_req_activity);
+	jw = j - READ_ONCE(rcu_state.gp_wake_time);
+	pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %lu ->gp_req_activity %lu ->gp_wake_time %lu ->gp_wake_seq %ld ->gp_seq %ld ->gp_seq_needed %ld ->gp_flags %#x\n",
 		rcu_state.name, gp_state_getname(rcu_state.gp_state),
-		rcu_state.gp_state, rcu_state.gp_kthread->state, j);
+		rcu_state.gp_state,
+		rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL,
+		ja, jr, jw, (long)READ_ONCE(rcu_state.gp_wake_seq),
+		(long)READ_ONCE(rcu_state.gp_seq),
+		(long)READ_ONCE(rcu_get_root()->gp_seq_needed),
+		READ_ONCE(rcu_state.gp_flags));
 	rcu_for_each_node_breadth_first(rnp) {
 		if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
 			continue;
-		pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
-			rnp->grplo, rnp->grphi, rnp->gp_seq, rnp->gp_seq_needed);
+		pr_info("\trcu_node %d:%d ->gp_seq %ld ->gp_seq_needed %ld\n",
+			rnp->grplo, rnp->grphi, (long)rnp->gp_seq,
+			(long)rnp->gp_seq_needed);
 		if (!rcu_is_leaf_node(rnp))
 			continue;
 		for_each_leaf_node_possible_cpu(rnp, cpu) {

@@ -540,14 +563,35 @@ void show_rcu_gp_kthreads(void)
 			    ULONG_CMP_GE(rcu_state.gp_seq, rdp->gp_seq_needed))
 				continue;
-			pr_info("\tcpu %d ->gp_seq_needed %lu\n",
-				cpu, rdp->gp_seq_needed);
+			pr_info("\tcpu %d ->gp_seq_needed %ld\n",
+				cpu, (long)rdp->gp_seq_needed);
 		}
 	}
 	/* sched_show_task(rcu_state.gp_kthread); */
 }
 EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
 
+/* Dump grace-period-request information due to commandeered sysrq. */
+static void sysrq_show_rcu(int key)
+{
+	show_rcu_gp_kthreads();
+}
+
+static struct sysrq_key_op sysrq_rcudump_op = {
+	.handler = sysrq_show_rcu,
+	.help_msg = "show-rcu(y)",
+	.action_msg = "Show RCU tree",
+	.enable_mask = SYSRQ_ENABLE_DUMP,
+};
+
+static int __init rcu_sysrq_init(void)
+{
+	if (sysrq_rcu)
+		return register_sysrq_key('y', &sysrq_rcudump_op);
+	return 0;
+}
+early_initcall(rcu_sysrq_init);
+
 /*
  * Send along grace-period-related data for rcutorture diagnostics.
  */
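Taken together, the new rcutree.sysrq_rcu= boot parameter and the register_sysrq_key('y', &sysrq_rcudump_op) call above mean the rcu_node dump can be requested at run time through the standard sysrq-trigger interface, once the kernel has been booted with rcutree.sysrq_rcu=1. A minimal userspace sketch, not part of the patch; it assumes CONFIG_MAGIC_SYSRQ, procfs, and a kernel.sysrq mask that permits dump operations:

/*
 * Trigger the commandeered "show-rcu" sysrq key from userspace.
 * Equivalent to: echo y > /proc/sysrq-trigger (needs root).
 * The resulting grace-period/rcu_node dump appears in the kernel log.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sysrq-trigger", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/sysrq-trigger");
		return 1;
	}
	if (write(fd, "y", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}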
@@ -565,14 +609,6 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 }
 EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
 
-/*
- * Return the root node of the rcu_state structure.
- */
-static struct rcu_node *rcu_get_root(void)
-{
-	return &rcu_state.node[0];
-}
-
 /*
  * Enter an RCU extended quiescent state, which can be either the
  * idle loop or adaptive-tickless usermode execution.

@@ -1169,7 +1205,7 @@ static void rcu_check_gp_kthread_starvation(void)
 	pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
 	       rcu_state.name, j,
 	       (long)rcu_seq_current(&rcu_state.gp_seq),
-	       rcu_state.gp_flags,
+	       READ_ONCE(rcu_state.gp_flags),
 	       gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
 	       gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
 	if (gpk) {

@@ -1545,17 +1581,28 @@ static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
 }
 
 /*
- * Awaken the grace-period kthread.  Don't do a self-awaken, and don't
- * bother awakening when there is nothing for the grace-period kthread
- * to do (as in several CPUs raced to awaken, and we lost), and finally
- * don't try to awaken a kthread that has not yet been created.
+ * Awaken the grace-period kthread.  Don't do a self-awaken (unless in
+ * an interrupt or softirq handler), and don't bother awakening when there
+ * is nothing for the grace-period kthread to do (as in several CPUs raced
+ * to awaken, and we lost), and finally don't try to awaken a kthread that
+ * has not yet been created.  If all those checks are passed, track some
+ * debug information and awaken.
+ *
+ * So why do the self-wakeup when in an interrupt or softirq handler
+ * in the grace-period kthread's context?  Because the kthread might have
+ * been interrupted just as it was going to sleep, and just after the final
+ * pre-sleep check of the awaken condition.  In this case, a wakeup really
+ * is required, and is therefore supplied.
  */
 static void rcu_gp_kthread_wake(void)
 {
-	if (current == rcu_state.gp_kthread ||
+	if ((current == rcu_state.gp_kthread &&
+	     !in_interrupt() && !in_serving_softirq()) ||
 	    !READ_ONCE(rcu_state.gp_flags) ||
 	    !rcu_state.gp_kthread)
 		return;
+	WRITE_ONCE(rcu_state.gp_wake_time, jiffies);
+	WRITE_ONCE(rcu_state.gp_wake_seq, READ_ONCE(rcu_state.gp_seq));
 	swake_up_one(&rcu_state.gp_wq);
 }

@@ -1699,7 +1746,7 @@ static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
 		zero_cpu_stall_ticks(rdp);
 	}
 	rdp->gp_seq = rnp->gp_seq;  /* Remember new grace-period state. */
-	if (ULONG_CMP_GE(rnp->gp_seq_needed, rdp->gp_seq_needed) || rdp->gpwrap)
+	if (ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed) || rdp->gpwrap)
 		rdp->gp_seq_needed = rnp->gp_seq_needed;
 	WRITE_ONCE(rdp->gpwrap, false);
 	rcu_gpnum_ovf(rnp, rdp);
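The __note_gp_changes() hunk above rewrites the ->gp_seq_needed update as ULONG_CMP_LT(rdp->gp_seq_needed, rnp->gp_seq_needed). These grace-period counters wrap, which is why plain relational operators cannot be used on ->gp_seq values. A small standalone sketch, not part of the patch; the macro definitions are copied from kernel/rcu/rcu.h, and the test values are invented:

/*
 * Wraparound-safe sequence comparison, as used on ->gp_seq and
 * ->gp_seq_needed in the hunks above.
 */
#include <limits.h>
#include <stdio.h>

#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long old_seq = ULONG_MAX - 1;	/* about to wrap */
	unsigned long new_seq = old_seq + 3;	/* wrapped: now a small value */

	/* Plain comparison is fooled by the wrap... */
	printf("plain:  new >= old? %d\n", new_seq >= old_seq);		/* 0 */
	/* ...but the ULONG_CMP_* macros still order the values correctly. */
	printf("ulong:  new >= old? %d\n", ULONG_CMP_GE(new_seq, old_seq));	/* 1 */
	printf("ulong:  old <  new? %d\n", ULONG_CMP_LT(old_seq, new_seq));	/* 1 */

	/*
	 * When the two values are equal, "a >= b" holds but "a < b" does not,
	 * so the rewritten test in __note_gp_changes() skips the store when
	 * rdp->gp_seq_needed has already caught up to rnp->gp_seq_needed.
	 */
	printf("equal:  GE %d  LT %d\n",
	       ULONG_CMP_GE(new_seq, new_seq), ULONG_CMP_LT(new_seq, new_seq));
	return 0;
}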
@@ -1927,7 +1974,7 @@ static void rcu_gp_fqs_loop(void)
 		if (!ret) {
 			rcu_state.jiffies_force_qs = jiffies + j;
 			WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
-				   jiffies + 3 * j);
+				   jiffies + (j ? 3 * j : 2));
 		}
 		trace_rcu_grace_period(rcu_state.name,
 				       READ_ONCE(rcu_state.gp_seq),

@@ -2646,16 +2693,11 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp,
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 		return;
 	}
-	pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
-		 __func__, (long)READ_ONCE(rcu_state.gp_seq),
-		 (long)READ_ONCE(rnp_root->gp_seq_needed),
-		 j - rcu_state.gp_req_activity, j - rcu_state.gp_activity,
-		 rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name,
-		 rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL);
 	WARN_ON(1);
 	if (rnp_root != rnp)
 		raw_spin_unlock_rcu_node(rnp_root);
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	show_rcu_gp_kthreads();
 }

kernel/rcu/tree.h +9 −8

@@ -230,7 +230,13 @@ struct rcu_data {
 		/* Leader CPU takes GP-end wakeups. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
-	/* 6) Diagnostic data, including RCU CPU stall warnings. */
+	/* 6) RCU priority boosting. */
+	struct task_struct *rcu_cpu_kthread_task;
+					/* rcuc per-CPU kthread or NULL. */
+	unsigned int rcu_cpu_kthread_status;
+	char rcu_cpu_has_work;
+
+	/* 7) Diagnostic data, including RCU CPU stall warnings. */
 	unsigned int softirq_snap;	/* Snapshot of softirq activity. */
 	/* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
 	struct irq_work rcu_iw;		/* Check for non-irq activity. */

@@ -299,6 +305,8 @@ struct rcu_state {
 	struct swait_queue_head gp_wq;		/* Where GP task waits. */
 	short gp_flags;				/* Commands for GP task. */
 	short gp_state;				/* GP kthread sleep state. */
+	unsigned long gp_wake_time;		/* Last GP kthread wake. */
+	unsigned long gp_wake_seq;		/* ->gp_seq at ^^^. */
 
 	/* End of fields guarded by root rcu_node's lock. */

@@ -398,13 +406,6 @@
 static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
 
 int rcu_dynticks_snap(struct rcu_data *rdp);
 
-#ifdef CONFIG_RCU_BOOST
-DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
-DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-DECLARE_PER_CPU(char, rcu_cpu_has_work);
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
 static void rcu_qs(void);
kernel/rcu/tree_plugin.h +28 −40

@@ -34,17 +34,7 @@
 #include "../time/tick-internal.h"
 
 #ifdef CONFIG_RCU_BOOST
-
 #include "../locking/rtmutex_common.h"
-
-/*
- * Control variables for per-CPU and per-rcu_node kthreads.
- */
-static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
-DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
-DEFINE_PER_CPU(char, rcu_cpu_has_work);
-
 #else /* #ifdef CONFIG_RCU_BOOST */
 
 /*

@@ -1243,11 +1233,11 @@ static void invoke_rcu_callbacks_kthread(void)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__this_cpu_write(rcu_cpu_has_work, 1);
-	if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
-	    current != __this_cpu_read(rcu_cpu_kthread_task)) {
-		rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task),
-			      __this_cpu_read(rcu_cpu_kthread_status));
+	__this_cpu_write(rcu_data.rcu_cpu_has_work, 1);
+	if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL &&
+	    current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) {
+		rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task),
+			      __this_cpu_read(rcu_data.rcu_cpu_kthread_status));
 	}
 	local_irq_restore(flags);
 }

@@ -1258,7 +1248,7 @@ static void invoke_rcu_callbacks_kthread(void)
  */
 static bool rcu_is_callbacks_kthread(void)
 {
-	return __this_cpu_read(rcu_cpu_kthread_task) == current;
+	return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current;
 }
 
 #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000)

@@ -1315,12 +1305,12 @@ static void rcu_cpu_kthread_setup(unsigned int cpu)
 static void rcu_cpu_kthread_park(unsigned int cpu)
 {
-	per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
+	per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU;
 }
 
 static int rcu_cpu_kthread_should_run(unsigned int cpu)
 {
-	return __this_cpu_read(rcu_cpu_has_work);
+	return __this_cpu_read(rcu_data.rcu_cpu_has_work);
 }

@@ -1330,15 +1320,14 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu)
  */
 static void rcu_cpu_kthread(unsigned int cpu)
 {
-	unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status);
-	char work, *workp = this_cpu_ptr(&rcu_cpu_has_work);
+	unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status);
+	char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work);
 	int spincnt;
 
 	for (spincnt = 0; spincnt < 10; spincnt++) {
 		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
-		this_cpu_inc(rcu_cpu_kthread_loops);
 		local_irq_disable();
 		work = *workp;
 		*workp = 0;

@@ -1390,7 +1379,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 }
 
 static struct smp_hotplug_thread rcu_cpu_thread_spec = {
-	.store			= &rcu_cpu_kthread_task,
+	.store			= &rcu_data.rcu_cpu_kthread_task,
 	.thread_should_run	= rcu_cpu_kthread_should_run,
 	.thread_fn		= rcu_cpu_kthread,
 	.thread_comm		= "rcuc/%u",

@@ -1407,7 +1396,7 @@ static void __init rcu_spawn_boost_kthreads(void)
 	int cpu;
 
 	for_each_possible_cpu(cpu)
-		per_cpu(rcu_cpu_has_work, cpu) = 0;
+		per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0;
 	if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__))
 		return;
 	rcu_for_each_leaf_node(rnp)

@@ -1773,22 +1762,24 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 /*
  * Offload callback processing from the boot-time-specified set of CPUs
- * specified by rcu_nocb_mask.  For each CPU in the set, there is a
- * kthread created that pulls the callbacks from the corresponding CPU,
- * waits for a grace period to elapse, and invokes the callbacks.
- * The no-CBs CPUs do a wake_up() on their kthread when they insert
- * a callback into any empty list, unless the rcu_nocb_poll boot parameter
- * has been specified, in which case each kthread actively polls its
- * CPU.  (Which isn't so great for energy efficiency, but which does
- * reduce RCU's overhead on that CPU.)
+ * specified by rcu_nocb_mask.  For the CPUs in the set, there are kthreads
+ * created that pull the callbacks from the corresponding CPU, wait for
+ * a grace period to elapse, and invoke the callbacks.  These kthreads
+ * are organized into leaders, which manage incoming callbacks, wait for
+ * grace periods, and awaken followers, and the followers, which only
+ * invoke callbacks.  Each leader is its own follower.  The no-CBs CPUs
+ * do a wake_up() on their kthread when they insert a callback into any
+ * empty list, unless the rcu_nocb_poll boot parameter has been specified,
+ * in which case each kthread actively polls its CPU.  (Which isn't so great
+ * for energy efficiency, but which does reduce RCU's overhead on that CPU.)
  *
  * This is intended to be used in conjunction with Frederic Weisbecker's
  * adaptive-idle work, which would seriously reduce OS jitter on CPUs
  * running CPU-bound user-mode computations.
  *
- * Offloading of callback processing could also in theory be used as
- * an energy-efficiency measure because CPUs with no RCU callbacks
- * queued are more aggressive about entering dyntick-idle mode.
+ * Offloading of callbacks can also be used as an energy-efficiency
+ * measure because CPUs with no RCU callbacks queued are more aggressive
+ * about entering dyntick-idle mode.
  */
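The reworked comment above describes the leader/follower organization of the no-CBs kthreads. Purely as an illustration of that shape, here is a userspace pthread analogue; it is not part of the patch and not the kernel's implementation, all names and the shutdown logic are invented, and (unlike the real design, where each leader is also its own follower) the leader here does not invoke callbacks itself. Producers hand "callbacks" to a leader, the leader waits out a simulated grace period and wakes its followers, and the followers invoke the callbacks.

/*
 * Illustrative leader/follower hand-off, loosely modeled on the comment
 * above.  Compile with: cc -pthread demo.c
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

#define NR_FOLLOWERS 2

static sem_t leader_wake;			/* producers -> leader */
static sem_t follower_wake[NR_FOLLOWERS];	/* leader -> followers */
static atomic_int pending[NR_FOLLOWERS];	/* newly queued callbacks */
static atomic_int ready[NR_FOLLOWERS];		/* callbacks past the "GP" */
static atomic_int done;

static void enqueue_callback(int cpu)		/* a no-CBs CPU posting work */
{
	if (atomic_fetch_add(&pending[cpu], 1) == 0)
		sem_post(&leader_wake);		/* list was empty: wake leader */
}

static void *leader(void *arg)
{
	(void)arg;
	while (!atomic_load(&done)) {
		sem_wait(&leader_wake);		/* wait for incoming callbacks */
		usleep(1000);			/* simulated grace-period wait */
		for (int i = 0; i < NR_FOLLOWERS; i++) {
			int n = atomic_exchange(&pending[i], 0);

			if (n) {
				atomic_fetch_add(&ready[i], n);
				sem_post(&follower_wake[i]);
			}
		}
	}
	return NULL;
}

static void *follower(void *arg)
{
	int i = (int)(long)arg;

	while (!atomic_load(&done)) {
		sem_wait(&follower_wake[i]);
		printf("follower %d invoked %d callbacks\n",
		       i, atomic_exchange(&ready[i], 0));
	}
	return NULL;
}

int main(void)
{
	pthread_t lt, ft[NR_FOLLOWERS];

	sem_init(&leader_wake, 0, 0);
	for (int i = 0; i < NR_FOLLOWERS; i++)
		sem_init(&follower_wake[i], 0, 0);
	pthread_create(&lt, NULL, leader, NULL);
	for (int i = 0; i < NR_FOLLOWERS; i++)
		pthread_create(&ft[i], NULL, follower, (void *)(long)i);

	for (int k = 0; k < 6; k++)
		enqueue_callback(k % NR_FOLLOWERS);
	sleep(1);

	atomic_store(&done, 1);			/* crude shutdown for the demo */
	sem_post(&leader_wake);
	for (int i = 0; i < NR_FOLLOWERS; i++)
		sem_post(&follower_wake[i]);
	pthread_join(lt, NULL);
	for (int i = 0; i < NR_FOLLOWERS; i++)
		pthread_join(ft[i], NULL);
	return 0;
}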
@@ -1892,10 +1883,7 @@ static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
 	raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
 }
 
-/*
- * Does the specified CPU need an RCU callback for this invocation
- * of rcu_barrier()?
- */
+/* Does rcu_barrier need to queue an RCU callback on the specified CPU? */
 static bool rcu_nocb_cpu_needs_barrier(int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);

@@ -1911,8 +1899,8 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu)
 	 * callbacks would be posted.  In the worst case, the first
 	 * barrier in rcu_barrier() suffices (but the caller cannot
 	 * necessarily rely on this, not a substitute for the caller
-	 * getting the concurrency design right!).  There must also be
-	 * a barrier between the following load an posting of a callback
+	 * getting the concurrency design right!).  There must also be a
+	 * barrier between the following load and posting of a callback
 	 * (if a callback is in fact needed).  This is associated with an
 	 * atomic_inc() in the caller.
 	 */