Commit 0670e5fd authored by Zhen Lei
Browse files

rcu: Add RCU stall diagnosis information

mainline inclusion
from mainline-v6.3-rc1
commit be42f00b
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7OIXK

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=be42f00b73a0f50710d16eb7cb4efda0cce062dd



--------------------------------

Because RCU CPU stall warnings are driven from the scheduling-clock
interrupt handler, a workload consisting of a very large number of
short-duration hardware interrupts can result in misleading stall-warning
messages.  On systems supporting only a single level of interrupts,
that is, where interrupt handlers cannot be interrupted, this can
produce misleading diagnostics.  The stack traces will show the
innocent-bystander interrupted task, not the interrupts that are
at the very least exacerbating the stall.

This situation can be improved by displaying the number of interrupts
and the CPU time that they have consumed.  Diagnosing other types
of stalls can be eased by also providing the count of softirqs and
the CPU time that they consumed as well as the number of context
switches and the task-level CPU time consumed.

Consider the following output given this change:

rcu: INFO: rcu_preempt self-detected stall on CPU
rcu:     0-....: (1250 ticks this GP) <omitted>
rcu:          hardirqs   softirqs   csw/system
rcu:  number:      624         45            0
rcu: cputime:       69          1         2425   ==> 2500(ms)

This output shows that the number of hard and soft interrupts is small,
there are no context switches, and the system takes up a lot of time. This
indicates that the current task is looping with preemption disabled.

The impact on system performance is negligible because the snapshot is
recorded only once for all continuous RCU stalls.

This added debugging information is suppressed by default and can be
enabled by building the kernel with CONFIG_RCU_CPU_STALL_CPUTIME=y or
by booting with rcupdate.rcu_cpu_stall_cputime=1.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Conflicts:
	Documentation/admin-guide/kernel-parameters.txt
	kernel/rcu/Kconfig.debug
	[Change RCU_CPU_STALL_CPUTIME to be enabled by default]
	kernel/rcu/rcu.h
	kernel/rcu/tree.h
	kernel/rcu/tree_stall.h
	kernel/rcu/update.c

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
parent 15390ab4
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -4751,6 +4751,12 @@
	rcupdate.rcu_cpu_stall_timeout= [KNL]
			Set timeout for RCU CPU stall warning messages.

	rcupdate.rcu_cpu_stall_cputime= [KNL]
			Provide statistics on the cputime and count of
			interrupts and tasks during the sampling period. For
			multiple continuous RCU stalls, all sampling periods
			begin at half of the first RCU stall timeout.

	rcupdate.rcu_expedited= [KNL]
			Use expedited grace-period primitives, for
			example, synchronize_rcu_expedited() instead
+13 −0
Original line number Diff line number Diff line
@@ -91,6 +91,19 @@ config RCU_CPU_STALL_TIMEOUT
	  RCU grace period persists, additional CPU stall warnings are
	  printed at more widely spaced intervals.

config RCU_CPU_STALL_CPUTIME
	bool "Provide additional RCU stall debug information"
	depends on RCU_STALL_COMMON
	default n
	help
	  Collect statistics during the sampling period, namely the number of
	  hard interrupts, soft interrupts, and task switches and the cputime
	  consumed by hard interrupts, soft interrupts, and kernel tasks, and
	  add them to the RCU stall report. For multiple continuous RCU stalls,
	  all sampling periods begin at half of the first RCU stall timeout.
	  The boot option rcupdate.rcu_cpu_stall_cputime has the same function
	  as this option and, if specified, overrides it.

config RCU_TRACE
	bool "Enable tracing for RCU"
	depends on DEBUG_KERNEL
+1 −0
Original line number Diff line number Diff line
@@ -210,6 +210,7 @@ static inline bool rcu_stall_is_suppressed_at_boot(void)
extern int rcu_cpu_stall_ftrace_dump;
extern int rcu_cpu_stall_suppress;
extern int rcu_cpu_stall_timeout;
extern int rcu_cpu_stall_cputime;
int rcu_jiffies_till_stall_check(void);

static inline bool rcu_stall_is_suppressed(void)
+18 −0
Original line number Diff line number Diff line
@@ -1346,6 +1346,24 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
			rdp->rcu_iw_gp_seq = rnp->gp_seq;
			irq_work_queue_on(&rdp->rcu_iw, rdp->cpu);
		}

		if (rcu_cpu_stall_cputime && rdp->snap_record.gp_seq != rdp->gp_seq) {
			int cpu = rdp->cpu;
			struct rcu_snap_record *rsrp;
			struct kernel_cpustat *kcsp;

			kcsp = &kcpustat_cpu(cpu);

			rsrp = &rdp->snap_record;
			rsrp->cputime_irq     = kcpustat_field(kcsp, CPUTIME_IRQ, cpu);
			rsrp->cputime_softirq = kcpustat_field(kcsp, CPUTIME_SOFTIRQ, cpu);
			rsrp->cputime_system  = kcpustat_field(kcsp, CPUTIME_SYSTEM, cpu);
			rsrp->nr_hardirqs = kstat_cpu_irqs_sum(rdp->cpu);
			rsrp->nr_softirqs = kstat_cpu_softirqs_sum(rdp->cpu);
			rsrp->nr_csw = nr_context_switches_cpu(rdp->cpu);
			rsrp->jiffies = jiffies;
			rsrp->gp_seq = rdp->gp_seq;
		}
	}

	return 0;
+19 −0
Original line number Diff line number Diff line
@@ -147,6 +147,23 @@ union rcu_noqs {
	u16 s; /* Set of bits, aggregate OR here. */
};

/*
 * Record the snapshot of the core stats at half of the first RCU stall timeout.
 * The member gp_seq is used to ensure that all members are updated only once
 * during the sampling period. The snapshot is taken only if this gp_seq is not
 * equal to rdp->gp_seq.
 *
 * One instance is embedded in each struct rcu_data as ->snap_record.  The
 * sampling code fills every member in one pass, and only when
 * rcu_cpu_stall_cputime is nonzero; it stores rdp->gp_seq into ->gp_seq last
 * so that the "not equal" test above fails on later passes for the same
 * rdp->gp_seq value.
 *
 * All counters are accumulated per-CPU totals as returned by the kernel
 * statistics helpers at sampling time, not deltas.
 * NOTE(review): presumably the stall report subtracts these values from
 * fresh readings to obtain per-period figures -- confirm against the
 * reporting code, which is not visible here.
 */
struct rcu_snap_record {
	unsigned long	gp_seq;		/* rdp->gp_seq when the snapshot was taken */
	u64		cputime_irq;	/* Accumulated cputime of hard irqs (CPUTIME_IRQ) */
	u64		cputime_softirq;/* Accumulated cputime of soft irqs (CPUTIME_SOFTIRQ) */
	u64		cputime_system; /* Accumulated cputime of kernel tasks (CPUTIME_SYSTEM) */
	unsigned long	nr_hardirqs;	/* Accumulated number of hard irqs, from kstat_cpu_irqs_sum() */
	unsigned int	nr_softirqs;	/* Accumulated number of soft irqs, from kstat_cpu_softirqs_sum() */
	unsigned long long nr_csw;	/* Accumulated number of task switches, from nr_context_switches_cpu() */
	unsigned long   jiffies;	/* jiffies value when the snapshot was taken */
};

/* Per-CPU data for read-copy update. */
struct rcu_data {
	/* 1) quiescent-state and grace-period handling : */
@@ -251,6 +268,8 @@ struct rcu_data {
	unsigned long rcu_onl_gp_seq;	/* ->gp_seq at last online. */
	short rcu_onl_gp_flags;		/* ->gp_flags at last online. */
	unsigned long last_fqs_resched;	/* Time of last rcu_resched(). */
	struct rcu_snap_record snap_record; /* Snapshot of core stats at half of */
					    /* the first RCU stall timeout */

	int cpu;
};
Loading