Commit a3b2aeac authored by Yang Yang, committed by Andrew Morton
Browse files

delayacct: track delays from IRQ/SOFTIRQ

Delay accounting does not track the delay caused by IRQ/SOFTIRQ, even
though IRQ/SOFTIRQ can have an obvious impact on the productivity of some
workloads, such as those running on a system that is busy handling network
IRQ/SOFTIRQ.

Getting the delay of IRQ/SOFTIRQ could help users reduce such delay, for
example by setting interrupt affinity or task affinity, or by using a kernel
thread for NAPI.  This is inspired by "sched/psi: Add PSI_IRQ to track
IRQ/SOFTIRQ pressure"[1].  Also fix some code indentation problems in older
code.

And update tools/accounting/getdelays.c:
    / # ./getdelays -p 156 -di
    print delayacct stats ON
    printing IO accounting
    PID     156

    CPU             count     real total  virtual total    delay total  delay average
                       15       15836008       16218149      275700790         18.380ms
    IO              count    delay total  delay average
                        0              0          0.000ms
    SWAP            count    delay total  delay average
                        0              0          0.000ms
    RECLAIM         count    delay total  delay average
                        0              0          0.000ms
    THRASHING       count    delay total  delay average
                        0              0          0.000ms
    COMPACT         count    delay total  delay average
                        0              0          0.000ms
    WPCOPY          count    delay total  delay average
                       36        7586118          0.211ms
    IRQ             count    delay total  delay average
                       42         929161          0.022ms

[1] commit 52b1364b("sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure")

Link: https://lkml.kernel.org/r/202304081728353557233@zte.com.cn


Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Jiang Xuexin <jiang.xuexin@zte.com.cn>
Cc: wangyong <wang.yong12@zte.com.cn>
Cc: junhua huang <huang.junhua@zte.com.cn>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent 29692fc9
Loading
Loading
Loading
Loading
+5 −2
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ d) memory reclaim
e) thrashing
f) direct compact
g) write-protect copy
h) IRQ/SOFTIRQ

and makes these statistics available to userspace through
the taskstats interface.
@@ -49,7 +50,7 @@ this structure. See
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
cache, direct compact, write-protect copy etc.
cache, direct compact, write-protect copy, IRQ/SOFTIRQ etc.

Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -120,6 +121,8 @@ Get sum of delays, since system boot, for all pids with tgid 5::
                       0              0          0.000ms
	WPCOPY          count    delay total  delay average
                       0              0          0.000ms
	IRQ             count    delay total  delay average
                       0              0          0.000ms

Get IO accounting for pid 1, it works only with -p::

+15 −0
Original line number Diff line number Diff line
@@ -48,10 +48,13 @@ struct task_delay_info {
	u64 wpcopy_start;
	u64 wpcopy_delay;	/* wait for write-protect copy */

	u64 irq_delay;	/* wait for IRQ/SOFTIRQ */

	u32 freepages_count;	/* total count of memory reclaim */
	u32 thrashing_count;	/* total count of thrash waits */
	u32 compact_count;	/* total count of memory compact */
	u32 wpcopy_count;	/* total count of write-protect copy */
	u32 irq_count;	/* total count of IRQ/SOFTIRQ */
};
#endif

@@ -81,6 +84,7 @@ extern void __delayacct_compact_start(void);
extern void __delayacct_compact_end(void);
extern void __delayacct_wpcopy_start(void);
extern void __delayacct_wpcopy_end(void);
/* Add @delta to @task's IRQ/SOFTIRQ delay total and bump its IRQ/SOFTIRQ count. */
extern void __delayacct_irq(struct task_struct *task, u32 delta);

static inline void delayacct_tsk_init(struct task_struct *tsk)
{
@@ -215,6 +219,15 @@ static inline void delayacct_wpcopy_end(void)
		__delayacct_wpcopy_end();
}

/*
 * delayacct_irq - account IRQ/SOFTIRQ time against a task, if enabled
 * @task:  task to charge
 * @delta: amount of IRQ/SOFTIRQ time to add
 *
 * Forwards to __delayacct_irq() only when the delayacct_key static branch
 * is enabled and @task has a delays structure attached; otherwise this is
 * a no-op.
 */
static inline void delayacct_irq(struct task_struct *task, u32 delta)
{
	if (static_branch_unlikely(&delayacct_key) && task->delays)
		__delayacct_irq(task, delta);
}

#else
static inline void delayacct_init(void)
{}
@@ -253,6 +266,8 @@ static inline void delayacct_wpcopy_start(void)
{}
static inline void delayacct_wpcopy_end(void)
{}
/* No-op stub used when CONFIG_TASK_DELAY_ACCT is not enabled. */
static inline void delayacct_irq(struct task_struct *task, u32 delta)
{}

#endif /* CONFIG_TASK_DELAY_ACCT */

+5 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@
 */


#define TASKSTATS_VERSION	13
#define TASKSTATS_VERSION	14
#define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
					 * in linux/sched.h */

@@ -198,6 +198,10 @@ struct taskstats {
	/* v13: Delay waiting for write-protect copy */
	__u64    wpcopy_count;
	__u64    wpcopy_delay_total;

	/* v14: Delay waiting for IRQ/SOFTIRQ */
	__u64    irq_count;
	__u64    irq_delay_total;
};


+14 −0
Original line number Diff line number Diff line
@@ -179,12 +179,15 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
	tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
	d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
	tmp = d->irq_delay_total + tsk->delays->irq_delay;
	d->irq_delay_total = (tmp < d->irq_delay_total) ? 0 : tmp;
	d->blkio_count += tsk->delays->blkio_count;
	d->swapin_count += tsk->delays->swapin_count;
	d->freepages_count += tsk->delays->freepages_count;
	d->thrashing_count += tsk->delays->thrashing_count;
	d->compact_count += tsk->delays->compact_count;
	d->wpcopy_count += tsk->delays->wpcopy_count;
	d->irq_count += tsk->delays->irq_count;
	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);

	return 0;
@@ -274,3 +277,14 @@ void __delayacct_wpcopy_end(void)
		      &current->delays->wpcopy_delay,
		      &current->delays->wpcopy_count);
}

/*
 * __delayacct_irq - record IRQ/SOFTIRQ delay for a task
 * @task:  task whose delay statistics are updated
 * @delta: amount to add to the task's accumulated IRQ/SOFTIRQ delay
 *
 * Dereferences @task->delays unconditionally, so the caller must have
 * checked that it is non-NULL.  Both counters are updated under
 * @task->delays->lock with interrupts disabled.  irq_count is incremented
 * once per call, regardless of the value of @delta.
 */
void __delayacct_irq(struct task_struct *task, u32 delta)
{
	unsigned long irqflags;

	raw_spin_lock_irqsave(&task->delays->lock, irqflags);
	task->delays->irq_count += 1;
	task->delays->irq_delay += delta;
	raw_spin_unlock_irqrestore(&task->delays->lock, irqflags);
}
+1 −0
Original line number Diff line number Diff line
@@ -704,6 +704,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
	rq->prev_irq_time += irq_delta;
	delta -= irq_delta;
	psi_account_irqtime(rq->curr, irq_delta);
	delayacct_irq(rq->curr, irq_delta);
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	if (static_key_false((&paravirt_steal_rq_enabled))) {
Loading