Commit 662ce1dc authored by Yang Yang's avatar Yang Yang Committed by Andrew Morton
Browse files

delayacct: track delays from write-protect copy

Delay accounting does not track the delay of write-protect copy.  When
tasks trigger many write-protect copys(include COW and unsharing of
anonymous pages[1]), it may spend a amount of time waiting for them.  To
get the delay of tasks in write-protect copy, could help users to evaluate
the impact of using KSM or fork() or GUP.

Also update tools/accounting/getdelays.c:

    / # ./getdelays -dl -p 231
    print delayacct stats ON
    listen forever
    PID     231

    CPU             count     real total  virtual total    delay total  delay average
                     6247     1859000000     2154070021     1674255063          0.268ms
    IO              count    delay total  delay average
                        0              0              0ms
    SWAP            count    delay total  delay average
                        0              0              0ms
    RECLAIM         count    delay total  delay average
                        0              0              0ms
    THRASHING       count    delay total  delay average
                        0              0              0ms
    COMPACT         count    delay total  delay average
                        3          72758              0ms
    WPCOPY          count    delay total  delay average
                     3635      271567604              0ms

[1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages")

Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn


Signed-off-by: default avatarYang Yang <yang.yang29@zte.com.cn>
Reviewed-by: default avatarDavid Hildenbrand <david@redhat.com>
Reviewed-by: default avatarJiang Xuexin <jiang.xuexin@zte.com.cn>
Reviewed-by: default avatarRan Xiaokai <ran.xiaokai@zte.com.cn>
Reviewed-by: default avatarwangyong <wang.yong12@zte.com.cn>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 54eb8462
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@ c) swapping in pages
d) memory reclaim
e) thrashing page cache
f) direct compact
g) write-protect copy

and makes these statistics available to userspace through
the taskstats interface.
@@ -48,7 +49,7 @@ this structure. See
for a description of the fields pertaining to delay accounting.
It will generally be in the form of counters returning the cumulative
delay seen for cpu, sync block I/O, swapin, memory reclaim, thrash page
cache, direct compact etc.
cache, direct compact, write-protect copy etc.

Taking the difference of two successive readings of a given
counter (say cpu_delay_total) for a task will give the delay
@@ -117,6 +118,8 @@ Get sum of delays, since system boot, for all pids with tgid 5::
	                    0              0              0ms
	COMPACT         count    delay total  delay average
	                    0              0              0ms
        WPCOPY          count    delay total  delay average
                            0              0              0ms

Get IO accounting for pid 1, it works only with -p::

+28 −0
Original line number Diff line number Diff line
@@ -45,9 +45,13 @@ struct task_delay_info {
	u64 compact_start;
	u64 compact_delay;	/* wait for memory compact */

	u64 wpcopy_start;
	u64 wpcopy_delay;	/* wait for write-protect copy */

	u32 freepages_count;	/* total count of memory reclaim */
	u32 thrashing_count;	/* total count of thrash waits */
	u32 compact_count;	/* total count of memory compact */
	u32 wpcopy_count;	/* total count of write-protect copy */
};
#endif

@@ -75,6 +79,8 @@ extern void __delayacct_swapin_start(void);
extern void __delayacct_swapin_end(void);
extern void __delayacct_compact_start(void);
extern void __delayacct_compact_end(void);
extern void __delayacct_wpcopy_start(void);
extern void __delayacct_wpcopy_end(void);

static inline void delayacct_tsk_init(struct task_struct *tsk)
{
@@ -191,6 +197,24 @@ static inline void delayacct_compact_end(void)
		__delayacct_compact_end();
}

static inline void delayacct_wpcopy_start(void)
{
	if (!static_branch_unlikely(&delayacct_key))
		return;

	if (current->delays)
		__delayacct_wpcopy_start();
}

static inline void delayacct_wpcopy_end(void)
{
	if (!static_branch_unlikely(&delayacct_key))
		return;

	if (current->delays)
		__delayacct_wpcopy_end();
}

#else
static inline void delayacct_init(void)
{}
@@ -225,6 +249,10 @@ static inline void delayacct_compact_start(void)
{}
static inline void delayacct_compact_end(void)
{}
static inline void delayacct_wpcopy_start(void)
{}
static inline void delayacct_wpcopy_end(void)
{}

#endif /* CONFIG_TASK_DELAY_ACCT */

+5 −1
Original line number Diff line number Diff line
@@ -34,7 +34,7 @@
 */


#define TASKSTATS_VERSION	12
#define TASKSTATS_VERSION	13
#define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
					 * in linux/sched.h */

@@ -194,6 +194,10 @@ struct taskstats {
	__u64   ac_exe_dev;     /* program binary device ID */
	__u64   ac_exe_inode;   /* program binary inode number */
	/* v12 end */

	/* v13: Delay waiting for write-protect copy */
	__u64    wpcopy_count;
	__u64    wpcopy_delay_total;
};


+16 −0
Original line number Diff line number Diff line
@@ -177,11 +177,14 @@ int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
	tmp = d->compact_delay_total + tsk->delays->compact_delay;
	d->compact_delay_total = (tmp < d->compact_delay_total) ? 0 : tmp;
	tmp = d->wpcopy_delay_total + tsk->delays->wpcopy_delay;
	d->wpcopy_delay_total = (tmp < d->wpcopy_delay_total) ? 0 : tmp;
	d->blkio_count += tsk->delays->blkio_count;
	d->swapin_count += tsk->delays->swapin_count;
	d->freepages_count += tsk->delays->freepages_count;
	d->thrashing_count += tsk->delays->thrashing_count;
	d->compact_count += tsk->delays->compact_count;
	d->wpcopy_count += tsk->delays->wpcopy_count;
	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);

	return 0;
@@ -249,3 +252,16 @@ void __delayacct_compact_end(void)
		      &current->delays->compact_delay,
		      &current->delays->compact_count);
}

void __delayacct_wpcopy_start(void)
{
	current->delays->wpcopy_start = local_clock();
}

void __delayacct_wpcopy_end(void)
{
	delayacct_end(&current->delays->lock,
		      &current->delays->wpcopy_start,
		      &current->delays->wpcopy_delay,
		      &current->delays->wpcopy_count);
}
+8 −0
Original line number Diff line number Diff line
@@ -32,6 +32,7 @@
#include <linux/cma.h>
#include <linux/migrate.h>
#include <linux/nospec.h>
#include <linux/delayacct.h>

#include <asm/page.h>
#include <asm/pgalloc.h>
@@ -5230,6 +5231,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
	pte = huge_ptep_get(ptep);
	old_page = pte_page(pte);

	delayacct_wpcopy_start();

retry_avoidcopy:
	/*
	 * If no-one else is actually using this page, we're the exclusive
@@ -5240,6 +5243,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
			page_move_anon_rmap(old_page, vma);
		if (likely(!unshare))
			set_huge_ptep_writable(vma, haddr, ptep);

		delayacct_wpcopy_end();
		return 0;
	}
	VM_BUG_ON_PAGE(PageAnon(old_page) && PageAnonExclusive(old_page),
@@ -5309,6 +5314,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
			 * race occurs while re-acquiring page table
			 * lock, and our job is done.
			 */
			delayacct_wpcopy_end();
			return 0;
		}

@@ -5367,6 +5373,8 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
	put_page(old_page);

	spin_lock(ptl); /* Caller expects lock to be held */

	delayacct_wpcopy_end();
	return ret;
}

Loading