Commit e7f2be11 authored by Frederic Weisbecker, committed by Thomas Gleixner

sched/cputime: Fix getrusage(RUSAGE_THREAD) with nohz_full



getrusage(RUSAGE_THREAD) with nohz_full may return utime/stime values
shorter than the CPU time the thread has actually consumed.

task_cputime_adjusted() snapshots utime and stime and then adjusts their
sum to match the scheduler-maintained cputime.sum_exec_runtime.
Unfortunately, under nohz_full, sum_exec_runtime is updated only once per
second in the worst case, causing a discrepancy against utime and stime,
which can be brought up to date at any time by the reader using vtime.
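
For illustration only (not part of this patch), a rough userspace sketch of
how the shortfall can be observed: pin a thread to a nohz_full CPU, spin
without entering the kernel, then compare getrusage(RUSAGE_THREAD) against
CLOCK_THREAD_CPUTIME_ID. The CPU number used below is an assumption; it must
be one of the CPUs listed in nohz_full= on the kernel command line.

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <time.h>
#include <sys/resource.h>

int main(void)
{
	cpu_set_t set;
	struct rusage ru;
	struct timespec ts;
	double rusage_sec, cputime_sec;

	/* Pin to an assumed nohz_full CPU (adjust to the local setup). */
	CPU_ZERO(&set);
	CPU_SET(1, &set);
	sched_setaffinity(0, sizeof(set), &set);

	/* Burn CPU in userspace so the tick stays disabled. */
	for (volatile unsigned long i = 0; i < 2000000000UL; i++)
		;

	getrusage(RUSAGE_THREAD, &ru);
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);

	rusage_sec  = ru.ru_utime.tv_sec + ru.ru_utime.tv_usec / 1e6 +
		      ru.ru_stime.tv_sec + ru.ru_stime.tv_usec / 1e6;
	cputime_sec = ts.tv_sec + ts.tv_nsec / 1e9;

	/* Without the fix, rusage_sec can lag cputime_sec by up to ~1s. */
	printf("getrusage: %.6f s  thread cputime: %.6f s\n",
	       rusage_sec, cputime_sec);
	return 0;
}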

To fix this, update cputime.sum_exec_runtime whenever the cputime snapshot
reports the task as actually running while the tick is disabled. The
related overhead is thus confined to the situations that actually need it.

Reported-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Hasegawa Hitomi <hasegawa-hitomi@fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Acked-by: Phil Auld <pauld@redhat.com>
Link: https://lore.kernel.org/r/20211026141055.57358-3-frederic@kernel.org
parent d58071a8
+3 −2
@@ -18,15 +18,16 @@
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
-extern void task_cputime(struct task_struct *t,
+extern bool task_cputime(struct task_struct *t,
 			 u64 *utime, u64 *stime);
 extern u64 task_gtime(struct task_struct *t);
 #else
-static inline void task_cputime(struct task_struct *t,
+static inline bool task_cputime(struct task_struct *t,
 				u64 *utime, u64 *stime)
 {
 	*utime = t->utime;
 	*stime = t->stime;
+	return false;
 }
 
 static inline u64 task_gtime(struct task_struct *t)
+9 −3
@@ -615,7 +615,8 @@ void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
-	task_cputime(p, &cputime.utime, &cputime.stime);
+	if (task_cputime(p, &cputime.utime, &cputime.stime))
+		cputime.sum_exec_runtime = task_sched_runtime(p);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 EXPORT_SYMBOL_GPL(task_cputime_adjusted);
@@ -828,19 +829,21 @@ u64 task_gtime(struct task_struct *t)
  * add up the pending nohz execution time since the last
  * cputime snapshot.
  */
-void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
+bool task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 {
 	struct vtime *vtime = &t->vtime;
 	unsigned int seq;
 	u64 delta;
+	int ret;
 
 	if (!vtime_accounting_enabled()) {
 		*utime = t->utime;
 		*stime = t->stime;
-		return;
+		return false;
 	}
 
 	do {
+		ret = false;
 		seq = read_seqcount_begin(&vtime->seqcount);
 
 		*utime = t->utime;
@@ -850,6 +853,7 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		if (vtime->state < VTIME_SYS)
 			continue;
 
+		ret = true;
 		delta = vtime_delta(vtime);
 
 		/*
@@ -861,6 +865,8 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
 		else
 			*utime += vtime->utime + delta;
 	} while (read_seqcount_retry(&vtime->seqcount, seq));
+
+	return ret;
 }
 
 static int vtime_state_fetch(struct vtime *vtime, int cpu)