Commit 961e2393 authored by Chengming Zhou, committed by Lu Jialin
Browse files

sched/psi: Fix periodic aggregation shut off

mainline inclusion
from mainline-v6.1-rc1
commit c530a3c7
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8BCV4

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c530a3c716b963625e43aa915e0de6b4d1ce8ad9



--------------------------------

We don't want to wake periodic aggregation work back up if the
task change is the aggregation worker itself going to sleep, or
we'll ping-pong forever.

Previously, we would use psi_task_change() in psi_dequeue() when a
task was going to sleep, so this check was put in psi_task_change().

But commit 4117cebf ("psi: Optimize task switch inside shared cgroups")
deferred task sleep handling to psi_task_switch(), so a task going to
sleep no longer goes through psi_task_change().

So this patch moves this check to psi_task_switch().

Fixes: 4117cebf ("psi: Optimize task switch inside shared cgroups")
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20220825164111.29534-2-zhouchengming@bytedance.com


Signed-off-by: Lu Jialin <lujialin4@huawei.com>
parent c96a352b
Loading
Loading
Loading
Loading
+14 −14
Original line number Diff line number Diff line
@@ -818,7 +818,6 @@ void psi_task_change(struct task_struct *task, int clear, int set)
{
	int cpu = task_cpu(task);
	struct psi_group *group;
	bool wake_clock = true;
	void *iter = NULL;
	u64 now;

@@ -828,19 +827,9 @@ void psi_task_change(struct task_struct *task, int clear, int set)
	psi_flags_change(task, clear, set);

	now = cpu_clock(cpu);
	/*
	 * Periodic aggregation shuts off if there is a period of no
	 * task changes, so we wake it back up if necessary. However,
	 * don't do this if the task change is the aggregation worker
	 * itself going to sleep, or we'll ping-pong forever.
	 */
	if (unlikely((clear & TSK_RUNNING) &&
		     (task->flags & PF_WQ_WORKER) &&
		     wq_worker_last_func(task) == psi_avgs_work))
		wake_clock = false;

	while ((group = iterate_groups(task, &iter)))
		psi_group_change(group, cpu, clear, set, now, wake_clock);
		psi_group_change(group, cpu, clear, set, now, true);
}

void psi_task_switch(struct task_struct *prev, struct task_struct *next,
@@ -877,6 +866,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,

	if (prev->pid) {
		int clear = TSK_ONCPU, set = 0;
		bool wake_clock = true;

		/*
		 * When we're going to sleep, psi_dequeue() lets us
@@ -890,13 +880,23 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
				clear |= TSK_MEMSTALL_RUNNING;
			if (prev->in_iowait)
				set |= TSK_IOWAIT;

			/*
			 * Periodic aggregation shuts off if there is a period of no
			 * task changes, so we wake it back up if necessary. However,
			 * don't do this if the task change is the aggregation worker
			 * itself going to sleep, or we'll ping-pong forever.
			 */
			if (unlikely((prev->flags & PF_WQ_WORKER) &&
				     wq_worker_last_func(prev) == psi_avgs_work))
				wake_clock = false;
		}

		psi_flags_change(prev, clear, set);

		iter = NULL;
		while ((group = iterate_groups(prev, &iter)) && group != common)
			psi_group_change(group, cpu, clear, set, now, true);
			psi_group_change(group, cpu, clear, set, now, wake_clock);

		/*
		 * TSK_ONCPU is handled up to the common ancestor. If we're tasked
@@ -905,7 +905,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
		if (sleep) {
			clear &= ~TSK_ONCPU;
			for (; group; group = iterate_groups(prev, &iter))
				psi_group_change(group, cpu, clear, set, now, true);
				psi_group_change(group, cpu, clear, set, now, wake_clock);
		}
	}
}