Commit 0266adf3 authored by Chengming Zhou, committed by Lu Jialin
Browse files

sched/psi: Add PSI_IRQ to track IRQ/SOFTIRQ pressure

mainline inclusion
from mainline-v6.1-rc1
commit 52b1364b
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8BCV4

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=52b1364ba0b105122d6de0e719b36db705011ac1



--------------------------------

PSI already tracks workload pressure stall information for CPU,
memory and IO. Apart from these, IRQ/SOFTIRQ can have an obvious
impact on the productivity of some workloads, such as web service
workloads.

When CONFIG_IRQ_TIME_ACCOUNTING is enabled, we can get the IRQ/SOFTIRQ
delta time from update_rq_clock_task(), where we can record that delta
to the cgroups of the CPU's current task as PSI_IRQ_FULL status.

Note we don't use PSI_IRQ_SOME: IRQ/SOFTIRQ always happens in the
context of the current task on the CPU, so nothing productive can run
even if it were runnable. Therefore we only use PSI_IRQ_FULL.

Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20220825164111.29534-8-zhouchengming@bytedance.com


Conflict:
	Documentation/admin-guide/cgroup-v2.rst
	include/linux/psi_types.h
	kernel/sched/psi.c
	kernel/sched/stats.h
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
parent 78f517b8
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -951,6 +951,12 @@ All cgroup core files are prefixed with "cgroup."
	it's possible to delete a frozen (and empty) cgroup, as well as
	create new sub-cgroups.

irq.pressure
        A read-write nested-keyed file.

        Shows pressure stall information for IRQ/SOFTIRQ. See
        :ref:`Documentation/accounting/psi.rst <psi>` for details.

Controllers
===========

+1 −0
Original line number Diff line number Diff line
@@ -21,6 +21,7 @@ void psi_init(void);
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,
		     bool sleep);
void psi_account_irqtime(struct task_struct *task, u32 delta);

void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags);
+8 −2
Original line number Diff line number Diff line
@@ -63,7 +63,10 @@ enum psi_res {
	PSI_IO,
	PSI_MEM,
	PSI_CPU,
	NR_PSI_RESOURCES = 3,
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	PSI_IRQ,
#endif
	NR_PSI_RESOURCES,
};

/*
@@ -98,9 +101,12 @@ enum psi_states {
	PSI_MEM_FULL,
	PSI_CPU_SOME,
	PSI_CPU_FULL,
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	PSI_IRQ_FULL,
#endif
	/* Only per-CPU, to weigh the CPU in the global average: */
	PSI_NONIDLE,
	NR_PSI_STATES = 7,
	NR_PSI_STATES,
};
#endif

+26 −0
Original line number Diff line number Diff line
@@ -3751,6 +3751,23 @@ static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
	return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * seq_file show handler for the "irq.pressure" cgroup file.
 *
 * Picks the PSI group to report on and hands it to psi_show() with the
 * PSI_IRQ resource. The cgroup whose inode number is 1 (presumably the
 * root cgroup) reports the system-wide psi_system group; every other
 * cgroup reports its own embedded psi group.
 *
 * @seq: seq_file being rendered; its css identifies the cgroup.
 * @v:   unused.
 *
 * Returns whatever psi_show() returns.
 */
static int cgroup_irq_pressure_show(struct seq_file *seq, void *v)
{
	struct cgroup *cg = seq_css(seq)->cgroup;
	struct psi_group *group;

	if (cgroup_ino(cg) == 1)
		group = &psi_system;
	else
		group = &cg->psi;

	return psi_show(seq, group, PSI_IRQ);
}

/*
 * Write handler for the "irq.pressure" cgroup file.
 *
 * Forwards the written buffer unchanged to cgroup_pressure_write(),
 * selecting PSI_IRQ as the resource. The file offset @off is not used.
 *
 * @of:     kernfs open-file context of the write.
 * @buf:    data written by the caller.
 * @nbytes: length of @buf in bytes.
 * @off:    file offset of the write (ignored here).
 *
 * Returns the result of cgroup_pressure_write().
 */
static ssize_t cgroup_irq_pressure_write(struct kernfs_open_file *of,
					 char *buf, size_t nbytes,
					 loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_IRQ);
}
#endif

static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
					  poll_table *pt)
{
@@ -5155,6 +5172,15 @@ static struct cftype cgroup_base_files[] = {
		.poll = cgroup_pressure_poll,
		.release = cgroup_pressure_release,
	},
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
	{
		.name = "irq.pressure",
		.seq_show = cgroup_irq_pressure_show,
		.write = cgroup_irq_pressure_write,
		.poll = cgroup_pressure_poll,
		.release = cgroup_pressure_release,
	},
#endif
#endif /* CONFIG_PSI */
	{ }	/* terminate */
};
+1 −0
Original line number Diff line number Diff line
@@ -629,6 +629,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)

	rq->prev_irq_time += irq_delta;
	delta -= irq_delta;
	psi_account_irqtime(rq->curr, irq_delta);
#endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
	if (static_key_false((&paravirt_steal_rq_enabled))) {
Loading