Commit 65494451 authored by Lu Jialin, committed by yanhaitao
Browse files

sched/psi: add cpu fine grained stall tracking in pressure.stat

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8QUNW



-------------------------------

Introduce cpu fine grained stall tracking (cpu cfs bandwidth or cpu qos) in
pressure.stat. For cpu fine grained stall tracking, only the "full"
information is shown in pressure.stat.

for example:

/test # cat /tmp/cpuacct/test/pressure.stat
cgroup_memory_reclaim
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
global_memory_reclaim
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
compact
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
cgroup_async_memory_reclaim
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
swap
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
cpu_cfs_bandwidth
full avg10=21.76 avg60=4.58 avg300=0.98 total=3893827
cpu_qos
full avg10=0.00 avg60=0.00 avg300=0.00 total=0

Signed-off-by: Lu Jialin <lujialin4@huawei.com>
parent 25d00f68
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -82,6 +82,8 @@ enum psi_aggregators {
};

#ifdef CONFIG_PSI_FINE_GRAINED
#define CPU_CFS_BANDWIDTH		1

enum psi_stat_states {
	PSI_MEMCG_RECLAIM_SOME,
	PSI_MEMCG_RECLAIM_FULL,
@@ -93,6 +95,10 @@ enum psi_stat_states {
	PSI_ASYNC_MEMCG_RECLAIM_FULL,
	PSI_SWAP_SOME,
	PSI_SWAP_FULL,
	PSI_CPU_CFS_BANDWIDTH_FULL,
#ifdef CONFIG_QOS_SCHED
	PSI_CPU_QOS_FULL,
#endif
	NR_PSI_STAT_STATES,
};

@@ -142,6 +148,8 @@ struct psi_group_cpu {
	unsigned int fine_grained_tasks[NR_PSI_STAT_TASK_COUNTS];
	u32 fine_grained_times_delta;
	u32 fine_grained_times_prev[NR_PSI_AGGREGATORS][NR_PSI_STAT_STATES];
	int prev_throttle;
	int cur_throttle;
#endif
};

+0 −6
Original line number Diff line number Diff line
@@ -139,12 +139,6 @@ int __weak arch_asym_cpu_priority(int cpu)

#ifdef CONFIG_QOS_SCHED

/*
 * To distinguish cfs bw, use QOS_THROTTLED mark cfs_rq->throttled
 * when qos throttled(and cfs bw throttle mark cfs_rq->throttled as 1).
 */
#define QOS_THROTTLED	2

static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct hrtimer, qos_overload_timer);
static DEFINE_PER_CPU(int, qos_cpu_overload);
+57 −5
Original line number Diff line number Diff line
@@ -369,6 +369,16 @@ static void record_stat_times(struct psi_group_cpu *groupc, u32 delta)
		if (groupc->fine_grained_state_mask & (1 << PSI_SWAP_FULL))
			groupc->fine_grained_times[PSI_SWAP_FULL] += delta;
	}
#ifdef CONFIG_CFS_BANDWIDTH
	if (groupc->state_mask & (1 << PSI_CPU_FULL)) {
		if (groupc->prev_throttle == CPU_CFS_BANDWIDTH)
			groupc->fine_grained_times[PSI_CPU_CFS_BANDWIDTH_FULL] += delta;
#ifdef CONFIG_QOS_SCHED
		else if (groupc->prev_throttle == QOS_THROTTLED)
			groupc->fine_grained_times[PSI_CPU_QOS_FULL] += delta;
#endif
		/*
		 * The closing brace of the PSI_CPU_FULL branch must be
		 * unconditional: keeping it inside CONFIG_QOS_SCHED (as the
		 * original did) leaves an unbalanced brace and breaks the
		 * build when CONFIG_QOS_SCHED is not set.
		 */
	}
#endif
}

static bool test_fine_grained_stat(unsigned int *stat_tasks,
@@ -422,7 +432,7 @@ static void psi_group_stat_change(struct psi_group *group, int cpu,
	for (t = 0; set; set &= ~(1 << t), t++)
		if (set & (1 << t))
			groupc->fine_grained_tasks[t]++;
	for (s = 0; s < NR_PSI_STAT_STATES; s++)
	for (s = 0; s < PSI_CPU_CFS_BANDWIDTH_FULL; s++)
		if (test_fine_grained_stat(groupc->fine_grained_tasks,
					   groupc->tasks[NR_RUNNING], s))
			state_mask |= (1 << s);
@@ -481,6 +491,32 @@ static inline void psi_stat_flags_change(struct task_struct *task,
					 int set, int clear) {}
#endif

#if defined(CONFIG_CFS_BANDWIDTH) && defined(CONFIG_CGROUP_CPUACCT) && \
	defined(CONFIG_PSI_FINE_GRAINED)
/*
 * update_throttle_type - snapshot which throttle mechanism applies to
 * @task's task_group on @cpu into the per-cpu psi state of the task's
 * cpuacct cgroup.
 * @task: task being switched in (@next == true) or out (@next == false);
 *        see the call sites in psi_task_switch().
 * @cpu:  CPU on which the context switch happens.
 * @next: when true, the current snapshot is first saved to prev_throttle
 *        so record_stat_times() can attribute the stall window that just
 *        ended to the correct throttle type (cfs bandwidth vs qos).
 *
 * Only runs when cpuacct is on a legacy (v1) hierarchy; on the unified
 * hierarchy this is a no-op. The root cpuacct cgroup (no parent) is
 * skipped -- presumably it has no dedicated psi_group; confirm against
 * cgroup_psi() for the root cgroup.
 */
static void update_throttle_type(struct task_struct *task, int cpu, bool next)
{
	if (!cgroup_subsys_on_dfl(cpuacct_cgrp_subsys)) {
		struct cgroup *cpuacct_cgrp;
		struct psi_group_cpu *groupc;
		struct task_group *tsk_grp;

		/* RCU protects the cgroup and task_group lookups below. */
		rcu_read_lock();
		cpuacct_cgrp = task_cgroup(task, cpuacct_cgrp_id);
		if (cgroup_parent(cpuacct_cgrp)) {
			groupc = per_cpu_ptr(cgroup_psi(cpuacct_cgrp)->pcpu, cpu);
			tsk_grp = task_group(task);
			if (next)
				groupc->prev_throttle = groupc->cur_throttle;
			/*
			 * cfs_rq->throttled is 1 (CPU_CFS_BANDWIDTH) for a
			 * cfs bandwidth throttle and QOS_THROTTLED (2) for a
			 * qos throttle -- see the QOS_THROTTLED definition.
			 */
			groupc->cur_throttle = tsk_grp->cfs_rq[cpu]->throttled;
		}
		rcu_read_unlock();
	}
}
#else
static inline void update_throttle_type(struct task_struct *task, int cpu,
					bool next) {}
#endif

static void collect_percpu_times(struct psi_group *group,
				 enum psi_aggregators aggregator,
				 u32 *pchanged_states)
@@ -1019,8 +1055,9 @@ static void psi_group_change(struct psi_group *group, int cpu,
		 * may have already incorporated the live state into times_prev;
		 * avoid a delta sample underflow when PSI is later re-enabled.
		 */
		if (unlikely(groupc->state_mask & (1 << PSI_NONIDLE)))
		if (unlikely(groupc->state_mask & (1 << PSI_NONIDLE))) {
			record_times(groupc, now);
		}

		groupc->state_mask = state_mask;

@@ -1136,6 +1173,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
	u64 now = cpu_clock(cpu);

	if (next->pid) {
		update_throttle_type(next, cpu, true);
		psi_flags_change(next, 0, TSK_ONCPU);
		/*
		 * Set TSK_ONCPU on @next's cgroups. If @next shares any
@@ -1162,6 +1200,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
		int stat_clear = 0;
		bool memstall_type_change = false;

		update_throttle_type(prev, cpu, false);
		/*
		 * When we're going to sleep, psi_dequeue() lets us
		 * handle TSK_RUNNING, TSK_MEMSTALL_RUNNING and
@@ -1861,8 +1900,22 @@ static const char *const psi_stat_names[] = {
	"compact",
	"cgroup_async_memory_reclaim",
	"swap",
	"cpu_cfs_bandwidth",
	"cpu_qos",
};

/*
 * get_stat_names - emit the header line for fine-grained stall entry @i.
 * @m:       destination seq_file.
 * @i:       psi_stat_states index being printed.
 * @is_full: true when the current line is the "full" line of a some/full
 *           pair; the shared header was already printed before "some",
 *           so nothing is emitted in that case.
 *
 * Memory-style entries (up to PSI_SWAP_FULL) share one name per
 * some/full pair; the cpu entries below only have a "full" line, so
 * their name is printed unconditionally.
 *
 * Fix: the original wrote "return seq_printf(...)" -- returning an
 * expression from a void function is invalid C (and seq_printf()
 * itself returns void in current kernels).
 */
static void get_stat_names(struct seq_file *m, int i, bool is_full)
{
	if (i <= PSI_SWAP_FULL && !is_full)
		seq_printf(m, "%s\n", psi_stat_names[i / 2]);
	else if (i == PSI_CPU_CFS_BANDWIDTH_FULL)
		seq_printf(m, "%s\n", "cpu_cfs_bandwidth");
#ifdef CONFIG_QOS_SCHED
	else if (i == PSI_CPU_QOS_FULL)
		seq_printf(m, "%s\n", "cpu_qos");
#endif
}

int psi_stat_show(struct seq_file *m, struct psi_group *group)
{
	int i;
@@ -1882,13 +1935,12 @@ int psi_stat_show(struct seq_file *m, struct psi_group *group)
		unsigned long avg[3] = {0, };
		int w;
		u64 total;
		bool is_full = i % 2;
		bool is_full = i % 2 || i > PSI_SWAP_FULL;

		for (w = 0; w < 3; w++)
			avg[w] = group->fine_grained_avg[i][w];
		total = div_u64(group->fine_grained_total[PSI_AVGS][i], NSEC_PER_USEC);
		if (!is_full)
			seq_printf(m, "%s\n", psi_stat_names[i / 2]);
		get_stat_names(m, i, is_full);
		seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
			   is_full ? "full" : "some",
			   LOAD_INT(avg[0]), LOAD_FRAC(avg[0]),
+8 −0
Original line number Diff line number Diff line
@@ -126,6 +126,14 @@ __schedstats_from_se(struct sched_entity *se)
	return &task_of(se)->stats;
}

#ifdef CONFIG_QOS_SCHED
/*
 * Value stored in cfs_rq->throttled to mark a qos throttle, so it can be
 * distinguished from a cfs bandwidth throttle (which sets
 * cfs_rq->throttled to 1).
 */
#define QOS_THROTTLED	2
#endif

#ifdef CONFIG_PSI
void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next,