Commit a65983d9 authored by Lu Jialin's avatar Lu Jialin Committed by yanhaitao
Browse files

sched/psi: Introduce fine grained stall time collect for cgroup reclaim

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8QUNW



-------------------------------

PSI tracks pressure stalls for memory, cpu, io and irq. However, there
are different pressure types that can cause memory pressure, and
memory.pressure cannot show the type of pressure effectively. The same
applies to cpu.pressure.
Introduce pressure.stat in psi, which monitors the specific reasons
for memory.pressure and cpu.pressure, such as global/cgroup memory
reclaim, memory compaction, cpu cfs bandwidth and so on. Userland can
then choose the right remedy to reduce the pressure, depending on the
specific pressure reason.
This patch introduces fine-grained memory stall time collection for
cgroup reclaim.

Signed-off-by: Lu Jialin <lujialin4@huawei.com>
parent 3dfdd2f9
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -1834,7 +1834,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
 */
static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
{
	unsigned long pflags;
	unsigned long pflags = 0;
	bool clamp;
	u64 now = ktime_to_ns(ktime_get());
	u64 exp;
+1 −1
Original line number Diff line number Diff line
@@ -475,7 +475,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
	u64 em_len;
	u64 em_start;
	struct extent_map *em;
	unsigned long pflags;
	unsigned long pflags = 0;
	int memstall = 0;
	blk_status_t ret;
	int ret2;
+1 −1
Original line number Diff line number Diff line
@@ -1636,7 +1636,7 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
	struct block_device *last_bdev;
	unsigned int nr_bios = 0;
	struct bio *bio = NULL;
	unsigned long pflags;
	unsigned long pflags = 0;
	int memstall = 0;

	/*
+38 −0
Original line number Diff line number Diff line
@@ -81,6 +81,20 @@ enum psi_aggregators {
	NR_PSI_AGGREGATORS,
};

#ifdef CONFIG_PSI_FINE_GRAINED
/*
 * Fine-grained PSI stall states. NR_PSI_STAT_STATES sizes the
 * fine_grained_times[] array in struct psi_group_cpu.
 */
enum psi_stat_states {
	PSI_MEMCG_RECLAIM_SOME = 0,
	PSI_MEMCG_RECLAIM_FULL = 1,
	/* Number of fine-grained states; must stay last. */
	NR_PSI_STAT_STATES,
};

/*
 * Fine-grained PSI task counters. NR_PSI_STAT_TASK_COUNTS sizes the
 * fine_grained_tasks[] array in struct psi_group_cpu.
 *
 * The values are spelled out because the mapping from psi_memstall_type
 * (type * 2 - 2 for memstall, type * 2 - 1 for running) depends on
 * this exact numbering.
 */
enum psi_stat_task_count {
	NR_MEMCG_RECLAIM = 0,
	NR_MEMCG_RECLAIM_RUNNING = 1,
	/* Number of fine-grained task counters; must stay last. */
	NR_PSI_STAT_TASK_COUNTS,
};
#endif /* CONFIG_PSI_FINE_GRAINED */

struct psi_group_cpu {
	/* 1st cacheline updated by the scheduler */

@@ -104,6 +118,13 @@ struct psi_group_cpu {
	/* Delta detection against the sampling buckets */
	u32 times_prev[NR_PSI_AGGREGATORS][NR_PSI_STATES]
			____cacheline_aligned_in_smp;

#ifdef CONFIG_PSI_FINE_GRAINED
	CACHELINE_PADDING(_pad1_);
	u32 fine_grained_state_mask;
	u32 fine_grained_times[NR_PSI_STAT_STATES];
	unsigned int fine_grained_tasks[NR_PSI_STAT_TASK_COUNTS];
#endif
};

/* PSI growth tracking window */
@@ -215,4 +236,21 @@ struct psi_group { };

#endif /* CONFIG_PSI */

#ifdef CONFIG_PSI_FINE_GRAINED
/*
 * Each memstall type has two task counters in psi_stat_task_count: one
 * for memstall tasks and one for running tasks. The reason is the same
 * as for NR_MEMSTALL_RUNNING.
 * Because psi_memstall_type starts at 1, the correspondence between
 * psi_memstall_type and psi_stat_task_count is as follows:
 *
 * memstall : psi_memstall_type * 2 - 2;
 * running  : psi_memstall_type * 2 - 1;
 */
enum psi_memstall_type {
	PSI_MEMCG_RECLAIM = 1,
};
#else
#define PSI_MEMCG_RECLAIM		0
#endif /* CONFIG_PSI_FINE_GRAINED */

#endif /* _LINUX_PSI_TYPES_H */
+3 −0
Original line number Diff line number Diff line
@@ -1552,6 +1552,9 @@ struct task_struct {
	const cpumask_t			*select_cpus;
#endif

#ifdef CONFIG_PSI_FINE_GRAINED
	int memstall_type;
#endif
	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
Loading