Commit f0f516f1 authored by Yipeng Zou's avatar Yipeng Zou
Browse files

sched: introduce smart grid qos zone

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7ZBSR


CVE: NA

----------------------------------------

Since commit b869720191ec ("sched: smart grid: init sched_grid_qos
structure on QOS purpose") introduced a smart_grid-based QOS
partitioning mechanism, this commit further expands the partitioning
mechanism to implement smart_grid zone.

In the default smart_grid configuration, the entire system is divided
into two partitions:

1. Hot zone (performance first)
2. Warm zone (energy consumption priority)

In addition, the smart_grid will dynamically maintain the size of the hot
zone in the current system based on the task load status in the current
partition, which is based on commit 65523f55989a ("sched: Introduce smart
grid scheduling strategy for cfs").

 --------        --------        --------
| group0 |      | group1 |      | group2 |
 --------        --------        --------
    |                |              |
    v                v              v
 -------------------------    --------------
|                         |  |              |
|         hot zone        |  |   warm zone  |
|                         |  |              |
 -------------------------   ---------------

Signed-off-by: default avatarYipeng Zou <zouyipeng@huawei.com>
parent ae319ae1
Loading
Loading
Loading
Loading
+21 −0
Original line number Diff line number Diff line
@@ -84,7 +84,28 @@ void sched_grid_qos_free(struct task_struct *p);

int sched_grid_preferred_interleave_nid(struct mempolicy *policy);
int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask);

/* Partition types a CPU may belong to under the smart_grid scheme. */
enum sg_zone_type {
	SMART_GRID_ZONE_HOT = 0,	/* performance-first partition */
	SMART_GRID_ZONE_WARM,		/* energy-saving partition */
	SMART_GRID_ZONE_NR		/* number of zone types */
};

struct auto_affinity;
/*
 * Global smart_grid zone state: one cpumask per zone type plus the list
 * of registered auto_affinity instances whose current affinity domains
 * are OR-ed together to form the hot zone.
 */
struct sched_grid_zone {
	raw_spinlock_t lock;	/* protects cpus[] and af_list_head */
	struct cpumask cpus[SMART_GRID_ZONE_NR];
	struct list_head af_list_head;	/* struct auto_affinity list head */
};

int __init sched_grid_zone_init(void);
int sched_grid_zone_update(bool is_locked);
int sched_grid_zone_add_af(struct auto_affinity *af);
int sched_grid_zone_del_af(struct auto_affinity *af);
struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone);
#else
/* Stub for builds without smart_grid zone support: nothing to set up. */
static inline int __init sched_grid_zone_init(void) { return 0; }

static inline int
sched_grid_preferred_interleave_nid(struct mempolicy *policy)
{
+6 −1
Original line number Diff line number Diff line
@@ -23,7 +23,7 @@
#include "../workqueue_internal.h"
#include "../../io_uring/io-wq.h"
#include "../smpboot.h"

#include <linux/sched/grid_qos.h>
#include "pelt.h"
#include "smp.h"

@@ -8143,6 +8143,7 @@ void __init sched_init_smp(void)

	sched_smp_initialized = true;

	sched_grid_zone_init();
	init_auto_affinity(&root_task_group);
}

@@ -9635,6 +9636,10 @@ static int cpu_affinity_stat_show(struct seq_file *sf, void *v)
	seq_printf(sf, "dcount %d\n", ad->dcount);
	seq_printf(sf, "domain_mask 0x%x\n", ad->domain_mask);
	seq_printf(sf, "curr_level %d\n", ad->curr_level);
	seq_printf(sf, "zone hot %*pbl\n",
			cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_HOT)));
	seq_printf(sf, "zone warm %*pbl\n",
			cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_WARM)));
	for (i = 0; i < ad->dcount; i++)
		seq_printf(sf, "sd_level %d, cpu list %*pbl, stay_cnt %llu\n",
			i, cpumask_pr_args(ad->domains[i]),
+7 −0
Original line number Diff line number Diff line
@@ -5874,6 +5874,7 @@ static void affinity_domain_up(struct task_group *tg)
		if (IS_DOMAIN_SET(level + 1, ad->domain_mask) &&
		    cpumask_weight(ad->domains[level + 1]) > 0) {
			ad->curr_level = level + 1;
			sched_grid_zone_update(false);
			return;
		}
		level++;
@@ -5894,6 +5895,7 @@ static void affinity_domain_down(struct task_group *tg)

		if (IS_DOMAIN_SET(level - 1, ad->domain_mask)) {
			ad->curr_level = level - 1;
			sched_grid_zone_update(false);
			return;
		}
		level--;
@@ -5959,6 +5961,7 @@ static int tg_update_affinity_domain_down(struct task_group *tg, void *data)
		}

	}
	sched_grid_zone_update(false);
	raw_spin_unlock_irqrestore(&auto_affi->lock, flags);

	return 0;
@@ -6019,6 +6022,7 @@ void stop_auto_affinity(struct auto_affinity *auto_affi)
	raw_spin_unlock_irq(&auto_affi->lock);

	smart_grid_usage_dec();
	sched_grid_zone_update(false);
	mutex_unlock(&smart_grid_used_mutex);
}

@@ -6220,6 +6224,8 @@ int init_auto_affinity(struct task_group *tg)

	auto_affi->tg = tg;
	tg->auto_affinity = auto_affi;
	INIT_LIST_HEAD(&auto_affi->af_list);
	sched_grid_zone_add_af(auto_affi);
	return 0;
}

@@ -6234,6 +6240,7 @@ static void destroy_auto_affinity(struct task_group *tg)
		smart_grid_usage_dec();

	hrtimer_cancel(&auto_affi->period_timer);
	sched_grid_zone_del_af(auto_affi);
	free_affinity_domains(&auto_affi->ad);

	kfree(tg->auto_affinity);
+83 −0
Original line number Diff line number Diff line
@@ -23,6 +23,7 @@
#include <linux/numa.h>
#include <linux/sched/grid_qos.h>
#include "internal.h"
#include <../kernel/sched/sched.h>

static inline int qos_affinity_set(struct task_struct *p)
{
@@ -153,3 +154,85 @@ int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask)

	return nd;
}

static struct sched_grid_zone sg_zone;

/*
 * Initialise the global smart_grid zone: a fresh lock, an empty
 * auto_affinity registration list and an empty cpumask for every
 * zone type. Always returns 0.
 */
int __init sched_grid_zone_init(void)
{
	enum sg_zone_type zone;

	raw_spin_lock_init(&sg_zone.lock);
	INIT_LIST_HEAD(&sg_zone.af_list_head);

	for (zone = 0; zone < SMART_GRID_ZONE_NR; zone++)
		cpumask_clear(&sg_zone.cpus[zone]);

	return 0;
}

/*
 * Recompute the hot/warm zone cpumasks.
 *
 * The hot zone is the union of the currently selected affinity domain
 * of every registered auto_affinity instance (only the enabled ones
 * while smart_grid is in use); the warm zone is its complement.
 *
 * @is_locked: true if the caller already holds sg_zone.lock, in which
 *	       case the lock is neither taken nor released here.
 *
 * Always returns 0.
 */
int sched_grid_zone_update(bool is_locked)
{
	struct auto_affinity *af_pos;
	unsigned long flags = 0;	/* init: only set by irqsave below */

	if (!is_locked)
		raw_spin_lock_irqsave(&sg_zone.lock, flags);

	cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_HOT]);

	list_for_each_entry(af_pos, &sg_zone.af_list_head, af_list) {
		/*
		 * When smart_grid is not used, account every task_group;
		 * when it is used, account only enabled task_groups.
		 */
		if (smart_grid_used() && af_pos->mode == 0)
			continue;

		cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_HOT],
			   &sg_zone.cpus[SMART_GRID_ZONE_HOT],
			   af_pos->ad.domains[af_pos->ad.curr_level]);
	}

	cpumask_complement(&sg_zone.cpus[SMART_GRID_ZONE_WARM],
			   &sg_zone.cpus[SMART_GRID_ZONE_HOT]);

	if (!is_locked)
		raw_spin_unlock_irqrestore(&sg_zone.lock, flags);

	return 0;
}

/*
 * Register @af with the global smart_grid zone and refresh the zone
 * cpumasks to account for its affinity domain.
 *
 * Returns 0 on success, -EINVAL if @af is NULL.
 */
int sched_grid_zone_add_af(struct auto_affinity *af)
{
	unsigned long flags;

	if (af == NULL)
		return -EINVAL;

	raw_spin_lock_irqsave(&sg_zone.lock, flags);
	list_add_tail(&af->af_list, &sg_zone.af_list_head);
	/* We already hold sg_zone.lock, so tell update not to retake it. */
	sched_grid_zone_update(true);
	raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
	return 0;
}

/*
 * Unregister @af from the global smart_grid zone and refresh the zone
 * cpumasks so its affinity domain no longer contributes to the hot zone.
 *
 * Returns 0 on success, -EINVAL if @af is NULL.
 */
int sched_grid_zone_del_af(struct auto_affinity *af)
{
	unsigned long flags;

	if (af == NULL)
		return -EINVAL;

	raw_spin_lock_irqsave(&sg_zone.lock, flags);
	list_del(&af->af_list);
	/* We already hold sg_zone.lock, so tell update not to retake it. */
	sched_grid_zone_update(true);
	raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
	return 0;
}

/* Return the cpumask of @zone, or NULL for an out-of-range zone type. */
struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone)
{
	return zone < SMART_GRID_ZONE_NR ? &sg_zone.cpus[zone] : NULL;
}
+1 −0
Original line number Diff line number Diff line
@@ -425,6 +425,7 @@ struct auto_affinity {
	int			period_active;
	struct affinity_domain	ad;
	struct task_group	*tg;
	struct list_head	af_list;
#endif
};