Commit 243865da authored by Hui Tang, committed by Yongqiang Liu

cpuset: Introduce new interface for scheduler dynamic affinity

hulk inclusion
category: feature
bugzilla: 187173, https://gitee.com/openeuler/kernel/issues/I5G4IH


CVE: NA

--------------------------------

Add a 'preferred_cpus' control file to cgroup cpuset, together with the
per-task 'prefer_cpus' interface behind it.
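
For illustration, a minimal user-space sketch of driving the new file (the
mount point, the group name "mygrp", and the "cpuset." prefix follow common
cgroup-v1 conventions and are assumptions here, not part of this patch):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* Hypothetical group; preferred_cpus must stay a subset of cpuset.cpus. */
		const char *path = "/sys/fs/cgroup/cpuset/mygrp/cpuset.preferred_cpus";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Same cpulist syntax as cpuset.cpus; an empty write disables the feature. */
		if (write(fd, "0-3", strlen("0-3")) < 0)
			perror("write");
		close(fd);
		return 0;
	}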

Signed-off-by: Hui Tang <tanghui20@huawei.com>
Reviewed-by: Zhang Qiao <zhangqiao22@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
parent 5cabb5b5
include/linux/sched.h: +17 −0
@@ -1247,7 +1247,16 @@ struct task_struct {
#else
	KABI_RESERVE(5)
#endif

#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY)
	cpumask_t			*prefer_cpus;
#else
	KABI_RESERVE(6)
#endif
#else
	KABI_RESERVE(6)
#endif
	KABI_RESERVE(7)
	KABI_RESERVE(8)

@@ -1964,4 +1973,12 @@ static inline int sched_qos_cpu_overload(void)
}
#endif

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int dynamic_affinity_enabled(void);
int set_prefer_cpus_ptr(struct task_struct *p,
			const struct cpumask *new_mask);
int sched_prefer_cpus_fork(struct task_struct *p, struct task_struct *orig);
void sched_prefer_cpus_free(struct task_struct *p);
#endif

#endif
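
The nested #if in the first hunk above is the usual KABI trick: the genksyms
CRC pass (__GENKSYMS__) must keep seeing the reserved slot so the symbol
checksum of struct task_struct stays stable, while real builds get either the
new pointer or the untouched slot. A stand-alone sketch of the pattern
(KABI_RESERVE is simplified here for illustration; the kernel's real macro
differs):

	/* Simplified stand-in: the kernel's macro reserves one pointer-sized slot. */
	#define KABI_RESERVE(n) unsigned long kabi_reserved##n;

	struct example {
	#if !defined(__GENKSYMS__)
	# if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY)
		void *prefer_cpus;	/* real field in feature builds */
	# else
		KABI_RESERVE(6)		/* slot stays free when the feature is off */
	# endif
	#else
		KABI_RESERVE(6)		/* what the genksyms CRC pass hashes */
	#endif
	};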
init/init_task.c: +3 −0
@@ -180,6 +180,9 @@ struct task_struct init_task
#ifdef CONFIG_SECURITY
	.security	= NULL,
#endif
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	.prefer_cpus	= NULL,
#endif
#ifdef CONFIG_PID_RESERVE
	.fork_pid_union = {
		.fork_pid = 0,
kernel/cgroup/cpuset.c: +150 −1
@@ -104,6 +104,9 @@ struct cpuset {
	/* user-configured CPUs and Memory Nodes allow to tasks */
	cpumask_var_t cpus_allowed;
	nodemask_t mems_allowed;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_var_t prefer_cpus;
#endif

	/* effective CPUs and Memory Nodes allow to tasks */
	cpumask_var_t effective_cpus;
@@ -436,11 +439,22 @@ static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
		goto free_cs;
	if (!alloc_cpumask_var(&trial->effective_cpus, GFP_KERNEL))
		goto free_cpus;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if (!alloc_cpumask_var(&trial->prefer_cpus, GFP_KERNEL))
		goto free_prefer_cpus;
#endif

	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
	cpumask_copy(trial->effective_cpus, cs->effective_cpus);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_copy(trial->prefer_cpus, cs->prefer_cpus);
#endif
	return trial;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_prefer_cpus:
	free_cpumask_var(trial->effective_cpus);
#endif
free_cpus:
	free_cpumask_var(trial->cpus_allowed);
free_cs:
@@ -456,6 +470,9 @@ static void free_trial_cpuset(struct cpuset *trial)
{
	free_cpumask_var(trial->effective_cpus);
	free_cpumask_var(trial->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	free_cpumask_var(trial->prefer_cpus);
#endif
	kfree(trial);
}

@@ -487,6 +504,11 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)

	rcu_read_lock();

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	ret = -EINVAL;
	if (!cpumask_subset(cur->prefer_cpus, trial->cpus_allowed))
		goto out;
#endif
	/* Each of our child cpusets must be a subset of us */
	ret = -EBUSY;
	cpuset_for_each_child(c, css, cur)
@@ -551,6 +573,66 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
	return ret;
}

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
static cpumask_var_t prefer_cpus_attach;

static void update_tasks_prefer_cpumask(struct cpuset *cs)
{
	struct css_task_iter it;
	struct task_struct *task;

	css_task_iter_start(&cs->css, 0, &it);
	while ((task = css_task_iter_next(&it)))
		set_prefer_cpus_ptr(task, cs->prefer_cpus);
	css_task_iter_end(&it);
}

/*
 * update_prefer_cpumask - update the prefer_cpus mask of a cpuset and
 *			   all tasks in it
 * @cs: the cpuset to consider
 * @trialcs: trial cpuset
 * @buf: buffer of cpu numbers written to this cpuset
 */
static int update_prefer_cpumask(struct cpuset *cs, struct cpuset *trialcs,
				 const char *buf)
{
	int retval;

	if (cs == &top_cpuset)
		return -EACCES;

	/*
	 * An empty prefer_cpus is OK; it means dynamic affinity is
	 * disabled for the tasks in this cpuset.
	 * Since cpulist_parse() fails on an empty mask, we special case
	 * that parsing.
	 */
	if (!*buf) {
		cpumask_clear(trialcs->prefer_cpus);
	} else {
		retval = cpulist_parse(buf, trialcs->prefer_cpus);
		if (retval < 0)
			return retval;
	}

	/* Nothing to do if the cpus didn't change */
	if (cpumask_equal(cs->prefer_cpus, trialcs->prefer_cpus))
		return 0;

	if (!cpumask_subset(trialcs->prefer_cpus, cs->cpus_allowed))
		return -EINVAL;

	update_tasks_prefer_cpumask(trialcs);

	spin_lock_irq(&callback_lock);
	cpumask_copy(cs->prefer_cpus, trialcs->prefer_cpus);
	spin_unlock_irq(&callback_lock);

	return 0;
}
#endif

#ifdef CONFIG_SMP
/*
 * Helper routine for generate_sched_domains().
@@ -1543,6 +1625,10 @@ static void cpuset_attach(struct cgroup_taskset *tset)
	else
		guarantee_online_cpus(cs, cpus_attach);

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_copy(prefer_cpus_attach, cs->prefer_cpus);
#endif

	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);

	cgroup_taskset_for_each(task, css, tset) {
@@ -1551,6 +1637,9 @@ static void cpuset_attach(struct cgroup_taskset *tset)
		 * fail.  TODO: have a better way to handle failure here
		 */
		WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
		set_prefer_cpus_ptr(task, prefer_cpus_attach);
#endif

		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
		cpuset_update_task_spread_flag(cs, task);
@@ -1610,6 +1699,9 @@ typedef enum {
	FILE_MEMORY_PRESSURE,
	FILE_SPREAD_PAGE,
	FILE_SPREAD_SLAB,
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	FILE_DYNAMIC_CPULIST,
#endif
} cpuset_filetype_t;

static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
@@ -1735,6 +1827,11 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
	case FILE_MEMLIST:
		retval = update_nodemask(cs, trialcs, buf);
		break;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	case FILE_DYNAMIC_CPULIST:
		retval = update_prefer_cpumask(cs, trialcs, buf);
		break;
#endif
	default:
		retval = -EINVAL;
		break;
@@ -1778,6 +1875,11 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
	case FILE_EFFECTIVE_MEMLIST:
		seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
		break;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	case FILE_DYNAMIC_CPULIST:
		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->prefer_cpus));
		break;
#endif
	default:
		ret = -EINVAL;
	}
@@ -1935,7 +2037,15 @@ static struct cftype files[] = {
		.write_u64 = cpuset_write_u64,
		.private = FILE_MEMORY_PRESSURE_ENABLED,
	},

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	{
		.name = "preferred_cpus",
		.seq_show = cpuset_common_seq_show,
		.write = cpuset_write_resmask,
		.max_write_len = (100U + 6 * NR_CPUS),
		.private = FILE_DYNAMIC_CPULIST,
	},
#endif
	{ }	/* terminate */
};

@@ -1959,17 +2069,28 @@ cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
		goto free_cs;
	if (!alloc_cpumask_var(&cs->effective_cpus, GFP_KERNEL))
		goto free_cpus;
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if (!alloc_cpumask_var(&cs->prefer_cpus, GFP_KERNEL))
		goto free_effective_cpus;
#endif

	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
	cpumask_clear(cs->cpus_allowed);
	nodes_clear(cs->mems_allowed);
	cpumask_clear(cs->effective_cpus);
	nodes_clear(cs->effective_mems);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_clear(cs->prefer_cpus);
#endif
	fmeter_init(&cs->fmeter);
	cs->relax_domain_level = -1;

	return &cs->css;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
free_effective_cpus:
	free_cpumask_var(cs->effective_cpus);
#endif
free_cpus:
	free_cpumask_var(cs->cpus_allowed);
free_cs:
@@ -2034,6 +2155,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
	cs->effective_mems = parent->mems_allowed;
	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_copy(cs->prefer_cpus, parent->prefer_cpus);
#endif
	spin_unlock_irq(&callback_lock);
out_unlock:
	mutex_unlock(&cpuset_mutex);
@@ -2065,6 +2189,9 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
{
	struct cpuset *cs = css_cs(css);

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	free_cpumask_var(cs->prefer_cpus);
#endif
	free_cpumask_var(cs->effective_cpus);
	free_cpumask_var(cs->cpus_allowed);
	kfree(cs);
@@ -2099,6 +2226,9 @@ static void cpuset_fork(struct task_struct *task)
		return;

	set_cpus_allowed_ptr(task, &current->cpus_allowed);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	set_prefer_cpus_ptr(task, current->prefer_cpus);
#endif
	task->mems_allowed = current->mems_allowed;
}

@@ -2129,11 +2259,17 @@ int __init cpuset_init(void)

	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	BUG_ON(!alloc_cpumask_var(&top_cpuset.prefer_cpus, GFP_KERNEL));
#endif

	cpumask_setall(top_cpuset.cpus_allowed);
	nodes_setall(top_cpuset.mems_allowed);
	cpumask_setall(top_cpuset.effective_cpus);
	nodes_setall(top_cpuset.effective_mems);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_clear(top_cpuset.prefer_cpus);
#endif

	fmeter_init(&top_cpuset.fmeter);
	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
@@ -2144,6 +2280,9 @@ int __init cpuset_init(void)
		return err;

	BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	BUG_ON(!alloc_cpumask_var(&prefer_cpus_attach, GFP_KERNEL));
#endif

	return 0;
}
@@ -2180,6 +2319,9 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
			    struct cpumask *new_cpus, nodemask_t *new_mems,
			    bool cpus_updated, bool mems_updated)
{
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	cpumask_t prefer_cpus;
#endif
	bool is_empty;

	spin_lock_irq(&callback_lock);
@@ -2198,6 +2340,13 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
	if (mems_updated && !nodes_empty(cs->mems_allowed))
		update_tasks_nodemask(cs);

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	if (!cpumask_subset(cs->prefer_cpus, cs->cpus_allowed)) {
		cpumask_and(&prefer_cpus, cs->prefer_cpus, cs->cpus_allowed);
		cpumask_copy(cs->prefer_cpus, &prefer_cpus);
		update_tasks_prefer_cpumask(cs);
	}
#endif
	is_empty = cpumask_empty(cs->cpus_allowed) ||
		   nodes_empty(cs->mems_allowed);

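The cpuset side enforces one invariant in every direction: prefer_cpus must
stay a subset of cpus_allowed. validate_change() rejects shrinking
cpus_allowed below it, update_prefer_cpumask() rejects a non-subset write,
and the hotplug path above trims it back in place. A small sketch of the
expected error behavior from user space (paths and group name are
assumptions, as in the earlier example):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* Returns 0 on success, -errno on failure. */
	static int write_str(const char *path, const char *val)
	{
		int fd = open(path, O_WRONLY);
		ssize_t n;

		if (fd < 0)
			return -errno;
		n = write(fd, val, strlen(val));
		close(fd);
		return n < 0 ? -errno : 0;
	}

	int main(void)
	{
		const char *grp = "/sys/fs/cgroup/cpuset/mygrp";
		char cpus[128], pref[128];

		snprintf(cpus, sizeof(cpus), "%s/cpuset.cpus", grp);
		snprintf(pref, sizeof(pref), "%s/cpuset.preferred_cpus", grp);

		write_str(cpus, "0-3");
		printf("%d\n", write_str(pref, "0-4")); /* -EINVAL: CPU 4 not allowed */
		printf("%d\n", write_str(pref, "0-1")); /* 0: subset of cpuset.cpus   */
		return 0;
	}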
kernel/fork.c: +13 −0
@@ -459,6 +459,9 @@ void free_task(struct task_struct *tsk)
	arch_release_task_struct(tsk);
	if (tsk->flags & PF_KTHREAD)
		free_kthread_struct(tsk);
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	sched_prefer_cpus_free(tsk);
#endif
	free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -888,6 +891,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
	tsk->seccomp.filter = NULL;
#endif

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	tsk->prefer_cpus = NULL;
#endif

	setup_thread_stack(tsk, orig);
	clear_user_return_notifier(tsk);
	clear_tsk_need_resched(tsk);
@@ -1862,6 +1869,12 @@ static __latent_entropy struct task_struct *copy_process(
	if (retval < 0)
		goto bad_fork_free;

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
	retval = sched_prefer_cpus_fork(p, current);
	if (retval)
		goto bad_fork_free;
#endif

	/*
	 * If multiple threads are within copy_process(), then this check
	 * triggers too late. This doesn't hurt, the check is only there
kernel/sched/core.c: +95 −0
@@ -7191,6 +7191,101 @@ static int __maybe_unused cpu_period_quota_parse(char *buf,
	return 0;
}

#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
int sched_prefer_cpus_fork(struct task_struct *p, struct task_struct *orig)
{
	p->prefer_cpus = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
	if (!p->prefer_cpus)
		return -ENOMEM;

	if (orig->prefer_cpus)
		cpumask_copy(p->prefer_cpus, orig->prefer_cpus);
	else
		cpumask_clear(p->prefer_cpus);

	return 0;
}

void sched_prefer_cpus_free(struct task_struct *p)
{
	kfree(p->prefer_cpus);
}

static void do_set_prefer_cpus(struct task_struct *p,
				const struct cpumask *new_mask)
{
	struct rq *rq = task_rq(p);
	bool queued, running;

	lockdep_assert_held(&p->pi_lock);

	queued = task_on_rq_queued(p);
	running = task_current(rq, p);

	if (queued) {
		/*
		 * The caller (__set_prefer_cpus_ptr) takes task_rq_lock(),
		 * so rq->lock is held here; assert that before requeueing.
		 */
		lockdep_assert_held(&rq->lock);
		dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK);
	}
	if (running)
		put_prev_task(rq, p);

	cpumask_copy(p->prefer_cpus, new_mask);

	if (queued)
		enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
	if (running)
		set_curr_task(rq, p);
}

/*
 * Change a given task's preferred CPU affinity. The scheduler will try to
 * migrate the thread to a CPU in the preferred bitmask first.
 *
 * NOTE: the caller must have a valid reference to the task, the
 * task must not exit() & deallocate itself prematurely. The
 * call is not atomic; no spinlocks may be held.
 */
static int __set_prefer_cpus_ptr(struct task_struct *p,
				  const struct cpumask *new_mask, bool check)
{
	struct rq_flags rf;
	struct rq *rq;
	int ret = 0;

	if (unlikely(!p->prefer_cpus))
		return -EINVAL;

	rq = task_rq_lock(p, &rf);
	update_rq_clock(rq);

	if (cpumask_equal(p->prefer_cpus, new_mask))
		goto out;

	if (!cpumask_subset(new_mask, &p->cpus_allowed)) {
		ret = -EINVAL;
		goto out;
	}

	do_set_prefer_cpus(p, new_mask);
out:
	task_rq_unlock(rq, p, &rf);

	return ret;
}

int set_prefer_cpus_ptr(struct task_struct *p, const struct cpumask *new_mask)
{
	if (p->sched_class != &fair_sched_class)
		return 0;

	return __set_prefer_cpus_ptr(p, new_mask, false);
}
#endif

#ifdef CONFIG_CFS_BANDWIDTH
static int cpu_max_show(struct seq_file *sf, void *v)
{
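
The helpers above are plumbing only; this patch adds no consumer of
p->prefer_cpus in the fair class yet. As a hedged sketch of how later
in-kernel code could use the new API (bias_task_to_low_cpus() and the choice
of CPUs 0-1 are hypothetical, not part of this series):

	#include <linux/cpumask.h>
	#include <linux/sched.h>

	static int bias_task_to_low_cpus(struct task_struct *p)
	{
		cpumask_t mask;

		cpumask_clear(&mask);
		cpumask_set_cpu(0, &mask);
		cpumask_set_cpu(1, &mask);

		/*
		 * Returns 0 for non-fair tasks, -EINVAL if the mask is not a
		 * subset of p->cpus_allowed or prefer_cpus is unallocated.
		 */
		return set_prefer_cpus_ptr(p, &mask);
	}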