Unverified Commit 518c4345, authored by openeuler-ci-bot, committed by Gitee
Browse files

!7916 v5 Introduce NUMA isolation and consolidation

Merge Pull Request from: @ci-robot 
 
PR sync from: Hui Tang <tanghui20@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Q3Z5CC6SXAGYZMBWKVRFUSZBV3W7HLML/ 
Introduce NUMA isolation and consolidation.

Guan Jing (1):
  sched: Add can_migrate_task hook

Hui Tang (10):
  sched: Introduce CONFIG_TASK_PLACEMENT_BY_CPU_RANGE
  sched: Some fixes for select_rq hook
  bpf:programmable: Add nodemask operation collection
  sched: Introduce task relationship by net and memory
  bpf:programmable: Add helper to get memory and net relationship
  sched: Add ioctl to get relationship
  sched: Update numa group preferred node periodically
  bpf:programmable: Add helper to set preferred node
  sched: Introduce CONFIG_QOS_SCHED_NUMA_ICON
  config: Enable NUMA isolation and consolidation by default


-- 
2.34.1
 
https://gitee.com/openeuler/kernel/issues/I9GZAQ 
 
Link: https://gitee.com/openeuler/kernel/pulls/7916

 

Reviewed-by: Zhang Jianhua <chris.zjh@huawei.com>
Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Reviewed-by: zhangyi (F) <yi.zhang@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents d07e4530 aab27d37
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -162,6 +162,8 @@ CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
CONFIG_SCHED_TASK_RELATIONSHIP=y
CONFIG_QOS_SCHED_NUMA_ICON=y
CONFIG_QOS_SCHED_SMART_GRID=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_RDMA=y
@@ -234,7 +236,7 @@ CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_BASE_RELATIVE=y
# CONFIG_BPF_LSM is not set
# CONFIG_BPF_SCHED is not set
CONFIG_BPF_SCHED=y
CONFIG_BPF_SYSCALL=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_BPF_JIT_ALWAYS_ON=y
+2 −0
Original line number Diff line number Diff line
@@ -167,6 +167,8 @@ CONFIG_FAIR_GROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
# CONFIG_SCHED_TASK_RELATIONSHIP is not set
# CONFIG_QOS_SCHED_NUMA_ICON is not set
# CONFIG_QOS_SCHED_SMART_GRID is not set
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_RDMA=y
+2 −0
Original line number Diff line number Diff line
@@ -38,6 +38,7 @@
#include <linux/sched/coredump.h>
#include <linux/sched/signal.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/relationship.h>
#include <linux/sched/task.h>
#include <linux/pagemap.h>
#include <linux/perf_event.h>
@@ -1822,6 +1823,7 @@ static int bprm_execve(struct linux_binprm *bprm,
	rseq_execve(current);
	acct_update_integrals(current);
	task_numa_free(current, false);
	task_relationship_free(current, true);
	return retval;

out:
+57 −1
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/resource.h>
#include <linux/latencytop.h>
#include <linux/sched/prio.h>
#include <linux/sched/relationship.h>
#include <linux/sched/types.h>
#include <linux/signal_types.h>
#include <linux/mm_types_task.h>
@@ -1437,11 +1438,15 @@ struct task_struct {
	KABI_USE(7, void *pf_io_worker)
#if defined(CONFIG_QOS_SCHED_DYNAMIC_AFFINITY) && !defined(__GENKSYMS__)
	KABI_USE(8, cpumask_t *prefer_cpus)
	KABI_USE(9, const cpumask_t *select_cpus)
#else
	KABI_RESERVE(8)
#endif
#if defined(CONFIG_TASK_PLACEMENT_BY_CPU_RANGE) && !defined(__GENKSYMS__)
	KABI_USE(9, const cpumask_t *select_cpus)
#else
	KABI_RESERVE(9)
#endif

#if (defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)) && defined(CONFIG_X86)
	KABI_USE(10, unsigned int sequential_io)
	KABI_USE(11, unsigned int sequential_io_avg)
@@ -1464,7 +1469,11 @@ struct task_struct {
#else
	KABI_RESERVE(13)
#endif
#if defined(CONFIG_SCHED_TASK_RELATIONSHIP) && !defined(__GENKSYMS__)
	KABI_USE(14, struct task_relationship *rship)
#else
	KABI_RESERVE(14)
#endif
	KABI_RESERVE(15)
	KABI_RESERVE(16)
	KABI_AUX_PTR(task_struct)
@@ -2351,6 +2360,21 @@ struct bpf_sched_cpu_stats {
	KABI_RESERVE(4)
};

/*
 * Statistics record for a single node.
 *
 * NOTE(review): going by the name this is presumably filled in on behalf of
 * BPF scheduler programs; neither the producer nor the units of the fields
 * are visible here -- confirm against the helper that populates it.
 */
struct bpf_node_stats {
	unsigned long util;		/* presumably node utilization -- TODO confirm */
	unsigned long compute_capacity;	/* presumably node compute capacity -- TODO confirm */
	unsigned int weight;		/* NOTE(review): possibly the node's CPU count -- confirm */

	/* Eight spare slots declared through KABI_RESERVE. */
	KABI_RESERVE(1)
	KABI_RESERVE(2)
	KABI_RESERVE(3)
	KABI_RESERVE(4)
	KABI_RESERVE(5)
	KABI_RESERVE(6)
	KABI_RESERVE(7)
	KABI_RESERVE(8)
};

struct cpumask_op_args {
	unsigned int op_type;
	void *arg1;
@@ -2374,6 +2398,28 @@ enum cpumask_op_type {
	CPUMASK_CPULIST_PARSE
};

/*
 * Operation selector carried in nodemask_op_args.op_type.
 *
 * The enumerators have no explicit initializers, so they take the values
 * 0..11 in declaration order; inserting or reordering entries changes the
 * numeric value seen by every user of this enum.
 *
 * NOTE(review): the names look like they mirror the nodemask helpers
 * (nodes_empty(), node_isset(), nodes_clear(), ...) -- confirm against the
 * code that dispatches on op_type.
 */
enum nodemask_op_type {
	NODEMASK_EMPTY,
	NODEMASK_NODE_ISSET,
	NODEMASK_NODES_CLEAR,
	NODEMASK_NODE_SET,
	NODEMASK_NODE_CLEAR,
	NODEMASK_NODELIST_PARSE,
	NODEMASK_TO_CPUMASK,
	NODEMASK_NODES_ANDNOT,
	NODEMASK_NODES_AND,
	NODEMASK_NODES_OR,
	NODEMASK_WEIGHT,
	NODEMASK_ONLINE
};

/*
 * Argument bundle for a single nodemask operation.
 *
 * op_type selects the operation; arg1..arg3 are untyped pointers.
 * NOTE(review): which of the three pointers are used, and what each one
 * points to, presumably depends on op_type -- the consumer is not visible
 * here, verify before relying on any particular layout.
 */
struct nodemask_op_args {
	enum nodemask_op_type op_type;
	void *arg1;
	void *arg2;
	void *arg3;
};

struct sched_migrate_ctx {
	struct task_struct *task;
	struct cpumask *select_idle_mask;
@@ -2402,5 +2448,15 @@ struct sched_affine_ctx {
	KABI_RESERVE(3)
	KABI_RESERVE(4)
};

/*
 * Source/destination CPU pair describing a migration.
 *
 * NOTE(review): presumably the context handed to the can_migrate_task hook
 * introduced by this series -- confirm against the hook's call site.
 */
struct sched_migrate_node {
	int src_cpu;	/* CPU the task would move away from (per field name) */
	int dst_cpu;	/* CPU the task would move to (per field name) */

	/* Four spare slots declared through KABI_RESERVE. */
	KABI_RESERVE(1)
	KABI_RESERVE(2)
	KABI_RESERVE(3)
	KABI_RESERVE(4)
};
#endif
#endif
+202 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_SCHED_RELATIONSHIP_H
#define _LINUX_SCHED_RELATIONSHIP_H

#include <linux/nodemask.h>
#include <linux/jump_label.h>
#include <linux/refcount.h>
#include <uapi/linux/sched_ctrl.h>

#define FAULT_NODES_MAX 4

struct task_struct;
struct rq;

#ifdef CONFIG_SCHED_DEBUG
struct seq_file;
#endif

/*
 * One (node id, value) pair.
 *
 * Used as the element type of the FAULT_NODES_MAX-sized arrays in
 * struct bpf_mm_relationship and struct numa_fault_ext.
 */
struct fault_array_info {
	int nid;		/* node id this entry refers to */
	unsigned long val;	/* value recorded for that node; presumably a fault count or score -- confirm */
};

/*
 * Fields common to both relationship kinds; embedded as the "comm" member
 * of struct bpf_net_relationship and struct bpf_mm_relationship.
 */
struct relationship_comm {
	int nr_tasks;			/* presumably number of tasks in the group -- confirm */
	int gid;			/* presumably the group id -- confirm */
	nodemask_t preferred_node;	/* node mask (may hold more than one node) */
};

/*
 * Network half of struct bpf_relationship_get_args.
 *
 * NOTE(review): the byte counters are presumably group-wide totals, with
 * grp_remote_rxtx_bytes covering traffic on a remote node -- confirm
 * against the code that fills this in.
 */
struct bpf_net_relationship {
	struct relationship_comm comm;
	unsigned long grp_rxtx_bytes;
	unsigned long grp_remote_rxtx_bytes;
};

/*
 * Memory half of struct bpf_relationship_get_args.
 *
 * Both arrays hold exactly FAULT_NODES_MAX (node id, value) entries.
 * NOTE(review): "ordered" suggests the entries are kept sorted by value,
 * but the ordering direction is not visible in this header -- confirm.
 */
struct bpf_mm_relationship {
	struct relationship_comm comm;
	unsigned long grp_total_faults;
	struct fault_array_info grp_faults_ordered[FAULT_NODES_MAX];
	struct fault_array_info grp_score_ordered[FAULT_NODES_MAX];
};

/*
 * Output bundle combining the memory and network relationship data.
 * This is the argument type of sched_get_mm_relationship() and
 * sched_get_relationship() declared later in this header.
 */
struct bpf_relationship_get_args {
	struct bpf_mm_relationship mm;
	struct bpf_net_relationship net;
};

/*
 * Argument type of sched_set_curr_preferred_node() declared later in this
 * header; carries a single node mask.
 */
struct bpf_relationship_set_args {
	nodemask_t preferred_node;
};

/*
 * Header embedded in a relationship group (see the "hdr" member of
 * struct net_group).
 *
 * NOTE(review): "lock" presumably protects the fields below it -- the
 * locking rules are not visible in this header, confirm.
 */
struct relationship_hdr {
	refcount_t refcount;
	spinlock_t lock;
	int nr_tasks;
	int gid;
	/* Despite the "_nid" suffix this is a node mask, not a single node id. */
	nodemask_t preferred_nid;
};

/*
 * Request type stored in net_relationship_req.net_rship_type.
 *
 * NET_RS_TYPE_INVALID is pinned to 0; the remaining enumerators follow
 * implicitly as 1..4.
 * NOTE(review): NET_RS_TYPE_MAX looks like an end-of-range sentinel rather
 * than a real request type -- confirm.
 */
enum net_req_type {
	NET_RS_TYPE_INVALID = 0,
	NET_RS_TYPE_LOCAL,
	NET_RS_TYPE_RX,
	NET_RS_TYPE_TX,
	NET_RS_TYPE_MAX
};

/*
 * A network relationship request.
 *
 * This is the argument type of sched_net_relationship_submit() and is also
 * embedded as the "req" member of struct net_relationship_callback.
 *
 * NOTE(review): which fields are meaningful presumably depends on
 * net_rship_type -- confirm against the submit path.
 */
struct net_relationship_req {
	enum net_req_type net_rship_type;
	pid_t rx_pid;
	pid_t tx_pid;
	int nic_nid;
	int rx_dev_idx;
	int rx_dev_queue_idx;
	u64 rx_dev_netns_cookie;
	unsigned long rxtx_bytes;

	/* reserved */
	unsigned long rxtx_cnt;
};

/*
 * A callback_head bundled with a copy of a network relationship request;
 * embedded as the "cb" member of struct task_relationship.
 *
 * NOTE(review): "twork" is presumably queued as task work and "active"
 * presumably marks a request that is still pending -- confirm against the
 * submit path, which is not visible in this header.
 */
struct net_relationship_callback {
	struct callback_head twork;
	atomic_t active;
	pid_t src_pid;
	struct net_relationship_req req;
};

/*
 * A network relationship group.
 *
 * struct task_relationship refers to one of these through an __rcu
 * annotated pointer; the embedded rcu_head is presumably used to defer
 * freeing -- confirm against the release path.
 */
struct net_group {
	struct rcu_head rcu;
	struct relationship_hdr hdr;
	unsigned long rxtx_bytes;

	/* reserved */
	unsigned long rxtx_cnt;
};

/*
 * Additional NUMA fault data: a fixed array of FAULT_NODES_MAX
 * (node id, value) entries. Embedded as the "faults" member of
 * struct task_relationship.
 */
struct numa_fault_ext {
	struct fault_array_info faults_ordered[FAULT_NODES_MAX];
};

/*
 * Relationship state attached to a task.
 *
 * It groups three things: the task's network relationship data, extra NUMA
 * fault data, and (only with CONFIG_NUMA_BALANCING) the members used for
 * preferred-node adjustment.
 */
struct task_relationship {
	/* network relationship */
	struct net_group __rcu *net_group;
	spinlock_t net_lock;
	/* The next four members share their names with fields of struct net_relationship_req. */
	int nic_nid;
	int rx_dev_idx;
	int rx_dev_queue_idx;
	/*
	 * NOTE(review): declared u64 in struct net_relationship_req but
	 * unsigned long here; the two differ in width on 32-bit builds --
	 * confirm this is intentional.
	 */
	unsigned long rx_dev_netns_cookie;
	unsigned long rxtx_remote_bytes;
	unsigned long rxtx_remote_update_next;
	unsigned long rxtx_remote_buffer;
	unsigned long rxtx_bytes;
	unsigned long rxtx_buffer;
	unsigned long rxtx_update_next;
	struct net_relationship_callback cb;

	/* extras numa fault data */
	struct numa_fault_ext faults;

#ifdef CONFIG_NUMA_BALANCING
	/* preferred nodes adjust */
	u64 node_stamp;
	/* presumably serviced by task_preferred_node_work() -- confirm */
	struct callback_head node_work;
#endif
};

#ifdef CONFIG_BPF_SCHED
/*
 * Context pairing a task with a node mask; only defined when
 * CONFIG_BPF_SCHED is enabled.
 *
 * NOTE(review): KABI_RESERVE is used here, but this header does not include
 * <linux/kabi.h> itself, so it appears to rely on an includer or one of the
 * headers above pulling it in -- confirm.
 */
struct sched_preferred_node_ctx {
	struct task_struct *tsk;
	nodemask_t preferred_node;

	/* Four spare slots declared through KABI_RESERVE. */
	KABI_RESERVE(1)
	KABI_RESERVE(2)
	KABI_RESERVE(3)
	KABI_RESERVE(4)
};
#endif

extern void task_relationship_enable(void);
extern void task_relationship_disable(void);

#ifdef CONFIG_SCHED_DEBUG
extern void sched_show_relationship(struct task_struct *p, struct seq_file *m);
#endif

#ifdef CONFIG_SCHED_TASK_RELATIONSHIP
/*
 * Interfaces that exist only when CONFIG_SCHED_TASK_RELATIONSHIP is enabled.
 *
 * The #else branch of this header supplies no-op fallbacks for
 * sched_relationship_fork(), sched_relationship_free(),
 * task_relationship_free(), sched_net_relationship_submit() and
 * task_tick_relationship() only. Every other function declared here has no
 * fallback, so its callers must themselves be compiled only when the option
 * is enabled.
 */
extern int sched_relationship_fork(struct task_struct *p);
extern void sched_relationship_free(struct task_struct *p);
void task_relationship_free(struct task_struct *tsk, bool reset);
extern bool task_relationship_supported(struct task_struct *tsk);
extern int sched_net_relationship_submit(struct net_relationship_req *req);
extern void
sctl_sched_get_net_relationship(struct task_struct *tsk,
				struct sctl_net_relationship_info *info);
extern void
sctl_sched_get_mem_relationship(struct task_struct *tsk,
				struct sctl_mem_relationship_info *info);
extern void sched_get_mm_relationship(struct task_struct *tsk,
			       struct bpf_relationship_get_args *args);
extern void sched_get_relationship(struct task_struct *tsk,
				   struct bpf_relationship_get_args *args);
extern void numa_faults_update_and_sort(int nid, int new,
					  struct fault_array_info *stats);
extern void task_tick_relationship(struct rq *rq, struct task_struct *curr);

/* Takes a callback_head; presumably the handler for task_relationship.node_work -- confirm. */
extern void task_preferred_node_work(struct callback_head *work);
extern void
sched_set_curr_preferred_node(struct bpf_relationship_set_args *args);

DECLARE_STATIC_KEY_FALSE(__relationship_switch);

/*
 * Report whether the __relationship_switch static key is currently enabled.
 * The key is tested with static_branch_unlikely(), i.e. the disabled state
 * is treated as the expected case.
 */
static inline bool task_relationship_used(void)
{
	if (static_branch_unlikely(&__relationship_switch))
		return true;

	return false;
}
#else
/* Fallback for builds without CONFIG_SCHED_TASK_RELATIONSHIP: always false. */
static inline bool task_relationship_used(void) { return false; }

/*
 * Fallback for builds without CONFIG_SCHED_TASK_RELATIONSHIP: does nothing
 * with @p and always returns 0.
 */
static inline int sched_relationship_fork(struct task_struct *p) { return 0; }

/* Fallback for builds without CONFIG_SCHED_TASK_RELATIONSHIP: no-op. */
static inline void sched_relationship_free(struct task_struct *p)
{
}

/*
 * Fallback for builds without CONFIG_SCHED_TASK_RELATIONSHIP: no-op, both
 * @tsk and @reset are ignored.
 */
static inline void task_relationship_free(struct task_struct *tsk, bool reset)
{
}

/*
 * Fallback for builds without CONFIG_SCHED_TASK_RELATIONSHIP: @req is
 * ignored and 0 is returned.
 */
static inline int sched_net_relationship_submit(struct net_relationship_req *req) { return 0; }

/*
 * Fallback for builds without CONFIG_SCHED_TASK_RELATIONSHIP: no-op, both
 * @rq and @curr are ignored.
 */
static inline void task_tick_relationship(struct rq *rq, struct task_struct *curr)
{
}
#endif

#endif
Loading