Unverified commit e64d5ad2 authored by openeuler-ci-bot, committed by Gitee

!13805 add ebpf sched

Merge Pull Request from: @ci-robot 
 
PR sync from: Cheng Yu <serein.chengyu@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/4ET7PRPIYVAYTLHTJAUJBUXCGUMW4C5N/ 
add ebpf sched

Cheng Yu (1):
  sched/ebpf: Support task selection programmable

Hui Tang (1):
  sched: Add kfunc to get cpu statistics

Roman Gushchin (2):
  libbpf: add support for scheduler bpf programs
  bpftool: recognize scheduler programs


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8OIT1
https://gitee.com/openeuler/kernel/issues/IB6NEH
https://gitee.com/openeuler/kernel/issues/I5F6X6 
 
Link: https://gitee.com/openeuler/kernel/pulls/13805

 

Reviewed-by: Zhang Peng <zhangpeng362@huawei.com>
Signed-off-by: Zhang Peng <zhangpeng362@huawei.com>
parents 9a754cea 6f06e093
+20 −0
@@ -2556,6 +2556,26 @@ struct sched_migrate_node {
	int dst_cpu;
	int dst_node;
};

struct bpf_sched_cpu_stats {
	refcount_t usage;
	unsigned int nr_running;
	unsigned int cfs_nr_running;
	unsigned int cfs_h_nr_running;
	unsigned int cfs_idle_h_nr_running;
	unsigned int rt_nr_running;
	unsigned int rr_nr_running;

	KABI_RESERVE(1)
	KABI_RESERVE(2)
	KABI_RESERVE(3)
	KABI_RESERVE(4)
	KABI_RESERVE(5)
	KABI_RESERVE(6)
	KABI_RESERVE(7)
	KABI_RESERVE(8)
};

#endif

#ifdef CONFIG_SCHED_CORE
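
The new struct is an allocator-backed object that BPF programs can own: as the kfunc documentation further down notes, a reference must either be released explicitly or embedded in a map as a kptr. A hypothetical sketch of the map-embedding variant (the map name and layout are illustrative, not part of this PR):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* One slot owning a pointer to a cpustats context across invocations. */
struct cpustats_slot {
	struct bpf_sched_cpu_stats __kptr *stats;
};

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, struct cpustats_slot);
} cpustats_map SEC(".maps");

Ownership moves in and out of the slot with bpf_kptr_xchg(), so a context created once can be reused across program runs instead of being released immediately.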
+4 −0
@@ -2,3 +2,7 @@
BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx)
BPF_SCHED_HOOK(int, -1, cfs_can_migrate_task, struct task_struct *p,
			struct sched_migrate_node *migrate_node)
BPF_SCHED_HOOK(int, -1, cfs_tag_entity_eligible, struct sched_entity *se)
BPF_SCHED_HOOK(int, -1, cfs_tag_pick_next_entity,
			const struct sched_entity *curr,
			const struct sched_entity *next)
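
For orientation, a hypothetical BPF-side implementation of one of the new hooks; the "sched/<hook>" SEC() convention and the manual __ksym prototype are assumptions, not something this diff shows. In entity_before() and entity_eligible() (see the kernel/sched/fair.c hunk below), a return value of 1 short-circuits the default vruntime logic and any other value falls through to it:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern long bpf_sched_tag_of_entity(struct sched_entity *se) __ksym;

SEC("sched/cfs_tag_pick_next_entity")	/* assumed section name */
int BPF_PROG(prefer_tagged_entity, const struct sched_entity *curr,
	     const struct sched_entity *next)
{
	/* Pick @curr first whenever it carries a positive tag. */
	if (bpf_sched_tag_of_entity((struct sched_entity *)curr) > 0)
		return 1;

	return -1;	/* keep the default vruntime comparison */
}

char LICENSE[] SEC("license") = "GPL";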
+3 −0
@@ -217,6 +217,7 @@ enum btf_kfunc_hook {
	BTF_KFUNC_HOOK_SOCKET_FILTER,
	BTF_KFUNC_HOOK_LWT,
	BTF_KFUNC_HOOK_NETFILTER,
	BTF_KFUNC_HOOK_SCHED,
	BTF_KFUNC_HOOK_MAX,
};

@@ -7864,6 +7865,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
		return BTF_KFUNC_HOOK_LWT;
	case BPF_PROG_TYPE_NETFILTER:
		return BTF_KFUNC_HOOK_NETFILTER;
	case BPF_PROG_TYPE_SCHED:
		return BTF_KFUNC_HOOK_SCHED;
	default:
		return BTF_KFUNC_HOOK_MAX;
	}
+190 −0
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/cgroup.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf_sched.h>
@@ -62,3 +63,192 @@ const struct bpf_verifier_ops bpf_sched_verifier_ops = {
	.get_func_proto = bpf_sched_func_proto,
	.is_valid_access = btf_ctx_access,
};

static struct bpf_mem_alloc bpf_cpustats_ma;

__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
		  "Global functions as their definitions will be in BTF");

/**
 * bpf_sched_cpustats_create() - Create a mutable BPF cpustats context.
 *
 * Allocates a cpustats context that can be queried, mutated, acquired, and
 * released by a BPF program. The cpustats context returned by this function
 * must either be embedded in a map as a kptr, or freed with
 * bpf_sched_cpustats_release().
 *
 * bpf_sched_cpustats_create() allocates memory using the BPF memory allocator,
 * and will not block. It may return NULL if no memory is available.
 */
__bpf_kfunc struct bpf_sched_cpu_stats *bpf_sched_cpustats_create(void)
{
	struct bpf_sched_cpu_stats *cpustats;

	cpustats = bpf_mem_cache_alloc(&bpf_cpustats_ma);
	if (!cpustats)
		return NULL;

	memset(cpustats, 0, sizeof(*cpustats));
	refcount_set(&cpustats->usage, 1);

	return cpustats;
}

/**
 * bpf_sched_cpustats_acquire() - Acquire a reference to a BPF cpustats.
 * @cpustats: The BPF cpustats being acquired. The cpustats must be a trusted
 *	      pointer.
 *
 * Acquires a reference to a BPF cpustats. The cpustats returned by this
 * function must either be embedded in a map as a kptr, or freed with
 * bpf_sched_cpustats_release().
 */
__bpf_kfunc struct bpf_sched_cpu_stats *bpf_sched_cpustats_acquire(
					struct bpf_sched_cpu_stats *cpustats)
{
	refcount_inc(&cpustats->usage);
	return cpustats;
}

/**
 * bpf_sched_cpustats_release() - Release a previously acquired BPF cpustats.
 * @cpustats: The cpustats being released.
 *
 * Releases a previously acquired reference to a BPF cpustats. When the final
 * reference of the BPF cpustats has been released, it is subsequently freed in
 * an RCU callback in the BPF memory allocator.
 */
__bpf_kfunc void
bpf_sched_cpustats_release(struct bpf_sched_cpu_stats *cpustats)
{
	if (!refcount_dec_and_test(&cpustats->usage))
		return;

	migrate_disable();
	bpf_mem_cache_free_rcu(&bpf_cpustats_ma, cpustats);
	migrate_enable();
}

/**
 * bpf_sched_cpu_stats_of() - Acquire the sched statistics of a CPU.
 * @cpuid: ID of the CPU to query.
 * @ctx: The cpustats context that the statistics are stored in.
 *
 * Return:
 *    0 - Success.
 *   <0 - Failure.
 */
__bpf_kfunc s32 bpf_sched_cpu_stats_of(int cpuid,
				       struct bpf_sched_cpu_stats *ctx)
{
	struct rq *rq;
	int cpu = cpuid;

	if (!ctx)
		return -EINVAL;

	if ((unsigned int)cpu >= nr_cpu_ids)
		return -EINVAL;

	rq = cpu_rq(cpu);
	memset(ctx, 0, sizeof(*ctx));

	SCHED_WARN_ON(!rcu_read_lock_held());
	/* nr_running */
	ctx->nr_running = rq->nr_running;
	ctx->cfs_nr_running = rq->cfs.nr_running;
	ctx->cfs_h_nr_running = rq->cfs.h_nr_running;
	ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running;
	ctx->rt_nr_running = rq->rt.rt_nr_running;
	ctx->rr_nr_running = rq->rt.rr_nr_running;

	return 0;
}

__diag_pop();

BTF_SET8_START(sched_cpustats_kfunc_btf_ids)
BTF_ID_FLAGS(func, bpf_sched_cpustats_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_sched_cpustats_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_sched_cpustats_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_sched_cpu_stats_of, KF_RCU)
BTF_SET8_END(sched_cpustats_kfunc_btf_ids)

static const struct btf_kfunc_id_set cpustats_kfunc_set = {
	.owner		= THIS_MODULE,
	.set		= &sched_cpustats_kfunc_btf_ids,
};

BTF_ID_LIST(cpustats_dtor_ids)
BTF_ID(struct, bpf_sched_cpu_stats)
BTF_ID(func, bpf_sched_cpustats_release)

__bpf_kfunc int bpf_sched_entity_is_task(struct sched_entity *se)
{
	if (!se)
		return -EINVAL;

	return entity_is_task(se);
}

__bpf_kfunc struct task_struct *bpf_sched_entity_to_task(struct sched_entity *se)
{
	if (se && entity_is_task(se))
		return task_of(se);

	return NULL;
}

__bpf_kfunc long bpf_sched_tag_of_entity(struct sched_entity *se)
{
	if (!se)
		return -EINVAL;

	if (entity_is_task(se))
		return task_of(se)->tag;

	return group_cfs_rq(se)->tg->tag;
}

BTF_SET8_START(sched_entity_kfunc_btf_ids)
BTF_ID_FLAGS(func, bpf_sched_entity_is_task)
BTF_ID_FLAGS(func, bpf_sched_entity_to_task)
BTF_ID_FLAGS(func, bpf_sched_tag_of_entity)
BTF_SET8_END(sched_entity_kfunc_btf_ids)

static const struct btf_kfunc_id_set sched_entity_kfunc_set = {
	.owner		= THIS_MODULE,
	.set		= &sched_entity_kfunc_btf_ids,
};

BTF_ID_LIST(sched_entity_dtor_ids)

static int __init bpf_kfunc_init(void)
{
	int ret;
	const struct btf_id_dtor_kfunc cpustats_dtors[] = {
		{
			.btf_id	      = cpustats_dtor_ids[0],
			.kfunc_btf_id = cpustats_dtor_ids[1]
		},
	};
	const struct btf_id_dtor_kfunc sched_entity_dtors[] = {
		{
			.btf_id	      = sched_entity_dtor_ids[0],
			.kfunc_btf_id = sched_entity_dtor_ids[1]
		},
	};

	ret = bpf_mem_alloc_init(&bpf_cpustats_ma, sizeof(struct bpf_sched_cpu_stats), false);
	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &cpustats_kfunc_set);
	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED, &cpustats_kfunc_set);
	ret = ret ?: register_btf_id_dtor_kfuncs(cpustats_dtors,
						ARRAY_SIZE(cpustats_dtors),
						THIS_MODULE);
	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED, &sched_entity_kfunc_set);
	return ret ?: register_btf_id_dtor_kfuncs(sched_entity_dtors,
						ARRAY_SIZE(sched_entity_dtors),
						THIS_MODULE);
}
late_initcall(bpf_kfunc_init);
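
Putting the kfuncs together, a minimal BPF-side sketch of the create/query/release lifecycle; the SEC() name and the hand-written __ksym prototypes are assumptions based on this series, and error handling is reduced to the bare minimum:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

extern struct bpf_sched_cpu_stats *bpf_sched_cpustats_create(void) __ksym;
extern void bpf_sched_cpustats_release(struct bpf_sched_cpu_stats *cpustats) __ksym;
extern s32 bpf_sched_cpu_stats_of(int cpuid, struct bpf_sched_cpu_stats *ctx) __ksym;

SEC("sched/cfs_select_rq")	/* assumed section name */
int BPF_PROG(peek_cpu0_load, struct sched_migrate_ctx *ctx)
{
	struct bpf_sched_cpu_stats *stats;

	stats = bpf_sched_cpustats_create();	/* KF_RET_NULL: may fail */
	if (!stats)
		return -1;			/* fall back to the default path */

	if (!bpf_sched_cpu_stats_of(0, stats))	/* snapshot CPU 0 */
		bpf_printk("cpu0 nr_running=%u", stats->nr_running);

	bpf_sched_cpustats_release(stats);	/* drop the last reference */
	return -1;				/* keep the kernel's CPU choice */
}

char LICENSE[] SEC("license") = "GPL";

Returning -1 matches the hooks' declared default value, so the program observes load without overriding the scheduler's decision.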
+14 −0
@@ -708,6 +708,13 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
static inline bool entity_before(const struct sched_entity *a,
				 const struct sched_entity *b)
{
#ifdef CONFIG_BPF_SCHED
	if (bpf_sched_enabled()) {
		if (bpf_sched_cfs_tag_pick_next_entity(a, b) == 1)
			return true;
	}
#endif

	/*
	 * Tiebreak on vruntime seems unnecessary since it can
	 * hardly happen.

@@ -905,6 +912,13 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)

int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_BPF_SCHED
	if (bpf_sched_enabled()) {
		if (bpf_sched_cfs_tag_entity_eligible(se) == 1)
			return 1;
	}
#endif

	return vruntime_eligible(cfs_rq, se->vruntime);
}

