Commit 590680d1 authored by Linus Torvalds

Merge tag 'sched-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes/updates from Thomas Gleixner:

 - Deduplicate the average computations in the scheduler core and the
   fair class code.

 - Fix a race between runtime distribution and assignment which can
   cause the quota to be exceeded by up to 70% (a simplified model of
   the fix follows this list).

 - Prevent negative results in the imbalance calculation

 - Remove a stale warning in the workqueue code which can be triggered
   since the call site was moved out of preempt-disabled code. It's a
   false positive.

 - Deduplicate the print macros for procfs

 - Add the uclamp values to the SCHED_DEBUG procfs output for completeness
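
A note on the runtime-distribution race (second item above): after the fix,
distribute_cfs_runtime() takes cfs_b->lock for every throttled cfs_rq and
draws only what is still left in the global pool at the moment of assignment,
instead of distributing an unlocked snapshot that is subtracted back later.
The sketch below is hypothetical userspace code (invented names, plain
pthreads), not the kernel implementation; it only illustrates why taking each
grant under the lock cannot over-commit the pool:

  #include <pthread.h>
  #include <stdio.h>

  /* Toy stand-in for the global bandwidth pool (cfs_b->runtime). */
  static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
  static long pool = 1000;

  /* Like the fixed distribute loop: decide how much to grant while
   * holding the lock, so the pool can never go negative. */
  static long grab(long want)
  {
          long got;

          pthread_mutex_lock(&pool_lock);
          got = want < pool ? want : pool;
          pool -= got;
          pthread_mutex_unlock(&pool_lock);
          return got;
  }

  static void *consumer(void *unused)
  {
          long total = 0;
          int i;

          (void)unused;
          for (i = 0; i < 100; i++)
                  total += grab(7);
          printf("granted %ld\n", total);
          return NULL;
  }

  int main(void)
  {
          pthread_t t[4];
          int i;

          for (i = 0; i < 4; i++)
                  pthread_create(&t[i], NULL, consumer, NULL);
          for (i = 0; i < 4; i++)
                  pthread_join(t[i], NULL);
          printf("pool left: %ld\n", pool);       /* never negative */
          return 0;
  }

The pre-fix scheme was the equivalent of reading the pool once, handing that
amount out without the lock, and subtracting it afterwards; with throttled
groups assigning themselves runtime concurrently, the same budget could be
spent twice, which is how the quota could be overrun.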

* tag 'sched-urgent-2020-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/debug: Add task uclamp values to SCHED_DEBUG procfs
  sched/debug: Factor out printing formats into common macros
  sched/debug: Remove redundant macro define
  sched/core: Remove unused rq::last_load_update_tick
  workqueue: Remove the warning in wq_worker_sleeping()
  sched/fair: Fix negative imbalance in imbalance calculation
  sched/fair: Fix race between runtime distribution and assignment
  sched/fair: Align rq->avg_idle and rq->avg_scan_cost
parents 20e2aa81 96e74ebf
kernel/sched/core.c  +2 −8
@@ -2119,12 +2119,6 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
	return cpu;
}

-static void update_avg(u64 *avg, u64 sample)
-{
-	s64 diff = sample - *avg;
-	*avg += diff >> 3;
-}
-
void sched_set_stop_task(int cpu, struct task_struct *stop)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
@@ -4126,7 +4120,8 @@ static inline void sched_submit_work(struct task_struct *tsk)
	 * it wants to wake up a task to maintain concurrency.
	 * As this function is called inside the schedule() context,
	 * we disable preemption to avoid it calling schedule() again
-	 * in the possible wakeup of a kworker.
+	 * in the possible wakeup of a kworker and because wq_worker_sleeping()
+	 * requires it.
	 */
	if (tsk->flags & (PF_WQ_WORKER | PF_IO_WORKER)) {
		preempt_disable();
@@ -6699,7 +6694,6 @@ void __init sched_init(void)

		rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ_COMMON
-		rq->last_load_update_tick = jiffies;
		rq->last_blocked_load_update_tick = jiffies;
		atomic_set(&rq->nohz_flags, 0);
#endif
kernel/sched/debug.c  +18 −26
@@ -816,10 +816,12 @@ static int __init init_sched_debug_procfs(void)

__initcall(init_sched_debug_procfs);

-#define __P(F)	SEQ_printf(m, "%-45s:%21Ld\n",	     #F, (long long)F)
-#define   P(F)	SEQ_printf(m, "%-45s:%21Ld\n",	     #F, (long long)p->F)
-#define __PN(F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define   PN(F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
+#define __P(F) __PS(#F, F)
+#define   P(F) __PS(#F, p->F)
+#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
+#define __PN(F) __PSN(#F, F)
+#define   PN(F) __PSN(#F, p->F)


#ifdef CONFIG_NUMA_BALANCING
@@ -868,18 +870,9 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
	SEQ_printf(m,
		"---------------------------------------------------------"
		"----------\n");
-#define __P(F) \
-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
-#define P(F) \
-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
-#define P_SCHEDSTAT(F) \
-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)schedstat_val(p->F))
-#define __PN(F) \
-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN(F) \
-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
-#define PN_SCHEDSTAT(F) \
-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(p->F)))
+
+#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->F))
+#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))

	PN(se.exec_start);
	PN(se.vruntime);
@@ -939,10 +932,8 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
	}

	__P(nr_switches);
-	SEQ_printf(m, "%-45s:%21Ld\n",
-		   "nr_voluntary_switches", (long long)p->nvcsw);
-	SEQ_printf(m, "%-45s:%21Ld\n",
-		   "nr_involuntary_switches", (long long)p->nivcsw);
+	__PS("nr_voluntary_switches", p->nvcsw);
+	__PS("nr_involuntary_switches", p->nivcsw);

	P(se.load.weight);
#ifdef CONFIG_SMP
@@ -955,6 +946,12 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
	P(se.avg.last_update_time);
	P(se.avg.util_est.ewma);
	P(se.avg.util_est.enqueued);
+#endif
+#ifdef CONFIG_UCLAMP_TASK
+	__PS("uclamp.min", p->uclamp[UCLAMP_MIN].value);
+	__PS("uclamp.max", p->uclamp[UCLAMP_MAX].value);
+	__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
+	__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif
	P(policy);
	P(prio);
@@ -963,11 +960,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
		P(dl.deadline);
	}
#undef PN_SCHEDSTAT
-#undef PN
-#undef __PN
#undef P_SCHEDSTAT
-#undef P
-#undef __P

	{
		unsigned int this_cpu = raw_smp_processor_id();
@@ -975,8 +968,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
-		SEQ_printf(m, "%-45s:%21Ld\n",
-			   "clock-delta", (long long)(t1-t0));
+		__PS("clock-delta", t1-t0);
	}

	sched_show_numa(p, m);
kernel/sched/fair.c  +21 −25
@@ -4836,11 +4836,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
		resched_curr(rq);
}

-static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
+static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
{
	struct cfs_rq *cfs_rq;
-	u64 runtime;
-	u64 starting_runtime = remaining;
+	u64 runtime, remaining = 1;

	rcu_read_lock();
	list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
@@ -4855,10 +4854,13 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
		/* By the above check, this should never be true */
		SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);

+		raw_spin_lock(&cfs_b->lock);
		runtime = -cfs_rq->runtime_remaining + 1;
-		if (runtime > remaining)
-			runtime = remaining;
-		remaining -= runtime;
+		if (runtime > cfs_b->runtime)
+			runtime = cfs_b->runtime;
+		cfs_b->runtime -= runtime;
+		remaining = cfs_b->runtime;
+		raw_spin_unlock(&cfs_b->lock);

		cfs_rq->runtime_remaining += runtime;

@@ -4873,8 +4875,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
			break;
	}
	rcu_read_unlock();
-
-	return starting_runtime - remaining;
}

/*
@@ -4885,7 +4885,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining)
 */
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
{
-	u64 runtime;
	int throttled;

	/* no need to continue the timer with no bandwidth constraint */
@@ -4914,24 +4913,17 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, u
	cfs_b->nr_throttled += overrun;

	/*
-	 * This check is repeated as we are holding onto the new bandwidth while
-	 * we unthrottle. This can potentially race with an unthrottled group
-	 * trying to acquire new bandwidth from the global pool. This can result
-	 * in us over-using our runtime if it is all used during this loop, but
-	 * only by limited amounts in that extreme case.
+	 * This check is repeated as we release cfs_b->lock while we unthrottle.
	 */
	while (throttled && cfs_b->runtime > 0 && !cfs_b->distribute_running) {
-		runtime = cfs_b->runtime;
		cfs_b->distribute_running = 1;
		raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
		/* we can't nest cfs_b->lock while distributing bandwidth */
-		runtime = distribute_cfs_runtime(cfs_b, runtime);
+		distribute_cfs_runtime(cfs_b);
		raw_spin_lock_irqsave(&cfs_b->lock, flags);

		cfs_b->distribute_running = 0;
		throttled = !list_empty(&cfs_b->throttled_cfs_rq);
-
-		lsub_positive(&cfs_b->runtime, runtime);
	}

	/*
@@ -5065,10 +5057,9 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
	if (!runtime)
		return;

-	runtime = distribute_cfs_runtime(cfs_b, runtime);
+	distribute_cfs_runtime(cfs_b);

	raw_spin_lock_irqsave(&cfs_b->lock, flags);
-	lsub_positive(&cfs_b->runtime, runtime);
	cfs_b->distribute_running = 0;
	raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
}
@@ -6080,8 +6071,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
	struct sched_domain *this_sd;
	u64 avg_cost, avg_idle;
-	u64 time, cost;
-	s64 delta;
+	u64 time;
	int this = smp_processor_id();
	int cpu, nr = INT_MAX;

@@ -6119,9 +6109,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
	}

	time = cpu_clock(this) - time;
-	cost = this_sd->avg_scan_cost;
-	delta = (s64)(time - cost) / 8;
-	this_sd->avg_scan_cost += delta;
+	update_avg(&this_sd->avg_scan_cost, time);

	return cpu;
}
@@ -9048,6 +9036,14 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s

		sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
				sds->total_capacity;
+		/*
+		 * If the local group is more loaded than the selected
+		 * busiest group don't try to pull any tasks.
+		 */
+		if (local->avg_load >= busiest->avg_load) {
+			env->imbalance = 0;
+			return;
+		}
	}

	/*
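
The last hunk above is the negative-imbalance fix. If the local group is
already at least as loaded as the group picked as busiest, the load delta is
logically negative; depending on the signedness of the intermediate types it
either goes negative or wraps to a huge unsigned value, and either way the
balancer would try to pull tasks toward the busier group. The fix bails out
with an imbalance of 0 before that math runs. A toy demonstration with
made-up numbers (the real calculate_imbalance() also factors in group
capacities and SCHED_CAPACITY_SCALE):

  #include <stdio.h>

  int main(void)
  {
          unsigned long local_avg_load = 1200;    /* hypothetical loads */
          unsigned long busiest_avg_load = 1000;
          unsigned long imbalance;

          /* Unguarded: the unsigned subtraction wraps around. */
          imbalance = busiest_avg_load - local_avg_load;
          printf("unguarded: %lu\n", imbalance);  /* huge bogus value */

          /* With the guard from the fix: a busier local group pulls nothing. */
          if (local_avg_load >= busiest_avg_load)
                  imbalance = 0;
          printf("guarded:   %lu\n", imbalance);
          return 0;
  }
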
kernel/sched/sched.h  +6 −1
@@ -195,6 +195,12 @@ static inline int task_has_dl_policy(struct task_struct *p)

#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)

+static inline void update_avg(u64 *avg, u64 sample)
+{
+	s64 diff = sample - *avg;
+	*avg += diff / 8;
+}

/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
@@ -882,7 +888,6 @@ struct rq {
#endif
#ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP
-	unsigned long		last_load_update_tick;
	unsigned long		last_blocked_load_update_tick;
	unsigned int		has_blocked_load;
#endif /* CONFIG_SMP */
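
The update_avg() helper consolidated here is an exponentially weighted moving
average with a 1/8 weight: each sample moves the average one eighth of the way
toward itself. This is the same computation select_idle_cpu() used to
open-code for avg_scan_cost (see the fair.c hunk above). A standalone sketch
of its behavior, using userspace types and made-up sample values:

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Same shape as the kernel helper: avg += (sample - avg) / 8. */
  static void update_avg(uint64_t *avg, uint64_t sample)
  {
          int64_t diff = sample - *avg;

          *avg += diff / 8;
  }

  int main(void)
  {
          uint64_t avg = 0;
          uint64_t samples[] = { 800, 800, 800, 100 };
          size_t i;

          for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
                  update_avg(&avg, samples[i]);
                  printf("sample %4llu -> avg %llu\n",
                         (unsigned long long)samples[i],
                         (unsigned long long)avg);
          }
          return 0;
  }

Note the removed core.c copy used diff >> 3 while the sched.h version uses
diff / 8; the two differ only for negative diffs (arithmetic shift rounds
toward minus infinity, division truncates toward zero).
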
kernel/workqueue.c  +4 −2
@@ -858,7 +858,8 @@ void wq_worker_running(struct task_struct *task)
 * @task: task going to sleep
 *
 * This function is called from schedule() when a busy worker is
- * going to sleep.
+ * going to sleep. Preemption needs to be disabled to protect ->sleeping
+ * assignment.
 */
void wq_worker_sleeping(struct task_struct *task)
{
@@ -875,7 +876,8 @@ void wq_worker_sleeping(struct task_struct *task)

	pool = worker->pool;

-	if (WARN_ON_ONCE(worker->sleeping))
+	/* Return if preempted before wq_worker_running() was reached */
+	if (worker->sleeping)
		return;

	worker->sleeping = 1;