Commit 99f92594 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'sched-urgent-2021-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Misc fixes:

   - Fix performance regression caused by lack of intended batching of
     RCU callbacks by over-eager NOHZ-full code.

   - Fix cgroups related corruption of load_avg and load_sum metrics.

   - Three fixes to fix blocked load, util_sum/runnable_sum and util_est
     tracking bugs"

* tag 'sched-urgent-2021-06-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Fix util_est UTIL_AVG_UNCHANGED handling
  sched/pelt: Ensure that *_sum is always synced with *_avg
  tick/nohz: Only check for RCU deferred wakeup on user/guest entry when needed
  sched/fair: Make sure to update tg contrib for blocked load
  sched/fair: Keep load_avg and load_sum synced
parents 191aaf6c 68d7a190
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -3,6 +3,7 @@
#define __LINUX_ENTRYKVM_H

#include <linux/entry-common.h>
#include <linux/tick.h>

/* Transfer to guest mode work */
#ifdef CONFIG_KVM_XFER_TO_GUEST_WORK
@@ -57,7 +58,7 @@ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu);
static inline void xfer_to_guest_mode_prepare(void)
{
	lockdep_assert_irqs_disabled();
	rcu_nocb_flush_deferred_wakeup();
	tick_nohz_user_enter_prepare();
}

/**
+8 −0
Original line number Diff line number Diff line
@@ -350,11 +350,19 @@ struct load_weight {
 * Only for tasks we track a moving average of the past instantaneous
 * estimated utilization. This allows to absorb sporadic drops in utilization
 * of an otherwise almost periodic task.
 *
 * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg
 * updates. When a task is dequeued, its util_est should not be updated if its
 * util_avg has not been updated in the meantime.
 * This information is mapped into the MSB bit of util_est.enqueued at dequeue
 * time. Since max value of util_est.enqueued for a task is 1024 (PELT util_avg
 * for a task) it is safe to use MSB.
 */
struct util_est {
	unsigned int			enqueued;
	unsigned int			ewma;
#define UTIL_EST_WEIGHT_SHIFT		2
#define UTIL_AVG_UNCHANGED		0x80000000
} __attribute__((__aligned__(sizeof(u64))));

/*
+7 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@
#include <linux/context_tracking_state.h>
#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/rcupdate.h>

#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
@@ -300,4 +301,10 @@ static inline void tick_nohz_task_switch(void)
		__tick_nohz_task_switch();
}

static inline void tick_nohz_user_enter_prepare(void)
{
	if (tick_nohz_full_cpu(smp_processor_id()))
		rcu_nocb_flush_deferred_wakeup();
}

#endif
+3 −2
Original line number Diff line number Diff line
@@ -5,6 +5,7 @@
#include <linux/highmem.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
#include <linux/tick.h>

#include "common.h"

@@ -186,7 +187,7 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
		local_irq_disable_exit_to_user();

		/* Check if any of the above work has queued a deferred wakeup */
		rcu_nocb_flush_deferred_wakeup();
		tick_nohz_user_enter_prepare();

		ti_work = READ_ONCE(current_thread_info()->flags);
	}
@@ -202,7 +203,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
	lockdep_assert_irqs_disabled();

	/* Flush pending rcuog wakeup before the last need_resched() check */
	rcu_nocb_flush_deferred_wakeup();
	tick_nohz_user_enter_prepare();

	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);
+2 −1
Original line number Diff line number Diff line
@@ -885,6 +885,7 @@ static const struct seq_operations sched_debug_sops = {
#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F) __PS(#F, F)
#define   P(F) __PS(#F, p->F)
#define   PM(F, M) __PS(#F, p->F & (M))
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F) __PSN(#F, F)
#define   PN(F) __PSN(#F, p->F)
@@ -1011,7 +1012,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
	P(se.avg.util_avg);
	P(se.avg.last_update_time);
	P(se.avg.util_est.ewma);
	P(se.avg.util_est.enqueued);
	PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
#endif
#ifdef CONFIG_UCLAMP_TASK
	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
Loading