Commit 586b222d authored by Linus Torvalds

Merge tag 'sched-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Allow unprivileged PSI poll()ing

 - Fix performance regression introduced by mm_cid

 - Reduce livepatch stalls by adding livepatch task switching to
   cond_resched(). This resolves livepatching busy-loop stalls with
   certain CPU-bound kthreads

 - Improve sched_move_task() performance on autogroup configs

 - On core-scheduling CPUs, avoid selecting throttled tasks to run

 - Misc cleanups, fixes and improvements

* tag 'sched-core-2023-04-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/clock: Fix local_clock() before sched_clock_init()
  sched/rt: Fix bad task migration for rt tasks
  sched: Fix performance regression introduced by mm_cid
  sched/core: Make sched_dynamic_mutex static
  sched/psi: Allow unprivileged polling of N*2s period
  sched/psi: Extract update_triggers side effect
  sched/psi: Rename existing poll members in preparation
  sched/psi: Rearrange polling code in preparation
  sched/fair: Fix inaccurate tally of ttwu_move_affine
  vhost: Fix livepatch timeouts in vhost_worker()
  livepatch,sched: Add livepatch task switching to cond_resched()
  livepatch: Skip task_call_func() for current task
  livepatch: Convert stack entries array to percpu
  sched: Interleave cfs bandwidth timers for improved single thread performance at low utilization
  sched/core: Reduce cost of sched_move_task when config autogroup
  sched/core: Avoid selecting the task that is throttled to run when core-sched enable
  sched/topology: Make sched_energy_mutex,update static
parents 7c339778 f31dcb15
Documentation/accounting/psi.rst: +4 −0
@@ -105,6 +105,10 @@ prevent overly frequent polling. Max limit is chosen as a high enough number
 after which monitors are most likely not needed and psi averages can be used
 instead.
 
+Unprivileged users can also create monitors, with the only limitation that the
+window size must be a multiple of 2s, in order to prevent excessive resource
+usage.
+
 When activated, psi monitor stays active for at least the duration of one
 tracking window to avoid repeated activations/deactivations when system is
 bouncing in and out of the stall state.
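For illustration, here is a minimal userspace sketch of what this change enables, adapted from the trigger example elsewhere in psi.rst: an unprivileged process registers a "some" memory-pressure trigger with a 150ms stall threshold over a 2s window (unprivileged windows must be a multiple of 2s), then blocks in poll() waiting for POLLPRI events. This is an editorial sketch, not part of the patch:

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* 150ms "some" stall threshold over a 2s window; 2s is the
	 * granularity unprivileged window sizes must be a multiple of. */
	const char trig[] = "some 150000 2000000";
	struct pollfd fds;

	fds.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
	if (fds.fd < 0) {
		fprintf(stderr, "open: %s\n", strerror(errno));
		return 1;
	}
	if (write(fds.fd, trig, strlen(trig) + 1) < 0) {
		fprintf(stderr, "write: %s\n", strerror(errno));
		return 1;
	}
	fds.events = POLLPRI;

	while (1) {
		int n = poll(&fds, 1, -1);

		if (n < 0) {
			fprintf(stderr, "poll: %s\n", strerror(errno));
			return 1;
		}
		if (fds.revents & POLLERR) {
			fprintf(stderr, "event source is gone\n");
			return 0;
		}
		if (fds.revents & POLLPRI)
			printf("memory pressure event\n");
	}
}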
drivers/vhost/vhost.c: +1 −2
@@ -361,8 +361,7 @@ static int vhost_worker(void *data)
 			kcov_remote_start_common(worker->kcov_handle);
 			work->fn(work);
 			kcov_remote_stop();
-			if (need_resched())
-				schedule();
+			cond_resched();
 		}
 	}

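The hunk above is the "vhost: Fix livepatch timeouts in vhost_worker()" change from the shortlog: an open-coded need_resched()/schedule() pair never reaches the livepatch hook that this series adds to cond_resched(), so a CPU-bound worker could stall a patch transition indefinitely. The general pattern, as a kernel-style sketch (my_worker() and do_one_unit_of_work() are hypothetical names, not vhost code):

#include <linux/kthread.h>
#include <linux/sched.h>

static int my_worker(void *data)
{
	while (!kthread_should_stop()) {
		do_one_unit_of_work(data);	/* hypothetical work item */
		/*
		 * Unlike "if (need_resched()) schedule();", cond_resched()
		 * also runs klp_sched_try_switch(), giving livepatch a
		 * chance to transition this task even while it stays busy.
		 */
		cond_resched();
	}
	return 0;
}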
include/linux/livepatch.h: +1 −0
@@ -13,6 +13,7 @@
 #include <linux/ftrace.h>
 #include <linux/completion.h>
 #include <linux/list.h>
+#include <linux/livepatch_sched.h>
 
 #if IS_ENABLED(CONFIG_LIVEPATCH)

include/linux/livepatch_sched.h (new file): +29 −0
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _LINUX_LIVEPATCH_SCHED_H_
#define _LINUX_LIVEPATCH_SCHED_H_

#include <linux/jump_label.h>
#include <linux/static_call_types.h>

#ifdef CONFIG_LIVEPATCH

void __klp_sched_try_switch(void);

#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key);

static __always_inline void klp_sched_try_switch(void)
{
	if (static_branch_unlikely(&klp_sched_try_switch_key))
		__klp_sched_try_switch();
}

#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* !CONFIG_LIVEPATCH */
static inline void klp_sched_try_switch(void) {}
static inline void __klp_sched_try_switch(void) {}
#endif /* CONFIG_LIVEPATCH */

#endif /* _LINUX_LIVEPATCH_SCHED_H_ */
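For context on how this new header gets consumed: when PREEMPT_DYNAMIC static calls are unavailable, the scheduler's cond_resched() path can invoke the static-branch-guarded hook directly. A sketch approximating that wiring (assumed shape, not the verbatim include/linux/sched.h change):

/* Sketch only: the non-static-call flavor of cond_resched() picking up
 * the hook above. The static key keeps this a no-op until a livepatch
 * transition is actually in progress. */
static __always_inline int _cond_resched(void)
{
	klp_sched_try_switch();
	return __cond_resched();
}

With PREEMPT_DYNAMIC static calls, the transition code presumably retargets the cond_resched static call at a wrapper that runs __klp_sched_try_switch() before __cond_resched(), which would explain why the header only defines the inline for the !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL case.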
include/linux/mm_types.h: +74 −8
@@ -573,6 +573,13 @@ struct vm_area_struct {
 	struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
 } __randomize_layout;
 
+#ifdef CONFIG_SCHED_MM_CID
+struct mm_cid {
+	u64 time;
+	int cid;
+};
+#endif
+
 struct kioctx_table;
 struct mm_struct {
 	struct {
@@ -623,15 +630,19 @@ struct mm_struct {
 		atomic_t mm_count;
 #ifdef CONFIG_SCHED_MM_CID
 		/**
-		 * @cid_lock: Protect cid bitmap updates vs lookups.
+		 * @pcpu_cid: Per-cpu current cid.
 		 *
-		 * Prevent situations where updates to the cid bitmap happen
-		 * concurrently with lookups. Those can lead to situations
-		 * where a lookup cannot find a free bit simply because it was
-		 * unlucky enough to load, non-atomically, bitmap words as they
-		 * were being concurrently updated by the updaters.
+		 * Keep track of the currently allocated mm_cid for each cpu.
+		 * The per-cpu mm_cid values are serialized by their respective
+		 * runqueue locks.
 		 */
-		raw_spinlock_t cid_lock;
+		struct mm_cid __percpu *pcpu_cid;
+		/*
+		 * @mm_cid_next_scan: Next mm_cid scan (in jiffies).
+		 *
+		 * When the next mm_cid scan is due (in jiffies).
+		 */
+		unsigned long mm_cid_next_scan;
 #endif
 #ifdef CONFIG_MMU
 		atomic_long_t pgtables_bytes;	/* size of all page tables */
@@ -899,6 +910,37 @@ static inline void vma_iter_init(struct vma_iterator *vmi,
 }
 
 #ifdef CONFIG_SCHED_MM_CID
+
+enum mm_cid_state {
+	MM_CID_UNSET = -1U,		/* Unset state has lazy_put flag set. */
+	MM_CID_LAZY_PUT = (1U << 31),
+};
+
+static inline bool mm_cid_is_unset(int cid)
+{
+	return cid == MM_CID_UNSET;
+}
+
+static inline bool mm_cid_is_lazy_put(int cid)
+{
+	return !mm_cid_is_unset(cid) && (cid & MM_CID_LAZY_PUT);
+}
+
+static inline bool mm_cid_is_valid(int cid)
+{
+	return !(cid & MM_CID_LAZY_PUT);
+}
+
+static inline int mm_cid_set_lazy_put(int cid)
+{
+	return cid | MM_CID_LAZY_PUT;
+}
+
+static inline int mm_cid_clear_lazy_put(int cid)
+{
+	return cid & ~MM_CID_LAZY_PUT;
+}
+
 /* Accessor for struct mm_struct's cidmask. */
 static inline cpumask_t *mm_cidmask(struct mm_struct *mm)
 {
@@ -912,16 +954,40 @@ static inline cpumask_t *mm_cidmask(struct mm_struct *mm)

 static inline void mm_init_cid(struct mm_struct *mm)
 {
-	raw_spin_lock_init(&mm->cid_lock);
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct mm_cid *pcpu_cid = per_cpu_ptr(mm->pcpu_cid, i);
+
+		pcpu_cid->cid = MM_CID_UNSET;
+		pcpu_cid->time = 0;
+	}
 	cpumask_clear(mm_cidmask(mm));
 }
 
+static inline int mm_alloc_cid(struct mm_struct *mm)
+{
+	mm->pcpu_cid = alloc_percpu(struct mm_cid);
+	if (!mm->pcpu_cid)
+		return -ENOMEM;
+	mm_init_cid(mm);
+	return 0;
+}
+
+static inline void mm_destroy_cid(struct mm_struct *mm)
+{
+	free_percpu(mm->pcpu_cid);
+	mm->pcpu_cid = NULL;
+}
+
 static inline unsigned int mm_cid_size(void)
 {
 	return cpumask_size();
 }
 #else /* CONFIG_SCHED_MM_CID */
 static inline void mm_init_cid(struct mm_struct *mm) { }
+static inline int mm_alloc_cid(struct mm_struct *mm) { return 0; }
+static inline void mm_destroy_cid(struct mm_struct *mm) { }
 static inline unsigned int mm_cid_size(void)
 {
 	return 0;
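To make the cid encoding above concrete: the whole state fits in one int, with bit 31 doubling as the lazy-put flag, and MM_CID_UNSET being the all-ones pattern (hence the comment that the unset state has the lazy_put flag set). A standalone userspace illustration mirroring the inline helpers (editorial sketch, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the mm_types.h encoding shown above. */
#define MM_CID_UNSET	(-1)
#define MM_CID_LAZY_PUT	(1U << 31)

static bool cid_is_unset(int cid)    { return cid == MM_CID_UNSET; }
static bool cid_is_lazy_put(int cid) { return !cid_is_unset(cid) && (cid & MM_CID_LAZY_PUT); }
static bool cid_is_valid(int cid)    { return !(cid & MM_CID_LAZY_PUT); }

int main(void)
{
	int cid = 3;			/* a freshly allocated cid */
	printf("cid=3:    valid=%d lazy=%d unset=%d\n",
	       cid_is_valid(cid), cid_is_lazy_put(cid), cid_is_unset(cid));

	cid |= MM_CID_LAZY_PUT;		/* mm_cid_set_lazy_put() */
	printf("lazy-put: valid=%d lazy=%d unset=%d\n",
	       cid_is_valid(cid), cid_is_lazy_put(cid), cid_is_unset(cid));

	cid &= ~MM_CID_LAZY_PUT;	/* mm_cid_clear_lazy_put() */
	printf("cleared:  valid=%d lazy=%d unset=%d\n",
	       cid_is_valid(cid), cid_is_lazy_put(cid), cid_is_unset(cid));

	cid = MM_CID_UNSET;		/* all ones: lazy bit is set too */
	printf("unset:    valid=%d lazy=%d unset=%d\n",
	       cid_is_valid(cid), cid_is_lazy_put(cid), cid_is_unset(cid));
	return 0;
}

mm_cid_is_valid() thus accepts exactly the values whose top bit is clear, i.e. real cids; both the lazy-put and unset encodings are rejected.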