Commit ed3b7923 authored by Linus Torvalds

Merge tag 'sched-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "Scheduler SMP load-balancer improvements:

   - Avoid unnecessary migrations within SMT domains on hybrid systems.

     Problem:

        On hybrid CPU systems (processors with a mixture of
        higher-frequency SMT cores and lower-frequency non-SMT cores),
        under the old code lower-priority CPUs pulled tasks from the
        higher-priority cores if more than one SMT sibling was busy,
        resulting in many unnecessary task migrations.

     Solution:

        The new code improves the load balancer to recognize SMT cores
        with more than one busy sibling, and allows lower-priority CPUs
        to pull tasks from them. This avoids superfluous migrations and
        lets lower-priority cores inspect all SMT siblings for the
        busiest queue. (A rough sketch of the pull decision follows
        this list.)

   - Implement the 'runnable boosting' feature in the EAS balancer:
     consider CPU contention in frequency selection, in EAS maximum
     utilization and in load-balance busiest-CPU selection.

     This improves CPU utilization for certain workloads while leaving
     other key workloads unchanged. (A sketch of the boosting idea
     follows this list as well.)
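
     As a rough illustration of the first item above (a hedged sketch,
     not the kernel's actual code; the function and parameter names are
     invented):

        /*
         * May a lower-priority (e.g. non-SMT) CPU pull a task from a
         * higher-priority SMT core under ASYM_PACKING?
         */
        static bool lower_prio_cpu_may_pull(int busy_siblings)
        {
                /*
                 * With a single busy sibling, the task already gets the
                 * core's full single-thread performance; migrating it
                 * to a lower-priority CPU would be a net loss.  Only a
                 * contended core (more than one busy sibling) is worth
                 * spreading out.
                 */
                return busy_siblings > 1;
        }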
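
     The 'runnable boosting' idea in the second item can be pictured as
     follows (a simplified sketch, not the exact fair.c code; the helper
     name is invented):

        /*
         * runnable_avg also counts tasks waiting on the runqueue, so
         * using it as a lower bound on util_avg makes CPU contention
         * visible to cpufreq and to busiest-CPU selection.
         */
        static unsigned long cpu_util_boosted(unsigned long util_avg,
                                              unsigned long runnable_avg,
                                              bool boost)
        {
                if (boost && runnable_avg > util_avg)
                        return runnable_avg;
                return util_avg;
        }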

  Scheduler infrastructure improvements:

   - Rewrite the scheduler topology setup code by consolidating it into
     the build_sched_topology() helper function, which builds the
     topology dynamically.

   - Resolve the local_clock() vs. noinstr complications by rewriting
     the code: provide separate sched_clock_noinstr() and
     local_clock_noinstr() functions for use from noinstr (i.e.
     non-instrumentable) code, and make sure it is all
     instrumentation-safe. (Usage is sketched below.)
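
     Intended usage, as a hedged sketch (the caller below is
     hypothetical):

        /*
         * Code in the noinstr section (e.g. low-level entry or cpuidle
         * paths) must not call instrumented helpers, so it uses the
         * new *_noinstr() variants instead of local_clock().
         */
        static noinstr u64 get_entry_timestamp(void)
        {
                return local_clock_noinstr();
        }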

  Fixes:

   - Fix a kthread_park() race with wait_woken()

   - Fix misc wait_task_inactive() bugs unearthed by the -rt merge:
       - Fix UP PREEMPT bug by unifying the SMP and UP implementations
       - Fix task_struct::saved_state handling

   - Fix various rq clock update bugs, unearthed by turning on the rq
     clock debugging code.

   - Fix the PSI WINDOW_MIN_US trigger limit, which was easy to trigger
     by creating enough cgroups, by removing the warning and restricting
     window-size triggers to writers with PSI file write permission or
     CAP_SYS_RESOURCE. (A userspace usage sketch follows this list.)

   - Propagate SMT flags in the topology when removing degenerate
     domains

   - Fix grub_reclaim() calculation bug in the deadline scheduler code

   - Avoid resetting the min update period when it is unnecessary, in
     psi_trigger_destroy().

   - Don't balance a task to its current running CPU in load_balance(),
     which was possible on certain NUMA topologies with overlapping
     groups.

   - Fix the sched-debug printing of rq->nr_uninterruptible
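
     For the PSI trigger item above, a minimal userspace sketch (the
     trigger string follows the documented PSI ABI; error handling
     omitted):

        #include <fcntl.h>
        #include <unistd.h>

        int main(void)
        {
                /*
                 * Wake up when 'some' memory stall time exceeds 150ms
                 * within a 1s window.  Per the fix above, small window
                 * sizes are restricted to writers with PSI file write
                 * permission or CAP_SYS_RESOURCE.
                 */
                int fd = open("/proc/pressure/memory",
                              O_RDWR | O_NONBLOCK);
                write(fd, "some 150000 1000000", 19);
                /* ...then poll() the fd with POLLPRI for events... */
                return 0;
        }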

  Cleanups:

   - Address various -Wmissing-prototype warnings, as a preparation to
     (maybe) enable this warning in the future.

   - Remove unused code

   - Mark more functions __init

   - Fix shadow-variable warnings"

* tag 'sched-core-2023-06-27' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (50 commits)
  sched/core: Avoid multiple calling update_rq_clock() in __cfsb_csd_unthrottle()
  sched/core: Avoid double calling update_rq_clock() in __balance_push_cpu_stop()
  sched/core: Fixed missing rq clock update before calling set_rq_offline()
  sched/deadline: Update GRUB description in the documentation
  sched/deadline: Fix bandwidth reclaim equation in GRUB
  sched/wait: Fix a kthread_park race with wait_woken()
  sched/topology: Mark set_sched_topology() __init
  sched/fair: Rename variable cpu_util eff_util
  arm64/arch_timer: Fix MMIO byteswap
  sched/fair, cpufreq: Introduce 'runnable boosting'
  sched/fair: Refactor CPU utilization functions
  cpuidle: Use local_clock_noinstr()
  sched/clock: Provide local_clock_noinstr()
  x86/tsc: Provide sched_clock_noinstr()
  clocksource: hyper-v: Provide noinstr sched_clock()
  clocksource: hyper-v: Adjust hv_read_tsc_page_tsc() to avoid special casing U64_MAX
  x86/vdso: Fix gettimeofday masking
  math64: Always inline u128 version of mul_u64_u64_shr()
  s390/time: Provide sched_clock_noinstr()
  loongarch: Provide noinstr sched_clock_read()
  ...
parents e8f75c02 ebb83d84
+4 −1
@@ -203,12 +203,15 @@ Deadline Task Scheduling
  - Total bandwidth (this_bw): this is the sum of all tasks "belonging" to the
    runqueue, including the tasks in Inactive state.
 
+ - Maximum usable bandwidth (max_bw): This is the maximum bandwidth usable by
+   deadline tasks and is currently set to the RT capacity.
+
 
 The algorithm reclaims the bandwidth of the tasks in Inactive state.
 It does so by decrementing the runtime of the executing task Ti at a pace equal
 to
 
-           dq = -max{ Ui / Umax, (1 - Uinact - Uextra) } dt
+           dq = -(max{ Ui, (Umax - Uinact - Uextra) } / Umax) dt
 
 where:
+1 −7
@@ -88,13 +88,7 @@ static inline notrace u64 arch_timer_read_cntvct_el0(void)
 
 #define arch_timer_reg_read_stable(reg)					\
 	({								\
-		u64 _val;						\
-									\
-		preempt_disable_notrace();				\
-		_val = erratum_handler(read_ ## reg)();			\
-		preempt_enable_notrace();				\
-									\
-		_val;							\
+		erratum_handler(read_ ## reg)();			\
 	})
 
 /*
+6 −6
@@ -22,13 +22,13 @@
  * Generic IO read/write.  These perform native-endian accesses.
  */
 #define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeb(u8 val, volatile void __iomem *addr)
 {
 	asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
 #define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+static __always_inline void __raw_writew(u16 val, volatile void __iomem *addr)
 {
 	asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
 }
@@ -40,13 +40,13 @@ static __always_inline void __raw_writel(u32 val, volatile void __iomem *addr)
 }
 
 #define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+static __always_inline void __raw_writeq(u64 val, volatile void __iomem *addr)
 {
 	asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
 }
 
 #define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
+static __always_inline u8 __raw_readb(const volatile void __iomem *addr)
 {
 	u8 val;
 	asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
@@ -57,7 +57,7 @@ static inline u8 __raw_readb(const volatile void __iomem *addr)
 }
 
 #define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
+static __always_inline u16 __raw_readw(const volatile void __iomem *addr)
 {
 	u16 val;
 
@@ -80,7 +80,7 @@ static __always_inline u32 __raw_readl(const volatile void __iomem *addr)
 }
 
 #define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
+static __always_inline u64 __raw_readq(const volatile void __iomem *addr)
 {
 	u64 val;
 	asm volatile(ALTERNATIVE("ldr %0, [%1]",
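
The switch from 'static inline' to '__always_inline' above is presumably
driven by the noinstr clock work in this merge: plain 'inline' is only a
hint, and the compiler may emit an out-of-line, instrumentable copy that
noinstr code must not call. A hedged sketch of the kind of caller that
depends on forced inlining (the function below is invented):

	/*
	 * Hypothetical noinstr user: the accessor must be truly inlined
	 * so that no traceable out-of-line call is generated.
	 */
	static noinstr u64 read_mmio_counter(const volatile void __iomem *reg)
	{
		return __raw_readq(reg);
	}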
+1 −1
@@ -1167,7 +1167,7 @@ static __always_inline void iocsr_write64(u64 val, u32 reg)
 
 #ifndef __ASSEMBLY__
 
-static inline u64 drdtime(void)
+static __always_inline u64 drdtime(void)
 {
 	int rID = 0;
 	u64 val = 0;
+3 −3
@@ -190,9 +190,9 @@ static u64 read_const_counter(struct clocksource *clk)
 	return drdtime();
 }
 
-static u64 native_sched_clock(void)
+static noinstr u64 sched_clock_read(void)
 {
-	return read_const_counter(NULL);
+	return drdtime();
 }
 
 static struct clocksource clocksource_const = {
@@ -211,7 +211,7 @@ int __init constant_clocksource_init(void)
 
 	res = clocksource_register_hz(&clocksource_const, freq);
 
-	sched_clock_register(native_sched_clock, 64, freq);
+	sched_clock_register(sched_clock_read, 64, freq);
 
 	pr_info("Constant clock source device register\n");
 