Unverified commit d84571e9 authored by openeuler-ci-bot, committed by Gitee

!2987 fix CFS bandwidth vs. hrtimer self deadlock

Merge Pull Request from: @ci-robot 
 
PR sync from: Yu Liao <liaoyu15@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5GSY2W6Y6MYZYO7QMQR6QPSDWZNM2RIF/ 
Fix the following issue:

	CPU1      		         CPU2                            CPU3

T1 sets cfs_quota
   starts hrtimer cfs_bandwidth 'period_timer'
T1 is migrated to CPU3
					T2(worker thread) initiates
					offlining of CPU1
Hotplug operation starts
  ...
'period_timer' expires and is
re-enqueued on CPU1
  ...
take_cpu_down()
  CPU1 shuts down and does not handle timers
  anymore. They have to be migrated in the
  post dead hotplug steps by the control task.

					T2(worker thread) runs the
					post dead offline operation
									T1 holds lockA
									T1 is scheduled out
									//throttled by CFS bandwidth control
									T1 waits for 'period_timer' to expire
					T2(worker thread) waits for lockA

T1 waits forever for 'period_timer' to expire: the timer is pending on the
dead CPU1 and can only be migrated by the hrtimer offline callback
hrtimers_dead_cpu(), which T2 never reaches because it is blocked on lockA,
still held by the throttled T1.
Thus T1 and T2 wait for each other forever.
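
For reference, the ingredients are a task throttled by CFS bandwidth control
and an offline of the CPU that still has the armed 'period_timer' queued. A
user-space sketch of that setup follows; it is purely illustrative and makes
several assumptions (root privileges, an existing CPU1, a cgroup v1 cpu
controller mounted at /sys/fs/cgroup/cpu, an arbitrary group name and quota),
and since the hang also needs the throttled task to hold a lock required by a
later hotplug teardown callback, it demonstrates the setup rather than
reproducing the deadlock deterministically.

/*
 * Illustration only (not from the patches): sets up the ingredients of the
 * scenario above on a cgroup v1 system.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>

static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		exit(1);
	}
	fputs(val, f);
	fclose(f);
}

int main(void)
{
	cpu_set_t set;
	char buf[16];

	/* Bandwidth-limited group: 10ms of quota per 100ms period. */
	mkdir("/sys/fs/cgroup/cpu/bwtest", 0755);
	write_str("/sys/fs/cgroup/cpu/bwtest/cpu.cfs_period_us", "100000");
	write_str("/sys/fs/cgroup/cpu/bwtest/cpu.cfs_quota_us", "10000");

	if (fork() == 0) {
		/* The T1 role: a busy task in the group, pinned to CPU1 so
		 * that cfs_bandwidth's 'period_timer' gets armed there, then
		 * throttled once the quota is used up. */
		CPU_ZERO(&set);
		CPU_SET(1, &set);
		sched_setaffinity(0, sizeof(set), &set);
		snprintf(buf, sizeof(buf), "%d", getpid());
		write_str("/sys/fs/cgroup/cpu/bwtest/tasks", buf);
		for (;;)
			;	/* burn the quota */
	}

	sleep(1);	/* give the child time to arm the timer and throttle */

	/* The T2 role: offline the CPU the pending timer is queued on. */
	write_str("/sys/devices/system/cpu/cpu1/online", "0");
	return 0;
}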


Thomas Gleixner (1):
  hrtimers: Push pending hrtimers away from outgoing CPU earlier

Yu Liao (1):
  cpu/hotplug: fix kabi breakage in enum cpuhp_state


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I8JEVI 
 
Link: https://gitee.com/openeuler/kernel/pulls/2987

 

Reviewed-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents e4fc2c35 6fc275da
include/linux/hrtimer.h (+2 −2)
@@ -543,9 +543,9 @@ extern void sysrq_timer_list_show(void);
 
 int hrtimers_prepare_cpu(unsigned int cpu);
 #ifdef CONFIG_HOTPLUG_CPU
-int hrtimers_dead_cpu(unsigned int cpu);
+int hrtimers_cpu_dying(unsigned int cpu);
 #else
-#define hrtimers_dead_cpu	NULL
+static inline int hrtimers_cpu_dying(unsigned int cpu) { return 0; }
 #endif
 
 #endif
include/linux/smp.h (+1 −0)
@@ -278,5 +278,6 @@ int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par,
 int smpcfd_prepare_cpu(unsigned int cpu);
 int smpcfd_dead_cpu(unsigned int cpu);
 int smpcfd_dying_cpu(unsigned int cpu);
+int smpcfd_and_hrtimer_dying_cpu(unsigned int cpu);
 
 #endif /* __LINUX_SMP_H */
kernel/cpu.c (+15 −2)
@@ -1614,7 +1614,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 	[CPUHP_HRTIMERS_PREPARE] = {
 		.name			= "hrtimers:prepare",
 		.startup.single		= hrtimers_prepare_cpu,
-		.teardown.single	= hrtimers_dead_cpu,
+		.teardown.single	= NULL,
 	},
 	[CPUHP_SMPCFD_PREPARE] = {
 		.name			= "smpcfd:prepare",
@@ -1676,11 +1676,24 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.startup.single		= NULL,
 		.teardown.single	= rcutree_dying_cpu,
 	},
+	/*
+	 * In order to fix the kabi breakage, we had to move the hrtimers:dying
+	 * step into smpcfd:dying and create a new function smpcfd_and_hrtimer_dying_cpu().
+	 * Please ensure that there are no other steps with teardown handler
+	 * between smpcfd:dying and cpu:teardown.
+	 */
 	[CPUHP_AP_SMPCFD_DYING] = {
 		.name			= "smpcfd:dying",
 		.startup.single		= NULL,
-		.teardown.single	= smpcfd_dying_cpu,
+		.teardown.single	= smpcfd_and_hrtimer_dying_cpu,
 	},
+
+	/*
+	 * Attention: Please do not add steps between smpcfd:dying
+	 * and ap:online. Please refer to the above for specific
+	 * reasons.
+	 */
+
 	/* Entry state on starting. Interrupts enabled from here on. Transient
 	 * state for synchronsization */
 	[CPUHP_AP_ONLINE] = {
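
For comparison, Thomas Gleixner's mainline patch ("hrtimers: Push pending
hrtimers away from outgoing CPU earlier", listed above) adds a dedicated
hrtimers:dying state instead of folding the work into smpcfd:dying. Inserting
that state into enum cpuhp_state renumbers every state after it, which is the
kabi breakage the comment in this hunk refers to. A rough sketch of the
mainline shape, which the second patch in this PR reworks into the
smpcfd:dying variant shown above (trimmed to the relevant entries, exact
neighbours may differ per kernel version):

/* Mainline sketch (not in the merged result above). */
enum cpuhp_state {
	/* ... */
	CPUHP_AP_SMPCFD_DYING,
	CPUHP_AP_HRTIMERS_DYING,	/* new value renumbers the states after it */
	/* ... */
};

static struct cpuhp_step cpuhp_hp_states[] = {
	/* ... */
	[CPUHP_AP_HRTIMERS_DYING] = {
		.name			= "hrtimers:dying",
		.startup.single		= NULL,
		.teardown.single	= hrtimers_cpu_dying,
	},
	/* ... */
};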
kernel/smp.c (+8 −0)
@@ -75,6 +75,14 @@ int smpcfd_dead_cpu(unsigned int cpu)
 	return 0;
 }
 
+int smpcfd_and_hrtimer_dying_cpu(unsigned int cpu)
+{
+	hrtimers_cpu_dying(cpu);
+	smpcfd_dying_cpu(cpu);
+
+	return 0;
+}
+
 int smpcfd_dying_cpu(unsigned int cpu)
 {
 	/*
kernel/time/hrtimer.c (+12 −21)
@@ -2114,29 +2114,22 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
 	}
 }
 
-int hrtimers_dead_cpu(unsigned int scpu)
+int hrtimers_cpu_dying(unsigned int dying_cpu)
 {
 	struct hrtimer_cpu_base *old_base, *new_base;
-	int i;
+	int i, ncpu = cpumask_first(cpu_active_mask);
 
-	BUG_ON(cpu_online(scpu));
-	tick_cancel_sched_timer(scpu);
+	tick_cancel_sched_timer(dying_cpu);
 
+	old_base = this_cpu_ptr(&hrtimer_bases);
+	new_base = &per_cpu(hrtimer_bases, ncpu);
+
-	/*
-	 * this BH disable ensures that raise_softirq_irqoff() does
-	 * not wakeup ksoftirqd (and acquire the pi-lock) while
-	 * holding the cpu_base lock
-	 */
-	local_bh_disable();
-	local_irq_disable();
-	old_base = &per_cpu(hrtimer_bases, scpu);
-	new_base = this_cpu_ptr(&hrtimer_bases);
 	/*
 	 * The caller is globally serialized and nobody else
 	 * takes two locks at once, deadlock is not possible.
 	 */
-	raw_spin_lock(&new_base->lock);
-	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
+	raw_spin_lock(&old_base->lock);
+	raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);
 
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		migrate_hrtimer_list(&old_base->clock_base[i],
@@ -2147,15 +2140,13 @@ int hrtimers_dead_cpu(unsigned int scpu)
 	 * The migration might have changed the first expiring softirq
 	 * timer on this CPU. Update it.
 	 */
-	hrtimer_update_softirq_timer(new_base, false);
+	__hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
+	/* Tell the other CPU to retrigger the next event */
+	smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);
 
-	raw_spin_unlock(&old_base->lock);
 	raw_spin_unlock(&new_base->lock);
+	raw_spin_unlock(&old_base->lock);
 
-	/* Check, if we got expired work to do */
-	__hrtimer_peek_ahead_timers();
-	local_irq_enable();
-	local_bh_enable();
 	return 0;
 }