Commit 0b0894ff authored by Linus Torvalds

Merge tag 'sched_urgent_for_v5.17_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fix from Borislav Petkov:
 "Fix task exposure order when forking tasks"

* tag 'sched_urgent_for_v5.17_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix yet more sched_fork() races
parents 6e8e752f b1e82065
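
The merged fix reorders the fork path so the new task is fully set up for the scheduler before it becomes reachable through the pid hash: the cgroup/runqueue placement moves into a new sched_cgroup_fork(), and sched_post_fork() shrinks to just uclamp_post_fork(). A rough sketch of the resulting ordering in copy_process(), condensed from the hunks below (steps not shown in this diff, such as cgroup_can_fork() and the TASK_NEW assignment, are taken from the surrounding kernel code and are an outline only, not compilable code):

	/* Simplified outline of copy_process() after this fix -- not literal kernel code. */
	sched_fork(clone_flags, p);        /* p->__state = TASK_NEW; set_load_weight(p, false) */
	retval = cgroup_can_fork(p, args); /* pins the destination css_set */
	...
	sched_cgroup_fork(p, args);        /* under p->pi_lock: pick sched_task_group from args->cset,
	                                      __set_task_cpu(), ->task_fork() -- before the task is visible */
	/* task is linked into the pid hash and becomes visible here */
	sched_post_fork(p);                /* now only uclamp_post_fork() */
	cgroup_post_fork(p, args);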
include/linux/sched/task.h +2 −2
@@ -54,8 +54,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 
 extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
-extern void sched_post_fork(struct task_struct *p,
-			    struct kernel_clone_args *kargs);
+extern void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs);
+extern void sched_post_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 void __noreturn do_task_dead(void);
kernel/fork.c +12 −1
@@ -2266,6 +2266,17 @@ static __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_put_pidfd;
 
+	/*
+	 * Now that the cgroups are pinned, re-clone the parent cgroup and put
+	 * the new task on the correct runqueue. All this *before* the task
+	 * becomes visible.
+	 *
+	 * This isn't part of ->can_fork() because while the re-cloning is
+	 * cgroup specific, it unconditionally needs to place the task on a
+	 * runqueue.
+	 */
+	sched_cgroup_fork(p, args);
+
 	/*
 	 * From this point on we must avoid any synchronous user-space
 	 * communication until we take the tasklist-lock. In particular, we do
@@ -2375,7 +2386,7 @@ static __latent_entropy struct task_struct *copy_process(
 		fd_install(pidfd, pidfile);
 
 	proc_fork_connector(p);
-	sched_post_fork(p, args);
+	sched_post_fork(p);
 	cgroup_post_fork(p, args);
 	perf_event_fork(p);
 
kernel/sched/core.c +21 −13
@@ -1214,9 +1214,8 @@ int tg_nop(struct task_group *tg, void *data)
 }
 #endif
 
-static void set_load_weight(struct task_struct *p)
+static void set_load_weight(struct task_struct *p, bool update_load)
 {
-	bool update_load = !(READ_ONCE(p->__state) & TASK_NEW);
 	int prio = p->static_prio - MAX_RT_PRIO;
 	struct load_weight *load = &p->se.load;
 
@@ -4407,7 +4406,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 			p->static_prio = NICE_TO_PRIO(0);
 
 		p->prio = p->normal_prio = p->static_prio;
-		set_load_weight(p);
+		set_load_weight(p, false);
 
 		/*
 		 * We don't need the reset flag anymore after the fork. It has
@@ -4425,6 +4424,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 
 	init_entity_runnable_average(&p->se);
 
+
 #ifdef CONFIG_SCHED_INFO
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -4440,18 +4440,23 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
 	return 0;
 }
 
-void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
+void sched_cgroup_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 {
 	unsigned long flags;
-#ifdef CONFIG_CGROUP_SCHED
-	struct task_group *tg;
-#endif
 
+	/*
+	 * Because we're not yet on the pid-hash, p->pi_lock isn't strictly
+	 * required yet, but lockdep gets upset if rules are violated.
+	 */
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 #ifdef CONFIG_CGROUP_SCHED
-	tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
-			  struct task_group, css);
-	p->sched_task_group = autogroup_task_group(p, tg);
+	if (1) {
+		struct task_group *tg;
+		tg = container_of(kargs->cset->subsys[cpu_cgrp_id],
+				  struct task_group, css);
+		tg = autogroup_task_group(p, tg);
+		p->sched_task_group = tg;
+	}
 #endif
 	rseq_migrate(p);
 	/*
@@ -4462,7 +4467,10 @@ void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs)
 	if (p->sched_class->task_fork)
 		p->sched_class->task_fork(p);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+}
 
-	uclamp_post_fork(p);
+void sched_post_fork(struct task_struct *p)
+{
+	uclamp_post_fork(p);
 }
 
@@ -6922,7 +6930,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		put_prev_task(rq, p);
 
 	p->static_prio = NICE_TO_PRIO(nice);
-	set_load_weight(p);
+	set_load_weight(p, true);
 	old_prio = p->prio;
 	p->prio = effective_prio(p);
 
@@ -7213,7 +7221,7 @@ static void __setscheduler_params(struct task_struct *p,
 	 */
 	p->rt_priority = attr->sched_priority;
 	p->normal_prio = normal_prio(p);
-	set_load_weight(p);
+	set_load_weight(p, true);
 }
 
 /*
@@ -9446,7 +9454,7 @@ void __init sched_init(void)
 #endif
 	}
 
-	set_load_weight(&init_task);
+	set_load_weight(&init_task, false);
 
 	/*
 	 * The boot idle thread does lazy MMU switching as well: