Commit e92304a8 authored by Oleg Nesterov, committed by Guo Mengqi
Browse files

fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats

mainline inclusion
from mainline-v6.8-rc4
commit 7601df8031fd67310af891897ef6cc0df4209305
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9E2EL
CVE: CVE-2024-26686

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=7601df8031fd67310af891897ef6cc0df4209305

--------------------------------

lock_task_sighand() can trigger a hard lockup.  If NR_CPUS threads call
do_task_stat() at the same time and the process has NR_THREADS threads, it
will spin with irqs disabled for O(NR_CPUS * NR_THREADS) time.

Change do_task_stat() to use sig->stats_lock to gather the statistics
outside of the ->siglock protected section; in the likely case this code
will run lockless.

Link: https://lkml.kernel.org/r/20240123153357.GA21857@redhat.com


Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Dylan Hatch <dylanbhatch@google.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Zhao Wenhui <zhaowenhui8@huawei.com>
parent 3ce3b998
Loading
Loading
Loading
Loading
+32 −26
Original line number Diff line number Diff line
@@ -456,13 +456,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
	int permitted;
	struct mm_struct *mm;
	unsigned long long start_time;
	unsigned long cmin_flt = 0, cmaj_flt = 0;
	unsigned long  min_flt = 0,  maj_flt = 0;
	u64 cutime, cstime, utime, stime;
	u64 cgtime, gtime;
	unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
	u64 cutime, cstime, cgtime, utime, stime, gtime;
	unsigned long rsslim = 0;
	unsigned long flags;
	int exit_code = task->exit_code;
	struct signal_struct *sig = task->signal;
	unsigned int seq = 1;

	state = *get_task_state(task);
	vsize = eip = esp = 0;
@@ -490,12 +490,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,

	sigemptyset(&sigign);
	sigemptyset(&sigcatch);
	cutime = cstime = 0;
	cgtime = gtime = 0;

	if (lock_task_sighand(task, &flags)) {
		struct signal_struct *sig = task->signal;

		if (sig->tty) {
			struct pid *pgrp = tty_get_pgrp(sig->tty);
			tty_pgrp = pid_nr_ns(pgrp, ns);
@@ -506,27 +502,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
		num_threads = get_nr_threads(task);
		collect_sigign_sigcatch(task, &sigign, &sigcatch);

		cmin_flt = sig->cmin_flt;
		cmaj_flt = sig->cmaj_flt;
		cutime = sig->cutime;
		cstime = sig->cstime;
		cgtime = sig->cgtime;
		rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);

		/* add up live thread stats at the group level */
		if (whole) {
			struct task_struct *t;

			__for_each_thread(sig, t) {
				min_flt += t->min_flt;
				maj_flt += t->maj_flt;
				gtime += task_gtime(t);
			}

			min_flt += sig->min_flt;
			maj_flt += sig->maj_flt;
			gtime += sig->gtime;

			if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
				exit_code = sig->group_exit_code;
		}
@@ -541,6 +519,34 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
	if (permitted && (!whole || num_threads < 2))
		wchan = get_wchan(task);

	do {
		seq++; /* 2 on the 1st/lockless path, otherwise odd */
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

		cmin_flt = sig->cmin_flt;
		cmaj_flt = sig->cmaj_flt;
		cutime = sig->cutime;
		cstime = sig->cstime;
		cgtime = sig->cgtime;

		if (whole) {
			struct task_struct *t;

			min_flt = sig->min_flt;
			maj_flt = sig->maj_flt;
			gtime = sig->gtime;

			rcu_read_lock();
			__for_each_thread(sig, t) {
				min_flt += t->min_flt;
				maj_flt += t->maj_flt;
				gtime += task_gtime(t);
			}
			rcu_read_unlock();
		}
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);

	if (whole) {
		thread_group_cputime_adjusted(task, &utime, &stime);
	} else {