Commit 1afb95fe authored by Paul E. McKenney
Browse files

torture: Maintain torture-specific set of CPUs-online books



The TREE01 rcutorture scenario intentionally creates confusion as to the
number of available CPUs by specifying the "maxcpus=8 nr_cpus=43" kernel
boot parameters.  This can disable rcutorture's load shedding, which
currently uses num_online_cpus(), which would count the extra 35 CPUs.
However, the rcutorture guest OS will be provisioned with only 8 CPUs,
which means that rcutorture will present full load even when all but one
of the original 8 CPUs are offline.  This can result in spurious errors
due to extreme overloading of that single remaining CPU.

This commit therefore keeps a separate set of books on the number of
usable online CPUs, so that torture_num_online_cpus() is used for load
shedding instead of num_online_cpus().  Note that initial sizing must
use num_online_cpus() because torture_num_online_cpus() will return
NR_CPUS until shortly after torture_onoff_init() is invoked.

Reported-by: Frederic Weisbecker <frederic@kernel.org>
[ paulmck: Apply feedback from kernel test robot. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
parent 0b962c8f
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -48,6 +48,11 @@ do { \
void verbose_torout_sleep(void);

/* Definitions for online/offline exerciser. */
#ifdef CONFIG_HOTPLUG_CPU
/* Torture-test view of the online-CPU count; prototype only. */
int torture_num_online_cpus(void);
#else /* #ifdef CONFIG_HOTPLUG_CPU */
/*
 * Without CPU hotplug, this stub reports a constant count of one CPU.
 * NOTE(review): the value is 1 regardless of how many CPUs are actually
 * online; confirm that callers comparing it against a real CPU count
 * are fine with that.
 */
static inline int torture_num_online_cpus(void)
{
	return 1;
}
#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
typedef void torture_ofl_func(void);
bool torture_offline(int cpu, long *n_onl_attempts, long *n_onl_successes,
		     unsigned long *sum_offl, int *min_onl, int *max_onl);
+2 −2
Original line number Diff line number Diff line
@@ -1338,7 +1338,7 @@ static void rcu_torture_reader_do_mbchk(long myid, struct rcu_torture *rtp,
					struct torture_random_state *trsp)
{
	unsigned long loops;
	int noc = num_online_cpus();
	int noc = torture_num_online_cpus();
	int rdrchked;
	int rdrchker;
	struct rcu_torture_reader_check *rtrcp; // Me.
@@ -1658,7 +1658,7 @@ rcu_torture_reader(void *arg)
			torture_hrtimeout_us(500, 1000, &rand);
			lastsleep = jiffies + 10;
		}
		while (num_online_cpus() < mynumonline && !torture_must_stop())
		while (torture_num_online_cpus() < mynumonline && !torture_must_stop())
			schedule_timeout_interruptible(HZ / 5);
		stutter_wait("rcu_torture_reader");
	} while (!torture_must_stop());
+16 −0
Original line number Diff line number Diff line
@@ -175,6 +175,19 @@ static unsigned long sum_online;
static int min_online = -1;
static int max_online;

static int torture_online_cpus = NR_CPUS;

/*
 * Torture tests sometimes run with a deliberately confusing number of
 * online CPUs.  Report the count as tracked by the torture-test code
 * itself, so that tests can balance their load against it.
 */
int torture_num_online_cpus(void)
{
	/* Marked load: the counter is updated with WRITE_ONCE() elsewhere. */
	int ncpus = READ_ONCE(torture_online_cpus);

	return ncpus;
}
EXPORT_SYMBOL_GPL(torture_num_online_cpus);

/*
 * Attempt to take a CPU offline.  Return false if the CPU is already
 * offline or if it is not subject to CPU-hotplug operations.  The
@@ -229,6 +242,8 @@ bool torture_offline(int cpu, long *n_offl_attempts, long *n_offl_successes,
			*min_offl = delta;
		if (*max_offl < delta)
			*max_offl = delta;
		WRITE_ONCE(torture_online_cpus, torture_online_cpus - 1);
		WARN_ON_ONCE(torture_online_cpus <= 0);
	}

	return true;
@@ -285,6 +300,7 @@ bool torture_online(int cpu, long *n_onl_attempts, long *n_onl_successes,
			*min_onl = delta;
		if (*max_onl < delta)
			*max_onl = delta;
		WRITE_ONCE(torture_online_cpus, torture_online_cpus + 1);
	}

	return true;