Commit 46f81532 authored by Davidlohr Bueso's avatar Davidlohr Bueso Committed by Arnaldo Carvalho de Melo
Browse files

perf bench futex, requeue: Add --pi parameter



This extends the program to measure WAIT_REQUEUE_PI+CMP_REQUEUE_PI
pairs, which are the underlying machinery behind priority-inheritance
aware condition variables. The defaults are the same as with the regular
non-pi version, requeueing one task at a time, with the exception that
PI will always wakeup the first waiter.

Signed-off-by: default avatarDavidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lore.kernel.org/lkml/20210809043301.66002-8-dave@stgolabs.net


Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@redhat.com>
parent 6f9661b2
Loading
Loading
Loading
Loading
+75 −25
Original line number Diff line number Diff line
@@ -6,7 +6,8 @@
 *                on futex2, N at a time.
 *
 * This program is particularly useful to measure the latency of nthread
 * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
 * requeues without waking up any tasks (in the non-pi case) -- thus
 * mimicking a regular futex_wait.
 */

/* For the CLR_() macros */
@@ -54,6 +55,8 @@ static const struct option options[] = {
	OPT_BOOLEAN( 'S', "shared",   &params.fshared, "Use shared futexes instead of private ones"),
	OPT_BOOLEAN( 'm', "mlockall", &params.mlockall, "Lock all current and future memory"),
	OPT_BOOLEAN( 'B', "broadcast", &params.broadcast, "Requeue all threads at once"),
	OPT_BOOLEAN( 'p', "pi", &params.pi, "Use PI-aware variants of FUTEX_CMP_REQUEUE"),

	OPT_END()
};

@@ -87,15 +90,31 @@ static void *workerfn(void *arg __maybe_unused)
	pthread_mutex_unlock(&thread_lock);

	while (1) {
		if (!params.pi) {
			ret = futex_wait(&futex1, 0, NULL, futex_flag);
			if (!ret)
				break;

			if (ret && errno != EAGAIN) {
				if (!params.silent)
				warn("futex_wait");
					warnx("futex_wait");
				break;
			}
		} else {
			ret = futex_wait_requeue_pi(&futex1, 0, &futex2,
						    NULL, futex_flag);
			if (!ret) {
				/* got the lock at futex2 */
				futex_unlock_pi(&futex2, futex_flag);
				break;
			}

			if (ret && errno != EAGAIN) {
				if (!params.silent)
					warnx("futex_wait_requeue_pi");
				break;
			}
		}
	}

	return NULL;
@@ -171,9 +190,10 @@ int bench_futex_requeue(int argc, const char **argv)
	if (params.broadcast)
		params.nrequeue = params.nthreads;

	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
	printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), "
	       "%d at a time.\n\n",  getpid(), params.nthreads,
	       params.fshared ? "shared":"private", &futex1, &futex2, params.nrequeue);
	       params.fshared ? "shared":"private", &futex1,
	       params.pi ? "PI ": "", &futex2, params.nrequeue);

	init_stats(&requeued_stats);
	init_stats(&requeuetime_stats);
@@ -183,7 +203,7 @@ int bench_futex_requeue(int argc, const char **argv)
	pthread_cond_init(&thread_worker, NULL);

	for (j = 0; j < bench_repeat && !done; j++) {
		unsigned int nrequeued = 0;
		unsigned int nrequeued = 0, wakeups = 0;
		struct timeval start, end, runtime;

		/* create, launch & block all threads */
@@ -201,13 +221,30 @@ int bench_futex_requeue(int argc, const char **argv)
		/* Ok, all threads are patiently blocked, start requeueing */
		gettimeofday(&start, NULL);
		while (nrequeued < params.nthreads) {
			int r;

			/*
			 * Do not wakeup any tasks blocked on futex1, allowing
			 * us to really measure futex_wait functionality.
			 * For the regular non-pi case, do not wakeup any tasks
			 * blocked on futex1, allowing us to really measure
			 * futex_wait functionality. For the PI case the first
			 * waiter is always awoken.
			 */
			nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
			if (!params.pi) {
				r = futex_cmp_requeue(&futex1, 0, &futex2, 0,
						      params.nrequeue,
						      futex_flag);
			} else {
				r = futex_cmp_requeue_pi(&futex1, 0, &futex2,
							 params.nrequeue,
							 futex_flag);
				wakeups++; /* assume no error */
			}

			if (r < 0)
				err(EXIT_FAILURE, "couldn't requeue from %p to %p",
				    &futex1, &futex2);

			nrequeued += r;
		}

		gettimeofday(&end, NULL);
@@ -217,16 +254,29 @@ int bench_futex_requeue(int argc, const char **argv)
		update_stats(&requeuetime_stats, runtime.tv_usec);

		if (!params.silent) {
			printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
			       j + 1, nrequeued, params.nthreads,
			if (!params.pi)
				printf("[Run %d]: Requeued %d of %d threads in "
				       "%.4f ms\n", j + 1, nrequeued,
				       params.nthreads,
				       runtime.tv_usec / (double)USEC_PER_MSEC);
			else {
				nrequeued -= wakeups;
				printf("[Run %d]: Awoke and Requeued (%d+%d) of "
				       "%d threads in %.4f ms\n",
				       j + 1, wakeups, nrequeued,
				       params.nthreads,
				       runtime.tv_usec / (double)USEC_PER_MSEC);
			}

		}

		if (!params.pi) {
			/* everybody should be blocked on futex2, wake'em up */
			nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
			if (params.nthreads != nrequeued)
				warnx("couldn't wakeup all tasks (%d/%d)",
				      nrequeued, params.nthreads);
		}

		for (i = 0; i < params.nthreads; i++) {
			ret = pthread_join(worker[i], NULL);
+36 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ struct bench_futex_parameters {
	bool fshared;
	bool mlockall;
	bool multi; /* lock-pi */
	bool pi; /* requeue-pi */
	bool broadcast; /* requeue */
	unsigned int runtime; /* seconds*/
	unsigned int nthreads;
@@ -99,4 +100,38 @@ futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wak
	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
		 val, opflags);
}

/**
 * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
 * @uaddr:	non-PI futex source
 * @uaddr2:	PI futex target
 *
 * This is the first half of the requeue_pi mechanism. It shall always be
 * paired with futex_cmp_requeue_pi().
 */
static inline int
futex_wait_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
		      struct timespec *timeout, int opflags)
{
	return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
		     opflags);
}

/**
 * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2
 * @uaddr:	non-PI futex source
 * @uaddr2:	PI futex target
 * @nr_requeue:	requeue up to this many tasks
 *
 * This is the second half of the requeue_pi mechanism. It shall always be
 * paired with futex_wait_requeue_pi(). The first waker is always awoken.
 */
static inline int
futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2,
		     int nr_requeue, int opflags)
{
	return futex(uaddr, FUTEX_CMP_REQUEUE_PI, 1, nr_requeue, uaddr2,
		     val, opflags);
}

#endif /* _FUTEX_H */