Commit c265e976 authored by Paolo Bonzini
Browse files

cpus-common: lock-free fast path for cpu_exec_start/end



Set cpu->running without taking the cpu_list lock, only requiring it if
there is a concurrent exclusive section.  This requires adding a new
field to CPUState, which records whether a running CPU is being counted
in pending_cpus.

When an exclusive section is started concurrently with cpu_exec_start,
cpu_exec_start can use the new field to determine if it has to wait for
the end of the exclusive section.  Likewise, cpu_exec_end can use it to
see if start_exclusive is waiting for that CPU.

This is a separate patch for easier bisection of issues.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 3359baad
Loading
Loading
Loading
Loading
+80 −15
Original line number Diff line number Diff line
@@ -28,6 +28,9 @@ static QemuCond exclusive_cond;
static QemuCond exclusive_resume;
static QemuCond qemu_work_cond;

/* >= 1 if a thread is inside start_exclusive/end_exclusive.  Written
 * under qemu_cpu_list_lock, read with atomic operations.
 */
static int pending_cpus;

void qemu_init_cpu_list(void)
@@ -177,18 +180,26 @@ static inline void exclusive_idle(void)
void start_exclusive(void)
{
    CPUState *other_cpu;
    int running_cpus;

    qemu_mutex_lock(&qemu_cpu_list_lock);
    exclusive_idle();

    /* Make all other cpus stop executing.  */
    pending_cpus = 1;
    atomic_set(&pending_cpus, 1);

    /* Write pending_cpus before reading other_cpu->running.  */
    smp_mb();
    running_cpus = 0;
    CPU_FOREACH(other_cpu) {
        if (other_cpu->running) {
            pending_cpus++;
        if (atomic_read(&other_cpu->running)) {
            other_cpu->has_waiter = true;
            running_cpus++;
            qemu_cpu_kick(other_cpu);
        }
    }

    atomic_set(&pending_cpus, running_cpus + 1);
    while (pending_cpus > 1) {
        qemu_cond_wait(&exclusive_cond, &qemu_cpu_list_lock);
    }
@@ -203,7 +214,7 @@ void start_exclusive(void)
/* Leave an exclusive section: reset pending_cpus to zero and broadcast
 * exclusive_resume, all while holding qemu_cpu_list_lock.
 */
void end_exclusive(void)
{
    qemu_mutex_lock(&qemu_cpu_list_lock);
    /* NOTE(review): this text comes from a diff rendered without +/- markers;
     * the plain store below looks like the pre-patch line and the
     * atomic_set() after it like its replacement -- confirm against the tree.
     */
    pending_cpus = 0;
    atomic_set(&pending_cpus, 0);
    /* Wake every thread blocked on exclusive_resume (presumably waiters in
     * exclusive_idle() -- its body is not visible here).
     */
    qemu_cond_broadcast(&exclusive_resume);
    qemu_mutex_unlock(&qemu_cpu_list_lock);
}
@@ -211,25 +222,79 @@ void end_exclusive(void)
/* Wait for exclusive ops to finish, and begin cpu execution.
 *
 * @cpu: the CPU that is about to start executing.
 *
 * cpu->running is set without taking qemu_cpu_list_lock; the lock is only
 * taken when pending_cpus is read as nonzero afterwards.
 */
void cpu_exec_start(CPUState *cpu)
{
    atomic_set(&cpu->running, true);

    /* Write cpu->running before reading pending_cpus.  */
    smp_mb();

    /* 1. start_exclusive saw cpu->running == true and pending_cpus >= 1.
     * After taking the lock we'll see cpu->has_waiter == true and run---not
     * for long because start_exclusive kicked us.  cpu_exec_end will
     * decrement pending_cpus and signal the waiter.
     *
     * 2. start_exclusive saw cpu->running == false but pending_cpus >= 1.
     * This includes the case when an exclusive item is running now.
     * Then we'll see cpu->has_waiter == false and wait for the item to
     * complete.
     *
     * 3. pending_cpus == 0.  Then start_exclusive is definitely going to
     * see cpu->running == true, and it will kick the CPU.
     */
    if (unlikely(atomic_read(&pending_cpus))) {
        /* Slow path: cpu->has_waiter is only valid under the lock.  */
        qemu_mutex_lock(&qemu_cpu_list_lock);
        if (!cpu->has_waiter) {
            /* Not counted in pending_cpus, let the exclusive item
             * run.  Since we have the lock, just set cpu->running to true
             * while holding it; no need to check pending_cpus again.
             */
            atomic_set(&cpu->running, false);
            exclusive_idle();
            /* NOTE(review): the next, differently indented store looks like
             * a pre-patch line kept by the diff rendering; the atomic_set()
             * two lines further down performs the same write -- confirm
             * against the tree.
             */
    cpu->running = true;
            /* Now pending_cpus is zero.  */
            atomic_set(&cpu->running, true);
        } else {
            /* Counted in pending_cpus, go ahead and release the
             * waiter at cpu_exec_end.
             */
        }
        qemu_mutex_unlock(&qemu_cpu_list_lock);
    }
}

/* Mark cpu as not executing, and release pending exclusive ops.
 *
 * @cpu: the CPU that has just stopped executing.
 *
 * cpu->running is cleared without taking qemu_cpu_list_lock; the lock is
 * only taken when pending_cpus is read as nonzero afterwards.
 */
void cpu_exec_end(CPUState *cpu)
{
    atomic_set(&cpu->running, false);

    /* Write cpu->running before reading pending_cpus.  */
    smp_mb();

    /* 1. start_exclusive saw cpu->running == true.  Then it will increment
     * pending_cpus and wait for exclusive_cond.  After taking the lock
     * we'll see cpu->has_waiter == true.
     *
     * 2. start_exclusive saw cpu->running == false but here pending_cpus >= 1.
     * This includes the case when an exclusive item started after setting
     * cpu->running to false and before we read pending_cpus.  Then we'll see
     * cpu->has_waiter == false and not touch pending_cpus.  The next call to
     * cpu_exec_start will run exclusive_idle if still necessary, thus waiting
     * for the item to complete.
     *
     * 3. pending_cpus == 0.  Then start_exclusive is definitely going to
     * see cpu->running == false, and it can ignore this CPU until the
     * next cpu_exec_start.
     */
    if (unlikely(atomic_read(&pending_cpus))) {
        /* Slow path: cpu->has_waiter is only valid under the lock.  */
        qemu_mutex_lock(&qemu_cpu_list_lock);
        /* NOTE(review): the next three lines look like pre-patch lines kept
         * by the diff rendering -- they are indented differently, and the
         * brace opened by "if (pending_cpus > 1)" has no matching close in
         * this view.  Confirm against the tree before relying on them.
         */
    cpu->running = false;
    if (pending_cpus > 1) {
        pending_cpus--;
        if (cpu->has_waiter) {
            /* This CPU was counted in pending_cpus: drop its contribution
             * and signal exclusive_cond once the count is back to 1.
             */
            cpu->has_waiter = false;
            atomic_set(&pending_cpus, pending_cpus - 1);
            if (pending_cpus == 1) {
                qemu_cond_signal(&exclusive_cond);
            }
        }
        qemu_mutex_unlock(&qemu_cpu_list_lock);
    }
}

void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func, void *data)
{
+50 −3
Original line number Diff line number Diff line
@@ -13,7 +13,8 @@
 *     gcc pan.c -O2
 *     ./a.out -a
 *
 * Tunable processor macros: N_CPUS, N_EXCLUSIVE, N_CYCLES, TEST_EXPENSIVE.
 * Tunable processor macros: N_CPUS, N_EXCLUSIVE, N_CYCLES, USE_MUTEX,
 *                           TEST_EXPENSIVE.
 */

// Define the missing parameters for the model
@@ -22,8 +23,10 @@
#warning defaulting to 2 CPU processes
#endif

// the expensive test is not so expensive for <= 3 CPUs
#if N_CPUS <= 3
// the expensive test is not so expensive for <= 2 CPUs
// If the mutex is used, it's also cheap (300 MB / 4 seconds) for 3 CPUs
// For 3 CPUs and the lock-free option it needs 1.5 GB of RAM
#if N_CPUS <= 2 || (N_CPUS <= 3 && defined USE_MUTEX)
#define TEST_EXPENSIVE
#endif

@@ -107,6 +110,8 @@ byte has_waiter[N_CPUS];
    COND_BROADCAST(exclusive_resume);                             \
    MUTEX_UNLOCK(mutex);

#ifdef USE_MUTEX
// Simple version using mutexes
#define cpu_exec_start(id)                                                   \
    MUTEX_LOCK(mutex);                                                       \
    exclusive_idle();                                                        \
@@ -127,6 +132,48 @@ byte has_waiter[N_CPUS];
        :: else -> skip;                                                     \
    fi;                                                                      \
    MUTEX_UNLOCK(mutex);
#else
// Wait-free fast path, only needs mutex when concurrent with
// an exclusive section
//
// cpu_exec_start(id): set running[id]; if pending_cpus is nonzero, take the
// mutex.  When has_waiter[id] is clear, running[id] is cleared around a call
// to exclusive_idle() and then set again; when has_waiter[id] is set, nothing
// more is done.  The mutex is released in both cases.
#define cpu_exec_start(id)                                                   \
    running[id] = 1;                                                         \
    if                                                                       \
        :: pending_cpus -> {                                                 \
            MUTEX_LOCK(mutex);                                               \
            if                                                               \
                :: !has_waiter[id] -> {                                      \
                    running[id] = 0;                                         \
                    exclusive_idle();                                        \
                    running[id] = 1;                                         \
                }                                                            \
                :: else -> skip;                                             \
            fi;                                                              \
            MUTEX_UNLOCK(mutex);                                             \
        }                                                                    \
        :: else -> skip;                                                     \
    fi;

// cpu_exec_end(id): clear running[id]; if pending_cpus is nonzero, take the
// mutex.  When has_waiter[id] is set, clear it, decrement pending_cpus and
// broadcast exclusive_cond once pending_cpus is back to 1.  The mutex is
// released afterwards.
#define cpu_exec_end(id)                                                     \
    running[id] = 0;                                                         \
    if                                                                       \
        :: pending_cpus -> {                                                 \
            MUTEX_LOCK(mutex);                                               \
            if                                                               \
                :: has_waiter[id] -> {                                       \
                    has_waiter[id] = 0;                                      \
                    pending_cpus--;                                          \
                    if                                                       \
                        :: pending_cpus == 1 -> COND_BROADCAST(exclusive_cond); \
                        :: else -> skip;                                     \
                    fi;                                                      \
                }                                                            \
                :: else -> skip;                                             \
            fi;                                                              \
            MUTEX_UNLOCK(mutex);                                             \
        }                                                                    \
        :: else -> skip;                                                     \
    fi
#endif

// Promela processes

+3 −2
Original line number Diff line number Diff line
@@ -242,7 +242,8 @@ struct qemu_work_item;
 * @nr_threads: Number of threads within this CPU.
 * @numa_node: NUMA node this CPU is belonging to.
 * @host_tid: Host thread ID.
 * @running: #true if CPU is currently running;
 * @running: #true if CPU is currently running (lockless).
 * @has_waiter: #true if a CPU is currently waiting for the cpu_exec_end;
 * valid under cpu_list_lock.
 * @created: Indicates whether the CPU thread has been successfully created.
 * @interrupt_request: Indicates a pending interrupt request.
@@ -296,7 +297,7 @@ struct CPUState {
#endif
    int thread_id;
    uint32_t host_tid;
    bool running;
    bool running, has_waiter;
    struct QemuCond *halt_cond;
    bool thread_kicked;
    bool created;