Commit 8d04fb55 authored by Jan Kiszka's avatar Jan Kiszka Committed by Alex Bennée
Browse files

tcg: drop global lock during TCG code execution



This finally allows TCG to benefit from the iothread introduction: Drop
the global mutex while running pure TCG CPU code. Reacquire the lock
when entering MMIO or PIO emulation, or when leaving the TCG loop.

We have to revert a few optimization for the current TCG threading
model, namely kicking the TCG thread in qemu_mutex_lock_iothread and not
kicking it in qemu_cpu_kick. We also need to disable RAM block
reordering until we have a more efficient locking mechanism at hand.

Still, a Linux x86 UP guest and my Musicpal ARM model boot fine here.
These numbers demonstrate where we gain something:

20338 jan       20   0  331m  75m 6904 R   99  0.9   0:50.95 qemu-system-arm
20337 jan       20   0  331m  75m 6904 S   20  0.9   0:26.50 qemu-system-arm

The guest CPU was fully loaded, but the iothread could still run mostly
independent on a second core. Without the patch we don't get beyond

32206 jan       20   0  330m  73m 7036 R   82  0.9   1:06.00 qemu-system-arm
32204 jan       20   0  330m  73m 7036 S   21  0.9   0:17.03 qemu-system-arm

We don't benefit significantly, though, when the guest is not fully
loading a host CPU.

Signed-off-by: default avatarJan Kiszka <jan.kiszka@siemens.com>
Message-Id: <1439220437-23957-10-git-send-email-fred.konrad@greensocs.com>
[FK: Rebase, fix qemu_devices_reset deadlock, rm address_space_* mutex]
Signed-off-by: default avatarKONRAD Frederic <fred.konrad@greensocs.com>
[EGC: fixed iothread lock for cpu-exec IRQ handling]
Signed-off-by: default avatarEmilio G. Cota <cota@braap.org>
[AJB: -smp single-threaded fix, clean commit msg, BQL fixes]
Signed-off-by: default avatarAlex Bennée <alex.bennee@linaro.org>
Reviewed-by: default avatarRichard Henderson <rth@twiddle.net>
Reviewed-by: default avatarPranith Kumar <bobby.prani@gmail.com>
[PM: target-arm changes]
Acked-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parent 791158d9
Loading
Loading
Loading
Loading
+21 −2
Original line number Diff line number Diff line
@@ -29,6 +29,7 @@
#include "qemu/rcu.h"
#include "exec/tb-hash.h"
#include "exec/log.h"
#include "qemu/main-loop.h"
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
#include "hw/i386/apic.h"
#endif
@@ -388,8 +389,10 @@ static inline bool cpu_handle_halt(CPUState *cpu)
        if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
            && replay_interrupt()) {
            X86CPU *x86_cpu = X86_CPU(cpu);
            qemu_mutex_lock_iothread();
            apic_poll_irq(x86_cpu->apic_state);
            cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
            qemu_mutex_unlock_iothread();
        }
#endif
        if (!cpu_has_work(cpu)) {
@@ -443,7 +446,9 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
#else
            if (replay_exception()) {
                CPUClass *cc = CPU_GET_CLASS(cpu);
                qemu_mutex_lock_iothread();
                cc->do_interrupt(cpu);
                qemu_mutex_unlock_iothread();
                cpu->exception_index = -1;
            } else if (!replay_has_interrupt()) {
                /* give a chance to iothread in replay mode */
@@ -469,9 +474,11 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
                                        TranslationBlock **last_tb)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    int interrupt_request = cpu->interrupt_request;

    if (unlikely(interrupt_request)) {
    if (unlikely(atomic_read(&cpu->interrupt_request))) {
        int interrupt_request;
        qemu_mutex_lock_iothread();
        interrupt_request = cpu->interrupt_request;
        if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
            /* Mask out external interrupts for this step. */
            interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
@@ -479,6 +486,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
        if (interrupt_request & CPU_INTERRUPT_DEBUG) {
            cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
            cpu->exception_index = EXCP_DEBUG;
            qemu_mutex_unlock_iothread();
            return true;
        }
        if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
@@ -488,6 +496,7 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
            cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
            cpu->halted = 1;
            cpu->exception_index = EXCP_HLT;
            qemu_mutex_unlock_iothread();
            return true;
        }
#if defined(TARGET_I386)
@@ -498,12 +507,14 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
            do_cpu_init(x86_cpu);
            cpu->exception_index = EXCP_HALTED;
            qemu_mutex_unlock_iothread();
            return true;
        }
#else
        else if (interrupt_request & CPU_INTERRUPT_RESET) {
            replay_interrupt();
            cpu_reset(cpu);
            qemu_mutex_unlock_iothread();
            return true;
        }
#endif
@@ -526,7 +537,12 @@ static inline bool cpu_handle_interrupt(CPUState *cpu,
               the program flow was changed */
            *last_tb = NULL;
        }

        /* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
        qemu_mutex_unlock_iothread();
    }


    if (unlikely(atomic_read(&cpu->exit_request) || replay_has_interrupt())) {
        atomic_set(&cpu->exit_request, 0);
        cpu->exception_index = EXCP_INTERRUPT;
@@ -643,6 +659,9 @@ int cpu_exec(CPUState *cpu)
#endif /* buggy compiler */
        cpu->can_do_io = 1;
        tb_lock_reset();
        if (qemu_mutex_iothread_locked()) {
            qemu_mutex_unlock_iothread();
        }
    }

    /* if an exception is pending, we execute it here */
+5 −23
Original line number Diff line number Diff line
@@ -1027,8 +1027,6 @@ static void qemu_kvm_init_cpu_signals(CPUState *cpu)
#endif /* _WIN32 */

static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
static unsigned iothread_requesting_mutex;

static QemuThread io_thread;

@@ -1042,7 +1040,6 @@ void qemu_init_cpu_loop(void)
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_cond_init(&qemu_io_proceeded_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
@@ -1085,10 +1082,6 @@ static void qemu_tcg_wait_io_event(CPUState *cpu)

    start_tcg_kick_timer();

    while (iothread_requesting_mutex) {
        qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
    }

    CPU_FOREACH(cpu) {
        qemu_wait_io_event_common(cpu);
    }
@@ -1249,9 +1242,11 @@ static int tcg_cpu_exec(CPUState *cpu)
        cpu->icount_decr.u16.low = decr;
        cpu->icount_extra = count;
    }
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
@@ -1479,27 +1474,14 @@ bool qemu_mutex_iothread_locked(void)

void qemu_mutex_lock_iothread(void)
{
    atomic_inc(&iothread_requesting_mutex);
    /* In the simple case there is no need to bump the VCPU thread out of
     * TCG code execution.
     */
    if (!tcg_enabled() || qemu_in_vcpu_thread() ||
        !first_cpu || !first_cpu->created) {
    g_assert(!qemu_mutex_iothread_locked());
    qemu_mutex_lock(&qemu_global_mutex);
        atomic_dec(&iothread_requesting_mutex);
    } else {
        if (qemu_mutex_trylock(&qemu_global_mutex)) {
            qemu_cpu_kick_rr_cpu();
            qemu_mutex_lock(&qemu_global_mutex);
        }
        atomic_dec(&iothread_requesting_mutex);
        qemu_cond_broadcast(&qemu_io_proceeded_cond);
    }
    iothread_locked = true;
}

void qemu_mutex_unlock_iothread(void)
{
    g_assert(qemu_mutex_iothread_locked());
    iothread_locked = false;
    qemu_mutex_unlock(&qemu_global_mutex);
}
+20 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
@@ -495,6 +496,7 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
    hwaddr physaddr = iotlbentry->addr;
    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
    uint64_t val;
    bool locked = false;

    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
    cpu->mem_io_pc = retaddr;
@@ -503,7 +505,16 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
    }

    cpu->mem_io_vaddr = addr;

    if (mr->global_locking) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    memory_region_dispatch_read(mr, physaddr, &val, size, iotlbentry->attrs);
    if (locked) {
        qemu_mutex_unlock_iothread();
    }

    return val;
}

@@ -514,15 +525,23 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
    CPUState *cpu = ENV_GET_CPU(env);
    hwaddr physaddr = iotlbentry->addr;
    MemoryRegion *mr = iotlb_to_region(cpu, physaddr, iotlbentry->attrs);
    bool locked = false;

    physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
    if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu->can_do_io) {
        cpu_io_recompile(cpu, retaddr);
    }

    cpu->mem_io_vaddr = addr;
    cpu->mem_io_pc = retaddr;

    if (mr->global_locking) {
        qemu_mutex_lock_iothread();
        locked = true;
    }
    memory_region_dispatch_write(mr, physaddr, val, size, iotlbentry->attrs);
    if (locked) {
        qemu_mutex_unlock_iothread();
    }
}

/* Return true if ADDR is present in the victim tlb, and has been copied
+9 −3
Original line number Diff line number Diff line
@@ -2134,9 +2134,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
                }
                cpu->watchpoint_hit = wp;

                /* The tb_lock will be reset when cpu_loop_exit or
                 * cpu_loop_exit_noexc longjmp back into the cpu_exec
                 * main loop.
                /* Both tb_lock and iothread_mutex will be reset when
                 * cpu_loop_exit or cpu_loop_exit_noexc longjmp
                 * back into the cpu_exec main loop.
                 */
                tb_lock();
                tb_check_watchpoint(cpu);
@@ -2371,8 +2371,14 @@ static void io_mem_init(void)
    memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
                          NULL, UINT64_MAX);

    /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
     * which can be called without the iothread mutex.
     */
    memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
                          NULL, UINT64_MAX);
    memory_region_clear_global_locking(&io_mem_notdirty);

    memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
                          NULL, UINT64_MAX);
}
+1 −0
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@
 * THE SOFTWARE.
 */
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qemu-common.h"
#include "hw/irq.h"
#include "qom/object.h"
Loading