Commit 87f963be authored by Richard Henderson's avatar Richard Henderson Committed by Paolo Bonzini
Browse files

tcg: Really fix cpu_io_recompile



We have confused the number of instructions that have been
executed in the TB with the number of instructions needed
to repeat the I/O instruction.

We have used cpu_restore_state_from_tb, which means that
the guest pc is pointing to the I/O instruction.  The only
time the answer to the later question is not 1 is when
MIPS or SH4 need to re-execute the branch for the delay
slot as well.

We must rely on cpu->cflags_next_tb to generate the next TB,
as otherwise we have a race condition with other guest cpus
within the TB cache.

Fixes: 0790f868
Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
Message-Id: <20180319031545.29359-1-richard.henderson@linaro.org>
Tested-by: default avatarPavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 8e029fd6
Loading
Loading
Loading
Loading
+10 −27
Original line number Diff line number Diff line
@@ -1728,8 +1728,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
    CPUArchState *env = cpu->env_ptr;
#endif
    TranslationBlock *tb;
    uint32_t n, flags;
    target_ulong pc, cs_base;
    uint32_t n;

    tb_lock();
    tb = tb_find_pc(retaddr);
@@ -1737,44 +1736,33 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    n = cpu->icount_decr.u16.low + tb->icount;
    cpu_restore_state_from_tb(cpu, tb, retaddr);
    /* Calculate how many instructions had been executed before the fault
       occurred.  */
    n = n - cpu->icount_decr.u16.low;
    /* Generate a new TB ending on the I/O insn.  */
    n++;

    /* On MIPS and SH, delay slot instructions can only be restarted if
       they were already the first instruction in the TB.  If this is not
       the first instruction in a TB then re-execute the preceding
       branch.  */
    n = 1;
#if defined(TARGET_MIPS)
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
    if ((env->hflags & MIPS_HFLAG_BMASK) != 0
        && env->active_tc.PC != tb->pc) {
        env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 2 : 4);
        cpu->icount_decr.u16.low++;
        env->hflags &= ~MIPS_HFLAG_BMASK;
        n = 2;
    }
#elif defined(TARGET_SH4)
    if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
            && n > 1) {
        && env->pc != tb->pc) {
        env->pc -= 2;
        cpu->icount_decr.u16.low++;
        env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
        n = 2;
    }
#endif
    /* This should never happen.  */
    if (n > CF_COUNT_MASK) {
        cpu_abort(cpu, "TB too big during recompile");
    }

    pc = tb->pc;
    cs_base = tb->cs_base;
    flags = tb->flags;
    tb_phys_invalidate(tb, -1);

    /* Execute one IO instruction without caching
       instead of creating large TB. */
    cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | CF_NOCACHE | 1;
    /* Generate a new TB executing the I/O insn.  */
    cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;

    if (tb->cflags & CF_NOCACHE) {
        if (tb->orig_tb) {
@@ -1785,11 +1773,6 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
        tb_remove(tb);
    }

    /* Generate new TB instead of the current one. */
    /* FIXME: In theory this could raise an exception.  In practice
       we have already translated the block once so it's probably ok.  */
    tb_gen_code(cpu, pc, cs_base, flags, curr_cflags() | CF_LAST_IO | n);

    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
     * the first in the TB) then we end up generating a whole new TB and
     *  repeating the fault, which is horribly inefficient.