Commit fc40787a authored by Aurelien Jarno
Browse files

target-mips: implement unaligned loads using TCG



Loads/stores from helpers should be avoided as they are quite
inefficient. Rewrite the unaligned load instructions using TCG and
aligned loads. The number of actual load operations needed to implement
an unaligned load instruction is reduced from up to 8 to 1.

Note: As we can't rely on the behaviour of a shift by 32 or 64 (it is
undefined), the code loads constants that are already shifted by one.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
parent 18bba4dc
Loading
Loading
Loading
Loading
+0 −4
Original line number Diff line number Diff line
@@ -4,13 +4,9 @@ DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
DEF_HELPER_2(raise_exception, noreturn, env, i32)

#ifdef TARGET_MIPS64
DEF_HELPER_4(ldl, tl, env, tl, tl, int)
DEF_HELPER_4(ldr, tl, env, tl, tl, int)
DEF_HELPER_4(sdl, void, env, tl, tl, int)
DEF_HELPER_4(sdr, void, env, tl, tl, int)
#endif
DEF_HELPER_4(lwl, tl, env, tl, tl, int)
DEF_HELPER_4(lwr, tl, env, tl, tl, int)
DEF_HELPER_4(swl, void, env, tl, tl, int)
DEF_HELPER_4(swr, void, env, tl, tl, int)

+0 −142
Original line number Diff line number Diff line
@@ -350,56 +350,6 @@ HELPER_ST_ATOMIC(scd, ld, sd, 0x7)
#define GET_OFFSET(addr, offset) (addr - (offset))
#endif

/*
 * Helper for the unaligned "load word left" operation.
 *
 * Fetches between one and four bytes with do_lbu(): the first at arg2, the
 * following ones at GET_OFFSET(arg2, 1..3).  The bytes are merged into the
 * low 32 bits of arg1 starting at bits 31..24 and working downwards.
 * GET_LMASK(arg2) selects how many bytes are fetched; byte lanes that are
 * not fetched keep the value passed in through arg1.
 *
 * env:     CPU state, forwarded to do_lbu()
 * arg1:    value the loaded bytes are merged into
 * arg2:    address of the first byte to load
 * mem_idx: memory index, forwarded to do_lbu()
 *
 * Returns the merged value after a cast through int32_t.
 */
target_ulong helper_lwl(CPUMIPSState *env, target_ulong arg1,
                        target_ulong arg2, int mem_idx)
{
    const target_ulong lmask = GET_LMASK(arg2);
    target_ulong byte;

    /* The byte at arg2 is always loaded and lands in bits 31..24. */
    byte = do_lbu(env, arg2, mem_idx);
    arg1 = (byte << 24) | (arg1 & 0x00FFFFFF);

    /*
     * Every further byte is only needed when the previous one was, so the
     * checks are nested instead of being tested one after the other.
     */
    if (lmask <= 2) {
        byte = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
        arg1 = (byte << 16) | (arg1 & 0xFF00FFFF);

        if (lmask <= 1) {
            byte = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
            arg1 = (byte << 8) | (arg1 & 0xFFFF00FF);

            if (lmask == 0) {
                byte = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
                arg1 = byte | (arg1 & 0xFFFFFF00);
            }
        }
    }

    return (int32_t)arg1;
}

/*
 * Helper for the unaligned "load word right" operation.
 *
 * Fetches between one and four bytes with do_lbu(): the first at arg2, the
 * following ones at GET_OFFSET(arg2, -1..-3).  The bytes are merged into
 * the low 32 bits of arg1 starting at bits 7..0 and working upwards.
 * GET_LMASK(arg2) selects how many bytes are fetched; byte lanes that are
 * not fetched keep the value passed in through arg1.
 *
 * env:     CPU state, forwarded to do_lbu()
 * arg1:    value the loaded bytes are merged into
 * arg2:    address of the first byte to load
 * mem_idx: memory index, forwarded to do_lbu()
 *
 * Returns the merged value after a cast through int32_t.
 */
target_ulong helper_lwr(CPUMIPSState *env, target_ulong arg1,
                        target_ulong arg2, int mem_idx)
{
    const target_ulong lmask = GET_LMASK(arg2);
    target_ulong byte;

    /* The byte at arg2 is always loaded and lands in bits 7..0. */
    byte = do_lbu(env, arg2, mem_idx);
    arg1 = byte | (arg1 & 0xFFFFFF00);

    /*
     * Every further byte is only needed when the previous one was, so the
     * checks are nested instead of being tested one after the other.
     */
    if (lmask >= 1) {
        byte = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
        arg1 = (byte << 8) | (arg1 & 0xFFFF00FF);

        if (lmask >= 2) {
            byte = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
            arg1 = (byte << 16) | (arg1 & 0xFF00FFFF);

            if (lmask == 3) {
                byte = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
                arg1 = (byte << 24) | (arg1 & 0x00FFFFFF);
            }
        }
    }

    return (int32_t)arg1;
}

void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
                int mem_idx)
{
@@ -440,98 +390,6 @@ void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
#define GET_LMASK64(v) (((v) & 7) ^ 7)
#endif

/*
 * Helper for the unaligned "load doubleword left" operation.
 *
 * Fetches between one and eight bytes with do_lbu(): the first at arg2, the
 * following ones at GET_OFFSET(arg2, 1..7).  The bytes are merged into arg1
 * starting at bits 63..56 and working downwards.  GET_LMASK64(arg2) selects
 * how many bytes are fetched; byte lanes that are not fetched keep the
 * value passed in through arg1.
 *
 * env:     CPU state, forwarded to do_lbu()
 * arg1:    value the loaded bytes are merged into
 * arg2:    address of the first byte to load
 * mem_idx: memory index, forwarded to do_lbu()
 *
 * Returns the merged value.
 */
target_ulong helper_ldl(CPUMIPSState *env, target_ulong arg1,
                        target_ulong arg2, int mem_idx)
{
    const target_ulong lmask = GET_LMASK64(arg2);
    uint64_t byte;
    int i;

    /* The byte at arg2 is always loaded and lands in bits 63..56. */
    byte = do_lbu(env, arg2, mem_idx);
    arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (byte << 56);

    /*
     * Byte number i (counted from the first one) is wanted only while
     * lmask <= 7 - i; once one byte is not wanted, none of the later
     * ones are either.
     */
    for (i = 1; i <= 7; i++) {
        const int shift = 56 - 8 * i;

        if (lmask > (target_ulong)(7 - i)) {
            break;
        }
        byte = do_lbu(env, GET_OFFSET(arg2, i), mem_idx);
        arg1 = (arg1 & ~(0xFFULL << shift)) | (byte << shift);
    }

    return arg1;
}

/*
 * Helper for the unaligned "load doubleword right" operation.
 *
 * Fetches between one and eight bytes with do_lbu(): the first at arg2, the
 * following ones at GET_OFFSET(arg2, -1..-7).  The bytes are merged into
 * arg1 starting at bits 7..0 and working upwards.  GET_LMASK64(arg2)
 * selects how many bytes are fetched; byte lanes that are not fetched keep
 * the value passed in through arg1.
 *
 * env:     CPU state, forwarded to do_lbu()
 * arg1:    value the loaded bytes are merged into
 * arg2:    address of the first byte to load
 * mem_idx: memory index, forwarded to do_lbu()
 *
 * Returns the merged value.
 */
target_ulong helper_ldr(CPUMIPSState *env, target_ulong arg1,
                        target_ulong arg2, int mem_idx)
{
    const target_ulong lmask = GET_LMASK64(arg2);
    uint64_t byte;
    int i;

    /* The byte at arg2 is always loaded and lands in bits 7..0. */
    byte = do_lbu(env, arg2, mem_idx);
    arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | byte;

    /*
     * Byte number i (counted from the first one) is wanted only while
     * lmask >= i; once one byte is not wanted, none of the later ones
     * are either.
     */
    for (i = 1; i <= 7; i++) {
        const int shift = 8 * i;

        if (lmask < (target_ulong)i) {
            break;
        }
        byte = do_lbu(env, GET_OFFSET(arg2, -i), mem_idx);
        arg1 = (arg1 & ~(0xFFULL << shift)) | (byte << shift);
    }

    return arg1;
}

void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
                int mem_idx)
{
+62 −13
Original line number Diff line number Diff line
@@ -1580,7 +1580,7 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
                    int rt, int base, int16_t offset)
{
    const char *opn = "ld";
    TCGv t0, t1;
    TCGv t0, t1, t2;

    if (rt == 0 && env->insn_flags & (INSN_LOONGSON2E | INSN_LOONGSON2F)) {
        /* Loongson CPU uses a load to zero register for prefetch.
@@ -1612,21 +1612,45 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
        opn = "lld";
        break;
    case OPC_LDL:
        save_cpu_state(ctx, 1);
        t1 = tcg_temp_new();
        tcg_gen_andi_tl(t1, t0, 7);
#ifndef TARGET_WORDS_BIGENDIAN
        tcg_gen_xori_tl(t1, t1, 7);
#endif
        tcg_gen_shli_tl(t1, t1, 3);
        tcg_gen_andi_tl(t0, t0, ~7);
        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
        tcg_gen_shl_tl(t0, t0, t1);
        tcg_gen_xori_tl(t1, t1, 63);
        t2 = tcg_const_tl(0x7fffffffffffffffull);
        tcg_gen_shr_tl(t2, t2, t1);
        gen_load_gpr(t1, rt);
        gen_helper_1e2i(ldl, t1, t1, t0, ctx->mem_idx);
        gen_store_gpr(t1, rt);
        tcg_gen_and_tl(t1, t1, t2);
        tcg_temp_free(t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_temp_free(t1);
        gen_store_gpr(t0, rt);
        opn = "ldl";
        break;
    case OPC_LDR:
        save_cpu_state(ctx, 1);
        t1 = tcg_temp_new();
        tcg_gen_andi_tl(t1, t0, 7);
#ifdef TARGET_WORDS_BIGENDIAN
        tcg_gen_xori_tl(t1, t1, 7);
#endif
        tcg_gen_shli_tl(t1, t1, 3);
        tcg_gen_andi_tl(t0, t0, ~7);
        tcg_gen_qemu_ld64(t0, t0, ctx->mem_idx);
        tcg_gen_shr_tl(t0, t0, t1);
        tcg_gen_xori_tl(t1, t1, 63);
        t2 = tcg_const_tl(0xfffffffffffffffeull);
        tcg_gen_shl_tl(t2, t2, t1);
        gen_load_gpr(t1, rt);
        gen_helper_1e2i(ldr, t1, t1, t0, ctx->mem_idx);
        gen_store_gpr(t1, rt);
        tcg_gen_and_tl(t1, t1, t2);
        tcg_temp_free(t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_temp_free(t1);
        gen_store_gpr(t0, rt);
        opn = "ldr";
        break;
    case OPC_LDPC:
@@ -1672,21 +1696,46 @@ static void gen_ld (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
        opn = "lbu";
        break;
    case OPC_LWL:
        save_cpu_state(ctx, 1);
        t1 = tcg_temp_new();
        tcg_gen_andi_tl(t1, t0, 3);
#ifndef TARGET_WORDS_BIGENDIAN
        tcg_gen_xori_tl(t1, t1, 3);
#endif
        tcg_gen_shli_tl(t1, t1, 3);
        tcg_gen_andi_tl(t0, t0, ~3);
        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
        tcg_gen_shl_tl(t0, t0, t1);
        tcg_gen_xori_tl(t1, t1, 31);
        t2 = tcg_const_tl(0x7fffffffull);
        tcg_gen_shr_tl(t2, t2, t1);
        gen_load_gpr(t1, rt);
        gen_helper_1e2i(lwl, t1, t1, t0, ctx->mem_idx);
        gen_store_gpr(t1, rt);
        tcg_gen_and_tl(t1, t1, t2);
        tcg_temp_free(t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_temp_free(t1);
        tcg_gen_ext32s_tl(t0, t0);
        gen_store_gpr(t0, rt);
        opn = "lwl";
        break;
    case OPC_LWR:
        save_cpu_state(ctx, 1);
        t1 = tcg_temp_new();
        tcg_gen_andi_tl(t1, t0, 3);
#ifdef TARGET_WORDS_BIGENDIAN
        tcg_gen_xori_tl(t1, t1, 3);
#endif
        tcg_gen_shli_tl(t1, t1, 3);
        tcg_gen_andi_tl(t0, t0, ~3);
        tcg_gen_qemu_ld32u(t0, t0, ctx->mem_idx);
        tcg_gen_shr_tl(t0, t0, t1);
        tcg_gen_xori_tl(t1, t1, 31);
        t2 = tcg_const_tl(0xfffffffeull);
        tcg_gen_shl_tl(t2, t2, t1);
        gen_load_gpr(t1, rt);
        gen_helper_1e2i(lwr, t1, t1, t0, ctx->mem_idx);
        gen_store_gpr(t1, rt);
        tcg_gen_and_tl(t1, t1, t2);
        tcg_temp_free(t2);
        tcg_gen_or_tl(t0, t0, t1);
        tcg_temp_free(t1);
        gen_store_gpr(t0, rt);
        opn = "lwr";
        break;
    case OPC_LL: