Commit 5964fca8 authored by Richard Henderson
Browse files

tcg/ppc: Change TCG_REG_RA to TCG_REG_TB



At this point the conversion is a wash.  Loading of TB+ofs is
smaller, but the actual return address from exit_tb is larger.
There are a few more insns required to transition between TBs.

But the expectation is that accesses to the constant pool will
on the whole be smaller.

Signed-off-by: Richard Henderson <rth@twiddle.net>
parent afe74dbd
Loading
Loading
Loading
Loading
+122 −151
Original line number Diff line number Diff line
@@ -39,29 +39,8 @@
# define TCG_REG_TMP1   TCG_REG_R12
#endif

/* For the 64-bit target, we don't like the 5 insn sequence needed to build
   full 64-bit addresses.  Better to have a base register to which we can
   apply a 32-bit displacement.

   There are generally three items of interest:
   (1) helper functions in the main executable,
   (2) TranslationBlock data structures,
   (3) the return address in the epilogue.

   For user-only, we USE_STATIC_CODE_GEN_BUFFER, so the code_gen_buffer
   will be inside the main executable, and thus near enough to make a
   pointer to the epilogue be within 2GB of all helper functions.

   For softmmu, we'll let the kernel choose the address of code_gen_buffer,
   and odds are it'll be somewhere close to the main malloc arena, and so
   a pointer to the epilogue will be within 2GB of the TranslationBlocks.

   For --enable-pie, everything will be kinda near everything else,
   somewhere in high memory.

   Thus we choose to keep the return address in a call-saved register.  */
#define TCG_REG_RA     TCG_REG_R31
#define USE_REG_RA     (TCG_TARGET_REG_BITS == 64)
/* Base register for TB-relative addressing.  It is reserved to hold
   tb->tc_ptr, so that constants and addresses near the current
   TranslationBlock can be formed as TCG_REG_TB + displacement.
   This scheme is enabled only when TCG_TARGET_REG_BITS == 64.  */
#define TCG_REG_TB     TCG_REG_R31
#define USE_REG_TB     (TCG_TARGET_REG_BITS == 64)

/* Shorthand for size of a pointer.  Avoid promotion to unsigned.  */
#define SZP  ((int)sizeof(void *))
@@ -614,40 +593,53 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
    tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}

static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
                             tcg_target_long arg, bool in_prologue)
{
    intptr_t tb_diff;
    int32_t high;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) {
        arg = (int32_t)arg;
    }

    /* Load 16-bit immediates with one insn.  */
    if (arg == (int16_t)arg) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
    } else {
        return;
    }

    /* Load addresses within the TB with one insn.  */
    tb_diff = arg - (intptr_t)s->code_gen_ptr;
    if (!in_prologue && USE_REG_TB && tb_diff == (int16_t)tb_diff) {
        tcg_out32(s, ADDI | TAI(ret, TCG_REG_TB, tb_diff));
        return;
    }

    /* Load 32-bit immediates with two insns.  */
    if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) {
        tcg_out32(s, ADDIS | TAI(ret, 0, arg >> 16));
        if (arg & 0xffff) {
            tcg_out32(s, ORI | SAI(ret, ret, arg));
        }
        return;
    }
}

static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                         tcg_target_long arg)
{
    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
    if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
        tcg_out_movi32(s, ret, arg);
    } else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
    if (arg == (uint32_t)arg && !(arg & 0x8000)) {
        tcg_out32(s, ADDI | TAI(ret, 0, arg));
        tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
    } else {
        int32_t high;

        if (USE_REG_RA) {
            intptr_t diff = arg - (intptr_t)tb_ret_addr;
            if (diff == (int32_t)diff) {
                tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_RA, diff);
        return;
    }

    /* Load addresses within 2GB of TB with 2 (or rarely 3) insns.  */
    if (!in_prologue && USE_REG_TB && tb_diff == (int32_t)tb_diff) {
        tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tb_diff);
        return;
    }

    high = arg >> 31 >> 1;
        tcg_out_movi32(s, ret, high);
    tcg_out_movi(s, TCG_TYPE_I32, ret, high);
    if (high) {
        tcg_out_shli64(s, ret, ret, 32);
    }
@@ -658,6 +650,11 @@ static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
        tcg_out32(s, ORI | SAI(ret, ret, arg));
    }
}

/* Load the constant ARG of type TYPE into register RET.
   This is the entry point for code emitted outside the prologue: it
   forwards to tcg_out_movi_int with in_prologue == false, which permits
   the TCG_REG_TB-relative forms when USE_REG_TB is set.  */
static inline void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                                tcg_target_long arg)
{
    tcg_out_movi_int(s, type, ret, arg, false);
}

static bool mask_operand(uint32_t c, int *mb, int *me)
@@ -1293,29 +1290,27 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
    tcg_out32(s, insn);
}

#ifdef __powerpc64__
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    if (TCG_TARGET_REG_BITS == 64) {
        tcg_insn_unit i1, i2;
        intptr_t tb_diff = addr - tc_ptr;
        intptr_t br_diff = addr - (jmp_addr + 4);
        uint64_t pair;
    intptr_t diff = addr - jmp_addr;

    if (in_range_b(diff)) {
        i1 = B | (diff & 0x3fffffc);
        i2 = NOP;
    } else if (USE_REG_RA) {
        intptr_t lo, hi;
        diff = addr - (uintptr_t)tb_ret_addr;
        lo = (int16_t)diff;
        hi = (int32_t)(diff - lo);
        tcg_debug_assert(diff == hi + lo);
        i1 = ADDIS | TAI(TCG_REG_TMP1, TCG_REG_RA, hi >> 16);
        i2 = ADDI | TAI(TCG_REG_TMP1, TCG_REG_TMP1, lo);
    } else {
        tcg_debug_assert(TCG_TARGET_REG_BITS == 32 || addr == (int32_t)addr);
        i1 = ADDIS | TAI(TCG_REG_TMP1, 0, addr >> 16);
        i2 = ORI | SAI(TCG_REG_TMP1, TCG_REG_TMP1, addr);
        /* This does not exercise the range of the branch, but we do
           still need to be able to load the new value of TCG_REG_TB.
           But this does still happen quite often.  */
        if (tb_diff == (int16_t)tb_diff) {
            i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff);
            i2 = B | (br_diff & 0x3fffffc);
        } else {
            intptr_t lo = (int16_t)tb_diff;
            intptr_t hi = (int32_t)(tb_diff - lo);
            assert(tb_diff == hi + lo);
            i1 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16);
            i2 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo);
        }
#ifdef HOST_WORDS_BIGENDIAN
        pair = (uint64_t)i1 << 32 | i2;
@@ -1325,17 +1320,13 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,

        atomic_set((uint64_t *)jmp_addr, pair);
        flush_icache_range(jmp_addr, jmp_addr + 8);
}
#else
void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                              uintptr_t addr)
{
    } else {
        intptr_t diff = addr - jmp_addr;
        tcg_debug_assert(in_range_b(diff));
        atomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
        flush_icache_range(jmp_addr, jmp_addr + 4);
    }
#endif
}

static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
{
@@ -1897,44 +1888,20 @@ static void tcg_target_qemu_prologue(TCGContext *s)

#ifndef CONFIG_SOFTMMU
    if (guest_base) {
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
        tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
    }
#endif

    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);

    if (USE_REG_RA) {
#ifdef _CALL_AIX
        /* Make the caller load the value as the TOC into R2.  */
        tb_ret_addr = s->code_ptr + 2;
        desc[1] = tb_ret_addr;
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
        tcg_out32(s, BCCTR | BO_ALWAYS);
#elif defined(_CALL_ELF) && _CALL_ELF == 2
        /* Compute from the incoming R12 value.  */
        tb_ret_addr = s->code_ptr + 2;
        tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
                                tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
        tcg_out32(s, BCCTR | BO_ALWAYS);
#else
        /* Reserve max 5 insns for the constant load.  */
        tb_ret_addr = s->code_ptr + 6;
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
        tcg_out32(s, BCCTR | BO_ALWAYS);
        while (s->code_ptr < tb_ret_addr) {
            tcg_out32(s, NOP);
    if (USE_REG_TB) {
        tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]);
    }
#endif
    } else {
    tcg_out32(s, BCCTR | BO_ALWAYS);
        tb_ret_addr = s->code_ptr;
    }

    /* Epilogue */
    tcg_debug_assert(tb_ret_addr == s->code_ptr);
    s->code_gen_epilogue = tb_ret_addr;
    s->code_gen_epilogue = tb_ret_addr = s->code_ptr;

    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET);
    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
@@ -1954,44 +1921,48 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,

    switch (opc) {
    case INDEX_op_exit_tb:
        if (USE_REG_RA) {
            ptrdiff_t disp = tcg_pcrel_diff(s, tb_ret_addr);

            /* Use a direct branch if we can, otherwise use the value in RA.
               Note that the direct branch is always backward, thus we need
               to account for the possibility of 5 insns from the movi.  */
            if (!in_range_b(disp - 20)) {
                tcg_out32(s, MTSPR | RS(TCG_REG_RA) | CTR);
                tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
                tcg_out32(s, BCCTR | BO_ALWAYS);
                break;
            }
        }
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]);
        tcg_out_b(s, 0, tb_ret_addr);
        break;
    case INDEX_op_goto_tb:
        tcg_debug_assert(s->tb_jmp_insn_offset);
        if (s->tb_jmp_insn_offset) {
            /* Direct jump. */
#ifdef __powerpc64__
            if (TCG_TARGET_REG_BITS == 64) {
                /* Ensure the next insns are 8-byte aligned. */
                if ((uintptr_t)s->code_ptr & 7) {
                    tcg_out32(s, NOP);
                }
                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
        /* To be replaced by either a branch+nop or a load into TMP1.  */
        s->code_ptr += 2;
        tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS);
#else
        /* To be replaced by a branch.  */
        s->code_ptr++;
#endif
                tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0));
                tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0));
            } else {
                s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
                tcg_out32(s, B);
                s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
                break;
            }
        } else {
            /* Indirect jump. */
            tcg_debug_assert(s->tb_jmp_insn_offset == NULL);
            tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0,
                       (intptr_t)(s->tb_jmp_insn_offset + args[0]));
        }
        tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
        tcg_out32(s, BCCTR | BO_ALWAYS);
        s->tb_jmp_reset_offset[args[0]] = c = tcg_current_code_size(s);
        if (USE_REG_TB) {
            /* For the unlinked case, need to reset TCG_REG_TB.  */
            c = -c;
            assert(c == (int16_t)c);
            tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
        }
        break;
    case INDEX_op_goto_ptr:
        tcg_out32(s, MTSPR | RS(args[0]) | CTR);
        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0);
        if (USE_REG_TB) {
            tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, args[0]);
        }
        tcg_out32(s, ADDI | TAI(TCG_REG_R3, 0, 0));
        tcg_out32(s, BCCTR | BO_ALWAYS);
        break;
    case INDEX_op_br:
@@ -2761,8 +2732,8 @@ static void tcg_target_init(TCGContext *s)
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_R13); /* thread pointer */
#endif
    tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1); /* mem temp */
    if (USE_REG_RA) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);  /* return addr */
    if (USE_REG_TB) {
        tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);  /* tb->tc_ptr */
    }
}