Commit ed224a56 authored by malc's avatar malc
Browse files

tcg/ppc: ld/st optimization



Signed-off-by: default avatarmalc <av1474@comtv.ru>
parent b51d7b2e
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -3882,7 +3882,7 @@ upper() {
}

case "$cpu" in
  i386|x86_64)
  i386|x86_64|ppc)
    echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_target_mak
  ;;
esac
+3 −0
Original line number Diff line number Diff line
@@ -335,6 +335,9 @@ extern uintptr_t tci_tb_ptr;
#  define GETRA() ((uintptr_t)__builtin_return_address(0))
#  define GETPC_LDST() ((uintptr_t)(GETRA() + 7 + \
                                    *(int32_t *)((void *)GETRA() + 3) - 1))
# elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
#  define GETRA() ((uintptr_t)__builtin_return_address(0))
#  define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() + 4)) - 1))
# else
#  error "CONFIG_QEMU_LDST_OPTIMIZATION needs GETPC_LDST() implementation!"
# endif
+258 −195
Original line number Diff line number Diff line
@@ -39,8 +39,6 @@ static uint8_t *tb_ret_addr;
#define LR_OFFSET 4
#endif

#define FAST_PATH

#ifndef GUEST_BASE
#define GUEST_BASE 0
#endif
@@ -520,6 +518,37 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)

#if defined(CONFIG_SOFTMMU)

static void add_qemu_ldst_label (TCGContext *s,
                                 int is_ld,
                                 int opc,
                                 int data_reg,
                                 int data_reg2,
                                 int addrlo_reg,
                                 int addrhi_reg,
                                 int mem_index,
                                 uint8_t *raddr,
                                 uint8_t *label_ptr)
{
    int idx;
    TCGLabelQemuLdst *label;

    if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
        tcg_abort();
    }

    idx = s->nb_qemu_ldst_labels++;
    label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
    label->is_ld = is_ld;
    label->opc = opc;
    label->datalo_reg = data_reg;
    label->datahi_reg = data_reg2;
    label->addrlo_reg = addrlo_reg;
    label->addrhi_reg = addrhi_reg;
    label->mem_index = mem_index;
    label->raddr = raddr;
    label->label_ptr[0] = label_ptr;
}

#include "../../softmmu_defs.h"

/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -539,36 +568,12 @@ static const void * const qemu_st_helpers[4] = {
    helper_stl_mmu,
    helper_stq_mmu,
};
#endif

static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int addr_reg2, int s_bits,
                               int offset1, int offset2, uint8_t **label_ptr)
{
    int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits, r2, ir;
    void *label1_ptr, *label2_ptr;
#if TARGET_LONG_BITS == 64
    int addr_reg2;
#endif
#endif

    data_reg = *args++;
    if (opc == 3)
        data_reg2 = *args++;
    else
        data_reg2 = 0;
    addr_reg = *args++;

#ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64
    addr_reg2 = *args++;
#endif
    mem_index = *args;
    s_bits = opc & 3;
    r0 = 3;
    r1 = 4;
    r2 = 0;
    rbase = 0;
    uint16_t retranst;

    tcg_out32 (s, (RLWINM
                   | RA (r0)
@@ -582,7 +587,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
    tcg_out32 (s, (LWZU
                   | RT (r1)
                   | RA (r0)
                   | offsetof (CPUArchState, tlb_table[mem_index][0].addr_read)
                   | offset1
                   )
        );
    tcg_out32 (s, (RLWINM
@@ -600,77 +605,58 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
    tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
    tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#endif
    *label_ptr = s->code_ptr;
    retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
    tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE);

    label1_ptr = s->code_ptr;
#ifdef FAST_PATH
    tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
#endif

    /* slow path */
    ir = 3;
    tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
#if TARGET_LONG_BITS == 32
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
    ir |= 1;
#endif
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg2);
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
    tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);

    tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
    switch (opc) {
    case 0|4:
        tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
        break;
    case 1|4:
        tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
        break;
    case 0:
    case 1:
    case 2:
        if (data_reg != 3)
            tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
        break;
    case 3:
        if (data_reg == 3) {
            if (data_reg2 == 4) {
                tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
                tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
                tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
            }
            else {
                tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
                tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
            }
        }
        else {
            if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
            if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
        }
        break;
    }
    label2_ptr = s->code_ptr;
    tcg_out32 (s, B);

    /* label1: fast path */
#ifdef FAST_PATH
    reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
#endif

    /* r0 now contains &env->tlb_table[mem_index][index].addr_read */
    /* r0 now contains &env->tlb_table[mem_index][index].addr_x */
    tcg_out32 (s, (LWZ
                   | RT (r0)
                   | RA (r0)
                   | (offsetof (CPUTLBEntry, addend)
                      - offsetof (CPUTLBEntry, addr_read))
                   ));
                   | offset2
                   )
        );
    /* r0 = env->tlb_table[mem_index][index].addend */
    tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
    /* r0 = env->tlb_table[mem_index][index].addend + addr */

}
#endif

static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
{
    int addr_reg, addr_reg2, data_reg, data_reg2, r0, r1, rbase, bswap;
#ifdef CONFIG_SOFTMMU
    int mem_index, s_bits, r2;
    uint8_t *label_ptr;
#endif

    data_reg = *args++;
    if (opc == 3)
        data_reg2 = *args++;
    else
        data_reg2 = 0;
    addr_reg = *args++;

#ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64
    addr_reg2 = *args++;
#else
    addr_reg2 = 0;
#endif
    mem_index = *args;
    s_bits = opc & 3;
    r0 = 3;
    r1 = 4;
    r2 = 0;
    rbase = 0;

    tcg_out_tlb_check (
        s, r0, r1, r2, addr_reg, addr_reg2, s_bits,
        offsetof (CPUArchState, tlb_table[mem_index][0].addr_read),
        offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read),
        &label_ptr
        );
#else  /* !CONFIG_SOFTMMU */
    r0 = addr_reg;
    r1 = 3;
@@ -736,21 +722,26 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
        }
        break;
    }

#ifdef CONFIG_SOFTMMU
    reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
    add_qemu_ldst_label (s,
                         1,
                         opc,
                         data_reg,
                         data_reg2,
                         addr_reg,
                         addr_reg2,
                         mem_index,
                         s->code_ptr,
                         label_ptr);
#endif
}

static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
{
    int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase;
    int addr_reg, addr_reg2, r0, r1, data_reg, data_reg2, bswap, rbase;
#ifdef CONFIG_SOFTMMU
    int mem_index, r2, ir;
    void *label1_ptr, *label2_ptr;
#if TARGET_LONG_BITS == 64
    int addr_reg2;
#endif
    int mem_index, r2;
    uint8_t *label_ptr;
#endif

    data_reg = *args++;
@@ -763,6 +754,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64
    addr_reg2 = *args++;
#else
    addr_reg2 = 0;
#endif
    mem_index = *args;
    r0 = 3;
@@ -770,41 +763,157 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
    r2 = 0;
    rbase = 0;

    tcg_out32 (s, (RLWINM
                   | RA (r0)
                   | RS (addr_reg)
                   | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
                   | MB (32 - (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS))
                   | ME (31 - CPU_TLB_ENTRY_BITS)
                   )
        );
    tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0));
    tcg_out32 (s, (LWZU
                   | RT (r1)
                   | RA (r0)
                   | offsetof (CPUArchState, tlb_table[mem_index][0].addr_write)
                   )
        );
    tcg_out32 (s, (RLWINM
                   | RA (r2)
                   | RS (addr_reg)
                   | SH (0)
                   | MB ((32 - opc) & 31)
                   | ME (31 - TARGET_PAGE_BITS)
                   )
    tcg_out_tlb_check (
        s, r0, r1, r2, addr_reg, addr_reg2, opc & 3,
        offsetof (CPUArchState, tlb_table[mem_index][0].addr_write),
        offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write),
        &label_ptr
        );
#else  /* !CONFIG_SOFTMMU */
    r0 = addr_reg;
    r1 = 3;
    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif

    tcg_out32 (s, CMP | (7 << 23) | RA (r2) | RB (r1));
#if TARGET_LONG_BITS == 64
    tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4);
    tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
    tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#ifdef TARGET_WORDS_BIGENDIAN
    bswap = 0;
#else
    bswap = 1;
#endif
    switch (opc) {
    case 0:
        tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
        break;
    case 1:
        if (bswap)
            tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
        else
            tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
        break;
    case 2:
        if (bswap)
            tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
        else
            tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
        break;
    case 3:
        if (bswap) {
            tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
            tcg_out32 (s, STWBRX | SAB (data_reg,  rbase, r0));
            tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1));
        }
        else {
#ifdef CONFIG_USE_GUEST_BASE
            tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0));
            tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
            tcg_out32 (s, STWX | SAB (data_reg,  rbase, r1));
#else
            tcg_out32 (s, STW | RS (data_reg2) | RA (r0));
            tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4);
#endif
        }
        break;
    }

#ifdef CONFIG_SOFTMMU
    add_qemu_ldst_label (s,
                         0,
                         opc,
                         data_reg,
                         data_reg2,
                         addr_reg,
                         addr_reg2,
                         mem_index,
                         s->code_ptr,
                         label_ptr);
#endif
}

#if defined(CONFIG_SOFTMMU)
static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
{
    int s_bits;
    int ir;
    int opc = label->opc;
    int mem_index = label->mem_index;
    int data_reg = label->datalo_reg;
    int data_reg2 = label->datahi_reg;
    int addr_reg = label->addrlo_reg;
    uint8_t *raddr = label->raddr;
    uint8_t **label_ptr = &label->label_ptr[0];

    s_bits = opc & 3;

    /* resolve label address */
    reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);

    label1_ptr = s->code_ptr;
#ifdef FAST_PATH
    tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
    /* slow path */
    ir = 3;
    tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
#if TARGET_LONG_BITS == 32
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
    ir |= 1;
#endif
    tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
    tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
    tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
    tcg_out32 (s, B | 8);
    tcg_out32 (s, (tcg_target_long) raddr);
    switch (opc) {
    case 0|4:
        tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
        break;
    case 1|4:
        tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
        break;
    case 0:
    case 1:
    case 2:
        if (data_reg != 3)
            tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
        break;
    case 3:
        if (data_reg == 3) {
            if (data_reg2 == 4) {
                tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
                tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
                tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
            }
            else {
                tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
                tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
            }
        }
        else {
            if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
            if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
        }
        break;
    }
    /* Jump to the code corresponding to next IR of qemu_st */
    tcg_out_b (s, 0, (tcg_target_long) raddr);
}

static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
{
    int s_bits;
    int ir;
    int opc = label->opc;
    int mem_index = label->mem_index;
    int data_reg = label->datalo_reg;
    int data_reg2 = label->datahi_reg;
    int addr_reg = label->addrlo_reg;
    uint8_t *raddr = label->raddr;
    uint8_t **label_ptr = &label->label_ptr[0];

    s_bits = opc & 3;

    /* resolve label address */
    reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);

    /* slow path */
    ir = 3;
@@ -815,7 +924,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
    ir |= 1;
#endif
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg2);
    tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
    tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif

@@ -851,74 +960,28 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)

    tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
    tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
    label2_ptr = s->code_ptr;
    tcg_out32 (s, B);

    /* label1: fast path */
#ifdef FAST_PATH
    reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
#endif

    tcg_out32 (s, (LWZ
                   | RT (r0)
                   | RA (r0)
                   | (offsetof (CPUTLBEntry, addend)
                      - offsetof (CPUTLBEntry, addr_write))
                   ));
    /* r0 = env->tlb_table[mem_index][index].addend */
    tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
    /* r0 = env->tlb_table[mem_index][index].addend + addr */
    tcg_out32 (s, B | 8);
    tcg_out32 (s, (tcg_target_long) raddr);
    tcg_out_b (s, 0, (tcg_target_long) raddr);
}

#else  /* !CONFIG_SOFTMMU */
    r0 = addr_reg;
    r1 = 3;
    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
#endif
void tcg_out_tb_finalize(TCGContext *s)
{
    int i;
    TCGLabelQemuLdst *label;

#ifdef TARGET_WORDS_BIGENDIAN
    bswap = 0;
#else
    bswap = 1;
#endif
    switch (opc) {
    case 0:
        tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
        break;
    case 1:
        if (bswap)
            tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
        else
            tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
        break;
    case 2:
        if (bswap)
            tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
        else
            tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
        break;
    case 3:
        if (bswap) {
            tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
            tcg_out32 (s, STWBRX | SAB (data_reg,  rbase, r0));
            tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1));
    /* qemu_ld/st slow paths */
    for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
        label = (TCGLabelQemuLdst *) &s->qemu_ldst_labels[i];
        if (label->is_ld) {
            tcg_out_qemu_ld_slow_path (s, label);
        }
        else {
#ifdef CONFIG_USE_GUEST_BASE
            tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0));
            tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
            tcg_out32 (s, STWX | SAB (data_reg,  rbase, r1));
#else
            tcg_out32 (s, STW | RS (data_reg2) | RA (r0));
            tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4);
#endif
            tcg_out_qemu_st_slow_path (s, label);
        }
        break;
    }

#ifdef CONFIG_SOFTMMU
    reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
#endif
}
#endif

static void tcg_target_qemu_prologue (TCGContext *s)
{