Commit 1e262b49 authored by Richard Henderson's avatar Richard Henderson
Browse files

tcg/i386: Implement tcg_out_dupm_vec



At the same time, improve tcg_out_dupi_vec wrt broadcast
from the constant pool.

Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
parent d6ecb4a9
Loading
Loading
Loading
Loading
+43 −14
Original line number Diff line number Diff line
@@ -358,7 +358,6 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVBE_MyGy  (0xf1 | P_EXT38)
#define OPC_MOVD_VyEy   (0x6e | P_EXT | P_DATA16)
#define OPC_MOVD_EyVy   (0x7e | P_EXT | P_DATA16)
#define OPC_MOVDDUP     (0x12 | P_EXT | P_SIMDF2)
#define OPC_MOVDQA_VxWx (0x6f | P_EXT | P_DATA16)
#define OPC_MOVDQA_WxVx (0x7f | P_EXT | P_DATA16)
#define OPC_MOVDQU_VxWx (0x6f | P_EXT | P_SIMDF3)
@@ -458,6 +457,10 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_UD2         (0x0b | P_EXT)
#define OPC_VPBLENDD    (0x02 | P_EXT3A | P_DATA16)
#define OPC_VPBLENDVB   (0x4c | P_EXT3A | P_DATA16)
#define OPC_VPINSRB     (0x20 | P_EXT3A | P_DATA16)
#define OPC_VPINSRW     (0xc4 | P_EXT | P_DATA16)
#define OPC_VBROADCASTSS (0x18 | P_EXT38 | P_DATA16)
#define OPC_VBROADCASTSD (0x19 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTB (0x78 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTW (0x79 | P_EXT38 | P_DATA16)
#define OPC_VPBROADCASTD (0x58 | P_EXT38 | P_DATA16)
@@ -855,16 +858,17 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
    return true;
}

static const int avx2_dup_insn[4] = {
    OPC_VPBROADCASTB, OPC_VPBROADCASTW,
    OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
};

static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg r, TCGReg a)
{
    if (have_avx2) {
        static const int dup_insn[4] = {
            OPC_VPBROADCASTB, OPC_VPBROADCASTW,
            OPC_VPBROADCASTD, OPC_VPBROADCASTQ,
        };
        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
        tcg_out_vex_modrm(s, dup_insn[vece] + vex_l, r, 0, a);
        tcg_out_vex_modrm(s, avx2_dup_insn[vece] + vex_l, r, 0, a);
    } else {
        switch (vece) {
        case MO_8:
@@ -894,9 +898,34 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg r, TCGReg base, intptr_t offset)
{
    return false;
    if (have_avx2) {
        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
        tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
                                 r, 0, base, offset);
    } else {
        switch (vece) {
        case MO_64:
            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
            break;
        case MO_32:
            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
            break;
        case MO_16:
            tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
            tcg_out8(s, 0); /* imm8 */
            tcg_out_dup_vec(s, type, vece, r, r);
            break;
        case MO_8:
            tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
            tcg_out8(s, 0); /* imm8 */
            tcg_out_dup_vec(s, type, vece, r, r);
            break;
        default:
            g_assert_not_reached();
        }
    }
    return true;
}


static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                             TCGReg ret, tcg_target_long arg)
@@ -918,16 +947,16 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
        } else if (have_avx2) {
            tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTQ + vex_l, ret);
        } else {
            tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
            tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD, ret);
        }
        new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
    } else if (have_avx2) {
        tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
    } else {
        tcg_out_vex_modrm_pool(s, OPC_MOVD_VyEy, ret);
        if (have_avx2) {
            tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSD + vex_l, ret);
        } else {
            tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
        }
        new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
        tcg_out_dup_vec(s, type, MO_32, ret, ret);
    }
}