Commit dabae097 authored by Richard Henderson's avatar Richard Henderson
Browse files

tcg/ppc: Support vector shift by immediate



For Altivec, this is done via vector shift by vector,
and loading the immediate into a register.

Tested-by: default avatarMark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: default avatarAleksandar Markovic <amarkovic@wavecomp.com>
Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
Signed-off-by: default avatarAleksandar Markovic <amarkovic@wavecomp.com>
parent e9d1a53a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -160,7 +160,7 @@ extern bool have_altivec;
#define TCG_TARGET_HAS_abs_vec          0
#define TCG_TARGET_HAS_shi_vec          0
#define TCG_TARGET_HAS_shs_vec          0
#define TCG_TARGET_HAS_shv_vec          0
#define TCG_TARGET_HAS_shv_vec          1
#define TCG_TARGET_HAS_cmp_vec          1
#define TCG_TARGET_HAS_mul_vec          0
#define TCG_TARGET_HAS_sat_vec          1
+56 −2
Original line number Diff line number Diff line
@@ -514,6 +514,16 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
#define VCMPGTUH   VX4(582)
#define VCMPGTUW   VX4(646)

#define VSLB       VX4(260)
#define VSLH       VX4(324)
#define VSLW       VX4(388)
#define VSRB       VX4(516)
#define VSRH       VX4(580)
#define VSRW       VX4(644)
#define VSRAB      VX4(772)
#define VSRAH      VX4(836)
#define VSRAW      VX4(900)

#define VAND       VX4(1028)
#define VANDC      VX4(1092)
#define VNOR       VX4(1284)
@@ -2860,8 +2870,14 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
    case INDEX_op_sssub_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_ussub_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return vece <= MO_32;
    case INDEX_op_cmp_vec:
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return vece <= MO_32 ? -1 : 0;
    default:
        return 0;
@@ -2968,7 +2984,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
        umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 },
        smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 },
        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 },
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 };
        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
        shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
        shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
        sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 };

    TCGType type = vecl + TCG_TYPE_V64;
    TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -3015,6 +3034,15 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
    case INDEX_op_umax_vec:
        insn = umax_op[vece];
        break;
    case INDEX_op_shlv_vec:
        insn = shlv_op[vece];
        break;
    case INDEX_op_shrv_vec:
        insn = shrv_op[vece];
        break;
    case INDEX_op_sarv_vec:
        insn = sarv_op[vece];
        break;
    case INDEX_op_and_vec:
        insn = VAND;
        break;
@@ -3059,6 +3087,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
    tcg_out32(s, insn | VRT(a0) | VRA(a1) | VRB(a2));
}

static void expand_vec_shi(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGArg imm, TCGOpcode opci)
{
    TCGv_vec t1 = tcg_temp_new_vec(type);

    /* Splat w/bytes for xxspltib.  */
    tcg_gen_dupi_vec(MO_8, t1, imm & ((8 << vece) - 1));
    vec_gen_3(opci, type, vece, tcgv_vec_arg(v0),
              tcgv_vec_arg(v1), tcgv_vec_arg(t1));
    tcg_temp_free_vec(t1);
}

static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
                           TCGv_vec v1, TCGv_vec v2, TCGCond cond)
{
@@ -3110,14 +3150,25 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
{
    va_list va;
    TCGv_vec v0, v1, v2;
    TCGArg a2;

    va_start(va, a0);
    v0 = temp_tcgv_vec(arg_temp(a0));
    v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
    a2 = va_arg(va, TCGArg);

    switch (opc) {
    case INDEX_op_shli_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shlv_vec);
        break;
    case INDEX_op_shri_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_shrv_vec);
        break;
    case INDEX_op_sari_vec:
        expand_vec_shi(type, vece, v0, v1, a2, INDEX_op_sarv_vec);
        break;
    case INDEX_op_cmp_vec:
        v2 = temp_tcgv_vec(arg_temp(a2));
        expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
        break;
    default:
@@ -3317,6 +3368,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
    case INDEX_op_smin_vec:
    case INDEX_op_umax_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return &v_v_v;
    case INDEX_op_not_vec:
    case INDEX_op_dup_vec: