Commit e329ad2a authored by Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20190513' into staging



Improve code generation for vector duplication.
Add vector expansions for shifts by non-constant scalar.
Add vector expansions for shifts by vector.
Add integer and vector expansions for absolute value.
Several patches in preparation for Altivec.
Bug fix for tcg/aarch64 vs min/max.

# gpg: Signature made Tue 14 May 2019 00:58:02 BST
# gpg:                using RSA key 7A481E78868B4DB6A85A05C064DF38E8AF7E215F
# gpg:                issuer "richard.henderson@linaro.org"
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>" [full]
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A  05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20190513: (31 commits)
  tcg/aarch64: Do not advertise minmax for MO_64
  target/xtensa: Use tcg_gen_abs_i32
  target/tricore: Use tcg_gen_abs_tl
  target/s390x: Use tcg_gen_abs_i64
  target/ppc: Use tcg_gen_abs_tl
  target/ppc: Use tcg_gen_abs_i32
  target/cris: Use tcg_gen_abs_tl
  target/arm: Use tcg_gen_abs_i64 and tcg_gen_gvec_abs
  tcg/aarch64: Support vector absolute value
  tcg/i386: Support vector absolute value
  tcg: Add support for vector absolute value
  tcg: Add support for integer absolute value
  tcg/i386: Support vector scalar shift opcodes
  tcg: Add gvec expanders for vector shift by scalar
  tcg/aarch64: Support vector variable shift opcodes
  tcg/i386: Support vector variable shift opcodes
  tcg: Add gvec expanders for variable shift
  tcg: Add INDEX_op_dupm_vec
  tcg/aarch64: Implement tcg_out_dupm_vec
  tcg/i386: Implement tcg_out_dupm_vec
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
parents e24f44db a7b6d286
Loading
Loading
Loading
Loading
+192 −0
Original line number Diff line number Diff line
@@ -398,6 +398,54 @@ void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
        int8_t aa = *(int8_t *)(a + i);
        *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
        int16_t aa = *(int16_t *)(a + i);
        *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Element-wise absolute value over signed 32-bit lanes.
 *
 * d:    destination buffer
 * a:    source buffer
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 *
 * The negation is carried out in unsigned arithmetic so that the most
 * negative input wraps back to itself with fully defined behaviour,
 * rather than depending on signed overflow being treated as wrapping.
 *
 * After the loop clear_high() is invoked with the same size and
 * descriptor (presumably to zero the tail of d -- confirm at its
 * definition).
 */
void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        int32_t aa = *(int32_t *)(a + i);
        uint32_t ua = aa;

        /* Store through the unsigned type: same bit pattern, no overflow. */
        *(uint32_t *)(d + i) = aa < 0 ? -ua : ua;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Element-wise absolute value over signed 64-bit lanes.
 *
 * d:    destination buffer
 * a:    source buffer
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 *
 * The negation is carried out in unsigned arithmetic so that the most
 * negative input wraps back to itself with fully defined behaviour,
 * rather than depending on signed overflow being treated as wrapping.
 *
 * After the loop clear_high() is invoked with the same size and
 * descriptor (presumably to zero the tail of d -- confirm at its
 * definition).
 */
void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        int64_t aa = *(int64_t *)(a + i);
        uint64_t ua = aa;

        /* Store through the unsigned type: same bit pattern, no overflow. */
        *(uint64_t *)(d + i) = aa < 0 ? -ua : ua;
    }
    clear_high(d, oprsz, desc);
}

void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
@@ -725,6 +773,150 @@ void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
    clear_high(d, oprsz, desc);
}

/*
 * Per-element left shift of 8-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 3 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint8_t)) {
        uint8_t count = *(uint8_t *)(b + ofs) & 7;
        uint8_t val = *(uint8_t *)(a + ofs);

        *(uint8_t *)(d + ofs) = val << count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element left shift of 16-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 4 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint16_t)) {
        uint8_t count = *(uint16_t *)(b + ofs) & 15;
        uint16_t val = *(uint16_t *)(a + ofs);

        *(uint16_t *)(d + ofs) = val << count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element left shift of 32-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 5 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint32_t)) {
        uint8_t count = *(uint32_t *)(b + ofs) & 31;
        uint32_t val = *(uint32_t *)(a + ofs);

        *(uint32_t *)(d + ofs) = val << count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element left shift of 64-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 6 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint64_t)) {
        uint8_t count = *(uint64_t *)(b + ofs) & 63;
        uint64_t val = *(uint64_t *)(a + ofs);

        *(uint64_t *)(d + ofs) = val << count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element logical (unsigned) right shift of 8-bit lanes.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 3 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint8_t)) {
        uint8_t count = *(uint8_t *)(b + ofs) & 7;
        uint8_t val = *(uint8_t *)(a + ofs);

        *(uint8_t *)(d + ofs) = val >> count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element logical (unsigned) right shift of 16-bit lanes.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 4 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint16_t)) {
        uint8_t count = *(uint16_t *)(b + ofs) & 15;
        uint16_t val = *(uint16_t *)(a + ofs);

        *(uint16_t *)(d + ofs) = val >> count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element logical (unsigned) right shift of 32-bit lanes.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 5 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint32_t)) {
        uint8_t count = *(uint32_t *)(b + ofs) & 31;
        uint32_t val = *(uint32_t *)(a + ofs);

        *(uint32_t *)(d + ofs) = val >> count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element logical (unsigned) right shift of 64-bit lanes.
 *
 * d:    destination buffer
 * a:    values to shift
 * b:    shift counts, one per lane; only the low 6 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 */
void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(uint64_t)) {
        uint8_t count = *(uint64_t *)(b + ofs) & 63;
        uint64_t val = *(uint64_t *)(a + ofs);

        *(uint64_t *)(d + ofs) = val >> count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element right shift of signed 8-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    signed values to shift
 * b:    shift counts, one per lane; only the low 3 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 *
 * The shift is applied to a signed operand, so the result for negative
 * inputs follows the compiler's signed right-shift behaviour.
 */
void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    /*
     * Each iteration handles exactly one scalar int8_t, so the stride
     * must be the scalar element size (as in the shl/shr helpers), not
     * the size of a vector type.
     */
    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
        uint8_t sh = *(uint8_t *)(b + i) & 7;
        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element right shift of signed 16-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    signed values to shift
 * b:    shift counts, one per lane; only the low 4 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 *
 * The shift is applied to a signed operand, so the result for negative
 * inputs follows the compiler's signed right-shift behaviour.
 */
void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t ofs;

    for (ofs = 0; ofs < oprsz; ofs += sizeof(int16_t)) {
        uint8_t count = *(uint16_t *)(b + ofs) & 15;
        int16_t val = *(int16_t *)(a + ofs);

        *(int16_t *)(d + ofs) = val >> count;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element right shift of signed 32-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    signed values to shift
 * b:    shift counts, one per lane; only the low 5 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 *
 * The shift is applied to a signed operand, so the result for negative
 * inputs follows the compiler's signed right-shift behaviour.
 */
void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    /*
     * Each iteration handles exactly one scalar int32_t, so the stride
     * must be the scalar element size (as in the shl/shr helpers), not
     * the size of a vector type.
     */
    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
        uint8_t sh = *(uint32_t *)(b + i) & 31;
        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
    }
    clear_high(d, oprsz, desc);
}

/*
 * Per-element right shift of signed 64-bit lanes by a per-element count.
 *
 * d:    destination buffer
 * a:    signed values to shift
 * b:    shift counts, one per lane; only the low 6 bits are used
 * desc: descriptor; simd_oprsz(desc) yields the byte count to process
 *
 * The shift is applied to a signed operand, so the result for negative
 * inputs follows the compiler's signed right-shift behaviour.
 */
void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    intptr_t i;

    /*
     * Each iteration handles exactly one scalar int64_t, so the stride
     * must be the scalar element size (as in the shl/shr helpers), not
     * the size of a vector type.
     */
    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
        uint8_t sh = *(uint64_t *)(b + i) & 63;
        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
    }
    clear_high(d, oprsz, desc);
}

/* If vectors are enabled, the compiler fills in -1 for true.
   Otherwise, we must take care of this by hand.  */
#ifdef CONFIG_VECTOR16
+20 −0
Original line number Diff line number Diff line
@@ -225,6 +225,11 @@ DEF_HELPER_FLAGS_3(gvec_neg16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_neg64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

/* Absolute-value helpers, one per element width (8/16/32/64 bits).
   Each is declared as returning void and taking (ptr, ptr, i32).  */
DEF_HELPER_FLAGS_3(gvec_abs8, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs16, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs32, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_abs64, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

DEF_HELPER_FLAGS_3(gvec_not, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_and, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_or, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -254,6 +259,21 @@ DEF_HELPER_FLAGS_3(gvec_sar16i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar32i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(gvec_sar64i, TCG_CALL_NO_RWG, void, ptr, ptr, i32)

/* Shift helpers whose names end in "v", grouped as shl, shr and sar,
   one per element width (8/16/32/64 bits).  Each is declared as
   returning void and taking (ptr, ptr, ptr, i32).  */
DEF_HELPER_FLAGS_4(gvec_shl8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shl64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_shr8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_shr64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_sar8v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar16v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar32v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_sar64v, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_4(gvec_eq8, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_eq32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+0 −2
Original line number Diff line number Diff line
@@ -352,8 +352,6 @@ DEF_HELPER_2(neon_ceq_u8, i32, i32, i32)
DEF_HELPER_2(neon_ceq_u16, i32, i32, i32)
DEF_HELPER_2(neon_ceq_u32, i32, i32, i32)

DEF_HELPER_1(neon_abs_s8, i32, i32)
DEF_HELPER_1(neon_abs_s16, i32, i32)
DEF_HELPER_1(neon_clz_u8, i32, i32)
DEF_HELPER_1(neon_clz_u16, i32, i32)
DEF_HELPER_1(neon_cls_s8, i32, i32)
+0 −5
Original line number Diff line number Diff line
@@ -1228,11 +1228,6 @@ NEON_VOP(ceq_u16, neon_u16, 2)
NEON_VOP(ceq_u32, neon_u32, 1)
#undef NEON_FN

#define NEON_FN(dest, src, dummy) dest = (src < 0) ? -src : src
NEON_VOP1(abs_s8, neon_s8, 4)
NEON_VOP1(abs_s16, neon_s16, 2)
#undef NEON_FN

/* Count Leading Sign/Zero Bits.  */
static inline int do_clz8(uint8_t x)
{
+5 −36
Original line number Diff line number Diff line
@@ -9468,11 +9468,7 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
        if (u) {
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
        } else {
            TCGv_i64 tcg_zero = tcg_const_i64(0);
            tcg_gen_neg_i64(tcg_rd, tcg_rn);
            tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
                                tcg_rn, tcg_rd);
            tcg_temp_free_i64(tcg_zero);
            tcg_gen_abs_i64(tcg_rd, tcg_rn);
        }
        break;
    case 0x2f: /* FABS */
@@ -12366,11 +12362,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
        }
        break;
    case 0xb:
        if (u) { /* NEG */
        if (u) { /* ABS, NEG */
            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
            return;
        } else {
            gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_abs, size);
        }
        break;
        return;
    }

    if (size == 3) {
@@ -12438,17 +12435,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
                    }
                    break;
                case 0xb: /* ABS, NEG */
                    if (u) {
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                    } else {
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
                                            tcg_zero, tcg_op, tcg_res);
                        tcg_temp_free_i32(tcg_zero);
                    }
                    break;
                case 0x2f: /* FABS */
                    gen_helper_vfp_abss(tcg_res, tcg_op);
                    break;
@@ -12561,23 +12547,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
                    tcg_temp_free_i32(tcg_zero);
                    break;
                }
                case 0xb: /* ABS, NEG */
                    if (u) {
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        if (size) {
                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
                        } else {
                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
                        }
                        tcg_temp_free_i32(tcg_zero);
                    } else {
                        if (size) {
                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
                        }
                    }
                    break;
                case 0x4: /* CLS, CLZ */
                    if (u) {
                        if (size == 0) {
Loading