Commit caf1cefc authored by Richard Henderson's avatar Richard Henderson Committed by Peter Maydell
Browse files

target/arm: Implement SVE Integer Compare - Scalars Group



Signed-off-by: default avatarRichard Henderson <richard.henderson@linaro.org>
Message-id: 20180613015641.5667-16-richard.henderson@linaro.org
Reviewed-by: default avatarPeter Maydell <peter.maydell@linaro.org>
Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parent 9ee3a611
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -678,3 +678,5 @@ DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)

DEF_HELPER_FLAGS_3(sve_while, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
+8 −0
Original line number Diff line number Diff line
@@ -606,6 +606,14 @@ SINCDECP_r_64 00100101 .. 1010 d:1 u:1 10001 10 .... ..... @incdec_pred
# SVE saturating inc/dec vector by predicate count
SINCDECP_z      00100101 .. 1010 d:1 u:1 10000 00 .... .....    @incdec2_pred

### SVE Integer Compare - Scalars Group

# SVE conditionally terminate scalars
CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000

# SVE integer compare scalar count and limit
WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4

### SVE Memory - 32-bit Gather and Unsized Contiguous Group

# SVE load predicate register
+31 −0
Original line number Diff line number Diff line
@@ -2738,3 +2738,34 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
    }
    return sum;
}

uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
{
    uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
    intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
    uint64_t esz_mask = pred_esz_masks[esz];
    ARMPredicateReg *d = vd;
    uint32_t flags;
    intptr_t i;

    /* Begin with a zero predicate register.  */
    flags = do_zero(d, oprsz);
    if (count == 0) {
        return flags;
    }

    /* Scale from predicate element count to bits.  */
    count <<= esz;
    /* Bound to the bits in the predicate.  */
    count = MIN(count, oprsz * 8);

    /* Set all of the requested bits.  */
    for (i = 0; i < count / 64; ++i) {
        d->p[i] = esz_mask;
    }
    if (count & 63) {
        d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
    }

    return predtest_ones(d, oprsz, esz_mask);
}
+99 −0
Original line number Diff line number Diff line
@@ -3092,6 +3092,105 @@ static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
    return true;
}

/*
 *** SVE Integer Compare Scalars Group
 */

static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
    TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
    TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
    TCGv_i64 cmp = tcg_temp_new_i64();

    tcg_gen_setcond_i64(cond, cmp, rn, rm);
    tcg_gen_extrl_i64_i32(cpu_NF, cmp);
    tcg_temp_free_i64(cmp);

    /* VF = !NF & !CF.  */
    tcg_gen_xori_i32(cpu_VF, cpu_NF, 1);
    tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);

    /* Both NF and VF actually look at bit 31.  */
    tcg_gen_neg_i32(cpu_NF, cpu_NF);
    tcg_gen_neg_i32(cpu_VF, cpu_VF);
    return true;
}

static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
    TCGv_i64 t0 = tcg_temp_new_i64();
    TCGv_i64 t1 = tcg_temp_new_i64();
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(op0, op0);
            tcg_gen_ext32u_i64(op1, op1);
        } else {
            tcg_gen_ext32s_i64(op0, op0);
            tcg_gen_ext32s_i64(op1, op1);
        }
    }

    /* For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     *
     * This is slightly complicated by 0 <= UINT64_MAX, which is nominally
     * 2**64 iterations, overflowing to 0.  Of course, predicate registers
     * aren't that large, so any value >= predicate size is sufficient.
     */
    tcg_gen_sub_i64(t0, op1, op0);

    /* t0 = MIN(op1 - op0, vsz).  */
    tcg_gen_movi_i64(t1, vsz);
    tcg_gen_umin_i64(t0, t0, t1);
    if (a->eq) {
        /* Equality means one more iteration.  */
        tcg_gen_addi_i64(t0, t0, 1);
    }

    /* t0 = (condition true ? t0 : 0).  */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(t1, 0);
    tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);

    t2 = tcg_temp_new_i32();
    tcg_gen_extrl_i64_i32(t2, t0);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(desc);

    ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

    gen_helper_sve_while(t2, ptr, t2, t3);
    do_pred_flags(t2);

    tcg_temp_free_ptr(ptr);
    tcg_temp_free_i32(t2);
    tcg_temp_free_i32(t3);
    return true;
}

/*
 *** SVE Memory - 32-bit Gather and Unsized Contiguous Group
 */