Commit e95485f8 authored by Peter Maydell
Browse files

target/arm: Convert NEON VFMA, VFMS 3-reg-same insns to decodetree



Convert the Neon floating point VFMA and VFMS insns to decodetree.
These are the last insns in the 3-reg-same group so we can
remove all the support/loop code from the old decoder.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200512163904.10918-18-peter.maydell@linaro.org
parent d5fdf9e9
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -174,6 +174,9 @@ SHA256H2_3s 1111 001 1 0 . 01 .... .... 1100 . 1 . 0 .... \
SHA256SU1_3s     1111 001 1 0 . 10 .... .... 1100 . 1 . 0 .... \
                 vm=%vm_dp vn=%vn_dp vd=%vd_dp

VFMA_fp_3s       1111 001 0 0 . 0 . .... .... 1100 ... 1 .... @3same_fp
VFMS_fp_3s       1111 001 0 0 . 1 . .... .... 1100 ... 1 .... @3same_fp

VQRDMLSH_3s      1111 001 1 0 . .. .... .... 1100 ... 1 .... @3same

VADD_fp_3s       1111 001 0 0 . 0 . .... .... 1101 ... 0 .... @3same_fp
+41 −0
Original line number Diff line number Diff line
@@ -1207,6 +1207,47 @@ static bool trans_VRSQRTS_fp_3s(DisasContext *s, arg_3same *a)
    return do_3same(s, a, gen_VRSQRTS_fp_3s);
}

/*
 * Generator callback for Neon VFMA (fused multiply-add), passed to
 * do_3same_fp() by trans_VFMA_fp_3s().
 *
 * Emits a single call to the vfp_muladds helper with vn and vm as the
 * first two operands and vd as both the third operand and the
 * destination, so the previous value of vd is an input as well as
 * being overwritten.
 * NOTE(review): presumably the helper computes vd = (vn * vm) + vd with
 * a single rounding, under the FP status pointed to by fpstatus --
 * confirm against the helper definition.
 */
static void gen_VFMA_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
}

/*
 * Translate the Neon floating point VFMA 3-reg-same insn.
 *
 * Returns false without generating code when the aa32_simdfmac feature
 * is absent or when the size field is non-zero; otherwise returns the
 * result of do_3same_fp() using gen_VFMA_fp_3s as the generator.
 * NOTE(review): the trailing 'true' argument is interpreted by
 * do_3same_fp(), which is not visible here; presumably it indicates
 * that the destination register is also read as an input -- confirm.
 */
static bool trans_VFMA_fp_3s(DisasContext *s, arg_3same *a)
{
    /* TODO fp16 support: for now only size == 0 is accepted */
    if (!dc_isar_feature(aa32_simdfmac, s) || a->size != 0) {
        return false;
    }

    return do_3same_fp(s, a, gen_VFMA_fp_3s, true);
}

/*
 * Generator callback for Neon VFMS (fused multiply-subtract), passed to
 * do_3same_fp() by trans_VFMS_fp_3s().
 *
 * Same as the VFMA generator except that vn is negated first. The
 * negation is done in place, so the vn temporary is clobbered; callers
 * must not rely on vn holding its original value afterwards.
 * NOTE(review): presumably the net effect is vd = (-vn * vm) + vd with
 * a single rounding -- confirm against the helper definition.
 */
static void gen_VFMS_fp_3s(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm,
                           TCGv_ptr fpstatus)
{
    /* Negate the first multiplicand before the fused multiply-add */
    gen_helper_vfp_negs(vn, vn);
    gen_helper_vfp_muladds(vd, vn, vm, vd, fpstatus);
}

/*
 * Translate the Neon floating point VFMS 3-reg-same insn.
 *
 * Returns false without generating code when the aa32_simdfmac feature
 * is absent or when the size field is non-zero; otherwise returns the
 * result of do_3same_fp() using gen_VFMS_fp_3s as the generator.
 * NOTE(review): the trailing 'true' argument is interpreted by
 * do_3same_fp(), which is not visible here; presumably it indicates
 * that the destination register is also read as an input -- confirm.
 */
static bool trans_VFMS_fp_3s(DisasContext *s, arg_3same *a)
{
    /* TODO fp16 support: for now only size == 0 is accepted */
    if (!dc_isar_feature(aa32_simdfmac, s) || a->size != 0) {
        return false;
    }

    return do_3same_fp(s, a, gen_VFMS_fp_3s, true);
}

static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, VFPGen3OpSPFn *fn)
{
    /* FP operations handled pairwise 32 bits at a time */
+2 −174
Original line number Diff line number Diff line
@@ -3391,78 +3391,6 @@ static void gen_neon_narrow_op(int op, int u, int size,
    }
}

/* Symbolic constants for op fields for Neon 3-register same-length.
 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
 * table A7-9.
 * The decoder builds this 5-bit value from the instruction word as
 * ((insn >> 7) & 0x1e) | ((insn >> 4) & 1), i.e. insn bits [11:8]
 * form the upper four bits and insn bit 4 is the least significant bit.
 */
#define NEON_3R_VHADD 0
#define NEON_3R_VQADD 1
#define NEON_3R_VRHADD 2
#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
#define NEON_3R_VHSUB 4
#define NEON_3R_VQSUB 5
#define NEON_3R_VCGT 6
#define NEON_3R_VCGE 7
#define NEON_3R_VSHL 8
#define NEON_3R_VQSHL 9
#define NEON_3R_VRSHL 10
#define NEON_3R_VQRSHL 11
#define NEON_3R_VMAX 12
#define NEON_3R_VMIN 13
#define NEON_3R_VABD 14
#define NEON_3R_VABA 15
#define NEON_3R_VADD_VSUB 16
#define NEON_3R_VTST_VCEQ 17
#define NEON_3R_VML 18 /* VMLA, VMLS */
#define NEON_3R_VMUL 19
#define NEON_3R_VPMAX 20
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD_VQRDMLAH 23
#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */

/*
 * Per-op bitmask of permitted size field values for the Neon
 * 3-register same-length group, indexed by the NEON_3R_* op value.
 * Bit n set means size value n is accepted; the decoder tests
 * (neon_3r_sizes[op] & (1 << size)) and treats a clear bit as UNDEF.
 * For several ops the size field (or one of its bits) selects the
 * operation rather than an element size, as noted per entry.
 */
static const uint8_t neon_3r_sizes[] = {
    [NEON_3R_VHADD] = 0x7,
    [NEON_3R_VQADD] = 0xf,
    [NEON_3R_VRHADD] = 0x7,
    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
    [NEON_3R_VHSUB] = 0x7,
    [NEON_3R_VQSUB] = 0xf,
    [NEON_3R_VCGT] = 0x7,
    [NEON_3R_VCGE] = 0x7,
    [NEON_3R_VSHL] = 0xf,
    [NEON_3R_VQSHL] = 0xf,
    [NEON_3R_VRSHL] = 0xf,
    [NEON_3R_VQRSHL] = 0xf,
    [NEON_3R_VMAX] = 0x7,
    [NEON_3R_VMIN] = 0x7,
    [NEON_3R_VABD] = 0x7,
    [NEON_3R_VABA] = 0x7,
    [NEON_3R_VADD_VSUB] = 0xf,
    [NEON_3R_VTST_VCEQ] = 0x7,
    [NEON_3R_VML] = 0x7,
    [NEON_3R_VMUL] = 0x7,
    [NEON_3R_VPMAX] = 0x7,
    [NEON_3R_VPMIN] = 0x7,
    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
};

/* Symbolic constants for op fields for Neon 2-register miscellaneous.
 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
 * table A7-13.
@@ -5383,108 +5311,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
    rm_ofs = neon_reg_offset(rm, 0);

    if ((insn & (1 << 23)) == 0) {
        /* Three register same length.  */
        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
        /* Catch invalid op and bad size combinations: UNDEF */
        if ((neon_3r_sizes[op] & (1 << size)) == 0) {
            return 1;
        }
        /* All insns of this form UNDEF for either this condition or the
         * superset of cases "Q==1"; we catch the latter later.
         */
        if (q && ((rd | rn | rm) & 1)) {
            return 1;
        }
        switch (op) {
        case NEON_3R_VFM_VQRDMLSH:
            if (!u) {
                /* VFM, VFMS */
                if (size == 1) {
                    return 1;
                }
                break;
            }
            /* VQRDMLSH : handled by decodetree */
            return 1;

        case NEON_3R_VADD_VSUB:
        case NEON_3R_LOGIC:
        case NEON_3R_VMAX:
        case NEON_3R_VMIN:
        case NEON_3R_VTST_VCEQ:
        case NEON_3R_VCGT:
        case NEON_3R_VCGE:
        case NEON_3R_VQADD:
        case NEON_3R_VQSUB:
        case NEON_3R_VMUL:
        case NEON_3R_VML:
        case NEON_3R_VSHL:
        case NEON_3R_SHA:
        case NEON_3R_VHADD:
        case NEON_3R_VRHADD:
        case NEON_3R_VHSUB:
        case NEON_3R_VABD:
        case NEON_3R_VABA:
        case NEON_3R_VQSHL:
        case NEON_3R_VRSHL:
        case NEON_3R_VQRSHL:
        case NEON_3R_VPMAX:
        case NEON_3R_VPMIN:
        case NEON_3R_VPADD_VQRDMLAH:
        case NEON_3R_VQDMULH_VQRDMULH:
        case NEON_3R_FLOAT_ARITH:
        case NEON_3R_FLOAT_MULTIPLY:
        case NEON_3R_FLOAT_CMP:
        case NEON_3R_FLOAT_ACMP:
        case NEON_3R_FLOAT_MINMAX:
        case NEON_3R_FLOAT_MISC:
            /* Already handled by decodetree */
        /* Three register same length: handled by decodetree */
        return 1;
        }

        if (size == 3) {
            /* 64-bit element instructions: handled by decodetree */
            return 1;
        }
        switch (op) {
        case NEON_3R_VFM_VQRDMLSH:
            if (!dc_isar_feature(aa32_simdfmac, s)) {
                return 1;
            }
            break;
        default:
            break;
        }

        for (pass = 0; pass < (q ? 4 : 2); pass++) {

        /* Elementwise.  */
        tmp = neon_load_reg(rn, pass);
        tmp2 = neon_load_reg(rm, pass);
        switch (op) {
        case NEON_3R_VFM_VQRDMLSH:
        {
            /* VFMA, VFMS: fused multiply-add */
            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
            TCGv_i32 tmp3 = neon_load_reg(rd, pass);
            if (size) {
                /* VFMS */
                gen_helper_vfp_negs(tmp, tmp);
            }
            gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
            tcg_temp_free_i32(tmp3);
            tcg_temp_free_ptr(fpstatus);
            break;
        }
        default:
            abort();
        }
        tcg_temp_free_i32(tmp2);

        neon_store_reg(rd, pass, tmp);

        } /* for pass */
        /* End of 3 register same size operations.  */
    } else if (insn & (1 << 4)) {
        if ((insn & 0x00380080) != 0) {
            /* Two registers and shift.  */