Commit aa318f5b authored by Peter Maydell's avatar Peter Maydell
Browse files

target/arm: Convert Neon 2-reg-scalar VQRDMLAH, VQRDMLSH to decodetree



Convert the VQRDMLAH and VQRDMLSH insns in the 2-reg-scalar
group to decodetree.

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
Reviewed-by: default avatarRichard Henderson <richard.henderson@linaro.org>
parent b2fc7be9
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -488,5 +488,8 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm

    VQDMULH_2sc  1111 001 . 1 . .. .... .... 1100 . 1 . 0 .... @2scalar
    VQRDMULH_2sc 1111 001 . 1 . .. .... .... 1101 . 1 . 0 .... @2scalar

    VQRDMLAH_2sc 1111 001 . 1 . .. .... .... 1110 . 1 . 0 .... @2scalar
    VQRDMLSH_2sc 1111 001 . 1 . .. .... .... 1111 . 1 . 0 .... @2scalar
  ]
}
+74 −0
Original line number Diff line number Diff line
@@ -2575,3 +2575,77 @@ static bool trans_VQRDMULH_2sc(DisasContext *s, arg_2scalar *a)

    return do_2scalar(s, a, opfn[a->size], NULL);
}

static bool do_vqrdmlah_2sc(DisasContext *s, arg_2scalar *a,
                            NeonGenThreeOpEnvFn *opfn)
{
    /*
     * VQRDMLAH/VQRDMLSH: this is like do_2scalar, but the opfn
     * performs a kind of fused op-then-accumulate using a helper
     * function that takes all of rd, rn and the scalar at once.
     */
    TCGv_i32 scalar;
    int pass;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    if (!dc_isar_feature(aa32_rdm, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vn | a->vm) & 0x10)) {
        return false;
    }

    if (!opfn) {
        /* Bad size (including size == 3, which is a different insn group) */
        return false;
    }

    if (a->q && ((a->vd | a->vn) & 1)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    scalar = neon_get_scalar(a->size, a->vm);

    for (pass = 0; pass < (a->q ? 4 : 2); pass++) {
        TCGv_i32 rn = neon_load_reg(a->vn, pass);
        TCGv_i32 rd = neon_load_reg(a->vd, pass);
        opfn(rd, cpu_env, rn, scalar, rd);
        tcg_temp_free_i32(rn);
        neon_store_reg(a->vd, pass, rd);
    }
    tcg_temp_free_i32(scalar);

    return true;
}

static bool trans_VQRDMLAH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlah_s16,
        gen_helper_neon_qrdmlah_s32,
        NULL,
    };
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}

static bool trans_VQRDMLSH_2sc(DisasContext *s, arg_2scalar *a)
{
    static NeonGenThreeOpEnvFn *opfn[] = {
        NULL,
        gen_helper_neon_qrdmlsh_s16,
        gen_helper_neon_qrdmlsh_s32,
        NULL,
    };
    return do_vqrdmlah_2sc(s, a, opfn[a->size]);
}
+2 −36
Original line number Diff line number Diff line
@@ -5179,6 +5179,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                case 9: /* Floating point VMUL scalar */
                case 12: /* VQDMULH scalar */
                case 13: /* VQRDMULH scalar */
                case 14: /* VQRDMLAH scalar */
                case 15: /* VQRDMLSH scalar */
                    return 1; /* handled by decodetree */

                case 3: /* VQDMLAL scalar */
@@ -5238,42 +5240,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                        neon_store_reg64(cpu_V0, rd + pass);
                    }
                    break;
                case 14: /* VQRDMLAH scalar */
                case 15: /* VQRDMLSH scalar */
                    {
                        NeonGenThreeOpEnvFn *fn;

                        if (!dc_isar_feature(aa32_rdm, s)) {
                            return 1;
                        }
                        if (u && ((rd | rn) & 1)) {
                            return 1;
                        }
                        if (op == 14) {
                            if (size == 1) {
                                fn = gen_helper_neon_qrdmlah_s16;
                            } else {
                                fn = gen_helper_neon_qrdmlah_s32;
                            }
                        } else {
                            if (size == 1) {
                                fn = gen_helper_neon_qrdmlsh_s16;
                            } else {
                                fn = gen_helper_neon_qrdmlsh_s32;
                            }
                        }

                        tmp2 = neon_get_scalar(size, rm);
                        for (pass = 0; pass < (u ? 4 : 2); pass++) {
                            tmp = neon_load_reg(rn, pass);
                            tmp3 = neon_load_reg(rd, pass);
                            fn(tmp, cpu_env, tmp, tmp2, tmp3);
                            tcg_temp_free_i32(tmp3);
                            neon_store_reg(rd, pass, tmp);
                        }
                        tcg_temp_free_i32(tmp2);
                    }
                    break;
                default:
                    g_assert_not_reached();
                }