Commit 712182d3 authored by Peter Maydell's avatar Peter Maydell
Browse files

target/arm: Convert Neon narrowing shifts with op==8 to decodetree



Convert the Neon narrowing shifts where op==8 to decodetree:
 * VSHRN
 * VRSHRN
 * VQSHRUN
 * VQRSHRUN

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
Reviewed-by: default avatarRichard Henderson <richard.henderson@linaro.org>
Message-id: 20200522145520.6778-6-peter.maydell@linaro.org
parent 37bfce81
Loading
Loading
Loading
Loading
+27 −0
Original line number Diff line number Diff line
@@ -232,6 +232,17 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
@2reg_shl_b      .... ... . . . 001 shift:3  .... .... 0 q:1 . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=0

# Narrowing right shifts: here the Q bit is part of the opcode decode
@2reg_shrn_d     .... ... . . . 1 ..... .... .... 0 . . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=3 q=0 \
                 shift=%neon_rshift_i5
@2reg_shrn_s     .... ... . . . 01 .... .... .... 0 . . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0 \
                 shift=%neon_rshift_i4
@2reg_shrn_h     .... ... . . . 001 ... .... .... 0 . . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \
                 shift=%neon_rshift_i3

VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
@@ -301,3 +312,19 @@ VQSHL_U_64_2sh 1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_d
VQSHL_U_2sh      1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_s
VQSHL_U_2sh      1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_h
VQSHL_U_2sh      1111 001 1 1 . ...... .... 0111 . . . 1 .... @2reg_shl_b

VSHRN_64_2sh     1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
VSHRN_32_2sh     1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
VSHRN_16_2sh     1111 001 0 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h

VRSHRN_64_2sh    1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
VRSHRN_32_2sh    1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
VRSHRN_16_2sh    1111 001 0 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h

VQSHRUN_64_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_d
VQSHRUN_32_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_s
VQSHRUN_16_2sh   1111 001 1 1 . ...... .... 1000 . 0 . 1 .... @2reg_shrn_h

VQRSHRUN_64_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_d
VQRSHRUN_32_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_s
VQRSHRUN_16_2sh  1111 001 1 1 . ...... .... 1000 . 1 . 1 .... @2reg_shrn_h
+167 −0
Original line number Diff line number Diff line
@@ -1396,3 +1396,170 @@ static bool do_2shift_env_32(DisasContext *s, arg_2reg_shift *a,
DO_2SHIFT_ENV(VQSHLU, qshlu_s)
DO_2SHIFT_ENV(VQSHL_U, qshl_u)
DO_2SHIFT_ENV(VQSHL_S, qshl_s)

static bool do_2shift_narrow_64(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwo64OpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size == 3 case */
    TCGv_i64 constimm, rm1, rm2;
    TCGv_i32 rd;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count.
     */
    constimm = tcg_const_i64(-a->shift);
    rm1 = tcg_temp_new_i64();
    rm2 = tcg_temp_new_i64();

    /* Load both inputs first to avoid potential overwrite if rm == rd */
    neon_load_reg64(rm1, a->vm);
    neon_load_reg64(rm2, a->vm + 1);

    shiftfn(rm1, rm1, constimm);
    rd = tcg_temp_new_i32();
    narrowfn(rd, cpu_env, rm1);
    neon_store_reg(a->vd, 0, rd);

    shiftfn(rm2, rm2, constimm);
    rd = tcg_temp_new_i32();
    narrowfn(rd, cpu_env, rm2);
    neon_store_reg(a->vd, 1, rd);

    tcg_temp_free_i64(rm1);
    tcg_temp_free_i64(rm2);
    tcg_temp_free_i64(constimm);

    return true;
}

static bool do_2shift_narrow_32(DisasContext *s, arg_2reg_shift *a,
                                NeonGenTwoOpFn *shiftfn,
                                NeonGenNarrowEnvFn *narrowfn)
{
    /* 2-reg-and-shift narrowing-shift operations, size < 3 case */
    TCGv_i32 constimm, rm1, rm2, rm3, rm4;
    TCGv_i64 rtmp;
    uint32_t imm;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    if (a->vm & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is always a right shift, and the shiftfn is always a
     * left-shift helper, which thus needs the negated shift count
     * duplicated into each lane of the immediate value.
     */
    if (a->size == 1) {
        imm = (uint16_t)(-a->shift);
        imm |= imm << 16;
    } else {
        /* size == 2 */
        imm = -a->shift;
    }
    constimm = tcg_const_i32(imm);

    /* Load all inputs first to avoid potential overwrite */
    rm1 = neon_load_reg(a->vm, 0);
    rm2 = neon_load_reg(a->vm, 1);
    rm3 = neon_load_reg(a->vm + 1, 0);
    rm4 = neon_load_reg(a->vm + 1, 1);
    rtmp = tcg_temp_new_i64();

    shiftfn(rm1, rm1, constimm);
    shiftfn(rm2, rm2, constimm);

    tcg_gen_concat_i32_i64(rtmp, rm1, rm2);
    tcg_temp_free_i32(rm2);

    narrowfn(rm1, cpu_env, rtmp);
    neon_store_reg(a->vd, 0, rm1);

    shiftfn(rm3, rm3, constimm);
    shiftfn(rm4, rm4, constimm);
    tcg_temp_free_i32(constimm);

    tcg_gen_concat_i32_i64(rtmp, rm3, rm4);
    tcg_temp_free_i32(rm4);

    narrowfn(rm3, cpu_env, rtmp);
    tcg_temp_free_i64(rtmp);
    neon_store_reg(a->vd, 1, rm3);
    return true;
}

#define DO_2SN_64(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_64(s, a, FUNC, NARROWFUNC);             \
    }
#define DO_2SN_32(INSN, FUNC, NARROWFUNC)                               \
    static bool trans_##INSN##_2sh(DisasContext *s, arg_2reg_shift *a)  \
    {                                                                   \
        return do_2shift_narrow_32(s, a, FUNC, NARROWFUNC);             \
    }

static void gen_neon_narrow_u32(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    tcg_gen_extrl_i64_i32(dest, src);
}

static void gen_neon_narrow_u16(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u16(dest, src);
}

static void gen_neon_narrow_u8(TCGv_i32 dest, TCGv_ptr env, TCGv_i64 src)
{
    gen_helper_neon_narrow_u8(dest, src);
}

DO_2SN_64(VSHRN_64, gen_ushl_i64, gen_neon_narrow_u32)
DO_2SN_32(VSHRN_32, gen_ushl_i32, gen_neon_narrow_u16)
DO_2SN_32(VSHRN_16, gen_helper_neon_shl_u16, gen_neon_narrow_u8)

DO_2SN_64(VRSHRN_64, gen_helper_neon_rshl_u64, gen_neon_narrow_u32)
DO_2SN_32(VRSHRN_32, gen_helper_neon_rshl_u32, gen_neon_narrow_u16)
DO_2SN_32(VRSHRN_16, gen_helper_neon_rshl_u16, gen_neon_narrow_u8)

DO_2SN_64(VQSHRUN_64, gen_sshl_i64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQSHRUN_32, gen_sshl_i32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQSHRUN_16, gen_helper_neon_shl_s16, gen_helper_neon_unarrow_sat8)

DO_2SN_64(VQRSHRUN_64, gen_helper_neon_rshl_s64, gen_helper_neon_unarrow_sat32)
DO_2SN_32(VQRSHRUN_32, gen_helper_neon_rshl_s32, gen_helper_neon_unarrow_sat16)
DO_2SN_32(VQRSHRUN_16, gen_helper_neon_rshl_s16, gen_helper_neon_unarrow_sat8)
+1 −0
Original line number Diff line number Diff line
@@ -5280,6 +5280,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
            case 5: /* VSHL, VSLI */
            case 6: /* VQSHLU */
            case 7: /* VQSHL */
            case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
                return 1; /* handled by decodetree */
            default:
                break;