Loading tcg/README +9 −0 Original line number Diff line number Diff line Loading @@ -554,6 +554,15 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. Similarly, v0 = -v1. * ssadd_vec: * sssub_vec: * usadd_vec: * ussub_vec: Signed and unsigned saturating addition and subtraction. If the true result is not representable within the element type, the element is set to the minimum or maximum value for the type. * and_vec v0, v1, v2 * or_vec v0, v1, v2 * xor_vec v0, v1, v2 Loading tcg/aarch64/tcg-target.h +1 −0 Original line number Diff line number Diff line Loading @@ -135,6 +135,7 @@ typedef enum { #define TCG_TARGET_HAS_shv_vec 0 #define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 0 #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 Loading tcg/i386/tcg-target.h +1 −0 Original line number Diff line number Diff line Loading @@ -185,6 +185,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_shv_vec 0 #define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ Loading tcg/tcg-op-gvec.c +64 −20 Original line number Diff line number Diff line Loading @@ -1678,10 +1678,22 @@ void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_ssadd8, .vece = MO_8 }, { .fno = gen_helper_gvec_ssadd16, .vece = MO_16 }, { .fno = gen_helper_gvec_ssadd32, .vece = MO_32 }, { .fno = gen_helper_gvec_ssadd64, .vece = MO_64 } { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd8, .opc = INDEX_op_ssadd_vec, .vece = MO_8 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd16, .opc = INDEX_op_ssadd_vec, .vece = MO_16 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd32, .opc = INDEX_op_ssadd_vec, .vece = MO_32 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd64, .opc = INDEX_op_ssadd_vec, .vece = MO_64 }, }; tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); Loading @@ -1691,16 +1703,28 @@ void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_sssub8, .vece = MO_8 }, { .fno = gen_helper_gvec_sssub16, .vece = MO_16 }, { .fno = gen_helper_gvec_sssub32, .vece = MO_32 }, { .fno = gen_helper_gvec_sssub64, .vece = MO_64 } { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub8, .opc = INDEX_op_sssub_vec, .vece = MO_8 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub16, .opc = INDEX_op_sssub_vec, .vece = MO_16 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub32, .opc = INDEX_op_sssub_vec, .vece = MO_32 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub64, .opc = INDEX_op_sssub_vec, .vece = MO_64 }, }; tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } static void tcg_gen_vec_usadd32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) static void tcg_gen_usadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { TCGv_i32 max = tcg_const_i32(-1); tcg_gen_add_i32(d, a, b); Loading @@ -1708,7 +1732,7 @@ static void tcg_gen_vec_usadd32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) tcg_temp_free_i32(max); } static void tcg_gen_vec_usadd32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 max = tcg_const_i64(-1); tcg_gen_add_i64(d, a, b); Loading @@ -1720,20 +1744,30 @@ void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_usadd8, .vece = MO_8 }, { .fno = gen_helper_gvec_usadd16, .vece = MO_16 }, { .fni4 = tcg_gen_vec_usadd32_i32, { .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd8, .opc = INDEX_op_usadd_vec, .vece = MO_8 }, { .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd16, .opc = INDEX_op_usadd_vec, .vece = MO_16 }, { .fni4 = tcg_gen_usadd_i32, .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd32, .opc = INDEX_op_usadd_vec, .vece = MO_32 }, { .fni8 = tcg_gen_vec_usadd32_i64, { .fni8 = tcg_gen_usadd_i64, .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd64, .opc = INDEX_op_usadd_vec, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } static void tcg_gen_vec_ussub32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) static void tcg_gen_ussub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { TCGv_i32 min = tcg_const_i32(0); tcg_gen_sub_i32(d, a, b); Loading @@ -1741,7 +1775,7 @@ static void tcg_gen_vec_ussub32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) tcg_temp_free_i32(min); } static void tcg_gen_vec_ussub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 min = tcg_const_i64(0); tcg_gen_sub_i64(d, a, b); Loading @@ -1753,13 +1787,23 @@ void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_ussub8, .vece = MO_8 }, { .fno = gen_helper_gvec_ussub16, .vece = MO_16 }, { .fni4 = tcg_gen_vec_ussub32_i32, { .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub8, .opc = INDEX_op_ussub_vec, .vece = MO_8 }, { .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub16, .opc = INDEX_op_ussub_vec, .vece = MO_16 }, { .fni4 = tcg_gen_ussub_i32, .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub32, .opc = INDEX_op_ussub_vec, .vece = MO_32 }, { .fni8 = tcg_gen_vec_ussub32_i64, { .fni8 = tcg_gen_ussub_i64, .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub64, .opc = INDEX_op_ussub_vec, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); Loading tcg/tcg-op-vec.c +30 −4 Original line number Diff line number Diff line Loading @@ -386,7 +386,8 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, } } void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b, TCGOpcode opc) { TCGTemp *rt = tcgv_vec_temp(r); TCGTemp *at = tcgv_vec_temp(a); Loading @@ -399,11 +400,36 @@ void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) tcg_debug_assert(at->base_type >= type); tcg_debug_assert(bt->base_type >= type); can = tcg_can_emit_vec_op(INDEX_op_mul_vec, type, vece); can = tcg_can_emit_vec_op(opc, type, vece); if (can > 0) { vec_gen_3(INDEX_op_mul_vec, type, vece, ri, ai, bi); vec_gen_3(opc, type, vece, ri, ai, bi); } else { tcg_debug_assert(can < 0); tcg_expand_vec_op(INDEX_op_mul_vec, type, vece, ri, ai, bi); tcg_expand_vec_op(opc, type, vece, ri, ai, bi); } } void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_mul_vec); } void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_ssadd_vec); } void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_usadd_vec); } void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_sssub_vec); } void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_ussub_vec); } Loading
tcg/README +9 −0 Original line number Diff line number Diff line Loading @@ -554,6 +554,15 @@ E.g. VECL=1 -> 64 << 1 -> v128, and VECE=2 -> 1 << 2 -> i32. Similarly, v0 = -v1. * ssadd_vec: * sssub_vec: * usadd_vec: * ussub_vec: Signed and unsigned saturating addition and subtraction. If the true result is not representable within the element type, the element is set to the minimum or maximum value for the type. * and_vec v0, v1, v2 * or_vec v0, v1, v2 * xor_vec v0, v1, v2 Loading
tcg/aarch64/tcg-target.h +1 −0 Original line number Diff line number Diff line Loading @@ -135,6 +135,7 @@ typedef enum { #define TCG_TARGET_HAS_shv_vec 0 #define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 0 #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 Loading
tcg/i386/tcg-target.h +1 −0 Original line number Diff line number Diff line Loading @@ -185,6 +185,7 @@ extern bool have_avx2; #define TCG_TARGET_HAS_shv_vec 0 #define TCG_TARGET_HAS_cmp_vec 1 #define TCG_TARGET_HAS_mul_vec 1 #define TCG_TARGET_HAS_sat_vec 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) \ (((ofs) == 0 && (len) == 8) || ((ofs) == 8 && (len) == 8) || \ Loading
tcg/tcg-op-gvec.c +64 −20 Original line number Diff line number Diff line Loading @@ -1678,10 +1678,22 @@ void tcg_gen_gvec_ssadd(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_ssadd8, .vece = MO_8 }, { .fno = gen_helper_gvec_ssadd16, .vece = MO_16 }, { .fno = gen_helper_gvec_ssadd32, .vece = MO_32 }, { .fno = gen_helper_gvec_ssadd64, .vece = MO_64 } { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd8, .opc = INDEX_op_ssadd_vec, .vece = MO_8 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd16, .opc = INDEX_op_ssadd_vec, .vece = MO_16 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd32, .opc = INDEX_op_ssadd_vec, .vece = MO_32 }, { .fniv = tcg_gen_ssadd_vec, .fno = gen_helper_gvec_ssadd64, .opc = INDEX_op_ssadd_vec, .vece = MO_64 }, }; tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); Loading @@ -1691,16 +1703,28 @@ void tcg_gen_gvec_sssub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_sssub8, .vece = MO_8 }, { .fno = gen_helper_gvec_sssub16, .vece = MO_16 }, { .fno = gen_helper_gvec_sssub32, .vece = MO_32 }, { .fno = gen_helper_gvec_sssub64, .vece = MO_64 } { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub8, .opc = INDEX_op_sssub_vec, .vece = MO_8 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub16, .opc = INDEX_op_sssub_vec, .vece = MO_16 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub32, .opc = INDEX_op_sssub_vec, .vece = MO_32 }, { .fniv = tcg_gen_sssub_vec, .fno = gen_helper_gvec_sssub64, .opc = INDEX_op_sssub_vec, .vece = MO_64 }, }; tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } static void tcg_gen_vec_usadd32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) static void tcg_gen_usadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { TCGv_i32 max = tcg_const_i32(-1); tcg_gen_add_i32(d, a, b); Loading @@ -1708,7 +1732,7 @@ static void tcg_gen_vec_usadd32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) tcg_temp_free_i32(max); } static void tcg_gen_vec_usadd32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) static void tcg_gen_usadd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 max = tcg_const_i64(-1); tcg_gen_add_i64(d, a, b); Loading @@ -1720,20 +1744,30 @@ void tcg_gen_gvec_usadd(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_usadd8, .vece = MO_8 }, { .fno = gen_helper_gvec_usadd16, .vece = MO_16 }, { .fni4 = tcg_gen_vec_usadd32_i32, { .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd8, .opc = INDEX_op_usadd_vec, .vece = MO_8 }, { .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd16, .opc = INDEX_op_usadd_vec, .vece = MO_16 }, { .fni4 = tcg_gen_usadd_i32, .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd32, .opc = INDEX_op_usadd_vec, .vece = MO_32 }, { .fni8 = tcg_gen_vec_usadd32_i64, { .fni8 = tcg_gen_usadd_i64, .fniv = tcg_gen_usadd_vec, .fno = gen_helper_gvec_usadd64, .opc = INDEX_op_usadd_vec, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &g[vece]); } static void tcg_gen_vec_ussub32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) static void tcg_gen_ussub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) { TCGv_i32 min = tcg_const_i32(0); tcg_gen_sub_i32(d, a, b); Loading @@ -1741,7 +1775,7 @@ static void tcg_gen_vec_ussub32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) tcg_temp_free_i32(min); } static void tcg_gen_vec_ussub32_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) static void tcg_gen_ussub_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) { TCGv_i64 min = tcg_const_i64(0); tcg_gen_sub_i64(d, a, b); Loading @@ -1753,13 +1787,23 @@ void tcg_gen_gvec_ussub(unsigned vece, uint32_t dofs, uint32_t aofs, uint32_t bofs, uint32_t oprsz, uint32_t maxsz) { static const GVecGen3 g[4] = { { .fno = gen_helper_gvec_ussub8, .vece = MO_8 }, { .fno = gen_helper_gvec_ussub16, .vece = MO_16 }, { .fni4 = tcg_gen_vec_ussub32_i32, { .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub8, .opc = INDEX_op_ussub_vec, .vece = MO_8 }, { .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub16, .opc = INDEX_op_ussub_vec, .vece = MO_16 }, { .fni4 = tcg_gen_ussub_i32, .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub32, .opc = INDEX_op_ussub_vec, .vece = MO_32 }, { .fni8 = tcg_gen_vec_ussub32_i64, { .fni8 = tcg_gen_ussub_i64, .fniv = tcg_gen_ussub_vec, .fno = gen_helper_gvec_ussub64, .opc = INDEX_op_ussub_vec, .vece = MO_64 } }; tcg_debug_assert(vece <= MO_64); Loading
tcg/tcg-op-vec.c +30 −4 Original line number Diff line number Diff line Loading @@ -386,7 +386,8 @@ void tcg_gen_cmp_vec(TCGCond cond, unsigned vece, } } void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) static void do_op3(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b, TCGOpcode opc) { TCGTemp *rt = tcgv_vec_temp(r); TCGTemp *at = tcgv_vec_temp(a); Loading @@ -399,11 +400,36 @@ void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) tcg_debug_assert(at->base_type >= type); tcg_debug_assert(bt->base_type >= type); can = tcg_can_emit_vec_op(INDEX_op_mul_vec, type, vece); can = tcg_can_emit_vec_op(opc, type, vece); if (can > 0) { vec_gen_3(INDEX_op_mul_vec, type, vece, ri, ai, bi); vec_gen_3(opc, type, vece, ri, ai, bi); } else { tcg_debug_assert(can < 0); tcg_expand_vec_op(INDEX_op_mul_vec, type, vece, ri, ai, bi); tcg_expand_vec_op(opc, type, vece, ri, ai, bi); } } void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_mul_vec); } void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_ssadd_vec); } void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_usadd_vec); } void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_sssub_vec); } void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b) { do_op3(vece, r, a, b, INDEX_op_ussub_vec); }