target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD) (09e03735) · Commits · SUMMER2020 / students / proj-2021291

target-arm/helper.h

+14 −6

Original line number	Diff line number	Diff line
		@@ -186,12 +186,20 @@ DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
		DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)

		/* neon_helper.c */
		/* neon_qadd_* declared with the plain DEF_HELPER_3 form.
		 * NOTE(review): the same six names are declared again immediately below
		 * with DEF_HELPER_FLAGS_3; presumably these six lines are the pre-change
		 * side of the diff and only one set should survive -- confirm.
		 */
		DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
		DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32)
		DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32)
		DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32)
		DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32)
		DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32)
		/* neon_qadd_* declared with the TCG_CALL_NO_RWG call flag; each takes
		 * env plus two i32 operands and returns i32.
		 */
		DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32)
		/* Unsigned saturating accumulate of a signed value (USQADD helpers).
		 * The 8/16/32-bit variants use i32 operands; the 64-bit one uses i64.
		 */
		DEF_HELPER_FLAGS_3(neon_uqadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64)
		/* Signed saturating accumulate of an unsigned value (SUQADD helpers).
		 * The 8/16/32-bit variants use i32 operands; the 64-bit one uses i64.
		 */
		DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32)
		DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64)
		/* neon_qsub_* helpers, still declared with the plain DEF_HELPER_3 form. */
		DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32)
		DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32)
		DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32)

target-arm/neon_helper.c

+165 −0

Original line number	Diff line number	Diff line
		@@ -236,6 +236,171 @@ uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
		return res;
		}

		/* Unsigned saturating accumulate of a signed value
		 *
		 * Op1/Rn (argument a) is interpreted as signed.
		 * Op2/Rd (argument b) is interpreted as unsigned.
		 *
		 * The sum is treated as an unsigned value and clamped to the unsigned
		 * range of the lane; SET_QC() is invoked whenever clamping happens.
		 *
		 * USATACC handles one lane for the 8 and 16 bit cases. It is not
		 * self-contained: the enclosing function must provide the operands a
		 * and b, signed scratch variables va, vb and vr that are wider than
		 * the lane, and an unsigned 32 bit accumulator r for the result.
		 *
		 *   bits  - lane width
		 *   shift - bit offset of the lane inside a, b and r
		 */

		#define USATACC(bits, shift) \
		    do { \
		        va = sextract32(a, shift, bits); \
		        vb = extract32(b, shift, bits); \
		        vr = va + vb; \
		        if (vr < 0) { \
		            SET_QC(); \
		            vr = 0; \
		        } else if (vr > UINT##bits##_MAX) { \
		            SET_QC(); \
		            vr = UINT##bits##_MAX; \
		        } \
		        r = deposit32(r, shift, bits, vr); \
		    } while (0)

		/* USQADD on four packed 8 bit lanes.
		 *
		 * a holds the signed addends, b the unsigned accumulator lanes.
		 * Returns the four clamped sums packed in the same lane positions.
		 */
		uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b)
		{
		    /* Scratch variables carry the names the USATACC macro expects. */
		    int16_t va, vb, vr;
		    uint32_t r = 0;
		    int lane_shift;

		    /* Walk the lanes from least to most significant byte. */
		    for (lane_shift = 0; lane_shift < 32; lane_shift += 8) {
		        USATACC(8, lane_shift);
		    }
		    return r;
		}

		/* USQADD on two packed 16 bit lanes.
		 *
		 * a holds the signed addends, b the unsigned accumulator lanes.
		 * Returns the two clamped sums packed in the same lane positions.
		 */
		uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b)
		{
		    /* Scratch variables carry the names the USATACC macro expects. */
		    int32_t va, vb, vr;
		    /* 32 bit accumulator, matching the return type and the 8 bit helper
		     * (this was needlessly declared as uint64_t).
		     */
		    uint32_t r = 0;

		    USATACC(16, 0);
		    USATACC(16, 16);
		    return r;
		}

		#undef USATACC

		/* USQADD on a single 32 bit element.
		 *
		 * a is interpreted as signed, b as unsigned. The sum is formed in 64
		 * bits so that it cannot wrap, then clamped to [0, UINT32_MAX];
		 * SET_QC() is invoked when clamping happens.
		 */
		uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b)
		{
		    /* a is sign-extended, b is zero-extended */
		    int64_t sum = (int64_t)(int32_t)a + (int64_t)b;

		    if (sum < 0) {
		        SET_QC();
		        return 0;
		    }
		    if (sum > UINT32_MAX) {
		        SET_QC();
		        return UINT32_MAX;
		    }
		    return sum;
		}

		/* USQADD on a single 64 bit element.
		 *
		 * a is interpreted as signed, b as unsigned. No wider type is used, so
		 * saturation is detected purely from the top bits of a, b and the
		 * wrapped sum. SET_QC() is invoked when the result is clamped.
		 */
		uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b)
		{
		    uint64_t sum = a + b;
		    /* a >= 0, b has its top bit set, yet the sum's top bit is clear:
		     * the addition wrapped past UINT64_MAX.
		     */
		    uint64_t too_big = ~a & b & ~sum;
		    /* a < 0, b's top bit is clear, and the sum's top bit is set:
		     * the true result is below zero.
		     */
		    uint64_t too_small = a & ~b & sum;

		    if (too_big & SIGNBIT64) {
		        SET_QC();
		        return UINT64_MAX;
		    }
		    if (too_small & SIGNBIT64) {
		        SET_QC();
		        return 0;
		    }
		    return sum;
		}

		/* Signed saturating accumulate of an unsigned value
		 *
		 * Op1/Rn (argument a) is interpreted as unsigned.
		 * Op2/Rd (argument b) is interpreted as signed.
		 *
		 * The sum is treated as a signed value and clamped to the signed range
		 * of the lane; SET_QC() is invoked whenever clamping happens.
		 *
		 * SSATACC handles one lane for the 8 and 16 bit cases. It is not
		 * self-contained: the enclosing function must provide the operands a
		 * and b, signed scratch variables va, vb and vr that are wider than
		 * the lane, and an unsigned 32 bit accumulator r for the result.
		 *
		 *   bits  - lane width
		 *   shift - bit offset of the lane inside a, b and r
		 */

		#define SSATACC(bits, shift) \
		    do { \
		        va = extract32(a, shift, bits); \
		        vb = sextract32(b, shift, bits); \
		        vr = va + vb; \
		        if (vr < INT##bits##_MIN) { \
		            SET_QC(); \
		            vr = INT##bits##_MIN; \
		        } else if (vr > INT##bits##_MAX) { \
		            SET_QC(); \
		            vr = INT##bits##_MAX; \
		        } \
		        r = deposit32(r, shift, bits, vr); \
		    } while (0)

		/* SUQADD on four packed 8 bit lanes.
		 *
		 * a holds the unsigned addends, b the signed accumulator lanes.
		 * Returns the four clamped sums packed in the same lane positions.
		 */
		uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b)
		{
		    /* Scratch variables carry the names the SSATACC macro expects. */
		    int16_t va, vb, vr;
		    uint32_t r = 0;
		    int lane_shift;

		    /* Walk the lanes from least to most significant byte. */
		    for (lane_shift = 0; lane_shift < 32; lane_shift += 8) {
		        SSATACC(8, lane_shift);
		    }
		    return r;
		}

		/* SUQADD on two packed 16 bit lanes.
		 *
		 * a holds the unsigned addends, b the signed accumulator lanes.
		 * Returns the two clamped sums packed in the same lane positions.
		 */
		uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b)
		{
		    /* Scratch variables carry the names the SSATACC macro expects. */
		    int32_t va, vb, vr;
		    uint32_t r = 0;
		    int lane_shift;

		    /* Low halfword first, then the high halfword. */
		    for (lane_shift = 0; lane_shift < 32; lane_shift += 16) {
		        SSATACC(16, lane_shift);
		    }
		    return r;
		}

		#undef SSATACC

		/* SUQADD on a single 32 bit element.
		 *
		 * a is interpreted as unsigned, b as signed. The sum is formed in 64
		 * bits so that it cannot wrap, then clamped to [INT32_MIN, INT32_MAX];
		 * SET_QC() is invoked when clamping happens. The signed result is
		 * returned as its 32 bit pattern.
		 */
		uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b)
		{
		    /* a is zero-extended, b is sign-extended */
		    int64_t sum = (int64_t)a + (int64_t)(int32_t)b;

		    if (sum < INT32_MIN) {
		        SET_QC();
		        return INT32_MIN;
		    }
		    if (sum > INT32_MAX) {
		        SET_QC();
		        return INT32_MAX;
		    }
		    return sum;
		}

		/* SUQADD on a single 64 bit element.
		 *
		 * a is interpreted as unsigned, b as signed, and the result as signed.
		 * Because a is never negative the true sum cannot fall below INT64_MIN,
		 * so only saturation to INT64_MAX has to be handled. SET_QC() is
		 * invoked when the result is clamped.
		 */
		uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b)
		{
		    uint64_t res;
		    res = a + b;
		    /* We only need to look at the pattern of SIGN bits to detect an
		     * overflow. The true sum exceeds INT64_MAX when:
		     *   - a's top bit and the sum's top bit are both set, or
		     *   - b is non-negative and the sum's top bit is set, or
		     *   - a's top bit is set and b is non-negative.
		     */
		    if (((a & res)
		         | (~b & res)
		         | (a & ~b)) & SIGNBIT64) {
		        SET_QC();
		        res = INT64_MAX;
		    }
		    return res;
		}


		#define NEON_USAT(dest, src1, src2, type) do { \
		uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
		if (tmp != (type)tmp) { \

target-arm/translate-a64.c

+105 −4

Original line number	Diff line number	Diff line
		@@ -7321,6 +7321,101 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
		}
		}

		/* Remaining saturating accumulating ops: USQADD and SUQADD.
		 *
		 * Generates code that combines register rn into register rd with the
		 * matching neon_uqadd_s* (is_u set, USQADD) or neon_sqadd_u* (is_u
		 * clear, SUQADD) helper.
		 *
		 *   is_scalar - scalar form: only element 0 is processed
		 *   is_u      - selects USQADD (true) or SUQADD (false)
		 *   is_q      - vector form only: process four 32 bit words, not two
		 *   size      - element size, 0..3 (3 means 64 bit elements)
		 *   rn, rd    - source and source/destination vector register numbers
		 */
		static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
		                                bool is_q, int size, int rn, int rd)
		{
		    if (size != 3) {
		        /* 8, 16 and 32 bit elements go through the i32 helpers */
		        TCGv_i32 tcg_rn = tcg_temp_new_i32();
		        TCGv_i32 tcg_rd = tcg_temp_new_i32();
		        int npasses = is_scalar ? 1 : (is_q ? 4 : 2);
		        /* Scalar reads one element of the real size; the vector form
		         * reads whole 32 bit words and lets the helper split the lanes.
		         */
		        int read_size = is_scalar ? size : MO_32;
		        int pass;

		        for (pass = 0; pass < npasses; pass++) {
		            read_vec_element_i32(s, tcg_rn, rn, pass, read_size);
		            read_vec_element_i32(s, tcg_rd, rd, pass, read_size);

		            switch (size) {
		            case 0:
		                if (is_u) { /* USQADD */
		                    gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		                } else { /* SUQADD */
		                    gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		                }
		                break;
		            case 1:
		                if (is_u) { /* USQADD */
		                    gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		                } else { /* SUQADD */
		                    gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		                }
		                break;
		            case 2:
		                if (is_u) { /* USQADD */
		                    gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		                } else { /* SUQADD */
		                    gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		                }
		                break;
		            default:
		                g_assert_not_reached();
		            }

		            if (is_scalar) {
		                /* Zero the low 64 bits of rd first, so that everything
		                 * above the 32 bit result written below ends up clear.
		                 */
		                TCGv_i64 tcg_zero = tcg_const_i64(0);
		                write_vec_element(s, tcg_zero, rd, 0, MO_64);
		                tcg_temp_free_i64(tcg_zero);
		            }
		            write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
		        }

		        if (!is_q) {
		            clear_vec_high(s, rd);
		        }

		        tcg_temp_free_i32(tcg_rd);
		        tcg_temp_free_i32(tcg_rn);
		    } else {
		        /* 64 bit elements: one pass per doubleword */
		        TCGv_i64 tcg_rn = tcg_temp_new_i64();
		        TCGv_i64 tcg_rd = tcg_temp_new_i64();
		        int npasses = is_scalar ? 1 : 2;
		        int pass;

		        for (pass = 0; pass < npasses; pass++) {
		            read_vec_element(s, tcg_rn, rn, pass, MO_64);
		            read_vec_element(s, tcg_rd, rd, pass, MO_64);

		            if (!is_u) { /* SUQADD */
		                gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		            } else { /* USQADD */
		                gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
		            }
		            write_vec_element(s, tcg_rd, rd, pass, MO_64);
		        }
		        if (is_scalar) {
		            clear_vec_high(s, rd);
		        }

		        tcg_temp_free_i64(tcg_rd);
		        tcg_temp_free_i64(tcg_rn);
		    }
		}

		/* C3.6.12 AdvSIMD scalar two reg misc
		* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
		* +-----+---+-----------+------+-----------+--------+-----+------+------+
		@@ -7340,6 +7435,9 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
		TCGv_ptr tcg_fpstatus;

		switch (opcode) {
		case 0x3: /* USQADD / SUQADD*/
		handle_2misc_satacc(s, true, u, false, size, rn, rd);
		return;
		case 0x7: /* SQABS / SQNEG */
		break;
		case 0xa: /* CMLT */
		@@ -7427,10 +7525,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
		}
		break;
		default:
		/* Other categories of encoding in this class:
		* + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
		*/
		unsupported_encoding(s, insn);
		unallocated_encoding(s);
		return;
		}

		@@ -9194,6 +9289,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
		}
		break;
		case 0x3: /* SUQADD, USQADD */
		if (size == 3 && !is_q) {
		unallocated_encoding(s);
		return;
		}
		handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
		return;
		case 0x7: /* SQABS, SQNEG */
		if (size == 3 && !is_q) {
		unallocated_encoding(s);