Commit 968bf842 authored by Peter Maydell
Browse files

target/arm: Convert Neon VSHLL, VMOVL to decodetree



Convert the VSHLL and VMOVL insns from the 2-reg-shift group
to decodetree. Since the loop always has two passes, we unroll
it to avoid the awkward reassignment of one TCGv to another.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200522145520.6778-8-peter.maydell@linaro.org
parent b4a3a77b
Loading
Loading
Loading
Loading
+16 −0
Original line number Diff line number Diff line
@@ -243,6 +243,14 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0 \
                 shift=%neon_rshift_i3

# Long left shifts: again Q is part of opcode decode
@2reg_shll_s     .... ... . . . 1 shift:5    .... .... 0 . . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=2 q=0
@2reg_shll_h     .... ... . . . 01 shift:4   .... .... 0 . . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=1 q=0
@2reg_shll_b     .... ... . . . 001 shift:3  .... .... 0 . . . .... \
                 &2reg_shift vm=%vm_dp vd=%vd_dp size=0 q=0

VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
VSHR_S_2sh       1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
@@ -348,3 +356,11 @@ VQSHRN_U16_2sh 1111 001 1 1 . ...... .... 1001 . 0 . 1 .... @2reg_shrn_h
VQRSHRN_U64_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_d
VQRSHRN_U32_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_s
VQRSHRN_U16_2sh  1111 001 1 1 . ...... .... 1001 . 1 . 1 .... @2reg_shrn_h

VSHLL_S_2sh      1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
VSHLL_S_2sh      1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
VSHLL_S_2sh      1111 001 0 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b

VSHLL_U_2sh      1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_s
VSHLL_U_2sh      1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_h
VSHLL_U_2sh      1111 001 1 1 . ...... .... 1010 . 0 . 1 .... @2reg_shll_b
+81 −0
Original line number Diff line number Diff line
@@ -1578,3 +1578,84 @@ DO_2SN_32(VQSHRN_U16, gen_helper_neon_shl_u16, gen_helper_neon_narrow_sat_u8)
DO_2SN_64(VQRSHRN_U64, gen_helper_neon_rshl_u64, gen_helper_neon_narrow_sat_u32)
DO_2SN_32(VQRSHRN_U32, gen_helper_neon_rshl_u32, gen_helper_neon_narrow_sat_u16)
DO_2SN_32(VQRSHRN_U16, gen_helper_neon_rshl_u16, gen_helper_neon_narrow_sat_u8)

/*
 * Handle VSHLL and VMOVL (VSHLL with zero shift): widen each element
 * of Vm to double width, left-shift by a->shift (0 for VMOVL), and
 * write the 128-bit result to the register pair Dd:Dd+1.
 *
 * @widenfn: per-size widening op (sign- or zero-extending)
 * @u: true for the unsigned forms
 *
 * Returns false to UNDEF, true when the insn has been handled
 * (including the case where the access check raised an exception).
 */
static bool do_vshll_2sh(DisasContext *s, arg_2reg_shift *a,
                         NeonGenWidenFn *widenfn, bool u)
{
    TCGv_i64 tmp;
    TCGv_i32 rm0, rm1;
    uint64_t widen_mask = 0;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The destination is a Q register, so Vd must be even-aligned. */
    if (a->vd & 1) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * This is a widen-and-shift operation. The shift is always less
     * than the width of the source type, so after widening the input
     * vector we can simply shift the whole 64-bit widened register,
     * and then clear the potential overflow bits resulting from left
     * bits of the narrow input appearing as right bits of the left
     * neighbour narrow input. Calculate a mask of bits to clear.
     */
    if ((a->shift != 0) && (a->size < 2 || u)) {
        int esize = 8 << a->size;
        widen_mask = MAKE_64BIT_MASK(0, esize);
        widen_mask >>= esize - a->shift;
        widen_mask = dup_const(a->size + 1, widen_mask);
    }

    /* Load both source halves before writing Vd: Vm may overlap Vd. */
    rm0 = neon_load_reg(a->vm, 0);
    rm1 = neon_load_reg(a->vm, 1);
    tmp = tcg_temp_new_i64();

    widenfn(tmp, rm0);
    /* The widen fns passed to us do not free their input temp. */
    tcg_temp_free_i32(rm0);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    neon_store_reg64(tmp, a->vd);

    widenfn(tmp, rm1);
    tcg_temp_free_i32(rm1);
    if (a->shift != 0) {
        tcg_gen_shli_i64(tmp, tmp, a->shift);
        tcg_gen_andi_i64(tmp, tmp, ~widen_mask);
    }
    neon_store_reg64(tmp, a->vd + 1);
    tcg_temp_free_i64(tmp);
    return true;
}

/* VSHLL.S<size> / signed VMOVL: dispatch on element size. */
static bool trans_VSHLL_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* static const: build the table once, not on every call. */
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_s8,
        gen_helper_neon_widen_s16,
        tcg_gen_ext_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], false);
}

/* VSHLL.U<size> / unsigned VMOVL: dispatch on element size. */
static bool trans_VSHLL_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    /* static const: build the table once, not on every call. */
    static NeonGenWidenFn * const widenfn[] = {
        gen_helper_neon_widen_u8,
        gen_helper_neon_widen_u16,
        tcg_gen_extu_i32_i64,
    };
    return do_vshll_2sh(s, a, widenfn[a->size], true);
}
+2 −44
Original line number Diff line number Diff line
@@ -5248,6 +5248,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
            case 7: /* VQSHL */
            case 8: /* VSHRN, VRSHRN, VQSHRUN, VQRSHRUN */
            case 9: /* VQSHRN, VQRSHRN */
            case 10: /* VSHLL, including VMOVL */
                return 1; /* handled by decodetree */
            default:
                break;
@@ -5265,50 +5266,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                    size--;
            }
            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
            if (op == 10) {
                /* VSHLL, VMOVL */
                if (q || (rd & 1)) {
                    return 1;
                }
                tmp = neon_load_reg(rm, 0);
                tmp2 = neon_load_reg(rm, 1);
                for (pass = 0; pass < 2; pass++) {
                    if (pass == 1)
                        tmp = tmp2;

                    gen_neon_widen(cpu_V0, tmp, size, u);

                    if (shift != 0) {
                        /* The shift is less than the width of the source
                           type, so we can just shift the whole register.  */
                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
                        /* Widen the result of shift: we need to clear
                         * the potential overflow bits resulting from
                         * left bits of the narrow input appearing as
                         * right bits of left the neighbour narrow
                         * input.  */
                        if (size < 2 || !u) {
                            uint64_t imm64;
                            if (size == 0) {
                                imm = (0xffu >> (8 - shift));
                                imm |= imm << 16;
                            } else if (size == 1) {
                                imm = 0xffff >> (16 - shift);
                            } else {
                                /* size == 2 */
                                imm = 0xffffffff >> (32 - shift);
                            }
                            if (size < 2) {
                                imm64 = imm | (((uint64_t)imm) << 32);
                            } else {
                                imm64 = imm;
                            }
                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
                        }
                    }
                    neon_store_reg64(cpu_V0, rd + pass);
                }
            } else if (op >= 14) {
            if (op >= 14) {
                /* VCVT fixed-point.  */
                TCGv_ptr fpst;
                TCGv_i32 shiftv;