tcg: Manually expand INDEX_op_dup_vec (bab1671f) · Commits · SUMMER2020 / students / proj-2021291

tcg/aarch64/tcg-target.inc.c

+4 −5

Original line number	Diff line number	Diff line
		@@ -2108,10 +2108,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,

		case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
		case INDEX_op_mov_i64:
		case INDEX_op_mov_vec:
		case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
		case INDEX_op_movi_i64:
		case INDEX_op_dupi_vec:
		case INDEX_op_call: /* Always emitted via tcg_out_call. */
		default:
		g_assert_not_reached();
		@@ -2208,9 +2206,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
		case INDEX_op_not_vec:
		tcg_out_insn(s, 3617, NOT, is_q, 0, a0, a1);
		break;
		case INDEX_op_dup_vec:
		tcg_out_dup_vec(s, type, vece, a0, a1);
		break;
		case INDEX_op_shli_vec:
		tcg_out_insn(s, 3614, SHL, is_q, a0, a1, a2 + (8 << vece));
		break;
		@@ -2254,6 +2249,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
		}
		}
		break;

		case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
		case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
		case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
		default:
		g_assert_not_reached();
		}

tcg/i386/tcg-target.inc.c

+3 −5

Original line number	Diff line number	Diff line
		@@ -2603,10 +2603,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
		break;
		case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
		case INDEX_op_mov_i64:
		case INDEX_op_mov_vec:
		case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
		case INDEX_op_movi_i64:
		case INDEX_op_dupi_vec:
		case INDEX_op_call: /* Always emitted via tcg_out_call. */
		default:
		tcg_abort();
		@@ -2795,9 +2793,6 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
		case INDEX_op_st_vec:
		tcg_out_st(s, type, a0, a1, a2);
		break;
		case INDEX_op_dup_vec:
		tcg_out_dup_vec(s, type, vece, a0, a1);
		break;

		case INDEX_op_x86_shufps_vec:
		insn = OPC_SHUFPS;
		@@ -2839,6 +2834,9 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
		tcg_out8(s, a2);
		break;

		case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
		case INDEX_op_dupi_vec: /* Always emitted via tcg_out_movi. */
		case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
		default:
		g_assert_not_reached();
		}

tcg/tcg.c

+111 −0

Original line number	Diff line number	Diff line
		@@ -3284,6 +3284,9 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
		save_globals(s, allocated_regs);
		}

		/*
		* Specialized code generation for INDEX_op_movi_*.
		*/
		static void tcg_reg_alloc_do_movi(TCGContext s, TCGTemp ots,
		tcg_target_ulong val, TCGLifeData arg_life,
		TCGRegSet preferred_regs)
		@@ -3313,6 +3316,9 @@ static void tcg_reg_alloc_movi(TCGContext s, const TCGOp op)
		tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
		}

		/*
		* Specialized code generation for INDEX_op_mov_*.
		*/
		static void tcg_reg_alloc_mov(TCGContext s, const TCGOp op)
		{
		const TCGLifeData arg_life = op->life;
		@@ -3407,6 +3413,108 @@ static void tcg_reg_alloc_mov(TCGContext s, const TCGOp op)
		}
		}

		/*
		 * Specialized code generation for INDEX_op_dup_vec.
		 *
		 * op->args[0] is the output temp (OTS) and op->args[1] the input temp
		 * (ITS).  The element size and vector type are taken from TCGOP_VECE(op)
		 * and TCGOP_VECL(op).
		 *
		 * Strategy, in order of preference:
		 *   1. A constant input is forwarded to tcg_reg_alloc_do_movi().
		 *   2. An input register acceptable to the dup input constraint is
		 *      handed directly to tcg_out_dup_vec(), which is allowed to fail.
		 *   3. Otherwise the input is first brought into the output register
		 *      (by register move, or by load from its memory slot) and then
		 *      duplicated in place, which must succeed.
		 */
		static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
		{
		    /* Named arg_life because IS_DEAD_ARG/NEED_SYNC_ARG are used below. */
		    const TCGLifeData arg_life = op->life;
		    TCGRegSet dup_out_regs, dup_in_regs;
		    TCGTemp *its, *ots;
		    TCGType itype, vtype;
		    unsigned vece;
		    bool ok;

		    ots = arg_temp(op->args[0]);
		    its = arg_temp(op->args[1]);

		    /* ENV should not be modified. */
		    tcg_debug_assert(!ots->fixed_reg);

		    itype = its->type;
		    vece = TCGOP_VECE(op);
		    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

		    if (its->val_type == TEMP_VAL_CONST) {
		        /* Propagate constant via movi -> dupi. */
		        tcg_target_ulong val = its->val;

		        /* Read the value before the input temp is possibly released. */
		        if (IS_DEAD_ARG(1)) {
		            temp_dead(s, its);
		        }
		        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
		        return;
		    }

		    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
		    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;

		    /* Allocate the output register now. */
		    if (ots->val_type != TEMP_VAL_REG) {
		        TCGRegSet allocated_regs = s->reserved_regs;

		        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
		            /* Make sure to not spill the input register. */
		            tcg_regset_set_reg(allocated_regs, its->reg);
		        }
		        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
		                                 op->output_pref[0], ots->indirect_base);
		        ots->val_type = TEMP_VAL_REG;
		        s->reg_to_temp[ots->reg] = ots;
		    }

		    switch (its->val_type) {
		    case TEMP_VAL_REG:
		        /*
		         * The dup constraints must be broad, covering all possible VECE.
		         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
		         * to fail, indicating that extra moves are required for that case.
		         */
		        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
		            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
		                goto done;
		            }
		            /* Try again from memory or a vector input register. */
		        }
		        if (!its->mem_coherent) {
		            /*
		             * The input register is not synced, and so an extra store
		             * would be required to use memory.  Attempt an integer-vector
		             * register move first.  We do not have a TCGRegSet for this.
		             */
		            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
		                break;
		            }
		            /* Sync the temp back to its slot and load from there. */
		            temp_sync(s, its, s->reserved_regs, 0, 0);
		        }
		        /* fall through */

		    case TEMP_VAL_MEM:
		        /* TODO: dup from memory */
		        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
		        break;

		    default:
		        g_assert_not_reached();
		    }

		    /* We now have a vector input register, so dup must succeed. */
		    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
		    tcg_debug_assert(ok);

		 done:
		    /*
		     * Every path reaching this point has just written a new value into
		     * ots->reg, including the case where OTS was already held in a
		     * register on entry.  Its memory slot is therefore out of date, so
		     * clear the flag here rather than only when a register is allocated;
		     * otherwise a later sync could be skipped.
		     */
		    ots->mem_coherent = 0;
		    if (IS_DEAD_ARG(1)) {
		        temp_dead(s, its);
		    }
		    if (NEED_SYNC_ARG(0)) {
		        temp_sync(s, ots, s->reserved_regs, 0, 0);
		    }
		    if (IS_DEAD_ARG(0)) {
		        temp_dead(s, ots);
		    }
		}

		static void tcg_reg_alloc_op(TCGContext s, const TCGOp op)
		{
		const TCGLifeData arg_life = op->life;
		@@ -3981,6 +4089,9 @@ int tcg_gen_code(TCGContext s, TranslationBlock tb)
		case INDEX_op_dupi_vec:
		tcg_reg_alloc_movi(s, op);
		break;
		case INDEX_op_dup_vec:
		tcg_reg_alloc_dup(s, op);
		break;
		case INDEX_op_insn_start:
		if (num_insns >= 0) {
		size_t off = tcg_current_code_size(s);