Loading tcg/i386/tcg-target.h +4 −4 Original line number Diff line number Diff line Loading @@ -93,8 +93,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 Loading Loading @@ -127,8 +127,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 Loading tcg/i386/tcg-target.inc.c +116 −9 Original line number Diff line number Diff line Loading @@ -92,6 +92,7 @@ static const int tcg_target_call_oarg_regs[] = { #define TCG_CT_CONST_S32 0x100 #define TCG_CT_CONST_U32 0x200 #define TCG_CT_CONST_I32 0x400 #define TCG_CT_CONST_WSZ 0x800 /* Registers used with L constraint, which are the first argument registers on x86_64, and two random call clobbered registers on Loading Loading @@ -138,6 +139,11 @@ static bool have_bmi2; #else # define have_bmi2 0 #endif #if defined(CONFIG_CPUID_H) && defined(bit_LZCNT) static bool have_lzcnt; #else # define have_lzcnt 0 #endif static tcg_insn_unit *tb_ret_addr; Loading Loading @@ -214,6 +220,10 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, tcg_regset_set32(ct->u.regs, 0, 0xff); } break; case 'W': /* With TZCNT/LZCNT, we can have operand-size as an input. */ ct->ct |= TCG_CT_CONST_WSZ; break; /* qemu_ld/st address constraint */ case 'L': Loading Loading @@ -260,6 +270,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { return 1; } if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return 1; } return 0; } Loading Loading @@ -293,6 +306,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ #define OPC_ANDN (0xf2 | P_EXT38) #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) #define OPC_BSF (0xbc | P_EXT) #define OPC_BSR (0xbd | P_EXT) #define OPC_BSWAP (0xc8 | P_EXT) #define OPC_CALL_Jz (0xe8) #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ Loading @@ -307,6 +322,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_JMP_long (0xe9) #define OPC_JMP_short (0xeb) #define OPC_LEA (0x8d) #define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ Loading @@ -333,6 +349,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) #define OPC_XCHG_ax_r32 (0x90) #define OPC_GRP3_Ev (0xf7) Loading Loading @@ -418,6 +435,11 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) if (opc & P_ADDR32) { tcg_out8(s, 0x67); } if (opc & P_SIMDF3) { tcg_out8(s, 0xf3); } else if (opc & P_SIMDF2) { tcg_out8(s, 0xf2); } rex = 0; rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ Loading Loading @@ -452,6 +474,11 @@ static void tcg_out_opc(TCGContext *s, int opc) if (opc & P_DATA16) { tcg_out8(s, 0x66); } if (opc & P_SIMDF3) { tcg_out8(s, 0xf3); } else if (opc & P_SIMDF2) { tcg_out8(s, 0xf2); } if (opc & (P_EXT | P_EXT38)) { tcg_out8(s, 0x0f); if (opc & P_EXT38) { Loading Loading @@ -1080,13 +1107,11 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, } #endif static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg c1, TCGArg c2, int const_c2, TCGArg v1) static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, TCGReg dest, TCGReg v1) { tcg_out_cmp(s, c1, c2, const_c2, 0); if (have_cmov) { tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1); } else { TCGLabel *over = gen_new_label(); tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); Loading @@ -1095,16 +1120,64 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, } } static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { tcg_out_cmp(s, c1, c2, const_c2, 0); tcg_out_cmov(s, cond, 0, dest, v1); } #if TCG_TARGET_REG_BITS == 64 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg c1, TCGArg c2, int const_c2, TCGArg v1) static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { tcg_out_cmp(s, c1, c2, const_c2, P_REXW); tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); tcg_out_cmov(s, cond, P_REXW, dest, v1); } #endif static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) { if (const_a2) { tcg_debug_assert(have_bmi1); tcg_debug_assert(arg2 == (rexw ? 64 : 32)); tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); } else { tcg_debug_assert(dest != arg2); tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); } } static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) { if (have_lzcnt) { tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1); if (const_a2) { tcg_debug_assert(arg2 == (rexw ? 64 : 32)); } else { tcg_debug_assert(dest != arg2); tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); } } else { tcg_debug_assert(!const_a2); tcg_debug_assert(dest != arg1); tcg_debug_assert(dest != arg2); /* Recall that the output of BSR is the index not the count. */ tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1); tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); /* Since we have destroyed the flags from BSR, we have to re-test. */ tcg_out_cmp(s, arg1, 0, 1, rexw); tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); } } static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; Loading Loading @@ -1995,6 +2068,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; OP_32_64(ctz): tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); break; OP_32_64(clz): tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); break; case INDEX_op_brcond_i32: tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); break; Loading Loading @@ -2359,6 +2439,24 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } }; return &arith2; } case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: { static const TCGTargetOpDef ctz[2] = { { .args_ct_str = { "&r", "r", "r" } }, { .args_ct_str = { "&r", "r", "rW" } }, }; return &ctz[have_bmi1]; } case INDEX_op_clz_i32: case INDEX_op_clz_i64: { static const TCGTargetOpDef clz[2] = { { .args_ct_str = { "&r", "r", "r" } }, { .args_ct_str = { "&r", "r", "rW" } }, }; return &clz[have_lzcnt]; } case INDEX_op_qemu_ld_i32: return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L; Loading Loading @@ -2509,6 +2607,15 @@ static void tcg_target_init(TCGContext *s) } #endif #ifndef have_lzcnt max = __get_cpuid_max(0x8000000, 0); if (max >= 1) { __cpuid(0x80000001, a, b, c, d); /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */ have_lzcnt = (c & bit_LZCNT) != 0; } #endif if (TCG_TARGET_REG_BITS == 64) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); Loading Loading
tcg/i386/tcg-target.h +4 −4 Original line number Diff line number Diff line Loading @@ -93,8 +93,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_eqv_i32 0 #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 Loading Loading @@ -127,8 +127,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_eqv_i64 0 #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 Loading
tcg/i386/tcg-target.inc.c +116 −9 Original line number Diff line number Diff line Loading @@ -92,6 +92,7 @@ static const int tcg_target_call_oarg_regs[] = { #define TCG_CT_CONST_S32 0x100 #define TCG_CT_CONST_U32 0x200 #define TCG_CT_CONST_I32 0x400 #define TCG_CT_CONST_WSZ 0x800 /* Registers used with L constraint, which are the first argument registers on x86_64, and two random call clobbered registers on Loading Loading @@ -138,6 +139,11 @@ static bool have_bmi2; #else # define have_bmi2 0 #endif #if defined(CONFIG_CPUID_H) && defined(bit_LZCNT) static bool have_lzcnt; #else # define have_lzcnt 0 #endif static tcg_insn_unit *tb_ret_addr; Loading Loading @@ -214,6 +220,10 @@ static const char *target_parse_constraint(TCGArgConstraint *ct, tcg_regset_set32(ct->u.regs, 0, 0xff); } break; case 'W': /* With TZCNT/LZCNT, we can have operand-size as an input. */ ct->ct |= TCG_CT_CONST_WSZ; break; /* qemu_ld/st address constraint */ case 'L': Loading Loading @@ -260,6 +270,9 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) { return 1; } if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) { return 1; } return 0; } Loading Loading @@ -293,6 +306,8 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */ #define OPC_ANDN (0xf2 | P_EXT38) #define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3)) #define OPC_BSF (0xbc | P_EXT) #define OPC_BSR (0xbd | P_EXT) #define OPC_BSWAP (0xc8 | P_EXT) #define OPC_CALL_Jz (0xe8) #define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */ Loading @@ -307,6 +322,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_JMP_long (0xe9) #define OPC_JMP_short (0xeb) #define OPC_LEA (0x8d) #define OPC_LZCNT (0xbd | P_EXT | P_SIMDF3) #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ Loading @@ -333,6 +349,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16) #define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2) #define OPC_TESTL (0x85) #define OPC_TZCNT (0xbc | P_EXT | P_SIMDF3) #define OPC_XCHG_ax_r32 (0x90) #define OPC_GRP3_Ev (0xf7) Loading Loading @@ -418,6 +435,11 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) if (opc & P_ADDR32) { tcg_out8(s, 0x67); } if (opc & P_SIMDF3) { tcg_out8(s, 0xf3); } else if (opc & P_SIMDF2) { tcg_out8(s, 0xf2); } rex = 0; rex |= (opc & P_REXW) ? 0x8 : 0x0; /* REX.W */ Loading Loading @@ -452,6 +474,11 @@ static void tcg_out_opc(TCGContext *s, int opc) if (opc & P_DATA16) { tcg_out8(s, 0x66); } if (opc & P_SIMDF3) { tcg_out8(s, 0xf3); } else if (opc & P_SIMDF2) { tcg_out8(s, 0xf2); } if (opc & (P_EXT | P_EXT38)) { tcg_out8(s, 0x0f); if (opc & P_EXT38) { Loading Loading @@ -1080,13 +1107,11 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, } #endif static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg c1, TCGArg c2, int const_c2, TCGArg v1) static void tcg_out_cmov(TCGContext *s, TCGCond cond, int rexw, TCGReg dest, TCGReg v1) { tcg_out_cmp(s, c1, c2, const_c2, 0); if (have_cmov) { tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1); tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | rexw, dest, v1); } else { TCGLabel *over = gen_new_label(); tcg_out_jxx(s, tcg_cond_to_jcc[tcg_invert_cond(cond)], over, 1); Loading @@ -1095,16 +1120,64 @@ static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest, } } static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { tcg_out_cmp(s, c1, c2, const_c2, 0); tcg_out_cmov(s, cond, 0, dest, v1); } #if TCG_TARGET_REG_BITS == 64 static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest, TCGArg c1, TCGArg c2, int const_c2, TCGArg v1) static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGReg dest, TCGReg c1, TCGArg c2, int const_c2, TCGReg v1) { tcg_out_cmp(s, c1, c2, const_c2, P_REXW); tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1); tcg_out_cmov(s, cond, P_REXW, dest, v1); } #endif static void tcg_out_ctz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) { if (const_a2) { tcg_debug_assert(have_bmi1); tcg_debug_assert(arg2 == (rexw ? 64 : 32)); tcg_out_modrm(s, OPC_TZCNT + rexw, dest, arg1); } else { tcg_debug_assert(dest != arg2); tcg_out_modrm(s, OPC_BSF + rexw, dest, arg1); tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); } } static void tcg_out_clz(TCGContext *s, int rexw, TCGReg dest, TCGReg arg1, TCGArg arg2, bool const_a2) { if (have_lzcnt) { tcg_out_modrm(s, OPC_LZCNT + rexw, dest, arg1); if (const_a2) { tcg_debug_assert(arg2 == (rexw ? 64 : 32)); } else { tcg_debug_assert(dest != arg2); tcg_out_cmov(s, TCG_COND_LTU, rexw, dest, arg2); } } else { tcg_debug_assert(!const_a2); tcg_debug_assert(dest != arg1); tcg_debug_assert(dest != arg2); /* Recall that the output of BSR is the index not the count. */ tcg_out_modrm(s, OPC_BSR + rexw, dest, arg1); tgen_arithi(s, ARITH_XOR + rexw, dest, rexw ? 63 : 31, 0); /* Since we have destroyed the flags from BSR, we have to re-test. */ tcg_out_cmp(s, arg1, 0, 1, rexw); tcg_out_cmov(s, TCG_COND_EQ, rexw, dest, arg2); } } static void tcg_out_branch(TCGContext *s, int call, tcg_insn_unit *dest) { intptr_t disp = tcg_pcrel_diff(s, dest) - 5; Loading Loading @@ -1995,6 +2068,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; OP_32_64(ctz): tcg_out_ctz(s, rexw, args[0], args[1], args[2], const_args[2]); break; OP_32_64(clz): tcg_out_clz(s, rexw, args[0], args[1], args[2], const_args[2]); break; case INDEX_op_brcond_i32: tcg_out_brcond32(s, a2, a0, a1, const_args[1], arg_label(args[3]), 0); break; Loading Loading @@ -2359,6 +2439,24 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op) = { .args_ct_str = { "r", "r", "0", "1", "re", "re" } }; return &arith2; } case INDEX_op_ctz_i32: case INDEX_op_ctz_i64: { static const TCGTargetOpDef ctz[2] = { { .args_ct_str = { "&r", "r", "r" } }, { .args_ct_str = { "&r", "r", "rW" } }, }; return &ctz[have_bmi1]; } case INDEX_op_clz_i32: case INDEX_op_clz_i64: { static const TCGTargetOpDef clz[2] = { { .args_ct_str = { "&r", "r", "r" } }, { .args_ct_str = { "&r", "r", "rW" } }, }; return &clz[have_lzcnt]; } case INDEX_op_qemu_ld_i32: return TARGET_LONG_BITS <= TCG_TARGET_REG_BITS ? &r_L : &r_L_L; Loading Loading @@ -2509,6 +2607,15 @@ static void tcg_target_init(TCGContext *s) } #endif #ifndef have_lzcnt max = __get_cpuid_max(0x8000000, 0); if (max >= 1) { __cpuid(0x80000001, a, b, c, d); /* LZCNT was introduced with AMD Barcelona and Intel Haswell CPUs. */ have_lzcnt = (c & bit_LZCNT) != 0; } #endif if (TCG_TARGET_REG_BITS == 64) { tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffff); Loading