Loading target-i386/ops_sse.h +146 −1 Original line number Diff line number Diff line /* * MMX/3DNow!/SSE/SSE2/SSE3/PNI support * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support * * Copyright (c) 2005 Fabrice Bellard * Loading Loading @@ -1275,6 +1275,151 @@ void helper_pswapd(MMXReg *d, MMXReg *s) } #endif /* SSSE3 op helpers */ void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s) { int i; Reg r; for (i = 0; i < (8 << SHIFT); i++) r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); *d = r; } void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); } void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s) { d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); } void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); } void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) + (int8_t)s->B( 1) * (uint8_t)d->B( 1)); d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) + (int8_t)s->B( 3) * (uint8_t)d->B( 3)); d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) + (int8_t)s->B( 5) * (uint8_t)d->B( 5)); d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) + (int8_t)s->B( 7) * (uint8_t)d->B( 7)); #if SHIFT == 1 d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) + (int8_t)s->B( 9) * (uint8_t)d->B( 9)); d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + (int8_t)s->B(11) * (uint8_t)d->B(11)); d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + (int8_t)s->B(13) * (uint8_t)d->B(13)); d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + (int8_t)s->B(15) * (uint8_t)d->B(15)); #endif } void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); } void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s) { d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); } void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3)); XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); } #define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x #define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x #define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x SSE_HELPER_B(helper_pabsb, FABSB) SSE_HELPER_W(helper_pabsw, FABSW) SSE_HELPER_L(helper_pabsd, FABSL) #define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15 SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) #define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d #define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d #define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d SSE_HELPER_B(helper_psignb, FSIGNB) SSE_HELPER_W(helper_psignw, FSIGNW) SSE_HELPER_L(helper_psignd, FSIGNL) void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift) { Reg r; /* XXX could be checked during translation */ if (shift >= (16 << SHIFT)) { r.Q(0) = 0; XMM_ONLY(r.Q(1) = 0); } else { shift <<= 3; #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) #if SHIFT == 0 r.Q(0) = SHR(s->Q(0), shift - 0) | SHR(d->Q(0), shift - 64); #else r.Q(0) = SHR(s->Q(0), shift - 0) | SHR(s->Q(1), shift - 64) | SHR(d->Q(0), shift - 128) | SHR(d->Q(1), shift - 192); r.Q(1) = SHR(s->Q(0), shift + 64) | SHR(s->Q(1), shift - 0) | SHR(d->Q(0), shift - 64) | SHR(d->Q(1), shift - 128); #endif #undef SHR } *d = r; } #undef SHIFT #undef XMM_ONLY #undef Reg Loading target-i386/ops_sse_header.h +19 −1 Original line number Diff line number Diff line /* * MMX/3DNow!/SSE/SSE2/SSE3/PNI support * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support * * Copyright (c) 2005 Fabrice Bellard * Loading Loading @@ -251,6 +251,24 @@ DEF_HELPER(void, helper_pfsubr, (MMXReg *d, MMXReg *s)) DEF_HELPER(void, helper_pswapd, (MMXReg *d, MMXReg *s)) #endif /* SSSE3 op helpers */ DEF_HELPER(void, glue(helper_phaddw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phaddd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phaddsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phsubw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phsubd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phsubsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pabsb, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pabsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pabsd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pmaddubsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pmulhrsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pshufb, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_psignb, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_psignw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_psignd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_palignr, SUFFIX), (Reg *d, Reg *s, int32_t shift)) #undef SHIFT #undef Reg #undef SUFFIX Loading target-i386/translate.c +107 −2 Original line number Diff line number Diff line Loading @@ -2770,6 +2770,9 @@ static void *sse_op_table1[256][4] = { [0xc2] = SSE_FOP(cmpeq), [0xc6] = { helper_shufps, helper_shufpd }, [0x38] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */ [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */ /* MMX ops and their SSE extensions */ [0x60] = MMX_OP2(punpcklbw), [0x61] = MMX_OP2(punpcklwd), Loading Loading @@ -2921,6 +2924,28 @@ static void *sse_op_table5[256] = { [0xbf] = helper_pavgb_mmx /* pavgusb */ }; static void *sse_op_table6[256][2] = { [0x00] = MMX_OP2(pshufb), [0x01] = MMX_OP2(phaddw), [0x02] = MMX_OP2(phaddd), [0x03] = MMX_OP2(phaddsw), [0x04] = MMX_OP2(pmaddubsw), [0x05] = MMX_OP2(phsubw), [0x06] = MMX_OP2(phsubd), [0x07] = MMX_OP2(phsubsw), [0x08] = MMX_OP2(psignb), [0x09] = MMX_OP2(psignw), [0x0a] = MMX_OP2(psignd), [0x0b] = MMX_OP2(pmulhrsw), [0x1c] = MMX_OP2(pabsb), [0x1d] = MMX_OP2(pabsw), [0x1e] = MMX_OP2(pabsd), }; static void *sse_op_table7[256][2] = { [0x0f] = MMX_OP2(palignr), }; static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) { int b1, op1_offset, op2_offset, is_xmm, val, ot; Loading Loading @@ -2960,6 +2985,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) return; } if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA)) goto illegal_op; if (b == 0x0e) { if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) Loading Loading @@ -3482,6 +3508,84 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) reg = ((modrm >> 3) & 7) | rex_r; gen_op_mov_reg_T0(OT_LONG, reg); break; case 0x038: case 0x138: if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3)) goto illegal_op; b = modrm; modrm = ldub_code(s->pc++); rm = modrm & 7; reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; sse_op2 = sse_op_table6[b][b1]; if (!sse_op2) goto illegal_op; if (b1) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod == 3) { op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); } else { op2_offset = offsetof(CPUX86State,xmm_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldo_env_A0(s->mem_index, op2_offset); } } else { op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod == 3) { op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } else { op2_offset = offsetof(CPUX86State,mmx_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldq_env_A0(s->mem_index, op2_offset); } } tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1); break; case 0x03a: case 0x13a: if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3)) goto illegal_op; b = modrm; modrm = ldub_code(s->pc++); rm = modrm & 7; reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; sse_op2 = sse_op_table7[b][b1]; if (!sse_op2) goto illegal_op; if (b1) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod == 3) { op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); } else { op2_offset = offsetof(CPUX86State,xmm_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldo_env_A0(s->mem_index, op2_offset); } } else { op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod == 3) { op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } else { op2_offset = offsetof(CPUX86State,mmx_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldq_env_A0(s->mem_index, op2_offset); } } val = ldub_code(s->pc++); tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); break; default: goto illegal_op; } Loading Loading @@ -6987,7 +7091,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_eob(s); } break; /* MMX/3DNow!/SSE/SSE2/SSE3 support */ /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3 support */ case 0x1c3: /* MOVNTI reg, mem */ if (!(s->cpuid_features & CPUID_SSE2)) goto illegal_op; Loading Loading @@ -7100,6 +7204,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); case 0x110 ... 0x117: case 0x128 ... 0x12f: case 0x138 ... 0x13a: case 0x150 ... 0x177: case 0x17c ... 0x17f: case 0x1c2: Loading Loading
target-i386/ops_sse.h +146 −1 Original line number Diff line number Diff line /* * MMX/3DNow!/SSE/SSE2/SSE3/PNI support * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support * * Copyright (c) 2005 Fabrice Bellard * Loading Loading @@ -1275,6 +1275,151 @@ void helper_pswapd(MMXReg *d, MMXReg *s) } #endif /* SSSE3 op helpers */ void glue(helper_pshufb, SUFFIX) (Reg *d, Reg *s) { int i; Reg r; for (i = 0; i < (8 << SHIFT); i++) r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); *d = r; } void glue(helper_phaddw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); } void glue(helper_phaddd, SUFFIX) (Reg *d, Reg *s) { d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); } void glue(helper_phaddsw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); } void glue(helper_pmaddubsw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = satsw((int8_t)s->B( 0) * (uint8_t)d->B( 0) + (int8_t)s->B( 1) * (uint8_t)d->B( 1)); d->W(1) = satsw((int8_t)s->B( 2) * (uint8_t)d->B( 2) + (int8_t)s->B( 3) * (uint8_t)d->B( 3)); d->W(2) = satsw((int8_t)s->B( 4) * (uint8_t)d->B( 4) + (int8_t)s->B( 5) * (uint8_t)d->B( 5)); d->W(3) = satsw((int8_t)s->B( 6) * (uint8_t)d->B( 6) + (int8_t)s->B( 7) * (uint8_t)d->B( 7)); #if SHIFT == 1 d->W(4) = satsw((int8_t)s->B( 8) * (uint8_t)d->B( 8) + (int8_t)s->B( 9) * (uint8_t)d->B( 9)); d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + (int8_t)s->B(11) * (uint8_t)d->B(11)); d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + (int8_t)s->B(13) * (uint8_t)d->B(13)); d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + (int8_t)s->B(15) * (uint8_t)d->B(15)); #endif } void glue(helper_phsubw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); } void glue(helper_phsubd, SUFFIX) (Reg *d, Reg *s) { d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); } void glue(helper_phsubsw, SUFFIX) (Reg *d, Reg *s) { d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3)); XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); } #define FABSB(_, x) x > INT8_MAX ? -(int8_t ) x : x #define FABSW(_, x) x > INT16_MAX ? -(int16_t) x : x #define FABSL(_, x) x > INT32_MAX ? -(int32_t) x : x SSE_HELPER_B(helper_pabsb, FABSB) SSE_HELPER_W(helper_pabsw, FABSW) SSE_HELPER_L(helper_pabsd, FABSL) #define FMULHRSW(d, s) ((int16_t) d * (int16_t) s + 0x4000) >> 15 SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) #define FSIGNB(d, s) s <= INT8_MAX ? s ? d : 0 : -(int8_t ) d #define FSIGNW(d, s) s <= INT16_MAX ? s ? d : 0 : -(int16_t) d #define FSIGNL(d, s) s <= INT32_MAX ? s ? d : 0 : -(int32_t) d SSE_HELPER_B(helper_psignb, FSIGNB) SSE_HELPER_W(helper_psignw, FSIGNW) SSE_HELPER_L(helper_psignd, FSIGNL) void glue(helper_palignr, SUFFIX) (Reg *d, Reg *s, int32_t shift) { Reg r; /* XXX could be checked during translation */ if (shift >= (16 << SHIFT)) { r.Q(0) = 0; XMM_ONLY(r.Q(1) = 0); } else { shift <<= 3; #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) #if SHIFT == 0 r.Q(0) = SHR(s->Q(0), shift - 0) | SHR(d->Q(0), shift - 64); #else r.Q(0) = SHR(s->Q(0), shift - 0) | SHR(s->Q(1), shift - 64) | SHR(d->Q(0), shift - 128) | SHR(d->Q(1), shift - 192); r.Q(1) = SHR(s->Q(0), shift + 64) | SHR(s->Q(1), shift - 0) | SHR(d->Q(0), shift - 64) | SHR(d->Q(1), shift - 128); #endif #undef SHR } *d = r; } #undef SHIFT #undef XMM_ONLY #undef Reg Loading
target-i386/ops_sse_header.h +19 −1 Original line number Diff line number Diff line /* * MMX/3DNow!/SSE/SSE2/SSE3/PNI support * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/PNI support * * Copyright (c) 2005 Fabrice Bellard * Loading Loading @@ -251,6 +251,24 @@ DEF_HELPER(void, helper_pfsubr, (MMXReg *d, MMXReg *s)) DEF_HELPER(void, helper_pswapd, (MMXReg *d, MMXReg *s)) #endif /* SSSE3 op helpers */ DEF_HELPER(void, glue(helper_phaddw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phaddd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phaddsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phsubw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phsubd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_phsubsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pabsb, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pabsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pabsd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pmaddubsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pmulhrsw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_pshufb, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_psignb, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_psignw, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_psignd, SUFFIX), (Reg *d, Reg *s)) DEF_HELPER(void, glue(helper_palignr, SUFFIX), (Reg *d, Reg *s, int32_t shift)) #undef SHIFT #undef Reg #undef SUFFIX Loading
target-i386/translate.c +107 −2 Original line number Diff line number Diff line Loading @@ -2770,6 +2770,9 @@ static void *sse_op_table1[256][4] = { [0xc2] = SSE_FOP(cmpeq), [0xc6] = { helper_shufps, helper_shufpd }, [0x38] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */ [0x3a] = { SSE_SPECIAL, SSE_SPECIAL }, /* SSSE3 */ /* MMX ops and their SSE extensions */ [0x60] = MMX_OP2(punpcklbw), [0x61] = MMX_OP2(punpcklwd), Loading Loading @@ -2921,6 +2924,28 @@ static void *sse_op_table5[256] = { [0xbf] = helper_pavgb_mmx /* pavgusb */ }; static void *sse_op_table6[256][2] = { [0x00] = MMX_OP2(pshufb), [0x01] = MMX_OP2(phaddw), [0x02] = MMX_OP2(phaddd), [0x03] = MMX_OP2(phaddsw), [0x04] = MMX_OP2(pmaddubsw), [0x05] = MMX_OP2(phsubw), [0x06] = MMX_OP2(phsubd), [0x07] = MMX_OP2(phsubsw), [0x08] = MMX_OP2(psignb), [0x09] = MMX_OP2(psignw), [0x0a] = MMX_OP2(psignd), [0x0b] = MMX_OP2(pmulhrsw), [0x1c] = MMX_OP2(pabsb), [0x1d] = MMX_OP2(pabsw), [0x1e] = MMX_OP2(pabsd), }; static void *sse_op_table7[256][2] = { [0x0f] = MMX_OP2(palignr), }; static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) { int b1, op1_offset, op2_offset, is_xmm, val, ot; Loading Loading @@ -2960,6 +2985,7 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) return; } if (is_xmm && !(s->flags & HF_OSFXSR_MASK)) if ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA)) goto illegal_op; if (b == 0x0e) { if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) Loading Loading @@ -3482,6 +3508,84 @@ static void gen_sse(DisasContext *s, int b, target_ulong pc_start, int rex_r) reg = ((modrm >> 3) & 7) | rex_r; gen_op_mov_reg_T0(OT_LONG, reg); break; case 0x038: case 0x138: if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3)) goto illegal_op; b = modrm; modrm = ldub_code(s->pc++); rm = modrm & 7; reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; sse_op2 = sse_op_table6[b][b1]; if (!sse_op2) goto illegal_op; if (b1) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod == 3) { op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); } else { op2_offset = offsetof(CPUX86State,xmm_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldo_env_A0(s->mem_index, op2_offset); } } else { op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod == 3) { op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } else { op2_offset = offsetof(CPUX86State,mmx_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldq_env_A0(s->mem_index, op2_offset); } } tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); tcg_gen_helper_0_2(sse_op2, cpu_ptr0, cpu_ptr1); break; case 0x03a: case 0x13a: if (!(s->cpuid_ext_features & CPUID_EXT_SSSE3)) goto illegal_op; b = modrm; modrm = ldub_code(s->pc++); rm = modrm & 7; reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; sse_op2 = sse_op_table7[b][b1]; if (!sse_op2) goto illegal_op; if (b1) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod == 3) { op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); } else { op2_offset = offsetof(CPUX86State,xmm_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldo_env_A0(s->mem_index, op2_offset); } } else { op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); if (mod == 3) { op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); } else { op2_offset = offsetof(CPUX86State,mmx_t0); gen_lea_modrm(s, modrm, ®_addr, &offset_addr); gen_ldq_env_A0(s->mem_index, op2_offset); } } val = ldub_code(s->pc++); tcg_gen_addi_ptr(cpu_ptr0, cpu_env, op1_offset); tcg_gen_addi_ptr(cpu_ptr1, cpu_env, op2_offset); tcg_gen_helper_0_3(sse_op2, cpu_ptr0, cpu_ptr1, tcg_const_i32(val)); break; default: goto illegal_op; } Loading Loading @@ -6987,7 +7091,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) gen_eob(s); } break; /* MMX/3DNow!/SSE/SSE2/SSE3 support */ /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3 support */ case 0x1c3: /* MOVNTI reg, mem */ if (!(s->cpuid_features & CPUID_SSE2)) goto illegal_op; Loading Loading @@ -7100,6 +7204,7 @@ static target_ulong disas_insn(DisasContext *s, target_ulong pc_start) s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); case 0x110 ... 0x117: case 0x128 ... 0x12f: case 0x138 ... 0x13a: case 0x150 ... 0x177: case 0x17c ... 0x17f: case 0x1c2: Loading