Commit 80b5d6bf authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/rth/tags/pull-i386-20160215' into staging



Add XSAVE, MPX, FSGSBASE.

# gpg: Signature made Mon 15 Feb 2016 11:21:50 GMT using RSA key ID 4DD0279B
# gpg: Good signature from "Richard Henderson <rth7680@gmail.com>"
# gpg:                 aka "Richard Henderson <rth@redhat.com>"
# gpg:                 aka "Richard Henderson <rth@twiddle.net>"

* remotes/rth/tags/pull-i386-20160215:
  target-i386: Implement FSGSBASE
  target-i386: Enable CR4/XCR0 features for user-mode
  target-i386: Clear bndregs during legacy near jumps
  target-i386: Implement BNDLDX, BNDSTX
  target-i386: Update BNDSTATUS for exceptions raised by BOUND
  target-i386: Implement BNDCL, BNDCU, BNDCN
  target-i386: Implement BNDMOV
  target-i386: Implement BNDMK
  target-i386: Split up gen_lea_modrm
  target-i386: Perform set/reset_inhibit_irq inline
  target-i386: Enable control registers for MPX
  target-i386: Implement XSAVEOPT
  target-i386: Add XSAVE extension
  target-i386: Rearrange processing of 0F AE
  target-i386: Rearrange processing of 0F 01
  target-i386: Split fxsave/fxrstor implementation

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents a5af1287 07929f2a
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
obj-y += translate.o helper.o cpu.o bpt_helper.o
obj-y += excp_helper.o fpu_helper.o cc_helper.o int_helper.o svm_helper.o
obj-y += smm_helper.o misc_helper.o mem_helper.o seg_helper.o
obj-y += smm_helper.o misc_helper.o mem_helper.o seg_helper.o mpx_helper.o
obj-y += gdbstub.o
obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o
obj-$(CONFIG_KVM) += kvm.o hyperv.o
+0 −10
Original line number Diff line number Diff line
@@ -383,13 +383,3 @@ void helper_sti_vm(CPUX86State *env)
    }
}
#endif

void helper_set_inhibit_irq(CPUX86State *env)
{
    env->hflags |= HF_INHIBIT_IRQ_MASK;
}

void helper_reset_inhibit_irq(CPUX86State *env)
{
    env->hflags &= ~HF_INHIBIT_IRQ_MASK;
}
+59 −32
Original line number Diff line number Diff line
@@ -331,14 +331,14 @@ static const char *cpuid_6_feature_name[] = {
#define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \
          CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \
          CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \
          CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */   \
          CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR)
          /* missing:
          CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX,
          CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA,
          CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA,
          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_XSAVE,
          CPUID_EXT_OSXSAVE, CPUID_EXT_AVX, CPUID_EXT_F16C,
          CPUID_EXT_RDRAND */
          CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX,
          CPUID_EXT_F16C, CPUID_EXT_RDRAND */

#ifdef TARGET_X86_64
#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM)
@@ -358,15 +358,17 @@ static const char *cpuid_6_feature_name[] = {
#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \
          CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \
          CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT |            \
          CPUID_7_0_EBX_CLWB)
          CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_FSGSBASE)
          /* missing:
          CPUID_7_0_EBX_FSGSBASE, CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
          CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2,
          CPUID_7_0_EBX_ERMS, CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
          CPUID_7_0_EBX_RDSEED */
#define TCG_7_0_ECX_FEATURES 0
#define TCG_APM_FEATURES 0
#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT

#define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
          /* missing:
          CPUID_XSAVE_XSAVEC, CPUID_XSAVE_XSAVES */

typedef struct FeatureWordInfo {
    const char **feat_names;
@@ -440,7 +442,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
        .cpuid_eax = 0xd,
        .cpuid_needs_ecx = true, .cpuid_ecx = 1,
        .cpuid_reg = R_EAX,
        .tcg_features = 0,
        .tcg_features = TCG_XSAVE_FEATURES,
    },
    [FEAT_6_EAX] = {
        .feat_names = cpuid_6_feature_name,
@@ -470,12 +472,7 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = {
};
#undef REGISTER

typedef struct ExtSaveArea {
    uint32_t feature, bits;
    uint32_t offset, size;
} ExtSaveArea;

static const ExtSaveArea ext_save_areas[] = {
const ExtSaveArea x86_ext_save_areas[] = {
    [2] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX,
            .offset = 0x240, .size = 0x100 },
    [3] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
@@ -2323,10 +2320,13 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
        *ebx = (cpu->apic_id << 24) |
               8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
        *ecx = env->features[FEAT_1_ECX];
        if ((*ecx & CPUID_EXT_XSAVE) && (env->cr[4] & CR4_OSXSAVE_MASK)) {
            *ecx |= CPUID_EXT_OSXSAVE;
        }
        *edx = env->features[FEAT_1_EDX];
        if (cs->nr_cores * cs->nr_threads > 1) {
            *ebx |= (cs->nr_cores * cs->nr_threads) << 16;
            *edx |= 1 << 28;    /* HTT bit */
            *edx |= CPUID_HT;
        }
        break;
    case 2:
@@ -2450,7 +2450,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
        break;
    case 0xD: {
        KVMState *s = cs->kvm_state;
        uint64_t kvm_mask;
        uint64_t ena_mask;
        int i;

        /* Processor Extended State */
@@ -2458,35 +2458,39 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
        *ebx = 0;
        *ecx = 0;
        *edx = 0;
        if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) || !kvm_enabled()) {
        if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
            break;
        }
        kvm_mask =
            kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX) |
            ((uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32);
        if (kvm_enabled()) {
            ena_mask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX);
            ena_mask <<= 32;
            ena_mask |= kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
        } else {
            ena_mask = -1;
        }

        if (count == 0) {
            *ecx = 0x240;
            for (i = 2; i < ARRAY_SIZE(ext_save_areas); i++) {
                const ExtSaveArea *esa = &ext_save_areas[i];
                if ((env->features[esa->feature] & esa->bits) == esa->bits &&
                    (kvm_mask & (1 << i)) != 0) {
            for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) {
                const ExtSaveArea *esa = &x86_ext_save_areas[i];
                if ((env->features[esa->feature] & esa->bits) == esa->bits
                    && ((ena_mask >> i) & 1) != 0) {
                    if (i < 32) {
                        *eax |= 1 << i;
                        *eax |= 1u << i;
                    } else {
                        *edx |= 1 << (i - 32);
                        *edx |= 1u << (i - 32);
                    }
                    *ecx = MAX(*ecx, esa->offset + esa->size);
                }
            }
            *eax |= kvm_mask & (XSTATE_FP | XSTATE_SSE);
            *eax |= ena_mask & (XSTATE_FP | XSTATE_SSE);
            *ebx = *ecx;
        } else if (count == 1) {
            *eax = env->features[FEAT_XSAVE];
        } else if (count < ARRAY_SIZE(ext_save_areas)) {
            const ExtSaveArea *esa = &ext_save_areas[count];
            if ((env->features[esa->feature] & esa->bits) == esa->bits &&
                (kvm_mask & (1 << count)) != 0) {
        } else if (count < ARRAY_SIZE(x86_ext_save_areas)) {
            const ExtSaveArea *esa = &x86_ext_save_areas[count];
            if ((env->features[esa->feature] & esa->bits) == esa->bits
                && ((ena_mask >> count) & 1) != 0) {
                *eax = esa->size;
                *ebx = esa->offset;
            }
@@ -2639,6 +2643,8 @@ static void x86_cpu_reset(CPUState *s)
    X86CPU *cpu = X86_CPU(s);
    X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu);
    CPUX86State *env = &cpu->env;
    target_ulong cr4;
    uint64_t xcr0;
    int i;

    xcc->parent_reset(s);
@@ -2698,7 +2704,8 @@ static void x86_cpu_reset(CPUState *s)
    cpu_set_fpuc(env, 0x37f);

    env->mxcsr = 0x1f80;
    env->xstate_bv = XSTATE_FP | XSTATE_SSE;
    /* All units are in INIT state.  */
    env->xstate_bv = 0;

    env->pat = 0x0007040600070406ULL;
    env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
@@ -2709,7 +2716,27 @@ static void x86_cpu_reset(CPUState *s)
    cpu_breakpoint_remove_all(s, BP_CPU);
    cpu_watchpoint_remove_all(s, BP_CPU);

    env->xcr0 = 1;
    cr4 = 0;
    xcr0 = XSTATE_FP;

#ifdef CONFIG_USER_ONLY
    /* Enable all the features for user-mode.  */
    if (env->features[FEAT_1_EDX] & CPUID_SSE) {
        xcr0 |= XSTATE_SSE;
    }
    if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_MPX) {
        xcr0 |= XSTATE_BNDREGS | XSTATE_BNDCSR;
    }
    if (env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE) {
        cr4 |= CR4_OSFXSR_MASK | CR4_OSXSAVE_MASK;
    }
    if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_FSGSBASE) {
        cr4 |= CR4_FSGSBASE_MASK;
    }
#endif

    env->xcr0 = xcr0;
    cpu_x86_update_cr4(env, cr4);

    /*
     * SDM 11.11.5 requires:
+20 −1
Original line number Diff line number Diff line
@@ -156,6 +156,8 @@
#define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
#define HF_SMAP_SHIFT       23 /* CR4.SMAP */
#define HF_IOBPT_SHIFT      24 /* an io breakpoint enabled */
#define HF_MPX_EN_SHIFT     25 /* MPX Enabled (CR4+XCR0+BNDCFGx) */
#define HF_MPX_IU_SHIFT     26 /* BND registers in-use */

#define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
#define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
@@ -180,6 +182,8 @@
#define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
#define HF_SMAP_MASK         (1 << HF_SMAP_SHIFT)
#define HF_IOBPT_MASK        (1 << HF_IOBPT_SHIFT)
#define HF_MPX_EN_MASK       (1 << HF_MPX_EN_SHIFT)
#define HF_MPX_IU_MASK       (1 << HF_MPX_IU_SHIFT)

/* hflags2 */

@@ -188,12 +192,14 @@
#define HF2_NMI_SHIFT            2 /* CPU serving NMI */
#define HF2_VINTR_SHIFT          3 /* value of V_INTR_MASKING bit */
#define HF2_SMM_INSIDE_NMI_SHIFT 4 /* CPU serving SMI nested inside NMI */
#define HF2_MPX_PR_SHIFT         5 /* BNDCFGx.BNDPRESERVE */

#define HF2_GIF_MASK            (1 << HF2_GIF_SHIFT)
#define HF2_HIF_MASK            (1 << HF2_HIF_SHIFT)
#define HF2_NMI_MASK            (1 << HF2_NMI_SHIFT)
#define HF2_VINTR_MASK          (1 << HF2_VINTR_SHIFT)
#define HF2_SMM_INSIDE_NMI_MASK (1 << HF2_SMM_INSIDE_NMI_SHIFT)
#define HF2_MPX_PR_MASK         (1 << HF2_MPX_PR_SHIFT)

#define CR0_PE_SHIFT 0
#define CR0_MP_SHIFT 1
@@ -753,6 +759,10 @@ typedef struct BNDCSReg {
    uint64_t sts;
} BNDCSReg;

#define BNDCFG_ENABLE       1ULL
#define BNDCFG_BNDPRESERVE  2ULL
#define BNDCFG_BDIR_MASK    TARGET_PAGE_MASK

#ifdef HOST_WORDS_BIGENDIAN
#define ZMM_B(n) _b_ZMMReg[63 - (n)]
#define ZMM_W(n) _w_ZMMReg[31 - (n)]
@@ -1121,7 +1131,14 @@ void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32);
int cpu_x86_signal_handler(int host_signum, void *pinfo,
                           void *puc);

/* cpuid.c */
/* cpu.c */
typedef struct ExtSaveArea {
    uint32_t feature, bits;
    uint32_t offset, size;
} ExtSaveArea;

extern const ExtSaveArea x86_ext_save_areas[];

void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                   uint32_t *eax, uint32_t *ebx,
                   uint32_t *ecx, uint32_t *edx);
@@ -1342,6 +1359,8 @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
 */
void x86_cpu_change_kvm_default(const char *prop, const char *value);

/* mpx_helper.c */
void cpu_sync_bndcs_hflags(CPUX86State *env);

/* Return name of 32-bit register, from a R_* constant */
const char *get_register_name_32(unsigned int reg);
+325 −71
Original line number Diff line number Diff line
@@ -1115,89 +1115,174 @@ void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
}
#endif

static void do_fxsave(CPUX86State *env, target_ulong ptr, int data64,
                      uintptr_t retaddr)
static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int fpus, fptag, i, nb_xmm_regs;
    floatx80 tmp;
    int fpus, fptag, i;
    target_ulong addr;

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
    }

    fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
    fptag = 0;
    for (i = 0; i < 8; i++) {
        fptag |= (env->fptags[i] << i);
    }
    cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
    cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
    cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, retaddr);
#ifdef TARGET_X86_64
    if (data64) {
        cpu_stq_data_ra(env, ptr + 0x08, 0, retaddr); /* rip */
        cpu_stq_data_ra(env, ptr + 0x10, 0, retaddr); /* rdp */
    } else
#endif
    {
        cpu_stl_data_ra(env, ptr + 0x08, 0, retaddr); /* eip */
        cpu_stl_data_ra(env, ptr + 0x0c, 0, retaddr); /* sel  */
        cpu_stl_data_ra(env, ptr + 0x10, 0, retaddr); /* dp */
        cpu_stl_data_ra(env, ptr + 0x14, 0, retaddr); /* sel  */
    }
    cpu_stw_data_ra(env, ptr, env->fpuc, ra);
    cpu_stw_data_ra(env, ptr + 2, fpus, ra);
    cpu_stw_data_ra(env, ptr + 4, fptag ^ 0xff, ra);

    /* In 32-bit mode this is eip, sel, dp, sel.
       In 64-bit mode this is rip, rdp.
       But in either case we don't write actual data, just zeros.  */
    cpu_stq_data_ra(env, ptr + 0x08, 0, ra); /* eip+sel; rip */
    cpu_stq_data_ra(env, ptr + 0x10, 0, ra); /* edp+sel; rdp */

    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        tmp = ST(i);
        helper_fstt(env, tmp, addr, retaddr);
        floatx80 tmp = ST(i);
        helper_fstt(env, tmp, addr, ra);
        addr += 16;
    }
}

static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, ra); /* mxcsr */
    cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, ra); /* mxcsr_mask */
}

static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        /* XXX: finish it */
        cpu_stl_data_ra(env, ptr + 0x18, env->mxcsr, retaddr); /* mxcsr */
        cpu_stl_data_ra(env, ptr + 0x1c, 0x0000ffff, retaddr); /* mxcsr_mask */
    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + 0xa0;
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
    for (i = 0; i < nb_xmm_regs; i++) {
                cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), retaddr);
                cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), retaddr);
        cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
        cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
        addr += 16;
    }
}

static void do_xsave_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
        cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
    }
}

void helper_fxsave(CPUX86State *env, target_ulong ptr, int data64)
static void do_xsave_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    do_fxsave(env, ptr, data64, GETPC());
    cpu_stq_data_ra(env, addr, env->bndcs_regs.cfgu, ra);
    cpu_stq_data_ra(env, addr + 8, env->bndcs_regs.sts, ra);
}

static void do_fxrstor(CPUX86State *env, target_ulong ptr, int data64,
                       uintptr_t retaddr)
void helper_fxsave(CPUX86State *env, target_ulong ptr)
{
    int i, fpus, fptag, nb_xmm_regs;
    floatx80 tmp;
    target_ulong addr;
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, retaddr);
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
    fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
    fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
    do_xsave_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xsave_mxcsr(env, ptr, ra);
        /* Fast FXSAVE leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            do_xsave_sse(env, ptr, ra);
        }
    }
}

static uint64_t get_xinuse(CPUX86State *env)
{
    uint64_t inuse = -1;

    /* For the most part, we don't track XINUSE.  We could calculate it
       here for all components, but it's probably less work to simply
       indicate in use.  That said, the state of BNDREGS is important
       enough to track in HFLAGS, so we might as well use that here.  */
    if ((env->hflags & HF_MPX_IU_MASK) == 0) {
       inuse &= ~XSTATE_BNDREGS;
    }
    return inuse;
}

static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
                     uint64_t inuse, uint64_t opt, uintptr_t ra)
{
    uint64_t old_bv, new_bv;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Never save anything not enabled by XCR0.  */
    rfbm &= env->xcr0;
    opt &= rfbm;

    if (opt & XSTATE_FP) {
        do_xsave_fpu(env, ptr, ra);
    }
    if (rfbm & XSTATE_SSE) {
        /* Note that saving MXCSR is not suppressed by XSAVEOPT.  */
        do_xsave_mxcsr(env, ptr, ra);
    }
    if (opt & XSTATE_SSE) {
        do_xsave_sse(env, ptr, ra);
    }
    if (opt & XSTATE_BNDREGS) {
        target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS].offset;
        do_xsave_bndregs(env, ptr + off, ra);
    }
    if (opt & XSTATE_BNDCSR) {
        target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR].offset;
        do_xsave_bndcsr(env, ptr + off, ra);
    }

    /* Update the XSTATE_BV field.  */
    old_bv = cpu_ldq_data_ra(env, ptr + 512, ra);
    new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
    cpu_stq_data_ra(env, ptr + 512, new_bv, ra);
}

void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
}

void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uint64_t inuse = get_xinuse(env);
    do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
}

static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, fpus, fptag;
    target_ulong addr;

    cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, ra));
    fpus = cpu_lduw_data_ra(env, ptr + 2, ra);
    fptag = cpu_lduw_data_ra(env, ptr + 4, ra);
    env->fpstt = (fpus >> 11) & 7;
    env->fpus = fpus & ~0x3800;
    fptag ^= 0xff;
@@ -1207,37 +1292,206 @@ static void do_fxrstor(CPUX86State *env, target_ulong ptr, int data64,

    addr = ptr + 0x20;
    for (i = 0; i < 8; i++) {
        tmp = helper_fldt(env, addr, retaddr);
        floatx80 tmp = helper_fldt(env, addr, ra);
        ST(i) = tmp;
        addr += 16;
    }
}

static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, ra));
}

static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
{
    int i, nb_xmm_regs;
    target_ulong addr;

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        /* XXX: finish it */
        cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + 0x18, retaddr));
        /* cpu_ldl_data_ra(env, ptr + 0x1c, retaddr); */
    if (env->hflags & HF_CS64_MASK) {
        nb_xmm_regs = 16;
    } else {
        nb_xmm_regs = 8;
    }

    addr = ptr + 0xa0;
        /* Fast FXRESTORE leaves out the XMM registers */
    for (i = 0; i < nb_xmm_regs; i++) {
        env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
        env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
        addr += 16;
    }
}

static void do_xrstor_bndregs(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    int i;

    for (i = 0; i < 4; i++, addr += 16) {
        env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
        env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
    }
}

static void do_xrstor_bndcsr(CPUX86State *env, target_ulong addr, uintptr_t ra)
{
    /* FIXME: Extend highest implemented bit of linear address.  */
    env->bndcs_regs.cfgu = cpu_ldq_data_ra(env, addr, ra);
    env->bndcs_regs.sts = cpu_ldq_data_ra(env, addr + 8, ra);
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr)
{
    uintptr_t ra = GETPC();

    /* The operand must be 16 byte aligned */
    if (ptr & 0xf) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    do_xrstor_fpu(env, ptr, ra);

    if (env->cr[4] & CR4_OSFXSR_MASK) {
        do_xrstor_mxcsr(env, ptr, ra);
        /* Fast FXRSTOR leaves out the XMM registers */
        if (!(env->efer & MSR_EFER_FFXSR)
            || (env->hflags & HF_CPL_MASK)
            || !(env->hflags & HF_LMA_MASK)) {
            for (i = 0; i < nb_xmm_regs; i++) {
                env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, retaddr);
                env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, retaddr);
                addr += 16;
            do_xrstor_sse(env, ptr, ra);
        }
    }
}

void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
{
    uintptr_t ra = GETPC();
    uint64_t xstate_bv, xcomp_bv0, xcomp_bv1;

    rfbm &= env->xcr0;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, ra);
    }

    /* The operand must be 64 byte aligned.  */
    if (ptr & 63) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    xstate_bv = cpu_ldq_data_ra(env, ptr + 512, ra);

    if ((int64_t)xstate_bv < 0) {
        /* FIXME: Compact form.  */
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* Standard form.  */

    /* The XSTATE field must not set bits not present in XCR0.  */
    if (xstate_bv & ~env->xcr0) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    /* The XCOMP field must be zero.  */
    xcomp_bv0 = cpu_ldq_data_ra(env, ptr + 520, ra);
    xcomp_bv1 = cpu_ldq_data_ra(env, ptr + 528, ra);
    if (xcomp_bv0 || xcomp_bv1) {
        raise_exception_ra(env, EXCP0D_GPF, ra);
    }

    if (rfbm & XSTATE_FP) {
        if (xstate_bv & XSTATE_FP) {
            do_xrstor_fpu(env, ptr, ra);
        } else {
            helper_fninit(env);
            memset(env->fpregs, 0, sizeof(env->fpregs));
        }
    }
    if (rfbm & XSTATE_SSE) {
        /* Note that the standard form of XRSTOR loads MXCSR from memory
           whether or not the XSTATE_BV bit is set.  */
        do_xrstor_mxcsr(env, ptr, ra);
        if (xstate_bv & XSTATE_SSE) {
            do_xrstor_sse(env, ptr, ra);
        } else {
            /* ??? When AVX is implemented, we may have to be more
               selective in the clearing.  */
            memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
        }
    }
    if (rfbm & XSTATE_BNDREGS) {
        if (xstate_bv & XSTATE_BNDREGS) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDREGS].offset;
            do_xrstor_bndregs(env, ptr + off, ra);
            env->hflags |= HF_MPX_IU_MASK;
        } else {
            memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
            env->hflags &= ~HF_MPX_IU_MASK;
        }
    }
    if (rfbm & XSTATE_BNDCSR) {
        if (xstate_bv & XSTATE_BNDCSR) {
            target_ulong off = x86_ext_save_areas[XSTATE_BNDCSR].offset;
            do_xrstor_bndcsr(env, ptr + off, ra);
        } else {
            memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
        }
        cpu_sync_bndcs_hflags(env);
    }
}

void helper_fxrstor(CPUX86State *env, target_ulong ptr, int data64)
uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
{
    do_fxrstor(env, ptr, data64, GETPC());
    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    switch (ecx) {
    case 0:
        return env->xcr0;
    case 1:
        if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
            return env->xcr0 & get_xinuse(env);
        }
        break;
    }
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
{
    uint32_t dummy, ena_lo, ena_hi;
    uint64_t ena;

    /* The OS must have enabled XSAVE.  */
    if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
        raise_exception_ra(env, EXCP06_ILLOP, GETPC());
    }

    /* Only XCR0 is defined at present; the FPU may not be disabled.  */
    if (ecx != 0 || (mask & XSTATE_FP) == 0) {
        goto do_gpf;
    }

    /* Disallow enabling unimplemented features.  */
    cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
    ena = ((uint64_t)ena_hi << 32) | ena_lo;
    if (mask & ~ena) {
        goto do_gpf;
    }

    /* Disallow enabling only half of MPX.  */
    if ((mask ^ (mask * (XSTATE_BNDCSR / XSTATE_BNDREGS))) & XSTATE_BNDCSR) {
        goto do_gpf;
    }

    env->xcr0 = mask;
    cpu_sync_bndcs_hflags(env);
    return;

 do_gpf:
    raise_exception_ra(env, EXCP0D_GPF, GETPC());
}

void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f)
Loading