Commit dbb14135 authored by Anthony Liguori's avatar Anthony Liguori
Browse files

Merge remote branch 'qemu-kvm/uq/master' into staging

parents d03703c8 25d2e361
Loading
Loading
Loading
Loading
+99 −33
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/utsname.h>

#include <linux/kvm.h>

@@ -53,6 +54,8 @@
#define BUS_MCEERR_AO 5
#endif

static int lm_capable_kernel;

#ifdef KVM_CAP_EXT_CPUID

static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
@@ -239,12 +242,16 @@ static void kvm_do_inject_x86_mce(void *_data)
    struct kvm_x86_mce_data *data = _data;
    int r;

    /* If there is an MCE excpetion being processed, ignore this SRAO MCE */
    /* If there is an MCE exception being processed, ignore this SRAO MCE */
    if ((data->env->mcg_cap & MCG_SER_P) &&
        !(data->mce->status & MCI_STATUS_AR)) {
        r = kvm_mce_in_exception(data->env);
    if (r == -1)
        if (r == -1) {
            fprintf(stderr, "Failed to get MCE status\n");
    else if (r && !(data->mce->status & MCI_STATUS_AR))
        } else if (r) {
            return;
        }
    }

    r = kvm_set_mce(data->env, data->mce);
    if (r < 0) {
@@ -434,23 +441,26 @@ void kvm_arch_reset_vcpu(CPUState *env)
    }
}

static int kvm_has_msr_star(CPUState *env)
int has_msr_star;
int has_msr_hsave_pa;

static void kvm_supported_msrs(CPUState *env)
{
    static int has_msr_star;
    static int kvm_supported_msrs;
    int ret;

    /* first time */
    if (has_msr_star == 0) {        
    if (kvm_supported_msrs == 0) {
        struct kvm_msr_list msr_list, *kvm_msr_list;

        has_msr_star = -1;
        kvm_supported_msrs = -1;

        /* Obtain MSR list from KVM.  These are the MSRs that we must
         * save/restore */
        msr_list.nmsrs = 0;
        ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
        if (ret < 0 && ret != -E2BIG) {
            return 0;
            return;
        }
        /* Old kernel modules had a bug and could write beyond the provided
           memory. Allocate at least a safe amount of 1K. */
@@ -466,7 +476,11 @@ static int kvm_has_msr_star(CPUState *env)
            for (i = 0; i < kvm_msr_list->nmsrs; i++) {
                if (kvm_msr_list->indices[i] == MSR_STAR) {
                    has_msr_star = 1;
                    break;
                    continue;
                }
                if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
                    has_msr_hsave_pa = 1;
                    continue;
                }
            }
        }
@@ -474,9 +488,19 @@ static int kvm_has_msr_star(CPUState *env)
        free(kvm_msr_list);
    }

    if (has_msr_star == 1)
        return 1;
    return 0;
    return;
}

/* Report whether the kernel exposes MSR_VM_HSAVE_PA in its MSR index
 * list.  The probe itself is performed (once) by kvm_supported_msrs();
 * here we only make sure it has run and return the cached flag. */
static int kvm_has_msr_hsave_pa(CPUState *env)
{
    kvm_supported_msrs(env);

    return has_msr_hsave_pa;
}

/* Report whether the kernel exposes MSR_STAR in its MSR index list.
 * kvm_supported_msrs() performs the actual (one-time) kernel query;
 * this helper just returns the flag it cached. */
static int kvm_has_msr_star(CPUState *env)
{
    kvm_supported_msrs(env);

    return has_msr_star;
}

static int kvm_init_identity_map_page(KVMState *s)
@@ -502,6 +526,11 @@ int kvm_arch_init(KVMState *s, int smp_cpus)
{
    int ret;

    struct utsname utsname;

    uname(&utsname);
    lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;

    /* create vm86 tss.  KVM uses vm86 mode to emulate 16-bit code
     * directly.  In order to use vm86 mode, a TSS is needed.  Since this
     * must be part of guest physical memory, we need to allocate it.  Older
@@ -779,28 +808,40 @@ static int kvm_put_msrs(CPUState *env, int level)
        struct kvm_msr_entry entries[100];
    } msr_data;
    struct kvm_msr_entry *msrs = msr_data.entries;
    int i, n = 0;
    int n = 0;

    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
    kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
    if (kvm_has_msr_star(env))
	kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
    if (kvm_has_msr_hsave_pa(env))
        kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
#ifdef TARGET_X86_64
    /* FIXME if lm capable */
    if (lm_capable_kernel) {
        kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
        kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
        kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
        kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
    }
#endif
    if (level == KVM_PUT_FULL_STATE) {
        /*
         * KVM is yet unable to synchronize TSC values of multiple VCPUs on
         * writeback. Until this is fixed, we only write the offset to SMP
         * guests after migration, desynchronizing the VCPUs, but avoiding
         * huge jump-backs that would occur without any writeback at all.
         */
        if (smp_cpus == 1 || env->tsc != 0) {
            kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
        }
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
                          env->system_time_msr);
        kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
    }
#ifdef KVM_CAP_MCE
    if (env->mcg_cap) {
        int i;
        if (level == KVM_PUT_RESET_STATE)
            kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
        else if (level == KVM_PUT_FULL_STATE) {
@@ -1010,13 +1051,16 @@ static int kvm_get_msrs(CPUState *env)
    msrs[n++].index = MSR_IA32_SYSENTER_EIP;
    if (kvm_has_msr_star(env))
	msrs[n++].index = MSR_STAR;
    if (kvm_has_msr_hsave_pa(env))
        msrs[n++].index = MSR_VM_HSAVE_PA;
    msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
    /* FIXME lm_capable_kernel */
    if (lm_capable_kernel) {
        msrs[n++].index = MSR_CSTAR;
        msrs[n++].index = MSR_KERNELGSBASE;
        msrs[n++].index = MSR_FMASK;
        msrs[n++].index = MSR_LSTAR;
    }
#endif
    msrs[n++].index = MSR_KVM_SYSTEM_TIME;
    msrs[n++].index = MSR_KVM_WALL_CLOCK;
@@ -1066,6 +1110,9 @@ static int kvm_get_msrs(CPUState *env)
        case MSR_IA32_TSC:
            env->tsc = msrs[i].data;
            break;
        case MSR_VM_HSAVE_PA:
            env->vm_hsave = msrs[i].data;
            break;
        case MSR_KVM_SYSTEM_TIME:
            env->system_time_msr = msrs[i].data;
            break;
@@ -1085,9 +1132,9 @@ static int kvm_get_msrs(CPUState *env)
            if (msrs[i].index >= MSR_MC0_CTL &&
                msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
                env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
                break;
            }
#endif
            break;
        }
    }

@@ -1632,6 +1679,28 @@ static void hardware_memory_error(void)
    exit(1);
}

#ifdef KVM_CAP_MCE
/* Deliver a fatal machine-check to every VCPU except @env.
 *
 * Real hardware broadcasts MCA signals to all processors, but only on
 * processor version 06H_EH and above; the family/model fields are
 * decoded from env->cpuid_version (extended model in bits 19..16 is
 * folded into the high nibble of the model).  Each sibling receives an
 * uncorrected-error MCE in bank 1 with RIPV set.
 */
static void kvm_mce_broadcast_rest(CPUState *env)
{
    CPUState *other;
    int ver = env->cpuid_version;
    int family = (ver >> 8) & 0xf;
    int model = ((ver >> 12) & 0xf0) + ((ver >> 4) & 0xf);

    /* Older processor versions do not broadcast — nothing to do. */
    if (!((family == 6 && model >= 14) || family > 6)) {
        return;
    }

    for (other = first_cpu; other != NULL; other = other->next_cpu) {
        if (other != env) {
            kvm_inject_x86_mce(other, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
        }
    }
}
#endif

int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
#if defined(KVM_CAP_MCE)
@@ -1689,6 +1758,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
            fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
            abort();
        }
        kvm_mce_broadcast_rest(env);
    } else
#endif
    {
@@ -1711,7 +1781,6 @@ int kvm_on_sigbus(int code, void *addr)
        void *vaddr;
        ram_addr_t ram_addr;
        target_phys_addr_t paddr;
        CPUState *cenv;

        /* Hope we are lucky for AO MCE */
        vaddr = addr;
@@ -1727,10 +1796,7 @@ int kvm_on_sigbus(int code, void *addr)
        kvm_inject_x86_mce(first_cpu, 9, status,
                           MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
                           (MCM_ADDR_PHYS << 6) | 0xc, 1);
        for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) {
            kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
        }
        kvm_mce_broadcast_rest(first_cpu);
    } else
#endif
    {