Commit ebbfef2f authored by Liran Alon, committed by Paolo Bonzini
Browse files

target/i386: kvm: Add support for save and restore nested state



Kernel commit 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
introduced new IOCTLs to extract and restore vCPU state related to
Intel VMX & AMD SVM.

Utilize these IOCTLs to add support for migration of VMs which are
running nested hypervisors.

Reviewed-by: Nikita Leshenko <nikita.leshchenko@oracle.com>
Reviewed-by: Maran Wilson <maran.wilson@oracle.com>
Tested-by: Maran Wilson <maran.wilson@oracle.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Message-Id: <20190619162140.133674-9-liran.alon@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 6cfd7639
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -87,6 +87,7 @@ struct KVMState
#ifdef KVM_CAP_SET_GUEST_DEBUG
    QTAILQ_HEAD(, kvm_sw_breakpoint) kvm_sw_breakpoints;
#endif
    int max_nested_state_len;
    int many_ioeventfds;
    int intx_set_mask;
    bool sync_mmu;
@@ -1681,6 +1682,8 @@ static int kvm_init(MachineState *ms)
    s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
#endif

    s->max_nested_state_len = kvm_check_extension(s, KVM_CAP_NESTED_STATE);

#ifdef KVM_CAP_IRQ_ROUTING
    kvm_direct_msi_allowed = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
#endif
@@ -2248,6 +2251,11 @@ int kvm_has_debugregs(void)
    return kvm_state->debugregs;
}

/*
 * Report the value cached in kvm_state->max_nested_state_len, i.e. the
 * result of the KVM_CAP_NESTED_STATE extension check done in kvm_init().
 * Callers in this file treat a value <= 0 as "nested state unsupported".
 */
int kvm_max_nested_state_length(void)
{
    const int cached_len = kvm_state->max_nested_state_len;

    return cached_len;
}

int kvm_has_many_ioeventfds(void)
{
    if (!kvm_enabled()) {
+1 −0
Original line number Diff line number Diff line
@@ -210,6 +210,7 @@ bool kvm_has_sync_mmu(void);
int kvm_has_vcpu_events(void);
int kvm_has_robust_singlestep(void);
int kvm_has_debugregs(void);
int kvm_max_nested_state_length(void);
int kvm_has_pit_state2(void);
int kvm_has_many_ioeventfds(void);
int kvm_has_gsi_routing(void);
+3 −0
Original line number Diff line number Diff line
@@ -1360,6 +1360,9 @@ typedef struct CPUX86State {
#if defined(CONFIG_KVM) || defined(CONFIG_HVF)
    void *xsave_buf;
#endif
#if defined(CONFIG_KVM)
    struct kvm_nested_state *nested_state;
#endif
#if defined(CONFIG_HVF)
    HVFX86EmulatorState *hvf_emul;
#endif
+80 −0
Original line number Diff line number Diff line
@@ -1324,6 +1324,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
    struct kvm_cpuid_entry2 *c;
    uint32_t signature[3];
    int kvm_base = KVM_CPUID_SIGNATURE;
    int max_nested_state_len;
    int r;
    Error *local_err = NULL;

@@ -1658,6 +1659,24 @@ int kvm_arch_init_vcpu(CPUState *cs)
    if (has_xsave) {
        env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
    }

    max_nested_state_len = kvm_max_nested_state_length();
    if (max_nested_state_len > 0) {
        assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
        env->nested_state = g_malloc0(max_nested_state_len);

        env->nested_state->size = max_nested_state_len;

        if (IS_INTEL_CPU(env)) {
            struct kvm_vmx_nested_state_hdr *vmx_hdr =
                &env->nested_state->hdr.vmx;

            env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
            vmx_hdr->vmxon_pa = -1ull;
            vmx_hdr->vmcs12_pa = -1ull;
        }
    }

    cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE);

    if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) {
@@ -1682,12 +1701,18 @@ int kvm_arch_init_vcpu(CPUState *cs)
/*
 * Release the per-vCPU buffers owned by the x86 KVM code: the MSR
 * exchange buffer and the nested virtualization state buffer.
 *
 * @cs: the vCPU being torn down
 *
 * Both pointers are reset to NULL after being freed so that a repeated
 * call (or a later NULL check on env->nested_state) stays safe.
 *
 * Always returns 0.
 */
int kvm_arch_destroy_vcpu(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    /* g_free() accepts NULL, so no guard is needed around either call. */
    g_free(cpu->kvm_msr_buf);
    cpu->kvm_msr_buf = NULL;

    g_free(env->nested_state);
    env->nested_state = NULL;

    return 0;
}

@@ -3411,6 +3436,52 @@ static int kvm_get_debugregs(X86CPU *cpu)
    return 0;
}

/*
 * Push env->nested_state to the kernel with KVM_SET_NESTED_STATE.
 *
 * Does nothing and returns 0 when kvm_max_nested_state_length() reports
 * a non-positive length. Otherwise returns the result of the ioctl.
 */
static int kvm_put_nested_state(X86CPU *cpu)
{
    const int kernel_max_len = kvm_max_nested_state_length();

    if (kernel_max_len > 0) {
        struct kvm_nested_state *state = cpu->env.nested_state;

        /* The buffer must never claim more than the kernel's maximum. */
        assert(state->size <= kernel_max_len);
        return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, state);
    }

    return 0;
}

/*
 * Fetch the nested state from the kernel with KVM_GET_NESTED_STATE into
 * env->nested_state and mirror its guest-mode flag into env->hflags.
 *
 * Does nothing and returns 0 when kvm_max_nested_state_length() reports
 * a non-positive length. Returns the negative ioctl result on failure,
 * and the (non-negative) ioctl result on success.
 */
static int kvm_get_nested_state(X86CPU *cpu)
{
    CPUX86State *env = &cpu->env;
    const int kernel_max_len = kvm_max_nested_state_length();
    struct kvm_nested_state *state;
    int rc;

    if (kernel_max_len <= 0) {
        return 0;
    }

    state = env->nested_state;

    /*
     * An incoming migration may have left a smaller value in
     * nested_state->size than this kernel supports. That value is kept
     * for KVM_SET_NESTED_STATE, but before asking the kernel for its
     * state we advertise the full buffer length again.
     */
    state->size = kernel_max_len;

    rc = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, state);
    if (rc < 0) {
        return rc;
    }

    /* Keep HF_GUEST_MASK in sync with KVM_STATE_NESTED_GUEST_MODE. */
    env->hflags = (state->flags & KVM_STATE_NESTED_GUEST_MODE)
                  ? (env->hflags | HF_GUEST_MASK)
                  : (env->hflags & ~HF_GUEST_MASK);

    return rc;
}

int kvm_arch_put_registers(CPUState *cpu, int level)
{
    X86CPU *x86_cpu = X86_CPU(cpu);
@@ -3418,6 +3489,11 @@ int kvm_arch_put_registers(CPUState *cpu, int level)

    assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));

    ret = kvm_put_nested_state(x86_cpu);
    if (ret < 0) {
        return ret;
    }

    if (level >= KVM_PUT_RESET_STATE) {
        ret = kvm_put_msr_feature_control(x86_cpu);
        if (ret < 0) {
@@ -3533,6 +3609,10 @@ int kvm_arch_get_registers(CPUState *cs)
    if (ret < 0) {
        goto out;
    }
    ret = kvm_get_nested_state(cpu);
    if (ret < 0) {
        goto out;
    }
    ret = 0;
 out:
    cpu_sync_bndcs_hflags(&cpu->env);
+198 −0
Original line number Diff line number Diff line
@@ -231,6 +231,15 @@ static int cpu_pre_save(void *opaque)
        env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK);
    }

#ifdef CONFIG_KVM
    /* Verify we have nested virtualization state from kernel if required */
    if (cpu_has_nested_virt(env) && !env->nested_state) {
        error_report("Guest enabled nested virtualization but kernel "
                "does not support saving of nested state");
        return -EINVAL;
    }
#endif

    return 0;
}

@@ -278,6 +287,16 @@ static int cpu_post_load(void *opaque, int version_id)
    env->hflags &= ~HF_CPL_MASK;
    env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;

#ifdef CONFIG_KVM
    if ((env->hflags & HF_GUEST_MASK) &&
        (!env->nested_state ||
        !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) {
        error_report("vCPU set in guest-mode inconsistent with "
                     "migrated kernel nested state");
        return -EINVAL;
    }
#endif

    env->fpstt = (env->fpus_vmstate >> 11) & 7;
    env->fpus = env->fpus_vmstate & ~0x3800;
    env->fptag_vmstate ^= 0xff;
@@ -851,6 +870,182 @@ static const VMStateDescription vmstate_tsc_khz = {
    }
};

#ifdef CONFIG_KVM

/*
 * Predicate for the vmcs12 subsection: true when the recorded state size
 * extends past the offset at which data.vmx[0].vmcs12 begins.
 */
static bool vmx_vmcs12_needed(void *opaque)
{
    struct kvm_nested_state *ns = opaque;
    const size_t vmcs12_start =
        offsetof(struct kvm_nested_state, data.vmx[0].vmcs12);

    return ns->size > vmcs12_start;
}

/*
 * Migration subsection "cpu/kvm_nested_state/vmx/vmcs12".
 *
 * Describes the data.vmx[0].vmcs12 byte array of struct kvm_nested_state
 * (KVM_STATE_NESTED_VMX_VMCS_SIZE bytes). Its .needed hook is
 * vmx_vmcs12_needed().
 */
static const VMStateDescription vmstate_vmx_vmcs12 = {
    .name = "cpu/kvm_nested_state/vmx/vmcs12",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmx_vmcs12_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12,
                            struct kvm_nested_state,
                            KVM_STATE_NESTED_VMX_VMCS_SIZE),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * Predicate for the shadow vmcs12 subsection: true when the recorded
 * state size extends past the offset at which data.vmx[0].shadow_vmcs12
 * begins.
 */
static bool vmx_shadow_vmcs12_needed(void *opaque)
{
    struct kvm_nested_state *ns = opaque;
    const size_t shadow_start =
        offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12);

    return ns->size > shadow_start;
}

/*
 * Migration subsection "cpu/kvm_nested_state/vmx/shadow_vmcs12".
 *
 * Describes the data.vmx[0].shadow_vmcs12 byte array of
 * struct kvm_nested_state (KVM_STATE_NESTED_VMX_VMCS_SIZE bytes).
 * Its .needed hook is vmx_shadow_vmcs12_needed().
 */
static const VMStateDescription vmstate_vmx_shadow_vmcs12 = {
    .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmx_shadow_vmcs12_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12,
                            struct kvm_nested_state,
                            KVM_STATE_NESTED_VMX_VMCS_SIZE),
        VMSTATE_END_OF_LIST()
    }
};

/*
 * Predicate for the VMX subsection: the state must be in VMX format and
 * either hold a vmxon_pa other than -1ull or have the
 * KVM_STATE_NESTED_SMM_VMXON bit set in the SMM flags.
 */
static bool vmx_nested_state_needed(void *opaque)
{
    struct kvm_nested_state *ns = opaque;

    if (ns->format != KVM_STATE_NESTED_FORMAT_VMX) {
        return false;
    }

    if (ns->hdr.vmx.vmxon_pa != -1ull) {
        return true;
    }

    return (ns->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) != 0;
}

/*
 * Migration subsection "cpu/kvm_nested_state/vmx".
 *
 * Describes the VMX header of struct kvm_nested_state (vmxon_pa,
 * vmcs12_pa and the SMM flags) and lists the vmcs12 and shadow vmcs12
 * descriptors as its own subsections. Its .needed hook is
 * vmx_nested_state_needed().
 */
static const VMStateDescription vmstate_vmx_nested_state = {
    .name = "cpu/kvm_nested_state/vmx",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = vmx_nested_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state),
        VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state),
        VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state),
        VMSTATE_END_OF_LIST()
    },
    /* Each entry below carries its own .needed predicate. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_vmx_vmcs12,
        &vmstate_vmx_shadow_vmcs12,
        NULL,
    }
};

/* Predicate for the SVM subsection: true when the state is in SVM format. */
static bool svm_nested_state_needed(void *opaque)
{
    struct kvm_nested_state *ns = opaque;
    const bool is_svm_format = (ns->format == KVM_STATE_NESTED_FORMAT_SVM);

    return is_svm_format;
}

/*
 * Migration subsection "cpu/kvm_nested_state/svm".
 *
 * The field list is empty, so this descriptor defines no payload fields
 * of its own. Its .needed hook is svm_nested_state_needed().
 */
static const VMStateDescription vmstate_svm_nested_state = {
    .name = "cpu/kvm_nested_state/svm",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = svm_nested_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    }
};

/*
 * Predicate for the top-level nested state subsection: the vCPU must
 * have a nested_state buffer, and either the VMX or the SVM predicate
 * must accept it.
 */
static bool nested_state_needed(void *opaque)
{
    X86CPU *cpu = opaque;
    struct kvm_nested_state *ns = cpu->env.nested_state;

    if (!ns) {
        return false;
    }

    return vmx_nested_state_needed(ns) || svm_nested_state_needed(ns);
}

/*
 * Post-load hook of the "cpu/nested_state" migration subsection.
 *
 * Validates the nested state header found in env->nested_state against
 * the limits of the local kernel.
 *
 * @opaque: the X86CPU whose nested state is being checked
 * @version_id: not used
 *
 * Returns 0 when the state is acceptable. Returns -EINVAL (after
 * reporting an error) when the kernel cannot restore nested state, when
 * the size is outside [header size, kernel maximum], or when the format
 * is neither VMX nor SVM.
 */
static int nested_state_post_load(void *opaque, int version_id)
{
    X86CPU *cpu = opaque;
    CPUX86State *env = &cpu->env;
    struct kvm_nested_state *nested_state = env->nested_state;
    int min_nested_state_len = offsetof(struct kvm_nested_state, data);
    int max_nested_state_len = kvm_max_nested_state_length();

    /*
     * If our kernel does not support setting nested state
     * and we have received nested state from the migration stream,
     * we need to fail the migration.
     */
    if (max_nested_state_len <= 0) {
        error_report("Received nested state when kernel cannot restore it");
        return -EINVAL;
    }

    /*
     * Verify that the size of the received nested_state struct
     * at least covers the required header and is not larger
     * than the max size that our kernel supports.
     * The size field is unsigned, hence the %u conversions below.
     */
    if (nested_state->size < min_nested_state_len) {
        error_report("Received nested state size less than min: "
                     "len=%u, min=%d",
                     nested_state->size, min_nested_state_len);
        return -EINVAL;
    }
    if (nested_state->size > max_nested_state_len) {
        error_report("Received unsupported nested state size: "
                     "nested_state->size=%u, max=%d",
                     nested_state->size, max_nested_state_len);
        return -EINVAL;
    }

    /* Verify format is valid */
    if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) &&
        (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) {
        error_report("Received invalid nested state format: %d",
                     nested_state->format);
        return -EINVAL;
    }

    return 0;
}

/*
 * Migration descriptor "cpu/kvm_nested_state" for struct kvm_nested_state.
 *
 * Describes the common header fields (flags, format, size) and lists the
 * VMX and SVM descriptors as subsections. It has no .needed hook of its
 * own; it is referenced through VMSTATE_STRUCT_POINTER by
 * vmstate_nested_state.
 */
static const VMStateDescription vmstate_kvm_nested_state = {
    .name = "cpu/kvm_nested_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_U16(flags, struct kvm_nested_state),
        VMSTATE_U16(format, struct kvm_nested_state),
        VMSTATE_U32(size, struct kvm_nested_state),
        VMSTATE_END_OF_LIST()
    },
    /* Format-specific parts; each has its own .needed predicate. */
    .subsections = (const VMStateDescription*[]) {
        &vmstate_vmx_nested_state,
        &vmstate_svm_nested_state,
        NULL
    }
};

/*
 * Migration subsection "cpu/nested_state" of the x86 CPU state.
 *
 * Its single field is the struct kvm_nested_state reached through the
 * env.nested_state pointer of X86CPU, described by
 * vmstate_kvm_nested_state. Its .needed hook is nested_state_needed()
 * and its .post_load hook is nested_state_post_load().
 *
 * NOTE(review): the field is accessed through env.nested_state; confirm
 * that the pointer is non-NULL on a destination whose kernel lacks
 * nested state support before the post_load check gets a chance to run.
 */
static const VMStateDescription vmstate_nested_state = {
    .name = "cpu/nested_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = nested_state_needed,
    .post_load = nested_state_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU,
                vmstate_kvm_nested_state,
                struct kvm_nested_state),
        VMSTATE_END_OF_LIST()
    }
};

#endif

static bool mcg_ext_ctl_needed(void *opaque)
{
    X86CPU *cpu = opaque;
@@ -1112,6 +1307,9 @@ VMStateDescription vmstate_x86_cpu = {
        &vmstate_svm_npt,
#ifndef TARGET_X86_64
        &vmstate_efer32,
#endif
#ifdef CONFIG_KVM
        &vmstate_nested_state,
#endif
        NULL
    }