Commit c5bbcaa4 authored by Peter Maydell's avatar Peter Maydell
Browse files

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging



pc, pci, virtio: fixes, features

A bunch of fixes all over the place.
A new vmcore device - the user interface around it is still somewhat
controversial, but I feel most of the code is fine, suggestions can be
addressed by adding patches on top.

Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Sun 15 Oct 2017 04:02:23 BST
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream: (26 commits)
  tests/pxe: Test more NICs when running in SPEED=slow mode
  pc: remove useless hot_add_cpu initialisation
  isapc: Remove unnecessary migration compatibility code
  virtio-pci: Replace modern_as with direct access to modern_bar
  virtio: fix descriptor counting in virtqueue_pop
  hw/gen_pcie_root_port: make IO RO 0 on IO disabled
  pci: Validate interfaces on base_class_init
  xen/pt: Mark TYPE_XEN_PT_DEVICE as hybrid
  pci: Add INTERFACE_CONVENTIONAL_PCI_DEVICE to Conventional PCI devices
  pci: Add INTERFACE_PCIE_DEVICE to all PCIe devices
  pci: Add interface names to hybrid PCI devices
  pci: conventional-pci-device and pci-express-device interfaces
  PCI: PCIe access should always be little endian
  virtio/pci/migration: Convert to VMState
  hw/pci-bridge/pcie_pci_bridge: properly handle MSI unavailability case
  pci: allow 32-bit PCI IO accesses to pass through the PCI bridge
  virtio/vhost: reset dev->log after syncing
  MAINTAINERS: add Dump maintainers
  scripts/dump-guest-memory.py: add vmcoreinfo
  kdump: set vmcoreinfo location
  ...

Signed-off-by: default avatarPeter Maydell <peter.maydell@linaro.org>
parents 79b2a13a ab06ec43
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -1317,6 +1317,17 @@ S: Maintained
F: device_tree.c
F: include/sysemu/device_tree.h

Dump
S: Supported
M: Marc-André Lureau <marcandre.lureau@redhat.com>
F: dump.c
F: hw/misc/vmcoreinfo.c
F: include/hw/misc/vmcoreinfo.h
F: include/sysemu/dump-arch.h
F: include/sysemu/dump.h
F: scripts/dump-guest-memory.py
F: stubs/dump.c

Error reporting
M: Markus Armbruster <armbru@redhat.com>
S: Supported
+49 −0
Original line number Diff line number Diff line
=================
VMCoreInfo device
=================

The `-device vmcoreinfo` will create a fw_cfg entry for a guest to
store dump details.

etc/vmcoreinfo
**************

A guest may use this fw_cfg entry to add information details to qemu
dumps.

The entry of 16 bytes has the following layout, in little-endian::

#define VMCOREINFO_FORMAT_NONE 0x0
#define VMCOREINFO_FORMAT_ELF 0x1

    struct FWCfgVMCoreInfo {
        uint16_t host_format;  /* formats host supports */
        uint16_t guest_format; /* format guest supplies */
        uint32_t size;         /* size of vmcoreinfo region */
        uint64_t paddr;        /* physical address of vmcoreinfo region */
    };

Only full write (of 16 bytes) are considered valid for further
processing of entry values.

A write of 0 in guest_format will disable further processing of
vmcoreinfo entry values & content.

Format & content
****************

As of qemu 2.11, only VMCOREINFO_FORMAT_ELF is supported.

The entry gives location and size of an ELF note that is appended in
qemu dumps.

The note format/class must be of the target bitness and the size must
be less than 1Mb.

If the ELF note name is "VMCOREINFO", it is expected to be the Linux
vmcoreinfo note (see Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
in Linux source). In this case, qemu dump code will read the content
as a key=value text file, looking for "NUMBER(phys_base)" key
value. The value is expected to be more accurate than architecture
guess of the value. This is useful for KASLR-enabled guest with
ancient tools not handling the VMCOREINFO note.
+183 −0
Original line number Diff line number Diff line
@@ -25,6 +25,8 @@
#include "qapi/qmp/qerror.h"
#include "qmp-commands.h"
#include "qapi-event.h"
#include "qemu/error-report.h"
#include "hw/misc/vmcoreinfo.h"

#include <zlib.h>
#ifdef CONFIG_LZO
@@ -37,6 +39,13 @@
#define ELF_MACHINE_UNAME "Unknown"
#endif

#define MAX_GUEST_NOTE_SIZE (1 << 20) /* 1MB should be enough */

#define ELF_NOTE_SIZE(hdr_size, name_size, desc_size)   \
    ((DIV_ROUND_UP((hdr_size), 4) +                     \
      DIV_ROUND_UP((name_size), 4) +                    \
      DIV_ROUND_UP((desc_size), 4)) * 4)

uint16_t cpu_to_dump16(DumpState *s, uint16_t val)
{
    if (s->dump_info.d_endian == ELFDATA2LSB) {
@@ -75,6 +84,8 @@ static int dump_cleanup(DumpState *s)
    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    close(s->fd);
    g_free(s->guest_note);
    s->guest_note = NULL;
    if (s->resume) {
        if (s->detached) {
            qemu_mutex_lock_iothread();
@@ -234,6 +245,19 @@ static inline int cpu_index(CPUState *cpu)
    return cpu->cpu_index + 1;
}

static void write_guest_note(WriteCoreDumpFunction f, DumpState *s,
                             Error **errp)
{
    int ret;

    if (s->guest_note) {
        ret = f(s->guest_note, s->guest_note_size, s);
        if (ret < 0) {
            error_setg(errp, "dump: failed to write guest note");
        }
    }
}

static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
                              Error **errp)
{
@@ -257,6 +281,8 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s,
            return;
        }
    }

    write_guest_note(f, s, errp);
}

static void write_elf32_note(DumpState *s, Error **errp)
@@ -302,6 +328,8 @@ static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s,
            return;
        }
    }

    write_guest_note(f, s, errp);
}

static void write_elf_section(DumpState *s, int type, Error **errp)
@@ -713,6 +741,61 @@ static int buf_write_note(const void *buf, size_t size, void *opaque)
    return 0;
}

/*
 * This function retrieves various sizes from an elf header.
 *
 * @note has to be a valid ELF note. The return sizes are unmodified
 * (not padded or rounded up to be multiple of 4).
 */
static void get_note_sizes(DumpState *s, const void *note,
                           uint64_t *note_head_size,
                           uint64_t *name_size,
                           uint64_t *desc_size)
{
    uint64_t note_head_sz;
    uint64_t name_sz;
    uint64_t desc_sz;

    if (s->dump_info.d_class == ELFCLASS64) {
        const Elf64_Nhdr *hdr = note;
        note_head_sz = sizeof(Elf64_Nhdr);
        name_sz = tswap64(hdr->n_namesz);
        desc_sz = tswap64(hdr->n_descsz);
    } else {
        const Elf32_Nhdr *hdr = note;
        note_head_sz = sizeof(Elf32_Nhdr);
        name_sz = tswap32(hdr->n_namesz);
        desc_sz = tswap32(hdr->n_descsz);
    }

    if (note_head_size) {
        *note_head_size = note_head_sz;
    }
    if (name_size) {
        *name_size = name_sz;
    }
    if (desc_size) {
        *desc_size = desc_sz;
    }
}

static bool note_name_equal(DumpState *s,
                            const uint8_t *note, const char *name)
{
    int len = strlen(name) + 1;
    uint64_t head_size, name_size;

    get_note_sizes(s, note, &head_size, &name_size, NULL);
    head_size = ROUND_UP(head_size, 4);

    if (name_size != len ||
        memcmp(note + head_size, "VMCOREINFO", len)) {
        return false;
    }

    return true;
}

/* write common header, sub header and elf note to vmcore */
static void create_header32(DumpState *s, Error **errp)
{
@@ -774,6 +857,18 @@ static void create_header32(DumpState *s, Error **errp)
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    if (s->guest_note &&
        note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;

        get_note_sizes(s, s->guest_note,
                       &hsize, &name_size, &size_vmcoreinfo_desc);
        offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
            (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
        kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
        kh->size_vmcoreinfo = cpu_to_dump32(s, size_vmcoreinfo_desc);
    }

    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump32(s, s->note_size);

@@ -874,6 +969,18 @@ static void create_header64(DumpState *s, Error **errp)
    kh->dump_level = cpu_to_dump32(s, DUMP_LEVEL);

    offset_note = DISKDUMP_HEADER_BLOCKS * block_size + size;
    if (s->guest_note &&
        note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        uint64_t hsize, name_size, size_vmcoreinfo_desc, offset_vmcoreinfo;

        get_note_sizes(s, s->guest_note,
                       &hsize, &name_size, &size_vmcoreinfo_desc);
        offset_vmcoreinfo = offset_note + s->note_size - s->guest_note_size +
            (DIV_ROUND_UP(hsize, 4) + DIV_ROUND_UP(name_size, 4)) * 4;
        kh->offset_vmcoreinfo = cpu_to_dump64(s, offset_vmcoreinfo);
        kh->size_vmcoreinfo = cpu_to_dump64(s, size_vmcoreinfo_desc);
    }

    kh->offset_note = cpu_to_dump64(s, offset_note);
    kh->note_size = cpu_to_dump64(s, s->note_size);

@@ -1487,10 +1594,44 @@ static int64_t dump_calculate_size(DumpState *s)
    return total;
}

static void vmcoreinfo_update_phys_base(DumpState *s)
{
    uint64_t size, note_head_size, name_size, phys_base;
    char **lines;
    uint8_t *vmci;
    size_t i;

    if (!note_name_equal(s, s->guest_note, "VMCOREINFO")) {
        return;
    }

    get_note_sizes(s, s->guest_note, &note_head_size, &name_size, &size);
    note_head_size = ROUND_UP(note_head_size, 4);

    vmci = s->guest_note + note_head_size + ROUND_UP(name_size, 4);
    *(vmci + size) = '\0';

    lines = g_strsplit((char *)vmci, "\n", -1);
    for (i = 0; lines[i]; i++) {
        if (g_str_has_prefix(lines[i], "NUMBER(phys_base)=")) {
            if (qemu_strtou64(lines[i] + 18, NULL, 16,
                              &phys_base) < 0) {
                warn_report("Failed to read NUMBER(phys_base)=");
            } else {
                s->dump_info.phys_base = phys_base;
            }
            break;
        }
    }

    g_strfreev(lines);
}

static void dump_init(DumpState *s, int fd, bool has_format,
                      DumpGuestMemoryFormat format, bool paging, bool has_filter,
                      int64_t begin, int64_t length, Error **errp)
{
    VMCoreInfoState *vmci = vmcoreinfo_find();
    CPUState *cpu;
    int nr_cpus;
    Error *err = NULL;
@@ -1568,6 +1709,48 @@ static void dump_init(DumpState *s, int fd, bool has_format,
        goto cleanup;
    }

    /*
     * The goal of this block is to (a) update the previously guessed
     * phys_base, (b) copy the guest note out of the guest.
     * Failure to do so is not fatal for dumping.
     */
    if (vmci) {
        uint64_t addr, note_head_size, name_size, desc_size;
        uint32_t size;
        uint16_t format;

        note_head_size = s->dump_info.d_class == ELFCLASS32 ?
            sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr);

        format = le16_to_cpu(vmci->vmcoreinfo.guest_format);
        size = le32_to_cpu(vmci->vmcoreinfo.size);
        addr = le64_to_cpu(vmci->vmcoreinfo.paddr);
        if (!vmci->has_vmcoreinfo) {
            warn_report("guest note is not present");
        } else if (size < note_head_size || size > MAX_GUEST_NOTE_SIZE) {
            warn_report("guest note size is invalid: %" PRIu32, size);
        } else if (format != VMCOREINFO_FORMAT_ELF) {
            warn_report("guest note format is unsupported: %" PRIu16, format);
        } else {
            s->guest_note = g_malloc(size + 1); /* +1 for adding \0 */
            cpu_physical_memory_read(addr, s->guest_note, size);

            get_note_sizes(s, s->guest_note, NULL, &name_size, &desc_size);
            s->guest_note_size = ELF_NOTE_SIZE(note_head_size, name_size,
                                               desc_size);
            if (name_size > MAX_GUEST_NOTE_SIZE ||
                desc_size > MAX_GUEST_NOTE_SIZE ||
                s->guest_note_size > size) {
                warn_report("Invalid guest note header");
                g_free(s->guest_note);
                s->guest_note = NULL;
            } else {
                vmcoreinfo_update_phys_base(s);
                s->note_size += s->guest_note_size;
            }
        }
    }

    /* get memory mapping */
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
+1 −0
Original line number Diff line number Diff line
@@ -718,6 +718,7 @@ static const TypeInfo piix4_pm_info = {
    .interfaces = (InterfaceInfo[]) {
        { TYPE_HOTPLUG_HANDLER },
        { TYPE_ACPI_DEVICE_IF },
        { INTERFACE_CONVENTIONAL_PCI_DEVICE },
        { }
    }
};
+1 −1
Original line number Diff line number Diff line
@@ -124,7 +124,7 @@ void vmgenid_add_fw_cfg(VmGenIdState *vms, FWCfgState *s, GArray *guid)
    fw_cfg_add_file(s, VMGENID_GUID_FW_CFG_FILE, guid->data,
                    VMGENID_FW_CFG_SIZE);
    /* Create a read-write fw_cfg file for Address */
    fw_cfg_add_file_callback(s, VMGENID_ADDR_FW_CFG_FILE, NULL, NULL,
    fw_cfg_add_file_callback(s, VMGENID_ADDR_FW_CFG_FILE, NULL, NULL, NULL,
                             vms->vmgenid_addr_le,
                             ARRAY_SIZE(vms->vmgenid_addr_le), false);
}
Loading