Commit 4a9a8876 authored by Anthony Liguori
Browse files

Merge remote-tracking branch 'luiz/queue/qmp' into staging



# By Laszlo Ersek
# Via Luiz Capitulino
* luiz/queue/qmp:
  dump: rebase from host-private RAMBlock offsets to guest-physical addresses
  dump: populate guest_phys_blocks
  dump: introduce GuestPhysBlockList
  dump: clamp guest-provided mapping lengths to ramblock sizes

Message-id: 1375974809-1757-1-git-send-email-lcapitulino@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
parents 283c8733 56c4bfb3
Loading
Loading
Loading
Loading
+96 −75
Original line number Diff line number Diff line
@@ -59,6 +59,7 @@ static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
}

typedef struct DumpState {
    GuestPhysBlockList guest_phys_blocks;
    ArchDumpInfo dump_info;
    MemoryMappingList list;
    uint16_t phdr_num;
@@ -69,7 +70,7 @@ typedef struct DumpState {
    hwaddr memory_offset;
    int fd;

    RAMBlock *block;
    GuestPhysBlock *next_block;
    ram_addr_t start;
    bool has_filter;
    int64_t begin;
@@ -81,6 +82,7 @@ static int dump_cleanup(DumpState *s)
{
    int ret = 0;

    guest_phys_blocks_free(&s->guest_phys_blocks);
    memory_mapping_list_free(&s->list);
    if (s->fd != -1) {
        close(s->fd);
@@ -187,7 +189,8 @@ static int write_elf32_header(DumpState *s)
}

static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset)
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf64_Phdr phdr;
    int ret;
@@ -197,15 +200,12 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target64(offset, endian);
    phdr.p_paddr = cpu_convert_to_target64(memory_mapping->phys_addr, endian);
    if (offset == -1) {
        /* When the memory is not stored into vmcore, offset will be -1 */
        phdr.p_filesz = 0;
    } else {
        phdr.p_filesz = cpu_convert_to_target64(memory_mapping->length, endian);
    }
    phdr.p_filesz = cpu_convert_to_target64(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target64(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target64(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
@@ -216,7 +216,8 @@ static int write_elf64_load(DumpState *s, MemoryMapping *memory_mapping,
}

static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
                            int phdr_index, hwaddr offset)
                            int phdr_index, hwaddr offset,
                            hwaddr filesz)
{
    Elf32_Phdr phdr;
    int ret;
@@ -226,15 +227,12 @@ static int write_elf32_load(DumpState *s, MemoryMapping *memory_mapping,
    phdr.p_type = cpu_convert_to_target32(PT_LOAD, endian);
    phdr.p_offset = cpu_convert_to_target32(offset, endian);
    phdr.p_paddr = cpu_convert_to_target32(memory_mapping->phys_addr, endian);
    if (offset == -1) {
        /* When the memory is not stored into vmcore, offset will be -1 */
        phdr.p_filesz = 0;
    } else {
        phdr.p_filesz = cpu_convert_to_target32(memory_mapping->length, endian);
    }
    phdr.p_filesz = cpu_convert_to_target32(filesz, endian);
    phdr.p_memsz = cpu_convert_to_target32(memory_mapping->length, endian);
    phdr.p_vaddr = cpu_convert_to_target32(memory_mapping->virt_addr, endian);

    assert(memory_mapping->length >= filesz);

    ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s);
    if (ret < 0) {
        dump_error(s, "dump: failed to write program header table.\n");
@@ -393,14 +391,14 @@ static int write_data(DumpState *s, void *buf, int length)
}

/* write the memory to vmcore. 1 page per I/O. */
static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
static int write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start,
                        int64_t size)
{
    int64_t i;
    int ret;

    for (i = 0; i < size / TARGET_PAGE_SIZE; i++) {
        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
                         TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
@@ -408,7 +406,7 @@ static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
    }

    if ((size % TARGET_PAGE_SIZE) != 0) {
        ret = write_data(s, block->host + start + i * TARGET_PAGE_SIZE,
        ret = write_data(s, block->host_addr + start + i * TARGET_PAGE_SIZE,
                         size % TARGET_PAGE_SIZE);
        if (ret < 0) {
            return ret;
@@ -418,57 +416,71 @@ static int write_memory(DumpState *s, RAMBlock *block, ram_addr_t start,
    return 0;
}

/* get the memory's offset in the vmcore */
static hwaddr get_offset(hwaddr phys_addr,
                                     DumpState *s)
/* get the memory's offset and size in the vmcore */
static void get_offset_range(hwaddr phys_addr,
                             ram_addr_t mapping_length,
                             DumpState *s,
                             hwaddr *p_offset,
                             hwaddr *p_filesz)
{
    RAMBlock *block;
    GuestPhysBlock *block;
    hwaddr offset = s->memory_offset;
    int64_t size_in_block, start;

    /* When the memory is not stored into vmcore, offset will be -1 */
    *p_offset = -1;
    *p_filesz = 0;

    if (s->has_filter) {
        if (phys_addr < s->begin || phys_addr >= s->begin + s->length) {
            return -1;
            return;
        }
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (s->has_filter) {
            if (block->offset >= s->begin + s->length ||
                block->offset + block->length <= s->begin) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin <= block->offset) {
                start = block->offset;
            if (s->begin <= block->target_start) {
                start = block->target_start;
            } else {
                start = s->begin;
            }

            size_in_block = block->length - (start - block->offset);
            if (s->begin + s->length < block->offset + block->length) {
                size_in_block -= block->offset + block->length -
                                 (s->begin + s->length);
            size_in_block = block->target_end - start;
            if (s->begin + s->length < block->target_end) {
                size_in_block -= block->target_end - (s->begin + s->length);
            }
        } else {
            start = block->offset;
            size_in_block = block->length;
            start = block->target_start;
            size_in_block = block->target_end - block->target_start;
        }

        if (phys_addr >= start && phys_addr < start + size_in_block) {
            return phys_addr - start + offset;
            *p_offset = phys_addr - start + offset;

            /* The offset range mapped from the vmcore file must not spill over
             * the GuestPhysBlock, clamp it. The rest of the mapping will be
             * zero-filled in memory at load time; see
             * <http://refspecs.linuxbase.org/elf/gabi4+/ch5.pheader.html>.
             */
            *p_filesz = phys_addr + mapping_length <= start + size_in_block ?
                        mapping_length :
                        size_in_block - (phys_addr - start);
            return;
        }

        offset += size_in_block;
    }

    return -1;
}

static int write_elf_loads(DumpState *s)
{
    hwaddr offset;
    hwaddr offset, filesz;
    MemoryMapping *memory_mapping;
    uint32_t phdr_index = 1;
    int ret;
@@ -481,11 +493,15 @@ static int write_elf_loads(DumpState *s)
    }

    QTAILQ_FOREACH(memory_mapping, &s->list.head, next) {
        offset = get_offset(memory_mapping->phys_addr, s);
        get_offset_range(memory_mapping->phys_addr,
                         memory_mapping->length,
                         s, &offset, &filesz);
        if (s->dump_info.d_class == ELFCLASS64) {
            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset);
            ret = write_elf64_load(s, memory_mapping, phdr_index++, offset,
                                   filesz);
        } else {
            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset);
            ret = write_elf32_load(s, memory_mapping, phdr_index++, offset,
                                   filesz);
        }

        if (ret < 0) {
@@ -596,7 +612,7 @@ static int dump_completed(DumpState *s)
    return 0;
}

static int get_next_block(DumpState *s, RAMBlock *block)
static int get_next_block(DumpState *s, GuestPhysBlock *block)
{
    while (1) {
        block = QTAILQ_NEXT(block, next);
@@ -606,16 +622,16 @@ static int get_next_block(DumpState *s, RAMBlock *block)
        }

        s->start = 0;
        s->block = block;
        s->next_block = block;
        if (s->has_filter) {
            if (block->offset >= s->begin + s->length ||
                block->offset + block->length <= s->begin) {
            if (block->target_start >= s->begin + s->length ||
                block->target_end <= s->begin) {
                /* This block is out of the range */
                continue;
            }

            if (s->begin > block->offset) {
                s->start = s->begin - block->offset;
            if (s->begin > block->target_start) {
                s->start = s->begin - block->target_start;
            }
        }

@@ -626,18 +642,18 @@ static int get_next_block(DumpState *s, RAMBlock *block)
/* write all memory to vmcore */
static int dump_iterate(DumpState *s)
{
    RAMBlock *block;
    GuestPhysBlock *block;
    int64_t size;
    int ret;

    while (1) {
        block = s->block;
        block = s->next_block;

        size = block->length;
        size = block->target_end - block->target_start;
        if (s->has_filter) {
            size -= s->start;
            if (s->begin + s->length < block->offset + block->length) {
                size -= block->offset + block->length - (s->begin + s->length);
            if (s->begin + s->length < block->target_end) {
                size -= block->target_end - (s->begin + s->length);
            }
        }
        ret = write_memory(s, block, s->start, size);
@@ -672,23 +688,23 @@ static int create_vmcore(DumpState *s)

static ram_addr_t get_start_block(DumpState *s)
{
    RAMBlock *block;
    GuestPhysBlock *block;

    if (!s->has_filter) {
        s->block = QTAILQ_FIRST(&ram_list.blocks);
        s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head);
        return 0;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset >= s->begin + s->length ||
            block->offset + block->length <= s->begin) {
    QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) {
        if (block->target_start >= s->begin + s->length ||
            block->target_end <= s->begin) {
            /* This block is out of the range */
            continue;
        }

        s->block = block;
        if (s->begin > block->offset) {
            s->start = s->begin - block->offset;
        s->next_block = block;
        if (s->begin > block->target_start) {
            s->start = s->begin - block->target_start;
        } else {
            s->start = 0;
        }
@@ -713,32 +729,35 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
        s->resume = false;
    }

    /* If we use KVM, we should synchronize the registers before we get dump
     * info or physmap info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        nr_cpus++;
    }

    s->errp = errp;
    s->fd = fd;
    s->has_filter = has_filter;
    s->begin = begin;
    s->length = length;

    guest_phys_blocks_init(&s->guest_phys_blocks);
    guest_phys_blocks_append(&s->guest_phys_blocks);

    s->start = get_start_block(s);
    if (s->start == -1) {
        error_set(errp, QERR_INVALID_PARAMETER, "begin");
        goto cleanup;
    }

    /*
     * get dump info: endian, class and architecture.
    /* get dump info: endian, class and architecture.
     * If the target architecture is not supported, cpu_get_dump_info() will
     * return -1.
     *
     * If we use KVM, we should synchronize the registers before we get dump
     * info.
     */
    cpu_synchronize_all_states();
    nr_cpus = 0;
    for (cpu = first_cpu; cpu != NULL; cpu = cpu->next_cpu) {
        nr_cpus++;
    }

    ret = cpu_get_dump_info(&s->dump_info);
    ret = cpu_get_dump_info(&s->dump_info, &s->guest_phys_blocks);
    if (ret < 0) {
        error_set(errp, QERR_UNSUPPORTED);
        goto cleanup;
@@ -754,13 +773,13 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
    /* get memory mapping */
    memory_mapping_list_init(&s->list);
    if (paging) {
        qemu_get_guest_memory_mapping(&s->list, &err);
        qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            goto cleanup;
        }
    } else {
        qemu_get_guest_simple_memory_mapping(&s->list);
        qemu_get_guest_simple_memory_mapping(&s->list, &s->guest_phys_blocks);
    }

    if (s->has_filter) {
@@ -812,6 +831,8 @@ static int dump_init(DumpState *s, int fd, bool paging, bool has_filter,
    return 0;

cleanup:
    guest_phys_blocks_free(&s->guest_phys_blocks);

    if (s->resume) {
        vm_start();
    }
@@ -859,7 +880,7 @@ void qmp_dump_guest_memory(bool paging, const char *file, bool has_begin,
        return;
    }

    s = g_malloc(sizeof(DumpState));
    s = g_malloc0(sizeof(DumpState));

    ret = dump_init(s, fd, paging, has_begin, begin, length, errp);
    if (ret < 0) {
+3 −1
Original line number Diff line number Diff line
@@ -20,7 +20,9 @@ typedef struct ArchDumpInfo {
    int d_class;    /* ELFCLASS32 or ELFCLASS64 */
} ArchDumpInfo;

int cpu_get_dump_info(ArchDumpInfo *info);
struct GuestPhysBlockList; /* memory_mapping.h */
int cpu_get_dump_info(ArchDumpInfo *info,
                      const struct GuestPhysBlockList *guest_phys_blocks);
ssize_t cpu_get_note_size(int class, int machine, int nr_cpus);

#endif
+28 −2
Original line number Diff line number Diff line
@@ -17,6 +17,25 @@
#include "qemu/queue.h"
#include "qemu/typedefs.h"

/* One run of guest RAM that is contiguous both in the guest-physical address
 * space and in host memory. Blocks are kept on a GuestPhysBlockList.
 */
typedef struct GuestPhysBlock {
    /* first guest-physical address of the block; visible to guest, reflects
     * PCI hole, etc
     */
    hwaddr target_start;

    /* guest-physical address one past the last byte of the block; implies
     * size (target_end - target_start)
     */
    hwaddr target_end;

    /* points into host memory, at the byte that backs target_start */
    uint8_t *host_addr;

    /* linkage within GuestPhysBlockList.head */
    QTAILQ_ENTRY(GuestPhysBlock) next;
} GuestPhysBlock;

/* point-in-time snapshot of guest-visible physical mappings
 *
 * Set up with guest_phys_blocks_init(), filled by guest_phys_blocks_append()
 * and released with guest_phys_blocks_free().
 */
typedef struct GuestPhysBlockList {
    unsigned num;   /* number of GuestPhysBlock elements linked on head */
    QTAILQ_HEAD(GuestPhysBlockHead, GuestPhysBlock) head;
} GuestPhysBlockList;

/* The physical and virtual address in the memory mapping are contiguous. */
typedef struct MemoryMapping {
    hwaddr phys_addr;
@@ -45,10 +64,17 @@ void memory_mapping_list_free(MemoryMappingList *list);

void memory_mapping_list_init(MemoryMappingList *list);

void qemu_get_guest_memory_mapping(MemoryMappingList *list, Error **errp);
void guest_phys_blocks_free(GuestPhysBlockList *list);
void guest_phys_blocks_init(GuestPhysBlockList *list);
void guest_phys_blocks_append(GuestPhysBlockList *list);

void qemu_get_guest_memory_mapping(MemoryMappingList *list,
                                   const GuestPhysBlockList *guest_phys_blocks,
                                   Error **errp);

/* get guest's memory mapping without doing paging (virtual address is 0). */
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
                                  const GuestPhysBlockList *guest_phys_blocks);

void memory_mapping_filter(MemoryMappingList *list, int64_t begin,
                           int64_t length);
+114 −9
Original line number Diff line number Diff line
@@ -11,9 +11,15 @@
 *
 */

#include <glib.h>

#include "cpu.h"
#include "exec/cpu-all.h"
#include "sysemu/memory_mapping.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"

//#define DEBUG_GUEST_PHYS_REGION_ADD

static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
                                                   MemoryMapping *mapping)
@@ -165,6 +171,101 @@ void memory_mapping_list_init(MemoryMappingList *list)
    QTAILQ_INIT(&list->head);
}

/* Unlink and free every GuestPhysBlock on @list, then reset the block count.
 * The list head itself is not freed.
 */
void guest_phys_blocks_free(GuestPhysBlockList *list)
{
    GuestPhysBlock *block, *following;

    /* the _SAFE iterator is required: each element is released while the
     * list is being walked
     */
    QTAILQ_FOREACH_SAFE(block, &list->head, next, following) {
        QTAILQ_REMOVE(&list->head, block, next);
        g_free(block);
    }

    list->num = 0;
}

/* Put @list into the empty state: no linked blocks and a count of zero. */
void guest_phys_blocks_init(GuestPhysBlockList *list)
{
    QTAILQ_INIT(&list->head);
    list->num = 0;
}

/* Couples a MemoryListener with the GuestPhysBlockList it fills in, so that
 * the region_add callback can get from the listener pointer it receives back
 * to the list via container_of().
 */
typedef struct GuestPhysListener {
    GuestPhysBlockList *list;   /* list that receives the RAM blocks */
    MemoryListener listener;    /* embedded listener handed to the memory API */
} GuestPhysListener;

/* MemoryListener region_add callback: record one RAM section in the
 * GuestPhysBlockList owned by the enclosing GuestPhysListener.
 *
 * Non-RAM sections are ignored. A section that directly continues the last
 * block of the list, in guest-physical space as well as in host memory, is
 * merged into that block; any other section becomes a new block appended at
 * the tail.
 */
static void guest_phys_blocks_region_add(MemoryListener *listener,
                                         MemoryRegionSection *section)
{
    GuestPhysListener *g;
    GuestPhysBlock *predecessor = NULL;
    hwaddr target_start, target_end;
    uint8_t *host_addr;

    /* we only care about RAM */
    if (!memory_region_is_ram(section->mr)) {
        return;
    }

    g            = container_of(listener, GuestPhysListener, listener);
    target_start = section->offset_within_address_space;
    target_end   = target_start + int128_get64(section->size);
    host_addr    = memory_region_get_ram_ptr(section->mr) +
                   section->offset_within_region;

    /* look for a tail block that this section seamlessly extends */
    if (!QTAILQ_EMPTY(&g->list->head)) {
        GuestPhysBlock *tail = QTAILQ_LAST(&g->list->head, GuestPhysBlockHead);
        hwaddr tail_size = tail->target_end - tail->target_start;

        /* the memory API guarantees monotonically increasing traversal */
        g_assert(tail->target_end <= target_start);

        /* join only when there is no gap in guest-physical space and the
         * host mapping carries on right where the tail block stops
         */
        if (tail->target_end >= target_start &&
            tail->host_addr + tail_size == host_addr) {
            predecessor = tail;
        }
    }

    if (predecessor != NULL) {
        /* grow the tail block up to @target_end; its start is unchanged */
        predecessor->target_end = target_end;
    } else {
        /* isolated mapping: allocate a fresh block and append it */
        GuestPhysBlock *block = g_malloc0(sizeof *block);

        block->host_addr    = host_addr;
        block->target_start = target_start;
        block->target_end   = target_end;

        QTAILQ_INSERT_TAIL(&g->list->head, block, next);
        ++g->list->num;
    }

#ifdef DEBUG_GUEST_PHYS_REGION_ADD
    fprintf(stderr, "%s: target_start=" TARGET_FMT_plx " target_end="
            TARGET_FMT_plx ": %s (count: %u)\n", __FUNCTION__, target_start,
            target_end, predecessor ? "joined" : "added", g->list->num);
#endif
}

/* Fill @list with the guest's RAM blocks.
 *
 * A temporary listener whose only callback is guest_phys_blocks_region_add()
 * is registered on address_space_memory and unregistered again right away.
 * NOTE(review): presumably registration replays the address space's current
 * sections through region_add -- confirm against the memory API.
 */
void guest_phys_blocks_append(GuestPhysBlockList *list)
{
    /* members not named here are zero-initialized */
    GuestPhysListener g = {
        .list = list,
        .listener = {
            .region_add = &guest_phys_blocks_region_add,
        },
    };

    memory_listener_register(&g.listener, &address_space_memory);
    memory_listener_unregister(&g.listener);
}

static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
{
    CPUState *cpu;
@@ -178,10 +279,12 @@ static CPUState *find_paging_enabled_cpu(CPUState *start_cpu)
    return NULL;
}

void qemu_get_guest_memory_mapping(MemoryMappingList *list, Error **errp)
void qemu_get_guest_memory_mapping(MemoryMappingList *list,
                                   const GuestPhysBlockList *guest_phys_blocks,
                                   Error **errp)
{
    CPUState *cpu, *first_paging_enabled_cpu;
    RAMBlock *block;
    GuestPhysBlock *block;
    ram_addr_t offset, length;

    first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
@@ -201,19 +304,21 @@ void qemu_get_guest_memory_mapping(MemoryMappingList *list, Error **errp)
     * If the guest doesn't use paging, the virtual address is equal to physical
     * address.
     */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = block->offset;
        length = block->length;
    QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
        offset = block->target_start;
        length = block->target_end - block->target_start;
        create_new_memory_mapping(list, offset, offset, length);
    }
}

void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list,
                                   const GuestPhysBlockList *guest_phys_blocks)
{
    RAMBlock *block;
    GuestPhysBlock *block;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        create_new_memory_mapping(list, block->offset, 0, block->length);
    QTAILQ_FOREACH(block, &guest_phys_blocks->head, next) {
        create_new_memory_mapping(list, block->target_start, 0,
                                  block->target_end - block->target_start);
    }
}

+2 −1
Original line number Diff line number Diff line
@@ -16,7 +16,8 @@
#include "qapi/qmp/qerror.h"
#include "qmp-commands.h"

int cpu_get_dump_info(ArchDumpInfo *info)
int cpu_get_dump_info(ArchDumpInfo *info,
                      const struct GuestPhysBlockList *guest_phys_blocks)
{
    return -1;
}
Loading