Commit 53cb28cb authored by Marcel Apfelbaum, committed by Michael S. Tsirkin
Browse files

exec: separate sections and nodes per address space



Every address space has its own nodes and sections, but
they are all stored in the same global arrays of nodes/sections.

This limits the number of devices that can be attached
to the guest to 20-30 devices. It happens because:
 - The sections array is limited to 2^12 entries.
 - The main memory has at least 100 sections.
 - Each device address space is actually an alias to
   main memory, multiplying its number of nodes/sections.

Remove the limitation by using separate arrays of
nodes and sections for each address space.

Signed-off-by: Marcel Apfelbaum <marcel.a@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent 6307d974
Loading
Loading
Loading
Loading
+66 −89
Original line number Diff line number Diff line
@@ -103,13 +103,21 @@ struct PhysPageEntry {

typedef PhysPageEntry Node[P_L2_SIZE];

/* Storage for the nodes and sections of a physical page map.
 * Both arrays are grown on demand with g_renew() by
 * phys_map_node_reserve() and phys_section_add().
 */
typedef struct PhysPageMap {
    /* Number of entries of sections[] in use; also the next index handed out. */
    unsigned sections_nb;
    /* Number of entries currently allocated for sections[]. */
    unsigned sections_nb_alloc;
    /* Number of entries of nodes[] in use; also the next index handed out. */
    unsigned nodes_nb;
    /* Number of entries currently allocated for nodes[]. */
    unsigned nodes_nb_alloc;
    /* Array of nodes, each holding P_L2_SIZE PhysPageEntry slots. */
    Node *nodes;
    /* Array of sections, indexed by the value phys_section_add() returns. */
    MemoryRegionSection *sections;
} PhysPageMap;

/* Dispatch state of one address space: the root entry of its physical page
 * map, the storage backing that map, and the address space it was built for.
 *
 * NOTE(review): this diff view shows both the separate nodes/sections
 * pointers and the embedded `map`; the pointers appear to be the pre-change
 * lines that `map` replaces - confirm against the tree.
 */
struct AddressSpaceDispatch {
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    Node *nodes;
    MemoryRegionSection *sections;
    /* Node and section arrays of this dispatch; mem_commit() releases them
     * with phys_sections_free() before freeing the old dispatch.
     */
    PhysPageMap map;
    /* Address space this dispatch was created for (assigned in mem_begin()). */
    AddressSpace *as;
};

@@ -126,18 +134,6 @@ typedef struct subpage_t {
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

/* Counters and backing arrays for the nodes and sections of a physical
 * page map.
 *
 * NOTE(review): the enclosing hunk shrinks from 18 to 6 lines, so this copy
 * of the typedef looks like the one being removed - confirm against the tree.
 */
typedef struct PhysPageMap {
    /* Entries of sections[] in use. */
    unsigned sections_nb;
    /* Entries allocated for sections[]. */
    unsigned sections_nb_alloc;
    /* Entries of nodes[] in use. */
    unsigned nodes_nb;
    /* Entries allocated for nodes[]. */
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

static PhysPageMap *prev_map;
static PhysPageMap next_map;

static void io_mem_init(void);
static void memory_map_init(void);

@@ -146,35 +142,32 @@ static MemoryRegion io_mem_watch;

#if !defined(CONFIG_USER_ONLY)

/* Ensure the node array has room for `nodes` additional entries.
 *
 * If nodes_nb + nodes exceeds the allocated count, the allocation is doubled
 * (with a floor of 16), raised further to nodes_nb + nodes when doubling is
 * not enough, and the array is resized with g_renew().
 *
 * NOTE(review): both variants are shown interleaved in this diff view - one
 * working on the global next_map, one on the PhysPageMap passed in as `map`.
 */
static void phys_map_node_reserve(unsigned nodes)
static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
                                            16);
        next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
                                      next_map.nodes_nb + nodes);
        next_map.nodes = g_renew(Node, next_map.nodes,
                                 next_map.nodes_nb_alloc);
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        /* Grow geometrically, never below 16 entries. */
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        /* Doubling may still be too small for a large request. */
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

/* Hand out the next unused node and return its index in the node array.
 *
 * Every one of the node's P_L2_SIZE entries is initialised to skip = 1 and
 * ptr = PHYS_MAP_NODE_NIL. The function does not grow the array itself: it
 * asserts that the returned index differs from nodes_nb_alloc, so room must
 * have been reserved beforehand (phys_page_set() calls
 * phys_map_node_reserve() first).
 *
 * NOTE(review): the next_map.* lines and the map->* lines are the two sides
 * of the diff shown interleaved.
 */
static uint32_t phys_map_node_alloc(void)
static uint32_t phys_map_node_alloc(PhysPageMap *map)
{
    unsigned i;
    uint32_t ret;

    ret = next_map.nodes_nb++;
    ret = map->nodes_nb++;
    /* The NIL value is reserved as the "no node" marker. */
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != next_map.nodes_nb_alloc);
    assert(ret != map->nodes_nb_alloc);
    for (i = 0; i < P_L2_SIZE; ++i) {
        next_map.nodes[ret][i].skip = 1;
        next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
        map->nodes[ret][i].skip = 1;
        map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
@@ -182,8 +175,8 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = next_map.nodes[lp->ptr];
        lp->ptr = phys_map_node_alloc(map);
        p = map->nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < P_L2_SIZE; i++) {
                p[i].skip = 0;
@@ -191,7 +184,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
            }
        }
    } else {
        p = next_map.nodes[lp->ptr];
        p = map->nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

@@ -202,7 +195,7 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
@@ -213,9 +206,9 @@ static void phys_page_set(AddressSpaceDispatch *d,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non leaf page entry. Simply detect that the entry has a single child,
@@ -276,7 +269,7 @@ static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->nodes, compacted);
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

@@ -317,10 +310,10 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
    MemoryRegionSection *section;
    subpage_t *subpage;

    section = phys_page_find(d->phys_map, addr, d->nodes, d->sections);
    section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    return section;
}
@@ -788,7 +781,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        iotlb = section - address_space_memory.dispatch->sections;
        iotlb = section - address_space_memory.dispatch->map.sections;
        iotlb += xlat;
    }

@@ -827,23 +820,23 @@ void phys_mem_set_alloc(void *(*alloc)(size_t))
    phys_mem_alloc = alloc;
}

/* Append a copy of *section to the section array and return its index.
 *
 * When the array is full its allocation is doubled (minimum 16 entries) via
 * g_renew(). memory_region_ref() is called on section->mr for the stored
 * copy. The index is asserted to stay below TARGET_PAGE_SIZE for the reason
 * given in the comment inside the function.
 *
 * NOTE(review): the next_map.* lines and the map->* lines are the two sides
 * of the diff shown interleaved.
 */
static uint16_t phys_section_add(MemoryRegionSection *section)
static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)
{
    /* The physical section number is ORed with a page-aligned
     * pointer to produce the iotlb entries.  Thus it should
     * never overflow into the page-aligned value.
     */
    assert(next_map.sections_nb < TARGET_PAGE_SIZE);
    assert(map->sections_nb < TARGET_PAGE_SIZE);

    if (next_map.sections_nb == next_map.sections_nb_alloc) {
        next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
                                         16);
        next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
                                    next_map.sections_nb_alloc);
    if (map->sections_nb == map->sections_nb_alloc) {
        map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
        map->sections = g_renew(MemoryRegionSection, map->sections,
                                map->sections_nb_alloc);
    }
    next_map.sections[next_map.sections_nb] = *section;
    /* Struct copy: the array owns its own MemoryRegionSection value. */
    map->sections[map->sections_nb] = *section;
    memory_region_ref(section->mr);
    return next_map.sections_nb++;
    return map->sections_nb++;
}

static void phys_section_destroy(MemoryRegion *mr)
@@ -865,7 +858,6 @@ static void phys_sections_free(PhysPageMap *map)
    }
    g_free(map->sections);
    g_free(map->nodes);
    g_free(map);
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
@@ -874,7 +866,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
                                                   next_map.nodes, next_map.sections);
                                                   d->map.nodes, d->map.sections);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = int128_make64(TARGET_PAGE_SIZE),
@@ -887,13 +879,14 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
        subpage = subpage_init(d->as, base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
                      phys_section_add(&d->map, &subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + int128_get64(section->size) - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
    subpage_register(subpage, start, end,
                     phys_section_add(&d->map, section));
}


@@ -901,7 +894,7 @@ static void register_multipage(AddressSpaceDispatch *d,
                               MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    uint16_t section_index = phys_section_add(section);
    uint16_t section_index = phys_section_add(&d->map, section);
    uint64_t num_pages = int128_get64(int128_rshift(section->size,
                                                    TARGET_PAGE_BITS));

@@ -1720,7 +1713,7 @@ static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
    return mmio;
}

static uint16_t dummy_section(MemoryRegion *mr)
static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .mr = mr,
@@ -1729,12 +1722,13 @@ static uint16_t dummy_section(MemoryRegion *mr)
        .size = int128_2_64(),
    };

    return phys_section_add(&section);
    return phys_section_add(map, &section);
}

/* Map an iotlb value back to its MemoryRegion.
 *
 * Only the bits below the page mask (index & ~TARGET_PAGE_MASK) are used;
 * they select an entry in the section array of address_space_memory's
 * current dispatch, and that entry's `mr` is returned.
 *
 * NOTE(review): the two return statements are the two sides of the diff
 * (dispatch->sections versus dispatch->map.sections) shown interleaved.
 */
MemoryRegion *iotlb_to_region(hwaddr index)
{
    return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
    return address_space_memory.dispatch->map.sections[
           index & ~TARGET_PAGE_MASK].mr;
}

static void io_mem_init(void)
@@ -1751,7 +1745,17 @@ static void io_mem_init(void)
static void mem_begin(MemoryListener *listener)
{
    AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
    AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
    uint16_t n;

    n = dummy_section(&d->map, &io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&d->map, &io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&d->map, &io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&d->map, &io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);

    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
    d->as = as;
@@ -1764,39 +1768,14 @@ static void mem_commit(MemoryListener *listener)
    AddressSpaceDispatch *cur = as->dispatch;
    AddressSpaceDispatch *next = as->next_dispatch;

    next->nodes = next_map.nodes;
    next->sections = next_map.sections;

    phys_page_compact_all(next, next_map.nodes_nb);
    phys_page_compact_all(next, next->map.nodes_nb);

    as->dispatch = next;
    g_free(cur);
}

static void core_begin(MemoryListener *listener)
{
    uint16_t n;

    prev_map = g_new(PhysPageMap, 1);
    *prev_map = next_map;

    memset(&next_map, 0, sizeof(next_map));
    n = dummy_section(&io_mem_unassigned);
    assert(n == PHYS_SECTION_UNASSIGNED);
    n = dummy_section(&io_mem_notdirty);
    assert(n == PHYS_SECTION_NOTDIRTY);
    n = dummy_section(&io_mem_rom);
    assert(n == PHYS_SECTION_ROM);
    n = dummy_section(&io_mem_watch);
    assert(n == PHYS_SECTION_WATCH);
    if (cur) {
        phys_sections_free(&cur->map);
        g_free(cur);
    }

/* This listener's commit runs after the other AddressSpaceDispatch listeners'.
 * All AddressSpaceDispatch instances have switched to the next map.
 */
static void core_commit(MemoryListener *listener)
{
    phys_sections_free(prev_map);
}

static void tcg_commit(MemoryListener *listener)
@@ -1824,8 +1803,6 @@ static void core_log_global_stop(MemoryListener *listener)
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .commit = core_commit,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,