Commit 3bfe5716 authored by Laurent Vivier, committed by Eduardo Habkost
Browse files

numa: equally distribute memory on nodes

When there is not enough memory to give every node the minimum
allowed amount of memory, all the memory is put on the last node.

This is because we put (ram_size / nb_numa_nodes) &
~((1 << mc->numa_mem_align_shift) - 1) on each node, and in this
case that value rounds down to 0. This is particularly likely with
pseries, as the memory must be aligned to 256MB.

To avoid this problem, this patch uses an error diffusion algorithm [1]
to distribute the memory equally across the nodes.

We introduce a numa_auto_assign_ram() function pointer in MachineClass
to keep compatibility between machine type versions.
The legacy function is used with pseries-2.9, pc-q35-2.9 and
pc-i440fx-2.9 (and earlier versions); the new one is used with all others.

Example:

qemu-system-ppc64 -S -nographic  -nodefaults -monitor stdio -m 1G -smp 8 \
                  -numa node -numa node -numa node \
                  -numa node -numa node -numa node

Before:

(qemu) info numa
6 nodes
node 0 cpus: 0 6
node 0 size: 0 MB
node 1 cpus: 1 7
node 1 size: 0 MB
node 2 cpus: 2
node 2 size: 0 MB
node 3 cpus: 3
node 3 size: 0 MB
node 4 cpus: 4
node 4 size: 0 MB
node 5 cpus: 5
node 5 size: 1024 MB

After:
(qemu) info numa
6 nodes
node 0 cpus: 0 6
node 0 size: 0 MB
node 1 cpus: 1 7
node 1 size: 256 MB
node 2 cpus: 2
node 2 size: 0 MB
node 3 cpus: 3
node 3 size: 256 MB
node 4 cpus: 4
node 4 size: 256 MB
node 5 cpus: 5
node 5 size: 256 MB

[1] https://en.wikipedia.org/wiki/Error_diffusion



Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Message-Id: <20170502162955.1610-2-lvivier@redhat.com>
Reviewed-by: Eduardo Habkost <ehabkost@redhat.com>
[ehabkost: s/ram_size/size/ at numa_default_auto_assign_ram()]
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
parent 0f203430
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -17,6 +17,7 @@
#include "qapi/visitor.h"
#include "hw/sysbus.h"
#include "sysemu/sysemu.h"
#include "sysemu/numa.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"

@@ -400,6 +401,7 @@ static void machine_class_init(ObjectClass *oc, void *data)
     * On Linux, each node's border has to be 8MB aligned
     */
    mc->numa_mem_align_shift = 23;
    mc->numa_auto_assign_ram = numa_default_auto_assign_ram;

    object_class_property_add_str(oc, "accel",
        machine_get_accel, machine_set_accel, &error_abort);
+2 −0
Original line number Diff line number Diff line
@@ -54,6 +54,7 @@
#endif
#include "migration/migration.h"
#include "kvm_i386.h"
#include "sysemu/numa.h"

#define MAX_IDE_BUS 2

@@ -442,6 +443,7 @@ static void pc_i440fx_2_9_machine_options(MachineClass *m)
    pc_i440fx_machine_options(m);
    m->alias = "pc";
    m->is_default = 1;
    m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}

DEFINE_I440FX_MACHINE(v2_9, "pc-i440fx-2.9", NULL,
+2 −0
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@
#include "hw/usb.h"
#include "qemu/error-report.h"
#include "migration/migration.h"
#include "sysemu/numa.h"

/* ICH9 AHCI has 6 ports */
#define MAX_SATA_PORTS     6
@@ -305,6 +306,7 @@ static void pc_q35_2_9_machine_options(MachineClass *m)
{
    pc_q35_machine_options(m);
    m->alias = "q35";
    m->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}

DEFINE_Q35_MACHINE(v2_9, "pc-q35-2.9", NULL,
+1 −0
Original line number Diff line number Diff line
@@ -3242,6 +3242,7 @@ static void spapr_machine_2_9_class_options(MachineClass *mc)
{
    spapr_machine_2_10_class_options(mc);
    SET_MACHINE_COMPAT(mc, SPAPR_COMPAT_2_9);
    mc->numa_auto_assign_ram = numa_legacy_auto_assign_ram;
}

DEFINE_SPAPR_MACHINE(2_9, "2.9", false);
+2 −0
Original line number Diff line number Diff line
@@ -136,6 +136,8 @@ struct MachineClass {
    int minimum_page_bits;
    bool has_hotpluggable_cpus;
    int numa_mem_align_shift;
    void (*numa_auto_assign_ram)(MachineClass *mc, NodeInfo *nodes,
                                 int nb_nodes, ram_addr_t size);

    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                           DeviceState *dev);
Loading