Commit 57924bcd authored by Igor Mammedov's avatar Igor Mammedov Committed by Eduardo Habkost
Browse files

numa: introduce machine callback for VCPU to node mapping



The current default round-robin way of distributing VCPUs among
NUMA nodes might be wrong in the case of multi-core/multi-thread
CPUs, confusing guests about the topology when cores from
the same socket end up on different nodes.

Allow a machine to override the default mapping by providing a
 MachineClass::cpu_index_to_socket_id()
callback, which allows it to group VCPUs from a socket
on the same NUMA node.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Andreas Färber <afaerber@suse.de>
Signed-off-by: Eduardo Habkost <ehabkost@redhat.com>
parent 3ef71975
Loading
Loading
Loading
Loading
+5 −0
Original line number Diff line number Diff line
@@ -82,6 +82,10 @@ bool machine_mem_merge(MachineState *machine);
 *    of HotplugHandler object, which handles hotplug operation
 *    for a given @dev. It may return NULL if @dev doesn't require
 *    any actions to be performed by hotplug handler.
 * @cpu_index_to_socket_id:
 *    used to provide @cpu_index to socket number mapping, allowing
 *    a machine to group CPU threads belonging to the same socket/package.
 *    Returns: socket number that the given cpu_index belongs to.
 */
struct MachineClass {
    /*< private >*/
@@ -118,6 +122,7 @@ struct MachineClass {

    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
                                           DeviceState *dev);
    unsigned (*cpu_index_to_socket_id)(unsigned cpu_index);
};

/**
+2 −1
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@
#include "qemu/option.h"
#include "sysemu/sysemu.h"
#include "sysemu/hostmem.h"
#include "hw/boards.h"

extern int nb_numa_nodes;   /* Number of NUMA nodes */

@@ -16,7 +17,7 @@ typedef struct node_info {
    bool present;
} NodeInfo;
extern NodeInfo numa_info[MAX_NODES];
void parse_numa_opts(void);
void parse_numa_opts(MachineClass *mc);
void numa_post_machine_init(void);
void query_numa_node_mem(uint64_t node_mem[]);
extern QemuOptsList qemu_numa_opts;
+13 −5
Original line number Diff line number Diff line
@@ -202,7 +202,7 @@ static void validate_numa_cpus(void)
    }
}

void parse_numa_opts(void)
void parse_numa_opts(MachineClass *mc)
{
    int i;

@@ -270,13 +270,21 @@ void parse_numa_opts(void)
                break;
            }
        }
        /* assigning the VCPUs round-robin is easier to implement, guest OSes
         * must cope with this anyway, because there are BIOSes out there in
         * real machines which also use this scheme.
        /* Historically VCPUs were assigned in round-robin order to NUMA
         * nodes. However, guests do not handle this well when
         * cores/threads from a multicore CPU appear on different nodes.
         * So allow boards to override the default distribution rule and
         * group VCPUs by socket, so that VCPUs from the same socket
         * end up on the same node.
         */
        if (i == nb_numa_nodes) {
            for (i = 0; i < max_cpus; i++) {
                set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
                unsigned node_id = i % nb_numa_nodes;
                if (mc->cpu_index_to_socket_id) {
                    node_id = mc->cpu_index_to_socket_id(i) % nb_numa_nodes;
                }

                set_bit(i, numa_info[node_id].node_cpu);
            }
        }

+1 −1
Original line number Diff line number Diff line
@@ -4170,7 +4170,7 @@ int main(int argc, char **argv, char **envp)
    default_drive(default_floppy, snapshot, IF_FLOPPY, 0, FD_OPTS);
    default_drive(default_sdcard, snapshot, IF_SD, 0, SD_OPTS);

    parse_numa_opts();
    parse_numa_opts(machine_class);

    if (qemu_opts_foreach(qemu_find_opts("mon"), mon_init_func, NULL, 1) != 0) {
        exit(1);