Commit c1e53367 authored by Srikar Dronamraju's avatar Srikar Dronamraju Committed by Michael Ellerman
Browse files

powerpc/smp: Cache CPU to chip lookup



On systems with a large number of CPUs per node, even with the filtered
matching of related CPUs, there can be a large number of calls to
cpu_to_chip_id() for the same CPU. For example, with a 4096 vCPU, 1 node
QEMU configuration with 4 threads per core, the system could see up to
1024 calls to cpu_to_chip_id() for the same CPU. On a given system,
cpu_to_chip_id() for a given CPU always returns the same value. Hence
cache the result in a lookup table for use in subsequent calls.

Since all CPUs sharing the same core belong to the same chip, the
lookup table has one entry per core. chip_id_lookup_table is never
freed, so it is reused when a CPU is brought online again after having
been offlined.

Reported-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Suggested-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Tested-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20210415120934.232271-4-srikar@linux.vnet.ibm.com
parent 131c82b6
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -31,6 +31,7 @@ extern u32 *cpu_to_phys_id;
extern bool coregroup_enabled;

/* Returns the chip id for @cpu, or -1 when none can be determined. */
extern int cpu_to_chip_id(int cpu);
/*
 * Cache of cpu_to_chip_id() results, one entry per core (indexed by
 * cpu / threads_per_core). NULL when no table has been allocated; an
 * entry of -1 means no chip id is cached for that core.
 */
extern int *chip_id_lookup_table;

#ifdef CONFIG_SMP

+15 −4
Original line number Diff line number Diff line
@@ -65,6 +65,8 @@
#define DBG(fmt...)
#endif

/*
 * Per-core cache consulted by cpu_to_chip_id(), indexed by
 * cpu / threads_per_core. Remains NULL until a table is allocated;
 * an entry of -1 means no chip id has been cached for that core.
 */
int *chip_id_lookup_table;

#ifdef CONFIG_PPC64
int __initdata iommu_is_off;
int __initdata iommu_force_on;
@@ -914,13 +916,22 @@ EXPORT_SYMBOL(of_get_ibm_chip_id);
/*
 * cpu_to_chip_id - look up the chip id of a logical CPU.
 * @cpu: logical CPU number.
 *
 * The result is cached in chip_id_lookup_table (one slot per core, indexed
 * by cpu / threads_per_core) when that table has been allocated, so repeated
 * calls for CPUs of the same core skip the device-tree lookup.
 *
 * Returns the chip id, or -1 if the CPU has no device node or no chip id
 * could be found for it.
 */
int cpu_to_chip_id(int cpu)
{
	struct device_node *np;
	int ret = -1, idx;

	/* Consult the cache before touching the device tree at all. */
	idx = cpu / threads_per_core;
	if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
		return chip_id_lookup_table[idx];

	np = of_get_cpu_node(cpu, NULL);
	if (np) {
		ret = of_get_ibm_chip_id(np);
		/* Drop the reference taken by of_get_cpu_node(); np is dead after this. */
		of_node_put(np);

		if (chip_id_lookup_table)
			chip_id_lookup_table[idx] = ret;
	}

	return ret;
}
EXPORT_SYMBOL(cpu_to_chip_id);

+19 −2
Original line number Diff line number Diff line
@@ -1073,6 +1073,20 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
				cpu_smallcore_mask(boot_cpuid));
	}

	if (cpu_to_chip_id(boot_cpuid) != -1) {
		int idx = num_possible_cpus() / threads_per_core;

		/*
		 * All threads of a core belong to the same chip, so
		 * chip_id_lookup_table has one entry per core.
		 * Assumption: if boot_cpuid doesn't have a chip-id, then no
		 * other CPU will have a chip-id either.
		 */
		chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
		if (chip_id_lookup_table)
			memset(chip_id_lookup_table, -1, sizeof(int) * idx);
	}

	if (smp_ops && smp_ops->probe)
		smp_ops->probe();
}
@@ -1468,8 +1482,8 @@ static void add_cpu_to_masks(int cpu)
{
	struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
	int first_thread = cpu_first_thread_sibling(cpu);
	int chip_id = cpu_to_chip_id(cpu);
	cpumask_var_t mask;
	int chip_id = -1;
	bool ret;
	int i;

@@ -1492,7 +1506,10 @@ static void add_cpu_to_masks(int cpu)
	if (has_coregroup_support())
		update_coregroup_mask(cpu, &mask);

	if (chip_id == -1 || !ret) {
	if (chip_id_lookup_table && ret)
		chip_id = cpu_to_chip_id(cpu);

	if (chip_id == -1) {
		cpumask_copy(per_cpu(cpu_core_map, cpu), cpu_cpu_mask(cpu));
		goto out;
	}