Commit 291009f6 authored by Linus Torvalds
Browse files
Pull power management fixes from Rafael Wysocki:
 "Address a performance regression related to scale-invariance on x86
  that may prevent turbo CPU frequencies from being used in certain
  workloads on systems using acpi-cpufreq as the CPU performance scaling
  driver and schedutil as the scaling governor"

* tag 'pm-5.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  cpufreq: ACPI: Update arch scale-invariance max perf ratio if CPPC is not there
  cpufreq: ACPI: Extend frequency tables to cover boost frequencies
parents a3961497 d11a1d08
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -1833,6 +1833,7 @@ void arch_set_max_freq_ratio(bool turbo_disabled)
	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
	arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
					arch_turbo_freq_ratio;
					arch_turbo_freq_ratio;
}
}
EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);


static bool turbo_disabled(void)
static bool turbo_disabled(void)
{
{
+103 −12
Original line number Original line Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/uaccess.h>
#include <linux/uaccess.h>


#include <acpi/processor.h>
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>


#include <asm/msr.h>
#include <asm/msr.h>
#include <asm/processor.h>
#include <asm/processor.h>
@@ -53,6 +54,7 @@ struct acpi_cpufreq_data {
	unsigned int resume;
	unsigned int resume;
	unsigned int cpu_feature;
	unsigned int cpu_feature;
	unsigned int acpi_perf_cpu;
	unsigned int acpi_perf_cpu;
	unsigned int first_perf_state;
	cpumask_var_t freqdomain_cpus;
	cpumask_var_t freqdomain_cpus;
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
	u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@@ -221,10 +223,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)


	perf = to_perf_data(data);
	perf = to_perf_data(data);


	cpufreq_for_each_entry(pos, policy->freq_table)
	cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state)
		if (msr == perf->states[pos->driver_data].status)
		if (msr == perf->states[pos->driver_data].status)
			return pos->frequency;
			return pos->frequency;
	return policy->freq_table[0].frequency;
	return policy->freq_table[data->first_perf_state].frequency;
}
}


static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@@ -363,6 +365,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
	struct cpufreq_policy *policy;
	struct cpufreq_policy *policy;
	unsigned int freq;
	unsigned int freq;
	unsigned int cached_freq;
	unsigned int cached_freq;
	unsigned int state;


	pr_debug("%s (%d)\n", __func__, cpu);
	pr_debug("%s (%d)\n", __func__, cpu);


@@ -374,7 +377,11 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
	if (unlikely(!data || !policy->freq_table))
	if (unlikely(!data || !policy->freq_table))
		return 0;
		return 0;


	cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
	state = to_perf_data(data)->state;
	if (state < data->first_perf_state)
		state = data->first_perf_state;

	cached_freq = policy->freq_table[state].frequency;
	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
	freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
	if (freq != cached_freq) {
	if (freq != cached_freq) {
		/*
		/*
@@ -628,16 +635,54 @@ static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
}
}
#endif
#endif


#ifdef CONFIG_ACPI_CPPC_LIB
static u64 get_max_boost_ratio(unsigned int cpu)
{
	struct cppc_perf_caps perf_caps;
	u64 highest_perf, nominal_perf;
	int ret;

	if (acpi_pstate_strict)
		return 0;

	ret = cppc_get_perf_caps(cpu, &perf_caps);
	if (ret) {
		pr_debug("CPU%d: Unable to get performance capabilities (%d)\n",
			 cpu, ret);
		return 0;
	}

	highest_perf = perf_caps.highest_perf;
	nominal_perf = perf_caps.nominal_perf;

	if (!highest_perf || !nominal_perf) {
		pr_debug("CPU%d: highest or nominal performance missing\n", cpu);
		return 0;
	}

	if (highest_perf < nominal_perf) {
		pr_debug("CPU%d: nominal performance above highest\n", cpu);
		return 0;
	}

	return div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
}
#else
/* Fallback for builds without CONFIG_ACPI_CPPC_LIB: always returns 0. */
static inline u64 get_max_boost_ratio(unsigned int cpu)
{
	return 0;
}
#endif

static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
{
	unsigned int i;
	struct cpufreq_frequency_table *freq_table;
	unsigned int valid_states = 0;
	struct acpi_processor_performance *perf;
	unsigned int cpu = policy->cpu;
	struct acpi_cpufreq_data *data;
	struct acpi_cpufreq_data *data;
	unsigned int cpu = policy->cpu;
	struct cpuinfo_x86 *c = &cpu_data(cpu);
	unsigned int valid_states = 0;
	unsigned int result = 0;
	unsigned int result = 0;
	struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
	unsigned int state_count;
	struct acpi_processor_performance *perf;
	u64 max_boost_ratio;
	struct cpufreq_frequency_table *freq_table;
	unsigned int i;
#ifdef CONFIG_SMP
#ifdef CONFIG_SMP
	static int blacklisted;
	static int blacklisted;
#endif
#endif
@@ -750,8 +795,28 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
		goto err_unreg;
		goto err_unreg;
	}
	}


	freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
	state_count = perf->state_count + 1;
			     GFP_KERNEL);

	max_boost_ratio = get_max_boost_ratio(cpu);
	if (max_boost_ratio) {
		/*
		 * Make room for one more entry to represent the highest
		 * available "boost" frequency.
		 */
		state_count++;
		valid_states++;
		data->first_perf_state = valid_states;
	} else {
		/*
		 * If the maximum "boost" frequency is unknown, ask the arch
		 * scale-invariance code to use the "nominal" performance for
		 * CPU utilization scaling so as to prevent the schedutil
		 * governor from selecting inadequate CPU frequencies.
		 */
		arch_set_max_freq_ratio(true);
	}

	freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
	if (!freq_table) {
	if (!freq_table) {
		result = -ENOMEM;
		result = -ENOMEM;
		goto err_unreg;
		goto err_unreg;
@@ -785,6 +850,30 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
		valid_states++;
		valid_states++;
	}
	}
	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
	freq_table[valid_states].frequency = CPUFREQ_TABLE_END;

	if (max_boost_ratio) {
		unsigned int state = data->first_perf_state;
		unsigned int freq = freq_table[state].frequency;

		/*
		 * Because the loop above sorts the freq_table entries in
		 * descending order, freq is the maximum frequency in the table.
		 * Assume that it corresponds to the CPPC nominal frequency and
		 * use it to populate the frequency field of the extra "boost"
		 * frequency entry.
		 */
		freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
		/*
		 * The purpose of the extra "boost" frequency entry is to make
		 * the rest of cpufreq aware of the real maximum frequency, but
		 * the way to request it is the same as for the first_perf_state
		 * entry that is expected to cover the entire range of "boost"
		 * frequencies of the CPU, so copy the driver_data value from
		 * that entry.
		 */
		freq_table[0].driver_data = freq_table[state].driver_data;
	}

	policy->freq_table = freq_table;
	policy->freq_table = freq_table;
	perf->state = 0;
	perf->state = 0;


@@ -858,8 +947,10 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
{
{
	struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
	struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
							      policy->cpu);
							      policy->cpu);
	struct acpi_cpufreq_data *data = policy->driver_data;
	unsigned int freq = policy->freq_table[data->first_perf_state].frequency;


	if (perf->states[0].core_frequency * 1000 != policy->cpuinfo.max_freq)
	if (perf->states[0].core_frequency * 1000 != freq)
		pr_warn(FW_WARN "P-state 0 is not max freq\n");
		pr_warn(FW_WARN "P-state 0 is not max freq\n");
}
}