Commit dd9f2ae9 authored by Rafael J. Wysocki

Merge branch 'pm-cpufreq'

* pm-cpufreq: (22 commits)
  cpufreq: Kconfig: fix documentation links
  cpufreq: intel_pstate: Simplify intel_pstate_update_perf_limits()
  cpufreq: armada-37xx: Fix module unloading
  cpufreq: armada-37xx: Remove cur_frequency variable
  cpufreq: armada-37xx: Fix determining base CPU frequency
  cpufreq: armada-37xx: Fix driver cleanup when registration failed
  clk: mvebu: armada-37xx-periph: Fix workaround for switching from L1 to L0
  clk: mvebu: armada-37xx-periph: Fix switching CPU freq from 250 Mhz to 1 GHz
  cpufreq: armada-37xx: Fix the AVS value for load L1
  clk: mvebu: armada-37xx-periph: remove .set_parent method for CPU PM clock
  cpufreq: armada-37xx: Fix setting TBG parent for load levels
  cpufreq: Remove unused for_each_policy macro
  cpufreq: dt: dev_pm_opp_of_cpumask_add_table() may return -EPROBE_DEFER
  cpufreq: intel_pstate: Clean up frequency computations
  cpufreq: cppc: simplify default delay_us setting
  cpufreq: Rudimentary typos fix in the file s5pv210-cpufreq.c
  cpufreq: CPPC: Add support for frequency invariance
  ia64: fix format string for ia64-acpi-cpu-freq
  cpufreq: schedutil: Call sugov_update_next_freq() before check to fast_switch_enabled
  arch_topology: Export arch_freq_scale and helpers
  ...
parents 71f4dd34 733dda9c
+1 −9
@@ -17,17 +17,9 @@ int pcibus_to_node(struct pci_bus *bus);
#include <linux/arch_topology.h>

void update_freq_counters_refs(void);
void topology_scale_freq_tick(void);

#ifdef CONFIG_ARM64_AMU_EXTN
/*
 * Replace task scheduler's default counter-based
 * frequency-invariance scale factor setting.
 */
#define arch_scale_freq_tick topology_scale_freq_tick
#endif /* CONFIG_ARM64_AMU_EXTN */

/* Replace task scheduler's default frequency-invariant accounting */
#define arch_scale_freq_tick topology_scale_freq_tick
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
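
For context, arch_scale_freq_capacity() is the hook the task scheduler uses to turn raw CPU time into frequency-invariant utilization, which is why the header above now wires it (together with the tick and set hooks) to the topology_* helpers unconditionally rather than only under CONFIG_ARM64_AMU_EXTN. Below is a minimal userspace sketch of that scaling step; the 1024-based fixed point matches SCHED_CAPACITY_SCALE, while freq_scale_of() and the sample numbers are purely illustrative, not kernel code.

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

/* Stand-in for the per-CPU scale factor that topology_set_freq_scale()
 * or a counter-based tick handler keeps up to date in the kernel. */
static unsigned long freq_scale_of(int cpu)
{
	(void)cpu;
	return 512;	/* pretend this CPU currently runs at half speed */
}

int main(void)
{
	uint64_t delta_exec_ns = 2000000;	/* 2 ms of raw runtime */

	/* Frequency-invariant contribution: raw time * scale / 1024 */
	uint64_t inv_ns = (delta_exec_ns * freq_scale_of(0))
				>> SCHED_CAPACITY_SHIFT;

	/* Prints 1000000: 2 ms at half speed counts as 1 ms of work. */
	printf("invariant runtime = %llu ns\n", (unsigned long long)inv_ns);
	return 0;
}
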
+41 −68
@@ -199,12 +199,47 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
	return 0;
}

static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
#define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
static void amu_scale_freq_tick(void)
{
	u64 prev_core_cnt, prev_const_cnt;
	u64 core_cnt, const_cnt, scale;

	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);

	update_freq_counters_refs();

	const_cnt = this_cpu_read(arch_const_cycles_prev);
	core_cnt = this_cpu_read(arch_core_cycles_prev);

	if (unlikely(core_cnt <= prev_core_cnt ||
		     const_cnt <= prev_const_cnt))
		return;

	/*
	 *	    /\core    arch_max_freq_scale
	 * scale =  ------- * --------------------
	 *	    /\const   SCHED_CAPACITY_SCALE
	 *
	 * See validate_cpu_freq_invariance_counters() for details on
	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
	 */
	scale = core_cnt - prev_core_cnt;
	scale *= this_cpu_read(arch_max_freq_scale);
	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
			  const_cnt - prev_const_cnt);

	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
	this_cpu_write(arch_freq_scale, (unsigned long)scale);
}

static struct scale_freq_data amu_sfd = {
	.source = SCALE_FREQ_SOURCE_ARCH,
	.set_freq_scale = amu_scale_freq_tick,
};

static void amu_fie_setup(const struct cpumask *cpus)
{
	bool invariant;
	int cpu;

	/* We are already set since the last insmod of the cpufreq driver */
@@ -221,25 +256,10 @@ static void amu_fie_setup(const struct cpumask *cpus)

	cpumask_or(amu_fie_cpus, amu_fie_cpus, cpus);

	invariant = topology_scale_freq_invariant();

	/* We aren't fully invariant yet */
	if (!invariant && !cpumask_equal(amu_fie_cpus, cpu_present_mask))
		return;

	static_branch_enable(&amu_fie_key);
	topology_set_scale_freq_source(&amu_sfd, amu_fie_cpus);

	pr_debug("CPUs[%*pbl]: counters will be used for FIE.",
		 cpumask_pr_args(cpus));

	/*
	 * Task scheduler behavior depends on frequency invariance support,
	 * either cpufreq or counter driven. If the support status changes as
	 * a result of counter initialisation and use, retrigger the build of
	 * scheduling domains to ensure the information is propagated properly.
	 */
	if (!invariant)
		rebuild_sched_domains_energy();
}

static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
@@ -256,8 +276,8 @@ static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
	 * initialized AMU support and enabled invariance. The AMU counters will
	 * keep on working just fine in the absence of the cpufreq driver, and
	 * for the CPUs for which there are no counters available, the last set
	 * value of freq_scale will remain valid as that is the frequency those
	 * CPUs are running at.
	 * value of arch_freq_scale will remain valid as that is the frequency
	 * those CPUs are running at.
	 */

	return 0;
@@ -283,53 +303,6 @@ static int __init init_amu_fie(void)
}
core_initcall(init_amu_fie);

bool arch_freq_counters_available(const struct cpumask *cpus)
{
	return amu_freq_invariant() &&
	       cpumask_subset(cpus, amu_fie_cpus);
}

void topology_scale_freq_tick(void)
{
	u64 prev_core_cnt, prev_const_cnt;
	u64 core_cnt, const_cnt, scale;
	int cpu = smp_processor_id();

	if (!amu_freq_invariant())
		return;

	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
		return;

	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);

	update_freq_counters_refs();

	const_cnt = this_cpu_read(arch_const_cycles_prev);
	core_cnt = this_cpu_read(arch_core_cycles_prev);

	if (unlikely(core_cnt <= prev_core_cnt ||
		     const_cnt <= prev_const_cnt))
		return;

	/*
	 *	    /\core    arch_max_freq_scale
	 * scale =  ------- * --------------------
	 *	    /\const   SCHED_CAPACITY_SCALE
	 *
	 * See validate_cpu_freq_invariance_counters() for details on
	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
	 */
	scale = core_cnt - prev_core_cnt;
	scale *= this_cpu_read(arch_max_freq_scale);
	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
			  const_cnt - prev_const_cnt);

	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
	this_cpu_write(freq_scale, (unsigned long)scale);
}

#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
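
The comment block in amu_scale_freq_tick() above compresses the whole computation into one fixed-point expression. The following standalone sketch walks the same arithmetic with concrete numbers; the 25 MHz / 2 GHz platform values and the way max_freq_scale is derived here are assumptions made for the example only.

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1UL << SCHED_CAPACITY_SHIFT)

int main(void)
{
	/* Assumed platform values for the example. */
	uint64_t ref_rate = 25000000;    /* constant counter: 25 MHz  */
	uint64_t max_rate = 2000000000;  /* max CPU clock:    2.0 GHz */
	uint64_t cur_rate = 1000000000;  /* actual CPU clock: 1.0 GHz */
	uint64_t window_us = 4000;       /* one tick window:  4 ms    */

	/* Hypothetical stand-in for the precomputed arch_max_freq_scale. */
	uint64_t max_freq_scale =
		(ref_rate << (2 * SCHED_CAPACITY_SHIFT)) / max_rate;

	/* Counter deltas accumulated over the tick window. */
	uint64_t dconst = ref_rate * window_us / 1000000;
	uint64_t dcore  = cur_rate * window_us / 1000000;

	/* scale = dcore/dconst * max_freq_scale / SCHED_CAPACITY_SCALE */
	uint64_t scale = ((dcore * max_freq_scale) >> SCHED_CAPACITY_SHIFT)
			 / dconst;
	if (scale > SCHED_CAPACITY_SCALE)
		scale = SCHED_CAPACITY_SCALE;

	/* Prints 511, i.e. roughly half of SCHED_CAPACITY_SCALE. */
	printf("freq scale = %llu\n", (unsigned long long)scale);
	return 0;
}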

+83 −6
@@ -21,17 +21,94 @@
#include <linux/sched.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct scale_freq_data *, sft_data);
static struct cpumask scale_freq_counters_mask;
static bool scale_freq_invariant;

static bool supports_scale_freq_counters(const struct cpumask *cpus)
{
	return cpumask_subset(cpus, &scale_freq_counters_mask);
}

bool topology_scale_freq_invariant(void)
{
	return cpufreq_supports_freq_invariance() ||
	       arch_freq_counters_available(cpu_online_mask);
	       supports_scale_freq_counters(cpu_online_mask);
}

__weak bool arch_freq_counters_available(const struct cpumask *cpus)
static void update_scale_freq_invariant(bool status)
{
	return false;
	if (scale_freq_invariant == status)
		return;

	/*
	 * Task scheduler behavior depends on frequency invariance support,
	 * either cpufreq or counter driven. If the support status changes as
	 * a result of counter initialisation and use, retrigger the build of
	 * scheduling domains to ensure the information is propagated properly.
	 */
	if (topology_scale_freq_invariant() == status) {
		scale_freq_invariant = status;
		rebuild_sched_domains_energy();
	}
}

void topology_set_scale_freq_source(struct scale_freq_data *data,
				    const struct cpumask *cpus)
{
	struct scale_freq_data *sfd;
	int cpu;

	/*
	 * Avoid calling rebuild_sched_domains() unnecessarily if FIE is
	 * supported by cpufreq.
	 */
	if (cpumask_empty(&scale_freq_counters_mask))
		scale_freq_invariant = topology_scale_freq_invariant();

	for_each_cpu(cpu, cpus) {
		sfd = per_cpu(sft_data, cpu);

		/* Use ARCH provided counters whenever possible */
		if (!sfd || sfd->source != SCALE_FREQ_SOURCE_ARCH) {
			per_cpu(sft_data, cpu) = data;
			cpumask_set_cpu(cpu, &scale_freq_counters_mask);
		}
	}

	update_scale_freq_invariant(true);
}
EXPORT_SYMBOL_GPL(topology_set_scale_freq_source);

void topology_clear_scale_freq_source(enum scale_freq_source source,
				      const struct cpumask *cpus)
{
	struct scale_freq_data *sfd;
	int cpu;

	for_each_cpu(cpu, cpus) {
		sfd = per_cpu(sft_data, cpu);

		if (sfd && sfd->source == source) {
			per_cpu(sft_data, cpu) = NULL;
			cpumask_clear_cpu(cpu, &scale_freq_counters_mask);
		}
	}
DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;

	update_scale_freq_invariant(false);
}
EXPORT_SYMBOL_GPL(topology_clear_scale_freq_source);

void topology_scale_freq_tick(void)
{
	struct scale_freq_data *sfd = *this_cpu_ptr(&sft_data);

	if (sfd)
		sfd->set_freq_scale();
}

DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale);

void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
			     unsigned long max_freq)
@@ -47,13 +124,13 @@ void topology_set_freq_scale(const struct cpumask *cpus, unsigned long cur_freq,
	 * want to update the scale factor with information from CPUFREQ.
	 * Instead the scale factor will be updated from arch_scale_freq_tick.
	 */
	if (arch_freq_counters_available(cpus))
	if (supports_scale_freq_counters(cpus))
		return;

	scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

	for_each_cpu(i, cpus)
		per_cpu(freq_scale, i) = scale;
		per_cpu(arch_freq_scale, i) = scale;
}

DEFINE_PER_CPU(unsigned long, cpu_scale) = SCHED_CAPACITY_SCALE;
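
Taken together, topology_set_scale_freq_source() and topology_clear_scale_freq_source() form a small registration API: a counter driver hands in a scale_freq_data and the per-CPU tick path dispatches to it. A hedged sketch of a hypothetical in-kernel user follows, modeled on the arm64 AMU code in this merge; the example_* names and the module boilerplate are illustrative and not part of the series.

#include <linux/arch_topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>

static void example_set_freq_scale(void)
{
	/*
	 * Called from the scheduler tick via topology_scale_freq_tick();
	 * a real driver would read its counters here and publish the
	 * result with this_cpu_write(arch_freq_scale, ...).
	 */
}

static struct scale_freq_data example_sfd = {
	.source		= SCALE_FREQ_SOURCE_ARCH,
	.set_freq_scale	= example_set_freq_scale,
};

static int __init example_fie_init(void)
{
	/* Claim every online CPU for counter-driven invariance. */
	topology_set_scale_freq_source(&example_sfd, cpu_online_mask);
	return 0;
}
module_init(example_fie_init);

static void __exit example_fie_exit(void)
{
	/* Hand the CPUs back, e.g. to cpufreq-based invariance. */
	topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH,
					 cpu_online_mask);
}
module_exit(example_fie_exit);

MODULE_LICENSE("GPL");
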
+45 −38
@@ -84,6 +84,7 @@ struct clk_pm_cpu {
	void __iomem *reg_div;
	u8 shift_div;
	struct regmap *nb_pm_base;
	unsigned long l1_expiration;
};

#define to_clk_double_div(_hw) container_of(_hw, struct clk_double_div, hw)
@@ -440,33 +441,6 @@ static u8 clk_pm_cpu_get_parent(struct clk_hw *hw)
	return val;
}

static int clk_pm_cpu_set_parent(struct clk_hw *hw, u8 index)
{
	struct clk_pm_cpu *pm_cpu = to_clk_pm_cpu(hw);
	struct regmap *base = pm_cpu->nb_pm_base;
	int load_level;

	/*
	 * We set the clock parent only if the DVFS is available but
	 * not enabled.
	 */
	if (IS_ERR(base) || armada_3700_pm_dvfs_is_enabled(base))
		return -EINVAL;

	/* Set the parent clock for all the load level */
	for (load_level = 0; load_level < LOAD_LEVEL_NR; load_level++) {
		unsigned int reg, mask,  val,
			offset = ARMADA_37XX_NB_TBG_SEL_OFF;

		armada_3700_pm_dvfs_update_regs(load_level, &reg, &offset);

		val = index << offset;
		mask = ARMADA_37XX_NB_TBG_SEL_MASK << offset;
		regmap_update_bits(base, reg, mask, val);
	}
	return 0;
}

static unsigned long clk_pm_cpu_recalc_rate(struct clk_hw *hw,
					    unsigned long parent_rate)
{
@@ -514,8 +488,10 @@ static long clk_pm_cpu_round_rate(struct clk_hw *hw, unsigned long rate,
}

/*
 * Switching the CPU from the L2 or L3 frequencies (300 and 200 Mhz
 * respectively) to L0 frequency (1.2 Ghz) requires a significant
 * Workaround when base CPU frequency is 1000 or 1200 MHz
 *
 * Switching the CPU from the L2 or L3 frequencies (250/300 or 200 MHz
 * respectively) to L0 frequency (1/1.2 GHz) requires a significant
 * amount of time to let VDD stabilize to the appropriate
 * voltage. This amount of time is large enough that it cannot be
 * covered by the hardware countdown register. Due to this, the CPU
@@ -525,26 +501,56 @@ static long clk_pm_cpu_round_rate(struct clk_hw *hw, unsigned long rate,
 * To work around this problem, we prevent switching directly from the
 * L2/L3 frequencies to the L0 frequency, and instead switch to the L1
 * frequency in-between. The sequence therefore becomes:
 * 1. First switch from L2/L3(200/300MHz) to L1(600MHZ)
 * 1. First switch from L2/L3 (200/250/300 MHz) to L1 (500/600 MHz)
 * 2. Sleep 20ms to let the VDD voltage stabilize
 * 3. Then switch from L1(600MHZ) to L0(1200Mhz).
 * 3. Then switch from L1 (500/600 MHz) to L0 (1000/1200 MHz).
 */
static void clk_pm_cpu_set_rate_wa(unsigned long rate, struct regmap *base)
static void clk_pm_cpu_set_rate_wa(struct clk_pm_cpu *pm_cpu,
				   unsigned int new_level, unsigned long rate,
				   struct regmap *base)
{
	unsigned int cur_level;

	if (rate != 1200 * 1000 * 1000)
		return;

	regmap_read(base, ARMADA_37XX_NB_CPU_LOAD, &cur_level);
	cur_level &= ARMADA_37XX_NB_CPU_LOAD_MASK;
	if (cur_level <= ARMADA_37XX_DVFS_LOAD_1)

	if (cur_level == new_level)
		return;

	/*
	 * System wants to go to L1 on its own. If we are going from L2/L3,
	 * remember when 20ms will expire. If from L0, set the value so that
	 * next switch to L0 won't have to wait.
	 */
	if (new_level == ARMADA_37XX_DVFS_LOAD_1) {
		if (cur_level == ARMADA_37XX_DVFS_LOAD_0)
			pm_cpu->l1_expiration = jiffies;
		else
			pm_cpu->l1_expiration = jiffies + msecs_to_jiffies(20);
		return;
	}

	/*
	 * If we are setting to L2/L3, just invalidate L1 expiration time,
	 * sleeping is not needed.
	 */
	if (rate < 1000*1000*1000)
		goto invalidate_l1_exp;

	/*
	 * We are going to L0 with rate >= 1GHz. Check whether we have been at
	 * L1 for a long enough time. If not, go to L1 for 20ms.
	 */
	if (pm_cpu->l1_expiration && jiffies >= pm_cpu->l1_expiration)
		goto invalidate_l1_exp;

	regmap_update_bits(base, ARMADA_37XX_NB_CPU_LOAD,
			   ARMADA_37XX_NB_CPU_LOAD_MASK,
			   ARMADA_37XX_DVFS_LOAD_1);
	msleep(20);

invalidate_l1_exp:
	pm_cpu->l1_expiration = 0;
}

static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate,
@@ -578,7 +584,9 @@ static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate,
			reg = ARMADA_37XX_NB_CPU_LOAD;
			mask = ARMADA_37XX_NB_CPU_LOAD_MASK;

			clk_pm_cpu_set_rate_wa(rate, base);
			/* Apply workaround when base CPU frequency is 1000 or 1200 MHz */
			if (parent_rate >= 1000*1000*1000)
				clk_pm_cpu_set_rate_wa(pm_cpu, load_level, rate, base);

			regmap_update_bits(base, reg, mask, load_level);

@@ -592,7 +600,6 @@ static int clk_pm_cpu_set_rate(struct clk_hw *hw, unsigned long rate,

static const struct clk_ops clk_pm_cpu_ops = {
	.get_parent = clk_pm_cpu_get_parent,
	.set_parent = clk_pm_cpu_set_parent,
	.round_rate = clk_pm_cpu_round_rate,
	.set_rate = clk_pm_cpu_set_rate,
	.recalc_rate = clk_pm_cpu_recalc_rate,
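
The reworked workaround above boils down to a "remember the L1 deadline, only sleep if it has not passed yet" state machine keyed on jiffies. The self-contained sketch below traces that decision logic; plain integers stand in for jiffies, need_l1_stopover() is a hypothetical name, and only the load-level ordering and the 20 ms window come from the code above.

#include <stdbool.h>
#include <stdio.h>

enum { LOAD_0, LOAD_1, LOAD_2, LOAD_3 };	/* LOAD_0 = fastest */

/* Stand-in for pm_cpu->l1_expiration; 0 means "not armed". */
static unsigned long l1_expiration;

/* Returns true when the caller must park at L1 for 20 ms first. */
static bool need_l1_stopover(int cur_level, int new_level, unsigned long now)
{
	if (cur_level == new_level)
		return false;

	if (new_level == LOAD_1) {
		/* Going to L1 anyway: arm (or satisfy) the deadline. */
		l1_expiration = (cur_level == LOAD_0) ? now : now + 20;
		return false;
	}

	if (new_level > LOAD_1) {	/* L2/L3: no stabilization needed */
		l1_expiration = 0;
		return false;
	}

	/* Going to L0: skip the stopover if L1 time already elapsed. */
	if (l1_expiration && now >= l1_expiration) {
		l1_expiration = 0;
		return false;
	}

	l1_expiration = 0;
	return true;
}

int main(void)
{
	bool park;

	/* Direct L3 -> L0: must park at L1 and wait. */
	park = need_l1_stopover(LOAD_3, LOAD_0, 100);
	printf("L3 -> L0 right away: park=%d\n", park);

	/* L3 -> L1, then L1 -> L0 after 25 time units: no extra wait. */
	need_l1_stopover(LOAD_3, LOAD_1, 100);
	park = need_l1_stopover(LOAD_1, LOAD_0, 125);
	printf("L3 -> L1 -> L0 after a pause: park=%d\n", park);

	return 0;
}
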
+6 −17
@@ -13,7 +13,8 @@ config CPU_FREQ
	  clock speed, you need to either enable a dynamic cpufreq governor
	  (see below) after boot, or use a userspace tool.

	  For details, take a look at <file:Documentation/cpu-freq>.
	  For details, take a look at
	  <file:Documentation/admin-guide/pm/cpufreq.rst>.

	  If in doubt, say N.

@@ -140,8 +141,6 @@ config CPU_FREQ_GOV_USERSPACE
	  To compile this driver as a module, choose M here: the
	  module will be called cpufreq_userspace.

	  For details, take a look at <file:Documentation/cpu-freq/>.

	  If in doubt, say Y.

config CPU_FREQ_GOV_ONDEMAND
@@ -158,7 +157,8 @@ config CPU_FREQ_GOV_ONDEMAND
	  To compile this driver as a module, choose M here: the
	  module will be called cpufreq_ondemand.

	  For details, take a look at linux/Documentation/cpu-freq.
	  For details, take a look at
	  <file:Documentation/admin-guide/pm/cpufreq.rst>.

	  If in doubt, say N.

@@ -182,7 +182,8 @@ config CPU_FREQ_GOV_CONSERVATIVE
	  To compile this driver as a module, choose M here: the
	  module will be called cpufreq_conservative.

	  For details, take a look at linux/Documentation/cpu-freq.
	  For details, take a look at
	  <file:Documentation/admin-guide/pm/cpufreq.rst>.

	  If in doubt, say N.

@@ -246,8 +247,6 @@ config IA64_ACPI_CPUFREQ
	This driver adds a CPUFreq driver which utilizes the ACPI
	Processor Performance States.

	For details, take a look at <file:Documentation/cpu-freq/>.

	If in doubt, say N.
endif

@@ -271,8 +270,6 @@ config LOONGSON2_CPUFREQ

	  Loongson2F and its successors support this feature.

	  For details, take a look at <file:Documentation/cpu-freq/>.

	  If in doubt, say N.

config LOONGSON1_CPUFREQ
@@ -282,8 +279,6 @@ config LOONGSON1_CPUFREQ
	  This option adds a CPUFreq driver for loongson1 processors which
	  support software configurable cpu frequency.

	  For details, take a look at <file:Documentation/cpu-freq/>.

	  If in doubt, say N.
endif

@@ -293,8 +288,6 @@ config SPARC_US3_CPUFREQ
	help
	  This adds the CPUFreq driver for UltraSPARC-III processors.

	  For details, take a look at <file:Documentation/cpu-freq>.

	  If in doubt, say N.

config SPARC_US2E_CPUFREQ
@@ -302,8 +295,6 @@ config SPARC_US2E_CPUFREQ
	help
	  This adds the CPUFreq driver for UltraSPARC-IIe processors.

	  For details, take a look at <file:Documentation/cpu-freq>.

	  If in doubt, say N.
endif

@@ -318,8 +309,6 @@ config SH_CPU_FREQ
	  will also generate a notice in the boot log before disabling
	  itself if the CPU in question is not capable of rate rounding.

	  For details, take a look at <file:Documentation/cpu-freq>.

	  If unsure, say N.
endif
