Commit 1201c50c authored by Rafael J. Wysocki's avatar Rafael J. Wysocki
Browse files

Merge branches 'pm-cpuidle' and 'pm-cpufreq'

Merge CPU power management updates for 6.6-rc1:

 - Rework the menu and teo cpuidle governors to avoid calling
   tick_nohz_get_sleep_length() — which is likely to become quite
   expensive going forward — too often, and to improve decision-making
   regarding whether or not to stop the scheduler tick in the teo
   governor (Rafael Wysocki).

 - Improve the performance of cpufreq_stats_create_table() in some
   cases (Liao Chang).

 - Fix two issues in the amd-pstate-ut cpufreq driver (Swapnil Sapkal).

 - Use clamp() helper macro to improve the code readability in
   cpufreq_verify_within_limits() (Liao Chang).

 - Set stale CPU frequency to minimum in intel_pstate (Doug Smythies).

* pm-cpuidle:
  cpuidle: teo: Avoid unnecessary variable assignments
  cpuidle: menu: Skip tick_nohz_get_sleep_length() call in some cases
  cpuidle: teo: Gather statistics regarding whether or not to stop the tick
  cpuidle: teo: Skip tick_nohz_get_sleep_length() call in some cases
  cpuidle: teo: Do not call tick_nohz_get_sleep_length() upfront
  cpuidle: teo: Drop utilized from struct teo_cpu
  cpuidle: teo: Avoid stopping the tick unnecessarily when bailing out
  cpuidle: teo: Update idle duration estimate when choosing shallower state

* pm-cpufreq:
  cpufreq: amd-pstate-ut: Fix kernel panic when loading the driver
  cpufreq: amd-pstate-ut: Remove module parameter access
  cpufreq: Use clamp() helper macro to improve the code readability
  cpufreq: intel_pstate: set stale CPU frequency to minimum
  cpufreq: stats: Improve the performance of cpufreq_stats_create_table()
Loading
Loading
Loading
Loading
+18 −28
Original line number Diff line number Diff line
@@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
static bool get_shared_mem(void)
{
	bool result = false;
	char path[] = "/sys/module/amd_pstate/parameters/shared_mem";
	char buf[5] = {0};
	struct file *filp = NULL;
	loff_t pos = 0;
	ssize_t ret;

	if (!boot_cpu_has(X86_FEATURE_CPPC)) {
		filp = filp_open(path, O_RDONLY, 0);
		if (IS_ERR(filp))
			pr_err("%s unable to open %s file!\n", __func__, path);
		else {
			ret = kernel_read(filp, &buf, sizeof(buf), &pos);
			if (ret < 0)
				pr_err("%s read %s file fail ret=%ld!\n",
					__func__, path, (long)ret);
			filp_close(filp, NULL);
		}

		if ('Y' == *buf)
	if (!boot_cpu_has(X86_FEATURE_CPPC))
		result = true;
	}

	return result;
}
@@ -158,7 +140,7 @@ static void amd_pstate_ut_check_perf(u32 index)
			if (ret) {
				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
				pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
				return;
				goto skip_test;
			}

			nominal_perf = cppc_perf.nominal_perf;
@@ -169,7 +151,7 @@ static void amd_pstate_ut_check_perf(u32 index)
			if (ret) {
				amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
				pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
				return;
				goto skip_test;
			}

			nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
@@ -187,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index)
				nominal_perf, cpudata->nominal_perf,
				lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
				lowest_perf, cpudata->lowest_perf);
			return;
			goto skip_test;
		}

		if (!((highest_perf >= nominal_perf) &&
@@ -198,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index)
			pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
				__func__, cpu, highest_perf, nominal_perf,
				lowest_nonlinear_perf, lowest_perf);
			return;
			goto skip_test;
		}
		cpufreq_cpu_put(policy);
	}

	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
	return;
skip_test:
	cpufreq_cpu_put(policy);
}

/*
@@ -230,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index)
			pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
				__func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
				cpudata->lowest_nonlinear_freq, cpudata->min_freq);
			return;
			goto skip_test;
		}

		if (cpudata->min_freq != policy->min) {
			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
			pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
				__func__, cpu, cpudata->min_freq, policy->min);
			return;
			goto skip_test;
		}

		if (cpudata->boost_supported) {
@@ -249,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index)
				pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
					__func__, cpu, policy->max, cpudata->max_freq,
					cpudata->nominal_freq);
				return;
				goto skip_test;
			}
		} else {
			amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
			pr_err("%s cpu%d must support boost!\n", __func__, cpu);
			return;
			goto skip_test;
		}
		cpufreq_cpu_put(policy);
	}

	amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
	return;
skip_test:
	cpufreq_cpu_put(policy);
}

static int __init amd_pstate_ut_init(void)
+2 −1
Original line number Diff line number Diff line
@@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)

	/* Find valid-unique entries */
	cpufreq_for_each_valid_entry(pos, policy->freq_table)
		if (freq_table_get_index(stats, pos->frequency) == -1)
		if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED ||
		    freq_table_get_index(stats, pos->frequency) == -1)
			stats->freq_table[i++] = pos->frequency;

	stats->state_num = i;
+5 −0
Original line number Diff line number Diff line
@@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
			intel_pstate_clear_update_util_hook(policy->cpu);
		intel_pstate_hwp_set(policy->cpu);
	}
	/*
	 * policy->cur is never updated with the intel_pstate driver, but it
	 * is used as a stale frequency value. So, keep it within limits.
	 */
	policy->cur = policy->min;

	mutex_unlock(&intel_pstate_limits_lock);

+14 −0
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0 */

/* Common definitions for cpuidle governors. */

#ifndef __CPUIDLE_GOVERNOR_H
#define __CPUIDLE_GOVERNOR_H

/*
 * Idle state target residency threshold used for deciding whether or not to
 * check the time till the closest expected timer event.
 */
#define RESIDENCY_THRESHOLD_NS	(15 * NSEC_PER_USEC)

#endif /* __CPUIDLE_GOVERNOR_H */
+38 −27
Original line number Diff line number Diff line
@@ -19,6 +19,8 @@
#include <linux/sched/stat.h>
#include <linux/math64.h>

#include "gov.h"

#define BUCKETS 12
#define INTERVAL_SHIFT 3
#define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
 * of points is below a threshold. If it is... then use the
 * average of these 8 points as the estimated value.
 */
static unsigned int get_typical_interval(struct menu_device *data,
					 unsigned int predicted_us)
static unsigned int get_typical_interval(struct menu_device *data)
{
	int i, divisor;
	unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ static unsigned int get_typical_interval(struct menu_device *data,
		}
	}

	/*
	 * If the result of the computation is going to be discarded anyway,
	 * avoid the computation altogether.
	 */
	if (min >= predicted_us)
	if (!max)
		return UINT_MAX;

	if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
{
	struct menu_device *data = this_cpu_ptr(&menu_devices);
	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
	unsigned int predicted_us;
	u64 predicted_ns;
	u64 interactivity_req;
	unsigned int nr_iowaiters;
@@ -279,17 +275,42 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		data->needs_update = 0;
	}

	/* determine the expected residency time, round up */
	nr_iowaiters = nr_iowait_cpu(dev->cpu);

	/* Find the shortest expected idle interval. */
	predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
	if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
		unsigned int timer_us;

		/* Determine the time till the closest timer. */
		delta = tick_nohz_get_sleep_length(&delta_tick);
		if (unlikely(delta < 0)) {
			delta = 0;
			delta_tick = 0;
		}
	data->next_timer_ns = delta;

	nr_iowaiters = nr_iowait_cpu(dev->cpu);
		data->next_timer_ns = delta;
		data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);

		/* Round up the result for half microseconds. */
		timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
					data->next_timer_ns *
						data->correction_factor[data->bucket],
				   RESOLUTION * DECAY * NSEC_PER_USEC);
		/* Use the lowest expected idle interval to pick the idle state. */
		predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
	} else {
		/*
		 * Because the next timer event is not going to be determined
		 * in this case, assume that without the tick the closest timer
		 * will be in distant future and that the closest tick will occur
		 * after 1/2 of the tick period.
		 */
		data->next_timer_ns = KTIME_MAX;
		delta_tick = TICK_NSEC / 2;
		data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
	}

	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
	    ((data->next_timer_ns < drv->states[1].target_residency_ns ||
	      latency_req < drv->states[1].exit_latency_ns) &&
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
		return 0;
	}

	/* Round up the result for half microseconds. */
	predicted_us = div_u64(data->next_timer_ns *
			       data->correction_factor[data->bucket] +
			       (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
			       RESOLUTION * DECAY * NSEC_PER_USEC);
	/* Use the lowest expected idle interval to pick the idle state. */
	predicted_ns = (u64)min(predicted_us,
				get_typical_interval(data, predicted_us)) *
				NSEC_PER_USEC;

	if (tick_nohz_tick_stopped()) {
		/*
		 * If the tick is already stopped, the cost of possible short
Loading