Commit 0331dcf3 authored by Zhang Rui, committed by Xiaolong Wang
Browse files

powercap: intel_rapl: Cleanup Power Limits support

mainline inclusion
from mainline-v6.5-rc1
commit 9050a9cd
category: feature
bugzilla: https://gitee.com/openeuler/intel-kernel/issues/I92135

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=9050a9cd5e4c848e265915d6e7b1f731e6e1e0e6



--------------------------------

The same set of operations is shared by different Power Limits,
including Power Limit get/set, Power Limit enable/disable, clamping
enable/disable, time window get/set, and max power get/set, etc.

But the same operation for different Power Limits has different
primitives because they use different registers/register bits.

A lot of dirty/duplicate code was introduced to handle this difference.

Introduce a universal way to issue Power Limit operations.
Instead of using hardcoded primitive name directly, use Power Limit id
+ operation type, and hide all the Power Limit difference details in a
central place, get_pl_prim(). Two helpers, rapl_read_pl_data() and
rapl_write_pl_data(), are introduced at the same time to simplify the
code for issuing Power Limit operations.

Intel-SIG: commit 9050a9cd powercap: intel_rapl: Cleanup Power Limits support.
Backport Intel RAPL driver support on TPMI.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Wang Wendy <wendy.wang@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
[ Xiaolong Wang: amend commit log ]
Signed-off-by: Xiaolong Wang <xiaolong.wang@intel.com>
parent 6a760dc7
Loading
Loading
Loading
Loading
+146 −197
Original line number Diff line number Diff line
@@ -96,9 +96,67 @@ enum unit_type {
#define	DOMAIN_STATE_POWER_LIMIT_SET    BIT(1)
#define DOMAIN_STATE_BIOS_LOCKED        BIT(2)

static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";
static const char pl4_name[] = "peak_power";
/*
 * Human-readable name of each Power Limit, indexed by Power Limit id.
 * The table is read-only after initialisation, so both the strings and
 * the array of pointers are const.
 */
static const char * const pl_names[NR_POWER_LIMITS] = {
	[POWER_LIMIT1] = "long_term",
	[POWER_LIMIT2] = "short_term",
	[POWER_LIMIT4] = "peak_power",
};

/*
 * Operation types that can be issued on a Power Limit.  Combined with a
 * Power Limit id, get_pl_prim() translates them into the primitive that
 * implements the operation for that particular Power Limit.
 */
enum pl_prims {
	PL_ENABLE,	/* enable/disable the Power Limit */
	PL_CLAMP,	/* enable/disable clamping */
	PL_LIMIT,	/* get/set the Power Limit value */
	PL_TIME_WINDOW,	/* get/set the time window */
	PL_MAX_POWER,	/* get the max power */
};

/*
 * Tell whether Power Limit @pl exists for domain @rd.
 *
 * A Power Limit is considered present when its id lies within
 * [POWER_LIMIT1, POWER_LIMIT4] and the matching rd->rpl[] slot has a
 * name assigned.  The range test is evaluated first so that rd->rpl[]
 * is never indexed with an out-of-range id.
 */
static bool is_pl_valid(struct rapl_domain *rd, int pl)
{
	bool in_range = pl >= POWER_LIMIT1 && pl <= POWER_LIMIT4;

	return in_range && rd->rpl[pl].name != NULL;
}

/*
 * Map a (Power Limit id, operation type) pair to the primitive that
 * implements it.
 *
 * @pl:   Power Limit id (POWER_LIMIT1, POWER_LIMIT2 or POWER_LIMIT4)
 * @prim: operation type to perform on that Power Limit
 *
 * Returns the primitive id on success, or -EINVAL when @pl is unknown
 * or the operation is not available for that Power Limit (PL4 has no
 * clamp and no time window primitive).
 */
static int get_pl_prim(int pl, enum pl_prims prim)
{
	int id = -EINVAL;

	switch (pl) {
	case POWER_LIMIT1:
		switch (prim) {
		case PL_ENABLE:
			id = PL1_ENABLE;
			break;
		case PL_CLAMP:
			id = PL1_CLAMP;
			break;
		case PL_LIMIT:
			id = POWER_LIMIT1;
			break;
		case PL_TIME_WINDOW:
			id = TIME_WINDOW1;
			break;
		case PL_MAX_POWER:
			id = THERMAL_SPEC_POWER;
			break;
		default:
			break;
		}
		break;
	case POWER_LIMIT2:
		switch (prim) {
		case PL_ENABLE:
			id = PL2_ENABLE;
			break;
		case PL_CLAMP:
			id = PL2_CLAMP;
			break;
		case PL_LIMIT:
			id = POWER_LIMIT2;
			break;
		case PL_TIME_WINDOW:
			id = TIME_WINDOW2;
			break;
		case PL_MAX_POWER:
			id = MAX_POWER;
			break;
		default:
			break;
		}
		break;
	case POWER_LIMIT4:
		switch (prim) {
		case PL_LIMIT:
			id = POWER_LIMIT4;
			break;
		case PL_ENABLE:
			id = PL4_ENABLE;
			break;
		case PL_MAX_POWER:
			/* PL4 would be around two times PL2, use same prim as PL2. */
			id = MAX_POWER;
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}

	return id;
}

#define power_zone_to_rapl_domain(_zone) \
	container_of(_zone, struct rapl_domain, power_zone)
@@ -155,6 +213,12 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
static int rapl_write_data_raw(struct rapl_domain *rd,
			       enum rapl_primitives prim,
			       unsigned long long value);
static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
			      enum pl_prims pl_prim,
			      bool xlate, u64 *data);
static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
			       enum pl_prims pl_prim,
			       unsigned long long value);
static u64 rapl_unit_xlate(struct rapl_domain *rd,
			   enum unit_type type, u64 value, int to_raw);
static void package_power_limit_irq_save(struct rapl_package *rp);
@@ -222,7 +286,7 @@ static int find_nr_power_limit(struct rapl_domain *rd)
	int i, nr_pl = 0;

	for (i = 0; i < NR_POWER_LIMITS; i++) {
		if (rd->rpl[i].name)
		if (is_pl_valid(rd, i))
			nr_pl++;
	}

@@ -233,37 +297,34 @@ static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	struct rapl_defaults *defaults = get_defaults(rd->rp);

	if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
		return -EACCES;
	int ret;

	cpus_read_lock();
	rapl_write_data_raw(rd, PL1_ENABLE, mode);
	if (defaults->set_floor_freq)
	ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
	if (!ret && defaults->set_floor_freq)
		defaults->set_floor_freq(rd, mode);
	cpus_read_unlock();

	return 0;
	return ret;
}

static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
{
	struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
	u64 val;
	int ret;

	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
		*mode = false;
		return 0;
	}
	cpus_read_lock();
	if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
		cpus_read_unlock();
		return -EIO;
	}
	ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val);
	if (!ret)
		*mode = val;
	cpus_read_unlock();

	return 0;
	return ret;
}

/* per RAPL domain ops, in the order of rapl_domain_type */
@@ -319,8 +380,8 @@ static int contraint_to_pl(struct rapl_domain *rd, int cid)
{
	int i, j;

	for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) {
		if ((rd->rpl[i].name) && j++ == cid) {
	for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
		if (is_pl_valid(rd, i) && j++ == cid) {
			pr_debug("%s: index %d\n", __func__, i);
			return i;
		}
@@ -341,36 +402,11 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);
	id = contraint_to_pl(rd, cid);
	if (id < 0) {
		ret = id;
		goto set_exit;
	}

	rp = rd->rp;

	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
		dev_warn(&power_zone->dev,
			 "%s locked by BIOS, monitoring only\n", rd->name);
		ret = -EACCES;
		goto set_exit;
	}

	switch (rd->rpl[id].prim_id) {
	case PL1_ENABLE:
		rapl_write_data_raw(rd, POWER_LIMIT1, power_limit);
		break;
	case PL2_ENABLE:
		rapl_write_data_raw(rd, POWER_LIMIT2, power_limit);
		break;
	case PL4_ENABLE:
		rapl_write_data_raw(rd, POWER_LIMIT4, power_limit);
		break;
	default:
		ret = -EINVAL;
	}
	ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit);
	if (!ret)
		package_power_limit_irq_save(rp);
set_exit:
	cpus_read_unlock();
	return ret;
}
@@ -380,38 +416,17 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
{
	struct rapl_domain *rd;
	u64 val;
	int prim;
	int ret = 0;
	int id;

	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);
	id = contraint_to_pl(rd, cid);
	if (id < 0) {
		ret = id;
		goto get_exit;
	}

	switch (rd->rpl[id].prim_id) {
	case PL1_ENABLE:
		prim = POWER_LIMIT1;
		break;
	case PL2_ENABLE:
		prim = POWER_LIMIT2;
		break;
	case PL4_ENABLE:
		prim = POWER_LIMIT4;
		break;
	default:
		cpus_read_unlock();
		return -EINVAL;
	}
	if (rapl_read_data_raw(rd, prim, true, &val))
		ret = -EIO;
	else
	ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val);
	if (!ret)
		*data = val;

get_exit:
	cpus_read_unlock();

	return ret;
@@ -427,23 +442,9 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);
	id = contraint_to_pl(rd, cid);
	if (id < 0) {
		ret = id;
		goto set_time_exit;
	}

	switch (rd->rpl[id].prim_id) {
	case PL1_ENABLE:
		rapl_write_data_raw(rd, TIME_WINDOW1, window);
		break;
	case PL2_ENABLE:
		rapl_write_data_raw(rd, TIME_WINDOW2, window);
		break;
	default:
		ret = -EINVAL;
	}
	ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window);

set_time_exit:
	cpus_read_unlock();
	return ret;
}
@@ -459,33 +460,11 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);
	id = contraint_to_pl(rd, cid);
	if (id < 0) {
		ret = id;
		goto get_time_exit;
	}

	switch (rd->rpl[id].prim_id) {
	case PL1_ENABLE:
		ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val);
		break;
	case PL2_ENABLE:
		ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val);
		break;
	case PL4_ENABLE:
		/*
		 * Time window parameter is not applicable for PL4 entry
		 * so assigining '0' as default value.
		 */
		val = 0;
		break;
	default:
		cpus_read_unlock();
		return -EINVAL;
	}
	ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val);
	if (!ret)
		*data = val;

get_time_exit:
	cpus_read_unlock();

	return ret;
@@ -505,36 +484,23 @@ static const char *get_constraint_name(struct powercap_zone *power_zone,
	return NULL;
}

static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
{
	struct rapl_domain *rd;
	u64 val;
	int prim;
	int ret = 0;
	int id;

	cpus_read_lock();
	rd = power_zone_to_rapl_domain(power_zone);
	switch (rd->rpl[id].prim_id) {
	case PL1_ENABLE:
		prim = THERMAL_SPEC_POWER;
		break;
	case PL2_ENABLE:
		prim = MAX_POWER;
		break;
	case PL4_ENABLE:
		prim = MAX_POWER;
		break;
	default:
		cpus_read_unlock();
		return -EINVAL;
	}
	if (rapl_read_data_raw(rd, prim, true, &val))
		ret = -EIO;
	else
	id = contraint_to_pl(rd, cid);

	ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
	if (!ret)
		*data = val;

	/* As a generalization rule, PL4 would be around two times PL2. */
	if (rd->rpl[id].prim_id == PL4_ENABLE)
	if (id == POWER_LIMIT4)
		*data = *data * 2;

	cpus_read_unlock();
@@ -560,6 +526,7 @@ static void rapl_init_domains(struct rapl_package *rp)

	for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
		unsigned int mask = rp->domain_map & (1 << i);
		int t;

		if (!mask)
			continue;
@@ -577,17 +544,10 @@ static void rapl_init_domains(struct rapl_package *rp)

		/* PL1 is supported by default */
		rp->priv->limits[i] |= BIT(POWER_LIMIT1);
		rd->rpl[0].prim_id = PL1_ENABLE;
		rd->rpl[0].name = pl1_name;

		if (rp->priv->limits[i] & BIT(POWER_LIMIT2)) {
			rd->rpl[1].prim_id = PL2_ENABLE;
			rd->rpl[1].name = pl2_name;
		}

		if (rp->priv->limits[i] & BIT(POWER_LIMIT4)) {
			rd->rpl[2].prim_id = PL4_ENABLE;
			rd->rpl[2].name = pl4_name;
		for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
			if (rp->priv->limits[i] & BIT(t))
				rd->rpl[t].name = pl_names[t];
		}

		for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
@@ -818,6 +778,33 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
	return ret;
}

/*
 * Read the value of one Power Limit operation.
 *
 * @rd:      RAPL domain to read from
 * @pl:      Power Limit id
 * @pl_prim: operation type whose value is wanted
 * @xlate:   passed through unchanged to rapl_read_data_raw()
 * @data:    where the value is stored; passed through to
 *           rapl_read_data_raw()
 *
 * Returns -EINVAL when @pl is not a valid Power Limit of @rd or when
 * @pl_prim is not available for @pl; otherwise returns whatever
 * rapl_read_data_raw() returns.
 */
static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
			      enum pl_prims pl_prim, bool xlate, u64 *data)
{
	int prim;

	if (!is_pl_valid(rd, pl))
		return -EINVAL;

	/*
	 * get_pl_prim() reports an unsupported combination (e.g. a time
	 * window for PL4) as a negative errno.  Catch it here rather than
	 * converting it to an enum rapl_primitives and handing a bogus
	 * primitive id to the raw accessor.
	 */
	prim = get_pl_prim(pl, pl_prim);
	if (prim < 0)
		return prim;

	return rapl_read_data_raw(rd, prim, xlate, data);
}

/*
 * Write the value of one Power Limit operation.
 *
 * @rd:      RAPL domain to write to
 * @pl:      Power Limit id
 * @pl_prim: operation type to set
 * @value:   value passed through to rapl_write_data_raw()
 *
 * Returns -EINVAL when @pl is not a valid Power Limit of @rd, -EACCES
 * (after logging a warning) when the domain is locked by BIOS, -EINVAL
 * when @pl_prim is not available for @pl, and otherwise whatever
 * rapl_write_data_raw() returns.
 */
static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
			       enum pl_prims pl_prim,
			       unsigned long long value)
{
	int prim;

	if (!is_pl_valid(rd, pl))
		return -EINVAL;

	if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
		pr_warn("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
		return -EACCES;
	}

	/*
	 * get_pl_prim() reports an unsupported combination (e.g. clamping
	 * for PL4) as a negative errno.  Catch it here rather than
	 * converting it to an enum rapl_primitives and handing a bogus
	 * primitive id to the raw accessor.
	 */
	prim = get_pl_prim(pl, pl_prim);
	if (prim < 0)
		return prim;

	return rapl_write_data_raw(rd, prim, value);
}
/*
 * Raw RAPL data stored in MSRs are in certain scales. We need to
 * convert them into standard units based on the units reported in
@@ -945,17 +932,16 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)

static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
{
	int nr_powerlimit = find_nr_power_limit(rd);
	int i;

	/* always enable clamp such that p-state can go below OS requested
	 * range. power capping priority over guranteed frequency.
	 */
	rapl_write_data_raw(rd, PL1_CLAMP, mode);
	rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);

	/* some domains have pl2 */
	if (nr_powerlimit > 1) {
		rapl_write_data_raw(rd, PL2_ENABLE, mode);
		rapl_write_data_raw(rd, PL2_CLAMP, mode);
	for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
		rapl_write_pl_data(rd, i, PL_ENABLE, mode);
		rapl_write_pl_data(rd, i, PL_CLAMP, mode);
	}
}

@@ -1311,11 +1297,10 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
			rd->state |= DOMAIN_STATE_BIOS_LOCKED;
		}
	}
	/* check if power limit MSR exists, otherwise domain is monitoring only */
	for (i = 0; i < NR_POWER_LIMITS; i++) {
		int prim = rd->rpl[i].prim_id;

		if (rapl_read_data_raw(rd, prim, false, &val64))
	/* check if power limit exists, otherwise domain is monitoring only */
	for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
		if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
			rd->rpl[i].name = NULL;
	}
}
@@ -1365,13 +1350,13 @@ void rapl_remove_package(struct rapl_package *rp)
	package_power_limit_irq_restore(rp);

	for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
		rapl_write_data_raw(rd, PL1_ENABLE, 0);
		rapl_write_data_raw(rd, PL1_CLAMP, 0);
		if (find_nr_power_limit(rd) > 1) {
			rapl_write_data_raw(rd, PL2_ENABLE, 0);
			rapl_write_data_raw(rd, PL2_CLAMP, 0);
			rapl_write_data_raw(rd, PL4_ENABLE, 0);
		int i;

		for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
			rapl_write_pl_data(rd, i, PL_ENABLE, 0);
			rapl_write_pl_data(rd, i, PL_CLAMP, 0);
		}

		if (rd->id == RAPL_DOMAIN_PACKAGE) {
			rd_package = rd;
			continue;
@@ -1456,38 +1441,18 @@ static void power_limit_state_save(void)
{
	struct rapl_package *rp;
	struct rapl_domain *rd;
	int nr_pl, ret, i;
	int ret, i;

	cpus_read_lock();
	list_for_each_entry(rp, &rapl_packages, plist) {
		if (!rp->power_zone)
			continue;
		rd = power_zone_to_rapl_domain(rp->power_zone);
		nr_pl = find_nr_power_limit(rd);
		for (i = 0; i < nr_pl; i++) {
			switch (rd->rpl[i].prim_id) {
			case PL1_ENABLE:
				ret = rapl_read_data_raw(rd,
						 POWER_LIMIT1, true,
						 &rd->rpl[i].last_power_limit);
				if (ret)
					rd->rpl[i].last_power_limit = 0;
				break;
			case PL2_ENABLE:
				ret = rapl_read_data_raw(rd,
						 POWER_LIMIT2, true,
						 &rd->rpl[i].last_power_limit);
				if (ret)
					rd->rpl[i].last_power_limit = 0;
				break;
			case PL4_ENABLE:
				ret = rapl_read_data_raw(rd,
						 POWER_LIMIT4, true,
		for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
			ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
						 &rd->rpl[i].last_power_limit);
			if (ret)
				rd->rpl[i].last_power_limit = 0;
				break;
			}
		}
	}
	cpus_read_unlock();
@@ -1497,33 +1462,17 @@ static void power_limit_state_restore(void)
{
	struct rapl_package *rp;
	struct rapl_domain *rd;
	int nr_pl, i;
	int i;

	cpus_read_lock();
	list_for_each_entry(rp, &rapl_packages, plist) {
		if (!rp->power_zone)
			continue;
		rd = power_zone_to_rapl_domain(rp->power_zone);
		nr_pl = find_nr_power_limit(rd);
		for (i = 0; i < nr_pl; i++) {
			switch (rd->rpl[i].prim_id) {
			case PL1_ENABLE:
		for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
			if (rd->rpl[i].last_power_limit)
					rapl_write_data_raw(rd, POWER_LIMIT1,
				rapl_write_pl_data(rd, i, PL_LIMIT,
					       rd->rpl[i].last_power_limit);
				break;
			case PL2_ENABLE:
				if (rd->rpl[i].last_power_limit)
					rapl_write_data_raw(rd, POWER_LIMIT2,
					    rd->rpl[i].last_power_limit);
				break;
			case PL4_ENABLE:
				if (rd->rpl[i].last_power_limit)
					rapl_write_data_raw(rd, POWER_LIMIT4,
					    rd->rpl[i].last_power_limit);
				break;
			}
		}
	}
	cpus_read_unlock();
}
+0 −1
Original line number Diff line number Diff line
@@ -79,7 +79,6 @@ struct rapl_domain_data {

struct rapl_power_limit {
	struct powercap_zone_constraint *constraint;
	int prim_id;		/* primitive ID used to enable */
	struct rapl_domain *domain;
	const char *name;
	u64 last_power_limit;