Unverified Commit 0e7e6fc7 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!4273 Intel: Backport SRF/GRR perf cstate support back to kernel 5.10

Merge Pull Request from: @yunyingsun 
 
Title: Backport Sierra Forest(SRF)/Grand Ridge(GRR) perf cstate support to kernel OLK-5.10

Content:
To make the perf tool support reading cstate information on the first E-core Xeon platform, Sierra Forest (SRF), the OLK-5.10 kernel needs 4 patches from mainline kernel v6.8-rc1:
bbb968696d0f perf/x86/intel/cstate: Add Grand Ridge support
3877d55a0db2 perf/x86/intel/cstate: Add Sierra Forest support
c3dd1995620c x86/smp: Export symbol cpu_clustergroup_mask()
243218ca9303 perf/x86/intel/cstate: Cleanup duplicate attr_groups

Intel-kernel issue:
https://gitee.com/openeuler/intel-kernel/issues/I8YVW5

Test:
On SRF, before backport:
```
# perf list | grep cstate
#
```

On SRF, after backport:
```
# perf list | grep cstate
  cstate_core/c1-residency/                          [Kernel PMU event]
  cstate_core/c6-residency/                          [Kernel PMU event]
  cstate_module/c6-residency/                        [Kernel PMU event]
  cstate_pkg/c6-residency/                           [Kernel PMU event]
#
# perf stat -e cstate_core/c1-residency/,cstate_core/c6-residency/,cstate_module/c6-residency/,cstate_pkg/c6-residency/ -- sleep 1

 Performance counter stats for 'system wide':

       121,798,080      cstate_core/c1-residency/
   312,280,460,496      cstate_core/c6-residency/
    74,616,978,288      cstate_module/c6-residency/
       505,499,040      cstate_pkg/c6-residency/

       1.024821144 seconds time elapsed
```

Known issue:
N/A

Default config change:
N/A 
 
Link:https://gitee.com/openeuler/kernel/pulls/4273

 

Reviewed-by: Jason Zeng <jason.zeng@intel.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents 1de0ecd5 d32a860a
Loading
Loading
Loading
Loading
+125 −33
Original line number Diff line number Diff line
@@ -41,7 +41,7 @@
 *	MSR_CORE_C1_RES: CORE C1 Residency Counter
 *			 perf code: 0x00
 *			 Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
 *					  MTL
 *					  MTL,SRF,GRR
 *			 Scope: Core (each processor core has a MSR)
 *	MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
 *			       perf code: 0x01
@@ -52,7 +52,8 @@
 *			       perf code: 0x02
 *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
 *						SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
 *						TGL,TNT,SPR,RPL,MTL
 *						TGL,TNT,SPR,RPL,MTL,SRF,
 *						GRR
 *			       Scope: Core
 *	MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
 *			       perf code: 0x03
@@ -75,7 +76,7 @@
 *			       perf code: 0x02
 *			       Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
 *						SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
 *						TGL,TNT,SPR,ADL,RPL,MTL
 *						TGL,TNT,SPR,ADL,RPL,MTL,SRF
 *			       Scope: Package (physical package)
 *	MSR_PKG_C7_RESIDENCY:  Package C7 Residency Counter.
 *			       perf code: 0x03
@@ -97,6 +98,10 @@
 *			       Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
 *						TNT,ADL,RPL,MTL
 *			       Scope: Package (physical package)
 *	MSR_MODULE_C6_RES_MS:  Module C6 Residency Counter.
 *			       perf code: 0x00
 *			       Available model: SRF,GRR
 *			       Scope: A cluster of cores sharing an L2 cache
 *
 */

@@ -130,6 +135,7 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
/*
 * Per-CPU-model description of the C-state residency events to probe.
 * Model tables fill the *_events fields with BIT(PERF_CSTATE_*) values,
 * and cstate_probe() passes each field to perf_msr_probe() for the
 * matching MSR table.
 */
struct cstate_model {
	/* Bitmap of PERF_CSTATE_CORE_* events. */
	unsigned long		core_events;
	/* Bitmap of PERF_CSTATE_PKG_* events. */
	unsigned long		pkg_events;
	/* Bitmap of PERF_CSTATE_MODULE_* events. */
	unsigned long		module_events;
	/* NOTE(review): how quirks is consumed is not visible in this hunk. */
	unsigned long		quirks;
};

@@ -189,20 +195,20 @@ static struct attribute *attrs_empty[] = {
 * "events" group (with empty attrs) before updating
 * it with detected events.
 */
static struct attribute_group core_events_attr_group = {
static struct attribute_group cstate_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};

DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63");
static struct attribute *core_format_attrs[] = {
	&format_attr_core_event.attr,
DEFINE_CSTATE_FORMAT_ATTR(cstate_event, event, "config:0-63");
static struct attribute *cstate_format_attrs[] = {
	&format_attr_cstate_event.attr,
	NULL,
};

static struct attribute_group core_format_attr_group = {
static struct attribute_group cstate_format_attr_group = {
	.name = "format",
	.attrs = core_format_attrs,
	.attrs = cstate_format_attrs,
};

static cpumask_t cstate_core_cpu_mask;
@@ -217,9 +223,9 @@ static struct attribute_group cpumask_attr_group = {
	.attrs = cstate_cpumask_attrs,
};

static const struct attribute_group *core_attr_groups[] = {
	&core_events_attr_group,
	&core_format_attr_group,
static const struct attribute_group *cstate_attr_groups[] = {
	&cstate_events_attr_group,
	&cstate_format_attr_group,
	&cpumask_attr_group,
	NULL,
};
@@ -268,30 +274,30 @@ static struct perf_msr pkg_msr[] = {
	[PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY,	&group_cstate_pkg_c10,	test_msr },
};

static struct attribute_group pkg_events_attr_group = {
	.name = "events",
	.attrs = attrs_empty,
};
static cpumask_t cstate_pkg_cpu_mask;

DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63");
static struct attribute *pkg_format_attrs[] = {
	&format_attr_pkg_event.attr,
	NULL,
};
static struct attribute_group pkg_format_attr_group = {
	.name = "format",
	.attrs = pkg_format_attrs,
/* cstate_module PMU */
static struct pmu cstate_module_pmu;
static bool has_cstate_module;

/*
 * Event indices for the cstate_module PMU. A value is used as the index
 * into module_msr[] and as the bit position in module_msr_mask and in
 * cstate_model.module_events.
 */
enum perf_cstate_module_events {
	PERF_CSTATE_MODULE_C6_RES = 0,

	/* Number of module events; upper bound checked in event_init. */
	PERF_CSTATE_MODULE_EVENT_MAX,
};

static cpumask_t cstate_pkg_cpu_mask;
PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_module_c6, "event=0x00");

static const struct attribute_group *pkg_attr_groups[] = {
	&pkg_events_attr_group,
	&pkg_format_attr_group,
	&cpumask_attr_group,
	NULL,
static unsigned long module_msr_mask;

PMU_EVENT_GROUP(events, cstate_module_c6);

/*
 * MSR table for the cstate_module PMU, indexed by
 * enum perf_cstate_module_events. Each entry names the residency MSR,
 * the event attribute group generated by PMU_EVENT_GROUP(), and the
 * test_msr helper. cstate_probe() hands this table to perf_msr_probe()
 * to compute module_msr_mask.
 */
static struct perf_msr module_msr[] = {
	[PERF_CSTATE_MODULE_C6_RES]  = { MSR_MODULE_C6_RES_MS,	&group_cstate_module_c6,	test_msr },
};

static cpumask_t cstate_module_cpu_mask;

static ssize_t cstate_get_attr_cpumask(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
@@ -302,6 +308,8 @@ static ssize_t cstate_get_attr_cpumask(struct device *dev,
		return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask);
	else if (pmu == &cstate_pkg_pmu)
		return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask);
	else if (pmu == &cstate_module_pmu)
		return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask);
	else
		return 0;
}
@@ -339,6 +347,15 @@ static int cstate_pmu_event_init(struct perf_event *event)
		event->hw.event_base = pkg_msr[cfg].msr;
		cpu = cpumask_any_and(&cstate_pkg_cpu_mask,
				      topology_die_cpumask(event->cpu));
	} else if (event->pmu == &cstate_module_pmu) {
		if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX)
			return -EINVAL;
		cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_MODULE_EVENT_MAX);
		if (!(module_msr_mask & (1 << cfg)))
			return -EINVAL;
		event->hw.event_base = module_msr[cfg].msr;
		cpu = cpumask_any_and(&cstate_module_cpu_mask,
				      topology_cluster_cpumask(event->cpu));
	} else {
		return -ENOENT;
	}
@@ -428,6 +445,17 @@ static int cstate_cpu_exit(unsigned int cpu)
			perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target);
		}
	}

	if (has_cstate_module &&
	    cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) {

		target = cpumask_any_but(topology_cluster_cpumask(cpu), cpu);
		/* Migrate events if there is a valid target */
		if (target < nr_cpu_ids) {
			cpumask_set_cpu(target, &cstate_module_cpu_mask);
			perf_pmu_migrate_context(&cstate_module_pmu, cpu, target);
		}
	}
	return 0;
}

@@ -454,6 +482,15 @@ static int cstate_cpu_init(unsigned int cpu)
	if (has_cstate_pkg && target >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask);

	/*
	 * If this is the first online thread of that cluster, set it
	 * in the cluster cpu mask as the designated reader.
	 */
	target = cpumask_any_and(&cstate_module_cpu_mask,
				 topology_cluster_cpumask(cpu));
	if (has_cstate_module && target >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cstate_module_cpu_mask);

	return 0;
}

@@ -476,8 +513,13 @@ static const struct attribute_group *pkg_attr_update[] = {
	NULL,
};

/*
 * NULL-terminated list of event attribute groups for the cstate_module
 * PMU; installed through cstate_module_pmu.attr_update.
 */
static const struct attribute_group *module_attr_update[] = {
	&group_cstate_module_c6,
	NULL
};

static struct pmu cstate_core_pmu = {
	.attr_groups	= core_attr_groups,
	.attr_groups	= cstate_attr_groups,
	.attr_update	= core_attr_update,
	.name		= "cstate_core",
	.task_ctx_nr	= perf_invalid_context,
@@ -492,7 +534,7 @@ static struct pmu cstate_core_pmu = {
};

static struct pmu cstate_pkg_pmu = {
	.attr_groups	= pkg_attr_groups,
	.attr_groups	= cstate_attr_groups,
	.attr_update	= pkg_attr_update,
	.name		= "cstate_pkg",
	.task_ctx_nr	= perf_invalid_context,
@@ -506,6 +548,21 @@ static struct pmu cstate_pkg_pmu = {
	.module		= THIS_MODULE,
};

/*
 * PMU exposing module-scope C-state residency counters as "cstate_module".
 * It uses the common cstate_attr_groups for its sysfs attributes and is
 * registered by cstate_init() only when has_cstate_module is set.
 */
static struct pmu cstate_module_pmu = {
	.attr_groups	= cstate_attr_groups,
	.attr_update	= module_attr_update,
	.name		= "cstate_module",
	.task_ctx_nr	= perf_invalid_context,
	/* cstate_pmu_event_init() branches on event->pmu to pick module_msr[]. */
	.event_init	= cstate_pmu_event_init,
	.add		= cstate_pmu_event_add,
	.del		= cstate_pmu_event_del,
	.start		= cstate_pmu_event_start,
	.stop		= cstate_pmu_event_stop,
	.read		= cstate_pmu_event_update,
	.capabilities	= PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE,
	.module		= THIS_MODULE,
};

static const struct cstate_model nhm_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C3_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),
@@ -620,6 +677,22 @@ static const struct cstate_model glm_cstates __initconst = {
				  BIT(PERF_CSTATE_PKG_C10_RES),
};

/*
 * Grand Ridge (matched as ATOM_CRESTMONT): core C1/C6 residency and
 * module C6 residency. No package events are listed for this model.
 */
static const struct cstate_model grr_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.module_events		= BIT(PERF_CSTATE_MODULE_C6_RES),
};

/*
 * Sierra Forest (matched as ATOM_CRESTMONT_X): core C1/C6 residency,
 * package C6 residency and module C6 residency.
 */
static const struct cstate_model srf_cstates __initconst = {
	.core_events		= BIT(PERF_CSTATE_CORE_C1_RES) |
				  BIT(PERF_CSTATE_CORE_C6_RES),

	.pkg_events		= BIT(PERF_CSTATE_PKG_C6_RES),

	.module_events		= BIT(PERF_CSTATE_MODULE_C6_RES),
};


static const struct x86_cpu_id intel_cstates_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(NEHALEM,		&nhm_cstates),
@@ -672,6 +745,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,	&glm_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,	&adl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,	&srf_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT,	&grr_cstates),

	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&icl_cstates),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&icl_cstates),
@@ -711,10 +786,14 @@ static int __init cstate_probe(const struct cstate_model *cm)
	pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX,
				      true, (void *) &cm->pkg_events);

	module_msr_mask = perf_msr_probe(module_msr, PERF_CSTATE_MODULE_EVENT_MAX,
				      true, (void *) &cm->module_events);

	has_cstate_core = !!core_msr_mask;
	has_cstate_pkg  = !!pkg_msr_mask;
	has_cstate_module  = !!module_msr_mask;

	return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV;
	return (has_cstate_core || has_cstate_pkg || has_cstate_module) ? 0 : -ENODEV;
}

static inline void cstate_cleanup(void)
@@ -727,6 +806,9 @@ static inline void cstate_cleanup(void)

	if (has_cstate_pkg)
		perf_pmu_unregister(&cstate_pkg_pmu);

	if (has_cstate_module)
		perf_pmu_unregister(&cstate_module_pmu);
}

static int __init cstate_init(void)
@@ -763,6 +845,16 @@ static int __init cstate_init(void)
			return err;
		}
	}

	if (has_cstate_module) {
		err = perf_pmu_register(&cstate_module_pmu, cstate_module_pmu.name, -1);
		if (err) {
			has_cstate_module = false;
			pr_info("Failed to register cstate cluster pmu\n");
			cstate_cleanup();
			return err;
		}
	}
	return 0;
}

+1 −0
Original line number Diff line number Diff line
@@ -713,6 +713,7 @@ const struct cpumask *cpu_clustergroup_mask(int cpu)
{
	return cpu_l2c_shared_mask(cpu);
}
EXPORT_SYMBOL_GPL(cpu_clustergroup_mask);

static void impress_friends(void)
{