Commit 46284c6c authored by Kan Liang, committed by Joerg Roedel
Browse files

iommu/vt-d: Support cpumask for IOMMU perfmon



The perf subsystem assumes that all counters are per-CPU by default, so
the user space tool reads a counter from each CPU. However, the IOMMU
counters are system-wide and can be read from any CPU. To handle this,
use a CPU mask to restrict counting to one CPU, and register a CPU
hotplug callback that chooses a different CPU if the chosen one is
taken offline.

The CPU is exposed to /sys/bus/event_source/devices/dmar*/cpumask for
the user space perf tool.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lore.kernel.org/r/20230128200428.1459118-6-kan.liang@linux.intel.com


Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
parent 7232ab8b
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -27,3 +27,11 @@ Description: Read-only. Attribute group to describe the magic bits
		    filter_pasid	= "config2:0-21"  - PASID filter
		    filter_ats		= "config2:24-28" - Address Type filter
		    filter_page_table	= "config2:32-36" - Page Table Level filter

What:		/sys/bus/event_source/devices/dmar*/cpumask
Date:		Jan 2023
KernelVersion:	6.3
Contact:	Kan Liang <kan.liang@linux.intel.com>
Description:	Read-only. This file always returns the CPU to which the
		IOMMU pmu is bound for access to all IOMMU pmu performance
		monitoring events.
+105 −8
Original line number Diff line number Diff line
@@ -34,9 +34,28 @@ static struct attribute_group iommu_pmu_events_attr_group = {
	.attrs = attrs_empty,
};

/*
 * CPU mask reported through the "cpumask" sysfs attribute. It is updated
 * by the CPU hotplug callbacks in this file.
 */
static cpumask_t iommu_pmu_cpu_mask;

/*
 * sysfs read handler for the "cpumask" attribute: format
 * iommu_pmu_cpu_mask into @buf and return what
 * cpumap_print_to_pagebuf() returns.
 */
static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
			    char *buf)
{
	const struct cpumask *mask = &iommu_pmu_cpu_mask;

	return cpumap_print_to_pagebuf(true, buf, mask);
}
static DEVICE_ATTR_RO(cpumask);

/* NULL-terminated attribute list holding only the "cpumask" attribute. */
static struct attribute *iommu_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

/* Attribute group wrapping the cpumask attribute list; no .name is set. */
static struct attribute_group iommu_pmu_cpumask_attr_group = {
	.attrs = iommu_pmu_cpumask_attrs,
};

/*
 * NULL-terminated list of the attribute groups defined for the IOMMU PMU:
 * format, events and cpumask.
 */
static const struct attribute_group *iommu_pmu_attr_groups[] = {
	&iommu_pmu_format_attr_group,
	&iommu_pmu_events_attr_group,
	&iommu_pmu_cpumask_attr_group,
	NULL
};

@@ -679,20 +698,98 @@ void free_iommu_pmu(struct intel_iommu *iommu)
	iommu->pmu = NULL;
}

void iommu_pmu_register(struct intel_iommu *iommu)
/*
 * CPU hotplug "online" callback.
 *
 * If no CPU is recorded in iommu_pmu_cpu_mask yet, record @cpu;
 * otherwise leave the mask untouched. Always returns 0.
 */
static int iommu_pmu_cpu_online(unsigned int cpu)
{
	/* A CPU is already recorded: nothing to do. */
	if (!cpumask_empty(&iommu_pmu_cpu_mask))
		return 0;

	cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask);
	return 0;
}

/*
 * CPU hotplug "offline" callback.
 *
 * If @cpu is the CPU recorded in iommu_pmu_cpu_mask, clear it, pick any
 * other online CPU as the replacement, record it in the mask and migrate
 * the perf context of every IOMMU PMU from @cpu to the replacement.
 *
 * If no other online CPU exists, the mask is left empty and nothing is
 * migrated: there is no valid destination CPU to hand to
 * perf_pmu_migrate_context().
 *
 * Always returns 0.
 */
static int iommu_pmu_cpu_offline(unsigned int cpu)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int target;

	/* Nothing to do unless the departing CPU is the recorded one. */
	if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask))
		return 0;

	target = cpumask_any_but(cpu_online_mask, cpu);

	/* No replacement available: do not migrate to an invalid CPU id. */
	if (target >= nr_cpu_ids)
		return 0;

	cpumask_set_cpu(target, &iommu_pmu_cpu_mask);

	/*
	 * NOTE(review): perf_pmu_migrate_context() is believed to be able to
	 * sleep; confirm that calling it inside rcu_read_lock() is safe.
	 */
	rcu_read_lock();

	for_each_iommu(iommu, drhd) {
		/* Skip IOMMUs that have no PMU attached. */
		if (!iommu->pmu)
			continue;
		perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target);
	}
	rcu_read_unlock();

	return 0;
}

/* Number of IOMMU PMUs currently sharing the CPU hotplug state. */
static int nr_iommu_pmu;

/*
 * Take a reference on the shared CPU hotplug state.
 *
 * Only the first caller (counter was zero) installs the online/offline
 * callbacks; later callers just bump the counter. If installation fails
 * the counter is reset to zero and the error is returned.
 *
 * Returns 0 on success, or the cpuhp_setup_state() error code.
 */
static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu)
{
	int err;

	/* Callbacks already installed by an earlier PMU. */
	if (nr_iommu_pmu != 0) {
		nr_iommu_pmu++;
		return 0;
	}

	nr_iommu_pmu = 1;
	err = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
				"driver/iommu/intel/perfmon:online",
				iommu_pmu_cpu_online,
				iommu_pmu_cpu_offline);
	if (err != 0)
		nr_iommu_pmu = 0;

	return err;
}

/*
 * Drop one reference on the shared CPU hotplug state.
 *
 * The hotplug state is removed only when the last IOMMU PMU goes away,
 * i.e. when the counter drops to zero.
 */
static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu)
{
	/* Other PMUs still rely on the callbacks. */
	if (--nr_iommu_pmu)
		return;

	cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE);
}

/*
 * Register the PMU attached to @iommu, then set up the CPU hotplug
 * state for it.
 *
 * Does nothing when @iommu has no PMU. If either step fails, whatever
 * was already registered is undone, an error is logged and the PMU is
 * freed with free_iommu_pmu().
 */
void iommu_pmu_register(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	if (!__iommu_pmu_register(iommu)) {
		if (!iommu_pmu_cpuhp_setup(iommu_pmu))
			return;

		/* Hotplug setup failed: undo the perf registration. */
		perf_pmu_unregister(&iommu_pmu->pmu);
	}

	pr_err("Failed to register PMU for iommu (seq_id = %d)\n", iommu->seq_id);
	free_iommu_pmu(iommu);
}

/*
 * Unregister the PMU attached to @iommu.
 *
 * Does nothing when @iommu has no PMU. Otherwise the reference on the
 * shared CPU hotplug state is dropped first, then the PMU is
 * unregistered exactly once.
 */
void iommu_pmu_unregister(struct intel_iommu *iommu)
{
	struct iommu_pmu *iommu_pmu = iommu->pmu;

	if (!iommu_pmu)
		return;

	iommu_pmu_cpuhp_free(iommu_pmu);
	perf_pmu_unregister(&iommu_pmu->pmu);
}
+1 −0
Original line number Diff line number Diff line
@@ -221,6 +221,7 @@ enum cpuhp_state {
	CPUHP_AP_PERF_X86_CQM_ONLINE,
	CPUHP_AP_PERF_X86_CSTATE_ONLINE,
	CPUHP_AP_PERF_X86_IDXD_ONLINE,
	CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE,
	CPUHP_AP_PERF_S390_CF_ONLINE,
	CPUHP_AP_PERF_S390_SF_ONLINE,
	CPUHP_AP_PERF_ARM_CCI_ONLINE,