Unverified Commit af481f24 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!1315 Intel: Backport mainline UPI uncore discovery warning fixes for SPR MCC to OLK-5.10

Merge Pull Request from: @yunyingsun 
 
Title:
Backport mainline UPI uncore discovery warning fixes for SPR MCC to OLK-5.10

Content:
The discovery table of UPI on SPR MCC is broken. The broken discovery table triggers a kernel warning message on SPR MCC: "WARNING: CPU: xx PID: xx at arch/x86/events/intel/uncore_discovery.c:184  intel_uncore_has_discovery_tables+ ...", which is overkill.

The backported patch series mitigates the issue by providing a hard-coded, pre-defined table, and it also refines the error-handling code.

Commits from mainline kernel v6.3-rc1:
5d515ee4 perf/x86/uncore: Don't WARN_ON_ONCE() for a broken discovery table
65248a9a perf/x86/uncore: Add a quirk for UPI on SPR
bd9514a4 perf/x86/uncore: Ignore broken units in discovery table
3af548f2 perf/x86/uncore: Fix potential NULL pointer in uncore_get_alias_name
dbf061b2 perf/x86/uncore: Factor out uncore_device_to_die()

It has been verified on our SPR MCC(spreec01) that with the backported patches, the uncore related kernel warning and the call trace are not seen anymore.

Kernel issue:
https://gitee.com/openeuler/kernel/issues/I7H29Y

Test:
1. Before the backport, with the OLK-5.10 kernel on SPR MCC, the following warning and call trace are seen in dmesg (with "dmesg | grep uncore"):
"WARNING: CPU: xx PID: xx at arch/x86/events/intel/uncore_discovery.c:184  intel_uncore_has_discovery_tables+".

After the backport, the above warning and call trace are gone. Instead, new uncore informational messages are seen:
"[    6.801611] intel_uncore: Duplicate uncore type 3 box ID 7 is detected, Drop the duplicate uncore unit.
[    6.801638] intel_uncore: Duplicate uncore type 1 box ID 7 is detected, Drop the duplicate uncore unit.
[    6.801663] intel_uncore: Duplicate uncore type 2 box ID 7 is detected, Drop the duplicate uncore unit."

2. Before the backport, there are no uncore_m3upi_x devices under /sys/devices/.
After the backport, uncore_m3upi_1/2 are available under /sys/devices/.

Known issue:
N/A

Default config change:
N/A 
 
Link:https://gitee.com/openeuler/kernel/pulls/1315

 

Reviewed-by: Jun Tian <jun.j.tian@intel.com>
Reviewed-by: Jason Zeng <jason.zeng@intel.com>
Reviewed-by: Aichun Shi <aichun.shi@intel.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
parents c5633a85 cab52da6
Loading
Loading
Loading
Loading
+30 −4
Original line number Diff line number Diff line
@@ -53,6 +53,21 @@ int uncore_pcibus_to_dieid(struct pci_bus *bus)
	return die_id;
}

/*
 * Map a PCI device to a logical die ID.
 *
 * Walks the CPUs associated with the device's bus and returns the
 * logical_die_id of the first initialized CPU whose NUMA node matches
 * the node of the bus. Returns -1 when no such CPU is found.
 */
int uncore_device_to_die(struct pci_dev *dev)
{
	int bus_node = pcibus_to_node(dev->bus);
	int cpu;

	for_each_cpu(cpu, cpumask_of_pcibus(dev->bus)) {
		struct cpuinfo_x86 *info = &cpu_data(cpu);

		/* Skip CPUs whose cpuinfo has not been initialized. */
		if (!info->initialized)
			continue;

		/* Only a CPU on the same node as the bus identifies the die. */
		if (cpu_to_node(cpu) != bus_node)
			continue;

		return info->logical_die_id;
	}

	return -1;
}

static void uncore_free_pcibus_map(void)
{
	struct pci2phy_map *map, *tmp;
@@ -834,6 +849,12 @@ static const struct attribute_group uncore_pmu_attr_group = {
	.attrs = uncore_pmu_attrs,
};

/*
 * Return the box ID of @pmu: the entry of @type->box_ids indexed by
 * pmu->pmu_idx when a box_ids table is present, otherwise pmu->pmu_idx
 * itself.
 */
static inline int uncore_get_box_id(struct intel_uncore_type *type,
				    struct intel_uncore_pmu *pmu)
{
	if (!type->box_ids)
		return pmu->pmu_idx;

	return type->box_ids[pmu->pmu_idx];
}

void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
{
	struct intel_uncore_type *type = pmu->type;
@@ -842,7 +863,7 @@ void uncore_get_alias_name(char *pmu_name, struct intel_uncore_pmu *pmu)
		sprintf(pmu_name, "uncore_type_%u", type->type_id);
	else {
		sprintf(pmu_name, "uncore_type_%u_%d",
			type->type_id, type->box_ids[pmu->pmu_idx]);
			type->type_id, uncore_get_box_id(type, pmu));
	}
}

@@ -869,7 +890,7 @@ static void uncore_get_pmu_name(struct intel_uncore_pmu *pmu)
		 * Use the box ID from the discovery table if applicable.
		 */
		sprintf(pmu->name, "uncore_%s_%d", type->name,
			type->box_ids ? type->box_ids[pmu->pmu_idx] : pmu->pmu_idx);
			uncore_get_box_id(type, pmu));
	}
}

@@ -1666,7 +1687,10 @@ struct intel_uncore_init_fun {
	void	(*cpu_init)(void);
	int	(*pci_init)(void);
	void	(*mmio_init)(void);
	/* Discovery table is required */
	bool	use_discovery;
	/* The units in the discovery table should be ignored. */
	int	*uncore_units_ignore;
};

static const struct intel_uncore_init_fun nhm_uncore_init __initconst = {
@@ -1764,6 +1788,7 @@ static const struct intel_uncore_init_fun spr_uncore_init __initconst = {
	.pci_init = spr_uncore_pci_init,
	.mmio_init = spr_uncore_mmio_init,
	.use_discovery = true,
	.uncore_units_ignore = spr_uncore_units_ignore,
};

static const struct intel_uncore_init_fun generic_uncore_init __initconst = {
@@ -1828,7 +1853,7 @@ static int __init intel_uncore_init(void)

	id = x86_match_cpu(intel_uncore_match);
	if (!id) {
		if (!uncore_no_discover && intel_uncore_has_discovery_tables())
		if (!uncore_no_discover && intel_uncore_has_discovery_tables(NULL))
			uncore_init = (struct intel_uncore_init_fun *)&generic_uncore_init;
		else
			return -ENODEV;
@@ -1836,7 +1861,8 @@ static int __init intel_uncore_init(void)
		uncore_init = (struct intel_uncore_init_fun *)id->driver_data;
		if (uncore_no_discover && uncore_init->use_discovery)
			return -ENODEV;
		if (uncore_init->use_discovery && !intel_uncore_has_discovery_tables())
		if (uncore_init->use_discovery &&
		    !intel_uncore_has_discovery_tables(uncore_init->uncore_units_ignore))
			return -ENODEV;
	}

+4 −0
Original line number Diff line number Diff line
@@ -33,6 +33,8 @@

#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)

/* Sentinel value that terminates a list of uncore unit types to ignore. */
#define UNCORE_IGNORE_END		-1

struct pci_extra_dev {
	struct pci_dev *dev[UNCORE_EXTRA_PCI_DEV_MAX];
};
@@ -184,6 +186,7 @@ struct pci2phy_map {

struct pci2phy_map *__find_pci2phy_map(int segment);
int uncore_pcibus_to_dieid(struct pci_bus *bus);
int uncore_device_to_die(struct pci_dev *dev);

ssize_t uncore_event_show(struct device *dev,
			  struct device_attribute *attr, char *buf);
@@ -565,6 +568,7 @@ extern raw_spinlock_t pci2phy_map_lock;
extern struct list_head pci2phy_map_head;
extern struct pci_extra_dev *uncore_extra_pci_dev;
extern struct event_constraint uncore_constraint_empty;
extern int spr_uncore_units_ignore[];

/* uncore_snb.c */
int snb_uncore_pci_init(void);
+40 −20
Original line number Diff line number Diff line
@@ -33,7 +33,7 @@ static int logical_die_id;

static int get_device_die_id(struct pci_dev *dev)
{
	int cpu, node = pcibus_to_node(dev->bus);
	int node = pcibus_to_node(dev->bus);

	/*
	 * If the NUMA info is not available, assume that the logical die id is
@@ -43,19 +43,7 @@ static int get_device_die_id(struct pci_dev *dev)
	if (node < 0)
		return logical_die_id++;

	for_each_cpu(cpu, cpumask_of_node(node)) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->initialized && cpu_to_node(cpu) == node)
			return c->logical_die_id;
	}

	/*
	 * All CPUs of a node may be offlined. For this case,
	 * the PCI and MMIO type of uncore blocks which are
	 * enumerated by the device will be unavailable.
	 */
	return -1;
	return uncore_device_to_die(dev);
}

#define __node_2_type(cur)	\
@@ -140,13 +128,21 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
	unsigned int *box_offset, *ids;
	int i;

	if (WARN_ON_ONCE(!unit->ctl || !unit->ctl_offset || !unit->ctr_offset))
	if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) {
		pr_info("Invalid address is detected for uncore type %d box %d, "
			"Disable the uncore unit.\n",
			unit->box_type, unit->box_id);
		return;
	}

	if (parsed) {
		type = search_uncore_discovery_type(unit->box_type);
		if (WARN_ON_ONCE(!type))
		if (!type) {
			pr_info("A spurious uncore type %d is detected, "
				"Disable the uncore type.\n",
				unit->box_type);
			return;
		}
		/* Store the first box of each die */
		if (!type->box_ctrl_die[die])
			type->box_ctrl_die[die] = unit->ctl;
@@ -181,9 +177,13 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,
		ids[i] = type->ids[i];
		box_offset[i] = type->box_offset[i];

		if (WARN_ON_ONCE(unit->box_id == ids[i]))
		if (unit->box_id == ids[i]) {
			pr_info("Duplicate uncore type %d box ID %d is detected, "
				"Drop the duplicate uncore unit.\n",
				unit->box_type, unit->box_id);
			goto free_ids;
		}
	}
	ids[i] = unit->box_id;
	box_offset[i] = unit->ctl - type->box_ctrl;
	kfree(type->ids);
@@ -202,8 +202,25 @@ uncore_insert_box_info(struct uncore_unit_discovery *unit,

}

/*
 * Tell whether @unit should be skipped while parsing the discovery table.
 *
 * @ignore is either NULL (nothing is ignored) or an array of box types
 * terminated by UNCORE_IGNORE_END. Returns true when the box type of
 * @unit appears in that array.
 */
static bool
uncore_ignore_unit(struct uncore_unit_discovery *unit, int *ignore)
{
	int *entry;

	if (!ignore)
		return false;

	for (entry = ignore; *entry != UNCORE_IGNORE_END; entry++) {
		if (unit->box_type == *entry)
			return true;
	}

	return false;
}

static int parse_discovery_table(struct pci_dev *dev, int die,
				 u32 bar_offset, bool *parsed)
				 u32 bar_offset, bool *parsed,
				 int *ignore)
{
	struct uncore_global_discovery global;
	struct uncore_unit_discovery unit;
@@ -250,6 +267,9 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
		if (unit.access_type >= UNCORE_ACCESS_MAX)
			continue;

		if (uncore_ignore_unit(&unit, ignore))
			continue;

		uncore_insert_box_info(&unit, die, *parsed);
	}

@@ -258,7 +278,7 @@ static int parse_discovery_table(struct pci_dev *dev, int die,
	return 0;
}

bool intel_uncore_has_discovery_tables(void)
bool intel_uncore_has_discovery_tables(int *ignore)
{
	u32 device, val, entry_id, bar_offset;
	int die, dvsec = 0, ret = true;
@@ -294,7 +314,7 @@ bool intel_uncore_has_discovery_tables(void)
			if (die < 0)
				continue;

			parse_discovery_table(dev, die, bar_offset, &parsed);
			parse_discovery_table(dev, die, bar_offset, &parsed, ignore);
		}
	}

+10 −4
Original line number Diff line number Diff line
@@ -23,9 +23,15 @@
/* Global discovery table size */
#define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE	0x20

#define UNCORE_DISCOVERY_PCI_DOMAIN(data)	((data >> 28) & 0x7)
#define UNCORE_DISCOVERY_PCI_BUS(data)		((data >> 20) & 0xff)
#define UNCORE_DISCOVERY_PCI_DEVFN(data)	((data >> 12) & 0xff)
#define UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET	28
#define UNCORE_DISCOVERY_PCI_DOMAIN(data)			\
		((data >> UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) & 0x7)
#define UNCORE_DISCOVERY_PCI_BUS_OFFSET		20
#define UNCORE_DISCOVERY_PCI_BUS(data)				\
		((data >> UNCORE_DISCOVERY_PCI_BUS_OFFSET) & 0xff)
#define UNCORE_DISCOVERY_PCI_DEVFN_OFFSET	12
#define UNCORE_DISCOVERY_PCI_DEVFN(data)			\
		((data >> UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) & 0xff)
#define UNCORE_DISCOVERY_PCI_BOX_CTRL(data)	(data & 0xfff)


@@ -124,7 +130,7 @@ struct intel_uncore_discovery_type {
	unsigned int	*box_offset;	/* Box offset */
};

bool intel_uncore_has_discovery_tables(void);
bool intel_uncore_has_discovery_tables(int *ignore);
void intel_uncore_clear_discovery_tables(void);
void intel_uncore_generic_uncore_cpu_init(void);
int intel_uncore_generic_uncore_pci_init(void);
+119 −25
Original line number Diff line number Diff line
@@ -1427,9 +1427,6 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
			}
			raw_spin_unlock(&pci2phy_map_lock);
		} else {
			int node = pcibus_to_node(ubox_dev->bus);
			int cpu;

			segment = pci_domain_nr(ubox_dev->bus);
			raw_spin_lock(&pci2phy_map_lock);
			map = __find_pci2phy_map(segment);
@@ -1439,15 +1436,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
				break;
			}

			die_id = -1;
			for_each_cpu(cpu, cpumask_of_pcibus(ubox_dev->bus)) {
				struct cpuinfo_x86 *c = &cpu_data(cpu);
			map->pbus_to_dieid[bus] = die_id = uncore_device_to_die(ubox_dev);

				if (c->initialized && cpu_to_node(cpu) == node) {
					map->pbus_to_dieid[bus] = die_id = c->logical_die_id;
					break;
				}
			}
			raw_spin_unlock(&pci2phy_map_lock);

			if (WARN_ON_ONCE(die_id == -1)) {
@@ -5601,17 +5591,6 @@ static struct intel_uncore_type spr_uncore_m2m = {
	.name			= "m2m",
};

static struct intel_uncore_type spr_uncore_upi = {
	SPR_UNCORE_PCI_COMMON_FORMAT(),
	.name			= "upi",
};

static struct intel_uncore_type spr_uncore_m3upi = {
	SPR_UNCORE_PCI_COMMON_FORMAT(),
	.name			= "m3upi",
	.constraints		= icx_uncore_m3upi_constraints,
};

static struct intel_uncore_type spr_uncore_mdf = {
	SPR_UNCORE_COMMON_FORMAT(),
	.name			= "mdf",
@@ -5620,7 +5599,13 @@ static struct intel_uncore_type spr_uncore_mdf = {
#define UNCORE_SPR_NUM_UNCORE_TYPES		12
#define UNCORE_SPR_IIO				1
#define UNCORE_SPR_IMC				6
#define UNCORE_SPR_UPI				8
#define UNCORE_SPR_M3UPI			9

/*
 * The uncore units, which are supported by the discovery table,
 * are defined here.
 */
static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
	&spr_uncore_chabox,
	&spr_uncore_iio,
@@ -5630,12 +5615,49 @@ static struct intel_uncore_type *spr_uncores[UNCORE_SPR_NUM_UNCORE_TYPES] = {
	NULL,
	&spr_uncore_imc,
	&spr_uncore_m2m,
	&spr_uncore_upi,
	&spr_uncore_m3upi,
	NULL,
	NULL,
	NULL,
	&spr_uncore_mdf,
};

/*
 * The uncore units, which are not supported by the discovery table,
 * are implemented from here.
 */
#define SPR_UNCORE_UPI_NUM_BOXES	4

static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = {
	0, 0x8000, 0x10000, 0x18000
};

/*
 * Pre-defined UPI uncore type for SPR. It is registered through
 * spr_pci_uncores instead of the discovery-table driven spr_uncores[],
 * and reuses the ICX UPI PMON register definitions.
 */
static struct intel_uncore_type spr_uncore_upi = {
       SPR_UNCORE_PCI_COMMON_FORMAT(),
       .name                   = "upi",
       .type_id                = UNCORE_SPR_UPI,
       .num_counters           = 4,
       .num_boxes              = SPR_UNCORE_UPI_NUM_BOXES,
       .perf_ctr_bits          = 48,
       .perf_ctr               = ICX_UPI_PCI_PMON_CTR0,
       .event_ctl              = ICX_UPI_PCI_PMON_CTL0,
       .box_ctl                = ICX_UPI_PCI_PMON_BOX_CTL,
       .pci_offsets            = spr_upi_pci_offsets,
};

/*
 * Pre-defined M3UPI uncore type for SPR. It is registered through
 * spr_pci_uncores instead of the discovery-table driven spr_uncores[].
 * It reuses the ICX M3UPI PMON register definitions and event
 * constraints, and shares the per-box PCI offsets with the UPI type.
 */
static struct intel_uncore_type spr_uncore_m3upi = {
       SPR_UNCORE_PCI_COMMON_FORMAT(),
       .name                   = "m3upi",
       .type_id                = UNCORE_SPR_M3UPI,
       .num_counters           = 4,
       .num_boxes              = SPR_UNCORE_UPI_NUM_BOXES,
       .perf_ctr_bits          = 48,
       .perf_ctr               = ICX_M3UPI_PCI_PMON_CTR0,
       .event_ctl              = ICX_M3UPI_PCI_PMON_CTL0,
       .box_ctl                = ICX_M3UPI_PCI_PMON_BOX_CTL,
       .pci_offsets            = spr_upi_pci_offsets,
       .constraints            = icx_uncore_m3upi_constraints,
};

enum perf_uncore_spr_iio_freerunning_type_id {
	SPR_IIO_MSR_IOCLK,
	SPR_IIO_MSR_BW_IN,
@@ -5766,6 +5788,7 @@ static struct intel_uncore_type spr_uncore_imc_free_running = {

#define UNCORE_SPR_MSR_EXTRA_UNCORES		1
#define UNCORE_SPR_MMIO_EXTRA_UNCORES		1
#define UNCORE_SPR_PCI_EXTRA_UNCORES		2

static struct intel_uncore_type *spr_msr_uncores[UNCORE_SPR_MSR_EXTRA_UNCORES] = {
	&spr_uncore_iio_free_running,
@@ -5775,6 +5798,17 @@ static struct intel_uncore_type *spr_mmio_uncores[UNCORE_SPR_MMIO_EXTRA_UNCORES]
	&spr_uncore_imc_free_running,
};

/*
 * Extra PCI uncore types (the pre-defined UPI and M3UPI) that
 * spr_uncore_pci_init() hands to uncore_get_uncores().
 */
static struct intel_uncore_type *spr_pci_uncores[UNCORE_SPR_PCI_EXTRA_UNCORES] = {
	&spr_uncore_upi,
	&spr_uncore_m3upi
};

/*
 * Unit types to ignore in the discovery table on SPR: UPI and M3UPI.
 * The list is terminated by UNCORE_IGNORE_END.
 */
int spr_uncore_units_ignore[] = {
	UNCORE_SPR_UPI,
	UNCORE_SPR_M3UPI,
	UNCORE_IGNORE_END
};

static void uncore_type_customized_copy(struct intel_uncore_type *to_type,
					struct intel_uncore_type *from_type)
{
@@ -5869,9 +5903,69 @@ void spr_uncore_cpu_init(void)
	spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO);
}

#define SPR_UNCORE_UPI_PCIID		0x3241
#define SPR_UNCORE_UPI0_DEVFN		0x9
#define SPR_UNCORE_M3UPI_PCIID		0x3246
#define SPR_UNCORE_M3UPI0_DEVFN		0x29

/*
 * Fill in the per-die box control locations of the pre-defined UPI or
 * M3UPI uncore type.
 *
 * @type_id: UNCORE_SPR_UPI or UNCORE_SPR_M3UPI; any other value is a no-op.
 *
 * Every Intel PCI device with the matching device ID and devfn is mapped
 * to its die. The PCI domain, bus number, devfn and the type's box_ctl are
 * then packed into that die's slot using the
 * UNCORE_DISCOVERY_PCI_*_OFFSET bit positions. If the array cannot be
 * allocated, the type's num_boxes is set to 0 and box_ctls is left
 * untouched.
 */
static void spr_update_device_location(int type_id)
{
	struct intel_uncore_type *type;
	struct pci_dev *pdev = NULL;
	u32 pci_id, unit_devfn;
	u64 *die_ctls;
	int die;

	if (type_id == UNCORE_SPR_M3UPI) {
		type = &spr_uncore_m3upi;
		pci_id = SPR_UNCORE_M3UPI_PCIID;
		unit_devfn = SPR_UNCORE_M3UPI0_DEVFN;
	} else if (type_id == UNCORE_SPR_UPI) {
		type = &spr_uncore_upi;
		pci_id = SPR_UNCORE_UPI_PCIID;
		unit_devfn = SPR_UNCORE_UPI0_DEVFN;
	} else {
		return;
	}

	die_ctls = kcalloc(__uncore_max_dies, sizeof(*die_ctls), GFP_KERNEL);
	if (!die_ctls) {
		type->num_boxes = 0;
		return;
	}

	for (;;) {
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL, pci_id, pdev);
		if (!pdev)
			break;

		/* Only the device at the expected devfn is of interest. */
		if (pdev->devfn != unit_devfn)
			continue;

		die = uncore_device_to_die(pdev);
		if (die < 0)
			continue;

		die_ctls[die] = (pci_domain_nr(pdev->bus) << UNCORE_DISCOVERY_PCI_DOMAIN_OFFSET) |
				(pdev->bus->number << UNCORE_DISCOVERY_PCI_BUS_OFFSET) |
				(unit_devfn << UNCORE_DISCOVERY_PCI_DEVFN_OFFSET) |
				type->box_ctl;
	}

	type->box_ctls = die_ctls;
}

int spr_uncore_pci_init(void)
{
	uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL);
	/*
	 * The discovery table of UPI on some SPR variant is broken,
	 * which impacts the detection of both UPI and M3UPI uncore PMON.
	 * Use the pre-defined UPI and M3UPI table to replace.
	 *
	 * The accurate location, e.g., domain and BUS number,
	 * can only be retrieved at load time.
	 * Update the location of UPI and M3UPI.
	 */
	spr_update_device_location(UNCORE_SPR_UPI);
	spr_update_device_location(UNCORE_SPR_M3UPI);
	uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI,
						UNCORE_SPR_PCI_EXTRA_UNCORES,
						spr_pci_uncores);
	return 0;
}