Commit 728a748b authored by Linus Torvalds
Browse files
Pull PCI fixes from Bjorn Helgaas:

 - Clear 64-bit flag for host bridge windows below 4GB to fix a resource
   allocation regression added in -rc1 (Punit Agrawal)

 - Fix tegra194 MCFG quirk build regressions added in -rc1 (Jon Hunter)

 - Avoid secondary bus resets on TI KeyStone C667X devices (Antti
   Järvinen)

 - Avoid secondary bus resets on some NVIDIA GPUs (Shanker Donthineni)

 - Work around FLR erratum on Huawei Intelligent NIC VF (Chiqijun)

 - Avoid broken ATS on AMD Navi14 GPU (Evan Quan)

 - Trust Broadcom BCM57414 NIC to isolate functions even though it
   doesn't advertise ACS support (Sriharsha Basavapatna)

 - Work around AMD RS690 BIOSes that don't configure DMA above 4GB
   (Mikel Rychliski)

 - Fix panic during PIO transfer on Aardvark controller (Pali Rohár)

* tag 'pci-v5.13-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci:
  PCI: aardvark: Fix kernel panic during PIO transfer
  PCI: Add AMD RS690 quirk to enable 64-bit DMA
  PCI: Add ACS quirk for Broadcom BCM57414 NIC
  PCI: Mark AMD Navi14 GPU ATS as broken
  PCI: Work around Huawei Intelligent NIC VF FLR erratum
  PCI: Mark some NVIDIA GPUs to avoid bus reset
  PCI: Mark TI C667X to avoid bus reset
  PCI: tegra194: Fix MCFG quirk build regressions
  PCI: of: Clear 64-bit flag for non-prefetchable memory below 4GB
parents 9620ad86 f1813996
Loading
Loading
Loading
Loading
+44 −0
Original line number Diff line number Diff line
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);

/*
 * RS690 registers touched by rs690_fix_64bit_dma(). The TOP_OF_DRAM2
 * registers are reached indirectly: a register number is written to the
 * INDEX config-space offset and the value is then read or written at DATA.
 */
/* Register number selecting the lower TOP_OF_DRAM2 word */
#define RS690_LOWER_TOP_OF_DRAM2	0x30
/* Bit OR-ed into the lower TOP_OF_DRAM2 value when it is programmed */
#define RS690_LOWER_TOP_OF_DRAM2_VALID	0x1
/* Register number selecting the upper TOP_OF_DRAM2 word (address bits above 32) */
#define RS690_UPPER_TOP_OF_DRAM2	0x31
/* Config-space offset of the index register */
#define RS690_HTIU_NB_INDEX		0xA8
/* OR-ed into the index value before a write through the DATA register */
#define RS690_HTIU_NB_INDEX_WR_ENABLE	0x100
/* Config-space offset of the data register */
#define RS690_HTIU_NB_DATA		0xAC

/*
 * Some BIOS implementations support RAM above 4GB, but do not configure the
 * PCI host to respond to bus master accesses for these addresses. These
 * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
 * works as expected for addresses below 4GB.
 *
 * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
 * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
 */
static void rs690_fix_64bit_dma(struct pci_dev *pdev)
{
	/* Physical address one past the last byte of directly mapped RAM. */
	phys_addr_t dram_end = __pa(high_memory - 1) + 1;
	u32 tom2_lo = 0;

	/* All RAM is at or below 4GB: the existing setup already covers it. */
	if (dram_end <= (1ULL << 32))
		return;

	/* Select the lower TOP_OF_DRAM2 register and fetch its current value. */
	pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
			       RS690_LOWER_TOP_OF_DRAM2);
	pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &tom2_lo);

	/* A non-zero value is left alone; only an unprogrammed register is fixed. */
	if (tom2_lo != 0)
		return;

	pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &dram_end);

	/* Upper word first: address bits 32 and above. */
	pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
			       RS690_UPPER_TOP_OF_DRAM2 |
			       RS690_HTIU_NB_INDEX_WR_ENABLE);
	pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, dram_end >> 32);

	/* Then the lower word, with the VALID bit set. */
	pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
			       RS690_LOWER_TOP_OF_DRAM2 |
			       RS690_HTIU_NB_INDEX_WR_ENABLE);
	pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
			       dram_end | RS690_LOWER_TOP_OF_DRAM2_VALID);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);

#endif
+2 −1
Original line number Diff line number Diff line
@@ -18,6 +18,7 @@ obj-$(CONFIG_PCIE_INTEL_GW) += pcie-intel-gw.o
obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
obj-$(CONFIG_PCI_MESON) += pci-meson.o
obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o

@@ -38,6 +39,6 @@ ifdef CONFIG_ACPI
ifdef CONFIG_PCI_QUIRKS
obj-$(CONFIG_ARM64) += pcie-al.o
obj-$(CONFIG_ARM64) += pcie-hisi.o
obj-$(CONFIG_ARM64) += pcie-tegra194.o
obj-$(CONFIG_ARM64) += pcie-tegra194-acpi.o
endif
endif
+108 −0
Original line number Diff line number Diff line
// SPDX-License-Identifier: GPL-2.0+
/*
 * ACPI quirks for Tegra194 PCIe host controller
 *
 * Copyright (C) 2021 NVIDIA Corporation.
 *
 * Author: Vidya Sagar <vidyas@nvidia.com>
 */

#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/pci-ecam.h>

#include "pcie-designware.h"

/*
 * MMIO pointers used by the Tegra194 ACPI/ECAM quirk. All three are derived
 * from the mapped config window in tegra194_acpi_init().
 */
struct tegra194_pcie_ecam {
	/* Start of the mapped window (cfg->win) */
	void __iomem *config_base;
	/* Window + SZ_256K; outbound iATU registers are written here */
	void __iomem *iatu_base;
	/* Window + SZ_512K; returned for config accesses on the first bus */
	void __iomem *dbi_base;
};

/*
 * ECAM .init hook: allocate the quirk's private state (device-managed,
 * owned by cfg->parent) and split the mapped window into its three regions.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int tegra194_acpi_init(struct pci_config_window *cfg)
{
	struct tegra194_pcie_ecam *ecam;

	ecam = devm_kzalloc(cfg->parent, sizeof(*ecam), GFP_KERNEL);
	if (!ecam)
		return -ENOMEM;

	ecam->config_base = cfg->win;
	ecam->iatu_base = cfg->win + SZ_256K;
	ecam->dbi_base = cfg->win + SZ_512K;

	cfg->priv = ecam;

	return 0;
}

/*
 * Write @val to register @reg of outbound iATU region @index. The region's
 * register bank is located via PCIE_GET_ATU_OUTB_UNR_REG_OFFSET().
 */
static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
			  u32 val, u32 reg)
{
	u32 region_off = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
	void __iomem *addr = pcie_ecam->iatu_base + region_off + reg;

	writel(val, addr);
}

/*
 * Program outbound iATU region @index so that the CPU range
 * [@cpu_addr, @cpu_addr + @size - 1] is translated to @pci_addr with
 * transaction type @type, then enable the region.
 *
 * Only the low 32 bits of the limit address are written. The register
 * write order below is kept deliberately; the enable bit goes last.
 */
static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
				 int index, int type, u64 cpu_addr,
				 u64 pci_addr, u64 size)
{
	const u64 last_addr = cpu_addr + size - 1;
	const u32 base_lo = lower_32_bits(cpu_addr);
	const u32 base_hi = upper_32_bits(cpu_addr);
	const u32 target_lo = lower_32_bits(pci_addr);
	const u32 target_hi = upper_32_bits(pci_addr);
	const u32 limit_lo = lower_32_bits(last_addr);

	atu_reg_write(pcie_ecam, index, base_lo, PCIE_ATU_LOWER_BASE);
	atu_reg_write(pcie_ecam, index, base_hi, PCIE_ATU_UPPER_BASE);
	atu_reg_write(pcie_ecam, index, target_lo, PCIE_ATU_LOWER_TARGET);
	atu_reg_write(pcie_ecam, index, limit_lo, PCIE_ATU_LIMIT);
	atu_reg_write(pcie_ecam, index, target_hi, PCIE_ATU_UPPER_TARGET);
	atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
	atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
}

/*
 * .map_bus hook: return the MMIO address to use for a config access to
 * @devfn on @bus at offset @where, or NULL if the device is not reachable.
 *
 *  - Buses outside cfg->busr are rejected.
 *  - On the first bus only slot 0 exists; it is accessed through dbi_base.
 *  - For every other bus, outbound iATU region 0 is reprogrammed to target
 *    the requested bus/device/function and the access goes through
 *    config_base. A bus directly below the first bus uses CFG0 (slot 0
 *    only); deeper buses use CFG1.
 */
static void __iomem *tegra194_map_bus(struct pci_bus *bus,
				      unsigned int devfn, int where)
{
	struct pci_config_window *cfg = bus->sysdata;
	struct tegra194_pcie_ecam *ecam = cfg->priv;
	unsigned int slot = PCI_SLOT(devfn);
	u32 target;
	int atu_type;

	if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
		return NULL;

	if (bus->number == cfg->busr.start) {
		if (slot != 0)
			return NULL;

		return ecam->dbi_base + where;
	}

	if (bus->parent->number != cfg->busr.start)
		atu_type = PCIE_ATU_TYPE_CFG1;
	else if (slot == 0)
		atu_type = PCIE_ATU_TYPE_CFG0;
	else
		return NULL;

	target = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(slot) |
		 PCIE_ATU_FUNC(PCI_FUNC(devfn));

	program_outbound_atu(ecam, 0, atu_type, cfg->res.start, target,
			     SZ_256K);

	return ecam->config_base + where;
}

/*
 * ECAM ops for the Tegra194 ACPI quirk: custom init and bus mapping, with
 * the generic config read/write helpers on top of the mapped address.
 */
const struct pci_ecam_ops tegra194_pcie_ops = {
	.init = tegra194_acpi_init,
	.pci_ops = {
		.map_bus = tegra194_map_bus,
		.read = pci_generic_config_read,
		.write = pci_generic_config_write,
	},
};
+18 −120
Original line number Diff line number Diff line
@@ -22,8 +22,6 @@
#include <linux/of_irq.h>
#include <linux/of_pci.h>
#include <linux/pci.h>
#include <linux/pci-acpi.h>
#include <linux/pci-ecam.h>
#include <linux/phy/phy.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
@@ -247,24 +245,6 @@ static const unsigned int pcie_gen_freq[] = {
	GEN4_CORE_CLK_FREQ
};

/*
 * Event counter control register offsets, six entries.
 * NOTE(review): presumably indexed by controller ID — confirm against users.
 */
static const u32 event_cntr_ctrl_offset[] = {
	[0] = 0x1d8,
	[1] = 0x1a8,
	[2] = 0x1a8,
	[3] = 0x1a8,
	[4] = 0x1c4,
	[5] = 0x1d8,
};

/*
 * Event counter data register offsets, six entries.
 * NOTE(review): presumably indexed by controller ID — confirm against users.
 */
static const u32 event_cntr_data_offset[] = {
	[0] = 0x1dc,
	[1] = 0x1ac,
	[2] = 0x1ac,
	[3] = 0x1ac,
	[4] = 0x1c8,
	[5] = 0x1dc,
};

struct tegra_pcie_dw {
	struct device *dev;
	struct resource *appl_res;
@@ -313,104 +293,6 @@ struct tegra_pcie_dw_of_data {
	enum dw_pcie_device_mode mode;
};

#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
/*
 * MMIO pointers used by the Tegra194 ACPI/ECAM quirk. All three are derived
 * from the mapped config window in tegra194_acpi_init().
 */
struct tegra194_pcie_ecam {
	/* Start of the mapped window (cfg->win) */
	void __iomem *config_base;
	/* Window + SZ_256K; outbound iATU registers are written here */
	void __iomem *iatu_base;
	/* Window + SZ_512K; returned for config accesses on the first bus */
	void __iomem *dbi_base;
};

/*
 * ECAM .init hook: allocate the quirk's private state (device-managed,
 * owned by cfg->parent) and split the mapped window into its three regions.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int tegra194_acpi_init(struct pci_config_window *cfg)
{
	struct tegra194_pcie_ecam *ecam;

	ecam = devm_kzalloc(cfg->parent, sizeof(*ecam), GFP_KERNEL);
	if (!ecam)
		return -ENOMEM;

	ecam->config_base = cfg->win;
	ecam->iatu_base = cfg->win + SZ_256K;
	ecam->dbi_base = cfg->win + SZ_512K;

	cfg->priv = ecam;

	return 0;
}

/*
 * Write @val to register @reg of outbound iATU region @index. The region's
 * register bank is located via PCIE_GET_ATU_OUTB_UNR_REG_OFFSET().
 */
static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
			  u32 val, u32 reg)
{
	u32 region_off = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
	void __iomem *addr = pcie_ecam->iatu_base + region_off + reg;

	writel(val, addr);
}

/*
 * Program outbound iATU region @index so that the CPU range
 * [@cpu_addr, @cpu_addr + @size - 1] is translated to @pci_addr with
 * transaction type @type, then enable the region.
 *
 * Only the low 32 bits of the limit address are written. The register
 * write order below is kept deliberately; the enable bit goes last.
 */
static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
				 int index, int type, u64 cpu_addr,
				 u64 pci_addr, u64 size)
{
	const u64 last_addr = cpu_addr + size - 1;
	const u32 base_lo = lower_32_bits(cpu_addr);
	const u32 base_hi = upper_32_bits(cpu_addr);
	const u32 target_lo = lower_32_bits(pci_addr);
	const u32 target_hi = upper_32_bits(pci_addr);
	const u32 limit_lo = lower_32_bits(last_addr);

	atu_reg_write(pcie_ecam, index, base_lo, PCIE_ATU_LOWER_BASE);
	atu_reg_write(pcie_ecam, index, base_hi, PCIE_ATU_UPPER_BASE);
	atu_reg_write(pcie_ecam, index, target_lo, PCIE_ATU_LOWER_TARGET);
	atu_reg_write(pcie_ecam, index, limit_lo, PCIE_ATU_LIMIT);
	atu_reg_write(pcie_ecam, index, target_hi, PCIE_ATU_UPPER_TARGET);
	atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
	atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
}

/*
 * .map_bus hook: return the MMIO address to use for a config access to
 * @devfn on @bus at offset @where, or NULL if the device is not reachable.
 *
 *  - Buses outside cfg->busr are rejected.
 *  - On the first bus only slot 0 exists; it is accessed through dbi_base.
 *  - For every other bus, outbound iATU region 0 is reprogrammed to target
 *    the requested bus/device/function and the access goes through
 *    config_base. A bus directly below the first bus uses CFG0 (slot 0
 *    only); deeper buses use CFG1.
 */
static void __iomem *tegra194_map_bus(struct pci_bus *bus,
				      unsigned int devfn, int where)
{
	struct pci_config_window *cfg = bus->sysdata;
	struct tegra194_pcie_ecam *ecam = cfg->priv;
	unsigned int slot = PCI_SLOT(devfn);
	u32 target;
	int atu_type;

	if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
		return NULL;

	if (bus->number == cfg->busr.start) {
		if (slot != 0)
			return NULL;

		return ecam->dbi_base + where;
	}

	if (bus->parent->number != cfg->busr.start)
		atu_type = PCIE_ATU_TYPE_CFG1;
	else if (slot == 0)
		atu_type = PCIE_ATU_TYPE_CFG0;
	else
		return NULL;

	target = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(slot) |
		 PCIE_ATU_FUNC(PCI_FUNC(devfn));

	program_outbound_atu(ecam, 0, atu_type, cfg->res.start, target,
			     SZ_256K);

	return ecam->config_base + where;
}

/*
 * ECAM ops for the Tegra194 ACPI quirk: custom init and bus mapping, with
 * the generic config read/write helpers on top of the mapped address.
 */
const struct pci_ecam_ops tegra194_pcie_ops = {
	.init = tegra194_acpi_init,
	.pci_ops = {
		.map_bus = tegra194_map_bus,
		.read = pci_generic_config_read,
		.write = pci_generic_config_write,
	},
};
#endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */

#ifdef CONFIG_PCIE_TEGRA194

static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
{
	return container_of(pci, struct tegra_pcie_dw, pci);
@@ -694,6 +576,24 @@ static struct pci_ops tegra_pci_ops = {
};

#if defined(CONFIG_PCIEASPM)
/*
 * Event counter control register offsets, six entries.
 * NOTE(review): presumably indexed by controller ID — confirm against users.
 */
static const u32 event_cntr_ctrl_offset[] = {
	[0] = 0x1d8,
	[1] = 0x1a8,
	[2] = 0x1a8,
	[3] = 0x1a8,
	[4] = 0x1c4,
	[5] = 0x1d8,
};

/*
 * Event counter data register offsets, six entries.
 * NOTE(review): presumably indexed by controller ID — confirm against users.
 */
static const u32 event_cntr_data_offset[] = {
	[0] = 0x1dc,
	[1] = 0x1ac,
	[2] = 0x1ac,
	[3] = 0x1ac,
	[4] = 0x1c8,
	[5] = 0x1dc,
};

static void disable_aspm_l11(struct tegra_pcie_dw *pcie)
{
	u32 val;
@@ -2411,5 +2311,3 @@ MODULE_DEVICE_TABLE(of, tegra_pcie_dw_of_match);
MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
MODULE_LICENSE("GPL v2");

#endif /* CONFIG_PCIE_TEGRA194 */
+40 −9
Original line number Diff line number Diff line
@@ -514,7 +514,7 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie)
		udelay(PIO_RETRY_DELAY);
	}

	dev_err(dev, "config read/write timed out\n");
	dev_err(dev, "PIO read/write transfer time out\n");
	return -ETIMEDOUT;
}

@@ -657,6 +657,35 @@ static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus,
	return true;
}

static bool advk_pcie_pio_is_running(struct advk_pcie *pcie)
{
	struct device *dev = &pcie->pdev->dev;

	/*
	 * Starting a new PIO transfer before the previous one has completed
	 * causes an External Abort on the CPU, which results in a kernel
	 * panic:
	 *
	 *     SError Interrupt on CPU0, code 0xbf000002 -- SError
	 *     Kernel panic - not syncing: Asynchronous SError Interrupt
	 *
	 * advk_pcie_rd_conf() and advk_pcie_wr_conf() are serialized by
	 * raw_spin_lock_irqsave() at the pci_lock_config() level, so they
	 * never run concurrently. However, a PIO transfer may take about
	 * 1.5s when the link is down or the card is disconnected, so
	 * advk_pcie_wait_pio() may return before the transfer has actually
	 * completed.
	 *
	 * Some versions of ARM Trusted Firmware handle this External Abort
	 * at EL3 and mask it to prevent the kernel panic. Relevant TF-A
	 * commit:
	 * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50
	 */
	if (!advk_readl(pcie, PIO_START))
		return false;

	dev_err(dev, "Previous PIO read/write transfer is still running\n");
	return true;
}

static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
			     int where, int size, u32 *val)
{
@@ -673,9 +702,10 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
		return pci_bridge_emul_conf_read(&pcie->bridge, where,
						 size, val);

	/* Start PIO */
	advk_writel(pcie, 0, PIO_START);
	advk_writel(pcie, 1, PIO_ISR);
	if (advk_pcie_pio_is_running(pcie)) {
		*val = 0xffffffff;
		return PCIBIOS_SET_FAILED;
	}

	/* Program the control register */
	reg = advk_readl(pcie, PIO_CTRL);
@@ -694,7 +724,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn,
	/* Program the data strobe */
	advk_writel(pcie, 0xf, PIO_WR_DATA_STRB);

	/* Start the transfer */
	/* Clear PIO DONE ISR and start the transfer */
	advk_writel(pcie, 1, PIO_ISR);
	advk_writel(pcie, 1, PIO_START);

	ret = advk_pcie_wait_pio(pcie);
@@ -734,9 +765,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
	if (where % size)
		return PCIBIOS_SET_FAILED;

	/* Start PIO */
	advk_writel(pcie, 0, PIO_START);
	advk_writel(pcie, 1, PIO_ISR);
	if (advk_pcie_pio_is_running(pcie))
		return PCIBIOS_SET_FAILED;

	/* Program the control register */
	reg = advk_readl(pcie, PIO_CTRL);
@@ -763,7 +793,8 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
	/* Program the data strobe */
	advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB);

	/* Start the transfer */
	/* Clear PIO DONE ISR and start the transfer */
	advk_writel(pcie, 1, PIO_ISR);
	advk_writel(pcie, 1, PIO_START);

	ret = advk_pcie_wait_pio(pcie);
Loading