Documentation/devicetree/bindings/iommu/arm,smmu.txt  +6 −0

@@ -43,6 +43,12 @@ conditions.

 ** System MMU optional properties:

+- dma-coherent  : Present if page table walks made by the SMMU are
+                  cache coherent with the CPU.
+
+                  NOTE: this only applies to the SMMU itself, not
+                  masters connected upstream of the SMMU.
+
 - calxeda,smmu-secure-config-access : Enable proper handling of buggy
                   implementations that always use secure access to
                   SMMU configuration registers. In this case non-secure

drivers/iommu/Kconfig  +2 −1

@@ -23,7 +23,8 @@ config IOMMU_IO_PGTABLE
 config IOMMU_IO_PGTABLE_LPAE
        bool "ARMv7/v8 Long Descriptor Format"
        select IOMMU_IO_PGTABLE
-       depends on ARM || ARM64 || COMPILE_TEST
+       # SWIOTLB guarantees a dma_to_phys() implementation
+       depends on ARM || ARM64 || (COMPILE_TEST && SWIOTLB)
        help
          Enable support for the ARM long descriptor pagetable format.
          This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
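Note: the tightened COMPILE_TEST dependency follows from how the io-pgtable changes further down use the DMA API. Page tables are still handed to the hardware walker as raw physical addresses, while CPU-side maintenance now goes through dma_map_single()/dma_sync_single_for_device(), so the code has to compare the two views, and SWIOTLB is what guarantees dma_to_phys()/phys_to_dma() exist for a COMPILE_TEST build. A rough kernel-style sketch of the invariant being relied on (illustrative only, not part of this diff; the helper name is made up and includes are abbreviated):

#include <linux/device.h>
#include <linux/dma-mapping.h>  /* assumed to make phys_to_dma() visible here */

/*
 * Hypothetical helper: a table page is only usable if mapping it for the
 * walker introduces no translation, i.e. DMA address == physical address.
 */
static bool pgtable_dma_addr_is_identity(struct device *dev, void *table,
                                         size_t size)
{
        dma_addr_t dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
        bool ok;

        if (dma_mapping_error(dev, dma))
                return false;

        ok = (dma == phys_to_dma(dev, virt_to_phys(table)));
        dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
        return ok;
}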
drivers/iommu/arm-smmu-v3.c  +27 −39

@@ -118,6 +118,7 @@
 #define ARM_SMMU_IRQ_CTRL              0x50
 #define IRQ_CTRL_EVTQ_IRQEN            (1 << 2)
+#define IRQ_CTRL_PRIQ_IRQEN            (1 << 1)
 #define IRQ_CTRL_GERROR_IRQEN          (1 << 0)

 #define ARM_SMMU_IRQ_CTRLACK           0x54

@@ -173,14 +174,14 @@
 #define ARM_SMMU_PRIQ_IRQ_CFG2         0xdc

 /* Common MSI config fields */
-#define MSI_CFG0_SH_SHIFT              60
-#define MSI_CFG0_SH_NSH                        (0UL << MSI_CFG0_SH_SHIFT)
-#define MSI_CFG0_SH_OSH                        (2UL << MSI_CFG0_SH_SHIFT)
-#define MSI_CFG0_SH_ISH                        (3UL << MSI_CFG0_SH_SHIFT)
-#define MSI_CFG0_MEMATTR_SHIFT         56
-#define MSI_CFG0_MEMATTR_DEVICE_nGnRE  (0x1 << MSI_CFG0_MEMATTR_SHIFT)
 #define MSI_CFG0_ADDR_SHIFT            2
 #define MSI_CFG0_ADDR_MASK             0x3fffffffffffUL
+#define MSI_CFG2_SH_SHIFT              4
+#define MSI_CFG2_SH_NSH                        (0UL << MSI_CFG2_SH_SHIFT)
+#define MSI_CFG2_SH_OSH                        (2UL << MSI_CFG2_SH_SHIFT)
+#define MSI_CFG2_SH_ISH                        (3UL << MSI_CFG2_SH_SHIFT)
+#define MSI_CFG2_MEMATTR_SHIFT         0
+#define MSI_CFG2_MEMATTR_DEVICE_nGnRE  (0x1 << MSI_CFG2_MEMATTR_SHIFT)

 #define Q_IDX(q, p)                    ((p) & ((1 << (q)->max_n_shift) - 1))
 #define Q_WRP(q, p)                    ((p) & (1 << (q)->max_n_shift))

@@ -1330,33 +1331,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
        arm_smmu_cmdq_issue_cmd(smmu, &cmd);
 }

-static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
-{
-       struct arm_smmu_domain *smmu_domain = cookie;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
-
-       if (smmu->features & ARM_SMMU_FEAT_COHERENCY) {
-               dsb(ishst);
-       } else {
-               dma_addr_t dma_addr;
-               struct device *dev = smmu->dev;
-
-               dma_addr = dma_map_page(dev, virt_to_page(addr), offset, size,
-                                       DMA_TO_DEVICE);
-
-               if (dma_mapping_error(dev, dma_addr))
-                       dev_err(dev, "failed to flush pgtable at %p\n", addr);
-               else
-                       dma_unmap_page(dev, dma_addr, size, DMA_TO_DEVICE);
-       }
-}
-
 static struct iommu_gather_ops arm_smmu_gather_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context,
        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
        .tlb_sync       = arm_smmu_tlb_sync,
-       .flush_pgtable  = arm_smmu_flush_pgtable,
 };

 /* IOMMU API */

@@ -1531,6 +1509,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
                .ias            = ias,
                .oas            = oas,
                .tlb            = &arm_smmu_gather_ops,
+               .iommu_dev      = smmu->dev,
        };

        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);

@@ -2053,9 +2032,17 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu)
        int ret;
        struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg;

-       /* Calculate the L1 size, capped to the SIDSIZE */
-       size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
-       size = min(size, smmu->sid_bits - STRTAB_SPLIT);
+       /*
+        * If we can resolve everything with a single L2 table, then we
+        * just need a single L1 descriptor. Otherwise, calculate the L1
+        * size, capped to the SIDSIZE.
+        */
+       if (smmu->sid_bits < STRTAB_SPLIT) {
+               size = 0;
+       } else {
+               size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3);
+               size = min(size, smmu->sid_bits - STRTAB_SPLIT);
+       }
        cfg->num_l1_ents = 1 << size;

        size += STRTAB_SPLIT;

@@ -2198,6 +2185,7 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 {
        int ret, irq;
+       u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;

        /* Disable IRQs first */
        ret = arm_smmu_write_reg_sync(smmu, 0, ARM_SMMU_IRQ_CTRL,

@@ -2252,13 +2240,13 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
                        if (IS_ERR_VALUE(ret))
                                dev_warn(smmu->dev,
                                         "failed to enable priq irq\n");
+                       else
+                               irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
                }
        }

        /* Enable interrupt generation on the SMMU */
-       ret = arm_smmu_write_reg_sync(smmu,
-                                     IRQ_CTRL_EVTQ_IRQEN |
-                                     IRQ_CTRL_GERROR_IRQEN,
+       ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
                                      ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
        if (ret)
                dev_warn(smmu->dev, "failed to enable irqs\n");

@@ -2540,12 +2528,12 @@ static int arm_smmu_device_probe(struct arm_smmu_device *smmu)
        case IDR5_OAS_44_BIT:
                smmu->oas = 44;
                break;
+       default:
+               dev_info(smmu->dev,
+                       "unknown output address size. Truncating to 48-bit\n");
+               /* Fallthrough */
        case IDR5_OAS_48_BIT:
                smmu->oas = 48;
                break;
-       default:
-               dev_err(smmu->dev, "unknown output address size!\n");
-               return -ENXIO;
        }

        /* Set the DMA mask for our table walker */
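Note: the new guard in arm_smmu_init_strtab_2lvl() matters when the StreamID space is smaller than the split point, where the old `smmu->sid_bits - STRTAB_SPLIT` term went negative (and could wrap with the unsigned types involved). A standalone arithmetic sketch, using the constants the driver defines at this point in time (STRTAB_SPLIT = 8, STRTAB_L1_SZ_SHIFT = 20, STRTAB_L1_DESC_DWORDS = 1; treat the exact values as illustrative):

#include <stdio.h>

#define STRTAB_SPLIT            8
#define STRTAB_L1_SZ_SHIFT      20
#define STRTAB_L1_DESC_DWORDS   1

/* Minimal stand-in for the kernel's ilog2() on small positive values. */
static unsigned int ilog2_u32(unsigned int x)
{
        unsigned int n = 0;

        while (x > 1) {
                x >>= 1;
                n++;
        }
        return n;
}

static unsigned int l1_entries(unsigned int sid_bits)
{
        unsigned int size;

        if (sid_bits < STRTAB_SPLIT) {
                /* Everything fits in one L2 table: one L1 descriptor. */
                size = 0;
        } else {
                size = STRTAB_L1_SZ_SHIFT - (ilog2_u32(STRTAB_L1_DESC_DWORDS) + 3);
                if (size > sid_bits - STRTAB_SPLIT)
                        size = sid_bits - STRTAB_SPLIT;
        }
        return 1u << size;
}

int main(void)
{
        printf("sid_bits=6  -> %u L1 entries\n", l1_entries(6));   /* 1      */
        printf("sid_bits=16 -> %u L1 entries\n", l1_entries(16));  /* 256    */
        printf("sid_bits=32 -> %u L1 entries\n", l1_entries(32));  /* 131072 */
        return 0;
}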
drivers/iommu/arm-smmu.c  +18 −27

@@ -37,6 +37,7 @@
 #include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>

@@ -607,34 +608,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
        }
 }

-static void arm_smmu_flush_pgtable(void *addr, size_t size, void *cookie)
-{
-       struct arm_smmu_domain *smmu_domain = cookie;
-       struct arm_smmu_device *smmu = smmu_domain->smmu;
-       unsigned long offset = (unsigned long)addr & ~PAGE_MASK;
-
-       /* Ensure new page tables are visible to the hardware walker */
-       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) {
-               dsb(ishst);
-       } else {
-               /*
-                * If the SMMU can't walk tables in the CPU caches, treat them
-                * like non-coherent DMA since we need to flush the new entries
-                * all the way out to memory. There's no possibility of
-                * recursion here as the SMMU table walker will not be wired
-                * through another SMMU.
-                */
-               dma_map_page(smmu->dev, virt_to_page(addr), offset, size,
-                            DMA_TO_DEVICE);
-       }
-}
-
 static struct iommu_gather_ops arm_smmu_gather_ops = {
        .tlb_flush_all  = arm_smmu_tlb_inv_context,
        .tlb_add_flush  = arm_smmu_tlb_inv_range_nosync,
        .tlb_sync       = arm_smmu_tlb_sync,
-       .flush_pgtable  = arm_smmu_flush_pgtable,
 };

 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)

@@ -898,6 +875,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                .ias            = ias,
                .oas            = oas,
                .tlb            = &arm_smmu_gather_ops,
+               .iommu_dev      = smmu->dev,
        };

        smmu_domain->smmu = smmu;

@@ -1532,6 +1510,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
        unsigned long size;
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
        u32 id;
+       bool cttw_dt, cttw_reg;

        dev_notice(smmu->dev, "probing hardware configuration...\n");
        dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version);

@@ -1571,10 +1550,22 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                dev_notice(smmu->dev, "\taddress translation ops\n");
        }

-       if (id & ID0_CTTW) {
+       /*
+        * In order for DMA API calls to work properly, we must defer to what
+        * the DT says about coherency, regardless of what the hardware claims.
+        * Fortunately, this also opens up a workaround for systems where the
+        * ID register value has ended up configured incorrectly.
+        */
+       cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
+       cttw_reg = !!(id & ID0_CTTW);
+       if (cttw_dt)
                smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
-               dev_notice(smmu->dev, "\tcoherent table walk\n");
-       }
+       if (cttw_dt || cttw_reg)
+               dev_notice(smmu->dev, "\t%scoherent table walk\n",
+                          cttw_dt ? "" : "non-");
+       if (cttw_dt != cttw_reg)
+               dev_notice(smmu->dev,
+                          "\t(IDR0.CTTW overridden by dma-coherent property)\n");

        if (id & ID0_SMS) {
                u32 smr, sid, mask;
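Note: the effect of the new probe logic is easiest to see as a truth table over the two coherency sources. The sketch below just re-states the three `if`s above as a standalone program so all four DT/ID-register combinations can be printed (illustrative; the log strings are copied from the driver, everything else is made up):

#include <stdbool.h>
#include <stdio.h>

static void cttw_report(bool cttw_dt, bool cttw_reg)
{
        /* Only the DT property decides ARM_SMMU_FEAT_COHERENT_WALK. */
        bool coherent_walk = cttw_dt;

        printf("dma-coherent=%d IDR0.CTTW=%d:", cttw_dt, cttw_reg);
        if (cttw_dt || cttw_reg)
                printf(" %scoherent table walk", cttw_dt ? "" : "non-");
        if (cttw_dt != cttw_reg)
                printf(" (IDR0.CTTW overridden by dma-coherent property)");
        printf(" [COHERENT_WALK=%d]\n", coherent_walk);
}

int main(void)
{
        cttw_report(false, false);
        cttw_report(false, true);   /* DT overrides a set ID bit     */
        cttw_report(true,  false);  /* DT overrides a cleared ID bit */
        cttw_report(true,  true);
        return 0;
}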
drivers/iommu/io-pgtable-arm.c  +93 −33

@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/types.h>

+#include <asm/barrier.h>
+
 #include "io-pgtable.h"

 #define ARM_LPAE_MAX_ADDR_BITS         48

@@ -200,12 +202,74 @@ typedef u64 arm_lpae_iopte;

 static bool selftest_running = false;

+static dma_addr_t __arm_lpae_dma_addr(struct device *dev, void *pages)
+{
+       return phys_to_dma(dev, virt_to_phys(pages));
+}
+
+static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
+                                   struct io_pgtable_cfg *cfg)
+{
+       struct device *dev = cfg->iommu_dev;
+       dma_addr_t dma;
+       void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO);
+
+       if (!pages)
+               return NULL;
+
+       if (!selftest_running) {
+               dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
+               if (dma_mapping_error(dev, dma))
+                       goto out_free;
+               /*
+                * We depend on the IOMMU being able to work with any physical
+                * address directly, so if the DMA layer suggests it can't by
+                * giving us back some translation, that bodes very badly...
+                */
+               if (dma != __arm_lpae_dma_addr(dev, pages))
+                       goto out_unmap;
+       }
+
+       return pages;
+
+out_unmap:
+       dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
+       dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
+out_free:
+       free_pages_exact(pages, size);
+       return NULL;
+}
+
+static void __arm_lpae_free_pages(void *pages, size_t size,
+                                 struct io_pgtable_cfg *cfg)
+{
+       struct device *dev = cfg->iommu_dev;
+
+       if (!selftest_running)
+               dma_unmap_single(dev, __arm_lpae_dma_addr(dev, pages),
+                                size, DMA_TO_DEVICE);
+       free_pages_exact(pages, size);
+}
+
+static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte,
+                              struct io_pgtable_cfg *cfg)
+{
+       struct device *dev = cfg->iommu_dev;
+
+       *ptep = pte;
+
+       if (!selftest_running)
+               dma_sync_single_for_device(dev, __arm_lpae_dma_addr(dev, ptep),
+                                          sizeof(pte), DMA_TO_DEVICE);
+}
+
 static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                             unsigned long iova, phys_addr_t paddr,
                             arm_lpae_iopte prot, int lvl,
                             arm_lpae_iopte *ptep)
 {
        arm_lpae_iopte pte = prot;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;

        /* We require an unmap first */
        if (iopte_leaf(*ptep, lvl)) {
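Note: the three helpers above are the whole of the new memory-management contract: allocate-and-map, publish a PTE with a sync, unmap-and-free. The rest of the patch just converts callers onto them, so here is a condensed sketch of the intended call pattern (kernel-style; it mirrors the table-installation path converted below, with quirk handling omitted, and adds no new API):

/*
 * Sketch only: installing a next-level table with the new helpers,
 * assuming cfg->iommu_dev was filled in by the SMMU driver (see the
 * .iommu_dev additions above).
 */
static int install_table_sketch(struct io_pgtable_cfg *cfg,
                                arm_lpae_iopte *parent_ptep, size_t table_size)
{
        arm_lpae_iopte pte;
        void *table;

        /* Zeroed allocation, DMA-mapped for the walker up front. */
        table = __arm_lpae_alloc_pages(table_size, GFP_ATOMIC, cfg);
        if (!table)
                return -ENOMEM;

        /* Point the parent entry at it; set_pte also syncs that one PTE. */
        pte = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE;
        __arm_lpae_set_pte(parent_ptep, pte, cfg);

        /*
         * Teardown (elsewhere) goes through the matching free helper:
         * __arm_lpae_free_pages(table, table_size, cfg);
         */
        return 0;
}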
@@ -213,7 +277,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
                return -EEXIST;
        }

-       if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                pte |= ARM_LPAE_PTE_NS;

        if (lvl == ARM_LPAE_MAX_LEVELS - 1)

@@ -224,8 +288,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
        pte |= ARM_LPAE_PTE_AF | ARM_LPAE_PTE_SH_IS;
        pte |= pfn_to_iopte(paddr >> data->pg_shift, data);

-       *ptep = pte;
-       data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), data->iop.cookie);
+       __arm_lpae_set_pte(ptep, pte, cfg);
        return 0;
 }

@@ -234,14 +297,14 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
                          int lvl, arm_lpae_iopte *ptep)
 {
        arm_lpae_iopte *cptep, pte;
-       void *cookie = data->iop.cookie;
        size_t block_size = ARM_LPAE_BLOCK_SIZE(lvl, data);
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;

        /* Find our entry at the current level */
        ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);

        /* If we can install a leaf entry at this level, then do so */
-       if (size == block_size && (size & data->iop.cfg.pgsize_bitmap))
+       if (size == block_size && (size & cfg->pgsize_bitmap))
                return arm_lpae_init_pte(data, iova, paddr, prot, lvl, ptep);

        /* We can't allocate tables at the final level */

@@ -251,18 +314,15 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
        /* Grab a pointer to the next level */
        pte = *ptep;
        if (!pte) {
-               cptep = alloc_pages_exact(1UL << data->pg_shift,
-                                         GFP_ATOMIC | __GFP_ZERO);
+               cptep = __arm_lpae_alloc_pages(1UL << data->pg_shift,
+                                              GFP_ATOMIC, cfg);
                if (!cptep)
                        return -ENOMEM;

-               data->iop.cfg.tlb->flush_pgtable(cptep, 1UL << data->pg_shift,
-                                                cookie);
                pte = __pa(cptep) | ARM_LPAE_PTE_TYPE_TABLE;
-               if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
+               if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
                        pte |= ARM_LPAE_PTE_NSTABLE;
-               *ptep = pte;
-               data->iop.cfg.tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+               __arm_lpae_set_pte(ptep, pte, cfg);
        } else {
                cptep = iopte_deref(pte, data);
        }

@@ -309,7 +369,7 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
        arm_lpae_iopte *ptep = data->pgd;
-       int lvl = ARM_LPAE_START_LVL(data);
+       int ret, lvl = ARM_LPAE_START_LVL(data);
        arm_lpae_iopte prot;

        /* If no access, then nothing to do */

@@ -317,7 +377,14 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
                return 0;

        prot = arm_lpae_prot_to_pte(data, iommu_prot);
-       return __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
+       ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
+       /*
+        * Synchronise all PTE updates for the new mapping before there's
+        * a chance for anything to kick off a table walk for the new iova.
+        */
+       wmb();
+
+       return ret;
 }

 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,

@@ -347,7 +414,7 @@ static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
                __arm_lpae_free_pgtable(data, lvl + 1, iopte_deref(pte, data));
        }

-       free_pages_exact(start, table_size);
+       __arm_lpae_free_pages(start, table_size, &data->iop.cfg);
 }

 static void arm_lpae_free_pgtable(struct io_pgtable *iop)

@@ -366,8 +433,7 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
        unsigned long blk_start, blk_end;
        phys_addr_t blk_paddr;
        arm_lpae_iopte table = 0;
-       void *cookie = data->iop.cookie;
-       const struct iommu_gather_ops *tlb = data->iop.cfg.tlb;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;

        blk_start = iova & ~(blk_size - 1);
        blk_end = blk_start + blk_size;

@@ -393,10 +459,9 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
                }
        }

-       *ptep = table;
-       tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+       __arm_lpae_set_pte(ptep, table, cfg);
        iova &= ~(blk_size - 1);
-       tlb->tlb_add_flush(iova, blk_size, true, cookie);
+       cfg->tlb->tlb_add_flush(iova, blk_size, true, data->iop.cookie);
        return size;
 }

@@ -418,13 +483,12 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
        /* If the size matches this level, we're in the right place */
        if (size == blk_size) {
-               *ptep = 0;
-               tlb->flush_pgtable(ptep, sizeof(*ptep), cookie);
+               __arm_lpae_set_pte(ptep, 0, &data->iop.cfg);

                if (!iopte_leaf(pte, lvl)) {
                        /* Also flush any partial walks */
                        tlb->tlb_add_flush(iova, size, false, cookie);
-                       tlb->tlb_sync(data->iop.cookie);
+                       tlb->tlb_sync(cookie);
                        ptep = iopte_deref(pte, data);
                        __arm_lpae_free_pgtable(data, lvl + 1, ptep);
                } else {

@@ -640,11 +704,12 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
        cfg->arm_lpae_s1_cfg.mair[1] = 0;

        /* Looking good; allocate a pgd */
-       data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
+       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
        if (!data->pgd)
                goto out_free_data;

-       cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
+       /* Ensure the empty pgd is visible before any actual TTBR write */
+       wmb();

        /* TTBRs */
        cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);

@@ -728,11 +793,12 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
        cfg->arm_lpae_s2_cfg.vtcr = reg;

        /* Allocate pgd pages */
-       data->pgd = alloc_pages_exact(data->pgd_size, GFP_KERNEL | __GFP_ZERO);
+       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
        if (!data->pgd)
                goto out_free_data;

-       cfg->tlb->flush_pgtable(data->pgd, data->pgd_size, cookie);
+       /* Ensure the empty pgd is visible before any actual TTBR write */
+       wmb();

        /* VTTBR */
        cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
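Note: two different barriers are in play after this conversion and they are not interchangeable. The dma_sync_single_for_device() inside __arm_lpae_set_pte() makes an individual PTE visible to a non-coherent walker, while the bare wmb() added in arm_lpae_map() and after the pgd allocations orders the whole set of updates against whatever subsequently exposes them (a TTBR/VTTBR write, or the caller telling a device to start DMA on the new iova). A hedged sketch of the caller-visible contract (the surrounding function is hypothetical; only the io_pgtable_ops call is real):

/* Sketch: what a map() call guarantees to its caller after this patch. */
static int map_then_expose_sketch(struct io_pgtable_ops *ops,
                                  unsigned long iova, phys_addr_t paddr,
                                  size_t size, int prot)
{
        int ret;

        /*
         * arm_lpae_map() internally does per-PTE dma_sync_single_for_device()
         * for non-coherent walkers, then a single wmb() so that nothing the
         * caller does next can overtake the table updates.
         */
        ret = ops->map(ops, iova, paddr, size, prot);
        if (ret)
                return ret;

        /* Safe: any TTBR write or device kick-off now sees the new tables. */
        return 0;
}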
@@ -818,16 +884,10 @@ static void dummy_tlb_sync(void *cookie)
 {
        WARN_ON(cookie != cfg_cookie);
 }

-static void dummy_flush_pgtable(void *ptr, size_t size, void *cookie)
-{
-       WARN_ON(cookie != cfg_cookie);
-}
-
 static struct iommu_gather_ops dummy_tlb_ops __initdata = {
        .tlb_flush_all  = dummy_tlb_flush_all,
        .tlb_add_flush  = dummy_tlb_add_flush,
        .tlb_sync       = dummy_tlb_sync,
-       .flush_pgtable  = dummy_flush_pgtable,
 };

 static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops)
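Note: with flush_pgtable gone from iommu_gather_ops (including the selftest dummy above), an io-pgtable user now supplies only TLB maintenance callbacks plus the device to DMA-map tables for; page-table memory coherency is handled inside the library. A hedged sketch of a minimal consumer, with placeholder callback names and field values (the struct layout assumed is the one this patch leaves behind):

#include <linux/sizes.h>
#include "io-pgtable.h"

/* Placeholder TLB callbacks: a real driver issues its own invalidations. */
static void my_tlb_flush_all(void *cookie) { }
static void my_tlb_add_flush(unsigned long iova, size_t size, bool leaf,
                             void *cookie) { }
static void my_tlb_sync(void *cookie) { }

static struct iommu_gather_ops my_gather_ops = {
        .tlb_flush_all  = my_tlb_flush_all,
        .tlb_add_flush  = my_tlb_add_flush,
        .tlb_sync       = my_tlb_sync,
        /* no .flush_pgtable member any more */
};

static struct io_pgtable_ops *my_alloc_pgtable(struct device *walker_dev,
                                               void *cookie)
{
        struct io_pgtable_cfg cfg = {
                .pgsize_bitmap  = SZ_4K | SZ_2M | SZ_1G,
                .ias            = 48,
                .oas            = 48,
                .tlb            = &my_gather_ops,
                .iommu_dev      = walker_dev,   /* used for all pgtable DMA mapping */
        };

        return alloc_io_pgtable_ops(ARM_64_LPAE_S1, &cfg, cookie);
}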