Unverified Commit f9cbb8a8 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!3932 [OLK-6.6] 同步OLK-5.10 SMMU HTTU补丁

Merge Pull Request from: @wanywhn 
 
 [OLK-6.6] 同步OLK-5.10 SMMU HTTU补丁



ARM SMMU  HTTU support

Intel-kernel issue:
https://gitee.com/open_euler/dashboard?issue_id=I8UZSP

Some types of IOMMU are capable of tracking DMA dirty log, such as
ARM SMMU with HTTU or Intel IOMMU with SLADE. This introduces the
dirty log tracking framework in the IOMMU base layer. 


Test:
Live migration of a passthrough device completes successfully.

Known issue:


Default config change:
if any recommended config change vs upstream default config, please list as git diff format:
Add a new config option, CONFIG_ARM_SMMU_V3_HTTU, which defaults to n in Kconfig and is set to y in openeuler_defconfig:
+ CONFIG_ARM_SMMU_V3_HTTU=n

Reference:https://lore.kernel.org/linux-iommu/20210507102211.8836-1-zhukeqian1@huawei.com/ 
 
Link:https://gitee.com/openeuler/kernel/pulls/3932

 

Reviewed-by: Xu Kuohai <xukuohai@huawei.com>
Reviewed-by: Kevin Zhu <zhukeqian1@huawei.com>
Reviewed-by: Zucheng Zheng <zhengzucheng@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Reviewed-by: Jason Zeng <jason.zeng@intel.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Acked-by: Xie XiuQi <xiexiuqi@huawei.com>
parents ee951347 2ca35da8
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -6517,6 +6517,7 @@ CONFIG_ARM_SMMU_QCOM=y
CONFIG_ARM_SMMU_V3=y
CONFIG_ARM_SMMU_V3_SVA=y
# CONFIG_ARM_SMMU_V3_PM is not set
CONFIG_ARM_SMMU_V3_HTTU=y
# CONFIG_QCOM_IOMMU is not set
# CONFIG_VIRTIO_IOMMU is not set
CONFIG_SMMU_BYPASS_DEV=y
+7 −0
Original line number Diff line number Diff line
@@ -423,6 +423,13 @@ config ARM_SMMU_V3_PM
	help
	  Add support for suspend and resume support for arm smmu v3.

config ARM_SMMU_V3_HTTU
	bool "Add arm_smmu_v3  Hardware Translation Table Update"
	depends on ARM_SMMU_V3
	default n
	help
	  Add support for Hardware Translation Table Update (HTTU) in the
	  ARM SMMUv3 driver.

config S390_IOMMU
	def_bool y if S390 && PCI
	depends on S390 && PCI
+2 −0
Original line number Diff line number Diff line
@@ -127,10 +127,12 @@ static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm)
	if (err)
		goto out_free_asid;

	/* HA and HD will be filtered out later if not supported by the SMMU */
	tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, 64ULL - vabits_actual) |
	      FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, ARM_LPAE_TCR_RGN_WBWA) |
	      FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, ARM_LPAE_TCR_RGN_WBWA) |
	      FIELD_PREP(CTXDESC_CD_0_TCR_SH0, ARM_LPAE_TCR_SH_IS) |
	      CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD |
	      CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;

	switch (PAGE_SIZE) {
+271 −1
Original line number Diff line number Diff line
@@ -1149,10 +1149,17 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
		 * this substream's traffic
		 */
	} else { /* (1) and (2) */
		u64 tcr = cd->tcr;

		cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK);
		cdptr[2] = 0;
		cdptr[3] = cpu_to_le64(cd->mair);

		if (!(smmu->features & ARM_SMMU_FEAT_HD))
			tcr &= ~CTXDESC_CD_0_TCR_HD;
		if (!(smmu->features & ARM_SMMU_FEAT_HA))
			tcr &= ~CTXDESC_CD_0_TCR_HA;

		/*
		 * STE is live, and the SMMU might read dwords of this CD in any
		 * order. Ensure that it observes valid values before reading
@@ -1160,7 +1167,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
		 */
		arm_smmu_sync_cd(smmu_domain, ssid, true);

		val = cd->tcr |
		val = tcr |
#ifdef __BIG_ENDIAN
			CTXDESC_CD_0_ENDI |
#endif
@@ -2177,6 +2184,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
			  FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
			  FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
			  CTXDESC_CD_0_TCR_HA | CTXDESC_CD_0_TCR_HD |
			  CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair;

@@ -2282,6 +2290,14 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->features & ARM_SMMU_FEAT_HD)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD;

	if (smmu->features & ARM_SMMU_FEAT_BBML1)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_BBML1;
	else if (smmu->features & ARM_SMMU_FEAT_BBML2)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_BBML2;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops)
		return -ENOMEM;
@@ -2828,6 +2844,209 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain)
	return ret;
}

#ifdef CONFIG_ARM_SMMU_V3_HTTU
/*
 * Ask the io-pgtable layer to split the block mappings that cover
 * [iova, iova + size).
 *
 * Returns 0 when the callback reports the whole range as handled, -ENODEV
 * when the SMMU advertises neither BBML1 nor BBML2 or when the page-table
 * ops provide no split_block callback, and -EFAULT when the callback
 * handled a size different from the one requested.
 */
static int arm_smmu_split_block(struct iommu_domain *domain,
				unsigned long iova, size_t size)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *pgtbl = smmu_domain->pgtbl_ops;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	size_t done;

	/* Splitting is only attempted when a BBM level above 0 is present. */
	if (!(smmu->features & ARM_SMMU_FEAT_BBML1) &&
	    !(smmu->features & ARM_SMMU_FEAT_BBML2)) {
		dev_err(smmu->dev, "don't support BBML1/2, can't split block\n");
		return -ENODEV;
	}

	if (!(pgtbl && pgtbl->split_block)) {
		pr_err("io-pgtable don't realize split block\n");
		return -ENODEV;
	}

	done = pgtbl->split_block(pgtbl, iova, size);
	if (done == size)
		return 0;

	pr_err("split block failed\n");
	return -EFAULT;
}

/*
 * Merge the physically contiguous range [iova, iova + size) -> paddr.
 *
 * The range is consumed piece by piece: iommu_pgsize() picks the size of
 * the next piece and the io-pgtable merge_page callback is invoked on it.
 *
 * Returns 0 once the whole range has been consumed, -ENODEV when the
 * page-table ops provide no merge_page callback, and -EFAULT as soon as
 * the callback handles a size different from the piece it was given.
 */
static int __arm_smmu_merge_page(struct iommu_domain *domain,
				 unsigned long iova, phys_addr_t paddr,
				 size_t size, int prot)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *pgtbl = smmu_domain->pgtbl_ops;
	size_t step, merged;

	if (!(pgtbl && pgtbl->merge_page)) {
		pr_err("io-pgtable don't realize merge page\n");
		return -ENODEV;
	}

	for (; size; iova += step, paddr += step, size -= step) {
		step = iommu_pgsize(domain, iova, paddr, size, NULL);

		merged = pgtbl->merge_page(pgtbl, iova, paddr, step, prot);
		if (merged != step) {
			pr_err("merge page failed\n");
			return -EFAULT;
		}

		pr_debug("merge handled: iova 0x%lx pa %pa size 0x%zx\n",
			 iova, &paddr, step);
	}

	return 0;
}

/*
 * Merge page mappings inside [iova, iova + size) back into larger ones.
 *
 * The range is walked in PAGE_SIZE steps. Each run of pages whose physical
 * addresses, as reported by the io-pgtable iova_to_phys callback, are
 * contiguous is handed to __arm_smmu_merge_page(). Runs that consist of a
 * single page are skipped.
 *
 * Returns 0 on success, -ENODEV when the SMMU advertises neither BBML1 nor
 * BBML2 or when the page-table ops provide no iova_to_phys callback,
 * -EINVAL when @size is not a multiple of PAGE_SIZE, or the error returned
 * by __arm_smmu_merge_page().
 */
static int arm_smmu_merge_page(struct iommu_domain *domain, unsigned long iova,
			       size_t size, int prot)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	phys_addr_t phys;
	dma_addr_t p, i;
	size_t cont_size;
	int ret = 0;

	if (!(smmu->features & (ARM_SMMU_FEAT_BBML1 | ARM_SMMU_FEAT_BBML2))) {
		dev_err(smmu->dev, "don't support BBML1/2, can't merge page\n");
		return -ENODEV;
	}

	if (!ops || !ops->iova_to_phys)
		return -ENODEV;

	/*
	 * The walk below consumes the range in whole PAGE_SIZE steps. With a
	 * size that is not a multiple of PAGE_SIZE, "size -= cont_size" would
	 * wrap around and the loop would keep running far beyond the range
	 * the caller asked for, so refuse such a request up front.
	 */
	if (size & (PAGE_SIZE - 1))
		return -EINVAL;

	while (size) {
		/* Start a new run at the current page. */
		phys = ops->iova_to_phys(ops, iova);
		cont_size = PAGE_SIZE;
		p = phys + cont_size;
		i = iova + cont_size;

		/* Extend the run while the next page is physically adjacent. */
		while (cont_size < size && p == ops->iova_to_phys(ops, i)) {
			p += PAGE_SIZE;
			i += PAGE_SIZE;
			cont_size += PAGE_SIZE;
		}

		/* A single page has nothing to be merged with. */
		if (cont_size != PAGE_SIZE) {
			ret = __arm_smmu_merge_page(domain, iova, phys,
						    cont_size, prot);
			if (ret)
				break;
		}

		iova += cont_size;
		size -= cont_size;
	}

	return ret;
}

static bool arm_smmu_support_dirty_log(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	return !!(smmu_domain->smmu->features & ARM_SMMU_FEAT_HD);
}

/*
 * Start (@enable == true) or stop (@enable == false) dirty log tracking on
 * [iova, iova + size).
 *
 * Returns -ENODEV when the SMMU lacks ARM_SMMU_FEAT_HD, -EINVAL when the
 * domain is not a stage-1 domain, and 0 otherwise. The result of the
 * split/merge step is deliberately not propagated to the caller.
 */
static int arm_smmu_switch_dirty_log(struct iommu_domain *domain, bool enable,
				     unsigned long iova, size_t size, int prot)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if ((smmu_domain->smmu->features & ARM_SMMU_FEAT_HD) == 0)
		return -ENODEV;
	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
		return -EINVAL;

	/*
	 * For SMMU, hardware dirty management is always enabled when the
	 * hardware supports HTTU HD, so switching the dirty log on or off
	 * only changes the mapping granularity.
	 */
	if (!enable) {
		/*
		 * Stopping the dirty log means merging page mappings. A
		 * failed merge is not reported as an error because it only
		 * affects the performance of DMA transactions.
		 */
		arm_smmu_merge_page(domain, iova, size, prot);
		return 0;
	}

	/*
	 * Starting the dirty log means splitting block mappings. A failed
	 * split is not reported as an error because dirty state can still be
	 * tracked at block granule, which is a much better choice than a
	 * full dirty policy.
	 */
	arm_smmu_split_block(domain, iova, size);
	return 0;
}

/*
 * Forward a dirty log sync request for [iova, iova + size) to the
 * io-pgtable sync_dirty_log callback, after flushing the domain's IOTLB.
 *
 * Returns -ENODEV when the SMMU lacks ARM_SMMU_FEAT_HD or the page-table
 * ops provide no sync_dirty_log callback, -EINVAL when the domain is not a
 * stage-1 domain, and otherwise whatever the callback returns.
 */
static int arm_smmu_sync_dirty_log(struct iommu_domain *domain,
				   unsigned long iova, size_t size,
				   unsigned long *bitmap,
				   unsigned long base_iova,
				   unsigned long bitmap_pgshift)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *pgtbl = smmu_domain->pgtbl_ops;

	if ((smmu_domain->smmu->features & ARM_SMMU_FEAT_HD) == 0)
		return -ENODEV;
	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
		return -EINVAL;

	if (!(pgtbl && pgtbl->sync_dirty_log)) {
		pr_err("io-pgtable don't realize sync dirty log\n");
		return -ENODEV;
	}

	/*
	 * The IOTLB is flushed first so that all inflight transactions are
	 * completed before the dirty state is read.
	 * See doc IHI0070Da 3.13.4 "HTTU behavior summary".
	 */
	arm_smmu_flush_iotlb_all(domain);

	return pgtbl->sync_dirty_log(pgtbl, iova, size, bitmap, base_iova,
				     bitmap_pgshift);
}

/*
 * Forward a dirty log clear request for [iova, iova + size) to the
 * io-pgtable clear_dirty_log callback.
 *
 * Returns -ENODEV when the SMMU lacks ARM_SMMU_FEAT_HD or the page-table
 * ops provide no clear_dirty_log callback, -EINVAL when the domain is not
 * a stage-1 domain, and otherwise whatever the callback returns.
 */
static int arm_smmu_clear_dirty_log(struct iommu_domain *domain,
				    unsigned long iova, size_t size,
				    unsigned long *bitmap,
				    unsigned long base_iova,
				    unsigned long bitmap_pgshift)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *pgtbl = smmu_domain->pgtbl_ops;

	if ((smmu_domain->smmu->features & ARM_SMMU_FEAT_HD) == 0)
		return -ENODEV;
	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
		return -EINVAL;

	if (!(pgtbl && pgtbl->clear_dirty_log)) {
		pr_err("io-pgtable don't realize clear dirty log\n");
		return -ENODEV;
	}

	return pgtbl->clear_dirty_log(pgtbl, iova, size, bitmap, base_iova,
				      bitmap_pgshift);
}
#endif

static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
@@ -2963,6 +3182,12 @@ static struct iommu_ops arm_smmu_ops = {
#endif
		.iova_to_phys		= arm_smmu_iova_to_phys,
		.enable_nesting		= arm_smmu_enable_nesting,
#ifdef CONFIG_ARM_SMMU_V3_HTTU
		.support_dirty_log	= arm_smmu_support_dirty_log,
		.switch_dirty_log	= arm_smmu_switch_dirty_log,
		.sync_dirty_log		= arm_smmu_sync_dirty_log,
		.clear_dirty_log	= arm_smmu_clear_dirty_log,
#endif
		.free			= arm_smmu_domain_free,
	}
};
@@ -3645,6 +3870,28 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu)
	}
}

/*
 * Translate the IDR0.HTTU field of @reg into HA/HD feature bits.
 *
 * When the device has a DT node, the bits derived from the register are
 * OR-ed into smmu->features. Otherwise smmu->features is left untouched
 * and a warning is printed if the HA/HD bits already present there differ
 * from what the register reports.
 */
static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg)
{
	u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD);
	u32 httu = FIELD_GET(IDR0_HTTU, reg);
	u32 hw_features = 0;

	/* Dirty-state support implies access-flag support. */
	if (httu == IDR0_HTTU_ACCESS_DIRTY)
		hw_features = ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_HA;
	else if (httu == IDR0_HTTU_ACCESS)
		hw_features = ARM_SMMU_FEAT_HA;

	if (smmu->dev->of_node) {
		smmu->features |= hw_features;
		return;
	}

	/* ACPI IORT sets the HTTU bits */
	if (hw_features != fw_features)
		dev_warn(smmu->dev,
			 "IDR0.HTTU overridden by FW configuration (0x%x)\n",
			 fw_features);
}

static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
{
	u32 reg;
@@ -3705,6 +3952,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
			smmu->features |= ARM_SMMU_FEAT_E2H;
	}

	arm_smmu_get_httu(smmu, reg);

	/*
	 * The coherency feature as set by FW is used in preference to the ID
	 * register, but warn on mismatch.
@@ -3789,6 +4038,19 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	switch (FIELD_GET(IDR3_BBML, reg)) {
	case IDR3_BBML0:
		break;
	case IDR3_BBML1:
		smmu->features |= ARM_SMMU_FEAT_BBML1;
		break;
	case IDR3_BBML2:
		smmu->features |= ARM_SMMU_FEAT_BBML2;
		break;
	default:
		dev_err(smmu->dev, "unknown/unsupported BBM behavior level\n");
	}

	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

@@ -3897,6 +4159,14 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
	if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
		smmu->features |= ARM_SMMU_FEAT_COHERENCY;

	switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) {
	case IDR0_HTTU_ACCESS_DIRTY:
		smmu->features |= ARM_SMMU_FEAT_HD;
		fallthrough;
	case IDR0_HTTU_ACCESS:
		smmu->features |= ARM_SMMU_FEAT_HA;
	}

	return 0;
}
#else
+16 −1
Original line number Diff line number Diff line
@@ -33,6 +33,10 @@
#define IDR0_ASID16			(1 << 12)
#define IDR0_ATS			(1 << 10)
#define IDR0_HYP			(1 << 9)
#define IDR0_HTTU			GENMASK(7, 6)
#define IDR0_HTTU_ACCESS		1
#define IDR0_HTTU_ACCESS_DIRTY		2
#define IDR0_BTM			(1 << 5)
#define IDR0_COHACC			(1 << 4)
#define IDR0_TTF			GENMASK(3, 2)
#define IDR0_TTF_AARCH64		2
@@ -51,6 +55,10 @@
#define IDR1_SIDSIZE			GENMASK(5, 0)

#define ARM_SMMU_IDR3			0xc
#define IDR3_BBML			GENMASK(12, 11)
#define IDR3_BBML0			0
#define IDR3_BBML1			1
#define IDR3_BBML2			2
#define IDR3_RIL			(1 << 10)

#define ARM_SMMU_IDR5			0x14
@@ -291,6 +299,9 @@
#define CTXDESC_CD_0_TCR_IPS		GENMASK_ULL(34, 32)
#define CTXDESC_CD_0_TCR_TBI0		(1ULL << 38)

#define CTXDESC_CD_0_TCR_HA		(1UL << 43)
#define CTXDESC_CD_0_TCR_HD		(1UL << 42)

#define CTXDESC_CD_0_AA64		(1UL << 41)
#define CTXDESC_CD_0_S			(1UL << 44)
#define CTXDESC_CD_0_R			(1UL << 45)
@@ -645,7 +656,11 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_BTM		(1 << 16)
#define ARM_SMMU_FEAT_SVA		(1 << 17)
#define ARM_SMMU_FEAT_E2H		(1 << 18)
#define ARM_SMMU_FEAT_NESTING		(1 << 19)
#define ARM_SMMU_FEAT_HA		(1 << 19)
#define ARM_SMMU_FEAT_HD		(1 << 20)
#define ARM_SMMU_FEAT_NESTING		(1 << 21)
#define ARM_SMMU_FEAT_BBML1		(1 << 22)
#define ARM_SMMU_FEAT_BBML2		(1 << 23)
	u32				features;

#define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
Loading