Unverified Commit 43467830 authored by openeuler-ci-bot's avatar openeuler-ci-bot Committed by Gitee
Browse files

!4576 v6 Add support for ecmdq

Merge Pull Request from: @ci-robot 
 
PR sync from: Zhang Zekun <zhangzekun11@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/SRVV3M4AFYOS7TWJFRYI5YLMUA46XJMO/ 
Add support for ecmdq

v2:
fix some code check errors

v3:
Add config isolations

v4:
Enable CONFIG_ARM_SMMU_V3_ECMDQ in openeuler_defconfig
in ARCH arm64

v5:
Fix apply error

v6:
Fix the config isolation error

Zhen Lei (5):
  iommu/arm-smmu-v3: Ensure that a set of associated commands are
    inserted in the same ECMDQ
  iommu/arm-smmu-v3: Add arm_smmu_ecmdq_issue_cmdlist() for non-shared
    ECMDQ
  iommu/arm-smmu-v3: Add support for less than one ECMDQ per core
  iommu/arm-smmu-v3: Allow disabling ECMDQs at boot time


-- 
2.17.1
 
https://gitee.com/openeuler/kernel/issues/I8Z6DC 
 
Link:https://gitee.com/openeuler/kernel/pulls/4576

 

Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Reviewed-by: Liu Chao <liuchao173@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
parents c9ca5762 96897153
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -6520,6 +6520,7 @@ CONFIG_ARM_SMMU_V3=y
CONFIG_ARM_SMMU_V3_SVA=y
# CONFIG_ARM_SMMU_V3_PM is not set
CONFIG_ARM_SMMU_V3_HTTU=y
CONFIG_ARM_SMMU_V3_ECMDQ=y
# CONFIG_QCOM_IOMMU is not set
# CONFIG_VIRTIO_IOMMU is not set
CONFIG_SMMU_BYPASS_DEV=y
+11 −0
Original line number Diff line number Diff line
@@ -430,6 +430,17 @@ config ARM_SMMU_V3_HTTU
	help
	  Add support for Hardware Translation Table Update arm smmu v3.

config ARM_SMMU_V3_ECMDQ
	bool "Add arm_smmu_v3 ecmdq support"
	depends on ARM_SMMU_V3
	default n
	help
	  Add support for the ARM SMMUv3 enhanced command queue (ECMDQ)
	  feature. One SMMU can provide multiple ECMDQs, which reduces
	  contention between CPUs when the driver sends commands.

	  If not sure, say no.

config S390_IOMMU
	def_bool y if S390 && PCI
	depends on S390 && PCI
+463 −5
Original line number Diff line number Diff line
@@ -41,6 +41,12 @@ module_param(disable_msipolling, bool, 0444);
MODULE_PARM_DESC(disable_msipolling,
	"Disable MSI-based polling for CMD_SYNC completion.");

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Read-only (0444) boolean module parameter. When set, the hardware probe
 * does not call arm_smmu_ecmdq_probe() even if the SMMU advertises ECMDQ
 * support.
 */
static bool disable_ecmdq;
module_param(disable_ecmdq, bool, 0444);
MODULE_PARM_DESC(disable_ecmdq,	"Disable the use of ECMDQs");
#endif

#ifdef CONFIG_SMMU_BYPASS_DEV
struct smmu_bypass_device {
	unsigned short vendor;
@@ -296,6 +302,24 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
	return 0;
}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Disable preemption, but only when ECMDQs are in use.
 *
 * arm_smmu_get_cmdq() selects the ECMDQ of the CPU it runs on, so callers
 * that build and then submit a command batch bracket the sequence with
 * arm_smmu_preempt_disable()/arm_smmu_preempt_enable() to stay on one CPU.
 */
static void arm_smmu_preempt_disable(struct arm_smmu_device *smmu)
{
	if (!smmu->ecmdq_enabled)
		return;

	preempt_disable();
}

/*
 * Counterpart of arm_smmu_preempt_disable(): re-enable preemption, but only
 * when ECMDQs are in use (otherwise preemption was never disabled).
 */
static void arm_smmu_preempt_enable(struct arm_smmu_device *smmu)
{
	if (!smmu->ecmdq_enabled)
		return;

	preempt_enable();
}
#else
/* ECMDQ support is compiled out: the preemption helpers are no-ops. */
static void arm_smmu_preempt_disable(struct arm_smmu_device *smmu) {}
static void arm_smmu_preempt_enable(struct arm_smmu_device *smmu) {}
#endif


/* High-level queue accessors */
static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
{
@@ -400,6 +424,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)

/*
 * Pick the command queue to submit to.
 *
 * With ECMDQs enabled this is the ECMDQ that the per-CPU 'ecmdqs' slot of
 * the current CPU points at; otherwise it is the SMMU's normal command
 * queue.
 */
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
{
#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	if (smmu->ecmdq_enabled)
		return &(*this_cpu_ptr(smmu->ecmdqs))->cmdq;
#endif

	return &smmu->cmdq;
}

@@ -482,6 +516,40 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Called from the global error handler when GERROR_CMDQP_ERR is active.
 *
 * Walks the ECMDQs and, for every queue whose PROD.ERRACK and CONS.ERR bits
 * differ, hands the queue to __arm_smmu_cmdq_skip_err() and then
 * acknowledges the error by making PROD.ERRACK match CONS.ERR.
 */
static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu)
{
	int i;
	u32 prod, cons;
	struct arm_smmu_queue *q;
	struct arm_smmu_ecmdq *ecmdq;

	for (i = 0; i < smmu->nr_ecmdq; i++) {
		unsigned long flags;

		/*
		 * NOTE(review): 'i' is passed to per_cpu_ptr() as a CPU number.
		 * This presumably relies on CPUs 0..nr_ecmdq-1 being possible
		 * and each owning a distinct ECMDQ, as set up by
		 * arm_smmu_ecmdq_layout() - confirm.
		 */
		ecmdq = *per_cpu_ptr(smmu->ecmdqs, i);
		q = &ecmdq->cmdq.q;

		prod = readl_relaxed(q->prod_reg);
		cons = readl_relaxed(q->cons_reg);
		/* ERRACK == ERR: no unacknowledged error on this queue */
		if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0)
			continue;

		__arm_smmu_cmdq_skip_err(smmu, q);

		/*
		 * Submitters take ecmdq_lock for reading around their PROD
		 * writes and OR q->ecmdq_prod into the value written, so update
		 * the cached ERRACK bit and the register together under the
		 * write lock.
		 */
		write_lock_irqsave(&q->ecmdq_lock, flags);
		q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
		q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;

		/* Re-read PROD so the index written back is the current one */
		prod = readl_relaxed(q->prod_reg);
		prod &= ~ECMDQ_PROD_ERRACK;
		prod |= cons & ECMDQ_CONS_ERR;
		writel(prod, q->prod_reg);
		write_unlock_irqrestore(&q->ecmdq_lock, flags);
	}
}
#endif

/*
 * Command queue locking.
 * This is a form of bastardised rwlock with the following major changes:
@@ -778,6 +846,89 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
	}
}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Issue a list of commands on an ECMDQ that the current core occupies
 * exclusively (arm_smmu_cmdq_issue_cmdlist() dispatches here when
 * cmdq->shared is not set). This is a reduced version of
 * arm_smmu_cmdq_issue_cmdlist(), which eliminates a lot of unnecessary
 * inter-core competition considerations.
 *
 * @smmu: SMMU device the queue belongs to
 * @cmdq: the ECMDQ to insert into
 * @cmds: buffer holding the commands to insert
 * @n:    number of commands in @cmds
 * @sync: when true, a CMD_SYNC is appended after the @n commands and its
 *        completion is waited for before returning
 *
 * Local interrupts are disabled for the whole submission, except while
 * polling for free space in the queue.
 *
 * Returns 0, or the error returned by arm_smmu_cmdq_poll_until_sync() when
 * waiting for the CMD_SYNC failed.
 */
static int arm_smmu_ecmdq_issue_cmdlist(struct arm_smmu_device *smmu,
					struct arm_smmu_cmdq *cmdq,
					u64 *cmds, int n, bool sync)
{
	u32 prod;
	unsigned long flags;
	struct arm_smmu_ll_queue llq = {
		.max_n_shift = cmdq->q.llq.max_n_shift,
	}, head;
	int ret = 0;

	/* 1. Allocate some space in the queue */
	local_irq_save(flags);
	llq.val = READ_ONCE(cmdq->q.llq.val);
	do {
		u64 old;

		/* Poll with interrupts enabled until n (+1 for CMD_SYNC) slots fit */
		while (!queue_has_space(&llq, n + sync)) {
			local_irq_restore(flags);
			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
				dev_err_ratelimited(smmu->dev, "ECMDQ timeout\n");
			local_irq_save(flags);
		}

		head.cons = llq.cons;
		head.prod = queue_inc_prod_n(&llq, n + sync);

		/* Publish the new prod; retry with the fresh value if we lost a race */
		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
		if (old == llq.val)
			break;

		llq.val = old;
	} while (1);

	/* 2. Write our commands into the queue */
	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
	if (sync) {
		u64 cmd_sync[CMDQ_ENT_DWORDS];

		/* The CMD_SYNC goes into the slot right after the n commands */
		prod = queue_inc_prod_n(&llq, n);
		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
	}

	/* 3. Ensuring commands are visible first */
	dma_wmb();

	/*
	 * 4. Advance the hardware prod pointer. The cached ecmdq_prod bits are
	 * OR-ed in; the read lock pairs with the write lock taken in
	 * arm_smmu_ecmdq_skip_err() when it updates those bits.
	 */
	read_lock(&cmdq->q.ecmdq_lock);
	writel_relaxed(head.prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
	read_unlock(&cmdq->q.ecmdq_lock);

	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
	if (sync) {
		llq.prod = queue_inc_prod_n(&llq, n);
		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
		if (ret) {
			dev_err_ratelimited(smmu->dev,
					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
					    llq.prod,
					    readl_relaxed(cmdq->q.prod_reg),
					    readl_relaxed(cmdq->q.cons_reg));
		}

		/*
		 * Update cmdq->q.llq.cons, to improve the success rate of
		 * queue_has_space() when some new commands are inserted next
		 * time.
		 */
		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
	}

	local_irq_restore(flags);
	return ret;
}
#endif

/*
 * This is the actual insertion function, and provides the following
 * ordering guarantees to callers:
@@ -805,6 +956,11 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
	struct arm_smmu_ll_queue llq, head;
	int ret = 0;

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	if (!cmdq->shared)
		return arm_smmu_ecmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
#endif

	llq.max_n_shift = cmdq->q.llq.max_n_shift;

	/* 1. Allocate some space in the queue */
@@ -878,6 +1034,13 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
		 * d. Advance the hardware prod pointer
		 * Control dependency ordering from the entries becoming valid.
		 */
#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
		if (smmu->ecmdq_enabled) {
			read_lock(&cmdq->q.ecmdq_lock);
			writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
			read_unlock(&cmdq->q.ecmdq_lock);
		} else
#endif
			writel_relaxed(prod, cmdq->q.prod_reg);

		/*
@@ -1042,6 +1205,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,

	cmds.num = 0;

	arm_smmu_preempt_disable(smmu);
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_streams; i++) {
@@ -1052,6 +1216,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_batch_submit(smmu, &cmds);
	arm_smmu_preempt_enable(smmu);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
@@ -1764,6 +1929,11 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
	if (active & GERROR_CMDQ_ERR)
		arm_smmu_cmdq_skip_err(smmu);

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	if (active & GERROR_CMDQP_ERR)
		arm_smmu_ecmdq_skip_err(smmu);
#endif

	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
	return IRQ_HANDLED;
}
@@ -1854,25 +2024,28 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,

static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
{
	int i;
	int i, ret;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_cmdq_batch cmds;

	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);

	cmds.num = 0;
	arm_smmu_preempt_disable(master->smmu);
	for (i = 0; i < master->num_streams; i++) {
		cmd.atc.sid = master->streams[i].id;
		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
	}

	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
	ret = arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
	arm_smmu_preempt_enable(master->smmu);
	return ret;
}

int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
			    unsigned long iova, size_t size)
{
	int i;
	int i, ret;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
@@ -1902,6 +2075,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,

	cmds.num = 0;

	arm_smmu_preempt_disable(smmu_domain->smmu);
	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
@@ -1914,7 +2088,10 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
	ret = arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
	arm_smmu_preempt_enable(smmu_domain->smmu);

	return ret;
}

/* IO_PGTABLE API */
@@ -1979,6 +2156,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,

	cmds.num = 0;

	arm_smmu_preempt_disable(smmu);
	while (iova < end) {
		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
@@ -2010,6 +2188,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
		iova += inv_range;
	}
	arm_smmu_cmdq_batch_submit(smmu, &cmds);
	arm_smmu_preempt_enable(smmu);
}

static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
@@ -3241,6 +3420,9 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	cmdq->shared = 1;
#endif
	atomic_set(&cmdq->owner_prod, 0);
	atomic_set(&cmdq->lock, 0);

@@ -3252,6 +3434,22 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
	return 0;
}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq)
{
	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;

	atomic_set(&cmdq->owner_prod, 0);
	atomic_set(&cmdq->lock, 0);

	cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
	if (!cmdq->valid_map)
		return -ENOMEM;

	return 0;
}
#endif

static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
{
	int ret;
@@ -3697,6 +3895,55 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
	return ret;
}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Program and enable the ECMDQs.
 *
 * For every possible CPU that is the first owner of an ECMDQ (its 'ecmdqs'
 * slot points at its own per-CPU 'ecmdq'), write the queue base and the
 * prod/cons indexes to the ECMDQ registers, then set ECMDQ_PROD_EN and poll
 * CONS until ECMDQ_CONS_ENACK is set.
 *
 * Returns 0 if there are no ECMDQs or all of them were enabled. If enabling
 * a queue times out, ecmdq_enabled is cleared, the remaining queues are not
 * touched and the poll error is returned.
 */
static int arm_smmu_ecmdq_reset(struct arm_smmu_device *smmu)
{
	int i, cpu, ret = 0;
	u32 reg;

	if (!smmu->nr_ecmdq)
		return 0;

	/* 'i' counts the queues handled so far; only used in the error message */
	i = 0;
	for_each_possible_cpu(cpu) {
		struct arm_smmu_ecmdq *ecmdq;
		struct arm_smmu_queue *q;

		/* CPUs that merely share another CPU's ECMDQ are skipped */
		ecmdq = *per_cpu_ptr(smmu->ecmdqs, cpu);
		if (ecmdq != per_cpu_ptr(smmu->ecmdq, cpu))
			continue;

		q = &ecmdq->cmdq.q;
		i++;

		/* A non-empty queue is unexpected here: warn and start from empty */
		if (WARN_ON(q->llq.prod != q->llq.cons)) {
			q->llq.prod = 0;
			q->llq.cons = 0;
		}
		writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE);
		writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD);
		writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS);

		/* enable ecmdq */
		writel(ECMDQ_PROD_EN | q->llq.prod, q->prod_reg);
		ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK,
					  1, ARM_SMMU_POLL_TIMEOUT_US);
		if (ret) {
			dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i);
			/* ecmdq_enabled shares storage with nr_ecmdq (union) */
			smmu->ecmdq_enabled = 0;
			break;
		}
	}

	return ret;
}
#else
/* ECMDQ support is compiled out: nothing to reset, always succeeds. */
static int arm_smmu_ecmdq_reset(struct arm_smmu_device *smmu)
{
	return 0;
}
#endif

static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
{
	int ret;
@@ -3743,6 +3990,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);

	arm_smmu_ecmdq_reset(smmu);

	enables = CR0_CMDQEN;
	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
				      ARM_SMMU_CR0ACK);
@@ -3832,6 +4081,136 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
	return 0;
}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Allocate the per-CPU ECMDQ descriptors and decide which ECMDQ each
 * possible CPU uses.
 *
 * nr_ecmdq is capped at the number of possible CPUs. A CPU whose number is
 * below nr_ecmdq uses its own descriptor; any other CPU is mapped onto the
 * descriptor of CPU (cpu % nr_ecmdq), which is then marked as shared.
 *
 * Returns 0 on success or -ENOMEM if the per-CPU allocation fails.
 */
static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
{
	int cpu;
	struct arm_smmu_ecmdq *queues;

	queues = devm_alloc_percpu(smmu->dev, *queues);
	if (!queues)
		return -ENOMEM;
	smmu->ecmdq = queues;

	/* A core requires at most one ECMDQ */
	if (smmu->nr_ecmdq > num_possible_cpus())
		smmu->nr_ecmdq = num_possible_cpus();

	for_each_possible_cpu(cpu) {
		struct arm_smmu_ecmdq *target;

		if (cpu >= smmu->nr_ecmdq) {
			/* No queue left for this CPU: borrow an earlier CPU's */
			int owner = cpu % smmu->nr_ecmdq;

			target = per_cpu_ptr(smmu->ecmdq, owner);
			target->cmdq.shared = 1;
		} else {
			target = per_cpu_ptr(smmu->ecmdq, cpu);
		}

		*per_cpu_ptr(smmu->ecmdqs, cpu) = target;
	}

	return 0;
}

/*
 * Discover the ECMDQs advertised by the SMMU and set up their software
 * state.
 *
 * The number of control pages and of queues per page is read from IDR6.
 * Every control page must be in preset mode and the pages must be
 * contiguous; the page region is then mapped, the CPU-to-ECMDQ layout is
 * computed and one queue is initialised for each CPU that is the first
 * owner of an ECMDQ.
 *
 * Returns 0 on success, -EOPNOTSUPP if IDR6 reports no ECMDQ, -EFAULT if
 * the control pages are not preset or not contiguous, -ENOMEM on mapping or
 * allocation failure, or the error returned by a helper.
 */
static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
{
	int ret, cpu;
	u32 i, nump, numq, gap;
	u32 reg, shift_increment;
	u64 addr, smmu_dma_base;
	/*
	 * Must live across iterations of the control page loop: it carries
	 * the address of the previous page into the contiguity check.
	 */
	u64 pre_addr = 0;
	void __iomem *cp_regs, *cp_base;

	/* IDR6 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
	nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
	numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
	smmu->nr_ecmdq = nump * numq;
	/* Distance between two consecutive ECMDQs inside a control page */
	gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
	if (!smmu->nr_ecmdq)
		return -EOPNOTSUPP;

	smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
	cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
	if (!cp_regs)
		return -ENOMEM;

	for (i = 0; i < nump; i++) {
		u64 val;

		val = readq_relaxed(cp_regs + 32 * i);
		if (!(val & ECMDQ_CP_PRESET)) {
			iounmap(cp_regs);
			dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
			return -EFAULT;
		}

		if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
			iounmap(cp_regs);
			dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
			return -EFAULT;
		}

		pre_addr = val & ECMDQ_CP_ADDR;
	}

	/*
	 * Read the full 64-bit register, as in the loop above: ECMDQ_CP_ADDR
	 * covers bits [51:12], which a 32-bit read would truncate.
	 */
	addr = readq_relaxed(cp_regs) & ECMDQ_CP_ADDR;
	iounmap(cp_regs);

	cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
	if (!cp_base)
		return -ENOMEM;

	smmu->ecmdqs = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
	if (!smmu->ecmdqs)
		return -ENOMEM;

	ret = arm_smmu_ecmdq_layout(smmu);
	if (ret)
		return ret;

	/* Queues shared by several cores are made proportionally larger */
	shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);

	addr = 0;
	for_each_possible_cpu(cpu) {
		struct arm_smmu_ecmdq *ecmdq;
		struct arm_smmu_queue *q;

		ecmdq = *per_cpu_ptr(smmu->ecmdqs, cpu);
		q = &ecmdq->cmdq.q;

		/*
		 * The boot option "maxcpus=" can limit the number of online
		 * CPUs. The CPUs that are not selected are not shown in
		 * cpumask_of_node(node), their 'ecmdq' may be NULL.
		 *
		 * (ecmdq != per_cpu_ptr(smmu->ecmdq, cpu)) indicates that the
		 * ECMDQ is shared by multiple cores and should be initialized
		 * only by the first owner.
		 */
		if (!ecmdq || (ecmdq != per_cpu_ptr(smmu->ecmdq, cpu)))
			continue;
		ecmdq->base = cp_base + addr;

		q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
		ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
				ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
		if (ret)
			return ret;

		/* Cached control bits that are OR-ed into every PROD write */
		q->ecmdq_prod = ECMDQ_PROD_EN;
		rwlock_init(&q->ecmdq_lock);

		ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
		if (ret) {
			/* Report the owning CPU, not the stale control page index */
			dev_err(smmu->dev, "ecmdq[%d] init failed\n", cpu);
			return ret;
		}

		addr += gap;
	}

	return 0;
}
#endif

#define IIDR_IMPLEMENTER_ARM		0x43b
#define IIDR_PRODUCTID_ARM_MMU_600	0x483
#define IIDR_PRODUCTID_ARM_MMU_700	0x487
@@ -4004,6 +4383,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
		return -ENXIO;
	}

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	if (reg & IDR1_ECMDQ)
		smmu->features |= ARM_SMMU_FEAT_ECMDQ;
#endif

	/* Queue sizes, capped to ensure natural alignment */
	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
					     FIELD_GET(IDR1_CMDQS, reg));
@@ -4124,6 +4508,18 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)

	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
		 smmu->ias, smmu->oas, smmu->features);

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	if (smmu->features & ARM_SMMU_FEAT_ECMDQ && !disable_ecmdq) {
		int err;

		err = arm_smmu_ecmdq_probe(smmu);
		if (err) {
			dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err);
			smmu->ecmdq_enabled = 0;
		}
	}
#endif
	return 0;
}

@@ -4216,8 +4612,70 @@ static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
}

#ifdef CONFIG_ARM_SMMU_V3_PM
#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/*
 * Disable every enabled ECMDQ; called from arm_smmu_suspend().
 *
 * For each enabled queue, wait up to n (100) polls of 1us for the hardware
 * to consume all outstanding commands, then clear ECMDQ_PROD_EN and poll
 * CONS until ECMDQ_CONS_ENACK is cleared. A queue that does not drain in
 * time is disabled anyway after a warning.
 *
 * Returns 0 on success. If any queue fails to acknowledge the disable,
 * ecmdq_enabled is cleared and -EIO is returned.
 */
static int arm_smmu_ecmdq_disable(struct device *dev)
{
	int i, j;
	int ret, nr_fail = 0, n = 100;
	u32 reg, prod, cons;
	struct arm_smmu_ecmdq *ecmdq;
	struct arm_smmu_queue *q;
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	for (i = 0; i < smmu->nr_ecmdq; i++) {
		/*
		 * NOTE(review): 'i' is passed to per_cpu_ptr() as a CPU number;
		 * presumably CPUs 0..nr_ecmdq-1 each own a distinct ECMDQ, as
		 * set up by arm_smmu_ecmdq_layout() - confirm.
		 */
		ecmdq = *per_cpu_ptr(smmu->ecmdqs, i);
		q = &ecmdq->cmdq.q;

		prod = readl_relaxed(q->prod_reg);
		cons = readl_relaxed(q->cons_reg);
		/* Queue is not enabled, nothing to do */
		if ((prod & ECMDQ_PROD_EN) == 0)
			continue;

		for (j = 0; j < n; j++) {
			/* Same index and wrap bit: the queue is empty */
			if (Q_IDX(&q->llq, prod) == Q_IDX(&q->llq, cons) &&
			    Q_WRP(&q->llq, prod) == Q_WRP(&q->llq, cons))
				break;

			/* Wait a moment, so ECMDQ has a chance to finish */
			udelay(1);
			cons = readl_relaxed(q->cons_reg);
		}
		/* PROD is not expected to change while the queue drains */
		WARN_ON(prod != readl_relaxed(q->prod_reg));
		if (j >= n)
			dev_warn(smmu->dev,
				 "Forcibly disabling ecmdq[%d]: prod=%08x, cons=%08x\n",
				 i, prod, cons);

		/* disable ecmdq */
		prod &= ~ECMDQ_PROD_EN;
		writel(prod, q->prod_reg);
		ret = readl_relaxed_poll_timeout(q->cons_reg, reg, !(reg & ECMDQ_CONS_ENACK),
					  1, ARM_SMMU_POLL_TIMEOUT_US);
		if (ret) {
			nr_fail++;
			dev_err(smmu->dev, "ecmdq[%d] disable failed\n", i);
		}
	}

	if (nr_fail) {
		/* ecmdq_enabled shares storage with nr_ecmdq (union) */
		smmu->ecmdq_enabled = 0;
		pr_warn("Suppress ecmdq feature, switch to normal cmdq\n");
		return -EIO;
	}

	return 0;
}
#else
/* ECMDQ support is compiled out: nothing to disable, always succeeds. */
static int arm_smmu_ecmdq_disable(struct device *dev)
{
	return 0;
}
#endif

static int arm_smmu_suspend(struct device *dev)
{
	arm_smmu_ecmdq_disable(dev);

	/*
	 * The smmu is powered off and related registers are automatically
	 * cleared when suspend. No need to do anything.
+47 −0
Original line number Diff line number Diff line
@@ -45,6 +45,7 @@
#define IDR0_S2P			(1 << 0)

#define ARM_SMMU_IDR1			0x4
/* Unsigned constant: shifting a signed 1 into bit 31 overflows 'int' */
#define IDR1_ECMDQ			(1U << 31)
#define IDR1_TABLES_PRESET		(1 << 30)
#define IDR1_QUEUES_PRESET		(1 << 29)
#define IDR1_REL			(1 << 28)
@@ -121,6 +122,7 @@
#define ARM_SMMU_IRQ_CTRLACK		0x54

#define ARM_SMMU_GERROR			0x60
#define GERROR_CMDQP_ERR		(1 << 9)
#define GERROR_SFM_ERR			(1 << 8)
#define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
#define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
@@ -137,6 +139,26 @@
#define ARM_SMMU_GERROR_IRQ_CFG1	0x70
#define ARM_SMMU_GERROR_IRQ_CFG2	0x74

/* IDR6: log2 of the number of ECMDQ control pages and of queues per page */
#define ARM_SMMU_IDR6			0x190
#define IDR6_LOG2NUMP			GENMASK(27, 24)
#define IDR6_LOG2NUMQ			GENMASK(19, 16)
#define IDR6_BA_DOORBELLS		GENMASK(9, 0)

/* Per-ECMDQ register offsets, relative to the base of that ECMDQ */
#define ARM_SMMU_ECMDQ_BASE		0x00
#define ARM_SMMU_ECMDQ_PROD		0x08
#define ARM_SMMU_ECMDQ_CONS		0x0c
#define ECMDQ_MAX_SZ_SHIFT		8
/*
 * Bit 31 flags use an unsigned constant: shifting a signed 1 into the sign
 * bit overflows 'int' and would sign-extend if widened to 64 bits.
 */
#define ECMDQ_PROD_EN			(1U << 31)
#define ECMDQ_CONS_ENACK		(1U << 31)
#define ECMDQ_CONS_ERR			(1 << 23)
#define ECMDQ_PROD_ERRACK		(1 << 23)

/* ECMDQ control page base registers, relative to the SMMU register base */
#define ARM_SMMU_ECMDQ_CP_BASE		0x4000
#define ECMDQ_CP_ADDR			GENMASK_ULL(51, 12)
#define ECMDQ_CP_CMDQGS			GENMASK_ULL(2, 1)
#define ECMDQ_CP_PRESET			(1UL << 0)
#define ECMDQ_CP_RRESET_SIZE		0x10000

#define ARM_SMMU_STRTAB_BASE		0x80
#define STRTAB_BASE_RA			(1UL << 62)
#define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
@@ -538,6 +560,10 @@ struct arm_smmu_ll_queue {
struct arm_smmu_queue {
	struct arm_smmu_ll_queue	llq;
	int				irq; /* Wired interrupt */
#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	u32				ecmdq_prod;
	rwlock_t			ecmdq_lock;
#endif

	__le64				*base;
	dma_addr_t			base_dma;
@@ -561,8 +587,18 @@ struct arm_smmu_cmdq {
	atomic_long_t			*valid_map;
	atomic_t			owner_prod;
	atomic_t			lock;
#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	int				shared;
#endif
};

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
/* One ECMDQ: a command queue plus the MMIO base of its register block. */
struct arm_smmu_ecmdq {
	struct arm_smmu_cmdq		cmdq;
	/* ARM_SMMU_ECMDQ_BASE/PROD/CONS offsets are applied to this address */
	void __iomem			*base;
};
#endif

struct arm_smmu_cmdq_batch {
	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
	int				num;
@@ -661,6 +697,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_NESTING		(1 << 21)
#define ARM_SMMU_FEAT_BBML1		(1 << 22)
#define ARM_SMMU_FEAT_BBML2		(1 << 23)
#define ARM_SMMU_FEAT_ECMDQ		(1 << 24)
	u32				features;

#define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -669,6 +706,16 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC	(1 << 3)
	u32				options;

#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
	union {
		u32			nr_ecmdq;
		u32			ecmdq_enabled;
	};

	struct arm_smmu_ecmdq *__percpu	*ecmdqs;
	struct arm_smmu_ecmdq  __percpu	*ecmdq;
#endif

	struct arm_smmu_cmdq		cmdq;
	struct arm_smmu_evtq		evtq;
	struct arm_smmu_priq		priq;