Commit 077bd800 authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-msm-next-2022-11-28' of https://gitlab.freedesktop.org/drm/msm into drm-next



msm-next for v6.2 (the gpu/gem bits)

- Remove exclusive-fence hack that caused over-synchronization
- Fix speed-bin detection vs. probe-defer
- Enable clamp_to_idle on 7c3
- Improved hangcheck detection

Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/CAF6AEGvT1h_S4d=YRgphgR8i7aMaxQaNW8mru7QaoUo9uiUk2A@mail.gmail.com
parents 92e11ddb d73b1d02
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) {

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);

	return 0;
}
+10 −17
Original line number Diff line number Diff line
@@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
	}

	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);

	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);

	return 0;
}
@@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
@@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
		return ret;

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
		gpu->rb[0]->iova);
	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);

	/*
	 * If the microcode supports the WHERE_AM_I opcode then we can use that
@@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
		}

		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
			    shadowptr(a5xx_gpu, gpu->rb[0]));
	} else if (gpu->nr_rings > 1) {
		/* Disable preemption if WHERE_AM_I isn't available */
		a5xx_preempt_fini(gpu);
@@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));

	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)

static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);

	return 0;
}
@@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu,
	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

@@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	u64 busy_cycles;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
	*out_sample_rate = clk_get_rate(gpu->core_clk);

	return busy_cycles;
+1 −3
Original line number Diff line number Diff line
@@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)

	/* Set the address of the incoming preemption record */
	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
		REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
		a5xx_gpu->preempt_iova[ring->id]);

	a5xx_gpu->next_ring = ring;
@@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
	}

	/* Write a 0 to signal that we aren't switching pagetables */
	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0);

	/* Reset the preemption state */
	set_preempt_state(a5xx_gpu, PREEMPT_NONE);
+54 −30
Original line number Diff line number Diff line
@@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
	OUT_RING(ring, submit->seqno);

	trace_msm_gpu_submit_flush(submit,
		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));

	a6xx_flush(gpu, ring);
}
@@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
		}
	}

	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
		REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	return 0;
}
@@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Turn on 64 bit addressing for all blocks */
@@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)

	if (!adreno_is_a650_family(adreno_gpu)) {
		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);

		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
			0x00100000 + adreno_gpu->gmem - 1);
	}

@@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
		goto out;

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
		gpu->rb[0]->iova);
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	/* Targets that support extended APRIV can use the RPTR shadow from
	 * hardware but all the other ones need to disable the feature. Targets
@@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
		}

		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
			REG_A6XX_CP_RB_RPTR_ADDR_HI,
			shadowptr(a6xx_gpu, gpu->rb[0]));
	}

@@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));

	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
	/* Force the GPU power on so we can read this register */
	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
			    REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);

	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

@@ -1824,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
}

static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct msm_cp_state cp_state = {
		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
	};
	bool progress;

	/*
	 * Adjust the remaining data to account for what has already been
	 * fetched from memory, but not yet consumed by the SQE.
	 *
	 * This is not *technically* correct, the amount buffered could
	 * exceed the IB size due to hw prefetching ahead, but:
	 *
	 * (1) We aren't trying to find the exact position, just whether
	 *     progress has been made
	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
	 *     to prevent prefetching into an unrelated submit.  (And
	 *     either way, at some point the ROQ will be full.)
	 */
	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;

	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));

	ring->last_cp_state = cp_state;

	return progress;
}

static u32 a618_get_speed_bin(u32 fuse)
{
	if (fuse == 0)
@@ -1879,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)

	if (val == UINT_MAX) {
		DRM_DEV_ERROR(dev,
			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
			fuse);
		return UINT_MAX;
	}
@@ -1889,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)

static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
{
	u32 supp_hw = UINT_MAX;
	u32 supp_hw;
	u32 speedbin;
	int ret;

@@ -1901,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
	if (ret == -ENOENT) {
		return 0;
	} else if (ret) {
		DRM_DEV_ERROR(dev,
			      "failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
			      ret);
		goto done;
		dev_err_probe(dev, ret,
			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
		return ret;
	}

	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);

done:
	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
	if (ret)
		return ret;
@@ -1942,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = {
		.create_address_space = a6xx_create_address_space,
		.create_private_address_space = a6xx_create_private_address_space,
		.get_rptr = a6xx_get_rptr,
		.progress = a6xx_progress,
	},
	.get_timestamp = a6xx_get_timestamp,
};
@@ -1978,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
		adreno_gpu->base.hw_apriv = true;

	/*
	 * For now only clamp to idle freq for devices where this is known not
	 * to cause power supply issues:
	 */
	if (info && (info->revn == 618))
		gpu->clamp_to_idle = true;

	a6xx_llc_slices_init(pdev, a6xx_gpu);

	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
@@ -1999,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
		return ERR_PTR(ret);
	}

	/*
	 * For now only clamp to idle freq for devices where this is known not
	 * to cause power supply issues:
	 */
	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
		gpu->clamp_to_idle = true;

	/* Check if there is a GMU phandle and set it up */
	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);

+1 −2
Original line number Diff line number Diff line
@@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu,
	/* Make sure all pending memory writes are posted */
	wmb();

	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);

	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);

Loading