Commit 1f8d3ad2 authored by Hawking Zhang, committed by Alex Deucher
Browse files

drm/amdgpu: only harvest gcea/mmea error status in aldebaran



In aldebaran, the driver only needs to harvest the SDP
RdRspStatus, WrRspStatus and the first parity error
on RdRsp data. Check the error type before harvesting
error information.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 53ee6609
Loading
Loading
Loading
Loading
+12 −9
Original line number Diff line number Diff line
@@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
	  REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
};

static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs =
static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
	{ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };

static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
@@ -1041,11 +1041,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
	uint32_t i, j;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
	for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
		     j++) {
			gfx_v9_4_2_select_se_sh(adev, i, 0, j);
			WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
			WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
		}
	}
	gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
@@ -1090,17 +1090,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)

	mutex_lock(&adev->grbm_idx_mutex);

	for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
	for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
		for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
		     j++) {
			gfx_v9_4_2_select_se_sh(adev, i, 0, j);
			reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
				gfx_v9_4_2_rdrsp_status_regs));
			if ((reg_value & 0xFFF) != GCEA_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK)
				gfx_v9_4_2_ea_err_status_regs));
			if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
			    REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
			    REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
				dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
						j, reg_value);
			}
			/* clear after read */
			WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
			WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
		}
	}

+7 −4
Original line number Diff line number Diff line
@@ -1286,7 +1286,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
	}
}

static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = {
static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
	{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
	{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
	{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
@@ -1303,14 +1303,17 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
		return;

	for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) {
	for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
		reg_value =
			RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs[i]));
		if ((reg_value & 0xFFF) != MMEA0_ERR_STATUS__SDP_RDRSP_DATASTATUS_MASK)
			RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]));
		if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
		    REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
		    REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
			dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
					i, reg_value);
		}
	}
}

const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
	.ras_late_init = amdgpu_mmhub_ras_late_init,