Commit 2f6247da authored by Stanley.Yang's avatar Stanley.Yang Committed by Alex Deucher
Browse files

drm/amdgpu/pm: support mca_ceumc_addr in ecctable



SMU add a new variable mca_ceumc_addr to record
umc correctable error address in EccInfo table,
driver side add EccInfo_V2_t to support this feature

Signed-off-by: default avatarStanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: default avatarLijo Lazar <lijo.lazar@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent faf26f2b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -328,6 +328,7 @@ struct ecc_info_per_ch {
	uint16_t ce_count_hi_chip;
	uint64_t mca_umc_status;
	uint64_t mca_umc_addr;
	uint64_t mca_ceumc_addr;
};

struct umc_ecc_info {
+15 −1
Original line number Diff line number Diff line
@@ -519,7 +519,21 @@ typedef struct {
} EccInfo_t;

typedef struct {
	uint64_t mca_umc_status;
	uint64_t mca_umc_addr;
	uint64_t mca_ceumc_addr;

	uint16_t ce_count_lo_chip;
	uint16_t ce_count_hi_chip;

	uint32_t eccPadding;
} EccInfo_V2_t;

typedef struct {
	union {
		EccInfo_t  EccInfo[ALDEBARAN_UMC_CHANNEL_NUM];
		EccInfo_V2_t EccInfo_V2[ALDEBARAN_UMC_CHANNEL_NUM];
	};
} EccInfoTable_t;

// These defines are used with the following messages:
+41 −12
Original line number Diff line number Diff line
@@ -82,6 +82,12 @@
 */
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00

/*
 * SMU support mca_ceumc_addr in ECCTABLE since version 68.55.0,
 * use this to check mca_ceumc_addr record whether support
 */
#define SUPPORT_ECCTABLE_V2_SMU_VERSION 0x00443700

/*
 * SMU support BAD CHENNEL info MSG since version 68.51.00,
 * use this to check ECCTALE feature whether support
@@ -1803,7 +1809,8 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
	return sizeof(struct gpu_metrics_v1_3);
}

static int aldebaran_check_ecc_table_support(struct smu_context *smu)
static int aldebaran_check_ecc_table_support(struct smu_context *smu,
		int *ecctable_version)
{
	uint32_t if_version = 0xff, smu_version = 0xff;
	int ret = 0;
@@ -1816,6 +1823,11 @@ static int aldebaran_check_ecc_table_support(struct smu_context *smu)

	if (smu_version < SUPPORT_ECCTABLE_SMU_VERSION)
		ret = -EOPNOTSUPP;
	else if (smu_version >= SUPPORT_ECCTABLE_SMU_VERSION &&
			smu_version < SUPPORT_ECCTABLE_V2_SMU_VERSION)
		*ecctable_version = 1;
	else
		*ecctable_version = 2;

	return ret;
}
@@ -1827,9 +1839,10 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
	EccInfoTable_t *ecc_table = NULL;
	struct ecc_info_per_ch *ecc_info_per_channel = NULL;
	int i, ret = 0;
	int table_version = 0;
	struct umc_ecc_info *eccinfo = (struct umc_ecc_info *)table;

	ret = aldebaran_check_ecc_table_support(smu);
	ret = aldebaran_check_ecc_table_support(smu, &table_version);
	if (ret)
		return ret;

@@ -1845,6 +1858,7 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,

	ecc_table = (EccInfoTable_t *)smu_table->ecc_table;

	if (table_version == 1) {
		for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
			ecc_info_per_channel = &(eccinfo->ecc[i]);
			ecc_info_per_channel->ce_count_lo_chip =
@@ -1856,6 +1870,21 @@ static ssize_t aldebaran_get_ecc_info(struct smu_context *smu,
			ecc_info_per_channel->mca_umc_addr =
				ecc_table->EccInfo[i].mca_umc_addr;
		}
	} else if (table_version == 2) {
		for (i = 0; i < ALDEBARAN_UMC_CHANNEL_NUM; i++) {
			ecc_info_per_channel = &(eccinfo->ecc[i]);
			ecc_info_per_channel->ce_count_lo_chip =
				ecc_table->EccInfo_V2[i].ce_count_lo_chip;
			ecc_info_per_channel->ce_count_hi_chip =
				ecc_table->EccInfo_V2[i].ce_count_hi_chip;
			ecc_info_per_channel->mca_umc_status =
				ecc_table->EccInfo_V2[i].mca_umc_status;
			ecc_info_per_channel->mca_umc_addr =
				ecc_table->EccInfo_V2[i].mca_umc_addr;
			ecc_info_per_channel->mca_ceumc_addr =
				ecc_table->EccInfo_V2[i].mca_ceumc_addr;
		}
	}

	return ret;
}