Commit 400013b2 authored by Tao Zhou's avatar Tao Zhou Committed by Alex Deucher
Browse files

drm/amdgpu: add umc_fill_error_record to make code more simple



Create common amdgpu_umc_fill_error_record function for all versions
of UMC and clean up related codes.

Signed-off-by: default avatarTao Zhou <tao.zhou1@amd.com>
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 9a176960
Loading
Loading
Loading
Loading
+5 −18
Original line number Diff line number Diff line
@@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
	}

	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));

	err_rec.address = address;
	err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
	err_rec.ts = (uint64_t)ktime_get_real_seconds();
	err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;

	err_data.err_addr = &err_rec;
	err_data.err_addr_cnt = 1;
	amdgpu_umc_fill_error_record(&err_data, address,
			(address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);

	if (amdgpu_bad_page_threshold != 0) {
		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
@@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
	dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
			     umc_inst, ch_inst);

	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));

	/*
	 * Translate UMC channel address to Physical address
	 */
@@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
			ADDR_OF_256B_BLOCK(channel_index) |
			OFFSET_IN_256B_BLOCK(m->addr);

	err_rec.address = m->addr;
	err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
	err_rec.ts = (uint64_t)ktime_get_real_seconds();
	err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
	err_rec.cu = 0;
	err_rec.mem_channel = channel_index;
	err_rec.mcumc_id = umc_inst;

	memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
	err_data.err_addr = &err_rec;
	err_data.err_addr_cnt = 1;
	amdgpu_umc_fill_error_record(&err_data, m->addr,
			retired_page, channel_index, umc_inst);

	if (amdgpu_bad_page_threshold != 0) {
		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+21 −0
Original line number Diff line number Diff line
@@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
	return 0;
}

void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
		uint64_t err_addr,
		uint64_t retired_page,
		uint32_t channel_index,
		uint32_t umc_inst)
{
	struct eeprom_table_record *err_rec =
		&err_data->err_addr[err_data->err_addr_cnt];

	err_rec->address = err_addr;
	/* page frame address is saved */
	err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
	err_rec->ts = (uint64_t)ktime_get_real_seconds();
	err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
	err_rec->cu = 0;
	err_rec->mem_channel = channel_index;
	err_rec->mcumc_id = umc_inst;

	err_data->err_addr_cnt++;
}
+5 −0
Original line number Diff line number Diff line
@@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
		struct amdgpu_irq_src *source,
		struct amdgpu_iv_entry *entry);
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
		uint64_t err_addr,
		uint64_t retired_page,
		uint32_t channel_index,
		uint32_t umc_inst);
#endif
+3 −15
Original line number Diff line number Diff line
@@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
{
	uint32_t lsb, mc_umc_status_addr;
	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
	struct eeprom_table_record *err_rec;
	uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

	if (adev->asic_type == CHIP_ARCTURUS) {
@@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
		return;
	}

	err_rec = &err_data->err_addr[err_data->err_addr_cnt];

	/* calculate error address if ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,

		/* we only save ue error information currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			err_rec->address = err_addr;
			/* page frame address is saved */
			err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
			err_rec->ts = (uint64_t)ktime_get_real_seconds();
			err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
			err_rec->cu = 0;
			err_rec->mem_channel = channel_index;
			err_rec->mcumc_id = umc_inst;

			err_data->err_addr_cnt++;
		}
				== 1)
			amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
	}

	/* clear umc status */
+6 −30
Original line number Diff line number Diff line
@@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
					 uint32_t umc_inst)
{
	uint64_t mc_umc_status, err_addr, retired_page;
	struct eeprom_table_record *err_rec;
	uint32_t channel_index;
	uint32_t eccinfo_table_idx;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
	if (!err_data->err_addr)
		return;

	err_rec = &err_data->err_addr[err_data->err_addr_cnt];

	/* calculate error address if ue/ce error is detected */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
@@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,

		/* we only save ue error information currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			err_rec->address = err_addr;
			/* page frame address is saved */
			err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
			err_rec->ts = (uint64_t)ktime_get_real_seconds();
			err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
			err_rec->cu = 0;
			err_rec->mem_channel = channel_index;
			err_rec->mcumc_id = umc_inst;

			err_data->err_addr_cnt++;
		}
				== 1)
			amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
	}
}

@@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
{
	uint32_t mc_umc_status_addr;
	uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
	struct eeprom_table_record *err_rec;
	uint32_t channel_index;

	mc_umc_status_addr =
@@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
		return;
	}

	err_rec = &err_data->err_addr[err_data->err_addr_cnt];

	channel_index =
		adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];

@@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,

		/* we only save ue error information currently, ce is skipped */
		if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
				== 1) {
			err_rec->address = err_addr;
			/* page frame address is saved */
			err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
			err_rec->ts = (uint64_t)ktime_get_real_seconds();
			err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
			err_rec->cu = 0;
			err_rec->mem_channel = channel_index;
			err_rec->mcumc_id = umc_inst;

			err_data->err_addr_cnt++;
		}
				== 1)
			amdgpu_umc_fill_error_record(err_data, err_addr,
					retired_page, channel_index, umc_inst);
	}

	/* clear umc status */
Loading