Commit 6c0ca748 authored by Hawking Zhang, committed by Alex Deucher
Browse files

drm/amdgpu: move convert_error_address out of umc_ras



The RAS error address translation algorithm is common
across dGPU and A + A platforms as long as the SoC
integrates the same generation of UMC IP.

UMC RAS is managed by x86 MCA on the A + A platform, so
umc_ras in the GPU driver is not initialized at all on
the A + A platform. In such a case, any umc_ras callback
implemented for the dGPU config shouldn't be invoked
from an A + A specific callback.

This change moves convert_error_address out of the dGPU
umc_ras structure so that it is shared between the A + A
and dGPU configs.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Stanley Yang <Stanley.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 027bf0ce
Loading
Loading
Loading
Loading
+12 −4
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
#include "atom.h"
#include "amdgpu_reset.h"
#include "umc_v6_7.h"

#ifdef CONFIG_X86_MCE_AMD
#include <asm/mce.h>
@@ -2899,10 +2900,17 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
	/*
	 * Translate UMC channel address to Physical address
	 */
	if (adev->umc.ras &&
	    adev->umc.ras->convert_ras_error_address)
		adev->umc.ras->convert_ras_error_address(adev,
	switch (adev->ip_versions[UMC_HWIP][0]) {
	case IP_VERSION(6, 7, 0):
		umc_v6_7_convert_error_address(adev,
				&err_data, m->addr, ch_inst, umc_inst);
		break;
	default:
		dev_warn(adev->dev,
			 "UMC address to Physical address translation is not supported\n");
		kfree(err_data.err_addr);
		return NOTIFY_DONE;
	}

	if (amdgpu_bad_page_threshold != 0) {
		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+0 −3
Original line number Diff line number Diff line
@@ -51,9 +51,6 @@ struct amdgpu_umc_ras {
	struct amdgpu_ras_block_object ras_block;
	void (*err_cnt_init)(struct amdgpu_device *adev);
	bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
	void (*convert_ras_error_address)(struct amdgpu_device *adev,
				struct ras_err_data *err_data, uint64_t err_addr,
				uint32_t ch_inst, uint32_t umc_inst);
	void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
				      void *ras_error_status);
	void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
+3 −4
Original line number Diff line number Diff line
@@ -187,7 +187,7 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
	}
}

static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
				    struct ras_err_data *err_data, uint64_t err_addr,
				    uint32_t ch_inst, uint32_t umc_inst)
{
@@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
	.convert_ras_error_address = umc_v6_7_convert_error_address,
};
+3 −1
Original line number Diff line number Diff line
@@ -71,5 +71,7 @@ extern const uint32_t
	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
extern const uint32_t
	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];

/*
 * umc_v6_7_convert_error_address - UMC v6.7 error address conversion.
 *
 * Declared here (non-static) so that code outside umc_v6_7.c can call it
 * directly instead of going through the amdgpu_umc_ras callback table.
 *
 * NOTE(review): the bad-page notifier that calls this describes the step as
 * "Translate UMC channel address to Physical address" and consumes
 * err_data->err_addr afterwards; the exact fields written are defined by the
 * implementation in umc_v6_7.c -- confirm there.
 *
 * @adev:     amdgpu device the error belongs to
 * @err_data: RAS error data passed by the caller to receive the result
 * @err_addr: error address to convert (the notifier passes the MCE address)
 * @ch_inst:  channel instance associated with the error
 * @umc_inst: UMC instance associated with the error
 */
void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
                                    struct ras_err_data *err_data, uint64_t err_addr,
                                    uint32_t ch_inst, uint32_t umc_inst);
#endif