Commit 5436ab94 authored by Stanley.Yang, committed by Alex Deucher
Browse files

drm/amdkfd: fix set kfd node ras properties value



The ctx->features field belongs to the new RAS implementation,
which is only available on Vega20 and onwards. It is not
available on Vega10, so Vega10 should follow the legacy
ECC implementation.

Changed from V1:
    wrap function to initialize kfd node properties

Changed from V2:
    remove wrap function and SDMA SRAM ECC check

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 1887544d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -986,6 +986,7 @@ struct amdgpu_device {

	atomic_t			throttling_logging_enabled;
	struct ratelimit_state		throttling_logging_rs;
	uint32_t			ras_features;
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+19 −9
Original line number Diff line number Diff line
@@ -1963,6 +1963,17 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,
	return 0;
}

/*
 * Test adev->asic_type against a fixed list of ASICs.
 *
 * Returns 0 when the ASIC is CHIP_VEGA10, CHIP_VEGA20, CHIP_ARCTURUS or
 * CHIP_SIENNA_CICHLID, and 1 for every other ASIC type. Note the sense of
 * the result: non-zero means "not in the list".
 */
static int amdgpu_ras_check_asic_type(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_VEGA10:
	case CHIP_VEGA20:
	case CHIP_ARCTURUS:
	case CHIP_SIENNA_CICHLID:
		return 0;
	default:
		return 1;
	}
}

/*
 * check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initialization and
@@ -1979,9 +1990,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
	*supported = 0;

	if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
	    (adev->asic_type != CHIP_VEGA20   &&
	     adev->asic_type != CHIP_ARCTURUS &&
	     adev->asic_type != CHIP_SIENNA_CICHLID))
		amdgpu_ras_check_asic_type(adev))
		return;

	if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
@@ -2003,6 +2012,7 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,

	*supported = amdgpu_ras_enable == 0 ?
			0 : *hw_supported & amdgpu_ras_mask;
	adev->ras_features = *supported;
}

int amdgpu_ras_init(struct amdgpu_device *adev)
@@ -2025,9 +2035,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)

	amdgpu_ras_check_supported(adev, &con->hw_supported,
			&con->supported);
	if (!con->hw_supported) {
	if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
		r = 0;
		goto err_out;
		goto release_con;
	}

	con->features = 0;
@@ -2038,25 +2048,25 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
	if (adev->nbio.funcs->init_ras_controller_interrupt) {
		r = adev->nbio.funcs->init_ras_controller_interrupt(adev);
		if (r)
			goto err_out;
			goto release_con;
	}

	if (adev->nbio.funcs->init_ras_err_event_athub_interrupt) {
		r = adev->nbio.funcs->init_ras_err_event_athub_interrupt(adev);
		if (r)
			goto err_out;
			goto release_con;
	}

	if (amdgpu_ras_fs_init(adev)) {
		r = -EINVAL;
		goto err_out;
		goto release_con;
	}

	dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
			"hardware ability[%x] ras_mask[%x]\n",
			con->hw_supported, con->supported);
	return 0;
err_out:
release_con:
	amdgpu_ras_set_context(adev, NULL);
	kfree(con);

+11 −13
Original line number Diff line number Diff line
@@ -1239,7 +1239,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
	void *crat_image = NULL;
	size_t image_size = 0;
	int proximity_domain;
	struct amdgpu_ras *ctx;
	struct amdgpu_device *adev;

	INIT_LIST_HEAD(&temp_topology_device_list);

@@ -1404,19 +1404,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
		dev->node_props.max_waves_per_simd = 10;
	}

	ctx = amdgpu_ras_get_context((struct amdgpu_device *)(dev->gpu->kgd));
	if (ctx) {
		/* kfd only concerns sram ecc on GFX/SDMA and HBM ecc on UMC */
	adev = (struct amdgpu_device *)(dev->gpu->kgd);
	/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
	dev->node_props.capability |=
			(((ctx->features & BIT(AMDGPU_RAS_BLOCK__SDMA)) != 0) ||
			 ((ctx->features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0)) ?
		((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?
		HSA_CAP_SRAM_EDCSUPPORTED : 0;
		dev->node_props.capability |= ((ctx->features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
	dev->node_props.capability |= ((adev->ras_features & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?
		HSA_CAP_MEM_EDCSUPPORTED : 0;

		dev->node_props.capability |= (ctx->features != 0) ?
	if (adev->asic_type != CHIP_VEGA10)
		dev->node_props.capability |= (adev->ras_features != 0) ?
			HSA_CAP_RASEVENTNOTIFY : 0;
	}

	kfd_debug_print_topology();