Commit d95e8e97 authored by Dennis Li, committed by Alex Deucher

drm/amdgpu: refine create and release logic of hive info



Change to dynamically create and release the hive info object,
which helps the driver support more hives in the future.

v2:
Change to save the hive object pointer in adev, to avoid locking
xgmi_mutex on every call to amdgpu_get_xgmi_hive.

v3:
1. Change the type of the hive object pointer in adev from void*
to amdgpu_hive_info*.
2. Remove unnecessary variable initialization.

Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Dennis Li <Dennis.Li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent aac89168
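
The change converts hive lookup into a get/put pair backed by a kobject
reference count, with the pointer cached in adev so repeat lookups skip
xgmi_mutex. A minimal sketch of the caller pattern this introduces (the
function names are from the patch; the body is illustrative, not code
from the commit):

	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);

	if (hive) {
		mutex_lock(&hive->hive_lock);
		/* ... walk hive->device_list, update topology, ... */
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);	/* drop the reference taken by get */
	}

Note that amdgpu_get_xgmi_hive() no longer takes a lock argument; callers
that need hive_lock now take it explicitly, as the hunks below show.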
drivers/gpu/drm/amd/amdgpu/amdgpu.h  +2 −1

@@ -251,6 +251,7 @@ struct amdgpu_fpriv;
 struct amdgpu_bo_va_mapping;
 struct amdgpu_atif;
 struct kfd_vm_fault_info;
+struct amdgpu_hive_info;
 
 enum amdgpu_cp_irq {
 	AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
@@ -727,7 +728,7 @@ struct amdgpu_device {
 #ifdef CONFIG_DRM_AMD_ACP
 	struct amdgpu_acp		acp;
 #endif
-
+	struct amdgpu_hive_info	*hive;
 	/* ASIC */
 	enum amd_asic_type		asic_type;
 	uint32_t			family;
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  +5 −2

@@ -2857,7 +2857,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
-	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 
 	/* It's a bug to not have a hive within this function */
 	if (WARN_ON(!hive))
@@ -2895,6 +2895,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 	if (adev->asic_reset_res)
 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
 			 adev->asic_reset_res, adev->ddev->unique);
+	amdgpu_put_xgmi_hive(hive);
 }
 
 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
@@ -4339,11 +4340,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 * We always reset all schedulers for device and all devices for XGMI
 	 * hive so that should take care of them too.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev, false);
+	hive = amdgpu_get_xgmi_hive(adev);
 	if (hive) {
 		if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) {
 			DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
 				job ? job->base.id : -1, hive->hive_id);
+			amdgpu_put_xgmi_hive(hive);
 			return 0;
 		}
 		mutex_lock(&hive->hive_lock);
@@ -4509,6 +4511,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	if (hive) {
 		atomic_set(&hive->in_reset, 0);
 		mutex_unlock(&hive->hive_lock);
+		amdgpu_put_xgmi_hive(hive);
 	}
 
 	if (r)
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c  +4 −1

@@ -1554,9 +1554,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 	struct amdgpu_device *remote_adev = NULL;
 	struct amdgpu_device *adev = ras->adev;
 	struct list_head device_list, *device_list_handle =  NULL;
-	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, false);
 
 	if (!ras->disable_ras_err_cnt_harvest) {
+		struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
+
 		/* Build list of devices to query RAS related errors */
 		if  (hive && adev->gmc.xgmi.num_physical_nodes > 1) {
 			device_list_handle = &hive->device_list;
@@ -1569,6 +1570,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 		list_for_each_entry(remote_adev,
 				device_list_handle, gmc.xgmi.head)
 			amdgpu_ras_log_on_err_counter(remote_adev);
+
+		amdgpu_put_xgmi_hive(hive);
 	}
 
 	if (amdgpu_device_should_recover_gpu(ras->adev))
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c  +113 −95

@@ -35,11 +35,9 @@
 
 static DEFINE_MUTEX(xgmi_mutex);
 
-#define AMDGPU_MAX_XGMI_HIVE			8
 #define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE		4
 
-static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
-static unsigned hive_count = 0;
+static LIST_HEAD(xgmi_hive_list);
 
 static const int xgmi_pcs_err_status_reg_vg20[] = {
 	smnXGMI0_PCS_GOPX16_PCS_ERROR_STATUS,
@@ -171,58 +169,46 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
  *
  */
 
+static struct attribute amdgpu_xgmi_hive_id = {
+	.name = "xgmi_hive_id",
+	.mode = S_IRUGO
+};
 
-static ssize_t amdgpu_xgmi_show_hive_id(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static struct attribute *amdgpu_xgmi_hive_attrs[] = {
+	&amdgpu_xgmi_hive_id,
+	NULL
+};
+
+static ssize_t amdgpu_xgmi_show_attrs(struct kobject *kobj,
+	struct attribute *attr, char *buf)
 {
-	struct amdgpu_hive_info *hive =
-			container_of(attr, struct amdgpu_hive_info, dev_attr);
+	struct amdgpu_hive_info *hive = container_of(
+		kobj, struct amdgpu_hive_info, kobj);
 
-	return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id);
+	if (attr == &amdgpu_xgmi_hive_id)
+		return snprintf(buf, PAGE_SIZE, "%llu\n", hive->hive_id);
+
+	return 0;
 }
 
-static int amdgpu_xgmi_sysfs_create(struct amdgpu_device *adev,
-				    struct amdgpu_hive_info *hive)
+static void amdgpu_xgmi_hive_release(struct kobject *kobj)
 {
-	int ret = 0;
-
-	if (WARN_ON(hive->kobj))
-		return -EINVAL;
-
-	hive->kobj = kobject_create_and_add("xgmi_hive_info", &adev->dev->kobj);
-	if (!hive->kobj) {
-		dev_err(adev->dev, "XGMI: Failed to allocate sysfs entry!\n");
-		return -EINVAL;
-	}
-
-	hive->dev_attr = (struct device_attribute) {
-		.attr = {
-			.name = "xgmi_hive_id",
-			.mode = S_IRUGO,
-
-		},
-		.show = amdgpu_xgmi_show_hive_id,
-	};
+	struct amdgpu_hive_info *hive = container_of(
+		kobj, struct amdgpu_hive_info, kobj);
 
-	ret = sysfs_create_file(hive->kobj, &hive->dev_attr.attr);
-	if (ret) {
-		dev_err(adev->dev, "XGMI: Failed to create device file xgmi_hive_id\n");
-		kobject_del(hive->kobj);
-		kobject_put(hive->kobj);
-		hive->kobj = NULL;
-	}
-
-	return ret;
-}
+	mutex_destroy(&hive->hive_lock);
+	kfree(hive);
+}
 
-static void amdgpu_xgmi_sysfs_destroy(struct amdgpu_device *adev,
-				    struct amdgpu_hive_info *hive)
-{
-	sysfs_remove_file(hive->kobj, &hive->dev_attr.attr);
-	kobject_del(hive->kobj);
-	kobject_put(hive->kobj);
-	hive->kobj = NULL;
-}
+static const struct sysfs_ops amdgpu_xgmi_hive_ops = {
+	.show = amdgpu_xgmi_show_attrs,
+};
+
+struct kobj_type amdgpu_xgmi_hive_type = {
+	.release = amdgpu_xgmi_hive_release,
+	.sysfs_ops = &amdgpu_xgmi_hive_ops,
+	.default_attrs = amdgpu_xgmi_hive_attrs,
+};
 
 static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
 				     struct device_attribute *attr,
@@ -287,8 +273,8 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 
 
 	/* Create sysfs link to hive info folder on the first device */
-	if (adev != hive->adev) {
-		ret = sysfs_create_link(&adev->dev->kobj, hive->kobj,
+	if (hive->kobj.parent != (&adev->dev->kobj)) {
+		ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj,
 					"xgmi_hive_info");
 		if (ret) {
 			dev_err(adev->dev, "XGMI: Failed to create link to hive info");
@@ -296,9 +282,9 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
 		}
 	}
 
-	sprintf(node, "node%d", hive->number_devices);
+	sprintf(node, "node%d", atomic_read(&hive->number_devices));
 	/* Create sysfs link form the hive folder to yourself */
-	ret = sysfs_create_link(hive->kobj, &adev->dev->kobj, node);
+	ret = sysfs_create_link(&hive->kobj, &adev->dev->kobj, node);
 	if (ret) {
 		dev_err(adev->dev, "XGMI: Failed to create link from hive info");
 		goto remove_link;
@@ -326,78 +312,96 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
 	device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
 	device_remove_file(adev->dev, &dev_attr_xgmi_error);
 
-	if (adev != hive->adev)
+	if (hive->kobj.parent != (&adev->dev->kobj))
 		sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
 
-	sprintf(node, "node%d", hive->number_devices);
-	sysfs_remove_link(hive->kobj, node);
+	sprintf(node, "node%d", atomic_read(&hive->number_devices));
+	sysfs_remove_link(&hive->kobj, node);
 
 }
 
 
 
-struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock)
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 {
-	int i;
-	struct amdgpu_hive_info *tmp;
+	struct amdgpu_hive_info *hive = NULL, *tmp = NULL;
+	int ret;
 
 	if (!adev->gmc.xgmi.hive_id)
 		return NULL;
 
+	if (adev->hive) {
+		kobject_get(&adev->hive->kobj);
+		return adev->hive;
+	}
+
 	mutex_lock(&xgmi_mutex);
 
-	for (i = 0 ; i < hive_count; ++i) {
-		tmp = &xgmi_hives[i];
-		if (tmp->hive_id == adev->gmc.xgmi.hive_id) {
-			if (lock)
-				mutex_lock(&tmp->hive_lock);
-			mutex_unlock(&xgmi_mutex);
-			return tmp;
+	if (!list_empty(&xgmi_hive_list)) {
+		list_for_each_entry_safe(hive, tmp, &xgmi_hive_list, node)  {
+			if (hive->hive_id == adev->gmc.xgmi.hive_id)
+				goto pro_end;
 		}
 	}
-	if (i >= AMDGPU_MAX_XGMI_HIVE) {
-		mutex_unlock(&xgmi_mutex);
-		return NULL;
+
+	hive = kzalloc(sizeof(*hive), GFP_KERNEL);
+	if (!hive) {
+		dev_err(adev->dev, "XGMI: allocation failed\n");
+		hive = NULL;
+		goto pro_end;
 	}
 
 	/* initialize new hive if not exist */
-	tmp = &xgmi_hives[hive_count++];
-
-	if (amdgpu_xgmi_sysfs_create(adev, tmp)) {
-		mutex_unlock(&xgmi_mutex);
-		return NULL;
+	ret = kobject_init_and_add(&hive->kobj,
+			&amdgpu_xgmi_hive_type,
+			&adev->dev->kobj,
+			"%s", "xgmi_hive_info");
+	if (ret) {
+		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
+		kfree(hive);
+		hive = NULL;
+		goto pro_end;
 	}
 
-	tmp->adev = adev;
-	tmp->hive_id = adev->gmc.xgmi.hive_id;
-	INIT_LIST_HEAD(&tmp->device_list);
-	mutex_init(&tmp->hive_lock);
-	atomic_set(&tmp->in_reset, 0);
-	task_barrier_init(&tmp->tb);
-
-	if (lock)
-		mutex_lock(&tmp->hive_lock);
-	tmp->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
-	tmp->hi_req_gpu = NULL;
+	hive->hive_id = adev->gmc.xgmi.hive_id;
+	INIT_LIST_HEAD(&hive->device_list);
+	INIT_LIST_HEAD(&hive->node);
+	mutex_init(&hive->hive_lock);
+	atomic_set(&hive->in_reset, 0);
+	atomic_set(&hive->number_devices, 0);
+	task_barrier_init(&hive->tb);
+	hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
+	hive->hi_req_gpu = NULL;
 	/*
 	 * hive pstate on boot is high in vega20 so we have to go to low
 	 * pstate on after boot.
 	 */
-	tmp->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
-	mutex_unlock(&xgmi_mutex);
+	hive->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE;
+	list_add_tail(&hive->node, &xgmi_hive_list);
 
-	return tmp;
+pro_end:
+	if (hive)
+		kobject_get(&hive->kobj);
+	mutex_unlock(&xgmi_mutex);
+	return hive;
+}
+
+void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive)
+{
+	if (hive)
+		kobject_put(&hive->kobj);
 }
 
 int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate)
 {
 	int ret = 0;
-	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 	struct amdgpu_device *request_adev = hive->hi_req_gpu ?
 						hive->hi_req_gpu : adev;
 	bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20;
 	bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN;
 
+	amdgpu_put_xgmi_hive(hive);
 	/* fw bug so temporarily disable pstate switching */
 	return 0;
 
@@ -449,7 +453,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 
 	/* Each psp need to set the latest topology */
 	ret = psp_xgmi_set_topology_info(&adev->psp,
-					 hive->number_devices,
+					 atomic_read(&hive->number_devices),
 					 &adev->psp.xgmi_context.top_info);
 	if (ret)
 		dev_err(adev->dev,
@@ -511,7 +515,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 		adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16;
 	}
 
-	hive = amdgpu_get_xgmi_hive(adev, 1);
+	hive = amdgpu_get_xgmi_hive(adev);
 	if (!hive) {
 		ret = -EINVAL;
 		dev_err(adev->dev,
@@ -519,6 +523,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 			adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
 		goto exit;
 	}
+	mutex_lock(&hive->hive_lock);
 
 	top_info = &adev->psp.xgmi_context.top_info;
 
@@ -526,7 +531,7 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 	list_for_each_entry(entry, &hive->device_list, head)
 		top_info->nodes[count++].node_id = entry->node_id;
 	top_info->num_nodes = count;
-	hive->number_devices = count;
+	atomic_set(&hive->number_devices, count);
 
 	task_barrier_add_task(&hive->tb);
 
@@ -565,35 +570,48 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 exit_unlock:
 	mutex_unlock(&hive->hive_lock);
 exit:
-	if (!ret)
+	if (!ret) {
+		adev->hive = hive;
 		dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
 			 adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
-	else
+	} else {
+		amdgpu_put_xgmi_hive(hive);
 		dev_err(adev->dev, "XGMI: Failed to add node %d, hive 0x%llx ret: %d\n",
 			adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id,
 			ret);
+	}
 
 	return ret;
 }
 
 int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 {
-	struct amdgpu_hive_info *hive;
+	struct amdgpu_hive_info *hive = adev->hive;
 
 	if (!adev->gmc.xgmi.supported)
 		return -EINVAL;
 
-	hive = amdgpu_get_xgmi_hive(adev, 1);
 	if (!hive)
 		return -EINVAL;
 
+	mutex_lock(&hive->hive_lock);
 	task_barrier_rem_task(&hive->tb);
 	amdgpu_xgmi_sysfs_rem_dev_info(adev, hive);
+	if (hive->hi_req_gpu == adev)
+		hive->hi_req_gpu = NULL;
+	list_del(&adev->gmc.xgmi.head);
 	mutex_unlock(&hive->hive_lock);
 
-	if(!(--hive->number_devices)){
-		amdgpu_xgmi_sysfs_destroy(adev, hive);
-		mutex_destroy(&hive->hive_lock);
-	}
+	amdgpu_put_xgmi_hive(hive);
+	adev->hive = NULL;
+
+	if (atomic_dec_return(&hive->number_devices) == 0) {
+		/* Remove the hive from global hive list */
+		mutex_lock(&xgmi_mutex);
+		list_del(&hive->node);
+		mutex_unlock(&xgmi_mutex);
+
+		amdgpu_put_xgmi_hive(hive);
+	}
 
 	return psp_xgmi_terminate(&adev->psp);
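
The refactor above is the standard kernel kobject lifetime pattern: embed a
struct kobject in the managed structure, free the structure from the
kobj_type's release() callback, and let creation collapse to
kobject_init_and_add() plus kobject_put(). A self-contained sketch of that
pattern, using hypothetical foo_* names (none of these identifiers appear in
the patch):

	#include <linux/kobject.h>
	#include <linux/slab.h>

	struct foo {
		struct kobject kobj;	/* embedded, so release() can free the whole object */
		int payload;
	};

	/* runs when the last kobject_put() drops the refcount to zero */
	static void foo_release(struct kobject *kobj)
	{
		struct foo *f = container_of(kobj, struct foo, kobj);

		kfree(f);
	}

	static struct kobj_type foo_ktype = {
		.release	= foo_release,
		.sysfs_ops	= &kobj_sysfs_ops,	/* stock ops from <linux/kobject.h> */
	};

	static struct foo *foo_create(struct kobject *parent)
	{
		struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

		if (!f)
			return NULL;
		/* refcount starts at 1; a "foo" directory appears under parent in sysfs */
		if (kobject_init_and_add(&f->kobj, &foo_ktype, parent, "foo")) {
			kobject_put(&f->kobj);	/* canonical cleanup: routes through foo_release() */
			return NULL;
		}
		return f;
	}

One design note: on a failed kobject_init_and_add() the patch calls
kfree(hive) directly rather than kobject_put(), presumably because hive_lock
is only initialized after the kobject is set up, so letting release() run
there would mutex_destroy() an uninitialized lock.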
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h  +7 −7

@@ -27,13 +27,12 @@
 
 
 struct amdgpu_hive_info {
+	struct kobject kobj;
 	uint64_t hive_id;
 	struct list_head device_list;
-	int number_devices;
+	struct list_head node;
+	atomic_t number_devices;
 	struct mutex hive_lock;
-	struct kobject *kobj;
-	struct device_attribute dev_attr;
-	struct amdgpu_device *adev;
 	atomic_t in_reset;
 	int hi_req_count;
 	struct amdgpu_device *hi_req_gpu;
@@ -51,7 +50,8 @@ struct amdgpu_pcs_ras_field {
 	uint32_t pcs_err_shift;
 };
 
-struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
+struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
+void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
 int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
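
Putting the header and the C file together, the reference flow after this
patch works out as follows (an inferred summary, not text from the commit):

	/*
	 * Hive refcount, as inferred from the patch:
	 *
	 *   kobject_init_and_add()       -> 1, owned by xgmi_hive_list
	 *   kobject_get() at pro_end     -> +1, handed to the caller of
	 *                                   amdgpu_get_xgmi_hive() (cached as
	 *                                   adev->hive by amdgpu_xgmi_add_device)
	 *   fast path via adev->hive     -> +1 per temporary user, each paired
	 *                                   with amdgpu_put_xgmi_hive()
	 *   amdgpu_xgmi_remove_device()  -> -1 for adev->hive; when
	 *                                   number_devices reaches 0, -1 for the
	 *                                   list reference, which fires
	 *                                   amdgpu_xgmi_hive_release()
	 */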