Commit e8aa1623 authored by Lijo Lazar's avatar Lijo Lazar Committed by Wen Zhiwei
Browse files

drm/amdkfd: Use device based logging for errors

stable inclusion
from stable-v6.6.69
commit 4312b60fa7aca47e70557de48a2c3b644465a51f
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBNEPJ

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=4312b60fa7aca47e70557de48a2c3b644465a51f



--------------------------------

[ Upstream commit 62ec7d38b769ccf33b1080e69c2ae5b7344d116d ]

Convert some pr_* to some dev_* APIs to identify the device.

Signed-off-by: default avatarLijo Lazar <lijo.lazar@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Stable-dep-of: 438b39ac74e2 ("drm/amdkfd: pause autosuspend when creating pdd")
Signed-off-by: default avatarSasha Levin <sashal@kernel.org>
Signed-off-by: default avatarWen Zhiwei <wenzhiwei@kylinos.cn>
parent a2d9f387
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -373,7 +373,8 @@ int kfd_init_apertures(struct kfd_process *process)

		pdd = kfd_create_process_device_data(dev, process);
		if (!pdd) {
			pr_err("Failed to create process device data\n");
			dev_err(dev->adev->dev,
				"Failed to create process device data\n");
			return -ENOMEM;
		}
		/*
+11 −10
Original line number Diff line number Diff line
@@ -68,7 +68,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
		kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
		break;
	default:
		pr_err("Invalid queue type %d\n", type);
		dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
		return false;
	}

@@ -78,13 +78,14 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
	prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);

	if (!prop.doorbell_ptr) {
		pr_err("Failed to initialize doorbell");
		dev_err(dev->adev->dev, "Failed to initialize doorbell");
		goto err_get_kernel_doorbell;
	}

	retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
	if (retval != 0) {
		pr_err("Failed to init pq queues size %d\n", queue_size);
		dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
			queue_size);
		goto err_pq_allocate_vidmem;
	}

@@ -332,7 +333,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
	if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
		return kq;

	pr_err("Failed to init kernel queue\n");
	dev_err(dev->adev->dev, "Failed to init kernel queue\n");

	kfree(kq);
	return NULL;
@@ -351,26 +352,26 @@ static __attribute__((unused)) void test_kq(struct kfd_node *dev)
	uint32_t *buffer, i;
	int retval;

	pr_err("Starting kernel queue test\n");
	dev_err(dev->adev->dev, "Starting kernel queue test\n");

	kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
	if (unlikely(!kq)) {
		pr_err("  Failed to initialize HIQ\n");
		pr_err("Kernel queue test failed\n");
		dev_err(dev->adev->dev, "  Failed to initialize HIQ\n");
		dev_err(dev->adev->dev, "Kernel queue test failed\n");
		return;
	}

	retval = kq_acquire_packet_buffer(kq, 5, &buffer);
	if (unlikely(retval != 0)) {
		pr_err("  Failed to acquire packet buffer\n");
		pr_err("Kernel queue test failed\n");
		dev_err(dev->adev->dev, "  Failed to acquire packet buffer\n");
		dev_err(dev->adev->dev, "Kernel queue test failed\n");
		return;
	}
	for (i = 0; i < 5; i++)
		buffer[i] = kq->nop_packet;
	kq_submit_packet(kq);

	pr_err("Ending kernel queue test\n");
	dev_err(dev->adev->dev, "Ending kernel queue test\n");
}

+5 −3
Original line number Diff line number Diff line
@@ -118,12 +118,14 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
	 * attention grabbing.
	 */
	if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
		pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
		dev_err(mm->dev->adev->dev,
			"Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
			gfx_info->max_shader_engines);
		return;
	}
	if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
		pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
		dev_err(mm->dev->adev->dev,
			"Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
			gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
		return;
	}
+41 −22
Original line number Diff line number Diff line
@@ -45,7 +45,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *dev = pm->dqm->dev;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
@@ -59,14 +60,14 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
	 */
	*over_subscription = false;

	if (dev->max_proc_per_quantum > 1)
		max_proc_per_quantum = dev->max_proc_per_quantum;
	if (node->max_proc_per_quantum > 1)
		max_proc_per_quantum = node->max_proc_per_quantum;

	if ((process_count > max_proc_per_quantum) ||
	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
	    gws_queue_count > 1) {
		*over_subscription = true;
		pr_debug("Over subscribed runlist\n");
		dev_dbg(dev, "Over subscribed runlist\n");
	}

	map_queue_size = pm->pmf->map_queues_size;
@@ -81,7 +82,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	pr_debug("runlist ib size %d\n", *rlib_size);
	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}

static int pm_allocate_runlist_ib(struct packet_manager *pm,
@@ -90,6 +91,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int *rl_buffer_size,
				bool *is_over_subscription)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval;

	if (WARN_ON(pm->allocated))
@@ -99,11 +102,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
					&pm->ib_buffer_obj);
	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);

	if (retval) {
		pr_err("Failed to allocate runlist IB\n");
		dev_err(dev, "Failed to allocate runlist IB\n");
		goto out;
	}

@@ -125,6 +127,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
@@ -142,7 +146,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	pr_debug("Building runlist ib process count: %d queues count %d\n",
	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

	/* build the run list ib packet */
@@ -150,7 +154,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
		qpd = cur->qpd;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			pr_debug("Not enough space left in runlist IB\n");
			dev_dbg(dev, "Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}
@@ -167,7 +171,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
			if (!kq->queue->properties.is_active)
				continue;

			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
			dev_dbg(dev,
				"static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
@@ -186,7 +191,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
			if (!q->properties.is_active)
				continue;

			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
			dev_dbg(dev,
				"static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
@@ -203,11 +209,13 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
		}
	}

	pr_debug("Finished map process and queues to runlist\n");
	dev_dbg(dev, "Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
			dev_warn(
				dev,
				"Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					*rl_gpu_addr,
					alloc_size_bytes / sizeof(uint32_t),
@@ -272,6 +280,8 @@ void pm_uninit(struct packet_manager *pm, bool hanging)
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

@@ -281,7 +291,7 @@ int pm_send_set_resources(struct packet_manager *pm,
					size / sizeof(uint32_t),
					(unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}
@@ -343,6 +353,8 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			uint64_t fence_value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

@@ -354,7 +366,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}
@@ -372,6 +384,8 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,

int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval = 0;
	uint32_t *buffer, size;

@@ -385,7 +399,8 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
			(unsigned int **)&buffer);

		if (!buffer) {
			pr_err("Failed to allocate buffer on kernel queue\n");
			dev_err(dev,
				"Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}
@@ -406,6 +421,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

@@ -414,7 +431,7 @@ int pm_send_unmap_queue(struct packet_manager *pm,
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}
@@ -463,6 +480,8 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)

int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int r = 0;

@@ -474,16 +493,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
	kq_acquire_packet_buffer(pm->priv_queue,
			size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		pr_err("Failed to allocate buffer on kernel queue\n");
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		buffer[0], buffer[1], buffer[2], buffer[3],
		buffer[4], buffer[5], buffer[6]);
	dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
		 buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
+15 −9
Original line number Diff line number Diff line
@@ -1308,7 +1308,8 @@ int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
		if (IS_ERR_VALUE(qpd->tba_addr)) {
			int err = qpd->tba_addr;

			pr_err("Failure to set tba address. error %d.\n", err);
			dev_err(dev->adev->dev,
				"Failure to set tba address. error %d.\n", err);
			qpd->tba_addr = 0;
			qpd->cwsr_kaddr = NULL;
			return err;
@@ -1603,7 +1604,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
						&pdd->proc_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate process context bo\n");
			dev_err(dev->adev->dev,
				"failed to allocate process context bo\n");
			goto err_free_pdd;
		}
		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
@@ -1667,7 +1669,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
						     &p->kgd_process_info,
						     &p->ef);
	if (ret) {
		pr_err("Failed to create process VM object\n");
		dev_err(dev->adev->dev, "Failed to create process VM object\n");
		return ret;
	}
	pdd->drm_priv = drm_file->private_data;
@@ -1714,7 +1716,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		dev_err(dev->adev->dev, "Process device data doesn't exist\n");
		return ERR_PTR(-ENOMEM);
	}

@@ -1824,6 +1826,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device *dev = pdd->dev->adev->dev;

		kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
					     trigger);
@@ -1835,7 +1838,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
		 * them been add back since they actually not be saved right now.
		 */
		if (r && r != -EIO) {
			pr_err("Failed to evict process queues\n");
			dev_err(dev, "Failed to evict process queues\n");
			goto fail;
		}
		n_evicted++;
@@ -1857,7 +1860,8 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)

		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd))
			pr_err("Failed to restore queues\n");
			dev_err(pdd->dev->adev->dev,
				"Failed to restore queues\n");

		n_evicted--;
	}
@@ -1873,13 +1877,14 @@ int kfd_process_restore_queues(struct kfd_process *p)

	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];
		struct device *dev = pdd->dev->adev->dev;

		kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);

		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
							      &pdd->qpd);
		if (r) {
			pr_err("Failed to restore process queues\n");
			dev_err(dev, "Failed to restore process queues\n");
			if (!ret)
				ret = r;
		}
@@ -2039,7 +2044,7 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
	struct qcm_process_device *qpd;

	if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
		pr_err("Incorrect CWSR mapping size.\n");
		dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n");
		return -EINVAL;
	}

@@ -2051,7 +2056,8 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
	qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					get_order(KFD_CWSR_TBA_TMA_SIZE));
	if (!qpd->cwsr_kaddr) {
		pr_err("Error allocating per process CWSR buffer.\n");
		dev_err(dev->adev->dev,
			"Error allocating per process CWSR buffer.\n");
		return -ENOMEM;
	}