Commit 3a9822d7 authored by David Yat Sin's avatar David Yat Sin Committed by Alex Deucher
Browse files

drm/amdkfd: CRIU checkpoint and restore queue control stack



Checkpoint contents of queue control stacks on CRIU dump and restore them
during CRIU restore.

Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarDavid Yat Sin <david.yatsin@amd.com>
Signed-off-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 42c6c482
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -311,7 +311,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL,
	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL,
			&doorbell_offset_in_process);
	if (err != 0)
		goto err_create_queue;
+1 −1
Original line number Diff line number Diff line
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
	properties.type = KFD_QUEUE_TYPE_DIQ;

	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
				&properties, &qid, NULL, NULL, NULL);
				&properties, &qid, NULL, NULL, NULL, NULL);

	if (status) {
		pr_err("Failed to create DIQ\n");
+15 −7
Original line number Diff line number Diff line
@@ -323,7 +323,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd)
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;
@@ -385,7 +385,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd);
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);
@@ -1342,7 +1343,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd,
			const struct kfd_criu_queue_priv_data *qd,
			const void *restore_mqd)
			const void *restore_mqd, const void *restore_ctl_stack)
{
	int retval;
	struct mqd_manager *mqd_mgr;
@@ -1391,7 +1392,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd);
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);
@@ -1799,7 +1801,8 @@ static int get_wave_state(struct device_queue_manager *dqm,

static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size)
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
@@ -1808,13 +1811,18 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd)
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
@@ -1834,7 +1842,7 @@ static int checkpoint_mqd(struct device_queue_manager *dqm,
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd);
	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
+6 −3
Original line number Diff line number Diff line
@@ -93,7 +93,8 @@ struct device_queue_manager_ops {
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd);
				const void *restore_mqd,
				const void *restore_ctl_stack);

	int	(*destroy_queue)(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
@@ -145,11 +146,13 @@ struct device_queue_manager_ops {
	int (*reset_queues)(struct device_queue_manager *dqm,
					uint16_t pasid);
	void	(*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
				  const struct queue *q, u32 *mqd_size);
				  const struct queue *q, u32 *mqd_size,
				  u32 *ctl_stack_size);

	int	(*checkpoint_mqd)(struct device_queue_manager *dqm,
				  const struct queue *q,
				  void *mqd);
				  void *mqd,
				  void *ctl_stack);
};

struct device_queue_manager_asic_ops {
+9 −2
Original line number Diff line number Diff line
@@ -100,12 +100,19 @@ struct mqd_manager {
				  u32 *ctl_stack_used_size,
				  u32 *save_area_used_size);

	void	(*checkpoint_mqd)(struct mqd_manager *mm, void *mqd, void *mqd_dst);
	void	(*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);

	void	(*checkpoint_mqd)(struct mqd_manager *mm,
				  void *mqd,
				  void *mqd_dst,
				  void *ctl_stack_dst);

	void	(*restore_mqd)(struct mqd_manager *mm, void **mqd,
				struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
				struct queue_properties *p,
				const void *mqd_src);
				const void *mqd_src,
				const void *ctl_stack_src,
				const u32 ctl_stack_size);

#if defined(CONFIG_DEBUG_FS)
	int	(*debugfs_show_mqd)(struct seq_file *m, void *data);
Loading