Commit 40e8a766 authored by David Yat Sin, committed by Alex Deucher
Browse files

drm/amdkfd: CRIU checkpoint and restore events



Add support to the existing CRIU ioctls to save and restore events during
CRIU checkpoint and restore.

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3a9822d7
Loading
Loading
Loading
Loading
+12 −58
Original line number Diff line number Diff line
@@ -1008,57 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			err = -EINVAL;
			goto out_unlock;
		}

		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
			       args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}

		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			goto out_unlock;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
			goto out_unlock;
		}

		p->signal_handle = args->event_page_offset;

		err = kfd_kmap_event_page(p, args->event_page_offset);
		mutex_unlock(&p->mutex);
		if (err)
			return err;
	}

	err = kfd_event_create(filp, p, args->event_type,
@@ -1067,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
	return err;
}

@@ -2031,7 +1982,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
	if (ret)
		return ret;

	num_events = 0;     /* TODO: Implement Events */
	num_events = kfd_get_num_events(p);
	num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */

	*num_objects = num_queues + num_events + num_svm_ranges;
@@ -2040,7 +1991,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
		priv_size = sizeof(struct kfd_criu_process_priv_data);
		priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
		priv_size += queues_priv_data_size;
		/* TODO: Add Events priv size */
		priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
		/* TODO: Add SVM ranges priv size */
		*objs_priv_size = priv_size;
	}
@@ -2102,7 +2053,10 @@ static int criu_checkpoint(struct file *filep,
		if (ret)
			goto exit_unlock;

		/* TODO: Dump Events */
		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
						 &priv_offset);
		if (ret)
			goto exit_unlock;

		/* TODO: Dump SVM-Ranges */
	}
@@ -2410,8 +2364,8 @@ static int criu_restore_objects(struct file *filep,
				goto exit;
			break;
		case KFD_CRIU_OBJECT_TYPE_EVENT:
			/* TODO: Implement Events */
			*priv_offset += sizeof(struct kfd_criu_event_priv_data);
			ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
						     priv_offset, max_priv_data_size);
			if (ret)
				goto exit;
			break;
+244 −28
Original line number Diff line number Diff line
@@ -55,7 +55,6 @@ struct kfd_signal_page {
	bool need_to_free_pages;
};


static uint64_t *page_slots(struct kfd_signal_page *page)
{
	return page->kernel_address;
@@ -92,7 +91,8 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
}

static int allocate_event_notification_slot(struct kfd_process *p,
					    struct kfd_event *ev)
					    struct kfd_event *ev,
					    const int *restore_id)
{
	int id;

@@ -104,6 +104,10 @@ static int allocate_event_notification_slot(struct kfd_process *p,
		p->signal_mapped_size = 256*8;
	}

	if (restore_id) {
		id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
				GFP_KERNEL);
	} else {
		/*
		 * Compatibility with old user mode: Only use signal slots
		 * user mode has mapped, may be less than
@@ -112,6 +116,7 @@ static int allocate_event_notification_slot(struct kfd_process *p,
		 */
		id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
				GFP_KERNEL);
	}
	if (id < 0)
		return id;

@@ -178,9 +183,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
	return ev;
}

static int create_signal_event(struct file *devkfd,
				struct kfd_process *p,
				struct kfd_event *ev)
static int create_signal_event(struct file *devkfd, struct kfd_process *p,
				struct kfd_event *ev, const int *restore_id)
{
	int ret;

@@ -193,7 +197,7 @@ static int create_signal_event(struct file *devkfd,
		return -ENOSPC;
	}

	ret = allocate_event_notification_slot(p, ev);
	ret = allocate_event_notification_slot(p, ev, restore_id);
	if (ret) {
		pr_warn("Signal event wasn't created because out of kernel memory\n");
		return ret;
@@ -209,14 +213,20 @@ static int create_signal_event(struct file *devkfd,
	return 0;
}

static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{
	int id;

	if (restore_id)
		id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
			GFP_KERNEL);
	else
		/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
		 * intentional integer overflow to -1 without a compiler
		 * warning. idr_alloc treats a negative value as "maximum
		 * signed integer".
		 */
	int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
		id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
				(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
				GFP_KERNEL);

@@ -295,8 +305,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
	return ev->type == KFD_EVENT_TYPE_SIGNAL;
}

int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
		       uint64_t size)
static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
		       uint64_t size, uint64_t user_handle)
{
	struct kfd_signal_page *page;

@@ -315,10 +325,56 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,

	p->signal_page = page;
	p->signal_mapped_size = size;

	p->signal_handle = user_handle;
	return 0;
}

/*
 * kfd_kmap_event_page - map a user-provided BO into the kernel and install
 * it as the signal page of process @p.
 *
 * @p: process that receives the signal page
 * @event_page_offset: user handle; the GPU id and the BO IDR handle are
 *                     extracted from it with GET_GPU_ID() and
 *                     GET_IDR_HANDLE()
 *
 * Fails with -EINVAL when the process already has a signal page, when the
 * GPU id does not resolve to a device, or when the BO handle cannot be
 * translated. Errors from binding the process to the device, from mapping
 * the BO, and from kfd_event_page_set() are passed through unchanged. If
 * kfd_event_page_set() fails, the kernel mapping created here is undone.
 *
 * Return: 0 on success, negative errno otherwise.
 */
int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
{
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	uint64_t bo_size;
	void *bo, *kaddr;
	int ret;

	/* Only one signal page per process. */
	if (p->signal_page) {
		pr_err("Event page is already set\n");
		return -EINVAL;
	}

	dev = kfd_device_by_id(GET_GPU_ID(event_page_offset));
	if (!dev) {
		pr_err("Getting device by id failed in %s\n", __func__);
		return -EINVAL;
	}

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd))
		return PTR_ERR(pdd);

	bo = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(event_page_offset));
	if (!bo) {
		pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
		return -EINVAL;
	}

	ret = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(dev->adev,
					bo, &kaddr, &bo_size);
	if (ret) {
		pr_err("Failed to map event page to kernel\n");
		return ret;
	}

	ret = kfd_event_page_set(p, kaddr, bo_size, event_page_offset);
	if (!ret)
		return 0;

	/* Installing the page failed: drop the mapping made above. */
	pr_err("Failed to set event page\n");
	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, bo);
	return ret;
}

int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		     uint32_t event_type, bool auto_reset, uint32_t node_id,
		     uint32_t *event_id, uint32_t *event_trigger_data,
@@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
	switch (event_type) {
	case KFD_EVENT_TYPE_SIGNAL:
	case KFD_EVENT_TYPE_DEBUG:
		ret = create_signal_event(devkfd, p, ev);
		ret = create_signal_event(devkfd, p, ev, NULL);
		if (!ret) {
			*event_page_offset = KFD_MMAP_TYPE_EVENTS;
			*event_slot_index = ev->event_id;
		}
		break;
	default:
		ret = create_other_event(p, ev);
		ret = create_other_event(p, ev, NULL);
		break;
	}

@@ -366,6 +422,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
	return ret;
}

/*
 * kfd_criu_restore_event - recreate one event from CRIU private data.
 *
 * @devkfd: KFD file, forwarded to create_signal_event()
 * @p: process being restored
 * @user_priv_ptr: base of the user-space private data buffer
 * @priv_data_offset: in/out byte offset of the event record inside the
 *                    buffer; advanced by the record size once the record
 *                    has been copied in
 * @max_priv_data_size: total size of the private data buffer
 *
 * Copies one struct kfd_criu_event_priv_data from user space. If the
 * record carries a non-zero user_handle, the signal page is mapped first.
 * The event is then recreated under its checkpointed event_id.
 *
 * Return: 0 on success; -ENOMEM on allocation failure; -EINVAL if the
 * record would extend past @max_priv_data_size; -EFAULT if the copy from
 * user space fails; otherwise the error from kfd_kmap_event_page() or from
 * the event creation helper.
 */
int kfd_criu_restore_event(struct file *devkfd,
			   struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	struct kfd_criu_event_priv_data *ev_priv;
	struct kfd_event *ev = NULL;
	int ret = 0;

	ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
	if (!ev_priv)
		return -ENOMEM;

	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
	if (!ev) {
		ret = -ENOMEM;
		goto exit;
	}

	if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}
	*priv_data_offset += sizeof(*ev_priv);

	/* Only the record that carries the handle sets up the signal page. */
	if (ev_priv->user_handle) {
		ret = kfd_kmap_event_page(p, ev_priv->user_handle);
		if (ret)
			goto exit;
	}

	ev->type = ev_priv->type;
	ev->auto_reset = ev_priv->auto_reset;
	ev->signaled = ev_priv->signaled;

	init_waitqueue_head(&ev->wq);

	mutex_lock(&p->event_mutex);
	switch (ev->type) {
	case KFD_EVENT_TYPE_SIGNAL:
	case KFD_EVENT_TYPE_DEBUG:
		ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
		break;
	case KFD_EVENT_TYPE_MEMORY:
		memcpy(&ev->memory_exception_data,
			&ev_priv->memory_exception_data,
			sizeof(struct kfd_hsa_memory_exception_data));

		ret = create_other_event(p, ev, &ev_priv->event_id);
		break;
	case KFD_EVENT_TYPE_HW_EXCEPTION:
		memcpy(&ev->hw_exception_data,
			&ev_priv->hw_exception_data,
			sizeof(struct kfd_hsa_hw_exception_data));

		ret = create_other_event(p, ev, &ev_priv->event_id);
		break;
	default:
		/*
		 * No creation helper ran, so the event was never inserted
		 * into the IDR and nothing else references it. Free it here
		 * instead of leaking it; the return value stays 0 as before.
		 */
		kfree(ev);
		ev = NULL;
		break;
	}
	/*
	 * Unlock before the shared exit label: the early error paths jump to
	 * "exit" without ever having taken event_mutex.
	 */
	mutex_unlock(&p->event_mutex);

exit:
	if (ret)
		kfree(ev);

	kfree(ev_priv);

	return ret;
}

/*
 * kfd_criu_checkpoint_events - dump all events of a process into the CRIU
 * private data buffer.
 *
 * @p: process being checkpointed
 * @user_priv_data: base of the user-space private data buffer
 * @priv_data_offset: in/out byte offset at which the event records are
 *                    written; advanced by num_events records, also when
 *                    the copy to user space fails
 *
 * One struct kfd_criu_event_priv_data is produced per entry in
 * p->event_idr. The signal page handle is stored only in the first record.
 *
 * NOTE(review): this function takes no lock while walking the IDR and
 * sizes the array from a separate kfd_get_num_events() call; presumably
 * the caller prevents events from being added in between. Confirm.
 *
 * Return: 0 on success (including when there are no events), -ENOMEM if
 * the staging array cannot be allocated, -EFAULT if the copy to user space
 * fails.
 */
int kfd_criu_checkpoint_events(struct kfd_process *p,
			 uint8_t __user *user_priv_data,
			 uint64_t *priv_data_offset)
{
	struct kfd_criu_event_priv_data *ev_privs;
	int i = 0;
	int ret =  0;
	struct kfd_event *ev;
	uint32_t ev_id;

	uint32_t num_events = kfd_get_num_events(p);

	if (!num_events)
		return 0;

	/* kvcalloc() zeroes the array and checks count * size for overflow. */
	ev_privs = kvcalloc(num_events, sizeof(*ev_privs), GFP_KERNEL);
	if (!ev_privs)
		return -ENOMEM;

	idr_for_each_entry(&p->event_idr, ev, ev_id) {
		struct kfd_criu_event_priv_data *ev_priv;

		/*
		 * Currently, all events have the same size of private_data,
		 * but the current ioctls and CRIU plugin support private_data
		 * of variable sizes.
		 */
		ev_priv = &ev_privs[i];

		ev_priv->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;

		/* We store the user_handle with the first event */
		if (i == 0 && p->signal_page)
			ev_priv->user_handle = p->signal_handle;

		ev_priv->event_id = ev->event_id;
		ev_priv->auto_reset = ev->auto_reset;
		ev_priv->type = ev->type;
		ev_priv->signaled = ev->signaled;

		/* Only these two event types carry extra payload. */
		if (ev_priv->type == KFD_EVENT_TYPE_MEMORY)
			memcpy(&ev_priv->memory_exception_data,
				&ev->memory_exception_data,
				sizeof(struct kfd_hsa_memory_exception_data));
		else if (ev_priv->type == KFD_EVENT_TYPE_HW_EXCEPTION)
			memcpy(&ev_priv->hw_exception_data,
				&ev->hw_exception_data,
				sizeof(struct kfd_hsa_hw_exception_data));

		pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
			  i,
			  ev_priv->event_id,
			  ev_priv->auto_reset,
			  ev_priv->type,
			  ev_priv->signaled);
		i++;
	}

	ret = copy_to_user(user_priv_data + *priv_data_offset,
			   ev_privs, num_events * sizeof(*ev_privs));
	if (ret) {
		pr_err("Failed to copy events priv to user\n");
		ret = -EFAULT;
	}

	*priv_data_offset += num_events * sizeof(*ev_privs);

	kvfree(ev_privs);
	return ret;
}

int kfd_get_num_events(struct kfd_process *p)
{
	struct kfd_event *ev;
	uint32_t id;
	u32 num_events = 0;

	idr_for_each_entry(&p->event_idr, ev, id)
		num_events++;

	return num_events;
}

/* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{
+24 −3
Original line number Diff line number Diff line
@@ -1099,7 +1099,16 @@ struct kfd_criu_queue_priv_data {

/*
 * Per-event record stored in the CRIU private data buffer. One record is
 * written for every event at checkpoint and read back, one at a time, at
 * restore.
 */
struct kfd_criu_event_priv_data {
	/* Set to KFD_CRIU_OBJECT_TYPE_EVENT at checkpoint. */
	uint32_t object_type;
	uint32_t reserved;
	/*
	 * Signal page handle. Checkpoint fills it only in the first event
	 * record; restore maps the event page when it is non-zero.
	 */
	uint64_t user_handle;
	/* Event id; restore requests this exact id when recreating the event. */
	uint32_t event_id;
	/* Copies of the matching struct kfd_event fields. */
	uint32_t auto_reset;
	uint32_t type;
	uint32_t signaled;

	/*
	 * Extra payload, copied only for KFD_EVENT_TYPE_MEMORY and
	 * KFD_EVENT_TYPE_HW_EXCEPTION events respectively.
	 */
	union {
		struct kfd_hsa_memory_exception_data memory_exception_data;
		struct kfd_hsa_hw_exception_data hw_exception_data;
	};
};

int kfd_process_get_queue_info(struct kfd_process *p,
@@ -1114,6 +1123,16 @@ int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_data,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size);

int kfd_criu_checkpoint_events(struct kfd_process *p,
			 uint8_t __user *user_priv_data,
			 uint64_t *priv_data_offset);

int kfd_criu_restore_event(struct file *devkfd,
			   struct kfd_process *p,
			   uint8_t __user *user_priv_data,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size);
/* CRIU - End */

/* Queue Context Management */
@@ -1277,12 +1296,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
		       uint64_t size);
int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);

int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		     uint32_t event_type, bool auto_reset, uint32_t node_id,
		     uint32_t *event_id, uint32_t *event_trigger_data,
		     uint64_t *event_page_offset, uint32_t *event_slot_index);

int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,