Commit 36988070 authored by Rajneesh Bhardwaj's avatar Rajneesh Bhardwaj Committed by Alex Deucher
Browse files

drm/amdkfd: CRIU Introduce Checkpoint-Restore APIs



Checkpoint-Restore in userspace (CRIU) is a powerful tool that can
snapshot a running process and later restore it on same or a remote
machine but expects the processes that have a device file (e.g. GPU)
associated with them, provide necessary driver support to assist CRIU
and its extensible plugin interface. Thus, In order to support the
Checkpoint-Restore of any ROCm process, the AMD Radeon Open Compute
Kernel driver, needs to provide a set of new APIs that provide
necessary VRAM metadata and its contents to a userspace component
(CRIU plugin) that can store it in form of image files.

This introduces some new ioctls which will be used to checkpoint-Restore
any KFD bound user process. KFD only allows ioctl calls from the same
process that opened the KFD file descriptor. Since these ioctls are
expected to be called from a KFD criu plugin which has elevated ptrace
attached privileges and CAP_CHECKPOINT_RESTORE capabilities attached with
the file descriptors so modify KFD to allow such calls.

(API redesigned by David Yat Sin)
Suggested-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarDavid Yat Sin <david.yatsin@amd.com>
Signed-off-by: default avatarRajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent afa37315
Loading
Loading
Loading
Loading
+97 −1
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
#include <asm/processor.h>
#include "kfd_priv.h"
@@ -1859,6 +1860,75 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
}
#endif

static int criu_checkpoint(struct file *filep,
			   struct kfd_process *p,
			   struct kfd_ioctl_criu_args *args)
{
	return 0;
}

static int criu_restore(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	return 0;
}

static int criu_unpause(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	return 0;
}

static int criu_resume(struct file *filep,
			struct kfd_process *p,
			struct kfd_ioctl_criu_args *args)
{
	return 0;
}

static int criu_process_info(struct file *filep,
				struct kfd_process *p,
				struct kfd_ioctl_criu_args *args)
{
	return 0;
}

static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data)
{
	struct kfd_ioctl_criu_args *args = data;
	int ret;

	dev_dbg(kfd_device, "CRIU operation: %d\n", args->op);
	switch (args->op) {
	case KFD_CRIU_OP_PROCESS_INFO:
		ret = criu_process_info(filep, p, args);
		break;
	case KFD_CRIU_OP_CHECKPOINT:
		ret = criu_checkpoint(filep, p, args);
		break;
	case KFD_CRIU_OP_UNPAUSE:
		ret = criu_unpause(filep, p, args);
		break;
	case KFD_CRIU_OP_RESTORE:
		ret = criu_restore(filep, p, args);
		break;
	case KFD_CRIU_OP_RESUME:
		ret = criu_resume(filep, p, args);
		break;
	default:
		dev_dbg(kfd_device, "Unsupported CRIU operation:%d\n", args->op);
		ret = -EINVAL;
		break;
	}

	if (ret)
		dev_dbg(kfd_device, "CRIU operation:%d err:%d\n", args->op, ret);

	return ret;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}
@@ -1962,6 +2032,10 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
			kfd_ioctl_set_xnack_mode, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
@@ -1976,6 +2050,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;
	bool ptrace_attached = false;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;
@@ -2001,7 +2076,15 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
	 * processes need to create their own KFD device context.
	 */
	process = filep->private_data;
	if (process->lead_thread != current->group_leader) {

	rcu_read_lock();
	if ((ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE) &&
	    ptrace_parent(process->lead_thread) == current)
		ptrace_attached = true;
	rcu_read_unlock();

	if (process->lead_thread != current->group_leader
	    && !ptrace_attached) {
		dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
		retcode = -EBADF;
		goto err_i1;
@@ -2016,6 +2099,19 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
		goto err_i1;
	}

	/*
	 * Versions of docker shipped in Ubuntu 18.xx and 20.xx do not support
	 * CAP_CHECKPOINT_RESTORE, so we also allow access if CAP_SYS_ADMIN as CAP_SYS_ADMIN is a
	 * more priviledged access.
	 */
	if (unlikely(ioctl->flags & KFD_IOC_FLAG_CHECKPOINT_RESTORE)) {
		if (!capable(CAP_CHECKPOINT_RESTORE) &&
						!capable(CAP_SYS_ADMIN)) {
			retcode = -EACCES;
			goto err_i1;
		}
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
+64 −1
Original line number Diff line number Diff line
@@ -121,7 +121,26 @@
 */
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512


/**
 * enum kfd_ioctl_flags - KFD ioctl flags
 * Various flags that can be set in &amdkfd_ioctl_desc.flags to control how
 * userspace can use a given ioctl.
 */
enum kfd_ioctl_flags {
	/*
	 * @KFD_IOC_FLAG_CHECKPOINT_RESTORE:
	 * Certain KFD ioctls such as AMDKFD_IOC_CRIU_OP can potentially
	 * perform privileged operations and load arbitrary data into MQDs and
	 * eventually HQD registers when the queue is mapped by HWS. In order to
	 * prevent this we should perform additional security checks.
	 *
	 * This is equivalent to callers with the CHECKPOINT_RESTORE capability.
	 *
	 * Note: Since earlier versions of docker do not support CHECKPOINT_RESTORE,
	 * we also allow ioctls with SYS_ADMIN capability.
	 */
	KFD_IOC_FLAG_CHECKPOINT_RESTORE = BIT(0),
};
/*
 * Kernel module parameter to specify maximum number of supported queues per
 * device
@@ -1006,6 +1025,50 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
				  uint64_t tba_addr,
				  uint64_t tma_addr);

/* CRIU */
/*
 * Need to increment KFD_CRIU_PRIV_VERSION each time a change is made to any of the CRIU private
 * structures:
 * kfd_criu_process_priv_data
 * kfd_criu_device_priv_data
 * kfd_criu_bo_priv_data
 * kfd_criu_queue_priv_data
 * kfd_criu_event_priv_data
 * kfd_criu_svm_range_priv_data
 */

#define KFD_CRIU_PRIV_VERSION 1

struct kfd_criu_process_priv_data {
	uint32_t version;
};

struct kfd_criu_device_priv_data {
	/* For future use */
	uint64_t reserved;
};

struct kfd_criu_bo_priv_data {
	uint64_t reserved;
};

struct kfd_criu_svm_range_priv_data {
	uint32_t object_type;
	uint32_t reserved;
};

struct kfd_criu_queue_priv_data {
	uint32_t object_type;
	uint32_t reserved;
};

struct kfd_criu_event_priv_data {
	uint32_t object_type;
	uint32_t reserved;
};

/* CRIU - End */

/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
+80 −1
Original line number Diff line number Diff line
@@ -468,6 +468,82 @@ struct kfd_ioctl_smi_events_args {
	__u32 anon_fd;	/* from KFD */
};

/**************************************************************************************************
 * CRIU IOCTLs (Checkpoint Restore In Userspace)
 *
 * When checkpointing a process, the userspace application will perform:
 * 1. PROCESS_INFO op to determine current process information. This pauses execution and evicts
 *    all the queues.
 * 2. CHECKPOINT op to checkpoint process contents (BOs, queues, events, svm-ranges)
 * 3. UNPAUSE op to un-evict all the queues
 *
 * When restoring a process, the CRIU userspace application will perform:
 *
 * 1. RESTORE op to restore process contents
 * 2. RESUME op to start the process
 *
 * Note: Queues are forced into an evicted state after a successful PROCESS_INFO. User
 * application needs to perform an UNPAUSE operation after calling PROCESS_INFO.
 */

enum kfd_criu_op {
	KFD_CRIU_OP_PROCESS_INFO,
	KFD_CRIU_OP_CHECKPOINT,
	KFD_CRIU_OP_UNPAUSE,
	KFD_CRIU_OP_RESTORE,
	KFD_CRIU_OP_RESUME,
};

/**
 * kfd_ioctl_criu_args - Arguments perform CRIU operation
 * @devices:		[in/out] User pointer to memory location for devices information.
 * 			This is an array of type kfd_criu_device_bucket.
 * @bos:		[in/out] User pointer to memory location for BOs information
 * 			This is an array of type kfd_criu_bo_bucket.
 * @priv_data:		[in/out] User pointer to memory location for private data
 * @priv_data_size:	[in/out] Size of priv_data in bytes
 * @num_devices:	[in/out] Number of GPUs used by process. Size of @devices array.
 * @num_bos		[in/out] Number of BOs used by process. Size of @bos array.
 * @num_objects:	[in/out] Number of objects used by process. Objects are opaque to
 *				 user application.
 * @pid:		[in/out] PID of the process being checkpointed
 * @op			[in] Type of operation (kfd_criu_op)
 *
 * Return: 0 on success, -errno on failure
 */
struct kfd_ioctl_criu_args {
	__u64 devices;		/* Used during ops: CHECKPOINT, RESTORE */
	__u64 bos;		/* Used during ops: CHECKPOINT, RESTORE */
	__u64 priv_data;	/* Used during ops: CHECKPOINT, RESTORE */
	__u64 priv_data_size;	/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 num_devices;	/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 num_bos;		/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 num_objects;	/* Used during ops: PROCESS_INFO, RESTORE */
	__u32 pid;		/* Used during ops: PROCESS_INFO, RESUME */
	__u32 op;
};

struct kfd_criu_device_bucket {
	__u32 user_gpu_id;
	__u32 actual_gpu_id;
	__u32 drm_fd;
	__u32 pad;
};

struct kfd_criu_bo_bucket {
	__u64 addr;
	__u64 size;
	__u64 offset;
	__u64 restored_offset;    /* During restore, updated offset for BO */
	__u32 gpu_id;             /* This is the user_gpu_id */
	__u32 alloc_flags;
	__u32 dmabuf_fd;
	__u32 pad;
};

/* CRIU IOCTLs - END */
/**************************************************************************************************/

/* Register offset inside the remapped mmio page
 */
enum kfd_mmio_remap {
@@ -742,7 +818,10 @@ struct kfd_ioctl_set_xnack_mode_args {
#define AMDKFD_IOC_SET_XNACK_MODE		\
		AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)

#define AMDKFD_IOC_CRIU_OP			\
		AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)

#define AMDKFD_COMMAND_START		0x01
#define AMDKFD_COMMAND_END		0x22
#define AMDKFD_COMMAND_END		0x23

#endif