Commit 9731dd4c authored by Daniel Phillips's avatar Daniel Phillips Committed by Alex Deucher
Browse files

drm/amdkfd: Add available memory ioctl

Add a new KFD ioctl to return the largest possible memory size that
can be allocated as a buffer object using
kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same
accept/reject criteria as that function so that allocating a new
buffer object of the size returned by this new ioctl is guaranteed to
succeed, barring races with other allocating tasks.

This IOCTL will be used by libhsakmt:
https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html



Signed-off-by: default avatarDaniel Phillips <Daniel.Phillips@amd.com>
Signed-off-by: default avatarDavid Yat Sin <David.YatSin@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 1a65327a
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -268,6 +268,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
					void *drm_priv);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct amdgpu_device *adev, uint64_t va, uint64_t size,
		void *drm_priv, struct kgd_mem **mem,
+34 −4
Original line number Diff line number Diff line
@@ -38,6 +38,12 @@
 */
#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1

/*
 * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB
 * BO chunk
 */
#define VRAM_ALLOCATION_ALIGN (1 << 21)

/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
@@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
 * compromise that should work in most cases without reserving too
 * much memory for page tables unnecessarily (factor 16K, >> 14).
 */
#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14)
#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)

static size_t amdgpu_amdkfd_acc_size(uint64_t size)
{
@@ -148,7 +154,13 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
		vram_needed = size;

		/*
		 * Conservatively round up the allocation requirement to 2 MB
		 * to avoid fragmentation caused by 4K allocations in the tail
		 * 2M BO chunk.
		 */
		vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
@@ -173,7 +185,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
	     kfd_mem_limit.max_ttm_mem_limit) ||
	    (adev->kfd.vram_used + vram_needed >
	     adev->gmc.real_vram_size - reserved_for_pt)) {
	     adev->gmc.real_vram_size -
	     atomic64_read(&adev->vram_pin_size) -
	     reserved_for_pt)) {
		ret = -ENOMEM;
		goto release;
	}
@@ -205,7 +219,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev,
	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
		adev->kfd.vram_used -= size;
		adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
	} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
@@ -1633,6 +1647,22 @@ int amdgpu_amdkfd_criu_resume(void *p)
	return ret;
}

size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev)
{
	uint64_t reserved_for_pt =
		ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
	size_t available;

	spin_lock(&kfd_mem_limit.mem_limit_lock);
	available = adev->gmc.real_vram_size
		- adev->kfd.vram_used
		- atomic64_read(&adev->vram_pin_size)
		- reserved_for_pt;
	spin_unlock(&kfd_mem_limit.mem_limit_lock);

	return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN);
}

int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		struct amdgpu_device *adev, uint64_t va, uint64_t size,
		void *drm_priv, struct kgd_mem **mem,
+34 −0
Original line number Diff line number Diff line
@@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id)
{
	struct kfd_process_device *pdd;

	mutex_lock(&p->mutex);
	pdd = kfd_process_device_data_by_id(p, gpu_id);

	if (pdd)
		return pdd;

	mutex_unlock(&p->mutex);
	return NULL;
}

static inline void kfd_unlock_pdd(struct kfd_process_device *pdd)
{
	mutex_unlock(&pdd->process->mutex);
}

int kfd_chardev_init(void)
{
	int err = 0;
@@ -958,6 +977,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
	return false;
}

static int kfd_ioctl_get_available_memory(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_available_memory_args *args = data;
	struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id);

	if (!pdd)
		return -EINVAL;
	args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev);
	kfd_unlock_pdd(pdd);
	return 0;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
@@ -2648,6 +2680,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP,
			kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY,
			kfd_ioctl_get_available_memory, 0),
};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
+12 −2
Original line number Diff line number Diff line
@@ -34,9 +34,10 @@
 * - 1.6 - Query clear flags in SVM get_attr API
 * - 1.7 - Checkpoint Restore (CRIU) API
 * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs
 * - 1.9 - Add available memory ioctl
 */
#define KFD_IOCTL_MAJOR_VERSION 1
#define KFD_IOCTL_MINOR_VERSION 8
#define KFD_IOCTL_MINOR_VERSION 9

struct kfd_ioctl_get_version_args {
	__u32 major_version;	/* from KFD */
@@ -100,6 +101,12 @@ struct kfd_ioctl_get_queue_wave_state_args {
	__u32 pad;
};

struct kfd_ioctl_get_available_memory_args {
	__u64 available;	/* from KFD */
	__u32 gpu_id;		/* to KFD */
	__u32 pad;
};

/* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
#define KFD_IOC_CACHE_POLICY_COHERENT 0
#define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -826,7 +833,10 @@ struct kfd_ioctl_set_xnack_mode_args {
#define AMDKFD_IOC_CRIU_OP			\
		AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args)

#define AMDKFD_IOC_AVAILABLE_MEMORY		\
		AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args)

#define AMDKFD_COMMAND_START		0x01
#define AMDKFD_COMMAND_END		0x23
#define AMDKFD_COMMAND_END		0x24

#endif