Commit 8dc1db31 authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Introduce kfd_node struct (v5)



Introduce a new structure, kfd_node, which will now represent
a compute node. kfd_node is carved out of the kfd_dev structure.
The kfd_dev struct now becomes the parent of kfd_node and will
store common resources such as the doorbells, the GTT sub-allocator,
etc. The kfd_node struct will store all resources specific to a
compute node, such as the device queue manager and interrupt handling.

This is the first step in adding compute partition support in KFD.
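
For orientation, a minimal sketch of the resulting layout, using stub
stand-ins for the kernel types (the real, much larger definitions live
in kfd_priv.h; only fields visible in the hunks below, plus the queue
manager named above, are shown):

/* Abridged, illustrative sketch only -- not the full kernel definitions. */
#include <stdbool.h>
#include <stdint.h>

struct amdgpu_device;                 /* opaque amdgpu handle */
struct device_queue_manager;          /* opaque per-node queue manager */
struct kgd2kfd_shared_resources { bool enable_mes; /* ... */ };
struct kfd_local_mem_info {
	uint64_t local_mem_size_private;
	uint64_t local_mem_size_public;
};
struct kfd_dev;

/* One compute node: queue management, interrupt handling, etc. */
struct kfd_node {
	struct amdgpu_device *adev;
	struct device_queue_manager *dqm;
	struct kfd_dev *kfd;          /* back-reference to the parent device */
};

/* The parent device: resources shared by all of its compute nodes. */
struct kfd_dev {
	struct kgd2kfd_shared_resources shared_resources;
	struct kfd_local_mem_info local_mem_info;
	uint64_t hive_id;
	bool use_iommu_v2;
	/* ... doorbell and GTT sub-allocator state ... */
};

Callers that used to take a struct kfd_dev * now take a struct kfd_node *
and reach the shared state through dev->kfd, which is the pattern repeated
throughout the hunks below.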

v2: introduce kfd_node struct to gc v11 (Hawking)
v3: make reference to kfd_dev struct through kfd_node (Morris)
v4: use kfd_node instead for kfd isr/mqd functions (Morris)
v5: rebase (Alex)

Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Tested-by: Amber Lin <Amber.Lin@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Morris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 5cf16755
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c (+1 −0)
@@ -35,6 +35,7 @@
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_xgmi.h"
+#include "kfd_priv.h"
 #include "kfd_smi_events.h"
 #include <drm/ttm/ttm_tt.h>

drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c (+2 −2)
@@ -26,7 +26,7 @@
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
-static bool cik_event_interrupt_isr(struct kfd_dev *dev,
+static bool cik_event_interrupt_isr(struct kfd_node *dev,
 					const uint32_t *ih_ring_entry,
 					uint32_t *patched_ihre,
 					bool *patched_flag)
@@ -85,7 +85,7 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,
 		!amdgpu_no_queue_eviction_on_vm_fault);
 }
 
-static void cik_event_interrupt_wq(struct kfd_dev *dev,
+static void cik_event_interrupt_wq(struct kfd_node *dev,
 					const uint32_t *ih_ring_entry)
 {
 	const struct cik_ih_ring_entry *ihre =
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c (+22 −21)
@@ -293,7 +293,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 					void *data)
 {
 	struct kfd_ioctl_create_queue_args *args = data;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	int err = 0;
 	unsigned int queue_id;
 	struct kfd_process_device *pdd;
@@ -328,7 +328,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	}
 
 	if (!pdd->doorbell_index &&
-	    kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
+	    kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) {
 		err = -ENOMEM;
 		goto err_alloc_doorbells;
 	}
@@ -336,7 +336,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 	/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
 	 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
 	 */
-	if (dev->shared_resources.enable_mes &&
+	if (dev->kfd->shared_resources.enable_mes &&
 			((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK)
 			>> AMDGPU_MES_API_VERSION_SHIFT) >= 2) {
 		struct amdgpu_bo_va_mapping *wptr_mapping;
@@ -887,7 +887,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
 {
 	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
 	struct kfd_process_device *pdd;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	long err;
 
 	mutex_lock(&p->mutex);
@@ -1006,18 +1006,18 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
 	return ret;
 }
 
-bool kfd_dev_is_large_bar(struct kfd_dev *dev)
+bool kfd_dev_is_large_bar(struct kfd_node *dev)
 {
 	if (debug_largebar) {
 		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
 		return true;
 	}
 
-	if (dev->use_iommu_v2)
+	if (dev->kfd->use_iommu_v2)
 		return false;
 
-	if (dev->local_mem_info.local_mem_size_private == 0 &&
-			dev->local_mem_info.local_mem_size_public > 0)
+	if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
+	    dev->kfd->local_mem_info.local_mem_size_public > 0)
 		return true;
 	return false;
 }
@@ -1041,7 +1041,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
 	struct kfd_process_device *pdd;
 	void *mem;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	int idr_handle;
 	long err;
 	uint64_t offset = args->mmap_offset;
@@ -1105,7 +1105,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 	}
 
 	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-		if (args->size != kfd_doorbell_process_slice(dev)) {
+		if (args->size != kfd_doorbell_process_slice(dev->kfd)) {
 			err = -EINVAL;
 			goto err_unlock;
 		}
@@ -1231,7 +1231,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
 	struct kfd_process_device *pdd, *peer_pdd;
 	void *mem;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	long err = 0;
 	int i;
 	uint32_t *devices_arr = NULL;
@@ -1405,7 +1405,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 		args->n_success = i+1;
 	}
 
-	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
+	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
 	if (flush_tlb) {
 		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
 				(struct kgd_mem *) mem, true);
@@ -1445,7 +1445,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
 	int retval;
 	struct kfd_ioctl_alloc_queue_gws_args *args = data;
 	struct queue *q;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 
 	mutex_lock(&p->mutex);
 	q = pqm_get_user_queue(&p->pqm, args->queue_id);
@@ -1482,7 +1482,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
 	struct kfd_ioctl_get_dmabuf_info_args *args = data;
-	struct kfd_dev *dev = NULL;
+	struct kfd_node *dev = NULL;
 	struct amdgpu_device *dmabuf_adev;
 	void *metadata_buffer = NULL;
 	uint32_t flags;
@@ -1596,7 +1596,7 @@ static int kfd_ioctl_export_dmabuf(struct file *filep,
 	struct kfd_ioctl_export_dmabuf_args *args = data;
 	struct kfd_process_device *pdd;
 	struct dma_buf *dmabuf;
-	struct kfd_dev *dev;
+	struct kfd_node *dev;
 	void *mem;
 	int ret = 0;
 
@@ -2178,7 +2178,7 @@ static int criu_restore_devices(struct kfd_process *p,
 	}
 
 	for (i = 0; i < args->num_devices; i++) {
-		struct kfd_dev *dev;
+		struct kfd_node *dev;
 		struct kfd_process_device *pdd;
 		struct file *drm_file;
 
@@ -2240,7 +2240,7 @@ static int criu_restore_devices(struct kfd_process *p,
 		}
 
 		if (!pdd->doorbell_index &&
-		    kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) {
+		    kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) {
 			ret = -ENOMEM;
 			goto exit;
 		}
@@ -2268,7 +2268,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
 	u64 offset;
 
 	if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
-		if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev))
+		if (bo_bucket->size !=
+				kfd_doorbell_process_slice(pdd->dev->kfd))
 			return -EINVAL;
 
 		offset = kfd_get_process_doorbells(pdd);
@@ -2350,7 +2351,7 @@ static int criu_restore_bo(struct kfd_process *p,
 
 	/* now map these BOs to GPU/s */
 	for (j = 0; j < p->n_pdds; j++) {
-		struct kfd_dev *peer;
+		struct kfd_node *peer;
 		struct kfd_process_device *peer_pdd;
 
 		if (!bo_priv->mapped_gpuids[j])
@@ -2947,7 +2948,7 @@ static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
 	return retcode;
 }
 
-static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
+static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process,
 		      struct vm_area_struct *vma)
 {
 	phys_addr_t address;
@@ -2981,7 +2982,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
 static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct kfd_process *process;
-	struct kfd_dev *dev = NULL;
+	struct kfd_node *dev = NULL;
 	unsigned long mmap_offset;
 	unsigned int gpu_id;
 
drivers/gpu/drm/amd/amdkfd/kfd_crat.c (+14 −14)
@@ -1405,7 +1405,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 	return i;
 }
 
-int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info)
 {
 	int num_of_cache_types = 0;
 
@@ -1524,7 +1524,7 @@ int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pca
 		case IP_VERSION(11, 0, 3):
 		case IP_VERSION(11, 0, 4):
 			num_of_cache_types =
-				kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
+				kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info);
 			break;
 		default:
 			*pcache_info = dummy_cache_info;
@@ -1858,7 +1858,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 }
 
 static int kfd_fill_gpu_memory_affinity(int *avail_size,
-		struct kfd_dev *kdev, uint8_t type, uint64_t size,
+		struct kfd_node *kdev, uint8_t type, uint64_t size,
 		struct crat_subtype_memory *sub_type_hdr,
 		uint32_t proximity_domain,
 		const struct kfd_local_mem_info *local_mem_info)
@@ -1887,7 +1887,7 @@ static int kfd_fill_gpu_memory_affinity(int *avail_size,
 }
 
 #ifdef CONFIG_ACPI_NUMA
-static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
+static void kfd_find_numa_node_in_srat(struct kfd_node *kdev)
 {
 	struct acpi_table_header *table_header = NULL;
 	struct acpi_subtable_header *sub_header = NULL;
@@ -1982,7 +1982,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
  *	Return 0 if successful else return -ve value
  */
 static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
-			struct kfd_dev *kdev,
+			struct kfd_node *kdev,
 			struct crat_subtype_iolink *sub_type_hdr,
 			uint32_t proximity_domain)
 {
@@ -2044,8 +2044,8 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 }
 
 static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
-			struct kfd_dev *kdev,
-			struct kfd_dev *peer_kdev,
+			struct kfd_node *kdev,
+			struct kfd_node *peer_kdev,
 			struct crat_subtype_iolink *sub_type_hdr,
 			uint32_t proximity_domain_from,
 			uint32_t proximity_domain_to)
@@ -2081,7 +2081,7 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,
  *		[OUT] actual size of data filled in crat_image
  */
 static int kfd_create_vcrat_image_gpu(void *pcrat_image,
-				      size_t *size, struct kfd_dev *kdev,
+				      size_t *size, struct kfd_node *kdev,
 				      uint32_t proximity_domain)
 {
 	struct crat_header *crat_table = (struct crat_header *)pcrat_image;
@@ -2153,7 +2153,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	/* Check if this node supports IOMMU. During parsing this flag will
 	 * translate to HSA_CAP_ATS_PRESENT
 	 */
-	if (!kfd_iommu_check_device(kdev))
+	if (!kfd_iommu_check_device(kdev->kfd))
 		cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT;
 
 	crat_table->length += sub_type_hdr->length;
@@ -2164,7 +2164,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	 * report the total FB size (public+private) as a single
 	 * private heap.
 	 */
-	local_mem_info = kdev->local_mem_info;
+	local_mem_info = kdev->kfd->local_mem_info;
 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 			sub_type_hdr->length);
 
@@ -2216,12 +2216,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	 * (from other GPU to this GPU) will be added
 	 * in kfd_parse_subtype_iolink.
 	 */
-	if (kdev->hive_id) {
+	if (kdev->kfd->hive_id) {
 		for (nid = 0; nid < proximity_domain; ++nid) {
 			peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid);
 			if (!peer_dev->gpu)
 				continue;
-			if (peer_dev->gpu->hive_id != kdev->hive_id)
+			if (peer_dev->gpu->kfd->hive_id != kdev->kfd->hive_id)
 				continue;
 			sub_type_hdr = (typeof(sub_type_hdr))(
 				(char *)sub_type_hdr +
@@ -2255,12 +2255,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 *		(COMPUTE_UNIT_CPU | COMPUTE_UNIT_GPU) - Create VCRAT for APU
 *			-- this option is not currently implemented.
 *			The assumption is that all AMD APUs will have CRAT
- *	@kdev: Valid kfd_device required if flags contain COMPUTE_UNIT_GPU
+ *	@kdev: Valid kfd_node required if flags contain COMPUTE_UNIT_GPU
 *
 *	Return 0 if successful else return -ve value
 */
 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
-				  int flags, struct kfd_dev *kdev,
+				  int flags, struct kfd_node *kdev,
 				  uint32_t proximity_domain)
 {
 	void *pcrat_image = NULL;
drivers/gpu/drm/amd/amdkfd/kfd_crat.h (+3 −3)
@@ -293,7 +293,7 @@ struct crat_subtype_generic {
 
 #pragma pack()
 
-struct kfd_dev;
+struct kfd_node;
 
 /* Static table to describe GPU Cache information */
 struct kfd_gpu_cache_info {
@@ -305,14 +305,14 @@ struct kfd_gpu_cache_info {
 	 */
 	uint32_t	num_cu_shared;
 };
-int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);
+int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info);
 
 int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
 void kfd_destroy_crat_image(void *crat_image);
 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
 			 uint32_t proximity_domain);
 int kfd_create_crat_image_virtual(void **crat_image, size_t *size,
-				  int flags, struct kfd_dev *kdev,
+				  int flags, struct kfd_node *kdev,
 				  uint32_t proximity_domain);
 
 #endif /* KFD_CRAT_H_INCLUDED */