Commit f0dc99a6, authored by Graham Sider, committed by Alex Deucher
Browse files

drm/amdkfd: add kfd_device_info_init function



Initializes kfd->device_info given either asic_type (enum) if GFX
version is less than GFX9, or GC IP version if GFX9 or greater. Also
takes in vf and the target compiler gfx version. Uses SDMA version to
determine num_sdma_queues_per_engine.

Convert device_info to a non-pointer member of kfd, change references
accordingly.

Change unsupported asic condition to only probe f2g, move device_info
initialization post-switch.

Signed-off-by: Graham Sider <Graham.Sider@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b7675b7b
Loading
Loading
Loading
Loading
+134 −112
Original line number Diff line number Diff line
@@ -511,193 +511,215 @@ static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

/*
 * kfd_device_info_init - fill in kfd->device_info for the device being probed
 * @kfd: KFD device; kfd->adev must already be set, because the ASIC type and
 *       the GC/SDMA IP versions are read through it
 * @vf: VF flag forwarded from kgd2kfd_probe(); when set, PCI atomics are not
 *      marked as needed on pre-SOC15 parts
 * @gfx_target_version: stored unchanged in device_info.gfx_target_version
 *
 * Parts for which KFD_IS_SOC15() is true are configured from their GC and
 * SDMA IP versions; every other part is configured from the asic_type enum.
 * Only the fields listed below are written; anything not assigned on a given
 * path keeps its previous value (presumably zero from the kfd allocation -
 * TODO confirm at the allocation site).
 */
static void kfd_device_info_init(struct kfd_dev *kfd,
				 bool vf, uint32_t gfx_target_version)
{
	const uint32_t gc_ip = KFD_GC_VERSION(kfd);
	const uint32_t sdma_ip = kfd->adev->ip_versions[SDMA0_HWIP][0];
	const uint32_t chip = kfd->adev->asic_type;

	/* Values shared by every ASIC */
	kfd->device_info.gfx_target_version = gfx_target_version;
	kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
	kfd->device_info.num_of_watch_points = 4;
	kfd->device_info.max_no_of_hqd = 24;
	kfd->device_info.max_pasid_bits = 16;

	if (!KFD_IS_SOC15(kfd)) {
		/* Pre-SOC15: decisions are keyed off the asic_type enum */
		kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
		kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
		kfd->device_info.doorbell_size = 4;
		kfd->device_info.num_sdma_queues_per_engine = 2;

		/* CWSR everywhere except Kaveri, Hawaii and Tonga */
		if (!(chip == CHIP_KAVERI ||
		      chip == CHIP_HAWAII ||
		      chip == CHIP_TONGA))
			kfd->device_info.supports_cwsr = true;

		if (chip == CHIP_CARRIZO || chip == CHIP_KAVERI)
			kfd->device_info.needs_iommu_device = true;

		/* Neither Hawaii nor any VF is flagged as needing PCI atomics */
		if (!(vf || chip == CHIP_HAWAII))
			kfd->device_info.needs_pci_atomics = true;

		return;
	}

	/* SOC15: decisions are keyed off the GC and SDMA IP versions */
	kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
	kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
	kfd->device_info.doorbell_size = 8;
	kfd->device_info.supports_cwsr = true;

	/* SDMA 4.0.0..4.2.0, 5.2.1 and 5.2.3 get 2 queues per engine, the rest 8 */
	kfd->device_info.num_sdma_queues_per_engine =
		(sdma_ip == IP_VERSION(5, 2, 1) ||
		 sdma_ip == IP_VERSION(5, 2, 3) ||
		 (sdma_ip >= IP_VERSION(4, 0, 0) &&
		  sdma_ip <= IP_VERSION(4, 2, 0))) ? 2 : 8;

	/* Raven */
	if (gc_ip == IP_VERSION(9, 2, 2) ||
	    gc_ip == IP_VERSION(9, 1, 0))
		kfd->device_info.needs_iommu_device = true;

	/* The PCI-atomics settings below apply only to GC versions before 11.0.0 */
	if (gc_ip < IP_VERSION(11, 0, 0)) {
		const bool navi1x_plus = gc_ip >= IP_VERSION(10, 1, 1);

		/* Navi2x+ takes precedence over Navi1x+ for the fw threshold */
		if (gc_ip >= IP_VERSION(10, 3, 0))
			kfd->device_info.no_atomic_fw_version = 145;
		else if (navi1x_plus)
			kfd->device_info.no_atomic_fw_version = 92;

		/* Navi1x+ */
		if (navi1x_plus)
			kfd->device_info.needs_pci_atomics = true;
	}
}

struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
	struct kfd_dev *kfd;
	const struct kfd_device_info *device_info;
	const struct kfd2kgd_calls *f2g;
	struct kfd_dev *kfd = NULL;
	const struct kfd2kgd_calls *f2g = NULL;
	struct pci_dev *pdev = adev->pdev;
	uint32_t gfx_target_version = 0;

	switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
		if (vf)
			device_info = NULL;
		else
			device_info = &kaveri_device_info;
		gfx_target_version = 70000;
		if (!vf)
			f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_CARRIZO:
		if (vf)
			device_info = NULL;
		else
			device_info = &carrizo_device_info;
		gfx_target_version = 80001;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_HAWAII:
		if (vf)
			device_info = NULL;
		else
			device_info = &hawaii_device_info;
		gfx_target_version = 70001;
		if (!vf)
			f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_TONGA:
		if (vf)
			device_info = NULL;
		else
			device_info = &tonga_device_info;
		gfx_target_version = 80002;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_FIJI:
		if (vf)
			device_info = &fiji_vf_device_info;
		else
			device_info = &fiji_device_info;
		gfx_target_version = 80003;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS10:
		if (vf)
			device_info = &polaris10_vf_device_info;
		else
			device_info = &polaris10_device_info;
		gfx_target_version = 80003;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS11:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris11_device_info;
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS12:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris12_device_info;
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_VEGAM:
		if (vf)
			device_info = NULL;
		else
			device_info = &vegam_device_info;
		gfx_target_version = 80003;
		if (!vf)
			f2g = &gfx_v8_kfd2kgd;
		break;
	default:
		switch (adev->ip_versions[GC_HWIP][0]) {
		case IP_VERSION(9, 0, 1):
			if (vf)
				device_info = &vega10_vf_device_info;
			else
				device_info = &vega10_device_info;
			gfx_target_version = 90000;
			f2g = &gfx_v9_kfd2kgd;
			break;
#ifdef KFD_SUPPORT_IOMMU_V2
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			if (vf)
				device_info = NULL;
			else
				device_info = &raven_device_info;
			gfx_target_version = 90002;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
#endif
		case IP_VERSION(9, 2, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &vega12_device_info;
			gfx_target_version = 90004;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 3, 0):
			if (vf)
				device_info = NULL;
			else
				device_info = &renoir_device_info;
			gfx_target_version = 90012;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 0):
			if (vf)
				device_info = NULL;
			else
				device_info = &vega20_device_info;
			gfx_target_version = 90006;
			if (!vf)
				f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 1):
			device_info = &arcturus_device_info;
			gfx_target_version = 90008;
			f2g = &arcturus_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 2):
			device_info = &aldebaran_device_info;
			gfx_target_version = 90010;
			f2g = &aldebaran_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 10):
			if (vf)
				device_info = NULL;
			else
				device_info = &navi10_device_info;
			gfx_target_version = 100100;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 2):
			device_info = &navi12_device_info;
			gfx_target_version = 100101;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &navi14_device_info;
			gfx_target_version = 100102;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 3):
			if (vf)
				device_info = NULL;
			else
				device_info = &cyan_skillfish_device_info;
			gfx_target_version = 100103;
			if (!vf)
				f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 0):
			device_info = &sienna_cichlid_device_info;
			gfx_target_version = 100300;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 2):
			device_info = &navy_flounder_device_info;
			gfx_target_version = 100301;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &vangogh_device_info;
			gfx_target_version = 100303;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 4):
			device_info = &dimgrey_cavefish_device_info;
			gfx_target_version = 100302;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 5):
			device_info = &beige_goby_device_info;
			gfx_target_version = 100304;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 3):
			if (vf)
				device_info = NULL;
			else
				device_info = &yellow_carp_device_info;
			gfx_target_version = 100305;
			if (!vf)
				f2g = &gfx_v10_3_kfd2kgd;
			break;
		default:
			return NULL;
			break;
		}
		break;
	}

	if (!device_info || !f2g) {
	if (!f2g) {
		if (adev->ip_versions[GC_HWIP][0])
			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
@@ -712,7 +734,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
		return NULL;

	kfd->adev = adev;
	kfd->device_info = device_info;
	kfd_device_info_init(kfd, vf, gfx_target_version);
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
@@ -731,7 +753,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)

static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
	if (cwsr_enable && kfd->device_info.supports_cwsr) {
		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
@@ -815,14 +837,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
	if (!kfd->pci_atomic_requested &&
	    kfd->device_info->needs_pci_atomics &&
	    (!kfd->device_info->no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
	    kfd->device_info.needs_pci_atomics &&
	    (!kfd->device_info.no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
		dev_info(kfd_device,
			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
			 kfd->pdev->vendor, kfd->pdev->device,
			 kfd->mec_fw_version,
			 kfd->device_info->no_atomic_fw_version);
			 kfd->device_info.no_atomic_fw_version);
		return false;
	}

@@ -839,7 +861,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;
			kfd->device_info.mqd_size_aligned;

	/*
	 * calculate max size of runlist packet.
@@ -1114,7 +1136,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
	if (!kfd->init_complete)
		return;

	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
	if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}
+4 −4
Original line number Diff line number Diff line
@@ -108,13 +108,13 @@ static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->device_info->num_sdma_queues_per_engine;
		dqm->dev->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->device_info->num_sdma_queues_per_engine;
		dqm->dev->device_info.num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -1838,7 +1838,7 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->device_info->num_sdma_queues_per_engine +
		dev->device_info.num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
@@ -2082,7 +2082,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)

	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
+4 −4
Original line number Diff line number Diff line
@@ -48,7 +48,7 @@
/* # of doorbell bytes allocated for each process. */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
	return roundup(kfd->device_info->doorbell_size *
	return roundup(kfd->device_info.doorbell_size *
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
			PAGE_SIZE);
}
@@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return NULL;

	inx *= kfd->device_info->doorbell_size / sizeof(u32);
	inx *= kfd->device_info.doorbell_size / sizeof(u32);

	/*
	 * Calculating the kernel doorbell offset using the first
@@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
	unsigned int inx;

	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr)
		* sizeof(u32) / kfd->device_info->doorbell_size;
		* sizeof(u32) / kfd->device_info.doorbell_size;

	mutex_lock(&kfd->doorbell_mutex);
	__clear_bit(inx, kfd->doorbell_available_index);
@@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
	return kfd->doorbell_base_dw_offset +
		pdd->doorbell_index
		* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
		doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);
}

uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
+1 −1
Original line number Diff line number Diff line
@@ -135,7 +135,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,

		*patched_flag = true;
		memcpy(patched_ihre, ih_ring_entry,
				dev->device_info->ih_ring_entry_size);
				dev->device_info.ih_ring_entry_size);

		pasid = dev->dqm->vmid_pasid[vmid];

+9 −9
Original line number Diff line number Diff line
@@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
	int r;

	r = kfifo_alloc(&kfd->ih_fifo,
		KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
		KFD_IH_NUM_ENTRIES * kfd->device_info.ih_ring_entry_size,
		GFP_KERNEL);
	if (r) {
		dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
@@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry)
	int count;

	count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
				kfd->device_info->ih_ring_entry_size);
	if (count != kfd->device_info->ih_ring_entry_size) {
				kfd->device_info.ih_ring_entry_size);
	if (count != kfd->device_info.ih_ring_entry_size) {
		dev_err_ratelimited(kfd_chardev(),
			"Interrupt ring overflow, dropping interrupt %d\n",
			count);
@@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
	int count;

	count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
				kfd->device_info->ih_ring_entry_size);
				kfd->device_info.ih_ring_entry_size);

	WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
	WARN_ON(count && count != kfd->device_info.ih_ring_entry_size);

	return count == kfd->device_info->ih_ring_entry_size;
	return count == kfd->device_info.ih_ring_entry_size;
}

static void interrupt_wq(struct work_struct *work)
@@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work)
						interrupt_work);
	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];

	if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
	if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {
		dev_err_once(kfd_chardev(), "Ring entry too small\n");
		return;
	}

	while (dequeue_ih_ring_entry(dev, ih_ring_entry))
		dev->device_info->event_interrupt_class->interrupt_wq(dev,
		dev->device_info.event_interrupt_class->interrupt_wq(dev,
								ih_ring_entry);
}

@@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,
	/* integer and bitwise OR so there is no boolean short-circuiting */
	unsigned int wanted = 0;

	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
	wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,
					 ih_ring_entry, patched_ihre, flag);

	return wanted != 0;
Loading