From 4518cd28d81fa4d2872268e4f8f7b35f387642cc Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 29 Jan 2018 17:31:40 +0200 Subject: [PATCH 0001/1646] dma-fence: add comment for WARN_ON in dma_fence_release() In dma_fence_release() there is a WARN_ON which could be triggered by several cases of wrong dma-fence usage. This patch adds a comment to explain two use-cases to help driver developers that use dma-fence and trigger that WARN_ON to better understand the reasons for it. v2: change to a more generic, one-liner comment Reviewed-by: Daniel Vetter Signed-off-by: Oded Gabbay --- drivers/dma-buf/dma-fence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index 5d101c4053e05..4edb9fd3cf479 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -171,6 +171,7 @@ void dma_fence_release(struct kref *kref) trace_dma_fence_destroy(fence); + /* Failed to signal before release, could be a refcounting issue */ WARN_ON(!list_empty(&fence->cb_list)); if (fence->ops->release) -- GitLab From 3f866f5f04d3645970662409d2bdff3dca58b1a3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 18 Jan 2018 18:39:55 -0600 Subject: [PATCH 0002/1646] drm/amdkfd: Use ARRAY_SIZE macro in kfd_build_sysfs_node_entry Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element. This issue was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Felix Kuehling Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index c6a76090a7250..7783250e1c6dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -677,7 +677,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, } /* All hardware blocks have the same number of attributes. */ - num_attrs = sizeof(perf_attr_iommu)/sizeof(struct kfd_perf_attr); + num_attrs = ARRAY_SIZE(perf_attr_iommu); list_for_each_entry(perf, &dev->perf_props, list) { perf->attr_group = kzalloc(sizeof(struct kfd_perf_attr) * num_attrs + sizeof(struct attribute_group), -- GitLab From 3ee2d00cfb6c0b44aeb9575c20ad8d1abf09be0f Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:41 -0500 Subject: [PATCH 0003/1646] drm/amdkfd: Conditionally enable PCIe atomics This will be needed for most dGPUs. CC: linux-pci@vger.kernel.org Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 17 +++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 1 + 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a8fa33a08de30..fafe971d1d49d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -41,6 +41,7 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_pci_atomics = false, }; static const struct kfd_device_info carrizo_device_info = { @@ -53,6 +54,7 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_pci_atomics = false, }; struct kfd_deviceid { @@ -127,6 +129,21 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } + if (device_info->needs_pci_atomics) { + /* Allow BIF to recode atomics to PCIe 3.0 + * AtomicOps. 32 and 64-bit requests are possible and + * must be supported. + */ + if (pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics", + pdev->vendor, pdev->device); + return NULL; + } + } + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); if (!kfd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 0bedcf9cc08c7..e5b16209f0696 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -158,6 +158,7 @@ struct kfd_device_info { uint8_t num_of_watch_points; uint16_t mqd_size_aligned; bool supports_cwsr; + bool needs_pci_atomics; }; struct kfd_mem_obj { -- GitLab From d146c5a7196b4c2c2586569971a55392b501b93b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:43 -0500 Subject: [PATCH 0004/1646] drm/amdkfd: Make sched_policy a per-device setting Some dGPUs don't support HWS. Allow them to use a per-device sched_policy that may be different from the global default. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c | 3 ++- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 2 +- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 26 +++++++++++++++---- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 1 + .../amd/amdkfd/kfd_process_queue_manager.c | 3 ++- 6 files changed, 29 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 62c3d9cd6ef18..6fe24964540b4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -901,7 +901,8 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, mutex_unlock(&p->mutex); - if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0) + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS && + pdd->qpd.vmid != 0) dev->kfd2kgd->set_scratch_backing_va( dev->kgd, args->va_addr, pdd->qpd.vmid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c index 3da25f7bda6ba..9d4af961c5d1f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c @@ -33,6 +33,7 @@ #include "kfd_pm4_headers_diq.h" #include "kfd_dbgmgr.h" #include "kfd_dbgdev.h" +#include "kfd_device_queue_manager.h" static DEFINE_MUTEX(kfd_dbgmgr_mutex); @@ -83,7 +84,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev) } /* get actual type of DBGDevice cpsch or not */ - if (sched_policy == KFD_SCHED_POLICY_NO_HWS) + if (pdev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) type = DBGDEV_TYPE_NODIQ; kfd_dbgdev_init(new_buff->dbgdev, pdev, type); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index fafe971d1d49d..e8ff4eab63e73 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -340,7 +340,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->pdev->device); pr_debug("Starting kfd with the following scheduling policy %d\n", - sched_policy); + kfd->dqm->sched_policy); goto out; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b21285afa4eac..e57781d99664c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -385,7 +385,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) prev_active = q->properties.is_active; /* Make sure the queue is unmapped before updating the MQD */ - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) { + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); if (retval) { @@ -417,7 +417,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) else if (!q->properties.is_active && prev_active) dqm->queue_count--; - if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || @@ -1097,7 +1097,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, alternate_aperture_base, alternate_aperture_size); - if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) + if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", @@ -1242,8 +1242,24 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) if (!dqm) return NULL; + switch (dev->device_info->asic_family) { + /* HWS is not available on Hawaii. */ + case CHIP_HAWAII: + /* HWS depends on CWSR for timely dequeue. CWSR is not + * available on Tonga. + * + * FIXME: This argument also applies to Kaveri. + */ + case CHIP_TONGA: + dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; + break; + default: + dqm->sched_policy = sched_policy; + break; + } + dqm->dev = dev; - switch (sched_policy) { + switch (dqm->sched_policy) { case KFD_SCHED_POLICY_HWS: case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: /* initialize dqm for cp scheduling */ @@ -1280,7 +1296,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.process_termination = process_termination_nocpsch; break; default: - pr_err("Invalid scheduling policy %d\n", sched_policy); + pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); goto out_free; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index c61b693bfa8c3..9fdc9c2a107a2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -180,6 +180,7 @@ struct device_queue_manager { unsigned int *fence_addr; struct kfd_mem_obj *fence_mem; bool active_runlist; + int sched_policy; }; void device_queue_manager_init_cik( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 8763806326682..7817e327ea6dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -208,7 +208,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, case KFD_QUEUE_TYPE_COMPUTE: /* check if there is over subscription */ - if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + if ((dev->dqm->sched_policy == + KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) || (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) { pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); -- GitLab From 97672cbe3de809ef8c4ea66cce675f5da3d3df44 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:44 -0500 Subject: [PATCH 0005/1646] drm/amdkfd: Add dGPU support to the device queue manager GFXv7 and v8 dGPUs use a different addressing mode for KFD compared to APUs (GPUVM64 vs HSA64). And dGPUs don't support MTYPE_CC. They use MTYPE_UC instead for memory that requires coherency. Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 11 +++ .../drm/amd/amdkfd/kfd_device_queue_manager.h | 4 + .../amd/amdkfd/kfd_device_queue_manager_cik.c | 56 +++++++++++ .../amd/amdkfd/kfd_device_queue_manager_vi.c | 93 +++++++++++++++++++ 4 files changed, 164 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index e57781d99664c..47d493ea8e4fc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1308,6 +1308,17 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_KAVERI: device_queue_manager_init_cik(&dqm->asic_ops); break; + + case CHIP_HAWAII: + device_queue_manager_init_cik_hawaii(&dqm->asic_ops); + break; + + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + device_queue_manager_init_vi_tonga(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 9fdc9c2a107a2..68be0aaff3f4d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -185,8 +185,12 @@ struct device_queue_manager { void device_queue_manager_init_cik( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_cik_hawaii( + struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index 28e48c90c5964..aed4c21417bfe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -34,8 +34,13 @@ static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm, uint64_t alternate_aperture_size); static int update_qpd_cik(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_cik( struct device_queue_manager_asic_ops *asic_ops) @@ -45,6 +50,14 @@ void device_queue_manager_init_cik( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_cik_hawaii( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; + asic_ops->update_qpd = update_qpd_cik_hawaii; + asic_ops->init_sdma_vm = init_sdma_vm_hawaii; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, @@ -132,6 +145,36 @@ static int update_qpd_cik(struct device_queue_manager *dqm, return 0; } +static int update_qpd_cik_hawaii(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + + pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); + + return 0; +} + static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { @@ -147,3 +190,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } + +static void init_sdma_vm_hawaii(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index 2fbce57a2f21d..fd60a116be373 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -33,10 +33,21 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, enum cache_policy alternate_policy, void __user *alternate_aperture_base, uint64_t alternate_aperture_size); +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd); +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops) @@ -46,6 +57,14 @@ void device_queue_manager_init_vi( asic_ops->init_sdma_vm = init_sdma_vm; } +void device_queue_manager_init_vi_tonga( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; + asic_ops->update_qpd = update_qpd_vi_tonga; + asic_ops->init_sdma_vm = init_sdma_vm_tonga; +} + static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) { /* In 64-bit mode, we can only control the top 3 bits of the LDS, @@ -103,6 +122,33 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, return true; } +static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_UC : + MTYPE_NC; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_UC : + MTYPE_NC; + + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; + + return true; +} + static int update_qpd_vi(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -144,6 +190,40 @@ static int update_qpd_vi(struct device_queue_manager *dqm, return 0; } +static int update_qpd_vi_tonga(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | + MTYPE_UC << + SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | + MTYPE_UC << + SH_MEM_CONFIG__APE1_MTYPE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + + pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n", + temp, qpd->sh_mem_bases); + + return 0; +} + static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd) { @@ -159,3 +239,16 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, q->properties.sdma_vm_addr = value; } + +static void init_sdma_vm_tonga(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + /* On dGPU we're always in GPUVM64 addressing mode with 64-bit + * aperture addresses. + */ + q->properties.sdma_vm_addr = + ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & + SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; +} -- GitLab From ee04955af6b851a4f133d2472bc65c5d8b9aa692 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:45 -0500 Subject: [PATCH 0006/1646] drm/amdkfd: Add dGPU support to the MQD manager On dGPUs don't set ATC addressing bits and use MTYPE_UC for coherent memory. Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 7 ++++ .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 35 +++++++++++++++++-- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 21 +++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +++ 4 files changed, 64 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index dfd260ef81ffe..ee7061e1c466f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -29,8 +29,15 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, switch (dev->device_info->asic_family) { case CHIP_KAVERI: return mqd_manager_init_cik(type, dev); + case CHIP_HAWAII: + return mqd_manager_init_cik_hawaii(type, dev); case CHIP_CARRIZO: return mqd_manager_init_vi(type, dev); + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + return mqd_manager_init_vi_tonga(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index f8ef4a051e082..fbe3f83ba6853 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -170,14 +170,19 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, mms); } -static int update_mqd(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) +static int __update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q, unsigned int atc_bit) { struct cik_mqd *m; m = get_mqd(mqd); m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | - DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; + DEFAULT_MIN_AVAIL_SIZE; + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; + if (atc_bit) { + m->cp_hqd_pq_control |= PQ_ATC_EN; + m->cp_hqd_ib_control |= IB_ATC_EN; + } /* * Calculating queue size which is log base 2 of actual queue size -1 @@ -202,6 +207,18 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, return 0; } +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, 1); +} + +static int update_mqd_hawaii(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, 0); +} + static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { @@ -441,3 +458,15 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, return mqd; } +struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + mqd = mqd_manager_init_cik(type, dev); + if (!mqd) + return NULL; + if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + mqd->update_mqd = update_mqd_hawaii; + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 971aec0637dc7..58221c1fc917c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -151,6 +151,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); m->cp_hqd_pq_doorbell_control = q->doorbell_off << @@ -208,6 +210,12 @@ static int update_mqd(struct mqd_manager *mm, void *mqd, return __update_mqd(mm, mqd, q, MTYPE_CC, 1); } +static int update_mqd_tonga(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + return __update_mqd(mm, mqd, q, MTYPE_UC, 0); +} + static int destroy_mqd(struct mqd_manager *mm, void *mqd, enum kfd_preempt_type type, unsigned int timeout, uint32_t pipe_id, @@ -432,3 +440,16 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, return mqd; } + +struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + mqd = mqd_manager_init_vi(type, dev); + if (!mqd) + return NULL; + if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + mqd->update_mqd = update_mqd_tonga; + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index e5b16209f0696..594f853553971 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -706,8 +706,12 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, -- GitLab From 1d63669885ebc8fca13e44864f7199c3ff50cea3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:46 -0500 Subject: [PATCH 0007/1646] drm/amdkfd: Add dGPU support to kernel_queue_init Recognize dGPU ASIC families. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 5dc6567d4a138..69f4964853310 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -297,10 +297,15 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, switch (dev->device_info->asic_family) { case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kernel_queue_init_vi(&kq->ops_asic_specific); break; case CHIP_KAVERI: + case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; default: -- GitLab From a3084e6c522f94130c9dfc26fe6458c353dbc0c9 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:47 -0500 Subject: [PATCH 0008/1646] drm/amdkfd: Add dGPU device IDs and device info v2: remove needs_iommu field as it doesn't exists CC: linux-pci@vger.kernel.org Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 145 +++++++++++++++++++++++- 1 file changed, 143 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index e8ff4eab63e73..83d6f410890e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -57,12 +57,110 @@ static const struct kfd_device_info carrizo_device_info = { .needs_pci_atomics = false, }; +static const struct kfd_device_info hawaii_device_info = { + .asic_family = CHIP_HAWAII, + .max_pasid_bits = 16, + /* max num of queues for KV.TODO should be a dynamic value */ + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info tonga_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info tonga_vf_device_info = { + .asic_family = CHIP_TONGA, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info fiji_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info fiji_vf_device_info = { + .asic_family = CHIP_FIJI, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_pci_atomics = false, +}; + + +static const struct kfd_device_info polaris10_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_pci_atomics = true, +}; + +static const struct kfd_device_info polaris10_vf_device_info = { + .asic_family = CHIP_POLARIS10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info polaris11_device_info = { + .asic_family = CHIP_POLARIS11, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_pci_atomics = true, +}; + + struct kfd_deviceid { unsigned short did; const struct kfd_device_info *device_info; }; -/* Please keep this sorted by increasing device id. */ static const struct kfd_deviceid supported_devices[] = { { 0x1304, &kaveri_device_info }, /* Kaveri */ { 0x1305, &kaveri_device_info }, /* Kaveri */ @@ -90,7 +188,50 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9874, &carrizo_device_info }, /* Carrizo */ { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ - { 0x9877, &carrizo_device_info } /* Carrizo */ + { 0x9877, &carrizo_device_info }, /* Carrizo */ + { 0x67A0, &hawaii_device_info }, /* Hawaii */ + { 0x67A1, &hawaii_device_info }, /* Hawaii */ + { 0x67A2, &hawaii_device_info }, /* Hawaii */ + { 0x67A8, &hawaii_device_info }, /* Hawaii */ + { 0x67A9, &hawaii_device_info }, /* Hawaii */ + { 0x67AA, &hawaii_device_info }, /* Hawaii */ + { 0x67B0, &hawaii_device_info }, /* Hawaii */ + { 0x67B1, &hawaii_device_info }, /* Hawaii */ + { 0x67B8, &hawaii_device_info }, /* Hawaii */ + { 0x67B9, &hawaii_device_info }, /* Hawaii */ + { 0x67BA, &hawaii_device_info }, /* Hawaii */ + { 0x67BE, &hawaii_device_info }, /* Hawaii */ + { 0x6920, &tonga_device_info }, /* Tonga */ + { 0x6921, &tonga_device_info }, /* Tonga */ + { 0x6928, &tonga_device_info }, /* Tonga */ + { 0x6929, &tonga_device_info }, /* Tonga */ + { 0x692B, &tonga_device_info }, /* Tonga */ + { 0x692F, &tonga_vf_device_info }, /* Tonga vf */ + { 0x6938, &tonga_device_info }, /* Tonga */ + { 0x6939, &tonga_device_info }, /* Tonga */ + { 0x7300, &fiji_device_info }, /* Fiji */ + { 0x730F, &fiji_vf_device_info }, /* Fiji vf*/ + { 0x67C0, &polaris10_device_info }, /* Polaris10 */ + { 0x67C1, &polaris10_device_info }, /* Polaris10 */ + { 0x67C2, &polaris10_device_info }, /* Polaris10 */ + { 0x67C4, &polaris10_device_info }, /* Polaris10 */ + { 0x67C7, &polaris10_device_info }, /* Polaris10 */ + { 0x67C8, &polaris10_device_info }, /* Polaris10 */ + { 0x67C9, &polaris10_device_info }, /* Polaris10 */ + { 0x67CA, &polaris10_device_info }, /* Polaris10 */ + { 0x67CC, &polaris10_device_info }, /* Polaris10 */ + { 0x67CF, &polaris10_device_info }, /* Polaris10 */ + { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ + { 0x67DF, &polaris10_device_info }, /* Polaris10 */ + { 0x67E0, &polaris11_device_info }, /* Polaris11 */ + { 0x67E1, &polaris11_device_info }, /* Polaris11 */ + { 0x67E3, &polaris11_device_info }, /* Polaris11 */ + { 0x67E7, &polaris11_device_info }, /* Polaris11 */ + { 0x67E8, &polaris11_device_info }, /* Polaris11 */ + { 0x67E9, &polaris11_device_info }, /* Polaris11 */ + { 0x67EB, &polaris11_device_info }, /* Polaris11 */ + { 0x67EF, &polaris11_device_info }, /* Polaris11 */ + { 0x67FF, &polaris11_device_info }, /* Polaris11 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, -- GitLab From 30d13424fba9236b741887eabb016279519c6ac4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 4 Jan 2018 17:17:48 -0500 Subject: [PATCH 0009/1646] drm/amdgpu: Enable KFD initialization on dGPUs Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 450426dbed921..1d2b6ca82f1d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -78,10 +78,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: + case CHIP_HAWAII: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); break; #endif case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; default: -- GitLab From fa72d66198c90668723bb25ec23639a012bee99b Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:30 -0500 Subject: [PATCH 0010/1646] drm/amdgpu: remove useless BUG_ONs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dereferencing NULL pointers will cause a BUG anyway. No need to do an explicit check. Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 ------ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 -- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 -- 3 files changed, 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1d2b6ca82f1d9..e06d7919747a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -212,10 +212,6 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, struct kgd_mem **mem = (struct kgd_mem **) mem_obj; int r; - BUG_ON(kgd == NULL); - BUG_ON(gpu_addr == NULL); - BUG_ON(cpu_ptr == NULL); - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); if ((*mem) == NULL) return -ENOMEM; @@ -269,8 +265,6 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { struct kgd_mem *mem = (struct kgd_mem *) mem_obj; - BUG_ON(mem == NULL); - amdgpu_bo_reserve(mem->bo, true); amdgpu_bo_kunmap(mem->bo); amdgpu_bo_unpin(mem->bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index a9e6aea0e5f8f..74fcb8b42fd09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -812,8 +812,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index b127259d7d854..c70c8e1d1863c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -775,8 +775,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) struct amdgpu_device *adev = (struct amdgpu_device *) kgd; const union amdgpu_firmware_header *hdr; - BUG_ON(kgd == NULL); - switch (type) { case KGD_ENGINE_PFP: hdr = (const union amdgpu_firmware_header *) -- GitLab From 473fee476a3c39f50bae07354972dd1cefaf79e9 Mon Sep 17 00:00:00 2001 From: Yong Zhao Date: Tue, 6 Feb 2018 20:32:31 -0500 Subject: [PATCH 0011/1646] drm/amdgpu: Replace kgd_mem with amdgpu_bo for kernel pinned gtt mem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The extra fields in struct kgd_mem aren't actually needed. This struct will be used for GPUVM allocations later. Signed-off-by: Yong Zhao Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 47 +++++++++++----------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index e06d7919747a7..141d11dfede04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -209,15 +209,13 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **cpu_ptr) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - struct kgd_mem **mem = (struct kgd_mem **) mem_obj; + struct amdgpu_bo *bo = NULL; int r; - - *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if ((*mem) == NULL) - return -ENOMEM; + uint64_t gpu_addr_tmp = 0; + void *cpu_ptr_tmp = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); @@ -225,52 +223,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, } /* map the buffer */ - r = amdgpu_bo_reserve((*mem)->bo, true); + r = amdgpu_bo_reserve(bo, true); if (r) { dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } - r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, - &(*mem)->gpu_addr); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, + &gpu_addr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } - *gpu_addr = (*mem)->gpu_addr; - r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); + r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp); if (r) { dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } - *cpu_ptr = (*mem)->cpu_ptr; - amdgpu_bo_unreserve((*mem)->bo); + *mem_obj = bo; + *gpu_addr = gpu_addr_tmp; + *cpu_ptr = cpu_ptr_tmp; + + amdgpu_bo_unreserve(bo); return 0; allocate_mem_kmap_bo_failed: - amdgpu_bo_unpin((*mem)->bo); + amdgpu_bo_unpin(bo); allocate_mem_pin_bo_failed: - amdgpu_bo_unreserve((*mem)->bo); + amdgpu_bo_unreserve(bo); allocate_mem_reserve_bo_failed: - amdgpu_bo_unref(&(*mem)->bo); + amdgpu_bo_unref(&bo); return r; } void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { - struct kgd_mem *mem = (struct kgd_mem *) mem_obj; - - amdgpu_bo_reserve(mem->bo, true); - amdgpu_bo_kunmap(mem->bo); - amdgpu_bo_unpin(mem->bo); - amdgpu_bo_unreserve(mem->bo); - amdgpu_bo_unref(&(mem->bo)); - kfree(mem); + struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; + + amdgpu_bo_reserve(bo, true); + amdgpu_bo_kunmap(bo); + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&(bo)); } void get_local_mem_info(struct kgd_dev *kgd, -- GitLab From 61b100e98f16e02df44862bba7798c7654b565f2 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:32 -0500 Subject: [PATCH 0012/1646] drm/amdgpu: Fix header file dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 1d0d250cbfdfa..06436c3ebd2bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -26,6 +26,7 @@ #include #include +#include /* max number of rings */ #define AMDGPU_MAX_RINGS 18 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index fabf44b262be3..e9841518343e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -28,6 +28,7 @@ #include #include #include +#include #include "amdgpu_sync.h" #include "amdgpu_ring.h" -- GitLab From 2f901c25eb35a1b24a6006b9b668ad8bf88da582 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:33 -0500 Subject: [PATCH 0013/1646] drm/amdgpu: Fix wrong mask in get_atc_vmid_pasid_mapping_pasid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 74fcb8b42fd09..b8be7b961b7e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -787,7 +787,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index c70c8e1d1863c..744c05b186d5c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -704,7 +704,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, struct amdgpu_device *adev = (struct amdgpu_device *) kgd; reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -- GitLab From 1029a3f33678afb8978285209ec5cfe153fe44ef Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:34 -0500 Subject: [PATCH 0014/1646] drm/amdgpu: Remove unused kfd2kgd interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 9 --------- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10 ---------- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 -- 3 files changed, 21 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index b8be7b961b7e4..1362181b10d0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -139,7 +139,6 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, @@ -196,7 +195,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, @@ -790,13 +788,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); -} - static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 744c05b186d5c..5130eac7afddb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t queue_id); static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, unsigned int utimeout); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static int kgd_address_watch_disable(struct kgd_dev *kgd); static int kgd_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, @@ -99,7 +98,6 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); @@ -157,7 +155,6 @@ static const struct kfd2kgd_calls kfd2kgd = { get_atc_vmid_pasid_mapping_pasid, .get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid, - .write_vmid_invalidate_request = write_vmid_invalidate_request, .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, @@ -707,13 +704,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); -} - static int kgd_address_watch_disable(struct kgd_dev *kgd) { return 0; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index a6752bd0c8713..94eab54864bf7 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -258,8 +258,6 @@ struct kfd2kgd_calls { uint16_t (*get_atc_vmid_pasid_mapping_pasid)( struct kgd_dev *kgd, uint8_t vmid); - void (*write_vmid_invalidate_request)(struct kgd_dev *kgd, - uint8_t vmid); uint16_t (*get_fw_version)(struct kgd_dev *kgd, enum kgd_engine_type type); -- GitLab From d8d019ccffb838bb0dd98e583b5c25ccc0bc6ece Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:35 -0500 Subject: [PATCH 0015/1646] drm/amdgpu: Add KFD eviction fence This fence is used by KFD to keep memory resident while user mode queues are enabled. Trying to evict memory will trigger the enable_signaling callback, which starts a KFD eviction, which involves preempting user mode queues before signaling the fence. There is one such fence per process. v2: * Grab a reference to mm_struct * Dereference fence after NULL check * Simplify fence release, no need to signal without anyone waiting * Added signed-off-by Harish, who is the original author of this code v3: * update MAINTAINERS file * change amd_kfd_ prefix to amdkfd_ * remove useless initialization of variable to NULL v4: * set amdkfd_fence_ops to be static * Suggested by: Fengguang Wu Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- MAINTAINERS | 1 + drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 15 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 179 ++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 21 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 18 ++ .../gpu/drm/amd/include/kgd_kfd_interface.h | 6 + 8 files changed, 242 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c diff --git a/MAINTAINERS b/MAINTAINERS index 13c8ec11135a9..9e376c7f15b1b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -766,6 +766,7 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 353c937d947dd..5dd317579e77e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -129,6 +129,7 @@ amdgpu-y += \ # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ + amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2a519f9062eea..833dc26d402f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -29,6 +29,8 @@ #include #include +extern const struct kgd2kfd_calls *kgd2kfd; + struct amdgpu_device; struct kgd_mem { @@ -37,6 +39,19 @@ struct kgd_mem { void *cpu_ptr; }; +/* KFD Memory Eviction */ +struct amdgpu_amdkfd_fence { + struct dma_fence base; + struct mm_struct *mm; + spinlock_t lock; + char timeline_name[TASK_COMM_LEN]; +}; + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); + int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c new file mode 100644 index 0000000000000..2c14025e5e768 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -0,0 +1,179 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "amdgpu_amdkfd.h" + +static const struct dma_fence_ops amdkfd_fence_ops; +static atomic_t fence_seq = ATOMIC_INIT(0); + +/* Eviction Fence + * Fence helper functions to deal with KFD memory eviction. + * Big Idea - Since KFD submissions are done by user queues, a BO cannot be + * evicted unless all the user queues for that process are evicted. + * + * All the BOs in a process share an eviction fence. When process X wants + * to map VRAM memory but TTM can't find enough space, TTM will attempt to + * evict BOs from its LRU list. TTM checks if the BO is valuable to evict + * by calling ttm_bo_driver->eviction_valuable(). + * + * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs + * to process X. Otherwise, it will return true to indicate BO can be + * evicted by TTM. + * + * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue + * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move + * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler. + * + * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to + * nofity when the BO is free to move. fence_add_callback --> enable_signaling + * --> amdgpu_amdkfd_fence.enable_signaling + * + * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce + * user queues and signal fence. The work item will also start another delayed + * work item to restore BOs + */ + +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (fence == NULL) + return NULL; + + /* This reference gets released in amdkfd_fence_release */ + mmgrab(mm); + fence->mm = mm; + get_task_comm(fence->timeline_name, current); + spin_lock_init(&fence->lock); + + dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, + context, atomic_inc_return(&fence_seq)); + + return fence; +} + +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence; + + if (!f) + return NULL; + + fence = container_of(f, struct amdgpu_amdkfd_fence, base); + if (fence && f->ops == &amdkfd_fence_ops) + return fence; + + return NULL; +} + +static const char *amdkfd_fence_get_driver_name(struct dma_fence *f) +{ + return "amdgpu_amdkfd_fence"; +} + +static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + return fence->timeline_name; +} + +/** + * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict + * a KFD BO and schedules a job to move the BO. + * If fence is already signaled return true. + * If fence is not signaled schedule a evict KFD process work item. + */ +static bool amdkfd_fence_enable_signaling(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + + if (dma_fence_is_signaled(f)) + return true; + + if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f)) + return true; + + return false; +} + +/** + * amdkfd_fence_release - callback that fence can be freed + * + * @fence: fence + * + * This function is called when the reference count becomes zero. + * Drops the mm_struct reference and RCU schedules freeing up the fence. + */ +static void amdkfd_fence_release(struct dma_fence *f) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + /* Unconditionally signal the fence. The process is getting + * terminated. + */ + if (WARN_ON(!fence)) + return; /* Not an amdgpu_amdkfd_fence */ + + mmdrop(fence->mm); + kfree_rcu(f, rcu); +} + +/** + * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f + * if same return TRUE else return FALSE. + * + * @f: [IN] fence + * @mm: [IN] mm that needs to be verified + */ +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f); + + if (!fence) + return false; + else if (fence->mm == mm) + return true; + + return false; +} + +static const struct dma_fence_ops amdkfd_fence_ops = { + .get_driver_name = amdkfd_fence_get_driver_name, + .get_timeline_name = amdkfd_fence_get_timeline_name, + .enable_signaling = amdkfd_fence_enable_signaling, + .signaled = NULL, + .wait = dma_fence_default_wait, + .release = amdkfd_fence_release, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 06436c3ebd2bf..1a5911882657b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -36,8 +36,9 @@ #define AMDGPU_MAX_UVD_ENC_RINGS 2 /* some special values for the owner field */ -#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul) -#define AMDGPU_FENCE_OWNER_VM ((void*)1ul) +#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul) +#define AMDGPU_FENCE_OWNER_VM ((void *)1ul) +#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul) #define AMDGPU_FENCE_FLAG_64BIT (1 << 0) #define AMDGPU_FENCE_FLAG_INT (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index df65c66dc956f..b8d3b87fd43e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -31,6 +31,7 @@ #include #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" struct amdgpu_sync_entry { struct hlist_node node; @@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, */ static void *amdgpu_sync_get_owner(struct dma_fence *f) { - struct drm_sched_fence *s_fence = to_drm_sched_fence(f); + struct drm_sched_fence *s_fence; + struct amdgpu_amdkfd_fence *kfd_fence; + + if (!f) + return AMDGPU_FENCE_OWNER_UNDEFINED; + s_fence = to_drm_sched_fence(f); if (s_fence) return s_fence->owner; + kfd_fence = to_amdgpu_amdkfd_fence(f); + if (kfd_fence) + return AMDGPU_FENCE_OWNER_KFD; + return AMDGPU_FENCE_OWNER_UNDEFINED; } @@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], reservation_object_held(resv)); + /* We only want to trigger KFD eviction fences on + * evict or move jobs. Skip KFD fences otherwise. + */ + fence_owner = amdgpu_sync_get_owner(f); + if (fence_owner == AMDGPU_FENCE_OWNER_KFD && + owner != AMDGPU_FENCE_OWNER_UNDEFINED) + continue; + if (amdgpu_sync_same_dev(adev, f)) { /* VM updates are only interesting * for other VM updates and moves. */ - fence_owner = amdgpu_sync_get_owner(f); if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) && (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) && ((owner == AMDGPU_FENCE_OWNER_VM) != diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 28c33d711bab3..5fcb3488a5959 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -46,6 +46,7 @@ #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -1171,6 +1172,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, { unsigned long num_pages = bo->mem.num_pages; struct drm_mm_node *node = bo->mem.mm_node; + struct reservation_object_list *flist; + struct dma_fence *f; + int i; + + /* If bo is a KFD BO, check if the bo belongs to the current process. + * If true, then return false as any KFD process needs all its BOs to + * be resident to run successfully + */ + flist = reservation_object_get_list(bo->resv); + if (flist) { + for (i = 0; i < flist->shared_count; ++i) { + f = rcu_dereference_protected(flist->shared[i], + reservation_object_held(bo->resv)); + if (amdkfd_fence_check_mm(f, current->mm)) + return false; + } + } switch (bo->mem.mem_type) { case TTM_PL_TT: diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 94eab54864bf7..9e352499c409b 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -30,6 +30,7 @@ #include #include +#include struct pci_dev; @@ -286,6 +287,9 @@ struct kfd2kgd_calls { * * @resume: Notifies amdkfd about a resume action done to a kgd device * + * @schedule_evict_and_restore_process: Schedules work queue that will prepare + * for safe eviction of KFD BOs that belong to the specified process. + * * This structure contains function callback pointers so the kgd driver * will notify to the amdkfd about certain status changes. * @@ -300,6 +304,8 @@ struct kgd2kfd_calls { void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); void (*suspend)(struct kfd_dev *kfd); int (*resume)(struct kfd_dev *kfd); + int (*schedule_evict_and_restore_process)(struct mm_struct *mm, + struct dma_fence *fence); }; int kgd2kfd_init(unsigned interface_version, -- GitLab From 155494dbbbf4d6d6512b8bc2dc6bc483e47e1c71 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:36 -0500 Subject: [PATCH 0016/1646] drm/amdgpu: Update kgd2kfd_shared_resources for dGPU support Add GPUVM size and DRM render node. Also add function to query the VMID mask to avoid hard-coding it in multiple places later. v2: cut off GPUVM size at the VA hole Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 20 +++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ .../gpu/drm/amd/include/kgd_kfd_interface.h | 6 ++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 141d11dfede04..3ec4bada61d7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -30,6 +30,8 @@ const struct kgd2kfd_calls *kgd2kfd; bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); +static const unsigned int compute_vmid_bitmap = 0xFF00; + int amdgpu_amdkfd_init(void) { int ret; @@ -137,9 +139,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) int last_valid_bit; if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { - .compute_vmid_bitmap = 0xFF00, + .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, - .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, + .gpuvm_size = min(adev->vm_manager.max_pfn + << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_VA_HOLE_START), + .drm_render_minor = adev->ddev->render->index }; /* this is going to have a few of the MSBs set that we need to @@ -359,3 +365,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } + +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) +{ + if (adev->kfd) { + if ((1 << vmid) & compute_vmid_bitmap) + return true; + } + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 833dc26d402f6..d1bab32622df7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -66,6 +66,8 @@ void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); + /* Shared API */ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 9e352499c409b..36c706aff2ace 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -108,6 +108,12 @@ struct kgd2kfd_shared_resources { /* Number of bytes at start of aperture reserved for KGD. */ size_t doorbell_start_offset; + + /* GPUVM address space size in bytes */ + uint64_t gpuvm_size; + + /* Minor device number of the render node */ + int drm_render_minor; }; struct tile_config { -- GitLab From 3c728d3aa1fd5c7c2461835a93ac8fad57813db6 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:37 -0500 Subject: [PATCH 0017/1646] drm/amdgpu: add amdgpu_sync_clone Cloning a sync object is useful for waiting for a sync object without locking the original structure indefinitely, blocking other threads. Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 35 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1 + 2 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b8d3b87fd43e9..2d6f5ec77a687 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -322,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit return NULL; } +/** + * amdgpu_sync_clone - clone a sync object + * + * @source: sync object to clone + * @clone: pointer to destination sync object + * + * Adds references to all unsignaled fences in @source to @clone. Also + * removes signaled fences from @source while at it. + */ +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + struct dma_fence *f; + int i, r; + + hash_for_each_safe(source->fences, i, tmp, e, node) { + f = e->fence; + if (!dma_fence_is_signaled(f)) { + r = amdgpu_sync_fence(NULL, clone, f, e->explicit); + if (r) + return r; + } else { + hash_del(&e->node); + dma_fence_put(f); + kmem_cache_free(amdgpu_sync_slab, e); + } + } + + dma_fence_put(clone->last_vm_update); + clone->last_vm_update = dma_fence_get(source->last_vm_update); + + return 0; +} + int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) { struct amdgpu_sync_entry *e; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7aba38d5c9dff..10cf23a57f172 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit); +int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); -- GitLab From a46a2cd103a863724d668c86dca86bd0d93a19e4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:38 -0500 Subject: [PATCH 0018/1646] drm/amdgpu: Add GPUVM memory management functions for KFD v2: * Removed unused flags from struct kgd_mem * Updated some comments * Added a check to unmap_memory_from_gpu whether BO was mapped v3: add mutex_destroy in relevant places Signed-off-by: Felix Kuehling Acked-by: Oded Gabbay Signed-off-by: Oded Gabbay --- MAINTAINERS | 1 + drivers/gpu/drm/amd/amdgpu/Makefile | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 91 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 66 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 67 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1506 +++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 + drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 + .../gpu/drm/amd/include/kgd_kfd_interface.h | 77 + 11 files changed, 1819 insertions(+), 4 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c diff --git a/MAINTAINERS b/MAINTAINERS index 9e376c7f15b1b..dccae57985fee 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -767,6 +767,7 @@ F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +F: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c F: drivers/gpu/drm/amd/amdkfd/ F: drivers/gpu/drm/amd/include/cik_structs.h F: drivers/gpu/drm/amd/include/kgd_kfd_interface.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 5dd317579e77e..8522c2ea1f3e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -130,6 +130,7 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ + amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o # add cgs diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3ec4bada61d7a..5a881107a15ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -58,6 +58,7 @@ int amdgpu_amdkfd_init(void) #else ret = -ENOENT; #endif + amdgpu_amdkfd_gpuvm_init_mem_limits(); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index d1bab32622df7..05a228d60241c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,15 +28,41 @@ #include #include #include +#include +#include "amdgpu_sync.h" +#include "amdgpu_vm.h" extern const struct kgd2kfd_calls *kgd2kfd; struct amdgpu_device; +struct kfd_bo_va_list { + struct list_head bo_list; + struct amdgpu_bo_va *bo_va; + void *kgd_dev; + bool is_mapped; + uint64_t va; + uint64_t pte_flags; +}; + struct kgd_mem { + struct mutex lock; struct amdgpu_bo *bo; - uint64_t gpu_addr; - void *cpu_ptr; + struct list_head bo_va_list; + /* protected by amdkfd_process_info.lock */ + struct ttm_validate_buffer validate_list; + struct ttm_validate_buffer resv_list; + uint32_t domain; + unsigned int mapped_to_gpu_memory; + uint64_t va; + + uint32_t mapping_flags; + + struct amdkfd_process_info *process_info; + + struct amdgpu_sync sync; + + bool aql_queue; }; /* KFD Memory Eviction */ @@ -52,6 +78,41 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); +struct amdkfd_process_info { + /* List head of all VMs that belong to a KFD process */ + struct list_head vm_list_head; + /* List head for all KFD BOs that belong to a KFD process. */ + struct list_head kfd_bo_list; + /* Lock to protect kfd_bo_list */ + struct mutex lock; + + /* Number of VMs */ + unsigned int n_vms; + /* Eviction Fence */ + struct amdgpu_amdkfd_fence *eviction_fence; +}; + +/* struct amdkfd_vm - + * For Memory Eviction KGD requires a mechanism to keep track of all KFD BOs + * belonging to a KFD process. All the VMs belonging to the same process point + * to the same amdkfd_process_info. + */ +struct amdkfd_vm { + /* Keep base as the first parameter for pointer compatibility between + * amdkfd_vm and amdgpu_vm. + */ + struct amdgpu_vm base; + + /* List node in amdkfd_process_info.vm_list_head*/ + struct list_head vm_list_node; + + struct amdgpu_device *adev; + /* Points to the KFD process VM info*/ + struct amdkfd_process_info *process_info; + + uint64_t pd_phys_addr; +}; + int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); @@ -96,4 +157,30 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); valid; \ }) +/* GPUVM API */ +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef); +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags); +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size); +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, + struct dma_fence **ef); + +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); + #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1362181b10d0d..65783d1eddcac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -143,6 +143,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -199,7 +203,20 @@ static const struct kfd2kgd_calls kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -855,3 +872,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) return hdr->common.ucode_version; } +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid\n"); + return 0; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 5130eac7afddb..1b5bf1353f0c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -101,6 +101,10 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. @@ -159,7 +163,20 @@ static const struct kfd2kgd_calls kfd2kgd = { .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -816,3 +833,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) /* Only 12 bit in use*/ return hdr->common.ucode_version; } + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID\n"); + return; + } + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + unsigned int tmp; + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + + tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); + if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + break; + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return -EINVAL; + } + + WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); + RREG32(mmVM_INVALIDATE_RESPONSE); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c new file mode 100644 index 0000000000000..e0371a9967b94 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -0,0 +1,1506 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include +#include +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_amdkfd.h" + +/* Special VM and GART address alignment needed for VI pre-Fiji due to + * a HW bug. + */ +#define VI_BO_SIZE_ALIGN (0x8000) + +/* Impose limit on how much memory KFD can use */ +static struct { + uint64_t max_system_mem_limit; + int64_t system_mem_used; + spinlock_t mem_limit_lock; +} kfd_mem_limit; + +/* Struct used for amdgpu_amdkfd_bo_validate */ +struct amdgpu_vm_parser { + uint32_t domain; + bool wait; +}; + +static const char * const domain_bit_to_string[] = { + "CPU", + "GTT", + "VRAM", + "GDS", + "GWS", + "OA" +}; + +#define domain_string(domain) domain_bit_to_string[ffs(domain)-1] + + + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, + struct kgd_mem *mem) +{ + struct kfd_bo_va_list *entry; + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) + if (entry->bo_va->base.vm == avm) + return false; + + return true; +} + +/* Set memory usage limits. Current, limits are + * System (kernel) memory - 3/8th System RAM + */ +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ + struct sysinfo si; + uint64_t mem; + + si_meminfo(&si); + mem = si.totalram - si.totalhigh; + mem *= si.mem_unit; + + spin_lock_init(&kfd_mem_limit.mem_limit_lock); + kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20)); +} + +static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + int ret = 0; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) { + if (kfd_mem_limit.system_mem_used + (acc_size + size) > + kfd_mem_limit.max_system_mem_limit) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += (acc_size + size); + } +err_no_mem: + spin_unlock(&kfd_mem_limit.mem_limit_lock); + return ret; +} + +static void unreserve_system_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain) +{ + size_t acc_size; + + acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, + sizeof(struct amdgpu_bo)); + + spin_lock(&kfd_mem_limit.mem_limit_lock); + if (domain == AMDGPU_GEM_DOMAIN_GTT) + kfd_mem_limit.system_mem_used -= (acc_size + size); + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + +void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +{ + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + kfd_mem_limit.system_mem_used -= + (bo->tbo.acc_size + amdgpu_bo_size(bo)); + } + WARN_ONCE(kfd_mem_limit.system_mem_used < 0, + "kfd system memory accounting unbalanced"); + + spin_unlock(&kfd_mem_limit.mem_limit_lock); +} + + +/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's + * reservation object. + * + * @bo: [IN] Remove eviction fence(s) from this BO + * @ef: [IN] If ef is specified, then this eviction fence is removed if it + * is present in the shared list. + * @ef_list: [OUT] Returns list of eviction fences. These fences are removed + * from BO's reservation object shared list. + * @ef_count: [OUT] Number of fences in ef_list. + * + * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be + * called to restore the eviction fences and to avoid memory leak. This is + * useful for shared BOs. + * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held. + */ +static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence *ef, + struct amdgpu_amdkfd_fence ***ef_list, + unsigned int *ef_count) +{ + struct reservation_object_list *fobj; + struct reservation_object *resv; + unsigned int i = 0, j = 0, k = 0, shared_count; + unsigned int count = 0; + struct amdgpu_amdkfd_fence **fence_list; + + if (!ef && !ef_list) + return -EINVAL; + + if (ef_list) { + *ef_list = NULL; + *ef_count = 0; + } + + resv = bo->tbo.resv; + fobj = reservation_object_get_list(resv); + + if (!fobj) + return 0; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + /* Go through all the shared fences in the resevation object. If + * ef is specified and it exists in the list, remove it and reduce the + * count. If ef is not specified, then get the count of eviction fences + * present. + */ + shared_count = fobj->shared_count; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + if (ef) { + if (f->context == ef->base.context) { + dma_fence_put(f); + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } else if (to_amdgpu_amdkfd_fence(f)) + count++; + } + write_seqcount_end(&resv->seq); + preempt_enable(); + + if (ef || !count) + return 0; + + /* Alloc memory for count number of eviction fence pointers. Fill the + * ef_list array and ef_count + */ + fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *), + GFP_KERNEL); + if (!fence_list) + return -ENOMEM; + + preempt_disable(); + write_seqcount_begin(&resv->seq); + + j = 0; + for (i = 0; i < shared_count; ++i) { + struct dma_fence *f; + struct amdgpu_amdkfd_fence *efence; + + f = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + efence = to_amdgpu_amdkfd_fence(f); + if (efence) { + fence_list[k++] = efence; + fobj->shared_count--; + } else { + RCU_INIT_POINTER(fobj->shared[j++], f); + } + } + + write_seqcount_end(&resv->seq); + preempt_enable(); + + *ef_list = fence_list; + *ef_count = k; + + return 0; +} + +/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's + * reservation object. + * + * @bo: [IN] Add eviction fences to this BO + * @ef_list: [IN] List of eviction fences to be added + * @ef_count: [IN] Number of fences in ef_list. + * + * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this + * function. + */ +static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo, + struct amdgpu_amdkfd_fence **ef_list, + unsigned int ef_count) +{ + int i; + + if (!ef_list || !ef_count) + return; + + for (i = 0; i < ef_count; i++) { + amdgpu_bo_fence(bo, &ef_list[i]->base, true); + /* Re-adding the fence takes an additional reference. Drop that + * reference. + */ + dma_fence_put(&ef_list[i]->base); + } + + kfree(ef_list); +} + +static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, + bool wait) +{ + struct ttm_operation_ctx ctx = { false, false }; + int ret; + + if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm), + "Called with userptr BO")) + return -EINVAL; + + amdgpu_ttm_placement_from_domain(bo, domain); + + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + goto validate_fail; + if (wait) { + struct amdgpu_amdkfd_fence **ef_list; + unsigned int ef_count; + + ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, + &ef_count); + if (ret) + goto validate_fail; + + ttm_bo_wait(&bo->tbo, false, false); + amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count); + } + +validate_fail: + return ret; +} + +static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo) +{ + struct amdgpu_vm_parser *p = param; + + return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait); +} + +/* vm_validate_pt_pd_bos - Validate page table and directory BOs + * + * Page directories are not updated here because huge page handling + * during page table updates can invalidate page directory entries + * again. Page directories are only updated after updating page + * tables. + */ +static int vm_validate_pt_pd_bos(struct amdkfd_vm *vm) +{ + struct amdgpu_bo *pd = vm->base.root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + struct amdgpu_vm_parser param; + uint64_t addr, flags = AMDGPU_PTE_VALID; + int ret; + + param.domain = AMDGPU_GEM_DOMAIN_VRAM; + param.wait = false; + + ret = amdgpu_vm_validate_pt_bos(adev, &vm->base, amdgpu_amdkfd_validate, + ¶m); + if (ret) { + pr_err("amdgpu: failed to validate PT BOs\n"); + return ret; + } + + ret = amdgpu_amdkfd_validate(¶m, pd); + if (ret) { + pr_err("amdgpu: failed to validate PD\n"); + return ret; + } + + addr = amdgpu_bo_gpu_offset(vm->base.root.base.bo); + amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); + vm->pd_phys_addr = addr; + + if (vm->base.use_cpu_for_update) { + ret = amdgpu_bo_kmap(pd, NULL); + if (ret) { + pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret); + return ret; + } + } + + return 0; +} + +static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, + struct dma_fence *f) +{ + int ret = amdgpu_sync_fence(adev, sync, f, false); + + /* Sync objects can't handle multiple GPUs (contexts) updating + * sync->last_vm_update. Fortunately we don't need it for + * KFD's purposes, so we can just drop that fence. + */ + if (sync->last_vm_update) { + dma_fence_put(sync->last_vm_update); + sync->last_vm_update = NULL; + } + + return ret; +} + +static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) +{ + struct amdgpu_bo *pd = vm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); + int ret; + + ret = amdgpu_vm_update_directories(adev, vm); + if (ret) + return ret; + + return sync_vm_fence(adev, sync, vm->last_update); +} + +/* add_bo_to_vm - Add a BO to a VM + * + * Everything that needs to bo done only once when a BO is first added + * to a VM. It can later be mapped and unmapped many times without + * repeating these steps. + * + * 1. Allocate and initialize BO VA entry data structure + * 2. Add BO to the VM + * 3. Determine ASIC-specific PTE flags + * 4. Alloc page tables and directories if needed + * 4a. Validate new page tables and directories + */ +static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem, + struct amdgpu_vm *avm, bool is_aql, + struct kfd_bo_va_list **p_bo_va_entry) +{ + int ret; + struct kfd_bo_va_list *bo_va_entry; + struct amdkfd_vm *kvm = container_of(avm, + struct amdkfd_vm, base); + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_bo *bo = mem->bo; + uint64_t va = mem->va; + struct list_head *list_bo_va = &mem->bo_va_list; + unsigned long bo_size = bo->tbo.mem.size; + + if (!va) { + pr_err("Invalid VA when adding BO to VM\n"); + return -EINVAL; + } + + if (is_aql) + va += bo_size; + + bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL); + if (!bo_va_entry) + return -ENOMEM; + + pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va, + va + bo_size, avm); + + /* Add BO to VM internal data structures*/ + bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, avm, bo); + if (!bo_va_entry->bo_va) { + ret = -EINVAL; + pr_err("Failed to add BO object to VM. ret == %d\n", + ret); + goto err_vmadd; + } + + bo_va_entry->va = va; + bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev, + mem->mapping_flags); + bo_va_entry->kgd_dev = (void *)adev; + list_add(&bo_va_entry->bo_list, list_bo_va); + + if (p_bo_va_entry) + *p_bo_va_entry = bo_va_entry; + + /* Allocate new page tables if needed and validate + * them. Clearing of new page tables and validate need to wait + * on move fences. We don't want that to trigger the eviction + * fence, so remove it temporarily. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + + ret = amdgpu_vm_alloc_pts(adev, avm, va, amdgpu_bo_size(bo)); + if (ret) { + pr_err("Failed to allocate pts, err=%d\n", ret); + goto err_alloc_pts; + } + + ret = vm_validate_pt_pd_bos(kvm); + if (ret) { + pr_err("validate_pt_pd_bos() failed\n"); + goto err_alloc_pts; + } + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + return 0; + +err_alloc_pts: + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va); + list_del(&bo_va_entry->bo_list); +err_vmadd: + kfree(bo_va_entry); + return ret; +} + +static void remove_bo_from_vm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, unsigned long size) +{ + pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n", + entry->va, + entry->va + size, entry); + amdgpu_vm_bo_rmv(adev, entry->bo_va); + list_del(&entry->bo_list); + kfree(entry); +} + +static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, + struct amdkfd_process_info *process_info) +{ + struct ttm_validate_buffer *entry = &mem->validate_list; + struct amdgpu_bo *bo = mem->bo; + + INIT_LIST_HEAD(&entry->head); + entry->shared = true; + entry->bo = &bo->tbo; + mutex_lock(&process_info->lock); + list_add_tail(&entry->head, &process_info->kfd_bo_list); + mutex_unlock(&process_info->lock); +} + +/* Reserving a BO and its page table BOs must happen atomically to + * avoid deadlocks. Some operations update multiple VMs at once. Track + * all the reservation info in a context structure. Optionally a sync + * object can track VM updates. + */ +struct bo_vm_reservation_context { + struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ + unsigned int n_vms; /* Number of VMs reserved */ + struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ + struct ww_acquire_ctx ticket; /* Reservation ticket */ + struct list_head list, duplicates; /* BO lists */ + struct amdgpu_sync *sync; /* Pointer to sync object */ + bool reserved; /* Whether BOs are reserved */ +}; + +enum bo_vm_match { + BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */ + BO_VM_MAPPED, /* Match VMs where a BO is mapped */ + BO_VM_ALL, /* Match all VMs a BO was added to */ +}; + +/** + * reserve_bo_and_vm - reserve a BO and a VM unconditionally. + * @mem: KFD BO structure. + * @vm: the VM to reserve. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + */ +static int reserve_bo_and_vm(struct kgd_mem *mem, + struct amdgpu_vm *vm, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + int ret; + + WARN_ON(!vm); + + ctx->reserved = false; + ctx->n_vms = 1; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else { + pr_err("Failed to reserve buffers in ttm\n"); + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally + * @mem: KFD BO structure. + * @vm: the VM to reserve. If NULL, then all VMs associated with the BO + * is used. Otherwise, a single VM associated with the BO. + * @map_type: the mapping status that will be used to filter the VMs. + * @ctx: the struct that will be used in unreserve_bo_and_vms(). + * + * Returns 0 for success, negative for failure. + */ +static int reserve_bo_and_cond_vms(struct kgd_mem *mem, + struct amdgpu_vm *vm, enum bo_vm_match map_type, + struct bo_vm_reservation_context *ctx) +{ + struct amdgpu_bo *bo = mem->bo; + struct kfd_bo_va_list *entry; + unsigned int i; + int ret; + + ctx->reserved = false; + ctx->n_vms = 0; + ctx->vm_pd = NULL; + ctx->sync = &mem->sync; + + INIT_LIST_HEAD(&ctx->list); + INIT_LIST_HEAD(&ctx->duplicates); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + ctx->n_vms++; + } + + if (ctx->n_vms != 0) { + ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), + GFP_KERNEL); + if (!ctx->vm_pd) + return -ENOMEM; + } + + ctx->kfd_bo.robj = bo; + ctx->kfd_bo.priority = 0; + ctx->kfd_bo.tv.bo = &bo->tbo; + ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.user_pages = NULL; + list_add(&ctx->kfd_bo.tv.head, &ctx->list); + + i = 0; + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; + + amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, + &ctx->vm_pd[i]); + i++; + } + + ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, + false, &ctx->duplicates); + if (!ret) + ctx->reserved = true; + else + pr_err("Failed to reserve buffers in ttm.\n"); + + if (ret) { + kfree(ctx->vm_pd); + ctx->vm_pd = NULL; + } + + return ret; +} + +/** + * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context + * @ctx: Reservation context to unreserve + * @wait: Optionally wait for a sync object representing pending VM updates + * @intr: Whether the wait is interruptible + * + * Also frees any resources allocated in + * reserve_bo_and_(cond_)vm(s). Returns the status from + * amdgpu_sync_wait. + */ +static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, + bool wait, bool intr) +{ + int ret = 0; + + if (wait) + ret = amdgpu_sync_wait(ctx->sync, intr); + + if (ctx->reserved) + ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); + kfree(ctx->vm_pd); + + ctx->sync = NULL; + + ctx->reserved = false; + ctx->vm_pd = NULL; + + return ret; +} + +static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + struct amdgpu_bo_va *bo_va = entry->bo_va; + struct amdgpu_vm *vm = bo_va->base.vm; + struct amdkfd_vm *kvm = container_of(vm, struct amdkfd_vm, base); + struct amdgpu_bo *pd = vm->root.base.bo; + + /* Remove eviction fence from PD (and thereby from PTs too as + * they share the resv. object). Otherwise during PT update + * job (see amdgpu_vm_bo_update_mapping), eviction fence would + * get added to job->sync object and job execution would + * trigger the eviction fence. + */ + amdgpu_amdkfd_remove_eviction_fence(pd, + kvm->process_info->eviction_fence, + NULL, NULL); + amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + + amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + + /* Add the eviction fence back */ + amdgpu_bo_fence(pd, &kvm->process_info->eviction_fence->base, true); + + sync_vm_fence(adev, sync, bo_va->last_pt_update); + + return 0; +} + +static int update_gpuvm_pte(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, + struct amdgpu_sync *sync) +{ + int ret; + struct amdgpu_vm *vm; + struct amdgpu_bo_va *bo_va; + struct amdgpu_bo *bo; + + bo_va = entry->bo_va; + vm = bo_va->base.vm; + bo = bo_va->base.bo; + + /* Update the page tables */ + ret = amdgpu_vm_bo_update(adev, bo_va, false); + if (ret) { + pr_err("amdgpu_vm_bo_update failed\n"); + return ret; + } + + return sync_vm_fence(adev, sync, bo_va->last_pt_update); +} + +static int map_bo_to_gpuvm(struct amdgpu_device *adev, + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) +{ + int ret; + + /* Set virtual address for the allocation */ + ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0, + amdgpu_bo_size(entry->bo_va->base.bo), + entry->pte_flags); + if (ret) { + pr_err("Failed to map VA 0x%llx in vm. ret %d\n", + entry->va, ret); + return ret; + } + + ret = update_gpuvm_pte(adev, entry, sync); + if (ret) { + pr_err("update_gpuvm_pte() failed\n"); + goto update_gpuvm_pte_failed; + } + + return 0; + +update_gpuvm_pte_failed: + unmap_bo_from_gpuvm(adev, entry, sync); + return ret; +} + +static int process_validate_vms(struct amdkfd_process_info *process_info) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_validate_pt_pd_bos(peer_vm); + if (ret) + return ret; + } + + return 0; +} + +static int process_update_pds(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdkfd_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = vm_update_pds(&peer_vm->base, sync); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, + void **process_info, + struct dma_fence **ef) +{ + int ret; + struct amdkfd_vm *new_vm; + struct amdkfd_process_info *info; + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL); + if (!new_vm) + return -ENOMEM; + + /* Initialize the VM context, allocate the page directory and zero it */ + ret = amdgpu_vm_init(adev, &new_vm->base, AMDGPU_VM_CONTEXT_COMPUTE, 0); + if (ret) { + pr_err("Failed init vm ret %d\n", ret); + goto vm_init_fail; + } + new_vm->adev = adev; + + if (!*process_info) { + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto alloc_process_info_fail; + } + + mutex_init(&info->lock); + INIT_LIST_HEAD(&info->vm_list_head); + INIT_LIST_HEAD(&info->kfd_bo_list); + + info->eviction_fence = + amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), + current->mm); + if (!info->eviction_fence) { + pr_err("Failed to create eviction fence\n"); + goto create_evict_fence_fail; + } + + *process_info = info; + *ef = dma_fence_get(&info->eviction_fence->base); + } + + new_vm->process_info = *process_info; + + mutex_lock(&new_vm->process_info->lock); + list_add_tail(&new_vm->vm_list_node, + &(new_vm->process_info->vm_list_head)); + new_vm->process_info->n_vms++; + mutex_unlock(&new_vm->process_info->lock); + + *vm = (void *) new_vm; + + pr_debug("Created process vm %p\n", *vm); + + return ret; + +create_evict_fence_fail: + mutex_destroy(&info->lock); + kfree(info); +alloc_process_info_fail: + amdgpu_vm_fini(adev, &new_vm->base); +vm_init_fail: + kfree(new_vm); + return ret; + +} + +void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *) vm; + struct amdgpu_vm *avm = &kfd_vm->base; + struct amdgpu_bo *pd; + struct amdkfd_process_info *process_info; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Destroying process vm %p\n", vm); + /* Release eviction fence from PD */ + pd = avm->root.base.bo; + amdgpu_bo_reserve(pd, false); + amdgpu_bo_fence(pd, NULL, false); + amdgpu_bo_unreserve(pd); + + process_info = kfd_vm->process_info; + + mutex_lock(&process_info->lock); + process_info->n_vms--; + list_del(&kfd_vm->vm_list_node); + mutex_unlock(&process_info->lock); + + /* Release per-process resources */ + if (!process_info->n_vms) { + WARN_ON(!list_empty(&process_info->kfd_bo_list)); + + dma_fence_put(&process_info->eviction_fence->base); + mutex_destroy(&process_info->lock); + kfree(process_info); + } + + /* Release the VM context */ + amdgpu_vm_fini(adev, avm); + kfree(vm); +} + +uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +{ + struct amdkfd_vm *avm = (struct amdkfd_vm *)vm; + + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; +} + +int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( + struct kgd_dev *kgd, uint64_t va, uint64_t size, + void *vm, struct kgd_mem **mem, + uint64_t *offset, uint32_t flags) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + struct amdgpu_bo *bo; + int byte_align; + u32 alloc_domain; + u64 alloc_flags; + uint32_t mapping_flags; + int ret; + + /* + * Check on which domain to allocate BO + */ + if (flags & ALLOC_MEM_FLAGS_VRAM) { + alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : + AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + } else if (flags & ALLOC_MEM_FLAGS_GTT) { + alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else { + return -EINVAL; + } + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); + + /* Workaround for AQL queue wraparound bug. Map the same + * memory twice. That means we only actually allocate half + * the memory. + */ + if ((*mem)->aql_queue) + size = size >> 1; + + /* Workaround for TLB bug on older VI chips */ + byte_align = (adev->family == AMDGPU_FAMILY_VI && + adev->asic_type != CHIP_FIJI && + adev->asic_type != CHIP_POLARIS10 && + adev->asic_type != CHIP_POLARIS11) ? + VI_BO_SIZE_ALIGN : 1; + + mapping_flags = AMDGPU_VM_PAGE_READABLE; + if (flags & ALLOC_MEM_FLAGS_WRITABLE) + mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; + if (flags & ALLOC_MEM_FLAGS_EXECUTABLE) + mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; + if (flags & ALLOC_MEM_FLAGS_COHERENT) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + (*mem)->mapping_flags = mapping_flags; + + amdgpu_sync_create(&(*mem)->sync); + + ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + if (ret) { + pr_debug("Insufficient system memory\n"); + goto err_reserve_system_mem; + } + + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", + va, size, domain_string(alloc_domain)); + + ret = amdgpu_bo_create(adev, size, byte_align, false, + alloc_domain, alloc_flags, NULL, NULL, &bo); + if (ret) { + pr_debug("Failed to create BO on domain %s. ret %d\n", + domain_string(alloc_domain), ret); + goto err_bo_create; + } + bo->kfd_bo = *mem; + (*mem)->bo = bo; + + (*mem)->va = va; + (*mem)->domain = alloc_domain; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = kfd_vm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, kfd_vm->process_info); + + if (offset) + *offset = amdgpu_bo_mmap_offset(bo); + + return 0; + +err_bo_create: + unreserve_system_mem_limit(adev, size, alloc_domain); +err_reserve_system_mem: + mutex_destroy(&(*mem)->lock); + kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem) +{ + struct amdkfd_process_info *process_info = mem->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry, *tmp; + struct bo_vm_reservation_context ctx; + struct ttm_validate_buffer *bo_list_entry; + int ret; + + mutex_lock(&mem->lock); + + if (mem->mapped_to_gpu_memory > 0) { + pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n", + mem->va, bo_size); + mutex_unlock(&mem->lock); + return -EBUSY; + } + + mutex_unlock(&mem->lock); + /* lock is not needed after this, since mem is unused and will + * be freed anyway + */ + + /* Make sure restore workers don't access the BO any more */ + bo_list_entry = &mem->validate_list; + mutex_lock(&process_info->lock); + list_del(&bo_list_entry->head); + mutex_unlock(&process_info->lock); + + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); + if (unlikely(ret)) + return ret; + + /* The eviction fence should be removed by the last unmap. + * TODO: Log an error condition if the bo still has the eviction fence + * attached + */ + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, + mem->va + bo_size * (1 + mem->aql_queue)); + + /* Remove from VM internal data structures */ + list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list) + remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev, + entry, bo_size); + + ret = unreserve_bo_and_vms(&ctx, false, false); + + /* Free the sync object */ + amdgpu_sync_free(&mem->sync); + + /* Free the BO*/ + amdgpu_bo_unref(&mem->bo); + mutex_destroy(&mem->lock); + kfree(mem); + + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_vm *kfd_vm = (struct amdkfd_vm *)vm; + int ret; + struct amdgpu_bo *bo; + uint32_t domain; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + struct kfd_bo_va_list *bo_va_entry = NULL; + struct kfd_bo_va_list *bo_va_entry_aql = NULL; + unsigned long bo_size; + + /* Make sure restore is not running concurrently. + */ + mutex_lock(&mem->process_info->lock); + + mutex_lock(&mem->lock); + + bo = mem->bo; + + if (!bo) { + pr_err("Invalid BO when mapping memory to GPU\n"); + ret = -EINVAL; + goto out; + } + + domain = mem->domain; + bo_size = bo->tbo.mem.size; + + pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm, domain_string(domain)); + + ret = reserve_bo_and_vm(mem, vm, &ctx); + if (unlikely(ret)) + goto out; + + if (check_if_add_bo_to_vm((struct amdgpu_vm *)vm, mem)) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, false, + &bo_va_entry); + if (ret) + goto add_bo_to_vm_failed; + if (mem->aql_queue) { + ret = add_bo_to_vm(adev, mem, (struct amdgpu_vm *)vm, + true, &bo_va_entry_aql); + if (ret) + goto add_bo_to_vm_failed_aql; + } + } else { + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto add_bo_to_vm_failed; + } + + if (mem->mapped_to_gpu_memory == 0) { + /* Validate BO only once. The eviction fence gets added to BO + * the first time it is mapped. Validate will wait for all + * background evictions to complete. + */ + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) { + pr_debug("Validate failed\n"); + goto map_bo_to_gpuvm_failed; + } + } + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && !entry->is_mapped) { + pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", + entry->va, entry->va + bo_size, + entry); + + ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + if (ret) { + pr_err("Failed to map radeon bo to gpuvm\n"); + goto map_bo_to_gpuvm_failed; + } + + ret = vm_update_pds(vm, ctx.sync); + if (ret) { + pr_err("Failed to update page directories\n"); + goto map_bo_to_gpuvm_failed; + } + + entry->is_mapped = true; + mem->mapped_to_gpu_memory++; + pr_debug("\t INC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count) + amdgpu_bo_fence(bo, + &kfd_vm->process_info->eviction_fence->base, + true); + ret = unreserve_bo_and_vms(&ctx, false, false); + + goto out; + +map_bo_to_gpuvm_failed: + if (bo_va_entry_aql) + remove_bo_from_vm(adev, bo_va_entry_aql, bo_size); +add_bo_to_vm_failed_aql: + if (bo_va_entry) + remove_bo_from_vm(adev, bo_va_entry, bo_size); +add_bo_to_vm_failed: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->process_info->lock); + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( + struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdkfd_process_info *process_info = + ((struct amdkfd_vm *)vm)->process_info; + unsigned long bo_size = mem->bo->tbo.mem.size; + struct kfd_bo_va_list *entry; + struct bo_vm_reservation_context ctx; + int ret; + + mutex_lock(&mem->lock); + + ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + if (unlikely(ret)) + goto out; + /* If no VMs were reserved, it means the BO wasn't actually mapped */ + if (ctx.n_vms == 0) { + ret = -EINVAL; + goto unreserve_out; + } + + ret = vm_validate_pt_pd_bos((struct amdkfd_vm *)vm); + if (unlikely(ret)) + goto unreserve_out; + + pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", + mem->va, + mem->va + bo_size * (1 + mem->aql_queue), + vm); + + list_for_each_entry(entry, &mem->bo_va_list, bo_list) { + if (entry->bo_va->base.vm == vm && entry->is_mapped) { + pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", + entry->va, + entry->va + bo_size, + entry); + + ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync); + if (ret == 0) { + entry->is_mapped = false; + } else { + pr_err("failed to unmap VA 0x%llx\n", + mem->va); + goto unreserve_out; + } + + mem->mapped_to_gpu_memory--; + pr_debug("\t DEC mapping count %d\n", + mem->mapped_to_gpu_memory); + } + } + + /* If BO is unmapped from all VMs, unfence it. It can be evicted if + * required. + */ + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count) + amdgpu_amdkfd_remove_eviction_fence(mem->bo, + process_info->eviction_fence, + NULL, NULL); + +unreserve_out: + unreserve_bo_and_vms(&ctx, false, false); +out: + mutex_unlock(&mem->lock); + return ret; +} + +int amdgpu_amdkfd_gpuvm_sync_memory( + struct kgd_dev *kgd, struct kgd_mem *mem, bool intr) +{ + struct amdgpu_sync sync; + int ret; + + amdgpu_sync_create(&sync); + + mutex_lock(&mem->lock); + amdgpu_sync_clone(&mem->sync, &sync); + mutex_unlock(&mem->lock); + + ret = amdgpu_sync_wait(&sync, intr); + amdgpu_sync_free(&sync); + return ret; +} + +int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, + struct kgd_mem *mem, void **kptr, uint64_t *size) +{ + int ret; + struct amdgpu_bo *bo = mem->bo; + + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + pr_err("userptr can't be mapped to kernel\n"); + return -EINVAL; + } + + /* delete kgd_mem from kfd_bo_list to avoid re-validating + * this BO in BO's restoring after eviction. + */ + mutex_lock(&mem->process_info->lock); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("Failed to reserve bo. ret %d\n", ret); + goto bo_reserve_failed; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL); + if (ret) { + pr_err("Failed to pin bo. ret %d\n", ret); + goto pin_failed; + } + + ret = amdgpu_bo_kmap(bo, kptr); + if (ret) { + pr_err("Failed to map bo to kernel. ret %d\n", ret); + goto kmap_failed; + } + + amdgpu_amdkfd_remove_eviction_fence( + bo, mem->process_info->eviction_fence, NULL, NULL); + list_del_init(&mem->validate_list.head); + + if (size) + *size = amdgpu_bo_size(bo); + + amdgpu_bo_unreserve(bo); + + mutex_unlock(&mem->process_info->lock); + return 0; + +kmap_failed: + amdgpu_bo_unpin(bo); +pin_failed: + amdgpu_bo_unreserve(bo); +bo_reserve_failed: + mutex_unlock(&mem->process_info->lock); + + return ret; +} + +/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given + * KFD process identified by process_info + * + * @process_info: amdkfd_process_info of the KFD process + * + * After memory eviction, restore thread calls this function. The function + * should be called when the Process is still valid. BO restore involves - + * + * 1. Release old eviction fence and create new one + * 2. Get two copies of PD BO list from all the VMs. Keep one copy as pd_list. + * 3 Use the second PD list and kfd_bo_list to create a list (ctx.list) of + * BOs that need to be reserved. + * 4. Reserve all the BOs + * 5. Validate of PD and PT BOs. + * 6. Validate all KFD BOs using kfd_bo_list and Map them and add new fence + * 7. Add fence to all PD and PT BOs. + * 8. Unreserve all BOs + */ +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +{ + struct amdgpu_bo_list_entry *pd_bo_list; + struct amdkfd_process_info *process_info = info; + struct amdkfd_vm *peer_vm; + struct kgd_mem *mem; + struct bo_vm_reservation_context ctx; + struct amdgpu_amdkfd_fence *new_fence; + int ret = 0, i; + struct list_head duplicate_save; + struct amdgpu_sync sync_obj; + + INIT_LIST_HEAD(&duplicate_save); + INIT_LIST_HEAD(&ctx.list); + INIT_LIST_HEAD(&ctx.duplicates); + + pd_bo_list = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list) + return -ENOMEM; + + i = 0; + mutex_lock(&process_info->lock); + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(&peer_vm->base, &ctx.list, + &pd_bo_list[i++]); + + /* Reserve all BOs and page tables/directory. Add all BOs from + * kfd_bo_list to ctx.list + */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + list_add_tail(&mem->resv_list.head, &ctx.list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, + false, &duplicate_save); + if (ret) { + pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); + goto ttm_reserve_fail; + } + + amdgpu_sync_create(&sync_obj); + + /* Validate PDs and PTs */ + ret = process_validate_vms(process_info); + if (ret) + goto validate_map_fail; + + /* Wait for PD/PTs validate to finish */ + /* FIXME: I think this isn't needed */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + ttm_bo_wait(&bo->tbo, false, false); + } + + /* Validate BOs and map them to GPUVM (update VM page tables). */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) { + + struct amdgpu_bo *bo = mem->bo; + uint32_t domain = mem->domain; + struct kfd_bo_va_list *bo_va_entry; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); + if (ret) { + pr_debug("Memory eviction: Validate BOs failed. Try again\n"); + goto validate_map_fail; + } + + list_for_each_entry(bo_va_entry, &mem->bo_va_list, + bo_list) { + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, + &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PTE failed. Try again\n"); + goto validate_map_fail; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: update PDs failed. Try again\n"); + goto validate_map_fail; + } + + amdgpu_sync_wait(&sync_obj, false); + + /* Release old eviction fence and create new one, because fence only + * goes from unsignaled to signaled, fence cannot be reused. + * Use context and mm from the old fence. + */ + new_fence = amdgpu_amdkfd_fence_create( + process_info->eviction_fence->base.context, + process_info->eviction_fence->mm); + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + *ef = dma_fence_get(&new_fence->base); + + /* Wait for validate to finish and attach new eviction fence */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + ttm_bo_wait(&mem->bo->tbo, false, false); + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list.head) + amdgpu_bo_fence(mem->bo, + &process_info->eviction_fence->base, true); + + /* Attach eviction fence to PD / PT BOs */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *bo = peer_vm->base.root.base.bo; + + amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true); + } + +validate_map_fail: + ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); + amdgpu_sync_free(&sync_obj); +ttm_reserve_fail: + mutex_unlock(&process_info->lock); + kfree(pd_bo_list); + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 216799ccb545b..9157745fce149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -36,6 +36,7 @@ #include #include "amdgpu.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" static bool amdgpu_need_backup(struct amdgpu_device *adev) { @@ -54,6 +55,9 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); + if (bo->kfd_bo) + amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_bo_kunmap(bo); drm_gem_object_release(&bo->gem_base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 1cef944ef98d3..d4dbfe1f842ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -92,6 +92,8 @@ struct amdgpu_bo { struct list_head mn_list; struct list_head shadow_list; }; + + struct kgd_mem *kfd_bo; }; static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5fcb3488a5959..c2fae04d769ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -259,6 +259,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + /* + * Don't verify access for KFD BOs. They don't have a GEM + * object associated with them. + */ + if (abo->kfd_bo) + return 0; + if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->gem_base.vma_node, diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 36c706aff2ace..5984fec78be4a 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -127,6 +127,25 @@ struct tile_config { uint32_t num_ranks; }; + +/* + * Allocation flag domains + */ +#define ALLOC_MEM_FLAGS_VRAM (1 << 0) +#define ALLOC_MEM_FLAGS_GTT (1 << 1) +#define ALLOC_MEM_FLAGS_USERPTR (1 << 2) /* TODO */ +#define ALLOC_MEM_FLAGS_DOORBELL (1 << 3) /* TODO */ + +/* + * Allocation flags attributes/access options. + */ +#define ALLOC_MEM_FLAGS_WRITABLE (1 << 31) +#define ALLOC_MEM_FLAGS_EXECUTABLE (1 << 30) +#define ALLOC_MEM_FLAGS_PUBLIC (1 << 29) +#define ALLOC_MEM_FLAGS_NO_SUBSTITUTE (1 << 28) /* TODO */ +#define ALLOC_MEM_FLAGS_AQL_QUEUE_MEM (1 << 27) +#define ALLOC_MEM_FLAGS_COHERENT (1 << 26) /* For GFXv9 or later */ + /** * struct kfd2kgd_calls * @@ -186,6 +205,41 @@ struct tile_config { * * @get_vram_usage: Returns current VRAM usage * + * @create_process_vm: Create a VM address space for a given process and GPU + * + * @destroy_process_vm: Destroy a VM + * + * @get_process_page_dir: Get physical address of a VM page directory + * + * @set_vm_context_page_table_base: Program page table base for a VMID + * + * @alloc_memory_of_gpu: Allocate GPUVM memory + * + * @free_memory_of_gpu: Free GPUVM memory + * + * @map_memory_to_gpu: Map GPUVM memory into a specific VM address + * space. Allocates and updates page tables and page directories as + * needed. This function may return before all page table updates have + * completed. This allows multiple map operations (on multiple GPUs) + * to happen concurrently. Use sync_memory to synchronize with all + * pending updates. + * + * @unmap_memor_to_gpu: Unmap GPUVM memory from a specific VM address space + * + * @sync_memory: Wait for pending page table updates to complete + * + * @map_gtt_bo_to_kernel: Map a GTT BO for kernel access + * Pins the BO, maps it to kernel address space. Such BOs are never evicted. + * The kernel virtual address remains valid until the BO is freed. + * + * @restore_process_bos: Restore all BOs that belong to the + * process. This is intended for restoring memory mappings after a TTM + * eviction. + * + * @invalidate_tlbs: Invalidate TLBs for a specific PASID + * + * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -275,6 +329,29 @@ struct kfd2kgd_calls { void (*get_cu_info)(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); uint64_t (*get_vram_usage)(struct kgd_dev *kgd); + + int (*create_process_vm)(struct kgd_dev *kgd, void **vm, + void **process_info, struct dma_fence **ef); + void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm); + uint32_t (*get_process_page_dir)(void *vm); + void (*set_vm_context_page_table_base)(struct kgd_dev *kgd, + uint32_t vmid, uint32_t page_table_base); + int (*alloc_memory_of_gpu)(struct kgd_dev *kgd, uint64_t va, + uint64_t size, void *vm, + struct kgd_mem **mem, uint64_t *offset, + uint32_t flags); + int (*free_memory_of_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem); + int (*map_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); + int (*unmap_memory_to_gpu)(struct kgd_dev *kgd, struct kgd_mem *mem, + void *vm); + int (*sync_memory)(struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); + int (*map_gtt_bo_to_kernel)(struct kgd_dev *kgd, struct kgd_mem *mem, + void **kptr, uint64_t *size); + int (*restore_process_bos)(void *process_info, struct dma_fence **ef); + + int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); + int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); }; /** -- GitLab From 4c660c8fbbf7b0d65a402f51e5f30a246cf3c44c Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 6 Feb 2018 20:32:39 -0500 Subject: [PATCH 0019/1646] drm/amdgpu: Add submit IB function for KFD This can be used for flushing caches when not using the HWS. Signed-off-by: Felix Kuehling Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 55 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + .../gpu/drm/amd/include/kgd_kfd_interface.h | 8 +++ 5 files changed, 69 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5a881107a15ad..8a23aa8f9c732 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -367,6 +367,61 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct amdgpu_ring *ring; + struct dma_fence *f = NULL; + int ret; + + switch (engine) { + case KGD_ENGINE_MEC1: + ring = &adev->gfx.compute_ring[0]; + break; + case KGD_ENGINE_SDMA1: + ring = &adev->sdma.instance[0].ring; + break; + case KGD_ENGINE_SDMA2: + ring = &adev->sdma.instance[1].ring; + break; + default: + pr_err("Invalid engine in IB submission: %d\n", engine); + ret = -EINVAL; + goto err; + } + + ret = amdgpu_job_alloc(adev, 1, &job, NULL); + if (ret) + goto err; + + ib = &job->ibs[0]; + memset(ib, 0, sizeof(struct amdgpu_ib)); + + ib->gpu_addr = gpu_addr; + ib->ptr = ib_cmd; + ib->length_dw = ib_len; + /* This works for NO_HWS. TODO: need to handle without knowing VMID */ + job->vmid = vmid; + + ret = amdgpu_ib_schedule(ring, 1, ib, job, &f); + if (ret) { + DRM_ERROR("amdgpu: failed to schedule IB.\n"); + goto err_ib_sched; + } + + ret = dma_fence_wait(f, false); + +err_ib_sched: + dma_fence_put(f); + amdgpu_job_free(job); +err: + return ret; +} + bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { if (adev->kfd) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 05a228d60241c..d7509b706b26b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -124,6 +124,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); + struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 65783d1eddcac..7485c376b90ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -217,6 +217,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 1b5bf1353f0c4..7be453494423c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -177,6 +177,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 5984fec78be4a..1e5c22ceb2568 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -240,6 +240,10 @@ struct tile_config { * * @invalidate_tlbs_vmid: Invalidate TLBs for a specific VMID * + * @submit_ib: Submits an IB to the engine specified by inserting the + * IB to the corresponding ring (ring type). The IB is executed with the + * specified VMID in a user mode context. + * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -352,6 +356,10 @@ struct kfd2kgd_calls { int (*invalidate_tlbs)(struct kgd_dev *kgd, uint16_t pasid); int (*invalidate_tlbs_vmid)(struct kgd_dev *kgd, uint16_t vmid); + + int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, + uint32_t vmid, uint64_t gpu_addr, + uint32_t *ib_cmd, uint32_t ib_len); }; /** -- GitLab From 64d1c3a43a6fb5cef32a085bc17cbbe31945a651 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Fri, 8 Dec 2017 19:22:12 -0500 Subject: [PATCH 0020/1646] drm/amdkfd: Centralize IOMMUv2 code and make it conditional dGPUs work without IOMMUv2. Make IOMMUv2 initialization dependent on ASIC information. Also allow building KFD without IOMMUv2 support. This is still useful for dGPUs and prepares for enabling KFD on architectures that don't support AMD IOMMUv2. v2: * Centralize IOMMUv2 code to avoid #ifdefs in too many places v3: * Imply AMD_IOMMU_V2 in Kconfig Signed-off-by: Felix Kuehling Acked-by: Christian Konig Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/Kconfig | 3 +- drivers/gpu/drm/amd/amdkfd/Makefile | 4 + drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 14 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 127 ++------ drivers/gpu/drm/amd/amdkfd/kfd_events.c | 3 + drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 357 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_iommu.h | 78 +++++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 14 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 138 +-------- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 16 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 +- 11 files changed, 495 insertions(+), 265 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_iommu.h diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index bc5a2945bd2b9..ed2f06c9f3462 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -4,6 +4,7 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" - depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 + depends on DRM_AMDGPU && X86_64 + imply AMD_IOMMU_V2 help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index a317e76ffb5e4..0d0242240c478 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o +ifneq ($(CONFIG_AMD_IOMMU_V2),) +amdkfd-y += kfd_iommu.o +endif + amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2bc2816767a7b..7493f47e7fe19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -22,10 +22,10 @@ #include #include -#include #include "kfd_crat.h" #include "kfd_priv.h" #include "kfd_topology.h" +#include "kfd_iommu.h" /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. @@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, struct crat_subtype_generic *sub_type_hdr; struct crat_subtype_computeunit *cu; struct kfd_cu_info cu_info; - struct amd_iommu_device_info iommu_info; int avail_size = *size; uint32_t total_num_of_cu; int num_of_cache_entries = 0; int cache_mem_filled = 0; int ret = 0; - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; struct kfd_local_mem_info local_mem_info; if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) @@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, /* Check if this node supports IOMMU. During parsing this flag will * translate to HSA_CAP_ATS_PRESENT */ - iommu_info.flags = 0; - if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { - if ((iommu_info.flags & required_iommu_flags) == - required_iommu_flags) - cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; - } + if (!kfd_iommu_check_device(kdev)) + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; crat_table->length += sub_type_hdr->length; crat_table->total_entries++; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 83d6f410890e9..4ac2d61b65d55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,7 +20,9 @@ * OTHER DEALINGS IN THE SOFTWARE. */ +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) #include +#endif #include #include #include @@ -28,9 +30,11 @@ #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" #include "cwsr_trap_handler_gfx8.asm" +#include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 +#ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, .max_pasid_bits = 16, @@ -41,6 +45,7 @@ static const struct kfd_device_info kaveri_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = true, .needs_pci_atomics = false, }; @@ -54,8 +59,10 @@ static const struct kfd_device_info carrizo_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = true, .needs_pci_atomics = false, }; +#endif static const struct kfd_device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, @@ -67,6 +74,7 @@ static const struct kfd_device_info hawaii_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -79,6 +87,7 @@ static const struct kfd_device_info tonga_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -91,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = false, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -103,6 +113,7 @@ static const struct kfd_device_info fiji_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -115,6 +126,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -128,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -140,6 +153,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = false, }; @@ -152,6 +166,7 @@ static const struct kfd_device_info polaris11_device_info = { .num_of_watch_points = 4, .mqd_size_aligned = MQD_SIZE_ALIGNED, .supports_cwsr = true, + .needs_iommu_device = false, .needs_pci_atomics = true, }; @@ -162,6 +177,7 @@ struct kfd_deviceid { }; static const struct kfd_deviceid supported_devices[] = { +#ifdef KFD_SUPPORT_IOMMU_V2 { 0x1304, &kaveri_device_info }, /* Kaveri */ { 0x1305, &kaveri_device_info }, /* Kaveri */ { 0x1306, &kaveri_device_info }, /* Kaveri */ @@ -189,6 +205,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x9875, &carrizo_device_info }, /* Carrizo */ { 0x9876, &carrizo_device_info }, /* Carrizo */ { 0x9877, &carrizo_device_info }, /* Carrizo */ +#endif { 0x67A0, &hawaii_device_info }, /* Hawaii */ { 0x67A1, &hawaii_device_info }, /* Hawaii */ { 0x67A2, &hawaii_device_info }, /* Hawaii */ @@ -302,77 +319,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return kfd; } -static bool device_iommu_pasid_init(struct kfd_dev *kfd) -{ - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - - struct amd_iommu_device_info iommu_info; - unsigned int pasid_limit; - int err; - - err = amd_iommu_device_info(kfd->pdev, &iommu_info); - if (err < 0) { - dev_err(kfd_device, - "error getting iommu info. is the iommu enabled?\n"); - return false; - } - - if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) - != 0); - return false; - } - - pasid_limit = min_t(unsigned int, - (unsigned int)(1 << kfd->device_info->max_pasid_bits), - iommu_info.max_pasids); - - if (!kfd_set_pasid_limit(pasid_limit)) { - dev_err(kfd_device, "error setting pasid limit\n"); - return false; - } - - return true; -} - -static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) -{ - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); - - if (dev) - kfd_process_iommu_unbind_callback(dev, pasid); -} - -/* - * This function called by IOMMU driver on PPR failure - */ -static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, - unsigned long address, u16 flags) -{ - struct kfd_dev *dev; - - dev_warn(kfd_device, - "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", - PCI_BUS_NUM(pdev->devfn), - PCI_SLOT(pdev->devfn), - PCI_FUNC(pdev->devfn), - pasid, - address, - flags); - - dev = kfd_device_by_pci_dev(pdev); - if (!WARN_ON(!dev)) - kfd_signal_iommu_event(dev, pasid, address, - flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); - - return AMD_IOMMU_INV_PRI_RSP_INVALID; -} - static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { @@ -462,11 +408,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto device_queue_manager_error; } - if (!device_iommu_pasid_init(kfd)) { - dev_err(kfd_device, - "Error initializing iommuv2 for device %x:%x\n", - kfd->pdev->vendor, kfd->pdev->device); - goto device_iommu_pasid_error; + if (kfd_iommu_device_init(kfd)) { + dev_err(kfd_device, "Error initializing iommuv2\n"); + goto device_iommu_error; } kfd_cwsr_init(kfd); @@ -486,7 +430,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto out; kfd_resume_error: -device_iommu_pasid_error: +device_iommu_error: device_queue_manager_uninit(kfd->dqm); device_queue_manager_error: kfd_interrupt_exit(kfd); @@ -527,11 +471,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) kfd->dqm->ops.stop(kfd->dqm); - kfd_unbind_processes_from_device(kfd); - - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); - amd_iommu_free_device(kfd->pdev); + kfd_iommu_suspend(kfd); } int kgd2kfd_resume(struct kfd_dev *kfd) @@ -546,19 +486,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd) static int kfd_resume(struct kfd_dev *kfd) { int err = 0; - unsigned int pasid_limit = kfd_get_pasid_limit(); - - err = amd_iommu_init_device(kfd->pdev, pasid_limit); - if (err) - return -ENXIO; - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, - iommu_pasid_shutdown_callback); - amd_iommu_set_invalid_ppr_cb(kfd->pdev, - iommu_invalid_ppr_cb); - err = kfd_bind_processes_to_device(kfd); - if (err) - goto processes_bind_error; + err = kfd_iommu_resume(kfd); + if (err) { + dev_err(kfd_device, + "Failed to resume IOMMU for device %x:%x\n", + kfd->pdev->vendor, kfd->pdev->device); + return err; + } err = kfd->dqm->ops.start(kfd->dqm); if (err) { @@ -571,9 +506,7 @@ static int kfd_resume(struct kfd_dev *kfd) return err; dqm_start_error: -processes_bind_error: - amd_iommu_free_device(kfd->pdev); - + kfd_iommu_suspend(kfd); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 93aae5c1e78bf..6fb9c0d46d631 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -30,6 +30,7 @@ #include #include "kfd_priv.h" #include "kfd_events.h" +#include "kfd_iommu.h" #include /* @@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, } } +#ifdef KFD_SUPPORT_IOMMU_V2 void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, unsigned long address, bool is_write_requested, bool is_execute_requested) @@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, mutex_unlock(&p->event_mutex); kfd_unref_process(p); } +#endif /* KFD_SUPPORT_IOMMU_V2 */ void kfd_signal_hw_exception_event(unsigned int pasid) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c new file mode 100644 index 0000000000000..c71817963eea6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -0,0 +1,357 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include "kfd_priv.h" +#include "kfd_dbgmgr.h" +#include "kfd_topology.h" +#include "kfd_iommu.h" + +static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + +/** kfd_iommu_check_device - Check whether IOMMU is available for device + */ +int kfd_iommu_check_device(struct kfd_dev *kfd) +{ + struct amd_iommu_device_info iommu_info; + int err; + + if (!kfd->device_info->needs_iommu_device) + return -ENODEV; + + iommu_info.flags = 0; + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err) + return err; + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) + return -ENODEV; + + return 0; +} + +/** kfd_iommu_device_init - Initialize IOMMU for device + */ +int kfd_iommu_device_init(struct kfd_dev *kfd) +{ + struct amd_iommu_device_info iommu_info; + unsigned int pasid_limit; + int err; + + if (!kfd->device_info->needs_iommu_device) + return 0; + + iommu_info.flags = 0; + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err < 0) { + dev_err(kfd_device, + "error getting iommu info. is the iommu enabled?\n"); + return -ENODEV; + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { + dev_err(kfd_device, + "error required iommu flags ats %i, pri %i, pasid %i\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) + != 0); + return -ENODEV; + } + + pasid_limit = min_t(unsigned int, + (unsigned int)(1 << kfd->device_info->max_pasid_bits), + iommu_info.max_pasids); + + if (!kfd_set_pasid_limit(pasid_limit)) { + dev_err(kfd_device, "error setting pasid limit\n"); + return -EBUSY; + } + + return 0; +} + +/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process + * + * Binds the given process to the given device using its PASID. This + * enables IOMMUv2 address translation for the process on the device. + * + * This function assumes that the process mutex is held. + */ +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) +{ + struct kfd_dev *dev = pdd->dev; + struct kfd_process *p = pdd->process; + int err; + + if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND) + return 0; + + if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { + pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); + return -EINVAL; + } + + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); + if (!err) + pdd->bound = PDD_BOUND; + + return err; +} + +/** kfd_iommu_unbind_process - Unbind process from all devices + * + * This removes all IOMMU device bindings of the process. To be used + * before process termination. + */ +void kfd_iommu_unbind_process(struct kfd_process *p) +{ + struct kfd_process_device *pdd; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + if (pdd->bound == PDD_BOUND) + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); +} + +/* Callback for process shutdown invoked by the IOMMU driver */ +static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) +{ + struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); + struct kfd_process *p; + struct kfd_process_device *pdd; + + if (!dev) + return; + + /* + * Look for the process that matches the pasid. If there is no such + * process, we either released it in amdkfd's own notifier, or there + * is a bug. Unfortunately, there is no way to tell... + */ + p = kfd_lookup_process_by_pasid(pasid); + if (!p) + return; + + pr_debug("Unbinding process %d from IOMMU\n", pasid); + + mutex_lock(kfd_get_dbgmgr_mutex()); + + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { + kfd_dbgmgr_destroy(dev->dbgmgr); + dev->dbgmgr = NULL; + } + } + + mutex_unlock(kfd_get_dbgmgr_mutex()); + + mutex_lock(&p->mutex); + + pdd = kfd_get_process_device_data(dev, p); + if (pdd) + /* For GPU relying on IOMMU, we need to dequeue here + * when PASID is still bound. + */ + kfd_process_dequeue_from_device(pdd); + + mutex_unlock(&p->mutex); + + kfd_unref_process(p); +} + +/* This function called by IOMMU driver on PPR failure */ +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, + unsigned long address, u16 flags) +{ + struct kfd_dev *dev; + + dev_warn(kfd_device, + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", + PCI_BUS_NUM(pdev->devfn), + PCI_SLOT(pdev->devfn), + PCI_FUNC(pdev->devfn), + pasid, + address, + flags); + + dev = kfd_device_by_pci_dev(pdev); + if (!WARN_ON(!dev)) + kfd_signal_iommu_event(dev, pasid, address, + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); + + return AMD_IOMMU_INV_PRI_RSP_INVALID; +} + +/* + * Bind processes do the device that have been temporarily unbound + * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. + */ +static int kfd_bind_processes_to_device(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + int err = 0; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(kfd, p); + + if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { + mutex_unlock(&p->mutex); + continue; + } + + err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, + p->lead_thread); + if (err < 0) { + pr_err("Unexpected pasid %d binding failure\n", + p->pasid); + mutex_unlock(&p->mutex); + break; + } + + pdd->bound = PDD_BOUND; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return err; +} + +/* + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These + * processes will be restored to PDD_BOUND state in + * kfd_bind_processes_to_device. + */ +static void kfd_unbind_processes_from_device(struct kfd_dev *kfd) +{ + struct kfd_process_device *pdd; + struct kfd_process *p; + unsigned int temp; + + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + mutex_lock(&p->mutex); + pdd = kfd_get_process_device_data(kfd, p); + + if (WARN_ON(!pdd)) { + mutex_unlock(&p->mutex); + continue; + } + + if (pdd->bound == PDD_BOUND) + pdd->bound = PDD_BOUND_SUSPENDED; + mutex_unlock(&p->mutex); + } + + srcu_read_unlock(&kfd_processes_srcu, idx); +} + +/** kfd_iommu_suspend - Prepare IOMMU for suspend + * + * This unbinds processes from the device and disables the IOMMU for + * the device. + */ +void kfd_iommu_suspend(struct kfd_dev *kfd) +{ + if (!kfd->device_info->needs_iommu_device) + return; + + kfd_unbind_processes_from_device(kfd); + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); +} + +/** kfd_iommu_resume - Restore IOMMU after resume + * + * This reinitializes the IOMMU for the device and re-binds previously + * suspended processes to the device. + */ +int kfd_iommu_resume(struct kfd_dev *kfd) +{ + unsigned int pasid_limit; + int err; + + if (!kfd->device_info->needs_iommu_device) + return 0; + + pasid_limit = kfd_get_pasid_limit(); + + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err) + return -ENXIO; + + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, + iommu_invalid_ppr_cb); + + err = kfd_bind_processes_to_device(kfd); + if (err) { + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); + return err; + } + + return 0; +} + +extern bool amd_iommu_pc_supported(void); +extern u8 amd_iommu_pc_get_max_banks(u16 devid); +extern u8 amd_iommu_pc_get_max_counters(u16 devid); + +/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology + */ +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) +{ + struct kfd_perf_properties *props; + + if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT)) + return 0; + + if (!amd_iommu_pc_supported()) + return 0; + + props = kfd_alloc_struct(props); + if (!props) + return -ENOMEM; + strcpy(props->block_name, "iommu"); + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ + list_add_tail(&props->list, &kdev->perf_props); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h new file mode 100644 index 0000000000000..dd23d9fdf6a82 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h @@ -0,0 +1,78 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __KFD_IOMMU_H__ +#define __KFD_IOMMU_H__ + +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) + +#define KFD_SUPPORT_IOMMU_V2 + +int kfd_iommu_check_device(struct kfd_dev *kfd); +int kfd_iommu_device_init(struct kfd_dev *kfd); + +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd); +void kfd_iommu_unbind_process(struct kfd_process *p); + +void kfd_iommu_suspend(struct kfd_dev *kfd); +int kfd_iommu_resume(struct kfd_dev *kfd); + +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev); + +#else + +static inline int kfd_iommu_check_device(struct kfd_dev *kfd) +{ + return -ENODEV; +} +static inline int kfd_iommu_device_init(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kfd_iommu_bind_process_to_device( + struct kfd_process_device *pdd) +{ + return 0; +} +static inline void kfd_iommu_unbind_process(struct kfd_process *p) +{ + /* empty */ +} + +static inline void kfd_iommu_suspend(struct kfd_dev *kfd) +{ + /* empty */ +} +static inline int kfd_iommu_resume(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) +{ + return 0; +} + +#endif /* defined(CONFIG_AMD_IOMMU_V2) */ + +#endif /* __KFD_IOMMU_H__ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 594f853553971..f12eb5d98be8d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -158,6 +158,7 @@ struct kfd_device_info { uint8_t num_of_watch_points; uint16_t mqd_size_aligned; bool supports_cwsr; + bool needs_iommu_device; bool needs_pci_atomics; }; @@ -517,15 +518,15 @@ struct kfd_process_device { uint64_t scratch_base; uint64_t scratch_limit; - /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ - enum kfd_pdd_bound bound; - /* Flag used to tell the pdd has dequeued from the dqm. * This is used to prevent dev->dqm->ops.process_termination() from * being called twice when it is already called in IOMMU callback * function. */ bool already_dequeued; + + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ + enum kfd_pdd_bound bound; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -590,6 +591,10 @@ struct kfd_process { bool signal_event_limit_reached; }; +#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ +extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +extern struct srcu_struct kfd_processes_srcu; + /** * Ioctl function type. * @@ -617,9 +622,6 @@ void kfd_unref_process(struct kfd_process *p); struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p); -int kfd_bind_processes_to_device(struct kfd_dev *dev); -void kfd_unbind_processes_from_device(struct kfd_dev *dev); -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p); struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4ff5f0fe6db83..e9aee76ceba92 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,16 +35,16 @@ struct mm_struct; #include "kfd_priv.h" #include "kfd_dbgmgr.h" +#include "kfd_iommu.h" /* * List of struct kfd_process (field kfd_process). * Unique/indexed by mm_struct* */ -#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ -static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); static DEFINE_MUTEX(kfd_processes_mutex); -DEFINE_STATIC_SRCU(kfd_processes_srcu); +DEFINE_SRCU(kfd_processes_srcu); static struct workqueue_struct *kfd_process_wq; @@ -173,14 +173,8 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - struct kfd_process_device *pdd; - pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); - - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - if (pdd->bound == PDD_BOUND) - amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); - } + kfd_iommu_unbind_process(p); kfd_process_destroy_pdds(p); @@ -429,133 +423,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } - if (pdd->bound == PDD_BOUND) { - return pdd; - } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { - pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); - return ERR_PTR(-EINVAL); - } - - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); - if (err < 0) + err = kfd_iommu_bind_process_to_device(pdd); + if (err) return ERR_PTR(err); - pdd->bound = PDD_BOUND; - return pdd; } -/* - * Bind processes do the device that have been temporarily unbound - * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. - */ -int kfd_bind_processes_to_device(struct kfd_dev *dev) -{ - struct kfd_process_device *pdd; - struct kfd_process *p; - unsigned int temp; - int err = 0; - - int idx = srcu_read_lock(&kfd_processes_srcu); - - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(dev, p); - - if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { - mutex_unlock(&p->mutex); - continue; - } - - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, - p->lead_thread); - if (err < 0) { - pr_err("Unexpected pasid %d binding failure\n", - p->pasid); - mutex_unlock(&p->mutex); - break; - } - - pdd->bound = PDD_BOUND; - mutex_unlock(&p->mutex); - } - - srcu_read_unlock(&kfd_processes_srcu, idx); - - return err; -} - -/* - * Mark currently bound processes as PDD_BOUND_SUSPENDED. These - * processes will be restored to PDD_BOUND state in - * kfd_bind_processes_to_device. - */ -void kfd_unbind_processes_from_device(struct kfd_dev *dev) -{ - struct kfd_process_device *pdd; - struct kfd_process *p; - unsigned int temp; - - int idx = srcu_read_lock(&kfd_processes_srcu); - - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { - mutex_lock(&p->mutex); - pdd = kfd_get_process_device_data(dev, p); - - if (WARN_ON(!pdd)) { - mutex_unlock(&p->mutex); - continue; - } - - if (pdd->bound == PDD_BOUND) - pdd->bound = PDD_BOUND_SUSPENDED; - mutex_unlock(&p->mutex); - } - - srcu_read_unlock(&kfd_processes_srcu, idx); -} - -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) -{ - struct kfd_process *p; - struct kfd_process_device *pdd; - - /* - * Look for the process that matches the pasid. If there is no such - * process, we either released it in amdkfd's own notifier, or there - * is a bug. Unfortunately, there is no way to tell... - */ - p = kfd_lookup_process_by_pasid(pasid); - if (!p) - return; - - pr_debug("Unbinding process %d from IOMMU\n", pasid); - - mutex_lock(kfd_get_dbgmgr_mutex()); - - if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { - kfd_dbgmgr_destroy(dev->dbgmgr); - dev->dbgmgr = NULL; - } - } - - mutex_unlock(kfd_get_dbgmgr_mutex()); - - mutex_lock(&p->mutex); - - pdd = kfd_get_process_device_data(dev, p); - if (pdd) - /* For GPU relying on IOMMU, we need to dequeue here - * when PASID is still bound. - */ - kfd_process_dequeue_from_device(pdd); - - mutex_unlock(&p->mutex); - - kfd_unref_process(p); -} - struct kfd_process_device *kfd_get_first_process_device_data( struct kfd_process *p) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 7783250e1c6dd..2506155355635 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -35,6 +35,7 @@ #include "kfd_crat.h" #include "kfd_topology.h" #include "kfd_device_queue_manager.h" +#include "kfd_iommu.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; @@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm, */ static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) { - struct kfd_perf_properties *props; - - if (amd_iommu_pc_supported()) { - props = kfd_alloc_struct(props); - if (!props) - return -ENOMEM; - strcpy(props->block_name, "iommu"); - props->max_concurrent = amd_iommu_pc_get_max_banks(0) * - amd_iommu_pc_get_max_counters(0); /* assume one iommu */ - list_add_tail(&props->list, &kdev->perf_props); - } - - return 0; + /* These are the only counters supported so far */ + return kfd_iommu_add_perf_counters(kdev); } /* kfd_add_non_crat_information - Add information that is not currently diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 53fca1f454014..c0be2be6dca52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -25,7 +25,7 @@ #include #include -#include "kfd_priv.h" +#include "kfd_crat.h" #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 @@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device( struct list_head *device_list); void kfd_release_topology_device_list(struct list_head *device_list); -extern bool amd_iommu_pc_supported(void); -extern u8 amd_iommu_pc_get_max_banks(u16 devid); -extern u8 amd_iommu_pc_get_max_counters(u16 devid); - #endif /* __KFD_TOPOLOGY_H__ */ -- GitLab From 53421c2fe99ce16838639ad89d772d914a119a49 Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Mon, 4 Dec 2017 15:22:10 -0800 Subject: [PATCH 0021/1646] drm/i915: Apply Display WA #1183 on skl, kbl, and cfl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Display WA #1183 was recently added to workaround "Failures when enabling DPLL0 with eDP link rate 2.16 or 4.32 GHz and CD clock frequency 308.57 or 617.14 MHz (CDCLK_CTL CD Frequency Select 10b or 11b) used in this enabling or in previous enabling." This workaround was designed to minimize the impact only to save the bad case with that link rates. But HW engineers indicated that it should be safe to apply broadly, although they were expecting the DPLL0 link rate to be unchanged on runtime. We need to cover 2 cases: when we are in fact enabling DPLL0 and when we are just changing the frequency with small differences. This is based on previous patch by Rodrigo Vivi with suggestions from Ville Syrjälä. Cc: Arthur J Runyan Cc: Ville Syrjälä Cc: Rodrigo Vivi Cc: stable@vger.kernel.org Signed-off-by: Lucas De Marchi Reviewed-by: Ville Syrjälä Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20171204232210.4958-1-lucas.demarchi@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_cdclk.c | 35 ++++++++++++++++++------- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 +++++++ 3 files changed, 38 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 41285bec8fc03..966e4df9700ec 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7029,6 +7029,7 @@ enum { #define RESET_PCH_HANDSHAKE_ENABLE (1<<4) #define GEN8_CHICKEN_DCPR_1 _MMIO(0x46430) +#define SKL_SELECT_ALTERNATE_DC_EXIT (1<<30) #define MASK_WAKEMEM (1<<13) #define SKL_DFSM _MMIO(0x51000) @@ -8585,6 +8586,7 @@ enum skl_power_gate { #define BXT_CDCLK_CD2X_DIV_SEL_2 (2<<22) #define BXT_CDCLK_CD2X_DIV_SEL_4 (3<<22) #define BXT_CDCLK_CD2X_PIPE(pipe) ((pipe)<<20) +#define CDCLK_DIVMUX_CD_OVERRIDE (1<<19) #define BXT_CDCLK_CD2X_PIPE_NONE BXT_CDCLK_CD2X_PIPE(3) #define BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16) #define CDCLK_FREQ_DECIMAL_MASK (0x7ff) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 9c5ceb98d48f4..d77e2bec1e29d 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -931,16 +931,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) { - int min_cdclk = skl_calc_cdclk(0, vco); u32 val; WARN_ON(vco != 8100000 && vco != 8640000); - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - /* * We always enable DPLL0 with the lowest link rate possible, but still * taking into account the VCO required to operate the eDP panel at the @@ -994,7 +988,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, { int cdclk = cdclk_state->cdclk; int vco = cdclk_state->vco; - u32 freq_select; + u32 freq_select, cdclk_ctl; int ret; mutex_lock(&dev_priv->pcu_lock); @@ -1009,7 +1003,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, return; } - /* set CDCLK_CTL */ + /* Choose frequency for this cdclk */ switch (cdclk) { default: WARN_ON(cdclk != dev_priv->cdclk.hw.ref); @@ -1036,10 +1030,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); + cdclk_ctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk.hw.vco != vco) { + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + } + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); + POSTING_READ(CDCLK_CTL); + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk); + I915_WRITE(CDCLK_CTL, cdclk_ctl); + + /* Wa Display #1183: skl,kbl,cfl */ + cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE; + I915_WRITE(CDCLK_CTL, cdclk_ctl); POSTING_READ(CDCLK_CTL); /* inform PCU of the change */ diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index db9d57f395341..d758da6156a82 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -600,6 +600,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Enabling DC5\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5); } @@ -627,6 +632,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("Disabling DC6\n"); + /* Wa Display #1183: skl,kbl,cfl */ + if (IS_GEN9_BC(dev_priv)) + I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | + SKL_SELECT_ALTERNATE_DC_EXIT); + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); } -- GitLab From c8dae55a8ced625038d52d26e48273707fab2688 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 20 Dec 2017 11:50:17 +0100 Subject: [PATCH 0022/1646] drm/i915/vlv: Add cdclk workaround for DSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least on the Chuwi Vi8 (non pro/plus) the LCD panel will show an image shifted aprox. 20% to the left (with wraparound) and sometimes also wrong colors, showing that the panel controller is starting with sampling the datastream somewhere mid-line. This happens after the first blanking and re-init of the panel. After looking at drm.debug output I noticed that initially we inherit the cdclk of 333333 KHz set by the GOP, but after the re-init we picked 266667 KHz, which turns out to be the cause of this problem, a quick hack to hard code the cdclk to 333333 KHz makes the problem go away. I've tested this on various Bay Trail devices, to make sure this not does cause regressions on other devices and the higher cdclk does not cause any problems on the following devices: -GP-electronic T701 1024x600 333333 KHz cdclk after this patch -PEAQ C1010 1920x1200 333333 KHz cdclk after this patch -PoV mobii-wintab-800w 800x1280 333333 KHz cdclk after this patch -Asus Transformer-T100TA 1368x768 320000 KHz cdclk after this patch Also interesting wrt this is the comment in vlv_calc_cdclk about the existing workaround to avoid 200 Mhz as clock because that causes issues in some cases. This commit extends the "do not use 200 Mhz" workaround with an extra check to require atleast 320000 KHz (avoiding 266667 KHz) when a DSI panel is active. Changes in v2: -Change the commit message and the code comment to not treat the GOP as a reference, the GOP should not be treated as a reference Acked-by: Ville Syrjälä Signed-off-by: Hans de Goede Link: https://patchwork.freedesktop.org/patch/msgid/20171220105017.11259-1-hdegoede@redhat.com --- drivers/gpu/drm/i915/intel_cdclk.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index d77e2bec1e29d..ca36321eafac2 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1940,6 +1940,14 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) min_cdclk = max(2 * 96000, min_cdclk); + /* + * On Valleyview some DSI panels lose (v|h)sync when the clock is lower + * than 320000KHz. + */ + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI) && + IS_VALLEYVIEW(dev_priv)) + min_cdclk = max(320000, min_cdclk); + if (min_cdclk > dev_priv->max_cdclk_freq) { DRM_DEBUG_KMS("required cdclk (%d kHz) exceeds max (%d kHz)\n", min_cdclk, dev_priv->max_cdclk_freq); -- GitLab From 35954e88bc5091ef82134e8c99c5bcc3b4c6772a Mon Sep 17 00:00:00 2001 From: "C, Ramalingam" Date: Wed, 8 Nov 2017 00:08:23 +0530 Subject: [PATCH 0023/1646] drm/i915: Runtime disable for eDP DRRS Debugfs called i915_drrs_ctl is added to enable and disable the eDP DRRS. Writing 0 will disable the feature, whereas non-zero will enable the feature. Possibility of disabling the DRRS, enables the testing of the frontbuffer tracking based features (FBC, DRRS and PSR) as standalone or any combination of the set. [v2]: ctl interface is moved from module parameter to debugfs [Rodrigo] Signed-off-by: C, Ramalingam Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1510079903-29441-1-git-send-email-ramalingam.c@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 43 ++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e968aeae1d845..ffa5f3f8222d7 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4606,6 +4606,46 @@ static const struct file_operations i915_hpd_storm_ctl_fops = { .write = i915_hpd_storm_ctl_write }; +static int i915_drrs_ctl_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + struct drm_device *dev = &dev_priv->drm; + struct intel_crtc *intel_crtc; + struct intel_encoder *encoder; + struct intel_dp *intel_dp; + + if (INTEL_GEN(dev_priv) < 7) + return -ENODEV; + + drm_modeset_lock_all(dev); + for_each_intel_crtc(dev, intel_crtc) { + if (!intel_crtc->base.state->active || + !intel_crtc->config->has_drrs) + continue; + + for_each_encoder_on_crtc(dev, &intel_crtc->base, encoder) { + if (encoder->type != INTEL_OUTPUT_EDP) + continue; + + DRM_DEBUG_DRIVER("Manually %sabling DRRS. %llu\n", + val ? "en" : "dis", val); + + intel_dp = enc_to_intel_dp(&encoder->base); + if (val) + intel_edp_drrs_enable(intel_dp, + intel_crtc->config); + else + intel_edp_drrs_disable(intel_dp, + intel_crtc->config); + } + } + drm_modeset_unlock_all(dev); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_drrs_ctl_fops, NULL, i915_drrs_ctl_set, "%llu\n"); + static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4683,7 +4723,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_active", &i915_displayport_test_active_fops}, {"i915_guc_log_control", &i915_guc_log_control_fops}, {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, - {"i915_ipc_status", &i915_ipc_status_fops} + {"i915_ipc_status", &i915_ipc_status_fops}, + {"i915_drrs_ctl", &i915_drrs_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) -- GitLab From ce6e21370d5cacff3cd06c997af65316e7bba577 Mon Sep 17 00:00:00 2001 From: "C, Ramalingam" Date: Mon, 20 Nov 2017 09:53:47 +0530 Subject: [PATCH 0024/1646] i915/drrs/debugfs: psr status info addition Existing debugfs entry i915_drrs_status is updated with whether PSR is the cause for DRRS disabled state. [v2]: Dropped the module parameter details as ctl moved from module parameter to debugfs. [Rodrigo] [v3]: Crtc ID information is dropped as there is no immediate usecase. [Rodrigo]. Signed-off-by: C, Ramalingam Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1511151827-6596-1-git-send-email-ramalingam.c@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ffa5f3f8222d7..d81cb25130693 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3363,7 +3363,10 @@ static void drrs_status_per_crtc(struct seq_file *m, /* disable_drrs() will make drrs->dp NULL */ if (!drrs->dp) { - seq_puts(m, "Idleness DRRS: Disabled"); + seq_puts(m, "Idleness DRRS: Disabled\n"); + if (dev_priv->psr.enabled) + seq_puts(m, + "\tAs PSR is enabled, DRRS is not enabled\n"); mutex_unlock(&drrs->mutex); return; } -- GitLab From 85a9c0bc0888be8b140de06ae6486208bc1424f9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 23 Dec 2017 11:04:06 +0000 Subject: [PATCH 0025/1646] drm/i915/selftests: Tweak igt_ggtt_page to speed it up Reduce the number of GGTT PTE operations to speed the test up, but we reduce the likelihood of spotting a coherency error in those operations. However, Broxton is sporadically timing on this test, presumably because its GGTT operations are all uncached. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171223110407.21402-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 4a28d713a7d82..bb7cf998fc659 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1052,35 +1052,38 @@ static int igt_ggtt_page(void *arg) memset(&tmp, 0, sizeof(tmp)); err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, - 1024 * PAGE_SIZE, 0, + count * PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE, 0, ggtt->mappable_end, DRM_MM_INSERT_LOW); if (err) goto out_unpin; + intel_runtime_pm_get(i915); + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + n * PAGE_SIZE; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + } + order = i915_random_order(count, &prng); if (!order) { err = -ENOMEM; goto out_remove; } - intel_runtime_pm_get(i915); for (n = 0; n < count; n++) { u64 offset = tmp.start + order[n] * PAGE_SIZE; u32 __iomem *vaddr; - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, 0), - offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); - - wmb(); - ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); } + i915_gem_flush_ggtt_writes(i915); i915_random_reorder(order, count, &prng); for (n = 0; n < count; n++) { @@ -1088,16 +1091,10 @@ static int igt_ggtt_page(void *arg) u32 __iomem *vaddr; u32 val; - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, 0), - offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); val = ioread32(vaddr + n); io_mapping_unmap_atomic(vaddr); - ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); - if (val != n) { pr_err("insert page failed: found %d, expected %d\n", val, n); @@ -1105,10 +1102,11 @@ static int igt_ggtt_page(void *arg) break; } } - intel_runtime_pm_put(i915); kfree(order); out_remove: + ggtt->base.clear_range(&ggtt->base, tmp.start, tmp.size); + intel_runtime_pm_put(i915); drm_mm_remove_node(&tmp); out_unpin: i915_gem_object_unpin_pages(obj); -- GitLab From 9861b6682804a9db82600e0adf592bfa6deec377 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 23 Dec 2017 11:04:07 +0000 Subject: [PATCH 0026/1646] drm/i915/selftests: Allow random array allocation to fail In the selftests, we don't want to force an oom and would rather ENOMEM be reported. In this case, we would rather the allocation for the random array to fail. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20171223110407.21402-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_random.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c index 2088ae57aa899..1f415ce470183 100644 --- a/drivers/gpu/drm/i915/selftests/i915_random.c +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -57,7 +57,8 @@ unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) { unsigned int *order, i; - order = kmalloc_array(count, sizeof(*order), GFP_KERNEL | __GFP_NOWARN); + order = kmalloc_array(count, sizeof(*order), + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!order) return order; -- GitLab From 27ec1843169973fbdf224d831e2a40f441407ab0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:17 +0000 Subject: [PATCH 0027/1646] drm/i915: Delete defunct i915_gem_request_assign() i915_gem_request_assign() is not used since commit 77f0d0e925e8 ("drm/i915/execlists: Pack the count into the low bits of the port.request"), so remove the defunct code References: 77f0d0e925e8 ("drm/i915/execlists: Pack the count into the low bits of the port.request") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0d6d39f19506d..04ee289d6cab4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -245,18 +245,6 @@ i915_gem_request_put(struct drm_i915_gem_request *req) dma_fence_put(&req->fence); } -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, - struct drm_i915_gem_request *src) -{ - if (src) - i915_gem_request_get(src); - - if (*pdst) - i915_gem_request_put(*pdst); - - *pdst = src; -} - /** * i915_gem_request_global_seqno - report the current global seqno * @request - the request -- GitLab From 42232213614ddbef854ef368092e39c4e8347877 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:32 +0000 Subject: [PATCH 0028/1646] drm/i915/execlists: Clear context-switch interrupt earlier in the reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the clearing of the CS-interrupt into the engine reset phase, before the current init-hw phase. This helps clarify that we clear the pending interrupts prior to any restarting of the execlists. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 33 ++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 739c33b07c590..8f699a1c9ed5e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1493,18 +1493,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); - /* - * Clear any pending interrupt state. - * - * We do it twice out of paranoia that some of the IIR are double - * buffered, and if we only reset it once there may still be - * an interrupt pending. - */ - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); execlists->csb_head = -1; execlists->active = 0; @@ -1551,6 +1539,24 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) return init_workarounds_ring(engine); } +static void reset_irq(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are double + * buffered, and if we only reset it once there may still be + * an interrupt pending. + */ + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); +} + static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { @@ -1560,6 +1566,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, GEM_TRACE("%s seqno=%x\n", engine->name, request ? request->global_seqno : 0); + + reset_irq(engine); + spin_lock_irqsave(&engine->timeline->lock, flags); /* -- GitLab From 693cfbf0589040536c673d6ed3d262ea3ecf9aea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:33 +0000 Subject: [PATCH 0029/1646] drm/i915/execlists: Record elsp offset during engine setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we record the elsp register offset inside init-hw but we only need to do it once during engine setup (after we know the mmio iomapping). Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8f699a1c9ed5e..c23ffde363555 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1496,9 +1496,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) execlists->csb_head = -1; execlists->active = 0; - execlists->elsp = - dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); - /* After a GPU reset, we may have requests to replay */ if (execlists->first) tasklet_schedule(&execlists->tasklet); @@ -2007,6 +2004,9 @@ static int logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; + engine->execlists.elsp = + engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + return 0; error: -- GitLab From f3c9d4075771b4fbaa0d793d2c875c61e2e1cb12 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:34 +0000 Subject: [PATCH 0030/1646] drm/i915/execlists: Tidy enabling execlists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the register settings for enabling execlists into its own function for clarity. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c23ffde363555..e114776b88367 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1469,9 +1469,20 @@ static u8 gtiir[] = { [VECS] = 3, }; -static int gen8_init_common_ring(struct intel_engine_cs *engine) +static void enable_execlists(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + + I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); + I915_WRITE(RING_MODE_GEN7(engine), + _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); + I915_WRITE(RING_HWS_PGA(engine->mmio_base), + engine->status_page.ggtt_offset); + POSTING_READ(RING_HWS_PGA(engine->mmio_base)); +} + +static int gen8_init_common_ring(struct intel_engine_cs *engine) +{ struct intel_engine_execlists * const execlists = &engine->execlists; int ret; @@ -1482,13 +1493,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) intel_engine_reset_breadcrumbs(engine); intel_engine_init_hangcheck(engine); - I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); - I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); - I915_WRITE(RING_HWS_PGA(engine->mmio_base), - engine->status_page.ggtt_offset); - POSTING_READ(RING_HWS_PGA(engine->mmio_base)); - + enable_execlists(engine); DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); @@ -1915,6 +1920,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx(engine); + engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); -- GitLab From 65c475c6dd607226159a774d7c3d7ff4b6e00255 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:31 +0000 Subject: [PATCH 0031/1646] drm/i915: Hold rpm wakeref for modifying the global seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To modify the global seqno may require rewriting a few registers, which requires us to hold the rpm wakeref. We must therefore take it around the call to i915_gem_set_global_seqno() in debugfs, on behalf of the user. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index d81cb25130693..2bb63073d73fd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -988,7 +988,10 @@ i915_next_seqno_set(void *data, u64 val) if (ret) return ret; + intel_runtime_pm_get(dev_priv); ret = i915_gem_set_global_seqno(dev, val); + intel_runtime_pm_put(dev_priv); + mutex_unlock(&dev->struct_mutex); return ret; -- GitLab From 83cc84c5a848748d4bb190bc179a7620491a925f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:25 +0000 Subject: [PATCH 0032/1646] drm/i915: Assert all signalers we depended on did indeed signal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Back up our comment that all signalers should have been signaled before we ourselves were retired with an assert to that effect. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 9 ++++++++- drivers/gpu/drm/i915/i915_gem_request.h | 8 ++++++++ drivers/gpu/drm/i915/intel_lrc.c | 2 +- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d575109f7a7fe..72bdc203716fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -161,12 +161,16 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) GEM_BUG_ON(!list_empty(&pt->link)); - /* Everyone we depended upon (the fences we wait to be signaled) + /* + * Everyone we depended upon (the fences we wait to be signaled) * should retire before us and remove themselves from our list. * However, retirement is run independently on each timeline and * so we may be called out-of-order. */ list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { + GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + list_del(&dep->wait_link); if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(i915, dep); @@ -174,6 +178,9 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) /* Remove ourselves from everyone who depends upon us */ list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != pt); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + list_del(&dep->signal_link); if (dep->flags & I915_DEPENDENCY_ALLOC) i915_dependency_free(i915, dep); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 04ee289d6cab4..6c607f8dbf923 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -329,6 +329,14 @@ i915_gem_request_completed(const struct drm_i915_gem_request *req) return __i915_gem_request_completed(req, seqno); } +static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +{ + const struct drm_i915_gem_request *rq = + container_of(pt, const struct drm_i915_gem_request, priotree); + + return i915_gem_request_completed(rq); +} + /* We treat requests as fences. This is not be to confused with our * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync. * We use the fences to synchronize access from the CPU with activity on the diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e114776b88367..04c35e4dd7c64 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1037,7 +1037,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { - if (i915_gem_request_completed(pt_to_request(p->signaler))) + if (i915_priotree_signaled(p->signaler)) continue; GEM_BUG_ON(p->signaler->priority < pt->priority); -- GitLab From ce01b173773a09e2b5d3007c26d11cbd7737c3fe Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:26 +0000 Subject: [PATCH 0033/1646] drm/i915/execlists: Assert there are no simple cycles in the dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dependency chain must be an acyclic graph. This is checked by the swfence, but for sanity, also do a simple check that we do not corrupt our list iteration in execlists_schedule() by a shallow dependency cycle. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 04c35e4dd7c64..7d1ce21fa051e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1011,7 +1011,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) stack.signaler = &request->priotree; list_add(&stack.dfs_link, &dfs); - /* Recursively bump all dependent priorities to match the new request. + /* + * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: * static void update_priorities(struct i915_priotree *pt, prio) { @@ -1031,12 +1032,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_for_each_entry_safe(dep, p, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; - /* Within an engine, there can be no cycle, but we may + /* + * Within an engine, there can be no cycle, but we may * refer to the same dependency chain multiple times * (redundant dependencies are not eliminated) and across * engines. */ list_for_each_entry(p, &pt->signalers_list, signal_link) { + GEM_BUG_ON(p == dep); /* no cycles! */ + if (i915_priotree_signaled(p->signaler)) continue; @@ -1048,7 +1052,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) list_safe_reset_next(dep, p, dfs_link); } - /* If we didn't need to bump any existing priorities, and we haven't + /* + * If we didn't need to bump any existing priorities, and we haven't * yet submitted this request (i.e. there is no potential race with * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. -- GitLab From 2221c5b7ddfae1ed1ca8b21a9092d0878281293a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 15:12:27 +0000 Subject: [PATCH 0034/1646] drm/i915/execlists: Reduce list_for_each_safe+list_safe_reset_next MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After staring at the list_for_each_safe macros for a bit, our current invocation of list_safe_reset_next in execlists_schedule() simply reduces to list_for_each. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102151235.3949-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 7d1ce21fa051e..4e150b095a117 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1029,7 +1029,7 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) * end result is a topological list of requests in reverse order, the * last element in the list is the request we must execute first. */ - list_for_each_entry_safe(dep, p, &dfs, dfs_link) { + list_for_each_entry(dep, &dfs, dfs_link) { struct i915_priotree *pt = dep->signaler; /* @@ -1048,8 +1048,6 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) if (prio > READ_ONCE(p->signaler->priority)) list_move_tail(&p->dfs_link, &dfs); } - - list_safe_reset_next(dep, p, dfs_link); } /* -- GitLab From c24f0c1de481c1c0f914e937f409bac2c90b90b3 Mon Sep 17 00:00:00 2001 From: Sujaritha Sundaresan Date: Tue, 2 Jan 2018 13:20:24 -0800 Subject: [PATCH 0035/1646] drm/i915/guc : Decoupling ADS and logs from submission The Additional Data Struct (ADS) contains objects that are required by GuC post FW load and are not necessarily submission-only. Even with submission disabled we may require something inside the ADS, so it makes more sense for them to be always created. Similarly, we need to access GuC logs and even if GuC submission is disabled, to debug issues with GuC loading or with whatever we're using GuC for. v2: re-wording commit message (Sagar) Signed-off-by: Sujaritha Sundaresan Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1514928025-29659-1-git-send-email-sujaritha.sundaresan@intel.com --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/intel_guc.c | 18 +++ drivers/gpu/drm/i915/intel_guc_ads.c | 151 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc_ads.h | 33 +++++ drivers/gpu/drm/i915/intel_guc_submission.c | 134 ----------------- 5 files changed, 203 insertions(+), 134 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_guc_ads.c create mode 100644 drivers/gpu/drm/i915/intel_guc_ads.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 091aef281963c..4d9e2f855e9d8 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -83,6 +83,7 @@ i915-y += i915_cmd_parser.o \ i915-y += intel_uc.o \ intel_uc_fw.o \ intel_guc.o \ + intel_guc_ads.o \ intel_guc_ct.o \ intel_guc_fw.o \ intel_guc_log.o \ diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index 3c6bf5a34c3ca..50b472576980e 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -23,6 +23,7 @@ */ #include "intel_guc.h" +#include "intel_guc_ads.h" #include "intel_guc_submission.h" #include "i915_drv.h" @@ -163,10 +164,25 @@ int intel_guc_init(struct intel_guc *guc) return ret; GEM_BUG_ON(!guc->shared_data); + ret = intel_guc_log_create(guc); + if (ret) + goto err_shared; + + ret = intel_guc_ads_create(guc); + if (ret) + goto err_log; + GEM_BUG_ON(!guc->ads_vma); + /* We need to notify the guc whenever we change the GGTT */ i915_ggtt_enable_guc(dev_priv); return 0; + +err_log: + intel_guc_log_destroy(guc); +err_shared: + guc_shared_data_destroy(guc); + return ret; } void intel_guc_fini(struct intel_guc *guc) @@ -174,6 +190,8 @@ void intel_guc_fini(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); i915_ggtt_disable_guc(dev_priv); + intel_guc_ads_destroy(guc); + intel_guc_log_destroy(guc); guc_shared_data_destroy(guc); } diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c new file mode 100644 index 0000000000000..ac627534667d9 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -0,0 +1,151 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "intel_guc_ads.h" +#include "intel_uc.h" +#include "i915_drv.h" + +/* + * The Additional Data Struct (ADS) has pointers for different buffers used by + * the GuC. One single gem object contains the ADS struct itself (guc_ads), the + * scheduling policies (guc_policies), a structure describing a collection of + * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save + * its internal state for sleep. + */ + +static void guc_policy_init(struct guc_policy *policy) +{ + policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; + policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; + policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; + policy->policy_flags = 0; +} + +static void guc_policies_init(struct guc_policies *policies) +{ + struct guc_policy *policy; + u32 p, i; + + policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; + policies->max_num_work_items = POLICY_MAX_NUM_WI; + + for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { + for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { + policy = &policies->policy[p][i]; + + guc_policy_init(policy); + } + } + + policies->is_valid = 1; +} + +/* + * The first 80 dwords of the register state context, containing the + * execlists and ppgtt registers. + */ +#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) + +/** + * intel_guc_ads_create() - creates GuC ADS + * @guc: intel_guc struct + * + */ +int intel_guc_ads_create(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct i915_vma *vma; + struct page *page; + /* The ads obj includes the struct itself and buffers passed to GuC */ + struct { + struct guc_ads ads; + struct guc_policies policies; + struct guc_mmio_reg_state reg_state; + u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; + } __packed *blob; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const u32 skipped_offset = LRC_HEADER_PAGES * PAGE_SIZE; + const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; + u32 base; + + GEM_BUG_ON(guc->ads_vma); + + vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + guc->ads_vma = vma; + + page = i915_vma_first_page(vma); + blob = kmap(page); + + /* GuC scheduling policies */ + guc_policies_init(&blob->policies); + + /* MMIO reg state */ + for_each_engine(engine, dev_priv, id) { + blob->reg_state.white_list[engine->guc_id].mmio_start = + engine->mmio_base + GUC_MMIO_WHITE_LIST_START; + + /* Nothing to be saved or restored for now. */ + blob->reg_state.white_list[engine->guc_id].count = 0; + } + + /* + * The GuC requires a "Golden Context" when it reinitialises + * engines after a reset. Here we use the Render ring default + * context, which must already exist and be pinned in the GGTT, + * so its address won't change after we've told the GuC where + * to find it. Note that we have to skip our header (1 page), + * because our GuC shared data is there. + */ + blob->ads.golden_context_lrca = + guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + + skipped_offset; + + /* + * The GuC expects us to exclude the portion of the context image that + * it skips from the size it is to read. It starts reading from after + * the execlist context (so skipping the first page [PPHWSP] and 80 + * dwords). Weird guc is weird. + */ + for_each_engine(engine, dev_priv, id) + blob->ads.eng_state_size[engine->guc_id] = + engine->context_size - skipped_size; + + base = guc_ggtt_offset(vma); + blob->ads.scheduler_policies = base + ptr_offset(blob, policies); + blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); + blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); + + kunmap(page); + + return 0; +} + +void intel_guc_ads_destroy(struct intel_guc *guc) +{ + i915_vma_unpin_and_release(&guc->ads_vma); +} diff --git a/drivers/gpu/drm/i915/intel_guc_ads.h b/drivers/gpu/drm/i915/intel_guc_ads.h new file mode 100644 index 0000000000000..c4735742c5645 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_guc_ads.h @@ -0,0 +1,33 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_GUC_ADS_H_ +#define _INTEL_GUC_ADS_H_ + +struct intel_guc; + +int intel_guc_ads_create(struct intel_guc *guc); +void intel_guc_ads_destroy(struct intel_guc *guc); + +#endif diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 4d2409466a3ad..1f3a8786bbdc7 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -73,13 +73,6 @@ * ELSP context descriptor dword into Work Item. * See guc_add_request() * - * ADS: - * The Additional Data Struct (ADS) has pointers for different buffers used by - * the GuC. One single gem object contains the ADS struct itself (guc_ads), the - * scheduling policies (guc_policies), a structure describing a collection of - * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save - * its internal state for sleep. - * */ static inline bool is_high_priority(struct intel_guc_client *client) @@ -1012,117 +1005,6 @@ static void guc_clients_destroy(struct intel_guc *guc) guc_client_free(client); } -static void guc_policy_init(struct guc_policy *policy) -{ - policy->execution_quantum = POLICY_DEFAULT_EXECUTION_QUANTUM_US; - policy->preemption_time = POLICY_DEFAULT_PREEMPTION_TIME_US; - policy->fault_time = POLICY_DEFAULT_FAULT_TIME_US; - policy->policy_flags = 0; -} - -static void guc_policies_init(struct guc_policies *policies) -{ - struct guc_policy *policy; - u32 p, i; - - policies->dpc_promote_time = POLICY_DEFAULT_DPC_PROMOTE_TIME_US; - policies->max_num_work_items = POLICY_MAX_NUM_WI; - - for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) { - for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) { - policy = &policies->policy[p][i]; - - guc_policy_init(policy); - } - } - - policies->is_valid = 1; -} - -/* - * The first 80 dwords of the register state context, containing the - * execlists and ppgtt registers. - */ -#define LR_HW_CONTEXT_SIZE (80 * sizeof(u32)) - -static int guc_ads_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct i915_vma *vma; - struct page *page; - /* The ads obj includes the struct itself and buffers passed to GuC */ - struct { - struct guc_ads ads; - struct guc_policies policies; - struct guc_mmio_reg_state reg_state; - u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE]; - } __packed *blob; - struct intel_engine_cs *engine; - enum intel_engine_id id; - const u32 skipped_offset = LRC_HEADER_PAGES * PAGE_SIZE; - const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE; - u32 base; - - GEM_BUG_ON(guc->ads_vma); - - vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob))); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - guc->ads_vma = vma; - - page = i915_vma_first_page(vma); - blob = kmap(page); - - /* GuC scheduling policies */ - guc_policies_init(&blob->policies); - - /* MMIO reg state */ - for_each_engine(engine, dev_priv, id) { - blob->reg_state.white_list[engine->guc_id].mmio_start = - engine->mmio_base + GUC_MMIO_WHITE_LIST_START; - - /* Nothing to be saved or restored for now. */ - blob->reg_state.white_list[engine->guc_id].count = 0; - } - - /* - * The GuC requires a "Golden Context" when it reinitialises - * engines after a reset. Here we use the Render ring default - * context, which must already exist and be pinned in the GGTT, - * so its address won't change after we've told the GuC where - * to find it. Note that we have to skip our header (1 page), - * because our GuC shared data is there. - */ - blob->ads.golden_context_lrca = - guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + - skipped_offset; - - /* - * The GuC expects us to exclude the portion of the context image that - * it skips from the size it is to read. It starts reading from after - * the execlist context (so skipping the first page [PPHWSP] and 80 - * dwords). Weird guc is weird. - */ - for_each_engine(engine, dev_priv, id) - blob->ads.eng_state_size[engine->guc_id] = - engine->context_size - skipped_size; - - base = guc_ggtt_offset(vma); - blob->ads.scheduler_policies = base + ptr_offset(blob, policies); - blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); - blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); - - kunmap(page); - - return 0; -} - -static void guc_ads_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->ads_vma); -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. @@ -1146,15 +1028,6 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = intel_guc_log_create(guc); - if (ret < 0) - goto err_stage_desc_pool; - - ret = guc_ads_create(guc); - if (ret < 0) - goto err_log; - GEM_BUG_ON(!guc->ads_vma); - WARN_ON(!guc_verify_doorbells(guc)); ret = guc_clients_create(guc); if (ret) @@ -1167,11 +1040,6 @@ int intel_guc_submission_init(struct intel_guc *guc) return 0; -err_log: - intel_guc_log_destroy(guc); -err_stage_desc_pool: - guc_stage_desc_pool_destroy(guc); - return ret; } void intel_guc_submission_fini(struct intel_guc *guc) @@ -1186,8 +1054,6 @@ void intel_guc_submission_fini(struct intel_guc *guc) guc_clients_destroy(guc); WARN_ON(!guc_verify_doorbells(guc)); - guc_ads_destroy(guc); - intel_guc_log_destroy(guc); guc_stage_desc_pool_destroy(guc); } -- GitLab From 6f25d0be180f5b4713c6fb5a6619fe11dd1dacd9 Mon Sep 17 00:00:00 2001 From: Sujaritha Sundaresan Date: Tue, 2 Jan 2018 13:20:25 -0800 Subject: [PATCH 0036/1646] drm/i915/guc : GEM_BUG_ON on invoking GuC reset function Instead of returning -EINVAL, GEM_BUG_ON when GuC reset is invoked for platforms not supporting as we don't expect to invoke it. v2: re-wording commit message and subject (Sagar) Signed-off-by: Sujaritha Sundaresan Cc: Chris Wilson Cc: Michal Wajdeczko Cc: Sagar Arun Kamble Reviewed-by: Sagar Arun Kamble Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1514928025-29659-2-git-send-email-sujaritha.sundaresan@intel.com --- drivers/gpu/drm/i915/intel_uncore.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 89547b614aa6d..94e1fb3a2936d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1936,8 +1936,7 @@ int intel_reset_guc(struct drm_i915_private *dev_priv) { int ret; - if (!HAS_GUC(dev_priv)) - return -EINVAL; + GEM_BUG_ON(!HAS_GUC(dev_priv)); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); -- GitLab From c68ce69910e3b292afc138e12725399d3b0fb347 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 2 Jan 2018 19:25:00 +0000 Subject: [PATCH 0037/1646] drm/i915: Assert we do not try to wait on an invalid seqno MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should never insert the invalid seqno into the wait tree, so assert we do not. Signed-off-by: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180102192500.20364-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 58c624f982d91..86acac010bb8c 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -396,6 +396,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, bool first, armed; u32 seqno; + GEM_BUG_ON(!wait->seqno); + /* Insert the request into the retirement ordered list * of waiters by walking the rbtree. If we are the oldest * seqno in the tree (the first to be retired), then -- GitLab From 82e07602d22ab45cd90e31b62a87f0fa37ca428f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 4 Jan 2018 16:38:42 +0000 Subject: [PATCH 0038/1646] drm/i915: Pass DMA_ATTR_NO_WARN to dma_map_sg() In some iommu, e.g. swiotlb, the available space can be quite limited. So we employ a trial-and-error approach to seeing if our large contiguous chunks can fit, and if that fails we try again with smaller chunks after trying to free our own lazily allocated blobs. As we use a trial-and-error approach, we do not want dma_map_sg() to emit a WARN of its own accord, we want to gracefully report the error back to the caller instead. Note that our noisy culprit, swiotlb, doesn't honour the flag, yet. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20180104163842.11635-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c5f393870532f..f2a0f556da21a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2335,9 +2335,10 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { do { - if (dma_map_sg(&obj->base.dev->pdev->dev, - pages->sgl, pages->nents, - PCI_DMA_BIDIRECTIONAL)) + if (dma_map_sg_attrs(&obj->base.dev->pdev->dev, + pages->sgl, pages->nents, + PCI_DMA_BIDIRECTIONAL, + DMA_ATTR_NO_WARN)) return 0; /* If the DMA remap fails, one cause can be that we have -- GitLab From a76050a4837860fcadb6ca11d69d41e08f4090d8 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 3 Jan 2018 11:03:45 -0800 Subject: [PATCH 0039/1646] drm/i915/glk: Disable Guc and HuC on GLK Since the firmwares are not yet released to public repo, disable them on Geminilake. v2: Remove the firmware versions (Michal) v3: Remove unwanted defines (Rodrigo) Correct commit message (Michal) Cc: Michal Wajdeczko Cc: Rodrigo Vivi Cc: Signed-off-by: Anusha Srivatsa Fixes: 90f192c8241e ("drm/i915/GuC/GLK: Load GuC on GLK") Fixes: db5ba0d8931e ("drm/i915/GLK/HuC: Load HuC on GLK") Reviewed-by: Michal Wajdeczko Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1515006225-13003-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_guc_fw.c | 9 --------- drivers/gpu/drm/i915/intel_huc.c | 11 ----------- 2 files changed, 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index cbc51c9604256..3b0932942857f 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -39,9 +39,6 @@ #define KBL_FW_MAJOR 9 #define KBL_FW_MINOR 39 -#define GLK_FW_MAJOR 10 -#define GLK_FW_MINOR 56 - #define GUC_FW_PATH(platform, major, minor) \ "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" @@ -54,8 +51,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE); #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); -#define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) - static void guc_fw_select(struct intel_uc_fw *guc_fw) { struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); @@ -82,10 +77,6 @@ static void guc_fw_select(struct intel_uc_fw *guc_fw) guc_fw->path = I915_KBL_GUC_UCODE; guc_fw->major_ver_wanted = KBL_FW_MAJOR; guc_fw->minor_ver_wanted = KBL_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - guc_fw->path = I915_GLK_GUC_UCODE; - guc_fw->major_ver_wanted = GLK_FW_MAJOR; - guc_fw->minor_ver_wanted = GLK_FW_MINOR; } else { DRM_WARN("%s: No firmware known for this platform!\n", intel_uc_fw_type_repr(guc_fw->type)); diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 974be3defa709..8ed05182f9444 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -54,10 +54,6 @@ #define KBL_HUC_FW_MINOR 00 #define KBL_BLD_NUM 1810 -#define GLK_HUC_FW_MAJOR 02 -#define GLK_HUC_FW_MINOR 00 -#define GLK_BLD_NUM 1748 - #define HUC_FW_PATH(platform, major, minor, bld_num) \ "i915/" __stringify(platform) "_huc_ver" __stringify(major) "_" \ __stringify(minor) "_" __stringify(bld_num) ".bin" @@ -74,9 +70,6 @@ MODULE_FIRMWARE(I915_BXT_HUC_UCODE); KBL_HUC_FW_MINOR, KBL_BLD_NUM) MODULE_FIRMWARE(I915_KBL_HUC_UCODE); -#define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ - GLK_HUC_FW_MINOR, GLK_BLD_NUM) - static void huc_fw_select(struct intel_uc_fw *huc_fw) { struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); @@ -103,10 +96,6 @@ static void huc_fw_select(struct intel_uc_fw *huc_fw) huc_fw->path = I915_KBL_HUC_UCODE; huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; - } else if (IS_GEMINILAKE(dev_priv)) { - huc_fw->path = I915_GLK_HUC_UCODE; - huc_fw->major_ver_wanted = GLK_HUC_FW_MAJOR; - huc_fw->minor_ver_wanted = GLK_HUC_FW_MINOR; } else { DRM_WARN("%s: No firmware known for this platform!\n", intel_uc_fw_type_repr(huc_fw->type)); -- GitLab From fe9a9da61c3ec33c7d3ad305c2c535edbcae1ddb Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Thu, 4 Jan 2018 15:51:42 -0800 Subject: [PATCH 0040/1646] drm/i915/dmc: DMC 1.07 for Cannonlake There is a new version of DMC available for CNL. The release notes mentions: 1. Fix for the issue where DC_STATE was getting enabled even when disabled by driver causing data corruption v2: Since the firmware is merged to linux-firmware.git, add MODULE_FIRMWARE. v3: rebased. Correct commit message(Jani) Cc: Jani Saarinen Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1515109902-14076-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 7fe4aac0facc6..41e6c75a7f3c6 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -37,8 +37,9 @@ #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) -#define I915_CSR_CNL "i915/cnl_dmc_ver1_06.bin" -#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 6) +#define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin" +MODULE_FIRMWARE(I915_CSR_CNL); +#define CNL_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) #define I915_CSR_KBL "i915/kbl_dmc_ver1_04.bin" MODULE_FIRMWARE(I915_CSR_KBL); -- GitLab From ab062639edb0412daf6de540725276b9a5d217f9 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Fri, 5 Jan 2018 00:59:05 -0800 Subject: [PATCH 0041/1646] drm/i915: Whitelist SLICE_COMMON_ECO_CHICKEN1 on Geminilake. Geminilake requires the 3D driver to select whether barriers are intended for compute shaders, or tessellation control shaders, by whacking a "Barrier Mode" bit in SLICE_COMMON_ECO_CHICKEN1 when switching pipelines. Failure to do this properly can result in GPU hangs. Unfortunately, this means it needs to switch mid-batch, so only userspace can properly set it. To facilitate this, the kernel needs to whitelist the register. The workarounds page currently tags this as applying to Broxton only, but that doesn't make sense. The documentation for the register it references says the bit userspace is supposed to toggle only exists on Geminilake. Empirically, the Mesa patch to toggle this bit appears to fix intermittent GPU hangs in tessellation control shader barrier tests on Geminilake; we haven't seen those hangs on Broxton. v2: Mention WA #0862 in the comment (it doesn't have a name). Signed-off-by: Kenneth Graunke Acked-by: Rodrigo Vivi Cc: stable@vger.kernel.org Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180105085905.9298-1-kenneth@whitecape.org --- drivers/gpu/drm/i915/i915_reg.h | 2 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 966e4df9700ec..505c605eff989 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7079,6 +7079,8 @@ enum { #define GEN9_SLICE_COMMON_ECO_CHICKEN0 _MMIO(0x7308) #define DISABLE_PIXEL_MASK_CAMMING (1<<14) +#define GEN9_SLICE_COMMON_ECO_CHICKEN1 _MMIO(0x731c) + #define GEN7_L3SQCREG1 _MMIO(0xB010) #define VLV_B0_WA_L3SQCREG1_VALUE 0x00D30000 diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index ebdcbcbacb3c8..6bb51a502b8b4 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1338,6 +1338,11 @@ static int glk_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ + ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); + if (ret) + return ret; + /* WaToEnableHwFixForPushConstHWBug:glk */ WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); -- GitLab From c218ee03b9315073ce43992792554dafa0626eb8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 6 Jan 2018 10:56:18 +0000 Subject: [PATCH 0042/1646] drm/i915: Don't adjust priority on an already signaled fence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we retire a signaled fence, we free the dependency tree. However, we skip clearing the list so that if we then try to adjust the priority of the signaled fence, we may walk the list of freed dependencies. [ 3083.156757] ================================================================== [ 3083.156806] BUG: KASAN: use-after-free in execlists_schedule+0x199/0x660 [i915] [ 3083.156810] Read of size 8 at addr ffff8806bf20f400 by task Xorg/831 [ 3083.156815] CPU: 0 PID: 831 Comm: Xorg Not tainted 4.15.0-rc6-no-psn+ #1 [ 3083.156817] Hardware name: Notebook N24_25BU/N24_25BU, BIOS 5.12 02/17/2017 [ 3083.156818] Call Trace: [ 3083.156823] dump_stack+0x5c/0x7a [ 3083.156827] print_address_description+0x6b/0x290 [ 3083.156830] kasan_report+0x28f/0x380 [ 3083.156872] ? execlists_schedule+0x199/0x660 [i915] [ 3083.156914] execlists_schedule+0x199/0x660 [i915] [ 3083.156956] ? intel_crtc_atomic_check+0x146/0x4e0 [i915] [ 3083.156997] ? execlists_submit_request+0xe0/0xe0 [i915] [ 3083.157038] ? i915_vma_misplaced.part.4+0x25/0xb0 [i915] [ 3083.157079] ? __i915_vma_do_pin+0x7c8/0xc80 [i915] [ 3083.157121] ? intel_atomic_state_alloc+0x44/0x60 [i915] [ 3083.157130] ? drm_atomic_helper_page_flip+0x3e/0xb0 [drm_kms_helper] [ 3083.157145] ? drm_mode_page_flip_ioctl+0x7d2/0x850 [drm] [ 3083.157159] ? drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.157172] ? drm_ioctl+0x45b/0x560 [drm] [ 3083.157211] i915_gem_object_wait_priority+0x14c/0x2c0 [i915] [ 3083.157251] ? i915_gem_get_aperture_ioctl+0x150/0x150 [i915] [ 3083.157290] ? i915_vma_pin_fence+0x1d8/0x320 [i915] [ 3083.157331] ? intel_pin_and_fence_fb_obj+0x175/0x250 [i915] [ 3083.157372] ? intel_rotation_info_size+0x60/0x60 [i915] [ 3083.157413] ? intel_link_compute_m_n+0x80/0x80 [i915] [ 3083.157428] ? drm_dev_printk+0x1b0/0x1b0 [drm] [ 3083.157443] ? drm_dev_printk+0x1b0/0x1b0 [drm] [ 3083.157485] intel_prepare_plane_fb+0x2f8/0x5a0 [i915] [ 3083.157527] ? intel_crtc_get_vblank_counter+0x80/0x80 [i915] [ 3083.157536] drm_atomic_helper_prepare_planes+0xa0/0x1c0 [drm_kms_helper] [ 3083.157587] intel_atomic_commit+0x12e/0x4e0 [i915] [ 3083.157605] drm_atomic_helper_page_flip+0xa2/0xb0 [drm_kms_helper] [ 3083.157621] drm_mode_page_flip_ioctl+0x7d2/0x850 [drm] [ 3083.157638] ? drm_mode_cursor2_ioctl+0x10/0x10 [drm] [ 3083.157652] ? drm_lease_owner+0x1a/0x30 [drm] [ 3083.157668] ? drm_mode_cursor2_ioctl+0x10/0x10 [drm] [ 3083.157681] drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.157696] drm_ioctl+0x45b/0x560 [drm] [ 3083.157711] ? drm_mode_cursor2_ioctl+0x10/0x10 [drm] [ 3083.157725] ? drm_getstats+0x20/0x20 [drm] [ 3083.157729] ? timerqueue_del+0x49/0x80 [ 3083.157732] ? __remove_hrtimer+0x62/0xb0 [ 3083.157735] ? hrtimer_try_to_cancel+0x173/0x210 [ 3083.157738] do_vfs_ioctl+0x13b/0x880 [ 3083.157741] ? ioctl_preallocate+0x140/0x140 [ 3083.157744] ? _raw_spin_unlock_irq+0xe/0x30 [ 3083.157746] ? do_setitimer+0x234/0x370 [ 3083.157750] ? SyS_setitimer+0x19e/0x1b0 [ 3083.157752] ? SyS_alarm+0x140/0x140 [ 3083.157755] ? __rcu_read_unlock+0x66/0x80 [ 3083.157757] ? __fget+0xc4/0x100 [ 3083.157760] SyS_ioctl+0x74/0x80 [ 3083.157763] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 3083.157765] RIP: 0033:0x7f6135d0c6a7 [ 3083.157767] RSP: 002b:00007fff01451888 EFLAGS: 00003246 ORIG_RAX: 0000000000000010 [ 3083.157769] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f6135d0c6a7 [ 3083.157771] RDX: 00007fff01451950 RSI: 00000000c01864b0 RDI: 000000000000000c [ 3083.157772] RBP: 00007f613076f600 R08: 0000000000000001 R09: 0000000000000000 [ 3083.157773] R10: 0000000000000060 R11: 0000000000003246 R12: 0000000000000000 [ 3083.157774] R13: 0000000000000060 R14: 000000000000001b R15: 0000000000000060 [ 3083.157779] Allocated by task 831: [ 3083.157783] kmem_cache_alloc+0xc0/0x200 [ 3083.157822] i915_gem_request_await_dma_fence+0x2c4/0x5d0 [i915] [ 3083.157861] i915_gem_request_await_object+0x321/0x370 [i915] [ 3083.157900] i915_gem_do_execbuffer+0x1165/0x19c0 [i915] [ 3083.157937] i915_gem_execbuffer2+0x1ad/0x550 [i915] [ 3083.157950] drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.157962] drm_ioctl+0x45b/0x560 [drm] [ 3083.157964] do_vfs_ioctl+0x13b/0x880 [ 3083.157966] SyS_ioctl+0x74/0x80 [ 3083.157968] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 3083.157971] Freed by task 831: [ 3083.157973] kmem_cache_free+0x77/0x220 [ 3083.158012] i915_gem_request_retire+0x72c/0xa70 [i915] [ 3083.158051] i915_gem_request_alloc+0x1e9/0x8b0 [i915] [ 3083.158089] i915_gem_do_execbuffer+0xa96/0x19c0 [i915] [ 3083.158127] i915_gem_execbuffer2+0x1ad/0x550 [i915] [ 3083.158140] drm_ioctl_kernel+0xa7/0xf0 [drm] [ 3083.158153] drm_ioctl+0x45b/0x560 [drm] [ 3083.158155] do_vfs_ioctl+0x13b/0x880 [ 3083.158156] SyS_ioctl+0x74/0x80 [ 3083.158158] entry_SYSCALL_64_fastpath+0x1a/0x7d [ 3083.158162] The buggy address belongs to the object at ffff8806bf20f400 which belongs to the cache i915_dependency of size 64 [ 3083.158166] The buggy address is located 0 bytes inside of 64-byte region [ffff8806bf20f400, ffff8806bf20f440) [ 3083.158168] The buggy address belongs to the page: [ 3083.158171] page:00000000d43decc4 count:1 mapcount:0 mapping: (null) index:0x0 [ 3083.158174] flags: 0x17ffe0000000100(slab) [ 3083.158179] raw: 017ffe0000000100 0000000000000000 0000000000000000 0000000180200020 [ 3083.158182] raw: ffffea001afc16c0 0000000500000005 ffff880731b881c0 0000000000000000 [ 3083.158184] page dumped because: kasan: bad access detected [ 3083.158187] Memory state around the buggy address: [ 3083.158190] ffff8806bf20f300: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158192] ffff8806bf20f380: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158195] >ffff8806bf20f400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158196] ^ [ 3083.158199] ffff8806bf20f480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158201] ffff8806bf20f500: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc [ 3083.158203] ================================================================== Reported-by: Alexandru Chirvasitu Reported-by: Mike Keehan Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104436 Fixes: 1f181225f8ec ("drm/i915/execlists: Keep request->priority for its lifetime") Signed-off-by: Chris Wilson Cc: Alexandru Chirvasitu Cc: Michał Winiarski Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Tested-by: Alexandru Chirvasitu Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/20180106105618.13532-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/intel_lrc.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ba9f67c256f4b..8bc3283484be2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -467,7 +467,7 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) struct drm_i915_gem_request *rq; struct intel_engine_cs *engine; - if (!dma_fence_is_i915(fence)) + if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence)) return; rq = to_request(fence); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 4e150b095a117..ff25f209d0a55 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1002,6 +1002,9 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + if (i915_gem_request_completed(request)) + return; + if (prio <= READ_ONCE(request->priotree.priority)) return; -- GitLab From 716719a3e6c67da6ecfa4bd6963b5d8729415453 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:35 -0500 Subject: [PATCH 0043/1646] drm: Fix link-status kerneldoc line lengths I'm adding some stuff below it and it's killing my editor's vibe. Changes in v2: - Added to the series Changes in v3: - None Changes in v4: - None Changes in v5: - None Changes in v6: - None Cc: Manasi Navare Acked-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-2-seanpaul@chromium.org --- drivers/gpu/drm/drm_connector.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index e6a21e69059cd..2559c615d9845 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -817,10 +817,11 @@ DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, * should update this value using drm_mode_connector_set_tile_property(). * Userspace cannot change this property. * link-status: - * Connector link-status property to indicate the status of link. The default - * value of link-status is "GOOD". If something fails during or after modeset, - * the kernel driver may set this to "BAD" and issue a hotplug uevent. Drivers - * should update this value using drm_mode_connector_set_link_status_property(). + * Connector link-status property to indicate the status of link. The + * default value of link-status is "GOOD". If something fails during or + * after modeset, the kernel driver may set this to "BAD" and issue a + * hotplug uevent. Drivers should update this value using + * drm_mode_connector_set_link_status_property(). * non_desktop: * Indicates the output should be ignored for purposes of displaying a * standard desktop environment or console. This is most likely because -- GitLab From 23fdbdd7ed3f18f56773c6cbef5d3f1d68befd69 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:36 -0500 Subject: [PATCH 0044/1646] drm/i915: Add more control to wait_for routines This patch adds a little more control to a couple wait_for routines such that we can avoid open-coding read/wait/timeout patterns which: - need the value of the register after the wait_for - run arbitrary operation for the read portion This patch also chooses the correct sleep function (based on timers-howto.txt) for the polling interval the caller specifies. Changes in v2: - Added to the series Changes in v3: - Rebased on drm-intel-next-queued and the new Wmin/max _wait_for - Removed msleep option Changes in v4: - Removed ; for OP in _wait_for (Chris) - Moved reg_value definition above ret (Chris) Changes in v4: - checkpatch whitespace fix Changes in v5: - None Changes in v6: - None Suggested-by: Chris Wilson Reviewed-by: Daniel Vetter Reviewed-by: Chris Wilson Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-3-seanpaul@chromium.org --- drivers/gpu/drm/i915/intel_drv.h | 17 ++++++++++------- drivers/gpu/drm/i915/intel_uncore.c | 23 ++++++++++++++++------- drivers/gpu/drm/i915/intel_uncore.h | 14 +++++++++++++- 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 30f791f89d649..9848e8eb00749 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -41,20 +41,21 @@ #include /** - * _wait_for - magic (register) wait macro + * __wait_for - magic wait macro * - * Does the right thing for modeset paths when run under kdgb or similar atomic - * contexts. Note that it's important that we check the condition again after - * having timed out, since the timeout could be due to preemption or similar and - * we've never had a chance to check the condition before the timeout. + * Macro to help avoid open coding check/wait/timeout patterns. Note that it's + * important that we check the condition again after having timed out, since the + * timeout could be due to preemption or similar and we've never had a chance to + * check the condition before the timeout. */ -#define _wait_for(COND, US, Wmin, Wmax) ({ \ +#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ int ret__; \ might_sleep(); \ for (;;) { \ bool expired__ = time_after(jiffies, timeout__); \ + OP; \ if (COND) { \ ret__ = 0; \ break; \ @@ -70,7 +71,9 @@ ret__; \ }) -#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) +#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \ + (Wmax)) +#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ #if defined(CONFIG_DRM_I915_DEBUG) && defined(CONFIG_PREEMPT_COUNT) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 89547b614aa6d..129c8ac77985a 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1767,12 +1767,14 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, } /** - * intel_wait_for_register - wait until register matches expected state + * __intel_wait_for_register - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read * @mask: mask to apply to register value * @value: expected value - * @timeout_ms: timeout in millisecond + * @fast_timeout_us: fast timeout in microsecond for atomic/tight wait + * @slow_timeout_ms: slow timeout in millisecond + * @out_value: optional placeholder to hold registry value * * This routine waits until the target register @reg contains the expected * @value after applying the @mask, i.e. it waits until :: @@ -1783,14 +1785,17 @@ int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, * * Returns 0 if the register matches the desired condition, or -ETIMEOUT. */ -int intel_wait_for_register(struct drm_i915_private *dev_priv, +int __intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, u32 value, - unsigned int timeout_ms) + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value) { unsigned fw = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); + u32 reg_value; int ret; might_sleep(); @@ -1800,14 +1805,18 @@ int intel_wait_for_register(struct drm_i915_private *dev_priv, ret = __intel_wait_for_register_fw(dev_priv, reg, mask, value, - 2, 0, NULL); + fast_timeout_us, 0, ®_value); intel_uncore_forcewake_put__locked(dev_priv, fw); spin_unlock_irq(&dev_priv->uncore.lock); if (ret) - ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, - timeout_ms); + ret = __wait_for(reg_value = I915_READ_NOTRACE(reg), + (reg_value & mask) == value, + slow_timeout_ms * 1000, 10, 1000); + + if (out_value) + *out_value = reg_value; return ret; } diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 9ce079b5dd0d8..bed019ef000fa 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -163,11 +163,23 @@ void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_user_get(struct drm_i915_private *dev_priv); void intel_uncore_forcewake_user_put(struct drm_i915_private *dev_priv); +int __intel_wait_for_register(struct drm_i915_private *dev_priv, + i915_reg_t reg, + u32 mask, + u32 value, + unsigned int fast_timeout_us, + unsigned int slow_timeout_ms, + u32 *out_value); +static inline int intel_wait_for_register(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, u32 value, - unsigned int timeout_ms); + unsigned int timeout_ms) +{ + return __intel_wait_for_register(dev_priv, reg, mask, value, 2, + timeout_ms, NULL); +} int __intel_wait_for_register_fw(struct drm_i915_private *dev_priv, i915_reg_t reg, u32 mask, -- GitLab From 24557865c8b1a6d0eaccaac47aabd9b23badf8fd Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:37 -0500 Subject: [PATCH 0045/1646] drm: Add Content Protection property This patch adds a new optional connector property to allow userspace to enable protection over the content it is displaying. This will typically be implemented by the driver using HDCP. The property is a tri-state with the following values: - OFF: Self explanatory, no content protection - DESIRED: Userspace requests that the driver enable protection - ENABLED: Once the driver has authenticated the link, it sets this value The driver is responsible for downgrading ENABLED to DESIRED if the link becomes unprotected. The driver should also maintain the desiredness of protection across hotplug/dpms/suspend. If this looks familiar, I posted [1] this 3 years ago. We have been using this in ChromeOS across exynos, mediatek, and rockchip over that time. Changes in v2: - Pimp kerneldoc for content_protection_property (Daniel) - Drop sysfs attribute Changes in v3: - None Changes in v4: - Changed kerneldoc to recommend userspace polling (Daniel) - Changed kerneldoc to briefly describe how to attach the property (Daniel) Changes in v5: - checkpatch whitespace noise - Change DRM_MODE_CONTENT_PROTECTION_OFF to DRM_MODE_CONTENT_PROTECTION_UNDESIRED Changes in v6: - None Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul [1] https://lists.freedesktop.org/archives/dri-devel/2014-December/073336.html Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-4-seanpaul@chromium.org --- drivers/gpu/drm/drm_atomic.c | 8 ++++ drivers/gpu/drm/drm_connector.c | 78 +++++++++++++++++++++++++++++++++ include/drm/drm_connector.h | 16 +++++++ include/uapi/drm/drm_mode.h | 4 ++ 4 files changed, 106 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index b76d49218cf1d..69ff763a834e5 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1224,6 +1224,12 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, state->picture_aspect_ratio = val; } else if (property == connector->scaling_mode_property) { state->scaling_mode = val; + } else if (property == connector->content_protection_property) { + if (val == DRM_MODE_CONTENT_PROTECTION_ENABLED) { + DRM_DEBUG_KMS("only drivers can set CP Enabled\n"); + return -EINVAL; + } + state->content_protection = val; } else if (connector->funcs->atomic_set_property) { return connector->funcs->atomic_set_property(connector, state, property, val); @@ -1303,6 +1309,8 @@ drm_atomic_connector_get_property(struct drm_connector *connector, *val = state->picture_aspect_ratio; } else if (property == connector->scaling_mode_property) { *val = state->scaling_mode; + } else if (property == connector->content_protection_property) { + *val = state->content_protection; } else if (connector->funcs->atomic_get_property) { return connector->funcs->atomic_get_property(connector, state, property, val); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 2559c615d9845..b85a7749709d3 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -756,6 +756,13 @@ static const struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = { DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, drm_tv_subconnector_enum_list) +static struct drm_prop_enum_list drm_cp_enum_list[] = { + { DRM_MODE_CONTENT_PROTECTION_UNDESIRED, "Undesired" }, + { DRM_MODE_CONTENT_PROTECTION_DESIRED, "Desired" }, + { DRM_MODE_CONTENT_PROTECTION_ENABLED, "Enabled" }, +}; +DRM_ENUM_NAME_FN(drm_get_content_protection_name, drm_cp_enum_list) + /** * DOC: standard connector properties * @@ -826,6 +833,41 @@ DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, * Indicates the output should be ignored for purposes of displaying a * standard desktop environment or console. This is most likely because * the output device is not rectilinear. + * Content Protection: + * This property is used by userspace to request the kernel protect future + * content communicated over the link. When requested, kernel will apply + * the appropriate means of protection (most often HDCP), and use the + * property to tell userspace the protection is active. + * + * Drivers can set this up by calling + * drm_connector_attach_content_protection_property() on initialization. + * + * The value of this property can be one of the following: + * + * - DRM_MODE_CONTENT_PROTECTION_UNDESIRED = 0 + * The link is not protected, content is transmitted in the clear. + * - DRM_MODE_CONTENT_PROTECTION_DESIRED = 1 + * Userspace has requested content protection, but the link is not + * currently protected. When in this state, kernel should enable + * Content Protection as soon as possible. + * - DRM_MODE_CONTENT_PROTECTION_ENABLED = 2 + * Userspace has requested content protection, and the link is + * protected. Only the driver can set the property to this value. + * If userspace attempts to set to ENABLED, kernel will return + * -EINVAL. + * + * A few guidelines: + * + * - DESIRED state should be preserved until userspace de-asserts it by + * setting the property to UNDESIRED. This means ENABLED should only + * transition to UNDESIRED when the user explicitly requests it. + * - If the state is DESIRED, kernel should attempt to re-authenticate the + * link whenever possible. This includes across disable/enable, dpms, + * hotplug, downstream device changes, link status failures, etc.. + * - Userspace is responsible for polling the property to determine when + * the value transitions from ENABLED to DESIRED. This signifies the link + * is no longer protected and userspace should take appropriate action + * (whatever that might be). * * Connectors also have one standardized atomic property: * @@ -1126,6 +1168,42 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, } EXPORT_SYMBOL(drm_connector_attach_scaling_mode_property); +/** + * drm_connector_attach_content_protection_property - attach content protection + * property + * + * @connector: connector to attach CP property on. + * + * This is used to add support for content protection on select connectors. + * Content Protection is intentionally vague to allow for different underlying + * technologies, however it is most implemented by HDCP. + * + * The content protection will be set to &drm_connector_state.content_protection + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_connector_attach_content_protection_property( + struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_property *prop; + + prop = drm_property_create_enum(dev, 0, "Content Protection", + drm_cp_enum_list, + ARRAY_SIZE(drm_cp_enum_list)); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&connector->base, prop, + DRM_MODE_CONTENT_PROTECTION_UNDESIRED); + + connector->content_protection_property = prop; + + return 0; +} +EXPORT_SYMBOL(drm_connector_attach_content_protection_property); + /** * drm_mode_create_aspect_ratio_property - create aspect ratio property * @dev: DRM device diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index ed38df4ac204a..758a176e7b571 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -419,6 +419,12 @@ struct drm_connector_state { * upscaling, mostly used for built-in panels. */ unsigned int scaling_mode; + + /** + * @content_protection: Connector property to request content + * protection. This is most commonly used for HDCP. + */ + unsigned int content_protection; }; /** @@ -766,6 +772,7 @@ struct drm_cmdline_mode { * @tile_h_size: horizontal size of this tile. * @tile_v_size: vertical size of this tile. * @scaling_mode_property: Optional atomic property to control the upscaling. + * @content_protection_property: Optional property to control content protection * * Each connector may be connected to one or more CRTCs, or may be clonable by * another connector if they can share a CRTC. Each connector also has a specific @@ -856,6 +863,12 @@ struct drm_connector { struct drm_property *scaling_mode_property; + /** + * @content_protection_property: DRM ENUM property for content + * protection + */ + struct drm_property *content_protection_property; + /** * @path_blob_ptr: * @@ -1065,6 +1078,7 @@ const char *drm_get_dvi_i_subconnector_name(int val); const char *drm_get_dvi_i_select_name(int val); const char *drm_get_tv_subconnector_name(int val); const char *drm_get_tv_select_name(int val); +const char *drm_get_content_protection_name(int val); int drm_mode_create_dvi_i_properties(struct drm_device *dev); int drm_mode_create_tv_properties(struct drm_device *dev, @@ -1073,6 +1087,8 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int drm_mode_create_scaling_mode_property(struct drm_device *dev); int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, u32 scaling_mode_mask); +int drm_connector_attach_content_protection_property( + struct drm_connector *connector); int drm_mode_create_aspect_ratio_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 5597a87154e58..d1a69ff24fe84 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -173,6 +173,10 @@ extern "C" { DRM_MODE_REFLECT_X | \ DRM_MODE_REFLECT_Y) +/* Content Protection Flags */ +#define DRM_MODE_CONTENT_PROTECTION_UNDESIRED 0 +#define DRM_MODE_CONTENT_PROTECTION_DESIRED 1 +#define DRM_MODE_CONTENT_PROTECTION_ENABLED 2 struct drm_mode_modeinfo { __u32 clock; -- GitLab From 495eb7f877ab3789355c6ab5a0dbbd5277a88d69 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:38 -0500 Subject: [PATCH 0046/1646] drm: Add some HDCP related #defines In preparation for implementing HDCP in i915, add some HDCP related register offsets and defines. The dpcd register offsets will go in drm_dp_helper.h whereas the ddc offsets along with generic HDCP stuff will get stuffed in drm_hdcp.h, which is new. Changes in v2: - drm_hdcp.h gets MIT license (Daniel) Changes in v3: - None Changes in v4: - None Changes in v5: - None Changes in v6: - SPDX license Cc: Daniel Vetter Reviewed-by: Ramalingam C Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-5-seanpaul@chromium.org --- include/drm/drm_dp_helper.h | 17 ++++++++++++++++ include/drm/drm_hdcp.h | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 include/drm/drm_hdcp.h diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index da58a428c8d72..9d3ce3b9b1215 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -836,6 +836,23 @@ #define DP_CEC_TX_MESSAGE_BUFFER 0x3020 #define DP_CEC_MESSAGE_BUFFER_LENGTH 0x10 +#define DP_AUX_HDCP_BKSV 0x68000 +#define DP_AUX_HDCP_RI_PRIME 0x68005 +#define DP_AUX_HDCP_AKSV 0x68007 +#define DP_AUX_HDCP_AN 0x6800C +#define DP_AUX_HDCP_V_PRIME(h) (0x68014 + h * 4) +#define DP_AUX_HDCP_BCAPS 0x68028 +# define DP_BCAPS_REPEATER_PRESENT BIT(1) +# define DP_BCAPS_HDCP_CAPABLE BIT(0) +#define DP_AUX_HDCP_BSTATUS 0x68029 +# define DP_BSTATUS_REAUTH_REQ BIT(3) +# define DP_BSTATUS_LINK_FAILURE BIT(2) +# define DP_BSTATUS_R0_PRIME_READY BIT(1) +# define DP_BSTATUS_READY BIT(0) +#define DP_AUX_HDCP_BINFO 0x6802A +#define DP_AUX_HDCP_KSV_FIFO 0x6802C +#define DP_AUX_HDCP_AINFO 0x6803B + /* DP 1.2 Sideband message defines */ /* peer device type - DP 1.2a Table 2-92 */ #define DP_PEER_DEVICE_NONE 0x0 diff --git a/include/drm/drm_hdcp.h b/include/drm/drm_hdcp.h new file mode 100644 index 0000000000000..43f7bd902b41c --- /dev/null +++ b/include/drm/drm_hdcp.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2017 Google, Inc. + * + * Authors: + * Sean Paul + */ + +#ifndef _DRM_HDCP_H_INCLUDED_ +#define _DRM_HDCP_H_INCLUDED_ + +/* Period of hdcp checks (to ensure we're still authenticated) */ +#define DRM_HDCP_CHECK_PERIOD_MS (128 * 16) + +/* Shared lengths/masks between HDMI/DVI/DisplayPort */ +#define DRM_HDCP_AN_LEN 8 +#define DRM_HDCP_BSTATUS_LEN 2 +#define DRM_HDCP_KSV_LEN 5 +#define DRM_HDCP_RI_LEN 2 +#define DRM_HDCP_V_PRIME_PART_LEN 4 +#define DRM_HDCP_V_PRIME_NUM_PARTS 5 +#define DRM_HDCP_NUM_DOWNSTREAM(x) (x & 0x3f) + +/* Slave address for the HDCP registers in the receiver */ +#define DRM_HDCP_DDC_ADDR 0x3A + +/* HDCP register offsets for HDMI/DVI devices */ +#define DRM_HDCP_DDC_BKSV 0x00 +#define DRM_HDCP_DDC_RI_PRIME 0x08 +#define DRM_HDCP_DDC_AKSV 0x10 +#define DRM_HDCP_DDC_AN 0x18 +#define DRM_HDCP_DDC_V_PRIME(h) (0x20 + h * 4) +#define DRM_HDCP_DDC_BCAPS 0x40 +#define DRM_HDCP_DDC_BCAPS_REPEATER_PRESENT BIT(6) +#define DRM_HDCP_DDC_BCAPS_KSV_FIFO_READY BIT(5) +#define DRM_HDCP_DDC_BSTATUS 0x41 +#define DRM_HDCP_DDC_KSV_FIFO 0x43 + +#endif -- GitLab From ee5e5e7a5e0fdeca5add8de6314b1f0a62604bdf Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:39 -0500 Subject: [PATCH 0047/1646] drm/i915: Add HDCP framework + base implementation This patch adds the framework required to add HDCP support to intel connectors. It implements Aksv loading from fuse, and parts 1/2/3 of the HDCP authentication scheme. Note that without shim implementations, this does not actually implement HDCP. That will come in subsequent patches. Changes in v2: - Don't open code wait_fors (Chris) - drm_hdcp.c under MIT license (Daniel) - Move intel_hdcp_disable() call above ddi_disable (Ram) - Fix // comments (I wore a cone of shame for 12 hours to atone) (Daniel) - Justify intel_hdcp_shim with comments (Daniel) - Fixed async locking issues by adding hdcp_mutex (Daniel) - Don't alter connector_state in enable/disable (Daniel) Changes in v3: - Added hdcp_mutex/hdcp_value to make async reasonable - Added hdcp_prop_work to separate link checking & property setting - Added new helper for atomic_check state tracking (Daniel) - Moved enable/disable into atomic_commit with matching helpers - Moved intel_hdcp_check_link out of all locks when called from dp - Bumped up ksv_fifo timeout (noticed failure on one of my dongles) Changes in v4: - Remove SKL_ prefix from most register names (Daniel) - Move enable/disable back to modeset path (Daniel) - s/get_random_long/get_random_u32/ (Daniel) - Remove mode_config.mutex lock in prop_work (Daniel) - Add intel_hdcp_init to handle init of conn components (Daniel) - Actually check return value of attach_property - Check Bksv is valid before trying to authenticate (Ram) Changes in v5: - checkpatch whitespace changes - s/DRM_MODE_CONTENT_PROTECTION_OFF/DRM_MODE_CONTENT_PROTECTION_UNDESIRED/ - Fix ksv list wait timeout (actually wait 5s) - Increase the R0 timeout to 300ms (Ram) Changes in v6: - SPDX license Cc: Chris Wilson Reviewed-by: Ramalingam C Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-6-seanpaul@chromium.org --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_reg.h | 83 +++ drivers/gpu/drm/i915/intel_atomic.c | 2 + drivers/gpu/drm/i915/intel_ddi.c | 7 + drivers/gpu/drm/i915/intel_display.c | 4 + drivers/gpu/drm/i915/intel_drv.h | 85 ++++ drivers/gpu/drm/i915/intel_hdcp.c | 723 +++++++++++++++++++++++++++ 7 files changed, 905 insertions(+) create mode 100644 drivers/gpu/drm/i915/intel_hdcp.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 091aef281963c..7de05dc80f13a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -108,6 +108,7 @@ i915-y += intel_audio.o \ intel_fbc.o \ intel_fifo_underrun.o \ intel_frontbuffer.o \ + intel_hdcp.o \ intel_hotplug.o \ intel_modes.o \ intel_overlay.o \ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 41285bec8fc03..5f886c7a58702 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8043,6 +8043,7 @@ enum { #define GEN9_MEM_LATENCY_LEVEL_1_5_SHIFT 8 #define GEN9_MEM_LATENCY_LEVEL_2_6_SHIFT 16 #define GEN9_MEM_LATENCY_LEVEL_3_7_SHIFT 24 +#define SKL_PCODE_LOAD_HDCP_KEYS 0x5 #define SKL_PCODE_CDCLK_CONTROL 0x7 #define SKL_CDCLK_PREPARE_FOR_CHANGE 0x3 #define SKL_CDCLK_READY_FOR_CHANGE 0x1 @@ -8345,6 +8346,88 @@ enum skl_power_gate { #define SKL_PW_TO_PG(pw) ((pw) - SKL_DISP_PW_1 + SKL_PG1) #define SKL_FUSE_PG_DIST_STATUS(pg) (1 << (27 - (pg))) + +/* HDCP Key Registers */ +#define HDCP_KEY_CONF _MMIO(0x66c00) +#define HDCP_AKSV_SEND_TRIGGER BIT(31) +#define HDCP_CLEAR_KEYS_TRIGGER BIT(30) +#define HDCP_KEY_STATUS _MMIO(0x66c04) +#define HDCP_FUSE_IN_PROGRESS BIT(7) +#define HDCP_FUSE_ERROR BIT(6) +#define HDCP_FUSE_DONE BIT(5) +#define HDCP_KEY_LOAD_STATUS BIT(1) +#define HDCP_KEY_LOAD_DONE BIT(0) +#define HDCP_AKSV_LO _MMIO(0x66c10) +#define HDCP_AKSV_HI _MMIO(0x66c14) + +/* HDCP Repeater Registers */ +#define HDCP_REP_CTL _MMIO(0x66d00) +#define HDCP_DDIB_REP_PRESENT BIT(30) +#define HDCP_DDIA_REP_PRESENT BIT(29) +#define HDCP_DDIC_REP_PRESENT BIT(28) +#define HDCP_DDID_REP_PRESENT BIT(27) +#define HDCP_DDIF_REP_PRESENT BIT(26) +#define HDCP_DDIE_REP_PRESENT BIT(25) +#define HDCP_DDIB_SHA1_M0 (1 << 20) +#define HDCP_DDIA_SHA1_M0 (2 << 20) +#define HDCP_DDIC_SHA1_M0 (3 << 20) +#define HDCP_DDID_SHA1_M0 (4 << 20) +#define HDCP_DDIF_SHA1_M0 (5 << 20) +#define HDCP_DDIE_SHA1_M0 (6 << 20) /* Bspec says 5? */ +#define HDCP_SHA1_BUSY BIT(16) +#define HDCP_SHA1_READY BIT(17) +#define HDCP_SHA1_COMPLETE BIT(18) +#define HDCP_SHA1_V_MATCH BIT(19) +#define HDCP_SHA1_TEXT_32 (1 << 1) +#define HDCP_SHA1_COMPLETE_HASH (2 << 1) +#define HDCP_SHA1_TEXT_24 (4 << 1) +#define HDCP_SHA1_TEXT_16 (5 << 1) +#define HDCP_SHA1_TEXT_8 (6 << 1) +#define HDCP_SHA1_TEXT_0 (7 << 1) +#define HDCP_SHA_V_PRIME_H0 _MMIO(0x66d04) +#define HDCP_SHA_V_PRIME_H1 _MMIO(0x66d08) +#define HDCP_SHA_V_PRIME_H2 _MMIO(0x66d0C) +#define HDCP_SHA_V_PRIME_H3 _MMIO(0x66d10) +#define HDCP_SHA_V_PRIME_H4 _MMIO(0x66d14) +#define HDCP_SHA_V_PRIME(h) _MMIO((0x66d04 + h * 4)) +#define HDCP_SHA_TEXT _MMIO(0x66d18) + +/* HDCP Auth Registers */ +#define _PORTA_HDCP_AUTHENC 0x66800 +#define _PORTB_HDCP_AUTHENC 0x66500 +#define _PORTC_HDCP_AUTHENC 0x66600 +#define _PORTD_HDCP_AUTHENC 0x66700 +#define _PORTE_HDCP_AUTHENC 0x66A00 +#define _PORTF_HDCP_AUTHENC 0x66900 +#define _PORT_HDCP_AUTHENC(port, x) _MMIO(_PICK(port, \ + _PORTA_HDCP_AUTHENC, \ + _PORTB_HDCP_AUTHENC, \ + _PORTC_HDCP_AUTHENC, \ + _PORTD_HDCP_AUTHENC, \ + _PORTE_HDCP_AUTHENC, \ + _PORTF_HDCP_AUTHENC) + x) +#define PORT_HDCP_CONF(port) _PORT_HDCP_AUTHENC(port, 0x0) +#define HDCP_CONF_CAPTURE_AN BIT(0) +#define HDCP_CONF_AUTH_AND_ENC (BIT(1) | BIT(0)) +#define PORT_HDCP_ANINIT(port) _PORT_HDCP_AUTHENC(port, 0x4) +#define PORT_HDCP_ANLO(port) _PORT_HDCP_AUTHENC(port, 0x8) +#define PORT_HDCP_ANHI(port) _PORT_HDCP_AUTHENC(port, 0xC) +#define PORT_HDCP_BKSVLO(port) _PORT_HDCP_AUTHENC(port, 0x10) +#define PORT_HDCP_BKSVHI(port) _PORT_HDCP_AUTHENC(port, 0x14) +#define PORT_HDCP_RPRIME(port) _PORT_HDCP_AUTHENC(port, 0x18) +#define PORT_HDCP_STATUS(port) _PORT_HDCP_AUTHENC(port, 0x1C) +#define HDCP_STATUS_STREAM_A_ENC BIT(31) +#define HDCP_STATUS_STREAM_B_ENC BIT(30) +#define HDCP_STATUS_STREAM_C_ENC BIT(29) +#define HDCP_STATUS_STREAM_D_ENC BIT(28) +#define HDCP_STATUS_AUTH BIT(21) +#define HDCP_STATUS_ENC BIT(20) +#define HDCP_STATUS_RI_MATCH BIT(19) +#define HDCP_STATUS_R0_READY BIT(18) +#define HDCP_STATUS_AN_READY BIT(17) +#define HDCP_STATUS_CIPHER BIT(16) +#define HDCP_STATUS_FRAME_CNT(x) ((x >> 8) & 0xff) + /* Per-pipe DDI Function Control */ #define _TRANS_DDI_FUNC_CTL_A 0x60400 #define _TRANS_DDI_FUNC_CTL_B 0x61400 diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 36d4e635e4ce4..d452c327dc1da 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -110,6 +110,8 @@ int intel_digital_connector_atomic_check(struct drm_connector *conn, to_intel_digital_connector_state(old_state); struct drm_crtc_state *crtc_state; + intel_hdcp_atomic_check(conn, old_state, new_state); + if (!new_state->crtc) return 0; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index f51645a08dcaf..2766b5e5c0ac0 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2423,6 +2423,11 @@ static void intel_enable_ddi(struct intel_encoder *encoder, intel_enable_ddi_hdmi(encoder, crtc_state, conn_state); else intel_enable_ddi_dp(encoder, crtc_state, conn_state); + + /* Enable hdcp if it's desired */ + if (conn_state->content_protection == + DRM_MODE_CONTENT_PROTECTION_DESIRED) + intel_hdcp_enable(to_intel_connector(conn_state->connector)); } static void intel_disable_ddi_dp(struct intel_encoder *encoder, @@ -2457,6 +2462,8 @@ static void intel_disable_ddi(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { + intel_hdcp_disable(to_intel_connector(old_conn_state->connector)); + if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI)) intel_disable_ddi_hdmi(encoder, old_crtc_state, old_conn_state); else diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0cd355978ab42..02bf31d935b45 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15217,6 +15217,10 @@ static void intel_hpd_poll_fini(struct drm_device *dev) for_each_intel_connector_iter(connector, &conn_iter) { if (connector->modeset_retry_work.func) cancel_work_sync(&connector->modeset_retry_work); + if (connector->hdcp_shim) { + cancel_delayed_work_sync(&connector->hdcp_check_work); + cancel_work_sync(&connector->hdcp_prop_work); + } } drm_connector_list_iter_end(&conn_iter); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9848e8eb00749..c59277c5b63ed 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -301,6 +301,76 @@ struct intel_panel { } backlight; }; +/* + * This structure serves as a translation layer between the generic HDCP code + * and the bus-specific code. What that means is that HDCP over HDMI differs + * from HDCP over DP, so to account for these differences, we need to + * communicate with the receiver through this shim. + * + * For completeness, the 2 buses differ in the following ways: + * - DP AUX vs. DDC + * HDCP registers on the receiver are set via DP AUX for DP, and + * they are set via DDC for HDMI. + * - Receiver register offsets + * The offsets of the registers are different for DP vs. HDMI + * - Receiver register masks/offsets + * For instance, the ready bit for the KSV fifo is in a different + * place on DP vs HDMI + * - Receiver register names + * Seriously. In the DP spec, the 16-bit register containing + * downstream information is called BINFO, on HDMI it's called + * BSTATUS. To confuse matters further, DP has a BSTATUS register + * with a completely different definition. + * - KSV FIFO + * On HDMI, the ksv fifo is read all at once, whereas on DP it must + * be read 3 keys at a time + * - Aksv output + * Since Aksv is hidden in hardware, there's different procedures + * to send it over DP AUX vs DDC + */ +struct intel_hdcp_shim { + /* Outputs the transmitter's An and Aksv values to the receiver. */ + int (*write_an_aksv)(struct intel_digital_port *intel_dig_port, u8 *an); + + /* Reads the receiver's key selection vector */ + int (*read_bksv)(struct intel_digital_port *intel_dig_port, u8 *bksv); + + /* + * Reads BINFO from DP receivers and BSTATUS from HDMI receivers. The + * definitions are the same in the respective specs, but the names are + * different. Call it BSTATUS since that's the name the HDMI spec + * uses and it was there first. + */ + int (*read_bstatus)(struct intel_digital_port *intel_dig_port, + u8 *bstatus); + + /* Determines whether a repeater is present downstream */ + int (*repeater_present)(struct intel_digital_port *intel_dig_port, + bool *repeater_present); + + /* Reads the receiver's Ri' value */ + int (*read_ri_prime)(struct intel_digital_port *intel_dig_port, u8 *ri); + + /* Determines if the receiver's KSV FIFO is ready for consumption */ + int (*read_ksv_ready)(struct intel_digital_port *intel_dig_port, + bool *ksv_ready); + + /* Reads the ksv fifo for num_downstream devices */ + int (*read_ksv_fifo)(struct intel_digital_port *intel_dig_port, + int num_downstream, u8 *ksv_fifo); + + /* Reads a 32-bit part of V' from the receiver */ + int (*read_v_prime_part)(struct intel_digital_port *intel_dig_port, + int i, u32 *part); + + /* Enables HDCP signalling on the port */ + int (*toggle_signalling)(struct intel_digital_port *intel_dig_port, + bool enable); + + /* Ensures the link is still protected */ + bool (*check_link)(struct intel_digital_port *intel_dig_port); +}; + struct intel_connector { struct drm_connector base; /* @@ -332,6 +402,12 @@ struct intel_connector { /* Work struct to schedule a uevent on link train failure */ struct work_struct modeset_retry_work; + + const struct intel_hdcp_shim *hdcp_shim; + struct mutex hdcp_mutex; + uint64_t hdcp_value; /* protected by hdcp_mutex */ + struct delayed_work hdcp_check_work; + struct work_struct hdcp_prop_work; }; struct intel_digital_connector_state { @@ -1761,6 +1837,15 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con } #endif /* CONFIG_BACKLIGHT_CLASS_DEVICE */ +/* intel_hdcp.c */ +void intel_hdcp_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state); +int intel_hdcp_init(struct intel_connector *connector, + const struct intel_hdcp_shim *hdcp_shim); +int intel_hdcp_enable(struct intel_connector *connector); +int intel_hdcp_disable(struct intel_connector *connector); +int intel_hdcp_check_link(struct intel_connector *connector); /* intel_psr.c */ void intel_psr_enable(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c new file mode 100644 index 0000000000000..3c164a27d50bf --- /dev/null +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -0,0 +1,723 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2017 Google, Inc. + * + * Authors: + * Sean Paul + */ + +#include +#include +#include +#include + +#include "intel_drv.h" +#include "i915_reg.h" + +#define KEY_LOAD_TRIES 5 + +static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + int ret, read_ret; + bool ksv_ready; + + /* Poll for ksv list ready (spec says max time allowed is 5s) */ + ret = __wait_for(read_ret = shim->read_ksv_ready(intel_dig_port, + &ksv_ready), + read_ret || ksv_ready, 5 * 1000 * 1000, 1000, + 100 * 1000); + if (ret) + return ret; + if (read_ret) + return read_ret; + if (!ksv_ready) + return -ETIMEDOUT; + + return 0; +} + +static void intel_hdcp_clear_keys(struct drm_i915_private *dev_priv) +{ + I915_WRITE(HDCP_KEY_CONF, HDCP_CLEAR_KEYS_TRIGGER); + I915_WRITE(HDCP_KEY_STATUS, HDCP_KEY_LOAD_DONE | HDCP_KEY_LOAD_STATUS | + HDCP_FUSE_IN_PROGRESS | HDCP_FUSE_ERROR | HDCP_FUSE_DONE); +} + +static int intel_hdcp_load_keys(struct drm_i915_private *dev_priv) +{ + int ret; + u32 val; + + /* Initiate loading the HDCP key from fuses */ + mutex_lock(&dev_priv->pcu_lock); + ret = sandybridge_pcode_write(dev_priv, SKL_PCODE_LOAD_HDCP_KEYS, 1); + mutex_unlock(&dev_priv->pcu_lock); + if (ret) { + DRM_ERROR("Failed to initiate HDCP key load (%d)\n", ret); + return ret; + } + + /* Wait for the keys to load (500us) */ + ret = __intel_wait_for_register(dev_priv, HDCP_KEY_STATUS, + HDCP_KEY_LOAD_DONE, HDCP_KEY_LOAD_DONE, + 10, 1, &val); + if (ret) + return ret; + else if (!(val & HDCP_KEY_LOAD_STATUS)) + return -ENXIO; + + /* Send Aksv over to PCH display for use in authentication */ + I915_WRITE(HDCP_KEY_CONF, HDCP_AKSV_SEND_TRIGGER); + + return 0; +} + +/* Returns updated SHA-1 index */ +static int intel_write_sha_text(struct drm_i915_private *dev_priv, u32 sha_text) +{ + I915_WRITE(HDCP_SHA_TEXT, sha_text); + if (intel_wait_for_register(dev_priv, HDCP_REP_CTL, + HDCP_SHA1_READY, HDCP_SHA1_READY, 1)) { + DRM_ERROR("Timed out waiting for SHA1 ready\n"); + return -ETIMEDOUT; + } + return 0; +} + +static +u32 intel_hdcp_get_repeater_ctl(struct intel_digital_port *intel_dig_port) +{ + enum port port = intel_dig_port->base.port; + switch (port) { + case PORT_A: + return HDCP_DDIA_REP_PRESENT | HDCP_DDIA_SHA1_M0; + case PORT_B: + return HDCP_DDIB_REP_PRESENT | HDCP_DDIB_SHA1_M0; + case PORT_C: + return HDCP_DDIC_REP_PRESENT | HDCP_DDIC_SHA1_M0; + case PORT_D: + return HDCP_DDID_REP_PRESENT | HDCP_DDID_SHA1_M0; + case PORT_E: + return HDCP_DDIE_REP_PRESENT | HDCP_DDIE_SHA1_M0; + default: + break; + } + DRM_ERROR("Unknown port %d\n", port); + return -EINVAL; +} + +static +bool intel_hdcp_is_ksv_valid(u8 *ksv) +{ + int i, ones = 0; + /* KSV has 20 1's and 20 0's */ + for (i = 0; i < DRM_HDCP_KSV_LEN; i++) + ones += hweight8(ksv[i]); + if (ones != 20) + return false; + return true; +} + +/* Implements Part 2 of the HDCP authorization procedure */ +static +int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + struct drm_i915_private *dev_priv; + u32 vprime, sha_text, sha_leftovers, rep_ctl; + u8 bstatus[2], num_downstream, *ksv_fifo; + int ret, i, j, sha_idx; + + dev_priv = intel_dig_port->base.base.dev->dev_private; + + ret = shim->read_bstatus(intel_dig_port, bstatus); + if (ret) + return ret; + + /* If there are no downstream devices, we're all done. */ + num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); + if (num_downstream == 0) { + DRM_INFO("HDCP is enabled (no downstream devices)\n"); + return 0; + } + + ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim); + if (ret) { + DRM_ERROR("KSV list failed to become ready (%d)\n", ret); + return ret; + } + + ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL); + if (!ksv_fifo) + return -ENOMEM; + + ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); + if (ret) + return ret; + + /* Process V' values from the receiver */ + for (i = 0; i < DRM_HDCP_V_PRIME_NUM_PARTS; i++) { + ret = shim->read_v_prime_part(intel_dig_port, i, &vprime); + if (ret) + return ret; + I915_WRITE(HDCP_SHA_V_PRIME(i), vprime); + } + + /* + * We need to write the concatenation of all device KSVs, BINFO (DP) || + * BSTATUS (HDMI), and M0 (which is added via HDCP_REP_CTL). This byte + * stream is written via the HDCP_SHA_TEXT register in 32-bit + * increments. Every 64 bytes, we need to write HDCP_REP_CTL again. This + * index will keep track of our progress through the 64 bytes as well as + * helping us work the 40-bit KSVs through our 32-bit register. + * + * NOTE: data passed via HDCP_SHA_TEXT should be big-endian + */ + sha_idx = 0; + sha_text = 0; + sha_leftovers = 0; + rep_ctl = intel_hdcp_get_repeater_ctl(intel_dig_port); + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + for (i = 0; i < num_downstream; i++) { + unsigned int sha_empty; + u8 *ksv = &ksv_fifo[i * DRM_HDCP_KSV_LEN]; + + /* Fill up the empty slots in sha_text and write it out */ + sha_empty = sizeof(sha_text) - sha_leftovers; + for (j = 0; j < sha_empty; j++) + sha_text |= ksv[j] << ((sizeof(sha_text) - j - 1) * 8); + + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + + /* Programming guide writes this every 64 bytes */ + sha_idx += sizeof(sha_text); + if (!(sha_idx % 64)) + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + + /* Store the leftover bytes from the ksv in sha_text */ + sha_leftovers = DRM_HDCP_KSV_LEN - sha_empty; + sha_text = 0; + for (j = 0; j < sha_leftovers; j++) + sha_text |= ksv[sha_empty + j] << + ((sizeof(sha_text) - j - 1) * 8); + + /* + * If we still have room in sha_text for more data, continue. + * Otherwise, write it out immediately. + */ + if (sizeof(sha_text) > sha_leftovers) + continue; + + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_leftovers = 0; + sha_text = 0; + sha_idx += sizeof(sha_text); + } + + /* + * We need to write BINFO/BSTATUS, and M0 now. Depending on how many + * bytes are leftover from the last ksv, we might be able to fit them + * all in sha_text (first 2 cases), or we might need to split them up + * into 2 writes (last 2 cases). + */ + if (sha_leftovers == 0) { + /* Write 16 bits of text, 16 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_16); + ret = intel_write_sha_text(dev_priv, + bstatus[0] << 8 | bstatus[1]); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 32 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 16 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_16); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + } else if (sha_leftovers == 1) { + /* Write 24 bits of text, 8 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_24); + sha_text |= bstatus[0] << 16 | bstatus[1] << 8; + /* Only 24-bits of data, must be in the LSB */ + sha_text = (sha_text & 0xffffff00) >> 8; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 32 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 24 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_8); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + } else if (sha_leftovers == 2) { + /* Write 32 bits of text */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + sha_text |= bstatus[0] << 24 | bstatus[1] << 16; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 64 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + for (i = 0; i < 2; i++) { + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + } + } else if (sha_leftovers == 3) { + /* Write 32 bits of text */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + sha_text |= bstatus[0] << 24; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 8 bits of text, 24 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_8); + ret = intel_write_sha_text(dev_priv, bstatus[1]); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 32 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_0); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + + /* Write 8 bits of M0 */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_24); + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + } else { + DRM_ERROR("Invalid number of leftovers %d\n", sha_leftovers); + return -EINVAL; + } + + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_TEXT_32); + /* Fill up to 64-4 bytes with zeros (leave the last write for length) */ + while ((sha_idx % 64) < (64 - sizeof(sha_text))) { + ret = intel_write_sha_text(dev_priv, 0); + if (ret < 0) + return ret; + sha_idx += sizeof(sha_text); + } + + /* + * Last write gets the length of the concatenation in bits. That is: + * - 5 bytes per device + * - 10 bytes for BINFO/BSTATUS(2), M0(8) + */ + sha_text = (num_downstream * 5 + 10) * 8; + ret = intel_write_sha_text(dev_priv, sha_text); + if (ret < 0) + return ret; + + /* Tell the HW we're done with the hash and wait for it to ACK */ + I915_WRITE(HDCP_REP_CTL, rep_ctl | HDCP_SHA1_COMPLETE_HASH); + if (intel_wait_for_register(dev_priv, HDCP_REP_CTL, + HDCP_SHA1_COMPLETE, + HDCP_SHA1_COMPLETE, 1)) { + DRM_ERROR("Timed out waiting for SHA1 complete\n"); + return -ETIMEDOUT; + } + if (!(I915_READ(HDCP_REP_CTL) & HDCP_SHA1_V_MATCH)) { + DRM_ERROR("SHA-1 mismatch, HDCP failed\n"); + return -ENXIO; + } + + DRM_INFO("HDCP is enabled (%d downstream devices)\n", num_downstream); + return 0; +} + +/* Implements Part 1 of the HDCP authorization procedure */ +static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + struct drm_i915_private *dev_priv; + enum port port; + unsigned long r0_prime_gen_start; + int ret, i; + union { + u32 reg[2]; + u8 shim[DRM_HDCP_AN_LEN]; + } an; + union { + u32 reg[2]; + u8 shim[DRM_HDCP_KSV_LEN]; + } bksv; + union { + u32 reg; + u8 shim[DRM_HDCP_RI_LEN]; + } ri; + bool repeater_present; + + dev_priv = intel_dig_port->base.base.dev->dev_private; + + port = intel_dig_port->base.port; + + /* Initialize An with 2 random values and acquire it */ + for (i = 0; i < 2; i++) + I915_WRITE(PORT_HDCP_ANINIT(port), get_random_u32()); + I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_CAPTURE_AN); + + /* Wait for An to be acquired */ + if (intel_wait_for_register(dev_priv, PORT_HDCP_STATUS(port), + HDCP_STATUS_AN_READY, + HDCP_STATUS_AN_READY, 1)) { + DRM_ERROR("Timed out waiting for An\n"); + return -ETIMEDOUT; + } + + an.reg[0] = I915_READ(PORT_HDCP_ANLO(port)); + an.reg[1] = I915_READ(PORT_HDCP_ANHI(port)); + ret = shim->write_an_aksv(intel_dig_port, an.shim); + if (ret) + return ret; + + r0_prime_gen_start = jiffies; + + memset(&bksv, 0, sizeof(bksv)); + ret = shim->read_bksv(intel_dig_port, bksv.shim); + if (ret) + return ret; + else if (!intel_hdcp_is_ksv_valid(bksv.shim)) + return -ENODEV; + + I915_WRITE(PORT_HDCP_BKSVLO(port), bksv.reg[0]); + I915_WRITE(PORT_HDCP_BKSVHI(port), bksv.reg[1]); + + ret = shim->repeater_present(intel_dig_port, &repeater_present); + if (ret) + return ret; + if (repeater_present) + I915_WRITE(HDCP_REP_CTL, + intel_hdcp_get_repeater_ctl(intel_dig_port)); + + ret = shim->toggle_signalling(intel_dig_port, true); + if (ret) + return ret; + + I915_WRITE(PORT_HDCP_CONF(port), HDCP_CONF_AUTH_AND_ENC); + + /* Wait for R0 ready */ + if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + (HDCP_STATUS_R0_READY | HDCP_STATUS_ENC), 1)) { + DRM_ERROR("Timed out waiting for R0 ready\n"); + return -ETIMEDOUT; + } + + /* + * Wait for R0' to become available. The spec says 100ms from Aksv, but + * some monitors can take longer than this. We'll set the timeout at + * 300ms just to be sure. + * + * On DP, there's an R0_READY bit available but no such bit + * exists on HDMI. Since the upper-bound is the same, we'll just do + * the stupid thing instead of polling on one and not the other. + */ + wait_remaining_ms_from_jiffies(r0_prime_gen_start, 300); + + ri.reg = 0; + ret = shim->read_ri_prime(intel_dig_port, ri.shim); + if (ret) + return ret; + I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + + /* Wait for Ri prime match */ + if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { + DRM_ERROR("Timed out waiting for Ri prime match (%x)\n", + I915_READ(PORT_HDCP_STATUS(port))); + return -ETIMEDOUT; + } + + /* Wait for encryption confirmation */ + if (intel_wait_for_register(dev_priv, PORT_HDCP_STATUS(port), + HDCP_STATUS_ENC, HDCP_STATUS_ENC, 20)) { + DRM_ERROR("Timed out waiting for encryption\n"); + return -ETIMEDOUT; + } + + /* + * XXX: If we have MST-connected devices, we need to enable encryption + * on those as well. + */ + + return intel_hdcp_auth_downstream(intel_dig_port, shim); +} + +static +struct intel_digital_port *conn_to_dig_port(struct intel_connector *connector) +{ + return enc_to_dig_port(&intel_attached_encoder(&connector->base)->base); +} + +static int _intel_hdcp_disable(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = connector->base.dev->dev_private; + struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); + enum port port = intel_dig_port->base.port; + int ret; + + I915_WRITE(PORT_HDCP_CONF(port), 0); + if (intel_wait_for_register(dev_priv, PORT_HDCP_STATUS(port), ~0, 0, + 20)) { + DRM_ERROR("Failed to disable HDCP, timeout clearing status\n"); + return -ETIMEDOUT; + } + + intel_hdcp_clear_keys(dev_priv); + + ret = connector->hdcp_shim->toggle_signalling(intel_dig_port, false); + if (ret) { + DRM_ERROR("Failed to disable HDCP signalling\n"); + return ret; + } + + DRM_INFO("HDCP is disabled\n"); + return 0; +} + +static int _intel_hdcp_enable(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = connector->base.dev->dev_private; + int i, ret; + + if (!(I915_READ(SKL_FUSE_STATUS) & SKL_FUSE_PG_DIST_STATUS(1))) { + DRM_ERROR("PG1 is disabled, cannot load keys\n"); + return -ENXIO; + } + + for (i = 0; i < KEY_LOAD_TRIES; i++) { + ret = intel_hdcp_load_keys(dev_priv); + if (!ret) + break; + intel_hdcp_clear_keys(dev_priv); + } + if (ret) { + DRM_ERROR("Could not load HDCP keys, (%d)\n", ret); + return ret; + } + + ret = intel_hdcp_auth(conn_to_dig_port(connector), + connector->hdcp_shim); + if (ret) { + DRM_ERROR("Failed to authenticate HDCP (%d)\n", ret); + return ret; + } + + return 0; +} + +static void intel_hdcp_check_work(struct work_struct *work) +{ + struct intel_connector *connector = container_of(to_delayed_work(work), + struct intel_connector, + hdcp_check_work); + if (!intel_hdcp_check_link(connector)) + schedule_delayed_work(&connector->hdcp_check_work, + DRM_HDCP_CHECK_PERIOD_MS); +} + +static void intel_hdcp_prop_work(struct work_struct *work) +{ + struct intel_connector *connector = container_of(work, + struct intel_connector, + hdcp_prop_work); + struct drm_device *dev = connector->base.dev; + struct drm_connector_state *state; + + drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); + mutex_lock(&connector->hdcp_mutex); + + /* + * This worker is only used to flip between ENABLED/DESIRED. Either of + * those to UNDESIRED is handled by core. If hdcp_value == UNDESIRED, + * we're running just after hdcp has been disabled, so just exit + */ + if (connector->hdcp_value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { + state = connector->base.state; + state->content_protection = connector->hdcp_value; + } + + mutex_unlock(&connector->hdcp_mutex); + drm_modeset_unlock(&dev->mode_config.connection_mutex); +} + +int intel_hdcp_init(struct intel_connector *connector, + const struct intel_hdcp_shim *hdcp_shim) +{ + int ret; + + ret = drm_connector_attach_content_protection_property( + &connector->base); + if (ret) + return ret; + + connector->hdcp_shim = hdcp_shim; + mutex_init(&connector->hdcp_mutex); + INIT_DELAYED_WORK(&connector->hdcp_check_work, intel_hdcp_check_work); + INIT_WORK(&connector->hdcp_prop_work, intel_hdcp_prop_work); + return 0; +} + +int intel_hdcp_enable(struct intel_connector *connector) +{ + int ret; + + if (!connector->hdcp_shim) + return -ENOENT; + + mutex_lock(&connector->hdcp_mutex); + + ret = _intel_hdcp_enable(connector); + if (ret) + goto out; + + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_ENABLED; + schedule_work(&connector->hdcp_prop_work); + schedule_delayed_work(&connector->hdcp_check_work, + DRM_HDCP_CHECK_PERIOD_MS); +out: + mutex_unlock(&connector->hdcp_mutex); + return ret; +} + +int intel_hdcp_disable(struct intel_connector *connector) +{ + int ret; + + if (!connector->hdcp_shim) + return -ENOENT; + + mutex_lock(&connector->hdcp_mutex); + + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; + ret = _intel_hdcp_disable(connector); + + mutex_unlock(&connector->hdcp_mutex); + cancel_delayed_work_sync(&connector->hdcp_check_work); + return ret; +} + +void intel_hdcp_atomic_check(struct drm_connector *connector, + struct drm_connector_state *old_state, + struct drm_connector_state *new_state) +{ + uint64_t old_cp = old_state->content_protection; + uint64_t new_cp = new_state->content_protection; + struct drm_crtc_state *crtc_state; + + if (!new_state->crtc) { + /* + * If the connector is being disabled with CP enabled, mark it + * desired so it's re-enabled when the connector is brought back + */ + if (old_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED) + new_state->content_protection = + DRM_MODE_CONTENT_PROTECTION_DESIRED; + return; + } + + /* + * Nothing to do if the state didn't change, or HDCP was activated since + * the last commit + */ + if (old_cp == new_cp || + (old_cp == DRM_MODE_CONTENT_PROTECTION_DESIRED && + new_cp == DRM_MODE_CONTENT_PROTECTION_ENABLED)) + return; + + crtc_state = drm_atomic_get_new_crtc_state(new_state->state, + new_state->crtc); + crtc_state->mode_changed = true; +} + +/* Implements Part 3 of the HDCP authorization procedure */ +int intel_hdcp_check_link(struct intel_connector *connector) +{ + struct drm_i915_private *dev_priv = connector->base.dev->dev_private; + struct intel_digital_port *intel_dig_port = conn_to_dig_port(connector); + enum port port = intel_dig_port->base.port; + int ret = 0; + + if (!connector->hdcp_shim) + return -ENOENT; + + mutex_lock(&connector->hdcp_mutex); + + if (connector->hdcp_value == DRM_MODE_CONTENT_PROTECTION_UNDESIRED) + goto out; + + if (!(I915_READ(PORT_HDCP_STATUS(port)) & HDCP_STATUS_ENC)) { + DRM_ERROR("HDCP check failed: link is not encrypted, %x\n", + I915_READ(PORT_HDCP_STATUS(port))); + ret = -ENXIO; + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&connector->hdcp_prop_work); + goto out; + } + + if (connector->hdcp_shim->check_link(intel_dig_port)) { + if (connector->hdcp_value != + DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { + connector->hdcp_value = + DRM_MODE_CONTENT_PROTECTION_ENABLED; + schedule_work(&connector->hdcp_prop_work); + } + goto out; + } + + DRM_INFO("HDCP link failed, retrying authentication\n"); + + ret = _intel_hdcp_disable(connector); + if (ret) { + DRM_ERROR("Failed to disable hdcp (%d)\n", ret); + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&connector->hdcp_prop_work); + goto out; + } + + ret = _intel_hdcp_enable(connector); + if (ret) { + DRM_ERROR("Failed to enable hdcp (%d)\n", ret); + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_DESIRED; + schedule_work(&connector->hdcp_prop_work); + goto out; + } + +out: + mutex_unlock(&connector->hdcp_mutex); + return ret; +} -- GitLab From d02cf0a4f4db3bdbf1f7fa6915db0a96bd98ed9e Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:40 -0500 Subject: [PATCH 0048/1646] drm/i915: Make use of indexed write GMBUS feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch enables the indexed write feature of the GMBUS to concatenate 2 consecutive messages into one. The criteria for an indexed write is that both messages are writes, the first is length == 1, and the second is length > 0. The first message is sent out by the GMBUS as the slave command, and the second one is sent via the GMBUS FIFO as usual. Changes in v3: - Added to series Changes in v4: - Combine indexed reads and writes (Ville) Changes in v5: - checkpatch whitespace nits Changes in v6: - None Reviewed-by: Daniel Vetter Suggested-by: Ville Syrjälä Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-7-seanpaul@chromium.org --- drivers/gpu/drm/i915/intel_i2c.c | 34 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index ef9f91a0b0c98..d9e6993fa718e 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -402,7 +402,8 @@ gmbus_xfer_read(struct drm_i915_private *dev_priv, struct i2c_msg *msg, static int gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, - unsigned short addr, u8 *buf, unsigned int len) + unsigned short addr, u8 *buf, unsigned int len, + u32 gmbus1_index) { unsigned int chunk_size = len; u32 val, loop; @@ -415,7 +416,7 @@ gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, I915_WRITE_FW(GMBUS3, val); I915_WRITE_FW(GMBUS1, - GMBUS_CYCLE_WAIT | + gmbus1_index | GMBUS_CYCLE_WAIT | (chunk_size << GMBUS_BYTE_COUNT_SHIFT) | (addr << GMBUS_SLAVE_ADDR_SHIFT) | GMBUS_SLAVE_WRITE | GMBUS_SW_RDY); @@ -438,7 +439,8 @@ gmbus_xfer_write_chunk(struct drm_i915_private *dev_priv, } static int -gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) +gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg, + u32 gmbus1_index) { u8 *buf = msg->buf; unsigned int tx_size = msg->len; @@ -448,7 +450,8 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) do { len = min(tx_size, GMBUS_BYTE_COUNT_MAX); - ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len); + ret = gmbus_xfer_write_chunk(dev_priv, msg->addr, buf, len, + gmbus1_index); if (ret) return ret; @@ -460,21 +463,21 @@ gmbus_xfer_write(struct drm_i915_private *dev_priv, struct i2c_msg *msg) } /* - * The gmbus controller can combine a 1 or 2 byte write with a read that - * immediately follows it by using an "INDEX" cycle. + * The gmbus controller can combine a 1 or 2 byte write with another read/write + * that immediately follows it by using an "INDEX" cycle. */ static bool -gmbus_is_index_read(struct i2c_msg *msgs, int i, int num) +gmbus_is_index_xfer(struct i2c_msg *msgs, int i, int num) { return (i + 1 < num && msgs[i].addr == msgs[i + 1].addr && !(msgs[i].flags & I2C_M_RD) && (msgs[i].len == 1 || msgs[i].len == 2) && - (msgs[i + 1].flags & I2C_M_RD)); + msgs[i + 1].len > 0); } static int -gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) +gmbus_index_xfer(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) { u32 gmbus1_index = 0; u32 gmbus5 = 0; @@ -491,7 +494,10 @@ gmbus_xfer_index_read(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) if (gmbus5) I915_WRITE_FW(GMBUS5, gmbus5); - ret = gmbus_xfer_read(dev_priv, &msgs[1], gmbus1_index); + if (msgs[1].flags & I2C_M_RD) + ret = gmbus_xfer_read(dev_priv, &msgs[1], gmbus1_index); + else + ret = gmbus_xfer_write(dev_priv, &msgs[1], gmbus1_index); /* Clear GMBUS5 after each index transfer */ if (gmbus5) @@ -522,13 +528,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) for (; i < num; i += inc) { inc = 1; - if (gmbus_is_index_read(msgs, i, num)) { - ret = gmbus_xfer_index_read(dev_priv, &msgs[i]); - inc = 2; /* an index read is two msgs */ + if (gmbus_is_index_xfer(msgs, i, num)) { + ret = gmbus_index_xfer(dev_priv, &msgs[i]); + inc = 2; /* an index transmission is two msgs */ } else if (msgs[i].flags & I2C_M_RD) { ret = gmbus_xfer_read(dev_priv, &msgs[i], 0); } else { - ret = gmbus_xfer_write(dev_priv, &msgs[i]); + ret = gmbus_xfer_write(dev_priv, &msgs[i], 0); } if (!ret) -- GitLab From 07e17a75922a593393b7eb8f73622723b76d3706 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:41 -0500 Subject: [PATCH 0049/1646] drm/i915: Add function to output Aksv over GMBUS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once the Aksv is available in the PCH, we need to get it on the wire to the receiver via DDC. The hardware doesn't allow us to read the value directly, so we need to tell GMBUS to source the Aksv internally and send it to the right offset on the receiver. The way we do this is to initiate an indexed write where the index is the Aksv register offset. We write dummy values to GMBUS3 as if we were sending the key, and the hardware slips in the "real" values when it goes out. Changes in v2: - None Changes in v3: - Uses new index write feature (Ville) Changes in v4: - None Changes in v5: - checkpatch whitespace fix Changes in v6: - None Cc: Ville Syrjälä Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-8-seanpaul@chromium.org --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_i2c.c | 47 ++++++++++++++++++++++++++++++-- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index caebd5825279e..a689396d0ff6d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3644,6 +3644,7 @@ extern int intel_setup_gmbus(struct drm_i915_private *dev_priv); extern void intel_teardown_gmbus(struct drm_i915_private *dev_priv); extern bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, unsigned int pin); +extern int intel_gmbus_output_aksv(struct i2c_adapter *adapter); extern struct i2c_adapter * intel_gmbus_get_adapter(struct drm_i915_private *dev_priv, unsigned int pin); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5f886c7a58702..3504cf1a2e685 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3043,6 +3043,7 @@ enum i915_power_well_id { # define GPIO_DATA_PULLUP_DISABLE (1 << 13) #define GMBUS0 _MMIO(dev_priv->gpio_mmio_base + 0x5100) /* clock/port select */ +#define GMBUS_AKSV_SELECT (1<<11) #define GMBUS_RATE_100KHZ (0<<8) #define GMBUS_RATE_50KHZ (1<<8) #define GMBUS_RATE_400KHZ (2<<8) /* reserved on Pineview */ diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index d9e6993fa718e..6f7ef4e225ee4 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "intel_drv.h" #include #include "i915_drv.h" @@ -507,7 +508,8 @@ gmbus_index_xfer(struct drm_i915_private *dev_priv, struct i2c_msg *msgs) } static int -do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) +do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num, + u32 gmbus0_source) { struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, @@ -524,7 +526,7 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) pch_gmbus_clock_gating(dev_priv, false); retry: - I915_WRITE_FW(GMBUS0, bus->reg0); + I915_WRITE_FW(GMBUS0, gmbus0_source | bus->reg0); for (; i < num; i += inc) { inc = 1; @@ -649,7 +651,7 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) if (ret < 0) bus->force_bit &= ~GMBUS_FORCE_BIT_RETRY; } else { - ret = do_gmbus_xfer(adapter, msgs, num); + ret = do_gmbus_xfer(adapter, msgs, num, 0); if (ret == -EAGAIN) bus->force_bit |= GMBUS_FORCE_BIT_RETRY; } @@ -659,6 +661,45 @@ gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) return ret; } +int intel_gmbus_output_aksv(struct i2c_adapter *adapter) +{ + struct intel_gmbus *bus = container_of(adapter, struct intel_gmbus, + adapter); + struct drm_i915_private *dev_priv = bus->dev_priv; + int ret; + u8 cmd = DRM_HDCP_DDC_AKSV; + u8 buf[DRM_HDCP_KSV_LEN] = { 0 }; + struct i2c_msg msgs[] = { + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = sizeof(cmd), + .buf = &cmd, + }, + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = sizeof(buf), + .buf = buf, + } + }; + + intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); + mutex_lock(&dev_priv->gmbus_mutex); + + /* + * In order to output Aksv to the receiver, use an indexed write to + * pass the i2c command, and tell GMBUS to use the HW-provided value + * instead of sourcing GMBUS3 for the data. + */ + ret = do_gmbus_xfer(adapter, msgs, ARRAY_SIZE(msgs), GMBUS_AKSV_SELECT); + + mutex_unlock(&dev_priv->gmbus_mutex); + intel_display_power_put(dev_priv, POWER_DOMAIN_GMBUS); + + return ret; +} + static u32 gmbus_func(struct i2c_adapter *adapter) { return i2c_bit_algo.functionality(adapter) & -- GitLab From 2320175feb74a11d3b287eead09bb5c0953cf27f Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:42 -0500 Subject: [PATCH 0050/1646] drm/i915: Implement HDCP for HDMI This patch adds HDCP support for HDMI connectors by implementing the intel_hdcp_shim. Nothing too special, just a bunch of DDC reads/writes. Changes in v2: - Rebased on drm-intel-next Changes in v3: - Initialize new worker Changes in v4: - Remove SKL_ prefix from most register names (Daniel) - Wrap sanity checks in WARN_ON (Daniel) - Consolidate the enable/disable functions into one toggle fn - Use intel_hdcp_init (Daniel) Changes in v5: - checkpatch whitespace nits Changes in v6: - None Cc: Daniel Vetter Reviewed-by: Ramalingam C Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-9-seanpaul@chromium.org --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 29 ++++ drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_hdmi.c | 250 ++++++++++++++++++++++++++++++ 4 files changed, 282 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3504cf1a2e685..e5f7ef636a8e7 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8460,6 +8460,7 @@ enum skl_power_gate { #define TRANS_DDI_EDP_INPUT_A_ONOFF (4<<12) #define TRANS_DDI_EDP_INPUT_B_ONOFF (5<<12) #define TRANS_DDI_EDP_INPUT_C_ONOFF (6<<12) +#define TRANS_DDI_HDCP_SIGNALLING (1<<9) #define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1<<8) #define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7) #define TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 2766b5e5c0ac0..6260a882fbe44 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1615,6 +1615,35 @@ void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, I915_WRITE(reg, val); } +int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + bool enable) +{ + struct drm_device *dev = intel_encoder->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + enum pipe pipe = 0; + int ret = 0; + uint32_t tmp; + + if (WARN_ON(!intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain))) + return -ENXIO; + + if (WARN_ON(!intel_encoder->get_hw_state(intel_encoder, &pipe))) { + ret = -EIO; + goto out; + } + + tmp = I915_READ(TRANS_DDI_FUNC_CTL(pipe)); + if (enable) + tmp |= TRANS_DDI_HDCP_SIGNALLING; + else + tmp &= ~TRANS_DDI_HDCP_SIGNALLING; + I915_WRITE(TRANS_DDI_FUNC_CTL(pipe), tmp); +out: + intel_display_power_put(dev_priv, intel_encoder->power_domain); + return ret; +} + bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) { struct drm_device *dev = intel_connector->base.dev; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c59277c5b63ed..731dc36d71295 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1377,6 +1377,8 @@ void intel_ddi_compute_min_voltage_level(struct drm_i915_private *dev_priv, u32 bxt_signal_levels(struct intel_dp *intel_dp); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); +int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, + bool enable); unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int plane, unsigned int height); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index bced7b954d93d..22251ad48b3bf 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include "intel_drv.h" #include @@ -876,6 +877,248 @@ void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable) adapter, enable); } +static int intel_hdmi_hdcp_read(struct intel_digital_port *intel_dig_port, + unsigned int offset, void *buffer, size_t size) +{ + struct intel_hdmi *hdmi = &intel_dig_port->hdmi; + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, + hdmi->ddc_bus); + int ret; + u8 start = offset & 0xff; + struct i2c_msg msgs[] = { + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = 0, + .len = 1, + .buf = &start, + }, + { + .addr = DRM_HDCP_DDC_ADDR, + .flags = I2C_M_RD, + .len = size, + .buf = buffer + } + }; + ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs)); + if (ret == ARRAY_SIZE(msgs)) + return 0; + return ret >= 0 ? -EIO : ret; +} + +static int intel_hdmi_hdcp_write(struct intel_digital_port *intel_dig_port, + unsigned int offset, void *buffer, size_t size) +{ + struct intel_hdmi *hdmi = &intel_dig_port->hdmi; + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, + hdmi->ddc_bus); + int ret; + u8 *write_buf; + struct i2c_msg msg; + + write_buf = kzalloc(size + 1, GFP_KERNEL); + if (!write_buf) + return -ENOMEM; + + write_buf[0] = offset & 0xff; + memcpy(&write_buf[1], buffer, size); + + msg.addr = DRM_HDCP_DDC_ADDR; + msg.flags = 0, + msg.len = size + 1, + msg.buf = write_buf; + + ret = i2c_transfer(adapter, &msg, 1); + if (ret == 1) + return 0; + return ret >= 0 ? -EIO : ret; +} + +static +int intel_hdmi_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, + u8 *an) +{ + struct intel_hdmi *hdmi = &intel_dig_port->hdmi; + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + struct i2c_adapter *adapter = intel_gmbus_get_adapter(dev_priv, + hdmi->ddc_bus); + int ret; + + ret = intel_hdmi_hdcp_write(intel_dig_port, DRM_HDCP_DDC_AN, an, + DRM_HDCP_AN_LEN); + if (ret) { + DRM_ERROR("Write An over DDC failed (%d)\n", ret); + return ret; + } + + ret = intel_gmbus_output_aksv(adapter); + if (ret < 0) { + DRM_ERROR("Failed to output aksv (%d)\n", ret); + return ret; + } + return 0; +} + +static int intel_hdmi_hdcp_read_bksv(struct intel_digital_port *intel_dig_port, + u8 *bksv) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BKSV, bksv, + DRM_HDCP_KSV_LEN); + if (ret) + DRM_ERROR("Read Bksv over DDC failed (%d)\n", ret); + return ret; +} + +static +int intel_hdmi_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port, + u8 *bstatus) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BSTATUS, + bstatus, DRM_HDCP_BSTATUS_LEN); + if (ret) + DRM_ERROR("Read bstatus over DDC failed (%d)\n", ret); + return ret; +} + +static +int intel_hdmi_hdcp_repeater_present(struct intel_digital_port *intel_dig_port, + bool *repeater_present) +{ + int ret; + u8 val; + + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BCAPS, &val, 1); + if (ret) { + DRM_ERROR("Read bcaps over DDC failed (%d)\n", ret); + return ret; + } + *repeater_present = val & DRM_HDCP_DDC_BCAPS_REPEATER_PRESENT; + return 0; +} + +static +int intel_hdmi_hdcp_read_ri_prime(struct intel_digital_port *intel_dig_port, + u8 *ri_prime) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_RI_PRIME, + ri_prime, DRM_HDCP_RI_LEN); + if (ret) + DRM_ERROR("Read Ri' over DDC failed (%d)\n", ret); + return ret; +} + +static +int intel_hdmi_hdcp_read_ksv_ready(struct intel_digital_port *intel_dig_port, + bool *ksv_ready) +{ + int ret; + u8 val; + + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_BCAPS, &val, 1); + if (ret) { + DRM_ERROR("Read bcaps over DDC failed (%d)\n", ret); + return ret; + } + *ksv_ready = val & DRM_HDCP_DDC_BCAPS_KSV_FIFO_READY; + return 0; +} + +static +int intel_hdmi_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port, + int num_downstream, u8 *ksv_fifo) +{ + int ret; + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_KSV_FIFO, + ksv_fifo, num_downstream * DRM_HDCP_KSV_LEN); + if (ret) { + DRM_ERROR("Read ksv fifo over DDC failed (%d)\n", ret); + return ret; + } + return 0; +} + +static +int intel_hdmi_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port, + int i, u32 *part) +{ + int ret; + + if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) + return -EINVAL; + + ret = intel_hdmi_hdcp_read(intel_dig_port, DRM_HDCP_DDC_V_PRIME(i), + part, DRM_HDCP_V_PRIME_PART_LEN); + if (ret) + DRM_ERROR("Read V'[%d] over DDC failed (%d)\n", i, ret); + return ret; +} + +static +int intel_hdmi_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port, + bool enable) +{ + int ret; + + if (!enable) + usleep_range(6, 60); /* Bspec says >= 6us */ + + ret = intel_ddi_toggle_hdcp_signalling(&intel_dig_port->base, enable); + if (ret) { + DRM_ERROR("%s HDCP signalling failed (%d)\n", + enable ? "Enable" : "Disable", ret); + return ret; + } + return 0; +} + +static +bool intel_hdmi_hdcp_check_link(struct intel_digital_port *intel_dig_port) +{ + struct drm_i915_private *dev_priv = + intel_dig_port->base.base.dev->dev_private; + enum port port = intel_dig_port->base.port; + int ret; + union { + u32 reg; + u8 shim[DRM_HDCP_RI_LEN]; + } ri; + + ret = intel_hdmi_hdcp_read_ri_prime(intel_dig_port, ri.shim); + if (ret) + return false; + + I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + + /* Wait for Ri prime match */ + if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { + DRM_ERROR("Ri' mismatch detected, link check failed (%x)\n", + I915_READ(PORT_HDCP_STATUS(port))); + return false; + } + return true; +} + +static const struct intel_hdcp_shim intel_hdmi_hdcp_shim = { + .write_an_aksv = intel_hdmi_hdcp_write_an_aksv, + .read_bksv = intel_hdmi_hdcp_read_bksv, + .read_bstatus = intel_hdmi_hdcp_read_bstatus, + .repeater_present = intel_hdmi_hdcp_repeater_present, + .read_ri_prime = intel_hdmi_hdcp_read_ri_prime, + .read_ksv_ready = intel_hdmi_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_hdmi_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_hdmi_hdcp_read_v_prime_part, + .toggle_signalling = intel_hdmi_hdcp_toggle_signalling, + .check_link = intel_hdmi_hdcp_check_link, +}; + static void intel_hdmi_prepare(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state) { @@ -2053,6 +2296,13 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_hdmi_add_properties(intel_hdmi, connector); + if (INTEL_GEN(dev_priv) >= 9) { + int ret = intel_hdcp_init(intel_connector, + &intel_hdmi_hdcp_shim); + if (ret) + DRM_DEBUG_KMS("HDCP init failed, skipping.\n"); + } + intel_connector_attach_encoder(intel_connector, intel_encoder); intel_hdmi->attached_connector = intel_connector; -- GitLab From 20f24d776d1be6ecf353f21a158e7b716143e523 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Mon, 8 Jan 2018 14:55:43 -0500 Subject: [PATCH 0051/1646] drm/i915: Implement HDCP for DisplayPort This patch adds HDCP support for DisplayPort connectors by implementing the intel_hdcp_shim. Most of this is straightforward read/write from/to DPCD registers. One thing worth pointing out is the Aksv output bit. It wasn't easily separable like it's HDMI counterpart, so it's crammed in with the rest of it. Changes in v2: - Moved intel_hdcp_check_link out of intel_dp_check_link and only call it on short pulse. Since intel_hdcp_check_link does its own locking, this ensures we don't deadlock when intel_dp_check_link is called holding connection_mutex. - Rebased on drm-intel-next Changes in v3: - Initialize new worker Changes in v4: - Use intel_hdcp_init (Daniel) - Check for reauth requests in check_link (Ram) Changes in v5: - None Changes in v6: - Fix build warnings when printing ssize_t Cc: Daniel Vetter Reviewed-by: Ramalingam C Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180108195545.218615-10-seanpaul@chromium.org --- drivers/gpu/drm/i915/intel_dp.c | 244 +++++++++++++++++++++++++++++++- 1 file changed, 237 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 35c5299feab68..68229f53d5b8a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -36,7 +36,9 @@ #include #include #include +#include #include +#include #include "intel_drv.h" #include #include "i915_drv.h" @@ -1025,10 +1027,29 @@ static uint32_t skl_get_aux_send_ctl(struct intel_dp *intel_dp, DP_AUX_CH_CTL_SYNC_PULSE_SKL(32); } +static uint32_t intel_dp_get_aux_send_ctl(struct intel_dp *intel_dp, + bool has_aux_irq, + int send_bytes, + uint32_t aux_clock_divider, + bool aksv_write) +{ + uint32_t val = 0; + + if (aksv_write) { + send_bytes += 5; + val |= DP_AUX_CH_CTL_AUX_AKSV_SELECT; + } + + return val | intel_dp->get_aux_send_ctl(intel_dp, + has_aux_irq, + send_bytes, + aux_clock_divider); +} + static int intel_dp_aux_ch(struct intel_dp *intel_dp, const uint8_t *send, int send_bytes, - uint8_t *recv, int recv_size) + uint8_t *recv, int recv_size, bool aksv_write) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = @@ -1088,10 +1109,11 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, } while ((aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, clock++))) { - u32 send_ctl = intel_dp->get_aux_send_ctl(intel_dp, - has_aux_irq, - send_bytes, - aux_clock_divider); + u32 send_ctl = intel_dp_get_aux_send_ctl(intel_dp, + has_aux_irq, + send_bytes, + aux_clock_divider, + aksv_write); /* Must try at least 3 times according to DP spec */ for (try = 0; try < 5; try++) { @@ -1228,7 +1250,8 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (msg->buffer) memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size); - ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); + ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize, + false); if (ret > 0) { msg->reply = rxbuf[0] >> 4; @@ -1250,7 +1273,8 @@ intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg) if (WARN_ON(rxsize > 20)) return -E2BIG; - ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize); + ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize, + false); if (ret > 0) { msg->reply = rxbuf[0] >> 4; /* @@ -4985,6 +5009,203 @@ void intel_dp_encoder_suspend(struct intel_encoder *intel_encoder) pps_unlock(intel_dp); } +static +int intel_dp_hdcp_write_an_aksv(struct intel_digital_port *intel_dig_port, + u8 *an) +{ + struct intel_dp *intel_dp = enc_to_intel_dp(&intel_dig_port->base.base); + uint8_t txbuf[4], rxbuf[2], reply = 0; + ssize_t dpcd_ret; + int ret; + + /* Output An first, that's easy */ + dpcd_ret = drm_dp_dpcd_write(&intel_dig_port->dp.aux, DP_AUX_HDCP_AN, + an, DRM_HDCP_AN_LEN); + if (dpcd_ret != DRM_HDCP_AN_LEN) { + DRM_ERROR("Failed to write An over DP/AUX (%zd)\n", dpcd_ret); + return dpcd_ret >= 0 ? -EIO : dpcd_ret; + } + + /* + * Since Aksv is Oh-So-Secret, we can't access it in software. So in + * order to get it on the wire, we need to create the AUX header as if + * we were writing the data, and then tickle the hardware to output the + * data once the header is sent out. + */ + txbuf[0] = (DP_AUX_NATIVE_WRITE << 4) | + ((DP_AUX_HDCP_AKSV >> 16) & 0xf); + txbuf[1] = (DP_AUX_HDCP_AKSV >> 8) & 0xff; + txbuf[2] = DP_AUX_HDCP_AKSV & 0xff; + txbuf[3] = DRM_HDCP_KSV_LEN - 1; + + ret = intel_dp_aux_ch(intel_dp, txbuf, sizeof(txbuf), rxbuf, + sizeof(rxbuf), true); + if (ret < 0) { + DRM_ERROR("Write Aksv over DP/AUX failed (%d)\n", ret); + return ret; + } else if (ret == 0) { + DRM_ERROR("Aksv write over DP/AUX was empty\n"); + return -EIO; + } + + reply = (rxbuf[0] >> 4) & DP_AUX_NATIVE_REPLY_MASK; + return reply == DP_AUX_NATIVE_REPLY_ACK ? 0 : -EIO; +} + +static int intel_dp_hdcp_read_bksv(struct intel_digital_port *intel_dig_port, + u8 *bksv) +{ + ssize_t ret; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BKSV, bksv, + DRM_HDCP_KSV_LEN); + if (ret != DRM_HDCP_KSV_LEN) { + DRM_ERROR("Read Bksv from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static int intel_dp_hdcp_read_bstatus(struct intel_digital_port *intel_dig_port, + u8 *bstatus) +{ + ssize_t ret; + /* + * For some reason the HDMI and DP HDCP specs call this register + * definition by different names. In the HDMI spec, it's called BSTATUS, + * but in DP it's called BINFO. + */ + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BINFO, + bstatus, DRM_HDCP_BSTATUS_LEN); + if (ret != DRM_HDCP_BSTATUS_LEN) { + DRM_ERROR("Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_repeater_present(struct intel_digital_port *intel_dig_port, + bool *repeater_present) +{ + ssize_t ret; + u8 bcaps; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BCAPS, + &bcaps, 1); + if (ret != 1) { + DRM_ERROR("Read bcaps from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + *repeater_present = bcaps & DP_BCAPS_REPEATER_PRESENT; + return 0; +} + +static +int intel_dp_hdcp_read_ri_prime(struct intel_digital_port *intel_dig_port, + u8 *ri_prime) +{ + ssize_t ret; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_RI_PRIME, + ri_prime, DRM_HDCP_RI_LEN); + if (ret != DRM_HDCP_RI_LEN) { + DRM_ERROR("Read Ri' from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_read_ksv_ready(struct intel_digital_port *intel_dig_port, + bool *ksv_ready) +{ + ssize_t ret; + u8 bstatus; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + DRM_ERROR("Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + *ksv_ready = bstatus & DP_BSTATUS_READY; + return 0; +} + +static +int intel_dp_hdcp_read_ksv_fifo(struct intel_digital_port *intel_dig_port, + int num_downstream, u8 *ksv_fifo) +{ + ssize_t ret; + int i; + + /* KSV list is read via 15 byte window (3 entries @ 5 bytes each) */ + for (i = 0; i < num_downstream; i += 3) { + size_t len = min(num_downstream - i, 3) * DRM_HDCP_KSV_LEN; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, + DP_AUX_HDCP_KSV_FIFO, + ksv_fifo + i * DRM_HDCP_KSV_LEN, + len); + if (ret != len) { + DRM_ERROR("Read ksv[%d] from DP/AUX failed (%zd)\n", i, + ret); + return ret >= 0 ? -EIO : ret; + } + } + return 0; +} + +static +int intel_dp_hdcp_read_v_prime_part(struct intel_digital_port *intel_dig_port, + int i, u32 *part) +{ + ssize_t ret; + + if (i >= DRM_HDCP_V_PRIME_NUM_PARTS) + return -EINVAL; + + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, + DP_AUX_HDCP_V_PRIME(i), part, + DRM_HDCP_V_PRIME_PART_LEN); + if (ret != DRM_HDCP_V_PRIME_PART_LEN) { + DRM_ERROR("Read v'[%d] from DP/AUX failed (%zd)\n", i, ret); + return ret >= 0 ? -EIO : ret; + } + return 0; +} + +static +int intel_dp_hdcp_toggle_signalling(struct intel_digital_port *intel_dig_port, + bool enable) +{ + /* Not used for single stream DisplayPort setups */ + return 0; +} + +static +bool intel_dp_hdcp_check_link(struct intel_digital_port *intel_dig_port) +{ + ssize_t ret; + u8 bstatus; + ret = drm_dp_dpcd_read(&intel_dig_port->dp.aux, DP_AUX_HDCP_BSTATUS, + &bstatus, 1); + if (ret != 1) { + DRM_ERROR("Read bstatus from DP/AUX failed (%zd)\n", ret); + return ret >= 0 ? -EIO : ret; + } + return !(bstatus & (DP_BSTATUS_LINK_FAILURE | DP_BSTATUS_REAUTH_REQ)); +} + +static const struct intel_hdcp_shim intel_dp_hdcp_shim = { + .write_an_aksv = intel_dp_hdcp_write_an_aksv, + .read_bksv = intel_dp_hdcp_read_bksv, + .read_bstatus = intel_dp_hdcp_read_bstatus, + .repeater_present = intel_dp_hdcp_repeater_present, + .read_ri_prime = intel_dp_hdcp_read_ri_prime, + .read_ksv_ready = intel_dp_hdcp_read_ksv_ready, + .read_ksv_fifo = intel_dp_hdcp_read_ksv_fifo, + .read_v_prime_part = intel_dp_hdcp_read_v_prime_part, + .toggle_signalling = intel_dp_hdcp_toggle_signalling, + .check_link = intel_dp_hdcp_check_link, +}; + static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); @@ -5150,6 +5371,9 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) drm_modeset_acquire_fini(&ctx); WARN(iret, "Acquiring modeset locks failed with %i\n", iret); + /* Short pulse can signify loss of hdcp authentication */ + intel_hdcp_check_link(intel_dp->attached_connector); + if (!handled) { intel_dp->detect_done = false; goto put_power; @@ -6128,6 +6352,12 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dp_add_properties(intel_dp, connector); + if (INTEL_GEN(dev_priv) >= 9 && !intel_dp_is_edp(intel_dp)) { + int ret = intel_hdcp_init(intel_connector, &intel_dp_hdcp_shim); + if (ret) + DRM_DEBUG_KMS("HDCP init failed, skipping.\n"); + } + /* For G4X desktop chip, PEG_BAND_GAP_DATA 3:0 must first be written * 0xd. Failure to do so will result in spurious interrupts being * generated on the port when a cable is not attached. -- GitLab From cfb926e148e99acc02351d72e8b85e32b5f786ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Sun, 31 Dec 2017 23:34:54 +0100 Subject: [PATCH 0052/1646] drm/i915: Try EDID bitbanging on HDMI after failed read MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACK/NACK implementation as found in e.g. the G965 has the falling clock edge and the release of the data line after the ACK for the received byte happen at the same time. This is conformant with the I2C specification, which allows a zero hold time, see footnote [3]: "A device must internally provide a hold time of at least 300 ns for the SDA signal (with respect to the V IH(min) of the SCL signal) to bridge the undefined region of the falling edge of SCL." Some HDMI-to-VGA converters apparently fail to adhere to this requirement and latch SDA at the falling clock edge, so instead of an ACK sometimes a NACK is read and the slave (i.e. the EDID ROM) ends the transfer. The bitbanging releases the data line for the ACK only 1/4 bit time after the falling clock edge, so a slave will see the correct value no matter if it samples at the rising or the falling clock edge or in the center. Fallback to bitbanging is already done for the CRT connector. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92685 Signed-off-by: Stefan Brüns Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/a39f080b-81a5-4c93-b3f7-7cb0a58daca3@rwthex-w2-a.rwth-ad.de --- drivers/gpu/drm/i915/intel_hdmi.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index bced7b954d93d..179d0ad3889d1 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1595,12 +1595,20 @@ intel_hdmi_set_edid(struct drm_connector *connector) struct intel_hdmi *intel_hdmi = intel_attached_hdmi(connector); struct edid *edid; bool connected = false; + struct i2c_adapter *i2c; intel_display_power_get(dev_priv, POWER_DOMAIN_GMBUS); - edid = drm_get_edid(connector, - intel_gmbus_get_adapter(dev_priv, - intel_hdmi->ddc_bus)); + i2c = intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); + + edid = drm_get_edid(connector, i2c); + + if (!edid && !intel_gmbus_is_forced_bit(i2c)) { + DRM_DEBUG_KMS("HDMI GMBUS EDID read failed, retry using GPIO bit-banging\n"); + intel_gmbus_force_bit(i2c, true); + edid = drm_get_edid(connector, i2c); + intel_gmbus_force_bit(i2c, false); + } intel_hdmi_dp_dual_mode_detect(connector, edid != NULL); -- GitLab From c4712f27be5159819a735d7d294103040cf60477 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Tue, 12 Dec 2017 12:20:37 +0100 Subject: [PATCH 0053/1646] drm/bridge: analogix: Remove unreachable code from analogic_dp_core.c This patch removes an unreachable code found by the SVACE static analysis: UNREACHABLE_CODE: This statement in the source code might be unreachable during program execution. [unreachable] unreachable at drivers/gpu/drm/bridge/analogix/analogix_dp_core.c:787 retval != 0 is always false because at this program point the variable retval is always equal to 0 at drivers/gpu/drm/bridge/analogix/analogix_dp_core.c:786 Signed-off-by: Sylwester Nawrocki Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20171212112037.13107-1-s.nawrocki@samsung.com --- drivers/gpu/drm/bridge/analogix/analogix_dp_core.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 5dd3f1cd074a1..263e8215b3de1 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -727,7 +727,6 @@ static int analogix_dp_set_link_train(struct analogix_dp_device *dp, static int analogix_dp_config_video(struct analogix_dp_device *dp) { - int retval = 0; int timeout_loop = 0; int done_count = 0; @@ -783,10 +782,7 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) usleep_range(1000, 1001); } - if (retval != 0) - dev_err(dp->dev, "Video stream is not detected!\n"); - - return retval; + return 0; } static void analogix_dp_enable_scramble(struct analogix_dp_device *dp, -- GitLab From b706a25eaed083a54afd113db86ee9747cc2f28b Mon Sep 17 00:00:00 2001 From: Philippe CORNU Date: Thu, 26 Oct 2017 13:17:46 +0200 Subject: [PATCH 0054/1646] drm/stm: ltdc: add clut mode support Add the 8-bit clut mode support at crtc level. Useful for low memory footprint user interfaces but also for 8-bit old games (including color shifting visual effects). Tested with fbdev FBIOPUTCMAP & drm DRM_IOCTL_MODE_SETGAMMA ioctls. Signed-off-by: Philippe Cornu Signed-off-by: Benjamin Gaignard Link: https://patchwork.freedesktop.org/patch/msgid/1509016666-18927-1-git-send-email-philippe.cornu@st.com --- drivers/gpu/drm/stm/ltdc.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 6dc5d4ec4e17c..b48589343ae18 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -175,6 +175,8 @@ #define LXCFBLNR_CFBLN GENMASK(10, 0) /* Color Frame Buffer Line Number */ +#define CLUT_SIZE 256 + #define CONSTA_MAX 0xFF /* CONSTant Alpha MAX= 1.0 */ #define BF1_PAXCA 0x600 /* Pixel Alpha x Constant Alpha */ #define BF1_CA 0x400 /* Constant Alpha */ @@ -363,6 +365,28 @@ static irqreturn_t ltdc_irq(int irq, void *arg) * DRM_CRTC */ +static void ltdc_crtc_update_clut(struct drm_crtc *crtc) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + struct drm_color_lut *lut; + u32 val; + int i; + + if (!crtc || !crtc->state) + return; + + if (!crtc->state->color_mgmt_changed || !crtc->state->gamma_lut) + return; + + lut = (struct drm_color_lut *)crtc->state->gamma_lut->data; + + for (i = 0; i < CLUT_SIZE; i++, lut++) { + val = ((lut->red << 8) & 0xff0000) | (lut->green & 0xff00) | + (lut->blue >> 8) | (i << 24); + reg_write(ldev->regs, LTDC_L1CLUTWR, val); + } +} + static void ltdc_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { @@ -486,6 +510,8 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, DRM_DEBUG_ATOMIC("\n"); + ltdc_crtc_update_clut(crtc); + /* Commit shadow registers = update planes at next vblank */ reg_set(ldev->regs, LTDC_SRCR, SRCR_VBR); @@ -533,6 +559,7 @@ static const struct drm_crtc_funcs ltdc_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .gamma_set = drm_atomic_helper_legacy_gamma_set, }; /* @@ -765,6 +792,9 @@ static int ltdc_crtc_init(struct drm_device *ddev, struct drm_crtc *crtc) drm_crtc_helper_add(crtc, <dc_crtc_helper_funcs); + drm_mode_crtc_set_gamma_size(crtc, CLUT_SIZE); + drm_crtc_enable_color_mgmt(crtc, 0, false, CLUT_SIZE); + DRM_DEBUG_DRIVER("CRTC:%d created\n", crtc->base.id); /* Add planes. Note : the first layer is used by primary plane */ -- GitLab From 8242ecbd597dac1375f6b80e0c0574189afc9a17 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 27 Nov 2017 17:05:38 -0800 Subject: [PATCH 0055/1646] drm/bridge/synopsys: stop clobbering drvdata Bridge drivers/helpers shouldn't be clobbering the drvdata, since a parent driver might need to own this. Instead, let's return our 'dw_mipi_dsi' object and have callers pass that back to us for removal. Signed-off-by: Brian Norris Reviewed-by: Matthias Kaehlcke Reviewed-by: Archit Taneja Acked-by: Philippe Cornu Signed-off-by: Andrzej Hajda Link: https://patchwork.freedesktop.org/patch/msgid/20171128010538.119114-1-briannorris@chromium.org --- drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c | 36 +++++++------------ drivers/gpu/drm/stm/dw_mipi_dsi-stm.c | 14 +++++--- include/drm/bridge/dw_mipi_dsi.h | 17 +++++---- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index d9cca4fd66ecd..c39c7dce20ed2 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -922,8 +922,6 @@ __dw_mipi_dsi_probe(struct platform_device *pdev, dsi->bridge.of_node = pdev->dev.of_node; #endif - dev_set_drvdata(dev, dsi); - return dsi; } @@ -935,23 +933,16 @@ static void __dw_mipi_dsi_remove(struct dw_mipi_dsi *dsi) /* * Probe/remove API, used from platforms based on the DRM bridge API. */ -int dw_mipi_dsi_probe(struct platform_device *pdev, - const struct dw_mipi_dsi_plat_data *plat_data) +struct dw_mipi_dsi * +dw_mipi_dsi_probe(struct platform_device *pdev, + const struct dw_mipi_dsi_plat_data *plat_data) { - struct dw_mipi_dsi *dsi; - - dsi = __dw_mipi_dsi_probe(pdev, plat_data); - if (IS_ERR(dsi)) - return PTR_ERR(dsi); - - return 0; + return __dw_mipi_dsi_probe(pdev, plat_data); } EXPORT_SYMBOL_GPL(dw_mipi_dsi_probe); -void dw_mipi_dsi_remove(struct platform_device *pdev) +void dw_mipi_dsi_remove(struct dw_mipi_dsi *dsi) { - struct dw_mipi_dsi *dsi = platform_get_drvdata(pdev); - mipi_dsi_host_unregister(&dsi->dsi_host); __dw_mipi_dsi_remove(dsi); @@ -961,31 +952,30 @@ EXPORT_SYMBOL_GPL(dw_mipi_dsi_remove); /* * Bind/unbind API, used from platforms based on the component framework. */ -int dw_mipi_dsi_bind(struct platform_device *pdev, struct drm_encoder *encoder, - const struct dw_mipi_dsi_plat_data *plat_data) +struct dw_mipi_dsi * +dw_mipi_dsi_bind(struct platform_device *pdev, struct drm_encoder *encoder, + const struct dw_mipi_dsi_plat_data *plat_data) { struct dw_mipi_dsi *dsi; int ret; dsi = __dw_mipi_dsi_probe(pdev, plat_data); if (IS_ERR(dsi)) - return PTR_ERR(dsi); + return dsi; ret = drm_bridge_attach(encoder, &dsi->bridge, NULL); if (ret) { - dw_mipi_dsi_remove(pdev); + dw_mipi_dsi_remove(dsi); DRM_ERROR("Failed to initialize bridge with drm\n"); - return ret; + return ERR_PTR(ret); } - return 0; + return dsi; } EXPORT_SYMBOL_GPL(dw_mipi_dsi_bind); -void dw_mipi_dsi_unbind(struct device *dev) +void dw_mipi_dsi_unbind(struct dw_mipi_dsi *dsi) { - struct dw_mipi_dsi *dsi = dev_get_drvdata(dev); - __dw_mipi_dsi_remove(dsi); } EXPORT_SYMBOL_GPL(dw_mipi_dsi_unbind); diff --git a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c index fd02506274dab..0ba8635d738a1 100644 --- a/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c +++ b/drivers/gpu/drm/stm/dw_mipi_dsi-stm.c @@ -65,6 +65,7 @@ enum dsi_color { struct dw_mipi_dsi_stm { void __iomem *base; struct clk *pllref_clk; + struct dw_mipi_dsi *dsi; }; static inline void dsi_write(struct dw_mipi_dsi_stm *dsi, u32 reg, u32 val) @@ -312,21 +313,24 @@ static int dw_mipi_dsi_stm_probe(struct platform_device *pdev) dw_mipi_dsi_stm_plat_data.base = dsi->base; dw_mipi_dsi_stm_plat_data.priv_data = dsi; - ret = dw_mipi_dsi_probe(pdev, &dw_mipi_dsi_stm_plat_data); - if (ret) { + platform_set_drvdata(pdev, dsi); + + dsi->dsi = dw_mipi_dsi_probe(pdev, &dw_mipi_dsi_stm_plat_data); + if (IS_ERR(dsi->dsi)) { DRM_ERROR("Failed to initialize mipi dsi host\n"); clk_disable_unprepare(dsi->pllref_clk); + return PTR_ERR(dsi->dsi); } - return ret; + return 0; } static int dw_mipi_dsi_stm_remove(struct platform_device *pdev) { - struct dw_mipi_dsi_stm *dsi = dw_mipi_dsi_stm_plat_data.priv_data; + struct dw_mipi_dsi_stm *dsi = platform_get_drvdata(pdev); clk_disable_unprepare(dsi->pllref_clk); - dw_mipi_dsi_remove(pdev); + dw_mipi_dsi_remove(dsi->dsi); return 0; } diff --git a/include/drm/bridge/dw_mipi_dsi.h b/include/drm/bridge/dw_mipi_dsi.h index 9b30fec302c8c..d9c6d549f9711 100644 --- a/include/drm/bridge/dw_mipi_dsi.h +++ b/include/drm/bridge/dw_mipi_dsi.h @@ -10,6 +10,8 @@ #ifndef __DW_MIPI_DSI__ #define __DW_MIPI_DSI__ +struct dw_mipi_dsi; + struct dw_mipi_dsi_phy_ops { int (*init)(void *priv_data); int (*get_lane_mbps)(void *priv_data, struct drm_display_mode *mode, @@ -29,11 +31,14 @@ struct dw_mipi_dsi_plat_data { void *priv_data; }; -int dw_mipi_dsi_probe(struct platform_device *pdev, - const struct dw_mipi_dsi_plat_data *plat_data); -void dw_mipi_dsi_remove(struct platform_device *pdev); -int dw_mipi_dsi_bind(struct platform_device *pdev, struct drm_encoder *encoder, - const struct dw_mipi_dsi_plat_data *plat_data); -void dw_mipi_dsi_unbind(struct device *dev); +struct dw_mipi_dsi *dw_mipi_dsi_probe(struct platform_device *pdev, + const struct dw_mipi_dsi_plat_data + *plat_data); +void dw_mipi_dsi_remove(struct dw_mipi_dsi *dsi); +struct dw_mipi_dsi *dw_mipi_dsi_bind(struct platform_device *pdev, + struct drm_encoder *encoder, + const struct dw_mipi_dsi_plat_data + *plat_data); +void dw_mipi_dsi_unbind(struct dw_mipi_dsi *dsi); #endif /* __DW_MIPI_DSI__ */ -- GitLab From 17bd6e66d8f677b1b250d098097fbed6e70175ef Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 9 Jan 2018 14:20:40 +0200 Subject: [PATCH 0056/1646] drm/i915: Fix using BIT_ULL() vs. BIT() for power domain masks The power domain masks are 64 bit wide, so we need BIT_ULL() when setting bits in them, these ones were missed during converting from 32 to 64 bit masks. All 3 enums are <32 atm, so this didn't cause a real problem. Fixes: d8fc70b7367b ("drm/i915: Make power domain masks 64 bit long") Cc: Joonas Lahtinen Reported-by: Fengguang Wu Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20180109122040.19425-1-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0cd355978ab42..f288bcc7be22d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5661,8 +5661,8 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, if (!crtc_state->base.active) return 0; - mask = BIT(POWER_DOMAIN_PIPE(pipe)); - mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); + mask = BIT_ULL(POWER_DOMAIN_PIPE(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); @@ -5674,7 +5674,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, } if (HAS_DDI(dev_priv) && crtc_state->has_audio) - mask |= BIT(POWER_DOMAIN_AUDIO); + mask |= BIT_ULL(POWER_DOMAIN_AUDIO); if (crtc_state->shared_dpll) mask |= BIT_ULL(POWER_DOMAIN_PLLS); -- GitLab From a10195bbe7f4e6ba540083ba13126ef745116cae Mon Sep 17 00:00:00 2001 From: "Leo (Sunpeng) Li" Date: Thu, 4 Jan 2018 14:47:33 -0500 Subject: [PATCH 0057/1646] drm/atomic: Fix memleak on ERESTARTSYS during non-blocking commits During a non-blocking commit, it is possible to return before the commit_tail work is queued (-ERESTARTSYS, for example). Since a reference on the crtc commit object is obtained for the pending vblank event when preparing the commit, the above situation will leave us with an extra reference. Therefore, if the commit_tail worker has not consumed the event at the end of a commit, release it's reference. Signed-off-by: Leo (Sunpeng) Li Acked-by: Harry Wentland Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1515095253-29817-1-git-send-email-sunpeng.li@amd.com --- drivers/gpu/drm/drm_atomic_helper.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index ab4032167094c..4253f57227e37 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3421,6 +3421,15 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state); void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state) { if (state->commit) { + /* + * In the event that a non-blocking commit returns + * -ERESTARTSYS before the commit_tail work is queued, we will + * have an extra reference to the commit object. Release it, if + * the event has not been consumed by the worker. + */ + if (state->event) + drm_crtc_commit_put(state->commit); + kfree(state->commit->event); state->commit->event = NULL; drm_crtc_commit_put(state->commit); -- GitLab From c308279f87988a1582a52f25fa7903974f0f4a12 Mon Sep 17 00:00:00 2001 From: Samuel Li Date: Thu, 4 Jan 2018 16:12:14 -0500 Subject: [PATCH 0058/1646] drm: export gem dmabuf_ops for drivers to reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Samuel Li Reviewed-by: Christian König Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1515100334-6845-1-git-send-email-Samuel.Li@amd.com --- drivers/gpu/drm/drm_prime.c | 53 ++++++++++++++++++++++--------------- include/drm/drm_prime.h | 22 +++++++++++++++ 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 9a17725b0f7a1..ca09ce74e302b 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -180,9 +180,8 @@ static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpri return -ENOENT; } -static int drm_gem_map_attach(struct dma_buf *dma_buf, - struct device *target_dev, - struct dma_buf_attachment *attach) +int drm_gem_map_attach(struct dma_buf *dma_buf, struct device *target_dev, + struct dma_buf_attachment *attach) { struct drm_prime_attachment *prime_attach; struct drm_gem_object *obj = dma_buf->priv; @@ -200,9 +199,10 @@ static int drm_gem_map_attach(struct dma_buf *dma_buf, return dev->driver->gem_prime_pin(obj); } +EXPORT_SYMBOL(drm_gem_map_attach); -static void drm_gem_map_detach(struct dma_buf *dma_buf, - struct dma_buf_attachment *attach) +void drm_gem_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach) { struct drm_prime_attachment *prime_attach = attach->priv; struct drm_gem_object *obj = dma_buf->priv; @@ -228,6 +228,7 @@ static void drm_gem_map_detach(struct dma_buf *dma_buf, kfree(prime_attach); attach->priv = NULL; } +EXPORT_SYMBOL(drm_gem_map_detach); void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf) @@ -254,8 +255,8 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr } } -static struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, - enum dma_data_direction dir) +struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, + enum dma_data_direction dir) { struct drm_prime_attachment *prime_attach = attach->priv; struct drm_gem_object *obj = attach->dmabuf->priv; @@ -291,13 +292,15 @@ static struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, return sgt; } +EXPORT_SYMBOL(drm_gem_map_dma_buf); -static void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *sgt, - enum dma_data_direction dir) +void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir) { /* nothing to be done here */ } +EXPORT_SYMBOL(drm_gem_unmap_dma_buf); /** * drm_gem_dmabuf_export - dma_buf export implementation for GEM @@ -348,47 +351,52 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf) } EXPORT_SYMBOL(drm_gem_dmabuf_release); -static void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf) +void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf) { struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; return dev->driver->gem_prime_vmap(obj); } +EXPORT_SYMBOL(drm_gem_dmabuf_vmap); -static void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) { struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; dev->driver->gem_prime_vunmap(obj, vaddr); } +EXPORT_SYMBOL(drm_gem_dmabuf_vunmap); -static void *drm_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, - unsigned long page_num) +void *drm_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num) { return NULL; } +EXPORT_SYMBOL(drm_gem_dmabuf_kmap_atomic); -static void drm_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, - unsigned long page_num, void *addr) +void drm_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num, void *addr) { } -static void *drm_gem_dmabuf_kmap(struct dma_buf *dma_buf, - unsigned long page_num) +EXPORT_SYMBOL(drm_gem_dmabuf_kunmap_atomic); + +void *drm_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) { return NULL; } +EXPORT_SYMBOL(drm_gem_dmabuf_kmap); -static void drm_gem_dmabuf_kunmap(struct dma_buf *dma_buf, - unsigned long page_num, void *addr) +void drm_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, + void *addr) { } +EXPORT_SYMBOL(drm_gem_dmabuf_kunmap); -static int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, - struct vm_area_struct *vma) +int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) { struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; @@ -398,6 +406,7 @@ static int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, return dev->driver->gem_prime_mmap(obj, vma); } +EXPORT_SYMBOL(drm_gem_dmabuf_mmap); static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { .attach = drm_gem_map_attach, diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 9cd9e36f77b53..4d5f5d6cf6a68 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -54,6 +54,9 @@ struct device; struct dma_buf_export_info; struct dma_buf; +struct dma_buf_attachment; + +enum dma_data_direction; struct drm_device; struct drm_gem_object; @@ -79,6 +82,25 @@ int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev, struct dma_buf_export_info *exp_info); void drm_gem_dmabuf_release(struct dma_buf *dma_buf); +int drm_gem_map_attach(struct dma_buf *dma_buf, struct device *target_dev, + struct dma_buf_attachment *attach); +void drm_gem_map_detach(struct dma_buf *dma_buf, + struct dma_buf_attachment *attach); +struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, + enum dma_data_direction dir); +void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, + struct sg_table *sgt, + enum dma_data_direction dir); +void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf); +void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr); +void *drm_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num); +void drm_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, + unsigned long page_num, void *addr); +void *drm_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num); +void drm_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, + void *addr); +int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma); int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, struct page **pages, dma_addr_t *addrs, int max_pages); -- GitLab From 2f4498a4ac2f77671587faa99e38c3342d666114 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 9 Jan 2018 13:53:51 -0500 Subject: [PATCH 0059/1646] drm/i915: Don't allow HDCP on PORT E/F Port E doesn't have HDCP support, and Port F is disabled. Don't setup the hdcp shim on those. Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180109185401.16911-1-seanpaul@chromium.org --- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 22251ad48b3bf..09b52d218fd40 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2296,7 +2296,8 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, intel_hdmi_add_properties(intel_hdmi, connector); - if (INTEL_GEN(dev_priv) >= 9) { + /* PORT E doesn't have HDCP, and PORT F is disabled */ + if (INTEL_GEN(dev_priv) >= 9 && port < PORT_E) { int ret = intel_hdcp_init(intel_connector, &intel_hdmi_hdcp_shim); if (ret) -- GitLab From 01468d6c711a31a9fe333d072afa8a4730ef3bf9 Mon Sep 17 00:00:00 2001 From: Sean Paul Date: Tue, 9 Jan 2018 13:53:13 -0500 Subject: [PATCH 0060/1646] drm/i915: Only disable HDCP when it's active Instead of always trying to disable HDCP. Only run hdcp_disable when the state is not UNDESIRED. This will catch cases where it's enabled and also cases where enable failed and the state is left in DESIRED mode. Note that things won't blow up if disable is attempted while already disabled, it's just bad form. Reviewed-by: Daniel Vetter Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20180109185330.16853-1-seanpaul@chromium.org --- drivers/gpu/drm/i915/intel_hdcp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 3c164a27d50bf..827cab22f1915 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -616,15 +616,17 @@ int intel_hdcp_enable(struct intel_connector *connector) int intel_hdcp_disable(struct intel_connector *connector) { - int ret; + int ret = 0; if (!connector->hdcp_shim) return -ENOENT; mutex_lock(&connector->hdcp_mutex); - connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; - ret = _intel_hdcp_disable(connector); + if (connector->hdcp_value != DRM_MODE_CONTENT_PROTECTION_UNDESIRED) { + connector->hdcp_value = DRM_MODE_CONTENT_PROTECTION_UNDESIRED; + ret = _intel_hdcp_disable(connector); + } mutex_unlock(&connector->hdcp_mutex); cancel_delayed_work_sync(&connector->hdcp_check_work); -- GitLab From 60ccc38f53ad50128bf33616f4e1745947eb726c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 10 Jan 2018 10:32:18 +0100 Subject: [PATCH 0061/1646] Revert "drm/atomic: Fix memleak on ERESTARTSYS during non-blocking commits" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit a10195bbe7f4e6ba540083ba13126ef745116cae. This commit needs some more thought, and is currently crashing kms_flip tests. Until we figure out what's going wrong it's better to revert, and also next time apply it to drm-misc-fixes. Testcase: kms_flip Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104566 References: https://patchwork.freedesktop.org/series/36185/ References: https://patchwork.freedesktop.org/series/36250/ Reported-by: Marta Löfstedt Acked-by: Daniel Vetter Signed-off-by: Maarten Lankhorst --- drivers/gpu/drm/drm_atomic_helper.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index 4253f57227e37..ab4032167094c 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -3421,15 +3421,6 @@ EXPORT_SYMBOL(drm_atomic_helper_crtc_duplicate_state); void __drm_atomic_helper_crtc_destroy_state(struct drm_crtc_state *state) { if (state->commit) { - /* - * In the event that a non-blocking commit returns - * -ERESTARTSYS before the commit_tail work is queued, we will - * have an extra reference to the commit object. Release it, if - * the event has not been consumed by the worker. - */ - if (state->event) - drm_crtc_commit_put(state->commit); - kfree(state->commit->event); state->commit->event = NULL; drm_crtc_commit_put(state->commit); -- GitLab From da943b5ab071584fcb9cfa896dc8c643d376f362 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 10 Jan 2018 18:24:16 +0530 Subject: [PATCH 0062/1646] drm/i915/guc: Add uc_fini_wq in gem_init unwind path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While moving code around for solving lockdep issue for GuC log relay, spotted that uc_fini_wq is not being called in failure path in gem_init. Missed in the below commit. Add it. v2: Removed GEM_BUG_ON(!HAS_GUC()) from intel_uc_fini_wq as init happens only based on enable_guc module parameter and does not consider has_guc capability. (Michal) Signed-off-by: Sagar Arun Kamble Fixes: 3176ff49bc3e ("drm/i915/guc: Move GuC workqueue allocations outside of the mutex") Cc: Michał Winiarski Cc: Chris Wilson Reviewed-by: Michał Winiarski Link: https://patchwork.freedesktop.org/patch/msgid/1515588857-10283-1-git-send-email-sagar.a.kamble@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/intel_uc.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8bc3283484be2..1135a77b383ae 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5283,6 +5283,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uc_fini_wq(dev_priv); + if (ret != -EIO) i915_gem_cleanup_userptr(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 907deac6e3fa8..d82ca0f438f52 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -209,8 +209,6 @@ void intel_uc_fini_wq(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return; - GEM_BUG_ON(!HAS_GUC(dev_priv)); - intel_guc_fini_wq(&dev_priv->guc); } -- GitLab From 5a3f58dfd1420347be838546e00a4a9e847bda30 Mon Sep 17 00:00:00 2001 From: Oscar Mateo Date: Fri, 22 Dec 2017 14:38:49 -0800 Subject: [PATCH 0063/1646] drm/i915: Stop getting the fault address from RING_FAULT_REG This register does not contain it. Instead, we have to look into FAULT_TLB_DATA0 & 1 (where, by the way, we can also get the address space). v2: Right formatting v3: - Use 12 (as per the register format) instead of PAGE_SIZE (Chris) - s/BITS_44_TO_47/HIGHBITS (Chris) - Right formatting, this time for real Fixes: b03ec3d67ab8 ("drm/i915: There is only one fault register from GEN8 onwards") Signed-off-by: Oscar Mateo Cc: Michel Thierry Cc: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/1513982329-32191-1-git-send-email-oscar.mateo@intel.com Reviewed-by: Michel Thierry Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 2 ++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f2a0f556da21a..26dee5e61e997 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2287,12 +2287,23 @@ static void gen8_check_and_clear_faults(struct drm_i915_private *dev_priv) u32 fault = I915_READ(GEN8_RING_FAULT_REG); if (fault & RING_FAULT_VALID) { + u32 fault_data0, fault_data1; + u64 fault_addr; + + fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0); + fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1); + fault_addr = ((u64)(fault_data1 & FAULT_VA_HIGH_BITS) << 44) | + ((u64)fault_data0 << 12); + DRM_DEBUG_DRIVER("Unexpected fault\n" - "\tAddr: 0x%08lx\n" + "\tAddr: 0x%08x_%08x\n" + "\tAddress space: %s\n" "\tEngine ID: %d\n" "\tSource ID: %d\n" "\tType: %d\n", - fault & PAGE_MASK, + upper_32_bits(fault_addr), + lower_32_bits(fault_addr), + fault_data1 & FAULT_GTT_SEL ? "GGTT" : "PPGTT", GEN8_RING_FAULT_ENGINE_ID(fault), RING_FAULT_SRCID(fault), RING_FAULT_FAULT_TYPE(fault)); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 505c605eff989..a2108e35c5999 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2489,6 +2489,8 @@ enum i915_power_well_id { #define GEN8_FAULT_TLB_DATA0 _MMIO(0x4b10) #define GEN8_FAULT_TLB_DATA1 _MMIO(0x4b14) +#define FAULT_VA_HIGH_BITS (0xf << 0) +#define FAULT_GTT_SEL (1 << 4) #define FPGA_DBG _MMIO(0x42300) #define FPGA_DBG_RM_NOCLAIM (1<<31) -- GitLab From 6e7a3f5244356ce079457cb3cfbc58ca01c00a2e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 11 Jan 2018 08:24:17 +0000 Subject: [PATCH 0064/1646] drm/i915: Apply headless DMC workaround for CNL With firmware 1.07 having fixed the state corruption issue, we can enable the headless GT performance workaround for CNL as well. (Equivalent to b68763741aa2 ("drm/i915: Restore GT performance in headless mode with DMC loaded") on other affected platforms.) Signed-off-by: Tvrtko Ursulin Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100572 Testcase: igt/gem_exec_nop/headless Cc: Imre Deak Cc: Chris Wilson Cc: Dmitry Rogozhkin Reviewed-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20180111082417.795-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index d758da6156a82..4996c4ea8a80c 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1848,6 +1848,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) -- GitLab From 109ec558370f78143509d9ebb6c26e19de1f1fb9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 11 Jan 2018 08:35:25 +0000 Subject: [PATCH 0065/1646] drm/i915/pmu: Only enumerate available counters in sysfs Switch over to dynamically creating device attributes, which are in turn used by the perf core to expose available counters in sysfs. This way we do not expose counters which are not avaiable on the current platform, and are so more consistent between what we reply to open attempts via the perf_event_open(2), and what is discoverable in sysfs. v2: * Simplify attribute pointer freeing loop. * Changed attr init from macro to function. * More common error unwind. (Chris Wilson) * Rename some locals. (Chris Wilson) v3: * Fixed double semi-colon. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20180111083525.32394-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 321 ++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_pmu.h | 8 + 2 files changed, 251 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 55a8a1e294248..9139bc8df82b3 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -290,23 +290,44 @@ static void i915_pmu_event_destroy(struct perf_event *event) WARN_ON(event->parent); } -static int engine_event_init(struct perf_event *event) +static int +engine_event_status(struct intel_engine_cs *engine, + enum drm_i915_pmu_engine_sample sample) { - struct drm_i915_private *i915 = - container_of(event->pmu, typeof(*i915), pmu.base); - - if (!intel_engine_lookup_user(i915, engine_event_class(event), - engine_event_instance(event))) - return -ENODEV; - - switch (engine_event_sample(event)) { + switch (sample) { case I915_SAMPLE_BUSY: case I915_SAMPLE_WAIT: break; case I915_SAMPLE_SEMA: + if (INTEL_GEN(engine->i915) < 6) + return -ENODEV; + break; + default: + return -ENOENT; + } + + return 0; +} + +static int +config_status(struct drm_i915_private *i915, u64 config) +{ + switch (config) { + case I915_PMU_ACTUAL_FREQUENCY: + if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) + /* Requires a mutex for sampling! */ + return -ENODEV; + /* Fall-through. */ + case I915_PMU_REQUESTED_FREQUENCY: if (INTEL_GEN(i915) < 6) return -ENODEV; break; + case I915_PMU_INTERRUPTS: + break; + case I915_PMU_RC6_RESIDENCY: + if (!HAS_RC6(i915)) + return -ENODEV; + break; default: return -ENOENT; } @@ -314,6 +335,20 @@ static int engine_event_init(struct perf_event *event) return 0; } +static int engine_event_init(struct perf_event *event) +{ + struct drm_i915_private *i915 = + container_of(event->pmu, typeof(*i915), pmu.base); + struct intel_engine_cs *engine; + + engine = intel_engine_lookup_user(i915, engine_event_class(event), + engine_event_instance(event)); + if (!engine) + return -ENODEV; + + return engine_event_status(engine, engine_event_sample(event)); +} + static int i915_pmu_event_init(struct perf_event *event) { struct drm_i915_private *i915 = @@ -337,30 +372,10 @@ static int i915_pmu_event_init(struct perf_event *event) if (!cpumask_test_cpu(event->cpu, &i915_pmu_cpumask)) return -EINVAL; - if (is_engine_event(event)) { + if (is_engine_event(event)) ret = engine_event_init(event); - } else { - ret = 0; - switch (event->attr.config) { - case I915_PMU_ACTUAL_FREQUENCY: - if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) - /* Requires a mutex for sampling! */ - ret = -ENODEV; - case I915_PMU_REQUESTED_FREQUENCY: - if (INTEL_GEN(i915) < 6) - ret = -ENODEV; - break; - case I915_PMU_INTERRUPTS: - break; - case I915_PMU_RC6_RESIDENCY: - if (!HAS_RC6(i915)) - ret = -ENODEV; - break; - default: - ret = -ENOENT; - break; - } - } + else + ret = config_status(i915, event->attr.config); if (ret) return ret; @@ -657,52 +672,9 @@ static ssize_t i915_pmu_event_show(struct device *dev, return sprintf(buf, "config=0x%lx\n", eattr->val); } -#define I915_EVENT_ATTR(_name, _config) \ - (&((struct i915_ext_attribute[]) { \ - { .attr = __ATTR(_name, 0444, i915_pmu_event_show, NULL), \ - .val = _config, } \ - })[0].attr.attr) - -#define I915_EVENT_STR(_name, _str) \ - (&((struct perf_pmu_events_attr[]) { \ - { .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \ - .id = 0, \ - .event_str = _str, } \ - })[0].attr.attr) - -#define I915_EVENT(_name, _config, _unit) \ - I915_EVENT_ATTR(_name, _config), \ - I915_EVENT_STR(_name.unit, _unit) - -#define I915_ENGINE_EVENT(_name, _class, _instance, _sample) \ - I915_EVENT_ATTR(_name, __I915_PMU_ENGINE(_class, _instance, _sample)), \ - I915_EVENT_STR(_name.unit, "ns") - -#define I915_ENGINE_EVENTS(_name, _class, _instance) \ - I915_ENGINE_EVENT(_name##_instance-busy, _class, _instance, I915_SAMPLE_BUSY), \ - I915_ENGINE_EVENT(_name##_instance-sema, _class, _instance, I915_SAMPLE_SEMA), \ - I915_ENGINE_EVENT(_name##_instance-wait, _class, _instance, I915_SAMPLE_WAIT) - -static struct attribute *i915_pmu_events_attrs[] = { - I915_ENGINE_EVENTS(rcs, I915_ENGINE_CLASS_RENDER, 0), - I915_ENGINE_EVENTS(bcs, I915_ENGINE_CLASS_COPY, 0), - I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 0), - I915_ENGINE_EVENTS(vcs, I915_ENGINE_CLASS_VIDEO, 1), - I915_ENGINE_EVENTS(vecs, I915_ENGINE_CLASS_VIDEO_ENHANCE, 0), - - I915_EVENT(actual-frequency, I915_PMU_ACTUAL_FREQUENCY, "MHz"), - I915_EVENT(requested-frequency, I915_PMU_REQUESTED_FREQUENCY, "MHz"), - - I915_EVENT_ATTR(interrupts, I915_PMU_INTERRUPTS), - - I915_EVENT(rc6-residency, I915_PMU_RC6_RESIDENCY, "ns"), - - NULL, -}; - -static const struct attribute_group i915_pmu_events_attr_group = { +static struct attribute_group i915_pmu_events_attr_group = { .name = "events", - .attrs = i915_pmu_events_attrs, + /* Patch in attrs at runtime. */ }; static ssize_t @@ -720,7 +692,7 @@ static struct attribute *i915_cpumask_attrs[] = { NULL, }; -static struct attribute_group i915_pmu_cpumask_attr_group = { +static const struct attribute_group i915_pmu_cpumask_attr_group = { .attrs = i915_cpumask_attrs, }; @@ -731,6 +703,191 @@ static const struct attribute_group *i915_pmu_attr_groups[] = { NULL }; +#define __event(__config, __name, __unit) \ +{ \ + .config = (__config), \ + .name = (__name), \ + .unit = (__unit), \ +} + +#define __engine_event(__sample, __name) \ +{ \ + .sample = (__sample), \ + .name = (__name), \ +} + +static struct i915_ext_attribute * +add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config) +{ + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = i915_pmu_event_show; + attr->val = config; + + return ++attr; +} + +static struct perf_pmu_events_attr * +add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, + const char *str) +{ + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = perf_event_sysfs_show; + attr->event_str = str; + + return ++attr; +} + +static struct attribute ** +create_event_attributes(struct drm_i915_private *i915) +{ + static const struct { + u64 config; + const char *name; + const char *unit; + } events[] = { + __event(I915_PMU_ACTUAL_FREQUENCY, "actual-frequency", "MHz"), + __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "MHz"), + __event(I915_PMU_INTERRUPTS, "interrupts", NULL), + __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), + }; + static const struct { + enum drm_i915_pmu_engine_sample sample; + char *name; + } engine_events[] = { + __engine_event(I915_SAMPLE_BUSY, "busy"), + __engine_event(I915_SAMPLE_SEMA, "sema"), + __engine_event(I915_SAMPLE_WAIT, "wait"), + }; + unsigned int count = 0; + struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter; + struct i915_ext_attribute *i915_attr = NULL, *i915_iter; + struct attribute **attr = NULL, **attr_iter; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int i; + + /* Count how many counters we will be exposing. */ + for (i = 0; i < ARRAY_SIZE(events); i++) { + if (!config_status(i915, events[i].config)) + count++; + } + + for_each_engine(engine, i915, id) { + for (i = 0; i < ARRAY_SIZE(engine_events); i++) { + if (!engine_event_status(engine, + engine_events[i].sample)) + count++; + } + } + + /* Allocate attribute objects and table. */ + i915_attr = kzalloc(count * sizeof(*i915_attr), GFP_KERNEL); + if (!i915_attr) + goto err_alloc; + + pmu_attr = kzalloc(count * sizeof(*pmu_attr), GFP_KERNEL); + if (!pmu_attr) + goto err_alloc; + + /* Max one pointer of each attribute type plus a termination entry. */ + attr = kzalloc((count * 2 + 1) * sizeof(attr), GFP_KERNEL); + if (!attr) + goto err_alloc; + + i915_iter = i915_attr; + pmu_iter = pmu_attr; + attr_iter = attr; + + /* Initialize supported non-engine counters. */ + for (i = 0; i < ARRAY_SIZE(events); i++) { + char *str; + + if (config_status(i915, events[i].config)) + continue; + + str = kstrdup(events[i].name, GFP_KERNEL); + if (!str) + goto err; + + *attr_iter++ = &i915_iter->attr.attr; + i915_iter = add_i915_attr(i915_iter, str, events[i].config); + + if (events[i].unit) { + str = kasprintf(GFP_KERNEL, "%s.unit", events[i].name); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, events[i].unit); + } + } + + /* Initialize supported engine counters. */ + for_each_engine(engine, i915, id) { + for (i = 0; i < ARRAY_SIZE(engine_events); i++) { + char *str; + + if (engine_event_status(engine, + engine_events[i].sample)) + continue; + + str = kasprintf(GFP_KERNEL, "%s-%s", + engine->name, engine_events[i].name); + if (!str) + goto err; + + *attr_iter++ = &i915_iter->attr.attr; + i915_iter = + add_i915_attr(i915_iter, str, + __I915_PMU_ENGINE(engine->class, + engine->instance, + engine_events[i].sample)); + + str = kasprintf(GFP_KERNEL, "%s-%s.unit", + engine->name, engine_events[i].name); + if (!str) + goto err; + + *attr_iter++ = &pmu_iter->attr.attr; + pmu_iter = add_pmu_attr(pmu_iter, str, "ns"); + } + } + + i915->pmu.i915_attr = i915_attr; + i915->pmu.pmu_attr = pmu_attr; + + return attr; + +err:; + for (attr_iter = attr; *attr_iter; attr_iter++) + kfree((*attr_iter)->name); + +err_alloc: + kfree(attr); + kfree(i915_attr); + kfree(pmu_attr); + + return NULL; +} + +static void free_event_attributes(struct drm_i915_private *i915) +{ + struct attribute **attr_iter = i915_pmu_events_attr_group.attrs; + + for (; *attr_iter; attr_iter++) + kfree((*attr_iter)->name); + + kfree(i915_pmu_events_attr_group.attrs); + kfree(i915->pmu.i915_attr); + kfree(i915->pmu.pmu_attr); + + i915_pmu_events_attr_group.attrs = NULL; + i915->pmu.i915_attr = NULL; + i915->pmu.pmu_attr = NULL; +} + static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), node); @@ -806,6 +963,12 @@ void i915_pmu_register(struct drm_i915_private *i915) return; } + i915_pmu_events_attr_group.attrs = create_event_attributes(i915); + if (!i915_pmu_events_attr_group.attrs) { + ret = -ENOMEM; + goto err; + } + i915->pmu.base.attr_groups = i915_pmu_attr_groups; i915->pmu.base.task_ctx_nr = perf_invalid_context; i915->pmu.base.event_init = i915_pmu_event_init; @@ -838,6 +1001,7 @@ void i915_pmu_register(struct drm_i915_private *i915) perf_pmu_unregister(&i915->pmu.base); err: i915->pmu.base.event_init = NULL; + free_event_attributes(i915); DRM_NOTE("Failed to register PMU! (err=%d)\n", ret); } @@ -862,4 +1026,5 @@ void i915_pmu_unregister(struct drm_i915_private *i915) perf_pmu_unregister(&i915->pmu.base); i915->pmu.base.event_init = NULL; + free_event_attributes(i915); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 40c154d13565a..5a2e013a56bb6 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -94,6 +94,14 @@ struct i915_pmu { * struct intel_engine_cs. */ struct i915_pmu_sample sample[__I915_NUM_PMU_SAMPLERS]; + /** + * @i915_attr: Memory block holding device attributes. + */ + void *i915_attr; + /** + * @pmu_attr: Memory block holding device attributes. + */ + void *pmu_attr; }; #ifdef CONFIG_PERF_EVENTS -- GitLab From 2bbba4e94eb987701abcacae241929fd13a196b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jan 2018 14:04:02 +0000 Subject: [PATCH 0066/1646] drm/i915/pmu: Initialise our dynamic sysfs attributes for use with lockdep As we kmalloc our dynamic sysfs attributes, we have to give them an external static lock_class_key for them to use with lockdep. Fixes: 109ec558370f ("drm/i915/pmu: Only enumerate available counters in sysfs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180111140402.3984-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_pmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 9139bc8df82b3..95ab5e28f5be3 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -719,6 +719,7 @@ static const struct attribute_group *i915_pmu_attr_groups[] = { static struct i915_ext_attribute * add_i915_attr(struct i915_ext_attribute *attr, const char *name, u64 config) { + sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = name; attr->attr.attr.mode = 0444; attr->attr.show = i915_pmu_event_show; @@ -731,6 +732,7 @@ static struct perf_pmu_events_attr * add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name, const char *str) { + sysfs_attr_init(&attr->attr.attr); attr->attr.attr.name = name; attr->attr.attr.mode = 0444; attr->attr.show = perf_event_sysfs_show; -- GitLab From 4900727d35bb20028f9bd83146ec4bf78afffe30 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 11 Jan 2018 07:30:31 +0000 Subject: [PATCH 0067/1646] drm/i915/pmu: Reconstruct active state on starting busy-stats We have a hole in our busy-stat accounting if the pmu is enabled during a long running batch, the pmu will not start accumulating busy-time until the next context switch. This then fails tests that are only sampling a single batch. v2: Count each active port just once (context in/out events are only on the first and last assignment to a port). v3: Avoid hardcoding knowledge of 2 submission ports Fixes: 30e17b7847f5 ("drm/i915: Engine busy time tracking") Testcase: igt/perf_pmu/busy-start Testcase: igt/perf_pmu/busy-double-start Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180111073031.14614-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_engine_cs.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6bb51a502b8b4..d790bdc227ffb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1951,8 +1951,22 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) spin_lock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled == ~0) goto busy; - if (engine->stats.enabled++ == 0) + if (engine->stats.enabled++ == 0) { + struct intel_engine_execlists *execlists = &engine->execlists; + const struct execlist_port *port = execlists->port; + unsigned int num_ports = execlists_num_ports(execlists); + engine->stats.enabled_at = ktime_get(); + + /* XXX submission method oblivious? */ + while (num_ports-- && port_isset(port)) { + engine->stats.active++; + port++; + } + + if (engine->stats.active) + engine->stats.start = engine->stats.enabled_at; + } spin_unlock_irqrestore(&engine->stats.lock, flags); return 0; -- GitLab From 937f3acc605397ccfce28c835f0bf9d6da589060 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 3 Jan 2018 23:21:04 +0100 Subject: [PATCH 0068/1646] drm/ioctl: Remove trailing whitespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove a couple of trailing spaces. Signed-off-by: Noralf Trønnes Reviewed-by: Daniel Vetter Reviewed-by: Laurent Pinchart Link: https://patchwork.freedesktop.org/patch/msgid/20180103222110.45855-3-noralf@tronnes.org --- drivers/gpu/drm/drm_ioctl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 4aafe48020990..b1e96fb68ea88 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -509,7 +509,7 @@ int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) return -EACCES; /* MASTER is only for master or control clients */ - if (unlikely((flags & DRM_MASTER) && + if (unlikely((flags & DRM_MASTER) && !drm_is_current_master(file_priv) && !drm_is_control_client(file_priv))) return -EACCES; @@ -704,7 +704,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = { * * ##define DRM_IOCTL_MY_DRIVER_OPERATION \ * DRM_IOW(DRM_COMMAND_BASE, struct my_driver_operation) - * + * * DRM driver private IOCTL must be in the range from DRM_COMMAND_BASE to * DRM_COMMAND_END. Finally you need an array of &struct drm_ioctl_desc to wire * up the handlers and set the access rights:: -- GitLab From 85d0875a4f0e59887307e7a435999132836a8717 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Jan 2018 08:48:51 +0100 Subject: [PATCH 0069/1646] drm/bridge: Add bindings for TI THS8134 This adds device tree bindings for the Texas Instruments THS8134, THS8134A and THS8134B VGA DACs by extending and renaming the existing bindings for THS8135. These DACs are used for the VGA outputs on the ARM reference designs such as Integrator, Versatile and RealView. Cc: devicetree@vger.kernel.org Cc: Bartosz Golaszewski Acked-by: Rob Herring Reviewed-by: Laurent Pinchart Signed-off-by: Linus Walleij Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20180112074854.9560-1-linus.walleij@linaro.org --- .../bridge/{ti,ths8135.txt => ti,ths813x.txt} | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) rename Documentation/devicetree/bindings/display/bridge/{ti,ths8135.txt => ti,ths813x.txt} (69%) diff --git a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt b/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt similarity index 69% rename from Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt rename to Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt index 6ec1a880ac18b..df3d7c1ac09e5 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,ths8135.txt +++ b/Documentation/devicetree/bindings/display/bridge/ti,ths813x.txt @@ -1,11 +1,16 @@ -THS8135 Video DAC ------------------ +THS8134 and THS8135 Video DAC +----------------------------- -This is the binding for Texas Instruments THS8135 Video DAC bridge. +This is the binding for Texas Instruments THS8134, THS8134A, THS8134B and +THS8135 Video DAC bridges. Required properties: -- compatible: Must be "ti,ths8135" +- compatible: Must be one of + "ti,ths8134" + "ti,ths8134a," "ti,ths8134" + "ti,ths8134b", "ti,ths8134" + "ti,ths8135" Required nodes: -- GitLab From 36a776df6e088a0cec2303aacd2fe762830b2457 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Jan 2018 08:48:52 +0100 Subject: [PATCH 0070/1646] drm/bridge: Provide a way to embed timing info in bridges After some discussion and failed patch sets trying to convey the right timing information between the display engine and a bridge using the connector, I try instead to use an optional timing information container in the bridge itself, so that display engines can retrieve it from any bridge and use it to determine how to drive outputs. Signed-off-by: Linus Walleij Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20180112074854.9560-2-linus.walleij@linaro.org --- include/drm/drm_bridge.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 682d01ba920c0..bb7b97dfb93ee 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -29,6 +29,7 @@ #include struct drm_bridge; +struct drm_bridge_timings; struct drm_panel; /** @@ -222,6 +223,35 @@ struct drm_bridge_funcs { void (*enable)(struct drm_bridge *bridge); }; +/** + * struct drm_bridge_timings - timing information for the bridge + */ +struct drm_bridge_timings { + /** + * @sampling_edge: + * + * Tells whether the bridge samples the digital input signal + * from the display engine on the positive or negative edge of the + * clock, this should reuse the DRM_BUS_FLAG_PIXDATA_[POS|NEG]EDGE + * bitwise flags from the DRM connector (bit 2 and 3 valid). + */ + u32 sampling_edge; + /** + * @setup_time_ps: + * + * Defines the time in picoseconds the input data lines must be + * stable before the clock edge. + */ + u32 setup_time_ps; + /** + * @hold_time_ps: + * + * Defines the time in picoseconds taken for the bridge to sample the + * input signal after the clock edge. + */ + u32 hold_time_ps; +}; + /** * struct drm_bridge - central DRM bridge control structure * @dev: DRM device this bridge belongs to @@ -229,6 +259,8 @@ struct drm_bridge_funcs { * @next: the next bridge in the encoder chain * @of_node: device node pointer to the bridge * @list: to keep track of all added bridges + * @timings: the timing specification for the bridge, if any (may + * be NULL) * @funcs: control functions * @driver_private: pointer to the bridge driver's internal context */ @@ -240,6 +272,7 @@ struct drm_bridge { struct device_node *of_node; #endif struct list_head list; + const struct drm_bridge_timings *timings; const struct drm_bridge_funcs *funcs; void *driver_private; -- GitLab From 88dda5b4789f26d773563005567c69f85e1f08f5 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Jan 2018 08:48:53 +0100 Subject: [PATCH 0071/1646] drm/bridge: Add timing support to dumb VGA DAC This extends the dumb VGA DAC bridge to handle the THS8134A and THS8134B VGA DACs in addition to those already handled. We assign the proper timing data to the pointer inside the bridge struct so display controllers that need to align their timings to the bridge can pick it up and work from there. Cc: Bartosz Golaszewski Cc: Maxime Ripard Reviewed-by: Laurent Pinchart Signed-off-by: Linus Walleij Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20180112074854.9560-3-linus.walleij@linaro.org --- drivers/gpu/drm/bridge/dumb-vga-dac.c | 59 +++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c index de5e7dee7ad60..498d5948d1a80 100644 --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c @@ -11,6 +11,7 @@ */ #include +#include #include #include @@ -204,6 +205,7 @@ static int dumb_vga_probe(struct platform_device *pdev) vga->bridge.funcs = &dumb_vga_bridge_funcs; vga->bridge.of_node = pdev->dev.of_node; + vga->bridge.timings = of_device_get_match_data(&pdev->dev); drm_bridge_add(&vga->bridge); @@ -222,10 +224,61 @@ static int dumb_vga_remove(struct platform_device *pdev) return 0; } +/* + * We assume the ADV7123 DAC is the "default" for historical reasons + * Information taken from the ADV7123 datasheet, revision D. + * NOTE: the ADV7123EP seems to have other timings and need a new timings + * set if used. + */ +static const struct drm_bridge_timings default_dac_timings = { + /* Timing specifications, datasheet page 7 */ + .sampling_edge = DRM_BUS_FLAG_PIXDATA_POSEDGE, + .setup_time_ps = 500, + .hold_time_ps = 1500, +}; + +/* + * Information taken from the THS8134, THS8134A, THS8134B datasheet named + * "SLVS205D", dated May 1990, revised March 2000. + */ +static const struct drm_bridge_timings ti_ths8134_dac_timings = { + /* From timing diagram, datasheet page 9 */ + .sampling_edge = DRM_BUS_FLAG_PIXDATA_POSEDGE, + /* From datasheet, page 12 */ + .setup_time_ps = 3000, + /* I guess this means latched input */ + .hold_time_ps = 0, +}; + +/* + * Information taken from the THS8135 datasheet named "SLAS343B", dated + * May 2001, revised April 2013. + */ +static const struct drm_bridge_timings ti_ths8135_dac_timings = { + /* From timing diagram, datasheet page 14 */ + .sampling_edge = DRM_BUS_FLAG_PIXDATA_POSEDGE, + /* From datasheet, page 16 */ + .setup_time_ps = 2000, + .hold_time_ps = 500, +}; + static const struct of_device_id dumb_vga_match[] = { - { .compatible = "dumb-vga-dac" }, - { .compatible = "adi,adv7123" }, - { .compatible = "ti,ths8135" }, + { + .compatible = "dumb-vga-dac", + .data = NULL, + }, + { + .compatible = "adi,adv7123", + .data = &default_dac_timings, + }, + { + .compatible = "ti,ths8135", + .data = &ti_ths8135_dac_timings, + }, + { + .compatible = "ti,ths8134", + .data = &ti_ths8134_dac_timings, + }, {}, }; MODULE_DEVICE_TABLE(of, dumb_vga_match); -- GitLab From 49f81d80ab8496c400dee63b31530723a390911f Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 12 Jan 2018 08:48:54 +0100 Subject: [PATCH 0072/1646] drm/pl111: Support handling bridge timings If the bridge has a too strict setup time for the incoming signals, we may not be fast enough and then we need to compensate by outputting the signal on the inverse clock edge so it is for sure stable when the bridge samples it. Since bridges in difference to panels does not expose their connectors, make the connector optional in the display setup code. Acked-by: Laurent Pinchart Reviewed-by: Eric Anholt Signed-off-by: Linus Walleij Signed-off-by: Archit Taneja Link: https://patchwork.freedesktop.org/patch/msgid/20180112074854.9560-4-linus.walleij@linaro.org --- drivers/gpu/drm/pl111/Kconfig | 1 + drivers/gpu/drm/pl111/pl111_display.c | 35 ++++++++++++++++++++++++--- drivers/gpu/drm/pl111/pl111_drv.c | 20 ++++++++------- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/pl111/Kconfig b/drivers/gpu/drm/pl111/Kconfig index e5e2abd66491e..82cb3e60ddc8f 100644 --- a/drivers/gpu/drm/pl111/Kconfig +++ b/drivers/gpu/drm/pl111/Kconfig @@ -8,6 +8,7 @@ config DRM_PL111 select DRM_GEM_CMA_HELPER select DRM_BRIDGE select DRM_PANEL_BRIDGE + select DRM_DUMB_VGA_DAC select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE help Choose this option for DRM support for the PL111 CLCD controller. diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index 06c4bf756b692..7fe4040aea46a 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -94,6 +94,7 @@ static void pl111_display_enable(struct drm_simple_display_pipe *pipe, const struct drm_display_mode *mode = &cstate->mode; struct drm_framebuffer *fb = plane->state->fb; struct drm_connector *connector = priv->connector; + struct drm_bridge *bridge = priv->bridge; u32 cntl; u32 ppl, hsw, hfp, hbp; u32 lpp, vsw, vfp, vbp; @@ -143,11 +144,37 @@ static void pl111_display_enable(struct drm_simple_display_pipe *pipe, if (mode->flags & DRM_MODE_FLAG_NVSYNC) tim2 |= TIM2_IVS; - if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW) - tim2 |= TIM2_IOE; + if (connector) { + if (connector->display_info.bus_flags & DRM_BUS_FLAG_DE_LOW) + tim2 |= TIM2_IOE; - if (connector->display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE) - tim2 |= TIM2_IPC; + if (connector->display_info.bus_flags & + DRM_BUS_FLAG_PIXDATA_NEGEDGE) + tim2 |= TIM2_IPC; + } + + if (bridge) { + const struct drm_bridge_timings *btimings = bridge->timings; + + /* + * Here is when things get really fun. Sometimes the bridge + * timings are such that the signal out from PL11x is not + * stable before the receiving bridge (such as a dumb VGA DAC + * or similar) samples it. If that happens, we compensate by + * the only method we have: output the data on the opposite + * edge of the clock so it is for sure stable when it gets + * sampled. + * + * The PL111 manual does not contain proper timining diagrams + * or data for these details, but we know from experiments + * that the setup time is more than 3000 picoseconds (3 ns). + * If we have a bridge that requires the signal to be stable + * earlier than 3000 ps before the clock pulse, we have to + * output the data on the opposite edge to avoid flicker. + */ + if (btimings && btimings->setup_time_ps >= 3000) + tim2 ^= TIM2_IPC; + } tim2 |= cpl << 16; writel(tim2, priv->regs + CLCD_TIM2); diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index acb738c698739..19e1725c3ef68 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -108,11 +108,17 @@ static int pl111_modeset_init(struct drm_device *dev) ret = PTR_ERR(bridge); goto out_config; } - /* - * TODO: when we are using a different bridge than a panel - * (such as a dumb VGA connector) we need to devise a different - * method to get the connector out of the bridge. - */ + } else if (bridge) { + dev_info(dev->dev, "Using non-panel bridge\n"); + } else { + dev_err(dev->dev, "No bridge, exiting\n"); + return -ENODEV; + } + + priv->bridge = bridge; + if (panel) { + priv->panel = panel; + priv->connector = panel->connector; } ret = pl111_display_init(dev); @@ -126,10 +132,6 @@ static int pl111_modeset_init(struct drm_device *dev) if (ret) return ret; - priv->bridge = bridge; - priv->panel = panel; - priv->connector = panel->connector; - ret = drm_vblank_init(dev, 1); if (ret != 0) { dev_err(dev->dev, "Failed to init vblank\n"); -- GitLab From 4c501230846b99ad78435637500471caa77a65c8 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Fri, 12 Jan 2018 17:03:39 +0000 Subject: [PATCH 0073/1646] drm/i915/pmu: fix noderef.cocci warnings drivers/gpu/drm/i915/i915_pmu.c:795:34-40: ERROR: application of sizeof to pointer sizeof when applied to a pointer typed expression gives the size of the pointer Generated by: scripts/coccinelle/misc/noderef.cocci Fixes: 109ec558370f ("drm/i915/pmu: Only enumerate available counters in sysfs") Signed-off-by: Fengguang Wu Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180112170340.5387-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 95ab5e28f5be3..9be4f5201e41e 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -794,7 +794,7 @@ create_event_attributes(struct drm_i915_private *i915) goto err_alloc; /* Max one pointer of each attribute type plus a termination entry. */ - attr = kzalloc((count * 2 + 1) * sizeof(attr), GFP_KERNEL); + attr = kzalloc((count * 2 + 1) * sizeof(*attr), GFP_KERNEL); if (!attr) goto err_alloc; -- GitLab From dd5fec87efdd54db6870b5d24c1fd3cda94f4510 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 12 Jan 2018 17:03:40 +0000 Subject: [PATCH 0074/1646] drm/i915/pmu: Use kcalloc instead of kzalloc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kcalloc is preffered for allocating arrays. Signed-off-by: Tvrtko Ursulin Suggested-by: Ville Syrjälä Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20180112170340.5387-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_pmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 9be4f5201e41e..065a28c713c48 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -785,16 +785,16 @@ create_event_attributes(struct drm_i915_private *i915) } /* Allocate attribute objects and table. */ - i915_attr = kzalloc(count * sizeof(*i915_attr), GFP_KERNEL); + i915_attr = kcalloc(count, sizeof(*i915_attr), GFP_KERNEL); if (!i915_attr) goto err_alloc; - pmu_attr = kzalloc(count * sizeof(*pmu_attr), GFP_KERNEL); + pmu_attr = kcalloc(count, sizeof(*pmu_attr), GFP_KERNEL); if (!pmu_attr) goto err_alloc; /* Max one pointer of each attribute type plus a termination entry. */ - attr = kzalloc((count * 2 + 1) * sizeof(*attr), GFP_KERNEL); + attr = kcalloc(count * 2 + 1, sizeof(*attr), GFP_KERNEL); if (!attr) goto err_alloc; -- GitLab From 457f1e5f47024793c7b4095e6647ce48ea0bf697 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 3 Jan 2018 13:38:22 -0800 Subject: [PATCH 0075/1646] drm/i915/psr: Kill psr.source_ok flag. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This flag has become redundant since commit 4d90f2d507ab ("drm/i915: Start tracking PSR state in crtc state") It is set at the same place as psr.enabled, which is also exposed via debugfs. Cc: Rodrigo Vivi Cc: Ville Syrjälä Signed-off-by: Dhinakaran Pandiyan Acked-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180103213824.1405-1-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 1 - drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/intel_psr.c | 2 -- 3 files changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2bb63073d73fd..6890340387b7f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2529,7 +2529,6 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) mutex_lock(&dev_priv->psr.lock); seq_printf(m, "Sink_Support: %s\n", yesno(dev_priv->psr.sink_support)); - seq_printf(m, "Source_OK: %s\n", yesno(dev_priv->psr.source_ok)); seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled)); seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active)); seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index caebd5825279e..cb1b69e7f4391 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -754,7 +754,6 @@ struct i915_drrs { struct i915_psr { struct mutex lock; bool sink_support; - bool source_ok; struct intel_dp *enabled; bool active; struct delayed_work work; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 2e32615eeada8..17636dce1cb0a 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -522,8 +522,6 @@ void intel_psr_enable(struct intel_dp *intel_dp, } dev_priv->psr.psr2_support = crtc_state->has_psr2; - dev_priv->psr.source_ok = true; - dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.setup_vsc(intel_dp, crtc_state); -- GitLab From 4371d89601c301fdbbdb9616c887ae77b38f6404 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 3 Jan 2018 13:38:23 -0800 Subject: [PATCH 0076/1646] drm/i915/psr: CAN_PSR() macro to check for PSR source and sink support. The global variable dev_priv->psr.sink_support is set if an eDP sink supports PSR. Use this instead of redoing the check with is_edp_psr(). Combine source and sink support checks into a macro that can be used to return early from psr_{invalidate, single_frame_update, flush}. Cc: Rodrigo Vivi Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180103213824.1405-2-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_psr.c | 19 ++++--------------- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 30f791f89d649..48676e99316eb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1760,6 +1760,7 @@ static inline void intel_backlight_device_unregister(struct intel_connector *con /* intel_psr.c */ +#define CAN_PSR(dev_priv) (HAS_PSR(dev_priv) && dev_priv->psr.sink_support) void intel_psr_enable(struct intel_dp *intel_dp, const struct intel_crtc_state *crtc_state); void intel_psr_disable(struct intel_dp *intel_dp, diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 17636dce1cb0a..df9b1d7baefb6 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -56,14 +56,6 @@ #include "intel_drv.h" #include "i915_drv.h" -static bool is_edp_psr(struct intel_dp *intel_dp) -{ - if (!intel_dp_is_edp(intel_dp)) - return false; - - return intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED; -} - static bool vlv_is_psr_active_on_pipe(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -358,10 +350,7 @@ void intel_psr_compute_config(struct intel_dp *intel_dp, &crtc_state->base.adjusted_mode; int psr_setup_time; - if (!HAS_PSR(dev_priv)) - return; - - if (!is_edp_psr(intel_dp)) + if (!CAN_PSR(dev_priv)) return; if (!i915_modparams.enable_psr) { @@ -794,7 +783,7 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, enum pipe pipe; u32 val; - if (!HAS_PSR(dev_priv)) + if (!CAN_PSR(dev_priv)) return; /* @@ -843,7 +832,7 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, struct drm_crtc *crtc; enum pipe pipe; - if (!HAS_PSR(dev_priv)) + if (!CAN_PSR(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); @@ -883,7 +872,7 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, struct drm_crtc *crtc; enum pipe pipe; - if (!HAS_PSR(dev_priv)) + if (!CAN_PSR(dev_priv)) return; mutex_lock(&dev_priv->psr.lock); -- GitLab From c9ef291a7e79ebc9fb1723090c12fae626e74c46 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 3 Jan 2018 13:38:24 -0800 Subject: [PATCH 0077/1646] drm/i915/psr: Avoid initializing PSR if there is no sink support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DPCD read for the eDP is complete by the time intel_psr_init() is called, which means we can avoid initializing PSR structures and state if there is no sink support. Cc: Rodrigo Vivi Cc: Ville Syrjälä Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Rodrigo Vivi Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20180103213824.1405-3-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 7 ++++++- drivers/gpu/drm/i915/intel_psr.c | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6890340387b7f..cc659b4b2a450 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2521,14 +2521,19 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) u32 stat[3]; enum pipe pipe; bool enabled = false; + bool sink_support; if (!HAS_PSR(dev_priv)) return -ENODEV; + sink_support = dev_priv->psr.sink_support; + seq_printf(m, "Sink_Support: %s\n", yesno(sink_support)); + if (!sink_support) + return 0; + intel_runtime_pm_get(dev_priv); mutex_lock(&dev_priv->psr.lock); - seq_printf(m, "Sink_Support: %s\n", yesno(dev_priv->psr.sink_support)); seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled)); seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active)); seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index df9b1d7baefb6..8636503664256 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -503,6 +503,9 @@ void intel_psr_enable(struct intel_dp *intel_dp, if (!crtc_state->has_psr) return; + if (WARN_ON(!CAN_PSR(dev_priv))) + return; + WARN_ON(dev_priv->drrs.dp); mutex_lock(&dev_priv->psr.lock); if (dev_priv->psr.enabled) { @@ -633,6 +636,9 @@ void intel_psr_disable(struct intel_dp *intel_dp, if (!old_crtc_state->has_psr) return; + if (WARN_ON(!CAN_PSR(dev_priv))) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -913,6 +919,9 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr_mmio_base = IS_HASWELL(dev_priv) ? HSW_EDP_PSR_BASE : BDW_EDP_PSR_BASE; + if (!dev_priv->psr.sink_support) + return; + /* Per platform default: all disabled. */ if (i915_modparams.enable_psr == -1) i915_modparams.enable_psr = 0; -- GitLab From c32164b1f6a2eafa0658bbf33d02b2da41c72e5a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jan 2018 09:06:42 +0000 Subject: [PATCH 0078/1646] drm/i915: Only defer freeing of fence callback when also using the timer Without an accompanying timer (for internal fences), we can free the fence callback immediately as we do not need to employ the RCU barrier to serialise with the timer. By avoiding the RCU delay, we can avoid the extra mempressure under heavy inter-engine request utilisation. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180115090643.26696-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 3669f5eeb91ef..13021326d7775 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -398,7 +398,12 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma, if (fence) i915_sw_fence_complete(fence); - irq_work_queue(&cb->work); + if (cb->dma) { + irq_work_queue(&cb->work); + return; + } + + kfree(cb); } static void irq_i915_sw_fence_work(struct irq_work *wrk) @@ -437,10 +442,12 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, i915_sw_fence_await(fence); cb->dma = NULL; - timer_setup(&cb->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE); - init_irq_work(&cb->work, irq_i915_sw_fence_work); if (timeout) { cb->dma = dma_fence_get(dma); + init_irq_work(&cb->work, irq_i915_sw_fence_work); + + timer_setup(&cb->timer, + timer_i915_sw_fence_wake, TIMER_IRQSAFE); mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout)); } -- GitLab From f255c1e91e333ba0bd177120748f4caf294b812b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jan 2018 09:06:43 +0000 Subject: [PATCH 0079/1646] drm/i915/fence: Separate timeout mechanism for awaiting on dma-fences As the timeout mechanism has grown more and more complicated, using multiple deferred tasks and more than doubling the size of our struct, split the two implementations to streamline the simpler no-timeout callback variant. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180115090643.26696-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 61 ++++++++++++++++++---------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 13021326d7775..1de5173e53a29 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -365,18 +365,31 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence, struct i915_sw_dma_fence_cb { struct dma_fence_cb base; struct i915_sw_fence *fence; +}; + +struct i915_sw_dma_fence_cb_timer { + struct i915_sw_dma_fence_cb base; struct dma_fence *dma; struct timer_list timer; struct irq_work work; struct rcu_head rcu; }; +static void dma_i915_sw_fence_wake(struct dma_fence *dma, + struct dma_fence_cb *data) +{ + struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + + i915_sw_fence_complete(cb->fence); + kfree(cb); +} + static void timer_i915_sw_fence_wake(struct timer_list *t) { - struct i915_sw_dma_fence_cb *cb = from_timer(cb, t, timer); + struct i915_sw_dma_fence_cb_timer *cb = from_timer(cb, t, timer); struct i915_sw_fence *fence; - fence = xchg(&cb->fence, NULL); + fence = xchg(&cb->base.fence, NULL); if (!fence) return; @@ -388,27 +401,24 @@ static void timer_i915_sw_fence_wake(struct timer_list *t) i915_sw_fence_complete(fence); } -static void dma_i915_sw_fence_wake(struct dma_fence *dma, - struct dma_fence_cb *data) +static void dma_i915_sw_fence_wake_timer(struct dma_fence *dma, + struct dma_fence_cb *data) { - struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base); + struct i915_sw_dma_fence_cb_timer *cb = + container_of(data, typeof(*cb), base.base); struct i915_sw_fence *fence; - fence = xchg(&cb->fence, NULL); + fence = xchg(&cb->base.fence, NULL); if (fence) i915_sw_fence_complete(fence); - if (cb->dma) { - irq_work_queue(&cb->work); - return; - } - - kfree(cb); + irq_work_queue(&cb->work); } static void irq_i915_sw_fence_work(struct irq_work *wrk) { - struct i915_sw_dma_fence_cb *cb = container_of(wrk, typeof(*cb), work); + struct i915_sw_dma_fence_cb_timer *cb = + container_of(wrk, typeof(*cb), work); del_timer_sync(&cb->timer); dma_fence_put(cb->dma); @@ -422,6 +432,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, gfp_t gfp) { struct i915_sw_dma_fence_cb *cb; + dma_fence_func_t func; int ret; debug_fence_assert(fence); @@ -430,7 +441,10 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, if (dma_fence_is_signaled(dma)) return 0; - cb = kmalloc(sizeof(*cb), gfp); + cb = kmalloc(timeout ? + sizeof(struct i915_sw_dma_fence_cb_timer) : + sizeof(struct i915_sw_dma_fence_cb), + gfp); if (!cb) { if (!gfpflags_allow_blocking(gfp)) return -ENOMEM; @@ -441,21 +455,26 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, cb->fence = fence; i915_sw_fence_await(fence); - cb->dma = NULL; + func = dma_i915_sw_fence_wake; if (timeout) { - cb->dma = dma_fence_get(dma); - init_irq_work(&cb->work, irq_i915_sw_fence_work); + struct i915_sw_dma_fence_cb_timer *timer = + container_of(cb, typeof(*timer), base); - timer_setup(&cb->timer, + timer->dma = dma_fence_get(dma); + init_irq_work(&timer->work, irq_i915_sw_fence_work); + + timer_setup(&timer->timer, timer_i915_sw_fence_wake, TIMER_IRQSAFE); - mod_timer(&cb->timer, round_jiffies_up(jiffies + timeout)); + mod_timer(&timer->timer, round_jiffies_up(jiffies + timeout)); + + func = dma_i915_sw_fence_wake_timer; } - ret = dma_fence_add_callback(dma, &cb->base, dma_i915_sw_fence_wake); + ret = dma_fence_add_callback(dma, &cb->base, func); if (ret == 0) { ret = 1; } else { - dma_i915_sw_fence_wake(dma, &cb->base); + func(dma, &cb->base); if (ret == -ENOENT) /* fence already signaled */ ret = 0; } -- GitLab From 99e48bf98dd036090b480a12c39e8b971731247e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 15 Jan 2018 09:20:41 +0000 Subject: [PATCH 0080/1646] drm/i915: Lock out execlist tasklet while peeking inside for busy-stats In order to prevent a race condition where we may end up overaccounting the active state and leaving the busy-stats believing the GPU is 100% busy, lock out the tasklet while we reconstruct the busy state. There is no direct spinlock guard for the execlists->port[], so we need to utilise tasklet_disable() as a synchronous barrier to prevent it, the only writer to execlists->port[], from running at the same time as the enable. Fixes: 4900727d35bb ("drm/i915/pmu: Reconstruct active state on starting busy-stats") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20180115092041.13509-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_engine_cs.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index d790bdc227ffb..b221610f2365f 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1943,16 +1943,22 @@ intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance) */ int intel_enable_engine_stats(struct intel_engine_cs *engine) { + struct intel_engine_execlists *execlists = &engine->execlists; unsigned long flags; + int err = 0; if (!intel_engine_supports_stats(engine)) return -ENODEV; + tasklet_disable(&execlists->tasklet); spin_lock_irqsave(&engine->stats.lock, flags); - if (engine->stats.enabled == ~0) - goto busy; + + if (unlikely(engine->stats.enabled == ~0)) { + err = -EBUSY; + goto unlock; + } + if (engine->stats.enabled++ == 0) { - struct intel_engine_execlists *execlists = &engine->execlists; const struct execlist_port *port = execlists->port; unsigned int num_ports = execlists_num_ports(execlists); @@ -1967,14 +1973,12 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) if (engine->stats.active) engine->stats.start = engine->stats.enabled_at; } - spin_unlock_irqrestore(&engine->stats.lock, flags); - return 0; - -busy: +unlock: spin_unlock_irqrestore(&engine->stats.lock, flags); + tasklet_enable(&execlists->tasklet); - return -EBUSY; + return err; } static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) -- GitLab From 2a678996d885e8464875710f6101d6c80e86ab14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Noralf=20Tr=C3=B8nnes?= Date: Wed, 10 Jan 2018 19:59:34 +0100 Subject: [PATCH 0081/1646] drm/tinydrm/mi0283qt: Use common include order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Include linux headers before drm headers as it's commonly done. Signed-off-by: Noralf Trønnes Reviewed-by: David Lechner Link: https://patchwork.freedesktop.org/patch/msgid/20180110185940.53841-2-noralf@tronnes.org --- drivers/gpu/drm/tinydrm/mi0283qt.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c index 674d407640bee..45f02b6052b12 100644 --- a/drivers/gpu/drm/tinydrm/mi0283qt.c +++ b/drivers/gpu/drm/tinydrm/mi0283qt.c @@ -9,17 +9,18 @@ * (at your option) any later version. */ -#include -#include -#include -#include -#include #include #include #include #include #include #include + +#include +#include +#include +#include +#include #include