Commit e82c98f2 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-next-6.4-2023-04-14' of https://gitlab.freedesktop.org/agd5f/linux into drm-next



amd-drm-next-6.4-2023-04-14:

amdgpu:
- S4 fixes for APUs
- GFX11 fixes
- Misc code cleanups
- DCN 3.2 fixes
- DCN 3.1.4 fixes
- FPO/FAMS work to improve display power savings
- DP fixes
- UMC 8.10 code cleanup
- SDMA v4 fix
- GPU clock counter fixes
- SMU 13 fixes
- SDMA v6 invalidation fix for preemption
- RAS fixes
- S0ix fix
- GC 9.4.3 updates

amdkfd:
- Fix user pointers with IOMMU
- Fix coherency flag handling

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230414204609.7942-1-alexander.deucher@amd.com
parents afa351a1 541372bb
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -136,6 +136,7 @@ amdgpu-y += \
	gfx_v9_0.o \
	gfx_v9_4.o \
	gfx_v9_4_2.o \
	gfx_v9_4_3.o \
	gfx_v10_0.o \
	imu_v11_0.o \
	gfx_v11_0.o \
+2 −3
Original line number Diff line number Diff line
@@ -185,7 +185,6 @@ extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
extern uint amdgpu_force_long_training;
extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
@@ -471,7 +470,7 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
/*
 * Writeback
 */
#define AMDGPU_MAX_WB 256	/* Reserve at most 256 WB slots for amdgpu-owned rings. */
#define AMDGPU_MAX_WB 1024	/* Reserve at most 1024 WB slots for amdgpu-owned rings. */

struct amdgpu_wb {
	struct amdgpu_bo	*wb_obj;
@@ -1222,7 +1221,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
	((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r)))
#define amdgpu_asic_invalidate_hdp(adev, r) \
	((adev)->asic_funcs->invalidate_hdp ? (adev)->asic_funcs->invalidate_hdp((adev), (r)) : \
	 ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : 0))
	 ((adev)->hdp.funcs->invalidate_hdp ? (adev)->hdp.funcs->invalidate_hdp((adev), (r)) : (void)0))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
+6 −1
Original line number Diff line number Diff line
@@ -981,7 +981,12 @@ static bool amdgpu_atcs_pci_probe_handle(struct pci_dev *pdev)
 */
bool amdgpu_acpi_should_gpu_reset(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
	if ((adev->flags & AMD_IS_APU) &&
	    adev->gfx.imu.funcs) /* Not need to do mode2 reset for IMU enabled APUs */
		return false;

	if ((adev->flags & AMD_IS_APU) &&
	    amdgpu_acpi_is_s3_active(adev))
		return false;

	if (amdgpu_sriov_vf(adev))
+3 −3
Original line number Diff line number Diff line
@@ -96,7 +96,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
					 size_t *start_offset)
{
	/*
	 * The first num_doorbells are used by amdgpu.
	 * The first num_kernel_doorbells are used by amdgpu.
	 * amdkfd takes whatever's left in the aperture.
	 */
	if (adev->enable_mes) {
@@ -109,11 +109,11 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
		*aperture_base = adev->doorbell.base;
		*aperture_size = 0;
		*start_offset = 0;
	} else if (adev->doorbell.size > adev->doorbell.num_doorbells *
	} else if (adev->doorbell.size > adev->doorbell.num_kernel_doorbells *
						sizeof(u32)) {
		*aperture_base = adev->doorbell.base;
		*aperture_size = adev->doorbell.size;
		*start_offset = adev->doorbell.num_doorbells * sizeof(u32);
		*start_offset = adev->doorbell.num_kernel_doorbells * sizeof(u32);
	} else {
		*aperture_base = 0;
		*aperture_size = 0;
+31 −11
Original line number Diff line number Diff line
@@ -82,6 +82,25 @@ static bool kfd_mem_is_attached(struct amdgpu_vm *avm,
	return false;
}

/**
 * reuse_dmamap() - Tell whether a device can share the original userptr BO
 * @adev: Device that wants to map the BO
 * @bo_adev: Device the BO was allocated on
 *
 * The original BO can be shared when one of the following holds:
 *  - both @adev and @bo_adev have ram_is_direct_mapped set, or
 *  - both devices report the same iommu_group.
 *
 * Return: true if @adev can share the original userptr BO, false otherwise.
 */
static bool reuse_dmamap(struct amdgpu_device *adev, struct amdgpu_device *bo_adev)
{
	/* Both devices are direct mapped: nothing more to check. */
	if (adev->ram_is_direct_mapped && bo_adev->ram_is_direct_mapped)
		return true;

	/* Otherwise sharing is only possible within a single IOMMU group. */
	return adev->dev->iommu_group == bo_adev->dev->iommu_group;
}

/* Set memory usage limits. Currently, limits are
 *  System (TTM + userptr) memory - 15/16th System RAM
 *  TTM memory - 3/8th System RAM
@@ -253,15 +272,19 @@ create_dmamap_sg_bo(struct amdgpu_device *adev,
		 struct kgd_mem *mem, struct amdgpu_bo **bo_out)
{
	struct drm_gem_object *gem_obj;
	int ret, align;
	int ret;
	uint64_t flags = 0;

	ret = amdgpu_bo_reserve(mem->bo, false);
	if (ret)
		return ret;

	align = 1;
	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align,
			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE,
	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)
		flags |= mem->bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
					AMDGPU_GEM_CREATE_UNCACHED);

	ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1,
			AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags,
			ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj);

	amdgpu_bo_unreserve(mem->bo);
@@ -481,9 +504,6 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
	if (unlikely(ret))
		goto release_sg;

	drm_prime_sg_to_dma_addr_array(ttm->sg, ttm->dma_address,
				       ttm->num_pages);

	amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
	ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (ret)
@@ -805,11 +825,11 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem,
			 va + bo_size, vm);

		if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) ||
		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && adev->ram_is_direct_mapped) ||
		    (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) ||
			same_hive) {
			/* Mappings on the local GPU, or VRAM mappings in the
			 * local hive, or userptr mapping IOMMU direct map mode
			 * share the original BO
			 * local hive, or userptr mapping can reuse dma map
			 * address space share the original BO
			 */
			attachment[i]->type = KFD_MEM_ATT_SHARED;
			bo[i] = mem->bo;
Loading