Commit 1f1c24de authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'amd-drm-next-6.3-2023-01-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next



amd-drm-next-6.3-2023-01-13:

amdgpu:
- Fix possible segfault in failure case
- Rework FW requests to happen in early_init for all IPs so
  that we don't lose the sbios console if FW is missing
- PSR fixes
- Misc cleanups
- Unload fix
- SMU13 fixes

amdkfd:
- Fix for cleared VRAM BOs
- Fix cleanup if GPUVM creation fails
- Memory accounting fix
- Use resource_size rather than open codeing it
- GC11 mGPU fix

radeon:
- Fix memory leak on shutdown

Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230113225911.7776-1-alexander.deucher@amd.com
parents 45be2048 0c2dece8
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -97,7 +97,7 @@ struct amdgpu_amdkfd_fence {

struct amdgpu_kfd_dev {
	struct kfd_dev *dev;
	uint64_t vram_used;
	int64_t vram_used;
	uint64_t vram_used_aligned;
	bool init_complete;
	struct work_struct reset_work;
@@ -271,9 +271,9 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
		((struct drm_file *)(drm_priv))->driver_priv)->vm)

int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
				     struct file *filp, u32 pasid);
				     struct amdgpu_vm *avm, u32 pasid);
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
					struct file *filp,
					struct amdgpu_vm *avm,
					void **process_info,
					struct dma_fence **ef);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
+10 −22
Original line number Diff line number Diff line
@@ -1431,18 +1431,11 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
}

int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
				     struct file *filp, u32 pasid)
				     struct amdgpu_vm *avm, u32 pasid)

{
	struct amdgpu_fpriv *drv_priv;
	struct amdgpu_vm *avm;
	int ret;

	ret = amdgpu_file_to_fpriv(filp, &drv_priv);
	if (ret)
		return ret;
	avm = &drv_priv->vm;

	/* Free the original amdgpu allocated pasid,
	 * will be replaced with kfd allocated pasid.
	 */
@@ -1459,19 +1452,12 @@ int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
}

int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
					   struct file *filp,
					   struct amdgpu_vm *avm,
					   void **process_info,
					   struct dma_fence **ef)
{
	struct amdgpu_fpriv *drv_priv;
	struct amdgpu_vm *avm;
	int ret;

	ret = amdgpu_file_to_fpriv(filp, &drv_priv);
	if (ret)
		return ret;
	avm = &drv_priv->vm;

	/* Already a compute VM? */
	if (avm->process_info)
		return -EINVAL;
@@ -1613,6 +1599,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	struct amdgpu_bo *bo;
	struct drm_gem_object *gobj = NULL;
	u32 domain, alloc_domain;
	uint64_t aligned_size;
	u64 alloc_flags;
	int ret;

@@ -1668,22 +1655,23 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	 * the memory.
	 */
	if ((*mem)->aql_queue)
		size = size >> 1;
		size >>= 1;
	aligned_size = PAGE_ALIGN(size);

	(*mem)->alloc_flags = flags;

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, flags);
	ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags);
	if (ret) {
		pr_debug("Insufficient memory\n");
		goto err_reserve_limit;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
			va, size, domain_string(alloc_domain));
			va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain));

	ret = amdgpu_gem_object_create(adev, size, 1, alloc_domain, alloc_flags,
	ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags,
				       bo_type, NULL, &gobj);
	if (ret) {
		pr_debug("Failed to create BO on domain %s. ret %d\n",
@@ -1740,7 +1728,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	/* Don't unreserve system mem limit twice */
	goto err_reserve_limit;
err_bo_create:
	amdgpu_amdkfd_unreserve_mem_limit(adev, size, flags);
	amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags);
err_reserve_limit:
	mutex_destroy(&(*mem)->lock);
	if (gobj)
@@ -2100,7 +2088,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b
	}

	amdgpu_amdkfd_remove_eviction_fence(
		bo, bo->kfd_bo->process_info->eviction_fence);
		bo, bo->vm_bo->vm->process_info->eviction_fence);

	amdgpu_bo_unreserve(bo);

+2 −9
Original line number Diff line number Diff line
@@ -411,17 +411,10 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
				return -EINVAL;
			}

			err = request_firmware(&adev->pm.fw, fw_name, adev->dev);
			if (err) {
				DRM_ERROR("Failed to request firmware\n");
				return err;
			}

			err = amdgpu_ucode_validate(adev->pm.fw);
			err = amdgpu_ucode_request(adev, &adev->pm.fw, fw_name);
			if (err) {
				DRM_ERROR("Failed to load firmware \"%s\"", fw_name);
				release_firmware(adev->pm.fw);
				adev->pm.fw = NULL;
				amdgpu_ucode_release(&adev->pm.fw);
				return err;
			}

+11 −11
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <generated/utsrelease.h>
#include <linux/pci-p2pdma.h>

#include <drm/drm_aperture.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
@@ -90,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
@@ -1982,17 +1985,10 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
	err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev);
	if (err) {
		dev_err(adev->dev,
			"Failed to load gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
	err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw);
	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
	if (err) {
		dev_err(adev->dev,
			"Failed to validate gpu_info firmware \"%s\"\n",
			"Failed to get gpu_info firmware \"%s\"\n",
			fw_name);
		goto out;
	}
@@ -3688,6 +3684,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	if (r)
		return r;

	/* Get rid of things like offb */
	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
	if (r)
		return r;

	/* Enable TMZ based on IP_VERSION */
	amdgpu_gmc_tmz_set(adev);

@@ -4023,8 +4024,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)

	amdgpu_fence_driver_sw_fini(adev);
	amdgpu_device_ip_fini(adev);
	release_firmware(adev->firmware.gpu_info_fw);
	adev->firmware.gpu_info_fw = NULL;
	amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
	adev->accel_working = false;
	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));

+0 −6
Original line number Diff line number Diff line
@@ -23,7 +23,6 @@
 */

#include <drm/amdgpu_drm.h>
#include <drm/drm_aperture.h>
#include <drm/drm_drv.h>
#include <drm/drm_fbdev_generic.h>
#include <drm/drm_gem.h>
@@ -2124,11 +2123,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
	}
#endif

	/* Get rid of things like offb */
	ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
	if (ret)
		return ret;

	adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
	if (IS_ERR(adev))
		return PTR_ERR(adev);
Loading