Commit 38a15ad9 authored by Dave Airlie
Browse files

Merge tag 'amd-drm-next-5.18-2022-02-25' of...

Merge tag 'amd-drm-next-5.18-2022-02-25' of https://gitlab.freedesktop.org/agd5f/linux

 into drm-next

amd-drm-next-5.18-2022-02-25:

amdgpu:
- Raven2 suspend/resume fix
- SDMA 5.2.6 updates
- VCN 3.1.2 updates
- SMU 13.0.5 updates
- DCN 3.1.5 updates
- Virtual display fixes
- SMU code cleanup
- Harvest fixes
- Expose benchmark tests via debugfs
- Drop no longer relevant gart aperture tests
- More RAS restructuring
- W=1 fixes
- PSR rework
- DP/VGA adapter fixes
- DP MST fixes
- GPUVM eviction fix
- GPU reset debugfs register dumping support
- Misc display fixes
- SR-IOV fix
- Aldebaran mGPU fix
- Add module parameter to disable XGMI for testing

amdkfd:
- IH ring overflow logging fixes
- CRIU fixes
- Misc fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220225183535.5907-1-alexander.deucher@amd.com
parents 6c64ae22 111aeed2
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
	atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \
	amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \
	amdgpu_gem.o amdgpu_ring.o \
	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
	atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+8 −9
Original line number Diff line number Diff line
@@ -155,8 +155,6 @@ extern int amdgpu_vis_vram_limit;
extern int amdgpu_gart_size;
extern int amdgpu_gtt_size;
extern int amdgpu_moverate;
extern int amdgpu_benchmarking;
extern int amdgpu_testing;
extern int amdgpu_audio;
extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
@@ -213,6 +211,7 @@ extern int amdgpu_mes;
extern int amdgpu_noretry;
extern int amdgpu_force_asic_type;
extern int amdgpu_smartshift_bias;
extern int amdgpu_use_xgmi_p2p;
#ifdef CONFIG_HSA_AMD
extern int sched_policy;
extern bool debug_evictions;
@@ -586,13 +585,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
/*
 * Benchmarking
 */
void amdgpu_benchmark(struct amdgpu_device *adev, int test_number);


/*
 * Testing
 */
void amdgpu_test_moves(struct amdgpu_device *adev);
int amdgpu_benchmark(struct amdgpu_device *adev, int test_number);

/*
 * ASIC specific register table accessible by UMD
@@ -1102,6 +1095,12 @@ struct amdgpu_device {
	struct ip_discovery_top         *ip_top;

	struct amdgpu_reset_domain	*reset_domain;

	struct mutex			benchmark_mutex;

	/* reset dump register */
	uint32_t                        *reset_dump_reg_list;
	int                             num_regs;
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+130 −114
Original line number Diff line number Diff line
@@ -29,14 +29,13 @@
#define AMDGPU_BENCHMARK_COMMON_MODES_N 17

static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
				    uint64_t saddr, uint64_t daddr, int n)
				    uint64_t saddr, uint64_t daddr, int n, s64 *time_ms)
{
	unsigned long start_jiffies;
	unsigned long end_jiffies;
	ktime_t stime, etime;
	struct dma_fence *fence;
	int i, r;

	start_jiffies = jiffies;
	stime = ktime_get();
	for (i = 0; i < n; i++) {
		struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
		r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence,
@@ -48,120 +47,81 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size,
		if (r)
			goto exit_do_move;
	}
	end_jiffies = jiffies;
	r = jiffies_to_msecs(end_jiffies - start_jiffies);

exit_do_move:
	etime = ktime_get();
	*time_ms = ktime_ms_delta(etime, stime);

	return r;
}


static void amdgpu_benchmark_log_results(int n, unsigned size,
					 unsigned int time,
static void amdgpu_benchmark_log_results(struct amdgpu_device *adev,
					 int n, unsigned size,
					 s64 time_ms,
					 unsigned sdomain, unsigned ddomain,
					 char *kind)
{
	unsigned int throughput = (n * (size >> 10)) / time;
	DRM_INFO("amdgpu: %s %u bo moves of %u kB from"
		 " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
		 kind, n, size >> 10, sdomain, ddomain, time,
	s64 throughput = (n * (size >> 10));

	throughput = div64_s64(throughput, time_ms);

	dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from"
		 " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n",
		 kind, n, size >> 10, sdomain, ddomain, time_ms,
		 throughput * 8, throughput);
}

static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
				 unsigned sdomain, unsigned ddomain)
{
	struct amdgpu_bo *dobj = NULL;
	struct amdgpu_bo *sobj = NULL;
	struct amdgpu_bo_param bp;
	uint64_t saddr, daddr;
	s64 time_ms;
	int r, n;
	int time;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = PAGE_SIZE;
	bp.domain = sdomain;
	bp.flags = 0;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	n = AMDGPU_BENCHMARK_ITERATIONS;
	r = amdgpu_bo_create(adev, &bp, &sobj);
	if (r) {
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(sobj, false);
	if (unlikely(r != 0))
		goto out_cleanup;
	r = amdgpu_bo_pin(sobj, sdomain);
	if (r) {
		amdgpu_bo_unreserve(sobj);
		goto out_cleanup;
	}
	r = amdgpu_ttm_alloc_gart(&sobj->tbo);
	amdgpu_bo_unreserve(sobj);
	if (r) {
		goto out_cleanup;
	}
	saddr = amdgpu_bo_gpu_offset(sobj);
	bp.domain = ddomain;
	r = amdgpu_bo_create(adev, &bp, &dobj);
	if (r) {
		goto out_cleanup;
	}
	r = amdgpu_bo_reserve(dobj, false);
	if (unlikely(r != 0))
		goto out_cleanup;
	r = amdgpu_bo_pin(dobj, ddomain);
	if (r) {
		amdgpu_bo_unreserve(sobj);

	r = amdgpu_bo_create_kernel(adev, size,
				    PAGE_SIZE, sdomain,
				    &sobj,
				    &saddr,
				    NULL);
	if (r)
		goto out_cleanup;
	}
	r = amdgpu_ttm_alloc_gart(&dobj->tbo);
	amdgpu_bo_unreserve(dobj);
	if (r) {
	r = amdgpu_bo_create_kernel(adev, size,
				    PAGE_SIZE, ddomain,
				    &dobj,
				    &daddr,
				    NULL);
	if (r)
		goto out_cleanup;
	}
	daddr = amdgpu_bo_gpu_offset(dobj);

	if (adev->mman.buffer_funcs) {
		time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);
		if (time < 0)
		r = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n, &time_ms);
		if (r)
			goto out_cleanup;
		if (time > 0)
			amdgpu_benchmark_log_results(n, size, time,
		else
			amdgpu_benchmark_log_results(adev, n, size, time_ms,
						     sdomain, ddomain, "dma");
	}

out_cleanup:
	/* Check error value now. The value can be overwritten when clean up.*/
	if (r) {
		DRM_ERROR("Error while benchmarking BO move.\n");
	}
	if (r < 0)
		dev_info(adev->dev, "Error while benchmarking BO move.\n");

	if (sobj) {
		r = amdgpu_bo_reserve(sobj, true);
		if (likely(r == 0)) {
			amdgpu_bo_unpin(sobj);
			amdgpu_bo_unreserve(sobj);
		}
		amdgpu_bo_unref(&sobj);
	}
	if (dobj) {
		r = amdgpu_bo_reserve(dobj, true);
		if (likely(r == 0)) {
			amdgpu_bo_unpin(dobj);
			amdgpu_bo_unreserve(dobj);
		}
		amdgpu_bo_unref(&dobj);
	}
	if (sobj)
		amdgpu_bo_free_kernel(&sobj, &saddr, NULL);
	if (dobj)
		amdgpu_bo_free_kernel(&dobj, &daddr, NULL);
	return r;
}

void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
int amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
{
	int i;
	int i, r;
	static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = {
		640 * 480 * 4,
		720 * 480 * 4,
@@ -182,63 +142,119 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number)
		1920 * 1200 * 4
	};

	mutex_lock(&adev->benchmark_mutex);
	switch (test_number) {
	case 1:
		dev_info(adev->dev,
			 "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n",
			 test_number);
		/* simple test, VRAM to GTT and GTT to VRAM */
		amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
		r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT,
					  AMDGPU_GEM_DOMAIN_VRAM);
		amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
		if (r)
			goto done;
		r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
					  AMDGPU_GEM_DOMAIN_GTT);
		if (r)
			goto done;
		break;
	case 2:
		dev_info(adev->dev,
			 "benchmark test: %d (simple test, VRAM to VRAM)\n",
			 test_number);
		/* simple test, VRAM to VRAM */
		amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
		r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM,
					  AMDGPU_GEM_DOMAIN_VRAM);
		if (r)
			goto done;
		break;
	case 3:
		dev_info(adev->dev,
			 "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n",
			 test_number);
		/* GTT to VRAM, buffer size sweep, powers of 2 */
		for (i = 1; i <= 16384; i <<= 1)
			amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
		for (i = 1; i <= 16384; i <<= 1) {
			r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
						  AMDGPU_GEM_DOMAIN_GTT,
						  AMDGPU_GEM_DOMAIN_VRAM);
			if (r)
				goto done;
		}
		break;
	case 4:
		dev_info(adev->dev,
			 "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n",
			 test_number);
		/* VRAM to GTT, buffer size sweep, powers of 2 */
		for (i = 1; i <= 16384; i <<= 1)
			amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
		for (i = 1; i <= 16384; i <<= 1) {
			r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
						  AMDGPU_GEM_DOMAIN_VRAM,
						  AMDGPU_GEM_DOMAIN_GTT);
			if (r)
				goto done;
		}
		break;
	case 5:
		dev_info(adev->dev,
			 "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n",
			 test_number);
		/* VRAM to VRAM, buffer size sweep, powers of 2 */
		for (i = 1; i <= 16384; i <<= 1)
			amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
		for (i = 1; i <= 16384; i <<= 1) {
			r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE,
						  AMDGPU_GEM_DOMAIN_VRAM,
						  AMDGPU_GEM_DOMAIN_VRAM);
			if (r)
				goto done;
		}
		break;
	case 6:
		dev_info(adev->dev,
			 "benchmark test: %d (GTT to VRAM, buffer size sweep, common modes)\n",
			 test_number);
		/* GTT to VRAM, buffer size sweep, common modes */
		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
			amdgpu_benchmark_move(adev, common_modes[i],
		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
			r = amdgpu_benchmark_move(adev, common_modes[i],
						  AMDGPU_GEM_DOMAIN_GTT,
						  AMDGPU_GEM_DOMAIN_VRAM);
			if (r)
				goto done;
		}
		break;
	case 7:
		dev_info(adev->dev,
			 "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n",
			 test_number);
		/* VRAM to GTT, buffer size sweep, common modes */
		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
			amdgpu_benchmark_move(adev, common_modes[i],
		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
			r = amdgpu_benchmark_move(adev, common_modes[i],
						  AMDGPU_GEM_DOMAIN_VRAM,
						  AMDGPU_GEM_DOMAIN_GTT);
			if (r)
				goto done;
		}
		break;
	case 8:
		dev_info(adev->dev,
			 "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n",
			 test_number);
		/* VRAM to VRAM, buffer size sweep, common modes */
		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++)
			amdgpu_benchmark_move(adev, common_modes[i],
		for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) {
			r = amdgpu_benchmark_move(adev, common_modes[i],
					      AMDGPU_GEM_DOMAIN_VRAM,
					      AMDGPU_GEM_DOMAIN_VRAM);
			if (r)
				goto done;
		}
		break;

	default:
		DRM_ERROR("Unknown benchmark\n");
		dev_info(adev->dev, "Unknown benchmark %d\n", test_number);
		r = -EINVAL;
		break;
	}

done:
	mutex_unlock(&adev->benchmark_mutex);

	return r;
}
+105 −0
Original line number Diff line number Diff line
@@ -1364,6 +1364,25 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val)
	return 0;
}

static int amdgpu_debugfs_benchmark(void *data, u64 val)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)data;
	struct drm_device *dev = adev_to_drm(adev);
	int r;

	r = pm_runtime_get_sync(dev->dev);
	if (r < 0) {
		pm_runtime_put_autosuspend(dev->dev);
		return r;
	}

	r = amdgpu_benchmark(adev, val);

	pm_runtime_mark_last_busy(dev->dev);
	pm_runtime_put_autosuspend(dev->dev);

	return r;
}

static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused)
{
@@ -1400,6 +1419,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram,
			 NULL, "%lld\n");
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt,
			 NULL, "%lld\n");
DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark,
			 "%lld\n");

static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
					  struct dma_fence **fences)
@@ -1619,6 +1640,86 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
			amdgpu_debugfs_sclk_set, "%llu\n");

static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
				char __user *buf, size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
	char reg_offset[12];
	int i, ret, len = 0;

	if (*pos)
		return 0;

	memset(reg_offset, 0, 12);
	ret = down_read_killable(&adev->reset_domain->sem);
	if (ret)
		return ret;

	for (i = 0; i < adev->num_regs; i++) {
		sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]);
		up_read(&adev->reset_domain->sem);
		if (copy_to_user(buf + len, reg_offset, strlen(reg_offset)))
			return -EFAULT;

		len += strlen(reg_offset);
		ret = down_read_killable(&adev->reset_domain->sem);
		if (ret)
			return ret;
	}

	up_read(&adev->reset_domain->sem);
	*pos += len;

	return len;
}

static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
			const char __user *buf, size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
	char reg_offset[11];
	uint32_t *tmp;
	int ret, i = 0, len = 0;

	do {
		memset(reg_offset, 0, 11);
		if (copy_from_user(reg_offset, buf + len,
					min(10, ((int)size-len)))) {
			ret = -EFAULT;
			goto error_free;
		}

		tmp = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
		if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) {
			ret = -EINVAL;
			goto error_free;
		}

		len += ret;
		i++;
	} while (len < size);

	ret = down_write_killable(&adev->reset_domain->sem);
	if (ret)
		goto error_free;

	swap(adev->reset_dump_reg_list, tmp);
	adev->num_regs = i;
	up_write(&adev->reset_domain->sem);
	ret = size;

error_free:
	kfree(tmp);
	return ret;
}

static const struct file_operations amdgpu_reset_dump_register_list = {
	.owner = THIS_MODULE,
	.read = amdgpu_reset_dump_register_list_read,
	.write = amdgpu_reset_dump_register_list_write,
	.llseek = default_llseek
};

int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
@@ -1685,6 +1786,10 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
			    &amdgpu_debugfs_test_ib_fops);
	debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
			    &amdgpu_debugfs_vm_info_fops);
	debugfs_create_file("amdgpu_benchmark", 0200, root, adev,
			    &amdgpu_benchmark_fops);
	debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev,
			    &amdgpu_reset_dump_register_list);

	adev->debugfs_vbios_blob.data = adev->bios;
	adev->debugfs_vbios_blob.size = adev->bios_size;
+23 −13
Original line number Diff line number Diff line
@@ -1539,6 +1539,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
		amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
	}

	if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
		dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
		amdgpu_reset_method = -1;
	}

	amdgpu_device_check_smu_prv_buffer_size(adev);

	amdgpu_device_check_vm_size(adev);
@@ -3609,6 +3614,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	mutex_init(&adev->psp.mutex);
	mutex_init(&adev->notifier_lock);
	mutex_init(&adev->pm.stable_pstate_ctx_lock);
	mutex_init(&adev->benchmark_mutex);

	amdgpu_device_init_apu_flags(adev);

@@ -3872,19 +3878,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
	} else
		adev->ucode_sysfs_en = true;

	if ((amdgpu_testing & 1)) {
		if (adev->accel_working)
			amdgpu_test_moves(adev);
		else
			DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n");
	}
	if (amdgpu_benchmarking) {
		if (adev->accel_working)
			amdgpu_benchmark(adev, amdgpu_benchmarking);
		else
			DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n");
	}

	/*
	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
	 * Otherwise the mgpu fan boost feature will be skipped due to the
@@ -4723,6 +4716,22 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
	return r;
}

/*
 * Sample every register queued via the "amdgpu_reset_dump_register_list"
 * debugfs file and report each value through the
 * amdgpu_reset_reg_dumps tracepoint, right before an ASIC reset.
 */
static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
{
	int idx;

	/* the register list is protected by the reset domain semaphore */
	lockdep_assert_held(&adev->reset_domain->sem);
	/* NOTE(review): dump_stack() looks like a debugging aid; confirm it
	 * is intended to remain in the production reset path. */
	dump_stack();

	for (idx = 0; idx < adev->num_regs; idx++) {
		uint32_t reg_value = RREG32(adev->reset_dump_reg_list[idx]);

		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[idx],
					     reg_value);
	}

	return 0;
}

int amdgpu_do_asic_reset(struct list_head *device_list_handle,
			 struct amdgpu_reset_context *reset_context)
{
@@ -4733,6 +4742,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
	/* Try reset handler method first */
	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
				    reset_list);
	amdgpu_reset_reg_dumps(tmp_adev);
	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
	/* If reset handler not implemented, continue; otherwise return */
	if (r == -ENOSYS)
Loading