Commit 4f3d93c6 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'drm-fixes-2021-12-31' of git://anongit.freedesktop.org/drm/drm

Pull drm fixes from Dave Airlie:
 "This is a bit bigger than I'd like, however it has two weeks of amdgpu
  fixes in it, since they missed last week, which was very small.

  The nouveau regression is probably the biggest fix in here, and it
  needs to go into 5.15 as well, two i915 fixes, and then a scattering
  of amdgpu fixes. The biggest fix in there is for a fencing NULL
  pointer dereference, the rest are pretty minor.

  For the misc team, I've pulled the two misc fixes manually since I'm
  not sure what is happening at this time of year!

  The amdgpu maintainers have the outstanding runpm regression to fix
  still, they are just working through the last bits of it now.

  Summary:

  nouveau:
   - fencing regression fix

  i915:
   - Fix possible uninitialized variable
   - Fix composite fence seqno icrement on each fence creation

  amdgpu:
   - Fencing fix
   - XGMI fix
   - VCN regression fix
   - IP discovery regression fixes
   - Fix runpm documentation
   - Suspend/resume fixes
   - Yellow Carp display fixes
   - MCLK power management fix
   - dma-buf fix"

* tag 'drm-fixes-2021-12-31' of git://anongit.freedesktop.org/drm/drm:
  drm/amd/display: Changed pipe split policy to allow for multi-display pipe split
  drm/amd/display: Fix USB4 null pointer dereference in update_psp_stream_config
  drm/amd/display: Set optimize_pwr_state for DCN31
  drm/amd/display: Send s0i2_rdy in stream_count == 0 optimization
  drm/amd/display: Added power down for DCN10
  drm/amd/display: fix B0 TMDS deepcolor no dislay issue
  drm/amdgpu: no DC support for headless chips
  drm/amdgpu: put SMU into proper state on runpm suspending for BOCO capable platform
  drm/amdgpu: always reset the asic in suspend (v2)
  drm/amd/pm: skip setting gfx cgpg in the s0ix suspend-resume
  drm/i915: Increment composite fence seqno
  drm/i915: Fix possible uninitialized variable in parallel extension
  drm/amdgpu: fix runpm documentation
  drm/nouveau: wait for the exclusive fence after the shared ones v2
  drm/amdgpu: add support for IP discovery gc_info table v2
  drm/amdgpu: When the VCN(1.0) block is suspended, powergating is explicitly enabled
  drm/amd/pm: Fix xgmi link control on aldebaran
  drm/amdgpu: introduce new amdgpu_fence object to indicate the job embedded fence
  drm/amdgpu: fix dropped backing store handling in amdgpu_dma_buf_move_notify
parents 012e3322 ce9b333c
Loading
Loading
Loading
Loading
+8 −9
Original line number Diff line number Diff line
@@ -3166,6 +3166,12 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
	switch (asic_type) {
#ifdef CONFIG_DRM_AMDGPU_SI
	case CHIP_HAINAN:
#endif
	case CHIP_TOPAZ:
		/* chips with no display hardware */
		return false;
#if defined(CONFIG_DRM_AMD_DC)
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
@@ -4461,7 +4467,7 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
				 struct amdgpu_reset_context *reset_context)
{
	int i, j, r = 0;
	int i, r = 0;
	struct amdgpu_job *job = NULL;
	bool need_full_reset =
		test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
@@ -4483,15 +4489,8 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,

		/*clear job fence from fence drv to avoid force_completion
		 *leave NULL and vm flush fence in fence drv */
		for (j = 0; j <= ring->fence_drv.num_fences_mask; j++) {
			struct dma_fence *old, **ptr;
		amdgpu_fence_driver_clear_job_fences(ring);

			ptr = &ring->fence_drv.fences[j];
			old = rcu_dereference_protected(*ptr, 1);
			if (old && test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &old->flags)) {
				RCU_INIT_POINTER(*ptr, NULL);
			}
		}
		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
		amdgpu_fence_driver_force_completion(ring);
	}
+54 −22
Original line number Diff line number Diff line
@@ -526,10 +526,15 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
	}
}

union gc_info {
	struct gc_info_v1_0 v1;
	struct gc_info_v2_0 v2;
};

int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
	struct binary_header *bhdr;
	struct gc_info_v1_0 *gc_info;
	union gc_info *gc_info;

	if (!adev->mman.discovery_bin) {
		DRM_ERROR("ip discovery uninitialized\n");
@@ -537,28 +542,55 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
	}

	bhdr = (struct binary_header *)adev->mman.discovery_bin;
	gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin +
	gc_info = (union gc_info *)(adev->mman.discovery_bin +
			le16_to_cpu(bhdr->table_list[GC].offset));

	adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
	adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
					      le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
	adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
	adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
	adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
	adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
	adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
	adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
	adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
	adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
	adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
	adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
	adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
	adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
	adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
					 le32_to_cpu(gc_info->gc_num_sa_per_se);
	adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);

	switch (gc_info->v1.header.version_major) {
	case 1:
		adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
		adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
						      le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa));
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c);
		adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size);
		adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
			le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
		adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
		break;
	case 2:
		adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
		adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh);
		adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
		adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se);
		adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs);
		adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs);
		adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds);
		adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth);
		adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth);
		adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer);
		adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size);
		adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd);
		adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu);
		adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size);
		adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
			le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
		adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
		break;
	default:
		dev_err(adev->dev,
			"Unhandled GC info table %d.%d\n",
			gc_info->v1.header.version_major,
			gc_info->v1.header.version_minor);
		return -EINVAL;
	}
	return 0;
}

+1 −1
Original line number Diff line number Diff line
@@ -384,7 +384,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
	struct amdgpu_vm_bo_base *bo_base;
	int r;

	if (bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
	if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
		return;

	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
+23 −4
Original line number Diff line number Diff line
@@ -328,10 +328,11 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);

/**
 * DOC: runpm (int)
 * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
 * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
 * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down
 * the dGPUs when they are idle if supported. The default is -1 (auto enable).
 * Setting the value to 0 disables this functionality.
 */
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)");
MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);

/**
@@ -2153,7 +2154,10 @@ static int amdgpu_pmops_suspend(struct device *dev)
	adev->in_s3 = true;
	r = amdgpu_device_suspend(drm_dev, true);
	adev->in_s3 = false;

	if (r)
		return r;
	if (!adev->in_s0ix)
		r = amdgpu_asic_reset(adev);
	return r;
}

@@ -2234,12 +2238,27 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
	if (amdgpu_device_supports_px(drm_dev))
		drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

	/*
	 * By setting mp1_state as PP_MP1_STATE_UNLOAD, MP1 will do some
	 * proper cleanups and put itself into a state ready for PNP. That
	 * can address some random resuming failure observed on BOCO capable
	 * platforms.
	 * TODO: this may be also needed for PX capable platform.
	 */
	if (amdgpu_device_supports_boco(drm_dev))
		adev->mp1_state = PP_MP1_STATE_UNLOAD;

	ret = amdgpu_device_suspend(drm_dev, false);
	if (ret) {
		adev->in_runpm = false;
		if (amdgpu_device_supports_boco(drm_dev))
			adev->mp1_state = PP_MP1_STATE_NONE;
		return ret;
	}

	if (amdgpu_device_supports_boco(drm_dev))
		adev->mp1_state = PP_MP1_STATE_NONE;

	if (amdgpu_device_supports_px(drm_dev)) {
		/* Only need to handle PCI state in the driver for ATPX
		 * PCI core handles it for _PR3.
+87 −39
Original line number Diff line number Diff line
@@ -77,11 +77,13 @@ void amdgpu_fence_slab_fini(void)
 * Cast helper
 */
static const struct dma_fence_ops amdgpu_fence_ops;
static const struct dma_fence_ops amdgpu_job_fence_ops;
static inline struct amdgpu_fence *to_amdgpu_fence(struct dma_fence *f)
{
	struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);

	if (__f->base.ops == &amdgpu_fence_ops)
	if (__f->base.ops == &amdgpu_fence_ops ||
	    __f->base.ops == &amdgpu_job_fence_ops)
		return __f;

	return NULL;
@@ -158,19 +160,18 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
	}

	seq = ++ring->fence_drv.sync_seq;
	if (job != NULL && job->job_run_counter) {
	if (job && job->job_run_counter) {
		/* reinit seq for resubmitted jobs */
		fence->seqno = seq;
	} else {
		if (job)
			dma_fence_init(fence, &amdgpu_job_fence_ops,
				       &ring->fence_drv.lock,
				       adev->fence_context + ring->idx, seq);
		else
			dma_fence_init(fence, &amdgpu_fence_ops,
				       &ring->fence_drv.lock,
				adev->fence_context + ring->idx,
				seq);
	}

	if (job != NULL) {
		/* mark this fence has a parent job */
		set_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &fence->flags);
				       adev->fence_context + ring->idx, seq);
	}

	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
@@ -620,6 +621,25 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
	}
}

/**
 * amdgpu_fence_driver_clear_job_fences - clear job embedded fences of ring
 *
 * @ring: fence of the ring to be cleared
 *
 */
void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring)
{
	int i;
	struct dma_fence *old, **ptr;

	for (i = 0; i <= ring->fence_drv.num_fences_mask; i++) {
		ptr = &ring->fence_drv.fences[i];
		old = rcu_dereference_protected(*ptr, 1);
		if (old && old->ops == &amdgpu_job_fence_ops)
			RCU_INIT_POINTER(*ptr, NULL);
	}
}

/**
 * amdgpu_fence_driver_force_completion - force signal latest fence of ring
 *
@@ -643,16 +663,14 @@ static const char *amdgpu_fence_get_driver_name(struct dma_fence *fence)

static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_ring *ring;
	return (const char *)to_amdgpu_fence(f)->ring->name;
}

	if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
static const char *amdgpu_job_fence_get_timeline_name(struct dma_fence *f)
{
	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

		ring = to_amdgpu_ring(job->base.sched);
	} else {
		ring = to_amdgpu_fence(f)->ring;
	}
	return (const char *)ring->name;
	return (const char *)to_amdgpu_ring(job->base.sched)->name;
}

/**
@@ -665,18 +683,25 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
 */
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
	struct amdgpu_ring *ring;

	if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
		struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);
	if (!timer_pending(&to_amdgpu_fence(f)->ring->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(to_amdgpu_fence(f)->ring);

		ring = to_amdgpu_ring(job->base.sched);
	} else {
		ring = to_amdgpu_fence(f)->ring;
	return true;
}

	if (!timer_pending(&ring->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(ring);
/**
 * amdgpu_job_fence_enable_signaling - enable signalling on job fence
 * @f: fence
 *
 * This is the simliar function with amdgpu_fence_enable_signaling above, it
 * only handles the job embedded fence.
 */
static bool amdgpu_job_fence_enable_signaling(struct dma_fence *f)
{
	struct amdgpu_job *job = container_of(f, struct amdgpu_job, hw_fence);

	if (!timer_pending(&to_amdgpu_ring(job->base.sched)->fence_drv.fallback_timer))
		amdgpu_fence_schedule_fallback(to_amdgpu_ring(job->base.sched));

	return true;
}
@@ -692,19 +717,23 @@ static void amdgpu_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

	if (test_bit(AMDGPU_FENCE_FLAG_EMBED_IN_JOB_BIT, &f->flags)) {
	/* free job if fence has a parent job */
		struct amdgpu_job *job;

		job = container_of(f, struct amdgpu_job, hw_fence);
		kfree(job);
	} else {
	/* free fence_slab if it's separated fence*/
		struct amdgpu_fence *fence;

		fence = to_amdgpu_fence(f);
		kmem_cache_free(amdgpu_fence_slab, fence);
	kmem_cache_free(amdgpu_fence_slab, to_amdgpu_fence(f));
}

/**
 * amdgpu_job_fence_free - free up the job with embedded fence
 *
 * @rcu: RCU callback head
 *
 * Free up the job with embedded fence after the RCU grace period.
 */
static void amdgpu_job_fence_free(struct rcu_head *rcu)
{
	struct dma_fence *f = container_of(rcu, struct dma_fence, rcu);

	/* free job if fence has a parent job */
	kfree(container_of(f, struct amdgpu_job, hw_fence));
}

/**
@@ -720,6 +749,19 @@ static void amdgpu_fence_release(struct dma_fence *f)
	call_rcu(&f->rcu, amdgpu_fence_free);
}

/**
 * amdgpu_job_fence_release - callback that job embedded fence can be freed
 *
 * @f: fence
 *
 * This is the simliar function with amdgpu_fence_release above, it
 * only handles the job embedded fence.
 */
static void amdgpu_job_fence_release(struct dma_fence *f)
{
	call_rcu(&f->rcu, amdgpu_job_fence_free);
}

static const struct dma_fence_ops amdgpu_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_fence_get_timeline_name,
@@ -727,6 +769,12 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
	.release = amdgpu_fence_release,
};

static const struct dma_fence_ops amdgpu_job_fence_ops = {
	.get_driver_name = amdgpu_fence_get_driver_name,
	.get_timeline_name = amdgpu_job_fence_get_timeline_name,
	.enable_signaling = amdgpu_job_fence_enable_signaling,
	.release = amdgpu_job_fence_release,
};

/*
 * Fence debugfs
Loading