Commit bf36b52e authored by Andrey Grodzovsky, committed by Alex Deucher
Browse files

drm/amdgpu: Avoid accessing HW when suspending SW state



At this point the ASIC has already been reset by the HW/PSP,
so the HW is not in a proper state to be configured for suspension;
some blocks might even be gated, so it is best to avoid touching it.

v2: Rename in_dpc to more meaningful name

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c9a6b82f
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -989,6 +989,7 @@ struct amdgpu_device {
	atomic_t			throttling_logging_enabled;
	struct ratelimit_state		throttling_logging_rs;
	uint32_t			ras_features;
	bool                            in_pci_err_recovery;
};

static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
+38 −0
Original line number Diff line number Diff line
@@ -319,6 +319,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
{
	uint32_t ret;

	if (adev->in_pci_err_recovery)
		return 0;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) &&
	    down_read_trylock(&adev->reset_sem)) {
		ret = amdgpu_kiq_rreg(adev, reg);
@@ -356,6 +359,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
	if (adev->in_pci_err_recovery)
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
@@ -377,6 +383,9 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) {
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value) {
	if (adev->in_pci_err_recovery)
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
@@ -387,6 +396,9 @@ static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev,
				       uint32_t reg, uint32_t v,
				       uint32_t acc_flags)
{
	if (adev->in_pci_err_recovery)
		return;

	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);

	if ((reg * 4) < adev->rmmio_size)
@@ -414,6 +426,9 @@ static inline void amdgpu_mm_wreg_mmio(struct amdgpu_device *adev,
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	if (adev->in_pci_err_recovery)
		return;

	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) &&
	    down_read_trylock(&adev->reset_sem)) {
		amdgpu_kiq_wreg(adev, reg, v);
@@ -432,6 +447,9 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
		    uint32_t acc_flags)
{
	if (adev->in_pci_err_recovery)
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
		adev->gfx.rlc.funcs &&
		adev->gfx.rlc.funcs->is_rlcg_access_range) {
@@ -453,6 +471,9 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t
 */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if ((reg * 4) < adev->rio_mem_size)
		return ioread32(adev->rio_mem + (reg * 4));
	else {
@@ -472,6 +493,9 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
 */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if ((reg * 4) < adev->rio_mem_size)
		iowrite32(v, adev->rio_mem + (reg * 4));
	else {
@@ -491,6 +515,9 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 */
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return readl(adev->doorbell.ptr + index);
	} else {
@@ -511,6 +538,9 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
 */
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		writel(v, adev->doorbell.ptr + index);
	} else {
@@ -529,6 +559,9 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
 */
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
{
	if (adev->in_pci_err_recovery)
		return 0;

	if (index < adev->doorbell.num_doorbells) {
		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
	} else {
@@ -549,6 +582,9 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
 */
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
{
	if (adev->in_pci_err_recovery)
		return;

	if (index < adev->doorbell.num_doorbells) {
		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
	} else {
@@ -4778,7 +4814,9 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)

	pci_restore_state(pdev);

	adev->in_pci_err_recovery = true;
	r = amdgpu_device_ip_suspend(adev);
	adev->in_pci_err_recovery = false;
	if (r)
		goto out;

+6 −0
Original line number Diff line number Diff line
@@ -693,6 +693,9 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
	struct amdgpu_ring *ring = &kiq->ring;

	if (adev->in_pci_err_recovery)
		return 0;

	BUG_ON(!ring->funcs->emit_rreg);

	spin_lock_irqsave(&kiq->ring_lock, flags);
@@ -757,6 +760,9 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)

	BUG_ON(!ring->funcs->emit_wreg);

	if (adev->in_pci_err_recovery)
		return;

	spin_lock_irqsave(&kiq->ring_lock, flags);
	amdgpu_ring_alloc(ring, 32);
	amdgpu_ring_emit_wreg(ring, reg, v);
+6 −0
Original line number Diff line number Diff line
@@ -219,6 +219,9 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
	int i;
	struct amdgpu_device *adev = psp->adev;

	if (psp->adev->in_pci_err_recovery)
		return 0;

	for (i = 0; i < adev->usec_timeout; i++) {
		val = RREG32(reg_index);
		if (check_changed) {
@@ -245,6 +248,9 @@ psp_cmd_submit_buf(struct psp_context *psp,
	bool ras_intr = false;
	bool skip_unsupport = false;

	if (psp->adev->in_pci_err_recovery)
		return 0;

	mutex_lock(&psp->mutex);

	memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE);
+11 −7
Original line number Diff line number Diff line
@@ -6980,6 +6980,8 @@ static int gfx_v10_0_hw_fini(void *handle)

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);

	if (!adev->in_pci_err_recovery) {
#ifndef BRING_UP_DEBUG
		if (amdgpu_async_gfx_ring) {
			r = gfx_v10_0_kiq_disable_kgq(adev);
@@ -6989,6 +6991,8 @@ static int gfx_v10_0_hw_fini(void *handle)
#endif
		if (amdgpu_gfx_disable_kcq(adev))
			DRM_ERROR("KCQ disable failed\n");
	}

	if (amdgpu_sriov_vf(adev)) {
		gfx_v10_0_cp_gfx_enable(adev, false);
		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
Loading