Commit 52f2e83e authored by Bhawanpreet Lakha, committed by Alex Deucher
Browse files

drm/amdgpu/display: add MALL support (v2)



Enable Memory Access at Last Level (MALL) feature for display.

v2: squash in 64-bit division fixes

Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4005809b
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@
#define DALSMC_MSG_GetDcModeMaxDpmFreq            0xC
#define DALSMC_MSG_SetMinDeepSleepDcefclk         0xD
#define DALSMC_MSG_NumOfDisplays                  0xE
/*
 * Sent by dcn30_smu_set_display_refresh_from_mall(). The message parameter
 * packs: bit 0 = enable, bits 6:1 = cache timer delay, bits 8:7 = cache
 * timer scale.
 */
#define DALSMC_MSG_SetDisplayRefreshFromMall      0xF
#define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10
#define DALSMC_MSG_BacoAudioD3PME                 0x11
/* One greater than the highest message ID defined above (0x11). */
#define DALSMC_Message_Count                      0x12
+10 −0
Original line number Diff line number Diff line
@@ -145,6 +145,16 @@ static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;

	/* Set D - MALL - SR enter and exit times adjusted for MALL */
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
}

void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
+9 −0
Original line number Diff line number Diff line
@@ -297,6 +297,15 @@ void dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t nu
			DALSMC_MSG_NumOfDisplays, num_displays, NULL);
}

/*
 * Send DALSMC_MSG_SetDisplayRefreshFromMall to the SMU.
 *
 * The message parameter is packed as:
 *   bit  0   - 1 to enable, 0 to disable
 *   bits 6:1 - cache timer delay
 *   bits 8:7 - cache timer scale
 *
 * @clk_mgr:           clock manager passed through to
 *                     dcn30_smu_send_msg_with_param()
 * @enable:            selects the value of bit 0
 * @cache_timer_delay: value for the 6-bit delay field; bits above the field
 *                     width are discarded
 * @cache_timer_scale: value for the 2-bit scale field; bits above the field
 *                     width are discarded
 *
 * No response value is requested from the SMU (the response pointer is NULL).
 */
void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale)
{
	/*
	 * Mask each argument to its field width before shifting: an
	 * out-of-range delay would otherwise spill into the scale bits, and an
	 * out-of-range scale would set bits above bit 8 of the parameter.
	 */
	uint32_t scale_bits = ((uint32_t)cache_timer_scale & 0x3) << 7;
	uint32_t delay_bits = ((uint32_t)cache_timer_delay & 0x3F) << 1;
	uint32_t param = scale_bits | delay_bits | (enable ? 1 : 0);

	dcn30_smu_send_msg_with_param(clk_mgr,
			DALSMC_MSG_SetDisplayRefreshFromMall, param, NULL);
}

void dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable)
{
	smu_print("SMU Set external client df cstate allow: enable = %d\n", enable);
+2 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ typedef enum {
/*
 * Watermark types. WATERMARKS_COUNT is left implicit so that it always
 * follows the last real type; keep it as the final enumerator.
 */
typedef enum {
	WATERMARKS_CLOCK_RANGE  = 0,
	WATERMARKS_DUMMY_PSTATE = 1,
	WATERMARKS_MALL         = 2,
	WATERMARKS_COUNT,
} WATERMARKS_FLAGS_e;

@@ -102,6 +103,7 @@ unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, P
unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk);
void         dcn30_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
void         dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
/*
 * Sends DALSMC_MSG_SetDisplayRefreshFromMall. The message parameter carries
 * enable in bit 0, cache_timer_delay in bits 6:1 and cache_timer_scale in
 * bits 8:7.
 */
void         dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale);
void         dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable);
void         dcn30_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);

+67 −0
Original line number Diff line number Diff line
@@ -696,6 +696,10 @@ void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx)

bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
{
	union dmub_rb_cmd cmd;
	unsigned int surface_size, refresh_hz, denom;
	uint32_t tmr_delay = 0, tmr_scale = 0;

	if (!dc->ctx->dmub_srv)
		return false;

@@ -710,12 +714,75 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
					/* Fail eligibility on a visible stream */
					break;
			}

			// TODO: remove hard code size
			if (surface_size < 128 * 1024 * 1024) {
				refresh_hz = div_u64((unsigned long long) dc->current_state->streams[0]->timing.pix_clk_100hz *
						     100LL,
						     (dc->current_state->streams[0]->timing.v_total *
						      dc->current_state->streams[0]->timing.h_total));

				/*
				 * Delay_Us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
				 * Delay_Us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
				 * (Delay_Us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
				 * MallFrameCacheTmrDly = ((Delay_Us / 65.28) / 2^MallFrameCacheTmrScale) - 64
				 *                      = (1000000 / refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
				 *                      = 1000000 / (refresh * 65.28 * 2^MallFrameCacheTmrScale) - 64
				 *                      = (1000000 * 100) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
				 *
				 * need to round up the result of the division before the subtraction
				 */
				denom = refresh_hz * 6528;
				tmr_delay = div_u64((100000000LL + denom - 1), denom) - 64LL;

				/* scale should be increased until it fits into 6 bits */
				while (tmr_delay & ~0x3F) {
					tmr_scale++;

					if (tmr_scale > 3) {
						/* The delay exceeds the range of the hysteresis timer */
						ASSERT(false);
						return false;
					}

					denom *= 2;
					tmr_delay = div_u64((100000000LL + denom - 1), denom) - 64LL;
				}

				/* Enable MALL */
				memset(&cmd, 0, sizeof(cmd));
				cmd.mall.header.type = DMUB_CMD__MALL;
				cmd.mall.header.sub_type =
					DMUB_CMD__MALL_ACTION_ALLOW;
				cmd.mall.header.payload_bytes =
					sizeof(cmd.mall) -
					sizeof(cmd.mall.header);
				cmd.mall.tmr_delay = tmr_delay;
				cmd.mall.tmr_scale = tmr_scale;

				dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
				dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);

				return true;
			}
		}

		/* No applicable optimizations */
		return false;
	}

	/* Disable MALL */
	memset(&cmd, 0, sizeof(cmd));
	cmd.mall.header.type = DMUB_CMD__MALL;
	cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_DISALLOW;
	cmd.mall.header.payload_bytes =
		sizeof(cmd.mall) - sizeof(cmd.mall.header);

	dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
	dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
	dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);

	return true;
}

Loading