Commit aa44beb5 authored by Jane Jian, committed by Alex Deucher

drm/amdgpu/vcn: Add sriov VCN v4_0 unified queue support

Enable unified queue support for SRIOV and drop all previous
multi-queue settings.
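
With a unified queue, each VCN instance exposes a single ring that carries
both decode and encode work, and under SRIOV that ring is set up by the
MMSCH/VCN firmware from the fw shared buffer instead of by the guest
writing the ring registers directly.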

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Jane Jian <Jane.Jian@amd.com>
Reviewed-by: Ruijing Dong <ruijing.dong@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 60e9c7ee
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c  +2 −3
@@ -1908,10 +1908,9 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev)
		case IP_VERSION(4, 0, 0):
		case IP_VERSION(4, 0, 2):
		case IP_VERSION(4, 0, 4):
-			if (!amdgpu_sriov_vf(adev)) {
-				amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
+			amdgpu_device_ip_block_add(adev, &vcn_v4_0_ip_block);
+			if (!amdgpu_sriov_vf(adev))
				amdgpu_device_ip_block_add(adev, &jpeg_v4_0_ip_block);
-			}
			break;
		default:
			dev_err(adev->dev,
drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h  +13 −0
@@ -161,6 +161,7 @@
#define AMDGPU_VCN_SW_RING_FLAG		(1 << 9)
#define AMDGPU_VCN_FW_LOGGING_FLAG	(1 << 10)
#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11)
#define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 12)

#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER	0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER		0x00000001
@@ -317,12 +318,24 @@ struct amdgpu_fw_shared {
	struct amdgpu_fw_shared_smu_interface_info smu_interface_info;
};

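/*
 * Ring-buffer description a VF passes to the VCN firmware through the fw
 * shared buffer; valid when AMDGPU_VCN_VF_RB_SETUP_FLAG is set in
 * present_flag_0.
 */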
struct amdgpu_fw_shared_rb_setup {
	uint32_t is_rb_enabled_flags;
	uint32_t rb_addr_lo;
	uint32_t rb_addr_hi;
	uint32_t rb_size;
	uint32_t rb4_addr_lo;
	uint32_t rb4_addr_hi;
	uint32_t rb4_size;
	uint32_t reserved[6];
};

struct amdgpu_vcn4_fw_shared {
	uint32_t present_flag_0;
	uint8_t pad[12];
	struct amdgpu_fw_shared_unified_queue_struct sq;
	uint8_t pad1[8];
	struct amdgpu_fw_shared_fw_logging fw_log;
	struct amdgpu_fw_shared_rb_setup rb_setup;
};

struct amdgpu_vcn_fwlog {
drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c  +285 −12
@@ -30,6 +30,7 @@
#include "soc15d.h"
#include "soc15_hw_ip.h"
#include "vcn_v2_0.h"
#include "mmsch_v4_0.h"

#include "vcn/vcn_4_0_0_offset.h"
#include "vcn/vcn_4_0_0_sh_mask.h"
@@ -45,6 +46,8 @@
#define VCN_VID_SOC_ADDRESS_2_0							0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0						0x48300

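/* assigned to vcn.harvest_config on VFs: MMSCH owns harvesting, so the
 * guest treats no VCN instance as harvested
 */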
#define VCN_HARVEST_MMSCH								0

#define RDECODE_MSG_CREATE							0x00000000
#define RDECODE_MESSAGE_CREATE							0x00000001

@@ -53,12 +56,14 @@ static int amdgpu_ih_clientid_vcns[] = {
	SOC15_IH_CLIENTID_VCN1
};

static int vcn_v4_0_start_sriov(struct amdgpu_device *adev);
static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev);
static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v4_0_set_powergating_state(void *handle,
        enum amd_powergating_state state);
static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
        int inst_idx, struct dpg_pause_state *new_state);
static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);

/**
 * vcn_v4_0_early_init - set function pointers
@@ -71,6 +76,9 @@ static int vcn_v4_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev))
		adev->vcn.harvest_config = VCN_HARVEST_MMSCH;

	/* re-use enc ring as unified ring */
	adev->vcn.num_enc_rings = 1;

@@ -92,6 +100,7 @@ static int vcn_v4_0_sw_init(void *handle)
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, r;
	int vcn_doorbell_index = 0;

	r = amdgpu_vcn_sw_init(adev);
	if (r)
@@ -103,6 +112,12 @@ static int vcn_v4_0_sw_init(void *handle)
	if (r)
		return r;

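	/*
	 * Derive the VF doorbell base from vcn_ring0_1; doorbell indices are
	 * kept in qword units, so shift once to get the dword offset used
	 * below.
	 */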
	if (amdgpu_sriov_vf(adev)) {
		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET;
		/* get DWORD offset */
		vcn_doorbell_index = vcn_doorbell_index << 1;
	}

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		volatile struct amdgpu_vcn4_fw_shared *fw_shared;

@@ -119,6 +134,9 @@ static int vcn_v4_0_sw_init(void *handle)

		ring = &adev->vcn.inst[i].ring_enc[0];
		ring->use_doorbell = true;
		if (amdgpu_sriov_vf(adev))
			ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1;
		else
			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;

		sprintf(ring->name, "vcn_unified_%d", i);
@@ -132,10 +150,19 @@ static int vcn_v4_0_sw_init(void *handle)
		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE);
		fw_shared->sq.is_enabled = 1;

		if (amdgpu_sriov_vf(adev))
			fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);

		if (amdgpu_vcnfw_log)
			amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
	}

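	/* the mm table holds the MMSCH init table built in vcn_v4_0_start_sriov() */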
	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode;

@@ -169,6 +196,9 @@ static int vcn_v4_0_sw_fini(void *handle)
		drm_dev_exit(idx);
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vcn_suspend(adev);
	if (r)
		return r;
@@ -191,6 +221,28 @@ static int vcn_v4_0_hw_init(void *handle)
	struct amdgpu_ring *ring;
	int i, r;

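	/*
	 * Under SRIOV, MMSCH brings the engines up from the init table built
	 * in vcn_v4_0_start_sriov(); the guest only checks which rings the
	 * hypervisor left enabled and seeds their write pointers.
	 */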
	if (amdgpu_sriov_vf(adev)) {
		r = vcn_v4_0_start_sriov(adev);
		if (r)
			goto done;

		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;

			ring = &adev->vcn.inst[i].ring_enc[0];
			if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
				ring->sched.ready = false;
				ring->no_scheduler = true;
				dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
			} else {
				ring->wptr = 0;
				ring->wptr_old = 0;
				vcn_v4_0_unified_ring_set_wptr(ring);
				ring->sched.ready = true;
			}
		}
	} else {
		for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;
@@ -203,6 +255,8 @@ static int vcn_v4_0_hw_init(void *handle)
			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;

		}
	}

done:
@@ -230,7 +284,7 @@ static int vcn_v4_0_hw_fini(void *handle)
	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if (!amdgpu_sriov_vf(adev)) {
			if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
                        (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
                                RREG32_SOC15(VCN, i, regUVD_STATUS))) {
@@ -238,6 +292,8 @@ static int vcn_v4_0_hw_fini(void *handle)
			}
		}

	}

	return 0;
}

@@ -1107,6 +1163,214 @@ static int vcn_v4_0_start(struct amdgpu_device *adev)
	return 0;
}

static int vcn_v4_0_start_sriov(struct amdgpu_device *adev)
{
	int i;
	struct amdgpu_ring *ring_enc;
	uint64_t cache_addr;
	uint64_t rb_enc_addr;
	uint64_t ctx_addr;
	uint32_t param, resp, expected;
	uint32_t offset, cache_size;
	uint32_t tmp, timeout;

	struct amdgpu_mm_table *table = &adev->virt.mm_table;
	uint32_t *table_loc;
	uint32_t table_size;
	uint32_t size, size_dw;
	uint32_t init_status;
	uint32_t enabled_vcn;

	struct mmsch_v4_0_cmd_direct_write
		direct_wt = { {0} };
	struct mmsch_v4_0_cmd_direct_read_modify_write
		direct_rd_mod_wt = { {0} };
	struct mmsch_v4_0_cmd_end end = { {0} };
	struct mmsch_v4_0_init_header header;

	volatile struct amdgpu_vcn4_fw_shared *fw_shared;
	volatile struct amdgpu_fw_shared_rb_setup *rb_setup;

	direct_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type =
		MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type =
		MMSCH_COMMAND__END;

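	/* init table header: a status/offset/size record per VCN instance */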
	header.version = MMSCH_VERSION;
	header.total_size = sizeof(struct mmsch_v4_0_init_header) >> 2;
	for (i = 0; i < AMDGPU_MAX_VCN_INSTANCES; i++) {
		header.inst[i].init_status = 0;
		header.inst[i].table_offset = 0;
		header.inst[i].table_size = 0;
	}

	table_loc = (uint32_t *)table->cpu_addr;
	table_loc += header.total_size;
	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		table_size = 0;

		MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);

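		/*
		 * cache0 points at the VCN firmware image: the PSP-loaded TMR
		 * copy when PSP handles the ucode, the driver's BO otherwise.
		 */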
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
			offset = 0;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_VCPU_CACHE_OFFSET0),
				0);
		} else {
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = cache_size;
			MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
				regUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_SIZE0),
			cache_size);

		cache_addr = adev->vcn.inst[i].gpu_addr + offset;
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_OFFSET1),
			0);
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_SIZE1),
			AMDGPU_VCN_STACK_SIZE);

		cache_addr = adev->vcn.inst[i].gpu_addr + offset +
			AMDGPU_VCN_STACK_SIZE;
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
			lower_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
			upper_32_bits(cache_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_OFFSET2),
			0);
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_CACHE_SIZE2),
			AMDGPU_VCN_CONTEXT_SIZE);

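		/*
		 * Describe the unified ring in the fw shared buffer so the
		 * firmware can program the ring registers for the VF.
		 */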
		fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
		rb_setup = &fw_shared->rb_setup;

		ring_enc = &adev->vcn.inst[i].ring_enc[0];
		ring_enc->wptr = 0;
		rb_enc_addr = ring_enc->gpu_addr;

		rb_setup->is_rb_enabled_flags |= RB_ENABLED;
		rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr);
		rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr);
		rb_setup->rb_size = ring_enc->ring_size / 4;
		fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG);

		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr));
		MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
			regUVD_VCPU_NONCACHE_SIZE0),
			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)));

		/* add end packet */
		MMSCH_V4_0_INSERT_END();

		/* refine header */
		header.inst[i].init_status = 0;
		header.inst[i].table_offset = header.total_size;
		header.inst[i].table_size = table_size;
		header.total_size += table_size;
	}

	/* Update init table header in memory */
	size = sizeof(struct mmsch_v4_0_init_header);
	table_loc = (uint32_t *)table->cpu_addr;
	memcpy((void *)table_loc, &header, size);

	/* message MMSCH (in VCN[0]) to initialize this client
	 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
	 * of memory descriptor location
	 */
	ctx_addr = table->gpu_addr;
	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));

	/* 2, update vmid of descriptor */
	tmp = RREG32_SOC15(VCN, 0, regMMSCH_VF_VMID);
	tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	/* use domain0 for MM scheduler */
	tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
	WREG32_SOC15(VCN, 0, regMMSCH_VF_VMID, tmp);

	/* 3, notify mmsch about the size of this descriptor */
	size = header.total_size;
	WREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE, size);

	/* 4, set resp to zero */
	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP, 0);

	/* 5, kick off the initialization and wait until
	 * MMSCH_VF_MAILBOX_RESP becomes non-zero
	 */
	param = 0x00000001;
	WREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_HOST, param);
	tmp = 0;
	timeout = 1000;
	resp = 0;
	expected = MMSCH_VF_MAILBOX_RESP__OK;
	while (resp != expected) {
		resp = RREG32_SOC15(VCN, 0, regMMSCH_VF_MAILBOX_RESP);
		if (resp != 0)
			break;

		udelay(10);
		tmp = tmp + 10;
		if (tmp >= timeout) {
			DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
				" waiting for regMMSCH_VF_MAILBOX_RESP "\
				"(expected=0x%08x, readback=0x%08x)\n",
				tmp, expected, resp);
			return -EBUSY;
		}
	}
	enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0;
	init_status = ((struct mmsch_v4_0_init_header *)(table_loc))->inst[enabled_vcn].init_status;
	if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE
	&& init_status != MMSCH_VF_ENGINE_STATUS__PASS)
		DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\
			"status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status);

	return 0;
}

/**
 * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode
 *
@@ -1596,6 +1860,15 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	/* for SRIOV, guest should not control VCN Power-gating
	 * MMSCH FW should control Power-gating and clock-gating
	 * guest should avoid touching CGC and PG
	 */
	if (amdgpu_sriov_vf(adev)) {
		adev->vcn.cur_state = AMD_PG_STATE_UNGATE;
		return 0;
	}

	if(state == adev->vcn.cur_state)
		return 0;
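
For reference, a minimal standalone sketch (not part of the patch) of the
SRIOV doorbell arithmetic introduced in vcn_v4_0_sw_init(); the
MMSCH_DOORBELL_OFFSET and base-index values here are illustrative
assumptions, not the real ASIC assignments:

#include <stdio.h>

#define MMSCH_DOORBELL_OFFSET 0x8	/* assumed value for illustration */

int main(void)
{
	unsigned int vcn_ring0_1 = 0x88;  /* assumed qword-unit doorbell base */
	unsigned int num_enc_rings = 1;   /* unified queue: one enc ring */
	unsigned int num_vcn_inst = 2;
	/* the driver keeps doorbell indices in qwords; shift to dwords */
	unsigned int base = (vcn_ring0_1 - MMSCH_DOORBELL_OFFSET) << 1;
	unsigned int i;

	for (i = 0; i < num_vcn_inst; i++)
		printf("VCN%u unified ring doorbell (dword) = 0x%x\n",
		       i, base + i * (num_enc_rings + 1) + 1);
	return 0;
}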