Commit 0c97a19a authored by Jiadong.Zhu's avatar Jiadong.Zhu Committed by Alex Deucher
Browse files

drm/amdgpu: Add software ring callbacks for gfx9 (v8)



Set ring functions with software ring callbacks on gfx9.

The software ring could be tested by debugfs_test_ib case.

v2: Set sw_ring 2 to enable software ring by default.
v3: Remove the parameter for software ring enablement.
v4: Use amdgpu_ring_init/fini for software rings.
v5: Update for code format. Fix conflict.
v6: Remove unnecessary checks and enable software ring on gfx9 by default.
v7: Use static array for software ring names and priorities.
v8: Stop creating software rings if no gfx ring existed.

Cc: Christian Koenig <Christian.Koenig@amd.com>
Cc: Luben Tuikov <Luben.Tuikov@amd.com>
Cc: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
Cc: Michel Dänzer <michel@daenzer.net>
Cc: Likun Gao <Likun.Gao@amd.com>
Signed-off-by: default avatarJiadong.Zhu <Jiadong.Zhu@amd.com>
Acked-by: default avatarLuben Tuikov <luben.tuikov@amd.com>
Acked-by: default avatarHuang Rui <ray.huang@amd.com>
Acked-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent ded946f3
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -354,6 +354,7 @@ struct amdgpu_gfx {

	bool				is_poweron;

	struct amdgpu_ring		sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
	struct amdgpu_ring_mux          muxer;
};

+1 −0
Original line number Diff line number Diff line
@@ -39,6 +39,7 @@ struct amdgpu_vm;
#define AMDGPU_MAX_RINGS		28
#define AMDGPU_MAX_HWIP_RINGS		8
#define AMDGPU_MAX_GFX_RINGS		2
#define AMDGPU_MAX_SW_GFX_RINGS         2
#define AMDGPU_MAX_COMPUTE_RINGS	8
#define AMDGPU_MAX_VCE_RINGS		3
#define AMDGPU_MAX_UVD_ENC_RINGS	2
+20 −0
Original line number Diff line number Diff line
@@ -29,6 +29,14 @@

#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)

static const struct ring_info {
	unsigned int hw_pio;
	const char *ring_name;
} sw_ring_info[] = {
	{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
	{ AMDGPU_RING_PRIO_2, "gfx_high"},
};

int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
			 unsigned int entry_size)
{
@@ -219,3 +227,15 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	WARN_ON(!ring->is_sw_ring);
}

const char *amdgpu_sw_ring_name(int idx)
{
	return idx < ARRAY_SIZE(sw_ring_info) ?
		sw_ring_info[idx].ring_name : NULL;
}

unsigned int amdgpu_sw_ring_priority(int idx)
{
	return idx < ARRAY_SIZE(sw_ring_info) ?
		sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
}
+2 −0
Original line number Diff line number Diff line
@@ -73,4 +73,6 @@ void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);

const char *amdgpu_sw_ring_name(int idx);
unsigned int amdgpu_sw_ring_priority(int idx);
#endif
+112 −1
Original line number Diff line number Diff line
@@ -47,6 +47,7 @@

#include "amdgpu_ras.h"

#include "amdgpu_ring_mux.h"
#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_NUM_SW_GFX_RINGS  2
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -2103,6 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle)
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	unsigned int hw_prio;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 0, 1):
@@ -2186,6 +2189,9 @@ static int gfx_v9_0_sw_init(void *handle)
			sprintf(ring->name, "gfx_%d", i);
		ring->use_doorbell = true;
		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;

		/* disable scheduler on the real ring */
		ring->no_scheduler = true;
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -2193,6 +2199,41 @@ static int gfx_v9_0_sw_init(void *handle)
			return r;
	}

	/* set up the software rings */
	if (adev->gfx.num_gfx_rings) {
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
			ring = &adev->gfx.sw_gfx_ring[i];
			ring->ring_obj = NULL;
			sprintf(ring->name, amdgpu_sw_ring_name(i));
			ring->use_doorbell = true;
			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
			ring->is_sw_ring = true;
			hw_prio = amdgpu_sw_ring_priority(i);
			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
					     NULL);
			if (r)
				return r;
			ring->wptr = 0;
		}

		/* init the muxer and add software rings */
		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
					 GFX9_NUM_SW_GFX_RINGS);
		if (r) {
			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
			return r;
		}
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
							&adev->gfx.sw_gfx_ring[i]);
			if (r) {
				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
				return r;
			}
		}
	}

	/* set up the compute queues - allocate horizontally across pipes */
	ring_id = 0;
	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@@ -2243,6 +2284,12 @@ static int gfx_v9_0_sw_fini(void *handle)
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (adev->gfx.num_gfx_rings) {
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
		amdgpu_ring_mux_fini(&adev->gfx.muxer);
	}

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
	for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -5712,7 +5759,11 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,

	switch (me_id) {
	case 0:
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		/* Fence signals are handled on the software rings*/
		if (adev->gfx.num_gfx_rings) {
			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
		}
		break;
	case 1:
	case 2:
@@ -6717,6 +6768,61 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.support_64bit_ptrs = true,
	.secure_submission_supported = true,
	.vmhub = AMDGPU_GFXHUB_0,
	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
	.emit_frame_size = /* totally 242 maximum if 16 IBs */
		5 +  /* COND_EXEC */
		7 +  /* PIPELINE_SYNC */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
		2 + /* VM_FLUSH */
		8 +  /* FENCE for VM_FLUSH */
		20 + /* GDS switch */
		4 + /* double SWITCH_BUFFER,
		     * the first COND_EXEC jump to the place just
		     * prior to this double SWITCH_BUFFER
		     */
		5 + /* COND_EXEC */
		7 +	 /*	HDP_flush */
		4 +	 /*	VGT_flush */
		14 + /*	CE_META */
		31 + /*	DE_META */
		3 + /* CNTX_CTRL */
		5 + /* HDP_INVL */
		8 + 8 + /* FENCE x2 */
		2 + /* SWITCH_BUFFER */
		7, /* gfx_v9_0_emit_mem_sync */
	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v9_0_ring_emit_fence,
	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
	.test_ring = gfx_v9_0_ring_test_ring,
	.test_ib = gfx_v9_0_ring_test_ib,
	.insert_nop = amdgpu_sw_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v9_ring_emit_sb,
	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
	.emit_wreg = gfx_v9_0_ring_emit_wreg,
	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
	.soft_recovery = gfx_v9_0_ring_soft_recovery,
	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
};

static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
@@ -6794,6 +6900,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;

	if (adev->gfx.num_gfx_rings) {
		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
	}

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
}