Commit f0eee9ae authored by Ben Skeggs's avatar Ben Skeggs
Browse files

drm/nouveau: allow accelerated buffer moves even when gr isn't present



There's no need to avoid using copy engines if gr init fails for some
reason (usually missing FW, or incomplete bring-up).

It's not terribly useful for an end-user, but it'll slightly speed up
suspend/resume when saving fb contents, and allow for host/ce code to
be validated.

Signed-off-by: default avatarBen Skeggs <bskeggs@redhat.com>
parent 0f9976dd
Loading
Loading
Loading
Loading
+128 −85
Original line number Diff line number Diff line
@@ -282,18 +282,134 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
}

static void
nouveau_accel_fini(struct nouveau_drm *drm)
nouveau_accel_ce_fini(struct nouveau_drm *drm)
{
	nouveau_channel_idle(drm->cechan);
	nvif_object_fini(&drm->ttm.copy);
	nouveau_channel_del(&drm->cechan);
}

static void
nouveau_accel_ce_init(struct nouveau_drm *drm)
{
	struct nvif_device *device = &drm->client.device;
	int ret = 0;

	/* Allocate channel that has access to a (preferably async) copy
	 * engine, to use for TTM buffer moves.
	 */
	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
		ret = nouveau_channel_new(drm, device,
					  nvif_fifo_runlist_ce(device), 0,
					  true, &drm->cechan);
	} else
	if (device->info.chipset >= 0xa3 &&
	    device->info.chipset != 0xaa &&
	    device->info.chipset != 0xac) {
		/* Prior to Kepler, there's only a single runlist, so all
		 * engines can be accessed from any channel.
		 *
		 * We still want to use a separate channel though.
		 */
		ret = nouveau_channel_new(drm, device, NvDmaFB, NvDmaTT, false,
					  &drm->cechan);
	}

	if (ret)
		NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
}

static void
nouveau_accel_gr_fini(struct nouveau_drm *drm)
{
	nouveau_channel_idle(drm->channel);
	nvif_object_fini(&drm->ntfy);
	nvkm_gpuobj_del(&drm->notify);
	nvif_object_fini(&drm->nvsw);
	nouveau_channel_del(&drm->channel);
}

	nouveau_channel_idle(drm->cechan);
	nvif_object_fini(&drm->ttm.copy);
	nouveau_channel_del(&drm->cechan);
static void
nouveau_accel_gr_init(struct nouveau_drm *drm)
{
	struct nvif_device *device = &drm->client.device;
	u32 arg0, arg1;
	int ret;

	/* Allocate channel that has access to the graphics engine. */
	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
		arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR);
		arg1 = 1;
	} else {
		arg0 = NvDmaFB;
		arg1 = NvDmaTT;
	}

	ret = nouveau_channel_new(drm, device, arg0, arg1, false,
				  &drm->channel);
	if (ret) {
		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
		nouveau_accel_gr_fini(drm);
		return;
	}

	/* A SW class is used on pre-NV50 HW to assist with handling the
	 * synchronisation of page flips, as well as to implement fences
	 * on TNT/TNT2 HW that lacks any kind of support in host.
	 */
	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
		ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW,
				       nouveau_abi16_swclass(drm), NULL, 0,
				       &drm->nvsw);
		if (ret == 0) {
			ret = RING_SPACE(drm->channel, 2);
			if (ret == 0) {
				BEGIN_NV04(drm->channel, NvSubSw, 0, 1);
				OUT_RING  (drm->channel, drm->nvsw.handle);
			}
		}

		if (ret) {
			NV_ERROR(drm, "failed to allocate sw class, %d\n", ret);
			nouveau_accel_gr_fini(drm);
			return;
		}
	}

	/* NvMemoryToMemoryFormat requires a notifier ctxdma for some reason,
	 * even if notification is never requested, so, allocate a ctxdma on
	 * any GPU where it's possible we'll end up using M2MF for BO moves.
	 */
	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
		ret = nvkm_gpuobj_new(nvxx_device(device), 32, 0, false, NULL,
				      &drm->notify);
		if (ret) {
			NV_ERROR(drm, "failed to allocate notifier, %d\n", ret);
			nouveau_accel_gr_fini(drm);
			return;
		}

		ret = nvif_object_init(&drm->channel->user, NvNotify0,
				       NV_DMA_IN_MEMORY,
				       &(struct nv_dma_v0) {
						.target = NV_DMA_V0_TARGET_VRAM,
						.access = NV_DMA_V0_ACCESS_RDWR,
						.start = drm->notify->addr,
						.limit = drm->notify->addr + 31
				       }, sizeof(struct nv_dma_v0),
				       &drm->ntfy);
		if (ret) {
			nouveau_accel_gr_fini(drm);
			return;
		}
	}
}

static void
nouveau_accel_fini(struct nouveau_drm *drm)
{
	nouveau_accel_ce_fini(drm);
	nouveau_accel_gr_fini(drm);
	if (drm->fence)
		nouveau_fence(drm)->dtor(drm);
}
@@ -303,23 +419,16 @@ nouveau_accel_init(struct nouveau_drm *drm)
{
	struct nvif_device *device = &drm->client.device;
	struct nvif_sclass *sclass;
	u32 arg0, arg1;
	int ret, i, n;

	if (nouveau_noaccel)
		return;

	/* Initialise global support for channels, and synchronisation. */
	ret = nouveau_channels_init(drm);
	if (ret)
		return;

	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) {
		ret = nvif_user_init(device);
		if (ret)
			return;
	}

	/* initialise synchronisation routines */
	/*XXX: this is crap, but the fence/channel stuff is a little
	 *     backwards in some places.  this will be fixed.
	 */
@@ -366,84 +475,18 @@ nouveau_accel_init(struct nouveau_drm *drm)
		return;
	}

	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
		ret = nouveau_channel_new(drm, &drm->client.device,
					  nvif_fifo_runlist_ce(device), 0,
					  true, &drm->cechan);
		if (ret)
			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);

		arg0 = nvif_fifo_runlist(device, NV_DEVICE_INFO_ENGINE_GR);
		arg1 = 1;
	} else
	if (device->info.chipset >= 0xa3 &&
	    device->info.chipset != 0xaa &&
	    device->info.chipset != 0xac) {
		ret = nouveau_channel_new(drm, &drm->client.device,
					  NvDmaFB, NvDmaTT, false,
					  &drm->cechan);
	/* Volta requires access to a doorbell register for kickoff. */
	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_VOLTA) {
		ret = nvif_user_init(device);
		if (ret)
			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);

		arg0 = NvDmaFB;
		arg1 = NvDmaTT;
	} else {
		arg0 = NvDmaFB;
		arg1 = NvDmaTT;
	}

	ret = nouveau_channel_new(drm, &drm->client.device,
				  arg0, arg1, false, &drm->channel);
	if (ret) {
		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
		nouveau_accel_fini(drm);
		return;
	}

	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
		ret = nvif_object_init(&drm->channel->user, NVDRM_NVSW,
				       nouveau_abi16_swclass(drm), NULL, 0,
				       &drm->nvsw);
		if (ret == 0) {
			ret = RING_SPACE(drm->channel, 2);
			if (ret == 0) {
				BEGIN_NV04(drm->channel, NvSubSw, 0, 1);
				OUT_RING  (drm->channel, drm->nvsw.handle);
			}
		}

		if (ret) {
			NV_ERROR(drm, "failed to allocate sw class, %d\n", ret);
			nouveau_accel_fini(drm);
			return;
		}
	}

	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
		ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0,
				      false, NULL, &drm->notify);
		if (ret) {
			NV_ERROR(drm, "failed to allocate notifier, %d\n", ret);
			nouveau_accel_fini(drm);
			return;
	}

		ret = nvif_object_init(&drm->channel->user, NvNotify0,
				       NV_DMA_IN_MEMORY,
				       &(struct nv_dma_v0) {
						.target = NV_DMA_V0_TARGET_VRAM,
						.access = NV_DMA_V0_ACCESS_RDWR,
						.start = drm->notify->addr,
						.limit = drm->notify->addr + 31
				       }, sizeof(struct nv_dma_v0),
				       &drm->ntfy);
		if (ret) {
			nouveau_accel_fini(drm);
			return;
		}
	}

	/* Allocate channels we need to support various functions. */
	nouveau_accel_gr_init(drm);
	nouveau_accel_ce_init(drm);

	/* Initialise accelerated TTM buffer moves. */
	nouveau_bo_move_init(drm);
}