Merge tag 'misc-habanalabs-next-2020-07-24' of... (860e73b4) · Commits · EulixOS / Software / Kernel

drivers/misc/habanalabs/Makefile

+5 −6

Original line number	Diff line number	Diff line
		@@ -3,16 +3,15 @@
		# Makefile for HabanaLabs AI accelerators driver
		#

		obj-m := habanalabs.o
		obj-$(CONFIG_HABANA_AI) := habanalabs.o

		habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
		command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
		command_submission.o mmu.o firmware_if.o pci.o

		habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
		include $(src)/common/Makefile
		habanalabs-y += $(HL_COMMON_FILES)

		include $(src)/goya/Makefile
		habanalabs-y += $(HL_GOYA_FILES)

		include $(src)/gaudi/Makefile
		habanalabs-y += $(HL_GAUDI_FILES)

		habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o

drivers/misc/habanalabs/common/Makefile

0 → 100644

+9 −0

Original line number	Diff line number	Diff line
		# SPDX-License-Identifier: GPL-2.0-only
		# Kbuild fragment for the ASIC-independent part of the driver.

		# Make the headers under common/ reachable from every sub-directory.
		subdir-ccflags-y += -I$(src)/common

		# Objects built from common/, one per line, in the original link order.
		HL_COMMON_FILES := \
			common/habanalabs_drv.o \
			common/device.o \
			common/context.o \
			common/asid.o \
			common/habanalabs_ioctl.o \
			common/command_buffer.o \
			common/hw_queue.o \
			common/irq.o \
			common/sysfs.o \
			common/hwmon.o \
			common/memory.o \
			common/command_submission.o \
			common/mmu.o \
			common/firmware_if.o \
			common/pci.o

drivers/misc/habanalabs/asid.c→drivers/misc/habanalabs/common/asid.c

+0 −0

File moved.

View file

drivers/misc/habanalabs/command_buffer.c→drivers/misc/habanalabs/common/command_buffer.c

+52 −30

Original line number	Diff line number	Diff line
		@@ -10,12 +10,18 @@

		#include <linux/mm.h>
		#include <linux/slab.h>
		#include <linux/genalloc.h>

		static void cb_fini(struct hl_device hdev, struct hl_cb cb)
		{
		if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
		cb->kernel_address, cb->size);
		else
		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
		(void *) (uintptr_t) cb->kernel_address,
		cb->bus_address);

		kfree(cb);
		}

		@@ -44,9 +50,10 @@ static void cb_release(struct kref *ref)
		}

		static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
		int ctx_id)
		int ctx_id, bool internal_cb)
		{
		struct hl_cb *cb;
		u32 cb_offset;
		void *p;

		/*
		@@ -65,13 +72,25 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
		if (!cb)
		return NULL;

		if (ctx_id == HL_KERNEL_ASID_ID)
		if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
		kfree(cb);
		return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
		} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
		&cb->bus_address, GFP_ATOMIC);
		else
		} else {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
		&cb->bus_address,
		GFP_USER | __GFP_ZERO);
		}

		if (!p) {
		dev_err(hdev->dev,
		"failed to allocate %d of dma memory for CB\n",
		@@ -87,7 +106,7 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
		}

		int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		u32 cb_size, u64 *handle, int ctx_id)
		u32 cb_size, u64 *handle, int ctx_id, bool internal_cb)
		{
		struct hl_cb *cb;
		bool alloc_new_cb = true;
		@@ -112,6 +131,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		goto out_err;
		}

		if (!internal_cb) {
		/* Minimum allocation must be PAGE SIZE */
		if (cb_size < PAGE_SIZE)
		cb_size = PAGE_SIZE;
		@@ -121,8 +141,8 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,

		spin_lock(&hdev->cb_pool_lock);
		if (!list_empty(&hdev->cb_pool)) {
		cb = list_first_entry(&hdev->cb_pool, typeof(*cb),
		pool_list);
		cb = list_first_entry(&hdev->cb_pool,
		typeof(*cb), pool_list);
		list_del(&cb->pool_list);
		spin_unlock(&hdev->cb_pool_lock);
		alloc_new_cb = false;
		@@ -131,9 +151,10 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
		dev_dbg(hdev->dev, "CB pool is empty\n");
		}
		}
		}

		if (alloc_new_cb) {
		cb = hl_cb_alloc(hdev, cb_size, ctx_id);
		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
		if (!cb) {
		rc = -ENOMEM;
		goto out_err;
		@@ -230,7 +251,7 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
		} else {
		rc = hl_cb_create(hdev, &hpriv->cb_mgr,
		args->in.cb_size, &handle,
		hpriv->ctx->asid);
		hpriv->ctx->asid, false);
		}

		memset(args, 0, sizeof(*args));
		@@ -398,14 +419,15 @@ void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
		idr_destroy(&mgr->cb_handles);
		}

		struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
		struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
		bool internal_cb)
		{
		u64 cb_handle;
		struct hl_cb *cb;
		int rc;

		rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
		HL_KERNEL_ASID_ID);
		HL_KERNEL_ASID_ID, internal_cb);
		if (rc) {
		dev_err(hdev->dev,
		"Failed to allocate CB for the kernel driver %d\n", rc);
		@@ -437,7 +459,7 @@ int hl_cb_pool_init(struct hl_device *hdev)

		for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
		HL_KERNEL_ASID_ID);
		HL_KERNEL_ASID_ID, false);
		if (cb) {
		cb->is_pool = true;
		list_add(&cb->pool_list, &hdev->cb_pool);

drivers/misc/habanalabs/command_submission.c→drivers/misc/habanalabs/common/command_submission.c

+74 −23

Original line number	Diff line number	Diff line
		@@ -246,6 +246,18 @@ static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
		kfree(job);
		}

		static void cs_counters_aggregate(struct hl_device hdev, struct hl_ctx ctx)
		{
		hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
		ctx->cs_counters.device_in_reset_drop_cnt;
		hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
		ctx->cs_counters.out_of_mem_drop_cnt;
		hdev->aggregated_cs_counters.parsing_drop_cnt +=
		ctx->cs_counters.parsing_drop_cnt;
		hdev->aggregated_cs_counters.queue_full_drop_cnt +=
		ctx->cs_counters.queue_full_drop_cnt;
		}

		static void cs_do_release(struct kref *ref)
		{
		struct hl_cs *cs = container_of(ref, struct hl_cs,
		@@ -349,6 +361,9 @@ static void cs_do_release(struct kref *ref)
		dma_fence_signal(cs->fence);
		dma_fence_put(cs->fence);

		cs_counters_aggregate(hdev, cs->ctx);

		kfree(cs->jobs_in_queue_cnt);
		kfree(cs);
		}

		@@ -373,9 +388,9 @@ static void cs_timedout(struct work_struct *work)
		hdev = cs->ctx->hdev;
		ctx_asid = cs->ctx->asid;

		/* TODO: add information about last signaled seq and last emitted seq */
		dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
		ctx_asid, cs->sequence);
		dev_err(hdev->dev,
		"Command submission %llu has not finished in time!\n",
		cs->sequence);

		cs_put(cs);

		@@ -418,21 +433,29 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		spin_lock(&ctx->cs_lock);

		cs_cmpl->cs_seq = ctx->cs_sequence;
		other = ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)];
		other = ctx->cs_pending[cs_cmpl->cs_seq &
		(hdev->asic_prop.max_pending_cs - 1)];
		if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
		"Rejecting CS because of too many in-flights CS\n");
		rc = -EAGAIN;
		goto free_fence;
		}

		cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
		sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
		if (!cs->jobs_in_queue_cnt) {
		rc = -ENOMEM;
		goto free_fence;
		}

		dma_fence_init(&cs_cmpl->base_fence, &hl_fence_ops, &cs_cmpl->lock,
		ctx->asid, ctx->cs_sequence);

		cs->sequence = cs_cmpl->cs_seq;

		ctx->cs_pending[cs_cmpl->cs_seq & (HL_MAX_PENDING_CS - 1)] =
		ctx->cs_pending[cs_cmpl->cs_seq &
		(hdev->asic_prop.max_pending_cs - 1)] =
		&cs_cmpl->base_fence;
		ctx->cs_sequence++;

		@@ -447,6 +470,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
		return 0;

		free_fence:
		spin_unlock(&ctx->cs_lock);
		kfree(cs_cmpl);
		free_cs:
		kfree(cs);
		@@ -463,10 +487,12 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)

		void hl_cs_rollback_all(struct hl_device *hdev)
		{
		int i;
		struct hl_cs *cs, *tmp;

		/* flush all completions */
		flush_workqueue(hdev->cq_wq);
		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

		/* Make sure we don't have leftovers in the H/W queues mirror list */
		list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
		@@ -499,10 +525,18 @@ static int validate_queue_index(struct hl_device *hdev,
		struct asic_fixed_properties *asic = &hdev->asic_prop;
		struct hw_queue_properties *hw_queue_prop;

		/* This must be checked here to prevent out-of-bounds access to
		* hw_queues_props array
		*/
		if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
		chunk->queue_index);
		return -EINVAL;
		}

		hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

		if ((chunk->queue_index >= HL_MAX_QUEUES) ||
		(hw_queue_prop->type == QUEUE_TYPE_NA)) {
		if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
		chunk->queue_index);
		return -EINVAL;
		@@ -630,12 +664,15 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,

		rc = validate_queue_index(hdev, chunk, &queue_type,
		&is_kernel_allocated_cb);
		if (rc)
		if (rc) {
		hpriv->ctx->cs_counters.parsing_drop_cnt++;
		goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
		cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
		if (!cb) {
		hpriv->ctx->cs_counters.parsing_drop_cnt++;
		rc = -EINVAL;
		goto free_cs_object;
		}
		@@ -649,6 +686,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
		job = hl_cs_allocate_job(hdev, queue_type,
		is_kernel_allocated_cb);
		if (!job) {
		hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		if (is_kernel_allocated_cb)
		@@ -681,6 +719,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,

		rc = cs_parser(hpriv, job);
		if (rc) {
		hpriv->ctx->cs_counters.parsing_drop_cnt++;
		dev_err(hdev->dev,
		"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
		cs->ctx->asid, cs->sequence, job->id, rc);
		@@ -689,6 +728,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
		}

		if (int_queues_only) {
		hpriv->ctx->cs_counters.parsing_drop_cnt++;
		dev_err(hdev->dev,
		"Reject CS %d.%llu because only internal queues jobs are present\n",
		cs->ctx->asid, cs->sequence);
		@@ -738,6 +778,7 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		struct hl_cs_job *job;
		struct hl_cs *cs;
		struct hl_cb *cb;
		enum hl_queue_type q_type;
		u64 *signal_seq_arr = NULL, signal_seq;
		u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
		int rc;
		@@ -770,9 +811,10 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		chunk = &cs_chunk_array[0];
		q_idx = chunk->queue_index;
		hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
		q_type = hw_queue_prop->type;

		if ((q_idx >= HL_MAX_QUEUES) ||
		(hw_queue_prop->type != QUEUE_TYPE_EXT)) {
		if ((q_idx >= hdev->asic_prop.max_queues) ||
		(!hw_queue_prop->supports_sync_stream)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
		@@ -869,25 +911,28 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,

		*cs_seq = cs->sequence;

		job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
		job = hl_cs_allocate_job(hdev, q_type, true);
		if (!job) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto put_cs;
		}

		cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
		if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
		else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

		cb = hl_cb_kernel_create(hdev, cb_size,
		q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
		if (!cb) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		kfree(job);
		rc = -EFAULT;
		goto put_cs;
		}

		if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
		else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

		job->id = 0;
		job->cs = cs;
		job->user_cb = cb;
		@@ -1126,7 +1171,7 @@ static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
		dev_notice_ratelimited(hdev->dev,
		"Can't wait on seq %llu because current CS is at seq %llu\n",
		"Can't wait on CS %llu because current CS is at seq %llu\n",
		seq, ctx->cs_sequence);
		} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		@@ -1159,15 +1204,21 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		memset(args, 0, sizeof(*args));

		if (rc < 0) {
		dev_err_ratelimited(hdev->dev,
		"Error %ld on waiting for CS handle %llu\n",
		rc, seq);
		if (rc == -ERESTARTSYS) {
		dev_err_ratelimited(hdev->dev,
		"user process got signal while waiting for CS handle %llu\n",
		seq);
		args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
		rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
		dev_err_ratelimited(hdev->dev,
		"CS %llu has timed-out while user process is waiting for it\n",
		seq);
		args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
		dev_err_ratelimited(hdev->dev,
		"CS %llu has been aborted while user process is waiting for it\n",
		seq);
		args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;