Commit 9158bf69 authored by farah kassabri's avatar farah kassabri Committed by Oded Gabbay
Browse files

habanalabs: Timestamps buffers registration



The timestamp registration API allows the user to register
a timestamp record event. When the CQ counter reaches the
target value, the driver captures a timestamp and writes it
to a location specified by the user.
This is a non-blocking API, unlike wait_for_interrupt,
which is a blocking one.

Signed-off-by: default avatarfarah kassabri <fkassabri@habana.ai>
Reviewed-by: default avatarOded Gabbay <ogabbay@kernel.org>
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent b32cd104
Loading
Loading
Loading
Loading
+163 −34
Original line number Diff line number Diff line
@@ -14,6 +14,8 @@
#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
				HL_CS_FLAGS_COLLECTIVE_WAIT)

#define MAX_TS_ITER_NUM 10

/**
 * enum hl_cs_wait_status - cs wait status
 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
@@ -924,7 +926,7 @@ void hl_cs_rollback_all(struct hl_device *hdev)
	int i;
	struct hl_cs *cs, *tmp;

	flush_workqueue(hdev->sob_reset_wq);
	flush_workqueue(hdev->ts_free_obj_wq);

	/* flush all completions before iterating over the CS mirror list in
	 * order to avoid a race with the release functions
@@ -948,14 +950,20 @@ void hl_cs_rollback_all(struct hl_device *hdev)
/*
 * wake_pending_user_interrupt_threads - release every node pending on @interrupt.
 * @interrupt: user interrupt whose wait list is drained.
 *
 * Walks the interrupt's wait list under wait_list_lock (IRQs disabled):
 * - a timestamp registration node (ts_reg_info.ts_buff is set) is unlinked and
 *   the references it holds on the timestamp buffer and the CQ CB are put;
 * - any other node is a blocked waiter: its fence is marked with -EIO and
 *   completed so the waiting thread returns.
 *
 * NOTE(review): hl_ts_put()/hl_cb_put() are called here with the spinlock held
 * and IRQs off, while the comment above handle_registration_node() states the
 * puts cannot be done under the spinlock because the release path is long and
 * might sleep - confirm this path is safe.
 * NOTE(review): ts_reg_info.in_use is not cleared for the unlinked
 * registration nodes - confirm this is intended.
 */
static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
	struct hl_user_pending_interrupt *pend;
	struct hl_user_pending_interrupt *pend, *temp;
	unsigned long flags;

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
	/* _safe iteration: registration nodes are unlinked while walking */
	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
		if (pend->ts_reg_info.ts_buff) {
			list_del(&pend->wait_list_node);
			hl_ts_put(pend->ts_reg_info.ts_buff);
			hl_cb_put(pend->ts_reg_info.cq_cb);
		} else {
			/* Blocked waiter: report failure and wake it up */
			pend->fence.error = -EIO;
			complete_all(&pend->fence.completion);
		}
	}
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
}

@@ -2857,43 +2865,133 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
	return 0;
}

/*
 * ts_buff_get_kernel_ts_record - claim the kernel record at @ts_offset for a
 *                                new timestamp registration.
 * @ts_buff: timestamp buffer holding the kernel records pool and the user buffer.
 * @cq_cb: CB that holds the CQ counters.
 * @ts_offset: index of the requested record (and of the matching u64 slot in
 *             the user buffer).
 * @cq_offset: index (in u64 units) of the CQ counter inside @cq_cb.
 * @target_value: CQ counter value that triggers the timestamp.
 * @wait_list_lock: lock protecting the interrupt wait list the record may be on.
 * @pend: on success, set to the claimed record.
 *
 * If the record is still registered from a previous call and its target was
 * not reached yet, it is unlinked from the wait list and the references it
 * holds are put so it can be re-used. If its target was already reached, the
 * irq handler may be in the middle of handling it, so the function sleeps
 * briefly and retries, up to MAX_TS_ITER_NUM times.
 *
 * Return: 0 on success, -EINVAL if @ts_offset is out of range or the record
 * was not released by the irq handler in time.
 */
static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
					struct hl_cb *cq_cb,
					u64 ts_offset, u64 cq_offset, u64 target_value,
					spinlock_t *wait_list_lock,
					struct hl_user_pending_interrupt **pend)
{
	struct hl_user_pending_interrupt *first_record =
			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address;
	u64 num_records = ts_buff->kernel_buff_size /
				sizeof(struct hl_user_pending_interrupt);
	struct hl_user_pending_interrupt *requested_offset_record;
	unsigned long flags, iter_counter = 0;
	u64 current_cq_counter;

	/* Validate ts_offset before forming any pointer from it: valid indexes
	 * are 0..num_records-1. Comparing the index (rather than the resulting
	 * pointer) also rejects huge user values that would wrap the pointer
	 * arithmetic, and rejects the one-past-the-end record.
	 */
	if (ts_offset >= num_records) {
		dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
						(u64)(uintptr_t)(first_record + num_records));
		return -EINVAL;
	}

	requested_offset_record = first_record + ts_offset;

start_over:
	spin_lock_irqsave(wait_list_lock, flags);

	/* Unregister only if we didn't reach the target value
	 * since in this case there will be no handling in irq context
	 * and then it's safe to delete the node out of the interrupt list
	 * then re-use it on other interrupt
	 */
	if (requested_offset_record->ts_reg_info.in_use) {
		current_cq_counter = *requested_offset_record->cq_kernel_addr;
		if (current_cq_counter < requested_offset_record->cq_target_value) {
			list_del(&requested_offset_record->wait_list_node);
			spin_unlock_irqrestore(wait_list_lock, flags);

			/* Drop the references taken by the previous registration */
			hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);

			dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
		} else {
			dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");

			/* irq handling in the middle give it time to finish */
			spin_unlock_irqrestore(wait_list_lock, flags);
			usleep_range(1, 10);
			if (++iter_counter == MAX_TS_ITER_NUM) {
				dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
				return -EINVAL;
			}

			goto start_over;
		}
	} else {
		spin_unlock_irqrestore(wait_list_lock, flags);
	}

	/* Fill up the new registration node info */
	requested_offset_record->ts_reg_info.in_use = 1;
	requested_offset_record->ts_reg_info.ts_buff = ts_buff;
	requested_offset_record->ts_reg_info.cq_cb = cq_cb;
	requested_offset_record->ts_reg_info.timestamp_kernel_addr =
			(u64 *) ts_buff->user_buff_address + ts_offset;
	requested_offset_record->cq_kernel_addr =
			(u64 *) cq_cb->kernel_address + cq_offset;
	requested_offset_record->cq_target_value = target_value;

	*pend = requested_offset_record;

	dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
						(u64)(uintptr_t)requested_offset_record);
	return 0;
}

static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
				struct hl_cb_mgr *cb_mgr, u64 timeout_us,
				u64 cq_counters_handle,	u64 cq_counters_offset,
				struct hl_cb_mgr *cb_mgr, struct hl_ts_mgr *ts_mgr,
				u64 timeout_us, u64 cq_counters_handle,	u64 cq_counters_offset,
				u64 target_value, struct hl_user_interrupt *interrupt,
				bool register_ts_record, u64 ts_handle, u64 ts_offset,
				u32 *status, u64 *timestamp)
{
	u32 cq_patched_handle, ts_patched_handle;
	struct hl_user_pending_interrupt *pend;
	struct hl_ts_buff *ts_buff;
	struct hl_cb *cq_cb;
	unsigned long timeout, flags;
	long completion_rc;
	struct hl_cb *cb;
	int rc = 0;
	u32 handle;

	timeout = hl_usecs64_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	cq_counters_handle >>= PAGE_SHIFT;
	handle = (u32) cq_counters_handle;
	cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
	cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
	if (!cq_cb) {
		rc = -EINVAL;
		goto put_ctx;
	}

	cb = hl_cb_get(hdev, cb_mgr, handle);
	if (!cb) {
		hl_ctx_put(ctx);
		return -EINVAL;
	if (register_ts_record) {
		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
					interrupt->interrupt_id, ts_offset, cq_counters_offset);

		ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
		ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
		if (!ts_buff) {
			rc = -EINVAL;
			goto put_cq_cb;
		}

		/* Find first available record */
		rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
						cq_counters_offset, target_value,
						&interrupt->wait_list_lock, &pend);
		if (rc)
			goto put_ts_buff;
	} else {
		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
		if (!pend) {
		hl_cb_put(cb);
		hl_ctx_put(ctx);
		return -ENOMEM;
			rc = -ENOMEM;
			goto put_cq_cb;
		}

		hl_fence_init(&pend->fence, ULONG_MAX);

	pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
		pend->cq_target_value = target_value;
	}

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);

@@ -2901,13 +2999,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	 * before we added the node to the wait list
	 */
	if (*pend->cq_kernel_addr >= target_value) {
		if (register_ts_record)
			pend->ts_reg_info.in_use = 0;
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */

		if (register_ts_record) {
			*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
			goto put_ts_buff;
		} else {
			pend->fence.timestamp = ktime_get();
			goto set_timestamp;

		}
	} else if (!timeout_us) {
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		*status = HL_WAIT_CS_STATUS_BUSY;
@@ -2916,11 +3020,19 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	}

	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 * handler to monitor.
	 * Note that we cannot have sorted list by target value,
	 * in order to shorten the list pass loop, since
	 * same list could have nodes for different cq counter handle.
	 */
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

	if (register_ts_record) {
		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
		goto ts_registration_exit;
	}

	/* Wait for interrupt handler to signal completion */
	completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
								timeout);
@@ -2952,15 +3064,30 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		}
	}

	/*
	 * We keep removing the node from list here, and not at the irq handler
	 * for completion timeout case. and if it's a registration
	 * for ts record, the node will be deleted in the irq handler after
	 * we reach the target value.
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

set_timestamp:
	*timestamp = ktime_to_ns(pend->fence.timestamp);

	kfree(pend);
	hl_cb_put(cb);
	hl_cb_put(cq_cb);
ts_registration_exit:
	hl_ctx_put(ctx);

	return rc;

put_ts_buff:
	hl_ts_put(ts_buff);
put_cq_cb:
	hl_cb_put(cq_cb);
put_ctx:
	hl_ctx_put(ctx);

	return rc;
@@ -3119,11 +3246,13 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];

	if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
				args->in.interrupt_timeout_us, args->in.cq_counters_handle,
				args->in.cq_counters_offset,
				args->in.target, interrupt, &status,
				&timestamp);
				args->in.target, interrupt,
				!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
				args->in.timestamp_handle, args->in.timestamp_offset,
				&status, &timestamp);
	else
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
				args->in.interrupt_timeout_us, args->in.addr,
+11 −7
Original line number Diff line number Diff line
@@ -145,6 +145,7 @@ static int hl_device_release(struct inode *inode, struct file *filp)
	hl_release_pending_user_interrupts(hpriv->hdev);

	hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
	hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

	if (!hl_hpriv_put(hpriv))
@@ -209,6 +210,9 @@ static int hl_mmap(struct file *filp, struct vm_area_struct *vma)

	case HL_MMAP_TYPE_BLOCK:
		return hl_hw_block_mmap(hpriv, vma);

	case HL_MMAP_TYPE_TS_BUFF:
		return hl_ts_mmap(hpriv, vma);
	}

	return -EINVAL;
@@ -410,10 +414,10 @@ static int device_early_init(struct hl_device *hdev)
		goto free_cq_wq;
	}

	hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
	if (!hdev->sob_reset_wq) {
	hdev->ts_free_obj_wq = alloc_workqueue("hl-ts-free-obj", WQ_UNBOUND, 0);
	if (!hdev->ts_free_obj_wq) {
		dev_err(hdev->dev,
			"Failed to allocate SOB reset workqueue\n");
			"Failed to allocate Timestamp registration free workqueue\n");
		rc = -ENOMEM;
		goto free_eq_wq;
	}
@@ -422,7 +426,7 @@ static int device_early_init(struct hl_device *hdev)
					GFP_KERNEL);
	if (!hdev->hl_chip_info) {
		rc = -ENOMEM;
		goto free_sob_reset_wq;
		goto free_ts_free_wq;
	}

	rc = hl_mmu_if_set_funcs(hdev);
@@ -461,8 +465,8 @@ static int device_early_init(struct hl_device *hdev)
	hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
	kfree(hdev->hl_chip_info);
free_sob_reset_wq:
	destroy_workqueue(hdev->sob_reset_wq);
free_ts_free_wq:
	destroy_workqueue(hdev->ts_free_obj_wq);
free_eq_wq:
	destroy_workqueue(hdev->eq_wq);
free_cq_wq:
@@ -501,7 +505,7 @@ static void device_early_fini(struct hl_device *hdev)

	kfree(hdev->hl_chip_info);

	destroy_workqueue(hdev->sob_reset_wq);
	destroy_workqueue(hdev->ts_free_obj_wq);
	destroy_workqueue(hdev->eq_wq);
	destroy_workqueue(hdev->device_reset_work.wq);

+97 −9
Original line number Diff line number Diff line
@@ -31,14 +31,15 @@
#define HL_NAME				"habanalabs"

/* Use upper bits of mmap offset to store habana driver specific information.
 * bits[63:61] - Encode mmap type
 * bits[63:59] - Encode mmap type
 * bits[45:0]  - mmap offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t to PAGE_SIZE
 */
#define HL_MMAP_TYPE_SHIFT		(61 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK		(0x7ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_SHIFT		(59 - PAGE_SHIFT)
#define HL_MMAP_TYPE_MASK		(0x1full << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_TS_BUFF		(0x10ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_BLOCK		(0x4ull << HL_MMAP_TYPE_SHIFT)
#define HL_MMAP_TYPE_CB			(0x2ull << HL_MMAP_TYPE_SHIFT)

@@ -709,6 +710,40 @@ struct hl_cb_mgr {
	struct idr		cb_handles; /* protected by cb_lock */
};

/**
 * struct hl_ts_mgr - describes the timestamp registration memory manager.
 * @ts_lock: protects ts_handles.
 * @ts_handles: an idr to hold all timestamp buffer handles.
 */
struct hl_ts_mgr {
	spinlock_t		ts_lock;
	struct idr		ts_handles;
};

/**
 * struct hl_ts_buff - describes a timestamp buffer.
 * @refcount: reference counter for usage of the buffer.
 * @hdev: pointer to device this buffer belongs to.
 * @mmap: atomic flag indicating whether the buff is currently mapped to user.
 * @kernel_buff_address: Holds the internal buffer's kernel virtual address.
 *                       The registration code indexes it as an array of
 *                       struct hl_user_pending_interrupt records.
 * @user_buff_address: Holds the user buffer's kernel virtual address.
 *                     The registration code indexes it as an array of u64
 *                     timestamp slots.
 * @id: the buffer ID.
 * @mmap_size: Holds the buffer size that was mmaped.
 * @kernel_buff_size: Holds the internal kernel buffer size, in bytes.
 * @user_buff_size: Holds the user buffer size.
 */
struct hl_ts_buff {
	struct kref		refcount;
	struct hl_device	*hdev;
	atomic_t		mmap;
	void			*kernel_buff_address;
	void			*user_buff_address;
	u32			id;
	u32			mmap_size;
	u32			kernel_buff_size;
	u32			user_buff_size;
};

/**
 * struct hl_cb - describes a Command Buffer.
 * @refcount: reference counter for usage of the CB.
@@ -886,9 +921,54 @@ struct hl_user_interrupt {
	u32			interrupt_id;
};

/**
 * struct timestamp_reg_free_node - one deferred "put" job for a handled
 *                                  timestamp registration node.
 * @free_objects_node: node in the list of free jobs handed to the free
 *                     objects workqueue.
 * @cq_cb: pointer to the cq command buffer whose refcount is to be put.
 * @ts_buff: pointer to the timestamp buffer whose refcount is to be put.
 */
struct timestamp_reg_free_node {
	struct list_head	free_objects_node;
	struct hl_cb		*cq_cb;
	struct hl_ts_buff	*ts_buff;
};

/**
 * struct timestamp_reg_work_obj - holds the timestamp registration free objects job.
 * The job passes over the free jobs list and puts the refcount of the objects
 * referenced by each node of the list.
 * @free_obj: workqueue object to free timestamp registration node objects.
 * @hdev: pointer to the device structure.
 * @free_obj_head: pointer to the head of the free jobs list (node type
 *                 timestamp_reg_free_node). The head is allocated separately
 *                 and is freed by the work handler together with the job.
 */
struct timestamp_reg_work_obj {
	struct work_struct	free_obj;
	struct hl_device	*hdev;
	struct list_head	*free_obj_head;
};

/**
 * struct timestamp_reg_info - holds the timestamp registration related data.
 * @ts_buff: pointer to the timestamp buffer which includes both user/kernel
 *           buffers. Relevant only when doing timestamp records registration;
 *           a non-NULL value is also how the wait list code tells a
 *           registration node apart from a blocked waiter.
 * @cq_cb: pointer to CQ counter CB.
 * @timestamp_kernel_addr: timestamp handle address, where to set the timestamp.
 *                         Relevant only when doing timestamp records
 *                         registration.
 * @in_use: indicates if the node is already in use. Relevant only when doing
 *          timestamp records registration, since in this case the driver
 *          has its own buffer which serves as a records pool instead of
 *          allocating records dynamically. Set when a record is claimed for a
 *          registration and cleared once its timestamp has been written.
 */
struct timestamp_reg_info {
	struct hl_ts_buff	*ts_buff;
	struct hl_cb		*cq_cb;
	u64			*timestamp_kernel_addr;
	u8			in_use;
};

/**
 * struct hl_user_pending_interrupt - holds a context to a user thread
 *                                    pending on an interrupt
 * @ts_reg_info: holds the timestamps registration nodes info
 * @wait_list_node: node in the list of user threads pending on an interrupt
 * @fence: hl fence object for interrupt completion
 * @cq_target_value: CQ target value
@@ -896,6 +976,7 @@ struct hl_user_interrupt {
 *                  handler for taget value comparison
 */
struct hl_user_pending_interrupt {
	struct timestamp_reg_info	ts_reg_info;
	struct list_head		wait_list_node;
	struct hl_fence			fence;
	u64				cq_target_value;
@@ -1833,6 +1914,7 @@ struct hl_debug_params {
 * @ctx: current executing context. TODO: remove for multiple ctx per process
 * @ctx_mgr: context manager to handle multiple context for this FD.
 * @cb_mgr: command buffer manager to handle multiple buffers for this FD.
 * @ts_mem_mgr: timestamp registration manager for alloc/free/map timestamp buffers.
 * @debugfs_list: list of relevant ASIC debugfs.
 * @dev_node: node in the device list of file private data
 * @refcount: number of related contexts.
@@ -1845,6 +1927,7 @@ struct hl_fpriv {
	struct hl_ctx		*ctx;
	struct hl_ctx_mgr	ctx_mgr;
	struct hl_cb_mgr	cb_mgr;
	struct hl_ts_mgr	ts_mem_mgr;
	struct list_head	debugfs_list;
	struct list_head	dev_node;
	struct kref		refcount;
@@ -2517,7 +2600,7 @@ struct hl_reset_info {
 * @cq_wq: work queues of completion queues for executing work in process
 *         context.
 * @eq_wq: work queue of event queue for executing work in process context.
 * @sob_reset_wq: work queue for sob reset executions.
 * @ts_free_obj_wq: work queue for timestamp registration objects release.
 * @kernel_ctx: Kernel driver context structure.
 * @kernel_queues: array of hl_hw_queue.
 * @cs_mirror_list: CS mirror list for TDR.
@@ -2645,7 +2728,7 @@ struct hl_device {
	struct hl_user_interrupt	common_user_interrupt;
	struct workqueue_struct		**cq_wq;
	struct workqueue_struct		*eq_wq;
	struct workqueue_struct		*sob_reset_wq;
	struct workqueue_struct		*ts_free_obj_wq;
	struct hl_ctx			*kernel_ctx;
	struct hl_hw_queue		*kernel_queues;
	struct list_head		cs_mirror_list;
@@ -3128,6 +3211,11 @@ __printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
					const char *format, ...);
char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
void hl_ts_mgr_init(struct hl_ts_mgr *mgr);
void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr);
int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr, u32 handle);
void hl_ts_put(struct hl_ts_buff *buff);

#ifdef CONFIG_DEBUG_FS

+2 −0
Original line number Diff line number Diff line
@@ -140,6 +140,7 @@ int hl_device_open(struct inode *inode, struct file *filp)

	hl_cb_mgr_init(&hpriv->cb_mgr);
	hl_ctx_mgr_init(&hpriv->ctx_mgr);
	hl_ts_mgr_init(&hpriv->ts_mem_mgr);

	hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);

@@ -184,6 +185,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
out_err:
	mutex_unlock(&hdev->fpriv_list_lock);
	hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
	hl_ts_mgr_fini(hpriv->hdev, &hpriv->ts_mem_mgr);
	hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
	filp->private_data = NULL;
	mutex_destroy(&hpriv->restore_phase_mutex);
+121 −6
Original line number Diff line number Diff line
@@ -137,22 +137,137 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
	return IRQ_HANDLED;
}

/*
 * hl_ts_free_objects - work handler of the timestamp free objects workqueue.
 * @work: work item embedded in the timestamp_reg_work_obj job.
 *
 * Drops the references that handled registration nodes held on their
 * timestamp buffer and CQ CB, then releases every allocation that made up
 * the job: each list node, the list head and the job itself.
 */
static void hl_ts_free_objects(struct work_struct *work)
{
	struct timestamp_reg_free_node *node, *next;
	struct timestamp_reg_work_obj *ts_job;
	struct list_head *head;
	struct hl_device *hdev;

	ts_job = container_of(work, struct timestamp_reg_work_obj, free_obj);
	head = ts_job->free_obj_head;
	hdev = ts_job->hdev;

	/* Nodes are freed while walking, hence the _safe iterator */
	list_for_each_entry_safe(node, next, head, free_objects_node) {
		dev_dbg(hdev->dev, "About to put refcount to ts_buff (%p) cq_cb(%p)\n",
					node->ts_buff, node->cq_cb);

		hl_ts_put(node->ts_buff);
		hl_cb_put(node->cq_cb);
		kfree(node);
	}

	/* The list head was allocated separately from the job */
	kfree(head);
	kfree(ts_job);
}

/*
 * This function called with spin_lock of wait_list_lock taken
 * This function will set timestamp and delete the registration node from the
 * wait_list_lock.
 * and since we're protected with spin_lock here, so we cannot just put the refcount
 * for the objects here, since the release function may be called and it's also a long
 * logic (which might sleep also) that cannot be handled in irq context.
 * so here we'll be filling a list with nodes of "put" jobs and then will send this
 * list to a dedicated workqueue to do the actual put.
 */
int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
						struct list_head **free_list)
{
	struct timestamp_reg_free_node *free_node;
	u64 timestamp;

	if (!(*free_list)) {
		/* Alloc/Init the timestamp registration free objects list */
		*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
		if (!(*free_list))
			return -ENOMEM;

		INIT_LIST_HEAD(*free_list);
	}

	free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
	if (!free_node)
		return -ENOMEM;

	timestamp = ktime_get_ns();

	*pend->ts_reg_info.timestamp_kernel_addr = timestamp;

	dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
			pend->ts_reg_info.timestamp_kernel_addr,
			*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);

	list_del(&pend->wait_list_node);

	/* Mark kernel CB node as free */
	pend->ts_reg_info.in_use = 0;

	/* Putting the refcount for ts_buff and cq_cb objects will be handled
	 * in workqueue context, just add job to free_list.
	 */
	free_node->ts_buff = pend->ts_reg_info.ts_buff;
	free_node->cq_cb = pend->ts_reg_info.cq_cb;
	list_add(&free_node->free_objects_node, *free_list);

	return 0;
}

/*
 * handle_user_cq - handle a user CQ interrupt.
 * @hdev: pointer to the device structure.
 * @user_cq: the user interrupt whose wait list is scanned.
 *
 * Scans the interrupt's wait list under wait_list_lock. Every node whose CQ
 * counter reached its target value (or that has no CQ counter address at all)
 * is handled: a timestamp registration node gets its timestamp written and is
 * removed from the list, any other node gets its fence timestamped and
 * completed. The refcount puts owed by handled registration nodes are
 * collected on a list and queued to the ts_free_obj_wq workqueue.
 */
static void handle_user_cq(struct hl_device *hdev,
			struct hl_user_interrupt *user_cq)
{
	struct hl_user_pending_interrupt *pend;
	struct hl_user_pending_interrupt *pend, *temp_pend;
	struct list_head *ts_reg_free_list_head = NULL;
	struct timestamp_reg_work_obj *job;
	bool reg_node_handle_fail = false;
	ktime_t now = ktime_get();
	int rc;

	/* For registration nodes:
	 * As part of handling the registration nodes, we should put refcount to
	 * some objects. the problem is that we cannot do that under spinlock
	 * or in irq handler context at all (since release functions are long and
	 * might sleep), so we will need to handle that part in workqueue context.
	 * To avoid handling kmalloc failure which compels us rolling back actions
	 * and move nodes hanged on the free list back to the interrupt wait list
	 * we always alloc the job of the WQ at the beginning.
	 */
	job = kmalloc(sizeof(*job), GFP_ATOMIC);
	/* NOTE(review): this early return also skips completing the regular
	 * (non-registration) waiters for this interrupt - confirm they are
	 * guaranteed to be woken by a later interrupt or by their timeout.
	 */
	if (!job)
		return;

	spin_lock(&user_cq->wait_list_lock);
	list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
		if ((pend->cq_kernel_addr &&
				*(pend->cq_kernel_addr) >= pend->cq_target_value) ||
	/* _safe iteration: handled registration nodes are unlinked while walking */
	list_for_each_entry_safe(pend, temp_pend, &user_cq->wait_list_head, wait_list_node) {
		if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
				!pend->cq_kernel_addr) {
			if (pend->ts_reg_info.ts_buff) {
				/* After the first failure the remaining registration
				 * nodes are skipped and stay on the wait list.
				 */
				if (!reg_node_handle_fail) {
					rc = handle_registration_node(hdev, pend,
									&ts_reg_free_list_head);
					if (rc)
						reg_node_handle_fail = true;
				}
			} else {
				/* Handle wait target value node */
				pend->fence.timestamp = now;
				complete_all(&pend->fence.completion);
			}
		}
	}
	spin_unlock(&user_cq->wait_list_lock);

	/* Queue the deferred puts only if a free list was created; the work
	 * handler frees the list nodes, the list head and the job. Otherwise
	 * the pre-allocated job is not needed.
	 */
	if (ts_reg_free_list_head) {
		INIT_WORK(&job->free_obj, hl_ts_free_objects);
		job->free_obj_head = ts_reg_free_list_head;
		job->hdev = hdev;
		queue_work(hdev->ts_free_obj_wq, &job->free_obj);
	} else {
		kfree(job);
	}
}

/**
Loading