Merge tag 'misc-habanalabs-next-2022-02-28' of... (4dee7a71) · Commits · EulixOS / Software / Kernel

Documentation/ABI/testing/debugfs-driver-habanalabs

+2 −18

Original line number	Diff line number	Diff line
		@@ -12,24 +12,7 @@ What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
		Date: May 2020
		KernelVersion: 5.8
		Contact: ogabbay@kernel.org
		Description: Allow the root user to disable/enable in runtime the clock
		gating mechanism in Gaudi. Due to how Gaudi is built, the
		clock gating needs to be disabled in order to access the
		registers of the TPC and MME engines. This is sometimes needed
		during debug by the user and hence the user needs this option.
		The user can supply a bitmask value, each bit represents
		a different engine to disable/enable its clock gating feature.
		The bitmask is composed of 20 bits:

		======= ============
		0 - 7 DMA channels
		8 - 11 MME engines
		12 - 19 TPC engines
		======= ============

		The bit's location of a specific engine can be determined
		using (1 << GAUDI_ENGINE_ID_). GAUDI_ENGINE_ID_ values
		are defined in uapi habanalabs.h file in enum gaudi_engine_id
		Description: This setting is now deprecated as clock gating is handled solely by the f/w

		What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
		Date: Jan 2019
		@@ -239,6 +222,7 @@ KernelVersion: 5.6
		Contact: ogabbay@kernel.org
		Description: Sets the stop-on-error option for the device engines. Value of
		"0" is for disable, otherwise enable.
		Relevant only for GOYA and GAUDI.

		What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
		Date: Sep 2021

Documentation/ABI/testing/sysfs-driver-habanalabs

+14 −2

Original line number	Diff line number	Diff line
		@@ -69,6 +69,12 @@ KernelVersion: 5.1
		Contact: ogabbay@kernel.org
		Description: Displays the device's version from the eFuse

		What: /sys/class/habanalabs/hl<n>/fw_os_ver
		Date: Dec 2021
		KernelVersion: 5.18
		Contact: ogabbay@kernel.org
		Description: Version of the firmware OS running on the device's CPU

		What: /sys/class/habanalabs/hl<n>/hard_reset
		Date: Jan 2019
		KernelVersion: 5.1
		@@ -115,7 +121,7 @@ What: /sys/class/habanalabs/hl<n>/infineon_ver
		Date: Jan 2019
		KernelVersion: 5.1
		Contact: ogabbay@kernel.org
		Description: Version of the Device's power supply F/W code
		Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI

		What: /sys/class/habanalabs/hl<n>/max_power
		Date: Jan 2019
		@@ -221,3 +227,9 @@ Date: Jan 2019
		KernelVersion: 5.1
		Contact: ogabbay@kernel.org
		Description: Version of the u-boot running on the device's CPU

		What: /sys/class/habanalabs/hl<n>/vrm_ver
		Date: Jan 2022
		KernelVersion: not yet upstreamed
		Contact: ogabbay@kernel.org
		Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI

drivers/misc/habanalabs/common/Makefile

+1 −1

Original line number	Diff line number	Diff line
		@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
		common/command_buffer.o common/hw_queue.o common/irq.o \
		common/sysfs.o common/hwmon.o common/memory.o \
		common/command_submission.o common/firmware_if.o \
		common/state_dump.o common/hwmgr.o
		common/state_dump.o

drivers/misc/habanalabs/common/command_buffer.c

+3 −1

Original line number	Diff line number	Diff line
		@@ -424,8 +424,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
		{
		union hl_cb_args *args = data;
		struct hl_device *hdev = hpriv->hdev;
		u64 handle = 0, device_va = 0;
		enum hl_device_status status;
		u64 handle = 0, device_va;
		u32 usage_cnt = 0;
		int rc;

		@@ -464,6 +464,8 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
		args->in.flags,
		&usage_cnt,
		&device_va);
		if (rc)
		break;

		memset(&args->out, 0, sizeof(args->out));

drivers/misc/habanalabs/common/command_submission.c

+205 −60

Original line number	Diff line number	Diff line
		@@ -14,6 +14,8 @@
		#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL \| HL_CS_FLAGS_WAIT \| \
		HL_CS_FLAGS_COLLECTIVE_WAIT)

		#define MAX_TS_ITER_NUM 10

		/**
		* enum hl_cs_wait_status - cs wait status
		* @CS_WAIT_STATUS_BUSY: cs was not completed yet
		@@ -919,12 +921,13 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
		complete_job(hdev, job);
		}

		void hl_cs_rollback_all(struct hl_device *hdev)
		void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
		{
		int i;
		struct hl_cs cs, tmp;

		flush_workqueue(hdev->sob_reset_wq);
		if (!skip_wq_flush) {
		flush_workqueue(hdev->ts_free_obj_wq);

		/* flush all completions before iterating over the CS mirror list in
		* order to avoid a race with the release functions
		@@ -932,6 +935,8 @@ void hl_cs_rollback_all(struct hl_device *hdev)
		for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

		}

		/* Make sure we don't have leftovers in the CS mirror list */
		list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) {
		cs_get(cs);
		@@ -948,14 +953,20 @@ void hl_cs_rollback_all(struct hl_device *hdev)
		static void
		wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
		{
		struct hl_user_pending_interrupt *pend;
		struct hl_user_pending_interrupt pend, temp;
		unsigned long flags;

		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
		list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
		if (pend->ts_reg_info.ts_buff) {
		list_del(&pend->wait_list_node);
		hl_ts_put(pend->ts_reg_info.ts_buff);
		hl_cb_put(pend->ts_reg_info.cq_cb);
		} else {
		pend->fence.error = -EIO;
		complete_all(&pend->fence.completion);
		}
		}
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		}

		@@ -2063,13 +2074,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
		idp = &ctx->sig_mgr.handles;
		idr_for_each_entry(idp, encaps_sig_hdl, id) {
		if (encaps_sig_hdl->cs_seq == signal_seq) {
		handle_found = true;
		/* get refcount to protect removing
		* this handle from idr, needed when
		* multiple wait cs are used with offset
		/* get refcount to protect removing this handle from idr,
		* needed when multiple wait cs are used with offset
		* to wait on reserved encaps signals.
		* Since kref_put of this handle is executed outside the
		* current lock, it is possible that the handle refcount
		* is 0 but it is yet to be removed from the list. In this
		* case the handle must be considered invalid.
		*/
		kref_get(&encaps_sig_hdl->refcount);
		if (kref_get_unless_zero(&encaps_sig_hdl->refcount))
		handle_found = true;
		break;
		}
		}
		@@ -2739,7 +2753,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		mcs_data.update_ts = false;
		rc = hl_cs_poll_fences(&mcs_data, mcs_compl);

		if (mcs_data.completion_bitmap)
		if (rc \|\| mcs_data.completion_bitmap)
		break;

		/*
		@@ -2854,64 +2868,174 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		return 0;
		}

		/**
		 * ts_buff_get_kernel_ts_record() - claim the kernel record at a timestamp offset
		 * @ts_buff: timestamp buffer whose kernel buffer is an array of
		 *           struct hl_user_pending_interrupt records
		 * @cq_cb: command buffer that holds the CQ counters
		 * @ts_offset: index of the requested record in the kernel buffer; the same
		 *             index selects the u64 timestamp slot in the user buffer
		 * @cq_offset: index, in u64 units, of the CQ counter inside @cq_cb
		 * @target_value: counter value stored in the record as its target
		 * @wait_list_lock: lock held while the record's in_use state is examined and
		 *                  while the record is unlinked from its wait list
		 * @pend: output; set to the claimed record on success
		 *
		 * If the record is already in use and its counter has not reached the stored
		 * target, the record is unlinked from its wait list, the references it holds
		 * are dropped, and it is re-used. If the counter has reached the target, the
		 * function sleeps briefly and retries, up to MAX_TS_ITER_NUM attempts.
		 *
		 * Return: 0 on success, -EINVAL if @ts_offset is out of range or the record
		 * stayed busy for MAX_TS_ITER_NUM attempts.
		 */
		static int ts_buff_get_kernel_ts_record(struct hl_ts_buff *ts_buff,
				struct hl_cb *cq_cb,
				u64 ts_offset, u64 cq_offset, u64 target_value,
				spinlock_t *wait_list_lock,
				struct hl_user_pending_interrupt **pend)
		{
			struct hl_user_pending_interrupt *records =
				(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address;
			u64 max_records =
				ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt);
			/* One past the last valid record; used only for the error report */
			struct hl_user_pending_interrupt *cb_last = records + max_records;
			struct hl_user_pending_interrupt *requested_offset_record;
			unsigned long flags, iter_counter = 0;
			u64 current_cq_counter;

			/* Validate the index itself, before any pointer is formed from it:
			 * ts_offset is caller supplied, so a huge value could wrap the pointer
			 * arithmetic, and an index equal to max_records is already one past
			 * the last record.
			 */
			if (ts_offset >= max_records) {
				dev_err(ts_buff->hdev->dev, "Ts offset exceeds max CB offset(0x%llx)\n",
					(u64)(uintptr_t)cb_last);
				return -EINVAL;
			}

			requested_offset_record = records + ts_offset;

		start_over:
			spin_lock_irqsave(wait_list_lock, flags);

			/* Unregister only if we didn't reach the target value
			 * since in this case there will be no handling in irq context
			 * and then it's safe to delete the node out of the interrupt list
			 * then re-use it on other interrupt
			 */
			if (requested_offset_record->ts_reg_info.in_use) {
				current_cq_counter = *requested_offset_record->cq_kernel_addr;
				if (current_cq_counter < requested_offset_record->cq_target_value) {
					list_del(&requested_offset_record->wait_list_node);
					spin_unlock_irqrestore(wait_list_lock, flags);

					/* Drop the references held by the previous registration */
					hl_ts_put(requested_offset_record->ts_reg_info.ts_buff);
					hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);

					dev_dbg(ts_buff->hdev->dev, "ts node removed from interrupt list now can re-use\n");
				} else {
					dev_dbg(ts_buff->hdev->dev, "ts node in middle of irq handling\n");

					/* The irq handler is in the middle of handling this
					 * record; give it time to finish and then retry
					 */
					spin_unlock_irqrestore(wait_list_lock, flags);
					usleep_range(1, 10);
					if (++iter_counter == MAX_TS_ITER_NUM) {
						dev_err(ts_buff->hdev->dev, "handling registration interrupt took too long!!\n");
						return -EINVAL;
					}

					goto start_over;
				}
			} else {
				spin_unlock_irqrestore(wait_list_lock, flags);
			}

			/* Fill up the new registration node info */
			requested_offset_record->ts_reg_info.in_use = 1;
			requested_offset_record->ts_reg_info.ts_buff = ts_buff;
			requested_offset_record->ts_reg_info.cq_cb = cq_cb;
			/* NOTE(review): only the kernel buffer size is validated above; this
			 * assumes the user buffer holds at least as many u64 slots as the
			 * kernel buffer holds records - confirm against the allocation code.
			 */
			requested_offset_record->ts_reg_info.timestamp_kernel_addr =
				(u64 *) ts_buff->user_buff_address + ts_offset;
			requested_offset_record->cq_kernel_addr =
				(u64 *) cq_cb->kernel_address + cq_offset;
			requested_offset_record->cq_target_value = target_value;

			*pend = requested_offset_record;

			dev_dbg(ts_buff->hdev->dev, "Found available node in TS kernel CB(0x%llx)\n",
				(u64)(uintptr_t)requested_offset_record);
			return 0;
		}

		static int _hl_interrupt_wait_ioctl(struct hl_device hdev, struct hl_ctx ctx,
		struct hl_cb_mgr *cb_mgr, u64 timeout_us,
		u64 cq_counters_handle, u64 cq_counters_offset,
		struct hl_cb_mgr cb_mgr, struct hl_ts_mgr ts_mgr,
		u64 timeout_us, u64 cq_counters_handle, u64 cq_counters_offset,
		u64 target_value, struct hl_user_interrupt *interrupt,
		u32 *status,
		u64 *timestamp)
		bool register_ts_record, u64 ts_handle, u64 ts_offset,
		u32 status, u64 timestamp)
		{
		u32 cq_patched_handle, ts_patched_handle;
		struct hl_user_pending_interrupt *pend;
		struct hl_ts_buff *ts_buff;
		struct hl_cb *cq_cb;
		unsigned long timeout, flags;
		long completion_rc;
		struct hl_cb *cb;
		int rc = 0;
		u32 handle;

		timeout = hl_usecs64_to_jiffies(timeout_us);

		hl_ctx_get(hdev, ctx);

		cq_counters_handle >>= PAGE_SHIFT;
		handle = (u32) cq_counters_handle;
		cq_patched_handle = lower_32_bits(cq_counters_handle >> PAGE_SHIFT);
		cq_cb = hl_cb_get(hdev, cb_mgr, cq_patched_handle);
		if (!cq_cb) {
		rc = -EINVAL;
		goto put_ctx;
		}

		cb = hl_cb_get(hdev, cb_mgr, handle);
		if (!cb) {
		hl_ctx_put(ctx);
		return -EINVAL;
		if (register_ts_record) {
		dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, ts offset: %llu, cq_offset: %llu\n",
		interrupt->interrupt_id, ts_offset, cq_counters_offset);

		ts_patched_handle = lower_32_bits(ts_handle >> PAGE_SHIFT);
		ts_buff = hl_ts_get(hdev, ts_mgr, ts_patched_handle);
		if (!ts_buff) {
		rc = -EINVAL;
		goto put_cq_cb;
		}

		/* Find first available record */
		rc = ts_buff_get_kernel_ts_record(ts_buff, cq_cb, ts_offset,
		cq_counters_offset, target_value,
		&interrupt->wait_list_lock, &pend);
		if (rc)
		goto put_ts_buff;
		} else {
		pend = kzalloc(sizeof(*pend), GFP_KERNEL);
		if (!pend) {
		hl_cb_put(cb);
		hl_ctx_put(ctx);
		return -ENOMEM;
		rc = -ENOMEM;
		goto put_cq_cb;
		}

		hl_fence_init(&pend->fence, ULONG_MAX);

		pend->cq_kernel_addr = (u64 *) cb->kernel_address + cq_counters_offset;
		pend->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_counters_offset;
		pend->cq_target_value = target_value;
		}

		spin_lock_irqsave(&interrupt->wait_list_lock, flags);

		/* We check for completion value as interrupt could have been received
		* before we added the node to the wait list
		*/
		if (*pend->cq_kernel_addr >= target_value) {
		if (register_ts_record)
		pend->ts_reg_info.in_use = 0;
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		*status = HL_WAIT_CS_STATUS_COMPLETED;
		/* There was no interrupt, we assume the completion is now. */

		if (register_ts_record) {
		*pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns();
		goto put_ts_buff;
		} else {
		pend->fence.timestamp = ktime_get();
		goto set_timestamp;
		}

		if (!timeout_us \|\| (*status == HL_WAIT_CS_STATUS_COMPLETED))
		} else if (!timeout_us) {
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		*status = HL_WAIT_CS_STATUS_BUSY;
		pend->fence.timestamp = ktime_get();
		goto set_timestamp;
		}

		/* Add pending user interrupt to relevant list for the interrupt
		* handler to monitor
		* handler to monitor.
		* Note that we cannot have sorted list by target value,
		* in order to shorten the list pass loop, since
		* same list could have nodes for different cq counter handle.
		*/
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		if (register_ts_record) {
		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
		goto ts_registration_exit;
		}

		/* Wait for interrupt handler to signal completion */
		completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
		timeout);
		@@ -2932,23 +3056,41 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		rc = -EIO;
		*status = HL_WAIT_CS_STATUS_ABORTED;
		} else {
		dev_err_ratelimited(hdev->dev, "Waiting for interrupt ID %d timedout\n",
		interrupt->interrupt_id);
		rc = -ETIMEDOUT;
		}
		/* The wait has timed-out. We don't know anything beyond that
		* because the workload wasn't submitted through the driver.
		* Therefore, from driver's perspective, the workload is still
		* executing.
		*/
		rc = 0;
		*status = HL_WAIT_CS_STATUS_BUSY;
		}
		}
		}

		/*
		* For a regular wait, the node is removed from the list here rather than
		* in the irq handler, so that the completion-timeout case is covered too.
		* For a timestamp-record registration, the node is instead deleted by the
		* irq handler once the target value is reached.
		*/
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		list_del(&pend->wait_list_node);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

		set_timestamp:
		*timestamp = ktime_to_ns(pend->fence.timestamp);

		kfree(pend);
		hl_cb_put(cb);
		hl_cb_put(cq_cb);
		ts_registration_exit:
		hl_ctx_put(ctx);

		return rc;

		put_ts_buff:
		hl_ts_put(ts_buff);
		put_cq_cb:
		hl_cb_put(cq_cb);
		put_ctx:
		hl_ctx_put(ctx);

		return rc;
		@@ -3049,6 +3191,12 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
		interrupt->interrupt_id);
		rc = -EINTR;
		} else {
		/* The wait has timed-out. We don't know anything beyond that
		* because the workload wasn't submitted through the driver.
		* Therefore, from driver's perspective, the workload is still
		* executing.
		*/
		rc = 0;
		*status = HL_WAIT_CS_STATUS_BUSY;
		}

		@@ -3101,23 +3249,20 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
		interrupt = &hdev->user_interrupt[interrupt_id - first_interrupt];

		if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ)
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr,
		rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &hpriv->cb_mgr, &hpriv->ts_mem_mgr,
		args->in.interrupt_timeout_us, args->in.cq_counters_handle,
		args->in.cq_counters_offset,
		args->in.target, interrupt, &status,
		&timestamp);
		args->in.target, interrupt,
		!!(args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT),
		args->in.timestamp_handle, args->in.timestamp_offset,
		&status, &timestamp);
		else
		rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx,
		args->in.interrupt_timeout_us, args->in.addr,
		args->in.target, interrupt, &status,
		&timestamp);
		if (rc) {
		if (rc != -EINTR)
		dev_err_ratelimited(hdev->dev,
		"interrupt_wait_ioctl failed (%d)\n", rc);

		if (rc)
		return rc;
		}

		memset(args, 0, sizeof(*args));
		args->out.status = status;