Commit d36d68fd authored by Dave Airlie's avatar Dave Airlie
Browse files

Merge tag 'drm-habanalabs-next-2023-03-20' of...

Merge tag 'drm-habanalabs-next-2023-03-20' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux

 into drm-next

This tag contains habanalabs driver and accel changes for v6.4:

- uAPI changes:

  - Add opcodes to the CS ioctl to allow user to stall/resume specific engines
    inside Gaudi2. This is to allow the user to perform power
    testing/measurements when training different topologies.

  - Expose in the INFO ioctl the amount of device memory that the driver
    and f/w reserve for themselves.

  - Expose in the INFO ioctl a bit-mask of the available rotator engines
    in Gaudi2. This is to align with other engines that are already exposed.

  - Expose in the INFO ioctl the register's address of the f/w that should
    be used to trigger interrupts from within the user's code running in the
    compute engines.

  - Add a critical-event bit in the eventfd bitmask so the user will know the
    event that was received was critical, and a reset will now occur.

  - Expose in the INFO ioctl two new opcodes to fetch information on h/w and
    f/w events. The events recorded are the events that were reported in the
    eventfd.

- New features and improvements:

  - Add a dedicated interrupt ID in MSI-X in the device to the notification of
    an unexpected user-related event in Gaudi2. Handle it in the driver by
    reporting this event.

  - Allow the user to fetch the device memory current usage even when the
    device is undergoing compute-reset (a reset type that only clears the
    compute engines).

  - Enable graceful reset mechanism for compute-reset. This will give the
    user a few seconds before the device is reset. For example, the user can,
    during that time, perform certain device operations (dump data for debug)
    or close the device in an orderly fashion.

  - Align the decoder with the rest of the engines in regard to notification
    to the user about interrupts and in regard to performing graceful reset
    when needed (instead of immediate reset).

  - Add support for assert interrupt from the TPC engine.

  - Get the reset type that is necessary to perform per event from the
    auto-generated irq_map array.

  - Print the specific reason why a device is still in use when notifying to
    the user about it (after the user closed the device's FD).

  - Move to threaded IRQ when handling interrupts of workload completions.

- Firmware related fixes:

  - Fix RAZWI event handler to match newest f/w version.

  - Read error cause register in dma core events because the f/w doesn't
    do that.

  - Increase maximum time to wait for completion of Gaudi2 reset due to f/w
    bug.

  - Align to the latest firmware specs.

- Enforce the release order of the compute device and dma-buf.
  i.e. increment the device file refcount for any dma-buf that was exported
  for that device. This will make sure the compute device release function
  won't be called until the user closes all the FDs of the relevant
  dma-bufs. Without this change, closing the device's FD before/without
  closing the dma-buf's FD would always lead to hard-reset of the device.

- Fix a link in the drm documentation to correctly point to the accel section.

- Compilation warnings cleanups

- Misc bug fixes and code cleanups

Signed-off-by: default avatarDave Airlie <airlied@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEE7TEboABC71LctBLFZR1NuKta54AFAmQYfcAACgkQZR1NuKta
# 54DB4Af/SuiHZkVXwr+yHPv9El726rz9ZQD7mQtzNmehWGonwAvz15yqocNMUSbF
# JbqE/vrZjvbXrP1Uv5UrlRVdnFHSPV18VnHU4BMS/WOm19SsR6vZ0QOXOoa6/AUb
# w+kF3D//DbFI4/mTGfpH5/pzwu51ti8aVktosPFlHIa8iI8CB4/4IV+ivQ8UW4oK
# HyDRkIvHdRmER7vGOfhwhsr4zdqSlJBYrv3C3Z1dkSYBPW/5ICbiM1UlKycwdYKI
# cajQBSdUQwUCWnI+i8RmSy3kjNO6OE4XRUvTv89F2bQeyK/1rJLG2m2xZR/Ml/o5
# 7Cgvbn0hWZyeqe7OObYiBlSOBSehCA==
# =wclm
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 21 Mar 2023 01:37:36 AEST
# gpg:                using RSA key ED311BA00042EF52DCB412C5651D4DB8AB5AE780
# gpg: Can't check signature: No public key
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230320154026.GA766126@ogabbay-vm-u20.habana-labs.com
parents d240daa2 75b44575
Loading
Loading
Loading
Loading
+100 −30
Original line number Diff line number Diff line
@@ -14,10 +14,10 @@
#define HL_CS_FLAGS_TYPE_MASK	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
			HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
			HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \
			HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
			HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)


#define MAX_TS_ITER_NUM 10
#define MAX_TS_ITER_NUM 100

/**
 * enum hl_cs_wait_status - cs wait status
@@ -657,7 +657,7 @@ static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
	/*
	 * we get refcount upon reservation of signals or signal/wait cs for the
	 * hw_sob object, and need to put it when the first staged cs
	 * (which cotains the encaps signals) or cs signal/wait is completed.
	 * (which contains the encaps signals) or cs signal/wait is completed.
	 */
	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT) ||
@@ -1082,9 +1082,8 @@ static void
wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
{
	struct hl_user_pending_interrupt *pend, *temp;
	unsigned long flags;

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	spin_lock(&interrupt->wait_list_lock);
	list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, wait_list_node) {
		if (pend->ts_reg_info.buf) {
			list_del(&pend->wait_list_node);
@@ -1095,7 +1094,7 @@ wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
			complete_all(&pend->fence.completion);
		}
	}
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
	spin_unlock(&interrupt->wait_list_lock);
}

void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -1168,6 +1167,22 @@ static void cs_completion(struct work_struct *work)
		hl_complete_job(hdev, job);
}

u32 hl_get_active_cs_num(struct hl_device *hdev)
{
	u32 active_cs_num = 0;
	struct hl_cs *cs;

	spin_lock(&hdev->cs_mirror_lock);

	list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node)
		if (!cs->completed)
			active_cs_num++;

	spin_unlock(&hdev->cs_mirror_lock);

	return active_cs_num;
}

static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
@@ -1304,6 +1319,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
		return CS_UNRESERVE_SIGNALS;
	else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
		return CS_TYPE_ENGINE_CORE;
	else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND)
		return CS_TYPE_ENGINES;
	else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES)
		return CS_TYPE_FLUSH_PCI_HBW_WRITES;
	else
@@ -2429,10 +2446,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
						u32 num_engine_cores, u32 core_command)
{
	int rc;
	struct hl_device *hdev = hpriv->hdev;
	void __user *engine_cores_arr;
	u32 *cores;
	int rc;

	if (!hdev->asic_prop.supports_engine_modes)
		return -EPERM;

	if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
		dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
@@ -2461,6 +2481,48 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
	return rc;
}

static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr,
						u32 num_engines, enum hl_engine_command command)
{
	struct hl_device *hdev = hpriv->hdev;
	u32 *engines, max_num_of_engines;
	void __user *engines_arr;
	int rc;

	if (!hdev->asic_prop.supports_engine_modes)
		return -EPERM;

	if (command >= HL_ENGINE_COMMAND_MAX) {
		dev_err(hdev->dev, "Engine command is invalid\n");
		return -EINVAL;
	}

	max_num_of_engines = hdev->asic_prop.max_num_of_engines;
	if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT)
		max_num_of_engines = hdev->asic_prop.num_engine_cores;

	if (!num_engines || num_engines > max_num_of_engines) {
		dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines);
		return -EINVAL;
	}

	engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr;
	engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) {
		dev_err(hdev->dev, "Failed to copy engine-ids array from user\n");
		kfree(engines);
		return -EFAULT;
	}

	rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command);
	kfree(engines);

	return rc;
}

static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv)
{
	struct hl_device *hdev = hpriv->hdev;
@@ -2532,6 +2594,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
				args->in.num_engine_cores, args->in.core_command);
		break;
	case CS_TYPE_ENGINES:
		rc = cs_ioctl_engines(hpriv, args->in.engines,
				args->in.num_engines, args->in.engine_command);
		break;
	case CS_TYPE_FLUSH_PCI_HBW_WRITES:
		rc = cs_ioctl_flush_pci_hbw_writes(hpriv);
		break;
@@ -3143,8 +3209,9 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
	struct hl_user_pending_interrupt *cb_last =
			(struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address +
			(ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt));
	unsigned long flags, iter_counter = 0;
	unsigned long iter_counter = 0;
	u64 current_cq_counter;
	ktime_t timestamp;

	/* Validate ts_offset not exceeding last max */
	if (requested_offset_record >= cb_last) {
@@ -3153,8 +3220,10 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
		return -EINVAL;
	}

	timestamp = ktime_get();

start_over:
	spin_lock_irqsave(wait_list_lock, flags);
	spin_lock(wait_list_lock);

	/* Unregister only if we didn't reach the target value
	 * since in this case there will be no handling in irq context
@@ -3165,7 +3234,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
		current_cq_counter = *requested_offset_record->cq_kernel_addr;
		if (current_cq_counter < requested_offset_record->cq_target_value) {
			list_del(&requested_offset_record->wait_list_node);
			spin_unlock_irqrestore(wait_list_lock, flags);
			spin_unlock(wait_list_lock);

			hl_mmap_mem_buf_put(requested_offset_record->ts_reg_info.buf);
			hl_cb_put(requested_offset_record->ts_reg_info.cq_cb);
@@ -3176,13 +3245,14 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
			dev_dbg(buf->mmg->dev,
				"ts node in middle of irq handling\n");

			/* irq handling in the middle give it time to finish */
			spin_unlock_irqrestore(wait_list_lock, flags);
			usleep_range(1, 10);
			/* irq thread handling in the middle give it time to finish */
			spin_unlock(wait_list_lock);
			usleep_range(100, 1000);
			if (++iter_counter == MAX_TS_ITER_NUM) {
				dev_err(buf->mmg->dev,
					"handling registration interrupt took too long!!\n");
				return -EINVAL;
					"Timestamp offset processing reached timeout of %lld ms\n",
					ktime_ms_delta(ktime_get(), timestamp));
				return -EAGAIN;
			}

			goto start_over;
@@ -3197,7 +3267,7 @@ static int ts_buff_get_kernel_ts_record(struct hl_mmap_mem_buf *buf,
				(u64 *) cq_cb->kernel_address + cq_offset;
		requested_offset_record->cq_target_value = target_value;

		spin_unlock_irqrestore(wait_list_lock, flags);
		spin_unlock(wait_list_lock);
	}

	*pend = requested_offset_record;
@@ -3217,7 +3287,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	struct hl_user_pending_interrupt *pend;
	struct hl_mmap_mem_buf *buf;
	struct hl_cb *cq_cb;
	unsigned long timeout, flags;
	unsigned long timeout;
	long completion_rc;
	int rc = 0;

@@ -3264,7 +3334,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		pend->cq_target_value = target_value;
	}

	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	spin_lock(&interrupt->wait_list_lock);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
@@ -3272,7 +3342,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	if (*pend->cq_kernel_addr >= target_value) {
		if (register_ts_record)
			pend->ts_reg_info.in_use = 0;
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		spin_unlock(&interrupt->wait_list_lock);

		*status = HL_WAIT_CS_STATUS_COMPLETED;

@@ -3284,7 +3354,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
			goto set_timestamp;
		}
	} else if (!timeout_us) {
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		spin_unlock(&interrupt->wait_list_lock);
		*status = HL_WAIT_CS_STATUS_BUSY;
		pend->fence.timestamp = ktime_get();
		goto set_timestamp;
@@ -3309,7 +3379,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
		pend->ts_reg_info.in_use = 1;

	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
	spin_unlock(&interrupt->wait_list_lock);

	if (register_ts_record) {
		rc = *status = HL_WAIT_CS_STATUS_COMPLETED;
@@ -3353,9 +3423,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
	 * for ts record, the node will be deleted in the irq handler after
	 * we reach the target value.
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	spin_lock(&interrupt->wait_list_lock);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
	spin_unlock(&interrupt->wait_list_lock);

set_timestamp:
	*timestamp = ktime_to_ns(pend->fence.timestamp);
@@ -3383,7 +3453,7 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
				u64 *timestamp)
{
	struct hl_user_pending_interrupt *pend;
	unsigned long timeout, flags;
	unsigned long timeout;
	u64 completion_value;
	long completion_rc;
	int rc = 0;
@@ -3403,9 +3473,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
	/* Add pending user interrupt to relevant list for the interrupt
	 * handler to monitor
	 */
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	spin_lock(&interrupt->wait_list_lock);
	list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
	spin_unlock(&interrupt->wait_list_lock);

	/* We check for completion value as interrupt could have been received
	 * before we added the node to the wait list
@@ -3436,14 +3506,14 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
	 * If comparison fails, keep waiting until timeout expires
	 */
	if (completion_rc > 0) {
		spin_lock_irqsave(&interrupt->wait_list_lock, flags);
		spin_lock(&interrupt->wait_list_lock);
		/* reinit_completion must be called before we check for user
		 * completion value, otherwise, if interrupt is received after
		 * the comparison and before the next wait_for_completion,
		 * we will reach timeout and fail
		 */
		reinit_completion(&pend->fence.completion);
		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
		spin_unlock(&interrupt->wait_list_lock);

		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
			dev_err(hdev->dev, "Failed to copy completion value from user\n");
@@ -3480,9 +3550,9 @@ static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_
	}

remove_pending_user_interrupt:
	spin_lock_irqsave(&interrupt->wait_list_lock, flags);
	spin_lock(&interrupt->wait_list_lock);
	list_del(&pend->wait_list_node);
	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
	spin_unlock(&interrupt->wait_list_lock);

	*timestamp = ktime_to_ns(pend->fence.timestamp);

+75 −67
Original line number Diff line number Diff line
@@ -258,7 +258,7 @@ static int vm_show(struct seq_file *s, void *data)
	if (!dev_entry->hdev->mmu_enable)
		return 0;

	spin_lock(&dev_entry->ctx_mem_hash_spinlock);
	mutex_lock(&dev_entry->ctx_mem_hash_mutex);

	list_for_each_entry(ctx, &dev_entry->ctx_mem_hash_list, debugfs_list) {
		once = false;
@@ -329,7 +329,7 @@ static int vm_show(struct seq_file *s, void *data)

	}

	spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);

	ctx = hl_get_compute_ctx(dev_entry->hdev);
	if (ctx) {
@@ -1583,209 +1583,216 @@ static const struct file_operations hl_debugfs_fops = {
	.release = single_release,
};

static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry)
static void add_secured_nodes(struct hl_dbg_device_entry *dev_entry, struct dentry *root)
{
	debugfs_create_u8("i2c_bus",
				0644,
				dev_entry->root,
				root,
				&dev_entry->i2c_bus);

	debugfs_create_u8("i2c_addr",
				0644,
				dev_entry->root,
				root,
				&dev_entry->i2c_addr);

	debugfs_create_u8("i2c_reg",
				0644,
				dev_entry->root,
				root,
				&dev_entry->i2c_reg);

	debugfs_create_u8("i2c_len",
				0644,
				dev_entry->root,
				root,
				&dev_entry->i2c_len);

	debugfs_create_file("i2c_data",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_i2c_data_fops);

	debugfs_create_file("led0",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_led0_fops);

	debugfs_create_file("led1",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_led1_fops);

	debugfs_create_file("led2",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_led2_fops);
}

void hl_debugfs_add_device(struct hl_device *hdev)
static void add_files_to_device(struct hl_device *hdev, struct hl_dbg_device_entry *dev_entry,
				struct dentry *root)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
	int count = ARRAY_SIZE(hl_debugfs_list);
	struct hl_debugfs_entry *entry;
	int i;

	dev_entry->hdev = hdev;
	dev_entry->entry_arr = kmalloc_array(count,
					sizeof(struct hl_debugfs_entry),
					GFP_KERNEL);
	if (!dev_entry->entry_arr)
		return;

	dev_entry->data_dma_blob_desc.size = 0;
	dev_entry->data_dma_blob_desc.data = NULL;
	dev_entry->mon_dump_blob_desc.size = 0;
	dev_entry->mon_dump_blob_desc.data = NULL;

	INIT_LIST_HEAD(&dev_entry->file_list);
	INIT_LIST_HEAD(&dev_entry->cb_list);
	INIT_LIST_HEAD(&dev_entry->cs_list);
	INIT_LIST_HEAD(&dev_entry->cs_job_list);
	INIT_LIST_HEAD(&dev_entry->userptr_list);
	INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
	mutex_init(&dev_entry->file_mutex);
	init_rwsem(&dev_entry->state_dump_sem);
	spin_lock_init(&dev_entry->cb_spinlock);
	spin_lock_init(&dev_entry->cs_spinlock);
	spin_lock_init(&dev_entry->cs_job_spinlock);
	spin_lock_init(&dev_entry->userptr_spinlock);
	spin_lock_init(&dev_entry->ctx_mem_hash_spinlock);

	dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
						hl_debug_root);

	debugfs_create_x64("memory_scrub_val",
				0644,
				dev_entry->root,
				root,
				&hdev->memory_scrub_val);

	debugfs_create_file("memory_scrub",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_mem_scrub_fops);

	debugfs_create_x64("addr",
				0644,
				dev_entry->root,
				root,
				&dev_entry->addr);

	debugfs_create_file("data32",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_data32b_fops);

	debugfs_create_file("data64",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_data64b_fops);

	debugfs_create_file("set_power_state",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_power_fops);

	debugfs_create_file("device",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_device_fops);

	debugfs_create_file("clk_gate",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_clk_gate_fops);

	debugfs_create_file("stop_on_err",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_stop_on_err_fops);

	debugfs_create_file("dump_security_violations",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_security_violations_fops);

	debugfs_create_file("dump_razwi_events",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_razwi_check_fops);

	debugfs_create_file("dma_size",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_dma_size_fops);

	debugfs_create_blob("data_dma",
				0400,
				dev_entry->root,
				root,
				&dev_entry->data_dma_blob_desc);

	debugfs_create_file("monitor_dump_trig",
				0200,
				dev_entry->root,
				root,
				dev_entry,
				&hl_monitor_dump_fops);

	debugfs_create_blob("monitor_dump",
				0400,
				dev_entry->root,
				root,
				&dev_entry->mon_dump_blob_desc);

	debugfs_create_x8("skip_reset_on_timeout",
				0644,
				dev_entry->root,
				root,
				&hdev->reset_info.skip_reset_on_timeout);

	debugfs_create_file("state_dump",
				0600,
				dev_entry->root,
				root,
				dev_entry,
				&hl_state_dump_fops);

	debugfs_create_file("timeout_locked",
				0644,
				dev_entry->root,
				root,
				dev_entry,
				&hl_timeout_locked_fops);

	debugfs_create_u32("device_release_watchdog_timeout",
				0644,
				dev_entry->root,
				root,
				&hdev->device_release_watchdog_timeout_sec);

	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
		debugfs_create_file(hl_debugfs_list[i].name,
					0444,
					dev_entry->root,
					root,
					entry,
					&hl_debugfs_fops);
		entry->info_ent = &hl_debugfs_list[i];
		entry->dev_entry = dev_entry;
	}
}

void hl_debugfs_add_device(struct hl_device *hdev)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
	int count = ARRAY_SIZE(hl_debugfs_list);

	dev_entry->hdev = hdev;
	dev_entry->entry_arr = kmalloc_array(count,
					sizeof(struct hl_debugfs_entry),
					GFP_KERNEL);
	if (!dev_entry->entry_arr)
		return;

	dev_entry->data_dma_blob_desc.size = 0;
	dev_entry->data_dma_blob_desc.data = NULL;
	dev_entry->mon_dump_blob_desc.size = 0;
	dev_entry->mon_dump_blob_desc.data = NULL;

	INIT_LIST_HEAD(&dev_entry->file_list);
	INIT_LIST_HEAD(&dev_entry->cb_list);
	INIT_LIST_HEAD(&dev_entry->cs_list);
	INIT_LIST_HEAD(&dev_entry->cs_job_list);
	INIT_LIST_HEAD(&dev_entry->userptr_list);
	INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
	mutex_init(&dev_entry->file_mutex);
	init_rwsem(&dev_entry->state_dump_sem);
	spin_lock_init(&dev_entry->cb_spinlock);
	spin_lock_init(&dev_entry->cs_spinlock);
	spin_lock_init(&dev_entry->cs_job_spinlock);
	spin_lock_init(&dev_entry->userptr_spinlock);
	mutex_init(&dev_entry->ctx_mem_hash_mutex);

	dev_entry->root = debugfs_create_dir(dev_name(hdev->dev),
						hl_debug_root);

	add_files_to_device(hdev, dev_entry, dev_entry->root);
	if (!hdev->asic_prop.fw_security_enabled)
		add_secured_nodes(dev_entry);
		add_secured_nodes(dev_entry, dev_entry->root);
}

void hl_debugfs_remove_device(struct hl_device *hdev)
@@ -1795,6 +1802,7 @@ void hl_debugfs_remove_device(struct hl_device *hdev)

	debugfs_remove_recursive(entry->root);

	mutex_destroy(&entry->ctx_mem_hash_mutex);
	mutex_destroy(&entry->file_mutex);

	vfree(entry->data_dma_blob_desc.data);
@@ -1901,18 +1909,18 @@ void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	spin_lock(&dev_entry->ctx_mem_hash_spinlock);
	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
	list_add(&ctx->debugfs_list, &dev_entry->ctx_mem_hash_list);
	spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
}

void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
{
	struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;

	spin_lock(&dev_entry->ctx_mem_hash_spinlock);
	mutex_lock(&dev_entry->ctx_mem_hash_mutex);
	list_del(&ctx->debugfs_list);
	spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
	mutex_unlock(&dev_entry->ctx_mem_hash_mutex);
}

/**
+16 −6
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ static void dec_print_abnrm_intr_source(struct hl_device *hdev, u32 irq_status)
static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_id)
{
	bool reset_required = false;
	u32 irq_status;
	u32 irq_status, event_mask;

	irq_status = RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);

@@ -54,17 +54,27 @@ static void dec_error_intr_work(struct hl_device *hdev, u32 base_addr, u32 core_

	dec_print_abnrm_intr_source(hdev, irq_status);

	if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK)
		reset_required = true;

	/* Clear the interrupt */
	WREG32(base_addr + VCMD_IRQ_STATUS_OFFSET, irq_status);

	/* Flush the interrupt clear */
	RREG32(base_addr + VCMD_IRQ_STATUS_OFFSET);

	if (reset_required)
		hl_device_reset(hdev, HL_DRV_RESET_HARD);
	if (irq_status & VCMD_IRQ_STATUS_TIMEOUT_MASK) {
		reset_required = true;
		event_mask = HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
	} else if (irq_status & VCMD_IRQ_STATUS_CMDERR_MASK) {
		event_mask = HL_NOTIFIER_EVENT_UNDEFINED_OPCODE;
	} else {
		event_mask = HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
	}

	if (reset_required) {
		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
		hl_device_cond_reset(hdev, 0, event_mask);
	} else {
		hl_notifier_event_send_all(hdev, event_mask);
	}
}

static void dec_completion_abnrm(struct work_struct *work)
+206 −109

File changed.

Preview size limit exceeded, changes collapsed.

+1 −1
Original line number Diff line number Diff line
@@ -3152,7 +3152,7 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in
int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode,
						dma_addr_t buff, u32 *size)
{
	struct cpucp_packet pkt = {0};
	struct cpucp_packet pkt = {};
	u64 result;
	int rc = 0;

Loading