Commit 4be9fb53 authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay
Browse files

habanalabs: add debugfs node for configuring CS timeout



Command submission timeout is currently determined during driver
loading time. As some environments requires this timeout to be
modified in runtime, we introduce a new debugfs node that controls
the timeout value without the need to reload the driver.

Signed-off-by: default avatarOfir Bitton <obitton@habana.ai>
Reviewed-by: default avatarOded Gabbay <ogabbay@kernel.org>
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent 511c1957
Loading
Loading
Loading
Loading
+6 −0
Original line number Diff line number Diff line
@@ -226,6 +226,12 @@ Description: Gets the state dump occurring on a CS timeout or failure.
                Writing an integer X discards X state dumps, so that the
                next read would return X+1-st newest state dump.

What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
Date:           Sep 2021
KernelVersion:  5.16
Contact:        obitton@habana.ai
Description:    Sets the command submission timeout value in seconds.

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
+51 −0
Original line number Diff line number Diff line
@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
	return count;
}

static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	char tmp_buf[200];
	ssize_t rc;

	if (*ppos)
		return 0;

	sprintf(tmp_buf, "%d\n",
		jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
			strlen(tmp_buf) + 1);

	return rc;
}

static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
	struct hl_device *hdev = entry->hdev;
	u32 value;
	ssize_t rc;

	rc = kstrtouint_from_user(buf, count, 10, &value);
	if (rc)
		return rc;

	if (value)
		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
	else
		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;

	return count;
}

static const struct file_operations hl_data32b_fops = {
	.owner = THIS_MODULE,
	.read = hl_data_read32,
@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
	.write = hl_state_dump_write
};

static const struct file_operations hl_timeout_locked_fops = {
	.owner = THIS_MODULE,
	.read = hl_timeout_locked_read,
	.write = hl_timeout_locked_write
};

static const struct hl_info_list hl_debugfs_list[] = {
	{"command_buffers", command_buffers_show, NULL},
	{"command_submission", command_submission_show, NULL},
@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
				dev_entry,
				&hl_state_dump_fops);

	debugfs_create_file("timeout_locked",
				0644,
				dev_entry->root,
				dev_entry,
				&hl_timeout_locked_fops);

	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
		debugfs_create_file(hl_debugfs_list[i].name,
					0444,