Commit 8254ee0e authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2021-06-22' of...

Merge tag 'misc-habanalabs-next-2021-06-22' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.14:

- Change communication protocol with f/w. The new protocol allows better
  backward compatibility between different f/w versions and is more
  stable.
- Send hard-reset cause to f/w after a hard-reset has happened.
- Move to indirection when generating interrupts to f/w.
- Better progress and error messages during the f/w load stage.
- Recognize that f/w is with enabled security according to device ID.
- Add validity check to event queue mechanism.
- Add new event from f/w that will indicate a daemon has been terminated
  inside the f/w.

- Move to TLB cache range invalidation in the device's MMU.
- Disable memory scrubbing by default for performance.

- Many fixes for sparse/smatch reported errors.
- Enable by default stop-on-err in the ASIC.
- Move to ASYNC device probing to speed up loading of the driver in
  servers with multiple devices.
- Fix to stop using disabled NIC ports when doing collective operation.
- Use standard error codes instead of positive values.
- Add support for resetting device after user has finished using it.
- Add debugfs option to avoid reset when a CS has got stuck.
- Add print of the last 8 CS pointers in case of error in QMANs.
- Add statistics on opening of the FD of a device.

* tag 'misc-habanalabs-next-2021-06-22' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (72 commits)
  habanalabs/gaudi: refactor hard-reset related code
  habanalabs/gaudi: add support for NIC DERR
  habanalabs: add validity check for signal cs
  habanalabs: get lower/upper 32 bits via masking
  habanalabs: allow reset upon device release
  debugfs: add skip_reset_on_timeout option
  habanalabs: fix typo
  habanalabs/gaudi: correct driver events numbering
  habanalabs: remove a rogue #ifdef
  habanalabs/gaudi: print last QM PQEs on error
  habanalabs/goya: add '__force' attribute to suppress false alarm
  habanalabs: added open_stats info ioctl
  habanalabs/gaudi: set the correct rc in case of err
  habanalabs/gaudi: update coresight configuration
  habanalabs: remove node from list before freeing the node
  habanalabs: set rc as 'valid' in case of intentional func exit
  habanalabs: zero complex structures using memset
  habanalabs: print more info when failing to pin user memory
  habanalabs: Fix an error handling path in 'hl_pci_probe()'
  habanalabs: print firmware versions
  ...
parents 1730a594 b7a71fdd
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -207,6 +207,14 @@ Contact: ogabbay@kernel.org
Description:    Sets the PCI power state. Valid values are "1" for D0 and "2"
                for D3Hot

What:           /sys/kernel/debug/habanalabs/hl<n>/skip_reset_on_timeout
Date:           Jun 2021
KernelVersion:  5.13
Contact:        ynudelman@habana.ai
Description:    Sets the skip reset on timeout option for the device. Value of
                "0" means device will be reset in case some CS has timed out,
                otherwise it will not be reset.

What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
Date:           Mar 2020
KernelVersion:  5.6
+76 −5
Original line number Diff line number Diff line
@@ -556,6 +556,13 @@ static void cs_do_release(struct kref *ref)
	else if (!cs->submitted)
		cs->fence->error = -EBUSY;

	if (unlikely(cs->skip_reset_on_timeout)) {
		dev_err(hdev->dev,
			"Command submission %llu completed after %llu (s)\n",
			cs->sequence,
			div_u64(jiffies - cs->submission_time_jiffies, HZ));
	}

	if (cs->timestamp)
		cs->fence->timestamp = ktime_get();
	complete_all(&cs->fence->completion);
@@ -571,6 +578,8 @@ static void cs_timedout(struct work_struct *work)
	int rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						 work_tdr.work);
	bool skip_reset_on_timeout = cs->skip_reset_on_timeout;

	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;
@@ -581,6 +590,7 @@ static void cs_timedout(struct work_struct *work)
	}

	/* Mark the CS is timed out so we won't try to cancel its TDR */
	if (likely(!skip_reset_on_timeout))
		cs->timedout = true;

	hdev = cs->ctx->hdev;
@@ -613,11 +623,13 @@ static void cs_timedout(struct work_struct *work)

	cs_put(cs);

	if (likely(!skip_reset_on_timeout)) {
		if (hdev->reset_on_lockup)
		hl_device_reset(hdev, 0);
			hl_device_reset(hdev, HL_RESET_TDR);
		else
			hdev->needs_reset = true;
	}
}

static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, u64 user_sequence,
@@ -650,6 +662,10 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
	cs->type = cs_type;
	cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
	cs->timeout_jiffies = timeout;
	cs->skip_reset_on_timeout =
		hdev->skip_reset_on_timeout ||
		!!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT);
	cs->submission_time_jiffies = jiffies;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
@@ -1481,6 +1497,61 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
	return rc;
}

/*
 * hl_cs_signal_sob_wraparound_handler: handle the SOB value wraparound case.
 *
 * @hdev: habanalabs device structure.
 * @q_idx: index into hdev->kernel_queues of the queue being handled.
 * @hw_sob: in/out pointer to the SOB currently used; when a switch is made
 *          it is updated to point to the other SOB reserved to the queue.
 * @count: amount to add to the queue's next SOB value.
 *
 * If adding @count to the next SOB value would reach HL_MAX_SOB_VAL, move to
 * the other SOB reserved to the queue and restart the value from 1.
 * Otherwise only advance the next SOB value by @count.
 *
 * Note that this function must be called while hw_queues_lock is taken.
 *
 * Return: 0 on success, -EINVAL if the other SOB is still in use.
 */
int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
			struct hl_hw_sob **hw_sob, u32 count)
{
	struct hl_sync_stream_properties *prop =
			&hdev->kernel_queues[q_idx].sync_stream_prop;
	struct hl_hw_sob *curr_sob = *hw_sob;
	struct hl_hw_sob *next_sob;
	u8 next_sob_offset;

	kref_get(&curr_sob->kref);

	/* No wraparound: just advance the next value and we are done */
	if (prop->next_sob_val + count < HL_MAX_SOB_VAL) {
		prop->next_sob_val += count;
		return 0;
	}

	/*
	 * The max value was reached, so give back the reference taken above.
	 * The release function is not expected to be called here, as the
	 * refcount was incremented just a few lines earlier.
	 */
	kref_put(&curr_sob->kref, hl_sob_reset_error);

	/* Locate the other reserved SOB; fail if it is still in use */
	next_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS;
	next_sob = &prop->hw_sob[next_sob_offset];

	if (kref_read(&next_sob->kref) != 1) {
		dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n",
							q_idx);
		return -EINVAL;
	}

	/* Make the switch (only two SOBs are currently in use) */
	prop->next_sob_val = 1;
	prop->curr_sob_offset = next_sob_offset;
	*hw_sob = next_sob;

	dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
			prop->curr_sob_offset, q_idx);

	return 0;
}

static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
		struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
{
+0 −9
Original line number Diff line number Diff line
@@ -12,7 +12,6 @@
static void hl_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	int i;

	/* Release all allocated pending cb's, those cb's were never
@@ -57,14 +56,6 @@ static void hl_ctx_fini(struct hl_ctx *ctx)

		/* Scrub both SRAM and DRAM */
		hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);

		if ((!hdev->pldm) && (hdev->pdev) &&
				(!hdev->asic_funcs->is_device_idle(hdev,
					idle_mask,
					HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
			dev_notice(hdev->dev,
					"device not idle after user context is closed (0x%llx, 0x%llx)\n",
						idle_mask[0], idle_mask[1]);
	} else {
		dev_dbg(hdev->dev, "closing kernel context\n");
		hdev->asic_funcs->ctx_fini(ctx);
+5 −0
Original line number Diff line number Diff line
@@ -1278,6 +1278,11 @@ void hl_debugfs_add_device(struct hl_device *hdev)
				dev_entry->root,
				&dev_entry->blob_desc);

	debugfs_create_x8("skip_reset_on_timeout",
				0644,
				dev_entry->root,
				&hdev->skip_reset_on_timeout);

	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
		debugfs_create_file(hl_debugfs_list[i].name,
					0444,
+67 −15
Original line number Diff line number Diff line
@@ -51,6 +51,8 @@ bool hl_device_operational(struct hl_device *hdev,

static void hpriv_release(struct kref *ref)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool device_is_idle = true;
	struct hl_fpriv *hpriv;
	struct hl_device *hdev;

@@ -71,8 +73,20 @@ static void hpriv_release(struct kref *ref)

	kfree(hpriv);

	if (hdev->reset_upon_device_release)
		hl_device_reset(hdev, 0);
	if ((!hdev->pldm) && (hdev->pdev) &&
			(!hdev->asic_funcs->is_device_idle(hdev,
				idle_mask,
				HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
		dev_err(hdev->dev,
			"device not idle after user context is closed (0x%llx_%llx)\n",
			idle_mask[1], idle_mask[0]);

		device_is_idle = false;
	}

	if ((hdev->reset_if_device_not_idle && !device_is_idle)
			|| hdev->reset_upon_device_release)
		hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);
}

void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -118,6 +132,9 @@ static int hl_device_release(struct inode *inode, struct file *filp)
		dev_warn(hdev->dev,
			"Device is still in use because there are live CS and/or memory mappings\n");

	hdev->last_open_session_duration_jif =
		jiffies - hdev->last_successful_open_jif;

	return 0;
}

@@ -868,7 +885,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	bool hard_reset, from_hard_reset_thread;
	bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
	int i, rc;

	if (!hdev->init_done) {
@@ -880,11 +897,28 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	hard_reset = (flags & HL_RESET_HARD) != 0;
	from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;

	if ((!hard_reset) && (!hdev->supports_soft_reset)) {
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");
	if (!hard_reset && !hdev->supports_soft_reset) {
		hard_instead_soft = true;
		hard_reset = true;
	}

	if (hdev->reset_upon_device_release &&
			(flags & HL_RESET_DEVICE_RELEASE)) {
		dev_dbg(hdev->dev,
			"Perform %s-reset upon device release\n",
			hard_reset ? "hard" : "soft");
		goto do_reset;
	}

	if (!hard_reset && !hdev->allow_external_soft_reset) {
		hard_instead_soft = true;
		hard_reset = true;
	}

	if (hard_instead_soft)
		dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");

do_reset:
	/* Re-entry of reset thread */
	if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
		goto kill_processes;
@@ -900,6 +934,19 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		if (rc)
			return 0;

		/*
		 * 'reset cause' is being updated here, because getting here
		 * means that it's the 1st time and the last time we're here
		 * ('in_reset' makes sure of it). This makes sure that
		 * 'reset_cause' will continue holding its 1st recorded reason!
		 */
		if (flags & HL_RESET_HEARTBEAT)
			hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
		else if (flags & HL_RESET_TDR)
			hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
		else
			hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;

		/*
		 * if reset is due to heartbeat, device CPU is not responsive in
		 * which case no point sending PCI disable message to it
@@ -943,9 +990,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		hdev->process_kill_trial_cnt = 0;

		/*
		 * Because the reset function can't run from interrupt or
		 * from heartbeat work, we need to call the reset function
		 * from a dedicated work
		 * Because the reset function can't run from heartbeat work,
		 * we need to call the reset function from a dedicated work.
		 */
		queue_delayed_work(hdev->device_reset_work.wq,
			&hdev->device_reset_work.reset_work, 0);
@@ -1096,8 +1142,8 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
		dev_err(hdev->dev,
			"device is not idle (mask %#llx %#llx) after reset\n",
			idle_mask[0], idle_mask[1]);
			"device is not idle (mask 0x%llx_%llx) after reset\n",
			idle_mask[1], idle_mask[0]);
		rc = -EIO;
		goto out_err;
	}
@@ -1334,8 +1380,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
	}

	/*
	 * From this point, in case of an error, add char devices and create
	 * sysfs nodes as part of the error flow, to allow debugging.
	 * From this point, override rc (=0) in case of an error to allow
	 * debugging (by adding char devices and create sysfs nodes as part of
	 * the error flow).
	 */
	add_cdev_sysfs_on_err = true;

@@ -1369,7 +1416,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)

	dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
		hdev->asic_name,
		hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
		hdev->asic_prop.dram_size / SZ_1G);

	rc = hl_vm_init(hdev);
	if (rc) {
@@ -1475,6 +1522,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
void hl_device_fini(struct hl_device *hdev)
{
	ktime_t timeout;
	u64 reset_sec;
	int i, rc;

	dev_info(hdev->dev, "Removing device\n");
@@ -1482,6 +1530,11 @@ void hl_device_fini(struct hl_device *hdev)
	hdev->device_fini_pending = 1;
	flush_delayed_work(&hdev->device_reset_work.reset_work);

	if (hdev->pldm)
		reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
	else
		reset_sec = HL_HARD_RESET_MAX_TIMEOUT;

	/*
	 * This function is competing with the reset function, so try to
	 * take the reset atomic and if we are already in middle of reset,
@@ -1490,8 +1543,7 @@ void hl_device_fini(struct hl_device *hdev)
	 * ports, the hard reset could take between 10-30 seconds
	 */

	timeout = ktime_add_us(ktime_get(),
				HL_HARD_RESET_MAX_TIMEOUT * 1000 * 1000);
	timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
	rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
	while (rc) {
		usleep_range(50, 200);
Loading