Commit ce80098d authored by Ofir Bitton, committed by Oded Gabbay
Browse files

habanalabs: support hard-reset scheduling during soft-reset



A hard-reset can be requested while a soft-reset is in progress. The
driver must allow such a request to be scheduled; otherwise, critical
events received during the soft-reset will be ignored.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 42eb2872
Loading
Loading
Loading
Loading
+28 −3
Original line number Diff line number Diff line
@@ -978,7 +978,7 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
								reset_upon_device_release = false;
			reset_upon_device_release = false, schedule_hard_reset = false;
	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
	struct hl_ctx *ctx;
	int i, rc;
@@ -1031,6 +1031,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		/* Block future CS/VM/JOB completion operations */
		spin_lock(&hdev->reset_info.lock);
		if (hdev->reset_info.in_reset) {
			/* We only allow scheduling of a hard reset during soft reset */
			if (hard_reset && hdev->reset_info.is_in_soft_reset)
				hdev->reset_info.hard_reset_schedule_flags = flags;
			spin_unlock(&hdev->reset_info.lock);
			return 0;
		}
@@ -1193,7 +1196,6 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
	 * is required for the initialization itself
	 */
	hdev->disabled = false;
	hdev->reset_info.is_in_soft_reset = false;

	rc = hdev->asic_funcs->hw_init(hdev);
	if (rc) {
@@ -1243,7 +1245,20 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		}
	}

	spin_lock(&hdev->reset_info.lock);
	hdev->reset_info.is_in_soft_reset = false;

	/* Schedule hard reset only if requested and if not already in hard reset.
	 * We keep 'in_reset' enabled, so no other reset can go in during the hard
	 * reset schedule
	 */
	if (!hard_reset && hdev->reset_info.hard_reset_schedule_flags)
		schedule_hard_reset = true;
	else
		hdev->reset_info.in_reset = 0;

	spin_unlock(&hdev->reset_info.lock);

	hdev->reset_info.needs_reset = false;

	dev_notice(hdev->dev, "Successfully finished resetting the device\n");
@@ -1261,6 +1276,16 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
		hdev->reset_info.soft_reset_cnt++;
	}

	if (schedule_hard_reset) {
		dev_info(hdev->dev, "Performing hard reset scheduled during soft reset\n");
		flags = hdev->reset_info.hard_reset_schedule_flags;
		hdev->reset_info.hard_reset_schedule_flags = 0;
		hdev->disabled = true;
		hard_reset = true;
		handle_reset_trigger(hdev, flags);
		goto again;
	}

	return 0;

out_err:
+3 −0
Original line number Diff line number Diff line
@@ -2460,6 +2460,8 @@ struct last_error_session_info {
 * @lock: lock to protect critical reset flows.
 * @soft_reset_cnt: number of soft reset since the driver was loaded.
 * @hard_reset_cnt: number of hard reset since the driver was loaded.
 * @hard_reset_schedule_flags: flags of a hard reset that was requested during a
 *                             soft reset and is scheduled to run once that soft
 *                             reset completes; 0 if no hard reset is scheduled.
 * @in_reset: is device in reset flow.
 * @is_in_soft_reset: Device is currently in soft reset process.
 * @needs_reset: true if reset_on_lockup is false and device should be reset
@@ -2478,6 +2480,7 @@ struct hl_reset_info {
	spinlock_t	lock;
	u32		soft_reset_cnt;
	u32		hard_reset_cnt;
	u32		hard_reset_schedule_flags;
	u8		in_reset;
	u8		is_in_soft_reset;
	u8		needs_reset;