Commit 672ae26c authored by Kashyap Desai's avatar Kashyap Desai Committed by Martin K. Petersen
Browse files

scsi: mpi3mr: Add support for internal watchdog thread

The watchdog thread is the driver's internal thread which does a few things
such as detecting firmware faults, resetting the controller, performing
timestamp sync, etc.

Link: https://lore.kernel.org/r/20210520152545.2710479-6-kashyap.desai@broadcom.com


Cc: sathya.prakash@broadcom.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
parent 023ab2a9
Loading
Loading
Loading
Loading
+11 −0
Original line number Diff line number Diff line
@@ -481,6 +481,10 @@ struct scmd_priv {
 * @sense_buf_q_dma: Sense buffer queue DMA address
 * @sbq_lock: Sense buffer queue lock
 * @sbq_host_index: Sense buffer queue host index
 * @watchdog_work_q_name: Fault watchdog worker thread name
 * @watchdog_work_q: Fault watchdog worker thread
 * @watchdog_work: Fault watchdog work
 * @watchdog_lock: Fault watchdog lock
 * @is_driver_loading: Is driver still loading
 * @scan_started: Async scan started
 * @scan_failed: Async scan failed
@@ -494,6 +498,7 @@ struct scmd_priv {
 * @chain_buf_lock: Chain buffer list lock
 * @reset_in_progress: Reset in progress flag
 * @unrecoverable: Controller unrecoverable flag
 * @diagsave_timeout: Diagnostic information save timeout
 * @logging_level: Controller debug logging level
 * @current_event: Firmware event currently in process
 * @driver_info: Driver, Kernel, OS information to firmware
@@ -575,6 +580,11 @@ struct mpi3mr_ioc {
	spinlock_t sbq_lock;
	u32 sbq_host_index;

	char watchdog_work_q_name[20];
	struct workqueue_struct *watchdog_work_q;
	struct delayed_work watchdog_work;
	spinlock_t watchdog_lock;

	u8 is_driver_loading;
	u8 scan_started;
	u16 scan_failed;
@@ -592,6 +602,7 @@ struct mpi3mr_ioc {
	u8 reset_in_progress;
	u8 unrecoverable;

	u16 diagsave_timeout;
	int logging_level;

	struct mpi3mr_fwevt *current_event;
+125 −0
Original line number Diff line number Diff line
@@ -1463,6 +1463,129 @@ int mpi3mr_op_request_post(struct mpi3mr_ioc *mrioc,
	return retval;
}

/**
 * mpi3mr_watchdog_work - watchdog thread to monitor faults
 * @work: work struct embedded in the adapter's delayed watchdog work
 *
 * Delayed work that checks whether the controller is in the fault
 * state and, if so, reports the fault and issues a soft reset. While
 * the firmware signals that a diagnostic save is in progress the reset
 * is deferred, up to MPI3_SYSIF_DIAG_SAVE_TIMEOUT further runs.
 *
 * The work re-queues itself after MPI3MR_WATCHDOG_INTERVAL milliseconds
 * as long as the watchdog workqueue is still published in @mrioc. It is
 * not re-queued when the controller is marked unrecoverable, when the
 * fault code indicates a reset that is already under way, or when a
 * reset is in progress.
 *
 * Return: Nothing.
 */
static void mpi3mr_watchdog_work(struct work_struct *work)
{
	struct mpi3mr_ioc *mrioc =
	    container_of(work, struct mpi3mr_ioc, watchdog_work.work);
	unsigned long flags;
	enum mpi3mr_iocstate ioc_state;
	u32 fault, host_diagnostic;

	/* Check for the fault state on every run and issue a soft reset */
	ioc_state = mpi3mr_get_iocstate(mrioc);
	if (ioc_state == MRIOC_STATE_FAULT) {
		fault = readl(&mrioc->sysif_regs->fault) &
		    MPI3_SYSIF_FAULT_CODE_MASK;
		host_diagnostic = readl(&mrioc->sysif_regs->host_diagnostic);
		if (host_diagnostic & MPI3_SYSIF_HOST_DIAG_SAVE_IN_PROGRESS) {
			/* Report the fault only on the first run of a wait */
			if (!mrioc->diagsave_timeout) {
				mpi3mr_print_fault_info(mrioc);
				ioc_warn(mrioc, "Diag save in progress\n");
			}
			/* Keep polling until the save ends or times out */
			if ((mrioc->diagsave_timeout++) <=
			    MPI3_SYSIF_DIAG_SAVE_TIMEOUT)
				goto schedule_work;
		} else {
			mpi3mr_print_fault_info(mrioc);
		}
		/* Re-arm the diag save wait counter for the next fault */
		mrioc->diagsave_timeout = 0;

		if (fault == MPI3_SYSIF_FAULT_CODE_FACTORY_RESET) {
			ioc_info(mrioc,
			    "Factory Reset fault occurred marking controller as unrecoverable\n"
			    );
			mrioc->unrecoverable = 1;
			/* Leave without re-queuing the watchdog */
			goto out;
		}

		/*
		 * A reset is already being handled for these cases, so
		 * do not start another one and do not re-queue the work.
		 */
		if ((fault == MPI3_SYSIF_FAULT_CODE_DIAG_FAULT_RESET) ||
		    (fault == MPI3_SYSIF_FAULT_CODE_SOFT_RESET_IN_PROGRESS) ||
		    (mrioc->reset_in_progress))
			goto out;
		if (fault == MPI3_SYSIF_FAULT_CODE_CI_ACTIVATION_RESET)
			mpi3mr_soft_reset_handler(mrioc,
			    MPI3MR_RESET_FROM_CIACTIV_FAULT, 0);
		else
			mpi3mr_soft_reset_handler(mrioc,
			    MPI3MR_RESET_FROM_FAULT_WATCH, 0);
	}

schedule_work:
	/*
	 * The workqueue pointer is cleared under watchdog_lock when the
	 * watchdog is stopped; only re-queue while it is still set.
	 */
	spin_lock_irqsave(&mrioc->watchdog_lock, flags);
	if (mrioc->watchdog_work_q)
		queue_delayed_work(mrioc->watchdog_work_q,
		    &mrioc->watchdog_work,
		    msecs_to_jiffies(MPI3MR_WATCHDOG_INTERVAL));
	spin_unlock_irqrestore(&mrioc->watchdog_lock, flags);
out:
	return;
}

/**
 * mpi3mr_start_watchdog - Start watchdog
 * @mrioc: Adapter instance reference
 *
 * Create the single threaded watchdog workqueue and queue the first
 * run of the watchdog work that monitors controller faults. Does
 * nothing when the workqueue already exists. If the workqueue cannot
 * be created an error is logged and the watchdog is left stopped.
 *
 * Return: Nothing.
 */
void mpi3mr_start_watchdog(struct mpi3mr_ioc *mrioc)
{
	/* Watchdog already running */
	if (mrioc->watchdog_work_q)
		return;

	INIT_DELAYED_WORK(&mrioc->watchdog_work, mpi3mr_watchdog_work);
	snprintf(mrioc->watchdog_work_q_name,
	    sizeof(mrioc->watchdog_work_q_name), "watchdog_%s%d", mrioc->name,
	    mrioc->id);
	mrioc->watchdog_work_q =
	    create_singlethread_workqueue(mrioc->watchdog_work_q_name);
	if (!mrioc->watchdog_work_q) {
		ioc_err(mrioc, "%s: failed (line=%d)\n", __func__, __LINE__);
		return;
	}

	/* Workqueue is known to be valid here; schedule the first run */
	queue_delayed_work(mrioc->watchdog_work_q,
	    &mrioc->watchdog_work,
	    msecs_to_jiffies(MPI3MR_WATCHDOG_INTERVAL));
}

/**
 * mpi3mr_stop_watchdog - Stop watchdog
 * @mrioc: Adapter instance reference
 *
 * Unpublish the watchdog workqueue from the adapter, wait for any
 * pending or running watchdog work to finish and destroy the
 * workqueue. Does nothing if the watchdog was not started.
 *
 * Return: Nothing.
 */
void mpi3mr_stop_watchdog(struct mpi3mr_ioc *mrioc)
{
	struct workqueue_struct *detached_wq;
	unsigned long irq_flags;

	/*
	 * Clear the published pointer under the lock; the watchdog work
	 * checks it under the same lock before re-queuing itself.
	 */
	spin_lock_irqsave(&mrioc->watchdog_lock, irq_flags);
	detached_wq = mrioc->watchdog_work_q;
	mrioc->watchdog_work_q = NULL;
	spin_unlock_irqrestore(&mrioc->watchdog_lock, irq_flags);

	if (!detached_wq)
		return;

	/* Nothing was pending to cancel: flush the queue instead */
	if (!cancel_delayed_work_sync(&mrioc->watchdog_work))
		flush_workqueue(detached_wq);
	destroy_workqueue(detached_wq);
}

/**
 * mpi3mr_setup_admin_qpair - Setup admin queue pair
 * @mrioc: Adapter instance reference
@@ -2609,6 +2732,8 @@ void mpi3mr_cleanup_ioc(struct mpi3mr_ioc *mrioc)
{
	enum mpi3mr_iocstate ioc_state;

	mpi3mr_stop_watchdog(mrioc);

	mpi3mr_ioc_disable_intr(mrioc);

	ioc_state = mpi3mr_get_iocstate(mrioc);
+4 −1
Original line number Diff line number Diff line
@@ -559,6 +559,7 @@ static int mpi3mr_scan_finished(struct Scsi_Host *shost,
	if (mrioc->scan_started)
		return 0;
	ioc_info(mrioc, "%s :port enable: SUCCESS\n", __func__);
	mpi3mr_start_watchdog(mrioc);
	mrioc->is_driver_loading = 0;

	return 1;
@@ -843,9 +844,11 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	spin_lock_init(&mrioc->admin_req_lock);
	spin_lock_init(&mrioc->reply_free_queue_lock);
	spin_lock_init(&mrioc->sbq_lock);
	spin_lock_init(&mrioc->watchdog_lock);
	spin_lock_init(&mrioc->chain_buf_lock);

	mpi3mr_init_drv_cmd(&mrioc->init_cmds, MPI3MR_HOSTTAG_INITCMDS);

	if (pdev->revision)
		mrioc->enable_segqueue = true;