Commit b2b9a8d7 authored by Shannon Nelson's avatar Shannon Nelson Committed by David S. Miller
Browse files

ionic: avoid races in ionic_heartbeat_check



Rework the heartbeat checks to be sure that we're getting an
atomic operation.  Through testing we found occasions where a
separate thread could clash with this check and cause erroneous
heartbeat check results.

Signed-off-by: default avatarAllen Hubbe <allenbh@pensando.io>
Signed-off-by: default avatarShannon Nelson <snelson@pensando.io>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 230efff4
Loading
Loading
Loading
Loading
+58 −35
Original line number Diff line number Diff line
@@ -24,6 +24,9 @@ static void ionic_watchdog_cb(struct timer_list *t)
		return;

	hb = ionic_heartbeat_check(ionic);
	dev_dbg(ionic->dev, "%s: hb %d running %d UP %d\n",
		__func__, hb, netif_running(lif->netdev),
		test_bit(IONIC_LIF_F_UP, lif->state));

	if (hb >= 0 &&
	    !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
@@ -91,9 +94,17 @@ int ionic_dev_setup(struct ionic *ionic)
		return -EFAULT;
	}

	idev->last_fw_status = 0xff;
	timer_setup(&ionic->watchdog_timer, ionic_watchdog_cb, 0);
	ionic->watchdog_period = IONIC_WATCHDOG_SECS * HZ;

	/* set times to ensure the first check will proceed */
	atomic_long_set(&idev->last_check_time, jiffies - 2 * HZ);
	idev->last_hb_time = jiffies - 2 * ionic->watchdog_period;
	/* init as ready, so no transition if the first check succeeds */
	idev->last_fw_hb = 0;
	idev->fw_hb_ready = true;
	idev->fw_status_ready = true;

	mod_timer(&ionic->watchdog_timer,
		  round_jiffies(jiffies + ionic->watchdog_period));

@@ -107,29 +118,38 @@ int ionic_dev_setup(struct ionic *ionic)
int ionic_heartbeat_check(struct ionic *ionic)
{
	struct ionic_dev *idev = &ionic->idev;
	unsigned long hb_time;
	unsigned long check_time, last_check_time;
	bool fw_status_ready, fw_hb_ready;
	u8 fw_status;
	u32 hb;
	u32 fw_hb;

	/* wait a little more than one second before testing again */
	hb_time = jiffies;
	if (time_before(hb_time, (idev->last_hb_time + ionic->watchdog_period)))
	/* wait a least one second before testing again */
	check_time = jiffies;
	last_check_time = atomic_long_read(&idev->last_check_time);
do_check_time:
	if (time_before(check_time, last_check_time + HZ))
		return 0;
	if (!atomic_long_try_cmpxchg_relaxed(&idev->last_check_time,
					     &last_check_time, check_time)) {
		/* if called concurrently, only the first should proceed. */
		dev_dbg(ionic->dev, "%s: do_check_time again\n", __func__);
		goto do_check_time;
	}

	/* firmware is useful only if the running bit is set and
	 * fw_status != 0xff (bad PCI read)
	 */
	fw_status = ioread8(&idev->dev_info_regs->fw_status);
	if (fw_status != 0xff)
		fw_status &= IONIC_FW_STS_F_RUNNING;  /* use only the run bit */
	fw_status_ready = (fw_status != 0xff) && (fw_status & IONIC_FW_STS_F_RUNNING);

	/* is this a transition? */
	if (fw_status != idev->last_fw_status &&
	    idev->last_fw_status != 0xff) {
	if (fw_status_ready != idev->fw_status_ready) {
		struct ionic_lif *lif = ionic->lif;
		bool trigger = false;

		if (!fw_status || fw_status == 0xff) {
		idev->fw_status_ready = fw_status_ready;

		if (!fw_status_ready) {
			dev_info(ionic->dev, "FW stopped %u\n", fw_status);
			if (lif && !test_bit(IONIC_LIF_F_FW_RESET, lif->state))
				trigger = true;
@@ -143,44 +163,47 @@ int ionic_heartbeat_check(struct ionic *ionic)
			struct ionic_deferred_work *work;

			work = kzalloc(sizeof(*work), GFP_ATOMIC);
			if (!work) {
				dev_err(ionic->dev, "LIF reset trigger dropped\n");
			} else {
			if (work) {
				work->type = IONIC_DW_TYPE_LIF_RESET;
				if (fw_status & IONIC_FW_STS_F_RUNNING &&
				    fw_status != 0xff)
					work->fw_status = 1;
				work->fw_status = fw_status_ready;
				ionic_lif_deferred_enqueue(&lif->deferred, work);
			}
		}
	}
	idev->last_fw_status = fw_status;

	if (!fw_status || fw_status == 0xff)
	if (!fw_status_ready)
		return -ENXIO;

	/* early FW has no heartbeat, else FW will return non-zero */
	hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
	if (!hb)
	/* wait at least one watchdog period since the last heartbeat */
	last_check_time = idev->last_hb_time;
	if (time_before(check_time, last_check_time + ionic->watchdog_period))
		return 0;

	/* are we stalled? */
	if (hb == idev->last_hb) {
		/* only complain once for each stall seen */
		if (idev->last_hb_time != 1) {
			dev_info(ionic->dev, "FW heartbeat stalled at %d\n",
				 idev->last_hb);
			idev->last_hb_time = 1;
		}
	fw_hb = ioread32(&idev->dev_info_regs->fw_heartbeat);
	fw_hb_ready = fw_hb != idev->last_fw_hb;

		return -ENXIO;
	/* early FW version had no heartbeat, so fake it */
	if (!fw_hb_ready && !fw_hb)
		fw_hb_ready = true;

	dev_dbg(ionic->dev, "%s: fw_hb %u last_fw_hb %u ready %u\n",
		__func__, fw_hb, idev->last_fw_hb, fw_hb_ready);

	idev->last_fw_hb = fw_hb;

	/* log a transition */
	if (fw_hb_ready != idev->fw_hb_ready) {
		idev->fw_hb_ready = fw_hb_ready;
		if (!fw_hb_ready)
			dev_info(ionic->dev, "FW heartbeat stalled at %d\n", fw_hb);
		else
			dev_info(ionic->dev, "FW heartbeat restored at %d\n", fw_hb);
	}

	if (idev->last_hb_time == 1)
		dev_info(ionic->dev, "FW heartbeat restored at %d\n", hb);
	if (!fw_hb_ready)
		return -ENXIO;

	idev->last_hb = hb;
	idev->last_hb_time = hb_time;
	idev->last_hb_time = check_time;

	return 0;
}
+5 −2
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@
#ifndef _IONIC_DEV_H_
#define _IONIC_DEV_H_

#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

@@ -135,9 +136,11 @@ struct ionic_dev {
	union ionic_dev_info_regs __iomem *dev_info_regs;
	union ionic_dev_cmd_regs __iomem *dev_cmd_regs;

	atomic_long_t last_check_time;
	unsigned long last_hb_time;
	u32 last_hb;
	u8 last_fw_status;
	u32 last_fw_hb;
	bool fw_hb_ready;
	bool fw_status_ready;

	u64 __iomem *db_pages;
	dma_addr_t phy_db_pages;