Commit f0c875ff authored by Kashyap Desai, committed by Leon Romanovsky
Browse files

RDMA/bnxt_re: use firmware provided max request timeout



Firmware provides the maximum request timeout value as part of the
hwrm_ver_get API. The driver reads the timeout from firmware and, if that
interface is not available, falls back to a hardcoded timeout value.
Also add a helper function to check the FW status.

Signed-off-by: Kashyap Desai <kashyap.desai@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
Link: https://lore.kernel.org/r/1686308514-11996-16-git-send-email-selvin.xavier@broadcom.com


Signed-off-by: Leon Romanovsky <leon@kernel.org>
parent a0027852
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -1041,6 +1041,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
	struct bnxt_en_dev *en_dev = rdev->en_dev;
	struct hwrm_ver_get_output resp = {0};
	struct hwrm_ver_get_input req = {0};
	struct bnxt_qplib_chip_ctx *cctx;
	struct bnxt_fw_msg fw_msg;
	int rc = 0;

@@ -1058,11 +1059,18 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
			  rc);
		return;
	}

	cctx = rdev->chip_ctx;
	rdev->qplib_ctx.hwrm_intf_ver =
		(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
		(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
		(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
		le16_to_cpu(resp.hwrm_intf_patch);

	cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);

	if (!cctx->hwrm_cmd_max_timeout)
		cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
}

static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)
+48 −11
Original line number Diff line number Diff line
@@ -89,6 +89,41 @@ static int bnxt_qplib_map_rc(u8 opcode)
	}
}

/**
 * bnxt_re_is_fw_stalled - Check firmware health
 * @rcfw:   rcfw channel instance of rdev
 * @cookie: cookie to track the command (only used in the warning message)
 * @opcode: opcode of the submitted rcfw command (only used in the warning
 *          message)
 * @cbit:   bitmap entry of cookie in cmdq->cmdq_bitmap
 *
 * If more than rcfw->max_timeout seconds have elapsed since
 * cmdq->last_seen, consider firmware as stalled and emit a rate-limited
 * warning that reports the waited time, the limit (both in msec) and
 * whether the command's bitmap entry is still set.
 *
 * NOTE(review): cmdq->last_seen is presumably the jiffies timestamp of the
 * most recent firmware response; it is updated outside this function.
 *
 * Return:
 * 0 if firmware is responding
 * -ENODEV if firmware is not responding
 */
static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
				 u16 cookie, u8 opcode, u16 cbit)
{
	struct bnxt_qplib_cmdq_ctx *cmdq;

	cmdq = &rcfw->cmdq;

	/* max_timeout is kept in seconds; scale by HZ to compare in jiffies */
	if (time_after(jiffies, cmdq->last_seen +
		      (rcfw->max_timeout * HZ))) {
		/*
		 * jiffies_to_msecs() returns unsigned int, hence %u. The
		 * message is newline terminated so it is not merged with
		 * whatever gets logged next.
		 */
		dev_warn_ratelimited(&rcfw->pdev->dev,
				     "%s: FW STALL Detected. cmdq[%#x]=%#x waited (%u > %d) msec active %d\n",
				     __func__, cookie, opcode,
				     jiffies_to_msecs(jiffies - cmdq->last_seen),
				     rcfw->max_timeout * 1000,
				     test_bit(cbit, cmdq->cmdq_bitmap));
		return -ENODEV;
	}

	return 0;
}

/**
 * __wait_for_resp   -	Don't hold the cpu context and wait for response
 * @rcfw      -   rcfw channel instance of rdev
@@ -105,6 +140,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
{
	struct bnxt_qplib_cmdq_ctx *cmdq;
	u16 cbit;
	int ret;

	cmdq = &rcfw->cmdq;
	cbit = cookie % rcfw->cmdq_depth;
@@ -118,8 +154,8 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
		wait_event_timeout(cmdq->waitq,
				   !test_bit(cbit, cmdq->cmdq_bitmap) ||
				   test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
				   msecs_to_jiffies(RCFW_FW_STALL_TIMEOUT_SEC
						    * 1000));
				   msecs_to_jiffies(rcfw->max_timeout * 1000));

		if (!test_bit(cbit, cmdq->cmdq_bitmap))
			return 0;

@@ -128,10 +164,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
		if (!test_bit(cbit, cmdq->cmdq_bitmap))
			return 0;

		/* Firmware stall is detected */
		if (time_after(jiffies, cmdq->last_seen +
			      (RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
			return -ENODEV;
		ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
		if (ret)
			return ret;

	} while (true);
};
@@ -352,6 +387,7 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
	struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
	unsigned long issue_time;
	u16 cbit;
	int ret;

	cbit = cookie % rcfw->cmdq_depth;
	issue_time = jiffies;
@@ -368,11 +404,10 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
		if (!test_bit(cbit, cmdq->cmdq_bitmap))
			return 0;
		if (jiffies_to_msecs(jiffies - issue_time) >
		    (RCFW_FW_STALL_TIMEOUT_SEC * 1000)) {
			/* Firmware stall is detected */
			if (time_after(jiffies, cmdq->last_seen +
				      (RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
				return -ENODEV;
		    (rcfw->max_timeout * 1000)) {
			ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
			if (ret)
				return ret;
		}
	} while (true);
};
@@ -951,6 +986,8 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
	if (!rcfw->qp_tbl)
		goto fail;

	rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;

	return 0;

fail:
+3 −1
Original line number Diff line number Diff line
@@ -51,7 +51,7 @@

#define RCFW_DBR_PCI_BAR_REGION		2
#define RCFW_DBR_BASE_PAGE_SHIFT	12
#define RCFW_FW_STALL_TIMEOUT_SEC	40
#define RCFW_FW_STALL_MAX_TIMEOUT	40

/* Cmdq contains a fix number of a 16-Byte slots */
struct bnxt_qplib_cmdqe {
@@ -227,6 +227,8 @@ struct bnxt_qplib_rcfw {
	atomic_t rcfw_intr_enabled;
	struct semaphore rcfw_inflight;
	atomic_t timeout_send;
	/* cached from chip cctx for quick reference in slow path */
	u16 max_timeout;
};

struct bnxt_qplib_cmdqmsg {
+1 −0
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@ struct bnxt_qplib_chip_ctx {
	u8	chip_rev;
	u8	chip_metal;
	u16	hw_stats_size;
	u16	hwrm_cmd_max_timeout;
	struct bnxt_qplib_drv_modes modes;
};