Commit d1cbd165 authored by Michael Chan's avatar Michael Chan Committed by Jakub Kicinski
Browse files

bnxt_en: Retry sending the first message to firmware if it is under reset.



The first HWRM_VER_GET message to firmware during probe may timeout if
firmware is under reset.  This can happen during hot-plug for example.
On P5 and newer chips, we can check if firmware is in the boot stage by
reading a status register.  Retry 5 times if the status register shows
that firmware is not ready and not in error state.

Reviewed-by: default avatarEdwin Peer <edwin.peer@broadcom.com>
Signed-off-by: default avatarMichael Chan <michael.chan@broadcom.com>
Acked-by: default avatarWillem de Bruijn <willemb@google.com>
Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent b187e4ba
Loading
Loading
Loading
Loading
+35 −7
Original line number Diff line number Diff line
@@ -7441,17 +7441,29 @@ static void bnxt_try_map_fw_health_reg(struct bnxt *bp)

	sig = readl(hs + offsetof(struct hcomm_status, sig_ver));
	if ((sig & HCOMM_STATUS_SIGNATURE_MASK) != HCOMM_STATUS_SIGNATURE_VAL) {
		if (!bp->chip_num) {
			__bnxt_map_fw_health_reg(bp, BNXT_GRC_REG_BASE);
			bp->chip_num = readl(bp->bar0 +
					     BNXT_FW_HEALTH_WIN_BASE +
					     BNXT_GRC_REG_CHIP_NUM);
		}
		if (!BNXT_CHIP_P5(bp)) {
			if (bp->fw_health)
				bp->fw_health->status_reliable = false;
			return;
		}
		status_loc = BNXT_GRC_REG_STATUS_P5 |
			     BNXT_FW_HEALTH_REG_TYPE_BAR0;
	} else {
		status_loc = readl(hs + offsetof(struct hcomm_status,
						 fw_status_loc));
	}

	if (__bnxt_alloc_fw_health(bp)) {
		netdev_warn(bp->dev, "no memory for firmware status checks\n");
		return;
	}

	status_loc = readl(hs + offsetof(struct hcomm_status, fw_status_loc));
	bp->fw_health->regs[BNXT_FW_HEALTH_REG] = status_loc;
	reg_type = BNXT_FW_HEALTH_REG_TYPE(status_loc);
	if (reg_type == BNXT_FW_HEALTH_REG_TYPE_GRC) {
@@ -9355,14 +9367,30 @@ static int bnxt_fw_reset_via_optee(struct bnxt *bp)
static int bnxt_try_recover_fw(struct bnxt *bp)
{
	if (bp->fw_health && bp->fw_health->status_reliable) {
		u32 sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
		int retry = 0, rc;
		u32 sts;

		mutex_lock(&bp->hwrm_cmd_lock);
		do {
			rc = __bnxt_hwrm_ver_get(bp, true);
			sts = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
			if (!sts || !BNXT_FW_IS_BOOTING(sts))
				break;
			retry++;
		} while (rc == -EBUSY && retry < BNXT_FW_RETRY);
		mutex_unlock(&bp->hwrm_cmd_lock);

		netdev_err(bp->dev, "Firmware not responding, status: 0x%x\n",
		if (!BNXT_FW_IS_HEALTHY(sts)) {
			netdev_err(bp->dev,
				   "Firmware not responding, status: 0x%x\n",
				   sts);
			rc = -ENODEV;
		}
		if (sts & FW_STATUS_REG_CRASHED_NO_MASTER) {
			netdev_warn(bp->dev, "Firmware recover via OP-TEE requested\n");
			return bnxt_fw_reset_via_optee(bp);
		}
		return rc;
	}

	return -ENODEV;
+7 −0
Original line number Diff line number Diff line
@@ -1345,9 +1345,14 @@ struct bnxt_test_info {
#define BNXT_CAG_REG_LEGACY_INT_STATUS		0x4014
#define BNXT_CAG_REG_BASE			0x300000

#define BNXT_GRC_REG_STATUS_P5			0x520

#define BNXT_GRCPF_REG_KONG_COMM		0xA00
#define BNXT_GRCPF_REG_KONG_COMM_TRIGGER	0xB00

#define BNXT_GRC_REG_CHIP_NUM			0x48
#define BNXT_GRC_REG_BASE			0x260000

#define BNXT_GRC_BASE_MASK			0xfffff000
#define BNXT_GRC_OFFSET_MASK			0x00000ffc

@@ -1547,6 +1552,8 @@ struct bnxt_fw_reporter_ctx {
#define BNXT_FW_IS_ERR(sts)		(((sts) & BNXT_FW_STATUS_HEALTH_MSK) > \
					 BNXT_FW_STATUS_HEALTHY)

#define BNXT_FW_RETRY			5

struct bnxt {
	void __iomem		*bar0;
	void __iomem		*bar1;