Commit ac397934 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'bnxt_en-Error-recovery-fixes'



Michael Chan says:

====================
bnxt_en: Error recovery fixes.

This patch series contains fixes mostly for the error recovery feature
and related areas.  Please queue the series for -stable also.  Thanks.
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 868afbac 7e334fc8
Loading
Loading
Loading
Loading
+42 −19
Original line number Diff line number Diff line
@@ -2001,6 +2001,9 @@ static int bnxt_async_event_process(struct bnxt *bp,
	case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
		u32 data1 = le32_to_cpu(cmpl->event_data1);

		if (!bp->fw_health)
			goto async_event_process_exit;

		bp->fw_reset_timestamp = jiffies;
		bp->fw_reset_min_dsecs = cmpl->timestamp_lo;
		if (!bp->fw_reset_min_dsecs)
@@ -4421,8 +4424,9 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
			    FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);

	req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
	flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE |
		FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
	flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
	if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
		flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
	if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
		flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT |
			 FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT;
@@ -7115,14 +7119,6 @@ static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
	if (rc)
		goto err_recovery_out;
	if (!fw_health) {
		fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
		bp->fw_health = fw_health;
		if (!fw_health) {
			rc = -ENOMEM;
			goto err_recovery_out;
		}
	}
	fw_health->flags = le32_to_cpu(resp->flags);
	if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
	    !(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
@@ -8796,6 +8792,9 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
		if (fw_reset) {
			if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
				bnxt_ulp_stop(bp);
			bnxt_free_ctx_mem(bp);
			kfree(bp->ctx);
			bp->ctx = NULL;
			rc = bnxt_fw_init_one(bp);
			if (rc) {
				set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
@@ -9990,8 +9989,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
	struct bnxt_fw_health *fw_health = bp->fw_health;
	u32 val;

	if (!fw_health || !fw_health->enabled ||
	    test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
	if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
		return;

	if (fw_health->tmr_counter) {
@@ -10482,6 +10480,23 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
	bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
}

static void bnxt_alloc_fw_health(struct bnxt *bp)
{
	if (bp->fw_health)
		return;

	if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) &&
	    !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
		return;

	bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL);
	if (!bp->fw_health) {
		netdev_warn(bp->dev, "Failed to allocate fw_health\n");
		bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
		bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
	}
}

static int bnxt_fw_init_one_p1(struct bnxt *bp)
{
	int rc;
@@ -10528,6 +10543,7 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp)
		netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
			    rc);

	bnxt_alloc_fw_health(bp);
	rc = bnxt_hwrm_error_recovery_qcfg(bp);
	if (rc)
		netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
@@ -10609,6 +10625,12 @@ static int bnxt_fw_init_one(struct bnxt *bp)
	rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
	if (rc)
		return rc;

	/* In case fw capabilities have changed, destroy the unneeded
	 * reporters and create newly capable ones.
	 */
	bnxt_dl_fw_reporters_destroy(bp, false);
	bnxt_dl_fw_reporters_create(bp);
	bnxt_fw_init_one_p3(bp);
	return 0;
}
@@ -10751,8 +10773,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
		bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
		return;
	case BNXT_FW_RESET_STATE_ENABLE_DEV:
		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
		    bp->fw_health) {
		if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
			u32 val;

			val = bnxt_fw_health_readl(bp,
@@ -11396,11 +11417,11 @@ static void bnxt_remove_one(struct pci_dev *pdev)
	struct net_device *dev = pci_get_drvdata(pdev);
	struct bnxt *bp = netdev_priv(dev);

	if (BNXT_PF(bp)) {
	if (BNXT_PF(bp))
		bnxt_sriov_disable(bp);
		bnxt_dl_unregister(bp);
	}

	bnxt_dl_fw_reporters_destroy(bp, true);
	bnxt_dl_unregister(bp);
	pci_disable_pcie_error_reporting(pdev);
	unregister_netdev(dev);
	bnxt_shutdown_tc(bp);
@@ -11415,6 +11436,8 @@ static void bnxt_remove_one(struct pci_dev *pdev)
	bnxt_dcb_free(bp);
	kfree(bp->edev);
	bp->edev = NULL;
	kfree(bp->fw_health);
	bp->fw_health = NULL;
	bnxt_cleanup_pci(bp);
	bnxt_free_ctx_mem(bp);
	kfree(bp->ctx);
@@ -11875,8 +11898,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
	if (rc)
		goto init_err_cleanup_tc;

	if (BNXT_PF(bp))
	bnxt_dl_register(bp);
	bnxt_dl_fw_reporters_create(bp);

	netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
		    board_info[ent->driver_data].name,
+62 −31
Original line number Diff line number Diff line
@@ -39,11 +39,10 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
				     struct netlink_ext_ack *extack)
{
	struct bnxt *bp = devlink_health_reporter_priv(reporter);
	struct bnxt_fw_health *health = bp->fw_health;
	u32 val, health_status;
	int rc;

	if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
	if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
		return 0;

	val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
@@ -126,21 +125,15 @@ struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = {
	.recover = bnxt_fw_fatal_recover,
};

static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
void bnxt_dl_fw_reporters_create(struct bnxt *bp)
{
	struct bnxt_fw_health *health = bp->fw_health;

	if (!health)
	if (!bp->dl || !health)
		return;

	health->fw_reporter =
		devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
					       0, false, bp);
	if (IS_ERR(health->fw_reporter)) {
		netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
			    PTR_ERR(health->fw_reporter));
		health->fw_reporter = NULL;
	}
	if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
		goto err_recovery;

	health->fw_reset_reporter =
		devlink_health_reporter_create(bp->dl,
@@ -150,8 +143,30 @@ static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
		netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
			    PTR_ERR(health->fw_reset_reporter));
		health->fw_reset_reporter = NULL;
		bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
	}

err_recovery:
	if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
		return;

	if (!health->fw_reporter) {
		health->fw_reporter =
			devlink_health_reporter_create(bp->dl,
						       &bnxt_dl_fw_reporter_ops,
						       0, false, bp);
		if (IS_ERR(health->fw_reporter)) {
			netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
				    PTR_ERR(health->fw_reporter));
			health->fw_reporter = NULL;
			bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
			return;
		}
	}

	if (health->fw_fatal_reporter)
		return;

	health->fw_fatal_reporter =
		devlink_health_reporter_create(bp->dl,
					       &bnxt_dl_fw_fatal_reporter_ops,
@@ -160,24 +175,35 @@ static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
		netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
			    PTR_ERR(health->fw_fatal_reporter));
		health->fw_fatal_reporter = NULL;
		bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
	}
}

static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
{
	struct bnxt_fw_health *health = bp->fw_health;

	if (!health)
	if (!bp->dl || !health)
		return;

	if (health->fw_reporter)
		devlink_health_reporter_destroy(health->fw_reporter);

	if (health->fw_reset_reporter)
	if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
	    health->fw_reset_reporter) {
		devlink_health_reporter_destroy(health->fw_reset_reporter);
		health->fw_reset_reporter = NULL;
	}

	if (health->fw_fatal_reporter)
	if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all)
		return;

	if (health->fw_reporter) {
		devlink_health_reporter_destroy(health->fw_reporter);
		health->fw_reporter = NULL;
	}

	if (health->fw_fatal_reporter) {
		devlink_health_reporter_destroy(health->fw_fatal_reporter);
		health->fw_fatal_reporter = NULL;
	}
}

void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
@@ -185,9 +211,6 @@ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
	struct bnxt_fw_health *fw_health = bp->fw_health;
	struct bnxt_fw_reporter_ctx fw_reporter_ctx;

	if (!fw_health)
		return;

	fw_reporter_ctx.sp_event = event;
	switch (event) {
	case BNXT_FW_RESET_NOTIFY_SP_EVENT:
@@ -247,6 +270,8 @@ static const struct devlink_ops bnxt_dl_ops = {
	.flash_update	  = bnxt_dl_flash_update,
};

static const struct devlink_ops bnxt_vf_dl_ops;

enum bnxt_dl_param_id {
	BNXT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
	BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
@@ -460,7 +485,10 @@ int bnxt_dl_register(struct bnxt *bp)
		return -ENOTSUPP;
	}

	if (BNXT_PF(bp))
		dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
	else
		dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
	if (!dl) {
		netdev_warn(bp->dev, "devlink_alloc failed");
		return -ENOMEM;
@@ -479,6 +507,9 @@ int bnxt_dl_register(struct bnxt *bp)
		goto err_dl_free;
	}

	if (!BNXT_PF(bp))
		return 0;

	rc = devlink_params_register(dl, bnxt_dl_params,
				     ARRAY_SIZE(bnxt_dl_params));
	if (rc) {
@@ -506,8 +537,6 @@ int bnxt_dl_register(struct bnxt *bp)

	devlink_params_publish(dl);

	bnxt_dl_fw_reporters_create(bp);

	return 0;

err_dl_port_unreg:
@@ -530,12 +559,14 @@ void bnxt_dl_unregister(struct bnxt *bp)
	if (!dl)
		return;

	bnxt_dl_fw_reporters_destroy(bp);
	devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
	if (BNXT_PF(bp)) {
		devlink_port_params_unregister(&bp->dl_port,
					       bnxt_dl_port_params,
					       ARRAY_SIZE(bnxt_dl_port_params));
		devlink_port_unregister(&bp->dl_port);
		devlink_params_unregister(dl, bnxt_dl_params,
					  ARRAY_SIZE(bnxt_dl_params));
	}
	devlink_unregister(dl);
	devlink_free(dl);
}
+2 −0
Original line number Diff line number Diff line
@@ -58,6 +58,8 @@ struct bnxt_dl_nvm_param {

void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy);
void bnxt_dl_fw_reporters_create(struct bnxt *bp);
void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all);
int bnxt_dl_register(struct bnxt *bp);
void bnxt_dl_unregister(struct bnxt *bp);

+30 −8
Original line number Diff line number Diff line
@@ -3071,8 +3071,15 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
			}
		}

		if (info->dest_buf)
		if (info->dest_buf) {
			if ((info->seg_start + off + len) <=
			    BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
				memcpy(info->dest_buf + off, dma_buf, len);
			} else {
				rc = -ENOBUFS;
				break;
			}
		}

		if (cmn_req->req_type ==
				cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
@@ -3126,7 +3133,7 @@ static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,

static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
					   u16 segment_id, u32 *seg_len,
					   void *buf, u32 offset)
					   void *buf, u32 buf_len, u32 offset)
{
	struct hwrm_dbg_coredump_retrieve_input req = {0};
	struct bnxt_hwrm_dbg_dma_info info = {NULL};
@@ -3141,8 +3148,11 @@ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
				seq_no);
	info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
				     data_len);
	if (buf)
	if (buf) {
		info.dest_buf = buf + offset;
		info.buf_len = buf_len;
		info.seg_start = offset;
	}

	rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
	if (!rc)
@@ -3232,14 +3242,17 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
{
	u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
	u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
	struct coredump_segment_record *seg_record = NULL;
	u32 offset = 0, seg_hdr_len, seg_record_len;
	struct bnxt_coredump_segment_hdr seg_hdr;
	struct bnxt_coredump coredump = {NULL};
	time64_t start_time;
	u16 start_utc;
	int rc = 0, i;

	if (buf)
		buf_len = *dump_len;

	start_time = ktime_get_real_seconds();
	start_utc = sys_tz.tz_minuteswest * 60;
	seg_hdr_len = sizeof(seg_hdr);
@@ -3272,6 +3285,12 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
		u32 duration = 0, seg_len = 0;
		unsigned long start, end;

		if (buf && ((offset + seg_hdr_len) >
			    BNXT_COREDUMP_BUF_LEN(buf_len))) {
			rc = -ENOBUFS;
			goto err;
		}

		start = jiffies;

		rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
@@ -3284,9 +3303,11 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)

		/* Write segment data into the buffer */
		rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
						     &seg_len, buf,
						     &seg_len, buf, buf_len,
						     offset + seg_hdr_len);
		if (rc)
		if (rc && rc == -ENOBUFS)
			goto err;
		else if (rc)
			netdev_err(bp->dev,
				   "Failed to retrieve coredump for seg = %d\n",
				   seg_record->segment_id);
@@ -3316,7 +3337,8 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
					  rc);
	kfree(coredump.data);
	*dump_len += sizeof(struct bnxt_coredump_record);

	if (rc == -ENOBUFS)
		netdev_err(bp->dev, "Firmware returned large coredump buffer");
	return rc;
}

+4 −0
Original line number Diff line number Diff line
@@ -31,6 +31,8 @@ struct bnxt_coredump {
	u16		total_segs;
};

#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))

struct bnxt_hwrm_dbg_dma_info {
	void *dest_buf;
	int dest_buf_size;
@@ -38,6 +40,8 @@ struct bnxt_hwrm_dbg_dma_info {
	u16 seq_off;
	u16 data_len_off;
	u16 segs;
	u32 seg_start;
	u32 buf_len;
};

struct hwrm_dbg_cmn_input {
Loading