Commit bf23ffc8 authored by Thinh Tran, committed by Jakub Kicinski
Browse files

bnx2x: new flag to track HW resource allocation



While injecting PCIe errors to the upstream PCIe switch of
a BCM57810 NIC, system hangs/crashes were observed.

After several calls to bnx2x_tx_timeout() complete,
bnx2x_nic_unload() is called to free up HW resources
and bnx2x_napi_disable() is called to release NAPI objects.
Later, when the EEH driver calls bnx2x_io_slot_reset() to
complete the recovery process, bnx2x attempts to disable
NAPI again by calling bnx2x_napi_disable() and freeing
resources which have already been freed, resulting in a
hang or crash.

Introduce a new flag to track the HW resource and NAPI
allocation state, refactor duplicated code into a single
function, check page pool allocation status before freeing,
and reduce debug output when a TX timeout event occurs.

Reviewed-by: Manish Chopra <manishc@marvell.com>
Tested-by: Abdul Haleem <abdhalee@in.ibm.com>
Tested-by: David Christensen <drc@linux.vnet.ibm.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Tested-by: Venkata Sai Duggi <venkata.sai.duggi@ibm.com>
Signed-off-by: Thinh Tran <thinhtr@linux.vnet.ibm.com>
Link: https://lore.kernel.org/r/20230818161443.708785-2-thinhtr@linux.vnet.ibm.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parent 6dc5774d
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -1508,6 +1508,8 @@ struct bnx2x {
	bool			cnic_loaded;
	struct cnic_eth_dev	*(*cnic_probe)(struct net_device *);

	bool                    nic_stopped;

	/* Flag that indicates that we can start looking for FCoE L2 queue
	 * completions in the default status block.
	 */
+13 −8
Original line number Diff line number Diff line
@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
	bnx2x_add_all_napi(bp);
	DP(NETIF_MSG_IFUP, "napi added\n");
	bnx2x_napi_enable(bp);
	bp->nic_stopped = false;

	if (IS_PF(bp)) {
		/* set pf load just before approaching the MCP */
@@ -2960,6 +2961,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
load_error1:
	bnx2x_napi_disable(bp);
	bnx2x_del_all_napi(bp);
	bp->nic_stopped = true;

	/* clear pf_load status, as it was already set */
	if (IS_PF(bp))
@@ -3095,6 +3097,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
		if (!CHIP_IS_E1x(bp))
			bnx2x_pf_disable(bp);

		if (!bp->nic_stopped) {
			/* Disable HW interrupts, NAPI */
			bnx2x_netif_stop(bp, 1);
			/* Delete all NAPI objects */
@@ -3103,6 +3106,8 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
				bnx2x_del_all_napi_cnic(bp);
			/* Release IRQs */
			bnx2x_free_irq(bp);
			bp->nic_stopped = true;
		}

		/* Report UNLOAD_DONE to MCP */
		bnx2x_send_unload_done(bp, false);
+19 −13
Original line number Diff line number Diff line
@@ -9474,6 +9474,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)
		}
	}

	if (!bp->nic_stopped) {
		/* Disable HW interrupts, NAPI */
		bnx2x_netif_stop(bp, 1);
		/* Delete all NAPI objects */
@@ -9483,6 +9484,8 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode, bool keep_link)

		/* Release IRQs */
		bnx2x_free_irq(bp);
		bp->nic_stopped = true;
	}

	/* Reset the chip, unless PCI function is offline. If we reach this
	 * point following a PCI error handling, it means device is really
@@ -14238,6 +14241,7 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
		}
		bnx2x_drain_tx_queues(bp);
		bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
		if (!bp->nic_stopped) {
			bnx2x_netif_stop(bp, 1);
			bnx2x_del_all_napi(bp);

@@ -14245,6 +14249,8 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
				bnx2x_del_all_napi_cnic(bp);

			bnx2x_free_irq(bp);
			bp->nic_stopped = true;
		}

		/* Report UNLOAD_DONE to MCP */
		bnx2x_send_unload_done(bp, true);
+10 −7
Original line number Diff line number Diff line
@@ -529,6 +529,7 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
	bnx2x_vfpf_finalize(bp, &req->first_tlv);

free_irq:
	if (!bp->nic_stopped) {
		/* Disable HW interrupts, NAPI */
		bnx2x_netif_stop(bp, 0);
		/* Delete all NAPI objects */
@@ -536,6 +537,8 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)

		/* Release IRQs */
		bnx2x_free_irq(bp);
		bp->nic_stopped = true;
	}
}

static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,