Commit 022add1d authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'eth-bnxt-handle-invalid-tx-completions-more-gracefully'

Jakub Kicinski says:

====================
eth: bnxt: handle invalid Tx completions more gracefully

bnxt trusts the events generated by the device which may lead to kernel
crashes. These are extremely rare but they do happen. For a while
I thought crashing may be intentional, because device reporting invalid
completions should never happen, and having a core dump could be useful
if it does. But in practice I haven't found any clues in the core dumps,
and panic_on_warn exists.

Series was tested by forcing the recovery path manually. Because of
how rare the real crashes are I can't confirm it works for the actual
device errors until it's been widely deployed.

v1: https://lore.kernel.org/all/20230710205611.1198878-1-kuba@kernel.org/
====================

Link: https://lore.kernel.org/r/20230720010440.1967136-1-kuba@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 59be3baa 2b56b3d9
Loading
Loading
Loading
Loading
+84 −70
Original line number Diff line number Diff line
@@ -293,6 +293,60 @@ static void bnxt_db_cq(struct bnxt *bp, struct bnxt_db_info *db, u32 idx)
		BNXT_DB_CQ(db, idx);
}

static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
{
	if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)))
		return;

	if (BNXT_PF(bp))
		queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
	else
		schedule_delayed_work(&bp->fw_reset_task, delay);
}

static void __bnxt_queue_sp_work(struct bnxt *bp)
{
	if (BNXT_PF(bp))
		queue_work(bnxt_pf_wq, &bp->sp_task);
	else
		schedule_work(&bp->sp_task);
}

static void bnxt_queue_sp_work(struct bnxt *bp, unsigned int event)
{
	set_bit(event, &bp->sp_event);
	__bnxt_queue_sp_work(bp);
}

static void bnxt_sched_reset_rxr(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
{
	if (!rxr->bnapi->in_reset) {
		rxr->bnapi->in_reset = true;
		if (bp->flags & BNXT_FLAG_CHIP_P5)
			set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
		else
			set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
		__bnxt_queue_sp_work(bp);
	}
	rxr->rx_next_cons = 0xffff;
}

void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
			  int idx)
{
	struct bnxt_napi *bnapi = txr->bnapi;

	if (bnapi->tx_fault)
		return;

	netdev_err(bp->dev, "Invalid Tx completion (ring:%d tx_pkts:%d cons:%u prod:%u i:%d)",
		   txr->txq_index, bnapi->tx_pkts,
		   txr->tx_cons, txr->tx_prod, idx);
	WARN_ON_ONCE(1);
	bnapi->tx_fault = 1;
	bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
}

const u16 bnxt_lhint_arr[] = {
	TX_BD_FLAGS_LHINT_512_AND_SMALLER,
	TX_BD_FLAGS_LHINT_512_TO_1023,
@@ -652,6 +706,11 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
		skb = tx_buf->skb;
		tx_buf->skb = NULL;

		if (unlikely(!skb)) {
			bnxt_sched_reset_txr(bp, txr, i);
			return;
		}

		tx_bytes += skb->len;

		if (tx_buf->is_push) {
@@ -1234,38 +1293,6 @@ static int bnxt_discard_rx(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
	return 0;
}

static void bnxt_queue_fw_reset_work(struct bnxt *bp, unsigned long delay)
{
	if (!(test_bit(BNXT_STATE_IN_FW_RESET, &bp->state)))
		return;

	if (BNXT_PF(bp))
		queue_delayed_work(bnxt_pf_wq, &bp->fw_reset_task, delay);
	else
		schedule_delayed_work(&bp->fw_reset_task, delay);
}

static void bnxt_queue_sp_work(struct bnxt *bp)
{
	if (BNXT_PF(bp))
		queue_work(bnxt_pf_wq, &bp->sp_task);
	else
		schedule_work(&bp->sp_task);
}

static void bnxt_sched_reset(struct bnxt *bp, struct bnxt_rx_ring_info *rxr)
{
	if (!rxr->bnapi->in_reset) {
		rxr->bnapi->in_reset = true;
		if (bp->flags & BNXT_FLAG_CHIP_P5)
			set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
		else
			set_bit(BNXT_RST_RING_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
	}
	rxr->rx_next_cons = 0xffff;
}

static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id)
{
	struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map;
@@ -1320,7 +1347,7 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
		netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n",
			    cons, rxr->rx_next_cons,
			    TPA_START_ERROR_CODE(tpa_start1));
		bnxt_sched_reset(bp, rxr);
		bnxt_sched_reset_rxr(bp, rxr);
		return;
	}
	/* Store cfa_code in tpa_info to use in tpa_end
@@ -1844,7 +1871,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
		if (rxr->rx_next_cons != 0xffff)
			netdev_warn(bp->dev, "RX cons %x != expected cons %x\n",
				    cons, rxr->rx_next_cons);
		bnxt_sched_reset(bp, rxr);
		bnxt_sched_reset_rxr(bp, rxr);
		if (rc1)
			return rc1;
		goto next_rx_no_prod_no_len;
@@ -1882,7 +1909,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
			    !(bp->fw_cap & BNXT_FW_CAP_RING_MONITOR)) {
				netdev_warn_once(bp->dev, "RX buffer error %x\n",
						 rx_err);
				bnxt_sched_reset(bp, rxr);
				bnxt_sched_reset_rxr(bp, rxr);
			}
		}
		goto next_rx_no_len;
@@ -2329,7 +2356,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
			goto async_event_process_exit;
		}
		rxr = bp->bnapi[grp_idx]->rx_ring;
		bnxt_sched_reset(bp, rxr);
		bnxt_sched_reset_rxr(bp, rxr);
		goto async_event_process_exit;
	}
	case ASYNC_EVENT_CMPL_EVENT_ID_ECHO_REQUEST: {
@@ -2384,7 +2411,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
	default:
		goto async_event_process_exit;
	}
	bnxt_queue_sp_work(bp);
	__bnxt_queue_sp_work(bp);
async_event_process_exit:
	return 0;
}
@@ -2413,8 +2440,7 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
		}

		set_bit(vf_id - bp->pf.first_vf_id, bp->pf.vf_event_bmap);
		set_bit(BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
		bnxt_queue_sp_work(bp, BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT);
		break;

	case CMPL_BASE_TYPE_HWRM_ASYNC_EVENT:
@@ -2571,7 +2597,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,

static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
{
	if (bnapi->tx_pkts) {
	if (bnapi->tx_pkts && !bnapi->tx_fault) {
		bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
		bnapi->tx_pkts = 0;
	}
@@ -9424,6 +9450,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
		struct bnxt_napi *bnapi = bp->bnapi[i];
		struct bnxt_cp_ring_info *cpr;

		bnapi->tx_fault = 0;

		cpr = &bnapi->cp_ring;
		if (bnapi->in_reset)
			cpr->sw_stats.rx.rx_resets++;
@@ -11031,8 +11059,7 @@ static void bnxt_set_rx_mode(struct net_device *dev)
	if (mask != vnic->rx_mask || uc_update || mc_update) {
		vnic->rx_mask = mask;

		set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
		bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
	}
}

@@ -11597,8 +11624,7 @@ static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue)
	struct bnxt *bp = netdev_priv(dev);

	netdev_err(bp->dev,  "TX timeout detected, starting reset task!\n");
	set_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event);
	bnxt_queue_sp_work(bp);
	bnxt_queue_sp_work(bp, BNXT_RESET_TASK_SP_EVENT);
}

static void bnxt_fw_health_check(struct bnxt *bp)
@@ -11635,8 +11661,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
	return;

fw_reset:
	set_bit(BNXT_FW_EXCEPTION_SP_EVENT, &bp->sp_event);
	bnxt_queue_sp_work(bp);
	bnxt_queue_sp_work(bp, BNXT_FW_EXCEPTION_SP_EVENT);
}

static void bnxt_timer(struct timer_list *t)
@@ -11653,21 +11678,15 @@ static void bnxt_timer(struct timer_list *t)
	if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
		bnxt_fw_health_check(bp);

	if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks) {
		set_bit(BNXT_PERIODIC_STATS_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
	}
	if (BNXT_LINK_IS_UP(bp) && bp->stats_coal_ticks)
		bnxt_queue_sp_work(bp, BNXT_PERIODIC_STATS_SP_EVENT);

	if (bnxt_tc_flower_enabled(bp)) {
		set_bit(BNXT_FLOW_STATS_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
	}
	if (bnxt_tc_flower_enabled(bp))
		bnxt_queue_sp_work(bp, BNXT_FLOW_STATS_SP_EVENT);

#ifdef CONFIG_RFS_ACCEL
	if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count) {
		set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
	}
	if ((bp->flags & BNXT_FLAG_RFS) && bp->ntp_fltr_count)
		bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT);
#endif /*CONFIG_RFS_ACCEL*/

	if (bp->link_info.phy_retry) {
@@ -11675,21 +11694,17 @@ static void bnxt_timer(struct timer_list *t)
			bp->link_info.phy_retry = false;
			netdev_warn(bp->dev, "failed to update phy settings after maximum retries.\n");
		} else {
			set_bit(BNXT_UPDATE_PHY_SP_EVENT, &bp->sp_event);
			bnxt_queue_sp_work(bp);
			bnxt_queue_sp_work(bp, BNXT_UPDATE_PHY_SP_EVENT);
		}
	}

	if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state)) {
		set_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
	}
	if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
		bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);

	if ((bp->flags & BNXT_FLAG_CHIP_P5) && !bp->chip_rev &&
	    netif_carrier_ok(dev)) {
		set_bit(BNXT_RING_COAL_NOW_SP_EVENT, &bp->sp_event);
		bnxt_queue_sp_work(bp);
	}
	    netif_carrier_ok(dev))
		bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT);

bnxt_restart_timer:
	mod_timer(&bp->timer, jiffies + bp->current_interval);
}
@@ -12968,8 +12983,7 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
	bp->ntp_fltr_count++;
	spin_unlock_bh(&bp->ntp_fltr_lock);

	set_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event);
	bnxt_queue_sp_work(bp);
	bnxt_queue_sp_work(bp, BNXT_RX_NTP_FLTR_SP_EVENT);

	return new_fltr->sw_id;

+3 −0
Original line number Diff line number Diff line
@@ -1008,6 +1008,7 @@ struct bnxt_napi {
					  int);
	int			tx_pkts;
	u8			events;
	u8			tx_fault:1;

	u32			flags;
#define BNXT_NAPI_FLAG_XDP	0x1
@@ -2329,6 +2330,8 @@ int bnxt_get_avail_msix(struct bnxt *bp, int num);
int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init);
void bnxt_tx_disable(struct bnxt *bp);
void bnxt_tx_enable(struct bnxt *bp);
void bnxt_sched_reset_txr(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
			  int idx);
void bnxt_report_link(struct bnxt *bp);
int bnxt_update_link(struct bnxt *bp, bool chng_link_state);
int bnxt_hwrm_set_pause(struct bnxt *);
+4 −0
Original line number Diff line number Diff line
@@ -149,6 +149,7 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
			tx_buf->action = 0;
			tx_buf->xdpf = NULL;
		} else if (tx_buf->action == XDP_TX) {
			tx_buf->action = 0;
			rx_doorbell_needed = true;
			last_tx_cons = tx_cons;

@@ -158,6 +159,9 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
				tx_buf = &txr->tx_buf_ring[tx_cons];
				page_pool_recycle_direct(rxr->page_pool, tx_buf->page);
			}
		} else {
			bnxt_sched_reset_txr(bp, txr, i);
			return;
		}
		tx_cons = NEXT_TX(tx_cons);
	}