Commit b4eaf3e8 authored by Vitor Soares, committed by Liu Jian
Browse files

can: mcp251xfd: fix infinite loop when xmit fails

mainline inclusion
from mainline-v6.10-rc6
commit d8fb63e46c884c898a38f061c2330f7729e75510
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAGENL
CVE: CVE-2024-41088

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d8fb63e46c884c898a38f061c2330f7729e75510



---------------------------

When the mcp251xfd_start_xmit() function fails, the driver stops
processing messages, and the interrupt routine does not return,
running indefinitely even after killing the running application.

Error messages:
[  441.298819] mcp251xfd spi2.0 can0: ERROR in mcp251xfd_start_xmit: -16
[  441.306498] mcp251xfd spi2.0 can0: Transmit Event FIFO buffer not empty. (seq=0x000017c7, tef_tail=0x000017cf, tef_head=0x000017d0, tx_head=0x000017d3).
... and repeat forever.

The issue can be triggered when multiple devices share the same SPI
interface and there is concurrent access to the bus.

The problem occurs because tx_ring->head increments even if
mcp251xfd_start_xmit() fails. Consequently, the driver skips one TX
packet while still expecting a response in
mcp251xfd_handle_tefif_one().

Resolve the issue by starting a workqueue to write the tx obj
synchronously if err is -EBUSY. In case of any other error, decrement
tx_ring->head, remove the skb from the echo stack, and drop the message.

Fixes: 55e5b97f ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Cc: stable@vger.kernel.org
Signed-off-by: Vitor Soares <vitor.soares@toradex.com>
Link: https://lore.kernel.org/all/20240517134355.770777-1-ivitro@gmail.com


[mkl: use more imperative wording in patch description]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>

Conflicts:
	drivers/net/can/spi/mcp251xfd/mcp251xfd-tx.c
	drivers/net/can/spi/mcp251xfd/mcp251xfd.h
[Mainly because we did not backport
09b0eb92 ("can: mcp251xfd: move TX handling into separate file"),
f318482a ("can: dev: can_free_echo_skb(): extend to return can frame
 length").]
Signed-off-by: Liu Jian <liujian56@huawei.com>
parent 03b9074c
Loading
Loading
Loading
Loading
+60 −9
Original line number Diff line number Diff line
@@ -2304,6 +2304,39 @@ mcp251xfd_tx_obj_from_skb(const struct mcp251xfd_priv *priv,
	tx_obj->xfer[0].len = len;
}

static void mcp251xfd_tx_failure_drop(const struct mcp251xfd_priv *priv,
				      struct mcp251xfd_tx_ring *tx_ring,
				      int err)
{
	struct net_device *ndev = priv->ndev;
	struct net_device_stats *stats = &ndev->stats;
	unsigned int frame_len = 0;
	u8 tx_head;

	tx_ring->head--;
	stats->tx_dropped++;
	tx_head = mcp251xfd_get_tx_head(tx_ring);
	can_free_echo_skb(ndev, tx_head);
	netdev_completed_queue(ndev, 1, frame_len);
	netif_wake_queue(ndev);

	if (net_ratelimit())
		netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
}

/*
 * Work handler that writes the deferred TX object synchronously.
 *
 * @work: the tx_work member embedded in struct mcp251xfd_priv.
 *
 * mcp251xfd_start_xmit() stores the TX object in priv->tx_work_obj and
 * queues this work when the regular write returned -EBUSY. If the
 * synchronous SPI transfer fails as well, the frame is dropped.
 *
 * NOTE(review): no prototype for this function is visible in this
 * change; if it is only referenced from this file it could be static -
 * confirm before changing linkage.
 */
void mcp251xfd_tx_obj_write_sync(struct work_struct *work)
{
	struct mcp251xfd_priv *priv;
	int err;

	priv = container_of(work, struct mcp251xfd_priv, tx_work);

	err = spi_sync(priv->spi, &priv->tx_work_obj->msg);
	if (!err)
		return;

	mcp251xfd_tx_failure_drop(priv, priv->tx, err);
}

static int mcp251xfd_tx_obj_write(const struct mcp251xfd_priv *priv,
				  struct mcp251xfd_tx_obj *tx_obj)
{
@@ -2335,6 +2368,11 @@ static bool mcp251xfd_tx_busy(const struct mcp251xfd_priv *priv,
	return false;
}

/*
 * Report whether @work is busy, i.e. whether work_busy() returns a
 * non-zero state for it. Used by mcp251xfd_start_xmit() to refuse new
 * frames while the deferred TX write is outstanding.
 */
static bool mcp251xfd_work_busy(struct work_struct *work)
{
	return work_busy(work) != 0;
}

static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
					struct net_device *ndev)
{
@@ -2347,7 +2385,8 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
	if (can_dropped_invalid_skb(ndev, skb))
		return NETDEV_TX_OK;

	if (mcp251xfd_tx_busy(priv, tx_ring))
	if (mcp251xfd_tx_busy(priv, tx_ring) ||
	    mcp251xfd_work_busy(&priv->tx_work))
		return NETDEV_TX_BUSY;

	tx_obj = mcp251xfd_get_tx_obj_next(tx_ring);
@@ -2362,13 +2401,13 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
	can_put_echo_skb(skb, ndev, tx_head);

	err = mcp251xfd_tx_obj_write(priv, tx_obj);
	if (err)
		goto out_err;

	return NETDEV_TX_OK;

 out_err:
	netdev_err(priv->ndev, "ERROR in %s: %d\n", __func__, err);
	if (err == -EBUSY) {
		netif_stop_queue(ndev);
		priv->tx_work_obj = tx_obj;
		queue_work(priv->wq, &priv->tx_work);
	} else if (err) {
		mcp251xfd_tx_failure_drop(priv, tx_ring, err);
	}

	return NETDEV_TX_OK;
}
@@ -2403,11 +2442,20 @@ static int mcp251xfd_open(struct net_device *ndev)

	can_rx_offload_enable(&priv->offload);

	priv->wq = alloc_ordered_workqueue("%s-mcp251xfd_wq",
					   WQ_FREEZABLE | WQ_MEM_RECLAIM,
					   dev_name(&spi->dev));
	if (!priv->wq) {
		err = -ENOMEM;
		goto out_can_rx_offload_disable;
	}
	INIT_WORK(&priv->tx_work, mcp251xfd_tx_obj_write_sync);

	err = request_threaded_irq(spi->irq, NULL, mcp251xfd_irq,
				   IRQF_ONESHOT, dev_name(&spi->dev),
				   priv);
	if (err)
		goto out_can_rx_offload_disable;
		goto out_destroy_workqueue;

	err = mcp251xfd_chip_interrupts_enable(priv);
	if (err)
@@ -2419,6 +2467,8 @@ static int mcp251xfd_open(struct net_device *ndev)

 out_free_irq:
	free_irq(spi->irq, priv);
 out_destroy_workqueue:
	destroy_workqueue(priv->wq);
 out_can_rx_offload_disable:
	can_rx_offload_disable(&priv->offload);
 out_transceiver_disable:
@@ -2441,6 +2491,7 @@ static int mcp251xfd_stop(struct net_device *ndev)
	netif_stop_queue(ndev);
	mcp251xfd_chip_interrupts_disable(priv);
	free_irq(ndev->irq, priv);
	destroy_workqueue(priv->wq);
	can_rx_offload_disable(&priv->offload);
	mcp251xfd_chip_stop(priv, CAN_STATE_STOPPED);
	mcp251xfd_transceiver_disable(priv);
+4 −0
Original line number Diff line number Diff line
@@ -584,6 +584,10 @@ struct mcp251xfd_priv {
	struct mcp251xfd_tx_ring tx[1];
	struct mcp251xfd_rx_ring *rx[1];

	struct workqueue_struct *wq;
	struct work_struct tx_work;
	struct mcp251xfd_tx_obj *tx_work_obj;

	u8 rx_ring_num;

	struct mcp251xfd_ecc ecc;