Commit 4e023b44 authored by Jakub Kicinski
Browse files

Merge branch 'net-lantiq_xrx200-improve-ethernet-performance'

Aleksander Jan Bajkowski says:

====================
net: lantiq_xrx200: improve ethernet performance

This patchset improves Ethernet performance by 15%.

NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

	Down		Up
Before	539 Mbps	599 Mbps
After	624 Mbps	695 Mbps
====================

Link: https://lore.kernel.org/r/20220104151144.181736-1-olek2@wp.pl


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 87eee9c5 e0155935
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -8,7 +8,7 @@
#define LTQ_DMA_H__

#define LTQ_DESC_SIZE		0x08	/* each descriptor is 64bit */
#define LTQ_DESC_NUM		0x40	/* 64 descriptors / channel */
#define LTQ_DESC_NUM		0xC0	/* 192 descriptors / channel */

#define LTQ_DMA_OWN		BIT(31) /* owner bit */
#define LTQ_DMA_C		BIT(30) /* complete bit */
+40 −22
Original line number Diff line number Diff line
@@ -63,7 +63,11 @@ struct xrx200_chan {

	struct napi_struct napi;
	struct ltq_dma_channel dma;

	union {
		struct sk_buff *skb[LTQ_DESC_NUM];
		void *rx_buff[LTQ_DESC_NUM];
	};

	struct sk_buff *skb_head;
	struct sk_buff *skb_tail;
@@ -78,6 +82,7 @@ struct xrx200_priv {
	struct xrx200_chan chan_rx;

	u16 rx_buf_size;
	u16 rx_skb_size;

	struct net_device *net_dev;
	struct device *dev;
@@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu)
	return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN);
}

/* Compute the total allocation size for an RX buffer that holds buf_size
 * bytes of frame data: the data area (frame data plus NET_SKB_PAD and
 * NET_IP_ALIGN bytes) and struct skb_shared_info, each rounded up with
 * SKB_DATA_ALIGN.
 */
static int xrx200_skb_size(u16 buf_size)
{
	int data_len = buf_size + NET_SKB_PAD + NET_IP_ALIGN;
	size_t shinfo_len = sizeof(struct skb_shared_info);

	return SKB_DATA_ALIGN(data_len) + SKB_DATA_ALIGN(shinfo_len);
}

/* drop all the packets from the DMA ring */
static void xrx200_flush_dma(struct xrx200_chan *ch)
{
@@ -173,30 +184,29 @@ static int xrx200_close(struct net_device *net_dev)
	return 0;
}

static int xrx200_alloc_skb(struct xrx200_chan *ch)
static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int size))
{
	struct sk_buff *skb = ch->skb[ch->dma.desc];
	void *buf = ch->rx_buff[ch->dma.desc];
	struct xrx200_priv *priv = ch->priv;
	dma_addr_t mapping;
	int ret = 0;

	ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev,
							  priv->rx_buf_size);
	if (!ch->skb[ch->dma.desc]) {
	ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size);
	if (!ch->rx_buff[ch->dma.desc]) {
		ret = -ENOMEM;
		goto skip;
	}

	mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data,
	mapping = dma_map_single(priv->dev, ch->rx_buff[ch->dma.desc],
				 priv->rx_buf_size, DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(priv->dev, mapping))) {
		dev_kfree_skb_any(ch->skb[ch->dma.desc]);
		ch->skb[ch->dma.desc] = skb;
		skb_free_frag(ch->rx_buff[ch->dma.desc]);
		ch->rx_buff[ch->dma.desc] = buf;
		ret = -ENOMEM;
		goto skip;
	}

	ch->dma.desc_base[ch->dma.desc].addr = mapping;
	ch->dma.desc_base[ch->dma.desc].addr = mapping + NET_SKB_PAD + NET_IP_ALIGN;
	/* Make sure the address is written before we give it to HW */
	wmb();
skip:
@@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
{
	struct xrx200_priv *priv = ch->priv;
	struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
	struct sk_buff *skb = ch->skb[ch->dma.desc];
	void *buf = ch->rx_buff[ch->dma.desc];
	u32 ctl = desc->ctl;
	int len = (ctl & LTQ_DMA_SIZE_MASK);
	struct net_device *net_dev = priv->net_dev;
	struct sk_buff *skb;
	int ret;

	ret = xrx200_alloc_skb(ch);
	ret = xrx200_alloc_buf(ch, napi_alloc_frag);

	ch->dma.desc++;
	ch->dma.desc %= LTQ_DESC_NUM;
@@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx200_chan *ch)
		return ret;
	}

	skb = build_skb(buf, priv->rx_skb_size);
	skb_reserve(skb, NET_SKB_PAD);
	skb_put(skb, len);

	/* add buffers to skb via skb->frag_list */
	if (ctl & LTQ_DMA_SOP) {
		ch->skb_head = skb;
		ch->skb_tail = skb;
		skb_reserve(skb, NET_IP_ALIGN);
	} else if (ch->skb_head) {
		if (ch->skb_head == ch->skb_tail)
			skb_shinfo(ch->skb_tail)->frag_list = skb;
		else
			ch->skb_tail->next = skb;
		ch->skb_tail = skb;
		skb_reserve(ch->skb_tail, -NET_IP_ALIGN);
		ch->skb_head->len += skb->len;
		ch->skb_head->data_len += skb->len;
		ch->skb_head->truesize += skb->truesize;
@@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu)
	struct xrx200_chan *ch_rx = &priv->chan_rx;
	int old_mtu = net_dev->mtu;
	bool running = false;
	struct sk_buff *skb;
	void *buff;
	int curr_desc;
	int ret = 0;

	net_dev->mtu = new_mtu;
	priv->rx_buf_size = xrx200_buffer_size(new_mtu);
	priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);

	if (new_mtu <= old_mtu)
		return ret;
@@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net_dev, int new_mtu)

	for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
	     ch_rx->dma.desc++) {
		skb = ch_rx->skb[ch_rx->dma.desc];
		ret = xrx200_alloc_skb(ch_rx);
		buff = ch_rx->rx_buff[ch_rx->dma.desc];
		ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
		if (ret) {
			net_dev->mtu = old_mtu;
			priv->rx_buf_size = xrx200_buffer_size(old_mtu);
			priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
			break;
		}
		dev_kfree_skb_any(skb);
		skb_free_frag(buff);
	}

	ch_rx->dma.desc = curr_desc;
@@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv)
	ltq_dma_alloc_rx(&ch_rx->dma);
	for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
	     ch_rx->dma.desc++) {
		ret = xrx200_alloc_skb(ch_rx);
		ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
		if (ret)
			goto rx_free;
	}
@@ -511,7 +526,7 @@ static int xrx200_dma_init(struct xrx200_priv *priv)
	/* free the allocated RX ring */
	for (i = 0; i < LTQ_DESC_NUM; i++) {
		if (priv->chan_rx.skb[i])
			dev_kfree_skb_any(priv->chan_rx.skb[i]);
			skb_free_frag(priv->chan_rx.rx_buff[i]);
	}

rx_free:
@@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx200_priv *priv)

	/* free the allocated RX ring */
	for (i = 0; i < LTQ_DESC_NUM; i++)
		dev_kfree_skb_any(priv->chan_rx.skb[i]);
		skb_free_frag(priv->chan_rx.rx_buff[i]);
}

static int xrx200_probe(struct platform_device *pdev)
@@ -553,6 +568,7 @@ static int xrx200_probe(struct platform_device *pdev)
	net_dev->min_mtu = ETH_ZLEN;
	net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0);
	priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN);
	priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);

	/* load the memory ranges */
	priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
@@ -597,8 +613,10 @@ static int xrx200_probe(struct platform_device *pdev)
			 PMAC_HD_CTL);

	/* setup NAPI */
	netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32);
	netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32);
	netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx,
		       NAPI_POLL_WEIGHT);
	netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping,
			  NAPI_POLL_WEIGHT);

	platform_set_drvdata(pdev, priv);