Commit ca5ebbfe authored by David S. Miller
Browse files

Merge branch 'net-atomic-dev-stats'



Eric Dumazet says:

====================
net: add atomic dev->stats infra

Long-standing KCSAN issues are caused by data races around
some dev->stats changes.

Most performance critical paths already use per-cpu
variables, or per-queue ones.

It is reasonable (and more correct) to use atomic operations
for the slow paths.

First patch adds the infrastructure, then three patches address
the most common paths that syzbot is playing with.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 68d268d0 c4794d22
Loading
Loading
Loading
Loading
+35 −23
Original line number Diff line number Diff line
@@ -171,31 +171,38 @@ static inline bool dev_xmit_complete(int rc)
 *	(unsigned long) so they can be read and written atomically.
 */

/*
 * NET_DEV_STAT(FIELD) declares one counter as a union of two members that
 * share the same storage: the plain "unsigned long FIELD" and an
 * "atomic_long_t __FIELD". The counter can therefore be accessed either
 * directly by its name or through the atomic_long_*() helpers via the
 * double-underscore-prefixed name.
 */
#define NET_DEV_STAT(FIELD)			\
	union {					\
		unsigned long FIELD;		\
		atomic_long_t __##FIELD;	\
	}

struct net_device_stats {
	unsigned long	rx_packets;
	unsigned long	tx_packets;
	unsigned long	rx_bytes;
	unsigned long	tx_bytes;
	unsigned long	rx_errors;
	unsigned long	tx_errors;
	unsigned long	rx_dropped;
	unsigned long	tx_dropped;
	unsigned long	multicast;
	unsigned long	collisions;
	unsigned long	rx_length_errors;
	unsigned long	rx_over_errors;
	unsigned long	rx_crc_errors;
	unsigned long	rx_frame_errors;
	unsigned long	rx_fifo_errors;
	unsigned long	rx_missed_errors;
	unsigned long	tx_aborted_errors;
	unsigned long	tx_carrier_errors;
	unsigned long	tx_fifo_errors;
	unsigned long	tx_heartbeat_errors;
	unsigned long	tx_window_errors;
	unsigned long	rx_compressed;
	unsigned long	tx_compressed;
	NET_DEV_STAT(rx_packets);
	NET_DEV_STAT(tx_packets);
	NET_DEV_STAT(rx_bytes);
	NET_DEV_STAT(tx_bytes);
	NET_DEV_STAT(rx_errors);
	NET_DEV_STAT(tx_errors);
	NET_DEV_STAT(rx_dropped);
	NET_DEV_STAT(tx_dropped);
	NET_DEV_STAT(multicast);
	NET_DEV_STAT(collisions);
	NET_DEV_STAT(rx_length_errors);
	NET_DEV_STAT(rx_over_errors);
	NET_DEV_STAT(rx_crc_errors);
	NET_DEV_STAT(rx_frame_errors);
	NET_DEV_STAT(rx_fifo_errors);
	NET_DEV_STAT(rx_missed_errors);
	NET_DEV_STAT(tx_aborted_errors);
	NET_DEV_STAT(tx_carrier_errors);
	NET_DEV_STAT(tx_fifo_errors);
	NET_DEV_STAT(tx_heartbeat_errors);
	NET_DEV_STAT(tx_window_errors);
	NET_DEV_STAT(rx_compressed);
	NET_DEV_STAT(tx_compressed);
};
#undef NET_DEV_STAT

/* per-cpu stats, allocated on demand.
 * Try to fit them in a single cache line, for dev_get_stats() sake.
@@ -5171,4 +5178,9 @@ extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;

extern struct net_device *blackhole_netdev;

/*
 * Atomic updaters for the counters in (DEV)->stats. Both macros operate on
 * the atomic_long_t member named __FIELD:
 *   DEV_STATS_INC(DEV, FIELD)      - atomic_long_inc() on the counter
 *   DEV_STATS_ADD(DEV, FIELD, VAL) - atomic_long_add() of VAL to the counter
 *
 * Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters.
 */
#define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD)
#define DEV_STATS_ADD(DEV, FIELD, VAL) 	\
		atomic_long_add((VAL), &(DEV)->stats.__##FIELD)

#endif	/* _LINUX_NETDEVICE_H */
+2 −3
Original line number Diff line number Diff line
@@ -356,9 +356,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
				 struct net *net)
{
	/* TODO : stats should be SMP safe */
	dev->stats.rx_packets++;
	dev->stats.rx_bytes += skb->len;
	DEV_STATS_INC(dev, rx_packets);
	DEV_STATS_ADD(dev, rx_bytes, skb->len);
	__skb_tunnel_rx(skb, dev, net);
}

+3 −11
Original line number Diff line number Diff line
@@ -10369,24 +10369,16 @@ void netdev_run_todo(void)
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
			     const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
	BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
	memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
	/* zero out counters that only exist in rtnl_link_stats64 */
	memset((char *)stats64 + sizeof(*netdev_stats), 0,
	       sizeof(*stats64) - sizeof(*netdev_stats));
#else
	size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
	const unsigned long *src = (const unsigned long *)netdev_stats;
	size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t);
	const atomic_long_t *src = (atomic_long_t *)netdev_stats;
	u64 *dst = (u64 *)stats64;

	BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
	for (i = 0; i < n; i++)
		dst[i] = src[i];
		dst[i] = atomic_long_read(&src[i]);
	/* zero out counters that only exist in rtnl_link_stats64 */
	memset((char *)stats64 + n * sizeof(u64), 0,
	       sizeof(*stats64) - n * sizeof(u64));
#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);

+5 −5
Original line number Diff line number Diff line
@@ -510,7 +510,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,

err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	DEV_STATS_INC(dev, tx_dropped);
}

static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -592,7 +592,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)

err_free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	DEV_STATS_INC(dev, tx_dropped);
}

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
@@ -663,7 +663,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

@@ -717,7 +717,7 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

@@ -745,7 +745,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,

free_skb:
	kfree_skb(skb);
	dev->stats.tx_dropped++;
	DEV_STATS_INC(dev, tx_dropped);
	return NETDEV_TX_OK;
}

+16 −16
Original line number Diff line number Diff line
@@ -368,23 +368,23 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		DEV_STATS_INC(tunnel->dev, multicast);
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		DEV_STATS_INC(tunnel->dev, rx_crc_errors);
		DEV_STATS_INC(tunnel->dev, rx_errors);
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
@@ -398,8 +398,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			DEV_STATS_INC(tunnel->dev, rx_frame_errors);
			DEV_STATS_INC(tunnel->dev, rx_errors);
			goto drop;
		}
	}
@@ -581,7 +581,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);
		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
@@ -590,7 +590,7 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

@@ -625,10 +625,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	DEV_STATS_INC(dev, tx_errors);
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
	DEV_STATS_INC(dev, tx_dropped);
kfree:
	kfree_skb(skb);
}
@@ -662,7 +662,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		/* NBMA tunnel */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			DEV_STATS_INC(dev, tx_fifo_errors);
			goto tx_error;
		}

@@ -749,7 +749,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			DEV_STATS_INC(dev, tx_carrier_errors);
			goto tx_error;
		}
		if (use_cache)
@@ -762,7 +762,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		DEV_STATS_INC(dev, collisions);
		goto tx_error;
	}

@@ -805,7 +805,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		DEV_STATS_INC(dev, tx_dropped);
		kfree_skb(skb);
		return;
	}
@@ -819,7 +819,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	DEV_STATS_INC(dev, tx_errors);
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
Loading