Commit b26ef81c authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

drop_monitor: remove quadratic behavior



drop_monitor is using an unique list on which all netdevices in
the host have an element, regardless of their netns.

This scales poorly, not only at device unregister time (what I
caught during my netns dismantle stress tests), but also at packet
processing time whenever trace_napi_poll_hit() is called.

If the intent was to avoid adding one pointer in 'struct net_device'
then surely we prefer O(1) behavior.

Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 503310a5
Loading
Loading
Loading
Loading
+3 −1
Original line number Diff line number Diff line
@@ -2236,7 +2236,9 @@ struct net_device {
#if IS_ENABLED(CONFIG_MRP)
	struct mrp_port __rcu	*mrp_port;
#endif

#if IS_ENABLED(CONFIG_NET_DROP_MONITOR)
	struct dm_hw_stat_delta __rcu *dm_private;
#endif
	struct device		dev;
	const struct attribute_group *sysfs_groups[4];
	const struct attribute_group *sysfs_rx_queue_group;
+24 −55
Original line number Diff line number Diff line
@@ -64,7 +64,6 @@ static const char * const drop_reasons[] = {
/* net_dm_mutex
 *
 * An overall lock guarding every operation coming from userspace.
 * It also guards the global 'hw_stats_list' list.
 */
static DEFINE_MUTEX(net_dm_mutex);

@@ -100,11 +99,9 @@ struct per_cpu_dm_data {
};

struct dm_hw_stat_delta {
	struct net_device *dev;
	unsigned long last_rx;
	struct list_head list;
	struct rcu_head rcu;
	unsigned long last_drop_val;
	struct rcu_head rcu;
};

static struct genl_family net_drop_monitor_family;
@@ -115,7 +112,6 @@ static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data);
static int dm_hit_limit = 64;
static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);

static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
@@ -287,33 +283,27 @@ static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb,
static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
				int work, int budget)
{
	struct dm_hw_stat_delta *new_stat;

	struct net_device *dev = napi->dev;
	struct dm_hw_stat_delta *stat;
	/*
	 * Don't check napi structures with no associated device
	 */
	if (!napi->dev)
	if (!dev)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
		struct net_device *dev;

	stat = rcu_dereference(dev->dm_private);
	if (stat) {
		/*
		 * only add a note to our monitor buffer if:
		 * 1) this is the dev we received on
		 * 2) its after the last_rx delta
		 * 3) our rx_dropped count has gone up
		 * 1) its after the last_rx delta
		 * 2) our rx_dropped count has gone up
		 */
		/* Paired with WRITE_ONCE() in dropmon_net_event() */
		dev = READ_ONCE(new_stat->dev);
		if ((dev == napi->dev)  &&
		    (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
		    (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
		if (time_after(jiffies, stat->last_rx + dm_hw_check_delta) &&
		    (dev->stats.rx_dropped != stat->last_drop_val)) {
			trace_drop_common(NULL, NULL);
			new_stat->last_drop_val = napi->dev->stats.rx_dropped;
			new_stat->last_rx = jiffies;
			break;
			stat->last_drop_val = dev->stats.rx_dropped;
			stat->last_rx = jiffies;
		}
	}
	rcu_read_unlock();
@@ -1198,7 +1188,6 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)

static void net_dm_trace_off_set(void)
{
	struct dm_hw_stat_delta *new_stat, *temp;
	const struct net_dm_alert_ops *ops;
	int cpu;

@@ -1222,13 +1211,6 @@ static void net_dm_trace_off_set(void)
			consume_skb(skb);
	}

	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
		if (new_stat->dev == NULL) {
			list_del_rcu(&new_stat->list);
			kfree_rcu(new_stat, rcu);
		}
	}

	module_put(THIS_MODULE);
}

@@ -1589,41 +1571,28 @@ static int dropmon_net_event(struct notifier_block *ev_block,
			     unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct dm_hw_stat_delta *new_stat = NULL;
	struct dm_hw_stat_delta *tmp;
	struct dm_hw_stat_delta *stat;

	switch (event) {
	case NETDEV_REGISTER:
		new_stat = kzalloc(sizeof(struct dm_hw_stat_delta), GFP_KERNEL);

		if (!new_stat)
			goto out;

		new_stat->dev = dev;
		new_stat->last_rx = jiffies;
		mutex_lock(&net_dm_mutex);
		list_add_rcu(&new_stat->list, &hw_stats_list);
		mutex_unlock(&net_dm_mutex);
		if (WARN_ON_ONCE(rtnl_dereference(dev->dm_private)))
			break;
		stat = kzalloc(sizeof(*stat), GFP_KERNEL);
		if (!stat)
			break;
	case NETDEV_UNREGISTER:
		mutex_lock(&net_dm_mutex);
		list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
			if (new_stat->dev == dev) {

				/* Paired with READ_ONCE() in trace_napi_poll_hit() */
				WRITE_ONCE(new_stat->dev, NULL);
		stat->last_rx = jiffies;
		rcu_assign_pointer(dev->dm_private, stat);

				if (trace_state == TRACE_OFF) {
					list_del_rcu(&new_stat->list);
					kfree_rcu(new_stat, rcu);
		break;
	case NETDEV_UNREGISTER:
		stat = rtnl_dereference(dev->dm_private);
		if (stat) {
			rcu_assign_pointer(dev->dm_private, NULL);
			kfree_rcu(stat, rcu);
		}
			}
		}
		mutex_unlock(&net_dm_mutex);
		break;
	}
out:
	return NOTIFY_DONE;
}