Commit c002496b authored by David S. Miller
Browse files

Merge branch 'ipv6-loopback'



Eric Dumazet says:

====================
ipv6: remove addrconf reliance on loopback

Second patch in this series removes IPv6 requirement about the netns
loopback device being the last device being dismantled.

This was needed because rt6_uncached_list_flush_dev()
and ip6_dst_ifdown() had to switch dst dev to a known
device (loopback).

Instead of loopback, we can use the (hidden) blackhole_netdev
which is also always there.

This will allow future simplifications of netdev_run_todo()
and other parts of the stack like default_device_exit_batch().

Last two patches are optimizations for both IP families.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
parents 926eae60 29e5375d
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -367,9 +367,8 @@ struct rt6_statistics {
	__u32		fib_rt_cache;		/* cached rt entries in exception table */
	__u32		fib_discarded_routes;	/* total number of routes delete */

	/* The following stats are not protected by any lock */
	/* The following stat is not protected by any lock */
	atomic_t	fib_rt_alloc;		/* total number of routes alloced */
	atomic_t	fib_rt_uncache;		/* rt entries in uncached list */
};

#define RTN_TL_ROOT	0x0001
+9 −3
Original line number Diff line number Diff line
@@ -1485,6 +1485,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
	struct list_head	quarantine;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
@@ -1506,7 +1507,7 @@ void rt_del_uncached_list(struct rtable *rt)
		struct uncached_list *ul = rt->rt_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt_uncached);
		list_del_init(&rt->rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
@@ -1521,20 +1522,24 @@ static void ipv4_dst_destroy(struct dst_entry *dst)

void rt_flush_dev(struct net_device *dev)
{
	struct rtable *rt;
	struct rtable *rt, *safe;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		if (list_empty(&ul->head))
			continue;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
		list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = blackhole_netdev;
			dev_replace_track(dev, blackhole_netdev,
					  &rt->dst.dev_tracker,
					  GFP_ATOMIC);
			list_move(&rt->rt_uncached, &ul->quarantine);
		}
		spin_unlock_bh(&ul->lock);
	}
@@ -3706,6 +3711,7 @@ int __init ip_rt_init(void)
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		INIT_LIST_HEAD(&ul->quarantine);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
+32 −46
Original line number Diff line number Diff line
@@ -372,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)

	ASSERT_RTNL();

	if (dev->mtu < IPV6_MIN_MTU)
	if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
		return ERR_PTR(-EINVAL);

	ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
@@ -400,6 +400,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
	/* We refer to the device */
	dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);

	if (dev != blackhole_netdev) {
		if (snmp6_alloc_dev(ndev) < 0) {
			netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
				   __func__);
@@ -414,7 +415,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
				   __func__, dev->name);
			goto err_release;
		}

	}
	/* One reference from device. */
	refcount_set(&ndev->refcnt, 1);

@@ -445,15 +446,18 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)

	ipv6_mc_init_dev(ndev);
	ndev->tstamp = jiffies;
	if (dev != blackhole_netdev) {
		err = addrconf_sysctl_register(ndev);
		if (err) {
			ipv6_mc_destroy_dev(ndev);
			snmp6_unregister_dev(ndev);
			goto err_release;
		}
	}
	/* protected by rtnl_lock */
	rcu_assign_pointer(dev->ip6_ptr, ndev);

	if (dev != blackhole_netdev) {
		/* Join interface-local all-node multicast group */
		ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);

@@ -463,7 +467,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
		/* Join all-router multicast group if forwarding is set */
		if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);

	}
	return ndev;

err_release:
@@ -7233,26 +7237,8 @@ int __init addrconf_init(void)
		goto out_nowq;
	}

	/* The addrconf netdev notifier requires that loopback_dev
	 * has it's ipv6 private information allocated and setup
	 * before it can bring up and give link-local addresses
	 * to other devices which are up.
	 *
	 * Unfortunately, loopback_dev is not necessarily the first
	 * entry in the global dev_base list of net devices.  In fact,
	 * it is likely to be the very last entry on that list.
	 * So this causes the notifier registry below to try and
	 * give link-local addresses to all devices besides loopback_dev
	 * first, then loopback_dev, which cases all the non-loopback_dev
	 * devices to fail to get a link-local address.
	 *
	 * So, as a temporary fix, allocate the ipv6 structure for
	 * loopback_dev first by hand.
	 * Longer term, all of the dependencies ipv6 has upon the loopback
	 * device and it being up should be removed.
	 */
	rtnl_lock();
	idev = ipv6_add_dev(init_net.loopback_dev);
	idev = ipv6_add_dev(blackhole_netdev);
	rtnl_unlock();
	if (IS_ERR(idev)) {
		err = PTR_ERR(idev);
+22 −20
Original line number Diff line number Diff line
@@ -130,6 +130,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
	struct list_head	quarantine;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -149,35 +150,34 @@ void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;
		struct net *net = dev_net(rt->dst.dev);

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
		list_del_init(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}

static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
static void rt6_uncached_list_flush_dev(struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;
		struct rt6_info *rt, *safe;

		if (list_empty(&ul->head))
			continue;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
		list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;
			bool handled = false;

			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
				in6_dev_put(rt_idev);
				handled = true;
			}

			if (rt_dev == dev) {
@@ -185,7 +185,11 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
				dev_replace_track(rt_dev, blackhole_netdev,
						  &rt->dst.dev_tracker,
						  GFP_ATOMIC);
				handled = true;
			}
			if (handled)
				list_move(&rt->rt6i_uncached,
					  &ul->quarantine);
		}
		spin_unlock_bh(&ul->lock);
	}
@@ -373,13 +377,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (idev && idev->dev != loopback_dev) {
		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
		if (loopback_idev) {
			rt->rt6i_idev = loopback_idev;
	if (idev && idev->dev != blackhole_netdev) {
		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);

		if (blackhole_idev) {
			rt->rt6i_idev = blackhole_idev;
			in6_dev_put(idev);
		}
	}
@@ -2244,7 +2247,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
			 */
			rt6_uncached_list_add(rt);
			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
			rcu_read_unlock();

			return rt;
@@ -3287,7 +3289,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
	 * do proper release of the net_device
	 */
	rt6_uncached_list_add(rt);
	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

@@ -4896,7 +4897,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
void rt6_disable_ip(struct net_device *dev, unsigned long event)
{
	rt6_sync_down_dev(dev, event);
	rt6_uncached_list_flush_dev(dev_net(dev), dev);
	rt6_uncached_list_flush_dev(dev);
	neigh_ifdown(&nd_tbl, dev);
}

@@ -6736,6 +6737,7 @@ int __init ip6_route_init(void)
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		INIT_LIST_HEAD(&ul->quarantine);
		spin_lock_init(&ul->lock);
	}

+0 −1
Original line number Diff line number Diff line
@@ -92,7 +92,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
	xdst->u.rt6.rt6i_src = rt->rt6i_src;
	INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
	rt6_uncached_list_add(&xdst->u.rt6);
	atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);

	return 0;
}