Commit 4caaf758 authored by Jakub Kicinski
Browse files

Merge branch 'net-speedup-netns-dismantles'

Eric Dumazet says:

====================
net: speedup netns dismantles

From: Eric Dumazet <edumazet@google.com>

In this series, I made network namespace deletions more scalable,
by 4x on the little benchmark described in this cover letter.

- Remove a bottleneck in ipv6 addrconf by replacing a global
  hash table with a per-netns one.

- Convert many (struct pernet_operations)->exit() handlers into
  exit_batch() ones. This removes many rtnl acquisitions
  and gives cleanup_net() a kind of priority over rtnl
  ownership.

Tested on a host with 24 cpus (48 HT)

Test script:

for nr in {1..10}
do
  (for i in {1..10000}; do unshare -n /bin/bash -c "ifconfig lo up"; done) &
done
wait

for i in {1..10}
do
  sleep 1
  echo 3 >/proc/sys/vm/drop_caches
  grep net_namespace /proc/slabinfo
done

Before: We can see the host struggles to clean up the netns, even after no new ones are being created.
Memory cost is high, because each netns consumes a good amount of memory.

time ./unshare10.sh
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      37214  37792   3968    1    1 : tunables   24   12    8 : slabdata  37214  37792    192

real	6m57.766s
user	3m37.277s
sys	40m4.826s

After: We can see the script completes much faster,
the kernel thread doing the cleanup_net() keeps up just fine.
Memory cost is not too big.

time ./unshare10.sh
net_namespace       9945   9945   4096    1    1 : tunables   24   12    8 : slabdata   9945   9945      0
net_namespace       4087   4665   4096    1    1 : tunables   24   12    8 : slabdata   4087   4665    192
net_namespace       4082   4607   4096    1    1 : tunables   24   12    8 : slabdata   4082   4607    192
net_namespace        234    761   4096    1    1 : tunables   24   12    8 : slabdata    234    761    192
net_namespace        224    751   4096    1    1 : tunables   24   12    8 : slabdata    224    751    192
net_namespace        218    745   4096    1    1 : tunables   24   12    8 : slabdata    218    745    192
net_namespace        193    667   4096    1    1 : tunables   24   12    8 : slabdata    193    667    172
net_namespace        167    609   4096    1    1 : tunables   24   12    8 : slabdata    167    609    152
net_namespace        167    609   4096    1    1 : tunables   24   12    8 : slabdata    167    609    152
net_namespace        157    609   4096    1    1 : tunables   24   12    8 : slabdata    157    609    152

real    1m43.876s
user    3m39.728s
sys 7m36.342s
====================

Link: https://lore.kernel.org/r/20220208045038.2635826-1-eric.dumazet@gmail.com


Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents b2309a71 ee403248
Loading
Loading
Loading
Loading
+19 −8
Original line number Diff line number Diff line
@@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net)
	return 0;
}

static void __net_exit bond_net_exit(struct net *net)
static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
	struct bond_net *bn = net_generic(net, bond_net_id);
	struct bonding *bond, *tmp_bond;
	struct bond_net *bn;
	struct net *net;
	LIST_HEAD(list);

	list_for_each_entry(net, net_list, exit_list) {
		bn = net_generic(net, bond_net_id);
		bond_destroy_sysfs(bn);
	}

	/* Kill off any bonds created after unregistering bond rtnl ops */
	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		struct bonding *bond, *tmp_bond;

		bn = net_generic(net, bond_net_id);
		list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
			unregister_netdevice_queue(bond->dev, &list);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();

	list_for_each_entry(net, net_list, exit_list) {
		bn = net_generic(net, bond_net_id);
		bond_destroy_proc_dir(bn);
	}
}

static struct pernet_operations bond_net_ops = {
	.init = bond_net_init,
	.exit = bond_net_exit,
	.exit_batch = bond_net_exit_batch,
	.id   = &bond_net_id,
	.size = sizeof(struct bond_net),
};
+0 −1
Original line number Diff line number Diff line
@@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn)
}

/* Destroy the bonding directory under /proc/net, if empty.
 * Caller must hold rtnl_lock.
 */
void __net_exit bond_destroy_proc_dir(struct bond_net *bn)
{
+5 −0
Original line number Diff line number Diff line
@@ -92,6 +92,11 @@ struct netns_ipv6 {
	struct sock             *tcp_sk;
	struct sock             *igmp_sk;
	struct sock		*mc_autojoin_sk;

	struct hlist_head	*inet6_addr_lst;
	spinlock_t		addrconf_hash_lock;
	struct delayed_work	addr_chk_work;

#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
	struct mr_table		*mrt6;
+6 −3
Original line number Diff line number Diff line
@@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
	return 0;
}

static void __net_exit cangw_pernet_exit(struct net *net)
static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
	struct net *net;

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		cgw_remove_all_jobs(net);
	rtnl_unlock();
}

static struct pernet_operations cangw_pernet_ops = {
	.init = cangw_pernet_init,
	.exit = cangw_pernet_exit,
	.exit_batch = cangw_pernet_exit_batch,
};

static __init int cgw_module_init(void)
+14 −8
Original line number Diff line number Diff line
@@ -10850,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
static void __net_exit default_device_exit_net(struct net *net)
{
	struct net_device *dev, *aux;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	ASSERT_RTNL();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];
@@ -10881,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net)
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
	/* Return with the rtnl_lock held when there are no network
	/* Return (with the rtnl_lock held) when there are no network
	 * devices unregistering in any network namespace in net_list.
	 */
	struct net *net;
	bool unregistering;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	bool unregistering;
	struct net *net;

	ASSERT_RTNL();
	add_wait_queue(&netdev_unregistering_wq, &wait);
	for (;;) {
		unregistering = false;
		rtnl_lock();

		list_for_each_entry(net, net_list, exit_list) {
			if (net->dev_unreg_count > 0) {
				unregistering = true;
@@ -10908,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
		__rtnl_unlock();

		wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
		rtnl_lock();
	}
	remove_wait_queue(&netdev_unregistering_wq, &wait);
}
@@ -10923,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
	struct net *net;
	LIST_HEAD(dev_kill_list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		default_device_exit_net(net);
		cond_resched();
	}
	/* To prevent network device cleanup code from dereferencing
	 * loopback devices or network devices that have been freed
	 * wait here for all pending unregistrations to complete,
@@ -10935,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
	 * default_device_exit_batch.
	 */
	rtnl_lock_unregistering(net_list);

	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
@@ -10948,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

Loading