Commit df428e40 authored by David S. Miller's avatar David S. Miller
Browse files

Merge branch 'per-netns-notifier'



Jiri Pirko says:

====================
net: introduce per-netns netdevice notifiers and use them in mlxsw

Some drivers, like mlxsw, are not interested in notifications coming in
for netdevices from other network namespaces. So introduce per-netns
notifiers and allow to reduce overhead by listening only for
notifications from the same netns.

This is also a preparation for upcoming patchset "devlink: allow devlink
instances to change network namespace". This resolves deadlock during
reload mlxsw into initial netns made possible by
328fbe74 ("net: Close race between {un, }register_netdevice_notifier() and setup_net()/cleanup_net()").
====================

Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents c01ebd6c f1cdaa07
Loading
Loading
Loading
Loading
+6 −3
Original line number Diff line number Diff line
@@ -4864,7 +4864,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
	 * respin.
	 */
	mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
	err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
	err = register_netdevice_notifier_net(&init_net,
					      &mlxsw_sp->netdevice_nb);
	if (err) {
		dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
		goto err_netdev_notifier;
@@ -4887,7 +4888,8 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
err_ports_create:
	mlxsw_sp_dpipe_fini(mlxsw_sp);
err_dpipe_init:
	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
	unregister_netdevice_notifier_net(&init_net,
					  &mlxsw_sp->netdevice_nb);
err_netdev_notifier:
	if (mlxsw_sp->clock)
		mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
@@ -4973,7 +4975,8 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)

	mlxsw_sp_ports_remove(mlxsw_sp);
	mlxsw_sp_dpipe_fini(mlxsw_sp);
	unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
	unregister_netdevice_notifier_net(&init_net,
					  &mlxsw_sp->netdevice_nb);
	if (mlxsw_sp->clock) {
		mlxsw_sp->ptp_ops->fini(mlxsw_sp->ptp_state);
		mlxsw_sp->ptp_ops->clock_fini(mlxsw_sp->clock);
+6 −0
Original line number Diff line number Diff line
@@ -2504,6 +2504,9 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd);

int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb);
int unregister_netdevice_notifier_net(struct net *net,
				      struct notifier_block *nb);

struct netdev_notifier_info {
	struct net_device	*dev;
@@ -2574,6 +2577,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */
		list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
#define for_each_netdev_continue(net, d)		\
		list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list)
#define for_each_netdev_continue_reverse(net, d)		\
		list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \
						     dev_list)
#define for_each_netdev_continue_rcu(net, d)		\
	list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list)
#define for_each_netdev_in_bond_rcu(bond, slave)	\
+5 −1
Original line number Diff line number Diff line
@@ -36,6 +36,7 @@
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>

struct user_namespace;
struct proc_dir_entry;
@@ -96,6 +97,8 @@ struct net {
	struct list_head 	dev_base_head;
	struct hlist_head 	*dev_name_head;
	struct hlist_head	*dev_index_head;
	struct raw_notifier_head	netdev_chain;

	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
	int			ifindex;
	unsigned int		dev_unreg_count;
@@ -317,7 +320,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
/* Protected by net_rwsem */
#define for_each_net(VAR)				\
	list_for_each_entry(VAR, &net_namespace_list, list)

#define for_each_net_continue_reverse(VAR)		\
	list_for_each_entry_continue_reverse(VAR, &net_namespace_list, list)
#define for_each_net_rcu(VAR)				\
	list_for_each_entry_rcu(VAR, &net_namespace_list, list)

+148 −28
Original line number Diff line number Diff line
@@ -1725,6 +1725,62 @@ static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
	return nb->notifier_call(nb, val, &info);
}

static int call_netdevice_register_notifiers(struct notifier_block *nb,
					     struct net_device *dev)
{
	int err;

	err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
	err = notifier_to_errno(err);
	if (err)
		return err;

	if (!(dev->flags & IFF_UP))
		return 0;

	call_netdevice_notifier(nb, NETDEV_UP, dev);
	return 0;
}

static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
						struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
					dev);
		call_netdevice_notifier(nb, NETDEV_DOWN, dev);
	}
	call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
}

static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
						 struct net *net)
{
	struct net_device *dev;
	int err;

	for_each_netdev(net, dev) {
		err = call_netdevice_register_notifiers(nb, dev);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	for_each_netdev_continue_reverse(net, dev)
		call_netdevice_unregister_notifiers(nb, dev);
	return err;
}

static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
						    struct net *net)
{
	struct net_device *dev;

	for_each_netdev(net, dev)
		call_netdevice_unregister_notifiers(nb, dev);
}

static int dev_boot_phase = 1;

/**
@@ -1743,8 +1799,6 @@ static int dev_boot_phase = 1;

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

@@ -1757,17 +1811,9 @@ int register_netdevice_notifier(struct notifier_block *nb)
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
		err = call_netdevice_register_net_notifiers(nb, net);
		if (err)
			goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			call_netdevice_notifier(nb, NETDEV_UP, dev);
		}
	}

unlock:
@@ -1776,22 +1822,9 @@ int register_netdevice_notifier(struct notifier_block *nb)
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
							dev);
				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
			}
			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
		}
	}
	for_each_net_continue_reverse(net)
		call_netdevice_unregister_net_notifiers(nb, net);

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
@@ -1841,6 +1874,80 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 * register_netdevice_notifier_net - register a per-netns network notifier block
 * @net: network namespace
 * @nb: notifier
 *
 * Register a notifier to be called when network device events occur.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 *
 * When registered all registration and up events are replayed
 * to the new notifier to allow device to have a race free
 * view of the network device list.
 */

int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&net->netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;

	err = call_netdevice_register_net_notifiers(nb, net);
	if (err)
		goto chain_unregister;

unlock:
	rtnl_unlock();
	return err;

chain_unregister:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier_net);

/**
 * unregister_netdevice_notifier_net - unregister a per-netns
 *                                     network notifier block
 * @net: network namespace
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netdevice_notifier(). The notifier is unlinked into the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 *
 * After unregistering unregister and down device events are synthesized
 * for all devices on the device list to the removed notifier to remove
 * the need for special case cleanup code.
 */

int unregister_netdevice_notifier_net(struct net *net,
				      struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
	if (err)
		goto unlock;

	call_netdevice_unregister_net_notifiers(nb, net);

unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier_net);

/**
 *	call_netdevice_notifiers_info - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
@@ -1853,7 +1960,18 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
static int call_netdevice_notifiers_info(unsigned long val,
					 struct netdev_notifier_info *info)
{
	struct net *net = dev_net(info->dev);
	int ret;

	ASSERT_RTNL();

	/* Run per-netns notifier block chain first, then run the global one.
	 * Hopefully, one day, the global one is going to be removed after
	 * all notifier block registrators get converted to be per-netns.
	 */
	ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
	if (ret & NOTIFY_STOP_MASK)
		return ret;
	return raw_notifier_call_chain(&netdev_chain, val, info);
}

@@ -9752,6 +9870,8 @@ static int __net_init netdev_init(struct net *net)
	if (net->dev_index_head == NULL)
		goto err_idx;

	RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);

	return 0;

err_idx: