Commit 5bdc312c authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'net-store-netdevs-in-an-xarray'

Jakub Kicinski says:

====================
net: store netdevs in an xarray

One of the more annoying developer experience gaps we have in netlink
is iterating over netdevs. It's painful. Add an xarray to make
it trivial.

v1: https://lore.kernel.org/all/20230722014237.4078962-1-kuba@kernel.org/
====================

Link: https://lore.kernel.org/r/20230726185530.2247698-1-kuba@kernel.org


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 083476a2 84e00d9b
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -3016,6 +3016,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */
			if (netdev_master_upper_dev_get_rcu(slave) == (bond))
#define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)

/* Iterate over the netdevs of a namespace via the net->dev_by_index xarray,
 * resuming at @ifindex and advancing @ifindex as it goes — suited to
 * restartable netlink dump callbacks, where @ifindex is the saved position.
 */
#define for_each_netdev_dump(net, d, ifindex)				\
	xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex))

static inline struct net_device *next_net_device(struct net_device *dev)
{
	struct list_head *lh;
+3 −1
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@
#include <linux/idr.h>
#include <linux/skbuff.h>
#include <linux/notifier.h>
#include <linux/xarray.h>

struct user_namespace;
struct proc_dir_entry;
@@ -69,7 +70,7 @@ struct net {
	atomic_t		dev_unreg_count;

	unsigned int		dev_base_seq;	/* protected by rtnl_mutex */
	int			ifindex;
	u32			ifindex;

	spinlock_t		nsid_lock;
	atomic_t		fnhe_genid;
@@ -110,6 +111,7 @@ struct net {

	struct hlist_head 	*dev_name_head;
	struct hlist_head	*dev_index_head;
	struct xarray		dev_by_index;
	struct raw_notifier_head	netdev_chain;

	/* Note that @hash_mix can be read millions times per second,
+54 −28
Original line number Diff line number Diff line
@@ -388,6 +388,8 @@ static void list_netdevice(struct net_device *dev)
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock(&dev_base_lock);
	/* We reserved the ifindex, this can't fail */
	WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));

	dev_base_seq_inc(net);
}
@@ -397,8 +399,12 @@ static void list_netdevice(struct net_device *dev)
 */
static void unlist_netdevice(struct net_device *dev, bool lock)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	xa_erase(&net->dev_by_index, dev->ifindex);

	/* Unlink dev from the device chain */
	if (lock)
		write_lock(&dev_base_lock);
@@ -9565,23 +9571,35 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
}

/**
 *	dev_new_index	-	allocate an ifindex
 * dev_index_reserve() - allocate an ifindex in a namespace
 * @net: the applicable net namespace
 * @ifindex: requested ifindex, pass %0 to get one allocated
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 * Allocate an ifindex for a new device. Caller must either use the ifindex
 * to store the device (via list_netdevice()) or call dev_index_release()
 * to give the index up.
 *
 * Return: a suitable unique value for a new device interface number or -errno.
 */
static int dev_new_index(struct net *net)
static int dev_index_reserve(struct net *net, u32 ifindex)
{
	int ifindex = net->ifindex;
	int err;

	for (;;) {
		if (++ifindex <= 0)
			ifindex = 1;
		if (!__dev_get_by_index(net, ifindex))
			return net->ifindex = ifindex;
	if (!ifindex)
		err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
				      xa_limit_31b, &net->ifindex, GFP_KERNEL);
	else
		err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
	if (err < 0)
		return err;

	return ifindex;
}

/* Give back an ifindex obtained from dev_index_reserve() that was never
 * attached to a device. xa_erase() returns the previous entry; reserved
 * slots hold NULL, so a non-NULL return means a live device entry was
 * erased by mistake — hence the WARN_ON().
 */
static void dev_index_release(struct net *net, int ifindex)
{
	/* Expect only unused indexes, unlist_netdevice() removes the used */
	WARN_ON(xa_erase(&net->dev_by_index, ifindex));
}

/* Delayed registration/unregisteration */
@@ -10051,11 +10069,10 @@ int register_netdevice(struct net_device *dev)
		goto err_uninit;
	}

	ret = -EBUSY;
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);
	else if (__dev_get_by_index(net, dev->ifindex))
	ret = dev_index_reserve(net, dev->ifindex);
	if (ret < 0)
		goto err_uninit;
	dev->ifindex = ret;

	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
@@ -10102,7 +10119,7 @@ int register_netdevice(struct net_device *dev)
	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;
		goto err_ifindex_release;

	ret = netdev_register_kobject(dev);
	write_lock(&dev_base_lock);
@@ -10158,6 +10175,8 @@ int register_netdevice(struct net_device *dev)

err_uninit_notify:
	call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
err_ifindex_release:
	dev_index_release(net, dev->ifindex);
err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
@@ -11035,9 +11054,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
	}

	/* Check that new_ifindex isn't used yet. */
	err = -EBUSY;
	if (new_ifindex && __dev_get_by_index(net, new_ifindex))
	if (new_ifindex) {
		err = dev_index_reserve(net, new_ifindex);
		if (err < 0)
			goto out;
	} else {
		/* If there is an ifindex conflict assign a new one */
		err = dev_index_reserve(net, dev->ifindex);
		if (err == -EBUSY)
			err = dev_index_reserve(net, 0);
		if (err < 0)
			goto out;
		new_ifindex = err;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
@@ -11065,13 +11094,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
	rcu_barrier();

	new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
	/* If there is an ifindex conflict assign a new one */
	if (!new_ifindex) {
		if (__dev_get_by_index(net, dev->ifindex))
			new_ifindex = dev_new_index(net);
		else
			new_ifindex = dev->ifindex;
	}

	rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
			    new_ifindex);
@@ -11249,6 +11271,9 @@ static int __net_init netdev_init(struct net *net)
	if (net->dev_index_head == NULL)
		goto err_idx;

	net->ifindex = 1;
	xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC);

	RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);

	return 0;
@@ -11346,6 +11371,7 @@ static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
	xa_destroy(&net->dev_by_index);
	if (net != &init_net)
		WARN_ON_ONCE(!list_empty(&net->dev_base_head));
}
+8 −29
Original line number Diff line number Diff line
@@ -101,43 +101,22 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct net_device *netdev;
	int idx = 0, s_idx;
	int h, s_h;
	int err;

	s_h = cb->args[0];
	s_idx = cb->args[1];
	int err = 0;

	rtnl_lock();

	for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		struct hlist_head *head;

		idx = 0;
		head = &net->dev_index_head[h];
		hlist_for_each_entry(netdev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
	for_each_netdev_dump(net, netdev, cb->args[0]) {
		err = netdev_nl_dev_fill(netdev, skb,
					 NETLINK_CB(cb->skb).portid,
					 cb->nlh->nlmsg_seq, 0,
					 NETDEV_CMD_DEV_GET);
		if (err < 0)
			break;
cont:
			idx++;
	}
	}

	rtnl_unlock();

	if (err != -EMSGSIZE)
		return err;

	cb->args[1] = idx;
	cb->args[0] = h;
	cb->seq = net->dev_base_seq;

	return skb->len;
}

+17 −48
Original line number Diff line number Diff line
@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
 * @ops:        request ops of currently processed message type
 * @req_info:   parsed request header of processed request
 * @reply_data: data needed to compose the reply
 * @pos_hash:   saved iteration position - hashbucket
 * @pos_idx:    saved iteration position - index
 * @pos_ifindex: saved iteration position - ifindex
 *
 * These parameters are kept in struct netlink_callback as context preserved
 * between iterations. They are initialized by ethnl_default_start() and used
@@ -263,8 +262,7 @@ struct ethnl_dump_ctx {
	const struct ethnl_request_ops	*ops;
	struct ethnl_req_info		*req_info;
	struct ethnl_reply_data		*reply_data;
	int				pos_hash;
	int				pos_idx;
	unsigned long			pos_ifindex;
};

static const struct ethnl_request_ops *
@@ -490,55 +488,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
{
	struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
	struct net *net = sock_net(skb->sk);
	int s_idx = ctx->pos_idx;
	int h, idx = 0;
	struct net_device *dev;
	int ret = 0;

	rtnl_lock();
	for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
		struct hlist_head *head;
		struct net_device *dev;
		unsigned int seq;

		head = &net->dev_index_head[h];

restart_chain:
		seq = net->dev_base_seq;
		cb->seq = seq;
		idx = 0;
		hlist_for_each_entry(dev, head, index_hlist) {
			if (idx < s_idx)
				goto cont;
	for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
		dev_hold(dev);
		rtnl_unlock();

		ret = ethnl_default_dump_one(skb, dev, ctx, cb);

		rtnl_lock();
		dev_put(dev);
			if (ret < 0) {
				if (ret == -EOPNOTSUPP)
					goto lock_and_cont;

		if (ret < 0 && ret != -EOPNOTSUPP) {
			if (likely(skb->len))
				ret = skb->len;
				goto out;
			}
lock_and_cont:
			rtnl_lock();
			if (net->dev_base_seq != seq) {
				s_idx = idx + 1;
				goto restart_chain;
			break;
		}
cont:
			idx++;
		}

	}
	rtnl_unlock();

out:
	ctx->pos_hash = h;
	ctx->pos_idx = idx;
	nl_dump_check_consistent(cb, nlmsg_hdr(skb));

	return ret;
}

@@ -584,8 +554,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
	ctx->ops = ops;
	ctx->req_info = req_info;
	ctx->reply_data = reply_data;
	ctx->pos_hash = 0;
	ctx->pos_idx = 0;
	ctx->pos_ifindex = 0;

	return 0;

Loading