Commit b6e81138 authored by Kuniyuki Iwashima's avatar Kuniyuki Iwashima Committed by David S. Miller
Browse files

af_unix: Define a per-netns hash table.

This commit adds a per netns hash table for AF_UNIX, which size is fixed
as UNIX_HASH_SIZE for now.

The first implementation defines a per-netns hash table as a single array
of lock and list:

	struct unix_hashbucket {
		spinlock_t		lock;
		struct hlist_head	head;
	};

	struct netns_unix {
		struct unix_hashbucket	*hash;
		...
	};

But, Eric pointed out memory cost that the structure has holes because of
sizeof(spinlock_t), which is 4 (or more if LOCKDEP is enabled). [0]  It
could be expensive on a host with thousands of netns and few AF_UNIX
sockets.  For this reason, a per-netns hash table uses two dense arrays.

	struct unix_table {
		spinlock_t		*locks;
		struct hlist_head	*buckets;
	};

	struct netns_unix {
		struct unix_table	table;
		...
	};

Note the length of the list has a significant impact rather than lock
contention, so having shared locks can be an option.  But, per-netns
locks and lists still perform better than the global locks and per-netns
lists. [1]

Also, this patch adds a change so that struct netns_unix disappears from
struct net if CONFIG_UNIX is disabled.

[0]: https://lore.kernel.org/netdev/CANn89iLVxO5aqx16azNU7p7Z-nz5NrnM5QTqOzueVxEnkVTxyg@mail.gmail.com/
[1]: https://lore.kernel.org/netdev/20220617175215.1769-1-kuniyu@amazon.com/



Signed-off-by: default avatarKuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent f302d180
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -120,7 +120,9 @@ struct net {
	struct netns_core	core;
	struct netns_mib	mib;
	struct netns_packet	packet;
#if IS_ENABLED(CONFIG_UNIX)
	struct netns_unix	unx;
#endif
	struct netns_nexthop	nexthop;
	struct netns_ipv4	ipv4;
#if IS_ENABLED(CONFIG_IPV6)
+6 −0
Original line number Diff line number Diff line
@@ -5,8 +5,14 @@
#ifndef __NETNS_UNIX_H__
#define __NETNS_UNIX_H__

struct unix_table {
	spinlock_t		*locks;
	struct hlist_head	*buckets;
};

struct ctl_table_header;
struct netns_unix {
	struct unix_table	table;
	int			sysctl_max_dgram_qlen;
	struct ctl_table_header	*ctl;
};
+32 −6
Original line number Diff line number Diff line
@@ -3559,7 +3559,7 @@ static const struct net_proto_family unix_family_ops = {

static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;
	int i;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
@@ -3567,18 +3567,44 @@ static int __net_init unix_net_init(struct net *net)

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
			     sizeof(struct seq_net_private)))
		goto err_sysctl;
#endif

	net->unx.table.locks = kvmalloc_array(UNIX_HASH_SIZE,
					      sizeof(spinlock_t), GFP_KERNEL);
	if (!net->unx.table.locks)
		goto err_proc;

	net->unx.table.buckets = kvmalloc_array(UNIX_HASH_SIZE,
						sizeof(struct hlist_head),
						GFP_KERNEL);
	if (!net->unx.table.buckets)
		goto free_locks;

	for (i = 0; i < UNIX_HASH_SIZE; i++) {
		spin_lock_init(&net->unx.table.locks[i]);
		INIT_HLIST_HEAD(&net->unx.table.buckets[i]);
	}

	return 0;

free_locks:
	kvfree(net->unx.table.locks);
err_proc:
#ifdef CONFIG_PROC_FS
	remove_proc_entry("unix", net->proc_net);
err_sysctl:
#endif
	error = 0;
	unix_sysctl_unregister(net);
out:
	return error;
	return -ENOMEM;
}

static void __net_exit unix_net_exit(struct net *net)
{
	kvfree(net->unx.table.buckets);
	kvfree(net->unx.table.locks);
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}