Commit 94531cfc authored by Jiang Wang, committed by Andrii Nakryiko
Browse files

af_unix: Add unix_stream_proto for sockmap



Previously, sockmap for the AF_UNIX protocol only supported the
dgram type. This patch adds unix stream type support, which
is similar to unix_dgram_proto. To support sockmap, dgram
and stream cannot share the same unix_proto anymore, because
they have different implementations, such as unhash for the stream
type (which removes closed or disconnected sockets from the map).
Therefore, rename unix_proto to unix_dgram_proto and add a new
unix_stream_proto.

Also implement the stream-related sockmap functions, and add the
dgram keyword to the names of the dgram-specific functions.

Signed-off-by: Jiang Wang <jiang.wang@bytedance.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Cong Wang <cong.wang@bytedance.com>
Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210816190327.2739291-3-jiang.wang@bytedance.com
parent 77462de1
Loading
Loading
Loading
Loading
+6 −2
Original line number Diff line number Diff line
@@ -87,6 +87,8 @@ long unix_outq_len(struct sock *sk);

int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			 int flags);
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg, size_t size,
			  int flags);
#ifdef CONFIG_SYSCTL
int unix_sysctl_register(struct net *net);
void unix_sysctl_unregister(struct net *net);
@@ -96,9 +98,11 @@ static inline void unix_sysctl_unregister(struct net *net) {}
#endif

#ifdef CONFIG_BPF_SYSCALL
extern struct proto unix_proto;
extern struct proto unix_dgram_proto;
extern struct proto unix_stream_proto;

int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void __init unix_bpf_build_proto(void);
#else
static inline void __init unix_bpf_build_proto(void)
+1 −0
Original line number Diff line number Diff line
@@ -1494,6 +1494,7 @@ void sock_map_unhash(struct sock *sk)
	rcu_read_unlock();
	saved_unhash(sk);
}
EXPORT_SYMBOL_GPL(sock_map_unhash);

void sock_map_close(struct sock *sk, long timeout)
{
+70 −13
Original line number Diff line number Diff line
@@ -798,17 +798,35 @@ static void unix_close(struct sock *sk, long timeout)
	 */
}

struct proto unix_proto = {
	.name			= "UNIX",
/*
 * Intentionally empty ->unhash() callback, installed in unix_stream_proto.
 *
 * It gives the stream proto a non-NULL ->unhash, so that code calling
 * prot->unhash() on a stream socket has a valid target. The sockmap
 * variant of the stream proto replaces this hook with sock_map_unhash().
 */
static void unix_unhash(struct sock *sk)
{
	/* Nothing to do here, unix socket does not need a ->unhash().
	 * This is merely for sockmap.
	 */
}

struct proto unix_dgram_proto = {
	.name			= "UNIX-DGRAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_bpf_update_proto,
	.psock_update_sk_prot	= unix_dgram_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
/*
 * Protocol descriptor for AF_UNIX sockets created with type SOCK_STREAM.
 *
 * It carries a (no-op) ->unhash callback, which the sockmap code overrides
 * in its BPF copy of this proto. When CONFIG_BPF_SYSCALL is enabled,
 * ->psock_update_sk_prot points at unix_stream_bpf_update_proto().
 */
struct proto unix_stream_proto = {
	.name			= "UNIX-STREAM",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
	.close			= unix_close,
	.unhash			= unix_unhash,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= unix_stream_bpf_update_proto,
#endif
};

static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, int type)
{
	struct sock *sk = NULL;
	struct unix_sock *u;
@@ -817,7 +835,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (type == SOCK_STREAM)
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_stream_proto, kern);
	else /*dgram and  seqpacket */
		sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_dgram_proto, kern);

	if (!sk)
		goto out;

@@ -879,7 +901,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
	return unix_create1(net, sock, kern, sock->type) ? 0 : -ENOMEM;
}

static int unix_release(struct socket *sock)
@@ -1293,7 +1315,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	newsk = unix_create1(sock_net(sk), NULL, 0, sock->type);
	if (newsk == NULL)
		goto out;

@@ -2323,8 +2345,10 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si
	struct sock *sk = sock->sk;

#ifdef CONFIG_BPF_SYSCALL
	if (sk->sk_prot != &unix_proto)
		return sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_dgram_proto)
		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
					    flags & ~MSG_DONTWAIT, NULL);
#endif
	return __unix_dgram_recvmsg(sk, msg, size, flags);
@@ -2728,6 +2752,20 @@ static int unix_stream_read_actor(struct sk_buff *skb,
	return ret ?: chunk;
}

/*
 * Sock-level stream receive helper.
 *
 * Builds a unix_stream_read_state for @sk (using sk->sk_socket as the
 * socket and unix_stream_read_actor as the copy routine) and passes it
 * to unix_stream_read_generic().
 *
 * @sk:    socket to read from
 * @msg:   destination message header
 * @size:  maximum number of bytes requested
 * @flags: receive flags, passed through unchanged
 *
 * Returns whatever unix_stream_read_generic() returns.
 */
int __unix_stream_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t size, int flags)
{
	struct unix_stream_read_state rd_state = {
		.flags = flags,
		.size = size,
		.msg = msg,
		.socket = sk->sk_socket,
		.recv_actor = unix_stream_read_actor,
	};

	return unix_stream_read_generic(&rd_state, true);
}

static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
			       size_t size, int flags)
{
@@ -2739,6 +2777,14 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
		.flags = flags
	};

#ifdef CONFIG_BPF_SYSCALL
	struct sock *sk = sock->sk;
	const struct proto *prot = READ_ONCE(sk->sk_prot);

	if (prot != &unix_stream_proto)
		return prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
					    flags & ~MSG_DONTWAIT, NULL);
#endif
	return unix_stream_read_generic(&state, true);
}

@@ -2799,7 +2845,9 @@ static int unix_shutdown(struct socket *sock, int mode)
		(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {

		int peer_mode = 0;
		const struct proto *prot = READ_ONCE(other->sk_prot);

		prot->unhash(other);
		if (mode&RCV_SHUTDOWN)
			peer_mode |= SEND_SHUTDOWN;
		if (mode&SEND_SHUTDOWN)
@@ -2808,11 +2856,13 @@ static int unix_shutdown(struct socket *sock, int mode)
		other->sk_shutdown |= peer_mode;
		unix_state_unlock(other);
		other->sk_state_change(other);
		if (peer_mode == SHUTDOWN_MASK)
		if (peer_mode == SHUTDOWN_MASK) {
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
		else if (peer_mode & RCV_SHUTDOWN)
			other->sk_state = TCP_CLOSE;
		} else if (peer_mode & RCV_SHUTDOWN) {
			sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
		}
	}
	if (other)
		sock_put(other);

@@ -3289,7 +3339,13 @@ static int __init af_unix_init(void)

	BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));

	rc = proto_register(&unix_proto, 1);
	rc = proto_register(&unix_dgram_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
	}

	rc = proto_register(&unix_stream_proto, 1);
	if (rc != 0) {
		pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
		goto out;
@@ -3310,7 +3366,8 @@ static int __init af_unix_init(void)
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	proto_unregister(&unix_dgram_proto);
	proto_unregister(&unix_stream_proto);
	unregister_pernet_subsys(&unix_net_ops);
}

+71 −22
Original line number Diff line number Diff line
@@ -38,7 +38,16 @@ static int unix_msg_wait_data(struct sock *sk, struct sk_psock *psock,
	return ret;
}

static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
/*
 * Dispatch to the plain (non-BPF) receive routine for the socket type.
 *
 * SOCK_DGRAM sockets are served by __unix_dgram_recvmsg(); every other
 * socket type falls through to __unix_stream_recvmsg().
 *
 * Returns the result of the selected receive routine.
 */
static int __unix_recvmsg(struct sock *sk, struct msghdr *msg,
			  size_t len, int flags)
{
	if (sk->sk_type != SOCK_DGRAM)
		return __unix_stream_recvmsg(sk, msg, len, flags);

	return __unix_dgram_recvmsg(sk, msg, len, flags);
}

static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
			    size_t len, int nonblock, int flags,
			    int *addr_len)
{
@@ -48,14 +57,14 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,

	psock = sk_psock_get(sk);
	if (unlikely(!psock))
		return __unix_dgram_recvmsg(sk, msg, len, flags);
		return __unix_recvmsg(sk, msg, len, flags);

	mutex_lock(&u->iolock);
	if (!skb_queue_empty(&sk->sk_receive_queue) &&
	    sk_psock_queue_empty(psock)) {
		mutex_unlock(&u->iolock);
		sk_psock_put(sk, psock);
		return __unix_dgram_recvmsg(sk, msg, len, flags);
		return __unix_recvmsg(sk, msg, len, flags);
	}

msg_bytes_ready:
@@ -71,7 +80,7 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
				goto msg_bytes_ready;
			mutex_unlock(&u->iolock);
			sk_psock_put(sk, psock);
			return __unix_dgram_recvmsg(sk, msg, len, flags);
			return __unix_recvmsg(sk, msg, len, flags);
		}
		copied = -EAGAIN;
	}
@@ -80,30 +89,55 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
	return copied;
}

static struct proto *unix_prot_saved __read_mostly;
static DEFINE_SPINLOCK(unix_prot_lock);
static struct proto unix_bpf_prot;
static struct proto *unix_dgram_prot_saved __read_mostly;
static DEFINE_SPINLOCK(unix_dgram_prot_lock);
static struct proto unix_dgram_bpf_prot;

static struct proto *unix_stream_prot_saved __read_mostly;
static DEFINE_SPINLOCK(unix_stream_prot_lock);
static struct proto unix_stream_bpf_prot;

static void unix_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
static void unix_dgram_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
{
	*prot        = *base;
	prot->close  = sock_map_close;
	prot->recvmsg = unix_dgram_bpf_recvmsg;
	prot->recvmsg = unix_bpf_recvmsg;
}

static void unix_bpf_check_needs_rebuild(struct proto *ops)
static void unix_stream_bpf_rebuild_protos(struct proto *prot,
					   const struct proto *base)
{
	if (unlikely(ops != smp_load_acquire(&unix_prot_saved))) {
		spin_lock_bh(&unix_prot_lock);
		if (likely(ops != unix_prot_saved)) {
			unix_bpf_rebuild_protos(&unix_bpf_prot, ops);
			smp_store_release(&unix_prot_saved, ops);
	*prot        = *base;
	prot->close  = sock_map_close;
	prot->recvmsg = unix_bpf_recvmsg;
	prot->unhash  = sock_map_unhash;
}
		spin_unlock_bh(&unix_prot_lock);

/*
 * Ensure unix_dgram_bpf_prot has been built from @ops.
 *
 * unix_dgram_prot_saved records the base proto that the BPF variant was
 * last built from. If it differs from @ops, the BPF proto is rebuilt from
 * @ops under unix_dgram_prot_lock and the saved pointer is updated.
 */
static void unix_dgram_bpf_check_needs_rebuild(struct proto *ops)
{
	/* Lockless check; the acquire-load pairs with the release-store below. */
	if (unlikely(ops != smp_load_acquire(&unix_dgram_prot_saved))) {
		spin_lock_bh(&unix_dgram_prot_lock);
		/* Re-check: the saved pointer may have changed before the lock was taken. */
		if (likely(ops != unix_dgram_prot_saved)) {
			unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, ops);
			/* Update the saved pointer only after the proto has been rebuilt. */
			smp_store_release(&unix_dgram_prot_saved, ops);
		}
		spin_unlock_bh(&unix_dgram_prot_lock);
	}
}

int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
/*
 * Ensure unix_stream_bpf_prot has been built from @ops.
 *
 * unix_stream_prot_saved records the base proto that the BPF variant was
 * last built from. If it differs from @ops, the BPF proto is rebuilt from
 * @ops under unix_stream_prot_lock and the saved pointer is updated.
 */
static void unix_stream_bpf_check_needs_rebuild(struct proto *ops)
{
	/* Lockless check; the acquire-load pairs with the release-store below. */
	if (unlikely(ops != smp_load_acquire(&unix_stream_prot_saved))) {
		spin_lock_bh(&unix_stream_prot_lock);
		/* Re-check: the saved pointer may have changed before the lock was taken. */
		if (likely(ops != unix_stream_prot_saved)) {
			unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, ops);
			/* Update the saved pointer only after the proto has been rebuilt. */
			smp_store_release(&unix_stream_prot_saved, ops);
		}
		spin_unlock_bh(&unix_stream_prot_lock);
	}
}

int unix_dgram_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	if (sk->sk_type != SOCK_DGRAM)
		return -EOPNOTSUPP;
@@ -114,12 +148,27 @@ int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
		return 0;
	}

	unix_bpf_check_needs_rebuild(psock->sk_proto);
	WRITE_ONCE(sk->sk_prot, &unix_bpf_prot);
	unix_dgram_bpf_check_needs_rebuild(psock->sk_proto);
	WRITE_ONCE(sk->sk_prot, &unix_dgram_bpf_prot);
	return 0;
}

/*
 * Switch a unix stream socket between its original proto and the BPF
 * (sockmap) proto.
 *
 * @sk:      socket whose sk_prot is updated
 * @psock:   psock holding the saved proto and saved write_space callback
 * @restore: true to put back the saved proto and write_space callback,
 *           false to install unix_stream_bpf_prot
 *
 * Always returns 0.
 */
int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore)
{
	if (!restore) {
		/* Make sure the BPF proto was built from this socket's base proto. */
		unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
		WRITE_ONCE(sk->sk_prot, &unix_stream_bpf_prot);
	} else {
		sk->sk_write_space = psock->saved_write_space;
		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
	}

	return 0;
}

void __init unix_bpf_build_proto(void)
{
	unix_bpf_rebuild_protos(&unix_bpf_prot, &unix_proto);
	unix_dgram_bpf_rebuild_protos(&unix_dgram_bpf_prot, &unix_dgram_proto);
	unix_stream_bpf_rebuild_protos(&unix_stream_bpf_prot, &unix_stream_proto);

}