Commit 35ec462f authored by Enzo Matsumiya's avatar Enzo Matsumiya Committed by Wang Zhaolong
Browse files

smb: client: fix TCP timers deadlock after rmmod

mainline inclusion
from mainline-v6.10-rc2
commit e9f2517a3e18a54a3943c098d2226b245d488801
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBI5BZ
CVE: CVE-2024-54680

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e9f2517a3e18a54a3943c098d2226b245d488801



--------------------------------

Commit ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.")
fixed a netns UAF by manually enabled socket refcounting
(sk->sk_net_refcnt=1 and sock_inuse_add(net, 1)).

The reason the patch worked for that bug was because we now hold
references to the netns (get_net_track() gets a ref internally)
and they're properly released (internally, on __sk_destruct()),
but only because sk->sk_net_refcnt was set.

Problem:
(this happens regardless of CONFIG_NET_NS_REFCNT_TRACKER and regardless
if init_net or other)

Setting sk->sk_net_refcnt=1 *manually* and *after* socket creation is not
only out of cifs scope, but also technically wrong -- it's set conditionally
based on user (=1) vs kernel (=0) sockets.  And net/ implementations
seem to base their user vs kernel space operations on it.

e.g. upon TCP socket close, the TCP timers are not cleared because
sk->sk_net_refcnt=1:
(cf. commit 151c9c724d05 ("tcp: properly terminate timers for kernel sockets"))

net/ipv4/tcp.c:
    void tcp_close(struct sock *sk, long timeout)
    {
    	lock_sock(sk);
    	__tcp_close(sk, timeout);
    	release_sock(sk);
    	if (!sk->sk_net_refcnt)
    		inet_csk_clear_xmit_timers_sync(sk);
    	sock_put(sk);
    }

Which will throw a lockdep warning and then, as expected, deadlock on
tcp_write_timer().

A way to reproduce this is by running the reproducer from ef7134c7fc48
and then 'rmmod cifs'.  A few seconds later, the deadlock/lockdep
warning shows up.

Fix:
We shouldn't mess with socket internals ourselves, so do not set
sk_net_refcnt manually.

Also change __sock_create() to sock_create_kern() for explicitness.

As for non-init_net network namespaces, we deal with it the best way
we can -- hold an extra netns reference for server->ssocket and drop it
when it's released.  This ensures that the netns still exists whenever
we need to create/destroy server->ssocket, but is not directly tied to
it.

Fixes: ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.")
Cc: stable@vger.kernel.org
Signed-off-by: default avatarEnzo Matsumiya <ematsumiya@suse.de>
Signed-off-by: default avatarSteve French <stfrench@microsoft.com>
Conflicts:
	fs/cifs/connect.c
	fs/smb/client/connect.c
[Conflicts due to fs/cifs move to fs/smb/client]
Signed-off-by: default avatarWang Zhaolong <wangzhaolong1@huawei.com>
parent 77a429f9
Loading
Loading
Loading
Loading
+25 −10
Original line number Diff line number Diff line
@@ -918,6 +918,9 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
	if (server->ssocket) {
		sock_release(server->ssocket);
		server->ssocket = NULL;

		/* Release netns reference for the socket. */
		put_net(cifs_net_ns(server));
	}

	if (!list_empty(&server->pending_mid_q)) {
@@ -966,6 +969,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
		 */
	}

	/* Release netns reference for this server. */
	put_net(cifs_net_ns(server));
	kfree(server->hostname);
	kfree(server);
@@ -2584,6 +2588,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info)

	tcp_ses->ops = volume_info->ops;
	tcp_ses->vals = volume_info->vals;

	/* Grab netns reference for this server. */
	cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
	tcp_ses->hostname = extract_hostname(volume_info->UNC);
	if (IS_ERR(tcp_ses->hostname)) {
@@ -2701,14 +2707,17 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
out_err_crypto_release:
	cifs_crypto_secmech_release(tcp_ses);

	/* Release netns reference for this server. */
	put_net(cifs_net_ns(tcp_ses));

out_err:
	if (tcp_ses) {
		if (!IS_ERR(tcp_ses->hostname))
			kfree(tcp_ses->hostname);
		if (tcp_ses->ssocket)
		if (tcp_ses->ssocket) {
			sock_release(tcp_ses->ssocket);
			put_net(cifs_net_ns(tcp_ses));
		}
		kfree(tcp_ses);
	}
	return ERR_PTR(rc);
@@ -3743,22 +3752,22 @@ generic_ip_connect(struct TCP_Server_Info *server)

	if (socket == NULL) {
		struct net *net = cifs_net_ns(server);
		struct sock *sk;

		rc = __sock_create(net, sfamily, SOCK_STREAM,
				   IPPROTO_TCP, &socket, 1);
		rc = sock_create_kern(net, sfamily, SOCK_STREAM,
				   IPPROTO_TCP, &socket);
		if (rc < 0) {
			cifs_server_dbg(VFS, "Error %d creating socket\n", rc);
			server->ssocket = NULL;
			return rc;
		}

		sk = socket->sk;
		sk->sk_net_refcnt = 1;
		/*
		 * Grab netns reference for the socket.
		 *
		 * It'll be released here, on error, or in clean_demultiplex_info() upon server
		 * teardown.
		 */
		get_net(net);
#ifdef CONFIG_PROC_FS
		this_cpu_add(*net->core.sock_inuse, 1);
#endif

		/* BB other socket options to set KEEPALIVE, NODELAY? */
		cifs_dbg(FYI, "Socket created\n");
@@ -3771,8 +3780,10 @@ generic_ip_connect(struct TCP_Server_Info *server)
	}

	rc = bind_socket(server);
	if (rc < 0)
	if (rc < 0) {
		put_net(cifs_net_ns(server));
		return rc;
	}

	/*
	 * Eventually check for other socket options to change from
@@ -3808,6 +3819,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
		rc = 0;
	if (rc < 0) {
		cifs_dbg(FYI, "Error %d connecting to server\n", rc);
		put_net(cifs_net_ns(server));
		sock_release(socket);
		server->ssocket = NULL;
		return rc;
@@ -3816,6 +3828,9 @@ generic_ip_connect(struct TCP_Server_Info *server)
	if (sport == htons(RFC1001_PORT))
		rc = ip_rfc1001_connect(server);

	if (rc < 0)
		put_net(cifs_net_ns(server));

	return rc;
}