Commit c3e8d5a4 authored by Jakub Kicinski's avatar Jakub Kicinski
Browse files

Merge branch 'net-smc-two-fixes-for-smc-fallback'

Wen Gu says:

====================
net/smc: Two fixes for smc fallback

This patch set includes two fixes for smc fallback:

Patch 1/2 introduces some simple helpers to wrap the replacement
and restore of clcsock's callback functions. Make sure that only
the original callbacks will be saved and not overwritten.

Patch 2/2 fixes a syzbot reporting slab-out-of-bound issue where
smc_fback_error_report() accesses the already freed smc sock (see
https://lore.kernel.org/r/00000000000013ca8105d7ae3ada@google.com/).
The patch fixes it by resetting sk_user_data and restoring clcsock
callback functions timely in fallback situation.

But it should be noted that although patch 2/2 can fix the issue
of 'slab-out-of-bounds/use-after-free in smc_fback_error_report',
it can't pass the syzbot reproducer test. Because after applying
these two patches in upstream, syzbot reproducer triggered another
known issue like this:

==================================================================
BUG: KASAN: use-after-free in tcp_retransmit_timer+0x2ef3/0x3360 net/ipv4/tcp_timer.c:511
Read of size 8 at addr ffff888020328380 by task udevd/4158

CPU: 1 PID: 4158 Comm: udevd Not tainted 5.18.0-rc3-syzkaller-00074-gb05a5683eba6-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <IRQ>
  __dump_stack lib/dump_stack.c:88 [inline]
  dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
  print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313
  print_report mm/kasan/report.c:429 [inline]
  kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
  tcp_retransmit_timer+0x2ef3/0x3360 net/ipv4/tcp_timer.c:511
  tcp_write_timer_handler+0x5e6/0xbc0 net/ipv4/tcp_timer.c:622
  tcp_write_timer+0xa2/0x2b0 net/ipv4/tcp_timer.c:642
  call_timer_fn+0x1a5/0x6b0 kernel/time/timer.c:1421
  expire_timers kernel/time/timer.c:1466 [inline]
  __run_timers.part.0+0x679/0xa80 kernel/time/timer.c:1737
  __run_timers kernel/time/timer.c:1715 [inline]
  run_timer_softirq+0xb3/0x1d0 kernel/time/timer.c:1750
  __do_softirq+0x29b/0x9c2 kernel/softirq.c:558
  invoke_softirq kernel/softirq.c:432 [inline]
  __irq_exit_rcu+0x123/0x180 kernel/softirq.c:637
  irq_exit_rcu+0x5/0x20 kernel/softirq.c:649
  sysvec_apic_timer_interrupt+0x93/0xc0 arch/x86/kernel/apic/apic.c:1097
 </IRQ>
 ...
(detail report can be found in https://syzkaller.appspot.com/text?tag=CrashReport&x=15406b44f00000)

IMHO, the above issue is the same as this known one: https://syzkaller.appspot.com/bug?extid=694120e1002c117747ed,
and it doesn't seem to be related with SMC. The discussion about this known issue is ongoing and can be found in
https://lore.kernel.org/bpf/000000000000f75af905d3ba0716@google.com/T/.

And I added the temporary solution mentioned in the above discussion on
top of my two patches, the syzbot reproducer of 'slab-out-of-bounds/
use-after-free in smc_fback_error_report' no longer triggers any issue.
====================

Link: https://lore.kernel.org/r/1650614179-11529-1-git-send-email-guwen@linux.alibaba.com


Signed-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents ba5a4fdd 0558226c
Loading
Loading
Loading
Loading
+93 −42
Original line number Diff line number Diff line
@@ -243,11 +243,27 @@ struct proto smc_proto6 = {
};
EXPORT_SYMBOL_GPL(smc_proto6);

static void smc_fback_restore_callbacks(struct smc_sock *smc)
{
	struct sock *clcsk = smc->clcsock->sk;

	write_lock_bh(&clcsk->sk_callback_lock);
	clcsk->sk_user_data = NULL;

	smc_clcsock_restore_cb(&clcsk->sk_state_change, &smc->clcsk_state_change);
	smc_clcsock_restore_cb(&clcsk->sk_data_ready, &smc->clcsk_data_ready);
	smc_clcsock_restore_cb(&clcsk->sk_write_space, &smc->clcsk_write_space);
	smc_clcsock_restore_cb(&clcsk->sk_error_report, &smc->clcsk_error_report);

	write_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_restore_fallback_changes(struct smc_sock *smc)
{
	if (smc->clcsock->file) { /* non-accepted sockets have no file yet */
		smc->clcsock->file->private_data = smc->sk.sk_socket;
		smc->clcsock->file = NULL;
		smc_fback_restore_callbacks(smc);
	}
}

@@ -373,6 +389,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
	sk->sk_prot->hash(sk);
	sk_refcnt_debug_inc(sk);
	mutex_init(&smc->clcsock_release_lock);
	smc_init_saved_callbacks(smc);

	return sk;
}
@@ -744,47 +761,73 @@ static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk,

static void smc_fback_state_change(struct sock *clcsk)
{
	struct smc_sock *smc =
		smc_clcsock_user_data(clcsk);
	struct smc_sock *smc;

	if (!smc)
		return;
	smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change);
	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_state_change);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_data_ready(struct sock *clcsk)
{
	struct smc_sock *smc =
		smc_clcsock_user_data(clcsk);
	struct smc_sock *smc;

	if (!smc)
		return;
	smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready);
	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_data_ready);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_write_space(struct sock *clcsk)
{
	struct smc_sock *smc =
		smc_clcsock_user_data(clcsk);
	struct smc_sock *smc;

	if (!smc)
		return;
	smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space);
	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_write_space);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_error_report(struct sock *clcsk)
{
	struct smc_sock *smc =
		smc_clcsock_user_data(clcsk);
	struct smc_sock *smc;

	if (!smc)
		return;
	smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report);
	read_lock_bh(&clcsk->sk_callback_lock);
	smc = smc_clcsock_user_data(clcsk);
	if (smc)
		smc_fback_forward_wakeup(smc, clcsk,
					 smc->clcsk_error_report);
	read_unlock_bh(&clcsk->sk_callback_lock);
}

static void smc_fback_replace_callbacks(struct smc_sock *smc)
{
	struct sock *clcsk = smc->clcsock->sk;

	write_lock_bh(&clcsk->sk_callback_lock);
	clcsk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);

	smc_clcsock_replace_cb(&clcsk->sk_state_change, smc_fback_state_change,
			       &smc->clcsk_state_change);
	smc_clcsock_replace_cb(&clcsk->sk_data_ready, smc_fback_data_ready,
			       &smc->clcsk_data_ready);
	smc_clcsock_replace_cb(&clcsk->sk_write_space, smc_fback_write_space,
			       &smc->clcsk_write_space);
	smc_clcsock_replace_cb(&clcsk->sk_error_report, smc_fback_error_report,
			       &smc->clcsk_error_report);

	write_unlock_bh(&clcsk->sk_callback_lock);
}

static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
{
	struct sock *clcsk;
	int rc = 0;

	mutex_lock(&smc->clcsock_release_lock);
@@ -792,10 +835,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
		rc = -EBADF;
		goto out;
	}
	clcsk = smc->clcsock->sk;

	if (smc->use_fallback)
		goto out;
	smc->use_fallback = true;
	smc->fallback_rsn = reason_code;
	smc_stat_fallback(smc);
@@ -810,18 +850,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
		 * in smc sk->sk_wq and they should be woken up
		 * as clcsock's wait queue is woken up.
		 */
		smc->clcsk_state_change = clcsk->sk_state_change;
		smc->clcsk_data_ready = clcsk->sk_data_ready;
		smc->clcsk_write_space = clcsk->sk_write_space;
		smc->clcsk_error_report = clcsk->sk_error_report;

		clcsk->sk_state_change = smc_fback_state_change;
		clcsk->sk_data_ready = smc_fback_data_ready;
		clcsk->sk_write_space = smc_fback_write_space;
		clcsk->sk_error_report = smc_fback_error_report;

		smc->clcsock->sk->sk_user_data =
			(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
		smc_fback_replace_callbacks(smc);
	}
out:
	mutex_unlock(&smc->clcsock_release_lock);
@@ -1596,6 +1625,19 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
	 * function; switch it back to the original sk_data_ready function
	 */
	new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;

	/* if new clcsock has also inherited the fallback-specific callback
	 * functions, switch them back to the original ones.
	 */
	if (lsmc->use_fallback) {
		if (lsmc->clcsk_state_change)
			new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change;
		if (lsmc->clcsk_write_space)
			new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space;
		if (lsmc->clcsk_error_report)
			new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
	}

	(*new_smc)->clcsock = new_clcsock;
out:
	return rc;
@@ -2355,17 +2397,20 @@ static void smc_tcp_listen_work(struct work_struct *work)

static void smc_clcsock_data_ready(struct sock *listen_clcsock)
{
	struct smc_sock *lsmc =
		smc_clcsock_user_data(listen_clcsock);
	struct smc_sock *lsmc;

	read_lock_bh(&listen_clcsock->sk_callback_lock);
	lsmc = smc_clcsock_user_data(listen_clcsock);
	if (!lsmc)
		return;
		goto out;
	lsmc->clcsk_data_ready(listen_clcsock);
	if (lsmc->sk.sk_state == SMC_LISTEN) {
		sock_hold(&lsmc->sk); /* sock_put in smc_tcp_listen_work() */
		if (!queue_work(smc_tcp_ls_wq, &lsmc->tcp_listen_work))
			sock_put(&lsmc->sk);
	}
out:
	read_unlock_bh(&listen_clcsock->sk_callback_lock);
}

static int smc_listen(struct socket *sock, int backlog)
@@ -2397,10 +2442,12 @@ static int smc_listen(struct socket *sock, int backlog)
	/* save original sk_data_ready function and establish
	 * smc-specific sk_data_ready function
	 */
	smc->clcsk_data_ready = smc->clcsock->sk->sk_data_ready;
	smc->clcsock->sk->sk_data_ready = smc_clcsock_data_ready;
	write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
	smc->clcsock->sk->sk_user_data =
		(void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY);
	smc_clcsock_replace_cb(&smc->clcsock->sk->sk_data_ready,
			       smc_clcsock_data_ready, &smc->clcsk_data_ready);
	write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);

	/* save original ops */
	smc->ori_af_ops = inet_csk(smc->clcsock->sk)->icsk_af_ops;
@@ -2415,7 +2462,11 @@ static int smc_listen(struct socket *sock, int backlog)

	rc = kernel_listen(smc->clcsock, backlog);
	if (rc) {
		smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
		write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
		smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
				       &smc->clcsk_data_ready);
		smc->clcsock->sk->sk_user_data = NULL;
		write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
		goto out;
	}
	sk->sk_max_ack_backlog = backlog;
+29 −0
Original line number Diff line number Diff line
@@ -288,12 +288,41 @@ static inline struct smc_sock *smc_sk(const struct sock *sk)
	return (struct smc_sock *)sk;
}

static inline void smc_init_saved_callbacks(struct smc_sock *smc)
{
	smc->clcsk_state_change	= NULL;
	smc->clcsk_data_ready	= NULL;
	smc->clcsk_write_space	= NULL;
	smc->clcsk_error_report	= NULL;
}

static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk)
{
	return (struct smc_sock *)
	       ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY);
}

/* save target_cb in saved_cb, and replace target_cb with new_cb */
static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *),
					  void (*new_cb)(struct sock *),
					  void (**saved_cb)(struct sock *))
{
	/* only save once */
	if (!*saved_cb)
		*saved_cb = *target_cb;
	*target_cb = new_cb;
}

/* restore target_cb to saved_cb, and reset saved_cb to NULL */
static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *),
					  void (**saved_cb)(struct sock *))
{
	if (!*saved_cb)
		return;
	*target_cb = *saved_cb;
	*saved_cb = NULL;
}

extern struct workqueue_struct	*smc_hs_wq;	/* wq for handshake work */
extern struct workqueue_struct	*smc_close_wq;	/* wq for close work */

+4 −1
Original line number Diff line number Diff line
@@ -214,8 +214,11 @@ int smc_close_active(struct smc_sock *smc)
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready;
			write_lock_bh(&smc->clcsock->sk->sk_callback_lock);
			smc_clcsock_restore_cb(&smc->clcsock->sk->sk_data_ready,
					       &smc->clcsk_data_ready);
			smc->clcsock->sk->sk_user_data = NULL;
			write_unlock_bh(&smc->clcsock->sk->sk_callback_lock);
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
		}
		smc_close_cleanup_listen(sk);