Commit 04c494e6 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller
Browse files

Revert "tcp/dccp: get rid of inet_twsk_purge()"



This reverts commits:

0dad4087 ("tcp/dccp: get rid of inet_twsk_purge()")
d507204d ("tcp/dccp: add tw->tw_bslot")

As Leonard pointed out, a newly allocated netns can happen
to reuse a freed 'struct net'.

While TCP TW timers were covered by my patches, other things were not:

1) Lookups in rx path (INET_MATCH() and INET6_MATCH()), as they look
  at 4-tuple plus the 'struct net' pointer.

2) /proc/net/tcp[6] and inet_diag, same reason.

3) hashinfo->bhash[], same reason.

Fixing all this seems risky, lets instead revert.

In the future, we might have a per netns tcp hash table, or
a per netns list of timewait sockets...

Fixes: 0dad4087 ("tcp/dccp: get rid of inet_twsk_purge()")
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Reported-by: default avatarLeonard Crestez <cdleonard@gmail.com>
Tested-by: default avatarLeonard Crestez <cdleonard@gmail.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 9646ee44
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -71,7 +71,6 @@ struct inet_timewait_sock {
				tw_tos		: 8;
	u32			tw_txhash;
	u32			tw_priority;
	u32			tw_bslot; /* bind bucket slot */
	struct timer_list	tw_timer;
	struct inet_bind_bucket	*tw_tb;
};
@@ -110,6 +109,8 @@ static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo

void inet_twsk_deschedule_put(struct inet_timewait_sock *tw);

void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family);

static inline
struct net *twsk_net(const struct inet_timewait_sock *twsk)
{
+6 −0
Original line number Diff line number Diff line
@@ -1030,9 +1030,15 @@ static void __net_exit dccp_v4_exit_net(struct net *net)
	inet_ctl_sock_destroy(pn->v4_ctl_sk);
}

static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&dccp_hashinfo, AF_INET);
}

static struct pernet_operations dccp_v4_ops = {
	.init	= dccp_v4_init_net,
	.exit	= dccp_v4_exit_net,
	.exit_batch = dccp_v4_exit_batch,
	.id	= &dccp_v4_pernet_id,
	.size   = sizeof(struct dccp_v4_pernet),
};
+6 −0
Original line number Diff line number Diff line
@@ -1115,9 +1115,15 @@ static void __net_exit dccp_v6_exit_net(struct net *net)
	inet_ctl_sock_destroy(pn->v6_ctl_sk);
}

static void __net_exit dccp_v6_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&dccp_hashinfo, AF_INET6);
}

static struct pernet_operations dccp_v6_ops = {
	.init   = dccp_v6_init_net,
	.exit   = dccp_v6_exit_net,
	.exit_batch = dccp_v6_exit_batch,
	.id	= &dccp_v6_pernet_id,
	.size   = sizeof(struct dccp_v6_pernet),
};
+51 −7
Original line number Diff line number Diff line
@@ -52,7 +52,8 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
	spin_unlock(lock);

	/* Disassociate with bind bucket. */
	bhead = &hashinfo->bhash[tw->tw_bslot];
	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
			hashinfo->bhash_size)];

	spin_lock(&bhead->lock);
	inet_twsk_bind_unhash(tw, hashinfo);
@@ -111,12 +112,8 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
	   Note, that any socket with inet->num != 0 MUST be bound in
	   binding cache, even if it is closed.
	 */
	/* Cache inet_bhashfn(), because 'struct net' might be no longer
	 * available later in inet_twsk_kill().
	 */
	tw->tw_bslot = inet_bhashfn(twsk_net(tw), inet->inet_num,
				    hashinfo->bhash_size);
	bhead = &hashinfo->bhash[tw->tw_bslot];
	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
			hashinfo->bhash_size)];
	spin_lock(&bhead->lock);
	tw->tw_tb = icsk->icsk_bind_hash;
	WARN_ON(!icsk->icsk_bind_hash);
@@ -257,3 +254,50 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
	}
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);

void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
{
	struct inet_timewait_sock *tw;
	struct sock *sk;
	struct hlist_nulls_node *node;
	unsigned int slot;

	for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
		struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
restart_rcu:
		cond_resched();
		rcu_read_lock();
restart:
		sk_nulls_for_each_rcu(sk, node, &head->chain) {
			if (sk->sk_state != TCP_TIME_WAIT)
				continue;
			tw = inet_twsk(sk);
			if ((tw->tw_family != family) ||
				refcount_read(&twsk_net(tw)->ns.count))
				continue;

			if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
				continue;

			if (unlikely((tw->tw_family != family) ||
				     refcount_read(&twsk_net(tw)->ns.count))) {
				inet_twsk_put(tw);
				goto restart;
			}

			rcu_read_unlock();
			local_bh_disable();
			inet_twsk_deschedule_put(tw);
			local_bh_enable();
			goto restart_rcu;
		}
		/* If the nulls value we got at the end of this lookup is
		 * not the expected one, we must restart lookup.
		 * We probably met an item that was moved to another chain.
		 */
		if (get_nulls_value(node) != slot)
			goto restart;
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(inet_twsk_purge);
+2 −0
Original line number Diff line number Diff line
@@ -3173,6 +3173,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
	struct net *net;

	inet_twsk_purge(&tcp_hashinfo, AF_INET);

	list_for_each_entry(net, net_exit_list, exit_list)
		tcp_fastopen_ctx_destroy(net);
}
Loading