include/net/inet_hashtables.h  +48 −0

@@ -19,10 +19,14 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <linux/tcp.h>	/* only for TCP_LISTEN, damn :-( */
 #include <linux/types.h>
+#include <linux/wait.h>
 
 #include <net/sock.h>
 
+#include <asm/atomic.h>
+
 /* This is for all connections with a full identity, no wildcards.
  * New scheme, half the table is for TIME_WAIT, the other half is
  * for the rest.  I'll experiment with dynamic table growth later.
@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table,
 extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
 
+extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
+
+/*
+ * - We may sleep inside this lock.
+ * - If sleeping is not required (or called from BH),
+ *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
+ */
+static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
+{
+	/* read_lock synchronizes with candidate writers */
+	read_lock(&hashinfo->lhash_lock);
+	atomic_inc(&hashinfo->lhash_users);
+	read_unlock(&hashinfo->lhash_lock);
+}
+
+static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
+{
+	if (atomic_dec_and_test(&hashinfo->lhash_users))
+		wake_up(&hashinfo->lhash_wait);
+}
+
+static inline void __inet_hash(struct inet_hashinfo *hashinfo,
+			       struct sock *sk, const int listen_possible)
+{
+	struct hlist_head *list;
+	rwlock_t *lock;
+
+	BUG_TRAP(sk_unhashed(sk));
+	if (listen_possible && sk->sk_state == TCP_LISTEN) {
+		list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
+		lock = &hashinfo->lhash_lock;
+		inet_listen_wlock(hashinfo);
+	} else {
+		sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
+		list = &hashinfo->ehash[sk->sk_hashent].chain;
+		lock = &hashinfo->ehash[sk->sk_hashent].lock;
+		write_lock(lock);
+	}
+	__sk_add_node(sk, list);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(lock);
+	if (listen_possible && sk->sk_state == TCP_LISTEN)
+		wake_up(&hashinfo->lhash_wait);
+}
+
 #endif /* _INET_HASHTABLES_H */
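The new header API splits responsibilities: inet_listen_lock()/inet_listen_unlock() give sleepable read-side protection of the listening hash by bumping lhash_users under a briefly-held read lock, while writers go through inet_listen_wlock() and wait for that count to drain. A minimal reader sketch of how a traversal would use the pair — count_tcp_listeners() is hypothetical; the loop mirrors the tcpdiag and /proc walkers converted below:

/* Hypothetical reader: walk every chain of the TCP listening hash.
 * Unlike a bare read_lock(&lhash_lock), the lhash_users count lets
 * this section sleep without stalling other readers. */
static int count_tcp_listeners(void)
{
	struct sock *sk;
	struct hlist_node *node;
	int i, count = 0;

	inet_listen_lock(&tcp_hashinfo);	/* writers now wait for us */
	for (i = 0; i < INET_LHTABLE_SIZE; i++)
		sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i])
			count++;
	inet_listen_unlock(&tcp_hashinfo);	/* last reader out wakes a writer */

	return count;
}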
include/net/tcp.h  +0 −21

@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
 
 extern void tcp_enter_memory_pressure(void);
 
-extern void tcp_listen_wlock(void);
-
-/* - We may sleep inside this lock.
- * - If sleeping is not required (or called from BH),
- *   use plain read_(un)lock(&inet_hashinfo.lhash_lock).
- */
-static inline void tcp_listen_lock(void)
-{
-	/* read_lock synchronizes to candidates to writers */
-	read_lock(&tcp_hashinfo.lhash_lock);
-	atomic_inc(&tcp_hashinfo.lhash_users);
-	read_unlock(&tcp_hashinfo.lhash_lock);
-}
-
-static inline void tcp_listen_unlock(void)
-{
-	if (atomic_dec_and_test(&tcp_hashinfo.lhash_users))
-		wake_up(&tcp_hashinfo.lhash_wait);
-}
-
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
 	return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
net/ipv4/inet_hashtables.c  +32 −0

@@ -15,7 +15,9 @@
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/wait.h>
 
 #include <net/inet_hashtables.h>
@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
 }
 
 EXPORT_SYMBOL(inet_put_port);
+
+/*
+ * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad
+ * on SMP.  Look, when several writers sleep and a reader wakes them up, all
+ * but one immediately hit the write lock and grab all the CPUs.  Exclusive
+ * sleep solves this, _but_ remember, it adds useless work on UP machines
+ * (a wake-up on each exclusive lock release).  It should be ifdefed really.
+ */
+void inet_listen_wlock(struct inet_hashinfo *hashinfo)
+{
+	write_lock(&hashinfo->lhash_lock);
+
+	if (atomic_read(&hashinfo->lhash_users)) {
+		DEFINE_WAIT(wait);
+
+		for (;;) {
+			prepare_to_wait_exclusive(&hashinfo->lhash_wait,
						  &wait, TASK_UNINTERRUPTIBLE);
+			if (!atomic_read(&hashinfo->lhash_users))
+				break;
+			write_unlock_bh(&hashinfo->lhash_lock);
+			schedule();
+			write_lock_bh(&hashinfo->lhash_lock);
+		}
+
+		finish_wait(&hashinfo->lhash_wait, &wait);
+	}
+}
+
+EXPORT_SYMBOL(inet_listen_wlock);
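The writer side pairs with the header inlines: write_lock(&lhash_lock) shuts out new readers registering themselves in inet_listen_lock(), and the exclusive wait loop then drops the lock and sleeps until lhash_users drains to zero, waking one queued writer at a time to avoid the thundering herd the comment describes. A sketch of a caller, assuming the same BH discipline as tcp_unhash() — remove_tcp_listener() is hypothetical:

/* Hypothetical writer: unhash a listening socket.
 * BHs are disabled around the write lock, as in tcp_unhash(); the final
 * wake_up() passes the baton to the next exclusively-queued writer. */
static void remove_tcp_listener(struct sock *sk)
{
	local_bh_disable();
	inet_listen_wlock(&tcp_hashinfo);	/* may sleep until readers drain */
	if (__sk_del_node_init(sk))
		sock_prot_dec_use(sk->sk_prot);
	write_unlock_bh(&tcp_hashinfo.lhash_lock);
	wake_up(&tcp_hashinfo.lhash_wait);
}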
net/ipv4/tcp_diag.c  +4 −4

@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 	if (cb->args[0] == 0) {
 		if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
 			goto skip_listen_ht;
-		tcp_listen_lock();
+		inet_listen_lock(&tcp_hashinfo);
 		for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
 			struct sock *sk;
 			struct hlist_node *node;
@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				goto syn_recv;
 
 			if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
-				tcp_listen_unlock();
+				inet_listen_unlock(&tcp_hashinfo);
 				goto done;
 			}
 
@@ -622,7 +622,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 				goto next_listen;
 
 			if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
-				tcp_listen_unlock();
+				inet_listen_unlock(&tcp_hashinfo);
 				goto done;
 			}
 
@@ -636,7 +636,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			cb->args[3] = 0;
 			cb->args[4] = 0;
 		}
-		tcp_listen_unlock();
+		inet_listen_unlock(&tcp_hashinfo);
 skip_listen_ht:
 		cb->args[0] = 1;
 		s_i = num = s_num = 0;
net/ipv4/tcp_ipv4.c  +9 −61

@@ -228,62 +228,11 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
 	return ret;
 }
 
-/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
- * Look, when several writers sleep and reader wakes them up, all but one
- * immediately hit write lock and grab all the cpus. Exclusive sleep solves
- * this, _but_ remember, it adds useless work on UP machines (wake up each
- * exclusive lock release). It should be ifdefed really.
- */
-void tcp_listen_wlock(void)
-{
-	write_lock(&tcp_hashinfo.lhash_lock);
-
-	if (atomic_read(&tcp_hashinfo.lhash_users)) {
-		DEFINE_WAIT(wait);
-
-		for (;;) {
-			prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait,
						  &wait, TASK_UNINTERRUPTIBLE);
-			if (!atomic_read(&tcp_hashinfo.lhash_users))
-				break;
-			write_unlock_bh(&tcp_hashinfo.lhash_lock);
-			schedule();
-			write_lock_bh(&tcp_hashinfo.lhash_lock);
-		}
-
-		finish_wait(&tcp_hashinfo.lhash_wait, &wait);
-	}
-}
-
-static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
-{
-	struct hlist_head *list;
-	rwlock_t *lock;
-
-	BUG_TRAP(sk_unhashed(sk));
-	if (listen_possible && sk->sk_state == TCP_LISTEN) {
-		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
-		lock = &tcp_hashinfo.lhash_lock;
-		tcp_listen_wlock();
-	} else {
-		sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
-		list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
-		lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
-		write_lock(lock);
-	}
-	__sk_add_node(sk, list);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(lock);
-	if (listen_possible && sk->sk_state == TCP_LISTEN)
-		wake_up(&tcp_hashinfo.lhash_wait);
-}
-
 static void tcp_v4_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
 		local_bh_disable();
-		__tcp_v4_hash(sk, 1);
+		__inet_hash(&tcp_hashinfo, sk, 1);
 		local_bh_enable();
 	}
 }
@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk)
 
 	if (sk->sk_state == TCP_LISTEN) {
 		local_bh_disable();
-		tcp_listen_wlock();
+		inet_listen_wlock(&tcp_hashinfo);
 		lock = &tcp_hashinfo.lhash_lock;
 	} else {
 		struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
@@ -624,7 +573,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
 		inet_bind_hash(sk, tb, port);
 		if (sk_unhashed(sk)) {
 			inet_sk(sk)->sport = htons(port);
-			__tcp_v4_hash(sk, 0);
+			__inet_hash(&tcp_hashinfo, sk, 0);
 		}
 		spin_unlock(&head->lock);
@@ -641,7 +590,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk)
 	tb = inet_sk(sk)->bind_hash;
 	spin_lock_bh(&head->lock);
 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__tcp_v4_hash(sk, 0);
+		__inet_hash(&tcp_hashinfo, sk, 0);
 		spin_unlock_bh(&head->lock);
 		return 0;
 	} else {
@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
 	tcp_initialize_rcv_mss(newsk);
 
-	__tcp_v4_hash(newsk, 0);
+	__inet_hash(&tcp_hashinfo, newsk, 0);
 	__inet_inherit_port(&tcp_hashinfo, sk, newsk);
 
 	return newsk;
@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
 	void *rc;
 	struct tcp_iter_state* st = seq->private;
 
-	tcp_listen_lock();
+	inet_listen_lock(&tcp_hashinfo);
 	st->state = TCP_SEQ_STATE_LISTENING;
 	rc = listening_get_idx(seq, &pos);
 
 	if (!rc) {
-		tcp_listen_unlock();
+		inet_listen_unlock(&tcp_hashinfo);
 		local_bh_disable();
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 		rc = established_get_idx(seq, pos);
@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	case TCP_SEQ_STATE_LISTENING:
 		rc = listening_get_next(seq, v);
 		if (!rc) {
-			tcp_listen_unlock();
+			inet_listen_unlock(&tcp_hashinfo);
 			local_bh_disable();
 			st->state = TCP_SEQ_STATE_ESTABLISHED;
 			rc = established_get_first(seq);
@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	}
 	case TCP_SEQ_STATE_LISTENING:
 		if (v != SEQ_START_TOKEN)
-			tcp_listen_unlock();
+			inet_listen_unlock(&tcp_hashinfo);
 		break;
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops)
 EXPORT_SYMBOL(ipv4_specific);
 EXPORT_SYMBOL(inet_bind_bucket_create);
 EXPORT_SYMBOL(tcp_hashinfo);
-EXPORT_SYMBOL(tcp_listen_wlock);
 EXPORT_SYMBOL(tcp_prot);
 EXPORT_SYMBOL(tcp_unhash);
 EXPORT_SYMBOL(tcp_v4_conn_request);
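Taken together, the conversion leaves tcp_ipv4.c with no private listening-hash code: every former tcp_listen_*()/__tcp_v4_hash() call site now names &tcp_hashinfo explicitly. That explicit parameter is what makes the helpers reusable — another INET transport protocol can hash into its own tables through the same functions. A hypothetical sketch of that intended reuse; foo_hashinfo and foo_v4_hash are illustrative only, as this patch converts nothing but TCP:

/* Hypothetical second protocol reusing the generalised helpers with its
 * own tables instead of duplicating tcp_listen_wlock() and friends.
 * The lhash_lock, lhash_wait, ehash, etc. members would be initialised
 * at protocol init time, as tcp_hashinfo's are. */
static struct inet_hashinfo foo_hashinfo;

static void foo_v4_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
		/* listen_possible = 1: sk may be a listening socket */
		__inet_hash(&foo_hashinfo, sk, 1);
		local_bh_enable();
	}
}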