Commit f3f05f70 authored by Arnaldo Carvalho de Melo, committed by David S. Miller
Browse files

[INET]: Generalise the tcp_listen_ lock routines


Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 6e04e021
...@@ -19,10 +19,14 @@ ...@@ -19,10 +19,14 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/tcp.h> /* only for TCP_LISTEN, damn :-( */
#include <linux/types.h> #include <linux/types.h>
#include <linux/wait.h>
#include <net/sock.h> #include <net/sock.h>
#include <asm/atomic.h>
/* This is for all connections with a full identity, no wildcards. /* This is for all connections with a full identity, no wildcards.
* New scheme, half the table is for TIME_WAIT, the other half is * New scheme, half the table is for TIME_WAIT, the other half is
* for the rest. I'll experiment with dynamic table growth later. * for the rest. I'll experiment with dynamic table growth later.
...@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table, ...@@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table,
extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
/*
 * inet_listen_lock - register the caller as a user of the listening hash.
 *
 * Increments hashinfo->lhash_users while briefly holding lhash_lock for
 * reading. The rwlock is released again before returning, so only the user
 * count stays held; drop it with inet_listen_unlock().
 *
 * - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&hashinfo->lhash_lock).
 */
static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
{
/*
 * Taking the read lock synchronizes against writers (and candidates
 * to become writers): inet_listen_wlock() checks lhash_users while
 * holding lhash_lock for writing.
 */
read_lock(&hashinfo->lhash_lock);
atomic_inc(&hashinfo->lhash_users);
read_unlock(&hashinfo->lhash_lock);
}
/*
 * inet_listen_unlock - drop one user reference on the listening hash.
 *
 * Decrements hashinfo->lhash_users; when the count reaches zero, wakes
 * whoever is sleeping on hashinfo->lhash_wait.
 */
static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
{
	const int last_user = atomic_dec_and_test(&hashinfo->lhash_users);

	/* Other users remain: nobody needs to be woken yet. */
	if (!last_user)
		return;

	wake_up(&hashinfo->lhash_wait);
}
/*
 * __inet_hash - insert an unhashed socket into one of hashinfo's tables.
 * @hashinfo: set of hash tables to insert into
 * @sk: socket to insert; must currently be unhashed (see BUG_TRAP below)
 * @listen_possible: non-zero if @sk may be in TCP_LISTEN, in which case it
 *                   is added to the listening hash instead of ehash
 *
 * A socket in TCP_LISTEN (with @listen_possible set) is chained into
 * listening_hash under lhash_lock; any other socket is chained into the
 * ehash bucket chosen by inet_sk_ehashfn() under that bucket's lock.
 */
static inline void __inet_hash(struct inet_hashinfo *hashinfo,
struct sock *sk, const int listen_possible)
{
struct hlist_head *list;
rwlock_t *lock;
BUG_TRAP(sk_unhashed(sk));
if (listen_possible && sk->sk_state == TCP_LISTEN) {
list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
lock = &hashinfo->lhash_lock;
/* Returns with lhash_lock held for writing. */
inet_listen_wlock(hashinfo);
} else {
/* Remember the ehash bucket index in the socket itself. */
sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
list = &hashinfo->ehash[sk->sk_hashent].chain;
lock = &hashinfo->ehash[sk->sk_hashent].lock;
write_lock(lock);
}
__sk_add_node(sk, list);
sock_prot_inc_use(sk->sk_prot);
/* 'lock' is whichever lock was write-acquired in the branch above. */
write_unlock(lock);
/* After a listening-hash insert, wake anyone sleeping on lhash_wait. */
if (listen_possible && sk->sk_state == TCP_LISTEN)
wake_up(&hashinfo->lhash_wait);
}
#endif /* _INET_HASHTABLES_H */ #endif /* _INET_HASHTABLES_H */
...@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, ...@@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
extern void tcp_enter_memory_pressure(void); extern void tcp_enter_memory_pressure(void);
extern void tcp_listen_wlock(void);
/*
 * tcp_listen_lock - register the caller as a user of the TCP listening hash.
 *
 * Increments tcp_hashinfo.lhash_users while briefly holding lhash_lock for
 * reading; the rwlock is released again before returning. Drop the count
 * with tcp_listen_unlock().
 *
 * - We may sleep inside this lock.
 * - If sleeping is not required (or called from BH),
 *   use plain read_(un)lock(&tcp_hashinfo.lhash_lock).
 */
static inline void tcp_listen_lock(void)
{
/*
 * Taking the read lock synchronizes against writers (and candidates
 * to become writers): tcp_listen_wlock() checks lhash_users while
 * holding lhash_lock for writing.
 */
read_lock(&tcp_hashinfo.lhash_lock);
atomic_inc(&tcp_hashinfo.lhash_users);
read_unlock(&tcp_hashinfo.lhash_lock);
}
/*
 * tcp_listen_unlock - drop one user reference on the TCP listening hash.
 *
 * Decrements tcp_hashinfo.lhash_users; when the count reaches zero, wakes
 * whoever is sleeping on tcp_hashinfo.lhash_wait.
 */
static inline void tcp_listen_unlock(void)
{
	const int last_user = atomic_dec_and_test(&tcp_hashinfo.lhash_users);

	/* Other users remain: nobody needs to be woken yet. */
	if (!last_user)
		return;

	wake_up(&tcp_hashinfo.lhash_wait);
}
static inline int keepalive_intvl_when(const struct tcp_sock *tp) static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{ {
return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
......
...@@ -15,7 +15,9 @@ ...@@ -15,7 +15,9 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/wait.h>
#include <net/inet_hashtables.h> #include <net/inet_hashtables.h>
...@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) ...@@ -89,3 +91,33 @@ void inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
} }
EXPORT_SYMBOL(inet_put_port); EXPORT_SYMBOL(inet_put_port);
/*
 * inet_listen_wlock - take lhash_lock for writing once no users remain.
 *
 * Returns with hashinfo->lhash_lock held for writing and lhash_users
 * observed to be zero. While users remain, the lock is dropped and the
 * caller sleeps (TASK_UNINTERRUPTIBLE, exclusive waiter) on lhash_wait.
 *
 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
 * Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines (wake up each
 * exclusive lock release). It should be ifdefed really.
 *
 * NOTE(review): the first acquisition is a plain write_lock() while the wait
 * loop uses the _bh variants. The callers visible alongside this function
 * (tcp_v4_hash, tcp_unhash) call local_bh_disable() first - presumably every
 * caller must enter with BHs disabled; confirm before adding new callers.
 */
void inet_listen_wlock(struct inet_hashinfo *hashinfo)
{
write_lock(&hashinfo->lhash_lock);
if (atomic_read(&hashinfo->lhash_users)) {
DEFINE_WAIT(wait);
for (;;) {
/* Queue ourselves before re-checking so a wake-up is not missed. */
prepare_to_wait_exclusive(&hashinfo->lhash_wait,
&wait, TASK_UNINTERRUPTIBLE);
if (!atomic_read(&hashinfo->lhash_users))
break;
/* Users still present: release the lock and sleep until woken. */
write_unlock_bh(&hashinfo->lhash_lock);
schedule();
write_lock_bh(&hashinfo->lhash_lock);
}
finish_wait(&hashinfo->lhash_wait, &wait);
}
}
EXPORT_SYMBOL(inet_listen_wlock);
...@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -589,7 +589,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
if (cb->args[0] == 0) { if (cb->args[0] == 0) {
if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
goto skip_listen_ht; goto skip_listen_ht;
tcp_listen_lock(); inet_listen_lock(&tcp_hashinfo);
for (i = s_i; i < INET_LHTABLE_SIZE; i++) { for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
struct sock *sk; struct sock *sk;
struct hlist_node *node; struct hlist_node *node;
...@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -613,7 +613,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
goto syn_recv; goto syn_recv;
if (tcpdiag_dump_sock(skb, sk, cb) < 0) { if (tcpdiag_dump_sock(skb, sk, cb) < 0) {
tcp_listen_unlock(); inet_listen_unlock(&tcp_hashinfo);
goto done; goto done;
} }
...@@ -622,7 +622,7 @@ syn_recv: ...@@ -622,7 +622,7 @@ syn_recv:
goto next_listen; goto next_listen;
if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { if (tcpdiag_dump_reqs(skb, sk, cb) < 0) {
tcp_listen_unlock(); inet_listen_unlock(&tcp_hashinfo);
goto done; goto done;
} }
...@@ -636,7 +636,7 @@ next_listen: ...@@ -636,7 +636,7 @@ next_listen:
cb->args[3] = 0; cb->args[3] = 0;
cb->args[4] = 0; cb->args[4] = 0;
} }
tcp_listen_unlock(); inet_listen_unlock(&tcp_hashinfo);
skip_listen_ht: skip_listen_ht:
cb->args[0] = 1; cb->args[0] = 1;
s_i = num = s_num = 0; s_i = num = s_num = 0;
......
...@@ -228,62 +228,11 @@ fail: ...@@ -228,62 +228,11 @@ fail:
return ret; return ret;
} }
/*
 * tcp_listen_wlock - take tcp_hashinfo.lhash_lock for writing once no users
 * remain.
 *
 * Returns with the lock held for writing and lhash_users observed to be
 * zero. While users remain, the lock is dropped and the caller sleeps
 * (TASK_UNINTERRUPTIBLE, exclusive waiter) on tcp_hashinfo.lhash_wait.
 *
 * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP.
 * Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines (wake up each
 * exclusive lock release). It should be ifdefed really.
 *
 * NOTE(review): the first acquisition is a plain write_lock() while the wait
 * loop uses the _bh variants - presumably callers enter with BHs already
 * disabled; confirm against callers.
 */
void tcp_listen_wlock(void)
{
write_lock(&tcp_hashinfo.lhash_lock);
if (atomic_read(&tcp_hashinfo.lhash_users)) {
DEFINE_WAIT(wait);
for (;;) {
/* Queue ourselves before re-checking so a wake-up is not missed. */
prepare_to_wait_exclusive(&tcp_hashinfo.lhash_wait,
&wait, TASK_UNINTERRUPTIBLE);
if (!atomic_read(&tcp_hashinfo.lhash_users))
break;
/* Users still present: release the lock and sleep until woken. */
write_unlock_bh(&tcp_hashinfo.lhash_lock);
schedule();
write_lock_bh(&tcp_hashinfo.lhash_lock);
}
finish_wait(&tcp_hashinfo.lhash_wait, &wait);
}
}
/*
 * __tcp_v4_hash - insert an unhashed socket into the tcp_hashinfo tables.
 * @sk: socket to insert; must currently be unhashed (see BUG_TRAP below)
 * @listen_possible: non-zero if @sk may be in TCP_LISTEN, in which case it
 *                   is added to the listening hash instead of ehash
 *
 * A socket in TCP_LISTEN (with @listen_possible set) is chained into
 * listening_hash under lhash_lock; any other socket is chained into the
 * ehash bucket chosen by inet_sk_ehashfn() under that bucket's lock.
 */
static __inline__ void __tcp_v4_hash(struct sock *sk, const int listen_possible)
{
struct hlist_head *list;
rwlock_t *lock;
BUG_TRAP(sk_unhashed(sk));
if (listen_possible && sk->sk_state == TCP_LISTEN) {
list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
lock = &tcp_hashinfo.lhash_lock;
/* Returns with lhash_lock held for writing. */
tcp_listen_wlock();
} else {
/* Remember the ehash bucket index in the socket itself. */
sk->sk_hashent = inet_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
write_lock(lock);
}
__sk_add_node(sk, list);
sock_prot_inc_use(sk->sk_prot);
/* 'lock' is whichever lock was write-acquired in the branch above. */
write_unlock(lock);
/* After a listening-hash insert, wake anyone sleeping on lhash_wait. */
if (listen_possible && sk->sk_state == TCP_LISTEN)
wake_up(&tcp_hashinfo.lhash_wait);
}
static void tcp_v4_hash(struct sock *sk) static void tcp_v4_hash(struct sock *sk)
{ {
if (sk->sk_state != TCP_CLOSE) { if (sk->sk_state != TCP_CLOSE) {
local_bh_disable(); local_bh_disable();
__tcp_v4_hash(sk, 1); __inet_hash(&tcp_hashinfo, sk, 1);
local_bh_enable(); local_bh_enable();
} }
} }
...@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk) ...@@ -297,7 +246,7 @@ void tcp_unhash(struct sock *sk)
if (sk->sk_state == TCP_LISTEN) { if (sk->sk_state == TCP_LISTEN) {
local_bh_disable(); local_bh_disable();
tcp_listen_wlock(); inet_listen_wlock(&tcp_hashinfo);
lock = &tcp_hashinfo.lhash_lock; lock = &tcp_hashinfo.lhash_lock;
} else { } else {
struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent]; struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[sk->sk_hashent];
...@@ -624,7 +573,7 @@ ok: ...@@ -624,7 +573,7 @@ ok:
inet_bind_hash(sk, tb, port); inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) { if (sk_unhashed(sk)) {
inet_sk(sk)->sport = htons(port); inet_sk(sk)->sport = htons(port);
__tcp_v4_hash(sk, 0); __inet_hash(&tcp_hashinfo, sk, 0);
} }
spin_unlock(&head->lock); spin_unlock(&head->lock);
...@@ -641,7 +590,7 @@ ok: ...@@ -641,7 +590,7 @@ ok:
tb = inet_sk(sk)->bind_hash; tb = inet_sk(sk)->bind_hash;
spin_lock_bh(&head->lock); spin_lock_bh(&head->lock);
if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
__tcp_v4_hash(sk, 0); __inet_hash(&tcp_hashinfo, sk, 0);
spin_unlock_bh(&head->lock); spin_unlock_bh(&head->lock);
return 0; return 0;
} else { } else {
...@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ...@@ -1479,7 +1428,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = dst_metric(dst, RTAX_ADVMSS); newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk); tcp_initialize_rcv_mss(newsk);
__tcp_v4_hash(newsk, 0); __inet_hash(&tcp_hashinfo, newsk, 0);
__inet_inherit_port(&tcp_hashinfo, sk, newsk); __inet_inherit_port(&tcp_hashinfo, sk, newsk);
return newsk; return newsk;
...@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) ...@@ -2102,12 +2051,12 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
void *rc; void *rc;
struct tcp_iter_state* st = seq->private; struct tcp_iter_state* st = seq->private;
tcp_listen_lock(); inet_listen_lock(&tcp_hashinfo);
st->state = TCP_SEQ_STATE_LISTENING; st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_idx(seq, &pos); rc = listening_get_idx(seq, &pos);
if (!rc) { if (!rc) {
tcp_listen_unlock(); inet_listen_unlock(&tcp_hashinfo);
local_bh_disable(); local_bh_disable();
st->state = TCP_SEQ_STATE_ESTABLISHED; st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_idx(seq, pos); rc = established_get_idx(seq, pos);
...@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ...@@ -2140,7 +2089,7 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_LISTENING:
rc = listening_get_next(seq, v); rc = listening_get_next(seq, v);
if (!rc) { if (!rc) {
tcp_listen_unlock(); inet_listen_unlock(&tcp_hashinfo);
local_bh_disable(); local_bh_disable();
st->state = TCP_SEQ_STATE_ESTABLISHED; st->state = TCP_SEQ_STATE_ESTABLISHED;
rc = established_get_first(seq); rc = established_get_first(seq);
...@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) ...@@ -2168,7 +2117,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
} }
case TCP_SEQ_STATE_LISTENING: case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN) if (v != SEQ_START_TOKEN)
tcp_listen_unlock(); inet_listen_unlock(&tcp_hashinfo);
break; break;
case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_TIME_WAIT:
case TCP_SEQ_STATE_ESTABLISHED: case TCP_SEQ_STATE_ESTABLISHED:
...@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops) ...@@ -2431,7 +2380,6 @@ void __init tcp_v4_init(struct net_proto_family *ops)
EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(inet_bind_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_listen_wlock);
EXPORT_SYMBOL(tcp_prot); EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_conn_request);
......
...@@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) ...@@ -229,7 +229,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk)
if (sk->sk_state == TCP_LISTEN) { if (sk->sk_state == TCP_LISTEN) {
list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)]; list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
lock = &tcp_hashinfo.lhash_lock; lock = &tcp_hashinfo.lhash_lock;
tcp_listen_wlock(); inet_listen_wlock(&tcp_hashinfo);
} else { } else {
sk->sk_hashent = tcp_v6_sk_hashfn(sk); sk->sk_hashent = tcp_v6_sk_hashfn(sk);
list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment