Commit e48c414e authored by Arnaldo Carvalho de Melo's avatar Arnaldo Carvalho de Melo Committed by David S. Miller

[INET]: Generalise the TCP sock ID lookup routines

And also some TIME_WAIT functions.

[acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size
/tmp/before.size: 282955   13122    9312  305389   4a8ed net/ipv4/built-in.o
/tmp/after.size:  281566   13122    9312  304000   4a380 net/ipv4/built-in.o
[acme@toy net-2.6.14]$

I kept them still inlined, will uninline at some point to see what
would be the performance difference.
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 8feaf0c0
......@@ -30,6 +30,7 @@
#include <net/tcp_states.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
/* This is for all connections with a full identity, no wildcards.
* New scheme, half the table is for TIME_WAIT, the other half is
......@@ -285,13 +286,13 @@ extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
const int dif);
/* Optimize the common listener case. */
static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
const u32 daddr,
const unsigned short hnum,
const int dif)
static inline struct sock *
inet_lookup_listener(struct inet_hashinfo *hashinfo,
const u32 daddr,
const unsigned short hnum, const int dif)
{
struct sock *sk = NULL;
struct hlist_head *head;
const struct hlist_head *head;
read_lock(&hashinfo->lhash_lock);
head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
......@@ -351,4 +352,70 @@ sherry_cache:
((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
*
* Local BH must be disabled here.
*/
static inline struct sock *
__inet_lookup_established(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 hnum,
const int dif)
{
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
struct sock *sk;
const struct hlist_node *node;
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size);
struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
read_lock(&head->lock);
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit;
}
sk = NULL;
out:
read_unlock(&head->lock);
return sk;
hit:
sock_hold(sk);
goto out;
}
static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 hnum,
const int dif)
{
struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
hnum, dif);
return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
}
static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 dport,
const int dif)
{
struct sock *sk;
local_bh_disable();
sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
local_bh_enable();
return sk;
}
#endif /* _INET_HASHTABLES_H */
......@@ -17,6 +17,7 @@
#include <linux/config.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/types.h>
......@@ -32,6 +33,7 @@
#endif
struct inet_bind_bucket;
struct inet_hashinfo;
/*
* This is a TIME_WAIT sock. It works around the memory consumption
......@@ -139,4 +141,11 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw)
kmem_cache_free(tw->tw_prot->twsk_slab, tw);
}
}
extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo);
extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
struct sock *sk,
struct inet_hashinfo *hashinfo);
#endif /* _INET_TIMEWAIT_SOCK_ */
......@@ -255,28 +255,28 @@ struct sock {
/*
* Hashed lists helper routines
*/
static inline struct sock *__sk_head(struct hlist_head *head)
static inline struct sock *__sk_head(const struct hlist_head *head)
{
return hlist_entry(head->first, struct sock, sk_node);
}
static inline struct sock *sk_head(struct hlist_head *head)
static inline struct sock *sk_head(const struct hlist_head *head)
{
return hlist_empty(head) ? NULL : __sk_head(head);
}
static inline struct sock *sk_next(struct sock *sk)
static inline struct sock *sk_next(const struct sock *sk)
{
return sk->sk_node.next ?
hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
}
static inline int sk_unhashed(struct sock *sk)
static inline int sk_unhashed(const struct sock *sk)
{
return hlist_unhashed(&sk->sk_node);
}
static inline int sk_hashed(struct sock *sk)
static inline int sk_hashed(const struct sock *sk)
{
return sk->sk_node.pprev != NULL;
}
......@@ -494,7 +494,7 @@ extern int sk_wait_data(struct sock *sk, long *timeo);
struct request_sock_ops;
/* Here is the right place to enable sock refcounting debugging */
#define SOCK_REFCNT_DEBUG
//#define SOCK_REFCNT_DEBUG
/* Networking protocol blocks we attach to sockets.
* socket layer -> transport layer interface
......
......@@ -5,6 +5,7 @@
obj-y := route.o inetpeer.o protocol.o \
ip_input.o ip_fragment.o ip_forward.o ip_options.o \
ip_output.o ip_sockglue.o inet_hashtables.o \
inet_timewait_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
......
......@@ -162,3 +162,5 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
}
return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Generic TIME_WAIT sockets functions
*
* From code orinally in TCP
*/
#include <linux/config.h>
#include <net/inet_hashtables.h>
#include <net/inet_timewait_sock.h>
/* Must be called with locally disabled BHs. */
void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
{
struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb;
/* Unlink from established hashes. */
struct inet_ehash_bucket *ehead = &hashinfo->ehash[tw->tw_hashent];
write_lock(&ehead->lock);
if (hlist_unhashed(&tw->tw_node)) {
write_unlock(&ehead->lock);
return;
}
__hlist_del(&tw->tw_node);
sk_node_init(&tw->tw_node);
write_unlock(&ehead->lock);
/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
spin_unlock(&bhead->lock);
#ifdef SOCK_REFCNT_DEBUG
if (atomic_read(&tw->tw_refcnt) != 1) {
printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
}
#endif
inet_twsk_put(tw);
}
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
* from the SK, and mess with hash chains and list linkage.
*/
void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
struct inet_hashinfo *hashinfo)
{
const struct inet_sock *inet = inet_sk(sk);
struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent];
struct inet_bind_hashbucket *bhead;
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = inet->bind_hash;
BUG_TRAP(inet->bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
spin_unlock(&bhead->lock);
write_lock(&ehead->lock);
/* Step 2: Remove SK from established hash. */
if (__sk_del_node_init(sk))
sock_prot_dec_use(sk->sk_prot);
/* Step 3: Hash TW into TIMEWAIT half of established hash table. */
inet_twsk_add_node(tw, &(ehead + hashinfo->ehash_size)->chain);
atomic_inc(&tw->tw_refcnt);
write_unlock(&ehead->lock);
}
......@@ -174,8 +174,6 @@ nlmsg_failure:
return -1;
}
extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport,
int dif);
#ifdef CONFIG_IP_TCPDIAG_IPV6
extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
struct in6_addr *daddr, u16 dport,
......@@ -197,9 +195,9 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh)
struct sk_buff *rep;
if (req->tcpdiag_family == AF_INET) {
sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport,
req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
req->id.tcpdiag_if);
sk = inet_lookup(&tcp_hashinfo, req->id.tcpdiag_dst[0],
req->id.tcpdiag_dport, req->id.tcpdiag_src[0],
req->id.tcpdiag_sport, req->id.tcpdiag_if);
}
#ifdef CONFIG_IP_TCPDIAG_IPV6
else if (req->tcpdiag_family == AF_INET6) {
......
......@@ -238,71 +238,6 @@ void tcp_unhash(struct sock *sk)
inet_unhash(&tcp_hashinfo, sk);
}
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
* we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
*
* Local BH must be disabled here.
*/
static inline struct sock *__tcp_v4_lookup_established(const u32 saddr,
const u16 sport,
const u32 daddr,
const u16 hnum,
const int dif)
{
struct inet_ehash_bucket *head;
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
struct sock *sk;
const struct hlist_node *node;
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
const int hash = inet_ehashfn(daddr, hnum, saddr, sport, tcp_hashinfo.ehash_size);
head = &tcp_hashinfo.ehash[hash];
read_lock(&head->lock);
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit;
}
sk = NULL;
out:
read_unlock(&head->lock);
return sk;
hit:
sock_hold(sk);
goto out;
}
static inline struct sock *__tcp_v4_lookup(u32 saddr, u16 sport,
u32 daddr, u16 hnum, int dif)
{
struct sock *sk = __tcp_v4_lookup_established(saddr, sport,
daddr, hnum, dif);
return sk ? : inet_lookup_listener(&tcp_hashinfo, daddr, hnum, dif);
}
inline struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr,
u16 dport, int dif)
{
struct sock *sk;
local_bh_disable();
sk = __tcp_v4_lookup(saddr, sport, daddr, ntohs(dport), dif);
local_bh_enable();
return sk;
}
EXPORT_SYMBOL_GPL(tcp_v4_lookup);
static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
{
return secure_tcp_sequence_number(skb->nh.iph->daddr,
......@@ -751,8 +686,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info)
return;
}
sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr,
th->source, tcp_v4_iif(skb));
sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
th->source, tcp_v4_iif(skb));
if (!sk) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return;
......@@ -1359,11 +1294,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
if (req)
return tcp_check_req(sk, skb, req, prev);
nsk = __tcp_v4_lookup_established(skb->nh.iph->saddr,
th->source,
skb->nh.iph->daddr,
ntohs(th->dest),
tcp_v4_iif(skb));
nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr,
th->source, skb->nh.iph->daddr,
ntohs(th->dest), tcp_v4_iif(skb));
if (nsk) {
if (nsk->sk_state != TCP_TIME_WAIT) {
......@@ -1505,9 +1438,9 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->flags = skb->nh.iph->tos;
TCP_SKB_CB(skb)->sacked = 0;
sk = __tcp_v4_lookup(skb->nh.iph->saddr, th->source,
skb->nh.iph->daddr, ntohs(th->dest),
tcp_v4_iif(skb));
sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source,
skb->nh.iph->daddr, ntohs(th->dest),
tcp_v4_iif(skb));
if (!sk)
goto no_tcp_socket;
......
......@@ -56,42 +56,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
int tcp_tw_count;
/* Must be called with locally disabled BHs. */
static void tcp_timewait_kill(struct inet_timewait_sock *tw)
{
struct inet_bind_hashbucket *bhead;
struct inet_bind_bucket *tb;
/* Unlink from established hashes. */
struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[tw->tw_hashent];
write_lock(&ehead->lock);
if (hlist_unhashed(&tw->tw_node)) {
write_unlock(&ehead->lock);
return;
}
__hlist_del(&tw->tw_node);
sk_node_init(&tw->tw_node);
write_unlock(&ehead->lock);
/* Disassociate with bind bucket. */
bhead = &tcp_hashinfo.bhash[inet_bhashfn(tw->tw_num, tcp_hashinfo.bhash_size)];
spin_lock(&bhead->lock);
tb = tw->tw_tb;
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(tcp_hashinfo.bind_bucket_cachep, tb);
spin_unlock(&bhead->lock);
#ifdef SOCK_REFCNT_DEBUG
if (atomic_read(&tw->tw_refcnt) != 1) {
printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n",
tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt));
}
#endif
inet_twsk_put(tw);
}
/*
* * Main purpose of TIME-WAIT state is to close connection gracefully,
* when one of ends sits in LAST-ACK or CLOSING retransmitting FIN
......@@ -290,40 +254,6 @@ kill:
return TCP_TW_SUCCESS;
}
/* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the
* relevant info into it from the SK, and mess with hash chains
* and list linkage.
*/
static void __tcp_tw_hashdance(struct sock *sk, struct inet_timewait_sock *tw)
{
const struct inet_sock *inet = inet_sk(sk);
struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent];
struct inet_bind_hashbucket *bhead;
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
bhead = &tcp_hashinfo.bhash[inet_bhashfn(inet->num, tcp_hashinfo.bhash_size)];
spin_lock(&bhead->lock);
tw->tw_tb = inet->bind_hash;
BUG_TRAP(inet->bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
spin_unlock(&bhead->lock);
write_lock(&ehead->lock);
/* Step 2: Remove SK from established hash. */
if (__sk_del_node_init(sk))
sock_prot_dec_use(sk->sk_prot);
/* Step 3: Hash TW into TIMEWAIT half of established hash table. */
inet_twsk_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain);
atomic_inc(&tw->tw_refcnt);
write_unlock(&ehead->lock);
}
/*
* Move a socket to time-wait or dead fin-wait-2 state.
*/
......@@ -381,7 +311,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_ipv6only = 0;
#endif
/* Linkage updates. */
__tcp_tw_hashdance(sk, tw);
__inet_twsk_hashdance(tw, sk, &tcp_hashinfo);
/* Get the TIME_WAIT timeout firing. */
if (timeo < rto)
......@@ -448,7 +378,7 @@ rescan:
inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) {
__inet_twsk_del_dead_node(tw);
spin_unlock(&tw_death_lock);
tcp_timewait_kill(tw);
__inet_twsk_kill(tw, &tcp_hashinfo);
inet_twsk_put(tw);
killed++;
spin_lock(&tw_death_lock);
......@@ -544,7 +474,7 @@ void tcp_tw_deschedule(struct inet_timewait_sock *tw)
del_timer(&tcp_tw_timer);
}
spin_unlock(&tw_death_lock);
tcp_timewait_kill(tw);
__inet_twsk_kill(tw, &tcp_hashinfo);
}
/* Short-time timewait calendar */
......@@ -653,7 +583,7 @@ void tcp_twcal_tick(unsigned long dummy)
inet_twsk_for_each_inmate_safe(tw, node, safe,
&tcp_twcal_row[slot]) {
__inet_twsk_del_dead_node(tw);
tcp_timewait_kill(tw);
__inet_twsk_kill(tw, &tcp_hashinfo);
inet_twsk_put(tw);
killed++;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment