Commit 9fb9cbb1 authored by Yasuyuki Kozakai's avatar Yasuyuki Kozakai Committed by David S. Miller
Browse files

[NETFILTER]: Add nf_conntrack subsystem.



The existing connection tracking subsystem in netfilter can only
handle ipv4.  There were basically two choices present to add
connection tracking support for ipv6.  We could either duplicate all
of the ipv4 connection tracking code into an ipv6 counterpart, or (the
choice taken by these patches) we could design a generic layer that
could handle both ipv4 and ipv6 and thus requiring only one sub-protocol
(TCP, UDP, etc.) connection tracking helper module to be written.

In fact nf_conntrack is capable of working with any layer 3
protocol.

The existing ipv4 specific conntrack code could also not deal
with the pecularities of doing connection tracking on ipv6,
which is also cured here.  For example, these issues include:

1) ICMPv6 handling, which is used for neighbour discovery in
   ipv6 thus some messages such as these should not participate
   in connection tracking since effectively they are like ARP
   messages

2) fragmentation must be handled differently in ipv6, because
   the simplistic "defrag, connection track and NAT, refrag"
   (which the existing ipv4 connection tracking does) approach simply
   isn't feasible in ipv6

3) ipv6 extension header parsing must occur at the correct spots
   before and after connection tracking decisions, and there were
   no provisions for this in the existing connection tracking
   design

4) ipv6 has no need for stateful NAT

The ipv4 specific conntrack layer is kept around, until all of
the ipv4 specific conntrack helpers are ported over to nf_conntrack
and it is feature complete.  Once that occurs, the old conntrack
stuff will get placed into the feature-removal-schedule and we will
fully kill it off 6 months later.
Signed-off-by: default avatarYasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp>
Signed-off-by: default avatarHarald Welte <laforge@netfilter.org>
Signed-off-by: default avatarArnaldo Carvalho de Melo <acme@mandriva.com>
parent 6730c3c1
#ifndef _NF_CONNTRACK_COMMON_H
#define _NF_CONNTRACK_COMMON_H
/* Connection state tracking for netfilter. This is separated from,
but required by, the NAT layer; it can also be used by an iptables
extension. */
enum ip_conntrack_info
{
/* Part of an established connection (either direction). */
IP_CT_ESTABLISHED,
/* Like NEW, but related to an existing connection, or ICMP error
(in either direction). */
IP_CT_RELATED,
/* Started a new connection to track (only
IP_CT_DIR_ORIGINAL); may be a retransmission. */
IP_CT_NEW,
/* >= this indicates reply direction */
IP_CT_IS_REPLY,
/* Number of distinct IP_CT types (no NEW in reply dirn). */
IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
};
/* Bitset representing status of connection. */
enum ip_conntrack_status {
/* It's an expected connection: bit 0 set. This bit never changed */
IPS_EXPECTED_BIT = 0,
IPS_EXPECTED = (1 << IPS_EXPECTED_BIT),
/* We've seen packets both ways: bit 1 set. Can be set, not unset. */
IPS_SEEN_REPLY_BIT = 1,
IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
/* Conntrack should never be early-expired. */
IPS_ASSURED_BIT = 2,
IPS_ASSURED = (1 << IPS_ASSURED_BIT),
/* Connection is confirmed: originating packet has left box */
IPS_CONFIRMED_BIT = 3,
IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
/* Connection needs src nat in orig dir. This bit never changed. */
IPS_SRC_NAT_BIT = 4,
IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
/* Connection needs dst nat in orig dir. This bit never changed. */
IPS_DST_NAT_BIT = 5,
IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
/* Both together. */
IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
/* Connection needs TCP sequence adjusted. */
IPS_SEQ_ADJUST_BIT = 6,
IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
/* NAT initialization bits. */
IPS_SRC_NAT_DONE_BIT = 7,
IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
IPS_DST_NAT_DONE_BIT = 8,
IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
/* Both together */
IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
/* Connection is dying (removed from lists), can not be unset. */
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),
};
/* Connection tracking event bits */
enum ip_conntrack_events
{
/* New conntrack */
IPCT_NEW_BIT = 0,
IPCT_NEW = (1 << IPCT_NEW_BIT),
/* Expected connection */
IPCT_RELATED_BIT = 1,
IPCT_RELATED = (1 << IPCT_RELATED_BIT),
/* Destroyed conntrack */
IPCT_DESTROY_BIT = 2,
IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
/* Timer has been refreshed */
IPCT_REFRESH_BIT = 3,
IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
/* Status has changed */
IPCT_STATUS_BIT = 4,
IPCT_STATUS = (1 << IPCT_STATUS_BIT),
/* Update of protocol info */
IPCT_PROTOINFO_BIT = 5,
IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
/* Volatile protocol info */
IPCT_PROTOINFO_VOLATILE_BIT = 6,
IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
/* New helper for conntrack */
IPCT_HELPER_BIT = 7,
IPCT_HELPER = (1 << IPCT_HELPER_BIT),
/* Update of helper info */
IPCT_HELPINFO_BIT = 8,
IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
/* Volatile helper info */
IPCT_HELPINFO_VOLATILE_BIT = 9,
IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
/* NAT info */
IPCT_NATINFO_BIT = 10,
IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
/* Counter highest bit has been set */
IPCT_COUNTER_FILLING_BIT = 11,
IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
};
enum ip_conntrack_expect_events {
IPEXP_NEW_BIT = 0,
IPEXP_NEW = (1 << IPEXP_NEW_BIT),
};
#ifdef __KERNEL__
struct ip_conntrack_counter
{
u_int32_t packets;
u_int32_t bytes;
};
struct ip_conntrack_stat
{
unsigned int searched;
unsigned int found;
unsigned int new;
unsigned int invalid;
unsigned int ignore;
unsigned int delete;
unsigned int delete_list;
unsigned int insert;
unsigned int insert_failed;
unsigned int drop;
unsigned int early_drop;
unsigned int error;
unsigned int expect_new;
unsigned int expect_create;
unsigned int expect_delete;
};
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_COMMON_H */
#ifndef _NF_CONNTRACK_FTP_H
#define _NF_CONNTRACK_FTP_H
/* FTP tracking. */
/* This enum is exposed to userspace */
enum ip_ct_ftp_type
{
/* PORT command from client */
IP_CT_FTP_PORT,
/* PASV response from server */
IP_CT_FTP_PASV,
/* EPRT command from client */
IP_CT_FTP_EPRT,
/* EPSV response from server */
IP_CT_FTP_EPSV,
};
#ifdef __KERNEL__
#define FTP_PORT 21
#define NUM_SEQ_TO_REMEMBER 2
/* This structure exists only once per master */
struct ip_ct_ftp_master {
/* Valid seq positions for cmd matching after newline */
u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
/* 0 means seq_match_aft_nl not set */
int seq_aft_nl_num[IP_CT_DIR_MAX];
};
struct ip_conntrack_expect;
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack_expect *exp,
u32 *seq);
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_FTP_H */
#ifndef _NF_CONNTRACK_SCTP_H
#define _NF_CONNTRACK_SCTP_H
/* SCTP tracking. */
#include <linux/netfilter/nf_conntrack_tuple_common.h>
enum sctp_conntrack {
SCTP_CONNTRACK_NONE,
SCTP_CONNTRACK_CLOSED,
SCTP_CONNTRACK_COOKIE_WAIT,
SCTP_CONNTRACK_COOKIE_ECHOED,
SCTP_CONNTRACK_ESTABLISHED,
SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
SCTP_CONNTRACK_MAX
};
struct ip_ct_sctp
{
enum sctp_conntrack state;
u_int32_t vtag[IP_CT_DIR_MAX];
u_int32_t ttag[IP_CT_DIR_MAX];
};
#endif /* _NF_CONNTRACK_SCTP_H */
#ifndef _NF_CONNTRACK_TCP_H
#define _NF_CONNTRACK_TCP_H
/* TCP tracking. */
/* This is exposed to userspace (ctnetlink) */
enum tcp_conntrack {
TCP_CONNTRACK_NONE,
TCP_CONNTRACK_SYN_SENT,
TCP_CONNTRACK_SYN_RECV,
TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_FIN_WAIT,
TCP_CONNTRACK_CLOSE_WAIT,
TCP_CONNTRACK_LAST_ACK,
TCP_CONNTRACK_TIME_WAIT,
TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_LISTEN,
TCP_CONNTRACK_MAX,
TCP_CONNTRACK_IGNORE
};
/* Window scaling is advertised by the sender */
#define IP_CT_TCP_FLAG_WINDOW_SCALE 0x01
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
/* This sender sent FIN first */
#define IP_CT_TCP_FLAG_CLOSE_INIT 0x03
#ifdef __KERNEL__
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
u_int8_t flags; /* per direction options */
};
struct ip_ct_tcp
{
struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
u_int8_t state; /* state of the connection (enum tcp_conntrack) */
/* For detecting stale connections */
u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */
u_int8_t retrans; /* Number of retransmitted packets */
u_int8_t last_index; /* Index of the last packet */
u_int32_t last_seq; /* Last sequence number seen in dir */
u_int32_t last_ack; /* Last sequence number seen in opposite dir */
u_int32_t last_end; /* Last seq + len */
};
#endif /* __KERNEL__ */
#endif /* _NF_CONNTRACK_TCP_H */
#ifndef _NF_CONNTRACK_TUPLE_COMMON_H
#define _NF_CONNTRACK_TUPLE_COMMON_H
enum ip_conntrack_dir
{
IP_CT_DIR_ORIGINAL,
IP_CT_DIR_REPLY,
IP_CT_DIR_MAX
};
#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
#endif /* _NF_CONNTRACK_TUPLE_COMMON_H */
#ifndef _IP_CONNTRACK_H
#define _IP_CONNTRACK_H
/* Connection state tracking for netfilter. This is separated from,
but required by, the NAT layer; it can also be used by an iptables
extension. */
enum ip_conntrack_info
{
/* Part of an established connection (either direction). */
IP_CT_ESTABLISHED,
/* Like NEW, but related to an existing connection, or ICMP error
(in either direction). */
IP_CT_RELATED,
/* Started a new connection to track (only
IP_CT_DIR_ORIGINAL); may be a retransmission. */
IP_CT_NEW,
/* >= this indicates reply direction */
IP_CT_IS_REPLY,
/* Number of distinct IP_CT types (no NEW in reply dirn). */
IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1
};
/* Bitset representing status of connection. */
enum ip_conntrack_status {
/* It's an expected connection: bit 0 set. This bit never changed */
IPS_EXPECTED_BIT = 0,
IPS_EXPECTED = (1 << IPS_EXPECTED_BIT),
/* We've seen packets both ways: bit 1 set. Can be set, not unset. */
IPS_SEEN_REPLY_BIT = 1,
IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
/* Conntrack should never be early-expired. */
IPS_ASSURED_BIT = 2,
IPS_ASSURED = (1 << IPS_ASSURED_BIT),
/* Connection is confirmed: originating packet has left box */
IPS_CONFIRMED_BIT = 3,
IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
/* Connection needs src nat in orig dir. This bit never changed. */
IPS_SRC_NAT_BIT = 4,
IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
/* Connection needs dst nat in orig dir. This bit never changed. */
IPS_DST_NAT_BIT = 5,
IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
/* Both together. */
IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
/* Connection needs TCP sequence adjusted. */
IPS_SEQ_ADJUST_BIT = 6,
IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
/* NAT initialization bits. */
IPS_SRC_NAT_DONE_BIT = 7,
IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
IPS_DST_NAT_DONE_BIT = 8,
IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
/* Both together */
IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
/* Connection is dying (removed from lists), can not be unset. */
IPS_DYING_BIT = 9,
IPS_DYING = (1 << IPS_DYING_BIT),
};
/* Connection tracking event bits */
enum ip_conntrack_events
{
/* New conntrack */
IPCT_NEW_BIT = 0,
IPCT_NEW = (1 << IPCT_NEW_BIT),
/* Expected connection */
IPCT_RELATED_BIT = 1,
IPCT_RELATED = (1 << IPCT_RELATED_BIT),
/* Destroyed conntrack */
IPCT_DESTROY_BIT = 2,
IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
/* Timer has been refreshed */
IPCT_REFRESH_BIT = 3,
IPCT_REFRESH = (1 << IPCT_REFRESH_BIT),
/* Status has changed */
IPCT_STATUS_BIT = 4,
IPCT_STATUS = (1 << IPCT_STATUS_BIT),
/* Update of protocol info */
IPCT_PROTOINFO_BIT = 5,
IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
/* Volatile protocol info */
IPCT_PROTOINFO_VOLATILE_BIT = 6,
IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT),
/* New helper for conntrack */
IPCT_HELPER_BIT = 7,
IPCT_HELPER = (1 << IPCT_HELPER_BIT),
/* Update of helper info */
IPCT_HELPINFO_BIT = 8,
IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT),
/* Volatile helper info */
IPCT_HELPINFO_VOLATILE_BIT = 9,
IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT),
/* NAT info */
IPCT_NATINFO_BIT = 10,
IPCT_NATINFO = (1 << IPCT_NATINFO_BIT),
/* Counter highest bit has been set */
IPCT_COUNTER_FILLING_BIT = 11,
IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT),
};
enum ip_conntrack_expect_events {
IPEXP_NEW_BIT = 0,
IPEXP_NEW = (1 << IPEXP_NEW_BIT),
};
#include <linux/netfilter/nf_conntrack_common.h>
#ifdef __KERNEL__
#include <linux/config.h>
......@@ -194,12 +69,6 @@ do { \
#define IP_NF_ASSERT(x)
#endif
struct ip_conntrack_counter
{
u_int32_t packets;
u_int32_t bytes;
};
struct ip_conntrack_helper;
struct ip_conntrack
......@@ -426,25 +295,6 @@ static inline int is_dying(struct ip_conntrack *ct)
extern unsigned int ip_conntrack_htable_size;
struct ip_conntrack_stat
{
unsigned int searched;
unsigned int found;
unsigned int new;
unsigned int invalid;
unsigned int ignore;
unsigned int delete;
unsigned int delete_list;
unsigned int insert;
unsigned int insert_failed;
unsigned int drop;
unsigned int early_drop;
unsigned int error;
unsigned int expect_new;
unsigned int expect_create;
unsigned int expect_delete;
};
#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
......
#ifndef _IP_CONNTRACK_FTP_H
#define _IP_CONNTRACK_FTP_H
/* FTP tracking. */
#ifdef __KERNEL__
#include <linux/netfilter/nf_conntrack_ftp.h>
#define FTP_PORT 21
#endif /* __KERNEL__ */
enum ip_ct_ftp_type
{
/* PORT command from client */
IP_CT_FTP_PORT,
/* PASV response from server */
IP_CT_FTP_PASV,
/* EPRT command from client */
IP_CT_FTP_EPRT,
/* EPSV response from server */
IP_CT_FTP_EPSV,
};
#define NUM_SEQ_TO_REMEMBER 2
/* This structure exists only once per master */
struct ip_ct_ftp_master {
/* Valid seq positions for cmd matching after newline */
u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
/* 0 means seq_match_aft_nl not set */
int seq_aft_nl_num[IP_CT_DIR_MAX];
};
struct ip_conntrack_expect;
/* For NAT to hook in when we find a packet which describes what other
* connection we should expect. */
extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
enum ip_ct_ftp_type type,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack_expect *exp,
u32 *seq);
#endif /* _IP_CONNTRACK_FTP_H */
#ifndef _IP_CONNTRACK_ICMP_H
#define _IP_CONNTRACK_ICMP_H
/* ICMP tracking. */
#include <asm/atomic.h>
struct ip_ct_icmp
{
/* Optimization: when number in == number out, forget immediately. */
atomic_t count;
};
#include <net/netfilter/ipv4/nf_conntrack_icmp.h>
#endif /* _IP_CONNTRACK_ICMP_H */
#ifndef _IP_CONNTRACK_SCTP_H
#define _IP_CONNTRACK_SCTP_H
/* SCTP tracking. */
enum sctp_conntrack {
SCTP_CONNTRACK_NONE,
SCTP_CONNTRACK_CLOSED,
SCTP_CONNTRACK_COOKIE_WAIT,
SCTP_CONNTRACK_COOKIE_ECHOED,
SCTP_CONNTRACK_ESTABLISHED,
SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
SCTP_CONNTRACK_MAX
};
struct ip_ct_sctp
{
enum sctp_conntrack state;
u_int32_t vtag[IP_CT_DIR_MAX];
u_int32_t ttag[IP_CT_DIR_MAX];
};
#include <linux/netfilter/nf_conntrack_sctp.h>
#endif /* _IP_CONNTRACK_SCTP_H */
#ifndef _IP_CONNTRACK_TCP_H
#define _IP_CONNTRACK_TCP_H
/* TCP tracking. */
enum tcp_conntrack {
TCP_CONNTRACK_NONE,
TCP_CONNTRACK_SYN_SENT,
TCP_CONNTRACK_SYN_RECV,
TCP_CONNTRACK_ESTABLISHED,
TCP_CONNTRACK_FIN_WAIT,
TCP_CONNTRACK_CLOSE_WAIT,
TCP_CONNTRACK_LAST_ACK,
TCP_CONNTRACK_TIME_WAIT,
TCP_CONNTRACK_CLOSE,
TCP_CONNTRACK_LISTEN,
TCP_CONNTRACK_MAX,
TCP_CONNTRACK_IGNORE
};
/* Window scaling is advertised by the sender */
#define IP_CT_TCP_FLAG_WINDOW_SCALE 0x01
/* SACK is permitted by the sender */
#define IP_CT_TCP_FLAG_SACK_PERM 0x02
/* This sender sent FIN first */
#define IP_CT_TCP_FLAG_CLOSE_INIT 0x03
struct ip_ct_tcp_state {
u_int32_t td_end; /* max of seq + len */
u_int32_t td_maxend; /* max of ack + max(win, 1) */
u_int32_t td_maxwin; /* max(win) */
u_int8_t td_scale; /* window scale factor */
u_int8_t loose; /* used when connection picked up from the middle */
u_int8_t flags; /* per direction options */
};
struct ip_ct_tcp
{
struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */
u_int8_t state; /* state of the connection (enum tcp_conntrack) */
/* For detecting stale connections */
u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */
u_int8_t retrans; /* Number of retransmitted packets */
u_int8_t last_index; /* Index of the last packet */
u_int32_t last_seq; /* Last sequence number seen in dir */
u_int32_t last_ack; /* Last sequence number seen in opposite dir */
u_int32_t last_end; /* Last seq + len */
};