Commit 080774a2 authored by Harald Welte's avatar Harald Welte Committed by David S. Miller
Browse files

[NETFILTER]: Add ctnetlink subsystem



Add ctnetlink subsystem for userspace-access to ip_conntrack table.
This allows reading and updating of existing entries, as well as
creating new ones (and new expect's) via nfnetlink.

Please note the 'strange' byte order: nfattr (tag+length) are in host
byte order, while the payload is always guaranteed to be in network
byte order.  This allows a simple userspace process to encapsulate netlink
messages into arch-independent udp packets by just processing/swapping the
headers and not knowing anything about the actual payload.
Signed-off-by: default avatarHarald Welte <laforge@netfilter.org>
Signed-off-by: default avatarPatrick McHardy <kaber@trash.net>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 6f1cf165
......@@ -56,7 +56,7 @@ struct nfgenmsg {
u_int16_t res_id; /* resource id */
} __attribute__ ((packed));
#define NFNETLINK_V1 1
#define NFNETLINK_V0 0
#define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \
+ NLMSG_ALIGN(sizeof(struct nfgenmsg))))
......@@ -81,6 +81,7 @@ enum nfnl_subsys_id {
#ifdef __KERNEL__
#include <linux/netlink.h>
#include <linux/capability.h>
struct nfnl_callback
......
#ifndef _IPCONNTRACK_NETLINK_H
#define _IPCONNTRACK_NETLINK_H
#include <linux/netfilter/nfnetlink.h>
enum cntl_msg_types {
IPCTNL_MSG_CT_NEW,
IPCTNL_MSG_CT_GET,
IPCTNL_MSG_CT_DELETE,
IPCTNL_MSG_CT_GET_CTRZERO,
IPCTNL_MSG_MAX
};
enum ctnl_exp_msg_types {
IPCTNL_MSG_EXP_NEW,
IPCTNL_MSG_EXP_GET,
IPCTNL_MSG_EXP_DELETE,
IPCTNL_MSG_EXP_MAX
};
enum ctattr_type {
CTA_UNSPEC,
CTA_TUPLE_ORIG,
CTA_TUPLE_REPLY,
CTA_STATUS,
CTA_PROTOINFO,
CTA_HELP,
CTA_NAT,
CTA_TIMEOUT,
CTA_MARK,
CTA_COUNTERS_ORIG,
CTA_COUNTERS_REPLY,
CTA_USE,
CTA_EXPECT,
CTA_ID,
__CTA_MAX
};
#define CTA_MAX (__CTA_MAX - 1)
enum ctattr_tuple {
CTA_TUPLE_UNSPEC,
CTA_TUPLE_IP,
CTA_TUPLE_PROTO,
__CTA_TUPLE_MAX
};
#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1)
enum ctattr_ip {
CTA_IP_UNSPEC,
CTA_IP_V4_SRC,
CTA_IP_V4_DST,
CTA_IP_V6_SRC,
CTA_IP_V6_DST,
__CTA_IP_MAX
};
#define CTA_IP_MAX (__CTA_IP_MAX - 1)
enum ctattr_l4proto {
CTA_PROTO_UNSPEC,
CTA_PROTO_NUM,
CTA_PROTO_SRC_PORT,
CTA_PROTO_DST_PORT,
CTA_PROTO_ICMP_ID,
CTA_PROTO_ICMP_TYPE,
CTA_PROTO_ICMP_CODE,
__CTA_PROTO_MAX
};
#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
enum ctattr_protoinfo {
CTA_PROTOINFO_UNSPEC,
CTA_PROTOINFO_TCP_STATE,
__CTA_PROTOINFO_MAX
};
#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1)
enum ctattr_counters {
CTA_COUNTERS_UNSPEC,
CTA_COUNTERS_PACKETS,
CTA_COUNTERS_BYTES,
__CTA_COUNTERS_MAX
};
#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1)
enum ctattr_nat {
CTA_NAT_UNSPEC,
CTA_NAT_MINIP,
CTA_NAT_MAXIP,
CTA_NAT_PROTO,
__CTA_NAT_MAX
};
#define CTA_NAT_MAX (__CTA_NAT_MAX - 1)
enum ctattr_protonat {
CTA_PROTONAT_UNSPEC,
CTA_PROTONAT_PORT_MIN,
CTA_PROTONAT_PORT_MAX,
__CTA_PROTONAT_MAX
};
#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1)
enum ctattr_expect {
CTA_EXPECT_UNSPEC,
CTA_EXPECT_TUPLE,
CTA_EXPECT_MASK,
CTA_EXPECT_TIMEOUT,
CTA_EXPECT_ID,
__CTA_EXPECT_MAX
};
#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1)
enum ctattr_help {
CTA_HELP_UNSPEC,
CTA_HELP_NAME,
__CTA_HELP_MAX
};
#define CTA_HELP_MAX (__CTA_HELP_MAX - 1)
#define CTA_HELP_MAXNAMESIZE 32
#endif /* _IPCONNTRACK_NETLINK_H */
......@@ -209,6 +209,9 @@ struct ip_conntrack
/* Current number of expected connections */
unsigned int expecting;
/* Unique ID that identifies this conntrack*/
unsigned int id;
/* Helper, if any. */
struct ip_conntrack_helper *helper;
......@@ -257,6 +260,9 @@ struct ip_conntrack_expect
/* Usage count. */
atomic_t use;
/* Unique ID */
unsigned int id;
#ifdef CONFIG_IP_NF_NAT_NEEDED
/* This is the original per-proto part, used to map the
* expected connection the way the recipient expects. */
......@@ -296,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
}
/* decrement reference count on a conntrack */
extern void ip_conntrack_put(struct ip_conntrack *ct);
static inline void
ip_conntrack_put(struct ip_conntrack *ct)
{
IP_NF_ASSERT(ct);
nf_conntrack_put(&ct->ct_general);
}
/* call to create an explicit dependency on ip_conntrack. */
extern void need_ip_conntrack(void);
......@@ -331,6 +342,39 @@ extern void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data),
void *data);
extern struct ip_conntrack_helper *
__ip_conntrack_helper_find_byname(const char *);
extern struct ip_conntrack_helper *
ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple);
extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper);
extern struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol);
extern struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol);
extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto);
extern void ip_ct_remove_expectations(struct ip_conntrack *ct);
extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *,
struct ip_conntrack_tuple *);
extern void ip_conntrack_free(struct ip_conntrack *ct);
extern void ip_conntrack_hash_insert(struct ip_conntrack *ct);
extern struct ip_conntrack_expect *
__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple);
extern struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
extern struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack);
extern void ip_conntrack_flush(void);
/* It's confirmed if it is, or has been in the hash table. */
static inline int is_confirmed(struct ip_conntrack *ct)
{
......
......@@ -2,6 +2,9 @@
#define _IP_CONNTRACK_CORE_H
#include <linux/netfilter.h>
#define MAX_IP_CT_PROTO 256
extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
/* This header is used to share core functionality between the
standalone connection tracking module, and the compatibility layer's use
of connection tracking. */
......@@ -53,6 +56,8 @@ struct ip_conntrack_ecache;
extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec);
#endif
extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp);
extern struct list_head *ip_conntrack_hash;
extern struct list_head ip_conntrack_expect_list;
extern rwlock_t ip_conntrack_lock;
......
......@@ -24,6 +24,8 @@ struct ip_conntrack_helper
int (*help)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info conntrackinfo);
int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct);
};
extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
......
......@@ -2,6 +2,7 @@
#ifndef _IP_CONNTRACK_PROTOCOL_H
#define _IP_CONNTRACK_PROTOCOL_H
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
struct seq_file;
......@@ -47,22 +48,22 @@ struct ip_conntrack_protocol
int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
unsigned int hooknum);
/* convert protoinfo to nfnetink attributes */
int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
const struct ip_conntrack *ct);
int (*tuple_to_nfattr)(struct sk_buff *skb,
const struct ip_conntrack_tuple *t);
int (*nfattr_to_tuple)(struct nfattr *tb[],
struct ip_conntrack_tuple *t);
/* Module (if any) which this is connected to. */
struct module *me;
};
#define MAX_IP_CT_PROTO 256
extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
/* Protocol registration. */
extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto);
extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto);
static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
{
return ip_ct_protos[protocol];
}
/* Existing built-in protocols */
extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp;
extern struct ip_conntrack_protocol ip_conntrack_protocol_udp;
......@@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void);
/* Log invalid packets */
extern unsigned int ip_ct_log_invalid;
extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *,
const struct ip_conntrack_tuple *);
extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
struct ip_conntrack_tuple *);
#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(proto) \
......
......@@ -4,6 +4,9 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
struct iphdr;
struct ip_nat_range;
......@@ -15,6 +18,8 @@ struct ip_nat_protocol
/* Protocol number. */
unsigned int protonum;
struct module *me;
/* Translate a packet to the target according to manip type.
Return true if succeeded. */
int (*manip_pkt)(struct sk_buff **pskb,
......@@ -43,19 +48,20 @@ struct ip_nat_protocol
unsigned int (*print_range)(char *buffer,
const struct ip_nat_range *range);
};
#define MAX_IP_NAT_PROTO 256
extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
int (*range_to_nfattr)(struct sk_buff *skb,
const struct ip_nat_range *range);
int (*nfattr_to_range)(struct nfattr *tb[],
struct ip_nat_range *range);
};
/* Protocol registration. */
extern int ip_nat_protocol_register(struct ip_nat_protocol *proto);
extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto);
static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol)
{
return ip_nat_protos[protocol];
}
extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol);
extern void ip_nat_proto_put(struct ip_nat_protocol *proto);
/* Built-in protocols. */
extern struct ip_nat_protocol ip_nat_protocol_tcp;
......@@ -67,4 +73,9 @@ extern int init_protocols(void) __init;
extern void cleanup_protocols(void);
extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum);
extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb,
const struct ip_nat_range *range);
extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[],
struct ip_nat_range *range);
#endif /*_IP_NAT_PROTO_H*/
......@@ -702,5 +702,12 @@ config IP_NF_ARP_MANGLE
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
config IP_NF_CONNTRACK_NETLINK
tristate 'Connection tracking netlink interface'
depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
help
This option enables support for a netlink-based userspace interface
endmenu
......@@ -9,6 +9,10 @@ iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helpe
# connection tracking
obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
# conntrack netlink interface
obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
# SCTP protocol connection tracking
obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
......
......@@ -50,7 +50,7 @@
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#include <linux/netfilter_ipv4/listhelp.h>
#define IP_CONNTRACK_VERSION "2.2"
#define IP_CONNTRACK_VERSION "2.3"
#if 0
#define DEBUGP printk
......@@ -77,6 +77,8 @@ unsigned int ip_ct_log_invalid;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc;
static unsigned int ip_conntrack_next_id = 1;
static unsigned int ip_conntrack_expect_next_id = 1;
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
struct notifier_block *ip_conntrack_chain;
struct notifier_block *ip_conntrack_expect_chain;
......@@ -154,13 +156,6 @@ void ip_conntrack_event_cache_init(const struct sk_buff *skb)
DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
void
ip_conntrack_put(struct ip_conntrack *ct)
{
IP_NF_ASSERT(ct);
nf_conntrack_put(&ct->ct_general);
}
static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;
......@@ -222,6 +217,12 @@ static void unlink_expect(struct ip_conntrack_expect *exp)
exp->master->expecting--;
}
void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp)
{
unlink_expect(exp);
ip_conntrack_expect_put(exp);
}
static void expectation_timed_out(unsigned long ul_expect)
{
struct ip_conntrack_expect *exp = (void *)ul_expect;
......@@ -232,6 +233,33 @@ static void expectation_timed_out(unsigned long ul_expect)
ip_conntrack_expect_put(exp);
}
struct ip_conntrack_expect *
__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
struct ip_conntrack_expect *i;
list_for_each_entry(i, &ip_conntrack_expect_list, list) {
if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
atomic_inc(&i->use);
return i;
}
}
return NULL;
}
/* Just find a expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
struct ip_conntrack_expect *i;
read_lock_bh(&ip_conntrack_lock);
i = __ip_conntrack_expect_find(tuple);
read_unlock_bh(&ip_conntrack_lock);
return i;
}
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
static struct ip_conntrack_expect *
......@@ -256,7 +284,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple)
}
/* delete all expectations for this conntrack */
static void remove_expectations(struct ip_conntrack *ct)
void ip_ct_remove_expectations(struct ip_conntrack *ct)
{
struct ip_conntrack_expect *i, *tmp;
......@@ -286,7 +314,7 @@ clean_from_lists(struct ip_conntrack *ct)
LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
/* Destroy all pending expectations */
remove_expectations(ct);
ip_ct_remove_expectations(ct);
}
static void
......@@ -304,7 +332,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to ip_conntrack_lock!!! -HW */
proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
if (proto && proto->destroy)
proto->destroy(ct);
......@@ -316,7 +344,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
* too. */
remove_expectations(ct);
ip_ct_remove_expectations(ct);
/* We overload first tuple to link into unconfirmed list. */
if (!is_confirmed(ct)) {
......@@ -331,8 +359,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
ip_conntrack_put(ct->master);
DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
kmem_cache_free(ip_conntrack_cachep, ct);
atomic_dec(&ip_conntrack_count);
ip_conntrack_free(ct);
}
static void death_by_timeout(unsigned long ul_conntrack)
......@@ -359,7 +386,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
&& ip_ct_tuple_equal(tuple, &i->tuple);
}
static struct ip_conntrack_tuple_hash *
struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack)
{
......@@ -394,6 +421,29 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
return h;
}
static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
unsigned int hash,
unsigned int repl_hash)
{
ct->id = ++ip_conntrack_next_id;
list_prepend(&ip_conntrack_hash[hash],
&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
list_prepend(&ip_conntrack_hash[repl_hash],
&ct->tuplehash[IP_CT_DIR_REPLY].list);
}
void ip_conntrack_hash_insert(struct ip_conntrack *ct)
{
unsigned int hash, repl_hash;
hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
write_lock_bh(&ip_conntrack_lock);
__ip_conntrack_hash_insert(ct, hash, repl_hash);
write_unlock_bh(&ip_conntrack_lock);
}
/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
......@@ -440,10 +490,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
/* Remove from unconfirmed list */
list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
list_prepend(&ip_conntrack_hash[hash],
&ct->tuplehash[IP_CT_DIR_ORIGINAL]);
list_prepend(&ip_conntrack_hash[repl_hash],
&ct->tuplehash[IP_CT_DIR_REPLY]);
__ip_conntrack_hash_insert(ct, hash, repl_hash);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
......@@ -527,34 +574,84 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i,
return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}
static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
static struct ip_conntrack_helper *
__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
{
return LIST_FIND(&helpers, helper_cmp,
struct ip_conntrack_helper *,
tuple);
}
/* Allocate a new conntrack: we return -ENOMEM if classification
failed due to stress. Otherwise it really is unclassifiable. */
static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_protocol *protocol,
struct sk_buff *skb)
struct ip_conntrack_helper *
ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
{
struct ip_conntrack_helper *helper;
/* need ip_conntrack_lock to assure that helper exists until
* try_module_get() is called */
read_lock_bh(&ip_conntrack_lock);
helper = __ip_conntrack_helper_find(tuple);
if (helper) {
/* need to increase module usage count to assure helper will
* not go away while the caller is e.g. busy putting a
* conntrack in the hash that uses the helper */
if (!try_module_get(helper->me))
helper = NULL;
}
read_unlock_bh(&ip_conntrack_lock);
return helper;
}
void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
{
module_put(helper->me);
}
struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol)
{
return ip_ct_protos[protocol];
}
/* this is guaranteed to always return a valid protocol helper, since
* it falls back to generic_protocol */
struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol)
{
struct ip_conntrack_protocol *p;
preempt_disable();
p = __ip_conntrack_proto_find(protocol);
if (p) {
if (!try_module_get(p->me))
p = &ip_conntrack_generic_protocol;
}
preempt_enable();
return p;
}
void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
{