Commit a8138f42 authored by David S. Miller's avatar David S. Miller
Browse files

Merge git://

Pablo Neira Ayuso says:

Netfilter updates for net-next

The following patchset contains updates for your net-next tree,
they are:

1) Use kvfree() helper function from x_tables, from Eric Dumazet.

2) Remove extra timer from the conntrack ecache extension, use a
   workqueue instead to redeliver lost events to userspace instead,
   from Florian Westphal.

3) Removal of the ulog targets for ebtables and iptables. The nflog
   infrastructure superseded this almost 9 years ago, time to get rid
   of this code.

4) Replace the list of loggers by an array now that we can only have
   two possible non-overlapping logger flavours, ie. kernel ring buffer
   and netlink logging.

5) Move Eric Dumazet's log buffer code to nf_log to reuse it from
   all of the supported per-family loggers.

6) Consolidate nf_log_packet() as an unified interface for packet logging.
   After this patch, if the struct nf_loginfo is available, it explicitly
   selects the logger that is used.

7) Move ip and ip6 logging code from xt_LOG to the corresponding
   per-family loggers. Thus, x_tables and nf_tables share the same code
   for packet logging.

8) Add generic ARP packet logger, which is used by nf_tables. The
   format aims to be consistent with the output of xt_LOG.

9) Add generic bridge packet logger. Again, this is used by nf_tables
   and it routes the packets to the real family loggers. As a result,
   we get consistent logging format for the bridge family. The ebt_log
   logging code has been intentionally left in place not to break
   backward compatibility since the logging output differs from xt_LOG.

10) Update nft_log to explicitly request the required family logger when

11) Finish nft_log so it supports arp, ip, ip6, bridge and inet families.
    Allowing selection between netlink and kernel buffer ring logging.

12) Several fixes coming after the netfilter core logging changes spotted
    by robots.

13) Use IS_ENABLED() macros whenever possible in the netfilter tree,
    from Duan Jiong.

14) Removal of a couple of unnecessary branch before kfree, from Fabian
Signed-off-by: default avatarDavid S. Miller <>
parents 6fe82a39 16ea4c6b
......@@ -18,7 +18,6 @@ struct nf_conntrack_ecache {
u16 ctmask; /* bitmask of ct events to be delivered */
u16 expmask; /* bitmask of expect events to be delivered */
u32 portid; /* netlink portid of destroyer */
struct timer_list timeout;
static inline struct nf_conntrack_ecache *
......@@ -216,8 +215,23 @@ void nf_conntrack_ecache_pernet_fini(struct net *net);
int nf_conntrack_ecache_init(void);
void nf_conntrack_ecache_fini(void);
static inline void nf_conntrack_ecache_delayed_work(struct net *net)
if (!delayed_work_pending(&net->ct.ecache_dwork)) {
schedule_delayed_work(&net->ct.ecache_dwork, HZ);
net->ct.ecache_dwork_pending = true;
static inline void nf_conntrack_ecache_work(struct net *net)
if (net->ct.ecache_dwork_pending) {
net->ct.ecache_dwork_pending = false;
mod_delayed_work(system_wq, &net->ct.ecache_dwork, 0);
static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
struct nf_conn *ct) {}
static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
......@@ -255,6 +269,14 @@ static inline int nf_conntrack_ecache_init(void)
static inline void nf_conntrack_ecache_fini(void)
static inline void nf_conntrack_ecache_delayed_work(struct net *net)
static inline void nf_conntrack_ecache_work(struct net *net)
......@@ -12,8 +12,11 @@
#define NF_LOG_UID 0x08 /* Log UID owning local socket */
#define NF_LOG_MASK 0x0f
#define NF_LOG_TYPE_LOG 0x01
#define NF_LOG_TYPE_ULOG 0x02
enum nf_log_type {
struct nf_loginfo {
u_int8_t type;
......@@ -40,10 +43,10 @@ typedef void nf_logfn(struct net *net,
const char *prefix);
struct nf_logger {
struct module *me;
nf_logfn *logfn;
char *name;
struct list_head list[NFPROTO_NUMPROTO];
char *name;
enum nf_log_type type;
nf_logfn *logfn;
struct module *me;
/* Function to register/unregister log function. */
......@@ -58,6 +61,13 @@ int nf_log_bind_pf(struct net *net, u_int8_t pf,
const struct nf_logger *logger);
void nf_log_unbind_pf(struct net *net, u_int8_t pf);
int nf_logger_find_get(int pf, enum nf_log_type type);
void nf_logger_put(int pf, enum nf_log_type type);
void nf_logger_request_module(int pf, enum nf_log_type type);
#define MODULE_ALIAS_NF_LOGGER(family, type) \
MODULE_ALIAS("nf-logger-" __stringify(family) "-" __stringify(type))
/* Calls the registered backend logging function */
__printf(8, 9)
void nf_log_packet(struct net *net,
......@@ -69,4 +79,24 @@ void nf_log_packet(struct net *net,
const struct nf_loginfo *li,
const char *fmt, ...);
struct nf_log_buf;
struct nf_log_buf *nf_log_buf_open(void);
__printf(2, 3) int nf_log_buf_add(struct nf_log_buf *m, const char *f, ...);
void nf_log_buf_close(struct nf_log_buf *m);
/* common logging functions */
int nf_log_dump_udp_header(struct nf_log_buf *m, const struct sk_buff *skb,
u8 proto, int fragment, unsigned int offset);
int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
u8 proto, int fragment, unsigned int offset,
unsigned int logflags);
void nf_log_dump_sk_uid_gid(struct nf_log_buf *m, struct sock *sk);
void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct nf_loginfo *loginfo,
const char *prefix);
#endif /* _NF_LOG_H */
#define S_SIZE (1024 - (sizeof(unsigned int) + 1))
struct sbuff {
unsigned int count;
char buf[S_SIZE + 1];
static struct sbuff emergency, *emergency_ptr = &emergency;
static __printf(2, 3) int sb_add(struct sbuff *m, const char *f, ...)
va_list args;
int len;
if (likely(m->count < S_SIZE)) {
va_start(args, f);
len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args);
if (likely(m->count + len < S_SIZE)) {
m->count += len;
return 0;
m->count = S_SIZE;
printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n");
return -1;
static struct sbuff *sb_open(void)
struct sbuff *m = kmalloc(sizeof(*m), GFP_ATOMIC);
if (unlikely(!m)) {
do {
m = xchg(&emergency_ptr, NULL);
} while (!m);
m->count = 0;
return m;
static void sb_close(struct sbuff *m)
m->buf[m->count] = 0;
printk("%s\n", m->buf);
if (likely(m != &emergency))
else {
emergency_ptr = m;
......@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/list_nulls.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/seqlock.h>
......@@ -73,6 +74,10 @@ struct ct_pcpu {
struct netns_ct {
atomic_t count;
unsigned int expect_count;
struct delayed_work ecache_dwork;
bool ecache_dwork_pending;
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
......@@ -82,7 +87,6 @@ struct netns_ct {
char *slabname;
unsigned int sysctl_log_invalid; /* Log invalid packets */
unsigned int sysctl_events_retry_timeout;
int sysctl_events;
int sysctl_acct;
int sysctl_auto_assign_helper;
......@@ -697,6 +697,8 @@ enum nft_counter_attributes {
* @NFTA_LOG_PREFIX: prefix to prepend to log messages (NLA_STRING)
* @NFTA_LOG_SNAPLEN: length of payload to include in netlink message (NLA_U32)
* @NFTA_LOG_QTHRESHOLD: queue threshold (NLA_U32)
* @NFTA_LOG_LEVEL: log level (NLA_U32)
* @NFTA_LOG_FLAGS: logging flags (NLA_U32)
enum nft_log_attributes {
......@@ -704,6 +706,8 @@ enum nft_log_attributes {
#define NFTA_LOG_MAX (__NFTA_LOG_MAX - 1)
......@@ -14,6 +14,5 @@ header-y += ebt_nflog.h
header-y += ebt_pkttype.h
header-y += ebt_redirect.h
header-y += ebt_stp.h
header-y += ebt_ulog.h
header-y += ebt_vlan.h
header-y += ebtables.h
#ifndef _EBT_ULOG_H
#define _EBT_ULOG_H
#include <linux/types.h>
#define EBT_ULOG_MAXNLGROUPS 32 /* hardcoded netlink max */
#define EBT_ULOG_MAX_QLEN 50
#define EBT_ULOG_WATCHER "ulog"
struct ebt_ulog_info {
__u32 nlgroup;
unsigned int cprange;
unsigned int qthreshold;
char prefix[EBT_ULOG_PREFIX_LEN];
typedef struct ebt_ulog_packet_msg {
int version;
char indev[IFNAMSIZ];
char outdev[IFNAMSIZ];
char physindev[IFNAMSIZ];
char physoutdev[IFNAMSIZ];
char prefix[EBT_ULOG_PREFIX_LEN];
struct timeval stamp;
unsigned long mark;
unsigned int hook;
size_t data_len;
/* The complete packet, including Ethernet header and perhaps
* the VLAN header is appended */
unsigned char data[0] __attribute__
((aligned (__alignof__(struct ebt_ulog_info))));
} ebt_ulog_packet_msg_t;
#endif /* _EBT_ULOG_H */
......@@ -5,7 +5,6 @@ header-y += ipt_ECN.h
header-y += ipt_LOG.h
header-y += ipt_REJECT.h
header-y += ipt_TTL.h
header-y += ipt_ULOG.h
header-y += ipt_ah.h
header-y += ipt_ecn.h
header-y += ipt_ttl.h
/* Header file for IP tables userspace logging, Version 1.8
* (C) 2000-2002 by Harald Welte <>
* Distributed under the terms of GNU GPL */
#ifndef _IPT_ULOG_H
#define _IPT_ULOG_H
#define ULOG_MAC_LEN 80
#define ULOG_PREFIX_LEN 32
#define ULOG_MAX_QLEN 50
/* Why 50? Well... there is a limit imposed by the slab cache 131000
* bytes. So the multipart netlink-message has to be < 131000 bytes.
* Assuming a standard ethernet-mtu of 1500, we could define this up
* to 80... but even 50 seems to be big enough. */
/* private data structure for each rule with a ULOG target */
struct ipt_ulog_info {
unsigned int nl_group;
size_t copy_range;
size_t qthreshold;
char prefix[ULOG_PREFIX_LEN];
/* Format of the ULOG packets passed through netlink */
typedef struct ulog_packet_msg {
unsigned long mark;
long timestamp_sec;
long timestamp_usec;
unsigned int hook;
char indev_name[IFNAMSIZ];
char outdev_name[IFNAMSIZ];
size_t data_len;
char prefix[ULOG_PREFIX_LEN];
unsigned char mac_len;
unsigned char mac[ULOG_MAC_LEN];
unsigned char payload[0];
} ulog_packet_msg_t;
#endif /*_IPT_ULOG_H*/
......@@ -14,6 +14,9 @@ config NFT_BRIDGE_META
Add support for bridge dedicated meta key.
tristate "Bridge packet logging"
......@@ -202,22 +205,6 @@ config BRIDGE_EBT_LOG
To compile it as a module, choose M here. If unsure, say N.
tristate "ebt: ulog support (OBSOLETE)"
This option enables the old bridge-specific "ebt_ulog" implementation
which has been obsoleted by the new "nfnetlink_log" code (see
This option adds the ulog watcher, that you can use in any rule
in any ebtables table. The packet is passed to a userspace
logging daemon using netlink multicast sockets. This differs
from the log watcher in the sense that the complete packet is
sent to userspace instead of a descriptive text and that
netlink multicast sockets are used instead of the syslog.
To compile it as a module, choose M here. If unsure, say N.
tristate "ebt: nflog support"
......@@ -5,6 +5,9 @@
obj-$(CONFIG_NF_TABLES_BRIDGE) += nf_tables_bridge.o
obj-$(CONFIG_NFT_BRIDGE_META) += nft_meta_bridge.o
# packet logging
obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
obj-$(CONFIG_BRIDGE_NF_EBTABLES) += ebtables.o
# tables
......@@ -186,6 +186,10 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
li.u.log.level = info->loglevel;
li.u.log.logflags = info->bitmask;
/* Remember that we have to use ebt_log_packet() not to break backward
* compatibility. We cannot use the default bridge packet logger via
* nf_log_packet() with NFT_LOG_TYPE_LOG here. --Pablo
if (info->bitmask & EBT_LOG_NFLOG)
nf_log_packet(net, NFPROTO_BRIDGE, par->hooknum, skb,
par->in, par->out, &li, "%s", info->prefix);
......@@ -205,54 +209,13 @@ static struct xt_target ebt_log_tg_reg __read_mostly = {
static struct nf_logger ebt_log_logger __read_mostly = {
.name = "ebt_log",
.logfn = &ebt_log_packet,
static int __net_init ebt_log_net_init(struct net *net)
nf_log_set(net, NFPROTO_BRIDGE, &ebt_log_logger);
return 0;
static void __net_exit ebt_log_net_fini(struct net *net)
nf_log_unset(net, &ebt_log_logger);
static struct pernet_operations ebt_log_net_ops = {
.init = ebt_log_net_init,
.exit = ebt_log_net_fini,
static int __init ebt_log_init(void)
int ret;
ret = register_pernet_subsys(&ebt_log_net_ops);
if (ret < 0)
goto err_pernet;
ret = xt_register_target(&ebt_log_tg_reg);
if (ret < 0)
goto err_target;
nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger);
return ret;
return ret;
return xt_register_target(&ebt_log_tg_reg);
static void __exit ebt_log_fini(void)
* netfilter module for userspace bridged Ethernet frames logging daemons
* Authors:
* Bart De Schuymer <>
* Harald Welte <>
* November, 2004
* Based on ipt_ULOG.c, which is
* (C) 2000-2002 by Harald Welte <>
* This module accepts two parameters:
* nlbufsiz:
* The parameter specifies how big the buffer for each netlink multicast
* group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
* get accumulated in the kernel until they are sent to userspace. It is
* NOT possible to allocate more than 128kB, and it is strongly discouraged,
* because atomically allocating 128kB inside the network rx softirq is not
* reliable. Please also keep in mind that this buffer size is allocated for
* each nlgroup you are using, so the total kernel memory usage increases
* by that factor.
* flushtimeout:
* Specify, after how many hundredths of a second the queue should be
* flushed even if it is not full yet.
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <net/netlink.h>
#include <linux/netdevice.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_bridge/ebt_ulog.h>
#include <net/netfilter/nf_log.h>
#include <net/netns/generic.h>
#include <net/sock.h>
#include "../br_private.h"
static unsigned int nlbufsiz = NLMSG_GOODSIZE;
module_param(nlbufsiz, uint, 0600);
MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) "
"(defaults to 4096)");
static unsigned int flushtimeout = 10;
module_param(flushtimeout, uint, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths ofa second) "
"(defaults to 10)");
typedef struct {
unsigned int qlen; /* number of nlmsgs' in the skb */
struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */
struct sk_buff *skb; /* the pre-allocated skb */
struct timer_list timer; /* the timer function */
spinlock_t lock; /* the per-queue lock */
} ebt_ulog_buff_t;
static int ebt_ulog_net_id __read_mostly;
struct ebt_ulog_net {
unsigned int nlgroup[EBT_ULOG_MAXNLGROUPS];
ebt_ulog_buff_t ulog_buffers[EBT_ULOG_MAXNLGROUPS];
struct sock *ebtulognl;
static struct ebt_ulog_net *ebt_ulog_pernet(struct net *net)
return net_generic(net, ebt_ulog_net_id);
/* send one ulog_buff_t to userspace */
static void ulog_send(struct ebt_ulog_net *ebt, unsigned int nlgroup)
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[nlgroup];
if (!ub->skb)
/* last nlmsg needs NLMSG_DONE */
if (ub->qlen > 1)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
NETLINK_CB(ub->skb).dst_group = nlgroup + 1;
netlink_broadcast(ebt->ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
struct ebt_ulog_net *ebt = container_of((void *)data,
struct ebt_ulog_net,
nlgroup[*(unsigned int *)data]);
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[*(unsigned int *)data];
if (ub->skb)
ulog_send(ebt, *(unsigned int *)data);
static struct sk_buff *ulog_alloc_skb(unsigned int size)
struct sk_buff *skb;
unsigned int n;
n = max(size, nlbufsiz);
skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
if (!skb) {
if (n > size) {
/* try to allocate only as much as we need for
* current packet */
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
pr_debug("cannot even allocate buffer of size %ub\n",
return skb;
static void ebt_ulog_packet(struct net *net, unsigned int hooknr,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
const struct ebt_ulog_info *uloginfo,
const char *prefix)
ebt_ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
struct ebt_ulog_net *ebt = ebt_ulog_pernet(net);
unsigned int group = uloginfo->nlgroup;
ebt_ulog_buff_t *ub = &ebt->ulog_buffers[group];
spinlock_t *lock = &ub->lock;
ktime_t kt;
if ((uloginfo->cprange == 0) ||
(uloginfo->cprange > skb->len + ETH_HLEN))
copy_len = skb->len + ETH_HLEN;
copy_len = uloginfo->cprange;
size = nlmsg_total_size(sizeof(*pm) + copy_len);
if (size > nlbufsiz) {
pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
goto unlock;
} else if (size > skb_tailroom(ub->skb)) {
ulog_send(ebt, group);
if (!(ub->skb = ulog_alloc_skb(size)))
goto unlock;
nlh = nlmsg_put(ub->skb, 0, ub->qlen, 0,
size - NLMSG_ALIGN(sizeof(*nlh)), 0);
if (!nlh) {
ub->skb = NULL;
goto unlock;