br_netfilter.c 26.9 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
/*
 *	Handle firewalling
 *	Linux ethernet bridge
 *
 *	Authors:
 *	Lennert Buytenhek               <buytenh@gnu.org>
 *	Bart De Schuymer (maintainer)	<bdschuym@pandora.be>
 *
 *	Changes:
 *	Apr 29 2003: physdev module support (bdschuym)
 *	Jun 19 2003: let arptables see bridged ARP traffic (bdschuym)
 *	Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge
 *	             (bdschuym)
 *	Sep 01 2004: add IPv6 filtering (bdschuym)
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Lennert dedicates this file to Kerstin Wurdinger.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ip.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
29
#include <linux/if_arp.h>
Linus Torvalds's avatar
Linus Torvalds committed
30
31
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
32
33
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
Linus Torvalds's avatar
Linus Torvalds committed
34
35
36
37
38
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
39
#include <linux/inetdevice.h>
40

Linus Torvalds's avatar
Linus Torvalds committed
41
42
#include <net/ip.h>
#include <net/ipv6.h>
43
44
#include <net/route.h>

Linus Torvalds's avatar
Linus Torvalds committed
45
46
47
48
49
50
51
52
#include <asm/uaccess.h>
#include "br_private.h"
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/* IPv4 destination address slot kept in the nf_bridge data area of @skb
 * (viewed as a struct bridge_skb_cb). Expands to an lvalue. */
#define skb_origaddr(skb)	 (((struct bridge_skb_cb *) \
				 ((skb)->nf_bridge->data))->daddr.ipv4)
/* Remember the current IPv4 destination address of @skb. */
#define store_orig_dstaddr(skb)	 (skb_origaddr(skb) = ip_hdr(skb)->daddr)
/* True when the IPv4 destination differs from the remembered one. */
#define dnat_took_place(skb)	 (skb_origaddr(skb) != ip_hdr(skb)->daddr)
Linus Torvalds's avatar
Linus Torvalds committed
55
56
57

#ifdef CONFIG_SYSCTL
/* Run-time switches; all of them default to enabled (1). */
static struct ctl_table_header *brnf_sysctl_header;

static int brnf_call_iptables		__read_mostly = 1;
static int brnf_call_ip6tables		__read_mostly = 1;
static int brnf_call_arptables		__read_mostly = 1;
static int brnf_filter_vlan_tagged	__read_mostly = 1;
static int brnf_filter_pppoe_tagged	__read_mostly = 1;
#else
/* Without sysctl support the encapsulation filters are constant 1. */
#define brnf_filter_vlan_tagged		1
#define brnf_filter_pppoe_tagged	1
#endif

Dave Jones's avatar
Dave Jones committed
68
static inline __be16 vlan_proto(const struct sk_buff *skb)
69
70
71
72
73
74
{
	return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
}

/* True when @skb is an 802.1Q frame carrying the named protocol and
 * filtering of VLAN-tagged traffic is enabled. */
#define IS_VLAN_IP(skb) \
	((skb)->protocol == htons(ETH_P_8021Q) && \
	 vlan_proto(skb) == htons(ETH_P_IP) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_IPV6(skb) \
	((skb)->protocol == htons(ETH_P_8021Q) && \
	 vlan_proto(skb) == htons(ETH_P_IPV6) && \
	 brnf_filter_vlan_tagged)

#define IS_VLAN_ARP(skb) \
	((skb)->protocol == htons(ETH_P_8021Q) && \
	 vlan_proto(skb) == htons(ETH_P_ARP) && \
	 brnf_filter_vlan_tagged)
Linus Torvalds's avatar
Linus Torvalds committed
87

88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
static inline __be16 pppoe_proto(const struct sk_buff *skb)
{
	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
			    sizeof(struct pppoe_hdr)));
}

/* True when @skb is a PPPoE session frame carrying the named protocol and
 * filtering of PPPoE-encapsulated traffic is enabled. */
#define IS_PPPOE_IP(skb) \
	((skb)->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(PPP_IP) && \
	 brnf_filter_pppoe_tagged)

#define IS_PPPOE_IPV6(skb) \
	((skb)->protocol == htons(ETH_P_PPP_SES) && \
	 pppoe_proto(skb) == htons(PPP_IPV6) && \
	 brnf_filter_pppoe_tagged)

Linus Torvalds's avatar
Linus Torvalds committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
/*
 * Placeholder device and route handed to netfilter, because many code
 * paths expect skb->dst to be non-NULL (a fair expectation).
 *
 * Only the fields known to be consulted are filled in: the MTU metric
 * (needed by netfilter refragmentation) and rt_flags (needed by
 * ipt_REJECT).  Other netfilter modules may require more fields later.
 */
static struct net_device __fake_net_device = {
	.hard_header_len = ETH_HLEN
};

static struct rtable __fake_rtable = {
	.rt_flags = 0,
	.u = {
		.dst = {
			.dev		= &__fake_net_device,
			.path		= &__fake_rtable.u.dst,
			.flags		= DST_NOXFRM,
			.metrics	= {[RTAX_MTU - 1] = 1500},
			.__refcnt	= ATOMIC_INIT(1),
		}
	},
};

129
130
131
132
133
134
/* Return the bridge device that @dev is a port of, or NULL when @dev has
 * no bridge port attached. */
static inline struct net_device *bridge_parent(const struct net_device *dev)
{
	struct net_bridge_port *p = rcu_dereference(dev->br_port);

	if (!p)
		return NULL;

	return p->br->dev;
}
Linus Torvalds's avatar
Linus Torvalds committed
135

136
137
138
139
140
141
142
143
144
static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb)
{
	skb->nf_bridge = kzalloc(sizeof(struct nf_bridge_info), GFP_ATOMIC);
	if (likely(skb->nf_bridge))
		atomic_set(&(skb->nf_bridge->use), 1);

	return skb->nf_bridge;
}

145
146
147
148
149
150
151
152
153
/* Push the encapsulation header back in front of the data of @skb and
 * move the network header offset back by the same amount. */
static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
{
	const unsigned int encap_len = nf_bridge_encap_header_len(skb);

	skb_push(skb, encap_len);
	skb->network_header -= encap_len;
}

static inline void nf_bridge_pull_encap_header(struct sk_buff *skb)
154
{
155
156
157
158
159
	unsigned int len = nf_bridge_encap_header_len(skb);

	skb_pull(skb, len);
	skb->network_header += len;
}
160

161
162
163
164
165
166
167
168
169
170
171
/* Same as nf_bridge_pull_encap_header(), but pulls with skb_pull_rcsum(). */
static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
{
	const unsigned int encap_len = nf_bridge_encap_header_len(skb);

	skb_pull_rcsum(skb, encap_len);
	skb->network_header += encap_len;
}

static inline void nf_bridge_save_header(struct sk_buff *skb)
{
	int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
172

173
174
	skb_copy_from_linear_data_offset(skb, -header_size,
					 skb->nf_bridge->data, header_size);
175
176
}

177
178
179
180
181
182
183
/*
 * When forwarding bridge frames, we save a copy of the original
 * header before processing.
 */
int nf_bridge_copy_header(struct sk_buff *skb)
{
	int err;
184
	int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
185
186
187
188
189

	err = skb_cow(skb, header_size);
	if (err)
		return err;

190
191
	skb_copy_to_linear_data_offset(skb, -header_size,
				       skb->nf_bridge->data, header_size);
192
	__skb_push(skb, nf_bridge_encap_header_len(skb));
193
194
195
	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/* PF_BRIDGE/PRE_ROUTING *********************************************/
/* Undo the changes made for ip6tables PREROUTING and continue the
 * bridge PRE_ROUTING hook. */
static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;

	skb->dst = (struct dst_entry *)&__fake_rtable;
	dst_hold(skb->dst);

	skb->dev = nf_bridge->physindev;
213
	nf_bridge_push_encap_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
214
215
216
217
218
219
220
221
222
223
224
225
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}

/* Log a warning about disabled IP forwarding, at most once per 5 seconds. */
static void __br_dnat_complain(void)
{
	static unsigned long last_complaint;

	if (jiffies - last_complaint < 5 * HZ)
		return;

	printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
	       "forwarding to be enabled\n");
	last_complaint = jiffies;
}

/* This requires some explaining. If DNAT has taken place,
 * we will need to fix up the destination Ethernet address,
 * and this is a tricky process.
 *
 * There are two cases to consider:
 * 1. The packet was DNAT'ed to a device in the same bridge
 *    port group as it was received on. We can still bridge
 *    the packet.
 * 2. The packet was DNAT'ed to a different device, either
 *    a non-bridged device or another bridge port group.
 *    The packet will need to be routed.
 *
 * The correct way of distinguishing between these two cases is to
 * call ip_route_input() and to look at skb->dst->dev, which is
 * changed to the destination device if ip_route_input() succeeds.
 *
 * Let us first consider the case that ip_route_input() succeeds:
 *
 * If skb->dst->dev equals the logical bridge device the packet
 * came in on, we can consider this bridging. We then call
 * skb->dst->output() which will make the packet enter br_nf_local_out()
 * not much later. In that function it is assured that the iptables
 * FORWARD chain is traversed for the packet.
 *
 * Otherwise, the packet is considered to be routed and we just
 * change the destination MAC address so that the packet will
 * later be passed up to the IP stack to be routed. For a redirected
 * packet, ip_route_input() will give back the localhost as output device,
 * which differs from the bridge device.
 *
 * Let us now consider the case that ip_route_input() fails:
 *
 * This can be because the destination address is martian, in which case
 * the packet will be dropped.
 * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
 * will fail, while __ip_route_output_key() will return success. The source
 * address for __ip_route_output_key() is set to zero, so
 * __ip_route_output_key() thinks we're handling a locally generated packet
 * and won't care if IP forwarding is allowed. We send a warning message to
 * the user's log telling her to turn IP forwarding on.
 *
 * ip_route_input() will also fail if there is no route available.
 * In that case we just drop the packet.
 *
 * --Lennert, 20020411
 * --Bart, 20020416 (updated)
 * --Bart, 20021007 (updated)
 * --Bart, 20062711 (updated) */
Linus Torvalds's avatar
Linus Torvalds committed
279
280
281
282
283
284
285
286
287
static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
{
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
	}
	skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;

	skb->dev = bridge_parent(skb->dev);
288
289
290
	if (!skb->dev)
		kfree_skb(skb);
	else {
291
		nf_bridge_pull_encap_header(skb);
292
		skb->dst->output(skb);
Linus Torvalds's avatar
Linus Torvalds committed
293
294
295
296
297
298
299
	}
	return 0;
}

static int br_nf_pre_routing_finish(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
300
	struct iphdr *iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
301
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
302
	int err;
Linus Torvalds's avatar
Linus Torvalds committed
303
304
305
306
307
308
309

	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
	}
	nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
	if (dnat_took_place(skb)) {
310
		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
Linus Torvalds's avatar
Linus Torvalds committed
311
			struct rtable *rt;
312
313
314
315
316
317
318
319
320
			struct flowi fl = {
				.nl_u = {
					.ip4_u = {
						 .daddr = iph->daddr,
						 .saddr = 0,
						 .tos = RT_TOS(iph->tos) },
				},
				.proto = 0,
			};
321
322
323
324
325
326
327
328
329
330
331
			struct in_device *in_dev = in_dev_get(dev);

			/* If err equals -EHOSTUNREACH the error is due to a
			 * martian destination or due to the fact that
			 * forwarding is disabled. For most martian packets,
			 * ip_route_output_key() will fail. It won't fail for 2 types of
			 * martian destinations: loopback destinations and destination
			 * 0.0.0.0. In both cases the packet will be dropped because the
			 * destination is the loopback device and not the bridge. */
			if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
				goto free_skb;
Linus Torvalds's avatar
Linus Torvalds committed
332
333

			if (!ip_route_output_key(&rt, &fl)) {
334
				/* - Bridged-and-DNAT'ed traffic doesn't
335
336
				 *   require ip_forwarding. */
				if (((struct dst_entry *)rt)->dev == dev) {
Linus Torvalds's avatar
Linus Torvalds committed
337
338
339
					skb->dst = (struct dst_entry *)rt;
					goto bridged_dnat;
				}
340
341
342
343
				/* we are sure that forwarding is disabled, so printing
				 * this message is no problem. Note that the packet could
				 * still have a martian destination address, in which case
				 * the packet could be dropped even if forwarding were enabled */
Linus Torvalds's avatar
Linus Torvalds committed
344
345
346
				__br_dnat_complain();
				dst_release((struct dst_entry *)rt);
			}
347
free_skb:
Linus Torvalds's avatar
Linus Torvalds committed
348
349
350
351
352
353
354
355
356
			kfree_skb(skb);
			return 0;
		} else {
			if (skb->dst->dev == dev) {
bridged_dnat:
				/* Tell br_nf_local_out this is a
				 * bridged frame */
				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
				skb->dev = nf_bridge->physindev;
357
				nf_bridge_push_encap_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
358
359
360
361
362
363
				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
					       skb, skb->dev, NULL,
					       br_nf_pre_routing_finish_bridge,
					       1);
				return 0;
			}
364
			memcpy(eth_hdr(skb)->h_dest, dev->dev_addr, ETH_ALEN);
Linus Torvalds's avatar
Linus Torvalds committed
365
366
367
368
369
370
371
372
			skb->pkt_type = PACKET_HOST;
		}
	} else {
		skb->dst = (struct dst_entry *)&__fake_rtable;
		dst_hold(skb->dst);
	}

	skb->dev = nf_bridge->physindev;
373
	nf_bridge_push_encap_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
374
375
376
377
378
379
380
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
		       br_handle_frame_finish, 1);

	return 0;
}

/* Some common code for IPv4/IPv6 */
381
static struct net_device *setup_pre_routing(struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
382
383
384
385
386
387
388
389
390
391
392
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;

	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
	nf_bridge->physindev = skb->dev;
	skb->dev = bridge_parent(skb->dev);
393
394

	return skb->dev;
Linus Torvalds's avatar
Linus Torvalds committed
395
396
397
398
399
}

/* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway */
static int check_hbh_len(struct sk_buff *skb)
{
400
	unsigned char *raw = (u8 *)(ipv6_hdr(skb) + 1);
Linus Torvalds's avatar
Linus Torvalds committed
401
	u32 pkt_len;
402
403
	const unsigned char *nh = skb_network_header(skb);
	int off = raw - nh;
404
	int len = (raw[1] + 1) << 3;
Linus Torvalds's avatar
Linus Torvalds committed
405
406
407
408
409
410
411
412

	if ((raw + len) - skb->data > skb_headlen(skb))
		goto bad;

	off += 2;
	len -= 2;

	while (len > 0) {
413
		int optlen = nh[off + 1] + 2;
Linus Torvalds's avatar
Linus Torvalds committed
414

415
		switch (nh[off]) {
Linus Torvalds's avatar
Linus Torvalds committed
416
417
418
419
420
421
422
423
		case IPV6_TLV_PAD0:
			optlen = 1;
			break;

		case IPV6_TLV_PADN:
			break;

		case IPV6_TLV_JUMBO:
424
			if (nh[off + 1] != 4 || (off & 3) != 2)
Linus Torvalds's avatar
Linus Torvalds committed
425
				goto bad;
426
			pkt_len = ntohl(*(__be32 *) (nh + off + 2));
427
			if (pkt_len <= IPV6_MAXPLEN ||
428
			    ipv6_hdr(skb)->payload_len)
429
				goto bad;
Linus Torvalds's avatar
Linus Torvalds committed
430
431
			if (pkt_len > skb->len - sizeof(struct ipv6hdr))
				goto bad;
432
			if (pskb_trim_rcsum(skb,
433
					    pkt_len + sizeof(struct ipv6hdr)))
434
				goto bad;
435
			nh = skb_network_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
			break;
		default:
			if (optlen > len)
				goto bad;
			break;
		}
		off += optlen;
		len -= optlen;
	}
	if (len == 0)
		return 0;
bad:
	return -1;

}

/* Replicate the checks that IPv6 does on packet reception and pass the packet
 * to ip6tables, which doesn't support NAT, so things are fairly simple. */
static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
455
456
457
458
					   struct sk_buff *skb,
					   const struct net_device *in,
					   const struct net_device *out,
					   int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
459
460
461
462
463
464
465
466
467
468
{
	struct ipv6hdr *hdr;
	u32 pkt_len;

	if (skb->len < sizeof(struct ipv6hdr))
		goto inhdr_error;

	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
		goto inhdr_error;

469
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
470
471
472
473
474
475
476
477
478

	if (hdr->version != 6)
		goto inhdr_error;

	pkt_len = ntohs(hdr->payload_len);

	if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
		if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
			goto inhdr_error;
Herbert Xu's avatar
Herbert Xu committed
479
480
		if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
			goto inhdr_error;
Linus Torvalds's avatar
Linus Torvalds committed
481
482
	}
	if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
483
		goto inhdr_error;
Linus Torvalds's avatar
Linus Torvalds committed
484

485
	nf_bridge_put(skb->nf_bridge);
486
	if (!nf_bridge_alloc(skb))
Linus Torvalds's avatar
Linus Torvalds committed
487
		return NF_DROP;
488
489
	if (!setup_pre_routing(skb))
		return NF_DROP;
Linus Torvalds's avatar
Linus Torvalds committed
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506

	NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish_ipv6);

	return NF_STOLEN;

inhdr_error:
	return NF_DROP;
}

/* Direct IPv6 traffic to br_nf_pre_routing_ipv6.
 * Replicate the checks that IPv4 does on packet reception.
 * Set skb->dev to the bridge device (i.e. parent of the
 * receiving device) to make netfilter happy, the REDIRECT
 * target in particular.  Save the original destination IP
 * address to be able to detect DNAT afterwards. */
static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
507
508
509
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
510
511
512
{
	struct iphdr *iph;
	struct sk_buff *skb = *pskb;
513
514
515
516
517
518
519
	__u32 len = nf_bridge_encap_header_len(skb);

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		return NF_STOLEN;

	if (unlikely(!pskb_may_pull(skb, len)))
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
520

521
522
	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
	    IS_PPPOE_IPV6(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
523
524
525
526
#ifdef CONFIG_SYSCTL
		if (!brnf_call_ip6tables)
			return NF_ACCEPT;
#endif
527
		nf_bridge_pull_encap_header_rcsum(skb);
Linus Torvalds's avatar
Linus Torvalds committed
528
529
530
531
532
533
534
		return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
	}
#ifdef CONFIG_SYSCTL
	if (!brnf_call_iptables)
		return NF_ACCEPT;
#endif

535
536
	if (skb->protocol != htons(ETH_P_IP) && !IS_VLAN_IP(skb) &&
	    !IS_PPPOE_IP(skb))
Linus Torvalds's avatar
Linus Torvalds committed
537
538
		return NF_ACCEPT;

539
	nf_bridge_pull_encap_header_rcsum(skb);
Linus Torvalds's avatar
Linus Torvalds committed
540
541
542
543

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto inhdr_error;

544
	iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
545
546
547
	if (iph->ihl < 5 || iph->version != 4)
		goto inhdr_error;

548
	if (!pskb_may_pull(skb, 4 * iph->ihl))
Linus Torvalds's avatar
Linus Torvalds committed
549
550
		goto inhdr_error;

551
	iph = ip_hdr(skb);
552
	if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
Linus Torvalds's avatar
Linus Torvalds committed
553
554
555
		goto inhdr_error;

	len = ntohs(iph->tot_len);
556
	if (skb->len < len || len < 4 * iph->ihl)
Linus Torvalds's avatar
Linus Torvalds committed
557
558
		goto inhdr_error;

Herbert Xu's avatar
Herbert Xu committed
559
	pskb_trim_rcsum(skb, len);
Linus Torvalds's avatar
Linus Torvalds committed
560

561
	nf_bridge_put(skb->nf_bridge);
562
	if (!nf_bridge_alloc(skb))
Linus Torvalds's avatar
Linus Torvalds committed
563
		return NF_DROP;
564
565
	if (!setup_pre_routing(skb))
		return NF_DROP;
Linus Torvalds's avatar
Linus Torvalds committed
566
567
568
569
570
571
572
573
	store_orig_dstaddr(skb);

	NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL,
		br_nf_pre_routing_finish);

	return NF_STOLEN;

inhdr_error:
574
//      IP_INC_STATS_BH(IpInHdrErrors);
Linus Torvalds's avatar
Linus Torvalds committed
575
576
577
578
579
580
581
582
583
584
585
586
587
out:
	return NF_DROP;
}


/* PF_BRIDGE/LOCAL_IN ************************************************/
/* The packet is locally destined, which requires a real
 * dst_entry, so detach the fake one.  On the way up, the
 * packet would pass through PRE_ROUTING again (which already
 * took place when the packet entered the bridge), but we
 * register an IPv4 PRE_ROUTING 'sabotage' hook that will
 * prevent this from happening. */
static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb,
588
589
590
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
{
	struct sk_buff *skb = *pskb;

	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
		dst_release(skb->dst);
		skb->dst = NULL;
	}

	return NF_ACCEPT;
}

/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct sk_buff *skb)
{
	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
	struct net_device *in;

608
	if (skb->protocol != htons(ETH_P_ARP) && !IS_VLAN_ARP(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
609
610
611
612
613
614
615
616
		in = nf_bridge->physindev;
		if (nf_bridge->mask & BRNF_PKT_TYPE) {
			skb->pkt_type = PACKET_OTHERHOST;
			nf_bridge->mask ^= BRNF_PKT_TYPE;
		}
	} else {
		in = *((struct net_device **)(skb->cb));
	}
617
	nf_bridge_push_encap_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
618
	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
619
		       skb->dev, br_forward_finish, 1);
Linus Torvalds's avatar
Linus Torvalds committed
620
621
622
623
624
625
626
627
628
	return 0;
}

/* This is the 'purely bridged' case.  For IP, we pass the packet to
 * netfilter with indev and outdev set to the bridge device,
 * but we are still able to filter on the 'real' indev/outdev
 * because of the physdev module. For ARP, indev and outdev are the
 * bridge ports. */
static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
629
630
631
				     const struct net_device *in,
				     const struct net_device *out,
				     int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
632
633
634
{
	struct sk_buff *skb = *pskb;
	struct nf_bridge_info *nf_bridge;
635
	struct net_device *parent;
Linus Torvalds's avatar
Linus Torvalds committed
636
637
638
639
640
	int pf;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

641
642
643
644
	parent = bridge_parent(out);
	if (!parent)
		return NF_DROP;

645
646
	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
	    IS_PPPOE_IP(skb))
Linus Torvalds's avatar
Linus Torvalds committed
647
648
649
650
		pf = PF_INET;
	else
		pf = PF_INET6;

651
	nf_bridge_pull_encap_header(*pskb);
Linus Torvalds's avatar
Linus Torvalds committed
652
653
654
655
656
657
658
659
660
661
662

	nf_bridge = skb->nf_bridge;
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

	/* The physdev module checks on this */
	nf_bridge->mask |= BRNF_BRIDGED;
	nf_bridge->physoutdev = skb->dev;

663
664
	NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), parent,
		br_nf_forward_finish);
Linus Torvalds's avatar
Linus Torvalds committed
665
666
667
668
669

	return NF_STOLEN;
}

static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
670
671
672
				      const struct net_device *in,
				      const struct net_device *out,
				      int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
673
674
675
676
677
678
679
680
681
{
	struct sk_buff *skb = *pskb;
	struct net_device **d = (struct net_device **)(skb->cb);

#ifdef CONFIG_SYSCTL
	if (!brnf_call_arptables)
		return NF_ACCEPT;
#endif

682
	if (skb->protocol != htons(ETH_P_ARP)) {
683
		if (!IS_VLAN_ARP(skb))
Linus Torvalds's avatar
Linus Torvalds committed
684
			return NF_ACCEPT;
685
		nf_bridge_pull_encap_header(*pskb);
Linus Torvalds's avatar
Linus Torvalds committed
686
687
	}

688
	if (arp_hdr(skb)->ar_pln != 4) {
689
690
		if (IS_VLAN_ARP(skb))
			nf_bridge_push_encap_header(*pskb);
Linus Torvalds's avatar
Linus Torvalds committed
691
692
693
694
695
696
697
698
699
		return NF_ACCEPT;
	}
	*d = (struct net_device *)in;
	NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in,
		(struct net_device *)out, br_nf_forward_finish);

	return NF_STOLEN;
}

700
701
702
/* PF_BRIDGE/LOCAL_OUT ***********************************************
 *
 * This function sees both locally originated IP packets and forwarded
 * IP packets (in both cases the destination device is a bridge
 * device). It also sees bridged-and-DNAT'ed packets.
 *
 * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
 * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward_ip()
 * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
 * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
 * will be executed.
 */
Linus Torvalds's avatar
Linus Torvalds committed
712
static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
713
714
715
				    const struct net_device *in,
				    const struct net_device *out,
				    int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
716
{
717
	struct net_device *realindev;
Linus Torvalds's avatar
Linus Torvalds committed
718
719
720
721
722
723
724
	struct sk_buff *skb = *pskb;
	struct nf_bridge_info *nf_bridge;

	if (!skb->nf_bridge)
		return NF_ACCEPT;

	nf_bridge = skb->nf_bridge;
725
726
	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
		return NF_ACCEPT;
Linus Torvalds's avatar
Linus Torvalds committed
727
728
729

	/* Bridged, take PF_BRIDGE/FORWARD.
	 * (see big note in front of br_nf_pre_routing_finish) */
730
731
	nf_bridge->physoutdev = skb->dev;
	realindev = nf_bridge->physindev;
Linus Torvalds's avatar
Linus Torvalds committed
732

733
734
735
	if (nf_bridge->mask & BRNF_PKT_TYPE) {
		skb->pkt_type = PACKET_OTHERHOST;
		nf_bridge->mask ^= BRNF_PKT_TYPE;
Linus Torvalds's avatar
Linus Torvalds committed
736
	}
737
	nf_bridge_push_encap_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
738

739
740
	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
		br_forward_finish);
Linus Torvalds's avatar
Linus Torvalds committed
741
742
743
	return NF_STOLEN;
}

744
745
746
747
static int br_nf_dev_queue_xmit(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP) &&
	    skb->len > skb->dev->mtu &&
Herbert Xu's avatar
Herbert Xu committed
748
	    !skb_is_gso(skb))
749
750
751
752
		return ip_fragment(skb, br_dev_queue_push_xmit);
	else
		return br_dev_queue_push_xmit(skb);
}
Linus Torvalds's avatar
Linus Torvalds committed
753
754
755

/* PF_BRIDGE/POST_ROUTING ********************************************/
static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
756
757
758
				       const struct net_device *in,
				       const struct net_device *out,
				       int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
759
760
761
762
763
764
765
766
767
{
	struct sk_buff *skb = *pskb;
	struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge;
	struct net_device *realoutdev = bridge_parent(skb->dev);
	int pf;

#ifdef CONFIG_NETFILTER_DEBUG
	/* Be very paranoid. This probably won't happen anymore, but let's
	 * keep the check just to be sure... */
768
769
	if (skb_mac_header(skb) < skb->head ||
	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
Linus Torvalds's avatar
Linus Torvalds committed
770
		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
771
		       "bad mac.raw pointer.\n");
Linus Torvalds's avatar
Linus Torvalds committed
772
773
774
775
776
777
778
		goto print_error;
	}
#endif

	if (!nf_bridge)
		return NF_ACCEPT;

779
780
781
	if (!realoutdev)
		return NF_DROP;

782
783
	if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) ||
	    IS_PPPOE_IP(skb))
Linus Torvalds's avatar
Linus Torvalds committed
784
785
786
787
788
789
		pf = PF_INET;
	else
		pf = PF_INET6;

#ifdef CONFIG_NETFILTER_DEBUG
	if (skb->dst == NULL) {
790
		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
Linus Torvalds's avatar
Linus Torvalds committed
791
792
793
794
795
796
797
798
799
800
801
		goto print_error;
	}
#endif

	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
	 * about the value of skb->pkt_type. */
	if (skb->pkt_type == PACKET_OTHERHOST) {
		skb->pkt_type = PACKET_HOST;
		nf_bridge->mask |= BRNF_PKT_TYPE;
	}

802
	nf_bridge_pull_encap_header(skb);
Linus Torvalds's avatar
Linus Torvalds committed
803
804
805
806
807
808
809
	nf_bridge_save_header(skb);

#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
	if (nf_bridge->netoutdev)
		realoutdev = nf_bridge->netoutdev;
#endif
	NF_HOOK(pf, NF_IP_POST_ROUTING, skb, NULL, realoutdev,
810
		br_nf_dev_queue_xmit);
Linus Torvalds's avatar
Linus Torvalds committed
811
812
813
814
815
816
817

	return NF_STOLEN;

#ifdef CONFIG_NETFILTER_DEBUG
print_error:
	if (skb->dev != NULL) {
		printk("[%s]", skb->dev->name);
818
819
		if (realoutdev)
			printk("[%s]", realoutdev->name);
Linus Torvalds's avatar
Linus Torvalds committed
820
	}
821
	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
822
	       skb->data);
823
	dump_stack();
Linus Torvalds's avatar
Linus Torvalds committed
824
825
826
827
828
829
830
831
	return NF_ACCEPT;
#endif
}

/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
 * for the second time. */
static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb,
832
833
834
				   const struct net_device *in,
				   const struct net_device *out,
				   int (*okfn)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
835
836
837
838
839
840
841
842
843
844
845
846
847
848
{
	if ((*pskb)->nf_bridge &&
	    !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
		return NF_STOP;
	}

	return NF_ACCEPT;
}

/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
 * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
 * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
 * ip_refrag() can return NF_STOLEN. */
static struct nf_hook_ops br_nf_ops[] = {
849
850
851
852
	{ .hook = br_nf_pre_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_PRE_ROUTING,
Linus Torvalds's avatar
Linus Torvalds committed
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_local_in,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_IN,
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_forward_ip,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF - 1, },
	{ .hook = br_nf_forward_arp,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_FORWARD,
	  .priority = NF_BR_PRI_BRNF, },
	{ .hook = br_nf_local_out,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_LOCAL_OUT,
	  .priority = NF_BR_PRI_FIRST, },
	{ .hook = br_nf_post_routing,
	  .owner = THIS_MODULE,
	  .pf = PF_BRIDGE,
	  .hooknum = NF_BR_POST_ROUTING,
	  .priority = NF_BR_PRI_LAST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET,
	  .hooknum = NF_IP_PRE_ROUTING,
	  .priority = NF_IP_PRI_FIRST, },
	{ .hook = ip_sabotage_in,
	  .owner = THIS_MODULE,
	  .pf = PF_INET6,
	  .hooknum = NF_IP6_PRE_ROUTING,
	  .priority = NF_IP6_PRI_FIRST, },
};

#ifdef CONFIG_SYSCTL
static
893
894
int brnf_sysctl_call_tables(ctl_table * ctl, int write, struct file *filp,
			    void __user * buffer, size_t * lenp, loff_t * ppos)
Linus Torvalds's avatar
Linus Torvalds committed
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
{
	int ret;

	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);

	if (write && *(int *)(ctl->data))
		*(int *)(ctl->data) = 1;
	return ret;
}

static ctl_table brnf_table[] = {
	{
		.ctl_name	= NET_BRIDGE_NF_CALL_ARPTABLES,
		.procname	= "bridge-nf-call-arptables",
		.data		= &brnf_call_arptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.ctl_name	= NET_BRIDGE_NF_CALL_IPTABLES,
		.procname	= "bridge-nf-call-iptables",
		.data		= &brnf_call_iptables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.ctl_name	= NET_BRIDGE_NF_CALL_IP6TABLES,
		.procname	= "bridge-nf-call-ip6tables",
		.data		= &brnf_call_ip6tables,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
	},
	{
		.ctl_name	= NET_BRIDGE_NF_FILTER_VLAN_TAGGED,
		.procname	= "bridge-nf-filter-vlan-tagged",
		.data		= &brnf_filter_vlan_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
937
938
939
940
941
942
943
944
	},
	{
		.ctl_name	= NET_BRIDGE_NF_FILTER_PPPOE_TAGGED,
		.procname	= "bridge-nf-filter-pppoe-tagged",
		.data		= &brnf_filter_pppoe_tagged,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &brnf_sysctl_call_tables,
Linus Torvalds's avatar
Linus Torvalds committed
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
	},
	{ .ctl_name = 0 }
};

static ctl_table brnf_bridge_table[] = {
	{
		.ctl_name	= NET_BRIDGE,
		.procname	= "bridge",
		.mode		= 0555,
		.child		= brnf_table,
	},
	{ .ctl_name = 0 }
};

static ctl_table brnf_net_table[] = {
	{
		.ctl_name	= CTL_NET,
		.procname	= "net",
		.mode		= 0555,
		.child		= brnf_bridge_table,
	},
	{ .ctl_name = 0 }
};
#endif

970
/* Register the br_nf_ops hooks and, when CONFIG_SYSCTL is set, the
 * brnf_net_table sysctl tree.
 * Returns 0 on success, the negative value from nf_register_hooks() if hook
 * registration fails, or -ENOMEM if the sysctl table cannot be registered
 * (the hooks are unregistered again in that case). */
int __init br_netfilter_init(void)
{
	int err = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));

	if (err < 0)
		return err;

#ifdef CONFIG_SYSCTL
	brnf_sysctl_header = register_sysctl_table(brnf_net_table);
	if (!brnf_sysctl_header) {
		printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n");
		/* Undo the hook registration done above before bailing out. */
		nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
		return -ENOMEM;
	}
#endif

	printk(KERN_NOTICE "Bridge firewalling registered\n");
	return 0;
}

void br_netfilter_fini(void)
{
992
	nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
Linus Torvalds's avatar
Linus Torvalds committed
993
994
995
996
#ifdef CONFIG_SYSCTL
	unregister_sysctl_table(brnf_sysctl_header);
#endif
}