ip6_output.c 41.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	: 	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
Herbert Xu's avatar
Herbert Xu committed
30
#include <linux/kernel.h>
Linus Torvalds's avatar
Linus Torvalds committed
31
32
33
34
35
36
37
38
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
39
#include <linux/module.h>
40
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
57
#include <linux/mroute6.h>
Linus Torvalds's avatar
Linus Torvalds committed
58

59
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
Linus Torvalds's avatar
Linus Torvalds committed
60

Herbert Xu's avatar
Herbert Xu committed
61
62
63
64
65
66
67
68
69
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

70
71
	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
Herbert Xu's avatar
Herbert Xu committed
72
73
74
75
76
77
78
79
80
81
82
83
84
85
}

/*
 *	Send a locally generated packet: run the LOCAL_OUT hook through
 *	__ip6_local_out() and, when the hook answers 1, pass the packet on
 *	to dst_output(). Any other hook result is returned to the caller
 *	as is.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);

Linus Torvalds's avatar
Linus Torvalds committed
86
87
88
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
89
	skb_reset_mac_header(newskb);
90
	__skb_pull(newskb, skb_network_offset(newskb));
Linus Torvalds's avatar
Linus Torvalds committed
91
92
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
Eric Dumazet's avatar
Eric Dumazet committed
93
	WARN_ON(!skb_dst(newskb));
Linus Torvalds's avatar
Linus Torvalds committed
94

Eric Dumazet's avatar
Eric Dumazet committed
95
	netif_rx_ni(newskb);
Linus Torvalds's avatar
Linus Torvalds committed
96
97
98
	return 0;
}

99
static int ip6_finish_output2(struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
100
{
Eric Dumazet's avatar
Eric Dumazet committed
101
	struct dst_entry *dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
102
	struct net_device *dev = dst->dev;
103
	struct neighbour *neigh;
Linus Torvalds's avatar
Linus Torvalds committed
104
105
106
107

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

108
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
Eric Dumazet's avatar
Eric Dumazet committed
109
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
Linus Torvalds's avatar
Linus Torvalds committed
110

111
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
112
		    ((mroute6_socket(dev_net(dev), skb) &&
113
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
114
115
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
Linus Torvalds's avatar
Linus Torvalds committed
116
117
118
119
120
121
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
122
123
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
Linus Torvalds's avatar
Linus Torvalds committed
124
125
					ip6_dev_loopback_xmit);

126
			if (ipv6_hdr(skb)->hop_limit == 0) {
127
128
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
129
130
131
132
133
				kfree_skb(skb);
				return 0;
			}
		}

134
135
		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
Linus Torvalds's avatar
Linus Torvalds committed
136
137
	}

138
	rcu_read_lock();
139
	neigh = dst_get_neighbour_noref(dst);
140
141
	if (neigh) {
		int res = neigh_output(neigh, skb);
142

143
144
145
146
		rcu_read_unlock();
		return res;
	}
	rcu_read_unlock();
147
148
149
150
	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
151
152
}

153
154
155
156
157
158
159
160
161
/*
 *	Choose between direct transmission and fragmentation.
 *
 *	The packet is fragmented when it is larger than the dst MTU and is
 *	not a GSO packet, or when the dst reports dst_allfrag(). Otherwise
 *	it goes straight to ip6_finish_output2().
 */
static int ip6_finish_output(struct sk_buff *skb)
{
	if (skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb))
		return ip6_fragment(skb, ip6_finish_output2);

	if (dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);

	return ip6_finish_output2(skb);
}

Linus Torvalds's avatar
Linus Torvalds committed
162
163
int ip6_output(struct sk_buff *skb)
{
164
	struct net_device *dev = skb_dst(skb)->dev;
Eric Dumazet's avatar
Eric Dumazet committed
165
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
166
	if (unlikely(idev->cnf.disable_ipv6)) {
167
		IP6_INC_STATS(dev_net(dev), idev,
168
			      IPSTATS_MIB_OUTDISCARDS);
169
170
171
172
		kfree_skb(skb);
		return 0;
	}

173
174
175
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
Linus Torvalds's avatar
Linus Torvalds committed
176
177
178
}

/*
Shan Wei's avatar
Shan Wei committed
179
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
Linus Torvalds's avatar
Linus Torvalds committed
180
181
 */

182
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
183
	     struct ipv6_txoptions *opt, int tclass)
Linus Torvalds's avatar
Linus Torvalds committed
184
{
185
	struct net *net = sock_net(sk);
186
	struct ipv6_pinfo *np = inet6_sk(sk);
187
	struct in6_addr *first_hop = &fl6->daddr;
Eric Dumazet's avatar
Eric Dumazet committed
188
	struct dst_entry *dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
189
	struct ipv6hdr *hdr;
190
	u8  proto = fl6->flowi6_proto;
Linus Torvalds's avatar
Linus Torvalds committed
191
	int seg_len = skb->len;
192
	int hlimit = -1;
Linus Torvalds's avatar
Linus Torvalds committed
193
194
195
	u32 mtu;

	if (opt) {
196
		unsigned int head_room;
Linus Torvalds's avatar
Linus Torvalds committed
197
198
199
200
201
202
203
204
205
206

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
207
			if (skb2 == NULL) {
Eric Dumazet's avatar
Eric Dumazet committed
208
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
209
210
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
211
212
				return -ENOBUFS;
			}
213
			consume_skb(skb);
214
			skb = skb2;
215
			skb_set_owner_w(skb, sk);
Linus Torvalds's avatar
Linus Torvalds committed
216
217
218
219
220
221
222
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

223
224
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
225
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
226
227
228
229

	/*
	 *	Fill in the IPv6 header
	 */
230
	if (np)
Linus Torvalds's avatar
Linus Torvalds committed
231
232
		hlimit = np->hop_limit;
	if (hlimit < 0)
233
		hlimit = ip6_dst_hoplimit(dst);
Linus Torvalds's avatar
Linus Torvalds committed
234

235
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
236

Linus Torvalds's avatar
Linus Torvalds committed
237
238
239
240
	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

Alexey Dobriyan's avatar
Alexey Dobriyan committed
241
242
	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;
Linus Torvalds's avatar
Linus Torvalds committed
243

244
	skb->priority = sk->sk_priority;
245
	skb->mark = sk->sk_mark;
246

Linus Torvalds's avatar
Linus Torvalds committed
247
	mtu = dst_mtu(dst);
248
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
Eric Dumazet's avatar
Eric Dumazet committed
249
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
250
			      IPSTATS_MIB_OUT, skb->len);
251
252
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
Linus Torvalds's avatar
Linus Torvalds committed
253
254
255
256
257
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
258
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Eric Dumazet's avatar
Eric Dumazet committed
259
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
260
261
262
263
	kfree_skb(skb);
	return -EMSGSIZE;
}

264
265
EXPORT_SYMBOL(ip6_xmit);

Linus Torvalds's avatar
Linus Torvalds committed
266
267
268
269
270
271
272
273
/*
 *	To avoid extra problems ND packets are send through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
274
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
Linus Torvalds's avatar
Linus Torvalds committed
275
276
277
278
279
280
281
282
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

283
284
	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
285
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
286

Al Viro's avatar
Al Viro committed
287
	*(__be32*)hdr = htonl(0x60000000);
Linus Torvalds's avatar
Linus Torvalds committed
288
289
290
291
292

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

Alexey Dobriyan's avatar
Alexey Dobriyan committed
293
294
	hdr->saddr = *saddr;
	hdr->daddr = *daddr;
Linus Torvalds's avatar
Linus Torvalds committed
295
296
297
298
299
300
301
302
303
304
305
306

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
307
308
309
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
Linus Torvalds's avatar
Linus Torvalds committed
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

328
329
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
330
	struct ipv6hdr *hdr = ipv6_hdr(skb);
331
	u8 nexthdr = hdr->nexthdr;
332
	__be16 frag_off;
333
334
335
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
336
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
337
338
339
340
341
342
343
344
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

345
346
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
347
348
			return 0;

349
		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

367
368
369
370
371
372
373
374
375
376
	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

377
378
379
	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
380
381
382
383
384
385
386
/*
 *	Continuation of ip6_forward() after the FORWARD hook: transmit the
 *	packet through its dst and report dst_output()'s result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}

int ip6_forward(struct sk_buff *skb)
{
Eric Dumazet's avatar
Eric Dumazet committed
387
	struct dst_entry *dst = skb_dst(skb);
388
	struct ipv6hdr *hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
389
	struct inet6_skb_parm *opt = IP6CB(skb);
390
	struct net *net = dev_net(dst->dev);
391
	u32 mtu;
392

393
	if (net->ipv6.devconf_all->forwarding == 0)
Linus Torvalds's avatar
Linus Torvalds committed
394
395
		goto error;

396
397
398
	if (skb_warn_if_lro(skb))
		goto drop;

Linus Torvalds's avatar
Linus Torvalds committed
399
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
400
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
401
402
403
		goto drop;
	}

404
405
406
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

407
	skb_forward_csum(skb);
Linus Torvalds's avatar
Linus Torvalds committed
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
423
		u8 *ptr = skb_network_header(skb) + opt->ra;
Linus Torvalds's avatar
Linus Torvalds committed
424
425
426
427
428
429
430
431
432
433
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
434
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
435
436
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
Linus Torvalds's avatar
Linus Torvalds committed
437
438
439
440
441

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

442
	/* XXX: idev->cnf.proxy_ndp? */
443
	if (net->ipv6.devconf_all->proxy_ndp &&
444
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
445
446
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
447
			return ip6_input(skb);
448
		else if (proxied < 0) {
449
450
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
451
452
			goto drop;
		}
453
454
	}

Linus Torvalds's avatar
Linus Torvalds committed
455
	if (!xfrm6_route_forward(skb)) {
456
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
457
458
		goto drop;
	}
Eric Dumazet's avatar
Eric Dumazet committed
459
	dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
460
461
462

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
463
	   We don't send redirects to frames decapsulated from IPsec.
Linus Torvalds's avatar
Linus Torvalds committed
464
	 */
465
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
466
467
468
469
470
471
472
473
474
		struct in6_addr *target = NULL;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
475
476
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
Linus Torvalds's avatar
Linus Torvalds committed
477
478
479
		else
			target = &hdr->daddr;

480
481
482
		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);

Linus Torvalds's avatar
Linus Torvalds committed
483
484
485
		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
486
		if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
487
			ndisc_send_redirect(skb, target);
488
489
490
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

Linus Torvalds's avatar
Linus Torvalds committed
491
		/* This check is security critical. */
492
493
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
494
495
496
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
497
				    ICMPV6_NOT_NEIGHBOUR, 0);
498
499
			goto error;
		}
Linus Torvalds's avatar
Linus Torvalds committed
500
501
	}

502
503
504
505
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

506
	if (skb->len > mtu && !skb_is_gso(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
507
508
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
509
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
510
511
512
513
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
514
515
516
517
518
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
519
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
520
521
522
		goto drop;
	}

523
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
524
525

	/* Mangling hops number delayed to point after skb COW */
526

Linus Torvalds's avatar
Linus Torvalds committed
527
528
	hdr->hop_limit--;

529
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
530
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
531
		       ip6_forward_finish);
Linus Torvalds's avatar
Linus Torvalds committed
532
533

error:
534
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
Linus Torvalds's avatar
Linus Torvalds committed
535
536
537
538
539
540
541
542
543
544
drop:
	kfree_skb(skb);
	return -EINVAL;
}

/*
 *	Copy the per-packet metadata a fragment has to share with the
 *	packet it was cut from: packet type, priority, protocol, dst
 *	(a new reference is taken, any dst already on @to is dropped),
 *	device, mark, netfilter state and security mark, plus the traffic
 *	control index and netfilter trace flag when those are configured.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

/*
 *	Find where a fragment header has to be inserted.
 *
 *	Walks the extension headers that follow the fixed IPv6 header.
 *	Hop-by-hop and routing headers are skipped, as is a destination
 *	options header that precedes a routing header (or, with MIPv6
 *	configured, one carrying a Home Address option). The walk stops at
 *	the first header of any other kind, at a destination options header
 *	seen after a routing header, or at the end of the packet data.
 *
 *	On return *@nexthdr points at the "next header" byte that refers to
 *	the header at the returned offset, so the caller can overwrite it.
 *
 *	Returns the offset from the network header, i.e. the length of the
 *	per-fragment (unfragmentable) part.
 */
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default :
			return offset;
		}

		/* Step over this extension header to the next one. */
		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

600
601
602
603
604
void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
{
	static atomic_t ipv6_fragmentation_id;
	int old, new;

605
	if (rt && !(rt->dst.flags & DST_NOPEER)) {
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
		struct inet_peer *peer;

		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);
		peer = rt->rt6i_peer;
		if (peer) {
			fhdr->identification = htonl(inet_getid(peer, 0));
			return;
		}
	}
	do {
		old = atomic_read(&ipv6_fragmentation_id);
		new = old + 1;
		if (!new)
			new = 1;
	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
	fhdr->identification = htonl(new);
}

625
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
626
627
{
	struct sk_buff *frag;
Eric Dumazet's avatar
Eric Dumazet committed
628
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
629
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
Linus Torvalds's avatar
Linus Torvalds committed
630
631
632
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
633
	int hroom, troom;
Al Viro's avatar
Al Viro committed
634
	__be32 frag_id = 0;
Linus Torvalds's avatar
Linus Torvalds committed
635
636
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
Eric Dumazet's avatar
Eric Dumazet committed
637
	struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds's avatar
Linus Torvalds committed
638
639
640
641

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

642
	mtu = ip6_skb_dst_mtu(skb);
643
644

	/* We must not fragment if the socket is set to force MTU discovery
645
	 * or if the skb it not generated by a local socket.
646
	 */
647
	if (!skb->local_df && skb->len > mtu) {
Eric Dumazet's avatar
Eric Dumazet committed
648
		skb->dev = skb_dst(skb)->dev;
649
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Eric Dumazet's avatar
Eric Dumazet committed
650
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
651
			      IPSTATS_MIB_FRAGFAILS);
652
653
654
655
		kfree_skb(skb);
		return -EMSGSIZE;
	}

656
657
658
659
660
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
661

662
	if (skb_has_frag_list(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
663
		int first_len = skb_pagelen(skb);
664
		struct sk_buff *frag2;
Linus Torvalds's avatar
Linus Torvalds committed
665
666
667
668
669
670

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

671
		skb_walk_frags(skb, frag) {
Linus Torvalds's avatar
Linus Torvalds committed
672
673
674
675
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
676
				goto slow_path_clean;
Linus Torvalds's avatar
Linus Torvalds committed
677
678
679

			/* Partially cloned skb? */
			if (skb_shared(frag))
680
				goto slow_path_clean;
681
682
683
684
685
686

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
687
			skb->truesize -= frag->truesize;
Linus Torvalds's avatar
Linus Torvalds committed
688
689
690
691
692
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
693
		skb_frag_list_init(skb);
Linus Torvalds's avatar
Linus Torvalds committed
694
695
		/* BUILD HEADER */

696
		*prevhdr = NEXTHDR_FRAGMENT;
697
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
Linus Torvalds's avatar
Linus Torvalds committed
698
		if (!tmp_hdr) {
Eric Dumazet's avatar
Eric Dumazet committed
699
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
700
				      IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
701
702
703
704
705
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
706
707
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
708
		memcpy(skb_network_header(skb), tmp_hdr, hlen);
Linus Torvalds's avatar
Linus Torvalds committed
709

710
		ipv6_select_ident(fh, rt);
Linus Torvalds's avatar
Linus Torvalds committed
711
712
713
714
715
716
717
718
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
719
720
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));
721

722
		dst_hold(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
723
724
725
726
727
728

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
729
				skb_reset_transport_header(frag);
Linus Torvalds's avatar
Linus Torvalds committed
730
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
731
732
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
733
734
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
Linus Torvalds's avatar
Linus Torvalds committed
735
736
737
738
739
740
741
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
742
743
744
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
Linus Torvalds's avatar
Linus Torvalds committed
745
746
				ip6_copy_metadata(frag, skb);
			}
747

Linus Torvalds's avatar
Linus Torvalds committed
748
			err = output(skb);
749
			if(!err)
750
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
751
					      IPSTATS_MIB_FRAGCREATES);
752

Linus Torvalds's avatar
Linus Torvalds committed
753
754
755
756
757
758
759
760
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

Jesper Juhl's avatar
Jesper Juhl committed
761
		kfree(tmp_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
762
763

		if (err == 0) {
764
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
765
				      IPSTATS_MIB_FRAGOKS);
766
			dst_release(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
767
768
769
770
771
772
773
774
775
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

776
		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
777
			      IPSTATS_MIB_FRAGFAILS);
778
		dst_release(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
779
		return err;
780
781
782
783
784
785
786
787
788

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
Linus Torvalds's avatar
Linus Torvalds committed
789
790
791
792
793
794
795
796
797
798
799
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
800
801
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;
Linus Torvalds's avatar
Linus Torvalds committed
802
803
804
805
806
807
808
809
810

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
Lucas De Marchi's avatar
Lucas De Marchi committed
811
		/* IF: we are not sending up to and including the packet end
Linus Torvalds's avatar
Linus Torvalds committed
812
813
814
815
816
817
818
819
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

820
821
		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
822
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
Eric Dumazet's avatar
Eric Dumazet committed
823
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
824
				      IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
825
826
827
828
829
830
831
832
833
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
834
		skb_reserve(frag, hroom);
Linus Torvalds's avatar
Linus Torvalds committed
835
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
836
		skb_reset_network_header(frag);
837
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
838
839
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));
Linus Torvalds's avatar
Linus Torvalds committed
840
841
842
843
844
845
846
847
848
849
850

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
851
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
Linus Torvalds's avatar
Linus Torvalds committed
852
853
854
855
856
857

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
858
		if (!frag_id) {
859
			ipv6_select_ident(fh, rt);
Linus Torvalds's avatar
Linus Torvalds committed
860
861
862
863
864
865
866
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
867
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
Linus Torvalds's avatar
Linus Torvalds committed
868
869
870
871
872
873
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
874
875
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));
Linus Torvalds's avatar
Linus Torvalds committed
876
877
878
879
880
881
882
883
884
885

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;
886

Eric Dumazet's avatar
Eric Dumazet committed
887
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
888
			      IPSTATS_MIB_FRAGCREATES);
Linus Torvalds's avatar
Linus Torvalds committed
889
	}
Eric Dumazet's avatar
Eric Dumazet committed
890
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
891
		      IPSTATS_MIB_FRAGOKS);
892
	consume_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
893
894
895
	return err;

fail:
Eric Dumazet's avatar
Eric Dumazet committed
896
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
897
		      IPSTATS_MIB_FRAGFAILS);
898
	kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
899
900
901
	return err;
}

902
903
904
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
905
{
Eric Dumazet's avatar
Eric Dumazet committed
906
907
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
908
909
}

910
911
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
912
					  const struct flowi6 *fl6)
Linus Torvalds's avatar
Linus Torvalds committed
913
{
914
915
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;
Linus Torvalds's avatar
Linus Torvalds committed
916

917
918
919
920
921
922
923
924
	if (!dst)
		goto out;

	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE 		--ANK (980726)
	 *
925
926
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
927
928
929
930
931
932
933
934
935
936
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
937
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
938
#ifdef CONFIG_IPV6_SUBTREES
939
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
940
#endif
941
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
942
943
		dst_release(dst);
		dst = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
944
945
	}

946
947
948
949
950
out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
951
			       struct dst_entry **dst, struct flowi6 *fl6)
952
{
953
	struct net *net = sock_net(sk);
954
955
956
957
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
#endif
	int err;
958

Linus Torvalds's avatar
Linus Torvalds committed
959
	if (*dst == NULL)
960
		*dst = ip6_route_output(net, sk, fl6);
Linus Torvalds's avatar
Linus Torvalds committed
961
962
963
964

	if ((err = (*dst)->error))
		goto out_err_release;

965
	if (ipv6_addr_any(&fl6->saddr)) {
966
967
968
969
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
970
		if (err)
Linus Torvalds's avatar
Linus Torvalds committed
971
972
973
			goto out_err_release;
	}

974
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
975
976
977
978
979
980
981
982
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
983
	rcu_read_lock();
984
	n = dst_get_neighbour_noref(*dst);
985
	if (n && !(n->nud_state & NUD_VALID)) {
986
		struct inet6_ifaddr *ifp;
987
		struct flowi6 fl_gw6;
988
989
		int redirect;

990
		rcu_read_unlock();
991
		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
992
993
994
995
996
997
998
999
1000
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the