ip6_output.c 38.5 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	: 	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
Herbert Xu's avatar
Herbert Xu committed
30
#include <linux/kernel.h>
Linus Torvalds's avatar
Linus Torvalds committed
31
32
33
34
35
36
37
38
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
39
#include <linux/module.h>
40
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
57
#include <linux/mroute6.h>
Linus Torvalds's avatar
Linus Torvalds committed
58

59
/* Forward declaration: ip6_finish_output() calls ip6_fragment() before it is defined below. */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
Linus Torvalds's avatar
Linus Torvalds committed
60

Herbert Xu's avatar
Herbert Xu committed
61
62
63
64
65
66
67
68
69
int __ip6_local_out(struct sk_buff *skb)
{
	int len;

	len = skb->len - sizeof(struct ipv6hdr);
	if (len > IPV6_MAXPLEN)
		len = 0;
	ipv6_hdr(skb)->payload_len = htons(len);

70
71
	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
		       skb_dst(skb)->dev, dst_output);
Herbert Xu's avatar
Herbert Xu committed
72
73
74
75
76
77
78
79
80
81
82
83
84
85
}

/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other result from
 * __ip6_local_out() is returned to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int verdict = __ip6_local_out(skb);

	if (unlikely(verdict != 1))
		return verdict;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);

Linus Torvalds's avatar
Linus Torvalds committed
86
87
88
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
89
	skb_reset_mac_header(newskb);
90
	__skb_pull(newskb, skb_network_offset(newskb));
Linus Torvalds's avatar
Linus Torvalds committed
91
92
	newskb->pkt_type = PACKET_LOOPBACK;
	newskb->ip_summed = CHECKSUM_UNNECESSARY;
Eric Dumazet's avatar
Eric Dumazet committed
93
	WARN_ON(!skb_dst(newskb));
Linus Torvalds's avatar
Linus Torvalds committed
94

Eric Dumazet's avatar
Eric Dumazet committed
95
	netif_rx_ni(newskb);
Linus Torvalds's avatar
Linus Torvalds committed
96
97
98
	return 0;
}

99
static int ip6_finish_output2(struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
100
{
Eric Dumazet's avatar
Eric Dumazet committed
101
	struct dst_entry *dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
102
103
104
105
106
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

107
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
Eric Dumazet's avatar
Eric Dumazet committed
108
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
Linus Torvalds's avatar
Linus Torvalds committed
109

110
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
111
		    ((mroute6_socket(dev_net(dev), skb) &&
112
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
113
114
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
Linus Torvalds's avatar
Linus Torvalds committed
115
116
117
118
119
120
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
121
122
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
Linus Torvalds's avatar
Linus Torvalds committed
123
124
					ip6_dev_loopback_xmit);

125
			if (ipv6_hdr(skb)->hop_limit == 0) {
126
127
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
128
129
130
131
132
				kfree_skb(skb);
				return 0;
			}
		}

133
134
		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
Linus Torvalds's avatar
Linus Torvalds committed
135
136
	}

137
138
139
140
141
142
143
144
145
	if (dst->hh)
		return neigh_hh_output(dst->hh, skb);
	else if (dst->neighbour)
		return dst->neighbour->output(skb);

	IP6_INC_STATS_BH(dev_net(dst->dev),
			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
146
147
}

148
149
150
151
152
153
154
155
156
/*
 * Fragment the packet when it is larger than the route MTU (and is not
 * GSO) or when the route demands fragmentation of everything; otherwise
 * send it directly.
 */
static int ip6_finish_output(struct sk_buff *skb)
{
	int oversized = skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb);

	if (oversized || dst_allfrag(skb_dst(skb)))
		return ip6_fragment(skb, ip6_finish_output2);

	return ip6_finish_output2(skb);
}

Linus Torvalds's avatar
Linus Torvalds committed
157
158
int ip6_output(struct sk_buff *skb)
{
159
	struct net_device *dev = skb_dst(skb)->dev;
Eric Dumazet's avatar
Eric Dumazet committed
160
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
161
	if (unlikely(idev->cnf.disable_ipv6)) {
162
		IP6_INC_STATS(dev_net(dev), idev,
163
			      IPSTATS_MIB_OUTDISCARDS);
164
165
166
167
		kfree_skb(skb);
		return 0;
	}

168
169
170
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
Linus Torvalds's avatar
Linus Torvalds committed
171
172
173
}

/*
Shan Wei's avatar
Shan Wei committed
174
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
Linus Torvalds's avatar
Linus Torvalds committed
175
176
177
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
178
	     struct ipv6_txoptions *opt)
Linus Torvalds's avatar
Linus Torvalds committed
179
{
180
	struct net *net = sock_net(sk);
181
	struct ipv6_pinfo *np = inet6_sk(sk);
Linus Torvalds's avatar
Linus Torvalds committed
182
	struct in6_addr *first_hop = &fl->fl6_dst;
Eric Dumazet's avatar
Eric Dumazet committed
183
	struct dst_entry *dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
184
185
186
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
187
188
	int hlimit = -1;
	int tclass = 0;
Linus Torvalds's avatar
Linus Torvalds committed
189
190
191
	u32 mtu;

	if (opt) {
192
		unsigned int head_room;
Linus Torvalds's avatar
Linus Torvalds committed
193
194
195
196
197
198
199
200
201
202

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
203
			if (skb2 == NULL) {
Eric Dumazet's avatar
Eric Dumazet committed
204
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
205
206
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
207
208
				return -ENOBUFS;
			}
209
210
			kfree_skb(skb);
			skb = skb2;
211
			skb_set_owner_w(skb, sk);
Linus Torvalds's avatar
Linus Torvalds committed
212
213
214
215
216
217
218
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

219
220
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
221
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
222
223
224
225

	/*
	 *	Fill in the IPv6 header
	 */
226
227
	if (np) {
		tclass = np->tclass;
Linus Torvalds's avatar
Linus Torvalds committed
228
		hlimit = np->hop_limit;
229
	}
Linus Torvalds's avatar
Linus Torvalds committed
230
	if (hlimit < 0)
231
		hlimit = ip6_dst_hoplimit(dst);
Linus Torvalds's avatar
Linus Torvalds committed
232

Al Viro's avatar
Al Viro committed
233
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
234

Linus Torvalds's avatar
Linus Torvalds committed
235
236
237
238
239
240
241
	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

242
	skb->priority = sk->sk_priority;
243
	skb->mark = sk->sk_mark;
244

Linus Torvalds's avatar
Linus Torvalds committed
245
	mtu = dst_mtu(dst);
246
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
Eric Dumazet's avatar
Eric Dumazet committed
247
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
248
			      IPSTATS_MIB_OUT, skb->len);
249
250
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
Linus Torvalds's avatar
Linus Torvalds committed
251
252
253
254
255
	}

	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
256
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Eric Dumazet's avatar
Eric Dumazet committed
257
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
258
259
260
261
	kfree_skb(skb);
	return -EMSGSIZE;
}

262
263
EXPORT_SYMBOL(ip6_xmit);

Linus Torvalds's avatar
Linus Torvalds committed
264
265
266
267
268
269
270
271
/*
 *	To avoid extra problems ND packets are send through this
 *	routine. It's code duplication but I really want to avoid
 *	extra checks since ipv6_build_header is used by TCP (which
 *	is for us performance critical)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
272
	       const struct in6_addr *saddr, const struct in6_addr *daddr,
Linus Torvalds's avatar
Linus Torvalds committed
273
274
275
276
277
278
279
280
281
282
283
	       int proto, int len)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6hdr *hdr;
	int totlen;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	totlen = len + sizeof(struct ipv6hdr);

284
285
	skb_reset_network_header(skb);
	skb_put(skb, sizeof(struct ipv6hdr));
286
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
287

Al Viro's avatar
Al Viro committed
288
	*(__be32*)hdr = htonl(0x60000000);
Linus Torvalds's avatar
Linus Torvalds committed
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307

	hdr->payload_len = htons(len);
	hdr->nexthdr = proto;
	hdr->hop_limit = np->hop_limit;

	ipv6_addr_copy(&hdr->saddr, saddr);
	ipv6_addr_copy(&hdr->daddr, daddr);

	return 0;
}

static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
308
309
310
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
Linus Torvalds's avatar
Linus Torvalds committed
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

329
330
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
331
	struct ipv6hdr *hdr = ipv6_hdr(skb);
332
333
334
335
336
337
338
339
340
341
342
343
344
	u8 nexthdr = hdr->nexthdr;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

345
346
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
347
348
			return 0;

349
		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

367
368
369
370
371
372
373
374
375
376
	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

377
378
379
	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
380
381
382
383
384
385
386
/* Continuation for the NF_INET_FORWARD hook: transmit via the skb's route. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}

int ip6_forward(struct sk_buff *skb)
{
Eric Dumazet's avatar
Eric Dumazet committed
387
	struct dst_entry *dst = skb_dst(skb);
388
	struct ipv6hdr *hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
389
	struct inet6_skb_parm *opt = IP6CB(skb);
390
	struct net *net = dev_net(dst->dev);
391
	u32 mtu;
392

393
	if (net->ipv6.devconf_all->forwarding == 0)
Linus Torvalds's avatar
Linus Torvalds committed
394
395
		goto error;

396
397
398
	if (skb_warn_if_lro(skb))
		goto drop;

Linus Torvalds's avatar
Linus Torvalds committed
399
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
400
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
401
402
403
		goto drop;
	}

404
405
406
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

407
	skb_forward_csum(skb);
Linus Torvalds's avatar
Linus Torvalds committed
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (opt->ra) {
423
		u8 *ptr = skb_network_header(skb) + opt->ra;
Linus Torvalds's avatar
Linus Torvalds committed
424
425
426
427
428
429
430
431
432
433
		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
434
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
435
436
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
Linus Torvalds's avatar
Linus Torvalds committed
437
438
439
440
441

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

442
	/* XXX: idev->cnf.proxy_ndp? */
443
	if (net->ipv6.devconf_all->proxy_ndp &&
444
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
445
446
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
447
			return ip6_input(skb);
448
		else if (proxied < 0) {
449
450
			IP6_INC_STATS(net, ip6_dst_idev(dst),
				      IPSTATS_MIB_INDISCARDS);
451
452
			goto drop;
		}
453
454
	}

Linus Torvalds's avatar
Linus Torvalds committed
455
	if (!xfrm6_route_forward(skb)) {
456
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
457
458
		goto drop;
	}
Eric Dumazet's avatar
Eric Dumazet committed
459
	dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
460
461
462

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
463
	   We don't send redirects to frames decapsulated from IPsec.
Linus Torvalds's avatar
Linus Torvalds committed
464
	 */
465
	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
466
	    !skb_sec_path(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
		struct in6_addr *target = NULL;
		struct rt6_info *rt;
		struct neighbour *n = dst->neighbour;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if ((rt->rt6i_flags & RTF_GATEWAY))
			target = (struct in6_addr*)&n->primary_key;
		else
			target = &hdr->daddr;

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (xrlim_allow(dst, 1*HZ))
			ndisc_send_redirect(skb, n, target);
487
488
489
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

Linus Torvalds's avatar
Linus Torvalds committed
490
		/* This check is security critical. */
491
492
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
493
494
495
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
496
				    ICMPV6_NOT_NEIGHBOUR, 0);
497
498
			goto error;
		}
Linus Torvalds's avatar
Linus Torvalds committed
499
500
	}

501
502
503
504
	mtu = dst_mtu(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

505
	if (skb->len > mtu && !skb_is_gso(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
506
507
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
508
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
509
510
511
512
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net,
				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
513
514
515
516
517
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
518
		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
519
520
521
		goto drop;
	}

522
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
523
524

	/* Mangling hops number delayed to point after skb COW */
525

Linus Torvalds's avatar
Linus Torvalds committed
526
527
	hdr->hop_limit--;

528
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
529
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530
		       ip6_forward_finish);
Linus Torvalds's avatar
Linus Torvalds committed
531
532

error:
533
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
Linus Torvalds's avatar
Linus Torvalds committed
534
535
536
537
538
539
540
541
542
543
drop:
	kfree_skb(skb);
	return -EINVAL;
}

/*
 * Copy the per-packet metadata a fragment has to share with the packet
 * it was cut from: device, protocol, type, priority, mark, route,
 * traffic-control index, netfilter state and security mark.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	/* Plain scalar fields. */
	to->dev = from->dev;
	to->protocol = from->protocol;
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->mark = from->mark;
#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif

	/* Swap whatever route 'to' holds for a new reference to from's. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));

	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}

/*
 * Find where a Fragment header has to be inserted.
 *
 * Walks the hop-by-hop, routing and destination-options headers that
 * follow the IPv6 header.  On return *@nexthdr points at the "next
 * header" byte that precedes the insertion point, and the return value
 * is the offset of that point from the network header (i.e. the length
 * of the unfragmentable part).
 *
 * The walk stops as soon as fewer than sizeof(struct ipv6_opt_hdr) bytes
 * remain, so that the length byte of an extension header is never read
 * past the end of the packet data.
 */
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	/* unsigned int: a 16-bit offset could wrap on long header chains. */
	unsigned int offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + sizeof(struct ipv6_opt_hdr) <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			/* Destination options carrying a Home Address option
			 * stay in the unfragmentable part.
			 */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			/* Destination options after a routing header belong
			 * to the fragmentable part.
			 */
			if (found_rhdr)
				return offset;
			break;
		default :
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}

599
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
600
601
{
	struct sk_buff *frag;
Eric Dumazet's avatar
Eric Dumazet committed
602
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
603
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
Linus Torvalds's avatar
Linus Torvalds committed
604
605
606
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
Al Viro's avatar
Al Viro committed
607
	__be32 frag_id = 0;
Linus Torvalds's avatar
Linus Torvalds committed
608
609
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
Eric Dumazet's avatar
Eric Dumazet committed
610
	struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds's avatar
Linus Torvalds committed
611
612
613
614

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

615
	mtu = ip6_skb_dst_mtu(skb);
616
617

	/* We must not fragment if the socket is set to force MTU discovery
618
	 * or if the skb it not generated by a local socket.
619
	 */
620
	if (!skb->local_df && skb->len > mtu) {
Eric Dumazet's avatar
Eric Dumazet committed
621
		skb->dev = skb_dst(skb)->dev;
622
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Eric Dumazet's avatar
Eric Dumazet committed
623
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
624
			      IPSTATS_MIB_FRAGFAILS);
625
626
627
628
		kfree_skb(skb);
		return -EMSGSIZE;
	}

629
630
631
632
633
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
634

635
	if (skb_has_frag_list(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
636
		int first_len = skb_pagelen(skb);
637
		struct sk_buff *frag2;
Linus Torvalds's avatar
Linus Torvalds committed
638
639
640
641
642
643

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

644
		skb_walk_frags(skb, frag) {
Linus Torvalds's avatar
Linus Torvalds committed
645
646
647
648
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
649
				goto slow_path_clean;
Linus Torvalds's avatar
Linus Torvalds committed
650
651
652

			/* Partially cloned skb? */
			if (skb_shared(frag))
653
				goto slow_path_clean;
654
655
656
657
658
659

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
660
			skb->truesize -= frag->truesize;
Linus Torvalds's avatar
Linus Torvalds committed
661
662
663
664
665
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
666
		skb_frag_list_init(skb);
Linus Torvalds's avatar
Linus Torvalds committed
667
668
		/* BUILD HEADER */

669
		*prevhdr = NEXTHDR_FRAGMENT;
670
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
Linus Torvalds's avatar
Linus Torvalds committed
671
		if (!tmp_hdr) {
Eric Dumazet's avatar
Eric Dumazet committed
672
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
673
				      IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
674
675
676
677
678
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
679
680
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
681
		memcpy(skb_network_header(skb), tmp_hdr, hlen);
Linus Torvalds's avatar
Linus Torvalds committed
682

683
		ipv6_select_ident(fh);
Linus Torvalds's avatar
Linus Torvalds committed
684
685
686
687
688
689
690
691
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
692
693
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));
694

695
		dst_hold(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
696
697
698
699
700
701

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
702
				skb_reset_transport_header(frag);
Linus Torvalds's avatar
Linus Torvalds committed
703
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
704
705
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
706
707
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
Linus Torvalds's avatar
Linus Torvalds committed
708
709
710
711
712
713
714
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
715
716
717
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
Linus Torvalds's avatar
Linus Torvalds committed
718
719
				ip6_copy_metadata(frag, skb);
			}
720

Linus Torvalds's avatar
Linus Torvalds committed
721
			err = output(skb);
722
			if(!err)
723
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
724
					      IPSTATS_MIB_FRAGCREATES);
725

Linus Torvalds's avatar
Linus Torvalds committed
726
727
728
729
730
731
732
733
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

Jesper Juhl's avatar
Jesper Juhl committed
734
		kfree(tmp_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
735
736

		if (err == 0) {
737
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
738
				      IPSTATS_MIB_FRAGOKS);
739
			dst_release(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
740
741
742
743
744
745
746
747
748
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

749
		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
750
			      IPSTATS_MIB_FRAGFAILS);
751
		dst_release(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
752
		return err;
753
754
755
756
757
758
759
760
761

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
Linus Torvalds's avatar
Linus Torvalds committed
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending upto and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

791
		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
792
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
Eric Dumazet's avatar
Eric Dumazet committed
793
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
794
				      IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
795
796
797
798
799
800
801
802
803
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
804
		skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
Linus Torvalds's avatar
Linus Torvalds committed
805
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
806
		skb_reset_network_header(frag);
807
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
808
809
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));
Linus Torvalds's avatar
Linus Torvalds committed
810
811
812
813
814
815
816
817
818
819
820

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
821
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
Linus Torvalds's avatar
Linus Torvalds committed
822
823
824
825
826
827

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
828
		if (!frag_id) {
829
			ipv6_select_ident(fh);
Linus Torvalds's avatar
Linus Torvalds committed
830
831
832
833
834
835
836
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
837
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
Linus Torvalds's avatar
Linus Torvalds committed
838
839
840
841
842
843
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
844
845
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));
Linus Torvalds's avatar
Linus Torvalds committed
846
847
848
849
850
851
852
853
854
855

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;
856

Eric Dumazet's avatar
Eric Dumazet committed
857
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
858
			      IPSTATS_MIB_FRAGCREATES);
Linus Torvalds's avatar
Linus Torvalds committed
859
	}
Eric Dumazet's avatar
Eric Dumazet committed
860
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
861
		      IPSTATS_MIB_FRAGOKS);
Linus Torvalds's avatar
Linus Torvalds committed
862
863
864
865
	kfree_skb(skb);
	return err;

fail:
Eric Dumazet's avatar
Eric Dumazet committed
866
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867
		      IPSTATS_MIB_FRAGFAILS);
868
	kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
869
870
871
	return err;
}

872
873
874
875
static inline int ip6_rt_check(struct rt6key *rt_key,
			       struct in6_addr *fl_addr,
			       struct in6_addr *addr_cache)
{
Eric Dumazet's avatar
Eric Dumazet committed
876
877
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
878
879
}

880
881
882
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
Linus Torvalds's avatar
Linus Torvalds committed
883
{
884
885
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt = (struct rt6_info *)dst;
Linus Torvalds's avatar
Linus Torvalds committed
886

887
888
889
890
891
892
893
894
	if (!dst)
		goto out;

	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE 		--ANK (980726)
	 *
895
896
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
897
898
899
900
901
902
903
904
905
906
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
907
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
908
909
910
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
911
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
912
913
		dst_release(dst);
		dst = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
914
915
	}

916
917
918
919
920
921
922
923
out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
924
	struct net *net = sock_net(sk);
925

Linus Torvalds's avatar
Linus Torvalds committed
926
	if (*dst == NULL)
927
		*dst = ip6_route_output(net, sk, fl);
Linus Torvalds's avatar
Linus Torvalds committed
928
929
930
931
932

	if ((err = (*dst)->error))
		goto out_err_release;

	if (ipv6_addr_any(&fl->fl6_src)) {
933
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
934
935
936
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
937
		if (err)
Linus Torvalds's avatar
Linus Torvalds committed
938
939
940
			goto out_err_release;
	}

941
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
973
		}
974
	}
975
976
#endif

Linus Torvalds's avatar
Linus Torvalds committed
977
978
979
	return 0;

out_err_release:
980
	if (err == -ENETUNREACH)
981
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds's avatar
Linus Torvalds committed
982
983
984
985
	dst_release(*dst);
	*dst = NULL;
	return err;
}
986

987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
/**
 *	ip6_dst_lookup - route lookup for a flow, without any cached route
 *	@sk: socket supplying the routing information
 *	@dst: where the resulting dst_entry pointer is stored
 *	@fl: the flow to look up
 *
 *	Performs a route lookup for @fl.  *@dst is cleared before the
 *	lookup, so nothing the caller left there is reused.
 *
 *	Returns zero on success and a standard errno code on failure.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	/* No candidate entry is offered: the tail looks the route up. */
	*dst = NULL;

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
/**
 *	ip6_sk_dst_lookup - route lookup for a flow, trying the socket cache
 *	@sk: socket supplying the dst cache and the routing information
 *	@dst: where the resulting dst_entry pointer is stored
 *	@fl: the flow to look up
 *
 *	Performs a route lookup for @fl, reusing the route cached in the
 *	socket when that route is still valid for the flow.  The socket
 *	dst lock is taken while the dst cache is operated on, which is
 *	why this function may only be used in process context.
 *
 *	Returns zero on success and a standard errno code on failure.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	struct dst_entry *cached = NULL;

	if (sk) {
		/* Fetch the socket's cached route, then vet it for @fl. */
		cached = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		cached = ip6_sk_dst_check(sk, cached, fl);
	}
	*dst = cached;

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

1029
static inline int ip6_ufo_append_data(struct sock *sk,
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
1057
		skb_reset_network_header(skb);
1058
1059

		/* initialize protocol header pointer */
1060
		skb->transport_header = skb->network_header + fragheaderlen;
1061

1062
		skb->ip_summed = CHECKSUM_PARTIAL;
1063
1064
1065
1066
1067
1068
1069
1070
1071
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;
	}

	err = skb_append_datato_frags(sk,skb, getfrag, from,
				      (length - transhdrlen));
	if (!err) {
		struct frag_hdr fhdr;

1072
1073
1074
1075
1076
		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
1077
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1078
		ipv6_select_ident(&fhdr);
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);

		return 0;
	}
	/* There is not enough support do UPD LSO,
	 * so follow normal path
	 */
	kfree_skb(skb);

	return err;
}
Linus Torvalds's avatar
Linus Torvalds committed
1091

1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
/*
 * Duplicate an IPv6 option header.  The number of bytes copied is
 * derived from the header's own hdrlen field: (hdrlen + 1) * 8.
 * Returns NULL when @src is NULL, otherwise whatever kmemdup() returns.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}

/*
 * Duplicate an IPv6 routing header.  The number of bytes copied is
 * derived from the header's own hdrlen field: (hdrlen + 1) * 8.
 * Returns NULL when @src is NULL, otherwise whatever kmemdup() returns.
 */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}

1104
1105
1106
1107
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1108
	struct rt6_info *rt, unsigned int flags, int dontfrag)
Linus Torvalds's avatar
Linus Torvalds committed
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
1129
			if (WARN_ON(np->cork.opt))
Linus Torvalds's avatar
Linus Torvalds committed
1130
				return -EINVAL;
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159

			np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

Linus Torvalds's avatar
Linus Torvalds committed
1160
1161
			/* need source address above miyazawa*/
		}
1162
1163
		dst_hold(&rt->dst);
		inet->cork.dst = &rt->dst;
Linus Torvalds's avatar
Linus Torvalds committed
1164
1165
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
1166
		np->cork.tclass = tclass;
1167
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1168
		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1169
		if (np->frag_size < mtu) {
1170
1171
1172
1173
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
1174
		if (dst_allfrag(rt->dst.path))
Linus Torvalds's avatar
Linus Torvalds committed
1175
1176
1177
1178
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
1179
		exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1180
			    rt->rt6i_nfheader_len;
Linus Torvalds's avatar
Linus Torvalds committed
1181
1182
1183
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
1184
		rt = (struct rt6_info *)inet->cork.dst;
Linus Torvalds's avatar
Linus Torvalds committed
1185
		fl = &inet->cork.fl;
1186
		opt = np->cork.opt;
Linus Torvalds's avatar
Linus Torvalds committed
1187
1188
1189
1190
1191
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

1192
	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
Linus Torvalds's avatar
Linus Torvalds committed
1193

1194
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1195
			(opt ? opt->opt_nflen : 0);
Linus Torvalds's avatar
Linus Torvalds committed
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
1212
	 * of the buffer to the new fragment when we split
Linus Torvalds's avatar
Linus Torvalds committed
1213
1214
	 * the message.
	 *
1215
	 * FIXME: It may be fragmented into multiple chunks
Linus Torvalds's avatar
Linus Torvalds committed
1216
1217
	 *        at once if non-fragmentable extension headers
	 *        are too large.
1218
	 * --yoshfuji
Linus Torvalds's avatar
Linus Torvalds committed
1219
1220
1221
	 */

	inet->cork.length += length;
1222
1223
1224
1225
1226
1227
	if (length > mtu) {
		int proto = sk->sk_protocol;
		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
1228

1229
		if (proto == IPPROTO_UDP &&
1230
		    (rt->dst.dev->features & NETIF_F_UFO)) {
1231
1232
1233
1234
1235
1236
1237
1238

			err = ip6_ufo_append_data(sk, getfrag, from, length,
						  hh_len, fragheaderlen,
						  transhdrlen, mtu, flags);
			if (err)
				goto error;
			return 0;
		}
1239
	}
Linus Torvalds's avatar
Linus Torvalds committed
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275

	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&