/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *	Imran Patel	: 	frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *			:       add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>

static int ip6_finish_output2(struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
60
{
Eric Dumazet's avatar
Eric Dumazet committed
61
	struct dst_entry *dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
62
	struct net_device *dev = dst->dev;
63
	struct neighbour *neigh;
64 65
	struct in6_addr *nexthop;
	int ret;
Linus Torvalds's avatar
Linus Torvalds committed
66 67 68 69

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

70
	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
Eric Dumazet's avatar
Eric Dumazet committed
71
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
Linus Torvalds's avatar
Linus Torvalds committed
72

73
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
74
		    ((mroute6_socket(dev_net(dev), skb) &&
75
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
76 77
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
Linus Torvalds's avatar
Linus Torvalds committed
78 79 80 81 82 83
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
84 85
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					newskb, NULL, newskb->dev,
86
					dev_loopback_xmit);
Linus Torvalds's avatar
Linus Torvalds committed
87

88
			if (ipv6_hdr(skb)->hop_limit == 0) {
89 90
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
91 92 93 94 95
				kfree_skb(skb);
				return 0;
			}
		}

96 97
		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
98 99 100 101 102 103 104

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
Linus Torvalds's avatar
Linus Torvalds committed
105 106
	}

107
	rcu_read_lock_bh();
108
	nexthop = rt6_nexthop((struct rt6_info *)dst);
109 110 111 112 113 114 115 116 117
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();
118

119 120
	IP6_INC_STATS(dev_net(dst->dev),
		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
121 122
	kfree_skb(skb);
	return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
123 124
}

125 126 127
static int ip6_finish_output(struct sk_buff *skb)
{
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
128 129
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
130 131 132 133 134
		return ip6_fragment(skb, ip6_finish_output2);
	else
		return ip6_finish_output2(skb);
}

135
int ip6_output(struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
136
{
137
	struct net_device *dev = skb_dst(skb)->dev;
Eric Dumazet's avatar
Eric Dumazet committed
138
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
139
	if (unlikely(idev->cnf.disable_ipv6)) {
140
		IP6_INC_STATS(dev_net(dev), idev,
141
			      IPSTATS_MIB_OUTDISCARDS);
142 143 144 145
		kfree_skb(skb);
		return 0;
	}

146 147 148
	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
Linus Torvalds's avatar
Linus Torvalds committed
149 150 151
}

/*
152
 *	xmit an sk_buff (used by TCP, SCTP and DCCP)
Linus Torvalds's avatar
Linus Torvalds committed
153 154
 */

155
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
156
	     struct ipv6_txoptions *opt, int tclass)
Linus Torvalds's avatar
Linus Torvalds committed
157
{
158
	struct net *net = sock_net(sk);
159
	struct ipv6_pinfo *np = inet6_sk(sk);
160
	struct in6_addr *first_hop = &fl6->daddr;
Eric Dumazet's avatar
Eric Dumazet committed
161
	struct dst_entry *dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
162
	struct ipv6hdr *hdr;
163
	u8  proto = fl6->flowi6_proto;
Linus Torvalds's avatar
Linus Torvalds committed
164
	int seg_len = skb->len;
165
	int hlimit = -1;
Linus Torvalds's avatar
Linus Torvalds committed
166 167 168
	u32 mtu;

	if (opt) {
169
		unsigned int head_room;
Linus Torvalds's avatar
Linus Torvalds committed
170 171 172 173 174 175 176 177 178 179

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
180
			if (skb2 == NULL) {
Eric Dumazet's avatar
Eric Dumazet committed
181
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
182 183
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
184 185
				return -ENOBUFS;
			}
186
			consume_skb(skb);
187
			skb = skb2;
188
			skb_set_owner_w(skb, sk);
Linus Torvalds's avatar
Linus Torvalds committed
189 190 191 192 193 194 195
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

196 197
	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
198
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
199 200 201 202

	/*
	 *	Fill in the IPv6 header
	 */
203
	if (np)
Linus Torvalds's avatar
Linus Torvalds committed
204 205
		hlimit = np->hop_limit;
	if (hlimit < 0)
206
		hlimit = ip6_dst_hoplimit(dst);
Linus Torvalds's avatar
Linus Torvalds committed
207

208
	ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
209

Linus Torvalds's avatar
Linus Torvalds committed
210 211 212 213
	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

214 215
	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;
Linus Torvalds's avatar
Linus Torvalds committed
216

217
	skb->protocol = htons(ETH_P_IPV6);
218
	skb->priority = sk->sk_priority;
219
	skb->mark = sk->sk_mark;
220

Linus Torvalds's avatar
Linus Torvalds committed
221
	mtu = dst_mtu(dst);
222
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
Eric Dumazet's avatar
Eric Dumazet committed
223
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
224
			      IPSTATS_MIB_OUT, skb->len);
225 226
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
			       dst->dev, dst_output);
Linus Torvalds's avatar
Linus Torvalds committed
227 228 229
	}

	skb->dev = dst->dev;
230
	ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
Eric Dumazet's avatar
Eric Dumazet committed
231
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
232 233 234 235
	kfree_skb(skb);
	return -EMSGSIZE;
}

236 237
EXPORT_SYMBOL(ip6_xmit);

Linus Torvalds's avatar
Linus Torvalds committed
238 239 240 241 242 243 244 245
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
246 247 248
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
Linus Torvalds's avatar
Linus Torvalds committed
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

267 268
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
269
	struct ipv6hdr *hdr = ipv6_hdr(skb);
270
	u8 nexthdr = hdr->nexthdr;
271
	__be16 frag_off;
272 273 274
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
275
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
276 277 278 279 280 281 282 283
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

284 285
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
286 287
			return 0;

288
		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

306 307 308 309 310 311 312 313 314 315
	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

316 317 318
	return 0;
}

Linus Torvalds's avatar
Linus Torvalds committed
319 320 321 322 323
/*
 * Continuation passed to the FORWARD netfilter hook: hand the packet to
 * the output routine of its dst and report that routine's result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}

324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

345 346 347 348 349 350 351 352 353 354 355 356 357 358
/*
 * Tell whether a packet being forwarded exceeds @mtu.
 *
 * A packet that fits, or that has local_df set, is never too big.  A
 * packet whose recorded frag_max_size exceeds @mtu always is.  A GSO
 * packet is acceptable when its network-layer segment length fits.
 * Everything else is too big.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	unsigned int max_frag;

	if (skb->len <= mtu || skb->local_df)
		return false;

	max_frag = IP6CB(skb)->frag_max_size;
	if (max_frag != 0 && max_frag > mtu)
		return true;

	return !(skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu);
}

Linus Torvalds's avatar
Linus Torvalds committed
359 360
int ip6_forward(struct sk_buff *skb)
{
Eric Dumazet's avatar
Eric Dumazet committed
361
	struct dst_entry *dst = skb_dst(skb);
362
	struct ipv6hdr *hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
363
	struct inet6_skb_parm *opt = IP6CB(skb);
364
	struct net *net = dev_net(dst->dev);
365
	u32 mtu;
366

367
	if (net->ipv6.devconf_all->forwarding == 0)
Linus Torvalds's avatar
Linus Torvalds committed
368 369
		goto error;

370 371 372
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

373 374 375
	if (skb_warn_if_lro(skb))
		goto drop;

Linus Torvalds's avatar
Linus Torvalds committed
376
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
377 378
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
379 380 381
		goto drop;
	}

382
	skb_forward_csum(skb);
Linus Torvalds's avatar
Linus Torvalds committed
383 384 385 386 387 388 389 390 391 392 393 394 395 396

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without ane WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
397 398
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
Linus Torvalds's avatar
Linus Torvalds committed
399 400 401 402 403 404 405 406 407
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
408
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
409 410
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);
Linus Torvalds's avatar
Linus Torvalds committed
411 412 413 414 415

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

416
	/* XXX: idev->cnf.proxy_ndp? */
417
	if (net->ipv6.devconf_all->proxy_ndp &&
418
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
419 420
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
421
			return ip6_input(skb);
422
		else if (proxied < 0) {
423 424
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
425 426
			goto drop;
		}
427 428
	}

Linus Torvalds's avatar
Linus Torvalds committed
429
	if (!xfrm6_route_forward(skb)) {
430 431
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
432 433
		goto drop;
	}
Eric Dumazet's avatar
Eric Dumazet committed
434
	dst = skb_dst(skb);
Linus Torvalds's avatar
Linus Torvalds committed
435 436 437

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
438
	   We don't send redirects to frames decapsulated from IPsec.
Linus Torvalds's avatar
Linus Torvalds committed
439
	 */
440
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
441
		struct in6_addr *target = NULL;
442
		struct inet_peer *peer;
Linus Torvalds's avatar
Linus Torvalds committed
443 444 445 446 447 448 449 450
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
451 452
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
Linus Torvalds's avatar
Linus Torvalds committed
453 454 455
		else
			target = &hdr->daddr;

456
		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
457

Linus Torvalds's avatar
Linus Torvalds committed
458 459 460
		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
461
		if (inet_peer_xrlim_allow(peer, 1*HZ))
462
			ndisc_send_redirect(skb, target);
463 464
		if (peer)
			inet_putpeer(peer);
465 466 467
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

Linus Torvalds's avatar
Linus Torvalds committed
468
		/* This check is security critical. */
469 470
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
471 472 473
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
474
				    ICMPV6_NOT_NEIGHBOUR, 0);
475 476
			goto error;
		}
Linus Torvalds's avatar
Linus Torvalds committed
477 478
	}

479
	mtu = ip6_dst_mtu_forward(dst);
480 481 482
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

483
	if (ip6_pkt_too_big(skb, mtu)) {
Linus Torvalds's avatar
Linus Torvalds committed
484 485
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
486
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
487 488 489 490
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
491 492 493 494 495
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
496 497
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
Linus Torvalds's avatar
Linus Torvalds committed
498 499 500
		goto drop;
	}

501
	hdr = ipv6_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
502 503

	/* Mangling hops number delayed to point after skb COW */
504

Linus Torvalds's avatar
Linus Torvalds committed
505 506
	hdr->hop_limit--;

507
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
508
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
509
	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
510
		       ip6_forward_finish);
Linus Torvalds's avatar
Linus Torvalds committed
511 512

error:
513
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
Linus Torvalds's avatar
Linus Torvalds committed
514 515 516 517 518 519 520 521 522 523
drop:
	kfree_skb(skb);
	return -EINVAL;
}

/*
 * Copy per-packet metadata from @from to @to so that a fragment is
 * treated like the datagram it was cut from: packet type, priority,
 * protocol, a new reference on the dst (the old dst of @to is dropped),
 * device, mark, tc_index (with CONFIG_NET_SCHED), netfilter state and
 * security mark.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
537 538
{
	struct sk_buff *frag;
Eric Dumazet's avatar
Eric Dumazet committed
539
	struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
540
	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
Linus Torvalds's avatar
Linus Torvalds committed
541 542 543
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
544
	int hroom, troom;
Al Viro's avatar
Al Viro committed
545
	__be32 frag_id = 0;
Linus Torvalds's avatar
Linus Torvalds committed
546 547
	int ptr, offset = 0, err=0;
	u8 *prevhdr, nexthdr = 0;
Eric Dumazet's avatar
Eric Dumazet committed
548
	struct net *net = dev_net(skb_dst(skb)->dev);
Linus Torvalds's avatar
Linus Torvalds committed
549 550 551 552

	hlen = ip6_find_1stfragopt(skb, &prevhdr);
	nexthdr = *prevhdr;

553
	mtu = ip6_skb_dst_mtu(skb);
554 555

	/* We must not fragment if the socket is set to force MTU discovery
556
	 * or if the skb it not generated by a local socket.
557
	 */
558 559 560
	if (unlikely(!skb->local_df && skb->len > mtu) ||
		     (IP6CB(skb)->frag_max_size &&
		      IP6CB(skb)->frag_max_size > mtu)) {
561 562 563
		if (skb->sk && dst_allfrag(skb_dst(skb)))
			sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

Eric Dumazet's avatar
Eric Dumazet committed
564
		skb->dev = skb_dst(skb)->dev;
565
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
Eric Dumazet's avatar
Eric Dumazet committed
566
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
567
			      IPSTATS_MIB_FRAGFAILS);
568 569 570 571
		kfree_skb(skb);
		return -EMSGSIZE;
	}

572 573 574 575 576
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	mtu -= hlen + sizeof(struct frag_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
577

578
	if (skb_has_frag_list(skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
579
		int first_len = skb_pagelen(skb);
580
		struct sk_buff *frag2;
Linus Torvalds's avatar
Linus Torvalds committed
581 582 583 584 585 586

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb))
			goto slow_path;

587
		skb_walk_frags(skb, frag) {
Linus Torvalds's avatar
Linus Torvalds committed
588 589 590 591
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
592
				goto slow_path_clean;
Linus Torvalds's avatar
Linus Torvalds committed
593 594 595

			/* Partially cloned skb? */
			if (skb_shared(frag))
596
				goto slow_path_clean;
597 598 599 600 601 602

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
603
			skb->truesize -= frag->truesize;
Linus Torvalds's avatar
Linus Torvalds committed
604 605 606 607 608
		}

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
609
		skb_frag_list_init(skb);
Linus Torvalds's avatar
Linus Torvalds committed
610 611
		/* BUILD HEADER */

612
		*prevhdr = NEXTHDR_FRAGMENT;
613
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
Linus Torvalds's avatar
Linus Torvalds committed
614
		if (!tmp_hdr) {
Eric Dumazet's avatar
Eric Dumazet committed
615
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
616
				      IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
617 618 619 620 621
			return -ENOMEM;
		}

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
622 623
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
624
		memcpy(skb_network_header(skb), tmp_hdr, hlen);
Linus Torvalds's avatar
Linus Torvalds committed
625

626
		ipv6_select_ident(fh, rt);
Linus Torvalds's avatar
Linus Torvalds committed
627 628 629 630 631 632 633 634
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		frag_id = fh->identification;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
635 636
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));
637

638
		dst_hold(&rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
639 640 641 642 643 644

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
645
				skb_reset_transport_header(frag);
Linus Torvalds's avatar
Linus Torvalds committed
646
				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
647 648
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
649 650
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
Linus Torvalds's avatar
Linus Torvalds committed
651 652 653 654 655 656 657
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next != NULL)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
658 659 660
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
Linus Torvalds's avatar
Linus Torvalds committed
661 662
				ip6_copy_metadata(frag, skb);
			}
663

Linus Torvalds's avatar
Linus Torvalds committed
664
			err = output(skb);
665
			if(!err)
666
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
667
					      IPSTATS_MIB_FRAGCREATES);
668

Linus Torvalds's avatar
Linus Torvalds committed
669 670 671 672 673 674 675 676
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

Jesper Juhl's avatar
Jesper Juhl committed
677
		kfree(tmp_hdr);
Linus Torvalds's avatar
Linus Torvalds committed
678 679

		if (err == 0) {
680
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
681
				      IPSTATS_MIB_FRAGOKS);
Amerigo Wang's avatar
Amerigo Wang committed
682
			ip6_rt_put(rt);
Linus Torvalds's avatar
Linus Torvalds committed
683 684 685 686 687 688 689 690 691
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}

692
		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
693
			      IPSTATS_MIB_FRAGFAILS);
Amerigo Wang's avatar
Amerigo Wang committed
694
		ip6_rt_put(rt);
Linus Torvalds's avatar
Linus Torvalds committed
695
		return err;
696 697 698 699 700 701 702 703 704

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
Linus Torvalds's avatar
Linus Torvalds committed
705 706 707
	}

slow_path:
708 709 710 711
	if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
	    skb_checksum_help(skb))
		goto fail;

Linus Torvalds's avatar
Linus Torvalds committed
712 713 714 715 716 717 718 719
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	*prevhdr = NEXTHDR_FRAGMENT;
720 721
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	troom = rt->dst.dev->needed_tailroom;
Linus Torvalds's avatar
Linus Torvalds committed
722 723 724 725 726 727 728 729 730

	/*
	 *	Keep copying data until we run out.
	 */
	while(left > 0)	{
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
Lucas De Marchi's avatar
Lucas De Marchi committed
731
		/* IF: we are not sending up to and including the packet end
Linus Torvalds's avatar
Linus Torvalds committed
732 733 734 735 736 737 738 739
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}
		/*
		 *	Allocate buffer.
		 */

740 741
		if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				      hroom + troom, GFP_ATOMIC)) == NULL) {
742
			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
Eric Dumazet's avatar
Eric Dumazet committed
743
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
744
				      IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
745 746 747 748 749 750 751 752 753
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
754
		skb_reserve(frag, hroom);
Linus Torvalds's avatar
Linus Torvalds committed
755
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
756
		skb_reset_network_header(frag);
757
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
758 759
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));
Linus Torvalds's avatar
Linus Torvalds committed
760 761 762 763 764 765 766 767 768 769 770

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
771
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
Linus Torvalds's avatar
Linus Torvalds committed
772 773 774 775 776 777

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
778
		if (!frag_id) {
779
			ipv6_select_ident(fh, rt);
Linus Torvalds's avatar
Linus Torvalds committed
780 781 782 783 784 785 786
			frag_id = fh->identification;
		} else
			fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
787
		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
Linus Torvalds's avatar
Linus Torvalds committed
788 789 790 791 792 793
			BUG();
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
794 795
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));
Linus Torvalds's avatar
Linus Torvalds committed
796 797 798 799 800 801 802 803 804 805

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(frag);
		if (err)
			goto fail;
806

Eric Dumazet's avatar
Eric Dumazet committed
807
		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
808
			      IPSTATS_MIB_FRAGCREATES);
Linus Torvalds's avatar
Linus Torvalds committed
809
	}
Eric Dumazet's avatar
Eric Dumazet committed
810
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
811
		      IPSTATS_MIB_FRAGOKS);
812
	consume_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
813 814 815
	return err;

fail:
Eric Dumazet's avatar
Eric Dumazet committed
816
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
817
		      IPSTATS_MIB_FRAGFAILS);
818
	kfree_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
819 820 821
	return err;
}

822 823 824
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
825
{
Eric Dumazet's avatar
Eric Dumazet committed
826 827
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
828 829
}

830 831
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
832
					  const struct flowi6 *fl6)
Linus Torvalds's avatar
Linus Torvalds committed
833
{
834
	struct ipv6_pinfo *np = inet6_sk(sk);
835
	struct rt6_info *rt;
Linus Torvalds's avatar
Linus Torvalds committed
836

837 838 839
	if (!dst)
		goto out;

840 841 842 843 844 845
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
846 847 848 849 850
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE 		--ANK (980726)
	 *
851 852
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
853 854 855 856 857 858 859 860 861 862
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
863
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
864
#ifdef CONFIG_IPV6_SUBTREES
865
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
866
#endif
867
	    (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
868 869
		dst_release(dst);
		dst = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
870 871
	}

872 873 874 875 876
out:
	return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
877
			       struct dst_entry **dst, struct flowi6 *fl6)
878
{
879
	struct net *net = sock_net(sk);
880 881
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
882
	struct rt6_info *rt;
883 884
#endif
	int err;
885

Linus Torvalds's avatar
Linus Torvalds committed
886
	if (*dst == NULL)
887
		*dst = ip6_route_output(net, sk, fl6);
Linus Torvalds's avatar
Linus Torvalds committed
888 889 890 891

	if ((err = (*dst)->error))
		goto out_err_release;

892
	if (ipv6_addr_any(&fl6->saddr)) {
893 894 895 896
		struct rt6_info *rt = (struct rt6_info *) *dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
897
		if (err)
Linus Torvalds's avatar
Linus Torvalds committed
898 899 900
			goto out_err_release;
	}

901
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
902 903 904 905 906 907 908 909
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
910
	rt = (struct rt6_info *) *dst;
911
	rcu_read_lock_bh();
912
	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
913 914 915 916
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
917
		struct inet6_ifaddr *ifp;
918
		struct flowi6 fl_gw6;
919 920
		int redirect;

921
		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
922 923 924 925 926 927 928 929 930 931 932 933
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
934 935 936
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
937 938
			if ((err = (*dst)->error))
				goto out_err_release;
939
		}
940
	}
941 942
#endif

Linus Torvalds's avatar
Linus Torvalds committed
943 944 945
	return 0;

out_err_release:
946
	if (err == -ENETUNREACH)
947
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds's avatar
Linus Torvalds committed
948 949 950 951
	dst_release(*dst);
	*dst = NULL;
	return err;
}
952

953 954 955 956
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	Any previous value of *@dst is discarded, so a fresh route lookup
 *	is always performed for the given flow.
 *
 *	Returns zero on success with the route stored in *@dst, or a
 *	standard errno code on error, in which case *@dst is NULL.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
{
	int err;

	/* no cached entry: make the tail helper look the route up */
	*dst = NULL;
	err = ip6_dst_lookup_tail(sk, dst, fl6);

	return err;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

970
/**
971 972
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
973
 *	@fl6: flow to lookup
974 975 976 977 978 979 980
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
981
struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
982
				      const struct in6_addr *final_dst)
983 984 985 986
{
	struct dst_entry *dst = NULL;
	int err;

987
	err = ip6_dst_lookup_tail(sk, &dst, fl6);
988 989 990
	if (err)
		return ERR_PTR(err);
	if (final_dst)
991
		fl6->daddr = *final_dst;
992

993
	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
994 995 996 997 998
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
999
 *	@sk: socket which provides the dst cache and route info
1000
 *	@fl6: flow to lookup
1001
 *	@final_dst: final destination address for ipsec lookup
1002 1003 1004 1005 1006 1007
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
1008 1009
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
1010
 */
1011
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1012
					 const struct in6_addr *final_dst)
1013
{
1014 1015
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
	int err;
1016

1017
	dst = ip6_sk_dst_check(sk, dst, fl6);
1018

1019
	err = ip6_dst_lookup_tail(sk, &dst, fl6);
1020 1021 1022
	if (err)
		return ERR_PTR(err);
	if (final_dst)
1023
		fl6->daddr = *final_dst;
1024

1025
	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1026
}
1027
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1028

1029
static inline int ip6_ufo_append_data(struct sock *sk,
1030 1031 1032
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
1033 1034
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)
1035 1036 1037

{
	struct sk_buff *skb;
1038
	struct frag_hdr fhdr;
1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
1050
			return err;
1051 1052 1053 1054 1055 1056 1057 1058

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
1059
		skb_reset_network_header(skb);
1060 1061

		/* initialize protocol header pointer */
1062
		skb->transport_header = skb->network_header + fragheaderlen;
1063

1064
		skb->protocol = htons(ETH_P_IPV6);
1065 1066 1067
		skb->csum = 0;

		__skb_queue_tail(&sk->sk_write_queue, skb);
1068 1069
	} else if (skb_is_gso(skb)) {
		goto append;
1070 1071
	}

1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	ipv6_select_ident(&fhdr, rt);
	skb_shinfo(skb)->ip6_frag_id = fhdr.identification;

append:
1083 1084
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
1085
}
Linus Torvalds's avatar
Linus Torvalds committed
1086

1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
/*
 * Duplicate an IPv6 option header with kmemdup(), copying
 * (hdrlen + 1) * 8 bytes.  Returns NULL when @src is NULL; otherwise
 * returns whatever kmemdup() returns for the given @gfp flags.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}

/*
 * Duplicate an IPv6 routing header with kmemdup(), copying
 * (hdrlen + 1) * 8 bytes.  Returns NULL when @src is NULL; otherwise
 * returns whatever kmemdup() returns for the given @gfp flags.
 */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	if (!src)
		return NULL;

	return kmemdup(src, (src->hdrlen + 1) * 8, gfp);
}

1099
static void ip6_append_data_mtu(unsigned int *mtu,
1100 1101 1102
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
1103
				struct rt6_info *rt,
1104
				unsigned int orig_mtu)
1105 1106 1107 1108
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (skb == NULL) {
			/* first fragment, reserve header_len */
1109
			*mtu = orig_mtu - rt->dst.header_len;
1110 1111 1112 1113 1114 1115

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
1116
			*mtu = orig_mtu;
1117 1118 1119 1120 1121 1122
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

1123 1124 1125
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
1126
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1127
	struct rt6_info *rt, unsigned int flags, int dontfrag)
Linus Torvalds's avatar
Linus Torvalds committed
1128 1129 1130
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
1131
	struct inet_cork *cork;
1132
	struct sk_buff *skb, *skb_prev = NULL;
1133
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
Linus Torvalds's avatar
Linus Torvalds committed
1134
	int exthdrlen;
1135
	int dst_exthdrlen;
Linus Torvalds's avatar
Linus Torvalds committed
1136 1137 1138 1139
	int hh_len;
	int copy;
	int err;
	int offset = 0;
1140
	__u8 tx_flags = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1141 1142 1143

	if (flags&MSG_PROBE)
		return 0;
1144
	cork = &inet->cork.base;
Linus Torvalds's avatar
Linus Torvalds committed
1145 1146 1147 1148 1149
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
1150
			if (WARN_ON(np->cork.opt))
Linus Torvalds's avatar
Linus Torvalds committed
1151
				return -EINVAL;
1152

1153
			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);