ip_output.c 39.5 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The Internet Protocol (IP) output module.
 *
8
 * Authors:	Ross Biro
Linus Torvalds's avatar
Linus Torvalds committed
9
10
11
12
13
14
15
16
17
18
19
20
21
22
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@super.org>
 *		Alan Cox, <Alan.Cox@linux.org>
 *		Richard Underwood
 *		Stefan Becker, <stefanb@yello.ping.de>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 *	See ip_input.c for original log
 *
 *	Fixes:
 *		Alan Cox	:	Missing nonblock feature in ip_build_xmit.
 *		Mike Kilburn	:	htons() missing in ip_build_xmit.
23
 *		Bradford Johnson:	Fix faulty handling of some frames when
Linus Torvalds's avatar
Linus Torvalds committed
24
25
26
27
28
29
30
31
32
33
 *					no route is found.
 *		Alexander Demenshin:	Missing sk/skb free in ip_queue_xmit
 *					(in case if packet not accepted by
 *					output firewall rules)
 *		Mike McLagan	:	Routing by source
 *		Alexey Kuznetsov:	use new route cache
 *		Andi Kleen:		Fix broken PMTU recovery and remove
 *					some redundant tests.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Andi Kleen	: 	Replace ip_reply with ip_send_reply.
34
35
36
 *		Andi Kleen	:	Split fast and slow ip_build_xmit path
 *					for decreased register pressure on x86
 *					and more readability.
Linus Torvalds's avatar
Linus Torvalds committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
 *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
 *					silently drop skb instead of failing with -EPERM.
 *		Detlev Wengorz	:	Copy protocol for fragments.
 *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
 *					datagrams.
 *		Hirokazu Takahashi:	sendfile() on UDP works now.
 */

#include <asm/uaccess.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/errno.h>
52
#include <linux/highmem.h>
53
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/proc_fs.h>
#include <linux/stat.h>
#include <linux/init.h>

#include <net/snmp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
69
#include <net/xfrm.h>
Linus Torvalds's avatar
Linus Torvalds committed
70
71
72
73
74
75
76
77
78
79
80
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
81
#include <linux/tcp.h>
Linus Torvalds's avatar
Linus Torvalds committed
82

83
/* Default IP TTL value, initialised to IPDEFTTL and exported to modules
 * below.  NOTE(review): the name suggests this is adjustable via sysctl;
 * the registration is not visible in this part of the file - confirm.
 */
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
84
EXPORT_SYMBOL(sysctl_ip_default_ttl);
Linus Torvalds's avatar
Linus Torvalds committed
85

86
87
88
89
/* Forward declaration: ip_fragment() is defined further down in this file
 * but is already called by ip_finish_output_gso() and ip_finish_output().
 * @output is the per-fragment transmit callback.
 */
static int
ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
	    unsigned int mtu,
	    int (*output)(struct net *, struct sock *, struct sk_buff *));
90

Linus Torvalds's avatar
Linus Torvalds committed
91
/* Generate a checksum for an outgoing IP datagram. */
92
void ip_send_check(struct iphdr *iph)
Linus Torvalds's avatar
Linus Torvalds committed
93
94
95
96
{
	iph->check = 0;
	iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
97
EXPORT_SYMBOL(ip_send_check);
Linus Torvalds's avatar
Linus Torvalds committed
98

99
int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
Herbert Xu's avatar
Herbert Xu committed
100
101
102
103
104
{
	struct iphdr *iph = ip_hdr(skb);

	iph->tot_len = htons(skb->len);
	ip_send_check(iph);
105
106
	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
		       net, sk, skb, NULL, skb_dst(skb)->dev,
107
		       dst_output);
108
109
}

110
/*
 * Send a locally generated packet: run __ip_local_out() and, only when it
 * returns 1, continue with dst_output().  Any other return value from
 * __ip_local_out() is handed back to the caller unchanged.
 */
int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int rc = __ip_local_out(net, sk, skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(net, sk, skb);
}
EXPORT_SYMBOL_GPL(ip_local_out);
Herbert Xu's avatar
Herbert Xu committed
121

Linus Torvalds's avatar
Linus Torvalds committed
122
123
124
125
126
/*
 * Pick the TTL for an outgoing packet: the socket's uc_ttl when it is
 * non-negative, otherwise the hop limit reported by ip4_dst_hoplimit()
 * for the route.
 */
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
	int sock_ttl = inet->uc_ttl;

	return sock_ttl < 0 ? ip4_dst_hoplimit(dst) : sock_ttl;
}

131
/*
Linus Torvalds's avatar
Linus Torvalds committed
132
133
134
 *		Add an ip header to a skbuff and send it out.
 *
 */
135
int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
136
			  __be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
Linus Torvalds's avatar
Linus Torvalds committed
137
138
{
	struct inet_sock *inet = inet_sk(sk);
Eric Dumazet's avatar
Eric Dumazet committed
139
	struct rtable *rt = skb_rtable(skb);
140
	struct net *net = sock_net(sk);
Linus Torvalds's avatar
Linus Torvalds committed
141
142
143
	struct iphdr *iph;

	/* Build the IP header. */
144
	skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
145
	skb_reset_network_header(skb);
146
	iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
147
148
149
	iph->version  = 4;
	iph->ihl      = 5;
	iph->tos      = inet->tos;
150
	iph->ttl      = ip_select_ttl(inet, &rt->dst);
151
152
	iph->daddr    = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
	iph->saddr    = saddr;
Linus Torvalds's avatar
Linus Torvalds committed
153
	iph->protocol = sk->sk_protocol;
154
155
156
157
158
	if (ip_dont_fragment(sk, &rt->dst)) {
		iph->frag_off = htons(IP_DF);
		iph->id = 0;
	} else {
		iph->frag_off = 0;
159
		__ip_select_ident(net, iph, 1);
160
	}
Linus Torvalds's avatar
Linus Torvalds committed
161

162
163
164
	if (opt && opt->opt.optlen) {
		iph->ihl += opt->opt.optlen>>2;
		ip_options_build(skb, &opt->opt, daddr, rt, 0);
Linus Torvalds's avatar
Linus Torvalds committed
165
166
167
	}

	skb->priority = sk->sk_priority;
168
	skb->mark = sk->sk_mark;
Linus Torvalds's avatar
Linus Torvalds committed
169
170

	/* Send it out. */
171
	return ip_local_out(net, skb->sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
172
}
173
174
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);

175
static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
176
{
Eric Dumazet's avatar
Eric Dumazet committed
177
	struct dst_entry *dst = skb_dst(skb);
178
	struct rtable *rt = (struct rtable *)dst;
Linus Torvalds's avatar
Linus Torvalds committed
179
	struct net_device *dev = dst->dev;
180
	unsigned int hh_len = LL_RESERVED_SPACE(dev);
181
	struct neighbour *neigh;
182
	u32 nexthop;
Linus Torvalds's avatar
Linus Torvalds committed
183

184
	if (rt->rt_type == RTN_MULTICAST) {
185
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
186
	} else if (rt->rt_type == RTN_BROADCAST)
187
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
188

Linus Torvalds's avatar
Linus Torvalds committed
189
	/* Be paranoid, rather than too clever. */
190
	if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
Linus Torvalds's avatar
Linus Torvalds committed
191
192
193
		struct sk_buff *skb2;

		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
194
		if (!skb2) {
Linus Torvalds's avatar
Linus Torvalds committed
195
196
197
198
199
			kfree_skb(skb);
			return -ENOMEM;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
200
		consume_skb(skb);
Linus Torvalds's avatar
Linus Torvalds committed
201
202
203
		skb = skb2;
	}

204
	rcu_read_lock_bh();
205
	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
206
207
208
	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
209
	if (!IS_ERR(neigh)) {
210
		int res = dst_neigh_output(dst, neigh, skb);
211

212
		rcu_read_unlock_bh();
213
214
		return res;
	}
215
	rcu_read_unlock_bh();
216

217
218
	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
			    __func__);
Linus Torvalds's avatar
Linus Torvalds committed
219
220
221
222
	kfree_skb(skb);
	return -EINVAL;
}

223
224
static int ip_finish_output_gso(struct net *net, struct sock *sk,
				struct sk_buff *skb, unsigned int mtu)
225
226
227
228
229
230
231
{
	netdev_features_t features;
	struct sk_buff *segs;
	int ret = 0;

	/* common case: locally created skb or seglen is <= mtu */
	if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
232
	      skb_gso_network_seglen(skb) <= mtu)
233
		return ip_finish_output2(net, sk, skb);
234
235
236
237
238
239
240
241
242
243

	/* Slowpath -  GSO segment length is exceeding the dst MTU.
	 *
	 * This can happen in two cases:
	 * 1) TCP GRO packet, DF bit not set
	 * 2) skb arrived via virtio-net, we thus get TSO/GSO skbs directly
	 * from host network stack.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
244
	if (IS_ERR_OR_NULL(segs)) {
245
246
247
248
249
250
251
252
253
254
255
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	do {
		struct sk_buff *nskb = segs->next;
		int err;

		segs->next = NULL;
256
		err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
257
258
259
260
261
262
263
264
265

		if (err && ret == 0)
			ret = err;
		segs = nskb;
	} while (segs);

	return ret;
}

266
static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
267
{
268
269
	unsigned int mtu;

270
271
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
272
	if (skb_dst(skb)->xfrm) {
273
		IPCB(skb)->flags |= IPSKB_REROUTED;
274
		return dst_output(net, sk, skb);
275
	}
276
#endif
277
	mtu = ip_skb_dst_mtu(skb);
278
	if (skb_is_gso(skb))
279
		return ip_finish_output_gso(net, sk, skb, mtu);
280

281
	if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
282
		return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
283

284
	return ip_finish_output2(net, sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
285
286
}

287
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
288
{
Eric Dumazet's avatar
Eric Dumazet committed
289
	struct rtable *rt = skb_rtable(skb);
290
	struct net_device *dev = rt->dst.dev;
Linus Torvalds's avatar
Linus Torvalds committed
291
292
293
294

	/*
	 *	If the indicated interface is up and running, send the packet.
	 */
295
	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
Linus Torvalds's avatar
Linus Torvalds committed
296
297
298
299
300
301
302
303
304

	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/*
	 *	Multicasts are looped back for other local users
	 */

	if (rt->rt_flags&RTCF_MULTICAST) {
305
		if (sk_mc_loop(sk)
Linus Torvalds's avatar
Linus Torvalds committed
306
307
308
309
310
311
312
313
314
#ifdef CONFIG_IP_MROUTE
		/* Small optimization: do not loopback not local frames,
		   which returned after forwarding; they will be  dropped
		   by ip_mr_input in any case.
		   Note, that local frames are looped back to be delivered
		   to local recipients.

		   This check is duplicated in ip_mr_input at the moment.
		 */
315
316
317
		    &&
		    ((rt->rt_flags & RTCF_LOCAL) ||
		     !(IPCB(skb)->flags & IPSKB_FORWARDED))
Linus Torvalds's avatar
Linus Torvalds committed
318
#endif
319
		   ) {
Linus Torvalds's avatar
Linus Torvalds committed
320
321
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
			if (newskb)
322
				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
323
					net, sk, newskb, NULL, newskb->dev,
324
					dev_loopback_xmit);
Linus Torvalds's avatar
Linus Torvalds committed
325
326
327
328
		}

		/* Multicasts with ttl 0 must not go beyond the host */

329
		if (ip_hdr(skb)->ttl == 0) {
Linus Torvalds's avatar
Linus Torvalds committed
330
331
332
333
334
335
336
337
			kfree_skb(skb);
			return 0;
		}
	}

	if (rt->rt_flags&RTCF_BROADCAST) {
		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
		if (newskb)
338
339
340
			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
				net, sk, newskb, NULL, newskb->dev,
				dev_loopback_xmit);
Linus Torvalds's avatar
Linus Torvalds committed
341
342
	}

343
344
345
	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, skb->dev,
			    ip_finish_output,
346
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
Linus Torvalds's avatar
Linus Torvalds committed
347
348
}

349
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
350
{
Eric Dumazet's avatar
Eric Dumazet committed
351
	struct net_device *dev = skb_dst(skb)->dev;
352

353
	IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
Linus Torvalds's avatar
Linus Torvalds committed
354

355
356
357
	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

358
359
	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
360
			    ip_finish_output,
361
			    !(IPCB(skb)->flags & IPSKB_REROUTED));
Linus Torvalds's avatar
Linus Torvalds committed
362
363
}

364
365
366
367
368
369
370
371
372
373
374
375
376
377
/*
 * copy saddr and daddr, possibly using 64bit load/stores
 * Equivalent to :
 *   iph->saddr = fl4->saddr;
 *   iph->daddr = fl4->daddr;
 */
static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
{
	BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
		     offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
	memcpy(&iph->saddr, &fl4->saddr,
	       sizeof(fl4->saddr) + sizeof(fl4->daddr));
}

378
379
/* Note: skb->sk can be different from sk, in case of tunnels */
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
Linus Torvalds's avatar
Linus Torvalds committed
380
381
{
	struct inet_sock *inet = inet_sk(sk);
382
	struct net *net = sock_net(sk);
383
	struct ip_options_rcu *inet_opt;
384
	struct flowi4 *fl4;
Linus Torvalds's avatar
Linus Torvalds committed
385
386
	struct rtable *rt;
	struct iphdr *iph;
387
	int res;
Linus Torvalds's avatar
Linus Torvalds committed
388
389
390
391

	/* Skip all of this if the packet is already routed,
	 * f.e. by something like SCTP.
	 */
392
	rcu_read_lock();
393
	inet_opt = rcu_dereference(inet->inet_opt);
394
	fl4 = &fl->u.ip4;
Eric Dumazet's avatar
Eric Dumazet committed
395
	rt = skb_rtable(skb);
396
	if (rt)
Linus Torvalds's avatar
Linus Torvalds committed
397
398
399
400
		goto packet_routed;

	/* Make sure we can route this packet. */
	rt = (struct rtable *)__sk_dst_check(sk, 0);
401
	if (!rt) {
402
		__be32 daddr;
Linus Torvalds's avatar
Linus Torvalds committed
403
404

		/* Use correct destination address if we have options. */
405
		daddr = inet->inet_daddr;
406
407
		if (inet_opt && inet_opt->opt.srr)
			daddr = inet_opt->opt.faddr;
Linus Torvalds's avatar
Linus Torvalds committed
408

409
410
411
412
		/* If this fails, retransmit mechanism of transport layer will
		 * keep trying until route appears or the connection times
		 * itself out.
		 */
413
		rt = ip_route_output_ports(net, fl4, sk,
414
415
416
417
418
419
420
421
					   daddr, inet->inet_saddr,
					   inet->inet_dport,
					   inet->inet_sport,
					   sk->sk_protocol,
					   RT_CONN_FLAGS(sk),
					   sk->sk_bound_dev_if);
		if (IS_ERR(rt))
			goto no_route;
422
		sk_setup_caps(sk, &rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
423
	}
424
	skb_dst_set_noref(skb, &rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
425
426

packet_routed:
427
	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
Linus Torvalds's avatar
Linus Torvalds committed
428
429
430
		goto no_route;

	/* OK, we know where to send it, allocate and build IP header. */
431
	skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
432
	skb_reset_network_header(skb);
433
	iph = ip_hdr(skb);
434
	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
WANG Cong's avatar
WANG Cong committed
435
	if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
Linus Torvalds's avatar
Linus Torvalds committed
436
437
438
		iph->frag_off = htons(IP_DF);
	else
		iph->frag_off = 0;
439
	iph->ttl      = ip_select_ttl(inet, &rt->dst);
Linus Torvalds's avatar
Linus Torvalds committed
440
	iph->protocol = sk->sk_protocol;
441
442
	ip_copy_addrs(iph, fl4);

Linus Torvalds's avatar
Linus Torvalds committed
443
444
	/* Transport layer set skb->h.foo itself. */

445
446
447
	if (inet_opt && inet_opt->opt.optlen) {
		iph->ihl += inet_opt->opt.optlen >> 2;
		ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
Linus Torvalds's avatar
Linus Torvalds committed
448
449
	}

450
	ip_select_ident_segs(net, skb, sk,
451
			     skb_shinfo(skb)->gso_segs ?: 1);
Linus Torvalds's avatar
Linus Torvalds committed
452

453
	/* TODO : should we use skb->sk here instead of sk ? */
Linus Torvalds's avatar
Linus Torvalds committed
454
	skb->priority = sk->sk_priority;
455
	skb->mark = sk->sk_mark;
Linus Torvalds's avatar
Linus Torvalds committed
456

457
	res = ip_local_out(net, sk, skb);
458
459
	rcu_read_unlock();
	return res;
Linus Torvalds's avatar
Linus Torvalds committed
460
461

no_route:
462
	rcu_read_unlock();
463
	IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds's avatar
Linus Torvalds committed
464
465
466
	kfree_skb(skb);
	return -EHOSTUNREACH;
}
467
EXPORT_SYMBOL(ip_queue_xmit);
Linus Torvalds's avatar
Linus Torvalds committed
468
469
470
471
472
473

static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
Eric Dumazet's avatar
Eric Dumazet committed
474
	skb_dst_drop(to);
475
	skb_dst_copy(to, from);
Linus Torvalds's avatar
Linus Torvalds committed
476
	to->dev = from->dev;
477
	to->mark = from->mark;
Linus Torvalds's avatar
Linus Torvalds committed
478
479
480
481
482
483
484

	/* Copy the flags to each fragment. */
	IPCB(to)->flags = IPCB(from)->flags;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
485
	nf_copy(to, from);
486
487
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
	to->ipvs_property = from->ipvs_property;
Linus Torvalds's avatar
Linus Torvalds committed
488
#endif
489
	skb_copy_secmark(to, from);
Linus Torvalds's avatar
Linus Torvalds committed
490
491
}

492
static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
493
		       unsigned int mtu,
494
		       int (*output)(struct net *, struct sock *, struct sk_buff *))
495
496
497
{
	struct iphdr *iph = ip_hdr(skb);

498
	if ((iph->frag_off & htons(IP_DF)) == 0)
499
		return ip_do_fragment(net, sk, skb, output);
500
501

	if (unlikely(!skb->ignore_df ||
502
503
		     (IPCB(skb)->frag_max_size &&
		      IPCB(skb)->frag_max_size > mtu))) {
504
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
505
506
507
508
509
510
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
			  htonl(mtu));
		kfree_skb(skb);
		return -EMSGSIZE;
	}

511
	return ip_do_fragment(net, sk, skb, output);
512
513
}

Linus Torvalds's avatar
Linus Torvalds committed
514
515
516
517
518
519
520
/*
 *	This IP datagram is too large to be sent in one piece.  Break it up into
 *	smaller pieces (each of size equal to IP header plus
 *	a block of the data of the original IP data part) that will yet fit in a
 *	single device frame, and queue such a frame for sending.
 */

521
522
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		   int (*output)(struct net *, struct sock *, struct sk_buff *))
Linus Torvalds's avatar
Linus Torvalds committed
523
524
525
526
527
{
	struct iphdr *iph;
	int ptr;
	struct net_device *dev;
	struct sk_buff *skb2;
528
	unsigned int mtu, hlen, left, len, ll_rs;
Linus Torvalds's avatar
Linus Torvalds committed
529
	int offset;
530
	__be16 not_last_frag;
Eric Dumazet's avatar
Eric Dumazet committed
531
	struct rtable *rt = skb_rtable(skb);
Linus Torvalds's avatar
Linus Torvalds committed
532
533
	int err = 0;

534
	dev = rt->dst.dev;
Linus Torvalds's avatar
Linus Torvalds committed
535

536
537
538
539
540
	/* for offloaded checksums cleanup checksum before fragmentation */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

Linus Torvalds's avatar
Linus Torvalds committed
541
542
543
544
	/*
	 *	Point into the IP datagram header.
	 */

545
	iph = ip_hdr(skb);
Linus Torvalds's avatar
Linus Torvalds committed
546

547
	mtu = ip_skb_dst_mtu(skb);
548
549
	if (IPCB(skb)->frag_max_size && IPCB(skb)->frag_max_size < mtu)
		mtu = IPCB(skb)->frag_max_size;
Linus Torvalds's avatar
Linus Torvalds committed
550
551
552
553
554
555

	/*
	 *	Setup starting values.
	 */

	hlen = iph->ihl * 4;
556
	mtu = mtu - hlen;	/* Size of data space */
Herbert Xu's avatar
Herbert Xu committed
557
	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
Linus Torvalds's avatar
Linus Torvalds committed
558
559
560
561
562
563
564
565

	/* When frag_list is given, use it. First, check its validity:
	 * some transformers could create wrong frag_list or break existing
	 * one, it is not prohibited. In this case fall back to copying.
	 *
	 * LATER: this step can be merged to real generation of fragments,
	 * we can switch to copy when see the first bad fragment.
	 */
566
	if (skb_has_frag_list(skb)) {
567
		struct sk_buff *frag, *frag2;
Linus Torvalds's avatar
Linus Torvalds committed
568
569
570
571
		int first_len = skb_pagelen(skb);

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
572
		    ip_is_fragment(iph) ||
Linus Torvalds's avatar
Linus Torvalds committed
573
574
575
		    skb_cloned(skb))
			goto slow_path;

576
		skb_walk_frags(skb, frag) {
Linus Torvalds's avatar
Linus Torvalds committed
577
578
579
580
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < hlen)
581
				goto slow_path_clean;
Linus Torvalds's avatar
Linus Torvalds committed
582
583
584

			/* Partially cloned skb? */
			if (skb_shared(frag))
585
				goto slow_path_clean;
586
587
588
589
590
591

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
592
			skb->truesize -= frag->truesize;
Linus Torvalds's avatar
Linus Torvalds committed
593
594
595
596
597
598
599
		}

		/* Everything is OK. Generate! */

		err = 0;
		offset = 0;
		frag = skb_shinfo(skb)->frag_list;
600
		skb_frag_list_init(skb);
Linus Torvalds's avatar
Linus Torvalds committed
601
602
603
604
605
606
607
608
609
610
611
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		iph->tot_len = htons(first_len);
		iph->frag_off = htons(IP_MF);
		ip_send_check(iph);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
612
				skb_reset_transport_header(frag);
613
614
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
615
				memcpy(skb_network_header(frag), iph, hlen);
616
				iph = ip_hdr(frag);
Linus Torvalds's avatar
Linus Torvalds committed
617
618
619
620
621
622
				iph->tot_len = htons(frag->len);
				ip_copy_metadata(frag, skb);
				if (offset == 0)
					ip_options_fragment(frag);
				offset += skb->len - hlen;
				iph->frag_off = htons(offset>>3);
623
				if (frag->next)
Linus Torvalds's avatar
Linus Torvalds committed
624
625
626
627
628
					iph->frag_off |= htons(IP_MF);
				/* Ready, complete checksum */
				ip_send_check(iph);
			}

629
			err = output(net, sk, skb);
Linus Torvalds's avatar
Linus Torvalds committed
630

631
			if (!err)
632
				IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
Linus Torvalds's avatar
Linus Torvalds committed
633
634
635
636
637
638
639
640
641
			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		if (err == 0) {
642
			IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
Linus Torvalds's avatar
Linus Torvalds committed
643
644
645
646
647
648
649
650
			return 0;
		}

		while (frag) {
			skb = frag->next;
			kfree_skb(frag);
			frag = skb;
		}
651
		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
652
		return err;
653
654
655
656
657
658
659
660
661

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
Linus Torvalds's avatar
Linus Torvalds committed
662
663
664
	}

slow_path:
665
	iph = ip_hdr(skb);
666

Linus Torvalds's avatar
Linus Torvalds committed
667
	left = skb->len - hlen;		/* Space per frame */
668
	ptr = hlen;		/* Where to start from */
Linus Torvalds's avatar
Linus Torvalds committed
669

670
	ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
671

Linus Torvalds's avatar
Linus Torvalds committed
672
673
674
675
676
677
678
679
680
681
682
	/*
	 *	Fragment the datagram.
	 */

	offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
	not_last_frag = iph->frag_off & htons(IP_MF);

	/*
	 *	Keep copying data until we run out.
	 */

Stephen Hemminger's avatar
Stephen Hemminger committed
683
	while (left > 0) {
Linus Torvalds's avatar
Linus Torvalds committed
684
685
686
687
		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
Lucas De Marchi's avatar
Lucas De Marchi committed
688
		/* IF: we are not sending up to and including the packet end
Linus Torvalds's avatar
Linus Torvalds committed
689
690
691
692
693
		   then align the next start on an eight byte boundary */
		if (len < left)	{
			len &= ~7;
		}

694
695
696
		/* Allocate buffer */
		skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
		if (!skb2) {
Linus Torvalds's avatar
Linus Torvalds committed
697
698
699
700
701
702
703
704
705
706
707
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip_copy_metadata(skb2, skb);
		skb_reserve(skb2, ll_rs);
		skb_put(skb2, len + hlen);
708
		skb_reset_network_header(skb2);
709
		skb2->transport_header = skb2->network_header + hlen;
Linus Torvalds's avatar
Linus Torvalds committed
710
711
712
713
714
715
716
717
718
719
720
721
722

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */

		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */

723
		skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
Linus Torvalds's avatar
Linus Torvalds committed
724
725
726
727

		/*
		 *	Copy a block of the IP datagram.
		 */
728
		if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
Linus Torvalds's avatar
Linus Torvalds committed
729
730
731
732
733
734
			BUG();
		left -= len;

		/*
		 *	Fill in the new header fields.
		 */
735
		iph = ip_hdr(skb2);
Linus Torvalds's avatar
Linus Torvalds committed
736
737
		iph->frag_off = htons((offset >> 3));

738
739
740
		if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
			iph->frag_off |= htons(IP_DF);

Linus Torvalds's avatar
Linus Torvalds committed
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
		/* ANK: dirty, but effective trick. Upgrade options only if
		 * the segment to be fragmented was THE FIRST (otherwise,
		 * options are already fixed) and make it ONCE
		 * on the initial skb, so that all the following fragments
		 * will inherit fixed options.
		 */
		if (offset == 0)
			ip_options_fragment(skb);

		/*
		 *	Added AC : If we are fragmenting a fragment that's not the
		 *		   last fragment then keep MF on each bit
		 */
		if (left > 0 || not_last_frag)
			iph->frag_off |= htons(IP_MF);
		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		iph->tot_len = htons(len + hlen);

		ip_send_check(iph);

766
		err = output(net, sk, skb2);
Linus Torvalds's avatar
Linus Torvalds committed
767
768
		if (err)
			goto fail;
769

770
		IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
Linus Torvalds's avatar
Linus Torvalds committed
771
	}
772
	consume_skb(skb);
773
	IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
Linus Torvalds's avatar
Linus Torvalds committed
774
775
776
	return err;

fail:
777
	kfree_skb(skb);
778
	IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
Linus Torvalds's avatar
Linus Torvalds committed
779
780
	return err;
}
781
EXPORT_SYMBOL(ip_do_fragment);
782

Linus Torvalds's avatar
Linus Torvalds committed
783
784
785
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
786
	struct msghdr *msg = from;
Linus Torvalds's avatar
Linus Torvalds committed
787

788
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
789
		if (copy_from_iter(to, len, &msg->msg_iter) != len)
Linus Torvalds's avatar
Linus Torvalds committed
790
791
			return -EFAULT;
	} else {
792
		__wsum csum = 0;
793
		if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len)
Linus Torvalds's avatar
Linus Torvalds committed
794
795
796
797
798
			return -EFAULT;
		skb->csum = csum_block_add(skb->csum, csum, odd);
	}
	return 0;
}
799
EXPORT_SYMBOL(ip_generic_getfrag);
Linus Torvalds's avatar
Linus Torvalds committed
800

801
/*
 * Checksum @copy bytes of @page starting at @offset.  The page is mapped
 * with kmap() for the duration of the computation and unmapped again
 * before returning.
 */
static inline __wsum
csum_page(struct page *page, int offset, int copy)
{
	char *mapped = kmap(page);
	__wsum sum = csum_partial(mapped + offset, copy, 0);

	kunmap(page);
	return sum;
}

812
static inline int ip_ufo_append_data(struct sock *sk,
813
			struct sk_buff_head *queue,
814
815
816
			int getfrag(void *from, char *to, int offset, int len,
			       int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
817
			int transhdrlen, int maxfraglen, unsigned int flags)
818
819
820
821
822
823
824
825
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP fragmentation offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
826
827
	skb = skb_peek_tail(queue);
	if (!skb) {
828
829
830
831
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);

832
		if (!skb)
833
834
835
836
837
838
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
839
		skb_put(skb, fragheaderlen + transhdrlen);
840
841

		/* initialize network header pointer */
842
		skb_reset_network_header(skb);
843
844

		/* initialize protocol header pointer */
845
		skb->transport_header = skb->network_header + fragheaderlen;
846
847
848

		skb->csum = 0;

849
		__skb_queue_tail(queue, skb);
850
851
	} else if (skb_is_gso(skb)) {
		goto append;
852
	}
853

854
855
856
857
858
859
	skb->ip_summed = CHECKSUM_PARTIAL;
	/* specify the length of each IP datagram fragment */
	skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;

append:
860
861
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
862
863
}

864
865
866
static int __ip_append_data(struct sock *sk,
			    struct flowi4 *fl4,
			    struct sk_buff_head *queue,
867
			    struct inet_cork *cork,
868
			    struct page_frag *pfrag,
869
870
871
872
			    int getfrag(void *from, char *to, int offset,
					int len, int odd, struct sk_buff *skb),
			    void *from, int length, int transhdrlen,
			    unsigned int flags)
Linus Torvalds's avatar
Linus Torvalds committed
873
874
875
876
{
	struct inet_sock *inet = inet_sk(sk);
	struct sk_buff *skb;

877
	struct ip_options *opt = cork->opt;
Linus Torvalds's avatar
Linus Torvalds committed
878
879
880
881
882
883
	int hh_len;
	int exthdrlen;
	int mtu;
	int copy;
	int err;
	int offset = 0;
884
	unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
Linus Torvalds's avatar
Linus Torvalds committed
885
	int csummode = CHECKSUM_NONE;
886
	struct rtable *rt = (struct rtable *)cork->dst;
887
	u32 tskey = 0;
Linus Torvalds's avatar
Linus Torvalds committed
888

889
890
891
	skb = skb_peek_tail(queue);

	exthdrlen = !skb ? rt->dst.header_len : 0;
892
	mtu = cork->fragsize;
893
894
895
	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;
Linus Torvalds's avatar
Linus Torvalds committed
896

897
	hh_len = LL_RESERVED_SPACE(rt->dst.dev);
Linus Torvalds's avatar
Linus Torvalds committed
898
899
900

	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
WANG Cong's avatar
WANG Cong committed
901
	maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
Linus Torvalds's avatar
Linus Torvalds committed
902

903
	if (cork->length + length > maxnonfragsize - fragheaderlen) {
904
		ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
905
			       mtu - (opt ? opt->optlen : 0));
Linus Torvalds's avatar
Linus Torvalds committed
906
907
908
909
910
911
912
913
914
		return -EMSGSIZE;
	}

	/*
	 * transhdrlen > 0 means that this is the first fragment and we wish
	 * it won't be fragmented in the future.
	 */
	if (transhdrlen &&
	    length + fragheaderlen <= mtu &&
915
	    rt->dst.dev->features & NETIF_F_V4_CSUM &&
916
	    !(flags & MSG_MORE) &&
Linus Torvalds's avatar
Linus Torvalds committed
917
	    !exthdrlen)
918
		csummode = CHECKSUM_PARTIAL;
Linus Torvalds's avatar
Linus Torvalds committed
919

920
	cork->length += length;
921
	if (((length > mtu) || (skb && skb_is_gso(skb))) &&
922
	    (sk->sk_protocol == IPPROTO_UDP) &&
923
	    (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
924
	    (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
925
926
		err = ip_ufo_append_data(sk, queue, getfrag, from, length,
					 hh_len, fragheaderlen, transhdrlen,
927
					 maxfraglen, flags);
928
		if (err)
929
930
931
			goto error;
		return 0;
	}
Linus Torvalds's avatar
Linus Torvalds committed
932
933
934
935
936
937
938
939

	/* So, what's going on in the loop below?
	 *
	 * We use calculated fragment length to generate chained skb,
	 * each of segments is IP fragment ready for sending to network after
	 * adding appropriate IP header.
	 */

940
	if (!skb)
Linus Torvalds's avatar
Linus Torvalds committed
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = mtu - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;
		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > mtu - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;
			fraglen = datalen + fragheaderlen;

971
			if ((flags & MSG_MORE) &&
972
			    !(rt->dst.dev->features&NETIF_F_SG))
Linus Torvalds's avatar
Linus Torvalds committed
973
974
				alloclen = mtu;
			else
975
				alloclen = fraglen;
Linus Torvalds's avatar
Linus Torvalds committed
976

977
978
			alloclen += exthdrlen;

Linus Torvalds's avatar
Linus Torvalds committed
979
980
981
982
983
			/* The last fragment gets additional space at tail.
			 * Note, with MSG_MORE we overallocate on fragments,
			 * because we have no idea what fragment will be
			 * the last.
			 */
984
			if (datalen == length + fraggap)
985
				alloclen += rt->dst.trailer_len;
986

Linus Torvalds's avatar
Linus Torvalds committed
987
			if (transhdrlen) {
988
				skb = sock_alloc_send_skb(sk,
Linus Torvalds's avatar
Linus Torvalds committed
989
990
991
992
993
994
						alloclen + hh_len + 15,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
995
					skb = sock_wmalloc(sk,
Linus Torvalds's avatar
Linus Torvalds committed
996
997
							   alloclen + hh_len + 15, 1,
							   sk->sk_allocation);
998
				if (unlikely(!skb))
Linus Torvalds's avatar
Linus Torvalds committed
999
1000
					err = -ENOBUFS;
			}
1001
			if (!skb)
Linus Torvalds's avatar
Linus Torvalds committed
1002
1003
1004
1005
1006
1007
1008
1009
				goto error;

			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			skb_reserve(skb, hh_len);
1010
1011

			/* only the initial fragment is time stamped */
1012
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
1013
			cork->tx_flags = 0;
1014
1015
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
Linus Torvalds's avatar
Linus Torvalds committed
1016
1017
1018
1019

			/*
			 *	Find where to start putting bytes.
			 */
1020
			data = skb_put(skb, fraglen + exthdrlen);
1021
			skb_set_network_header(skb, exthdrlen);
1022
1023
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
1024
			data += fragheaderlen + exthdrlen;
Linus Torvalds's avatar
Linus Torvalds committed
1025
1026
1027
1028
1029
1030
1031
1032

			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
1033
				pskb_trim_unique(skb_prev, maxfraglen);
Linus Torvalds's avatar
Linus Torvalds committed
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
			}

			copy = datalen - transhdrlen - fraggap;
			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue.
			 */
1052
			__skb_queue_tail(queue, skb);
Linus Torvalds's avatar
Linus Torvalds committed
1053
1054
1055
1056
1057
1058
			continue;
		}

		if (copy > length)
			copy = length;

1059
		if (!(rt->dst.dev->features&NETIF_F_SG)) {
Linus Torvalds's avatar
Linus Torvalds committed
1060
1061
1062
			unsigned int off;

			off = skb->len;
1063
			if (getfrag(from, skb_put(skb, copy),
Linus Torvalds's avatar
Linus Torvalds committed
1064
1065
1066
1067
1068
1069
1070
1071
					offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

1072
1073
			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
Linus Torvalds's avatar
Linus Torvalds committed
1074
				goto error;
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
Linus Torvalds's avatar
Linus Torvalds committed
1086
			}
1087
1088
1089
1090
1091
1092
1093
1094
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
Linus Torvalds's avatar
Linus Torvalds committed
1095
1096
			skb->len += copy;
			skb->data_len += copy;
1097
1098
			skb