datagram.c 18.7 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *	This is used because UDP, RAW, PACKET, DDP, IPX, AX.25 and
 *	NetROM layer all have identical poll code and mostly
 *	identical recvmsg() code. So we share it here. The poll was
 *	shared before but buried in udp.c so I moved it.
 *
12
 *	Authors:	Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
Linus Torvalds's avatar
Linus Torvalds committed
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
 *						     udp.c code)
 *
 *	Fixes:
 *		Alan Cox	:	NULL return from skb_peek_copy()
 *					understood
 *		Alan Cox	:	Rewrote skb_read_datagram to avoid the
 *					skb_peek_copy stuff.
 *		Alan Cox	:	Added support for SOCK_SEQPACKET.
 *					IPX can no longer use the SO_TYPE hack
 *					but AX.25 now works right, and SPX is
 *					feasible.
 *		Alan Cox	:	Fixed write poll of non IP protocol
 *					crash.
 *		Florian  La Roche:	Changed for my new skbuff handling.
 *		Darryl Miles	:	Fixed non-blocking SOCK_SEQPACKET.
 *		Linus Torvalds	:	BSD semantic fixes.
 *		Alan Cox	:	Datagram iovec handling
 *		Darryl Miles	:	Fixed non-blocking SOCK_STREAM.
 *		Alan Cox	:	POSIXisms
 *		Pete Wyckoff    :       Unconnected accept() fix.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
49
#include <linux/spinlock.h>
50
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
51
52
53
54

#include <net/protocol.h>
#include <linux/skbuff.h>

55
56
57
#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
58
#include <trace/events/skb.h>
59
#include <net/busy_poll.h>
Linus Torvalds's avatar
Linus Torvalds committed
60
61
62
63
64
65
66
67
68

/*
 *	Is a socket 'connection oriented' ?
 *	True for SOCK_SEQPACKET and SOCK_STREAM sockets only.
 */
static inline int connection_based(struct sock *sk)
{
	switch (sk->sk_type) {
	case SOCK_SEQPACKET:
	case SOCK_STREAM:
		return 1;
	default:
		return 0;
	}
}

69
static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
70
71
72
73
74
75
76
77
78
79
80
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}
/*
 * Wait for the last received packet to be different from skb
 * (i.e. for a new packet to arrive at the tail of the receive queue).
 *
 * Returns 0 when the caller should rescan the receive queue, and
 * non-zero when the receive should stop: either an error (reported
 * via *err) or a clean shutdown (*err == 0, return value 1).
 */
static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				 const struct sk_buff *skb)
{
	int error;
	/* Filter wakeups through receiver_wake_function (POLLIN/POLLERR only) */
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	/* Queue tail changed since the caller looked: new data, rescan. */
	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;	/* non-zero stops the caller's loop without an error */
	goto out;
}

/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function will lock the socket if a skb is returned, so the caller
 *	needs to unlock the socket in that case (usually by calling
 *	skb_free_datagram)
 *
 *	* It does not lock socket since today. This function is
 *	* free of race conditions. This measure should/can improve
 *	* significantly datagram socket latencies at high loads,
 *	* when data copying to user space takes lots of time.
 *	* (BTW I've just killed the last cli() in IP/IPv6/core/netlink/packet
 *	*  8) Great win.)
 *	*			                    --ANK (980729)
 *
 *	The order of the tests when we find no data waiting are specified
 *	quite explicitly by POSIX 1003.1g, don't change them without having
 *	the standard around please.
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		unsigned long cpu_flags;
		struct sk_buff_head *queue = &sk->sk_receive_queue;
		int _off = *off;

		/* 'last' records the queue tail we saw, so that
		 * wait_for_more_packets() can detect new arrivals. */
		last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				/* Skip skbs wholly before the requested peek
				 * offset; empty, not-yet-peeked skbs at offset
				 * zero are still returned. */
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}
		spin_unlock_irqrestore(&queue->lock, cpu_flags);

		/* Busy-poll the device before sleeping, if enabled. */
		if (sk_can_busy_loop(sk) &&
		    sk_busy_loop(sk, flags & MSG_DONTWAIT))
			continue;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_more_packets(sk, err, &timeo, last));

	/* wait_for_more_packets() already filled *err */
	return NULL;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

230
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
231
232
				  int noblock, int *err)
{
233
	int peeked, off = 0;
234
235

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
236
				   &peeked, &off, err);
237
}
238
EXPORT_SYMBOL(skb_recv_datagram);
/*
 *	skb_free_datagram - release an skb obtained via skb_recv_datagram()
 *	and give partially accounted receive-buffer memory back to the
 *	socket (sk_mem_reclaim_partial).
 */
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

/*
 *	skb_free_datagram_locked - free an skb, taking the socket lock only
 *	when this caller drops the last reference (the orphan + memory
 *	accounting below must run under the lock).
 */
void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
	bool slow;

	/* Fast path: sole owner, no atomic decrement needed. The smp_rmb()
	 * orders the users read before the frees below -
	 * NOTE(review): the pairing barrier lives in the skb refcounting
	 * code, not visible in this file; confirm against skbuff core. */
	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;	/* other references remain; nothing to free yet */

	slow = lock_sock_fast(sk);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);
/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */

int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		/* A peeked skb still sits on the queue holding an extra
		 * reference; unlink it and drop that reference if it is
		 * still at the head. Otherwise report -ENOENT. */
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	/* Count the packet as dropped and release our reference. */
	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);

/**
 *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 on success, -EFAULT if a copy to user space faults.
 *	Note: the iovec is modified during the copy.
 */
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
			    struct iovec *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset - start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	/* Recurse into the frag list, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iovec(frag_iter,
						    offset - start,
						    to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_iovec);
/**
 *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@to_offset: offset in the io vector to start copying to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy.
 */
int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
				  const struct iovec *to, int to_offset,
				  int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
						offset - start, to_offset, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}

	/* Recurse into the frag list, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_const_iovec(frag_iter,
							  offset - start,
							  to, to_offset,
							  copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_const_iovec);

/**
 *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: io vector to copy from
 *	@from_offset: offset in the io vector to start copying from
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy.
 */
int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
				 const struct iovec *from, int from_offset,
				 int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
					copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from_offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_fromiovecend(vaddr + frag->page_offset +
						  offset - start,
						  from, from_offset, copy);
			kunmap(page);
			if (err)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}

	/* Recurse into the frag list, if any. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iovec(frag_iter,
							 offset - start,
							 from,
							 from_offset,
							 copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);

/*
 *	skb_copy_and_csum_datagram - copy @len bytes of @skb (from @offset)
 *	to user space at @to, folding a checksum of the copied bytes into
 *	*csump as it goes (via csum_and_copy_to_user/csum_block_add).
 *
 *	Returns 0 on success, -EFAULT if the user-space copy faults.
 *	Walks the linear header, the page frags, then recurses into the
 *	frag list, like skb_copy_datagram_iovec().
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;	/* bytes already checksummed; needed by csum_block_add */

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8  *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
							frag->page_offset +
							offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

665
__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
666
{
667
	__sum16 sum;
668

669
	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
670
	if (likely(!sum)) {
671
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
672
673
674
675
676
			netdev_rx_csum_fault(skb->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return sum;
}
677
678
679
680
681
682
EXPORT_SYMBOL(__skb_checksum_complete_head);

/*
 *	Verify the checksum over the whole skb payload.
 */
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	int full_len = skb->len;

	return __skb_checksum_complete_head(skb, full_len);
}
EXPORT_SYMBOL(__skb_checksum_complete);

/**
 *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@iov: io vector
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy. Beware, in this case iovec
 *			   can be modified!
 */
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
				     int hlen, struct iovec *iov)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	/* Skip filled elements.
	 * Pretty silly, look at memcpy_toiovec, though 8)
	 */
	while (!iov->iov_len)
		iov++;

	if (iov->iov_len < chunk) {
		/* Payload spans iovec entries: verify the checksum over the
		 * whole skb first, then do a plain scattered copy. */
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		/* Single-entry fast path: checksum while copying, seeding
		 * with the header bytes that are not copied to the user. */
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
/**
 * 	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	/* Receive side shut down reads as EOF, i.e. readable. */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);