/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		The User Datagram Protocol (UDP).
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *		Hirokazu Takahashi, <taka@valinux.co.jp>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() calls
 *		Alan Cox	: 	stopped close while in use off icmp
 *					messages. Not a fix but a botch that
 *					for udp at least is 'valid'.
 *		Alan Cox	:	Fixed icmp handling properly
 *		Alan Cox	: 	Correct error for oversized datagrams
 *		Alan Cox	:	Tidied select() semantics.
 *		Alan Cox	:	udp_err() fixed properly, also now
 *					select and read wake correctly on errors
 *		Alan Cox	:	udp_send verify_area moved to avoid mem leak
 *		Alan Cox	:	UDP can count its memory
 *		Alan Cox	:	send to an unknown connection causes
 *					an ECONNREFUSED off the icmp, but
 *					does NOT close.
 *		Alan Cox	:	Switched to new sk_buff handlers. No more backlog!
 *		Alan Cox	:	Using generic datagram code. Even smaller and the PEEK
 *					bug no longer crashes it.
 *		Fred Van Kempen	: 	Net2e support for sk->broadcast.
 *		Alan Cox	:	Uses skb_free_datagram
 *		Alan Cox	:	Added get/set sockopt support.
 *		Alan Cox	:	Broadcasting without option set returns EACCES.
 *		Alan Cox	:	No wakeup calls. Instead we now use the callbacks.
 *		Alan Cox	:	Use ip_tos and ip_ttl
 *		Alan Cox	:	SNMP Mibs
 *		Alan Cox	:	MSG_DONTROUTE, and 0.0.0.0 support.
 *		Matt Dillon	:	UDP length checks.
 *		Alan Cox	:	Smarter af_inet used properly.
 *		Alan Cox	:	Use new kernel side addressing.
 *		Alan Cox	:	Incorrect return on truncated datagram receive.
 *	Arnt Gulbrandsen 	:	New udp_send and stuff
 *		Alan Cox	:	Cache last socket
 *		Alan Cox	:	Route cache
 *		Jon Peatfield	:	Minor efficiency fix to sendto().
 *		Mike Shaver	:	RFC1122 checks.
 *		Alan Cox	:	Nonblocking error fix.
 *	Willy Konynenberg	:	Transparent proxying support.
 *		Mike McLagan	:	Routing by source
 *		David S. Miller	:	New socket lookup architecture.
 *					Last socket cache retained as it
 *					does have a high hit rate.
 *		Olaf Kirch	:	Don't linearise iovec on sendmsg.
 *		Andi Kleen	:	Some cleanups, cache destination entry
 *					for connect.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *		Melvin Smith	:	Check msg_name not msg_namelen in sendto(),
 *					return ENOTCONN for unconnected sockets (POSIX)
 *		Janos Farkas	:	don't deliver multi/broadcasts to a different
 *					bound-to-device socket
 *	Hirokazu Takahashi	:	HW checksumming for outgoing UDP
 *					datagrams.
 *	Hirokazu Takahashi	:	sendfile() on UDP works now.
 *		Arnaldo C. Melo :	convert /proc/net/udp to seq_file
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	Derek Atkins <derek@ihtfp.com>: Add Encapsulation Support
 *	James Chapman		:	Add L2TP encapsulation type.
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "UDP: " fmt

#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/swap.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <trace/events/udp.h>
#include <linux/static_key.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>
#include "udp_impl.h"

struct udp_table udp_table __read_mostly;
EXPORT_SYMBOL(udp_table);

long sysctl_udp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_udp_mem);

int sysctl_udp_rmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_rmem_min);

int sysctl_udp_wmem_min __read_mostly;
EXPORT_SYMBOL(sysctl_udp_wmem_min);

atomic_long_t udp_memory_allocated;
EXPORT_SYMBOL(udp_memory_allocated);

#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
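
/*
 * Note: the hash table has at least UDP_HTABLE_SIZE_MIN slots, so no more
 * than MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN distinct ports can map to any
 * one chain; PORTS_PER_CHAIN bits are therefore enough for the
 * port-allocation bitmap used by udp_lib_lport_inuse().
 */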

static int udp_lib_lport_inuse(struct net *net, __u16 num,
			       const struct udp_hslot *hslot,
			       unsigned long *bitmap,
			       struct sock *sk,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2),
			       unsigned int log)
{
	struct sock *sk2;
	struct hlist_nulls_node *node;
	kuid_t uid = sock_i_uid(sk);

	sk_nulls_for_each(sk2, node, &hslot->head)
		if (net_eq(sock_net(sk2), net) &&
		    sk2 != sk &&
		    (bitmap || udp_sk(sk2)->udp_port_hash == num) &&
		    (!sk2->sk_reuse || !sk->sk_reuse) &&
		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    (!sk2->sk_reuseport || !sk->sk_reuseport ||
		      !uid_eq(uid, sock_i_uid(sk2))) &&
		    (*saddr_comp)(sk, sk2)) {
			if (bitmap)
				__set_bit(udp_sk(sk2)->udp_port_hash >> log,
					  bitmap);
			else
				return 1;
		}
	return 0;
}

/*
 * Note: we still hold spinlock of primary hash chain, so no other writer
 * can insert/delete a socket with local_port == num
 */
static int udp_lib_lport_inuse2(struct net *net, __u16 num,
			       struct udp_hslot *hslot2,
			       struct sock *sk,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2))
{
	struct sock *sk2;
	struct hlist_nulls_node *node;
	kuid_t uid = sock_i_uid(sk);
	int res = 0;

	spin_lock(&hslot2->lock);
	udp_portaddr_for_each_entry(sk2, node, &hslot2->head)
		if (net_eq(sock_net(sk2), net) &&
		    sk2 != sk &&
		    (udp_sk(sk2)->udp_port_hash == num) &&
		    (!sk2->sk_reuse || !sk->sk_reuse) &&
		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if ||
		     sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
		    (!sk2->sk_reuseport || !sk->sk_reuseport ||
		      !uid_eq(uid, sock_i_uid(sk2))) &&
		    (*saddr_comp)(sk, sk2)) {
			res = 1;
			break;
		}
	spin_unlock(&hslot2->lock);
	return res;
}

/**
 *  udp_lib_get_port  -  UDP/-Lite port lookup for IPv4 and IPv6
 *
 *  @sk:          socket struct in question
 *  @snum:        port number to look up
 *  @saddr_comp:  AF-dependent comparison of bound local IP addresses
 *  @hash2_nulladdr: AF-dependent hash value in secondary hash chains,
 *                   with NULL address
 */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
		       int (*saddr_comp)(const struct sock *sk1,
					 const struct sock *sk2),
		     unsigned int hash2_nulladdr)
{
	struct udp_hslot *hslot, *hslot2;
	struct udp_table *udptable = sk->sk_prot->h.udp_table;
	int    error = 1;
	struct net *net = sock_net(sk);

	if (!snum) {
		int low, high, remaining;
		unsigned int rand;
		unsigned short first, last;
		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);

		inet_get_local_port_range(&low, &high);
		remaining = (high - low) + 1;

		rand = net_random();
		first = (((u64)rand * remaining) >> 32) + low;
		/*
		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
		 */
		rand = (rand | 1) * (udptable->mask + 1);
		last = first + udptable->mask + 1;
		do {
			hslot = udp_hashslot(udptable, net, first);
			bitmap_zero(bitmap, PORTS_PER_CHAIN);
			spin_lock_bh(&hslot->lock);
			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
					    saddr_comp, udptable->log);

			snum = first;
			/*
			 * Iterate on all possible values of snum for this hash.
			 * Using steps of an odd multiple of UDP_HTABLE_SIZE
			 * gives us randomization and full range coverage.
			 */
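			/*
			 * Note: rand is an odd multiple of (udptable->mask + 1),
			 * so "snum += rand" never leaves this hash slot, and the
			 * odd factor is coprime with the number of ports per
			 * slot (a power of two), so every port of the slot is
			 * visited before snum wraps back to first.
			 */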
			do {
				if (low <= snum && snum <= high &&
				    !test_bit(snum >> udptable->log, bitmap) &&
				    !inet_is_reserved_local_port(snum))
					goto found;
				snum += rand;
			} while (snum != first);
			spin_unlock_bh(&hslot->lock);
		} while (++first != last);
		goto fail;
	} else {
		hslot = udp_hashslot(udptable, net, snum);
		spin_lock_bh(&hslot->lock);
		if (hslot->count > 10) {
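			/*
			 * The primary chain for this port is already long, so do
			 * the conflict check on the secondary {address, port}
			 * chains instead, unless the primary chain happens to be
			 * the shorter of the two.
			 */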
			int exist;
			unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;

			slot2          &= udptable->mask;
			hash2_nulladdr &= udptable->mask;

			hslot2 = udp_hashslot2(udptable, slot2);
			if (hslot->count < hslot2->count)
				goto scan_primary_hash;

			exist = udp_lib_lport_inuse2(net, snum, hslot2,
						     sk, saddr_comp);
			if (!exist && (hash2_nulladdr != slot2)) {
				hslot2 = udp_hashslot2(udptable, hash2_nulladdr);
				exist = udp_lib_lport_inuse2(net, snum, hslot2,
							     sk, saddr_comp);
			}
			if (exist)
				goto fail_unlock;
			else
				goto found;
		}
scan_primary_hash:
		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk,
					saddr_comp, 0))
			goto fail_unlock;
	}
found:
	inet_sk(sk)->inet_num = snum;
	udp_sk(sk)->udp_port_hash = snum;
	udp_sk(sk)->udp_portaddr_hash ^= snum;
	if (sk_unhashed(sk)) {
		sk_nulls_add_node_rcu(sk, &hslot->head);
		hslot->count++;
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
		spin_lock(&hslot2->lock);
		hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
					 &hslot2->head);
		hslot2->count++;
		spin_unlock(&hslot2->lock);
	}
	error = 0;
fail_unlock:
	spin_unlock_bh(&hslot->lock);
fail:
	return error;
}
EXPORT_SYMBOL(udp_lib_get_port);

static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
	struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);

	return (!ipv6_only_sock(sk2) &&
		 (!inet1->inet_rcv_saddr || !inet2->inet_rcv_saddr ||
		   inet1->inet_rcv_saddr == inet2->inet_rcv_saddr));
}

static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
				       unsigned int port)
{
	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
}

int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	unsigned int hash2_nulladdr =
		udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
	unsigned int hash2_partial =
		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);

	/* precompute partial secondary hash */
	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
	return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr);
}
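
/*
 * Lookup scoring used below: a socket bound to the right port starts at
 * 2 (AF_INET) or 1 (dual-stack AF_INET6), and each specific binding it
 * has (local address, remote address, remote port, bound device) must
 * match the packet exactly and adds 4, so a fully connected exact match
 * always beats a wildcard listener.
 */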

static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr,
			 unsigned short hnum,
			 __be16 sport, __be32 daddr, __be16 dport, int dif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
			!ipv6_only_sock(sk)) {
		struct inet_sock *inet = inet_sk(sk);

		score = (sk->sk_family == PF_INET ? 2 : 1);
		if (inet->inet_rcv_saddr) {
			if (inet->inet_rcv_saddr != daddr)
				return -1;
			score += 4;
		}
		if (inet->inet_daddr) {
			if (inet->inet_daddr != saddr)
				return -1;
			score += 4;
		}
		if (inet->inet_dport) {
			if (inet->inet_dport != sport)
				return -1;
			score += 4;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score += 4;
		}
	}
	return score;
}

/*
 * In this second variant, we check (daddr, dport) matches (inet_rcv_saddr, inet_num)
 */
static inline int compute_score2(struct sock *sk, struct net *net,
				 __be32 saddr, __be16 sport,
				 __be32 daddr, unsigned int hnum, int dif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && !ipv6_only_sock(sk)) {
		struct inet_sock *inet = inet_sk(sk);

		if (inet->inet_rcv_saddr != daddr)
			return -1;
		if (inet->inet_num != hnum)
			return -1;

		score = (sk->sk_family == PF_INET ? 2 : 1);
		if (inet->inet_daddr) {
			if (inet->inet_daddr != saddr)
				return -1;
			score += 4;
		}
		if (inet->inet_dport) {
			if (inet->inet_dport != sport)
				return -1;
			score += 4;
		}
		if (sk->sk_bound_dev_if) {
			if (sk->sk_bound_dev_if != dif)
				return -1;
			score += 4;
		}
	}
	return score;
}


/* called with rcu_read_lock() */
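/*
 * When several SO_REUSEPORT sockets tie on the best score, one of them is
 * picked with (roughly) uniform probability: the k-th tying socket replaces
 * the current result with probability 1/k, using a per-flow hash advanced
 * by next_pseudo_random32() as the randomness source.
 */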
static struct sock *udp4_lib_lookup2(struct net *net,
		__be32 saddr, __be16 sport,
		__be32 daddr, unsigned int hnum, int dif,
		struct udp_hslot *hslot2, unsigned int slot2)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	int score, badness, matches = 0, reuseport = 0;
	u32 hash = 0;

begin:
	result = NULL;
	badness = 0;
	udp_portaddr_for_each_entry_rcu(sk, node, &hslot2->head) {
		score = compute_score2(sk, net, saddr, sport,
				      daddr, hnum, dif);
		if (score > badness) {
			result = sk;
			badness = score;
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				hash = inet_ehashfn(net, daddr, hnum,
						    saddr, sport);
				matches = 1;
			}
		} else if (score == badness && reuseport) {
			matches++;
			if (((u64)hash * matches) >> 32 == 0)
				result = sk;
			hash = next_pseudo_random32(hash);
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot2)
		goto begin;
	if (result) {
		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
			result = NULL;
		else if (unlikely(compute_score2(result, net, saddr, sport,
				  daddr, hnum, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	return result;
}

/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 * harder than this. -DaveM
 */
struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
		__be16 sport, __be32 daddr, __be16 dport,
		int dif, struct udp_table *udptable)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	unsigned short hnum = ntohs(dport);
	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
	int score, badness, matches = 0, reuseport = 0;
	u32 hash = 0;

	rcu_read_lock();
	if (hslot->count > 10) {
		hash2 = udp4_portaddr_hash(net, daddr, hnum);
		slot2 = hash2 & udptable->mask;
		hslot2 = &udptable->hash2[slot2];
		if (hslot->count < hslot2->count)
			goto begin;

		result = udp4_lib_lookup2(net, saddr, sport,
					  daddr, hnum, dif,
					  hslot2, slot2);
		if (!result) {
			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
			slot2 = hash2 & udptable->mask;
			hslot2 = &udptable->hash2[slot2];
			if (hslot->count < hslot2->count)
				goto begin;

			result = udp4_lib_lookup2(net, saddr, sport,
						  htonl(INADDR_ANY), hnum, dif,
						  hslot2, slot2);
		}
		rcu_read_unlock();
		return result;
	}
begin:
	result = NULL;
	badness = 0;
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, saddr, hnum, sport,
				      daddr, dport, dif);
		if (score > badness) {
			result = sk;
			badness = score;
			reuseport = sk->sk_reuseport;
			if (reuseport) {
				hash = inet_ehashfn(net, daddr, hnum,
						    saddr, sport);
				matches = 1;
			}
		} else if (score == badness && reuseport) {
			matches++;
			if (((u64)hash * matches) >> 32 == 0)
				result = sk;
			hash = next_pseudo_random32(hash);
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;

	if (result) {
		if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
			result = NULL;
		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
				  daddr, dport, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}
EXPORT_SYMBOL_GPL(__udp4_lib_lookup);

static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
						 __be16 sport, __be16 dport,
						 struct udp_table *udptable)
{
	struct sock *sk;
	const struct iphdr *iph = ip_hdr(skb);

	if (unlikely(sk = skb_steal_sock(skb)))
		return sk;
	else
		return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
					 iph->daddr, dport, inet_iif(skb),
					 udptable);
}

struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
			     __be32 daddr, __be16 dport, int dif)
{
	return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
}
EXPORT_SYMBOL_GPL(udp4_lib_lookup);

static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk,
					     __be16 loc_port, __be32 loc_addr,
					     __be16 rmt_port, __be32 rmt_addr,
					     int dif)
{
	struct hlist_nulls_node *node;
	struct sock *s = sk;
	unsigned short hnum = ntohs(loc_port);

	sk_nulls_for_each_from(s, node) {
		struct inet_sock *inet = inet_sk(s);

		if (!net_eq(sock_net(s), net) ||
		    udp_sk(s)->udp_port_hash != hnum ||
		    (inet->inet_daddr && inet->inet_daddr != rmt_addr) ||
		    (inet->inet_dport != rmt_port && inet->inet_dport) ||
		    (inet->inet_rcv_saddr &&
		     inet->inet_rcv_saddr != loc_addr) ||
		    ipv6_only_sock(s) ||
		    (s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
			continue;
		if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
			continue;
		goto found;
	}
	s = NULL;
found:
	return s;
}

/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.
 * Header points to the ip header of the error packet. We move
 * on past this. Then (as it used to claim before adjustment)
 * header points to the first 8 bytes of the udp header.  We need
 * to find the appropriate port.
 */

void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
{
	struct inet_sock *inet;
	const struct iphdr *iph = (const struct iphdr *)skb->data;
	struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct sock *sk;
	int harderr;
	int err;
	struct net *net = dev_net(skb->dev);

	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
			iph->saddr, uh->source, skb->dev->ifindex, udptable);
	if (sk == NULL) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;	/* No socket for error */
	}

	err = 0;
	harderr = 0;
	inet = inet_sk(sk);

	switch (type) {
	default:
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	case ICMP_SOURCE_QUENCH:
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		harderr = 1;
		break;
	case ICMP_DEST_UNREACH:
		if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
			ipv4_sk_update_pmtu(skb, sk, info);
			if (inet->pmtudisc != IP_PMTUDISC_DONT) {
				err = EMSGSIZE;
				harderr = 1;
				break;
			}
			goto out;
		}
		err = EHOSTUNREACH;
		if (code <= NR_ICMP_UNREACH) {
			harderr = icmp_err_convert[code].fatal;
			err = icmp_err_convert[code].errno;
		}
		break;
	case ICMP_REDIRECT:
		ipv4_sk_redirect(skb, sk);
		break;
	}

	/*
	 *      RFC1122: OK.  Passes ICMP errors back to application, as per
	 *	4.1.3.3.
	 */
	if (!inet->recverr) {
		if (!harderr || sk->sk_state != TCP_ESTABLISHED)
			goto out;
	} else
		ip_icmp_error(sk, skb, err, uh->dest, info, (u8 *)(uh+1));

	sk->sk_err = err;
	sk->sk_error_report(sk);
out:
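	/* Drop the reference taken by __udp4_lib_lookup(). */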
	sock_put(sk);
}

void udp_err(struct sk_buff *skb, u32 info)
{
	__udp4_lib_err(skb, info, &udp_table);
}

/*
 * Throw away all pending data and cancel the corking. Socket is locked.
 */
void udp_flush_pending_frames(struct sock *sk)
{
	struct udp_sock *up = udp_sk(sk);

	if (up->pending) {
		up->len = 0;
		up->pending = 0;
		ip_flush_pending_frames(sk);
	}
}
EXPORT_SYMBOL(udp_flush_pending_frames);

/**
 * 	udp4_hwcsum  -  handle outgoing HW checksumming
 * 	@skb: 	sk_buff containing the filled-in UDP header
 * 	        (checksum field must be zeroed out)
 *	@src:	source IP address
 *	@dst:	destination IP address
 */
static void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst)
{
	struct udphdr *uh = udp_hdr(skb);
	struct sk_buff *frags = skb_shinfo(skb)->frag_list;
	int offset = skb_transport_offset(skb);
	int len = skb->len - offset;
	int hlen = len;
	__wsum csum = 0;

	if (!frags) {
		/*
		 * Only one fragment on the socket.
		 */
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct udphdr, check);
		uh->check = ~csum_tcpudp_magic(src, dst, len,
					       IPPROTO_UDP, 0);
	} else {
		/*
		 * HW-checksum won't work as there are two or more
		 * fragments on the socket so that all csums of sk_buffs
		 * should be together
		 */
		do {
			csum = csum_add(csum, frags->csum);
			hlen -= frags->len;
		} while ((frags = frags->next));

		csum = skb_checksum(skb, offset, hlen, csum);
		skb->ip_summed = CHECKSUM_NONE;

		uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
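		/*
		 * RFC 768: a transmitted checksum of zero means "no checksum",
		 * so a computed value of zero is sent as all ones instead.
		 */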
		if (uh->check == 0)
			uh->check = CSUM_MANGLED_0;
	}
}

static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct udphdr *uh;
	int err = 0;
	int is_udplite = IS_UDPLITE(sk);
	int offset = skb_transport_offset(skb);
	int len = skb->len - offset;
	__wsum csum = 0;

	/*
	 * Create a UDP header
	 */
	uh = udp_hdr(skb);
	uh->source = inet->inet_sport;
	uh->dest = fl4->fl4_dport;
	uh->len = htons(len);
	uh->check = 0;

	if (is_udplite)  				 /*     UDP-Lite      */
		csum = udplite_csum(skb);

	else if (sk->sk_no_check == UDP_CSUM_NOXMIT) {   /* UDP csum disabled */

		skb->ip_summed = CHECKSUM_NONE;
		goto send;

	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */

		udp4_hwcsum(skb, fl4->saddr, fl4->daddr);
		goto send;

	} else
		csum = udp_csum(skb);

	/* add protocol-dependent pseudo-header */
	uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len,
				      sk->sk_protocol, csum);
	if (uh->check == 0)
		uh->check = CSUM_MANGLED_0;

send:
	err = ip_send_skb(sock_net(sk), skb);
	if (err) {
		if (err == -ENOBUFS && !inet->recverr) {
			UDP_INC_STATS_USER(sock_net(sk),
					   UDP_MIB_SNDBUFERRORS, is_udplite);
			err = 0;
		}
	} else
		UDP_INC_STATS_USER(sock_net(sk),
				   UDP_MIB_OUTDATAGRAMS, is_udplite);
	return err;
}

/*
 * Push out all pending data as one UDP datagram. Socket is locked.
 */
int udp_push_pending_frames(struct sock *sk)
{
	struct udp_sock  *up = udp_sk(sk);
	struct inet_sock *inet = inet_sk(sk);
	struct flowi4 *fl4 = &inet->cork.fl.u.ip4;
	struct sk_buff *skb;
	int err = 0;

	skb = ip_finish_skb(sk, fl4);
	if (!skb)
		goto out;

	err = udp_send_skb(skb, fl4);

out:
	up->len = 0;
	up->pending = 0;
	return err;
}
EXPORT_SYMBOL(udp_push_pending_frames);

int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		size_t len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct udp_sock *up = udp_sk(sk);
	struct flowi4 fl4_stack;
	struct flowi4 *fl4;
	int ulen = len;
	struct ipcm_cookie ipc;
	struct rtable *rt = NULL;
	int free = 0;
	int connected = 0;
	__be32 daddr, faddr, saddr;
	__be16 dport;
	u8  tos;
	int err, is_udplite = IS_UDPLITE(sk);
	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
	int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
	struct sk_buff *skb;
	struct ip_options_data opt_copy;

	if (len > 0xFFFF)
		return -EMSGSIZE;

	/*
	 *	Check the flags.
	 */

	if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */
		return -EOPNOTSUPP;

	ipc.opt = NULL;
	ipc.tx_flags = 0;

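	/* UDP-Lite's checksum covers only part of the payload, so it is
	 * computed separately (udplite_csum()) rather than while copying.
	 */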
	getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;

	fl4 = &inet->cork.fl.u.ip4;
	if (up->pending) {
		/*
		 * There are pending frames.
		 * The socket lock must be held while it's corked.
		 */
		lock_sock(sk);
		if (likely(up->pending)) {
			if (unlikely(up->pending != AF_INET)) {
				release_sock(sk);
				return -EINVAL;
			}
			goto do_append_data;
		}
		release_sock(sk);
	}
	ulen += sizeof(struct udphdr);

	/*
	 *	Get and verify the address.
	 */
	if (msg->msg_name) {
		struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
		if (msg->msg_namelen < sizeof(*usin))
			return -EINVAL;
		if (usin->sin_family != AF_INET) {
			if (usin->sin_family != AF_UNSPEC)
				return -EAFNOSUPPORT;
		}

		daddr = usin->sin_addr.s_addr;
		dport = usin->sin_port;
		if (dport == 0)
			return -EINVAL;
	} else {
		if (sk->sk_state != TCP_ESTABLISHED)
			return -EDESTADDRREQ;
		daddr = inet->inet_daddr;
		dport = inet->inet_dport;
		/* Open fast path for connected socket.
		   Route will not be used, if at least one option is set.
		 */
		connected = 1;
	}
	ipc.addr = inet->inet_saddr;

	ipc.oif = sk->sk_bound_dev_if;

	sock_tx_timestamp(sk, &ipc.tx_flags);

	if (msg->msg_controllen) {
		err = ip_cmsg_send(sock_net(sk), msg, &ipc);
		if (err)
			return err;
		if (ipc.opt)
			free = 1;
		connected = 0;
	}
	if (!ipc.opt) {
		struct ip_options_rcu *inet_opt;

		rcu_read_lock();
		inet_opt = rcu_dereference(inet->inet_opt);
		if (inet_opt) {
			memcpy(&opt_copy, inet_opt,
			       sizeof(*inet_opt) + inet_opt->opt.optlen);
			ipc.opt = &opt_copy.opt;
		}
		rcu_read_unlock();
	}

	saddr = ipc.addr;
	ipc.addr = faddr = daddr;

	if (ipc.opt && ipc.opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		faddr = ipc.opt->opt.faddr;
		connected = 0;
	}
	tos = RT_TOS(inet->tos);
	if (sock_flag(sk, SOCK_LOCALROUTE) ||
	    (msg->msg_flags & MSG_DONTROUTE) ||
	    (ipc.opt && ipc.opt->opt.is_strictroute)) {
		tos |= RTO_ONLINK;
		connected = 0;
	}

	if (ipv4_is_multicast(daddr)) {
		if (!ipc.oif)
			ipc.oif = inet->mc_index;
		if (!saddr)
			saddr = inet->mc_addr;
		connected = 0;
	} else if (!ipc.oif)
		ipc.oif = inet->uc_index;

	if (connected)
		rt = (struct rtable *)sk_dst_check(sk, 0);

	if (rt == NULL) {
		struct net *net = sock_net(sk);

		fl4 = &fl4_stack;
		flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
				   RT_SCOPE_UNIVERSE, sk->sk_protocol,
				   inet_sk_flowi_flags(sk)|FLOWI_FLAG_CAN_SLEEP,
				   faddr, saddr, dport, inet->inet_sport);

		security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
		rt = ip_route_output_flow(net, fl4, sk);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;
			if (err == -ENETUNREACH)
				IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
			goto out;
		}

		err = -EACCES;
		if ((rt->rt_flags & RTCF_BROADCAST) &&
		    !sock_flag(sk, SOCK_BROADCAST))
			goto out;
		if (connected)
			sk_dst_set(sk, dst_clone(&rt->dst));
	}

	if (msg->msg_flags&MSG_CONFIRM)
		goto do_confirm;
back_from_confirm:

	saddr = fl4->saddr;
	if (!ipc.addr)
		daddr = ipc.addr = fl4->daddr;

	/* Lockless fast path for the non-corking case. */
	if (!corkreq) {
		skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
				  sizeof(struct udphdr), &ipc, &rt,
				  msg->msg_flags);
		err = PTR_ERR(skb);
		if (!IS_ERR_OR_NULL(skb))
			err = udp_send_skb(skb, fl4);
		goto out;
	}

	lock_sock(sk);
	if (unlikely(up->pending)) {
		/* The socket is already corked while preparing it. */
		/* ... which is an evident application bug. --ANK */
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("cork app bug 2\n"));
		err = -EINVAL;
		goto out;
	}
	/*
	 *	Now cork the socket to pend data.
	 */
	fl4 = &inet->cork.fl.u.ip4;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->fl4_dport = dport;
	fl4->fl4_sport = inet->inet_sport;
	up->pending = AF_INET;

do_append_data:
	up->len += ulen;
	err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
			     sizeof(struct udphdr), &ipc, &rt,
			     corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
	if (err)
		udp_flush_pending_frames(sk);
	else if (!corkreq)
		err = udp_push_pending_frames(sk);
	else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
		up->pending = 0;
	release_sock(sk);

out:
	ip_rt_put(rt);
	if (free)
		kfree(ipc.opt);
	if (!err)
		return len;
	/*
	 * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space.  Reporting
	 * ENOBUFS might not be good (it's not tunable per se), but otherwise
	 * we don't have a good statistic (IpOutDiscards but it can be too many
	 * things).  We could add another new stat but at least for now that
	 * seems like overkill.
	 */
	if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
		UDP_INC_STATS_USER(sock_net(sk),
				UDP_MIB_SNDBUFERRORS, is_udplite);
	}
	return err;

do_confirm:
	dst_confirm(&rt->dst);
	if (!(msg->msg_flags&MSG_PROBE) || len)
		goto back_from_confirm;
	err = 0;
	goto out;
}
EXPORT_SYMBOL(udp_sendmsg);

int udp_sendpage(struct sock *sk, struct page *page, int offset,
		 size_t size, int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct udp_sock *up = udp_sk(sk);
	int ret;

	if (!up->pending) {
		struct msghdr msg = {	.msg_flags = flags|MSG_MORE };

		/* Call udp_sendmsg to specify destination address which
		 * sendpage interface can't pass.
		 * This will succeed only when the socket is connected.
		 */
		ret = udp_sendmsg(NULL, sk, &msg, 0);
		if (ret < 0)
			return ret;
	}

	lock_sock(sk);

	if (unlikely(!up->pending)) {
		release_sock(sk);

		LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("udp cork app bug 3\n"));
		return -EINVAL;
	}

	ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
			     page, offset, size, flags);
	if (ret == -EOPNOTSUPP) {
		release_sock(sk);
		return sock_no_sendpage(sk->sk_socket, page, offset,
					size, flags);
	}
	if (ret < 0) {
		udp_flush_pending_frames(sk);
		goto out;
	}

	up->len += size;
	if (!(up->corkflag || (flags&MSG_MORE)))
		ret = udp_push_pending_frames(sk);
	if (!ret)
		ret = size;
out:
	release_sock(sk);
	return ret;
}


/**
 *	first_packet_length	- return length of first packet in receive queue
 *	@sk: socket
 *
 *	Drops all bad checksum frames, until a valid one is found.
 *	Returns the length of found skb, or 0 if none is found.
 */
static unsigned int first_packet_length(struct sock *sk)
{
	struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned int res;

	__skb_queue_head_init(&list_kill);

	spin_lock_bh(&rcvq->lock);
	while ((skb = skb_peek(rcvq)) != NULL &&
		udp_lib_checksum_complete(skb)) {
		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS,
				 IS_UDPLITE(sk));
		UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
				 IS_UDPLITE(sk));
		atomic_inc(&sk->sk_drops);
		__skb_unlink(skb, rcvq);
		__skb_queue_tail(&list_kill, skb);
	}
	res = skb ? skb->len : 0;
	spin_unlock_bh(&rcvq->lock);

	if (!skb_queue_empty(&list_kill)) {
		bool slow = lock_sock_fast(sk);

		__skb_queue_purge(&list_kill);
		sk_mem_reclaim_partial(sk);
		unlock_sock_fast(sk, slow);
	}
	return res;
}

/*
 *	IOCTL requests applicable to the UDP protocol
 */

int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}

	case SIOCINQ:
	{
		unsigned int amount = first_packet_length(sk);

		if (amount)
			/*
			 * We will only return the amount
			 * of this packet since that is all
			 * that will be read.
			 */
			amount -= sizeof(struct udphdr);
