tcp_ipv4.c 74.4 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol(TCP).
 *
 *		IPv4 specific functions
 *
 *
 *		code split from:
 *		linux/ipv4/tcp.c
 *		linux/ipv4/tcp_input.c
 *		linux/ipv4/tcp_output.c
 *
 *		See tcp.c for author information
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 * Changes:
 *		David S. Miller	:	New socket lookup architecture.
 *					This code is dedicated to John Dyson.
 *		David S. Miller :	Change semantics of established hash,
 *					half is devoted to TIME_WAIT sockets
 *					and the rest go in the other half.
 *		Andi Kleen :		Add support for syncookies and fixed
 *					some bugs: ip options weren't passed to
 *					the TCP layer, missed a check for an
 *					ACK bit.
 *		Andi Kleen :		Implemented fast path mtu discovery.
 *	     				Fixed many serious bugs in the
 *					request_sock handling and moved
 *					most of it into the af independent code.
 *					Added tail drop and some other bugfixes.
 *					Added new listen semantics.
 *		Mike McLagan	:	Routing by source
 *	Juan Jose Ciarlante:		ip_dynaddr bits
 *		Andi Kleen:		various fixes.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year
 *					coma.
 *	Andi Kleen		:	Fix new listen.
 *	Andi Kleen		:	Fix accept error reporting.
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 */

53
#define pr_fmt(fmt) "TCP: " fmt
Linus Torvalds's avatar
Linus Torvalds committed
54

Herbert Xu's avatar
Herbert Xu committed
55
#include <linux/bottom_half.h>
Linus Torvalds's avatar
Linus Torvalds committed
56
57
58
59
60
61
62
63
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/jhash.h>
#include <linux/init.h>
#include <linux/times.h>
64
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
65

66
#include <net/net_namespace.h>
Linus Torvalds's avatar
Linus Torvalds committed
67
#include <net/icmp.h>
68
#include <net/inet_hashtables.h>
Linus Torvalds's avatar
Linus Torvalds committed
69
#include <net/tcp.h>
70
#include <net/transp_v6.h>
Linus Torvalds's avatar
Linus Torvalds committed
71
72
#include <net/ipv6.h>
#include <net/inet_common.h>
73
#include <net/timewait_sock.h>
Linus Torvalds's avatar
Linus Torvalds committed
74
#include <net/xfrm.h>
75
#include <net/netdma.h>
76
#include <net/secure_seq.h>
Glauber Costa's avatar
Glauber Costa committed
77
#include <net/tcp_memcontrol.h>
78
#include <net/busy_poll.h>
Linus Torvalds's avatar
Linus Torvalds committed
79
80
81
82
83
84
85

#include <linux/inet.h>
#include <linux/ipv6.h>
#include <linux/stddef.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

86
87
88
#include <linux/crypto.h>
#include <linux/scatterlist.h>

89
90
/* Tunable flags; the matching sysctl entries are registered elsewhere
 * (not visible in this file).
 */
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_low_latency);


#ifdef CONFIG_TCP_MD5SIG
/* Forward declaration: used by the stateless RST/ACK reply paths below
 * to append a TCP MD5 signature option to a locally built header.
 */
static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       __be32 daddr, __be32 saddr, const struct tcphdr *th);
#endif

/* The global TCP socket hash tables, shared with other files. */
struct inet_hashinfo tcp_hashinfo;
EXPORT_SYMBOL(tcp_hashinfo);
Linus Torvalds's avatar
Linus Torvalds committed
101

102
static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
103
{
104
105
	return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
					  ip_hdr(skb)->saddr,
106
107
					  tcp_hdr(skb)->dest,
					  tcp_hdr(skb)->source);
Linus Torvalds's avatar
Linus Torvalds committed
108
109
}

110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/* Decide whether @sk may reuse the identity held by TIME-WAIT socket
 * @sktw.  On success, inherits the TIME-WAIT socket's timestamp state
 * and sequence position into @sk, takes a reference on @sktw and
 * returns 1; otherwise returns 0.
 */
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
	struct tcp_sock *tp = tcp_sk(sk);

	/* With PAWS, it is safe from the viewpoint
	   of data integrity. Even without PAWS it is safe provided sequence
	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.

	   Actually, the idea is close to VJ's one, only timestamp cache is
	   held not per host, but per port pair and TW bucket is used as state
	   holder.

	   If TW bucket has been already destroyed we fall back to VJ's scheme
	   and use initial timestamp retrieved from peer table.
	 */
	if (tcptw->tw_ts_recent_stamp &&
	    (twp == NULL || (sysctl_tcp_tw_reuse &&
			     get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
		/* Jump well past the old send window so old segments cannot
		 * be mistaken for new data; 0 is reserved, so bump to 1.
		 */
		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
		if (tp->write_seq == 0)
			tp->write_seq = 1;
		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
		sock_hold(sktw);
		return 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(tcp_twsk_unique);

Linus Torvalds's avatar
Linus Torvalds committed
142
143
144
/* This will initiate an outgoing connection.
 *
 * Validates @uaddr, resolves a route (honouring a strict source-route
 * option if present), moves the socket to SYN-SENT, hashes it to select
 * a source port, picks an initial sequence number and sends the SYN via
 * tcp_connect().  Returns 0 or a negative errno; on failure the socket
 * is returned to TCP_CLOSE and its port released.
 */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;

	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	nexthop = daddr = usin->sin_addr.s_addr;
	/* Socket lock is held by the caller, which makes this dereference
	 * of inet_opt safe without RCU read-side protection.
	 */
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     sock_owned_by_user(sk));
	if (inet_opt && inet_opt->opt.srr) {
		/* Source routing: route to the first hop, not the final
		 * destination.
		 */
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk, true);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* TCP cannot connect to multicast or broadcast addresses. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	inet->inet_rcv_saddr = inet->inet_saddr;

	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent	   = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq	   = 0;
	}

	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
		tcp_fetch_timewait_stamp(sk, &rt->dst);

	inet->inet_dport = usin->sin_port;
	inet->inet_daddr = daddr;

	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet_hash_connect(&tcp_death_row, sk);
	if (err)
		goto failure;

	/* Re-check the route now that the real source port is known. */
	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto failure;
	}
	/* OK, now commit destination to socket.  */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	sk_setup_caps(sk, &rt->dst);

	if (!tp->write_seq && likely(!tp->repair))
		tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
							   inet->inet_daddr,
							   inet->inet_sport,
							   usin->sin_port);

	inet->inet_id = tp->write_seq ^ jiffies;

	err = tcp_connect(sk);

	/* Route ownership passed to the socket via sk_setup_caps() above. */
	rt = NULL;
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}
EXPORT_SYMBOL(tcp_v4_connect);
Linus Torvalds's avatar
Linus Torvalds committed
266
267

/*
 * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
 * It can be called through tcp_release_cb() if socket was owned by user
 * at the time tcp_v4_err() was called to handle ICMP message.
 *
 * Reads the new MTU out of tcp_sk(sk)->mtu_info (stashed there by
 * tcp_v4_err()), updates the cached PMTU on the route, and if our MSS
 * was based on a larger MTU, re-syncs the MSS and retransmits.
 */
static void tcp_v4_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;
	struct inet_sock *inet = inet_sk(sk);
	u32 mtu = tcp_sk(sk)->mtu_info;

	dst = inet_csk_update_pmtu(sk, mtu);
	if (!dst)
		return;

	/* Something is about to be wrong... Remember soft error
	 * for the case, if this connection will not able to recover.
	 */
	if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
		sk->sk_err_soft = EMSGSIZE;

	mtu = dst_mtu(dst);

	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
		tcp_sync_mss(sk, mtu);

		/* Resend the TCP packet because it's
		 * clear that the old packet has been
		 * dropped. This is the new "fast" path mtu
		 * discovery.
		 */
		tcp_simple_retransmit(sk);
	} /* else let the usual retransmit timer handle it */
}

303
304
305
306
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
	struct dst_entry *dst = __sk_dst_check(sk, 0);

307
	if (dst)
308
		dst->ops->redirect(dst, sk, skb);
309
310
}

Linus Torvalds's avatar
Linus Torvalds committed
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
/*
 * This routine is called by the ICMP module when it gets some
 * sort of error condition.  If err < 0 then the socket should
 * be closed and the error returned to the user.  If err > 0
 * it's just the icmp type << 8 | icmp code.  After adjustment
 * header points to the first 8 bytes of the tcp header.  We need
 * to find the appropriate port.
 *
 * The locking strategy used here is very "optimistic". When
 * someone else accesses the socket the ICMP is just dropped
 * and for some paths there is no check at all.
 * A more general error queue to queue errors for later handling
 * is probably better.
 *
 */

void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
{
	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
	struct inet_connection_sock *icsk;
	struct tcp_sock *tp;
	struct inet_sock *inet;
	const int type = icmp_hdr(icmp_skb)->type;
	const int code = icmp_hdr(icmp_skb)->code;
	struct sock *sk;
	struct sk_buff *skb;
	struct request_sock *req;
	__u32 seq;
	__u32 remaining;
	int err;
	struct net *net = dev_net(icmp_skb->dev);

	/* Need at least the 8 quoted bytes of the offending TCP header. */
	if (icmp_skb->len < (iph->ihl << 2) + 8) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}

	sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
			iph->saddr, th->source, inet_iif(icmp_skb));
	if (!sk) {
		ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
		return;
	}
	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	/* If too many ICMPs get dropped on busy
	 * servers this needs to be solved differently.
	 * We do take care of PMTU discovery (RFC1191) special case :
	 * we can receive locally generated ICMP messages while socket is held.
	 */
	if (sock_owned_by_user(sk)) {
		if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
			NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);
	}
	if (sk->sk_state == TCP_CLOSE)
		goto out;

	/* Drop if the segment's TTL is below the socket's minimum (anti
	 * spoofing, see IP_MINTTL).
	 */
	if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	icsk = inet_csk(sk);
	tp = tcp_sk(sk);
	req = tp->fastopen_rsk;
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt) &&
	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
		/* For a Fast Open socket, allow seq to be snt_isn. */
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	switch (type) {
	case ICMP_REDIRECT:
		do_redirect(icmp_skb, sk);
		goto out;
	case ICMP_SOURCE_QUENCH:
		/* Just silently ignore these. */
		goto out;
	case ICMP_PARAMETERPROB:
		err = EPROTO;
		break;
	case ICMP_DEST_UNREACH:
		if (code > NR_ICMP_UNREACH)
			goto out;

		if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
			/* We are not interested in TCP_LISTEN and open_requests
			 * (SYN-ACKs send out by Linux are always <576bytes so
			 * they should go through unfragmented).
			 */
			if (sk->sk_state == TCP_LISTEN)
				goto out;

			/* Stash the new MTU; handle now if we own the lock,
			 * otherwise defer to tcp_release_cb().
			 */
			tp->mtu_info = info;
			if (!sock_owned_by_user(sk)) {
				tcp_v4_mtu_reduced(sk);
			} else {
				if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
					sock_hold(sk);
			}
			goto out;
		}

		err = icmp_err_convert[code].errno;
		/* check if icmp_skb allows revert of backoff
		 * (see draft-zimmermann-tcp-lcd) */
		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
			break;
		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
		    !icsk->icsk_backoff)
			break;

		/* XXX (TFO) - revisit the following logic for TFO */

		if (sock_owned_by_user(sk))
			break;

		/* Host is reachable again: undo one step of exponential
		 * backoff and rearm (or fire) the retransmit timer.
		 */
		icsk->icsk_backoff--;
		inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
			TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
		tcp_bound_rto(sk);

		skb = tcp_write_queue_head(sk);
		BUG_ON(!skb);

		remaining = icsk->icsk_rto - min(icsk->icsk_rto,
				tcp_time_stamp - TCP_SKB_CB(skb)->when);

		if (remaining) {
			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
						  remaining, TCP_RTO_MAX);
		} else {
			/* RTO revert clocked out retransmission.
			 * Will retransmit now */
			tcp_retransmit_timer(sk);
		}

		break;
	case ICMP_TIME_EXCEEDED:
		err = EHOSTUNREACH;
		break;
	default:
		goto out;
	}

	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
	 * than following the TCP_SYN_RECV case and closing the socket,
	 * we ignore the ICMP error and keep trying like a fully established
	 * socket. Is this the right thing to do?
	 */
	if (req && req->sk == NULL)
		goto out;

	switch (sk->sk_state) {
		/* NOTE: this req intentionally shadows the fastopen req
		 * above; it is only used in the TCP_LISTEN case.
		 */
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet_csk_search_req(sk, &prev, th->dest,
					  iph->daddr, iph->saddr);
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		   an established socket here.
		 */
		WARN_ON(req->sk);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		/*
		 * Still in SYN_RECV, just remove it silently.
		 * There is no good way to pass the error to the newly
		 * created socket, and POSIX does not want network
		 * errors returned from accept().
		 */
		inet_csk_reqsk_queue_drop(sk, req, prev);
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can f.e. if SYNs crossed,
			       or Fast Open.
			     */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;

			sk->sk_error_report(sk);

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	/* If we've already connected we will keep trying
	 * until we time out, or the user gives up.
	 *
	 * rfc1122 4.2.3.9 allows to consider as hard errors
	 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
	 * but it is obsoleted by pmtu discovery).
	 *
	 * Note, that in modern internet, where routing is unreliable
	 * and in each dark corner broken firewalls sit, sending random
	 * errors ordered by their masters even this two messages finally lose
	 * their original sense (even Linux sends invalid PORT_UNREACHs)
	 *
	 * Now we are in compliance with RFCs.
	 *							--ANK (980905)
	 */

	inet = inet_sk(sk);
	if (!sock_owned_by_user(sk) && inet->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else	{ /* Only an error on timeout */
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

549
/* Fill in the TCP checksum for @skb using the given pseudo-header
 * addresses.  With CHECKSUM_PARTIAL, store only the pseudo-header sum
 * and let the NIC finish it; otherwise compute the full checksum here.
 */
void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v4_check(skb->len, saddr, daddr,
					 csum_partial(th,
						      th->doff << 2,
						      skb->csum));
	}
}

565
/* This routine computes an IPv4 TCP checksum. */
566
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
567
{
568
	const struct inet_sock *inet = inet_sk(sk);
569
570
571

	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
}
572
EXPORT_SYMBOL(tcp_v4_send_check);
573

Linus Torvalds's avatar
Linus Torvalds committed
574
575
576
577
578
579
580
581
582
583
584
585
586
/*
 *	This routine will send an RST to the other tcp.
 *
 *	Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
 *		      for reset.
 *	Answer: if a packet caused RST, it is not for a socket
 *		existing in our system, if it is matched to a socket,
 *		it is just duplicate segment or bug in other side's TCP.
 *		So that we build reply only basing on parameters
 *		arrived with segment.
 *	Exception: precedence violation. We do not implement it in any case.
 */

static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Stack-built reply: bare TCP header plus (optionally) room for
	 * an MD5 signature option.
	 */
	struct {
		struct tcphdr th;
#ifdef CONFIG_TCP_MD5SIG
		__be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
#endif
	} rep;
	struct ip_reply_arg arg;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	struct net *net;

	/* Never send a reset in response to a reset. */
	if (th->rst)
		return;

	/* Only reply to segments that were actually addressed to us. */
	if (skb_rtable(skb)->rt_type != RTN_LOCAL)
		return;

	/* Swap the send and the receive. */
	memset(&rep, 0, sizeof(rep));
	rep.th.dest   = th->source;
	rep.th.source = th->dest;
	rep.th.doff   = sizeof(struct tcphdr) / 4;
	rep.th.rst    = 1;

	if (th->ack) {
		rep.th.seq = th->ack_seq;
	} else {
		rep.th.ack = 1;
		rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
				       skb->len - (th->doff << 2));
	}

	memset(&arg, 0, sizeof(arg));
	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);

#ifdef CONFIG_TCP_MD5SIG
	hash_location = tcp_parse_md5sig_option(th);
	if (!sk && hash_location) {
		/*
		 * active side is lost. Try to find listening socket through
		 * source port, and then find md5 key through listening socket.
		 * we are not loose security here:
		 * Incoming packet is checked with md5 hash with finding key,
		 * no RST generated if md5 hash doesn't match.
		 */
		sk1 = __inet_lookup_listener(dev_net(skb_dst(skb)->dev),
					     &tcp_hashinfo, ip_hdr(skb)->saddr,
					     th->source, ip_hdr(skb)->daddr,
					     ntohs(th->source), inet_iif(skb));
		/* don't send rst if it can't find key */
		if (!sk1)
			return;
		rcu_read_lock();
		key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
					&ip_hdr(skb)->saddr, AF_INET);
		if (!key)
			goto release_sk1;

		genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto release_sk1;
	} else {
		key = sk ? tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
					     &ip_hdr(skb)->saddr,
					     AF_INET) : NULL;
	}

	if (key) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) |
				   (TCPOPT_NOP << 16) |
				   (TCPOPT_MD5SIG << 8) |
				   TCPOLEN_MD5SIG);
		/* Update length and the length the header thinks exists */
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len / 4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
				     key, ip_hdr(skb)->saddr,
				     ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
	/* When socket is gone, all binding information is lost.
	 * routing might fail in this case. No choice here, if we choose to force
	 * input interface, we will misroute in case of asymmetric route.
	 */
	if (sk)
		arg.bound_dev_if = sk->sk_bound_dev_if;

	net = dev_net(skb_dst(skb)->dev);
	arg.tos = ip_hdr(skb)->tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
	TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);

#ifdef CONFIG_TCP_MD5SIG
release_sk1:
	if (sk1) {
		rcu_read_unlock();
		sock_put(sk1);
	}
#endif
}

/* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
   outside socket context is ugly, certainly. What can I do?
 */

static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
			    u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key,
			    int reply_flags, u8 tos)
{
	const struct tcphdr *th = tcp_hdr(skb);
	/* Stack-built reply: TCP header plus room for a timestamp option
	 * and (optionally) an MD5 signature option.
	 */
	struct {
		struct tcphdr th;
		__be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
#ifdef CONFIG_TCP_MD5SIG
			   + (TCPOLEN_MD5SIG_ALIGNED >> 2)
#endif
			];
	} rep;
	struct ip_reply_arg arg;
	struct net *net = dev_net(skb_dst(skb)->dev);

	memset(&rep.th, 0, sizeof(struct tcphdr));
	memset(&arg, 0, sizeof(arg));

	arg.iov[0].iov_base = (unsigned char *)&rep;
	arg.iov[0].iov_len  = sizeof(rep.th);
	if (tsecr) {
		rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				   (TCPOPT_TIMESTAMP << 8) |
				   TCPOLEN_TIMESTAMP);
		rep.opt[1] = htonl(tsval);
		rep.opt[2] = htonl(tsecr);
		arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
	}

	/* Swap the send and the receive. */
	rep.th.dest    = th->source;
	rep.th.source  = th->dest;
	rep.th.doff    = arg.iov[0].iov_len / 4;
	rep.th.seq     = htonl(seq);
	rep.th.ack_seq = htonl(ack);
	rep.th.ack     = 1;
	rep.th.window  = htons(win);

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		/* MD5 option goes after the timestamp option, if present. */
		int offset = (tsecr) ? 3 : 0;

		rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
					  (TCPOPT_NOP << 16) |
					  (TCPOPT_MD5SIG << 8) |
					  TCPOLEN_MD5SIG);
		arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
		rep.th.doff = arg.iov[0].iov_len/4;

		tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
				    key, ip_hdr(skb)->saddr,
				    ip_hdr(skb)->daddr, &rep.th);
	}
#endif
	arg.flags = reply_flags;
	arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
				      ip_hdr(skb)->saddr, /* XXX */
				      arg.iov[0].iov_len, IPPROTO_TCP, 0);
	arg.csumoffset = offsetof(struct tcphdr, check) / 2;
	if (oif)
		arg.bound_dev_if = oif;
	arg.tos = tos;
	ip_send_unicast_reply(net, skb, ip_hdr(skb)->saddr,
			      ip_hdr(skb)->daddr, &arg, arg.iov[0].iov_len);

	TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
}

/* Reply to a segment received for a TIME-WAIT socket with an ACK built
 * from the timewait state, then release the timewait reference.
 */
static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent,
			tw->tw_bound_dev_if,
			tcp_twsk_md5_key(tcptw),
			tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
			tw->tw_tos
			);

	inet_twsk_put(tw);
}

799
/* ACK a segment on behalf of a connection still represented by a
 * request_sock (SYN-RECV / Fast Open), built from the request state.
 */
static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
			tcp_time_stamp,
			req->ts_recent,
			0,
			tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
					  AF_INET),
			inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
			ip_hdr(skb)->tos);
}

/*
 *	Send a SYN-ACK after having received a SYN.
 *	This still operates on a request_sock only, not on a big
 *	socket.
 *
 *	@dst may be NULL, in which case a route is looked up here.
 *	Returns 0 on success or a negative value on failure.
 */
static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
			      struct request_sock *req,
			      u16 queue_mapping,
			      bool nocache)
{
	const struct inet_request_sock *ireq = inet_rsk(req);
	struct flowi4 fl4;
	int err = -1;
	struct sk_buff * skb;

	/* First, grab a route. */
	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
		return -1;

	skb = tcp_make_synack(sk, dst, req, NULL);

	if (skb) {
		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);

		skb_set_queue_mapping(skb, queue_mapping);
		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
					    ireq->rmt_addr,
					    ireq->opt);
		err = net_xmit_eval(err);
		/* Record the first successful SYN-ACK send time once. */
		if (!tcp_rsk(req)->snt_synack && !err)
			tcp_rsk(req)->snt_synack = tcp_time_stamp;
	}

	return err;
}

Christoph Paasch's avatar
Christoph Paasch committed
853
static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
854
{
Christoph Paasch's avatar
Christoph Paasch committed
855
	int res = tcp_v4_send_synack(sk, NULL, req, 0, false);
856
857
858
859

	if (!res)
		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return res;
860
861
}

Linus Torvalds's avatar
Linus Torvalds committed
862
/*
863
 *	IPv4 request_sock destructor.
Linus Torvalds's avatar
Linus Torvalds committed
864
 */
865
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
	/* Free the IP options copied from the incoming SYN (presumably
	 * set by tcp_v4_save_options()); kfree(NULL) is a no-op.
	 */
	kfree(inet_rsk(req)->opt);
}

870
/*
Eric Dumazet's avatar
Eric Dumazet committed
871
 * Return true if a syncookie should be sent
872
 */
Eric Dumazet's avatar
Eric Dumazet committed
873
bool tcp_syn_flood_action(struct sock *sk,
			 const struct sk_buff *skb,
			 const char *proto)
{
	const char *msg = "Dropping request";
	bool want_cookie = false;
	struct listen_sock *lopt;

	/* With syncookies compiled in and enabled, answer the flood with
	 * cookies; otherwise the request is dropped.  Either way the
	 * matching SNMP counter is bumped.  Note the "else" pairs across
	 * the #endif with the if above when CONFIG_SYN_COOKIES is set.
	 */
#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies) {
		msg = "Sending cookies";
		want_cookie = true;
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
	} else
#endif
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);

	/* Warn only once per listener to avoid log flooding. */
	lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
	if (!lopt->synflood_warned) {
		lopt->synflood_warned = 1;
		pr_info("%s: Possible SYN flooding on port %d. %s.  Check SNMP counters.\n",
			proto, ntohs(tcp_hdr(skb)->dest), msg);
	}
	return want_cookie;
}
EXPORT_SYMBOL(tcp_syn_flood_action);
Linus Torvalds's avatar
Linus Torvalds committed
901
902

/*
903
 * Save and compile IPv4 options into the request_sock if needed.
Linus Torvalds's avatar
Linus Torvalds committed
904
 */
905
static struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
Linus Torvalds's avatar
Linus Torvalds committed
906
{
907
908
	const struct ip_options *opt = &(IPCB(skb)->opt);
	struct ip_options_rcu *dopt = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
909
910

	if (opt && opt->optlen) {
911
912
		int opt_size = sizeof(*dopt) + opt->optlen;

Linus Torvalds's avatar
Linus Torvalds committed
913
914
		dopt = kmalloc(opt_size, GFP_ATOMIC);
		if (dopt) {
915
			if (ip_options_echo(&dopt->opt, skb)) {
Linus Torvalds's avatar
Linus Torvalds committed
916
917
918
919
920
921
922
923
				kfree(dopt);
				dopt = NULL;
			}
		}
	}
	return dopt;
}

924
925
926
927
928
929
930
931
#ifdef CONFIG_TCP_MD5SIG
/*
 * RFC2385 MD5 checksumming requires a mapping of
 * IP address->MD5 Key.
 * We need to maintain these in the sk structure.
 */

/* Find the Key structure for an address.  */
Eric Dumazet's avatar
Eric Dumazet committed
932
933
934
struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
					 const union tcp_md5_addr *addr,
					 int family)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	/* Number of address bytes to compare: IPv4 prefix of the union
	 * by default, widened to a full in6_addr for AF_INET6 below.
	 */
	unsigned int size = sizeof(struct in_addr);
	struct tcp_md5sig_info *md5sig;

	/* caller either holds rcu_read_lock() or socket lock */
	md5sig = rcu_dereference_check(tp->md5sig_info,
				       sock_owned_by_user(sk) ||
				       lockdep_is_held(&sk->sk_lock.slock));
	if (!md5sig)
		return NULL;
#if IS_ENABLED(CONFIG_IPV6)
	if (family == AF_INET6)
		size = sizeof(struct in6_addr);
#endif
	/* Linear scan of the per-socket key list; first match wins. */
	hlist_for_each_entry_rcu(key, &md5sig->head, node) {
		if (key->family != family)
			continue;
		if (!memcmp(&key->addr, addr, size))
			return key;
	}
	return NULL;
}
EXPORT_SYMBOL(tcp_md5_do_lookup);
960
961
962
963

/* Find the MD5 key matching the IPv4 destination of @addr_sk. */
struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
					 struct sock *addr_sk)
{
	return tcp_md5_do_lookup(sk,
				 (union tcp_md5_addr *)&inet_sk(addr_sk)->inet_daddr,
				 AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);

Adrian Bunk's avatar
Adrian Bunk committed
971
972
static struct tcp_md5sig_key *tcp_v4_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
973
{
Eric Dumazet's avatar
Eric Dumazet committed
974
975
976
977
	union tcp_md5_addr *addr;

	addr = (union tcp_md5_addr *)&inet_rsk(req)->rmt_addr;
	return tcp_md5_do_lookup(sk, addr, AF_INET);
978
979
980
}

/* This can be called on a newly created socket, from other files */
Eric Dumazet's avatar
Eric Dumazet committed
981
982
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
		   int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
	/* Add Key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_info *md5sig;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (key) {
		/* Pre-existing entry - just update that one. */
		memcpy(key->key, newkey, newkeylen);
		key->keylen = newkeylen;
		return 0;
	}

	md5sig = rcu_dereference_protected(tp->md5sig_info,
					   sock_owned_by_user(sk));
	if (!md5sig) {
		/* First key on this socket: allocate the info container.
		 * The list head is initialised BEFORE rcu_assign_pointer()
		 * publishes it, so RCU readers never see it half-built.
		 */
		md5sig = kmalloc(sizeof(*md5sig), gfp);
		if (!md5sig)
			return -ENOMEM;

		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		INIT_HLIST_HEAD(&md5sig->head);
		rcu_assign_pointer(tp->md5sig_info, md5sig);
	}

	/* Charged against the socket's option memory (sk_omem_alloc). */
	key = sock_kmalloc(sk, sizeof(*key), gfp);
	if (!key)
		return -ENOMEM;
	if (!tcp_alloc_md5sig_pool()) {
		sock_kfree_s(sk, key, sizeof(*key));
		return -ENOMEM;
	}

	/* Fill the key completely before linking it into the RCU list. */
	memcpy(key->key, newkey, newkeylen);
	key->keylen = newkeylen;
	key->family = family;
	memcpy(&key->addr, addr,
	       (family == AF_INET6) ? sizeof(struct in6_addr) :
				      sizeof(struct in_addr));
	hlist_add_head_rcu(&key->node, &md5sig->head);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_add);
1027

Eric Dumazet's avatar
Eric Dumazet committed
1028
/* Remove one MD5 key from the socket's list; -ENOENT if no match. */
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
{
	struct tcp_md5sig_key *key;

	key = tcp_md5_do_lookup(sk, addr, family);
	if (!key)
		return -ENOENT;
	/* Unlink first, uncharge the socket memory accounting, then free
	 * after a grace period so concurrent RCU readers stay safe.
	 */
	hlist_del_rcu(&key->node);
	atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
	kfree_rcu(key, rcu);
	return 0;
}
EXPORT_SYMBOL(tcp_md5_do_del);
1041

1042
/* Drop every MD5 key attached to @sk (socket teardown path). */
static void tcp_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_md5sig_key *key;
	struct hlist_node *n;
	struct tcp_md5sig_info *md5sig;

	/* "1" = caller guarantees exclusive access; no lockdep check. */
	md5sig = rcu_dereference_protected(tp->md5sig_info, 1);

	/* _safe variant: entries are unlinked while iterating. */
	hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
		hlist_del_rcu(&key->node);
		atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
		kfree_rcu(key, rcu);
	}
}

1058
1059
static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
1060
1061
1062
1063
1064