/* SCTP kernel implementation
 * (C) Copyright IBM Corp. 2001, 2004
 * Copyright (c) 1999-2000 Cisco, Inc.
 * Copyright (c) 1999-2001 Motorola, Inc.
 * Copyright (c) 2001-2003 Intel Corp.
 *
 * This file is part of the SCTP kernel implementation
 *
 * These functions implement the sctp_outq class.   The outqueue handles
 * bundling and queueing of outgoing SCTP chunks.
 *
 * This SCTP implementation is free software;
 * you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This SCTP implementation is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *                 ************************
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU CC; see the file COPYING.  If not, write to
 * the Free Software Foundation, 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Please send any bug reports or fixes you make to the
 * email address(es):
 *    lksctp developers <lksctp-developers@lists.sourceforge.net>
 *
 * Or submit a bug report through the following website:
 *    http://www.sf.net/projects/lksctp
 *
 * Written or modified by:
 *    La Monte H.P. Yarroll <piggy@acm.org>
 *    Karl Knutson          <karl@athena.chicago.il.us>
 *    Perry Melange         <pmelange@null.cc.uic.edu>
 *    Xingang Guo           <xingang.guo@intel.com>
 *    Hui Huang 	    <hui.huang@nokia.com>
 *    Sridhar Samudrala     <sri@us.ibm.com>
 *    Jon Grimm             <jgrimm@us.ibm.com>
 *
 * Any bugs reported given to us we will try to fix... any fixes shared will
 * be incorporated into the next SCTP release.
 */

49
50
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

Linus Torvalds's avatar
Linus Torvalds committed
51
52
53
54
#include <linux/types.h>
#include <linux/list.h>   /* For struct list_head */
#include <linux/socket.h>
#include <linux/ip.h>
55
#include <linux/slab.h>
Linus Torvalds's avatar
Linus Torvalds committed
56
57
58
59
60
61
62
63
64
65
#include <net/sock.h>	  /* For skb_set_owner_w */

#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>

/* Declare internal functions here.  */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
static void sctp_check_transmitted(struct sctp_outq *q,
				   struct list_head *transmitted_queue,
				   struct sctp_transport *transport,
66
				   union sctp_addr *saddr,
Linus Torvalds's avatar
Linus Torvalds committed
67
				   struct sctp_sackhdr *sack,
68
				   __u32 *highest_new_tsn);
Linus Torvalds's avatar
Linus Torvalds committed
69
70
71
72
73
74
75
76
77

static void sctp_mark_missing(struct sctp_outq *q,
			      struct list_head *transmitted_queue,
			      struct sctp_transport *transport,
			      __u32 highest_new_tsn,
			      int count_of_newacks);

static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);

78
79
static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout);

Linus Torvalds's avatar
Linus Torvalds committed
80
81
82
83
/* Add data to the front of the queue. */
static inline void sctp_outq_head_data(struct sctp_outq *q,
					struct sctp_chunk *ch)
{
84
	list_add(&ch->list, &q->out_chunk_list);
Linus Torvalds's avatar
Linus Torvalds committed
85
86
87
88
89
90
	q->out_qlen += ch->skb->len;
}

/* Take data from the front of the queue. */
static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
{
91
92
93
94
95
96
97
	struct sctp_chunk *ch = NULL;

	if (!list_empty(&q->out_chunk_list)) {
		struct list_head *entry = q->out_chunk_list.next;

		ch = list_entry(entry, struct sctp_chunk, list);
		list_del_init(entry);
Linus Torvalds's avatar
Linus Torvalds committed
98
		q->out_qlen -= ch->skb->len;
99
	}
Linus Torvalds's avatar
Linus Torvalds committed
100
101
102
103
104
105
	return ch;
}
/* Add data chunk to the end of the queue. */
static inline void sctp_outq_tail_data(struct sctp_outq *q,
				       struct sctp_chunk *ch)
{
106
	list_add_tail(&ch->list, &q->out_chunk_list);
Linus Torvalds's avatar
Linus Torvalds committed
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
	q->out_qlen += ch->skb->len;
}

/*
 * SFR-CACC algorithm:
 * D) If count_of_newacks is greater than or equal to 2
 * and t was not sent to the current primary then the
 * sender MUST NOT increment missing report count for t.
 *
 * Returns 1 when the missing-report increment must be skipped.
 */
static inline int sctp_cacc_skip_3_1_d(struct sctp_transport *primary,
				       struct sctp_transport *transport,
				       int count_of_newacks)
{
	return (count_of_newacks >= 2 && transport != primary) ? 1 : 0;
}

/*
 * SFR-CACC algorithm:
 * F) If count_of_newacks is less than 2, let d be the
 * destination to which t was sent. If cacc_saw_newack
 * is 0 for destination d, then the sender MUST NOT
 * increment missing report count for t.
 */
static inline int sctp_cacc_skip_3_1_f(struct sctp_transport *transport,
				       int count_of_newacks)
{
135
136
	if (count_of_newacks < 2 &&
			(transport && !transport->cacc.cacc_saw_newack))
Linus Torvalds's avatar
Linus Torvalds committed
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
		return 1;
	return 0;
}

/*
 * SFR-CACC algorithm:
 * 3.1) If CYCLING_CHANGEOVER is 0, the sender SHOULD
 * execute steps C, D, F.
 *
 * C has been implemented in sctp_outq_sack
 *
 * Returns 1 when either step D or step F dictates that the
 * missing-report count must not be incremented.
 */
static inline int sctp_cacc_skip_3_1(struct sctp_transport *primary,
				     struct sctp_transport *transport,
				     int count_of_newacks)
{
	/* Steps D and F only apply while not cycling a changeover. */
	if (primary->cacc.cycling_changeover)
		return 0;

	return sctp_cacc_skip_3_1_d(primary, transport, count_of_newacks) ||
	       sctp_cacc_skip_3_1_f(transport, count_of_newacks);
}

/*
 * SFR-CACC algorithm:
 * 3.2) Else if CYCLING_CHANGEOVER is 1, and t is less
 * than next_tsn_at_change of the current primary, then
 * the sender MUST NOT increment missing report count
 * for t.
 *
 * Returns 1 when the missing-report increment must be skipped.
 */
static inline int sctp_cacc_skip_3_2(struct sctp_transport *primary, __u32 tsn)
{
	return (primary->cacc.cycling_changeover &&
		TSN_lt(tsn, primary->cacc.next_tsn_at_change)) ? 1 : 0;
}

/*
 * SFR-CACC algorithm:
 * 3) If the missing report count for TSN t is to be
 * incremented according to [RFC2960] and
 * [SCTP_STEWART-2002], and CHANGEOVER_ACTIVE is set,
 * then the sender MUST further execute steps 3.1 and
 * 3.2 to determine if the missing report count for
 * TSN t SHOULD NOT be incremented.
 *
 * 3.3) If 3.1 and 3.2 do not dictate that the missing
 * report count for t should not be incremented, then
 * the sender SHOULD increment missing report count for
 * t (according to [RFC2960] and [SCTP_STEWART_2002]).
 *
 * Returns 1 when the increment must be suppressed.
 */
static inline int sctp_cacc_skip(struct sctp_transport *primary,
				 struct sctp_transport *transport,
				 int count_of_newacks,
				 __u32 tsn)
{
	/* Without an active changeover, steps 3.1/3.2 never apply. */
	if (!primary->cacc.changeover_active)
		return 0;

	if (sctp_cacc_skip_3_1(primary, transport, count_of_newacks))
		return 1;

	return sctp_cacc_skip_3_2(primary, tsn);
}

/* Initialize an existing sctp_outq.  This does the boring stuff.
 * You still need to define handlers if you really want to DO
 * something with this structure...
 */
void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
{
209
210
	memset(q, 0, sizeof(struct sctp_outq));

Linus Torvalds's avatar
Linus Torvalds committed
211
	q->asoc = asoc;
212
213
	INIT_LIST_HEAD(&q->out_chunk_list);
	INIT_LIST_HEAD(&q->control_chunk_list);
Linus Torvalds's avatar
Linus Torvalds committed
214
215
216
217
218
219
220
221
222
	INIT_LIST_HEAD(&q->retransmit);
	INIT_LIST_HEAD(&q->sacked);
	INIT_LIST_HEAD(&q->abandoned);

	q->empty = 1;
}

/* Free the outqueue structure and any related pending chunks.
 *
 * Drains every queue owned by 'q': the per-transport transmitted
 * lists, the sacked/retransmit/abandoned lists, the pending data
 * queue and the control-chunk queue.  Each dropped DATA chunk is
 * reported to the user as a send failure carrying q->error before
 * being freed.
 */
static void __sctp_outq_teardown(struct sctp_outq *q)
{
	struct sctp_transport *transport;
	struct list_head *lchunk, *temp;
	struct sctp_chunk *chunk, *tmp;

	/* Throw away unacknowledged chunks queued on each peer transport. */
	list_for_each_entry(transport, &q->asoc->peer.transport_addr_list,
			transports) {
		while ((lchunk = sctp_list_dequeue(&transport->transmitted)) != NULL) {
			chunk = list_entry(lchunk, struct sctp_chunk,
					   transmitted_list);
			/* Mark as part of a failed message. */
			sctp_chunk_fail(chunk, q->error);
			sctp_chunk_free(chunk);
		}
	}

	/* Throw away chunks that have been gap ACKed.  */
	list_for_each_safe(lchunk, temp, &q->sacked) {
		list_del_init(lchunk);
		chunk = list_entry(lchunk, struct sctp_chunk,
				   transmitted_list);
		sctp_chunk_fail(chunk, q->error);
		sctp_chunk_free(chunk);
	}

	/* Throw away any chunks in the retransmit queue. */
	list_for_each_safe(lchunk, temp, &q->retransmit) {
		list_del_init(lchunk);
		chunk = list_entry(lchunk, struct sctp_chunk,
				   transmitted_list);
		sctp_chunk_fail(chunk, q->error);
		sctp_chunk_free(chunk);
	}

	/* Throw away any chunks that are in the abandoned queue. */
	list_for_each_safe(lchunk, temp, &q->abandoned) {
		list_del_init(lchunk);
		chunk = list_entry(lchunk, struct sctp_chunk,
				   transmitted_list);
		sctp_chunk_fail(chunk, q->error);
		sctp_chunk_free(chunk);
	}

	/* Throw away any leftover data chunks that were never sent. */
	while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {

		/* Mark as send failure. */
		sctp_chunk_fail(chunk, q->error);
		sctp_chunk_free(chunk);
	}

	/* Throw away any leftover control chunks; these are not
	 * reported as failed messages, only freed.
	 */
	list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
		list_del_init(&chunk->list);
		sctp_chunk_free(chunk);
	}
}

283
284
285
286
287
288
/* Tear down all queued chunks and then reset the outqueue to a
 * freshly-initialized state for the same association, so it can
 * be reused.
 */
void sctp_outq_teardown(struct sctp_outq *q)
{
	__sctp_outq_teardown(q);
	sctp_outq_init(q->asoc, q);
}

Linus Torvalds's avatar
Linus Torvalds committed
289
290
291
292
/* Free the outqueue structure and any related pending chunks.
 * Unlike sctp_outq_teardown(), the queue is NOT re-initialized
 * afterwards.
 */
void sctp_outq_free(struct sctp_outq *q)
{
	/* Throw away leftover chunks. */
	__sctp_outq_teardown(q);
}

/* Put a new chunk in an sctp_outq.
 *
 * DATA chunks are appended to the data queue (unless the association
 * is shutting down, in which case -ESHUTDOWN is returned); control
 * chunks are appended to the control queue.  Unless the queue is
 * corked, the queue is flushed before returning.
 *
 * Returns 0 on success or a negative errno from the shutdown check
 * or from sctp_outq_flush().
 */
int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
{
	struct net *net = sock_net(q->asoc->base.sk);
	int error = 0;

	pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
		 chunk && chunk->chunk_hdr ?
		 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
		 "illegal chunk");

	/* If it is data, queue it up, otherwise, send it
	 * immediately.
	 */
	if (sctp_chunk_is_data(chunk)) {
		/* Is it OK to queue data chunks?  */
		/* From 9. Termination of Association
		 *
		 * When either endpoint performs a shutdown, the
		 * association on each peer will stop accepting new
		 * data from its user and only deliver data in queue
		 * at the time of sending or receiving the SHUTDOWN
		 * chunk.
		 */
		switch (q->asoc->state) {
		case SCTP_STATE_CLOSED:
		case SCTP_STATE_SHUTDOWN_PENDING:
		case SCTP_STATE_SHUTDOWN_SENT:
		case SCTP_STATE_SHUTDOWN_RECEIVED:
		case SCTP_STATE_SHUTDOWN_ACK_SENT:
			/* Cannot send after transport endpoint shutdown */
			error = -ESHUTDOWN;
			break;

		default:
			pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n",
				 __func__, q, chunk, chunk && chunk->chunk_hdr ?
				 sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) :
				 "illegal chunk");

			sctp_outq_tail_data(q, chunk);
			/* Bump the ordered/unordered SNMP counter that
			 * matches the chunk's U flag.
			 */
			if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED)
				SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS);
			else
				SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS);
			q->empty = 0;
			break;
		}
	} else {
		list_add_tail(&chunk->list, &q->control_chunk_list);
		SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
	}

	if (error < 0)
		return error;

	/* A corked queue is flushed later by sctp_outq_uncork(). */
	if (!q->cork)
		error = sctp_outq_flush(q, 0);

	return error;
}

/* Insert a chunk into the sorted list based on the TSNs.  The retransmit list
 * and the abandoned list are in ascending order.
 */
static void sctp_insert_list(struct list_head *head, struct list_head *new)
{
	struct sctp_chunk *nchunk, *lchunk;
	struct list_head *pos;
	__u32 ntsn, ltsn;

	nchunk = list_entry(new, struct sctp_chunk, transmitted_list);
	ntsn = ntohl(nchunk->subh.data_hdr->tsn);

	/* Walk until we find the first queued chunk with a larger TSN
	 * and slot the new chunk in just before it.
	 */
	list_for_each(pos, head) {
		lchunk = list_entry(pos, struct sctp_chunk, transmitted_list);
		ltsn = ntohl(lchunk->subh.data_hdr->tsn);
		if (TSN_lt(ntsn, ltsn)) {
			list_add(new, pos->prev);
			return;
		}
	}

	/* Largest TSN so far: it belongs at the tail. */
	list_add_tail(new, head);
}

/* Mark all the eligible packets on a transport for retransmission.
 *
 * Walks the transport's transmitted list: abandoned chunks are moved
 * to the abandoned queue (adjusting flight size, outstanding bytes
 * and the peer's rwnd for never-acked ones), and every chunk eligible
 * for the given retransmission 'reason' is moved, in TSN order, to
 * q->retransmit with its accounting updated.
 */
void sctp_retransmit_mark(struct sctp_outq *q,
			  struct sctp_transport *transport,
			  __u8 reason)
{
	struct list_head *lchunk, *ltemp;
	struct sctp_chunk *chunk;

	/* Walk through the specified transmitted queue.  */
	list_for_each_safe(lchunk, ltemp, &transport->transmitted) {
		chunk = list_entry(lchunk, struct sctp_chunk,
				   transmitted_list);

		/* If the chunk is abandoned, move it to abandoned list. */
		if (sctp_chunk_abandoned(chunk)) {
			list_del_init(lchunk);
			sctp_insert_list(&q->abandoned, lchunk);

			/* If this chunk has not been previously acked,
			 * stop considering it 'outstanding'.  Our peer
			 * will most likely never see it since it will
			 * not be retransmitted
			 */
			if (!chunk->tsn_gap_acked) {
				if (chunk->transport)
					chunk->transport->flight_size -=
							sctp_data_size(chunk);
				q->outstanding_bytes -= sctp_data_size(chunk);
				q->asoc->peer.rwnd += sctp_data_size(chunk);
			}
			continue;
		}

		/* If we are doing retransmission due to a timeout or pmtu
		 * discovery, only the chunks that are not yet acked should
		 * be added to the retransmit queue.  For fast retransmit,
		 * only chunks explicitly marked SCTP_NEED_FRTX qualify.
		 */
		if ((reason == SCTP_RTXR_FAST_RTX  &&
			    (chunk->fast_retransmit == SCTP_NEED_FRTX)) ||
		    (reason != SCTP_RTXR_FAST_RTX  && !chunk->tsn_gap_acked)) {
			/* RFC 2960 6.2.1 Processing a Received SACK
			 *
			 * C) Any time a DATA chunk is marked for
			 * retransmission (via either T3-rtx timer expiration
			 * (Section 6.3.3) or via fast retransmit
			 * (Section 7.2.4)), add the data size of those
			 * chunks to the rwnd.
			 */
			q->asoc->peer.rwnd += sctp_data_size(chunk);
			q->outstanding_bytes -= sctp_data_size(chunk);
			if (chunk->transport)
				transport->flight_size -= sctp_data_size(chunk);

			/* sctpimpguide-05 Section 2.8.2
			 * M5) If a T3-rtx timer expires, the
			 * 'TSN.Missing.Report' of all affected TSNs is set
			 * to 0.
			 */
			chunk->tsn_missing_report = 0;

			/* If a chunk that is being used for RTT measurement
			 * has to be retransmitted, we cannot use this chunk
			 * anymore for RTT measurements. Reset rto_pending so
			 * that a new RTT measurement is started when a new
			 * data chunk is sent.
			 */
			if (chunk->rtt_in_progress) {
				chunk->rtt_in_progress = 0;
				transport->rto_pending = 0;
			}

			/* Move the chunk to the retransmit queue. The chunks
			 * on the retransmit queue are always kept in order.
			 */
			list_del_init(lchunk);
			sctp_insert_list(&q->retransmit, lchunk);
		}
	}

	pr_debug("%s: transport:%p, reason:%d, cwnd:%d, ssthresh:%d, "
		 "flight_size:%d, pba:%d\n", __func__, transport, reason,
		 transport->cwnd, transport->ssthresh, transport->flight_size,
		 transport->partial_bytes_acked);
}

/* Mark all the eligible packets on a transport for retransmission and force
 * one packet out.
 *
 * Updates the SNMP counter and congestion state appropriate for
 * 'reason', marks the eligible chunks via sctp_retransmit_mark(),
 * possibly generates a FORWARD-TSN, and (except for fast retransmit,
 * which is flushed by the SACK path) flushes the queue.  Any flush
 * error is reported through the socket's sk_err.
 */
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
		     sctp_retransmit_reason_t reason)
{
	struct net *net = sock_net(q->asoc->base.sk);
	int error = 0;

	switch(reason) {
	case SCTP_RTXR_T3_RTX:
		SCTP_INC_STATS(net, SCTP_MIB_T3_RETRANSMITS);
		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX);
		/* Update the retran path if the T3-rtx timer has expired for
		 * the current retran path.
		 */
		if (transport == transport->asoc->peer.retran_path)
			sctp_assoc_update_retran_path(transport->asoc);
		transport->asoc->rtx_data_chunks +=
			transport->asoc->unack_data;
		break;
	case SCTP_RTXR_FAST_RTX:
		SCTP_INC_STATS(net, SCTP_MIB_FAST_RETRANSMITS);
		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
		/* Remember we are in fast-rtx mode; the SACK processing
		 * path flushes the queue and clears this hint.
		 */
		q->fast_rtx = 1;
		break;
	case SCTP_RTXR_PMTUD:
		SCTP_INC_STATS(net, SCTP_MIB_PMTUD_RETRANSMITS);
		break;
	case SCTP_RTXR_T1_RTX:
		SCTP_INC_STATS(net, SCTP_MIB_T1_RETRANSMITS);
		transport->asoc->init_retries++;
		break;
	default:
		BUG();
	}

	sctp_retransmit_mark(q, transport, reason);

	/* PR-SCTP A5) Any time the T3-rtx timer expires, on any destination,
	 * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by
	 * following the procedures outlined in C1 - C5.
	 */
	if (reason == SCTP_RTXR_T3_RTX)
		sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point);

	/* Flush the queues only on timeout, since fast_rtx is only
	 * triggered during sack processing and the queue
	 * will be flushed at the end.
	 */
	if (reason != SCTP_RTXR_FAST_RTX)
		error = sctp_outq_flush(q, /* rtx_timeout */ 1);

	if (error)
		q->asoc->base.sk->sk_err = -error;
}

/*
 * Transmit DATA chunks on the retransmit queue.  Upon return from
 * sctp_outq_flush_rtx() the packet 'pkt' may contain chunks which
 * need to be transmitted by the caller.
 * We assume that pkt->transport has already been set.
 *
 * On return, *start_timer is 1 when at least one append/transmit
 * round completed without error (the caller should arm T3-rtx).
 *
 * The return value is a normal kernel error return value.
 */
static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
			       int rtx_timeout, int *start_timer)
{
	struct list_head *lqueue;
	struct sctp_transport *transport = pkt->transport;
	sctp_xmit_t status;
	struct sctp_chunk *chunk, *chunk1;
	int fast_rtx;
	int error = 0;
	int timer = 0;
	int done = 0;

	lqueue = &q->retransmit;
	fast_rtx = q->fast_rtx;

	/* This loop handles time-out retransmissions, fast retransmissions,
	 * and retransmissions due to opening of window.
	 *
	 * RFC 2960 6.3.3 Handle T3-rtx Expiration
	 *
	 * E3) Determine how many of the earliest (i.e., lowest TSN)
	 * outstanding DATA chunks for the address for which the
	 * T3-rtx has expired will fit into a single packet, subject
	 * to the MTU constraint for the path corresponding to the
	 * destination transport address to which the retransmission
	 * is being sent (this may be different from the address for
	 * which the timer expires [see Section 6.4]). Call this value
	 * K. Bundle and retransmit those K DATA chunks in a single
	 * packet to the destination endpoint.
	 *
	 * [Just to be painfully clear, if we are retransmitting
	 * because a timeout just happened, we should send only ONE
	 * packet of retransmitted data.]
	 *
	 * For fast retransmissions we also send only ONE packet.  However,
	 * if we are just flushing the queue due to open window, we'll
	 * try to send as much as possible.
	 */
	list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) {
		/* If the chunk is abandoned, move it to abandoned list. */
		if (sctp_chunk_abandoned(chunk)) {
			list_del_init(&chunk->transmitted_list);
			sctp_insert_list(&q->abandoned,
					 &chunk->transmitted_list);
			continue;
		}

		/* Make sure that Gap Acked TSNs are not retransmitted.  A
		 * simple approach is just to move such TSNs out of the
		 * way and into a 'transmitted' queue and skip to the
		 * next chunk.
		 */
		if (chunk->tsn_gap_acked) {
			list_move_tail(&chunk->transmitted_list,
				       &transport->transmitted);
			continue;
		}

		/* If we are doing fast retransmit, ignore non-fast_retransmit
		 * chunks
		 */
		if (fast_rtx && !chunk->fast_retransmit)
			continue;

redo:
		/* Attempt to append this chunk to the packet. */
		status = sctp_packet_append_chunk(pkt, chunk);

		switch (status) {
		case SCTP_XMIT_PMTU_FULL:
			if (!pkt->has_data && !pkt->has_cookie_echo) {
				/* If this packet did not contain DATA then
				 * retransmission did not happen, so do it
				 * again.  We'll ignore the error here since
				 * control chunks are already freed so there
				 * is nothing we can do.
				 */
				sctp_packet_transmit(pkt);
				goto redo;
			}

			/* Send this packet.  */
			error = sctp_packet_transmit(pkt);

			/* If we are retransmitting, we should only
			 * send a single packet.
			 * Otherwise, try appending this chunk again.
			 */
			if (rtx_timeout || fast_rtx)
				done = 1;
			else
				goto redo;

			/* Bundle next chunk in the next round.  */
			break;

		case SCTP_XMIT_RWND_FULL:
			/* Send this packet. */
			error = sctp_packet_transmit(pkt);

			/* Stop sending DATA as there is no more room
			 * at the receiver.
			 */
			done = 1;
			break;

		case SCTP_XMIT_NAGLE_DELAY:
			/* Send this packet. */
			error = sctp_packet_transmit(pkt);

			/* Stop sending DATA because of nagle delay. */
			done = 1;
			break;

		default:
			/* The append was successful, so add this chunk to
			 * the transmitted list.
			 */
			list_move_tail(&chunk->transmitted_list,
				       &transport->transmitted);

			/* Mark the chunk as ineligible for fast retransmit
			 * after it is retransmitted.
			 */
			if (chunk->fast_retransmit == SCTP_NEED_FRTX)
				chunk->fast_retransmit = SCTP_DONT_FRTX;

			q->empty = 0;
			q->asoc->stats.rtxchunks++;
			break;
		}

		/* Set the timer if there were no errors */
		if (!error && !timer)
			timer = 1;

		if (done)
			break;
	}

	/* If we are here due to a retransmit timeout or a fast
	 * retransmit and if there are any chunks left in the retransmit
	 * queue that could not fit in the PMTU sized packet, they need
	 * to be marked as ineligible for a subsequent fast retransmit.
	 */
	if (rtx_timeout || fast_rtx) {
		list_for_each_entry(chunk1, lqueue, transmitted_list) {
			if (chunk1->fast_retransmit == SCTP_NEED_FRTX)
				chunk1->fast_retransmit = SCTP_DONT_FRTX;
		}
	}

	*start_timer = timer;

	/* Clear fast retransmit hint */
	if (fast_rtx)
		q->fast_rtx = 0;

	return error;
}

/* Cork the outqueue so queued chunks are really queued. */
int sctp_outq_uncork(struct sctp_outq *q)
{
698
	if (q->cork)
Linus Torvalds's avatar
Linus Torvalds committed
699
		q->cork = 0;
700
701

	return sctp_outq_flush(q, 0);
Linus Torvalds's avatar
Linus Torvalds committed
702
703
}

704

Linus Torvalds's avatar
Linus Torvalds committed
705
706
707
708
709
710
711
712
713
/*
 * Try to flush an outqueue.
 *
 * Description: Send everything in q which we legally can, subject to
 * congestion limitations.
 * * Note: This function can be called from multiple contexts so appropriate
 * locking concerns must be made.  Today we use the sock lock to protect
 * this function.
 */
714
static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
Linus Torvalds's avatar
Linus Torvalds committed
715
716
717
718
719
720
721
722
723
{
	struct sctp_packet *packet;
	struct sctp_packet singleton;
	struct sctp_association *asoc = q->asoc;
	__u16 sport = asoc->base.bind_addr.port;
	__u16 dport = asoc->peer.port;
	__u32 vtag = asoc->peer.i.init_tag;
	struct sctp_transport *transport = NULL;
	struct sctp_transport *new_transport;
724
	struct sctp_chunk *chunk, *tmp;
Linus Torvalds's avatar
Linus Torvalds committed
725
726
727
	sctp_xmit_t status;
	int error = 0;
	int start_timer = 0;
728
	int one_packet = 0;
Linus Torvalds's avatar
Linus Torvalds committed
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746

	/* These transports have chunks to send. */
	struct list_head transport_list;
	struct list_head *ltransport;

	INIT_LIST_HEAD(&transport_list);
	packet = NULL;

	/*
	 * 6.10 Bundling
	 *   ...
	 *   When bundling control chunks with DATA chunks, an
	 *   endpoint MUST place control chunks first in the outbound
	 *   SCTP packet.  The transmitter MUST transmit DATA chunks
	 *   within a SCTP packet in increasing order of TSN.
	 *   ...
	 */

747
	list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
748
749
750
751
752
753
754
755
756
757
		/* RFC 5061, 5.3
		 * F1) This means that until such time as the ASCONF
		 * containing the add is acknowledged, the sender MUST
		 * NOT use the new IP address as a source for ANY SCTP
		 * packet except on carrying an ASCONF Chunk.
		 */
		if (asoc->src_out_of_asoc_ok &&
		    chunk->chunk_hdr->type != SCTP_CID_ASCONF)
			continue;

758
759
		list_del_init(&chunk->list);

Linus Torvalds's avatar
Linus Torvalds committed
760
761
762
763
		/* Pick the right transport to use. */
		new_transport = chunk->transport;

		if (!new_transport) {
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
			/*
			 * If we have a prior transport pointer, see if
			 * the destination address of the chunk
			 * matches the destination address of the
			 * current transport.  If not a match, then
			 * try to look up the transport with a given
			 * destination address.  We do this because
			 * after processing ASCONFs, we may have new
			 * transports created.
			 */
			if (transport &&
			    sctp_cmp_addr_exact(&chunk->dest,
						&transport->ipaddr))
					new_transport = transport;
			else
				new_transport = sctp_assoc_lookup_paddr(asoc,
								&chunk->dest);

			/* if we still don't have a new transport, then
			 * use the current active path.
			 */
			if (!new_transport)
				new_transport = asoc->peer.active_path;
787
		} else if ((new_transport->state == SCTP_INACTIVE) ||
788
789
			   (new_transport->state == SCTP_UNCONFIRMED) ||
			   (new_transport->state == SCTP_PF)) {
790
791
			/* If the chunk is Heartbeat or Heartbeat Ack,
			 * send it to chunk->transport, even if it's
Linus Torvalds's avatar
Linus Torvalds committed
792
793
794
			 * inactive.
			 *
			 * 3.3.6 Heartbeat Acknowledgement:
795
			 * ...
Linus Torvalds's avatar
Linus Torvalds committed
796
797
798
			 * A HEARTBEAT ACK is always sent to the source IP
			 * address of the IP datagram containing the
			 * HEARTBEAT chunk to which this ack is responding.
799
			 * ...
800
801
			 *
			 * ASCONF_ACKs also must be sent to the source.
Linus Torvalds's avatar
Linus Torvalds committed
802
803
			 */
			if (chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT &&
804
805
			    chunk->chunk_hdr->type != SCTP_CID_HEARTBEAT_ACK &&
			    chunk->chunk_hdr->type != SCTP_CID_ASCONF_ACK)
Linus Torvalds's avatar
Linus Torvalds committed
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
				new_transport = asoc->peer.active_path;
		}

		/* Are we switching transports?
		 * Take care of transport locks.
		 */
		if (new_transport != transport) {
			transport = new_transport;
			if (list_empty(&transport->send_ready)) {
				list_add_tail(&transport->send_ready,
					      &transport_list);
			}
			packet = &transport->packet;
			sctp_packet_config(packet, vtag,
					   asoc->peer.ecn_capable);
		}

		switch (chunk->chunk_hdr->type) {
		/*
		 * 6.10 Bundling
		 *   ...
		 *   An endpoint MUST NOT bundle INIT, INIT ACK or SHUTDOWN
		 *   COMPLETE with any other chunks.  [Send them immediately.]
		 */
		case SCTP_CID_INIT:
		case SCTP_CID_INIT_ACK:
		case SCTP_CID_SHUTDOWN_COMPLETE:
			sctp_packet_init(&singleton, transport, sport, dport);
			sctp_packet_config(&singleton, vtag, 0);
			sctp_packet_append_chunk(&singleton, chunk);
			error = sctp_packet_transmit(&singleton);
			if (error < 0)
				return error;
			break;

		case SCTP_CID_ABORT:
842
843
844
			if (sctp_test_T_bit(chunk)) {
				packet->vtag = asoc->c.my_vtag;
			}
845
846
847
848
849
		/* The following chunks are "response" chunks, i.e.
		 * they are generated in response to something we
		 * received.  If we are sending these, then we can
		 * send only 1 packet containing these chunks.
		 */
Linus Torvalds's avatar
Linus Torvalds committed
850
851
852
		case SCTP_CID_HEARTBEAT_ACK:
		case SCTP_CID_SHUTDOWN_ACK:
		case SCTP_CID_COOKIE_ACK:
853
854
		case SCTP_CID_COOKIE_ECHO:
		case SCTP_CID_ERROR:
Linus Torvalds's avatar
Linus Torvalds committed
855
856
		case SCTP_CID_ECN_CWR:
		case SCTP_CID_ASCONF_ACK:
857
			one_packet = 1;
Lucas De Marchi's avatar
Lucas De Marchi committed
858
			/* Fall through */
859
860
861
862
863
864

		case SCTP_CID_SACK:
		case SCTP_CID_HEARTBEAT:
		case SCTP_CID_SHUTDOWN:
		case SCTP_CID_ECN_ECNE:
		case SCTP_CID_ASCONF:
Linus Torvalds's avatar
Linus Torvalds committed
865
		case SCTP_CID_FWD_TSN:
866
867
868
869
870
			status = sctp_packet_transmit_chunk(packet, chunk,
							    one_packet);
			if (status  != SCTP_XMIT_OK) {
				/* put the chunk back */
				list_add(&chunk->list, &q->control_chunk_list);
871
872
			} else {
				asoc->stats.octrlchunks++;
873
874
875
876
				/* PR-SCTP C5) If a FORWARD TSN is sent, the
				 * sender MUST assure that at least one T3-rtx
				 * timer is running.
				 */
877
878
				if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN)
					sctp_transport_reset_timers(transport);
879
			}
Linus Torvalds's avatar
Linus Torvalds committed
880
881
882
883
884
			break;

		default:
			/* We built a chunk with an illegal type! */
			BUG();
885
		}
Linus Torvalds's avatar
Linus Torvalds committed
886
887
	}

888
889
890
	if (q->asoc->src_out_of_asoc_ok)
		goto sctp_flush_out;

Linus Torvalds's avatar
Linus Torvalds committed
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
	/* Is it OK to send data chunks?  */
	switch (asoc->state) {
	case SCTP_STATE_COOKIE_ECHOED:
		/* Only allow bundling when this packet has a COOKIE-ECHO
		 * chunk.
		 */
		if (!packet || !packet->has_cookie_echo)
			break;

		/* fallthru */
	case SCTP_STATE_ESTABLISHED:
	case SCTP_STATE_SHUTDOWN_PENDING:
	case SCTP_STATE_SHUTDOWN_RECEIVED:
		/*
		 * RFC 2960 6.1  Transmission of DATA Chunks
		 *
		 * C) When the time comes for the sender to transmit,
		 * before sending new DATA chunks, the sender MUST
		 * first transmit any outstanding DATA chunks which
		 * are marked for retransmission (limited by the
		 * current cwnd).
		 */
		if (!list_empty(&q->retransmit)) {
914
915
			if (asoc->peer.retran_path->state == SCTP_UNCONFIRMED)
				goto sctp_flush_out;
Linus Torvalds's avatar
Linus Torvalds committed
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
			if (transport == asoc->peer.retran_path)
				goto retran;

			/* Switch transports & prepare the packet.  */

			transport = asoc->peer.retran_path;

			if (list_empty(&transport->send_ready)) {
				list_add_tail(&transport->send_ready,
					      &transport_list);
			}

			packet = &transport->packet;
			sctp_packet_config(packet, vtag,
					   asoc->peer.ecn_capable);
		retran:
			error = sctp_outq_flush_rtx(q, packet,
						    rtx_timeout, &start_timer);

			if (start_timer)
936
				sctp_transport_reset_timers(transport);
Linus Torvalds's avatar
Linus Torvalds committed
937
938
939
940
941
942
943
944
945
946
947
948
949
950

			/* This can happen on COOKIE-ECHO resend.  Only
			 * one chunk can get bundled with a COOKIE-ECHO.
			 */
			if (packet->has_cookie_echo)
				goto sctp_flush_out;

			/* Don't send new data if there is still data
			 * waiting to retransmit.
			 */
			if (!list_empty(&q->retransmit))
				goto sctp_flush_out;
		}

951
952
953
954
955
956
957
958
		/* Apply Max.Burst limitation to the current transport in
		 * case it will be used for new data.  We are going to
		 * rest it before we return, but we want to apply the limit
		 * to the currently queued data.
		 */
		if (transport)
			sctp_transport_burst_limited(transport);

Linus Torvalds's avatar
Linus Torvalds committed
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
		/* Finally, transmit new packets.  */
		while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
			/* RFC 2960 6.5 Every DATA chunk MUST carry a valid
			 * stream identifier.
			 */
			if (chunk->sinfo.sinfo_stream >=
			    asoc->c.sinit_num_ostreams) {

				/* Mark as failed send. */
				sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
				sctp_chunk_free(chunk);
				continue;
			}

			/* Has this chunk expired? */
			if (sctp_chunk_abandoned(chunk)) {
				sctp_chunk_fail(chunk, 0);
				sctp_chunk_free(chunk);
				continue;
			}

			/* If there is a specified transport, use it.
			 * Otherwise, we want to use the active path.
			 */
			new_transport = chunk->transport;
984
			if (!new_transport ||
985
			    ((new_transport->state == SCTP_INACTIVE) ||
986
987
			     (new_transport->state == SCTP_UNCONFIRMED) ||
			     (new_transport->state == SCTP_PF)))
Linus Torvalds's avatar
Linus Torvalds committed
988
				new_transport = asoc->peer.active_path;
989
990
			if (new_transport->state == SCTP_UNCONFIRMED)
				continue;
Linus Torvalds's avatar
Linus Torvalds committed
991
992
993
994
995
996
997
998
999
1000

			/* Change packets if necessary.  */
			if (new_transport != transport) {
				transport = new_transport;

				/* Schedule to have this transport's
				 * packet flushed.
				 */
				if (list_empty(&transport->send_ready)) {
					list_add_tail(&transport->send_ready,