/*
 * iSCSI Initiator over TCP/IP Data-Path
 *
 * Copyright (C) 2004 Dmitry Yusupov
 * Copyright (C) 2004 Alex Aizman
 * Copyright (C) 2005 - 2006 Mike Christie
 * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
 * maintained by open-iscsi@googlegroups.com
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * See the file COPYING included with this distribution for more details.
 *
 * Credits:
 *	Christoph Hellwig
 *	FUJITA Tomonori
 *	Arne Redlich
 *	Zhenyu Wang
 */

#include <linux/types.h>
#include <linux/list.h>
#include <linux/inet.h>
#include <linux/file.h>
#include <linux/blkdev.h>
#include <linux/crypto.h>
#include <linux/delay.h>
#include <linux/kfifo.h>
#include <linux/scatterlist.h>
#include <net/tcp.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi.h>
#include <scsi/scsi_transport_iscsi.h>

#include "iscsi_tcp.h"

MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI/TCP data-path");
MODULE_LICENSE("GPL");

/* Data-path tracing is compiled out unless DEBUG_TCP is defined here. */
#undef DEBUG_TCP
/* While DEBUG_ASSERT is defined, BUG_ON() keeps its normal definition. */
#define DEBUG_ASSERT

#ifdef DEBUG_TCP
#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
#else
#define debug_tcp(fmt...)
#endif

/* Without DEBUG_ASSERT every BUG_ON() in this file expands to nothing. */
#ifndef DEBUG_ASSERT
#ifdef BUG_ON
#undef BUG_ON
#endif
#define BUG_ON(expr)
#endif

/* Module parameter "max_lun" (default 512), exposed with S_IRUGO permissions. */
static unsigned int iscsi_max_lun = 512;
module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);

/*
 * Forward declaration: iscsi_tcp_hdr_recv_prep() installs this callback
 * before the function itself is defined further down in the file.
 */
static int iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
				   struct iscsi_segment *segment);

/*
 * Scatterlist handling: inside the iscsi_segment, we
 * remember an index into the scatterlist, and set data/size
 * to the current scatterlist entry. For highmem pages, we
 * kmap as needed.
 *
 * Note that the page is unmapped when we return from
 * TCP's data_ready handler, so we may end up mapping and
 * unmapping the same page repeatedly. The whole reason
 * for this is that we shouldn't keep the page mapped
 * outside the softirq.
 */

/**
87
88
89
 * iscsi_tcp_segment_init_sg - init indicated scatterlist entry
 * @segment: the buffer object
 * @sg: scatterlist
90
91
 * @offset: byte offset into that sg entry
 *
92
 * This function sets up the segment so that subsequent
93
94
95
96
 * data is copied to the indicated sg entry, at the given
 * offset.
 */
static inline void
97
98
iscsi_tcp_segment_init_sg(struct iscsi_segment *segment,
			  struct scatterlist *sg, unsigned int offset)
99
{
100
101
102
103
104
	segment->sg = sg;
	segment->sg_offset = offset;
	segment->size = min(sg->length - offset,
			    segment->total_size - segment->total_copied);
	segment->data = NULL;
105
106
107
}

/**
108
109
110
 * iscsi_tcp_segment_map - map the current S/G page
 * @segment: iscsi_segment
 * @recv: 1 if called from recv path
111
112
113
114
115
116
 *
 * We only need to possibly kmap data if scatter lists are being used,
 * because the iscsi passthrough and internal IO paths will never use high
 * mem pages.
 */
static inline void
117
iscsi_tcp_segment_map(struct iscsi_segment *segment, int recv)
118
119
120
{
	struct scatterlist *sg;

121
	if (segment->data != NULL || !segment->sg)
122
123
		return;

124
125
	sg = segment->sg;
	BUG_ON(segment->sg_mapped);
126
	BUG_ON(sg->length == 0);
127
128
129
130
131
132
133
134
135
136
137
138
139
140

	/*
	 * If the page count is greater than one it is ok to send
	 * to the network layer's zero copy send path. If not we
	 * have to go the slow sendmsg path. We always map for the
	 * recv path.
	 */
	if (page_count(sg_page(sg)) >= 1 && !recv)
		return;

	debug_tcp("iscsi_tcp_segment_map %s %p\n", recv ? "recv" : "xmit",
		  segment);
	segment->sg_mapped = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
	segment->data = segment->sg_mapped + sg->offset + segment->sg_offset;
141
142
143
}

static inline void
144
iscsi_tcp_segment_unmap(struct iscsi_segment *segment)
145
{
146
147
148
149
150
151
152
	debug_tcp("iscsi_tcp_segment_unmap %p\n", segment);

	if (segment->sg_mapped) {
		debug_tcp("iscsi_tcp_segment_unmap valid\n");
		kunmap_atomic(segment->sg_mapped, KM_SOFTIRQ0);
		segment->sg_mapped = NULL;
		segment->data = NULL;
153
154
155
156
157
158
159
	}
}

/*
 * Splice the digest buffer into the buffer
 */
static inline void
160
iscsi_tcp_segment_splice_digest(struct iscsi_segment *segment, void *digest)
161
{
162
163
164
165
166
167
168
	segment->data = digest;
	segment->digest_len = ISCSI_DIGEST_SIZE;
	segment->total_size += ISCSI_DIGEST_SIZE;
	segment->size = ISCSI_DIGEST_SIZE;
	segment->copied = 0;
	segment->sg = NULL;
	segment->hash = NULL;
169
170
171
}

/**
172
173
174
175
 * iscsi_tcp_segment_done - check whether the segment is complete
 * @segment: iscsi segment to check
 * @recv: set to one of this is called from the recv path
 * @copied: number of bytes copied
176
 *
177
 * Check if we're done receiving this segment. If the receive
178
179
180
181
182
183
184
185
 * buffer is full but we expect more data, move on to the
 * next entry in the scatterlist.
 *
 * If the amount of data we received isn't a multiple of 4,
 * we will transparently receive the pad bytes, too.
 *
 * This function must be re-entrant.
 */
186
static inline int
187
iscsi_tcp_segment_done(struct iscsi_segment *segment, int recv, unsigned copied)
188
{
189
	static unsigned char padbuf[ISCSI_PAD_LEN];
190
	struct scatterlist sg;
191
	unsigned int pad;
192

193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
	debug_tcp("copied %u %u size %u %s\n", segment->copied, copied,
		  segment->size, recv ? "recv" : "xmit");
	if (segment->hash && copied) {
		/*
		 * If a segment is kmapd we must unmap it before sending
		 * to the crypto layer since that will try to kmap it again.
		 */
		iscsi_tcp_segment_unmap(segment);

		if (!segment->data) {
			sg_init_table(&sg, 1);
			sg_set_page(&sg, sg_page(segment->sg), copied,
				    segment->copied + segment->sg_offset +
							segment->sg->offset);
		} else
			sg_init_one(&sg, segment->data + segment->copied,
				    copied);
		crypto_hash_update(segment->hash, &sg, copied);
	}

	segment->copied += copied;
	if (segment->copied < segment->size) {
		iscsi_tcp_segment_map(segment, recv);
216
217
		return 0;
	}
218

219
220
221
	segment->total_copied += segment->copied;
	segment->copied = 0;
	segment->size = 0;
222

223
	/* Unmap the current scatterlist page, if there is one. */
224
	iscsi_tcp_segment_unmap(segment);
225
226

	/* Do we have more scatterlist entries? */
227
228
229
	debug_tcp("total copied %u total size %u\n", segment->total_copied,
		   segment->total_size);
	if (segment->total_copied < segment->total_size) {
230
		/* Proceed to the next entry in the scatterlist. */
231
232
233
234
		iscsi_tcp_segment_init_sg(segment, sg_next(segment->sg),
					  0);
		iscsi_tcp_segment_map(segment, recv);
		BUG_ON(segment->size == 0);
235
236
237
238
		return 0;
	}

	/* Do we need to handle padding? */
239
	pad = iscsi_padding(segment->total_copied);
240
	if (pad != 0) {
241
		debug_tcp("consume %d pad bytes\n", pad);
242
243
244
		segment->total_size += pad;
		segment->size = pad;
		segment->data = padbuf;
245
246
247
248
		return 0;
	}

	/*
249
	 * Set us up for transferring the data digest. hdr digest
250
251
	 * is completely handled in hdr done function.
	 */
252
253
254
255
256
	if (segment->hash) {
		crypto_hash_final(segment->hash, segment->digest);
		iscsi_tcp_segment_splice_digest(segment,
				 recv ? segment->recv_digest : segment->digest);
		return 0;
257
	}
258

259
260
	return 1;
}
261

262
/**
263
 * iscsi_tcp_xmit_segment - transmit segment
264
 * @tcp_conn: the iSCSI TCP connection
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
 * @segment: the buffer to transmnit
 *
 * This function transmits as much of the buffer as
 * the network layer will accept, and returns the number of
 * bytes transmitted.
 *
 * If CRC hashing is enabled, the function will compute the
 * hash as it goes. When the entire segment has been transmitted,
 * it will retrieve the hash value and send it as well.
 */
static int
iscsi_tcp_xmit_segment(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment)
{
	struct socket *sk = tcp_conn->sock;
	unsigned int copied = 0;
	int r = 0;

	while (!iscsi_tcp_segment_done(segment, 0, r)) {
		struct scatterlist *sg;
		unsigned int offset, copy;
		int flags = 0;

		r = 0;
		offset = segment->copied;
		copy = segment->size - offset;

		if (segment->total_copied + segment->size < segment->total_size)
			flags |= MSG_MORE;

		/* Use sendpage if we can; else fall back to sendmsg */
		if (!segment->data) {
			sg = segment->sg;
			offset += segment->sg_offset + sg->offset;
			r = tcp_conn->sendpage(sk, sg_page(sg), offset, copy,
					       flags);
		} else {
			struct msghdr msg = { .msg_flags = flags };
			struct kvec iov = {
				.iov_base = segment->data + offset,
				.iov_len = copy
			};

			r = kernel_sendmsg(sk, &msg, &iov, 1, copy);
		}

		if (r < 0) {
			iscsi_tcp_segment_unmap(segment);
			if (copied || r == -EAGAIN)
				break;
			return r;
		}
		copied += r;
	}
	return copied;
}

/**
 * iscsi_tcp_segment_recv - copy data to segment
 * @tcp_conn: the iSCSI TCP connection
 * @segment: the buffer to copy to
326
327
328
329
330
331
332
333
334
335
336
337
338
339
 * @ptr: data pointer
 * @len: amount of data available
 *
 * This function copies up to @len bytes to the
 * given buffer, and returns the number of bytes
 * consumed, which can actually be less than @len.
 *
 * If hash digest is enabled, the function will update the
 * hash while copying.
 * Combining these two operations doesn't buy us a lot (yet),
 * but in the future we could implement combined copy+crc,
 * just way we do for network layer checksums.
 */
static int
340
341
342
iscsi_tcp_segment_recv(struct iscsi_tcp_conn *tcp_conn,
		       struct iscsi_segment *segment, const void *ptr,
		       unsigned int len)
343
{
344
	unsigned int copy = 0, copied = 0;
345

346
347
348
349
350
	while (!iscsi_tcp_segment_done(segment, 1, copy)) {
		if (copied == len) {
			debug_tcp("iscsi_tcp_segment_recv copied %d bytes\n",
				  len);
			break;
351
		}
352
353
354
355

		copy = min(len - copied, segment->size - segment->copied);
		debug_tcp("iscsi_tcp_segment_recv copying %d\n", copy);
		memcpy(segment->data + segment->copied, ptr + copied, copy);
356
357
358
359
360
361
362
363
364
365
		copied += copy;
	}
	return copied;
}

/**
 * iscsi_tcp_dgst_header - compute the digest of a linear header buffer
 * @hash: hash descriptor to use
 * @hdr: start of the header bytes
 * @hdrlen: number of bytes to digest
 * @digest: output buffer of ISCSI_DIGEST_SIZE bytes
 *
 * Wraps the buffer in a one-entry scatterlist and hands it to
 * crypto_hash_digest() in a single call.
 */
static inline void
iscsi_tcp_dgst_header(struct hash_desc *hash, const void *hdr, size_t hdrlen,
		      unsigned char digest[ISCSI_DIGEST_SIZE])
{
	struct scatterlist sg;

	sg_init_one(&sg, hdr, hdrlen);
	crypto_hash_digest(hash, &sg, hdrlen, digest);
}

static inline int
iscsi_tcp_dgst_verify(struct iscsi_tcp_conn *tcp_conn,
373
		      struct iscsi_segment *segment)
374
{
375
	if (!segment->digest_len)
376
377
		return 1;

378
379
	if (memcmp(segment->recv_digest, segment->digest,
		   segment->digest_len)) {
380
381
382
383
384
385
386
387
		debug_scsi("digest mismatch\n");
		return 0;
	}

	return 1;
}

/*
388
 * Helper function to set up segment buffer
389
390
 */
static inline void
391
392
__iscsi_segment_init(struct iscsi_segment *segment, size_t size,
		     iscsi_segment_done_fn_t *done, struct hash_desc *hash)
393
{
394
395
396
	memset(segment, 0, sizeof(*segment));
	segment->total_size = size;
	segment->done = done;
397
398

	if (hash) {
399
		segment->hash = hash;
400
401
402
403
404
		crypto_hash_init(hash);
	}
}

static inline void
405
406
407
iscsi_segment_init_linear(struct iscsi_segment *segment, void *data,
			  size_t size, iscsi_segment_done_fn_t *done,
			  struct hash_desc *hash)
408
{
409
410
411
	__iscsi_segment_init(segment, size, done, hash);
	segment->data = data;
	segment->size = size;
412
413
414
}

static inline int
415
416
417
418
iscsi_segment_seek_sg(struct iscsi_segment *segment,
		      struct scatterlist *sg_list, unsigned int sg_count,
		      unsigned int offset, size_t size,
		      iscsi_segment_done_fn_t *done, struct hash_desc *hash)
419
{
420
	struct scatterlist *sg;
421
422
	unsigned int i;

423
424
425
426
427
428
429
430
	debug_scsi("iscsi_segment_seek_sg offset %u size %llu\n",
		  offset, size);
	__iscsi_segment_init(segment, size, done, hash);
	for_each_sg(sg_list, sg, sg_count, i) {
		debug_scsi("sg %d, len %u offset %u\n", i, sg->length,
			   sg->offset);
		if (offset < sg->length) {
			iscsi_tcp_segment_init_sg(segment, sg, offset);
431
			return 0;
432
		}
433
		offset -= sg->length;
434
435
	}

436
437
438
439
	return ISCSI_ERR_DATA_OFFSET;
}

/**
440
 * iscsi_tcp_hdr_recv_prep - prep segment for hdr reception
441
442
443
444
445
446
447
448
449
450
451
 * @tcp_conn: iscsi connection to prep for
 *
 * This function always passes NULL for the hash argument, because when this
 * function is called we do not yet know the final size of the header and want
 * to delay the digest processing until we know that.
 */
static void
iscsi_tcp_hdr_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	debug_tcp("iscsi_tcp_hdr_recv_prep(%p%s)\n", tcp_conn,
		  tcp_conn->iscsi_conn->hdrdgst_en ? ", digest enabled" : "");
452
	iscsi_segment_init_linear(&tcp_conn->in.segment,
453
454
455
456
457
458
459
460
461
				tcp_conn->in.hdr_buf, sizeof(struct iscsi_hdr),
				iscsi_tcp_hdr_recv_done, NULL);
}

/*
 * Handle incoming reply to any other type of command
 */
static int
iscsi_tcp_data_recv_done(struct iscsi_tcp_conn *tcp_conn,
462
			 struct iscsi_segment *segment)
463
464
465
466
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	int rc = 0;

467
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
468
469
470
471
472
473
474
475
		return ISCSI_ERR_DATA_DGST;

	rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr,
			conn->data, tcp_conn->in.datalen);
	if (rc)
		return rc;

	iscsi_tcp_hdr_recv_prep(tcp_conn);
476
477
478
	return 0;
}

479
480
481
482
483
484
485
486
487
static void
iscsi_tcp_data_recv_prep(struct iscsi_tcp_conn *tcp_conn)
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct hash_desc *rx_hash = NULL;

	if (conn->datadgst_en)
		rx_hash = &tcp_conn->rx_hash;

488
	iscsi_segment_init_linear(&tcp_conn->in.segment,
489
490
491
492
				conn->data, tcp_conn->in.datalen,
				iscsi_tcp_data_recv_done, rx_hash);
}

Mike Christie's avatar
Mike Christie committed
493
494
495
496
/*
 * must be called with session lock
 */
static void
497
iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
498
{
499
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
500
	struct iscsi_r2t_info *r2t;
501

502
503
504
505
506
507
508
	/* flush ctask's r2t queues */
	while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		debug_scsi("iscsi_tcp_cleanup_ctask pending r2t dropped\n");
	}

509
510
511
512
513
514
	r2t = tcp_ctask->r2t;
	if (r2t != NULL) {
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		tcp_ctask->r2t = NULL;
	}
515
516
517
518
519
520
521
522
523
524
}

/**
 * iscsi_data_rsp - SCSI Data-In Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
525
526
527
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
528
	struct iscsi_session *session = conn->session;
529
	struct scsi_cmnd *sc = ctask->sc;
530
531
	int datasn = be32_to_cpu(rhdr->datasn);

532
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
533
	if (tcp_conn->in.datalen == 0)
534
535
		return 0;

536
537
538
	if (tcp_ctask->exp_datasn != datasn) {
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, datasn);
539
		return ISCSI_ERR_DATASN;
540
	}
541

542
	tcp_ctask->exp_datasn++;
543

544
	tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
545
	if (tcp_ctask->data_offset + tcp_conn->in.datalen > scsi_bufflen(sc)) {
546
547
		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
		          __FUNCTION__, tcp_ctask->data_offset,
548
		          tcp_conn->in.datalen, scsi_bufflen(sc));
549
		return ISCSI_ERR_DATA_OFFSET;
550
	}
551
552

	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
553
		sc->result = (DID_OK << 16) | rhdr->cmd_status;
554
		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
555
556
		if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
		                   ISCSI_FLAG_DATA_OVERFLOW)) {
557
558
559
			int res_count = be32_to_cpu(rhdr->residual_count);

			if (res_count > 0 &&
560
561
			    (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW ||
			     res_count <= scsi_bufflen(sc)))
562
				scsi_set_resid(sc, res_count);
563
			else
564
565
				sc->result = (DID_BAD_TARGET << 16) |
					rhdr->cmd_status;
566
		}
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
	}

	conn->datain_pdus_cnt++;
	return 0;
}

/**
 * iscsi_solicit_data_init - initialize first Data-Out
 * @conn: iscsi connection
 * @ctask: scsi command task
 * @r2t: R2T info
 *
 * Notes:
 *	Initialize first Data-Out within this R2T sequence and finds
 *	proper data_offset within this SCSI command.
 *
 *	This function is called with connection lock taken.
 **/
static void
iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
			struct iscsi_r2t_info *r2t)
{
	struct iscsi_data *hdr;

591
	hdr = &r2t->dtask.hdr;
592
593
594
595
596
	memset(hdr, 0, sizeof(struct iscsi_data));
	hdr->ttt = r2t->ttt;
	hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
	r2t->solicit_datasn++;
	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
597
598
	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
	hdr->itt = ctask->hdr->itt;
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
	hdr->exp_statsn = r2t->exp_statsn;
	hdr->offset = cpu_to_be32(r2t->data_offset);
	if (r2t->data_length > conn->max_xmit_dlength) {
		hton24(hdr->dlength, conn->max_xmit_dlength);
		r2t->data_count = conn->max_xmit_dlength;
		hdr->flags = 0;
	} else {
		hton24(hdr->dlength, r2t->data_length);
		r2t->data_count = r2t->data_length;
		hdr->flags = ISCSI_FLAG_CMD_FINAL;
	}
	conn->dataout_pdus_cnt++;

	r2t->sent = 0;
}

/**
 * iscsi_r2t_rsp - iSCSI R2T Response processing
 * @conn: iscsi connection
 * @ctask: scsi command task
 **/
static int
iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
{
	struct iscsi_r2t_info *r2t;
	struct iscsi_session *session = conn->session;
625
626
627
	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
	struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
628
629
630
	int r2tsn = be32_to_cpu(rhdr->r2tsn);
	int rc;

631
632
633
	if (tcp_conn->in.datalen) {
		printk(KERN_ERR "iscsi_tcp: invalid R2t with datalen %d\n",
		       tcp_conn->in.datalen);
634
		return ISCSI_ERR_DATALEN;
635
	}
636

637
638
639
	if (tcp_ctask->exp_datasn != r2tsn){
		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
		          __FUNCTION__, tcp_ctask->exp_datasn, r2tsn);
640
		return ISCSI_ERR_R2TSN;
641
	}
642
643
644

	/* fill-in new R2T associated with the task */
	spin_lock(&session->lock);
645
646
	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);

647
	if (!ctask->sc || session->state != ISCSI_STATE_LOGGED_IN) {
648
649
650
651
652
		printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in "
		       "recovery...\n", ctask->itt);
		spin_unlock(&session->lock);
		return 0;
	}
653

654
	rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
655
656
657
658
	BUG_ON(!rc);

	r2t->exp_statsn = rhdr->statsn;
	r2t->data_length = be32_to_cpu(rhdr->data_length);
659
660
	if (r2t->data_length == 0) {
		printk(KERN_ERR "iscsi_tcp: invalid R2T with zero data len\n");
661
662
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
663
664
665
666
		spin_unlock(&session->lock);
		return ISCSI_ERR_DATALEN;
	}

667
668
669
670
671
	if (r2t->data_length > session->max_burst)
		debug_scsi("invalid R2T with data len %u and max burst %u."
			   "Attempting to execute request.\n",
			    r2t->data_length, session->max_burst);

672
	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
673
	if (r2t->data_offset + r2t->data_length > scsi_bufflen(ctask->sc)) {
674
675
		printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
		       "offset %u and total length %d\n", r2t->data_length,
676
		       r2t->data_offset, scsi_bufflen(ctask->sc));
677
678
679
		__kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
			    sizeof(void*));
		spin_unlock(&session->lock);
680
681
682
683
684
685
686
687
		return ISCSI_ERR_DATALEN;
	}

	r2t->ttt = rhdr->ttt; /* no flip */
	r2t->solicit_datasn = 0;

	iscsi_solicit_data_init(conn, ctask, r2t);

688
	tcp_ctask->exp_datasn = r2tsn + 1;
689
	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
690
	conn->r2t_pdus_cnt++;
691
692

	iscsi_requeue_ctask(ctask);
693
694
695
696
697
	spin_unlock(&session->lock);

	return 0;
}

698
699
700
701
702
/*
 * Handle incoming reply to DataIn command
 */
static int
iscsi_tcp_process_data_in(struct iscsi_tcp_conn *tcp_conn,
703
			  struct iscsi_segment *segment)
704
705
706
707
708
{
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr = tcp_conn->in.hdr;
	int rc;

709
	if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
		return ISCSI_ERR_DATA_DGST;

	/* check for non-exceptional status */
	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
		rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
		if (rc)
			return rc;
	}

	iscsi_tcp_hdr_recv_prep(tcp_conn);
	return 0;
}

/**
 * iscsi_tcp_hdr_dissect - process PDU header
 * @conn: iSCSI connection
 * @hdr: PDU header
 *
 * This function analyzes the header of the PDU received,
 * and performs several sanity checks. If the PDU is accompanied
 * by data, the receive buffer is set up to copy the incoming data
 * to the correct location.
 */
733
static int
734
iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
735
{
736
	int rc = 0, opcode, ahslen;
737
	struct iscsi_session *session = conn->session;
738
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
739
740
	struct iscsi_cmd_task *ctask;
	uint32_t itt;
741
742

	/* verify PDU length */
743
744
	tcp_conn->in.datalen = ntoh24(hdr->dlength);
	if (tcp_conn->in.datalen > conn->max_recv_dlength) {
745
		printk(KERN_ERR "iscsi_tcp: datalen %d > %d\n",
746
		       tcp_conn->in.datalen, conn->max_recv_dlength);
747
748
749
		return ISCSI_ERR_DATALEN;
	}

750
751
752
	/* Additional header segments. So far, we don't
	 * process additional headers.
	 */
753
	ahslen = hdr->hlength << 2;
754

755
	opcode = hdr->opcode & ISCSI_OPCODE_MASK;
756
	/* verify itt (itt encoding: age+cid+itt) */
757
758
	rc = iscsi_verify_itt(conn, hdr, &itt);
	if (rc == ISCSI_ERR_NO_SCSI_CMD) {
759
		/* XXX: what does this do? */
760
761
762
763
		tcp_conn->in.datalen = 0; /* force drop */
		return 0;
	} else if (rc)
		return rc;
764

765
766
	debug_tcp("opcode 0x%x ahslen %d datalen %d\n",
		  opcode, ahslen, tcp_conn->in.datalen);
767

768
769
	switch(opcode) {
	case ISCSI_OP_SCSI_DATA_IN:
770
771
		ctask = session->cmds[itt];
		rc = iscsi_data_rsp(conn, ctask);
772
773
		if (rc)
			return rc;
774
775
776
777
778
779
780
		if (tcp_conn->in.datalen) {
			struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
			struct hash_desc *rx_hash = NULL;

			/*
			 * Setup copy of Data-In into the Scsi_Cmnd
			 * Scatterlist case:
781
			 * We set up the iscsi_segment to point to the next
782
783
784
785
786
787
788
789
790
791
792
			 * scatterlist entry to copy to. As we go along,
			 * we move on to the next scatterlist entry and
			 * update the digest per-entry.
			 */
			if (conn->datadgst_en)
				rx_hash = &tcp_conn->rx_hash;

			debug_tcp("iscsi_tcp_begin_data_in(%p, offset=%d, "
				  "datalen=%d)\n", tcp_conn,
				  tcp_ctask->data_offset,
				  tcp_conn->in.datalen);
793
794
795
796
797
798
799
			return iscsi_segment_seek_sg(&tcp_conn->in.segment,
						     scsi_sglist(ctask->sc),
						     scsi_sg_count(ctask->sc),
						     tcp_ctask->data_offset,
						     tcp_conn->in.datalen,
						     iscsi_tcp_process_data_in,
						     rx_hash);
800
		}
801
802
		/* fall through */
	case ISCSI_OP_SCSI_CMD_RSP:
803
804
805
806
807
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
808
809
		break;
	case ISCSI_OP_R2T:
810
		ctask = session->cmds[itt];
811
812
		if (ahslen)
			rc = ISCSI_ERR_AHSLEN;
813
814
		else if (ctask->sc->sc_data_direction == DMA_TO_DEVICE)
			rc = iscsi_r2t_rsp(conn, ctask);
815
816
817
818
819
820
821
		else
			rc = ISCSI_ERR_PROTO;
		break;
	case ISCSI_OP_LOGIN_RSP:
	case ISCSI_OP_TEXT_RSP:
	case ISCSI_OP_REJECT:
	case ISCSI_OP_ASYNC_EVENT:
822
823
824
825
826
		/*
		 * It is possible that we could get a PDU with a buffer larger
		 * than 8K, but there are no targets that currently do this.
		 * For now we fail until we find a vendor that needs it
		 */
827
		if (ISCSI_DEF_MAX_RECV_SEG_LEN < tcp_conn->in.datalen) {
828
829
830
			printk(KERN_ERR "iscsi_tcp: received buffer of len %u "
			      "but conn buffer is only %u (opcode %0x)\n",
			      tcp_conn->in.datalen,
831
			      ISCSI_DEF_MAX_RECV_SEG_LEN, opcode);
832
833
834
835
			rc = ISCSI_ERR_PROTO;
			break;
		}

836
837
838
839
840
841
842
		/* If there's data coming in with the response,
		 * receive it to the connection's buffer.
		 */
		if (tcp_conn->in.datalen) {
			iscsi_tcp_data_recv_prep(tcp_conn);
			return 0;
		}
843
	/* fall through */
844
845
	case ISCSI_OP_LOGOUT_RSP:
	case ISCSI_OP_NOOP_IN:
846
847
848
849
850
851
852
	case ISCSI_OP_SCSI_TMFUNC_RSP:
		rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
		break;
	default:
		rc = ISCSI_ERR_BAD_OPCODE;
		break;
	}
853

854
855
856
857
858
859
	if (rc == 0) {
		/* Anything that comes with data should have
		 * been handled above. */
		if (tcp_conn->in.datalen)
			return ISCSI_ERR_PROTO;
		iscsi_tcp_hdr_recv_prep(tcp_conn);
860
861
	}

862
	return rc;
863
864
}

865
866
867
868
869
870
871
872
873
/**
 * iscsi_tcp_hdr_recv_done - process PDU header
 *
 * This is the callback invoked when the PDU header has
 * been received. If the header is followed by additional
 * header segments, we go back for more data.
 */
static int
iscsi_tcp_hdr_recv_done(struct iscsi_tcp_conn *tcp_conn,
874
			struct iscsi_segment *segment)
875
{
876
877
	struct iscsi_conn *conn = tcp_conn->iscsi_conn;
	struct iscsi_hdr *hdr;
878

879
880
881
882
883
	/* Check if there are additional header segments
	 * *prior* to computing the digest, because we
	 * may need to go back to the caller for more.
	 */
	hdr = (struct iscsi_hdr *) tcp_conn->in.hdr_buf;
884
	if (segment->copied == sizeof(struct iscsi_hdr) && hdr->hlength) {
885
886
887
888
889
890
891
892
893
		/* Bump the header length - the caller will
		 * just loop around and get the AHS for us, and
		 * call again. */
		unsigned int ahslen = hdr->hlength << 2;

		/* Make sure we don't overflow */
		if (sizeof(*hdr) + ahslen > sizeof(tcp_conn->in.hdr_buf))
			return ISCSI_ERR_AHSLEN;

894
895
		segment->total_size += ahslen;
		segment->size += ahslen;
896
		return 0;
897
898
	}

899
900
901
902
	/* We're done processing the header. See if we're doing
	 * header digests; if so, set up the recv_digest buffer
	 * and go back for more. */
	if (conn->hdrdgst_en) {
903
904
905
		if (segment->digest_len == 0) {
			iscsi_tcp_segment_splice_digest(segment,
							segment->recv_digest);
906
			return 0;
907
		}
908
		iscsi_tcp_dgst_header(&tcp_conn->rx_hash, hdr,
909
910
				      segment->total_copied - ISCSI_DIGEST_SIZE,
				      segment->digest);
911

912
		if (!iscsi_tcp_dgst_verify(tcp_conn, segment))
913
			return ISCSI_ERR_HDR_DGST;
914
	}
915
916
917

	tcp_conn->in.hdr = hdr;
	return iscsi_tcp_hdr_dissect(conn, hdr);
918
919
920
}

/**
921
 * iscsi_tcp_recv - TCP receive in sendfile fashion
922
923
924
925
926
927
 * @rd_desc: read descriptor
 * @skb: socket buffer
 * @offset: offset in skb
 * @len: skb->len - offset
 **/
static int
928
929
iscsi_tcp_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
	       unsigned int offset, size_t len)
930
931
{
	struct iscsi_conn *conn = rd_desc->arg.data;
932
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
933
	struct iscsi_segment *segment = &tcp_conn->in.segment;
934
935
936
	struct skb_seq_state seq;
	unsigned int consumed = 0;
	int rc = 0;
937

938
	debug_tcp("in %d bytes\n", skb->len - offset);
939
940
941
942
943
944

	if (unlikely(conn->suspend_rx)) {
		debug_tcp("conn %d Rx suspended!\n", conn->id);
		return 0;
	}

945
946
947
948
	skb_prepare_seq_read(skb, offset, skb->len, &seq);
	while (1) {
		unsigned int avail;
		const u8 *ptr;
949

950
		avail = skb_seq_read(consumed, &ptr, &seq);
951
952
953
		if (avail == 0) {
			debug_tcp("no more data avail. Consumed %d\n",
				  consumed);
954
			break;
955
956
		}
		BUG_ON(segment->copied >= segment->size);
957
958

		debug_tcp("skb %p ptr=%p avail=%u\n", skb, ptr, avail);
959
		rc = iscsi_tcp_segment_recv(tcp_conn, segment, ptr, avail);
960
961
962
		BUG_ON(rc == 0);
		consumed += rc;

963
964
965
		if (segment->total_copied >= segment->total_size) {
			debug_tcp("segment done\n");
			rc = segment->done(tcp_conn, segment);
966
967
968
			if (rc != 0) {
				skb_abort_seq_read(&seq);
				goto error;
969
			}
970

971
			/* The done() functions sets up the
972
			 * next segment. */
973
974
		}
	}
975
	skb_abort_seq_read(&seq);
976
977
	conn->rxdata_octets += consumed;
	return consumed;
978

979
980
981
982
error:
	debug_tcp("Error receiving PDU, errno=%d\n", rc);
	iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
	return 0;
983
984
985
986
987
988
}

static void
iscsi_tcp_data_ready(struct sock *sk, int flag)
{
	struct iscsi_conn *conn = sk->sk_user_data;
989
	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
990
991
992
993
	read_descriptor_t rd_desc;

	read_lock(&sk->sk_callback_lock);

994
	/*
995
	 * Use rd_desc to pass 'conn' to iscsi_tcp_recv.
996
	 * We set count to 1 because we want the network layer to
997
	 * hand us all the skbs that are available. iscsi_tcp_recv
998
999
	 * handled pdus that cross buffers or pdus that still need data.
	 */
1000
	rd_desc.arg.data = conn;
For faster browsing, not all history is shown. View entire blame