scsi_lib.c 59.1 KB
Newer Older
Linus Torvalds's avatar
Linus Torvalds committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/*
 *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
 *
 *  SCSI queueing library.
 *      Initial versions: Eric Youngdale (eric@andante.org).
 *                        Based upon conversations with large numbers
 *                        of people at Linux Expo.
 */

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>
#include <linux/kernel.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/delay.h>
19
#include <linux/hardirq.h>
Jens Axboe's avatar
Jens Axboe committed
20
#include <linux/scatterlist.h>
Linus Torvalds's avatar
Linus Torvalds committed
21
22

#include <scsi/scsi.h>
23
#include <scsi/scsi_cmnd.h>
Linus Torvalds's avatar
Linus Torvalds committed
24
25
26
27
28
29
30
31
32
33
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_driver.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_host.h>

#include "scsi_priv.h"
#include "scsi_logging.h"


34
/* Number of scatterlist pools: one per entry of scsi_sg_pools[]. */
#define SG_MEMPOOL_NR		ARRAY_SIZE(scsi_sg_pools)
/*
 * NOTE(review): presumably the number of tables each mempool keeps in
 * reserve; it is not used in this part of the file -- confirm at the
 * mempool creation site.
 */
#define SG_MEMPOOL_SIZE		2

/*
 * Upper bound on the number of SG segments placed in one scatterlist table
 * when chaining is not used.  Ideally a table of this size fits in a single
 * page, so that no higher order allocation is needed.
 */
#define SCSI_MAX_SG_SEGMENTS	128

Linus Torvalds's avatar
Linus Torvalds committed
44
45
struct scsi_host_sg_pool {
	size_t		size;
46
	char		*name;
47
	struct kmem_cache	*slab;
Linus Torvalds's avatar
Linus Torvalds committed
48
49
50
	mempool_t	*pool;
};

51
#define SP(x) { x, "sgpool-" #x }
52
static struct scsi_host_sg_pool scsi_sg_pools[] = {
Linus Torvalds's avatar
Linus Torvalds committed
53
54
	SP(8),
	SP(16),
55
#if (SCSI_MAX_SG_SEGMENTS > 16)
Linus Torvalds's avatar
Linus Torvalds committed
56
	SP(32),
57
#if (SCSI_MAX_SG_SEGMENTS > 32)
Linus Torvalds's avatar
Linus Torvalds committed
58
	SP(64),
59
#if (SCSI_MAX_SG_SEGMENTS > 64)
Linus Torvalds's avatar
Linus Torvalds committed
60
	SP(128),
61
62
63
#endif
#endif
#endif
64
};
Linus Torvalds's avatar
Linus Torvalds committed
65
66
#undef SP

67
static void scsi_run_queue(struct request_queue *q);
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84

/*
 * Function:	scsi_unprep_request()
 *
 * Purpose:	Remove all preparation done for a request, including its
 *		associated scsi_cmnd, so that it can be requeued.
 *
 * Arguments:	req	- request to unprepare
 *
 * Lock status:	Assumed that no locks are held upon entry.
 *
 * Returns:	Nothing.
 */
static void scsi_unprep_request(struct request *req)
{
	struct scsi_cmnd *cmd = req->special;

85
	req->cmd_flags &= ~REQ_DONTPREP;
86
	req->special = NULL;
87
88
89

	scsi_put_command(cmd);
}
90

Linus Torvalds's avatar
Linus Torvalds committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
/*
 * Function:    scsi_queue_insert()
 *
 * Purpose:     Insert a command in the midlevel queue.
 *
 * Arguments:   cmd    - command that we are adding to queue.
 *              reason - why we are inserting command to queue.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
 * Returns:     Nothing.
 *
 * Notes:       We do this for one of two cases.  Either the host is busy
 *              and it cannot accept any more commands for the time being,
 *              or the device returned QUEUE_FULL and can accept no more
 *              commands.
 * Notes:       This could be called either from an interrupt context or a
 *              normal process context.
 */
int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
{
	struct Scsi_Host *host = cmd->device->host;
	struct scsi_device *device = cmd->device;
114
115
	struct request_queue *q = device->request_queue;
	unsigned long flags;
Linus Torvalds's avatar
Linus Torvalds committed
116
117
118
119
120

	SCSI_LOG_MLQUEUE(1,
		 printk("Inserting command %p into mlqueue\n", cmd));

	/*
121
	 * Set the appropriate busy bit for the device/host.
Linus Torvalds's avatar
Linus Torvalds committed
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
	 *
	 * If the host/device isn't busy, assume that something actually
	 * completed, and that we should be able to queue a command now.
	 *
	 * Note that the prior mid-layer assumption that any host could
	 * always queue at least one command is now broken.  The mid-layer
	 * will implement a user specifiable stall (see
	 * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
	 * if a command is requeued with no other commands outstanding
	 * either for the device or for the host.
	 */
	if (reason == SCSI_MLQUEUE_HOST_BUSY)
		host->host_blocked = host->max_host_blocked;
	else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
		device->device_blocked = device->max_device_blocked;

	/*
	 * Decrement the counters, since these commands are no longer
	 * active on the host/device.
	 */
	scsi_device_unbusy(device);

	/*
145
146
	 * Requeue this command.  It will go before all other commands
	 * that are already in the queue.
Linus Torvalds's avatar
Linus Torvalds committed
147
148
149
150
	 *
	 * NOTE: there is magic here about the way the queue is plugged if
	 * we have no outstanding commands.
	 * 
151
	 * Although we *don't* plug the queue, we call the request
Linus Torvalds's avatar
Linus Torvalds committed
152
153
	 * function.  The SCSI request function detects the blocked condition
	 * and plugs the queue appropriately.
154
155
         */
	spin_lock_irqsave(q->queue_lock, flags);
156
	blk_requeue_request(q, cmd->request);
157
158
159
160
	spin_unlock_irqrestore(q->queue_lock, flags);

	scsi_run_queue(q);

Linus Torvalds's avatar
Linus Torvalds committed
161
162
163
	return 0;
}

164
/**
165
 * scsi_execute - insert request and wait for the result
166
167
168
169
170
171
172
173
 * @sdev:	scsi device
 * @cmd:	scsi command
 * @data_direction: data direction
 * @buffer:	data buffer
 * @bufflen:	len of buffer
 * @sense:	optional sense buffer
 * @timeout:	request timeout in seconds
 * @retries:	number of times to retry request
174
 * @flags:	or into request flags;
175
 *
176
 * returns the req->errors value which is the scsi_cmnd result
177
 * field.
178
 **/
179
180
181
int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
		 int data_direction, void *buffer, unsigned bufflen,
		 unsigned char *sense, int timeout, int retries, int flags)
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
{
	struct request *req;
	int write = (data_direction == DMA_TO_DEVICE);
	int ret = DRIVER_ERROR << 24;

	req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);

	if (bufflen &&	blk_rq_map_kern(sdev->request_queue, req,
					buffer, bufflen, __GFP_WAIT))
		goto out;

	req->cmd_len = COMMAND_SIZE(cmd[0]);
	memcpy(req->cmd, cmd, req->cmd_len);
	req->sense = sense;
	req->sense_len = 0;
197
	req->retries = retries;
198
	req->timeout = timeout;
199
200
	req->cmd_type = REQ_TYPE_BLOCK_PC;
	req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
201
202
203
204
205
206
207
208
209
210
211
212

	/*
	 * head injection *required* here otherwise quiesce won't work
	 */
	blk_execute_rq(req->q, NULL, req, 1);

	ret = req->errors;
 out:
	blk_put_request(req);

	return ret;
}
213
EXPORT_SYMBOL(scsi_execute);
214

215
216
217
218
219
220

int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
		     int data_direction, void *buffer, unsigned bufflen,
		     struct scsi_sense_hdr *sshdr, int timeout, int retries)
{
	char *sense = NULL;
221
222
	int result;
	
223
	if (sshdr) {
224
		sense = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO);
225
226
227
		if (!sense)
			return DRIVER_ERROR << 24;
	}
228
	result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
229
			      sense, timeout, retries, 0);
230
	if (sshdr)
231
		scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
232
233
234
235
236
237

	kfree(sense);
	return result;
}
EXPORT_SYMBOL(scsi_execute_req);

238
239
240
241
242
243
/* Per-request state kept for a command issued by scsi_execute_async(). */
struct scsi_io_context {
	/* caller's private pointer, handed back as first argument of done() */
	void *data;
	/* optional completion callback, invoked from scsi_end_async() */
	void (*done)(void *data, char *sense, int result, int resid);
	/* sense buffer the request points at while it is in flight */
	char sense[SCSI_SENSE_BUFFERSIZE];
};

/* Cache the scsi_io_context objects come from; created outside this section. */
static struct kmem_cache *scsi_io_context_cache;
245

246
static void scsi_end_async(struct request *req, int uptodate)
247
248
249
250
251
252
{
	struct scsi_io_context *sioc = req->end_io_data;

	if (sioc->done)
		sioc->done(sioc->data, sioc->sense, req->errors, req->data_len);

253
	kmem_cache_free(scsi_io_context_cache, sioc);
254
255
256
257
258
259
260
261
262
263
264
265
	__blk_put_request(req->q, req);
}

/*
 * Append @bio to @rq.  The bio's BIO_SEG_VALID flag is cleared, the bio is
 * marked as a write when the request is one, and it is passed through
 * blk_queue_bounce() (which receives &bio and may therefore substitute it)
 * before being appended.
 *
 * Returns the blk_rq_append_bio() result.
 */
static int scsi_merge_bio(struct request *rq, struct bio *bio)
{
	struct request_queue *queue = rq->q;
	int rc;

	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
	if (rq_data_dir(rq) == WRITE)
		bio->bi_rw |= (1 << BIO_RW);
	blk_queue_bounce(queue, &bio);

	rc = blk_rq_append_bio(queue, rq, bio);
	return rc;
}

269
/*
 * bi_end_io handler installed on the bios built by scsi_req_map_sg(): all it
 * does is drop the bio reference.  @error is ignored.
 */
static void scsi_bi_endio(struct bio *bio, int error)
{
	bio_put(bio);
}

/**
 * scsi_req_map_sg - map a scatterlist into a request
 * @rq:		request to fill
 * @sgl:	scatterlist
 * @nsegs:	number of elements
 * @bufflen:	len of buffer
 * @gfp:	memory allocation flags
 *
 * scsi_req_map_sg maps a scatterlist into a request so that the
 * request can be sent to the block layer. We do not trust the scatterlist
 * sent to us, as some ULDs use that struct to only organize the pages.
 *
 * At most @bufflen bytes are mapped, even when the scatterlist describes
 * more memory than that.
 *
 * Returns 0 on success, -ENOMEM when a bio cannot be allocated, -EINVAL
 * when a page cannot be added to a bio, or the error reported by
 * scsi_merge_bio().  On failure every bio already attached to @rq is ended.
 */
static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
			   int nsegs, unsigned bufflen, gfp_t gfp)
{
	struct request_queue *q = rq->q;
	int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned int data_len = bufflen, len, bytes, off;
	struct scatterlist *sg;
	struct page *page;
	struct bio *bio = NULL;
	int i, err, nr_vecs = 0;

	for_each_sg(sgl, sg, nsegs, i) {
		page = sg_page(sg);
		off = sg->offset;
		len = sg->length;

		/*
		 * data_len is the number of bytes still to be mapped.  It
		 * must only ever shrink: adding the segment length back in
		 * here would cancel the accounting below and let an
		 * oversized scatterlist be mapped past bufflen.
		 */
		while (len > 0 && data_len > 0) {
			/*
			 * sg sends a scatterlist that is larger than
			 * the data_len it wants transferred for certain
			 * IO sizes
			 */
			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
			bytes = min(bytes, data_len);

			if (!bio) {
				/* start a new bio sized for the pages left */
				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
				nr_pages -= nr_vecs;

				bio = bio_alloc(gfp, nr_vecs);
				if (!bio) {
					err = -ENOMEM;
					goto free_bios;
				}
				bio->bi_end_io = scsi_bi_endio;
			}

			if (bio_add_pc_page(q, bio, page, bytes, off) !=
			    bytes) {
				/* not attached to rq yet: drop it directly */
				bio_put(bio);
				err = -EINVAL;
				goto free_bios;
			}

			/* bio is full: hand it over to the request */
			if (bio->bi_vcnt >= nr_vecs) {
				err = scsi_merge_bio(rq, bio);
				if (err) {
					bio_endio(bio, 0);
					goto free_bios;
				}
				bio = NULL;
			}

			page++;
			len -= bytes;
			data_len -= bytes;
			off = 0;
		}
	}

	rq->buffer = rq->data = NULL;
	rq->data_len = bufflen;
	return 0;

free_bios:
	while ((bio = rq->bio) != NULL) {
		rq->bio = bio->bi_next;
		/*
		 * call endio instead of bio_put incase it was bounced
		 */
		bio_endio(bio, 0);
	}

	return err;
}

/**
 * scsi_execute_async - insert request
 * @sdev:	scsi device
 * @cmd:	scsi command
 * @cmd_len:	length of scsi cdb
 * @data_direction: data direction; DMA_TO_DEVICE makes this a write request
 * @buffer:	data buffer (this can be a kernel buffer or scatterlist)
 * @bufflen:	len of buffer
 * @use_sg:	if buffer is a scatterlist this is the number of elements
 * @timeout:	request timeout (stored unchanged in the request)
 * @retries:	number of times to retry request
 * @privdata:	opaque pointer handed back to @done
 * @done:	optional completion callback, called with @privdata, the sense
 *		buffer, req->errors and req->data_len
 * @gfp:	memory allocation flags
 *
 * Returns 0 once the request has been queued, or DRIVER_ERROR << 24 when
 * an allocation or the buffer mapping fails.
 **/
int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
		       int cmd_len, int data_direction, void *buffer, unsigned bufflen,
		       int use_sg, int timeout, int retries, void *privdata,
		       void (*done)(void *, char *, int, int), gfp_t gfp)
{
	int is_write = (data_direction == DMA_TO_DEVICE);
	struct scsi_io_context *ctx;
	struct request *rq;
	int map_err = 0;

	ctx = kmem_cache_zalloc(scsi_io_context_cache, gfp);
	if (!ctx)
		return DRIVER_ERROR << 24;

	rq = blk_get_request(sdev->request_queue, is_write, gfp);
	if (!rq)
		goto out_free_ctx;
	rq->cmd_type = REQ_TYPE_BLOCK_PC;
	rq->cmd_flags |= REQ_QUIET;

	/* Attach the data: either a scatterlist or a plain kernel buffer. */
	if (use_sg)
		map_err = scsi_req_map_sg(rq, buffer, use_sg, bufflen, gfp);
	else if (bufflen)
		map_err = blk_rq_map_kern(rq->q, rq, buffer, bufflen, gfp);

	if (map_err)
		goto out_put_rq;

	/* Completion context, consumed by scsi_end_async(). */
	ctx->data = privdata;
	ctx->done = done;

	rq->cmd_len = cmd_len;
	memset(rq->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */
	memcpy(rq->cmd, cmd, rq->cmd_len);
	rq->sense = ctx->sense;
	rq->sense_len = 0;
	rq->timeout = timeout;
	rq->retries = retries;
	rq->end_io_data = ctx;

	blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsi_end_async);
	return 0;

out_put_rq:
	blk_put_request(rq);
out_free_ctx:
	kmem_cache_free(scsi_io_context_cache, ctx);
	return DRIVER_ERROR << 24;
}
EXPORT_SYMBOL_GPL(scsi_execute_async);

Linus Torvalds's avatar
Linus Torvalds committed
427
428
429
430
431
432
433
434
435
436
437
/*
 * Function:    scsi_init_cmd_errh()
 *
 * Purpose:     Initialize cmd fields related to error handling.
 *
 * Arguments:   cmd	- command that is ready to be queued.
 *
 * Notes:       This function has the job of initializing a number of
 *              fields related to error handling.   Typically this will
 *              be called once for each command, as required.
 */
438
static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
Linus Torvalds's avatar
Linus Torvalds committed
439
440
{
	cmd->serial_number = 0;
441
	cmd->resid = 0;
Linus Torvalds's avatar
Linus Torvalds committed
442
443
444
445
446
447
448
449
450
451
452
453
	memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
	if (cmd->cmd_len == 0)
		cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
}

void scsi_device_unbusy(struct scsi_device *sdev)
{
	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	shost->host_busy--;
454
	if (unlikely(scsi_host_in_recovery(shost) &&
455
		     (shost->host_failed || shost->host_eh_scheduled)))
Linus Torvalds's avatar
Linus Torvalds committed
456
457
		scsi_eh_wakeup(shost);
	spin_unlock(shost->host_lock);
's avatar
committed
458
	spin_lock(sdev->request_queue->queue_lock);
Linus Torvalds's avatar
Linus Torvalds committed
459
	sdev->device_busy--;
's avatar
committed
460
	spin_unlock_irqrestore(sdev->request_queue->queue_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
}

/*
 * Called for single_lun devices on IO completion.  Clears
 * starget_sdev_user and then runs the queue of every scsi_device on the
 * target, current_sdev first.
 *
 * Called with *no* scsi locks held.
 */
static void scsi_single_lun_run(struct scsi_device *current_sdev)
{
	struct Scsi_Host *shost = current_sdev->host;
	struct scsi_device *sibling, *next;
	struct scsi_target *starget = scsi_target(current_sdev);
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	starget->starget_sdev_user = NULL;
	spin_unlock_irqrestore(shost->host_lock, flags);

	/*
	 * current_sdev gets the first shot at the target.  Others may race
	 * with us to set starget_sdev_user, but in most cases we will be
	 * first.  Ideally, each LU on the target would get some limited
	 * time or requests on the target.
	 */
	blk_run_queue(current_sdev->request_queue);

	spin_lock_irqsave(shost->host_lock, flags);
	if (!starget->starget_sdev_user) {
		/* Nobody claimed the target: give the other LUNs a turn. */
		list_for_each_entry_safe(sibling, next, &starget->devices,
					 same_target_siblings) {
			if (sibling == current_sdev ||
			    scsi_device_get(sibling))
				continue;

			/* host_lock is dropped while the queue is run */
			spin_unlock_irqrestore(shost->host_lock, flags);
			blk_run_queue(sibling->request_queue);
			spin_lock_irqsave(shost->host_lock, flags);

			scsi_device_put(sibling);
		}
	}
	spin_unlock_irqrestore(shost->host_lock, flags);
}

/*
 * Function:	scsi_run_queue()
 *
 * Purpose:	Select a proper request queue to serve next
 *
 * Arguments:	q	- last request's queue
 *
 * Returns:     Nothing
 *
 * Notes:	The previous command was completely finished, start
 *		a new one if possible.
 */
static void scsi_run_queue(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	unsigned long flags;

	if (sdev->single_lun)
		scsi_single_lun_run(sdev);

	spin_lock_irqsave(shost->host_lock, flags);
	while (!list_empty(&shost->starved_list) &&
	       !shost->host_blocked && !shost->host_self_blocked &&
		!((shost->can_queue > 0) &&
		  (shost->host_busy >= shost->can_queue))) {
		/*
		 * As long as shost is accepting commands and we have
		 * starved queues, call blk_run_queue. scsi_request_fn
		 * drops the queue_lock and can add us back to the
		 * starved_list.
		 *
		 * host_lock protects the starved_list and starved_entry.
		 * scsi_request_fn must get the host_lock before checking
		 * or modifying starved_list or starved_entry.
		 */
		sdev = list_entry(shost->starved_list.next,
					  struct scsi_device, starved_entry);
		list_del_init(&sdev->starved_entry);
		spin_unlock_irqrestore(shost->host_lock, flags);

550
551
552
553
554
555
556
557
558

		if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
		    !test_and_set_bit(QUEUE_FLAG_REENTER,
				      &sdev->request_queue->queue_flags)) {
			blk_run_queue(sdev->request_queue);
			clear_bit(QUEUE_FLAG_REENTER,
				  &sdev->request_queue->queue_flags);
		} else
			blk_run_queue(sdev->request_queue);
Linus Torvalds's avatar
Linus Torvalds committed
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589

		spin_lock_irqsave(shost->host_lock, flags);
		if (unlikely(!list_empty(&sdev->starved_entry)))
			/*
			 * sdev lost a race, and was put back on the
			 * starved list. This is unlikely but without this
			 * in theory we could loop forever.
			 */
			break;
	}
	spin_unlock_irqrestore(shost->host_lock, flags);

	blk_run_queue(q);
}

/*
 * Function:	scsi_requeue_command()
 *
 * Purpose:	Handle post-processing of completed commands.
 *
 * Arguments:	q	- queue to operate on
 *		cmd	- command that may need to be requeued.
 *
 * Returns:	Nothing
 *
 * Notes:	After command completion, there may be blocks left
 *		over which weren't finished by the previous command
 *		this can be for a number of reasons - the main one is
 *		I/O errors in the middle of the request, in which case
 *		we need to request the blocks that come after the bad
 *		sector.
590
 * Notes:	Upon return, cmd is a stale pointer.
Linus Torvalds's avatar
Linus Torvalds committed
591
592
593
 */
static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
{
594
	struct request *req = cmd->request;
595
596
	unsigned long flags;

597
	scsi_unprep_request(req);
598
	spin_lock_irqsave(q->queue_lock, flags);
599
	blk_requeue_request(q, req);
600
	spin_unlock_irqrestore(q->queue_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
601
602
603
604
605
606

	scsi_run_queue(q);
}

void scsi_next_command(struct scsi_cmnd *cmd)
{
607
608
609
610
611
	struct scsi_device *sdev = cmd->device;
	struct request_queue *q = sdev->request_queue;

	/* need to hold a reference on the device before we let go of the cmd */
	get_device(&sdev->sdev_gendev);
Linus Torvalds's avatar
Linus Torvalds committed
612
613
614

	scsi_put_command(cmd);
	scsi_run_queue(q);
615
616
617

	/* ok to remove device now */
	put_device(&sdev->sdev_gendev);
Linus Torvalds's avatar
Linus Torvalds committed
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
}

void scsi_run_host_queues(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;

	shost_for_each_device(sdev, shost)
		scsi_run_queue(sdev->request_queue);
}

/*
 * Function:    scsi_end_request()
 *
 * Purpose:     Post-processing of completed commands (usually invoked at end
 *		of upper level post-processing and scsi_io_completion).
 *
 * Arguments:   cmd	 - command that is complete.
 *              uptodate - 1 if I/O indicates success, <= 0 for I/O error.
 *              bytes    - number of bytes of completed I/O
 *		requeue  - indicates whether we should requeue leftovers.
 *
 * Lock status: Assumed that lock is not held upon entry.
 *
641
 * Returns:     cmd if requeue required, NULL otherwise.
Linus Torvalds's avatar
Linus Torvalds committed
642
643
644
645
646
647
 *
 * Notes:       This is called for block device requests in order to
 *              mark some number of sectors as complete.
 * 
 *		We are guaranteeing that the request queue will be goosed
 *		at some point during this call.
648
 * Notes:	If cmd was requeued, upon return it will be a stale pointer.
Linus Torvalds's avatar
Linus Torvalds committed
649
650
651
652
 */
static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
					  int bytes, int requeue)
{
653
	struct request_queue *q = cmd->device->request_queue;
Linus Torvalds's avatar
Linus Torvalds committed
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
	struct request *req = cmd->request;
	unsigned long flags;

	/*
	 * If there are blocks left over at the end, set up the command
	 * to queue the remainder of them.
	 */
	if (end_that_request_chunk(req, uptodate, bytes)) {
		int leftover = (req->hard_nr_sectors << 9);

		if (blk_pc_request(req))
			leftover = req->data_len;

		/* kill remainder if no retrys */
		if (!uptodate && blk_noretry_request(req))
			end_that_request_chunk(req, 0, leftover);
		else {
671
			if (requeue) {
Linus Torvalds's avatar
Linus Torvalds committed
672
673
674
675
676
677
				/*
				 * Bleah.  Leftovers again.  Stick the
				 * leftovers in the front of the
				 * queue, and goose the queue again.
				 */
				scsi_requeue_command(q, cmd);
678
679
				cmd = NULL;
			}
Linus Torvalds's avatar
Linus Torvalds committed
680
681
682
683
684
685
686
687
688
			return cmd;
		}
	}

	add_disk_randomness(req->rq_disk);

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_rq_tagged(req))
		blk_queue_end_tag(q, req);
689
	end_that_request_last(req, uptodate);
Linus Torvalds's avatar
Linus Torvalds committed
690
691
692
693
694
695
696
697
698
699
	spin_unlock_irqrestore(q->queue_lock, flags);

	/*
	 * This will goose the queue request function at the end, so we don't
	 * need to worry about launching another command.
	 */
	scsi_next_command(cmd);
	return NULL;
}

700
701
702
703
704
/*
 * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit
 * is totally arbitrary; a setting of 2048 will get you at least 8 MB I/Os.
 */
#define SCSI_MAX_SG_CHAIN_SEGMENTS	2048
Linus Torvalds's avatar
Linus Torvalds committed
705

706
707
708
709
710
static inline unsigned int scsi_sgtable_index(unsigned short nents)
{
	unsigned int index;

	switch (nents) {
Linus Torvalds's avatar
Linus Torvalds committed
711
	case 1 ... 8:
712
		index = 0;
Linus Torvalds's avatar
Linus Torvalds committed
713
714
		break;
	case 9 ... 16:
715
		index = 1;
Linus Torvalds's avatar
Linus Torvalds committed
716
		break;
717
#if (SCSI_MAX_SG_SEGMENTS > 16)
Linus Torvalds's avatar
Linus Torvalds committed
718
	case 17 ... 32:
719
		index = 2;
Linus Torvalds's avatar
Linus Torvalds committed
720
		break;
721
#if (SCSI_MAX_SG_SEGMENTS > 32)
Linus Torvalds's avatar
Linus Torvalds committed
722
	case 33 ... 64:
723
		index = 3;
Linus Torvalds's avatar
Linus Torvalds committed
724
		break;
725
726
#if (SCSI_MAX_SG_SEGMENTS > 64)
	case 65 ... 128:
727
		index = 4;
Linus Torvalds's avatar
Linus Torvalds committed
728
		break;
729
730
731
#endif
#endif
#endif
Linus Torvalds's avatar
Linus Torvalds committed
732
	default:
733
734
		printk(KERN_ERR "scsi: bad segment count=%d\n", nents);
		BUG();
Linus Torvalds's avatar
Linus Torvalds committed
735
736
	}

737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
	return index;
}

/*
 * scsi_alloc_sgtable - allocate the scatterlist for a command
 *
 * Arguments:	cmd	 - command to allocate for; cmd->use_sg is the number
 *			   of entries needed and must be non-zero.
 *		gfp_mask - allocation flags for the first table.  Tables
 *			   after the first are allocated with __GFP_WAIT
 *			   cleared and __GFP_HIGH set.
 *
 * Returns:	The first table, or NULL if an allocation failed.
 *
 * Notes:	When more than SCSI_MAX_SG_SEGMENTS entries are needed,
 *		several tables are allocated and chained together; each
 *		table that is followed by another one contributes
 *		SCSI_MAX_SG_SEGMENTS - 1 entries.  On success
 *		cmd->use_sg is copied to cmd->__use_sg for use by
 *		scsi_free_sgtable().
 */
struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask)
{
	struct scsi_host_sg_pool *sgp;
	struct scatterlist *sgl, *prev, *ret;
	unsigned int index;
	int this, left;

	BUG_ON(!cmd->use_sg);

	left = cmd->use_sg;
	ret = prev = NULL;
	do {
		this = left;
		if (this > SCSI_MAX_SG_SEGMENTS) {
			/* biggest table, with one entry kept for the chain */
			this = SCSI_MAX_SG_SEGMENTS - 1;
			index = SG_MEMPOOL_NR - 1;
		} else
			index = scsi_sgtable_index(this);

		left -= this;

		sgp = scsi_sg_pools + index;

		sgl = mempool_alloc(sgp->pool, gfp_mask);
		if (unlikely(!sgl))
			goto enomem;

		sg_init_table(sgl, sgp->size);

		/*
		 * first loop through, set initial index and return value
		 */
		if (!ret)
			ret = sgl;

		/*
		 * chain previous sglist, if any. we know the previous
		 * sglist must be the biggest one, or we would not have
		 * ended up doing another loop.
		 */
		if (prev)
			sg_chain(prev, SCSI_MAX_SG_SEGMENTS, sgl);

		/*
		 * if we have nothing left, mark the last segment as
		 * end-of-list
		 */
		if (!left)
			sg_mark_end(sgl, this);

		/*
		 * don't allow subsequent mempool allocs to sleep, it would
		 * violate the mempool principle.
		 */
		gfp_mask &= ~__GFP_WAIT;
		gfp_mask |= __GFP_HIGH;
		prev = sgl;
	} while (left);

	/*
	 * ->use_sg may get modified after dma mapping has potentially
	 * shrunk the number of segments, so keep a copy of it for free.
	 */
	cmd->__use_sg = cmd->use_sg;
	return ret;
enomem:
	if (ret) {
		struct scatterlist *next;

		/*
		 * Free entries chained off ret. Since we were trying to
		 * allocate another sglist, we know that all entries are of
		 * the max size.
		 */
		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;

		sgl = sg_chain_ptr(&ret[SCSI_MAX_SG_SEGMENTS - 1]);
		while (sgl) {
			/*
			 * Read the link to the following table before this
			 * one goes back to the pool; its memory must not be
			 * looked at once it has been freed.
			 */
			next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
			mempool_free(sgl, sgp->pool);
			sgl = next;
		}

		mempool_free(ret, sgp->pool);
	}
	return NULL;
}

EXPORT_SYMBOL(scsi_alloc_sgtable);

828
void scsi_free_sgtable(struct scsi_cmnd *cmd)
Linus Torvalds's avatar
Linus Torvalds committed
829
{
830
	struct scatterlist *sgl = cmd->request_buffer;
Linus Torvalds's avatar
Linus Torvalds committed
831
832
	struct scsi_host_sg_pool *sgp;

833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
	/*
	 * if this is the biggest size sglist, check if we have
	 * chained parts we need to free
	 */
	if (cmd->__use_sg > SCSI_MAX_SG_SEGMENTS) {
		unsigned short this, left;
		struct scatterlist *next;
		unsigned int index;

		left = cmd->__use_sg - (SCSI_MAX_SG_SEGMENTS - 1);
		next = sg_chain_ptr(&sgl[SCSI_MAX_SG_SEGMENTS - 1]);
		while (left && next) {
			sgl = next;
			this = left;
			if (this > SCSI_MAX_SG_SEGMENTS) {
				this = SCSI_MAX_SG_SEGMENTS - 1;
				index = SG_MEMPOOL_NR - 1;
			} else
				index = scsi_sgtable_index(this);

			left -= this;

			sgp = scsi_sg_pools + index;

			if (left)
				next = sg_chain_ptr(&sgl[sgp->size - 1]);

			mempool_free(sgl, sgp->pool);
		}

		/*
		 * Restore original, will be freed below
		 */
		sgl = cmd->request_buffer;
FUJITA Tomonori's avatar
FUJITA Tomonori committed
867
868
869
		sgp = scsi_sg_pools + SG_MEMPOOL_NR - 1;
	} else
		sgp = scsi_sg_pools + scsi_sgtable_index(cmd->__use_sg);
870

Linus Torvalds's avatar
Linus Torvalds committed
871
872
873
	mempool_free(sgl, sgp->pool);
}

874
875
EXPORT_SYMBOL(scsi_free_sgtable);

Linus Torvalds's avatar
Linus Torvalds committed
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
/*
 * Function:    scsi_release_buffers()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd	- command that we are bailing.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       In the event that an upper level driver rejects a
 *		command, we must release resources allocated during
 *		the __init_io() function.  Primarily this would involve
 *		the scatter-gather table, and potentially any bounce
 *		buffers.
 */
static void scsi_release_buffers(struct scsi_cmnd *cmd)
{
	if (cmd->use_sg)
896
		scsi_free_sgtable(cmd);
Linus Torvalds's avatar
Linus Torvalds committed
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933

	/*
	 * Zero these out.  They now point to freed memory, and it is
	 * dangerous to hang onto the pointers.
	 */
	cmd->request_buffer = NULL;
	cmd->request_bufflen = 0;
}

/*
 * Function:    scsi_io_completion()
 *
 * Purpose:     Completion processing for block device I/O requests.
 *
 * Arguments:   cmd   - command that is finished.
 *
 * Lock status: Assumed that no lock is held upon entry.
 *
 * Returns:     Nothing
 *
 * Notes:       This function is matched in terms of capabilities to
 *              the function that created the scatter-gather list.
 *              In other words, if there are no bounce buffers
 *              (the normal case for most drivers), we don't need
 *              the logic to deal with cleaning up afterwards.
 *
 *		We must do one of several things here:
 *
 *		a) Call scsi_end_request.  This will finish off the
 *		   specified number of sectors.  If we are done, the
 *		   command block will be released, and the queue
 *		   function will be goosed.  If we are not done, then
 *		   scsi_end_request will directly goose the queue.
 *
 *		b) We can just use scsi_requeue_command() here.  This would
 *		   be used if we just wanted to retry, for example.
 */
934
void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
Linus Torvalds's avatar
Linus Torvalds committed
935
936
{
	int result = cmd->result;
937
	int this_count = cmd->request_bufflen;
938
	struct request_queue *q = cmd->device->request_queue;
Linus Torvalds's avatar
Linus Torvalds committed
939
940
941
942
943
944
	struct request *req = cmd->request;
	int clear_errors = 1;
	struct scsi_sense_hdr sshdr;
	int sense_valid = 0;
	int sense_deferred = 0;

945
	scsi_release_buffers(cmd);
Linus Torvalds's avatar
Linus Torvalds committed
946
947
948
949
950
951

	if (result) {
		sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
		if (sense_valid)
			sense_deferred = scsi_sense_is_deferred(&sshdr);
	}
952

Linus Torvalds's avatar
Linus Torvalds committed
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
	if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
		req->errors = result;
		if (result) {
			clear_errors = 0;
			if (sense_valid && req->sense) {
				/*
				 * SG_IO wants current and deferred errors
				 */
				int len = 8 + cmd->sense_buffer[7];

				if (len > SCSI_SENSE_BUFFERSIZE)
					len = SCSI_SENSE_BUFFERSIZE;
				memcpy(req->sense, cmd->sense_buffer,  len);
				req->sense_len = len;
			}
968
969
		}
		req->data_len = cmd->resid;
Linus Torvalds's avatar
Linus Torvalds committed
970
971
972
973
974
975
	}

	/*
	 * Next deal with any sectors which we were able to correctly
	 * handle.
	 */
976
977
978
979
980
981
982
983
984
985
986
987
988
989
	SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, "
				      "%d bytes done.\n",
				      req->nr_sectors, good_bytes));
	SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));

	if (clear_errors)
		req->errors = 0;

	/* A number of bytes were successfully read.  If there
	 * are leftovers and there is some kind of error
	 * (result != 0), retry the rest.
	 */
	if (scsi_end_request(cmd, 1, good_bytes, result == 0) == NULL)
		return;
990
991
992

	/* good_bytes = 0, or (inclusive) there were leftovers and
	 * result = 0, so scsi_end_request couldn't retry.
Linus Torvalds's avatar
Linus Torvalds committed
993
994
995
996
997
	 */
	if (sense_valid && !sense_deferred) {
		switch (sshdr.sense_key) {
		case UNIT_ATTENTION:
			if (cmd->device->removable) {
998
				/* Detected disc change.  Set a bit
Linus Torvalds's avatar
Linus Torvalds committed
999
1000
1001
				 * and quietly refuse further access.
				 */
				cmd->device->changed = 1;
1002
				scsi_end_request(cmd, 0, this_count, 1);
Linus Torvalds's avatar
Linus Torvalds committed
1003
1004
				return;
			} else {
1005
1006
1007
1008
1009
				/* Must have been a power glitch, or a
				 * bus reset.  Could not have been a
				 * media change, so we just retry the
				 * request and see what happens.
				 */
Linus Torvalds's avatar
Linus Torvalds committed
1010
1011
1012
1013
1014
				scsi_requeue_command(q, cmd);
				return;
			}
			break;
		case ILLEGAL_REQUEST:
1015
1016
1017
1018
1019
1020
1021
1022
			/* If we had an ILLEGAL REQUEST returned, then
			 * we may have performed an unsupported
			 * command.  The only thing this should be
			 * would be a ten byte read where only a six
			 * byte read was supported.  Also, on a system
			 * where READ CAPACITY failed, we may have
			 * read past the end of the disk.
			 */
1023
1024
			if ((cmd->device->use_10_for_rw &&
			    sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
Linus Torvalds's avatar
Linus Torvalds committed
1025
1026
1027
			    (cmd->cmnd[0] == READ_10 ||
			     cmd->cmnd[0] == WRITE_10)) {
				cmd->device->use_10_for_rw = 0;
1028
1029
				/* This will cause a retry with a
				 * 6-byte command.
Linus Torvalds's avatar
Linus Torvalds committed
1030
1031
				 */
				scsi_requeue_command(q, cmd);
1032
				return;
Linus Torvalds's avatar
Linus Torvalds committed
1033
			} else {
1034
				scsi_end_request(cmd, 0, this_count, 1);
Linus Torvalds's avatar
Linus Torvalds committed
1035
1036
1037
1038
				return;
			}
			break;
		case NOT_READY:
1039
			/* If the device is in the process of becoming
1040
			 * ready, or has a temporary blockage, retry.
Linus Torvalds's avatar
Linus Torvalds committed
1041
			 */
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
			if (sshdr.asc == 0x04) {
				switch (sshdr.ascq) {
				case 0x01: /* becoming ready */
				case 0x04: /* format in progress */
				case 0x05: /* rebuild in progress */
				case 0x06: /* recalculation in progress */
				case 0x07: /* operation in progress */
				case 0x08: /* Long write in progress */
				case 0x09: /* self test in progress */
					scsi_requeue_command(q, cmd);
					return;
				default:
					break;
				}
Linus Torvalds's avatar
Linus Torvalds committed
1056
			}
1057
1058
1059
1060
1061
			if (!(req->cmd_flags & REQ_QUIET))
				scsi_cmd_print_sense_hdr(cmd,
							 "Device not ready",
							 &sshdr);

1062
			scsi_end_request(cmd, 0, this_count, 1);
Linus Torvalds's avatar
Linus Torvalds committed
1063
1064
			return;
		case VOLUME_OVERFLOW:
1065
			if (!(req->cmd_flags & REQ_QUIET)) {
1066
				scmd_printk(KERN_INFO, cmd,
1067
					    "Volume overflow, CDB: ");
1068
				__scsi_print_command(cmd->cmnd);
1069
1070
				scsi_print_sense("", cmd);
			}
1071
1072
			/* See SSC3rXX or current. */
			scsi_end_request(cmd, 0, this_count, 1);
Linus Torvalds's avatar
Linus Torvalds committed
1073
1074
1075
1076
			return;
		default:
			break;
		}
1077
	}
Linus Torvalds's avatar
Linus Torvalds committed
1078
	if (host_byte(result) == DID_RESET) {
1079
1080
1081
		/* Third party bus reset or reset for error recovery
		 * reasons.  Just retry the request and see what
		 * happens.
Linus Torvalds's avatar
Linus Torvalds committed
1082
1083
1084
1085
1086
		 */
		scsi_requeue_command(q, cmd);
		return;
	}
	if (result) {
1087
		if (!(req->cmd_flags & REQ_QUIET)) {
1088
			scsi_print_result(cmd);
1089
1090
1091
			if (driver_byte(result) & DRIVER_SENSE)
				scsi_print_sense("", cmd);
		}
Linus Torvalds's avatar
Linus Torvalds committed
1092
	}
1093
	scsi_end_request(cmd, 0, this_count, !result);
Linus Torvalds's avatar
Linus Torvalds committed
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
}

/*
 * Function:    scsi_init_io()
 *
 * Purpose:     SCSI I/O initialize function.
 *
 * Arguments:   cmd   - Command descriptor we wish to initialize
 *
 * Returns:     0 on success
 *		BLKPREP_DEFER if the failure is retryable
 *		BLKPREP_KILL if the failure is fatal
 */
static int scsi_init_io(struct scsi_cmnd *cmd)
{
	struct request     *req = cmd->request;
	int		   count;

	/*
1113
	 * We used to not use scatter-gather for single segment request,
Linus Torvalds's avatar
Linus Torvalds committed
1114
1115
1116
1117
1118
1119
	 * but now we do (it makes highmem I/O easier to support without
	 * kmapping pages)
	 */
	cmd->use_sg = req->nr_phys_segments;

	/*
1120
	 * If sg table allocation fails, requeue request later.
Linus Torvalds's avatar
Linus Torvalds committed
1121
	 */
1122
1123
	cmd->request_buffer = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
	if (unlikely(!cmd->request_buffer)) {
Alan Stern's avatar
Alan Stern committed
1124
		scsi_unprep_request(req);
Linus Torvalds's avatar
Linus Torvalds committed
1125
		return BLKPREP_DEFER;
Alan Stern's avatar
Alan Stern committed
1126
	}
Linus Torvalds's avatar
Linus Torvalds committed
1127

1128
	req->buffer = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1129
1130
	if (blk_pc_request(req))
		cmd->request_bufflen = req->data_len;
1131
1132
	else
		cmd->request_bufflen = req->nr_sectors << 9;
Linus Torvalds's avatar
Linus Torvalds committed
1133
1134
1135
1136
1137
1138
1139
1140

	/* 
	 * Next, walk the list, and fill in the addresses and sizes of
	 * each segment.
	 */
	count = blk_rq_map_sg(req->q, req, cmd->request_buffer);
	if (likely(count <= cmd->use_sg)) {
		cmd->use_sg = count;
1141
		return BLKPREP_OK;
Linus Torvalds's avatar
Linus Torvalds committed
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
	}

	printk(KERN_ERR "Incorrect number of segments after building list\n");
	printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
	printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
			req->current_nr_sectors);

	return BLKPREP_KILL;
}

1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
/*
 * Return the scsi_cmnd attached to @req (req->special), allocating one
 * with GFP_ATOMIC and attaching it when the request has none yet.
 * Returns NULL if that allocation fails.
 */
static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev,
		struct request *req)
{
	struct scsi_cmnd *scmd = req->special;

	if (scmd == NULL) {
		scmd = scsi_get_command(sdev, GFP_ATOMIC);
		if (unlikely(scmd == NULL))
			return NULL;
		req->special = scmd;
	}

	/* (re)link command and request; copy over the request's tag */
	cmd_link:
	scmd->tag = req->tag;
	scmd->request = req;

	return scmd;
}

1173
int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
James Bottomley's avatar
James Bottomley committed
1174
{
1175
	struct scsi_cmnd *cmd;
1176
1177
1178
1179
	int ret = scsi_prep_state_check(sdev, req);

	if (ret != BLKPREP_OK)
		return ret;
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207

	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * BLOCK_PC requests may transfer data, in which case they must
	 * a bio attached to them.  Or they might contain a SCSI command
	 * that does not transfer data, in which case they may optionally
	 * submit a request without an attached bio.
	 */
	if (req->bio) {
		int ret;

		BUG_ON(!req->nr_phys_segments);

		ret = scsi_init_io(cmd);
		if (unlikely(ret))
			return ret;
	} else {
		BUG_ON(req->data_len);
		BUG_ON(req->data);

		cmd->request_bufflen = 0;
		cmd->request_buffer = NULL;
		cmd->use_sg = 0;
		req->buffer = NULL;
	}
James Bottomley's avatar
James Bottomley committed
1208

1209
	BUILD_BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd));
James Bottomley's avatar
James Bottomley committed
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
	memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd));
	cmd->cmd_len = req->cmd_len;
	if (!req->data_len)
		cmd->sc_data_direction = DMA_NONE;
	else if (rq_data_dir(req) == WRITE)
		cmd->sc_data_direction = DMA_TO_DEVICE;
	else
		cmd->sc_data_direction = DMA_FROM_DEVICE;
	
	cmd->transfersize = req->data_len;
	cmd->allowed = req->retries;
	cmd->timeout_per_command = req->timeout;
1222
	return BLKPREP_OK;
James Bottomley's avatar
James Bottomley committed
1223
}
1224
EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd);
James Bottomley's avatar
James Bottomley committed
1225

1226
1227
1228
1229
1230
/*
 * Setup a REQ_TYPE_FS command.  These are simple read/write request
 * from filesystems that still need to be translated to SCSI CDBs from
 * the ULD.
 */
1231
int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
Linus Torvalds's avatar
Linus Torvalds committed
1232
1233
{
	struct scsi_cmnd *cmd;
1234
	int ret = scsi_prep_state_check(sdev, req);
Linus Torvalds's avatar
Linus Torvalds committed
1235

1236
1237
	if (ret != BLKPREP_OK)
		return ret;
Linus Torvalds's avatar
Linus Torvalds committed
1238
	/*
1239
	 * Filesystem requests must transfer data.
Linus Torvalds's avatar
Linus Torvalds committed
1240
	 */
1241
1242
1243
1244
1245
1246
	BUG_ON(!req->nr_phys_segments);

	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

1247
	return scsi_init_io(cmd);
1248
}
1249
EXPORT_SYMBOL(scsi_setup_fs_cmnd);
1250

1251
int scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
1252
1253
1254
{
	int ret = BLKPREP_OK;

Linus Torvalds's avatar
Linus Torvalds committed
1255
	/*
1256
1257
	 * If the device is not in running state we will reject some
	 * or all commands.
Linus Torvalds's avatar
Linus Torvalds committed
1258
	 */
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		switch (sdev->sdev_state) {
		case SDEV_OFFLINE:
			/*
			 * If the device is offline we refuse to process any
			 * commands.  The device must be brought online
			 * before trying any recovery commands.
			 */
			sdev_printk(KERN_ERR, sdev,
				    "rejecting I/O to offline device\n");
			ret = BLKPREP_KILL;
			break;
		case SDEV_DEL:
			/*
			 * If the device is fully deleted, we refuse to
			 * process any commands as well.
			 */
1276
			sdev_printk(KERN_ERR, sdev,
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
				    "rejecting I/O to dead device\n");
			ret = BLKPREP_KILL;
			break;
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			/*
			 * If the devices is blocked we defer normal commands.
			 */
			if (!(req->cmd_flags & REQ_PREEMPT))
				ret = BLKPREP_DEFER;
			break;
		default:
			/*
			 * For any other not fully online state we only allow
			 * special commands.  In particular any user initiated
			 * command is not allowed.
			 */
			if (!(req->cmd_flags & REQ_PREEMPT))
				ret = BLKPREP_KILL;
			break;
Linus Torvalds's avatar
Linus Torvalds committed
1297
1298
		}
	}
1299
1300
1301
	return ret;
}
EXPORT_SYMBOL(scsi_prep_state_check);
Linus Torvalds's avatar
Linus Torvalds committed
1302

1303
1304
1305
int scsi_prep_return(struct request_queue *q, struct request *req, int ret)
{
	struct scsi_device *sdev = q->queuedata;
Linus Torvalds's avatar
Linus Torvalds committed
1306

1307
1308
1309
	switch (ret) {
	case BLKPREP_KILL:
		req->errors = DID_NO_CONNECT << 16;
1310
1311
1312
1313
1314
1315
1316
		/* release the command and kill it */
		if (req->special) {
			struct scsi_cmnd *cmd = req->special;
			scsi_release_buffers(cmd);
			scsi_put_command(cmd);
			req->special = NULL;
		}
1317
1318
		break;
	case BLKPREP_DEFER:
Linus Torvalds's avatar
Linus Torvalds committed
1319
		/*
1320
1321
1322
		 * If we defer, the elv_next_request() returns NULL, but the
		 * queue must be restarted, so we plug here if no returning
		 * command will automatically do that.
Linus Torvalds's avatar
Linus Torvalds committed
1323
		 */
1324
1325
1326
1327
1328
		if (sdev->device_busy == 0)
			blk_plug_device(q);
		break;
	default:
		req->cmd_flags |= REQ_DONTPREP;
Linus Torvalds's avatar
Linus Torvalds committed
1329
1330
	}

1331
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
1332
}
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
EXPORT_SYMBOL(scsi_prep_return);

/*
 * Prep function for the request queue: only REQ_TYPE_BLOCK_PC requests
 * are set up here, every other request type is killed.
 */
static int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *sdev = q->queuedata;

	if (req->cmd_type != REQ_TYPE_BLOCK_PC)
		return scsi_prep_return(q, req, BLKPREP_KILL);

	return scsi_prep_return(q, req, scsi_setup_blk_pc_cmnd(sdev, req));
}
Linus Torvalds's avatar
Linus Torvalds committed
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361

/*
 * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
 * return 0.
 *
 * Called with the queue_lock held.
 */
static inline int scsi_dev_queue_ready(struct request_queue *q,
				  struct scsi_device *sdev)
{
	if (sdev->device_busy >= sdev->queue_depth)
		return 0;
	if (sdev->device_busy == 0 && sdev->device_blocked) {
		/*
		 * unblock after device_blocked iterates to zero
		 */
		if (--sdev->device_blocked == 0) {
			SCSI_LOG_MLQUEUE(3,
1362
1363
				   sdev_printk(KERN_INFO, sdev,
				   "unblocking device at zero depth\n"));
Linus Torvalds's avatar
Linus Torvalds committed
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
		} else {
			blk_plug_device(q);
			return 0;
		}
	}
	if (sdev->device_blocked)
		return 0;

	return 1;
}

/*
 * scsi_host_queue_ready: return 1 if requests can be sent to shost,
 * 0 otherwise.  Whenever 0 is returned the queue must end up being run
 * again, else IO can hang.
 *
 * Called with host_lock held.
 */
static inline int scsi_host_queue_ready(struct request_queue *q,
				   struct Scsi_Host *shost,
				   struct scsi_device *sdev)
{
	int host_full;

	if (scsi_host_in_recovery(shost))
		return 0;

	if (shost->host_busy == 0 && shost->host_blocked) {
		/*
		 * Idle but blocked: count host_blocked down by one per
		 * call; the host is unblocked once it reaches zero.
		 */
		if (--shost->host_blocked != 0) {
			blk_plug_device(q);
			return 0;
		}
		SCSI_LOG_MLQUEUE(3,
			printk("scsi%d unblocking host at zero depth\n",
				shost->host_no));
	}

	/* A can_queue of zero or less places no limit on host_busy. */
	host_full = shost->can_queue > 0 &&
		    shost->host_busy >= shost->can_queue;
	if (host_full || shost->host_blocked || shost->host_self_blocked) {
		/* Remember the device as starved, once. */
		if (list_empty(&sdev->starved_entry))
			list_add_tail(&sdev->starved_entry, &shost->starved_list);
		return 0;
	}

	/* The command can be processed, so the device is not starved. */
	if (!list_empty(&sdev->starved_entry))
		list_del_init(&sdev->starved_entry);

	return 1;
}

/*
1416
 * Kill a request for a dead device
Linus Torvalds's avatar
Linus Torvalds committed
1417
 */
1418
static void scsi_kill_request(struct request *req, struct request_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
1419
{
1420
	struct scsi_cmnd *cmd = req->special;
1421
1422
	struct scsi_device *sdev = cmd->device;
	struct Scsi_Host *shost = sdev->host;
Linus Torvalds's avatar
Linus Torvalds committed
1423

1424
1425
	blkdev_dequeue_request(req);

1426
1427
1428
1429
	if (unlikely(cmd == NULL)) {
		printk(KERN_CRIT "impossible request in %s.\n",
				 __FUNCTION__);
		BUG();
Linus Torvalds's avatar
Linus Torvalds committed
1430
	}
1431
1432
1433
1434

	scsi_init_cmd_errh(cmd);
	cmd->result = DID_NO_CONNECT << 16;
	atomic_inc(&cmd->device->iorequest_cnt);
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447

	/*
	 * SCSI request completion path will do scsi_device_unbusy(),
	 * bump busy counts.  To bump the counters, we need to dance
	 * with the locks as normal issue path does.
	 */
	sdev->device_busy++;
	spin_unlock(sdev->request_queue->queue_lock);
	spin_lock(shost->host_lock);
	shost->host_busy++;
	spin_unlock(shost->host_lock);
	spin_lock(sdev->request_queue->queue_lock);

1448
	__scsi_done(cmd);
Linus Torvalds's avatar
Linus Torvalds committed
1449
1450
}

1451
1452
1453
static void scsi_softirq_done(struct request *rq)
{
	struct scsi_cmnd *cmd = rq->completion_data;
1454
	unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command;
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
	int disposition;

	INIT_LIST_HEAD(&cmd->eh_entry);

	disposition = scsi_decide_disposition(cmd);
	if (disposition != SUCCESS &&
	    time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
		sdev_printk(KERN_ERR, cmd->device,
			    "timing out command, waited %lus\n",
			    wait_for/HZ);
		disposition = SUCCESS;
	}
			
	scsi_log_completion(cmd, disposition);

	switch (disposition) {
		case SUCCESS:
			scsi_finish_command(cmd);
			break;
		case NEEDS_RETRY:
1475
			scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
			break;
		case ADD_TO_MLQUEUE:
			scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
			break;
		default:
			if (!scsi_eh_scmd_add(cmd, 0))
				scsi_finish_command(cmd);
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
/*
 * Function:    scsi_request_fn()
 *
 * Purpose:     Main strategy routine for SCSI.
 *
 * Arguments:   q       - Pointer to actual queue.
 *
 * Returns:     Nothing
 *
 * Lock status: IO request lock assumed to be held when called.
 */
static void scsi_request_fn(struct