This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / drivers / scsi / scsi_lib.c
1 /*
2  *  scsi_lib.c Copyright (C) 1999 Eric Youngdale
3  *
4  *  SCSI queueing library.
5  *      Initial versions: Eric Youngdale (eric@andante.org).
6  *                        Based upon conversations with large numbers
7  *                        of people at Linux Expo.
8  */
9
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/completion.h>
13 #include <linux/kernel.h>
14 #include <linux/mempool.h>
15 #include <linux/slab.h>
16 #include <linux/init.h>
17 #include <linux/pci.h>
18
19 #include <scsi/scsi.h>
20 #include <scsi/scsi_dbg.h>
21 #include <scsi/scsi_device.h>
22 #include <scsi/scsi_driver.h>
23 #include <scsi/scsi_eh.h>
24 #include <scsi/scsi_host.h>
25 #include <scsi/scsi_request.h>
26
27 #include "scsi_priv.h"
28 #include "scsi_logging.h"
29
30
/* Number of entries in scsi_sg_pools[], derived from the array itself. */
#define SG_MEMPOOL_NR		(sizeof(scsi_sg_pools) / sizeof(scsi_sg_pools[0]))
/* NOTE(review): not referenced in this section; presumably the per-pool reserve - confirm at its use site. */
#define SG_MEMPOOL_SIZE		32
33
34 struct scsi_host_sg_pool {
35         size_t          size;
36         char            *name; 
37         kmem_cache_t    *slab;
38         mempool_t       *pool;
39 };
40
41 #if (SCSI_MAX_PHYS_SEGMENTS < 32)
42 #error SCSI_MAX_PHYS_SEGMENTS is too small
43 #endif
44
45 #define SP(x) { x, "sgpool-" #x } 
46 struct scsi_host_sg_pool scsi_sg_pools[] = { 
47         SP(8),
48         SP(16),
49         SP(32),
50 #if (SCSI_MAX_PHYS_SEGMENTS > 32)
51         SP(64),
52 #if (SCSI_MAX_PHYS_SEGMENTS > 64)
53         SP(128),
54 #if (SCSI_MAX_PHYS_SEGMENTS > 128)
55         SP(256),
56 #if (SCSI_MAX_PHYS_SEGMENTS > 256)
57 #error SCSI_MAX_PHYS_SEGMENTS is too large
58 #endif
59 #endif
60 #endif
61 #endif
62 };      
63 #undef SP
64
65
66 /*
67  * Function:    scsi_insert_special_req()
68  *
69  * Purpose:     Insert pre-formed request into request queue.
70  *
71  * Arguments:   sreq    - request that is ready to be queued.
72  *              at_head - boolean.  True if we should insert at head
73  *                        of queue, false if we should insert at tail.
74  *
75  * Lock status: Assumed that lock is not held upon entry.
76  *
77  * Returns:     Nothing
78  *
79  * Notes:       This function is called from character device and from
80  *              ioctl types of functions where the caller knows exactly
81  *              what SCSI command needs to be issued.   The idea is that
82  *              we merely inject the command into the queue (at the head
83  *              for now), and then call the queue request function to actually
84  *              process it.
85  */
86 int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
87 {
88         /*
89          * Because users of this function are apt to reuse requests with no
90          * modification, we have to sanitise the request flags here
91          */
92         sreq->sr_request->flags &= ~REQ_DONTPREP;
93         blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
94                            at_head, sreq, 0);
95         return 0;
96 }
97
98 /*
99  * Function:    scsi_queue_insert()
100  *
101  * Purpose:     Insert a command in the midlevel queue.
102  *
103  * Arguments:   cmd    - command that we are adding to queue.
104  *              reason - why we are inserting command to queue.
105  *
106  * Lock status: Assumed that lock is not held upon entry.
107  *
108  * Returns:     Nothing.
109  *
110  * Notes:       We do this for one of two cases.  Either the host is busy
111  *              and it cannot accept any more commands for the time being,
112  *              or the device returned QUEUE_FULL and can accept no more
113  *              commands.
114  * Notes:       This could be called either from an interrupt context or a
115  *              normal process context.
116  */
117 int scsi_queue_insert(struct scsi_cmnd *cmd, int reason)
118 {
119         struct Scsi_Host *host = cmd->device->host;
120         struct scsi_device *device = cmd->device;
121
122         SCSI_LOG_MLQUEUE(1,
123                  printk("Inserting command %p into mlqueue\n", cmd));
124
125         /*
126          * We are inserting the command into the ml queue.  First, we
127          * cancel the timer, so it doesn't time out.
128          */
129         scsi_delete_timer(cmd);
130
131         /*
132          * Next, set the appropriate busy bit for the device/host.
133          *
134          * If the host/device isn't busy, assume that something actually
135          * completed, and that we should be able to queue a command now.
136          *
137          * Note that the prior mid-layer assumption that any host could
138          * always queue at least one command is now broken.  The mid-layer
139          * will implement a user specifiable stall (see
140          * scsi_host.max_host_blocked and scsi_device.max_device_blocked)
141          * if a command is requeued with no other commands outstanding
142          * either for the device or for the host.
143          */
144         if (reason == SCSI_MLQUEUE_HOST_BUSY)
145                 host->host_blocked = host->max_host_blocked;
146         else if (reason == SCSI_MLQUEUE_DEVICE_BUSY)
147                 device->device_blocked = device->max_device_blocked;
148
149         /*
150          * Register the fact that we own the thing for now.
151          */
152         cmd->state = SCSI_STATE_MLQUEUE;
153         cmd->owner = SCSI_OWNER_MIDLEVEL;
154
155         /*
156          * Decrement the counters, since these commands are no longer
157          * active on the host/device.
158          */
159         scsi_device_unbusy(device);
160
161         /*
162          * Insert this command at the head of the queue for it's device.
163          * It will go before all other commands that are already in the queue.
164          *
165          * NOTE: there is magic here about the way the queue is plugged if
166          * we have no outstanding commands.
167          * 
168          * Although this *doesn't* plug the queue, it does call the request
169          * function.  The SCSI request function detects the blocked condition
170          * and plugs the queue appropriately.
171          */
172         blk_insert_request(device->request_queue, cmd->request, 1, cmd, 1);
173         return 0;
174 }
175
176 /*
177  * Function:    scsi_do_req
178  *
179  * Purpose:     Queue a SCSI request
180  *
181  * Arguments:   sreq      - command descriptor.
182  *              cmnd      - actual SCSI command to be performed.
183  *              buffer    - data buffer.
184  *              bufflen   - size of data buffer.
185  *              done      - completion function to be run.
186  *              timeout   - how long to let it run before timeout.
187  *              retries   - number of retries we allow.
188  *
189  * Lock status: No locks held upon entry.
190  *
191  * Returns:     Nothing.
192  *
193  * Notes:       This function is only used for queueing requests for things
194  *              like ioctls and character device requests - this is because
195  *              we essentially just inject a request into the queue for the
196  *              device.
197  *
198  *              In order to support the scsi_device_quiesce function, we
199  *              now inject requests on the *head* of the device queue
200  *              rather than the tail.
201  */
202 void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
203                  void *buffer, unsigned bufflen,
204                  void (*done)(struct scsi_cmnd *),
205                  int timeout, int retries)
206 {
207         /*
208          * If the upper level driver is reusing these things, then
209          * we should release the low-level block now.  Another one will
210          * be allocated later when this request is getting queued.
211          */
212         __scsi_release_request(sreq);
213
214         /*
215          * Our own function scsi_done (which marks the host as not busy,
216          * disables the timeout counter, etc) will be called by us or by the
217          * scsi_hosts[host].queuecommand() function needs to also call
218          * the completion function for the high level driver.
219          */
220         memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd));
221         sreq->sr_bufflen = bufflen;
222         sreq->sr_buffer = buffer;
223         sreq->sr_allowed = retries;
224         sreq->sr_done = done;
225         sreq->sr_timeout_per_command = timeout;
226
227         if (sreq->sr_cmd_len == 0)
228                 sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
229
230         /*
231          * head injection *required* here otherwise quiesce won't work
232          */
233         scsi_insert_special_req(sreq, 1);
234 }
235  
236 static void scsi_wait_done(struct scsi_cmnd *cmd)
237 {
238         struct request *req = cmd->request;
239         struct request_queue *q = cmd->device->request_queue;
240         unsigned long flags;
241
242         req->rq_status = RQ_SCSI_DONE;  /* Busy, but indicate request done */
243
244         spin_lock_irqsave(q->queue_lock, flags);
245         if (blk_rq_tagged(req))
246                 blk_queue_end_tag(q, req);
247         spin_unlock_irqrestore(q->queue_lock, flags);
248
249         if (req->waiting)
250                 complete(req->waiting);
251 }
252
253 void scsi_wait_req(struct scsi_request *sreq, const void *cmnd, void *buffer,
254                    unsigned bufflen, int timeout, int retries)
255 {
256         DECLARE_COMPLETION(wait);
257         
258         sreq->sr_request->waiting = &wait;
259         sreq->sr_request->rq_status = RQ_SCSI_BUSY;
260         scsi_do_req(sreq, cmnd, buffer, bufflen, scsi_wait_done,
261                         timeout, retries);
262         wait_for_completion(&wait);
263         sreq->sr_request->waiting = NULL;
264         if (sreq->sr_request->rq_status != RQ_SCSI_DONE)
265                 sreq->sr_result |= (DRIVER_ERROR << 24);
266
267         __scsi_release_request(sreq);
268 }
269
270 /*
271  * Function:    scsi_init_cmd_errh()
272  *
273  * Purpose:     Initialize cmd fields related to error handling.
274  *
275  * Arguments:   cmd     - command that is ready to be queued.
276  *
277  * Returns:     Nothing
278  *
279  * Notes:       This function has the job of initializing a number of
280  *              fields related to error handling.   Typically this will
281  *              be called once for each command, as required.
282  */
283 static int scsi_init_cmd_errh(struct scsi_cmnd *cmd)
284 {
285         cmd->owner = SCSI_OWNER_MIDLEVEL;
286         cmd->serial_number = 0;
287         cmd->serial_number_at_timeout = 0;
288         cmd->abort_reason = 0;
289
290         memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer);
291
292         if (cmd->cmd_len == 0)
293                 cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]);
294
295         /*
296          * We need saved copies of a number of fields - this is because
297          * error handling may need to overwrite these with different values
298          * to run different commands, and once error handling is complete,
299          * we will need to restore these values prior to running the actual
300          * command.
301          */
302         cmd->old_use_sg = cmd->use_sg;
303         cmd->old_cmd_len = cmd->cmd_len;
304         cmd->sc_old_data_direction = cmd->sc_data_direction;
305         cmd->old_underflow = cmd->underflow;
306         memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd));
307         cmd->buffer = cmd->request_buffer;
308         cmd->bufflen = cmd->request_bufflen;
309         cmd->internal_timeout = NORMAL_TIMEOUT;
310         cmd->abort_reason = 0;
311
312         return 1;
313 }
314
315 /*
316  * Function:   scsi_setup_cmd_retry()
317  *
318  * Purpose:    Restore the command state for a retry
319  *
320  * Arguments:  cmd      - command to be restored
321  *
322  * Returns:    Nothing
323  *
324  * Notes:      Immediately prior to retrying a command, we need
325  *             to restore certain fields that we saved above.
326  */
327 void scsi_setup_cmd_retry(struct scsi_cmnd *cmd)
328 {
329         memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd));
330         cmd->request_buffer = cmd->buffer;
331         cmd->request_bufflen = cmd->bufflen;
332         cmd->use_sg = cmd->old_use_sg;
333         cmd->cmd_len = cmd->old_cmd_len;
334         cmd->sc_data_direction = cmd->sc_old_data_direction;
335         cmd->underflow = cmd->old_underflow;
336 }
337
338 void scsi_device_unbusy(struct scsi_device *sdev)
339 {
340         struct Scsi_Host *shost = sdev->host;
341         unsigned long flags;
342
343         spin_lock_irqsave(shost->host_lock, flags);
344         shost->host_busy--;
345         if (unlikely(test_bit(SHOST_RECOVERY, &shost->shost_state) &&
346                      shost->host_failed))
347                 scsi_eh_wakeup(shost);
348         spin_unlock(shost->host_lock);
349         spin_lock(&sdev->sdev_lock);
350         sdev->device_busy--;
351         spin_unlock_irqrestore(&sdev->sdev_lock, flags);
352 }
353
354 /*
355  * Called for single_lun devices on IO completion. Clear starget_sdev_user,
356  * and call blk_run_queue for all the scsi_devices on the target -
357  * including current_sdev first.
358  *
359  * Called with *no* scsi locks held.
360  */
361 static void scsi_single_lun_run(struct scsi_device *current_sdev)
362 {
363         struct Scsi_Host *shost = current_sdev->host;
364         struct scsi_device *sdev, *tmp;
365         unsigned long flags;
366
367         spin_lock_irqsave(shost->host_lock, flags);
368         scsi_target(current_sdev)->starget_sdev_user = NULL;
369         spin_unlock_irqrestore(shost->host_lock, flags);
370
371         /*
372          * Call blk_run_queue for all LUNs on the target, starting with
373          * current_sdev. We race with others (to set starget_sdev_user),
374          * but in most cases, we will be first. Ideally, each LU on the
375          * target would get some limited time or requests on the target.
376          */
377         blk_run_queue(current_sdev->request_queue);
378
379         spin_lock_irqsave(shost->host_lock, flags);
380         if (scsi_target(current_sdev)->starget_sdev_user)
381                 goto out;
382         list_for_each_entry_safe(sdev, tmp, &current_sdev->same_target_siblings,
383                         same_target_siblings) {
384                 if (scsi_device_get(sdev))
385                         continue;
386
387                 spin_unlock_irqrestore(shost->host_lock, flags);
388                 blk_run_queue(sdev->request_queue);
389                 spin_lock_irqsave(shost->host_lock, flags);
390         
391                 scsi_device_put(sdev);
392         }
393  out:
394         spin_unlock_irqrestore(shost->host_lock, flags);
395 }
396
397 /*
398  * Function:    scsi_run_queue()
399  *
400  * Purpose:     Select a proper request queue to serve next
401  *
402  * Arguments:   q       - last request's queue
403  *
404  * Returns:     Nothing
405  *
406  * Notes:       The previous command was completely finished, start
407  *              a new one if possible.
408  */
409 static void scsi_run_queue(struct request_queue *q)
410 {
411         struct scsi_device *sdev = q->queuedata;
412         struct Scsi_Host *shost = sdev->host;
413         unsigned long flags;
414
415         if (sdev->single_lun)
416                 scsi_single_lun_run(sdev);
417
418         spin_lock_irqsave(shost->host_lock, flags);
419         while (!list_empty(&shost->starved_list) &&
420                !shost->host_blocked && !shost->host_self_blocked &&
421                 !((shost->can_queue > 0) &&
422                   (shost->host_busy >= shost->can_queue))) {
423                 /*
424                  * As long as shost is accepting commands and we have
425                  * starved queues, call blk_run_queue. scsi_request_fn
426                  * drops the queue_lock and can add us back to the
427                  * starved_list.
428                  *
429                  * host_lock protects the starved_list and starved_entry.
430                  * scsi_request_fn must get the host_lock before checking
431                  * or modifying starved_list or starved_entry.
432                  */
433                 sdev = list_entry(shost->starved_list.next,
434                                           struct scsi_device, starved_entry);
435                 list_del_init(&sdev->starved_entry);
436                 spin_unlock_irqrestore(shost->host_lock, flags);
437
438                 blk_run_queue(sdev->request_queue);
439
440                 spin_lock_irqsave(shost->host_lock, flags);
441                 if (unlikely(!list_empty(&sdev->starved_entry)))
442                         /*
443                          * sdev lost a race, and was put back on the
444                          * starved list. This is unlikely but without this
445                          * in theory we could loop forever.
446                          */
447                         break;
448         }
449         spin_unlock_irqrestore(shost->host_lock, flags);
450
451         blk_run_queue(q);
452 }
453
454 /*
455  * Function:    scsi_requeue_command()
456  *
457  * Purpose:     Handle post-processing of completed commands.
458  *
459  * Arguments:   q       - queue to operate on
460  *              cmd     - command that may need to be requeued.
461  *
462  * Returns:     Nothing
463  *
464  * Notes:       After command completion, there may be blocks left
465  *              over which weren't finished by the previous command
466  *              this can be for a number of reasons - the main one is
467  *              I/O errors in the middle of the request, in which case
468  *              we need to request the blocks that come after the bad
469  *              sector.
470  */
471 static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd)
472 {
473         cmd->request->flags &= ~REQ_DONTPREP;
474         blk_insert_request(q, cmd->request, 1, cmd, 1);
475
476         scsi_run_queue(q);
477 }
478
479 void scsi_next_command(struct scsi_cmnd *cmd)
480 {
481         struct request_queue *q = cmd->device->request_queue;
482
483         scsi_put_command(cmd);
484         scsi_run_queue(q);
485 }
486
487 void scsi_run_host_queues(struct Scsi_Host *shost)
488 {
489         struct scsi_device *sdev;
490
491         shost_for_each_device(sdev, shost)
492                 scsi_run_queue(sdev->request_queue);
493 }
494
495 /*
496  * Function:    scsi_end_request()
497  *
498  * Purpose:     Post-processing of completed commands called from interrupt
499  *              handler or a bottom-half handler.
500  *
501  * Arguments:   cmd      - command that is complete.
502  *              uptodate - 1 if I/O indicates success, 0 for I/O error.
503  *              sectors  - number of sectors we want to mark.
504  *              requeue  - indicates whether we should requeue leftovers.
505  *              frequeue - indicates that if we release the command block
506  *                         that the queue request function should be called.
507  *
508  * Lock status: Assumed that lock is not held upon entry.
509  *
510  * Returns:     Nothing
511  *
512  * Notes:       This is called for block device requests in order to
513  *              mark some number of sectors as complete.
514  * 
515  *              We are guaranteeing that the request queue will be goosed
516  *              at some point during this call.
517  */
518 static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
519                                           int bytes, int requeue)
520 {
521         request_queue_t *q = cmd->device->request_queue;
522         struct request *req = cmd->request;
523         unsigned long flags;
524
525         /*
526          * If there are blocks left over at the end, set up the command
527          * to queue the remainder of them.
528          */
529         if (end_that_request_chunk(req, uptodate, bytes)) {
530                 int leftover = (req->hard_nr_sectors << 9);
531
532                 if (blk_pc_request(req))
533                         leftover = req->data_len;
534
535                 /* kill remainder if no retrys */
536                 if (!uptodate && blk_noretry_request(req))
537                         end_that_request_chunk(req, 0, leftover);
538                 else {
539                         if (requeue)
540                                 /*
541                                  * Bleah.  Leftovers again.  Stick the
542                                  * leftovers in the front of the
543                                  * queue, and goose the queue again.
544                                  */
545                                 scsi_requeue_command(q, cmd);
546
547                         return cmd;
548                 }
549         }
550
551         add_disk_randomness(req->rq_disk);
552
553         spin_lock_irqsave(q->queue_lock, flags);
554         if (blk_rq_tagged(req))
555                 blk_queue_end_tag(q, req);
556         end_that_request_last(req);
557         spin_unlock_irqrestore(q->queue_lock, flags);
558
559         /*
560          * This will goose the queue request function at the end, so we don't
561          * need to worry about launching another command.
562          */
563         scsi_next_command(cmd);
564         return NULL;
565 }
566
567 static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, int gfp_mask)
568 {
569         struct scsi_host_sg_pool *sgp;
570         struct scatterlist *sgl;
571
572         BUG_ON(!cmd->use_sg);
573
574         switch (cmd->use_sg) {
575         case 1 ... 8:
576                 cmd->sglist_len = 0;
577                 break;
578         case 9 ... 16:
579                 cmd->sglist_len = 1;
580                 break;
581         case 17 ... 32:
582                 cmd->sglist_len = 2;
583                 break;
584 #if (SCSI_MAX_PHYS_SEGMENTS > 32)
585         case 33 ... 64:
586                 cmd->sglist_len = 3;
587                 break;
588 #if (SCSI_MAX_PHYS_SEGMENTS > 64)
589         case 65 ... 128:
590                 cmd->sglist_len = 4;
591                 break;
592 #if (SCSI_MAX_PHYS_SEGMENTS  > 128)
593         case 129 ... 256:
594                 cmd->sglist_len = 5;
595                 break;
596 #endif
597 #endif
598 #endif
599         default:
600                 return NULL;
601         }
602
603         sgp = scsi_sg_pools + cmd->sglist_len;
604         sgl = mempool_alloc(sgp->pool, gfp_mask);
605         if (sgl)
606                 memset(sgl, 0, sgp->size);
607         return sgl;
608 }
609
610 static void scsi_free_sgtable(struct scatterlist *sgl, int index)
611 {
612         struct scsi_host_sg_pool *sgp;
613
614         BUG_ON(index > SG_MEMPOOL_NR);
615
616         sgp = scsi_sg_pools + index;
617         mempool_free(sgl, sgp->pool);
618 }
619
620 /*
621  * Function:    scsi_release_buffers()
622  *
623  * Purpose:     Completion processing for block device I/O requests.
624  *
625  * Arguments:   cmd     - command that we are bailing.
626  *
627  * Lock status: Assumed that no lock is held upon entry.
628  *
629  * Returns:     Nothing
630  *
631  * Notes:       In the event that an upper level driver rejects a
632  *              command, we must release resources allocated during
633  *              the __init_io() function.  Primarily this would involve
634  *              the scatter-gather table, and potentially any bounce
635  *              buffers.
636  */
637 static void scsi_release_buffers(struct scsi_cmnd *cmd)
638 {
639         struct request *req = cmd->request;
640
641         /*
642          * Free up any indirection buffers we allocated for DMA purposes. 
643          */
644         if (cmd->use_sg)
645                 scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len);
646         else if (cmd->request_buffer != req->buffer)
647                 kfree(cmd->request_buffer);
648
649         /*
650          * Zero these out.  They now point to freed memory, and it is
651          * dangerous to hang onto the pointers.
652          */
653         cmd->buffer  = NULL;
654         cmd->bufflen = 0;
655         cmd->request_buffer = NULL;
656         cmd->request_bufflen = 0;
657 }
658
659 /*
660  * Function:    scsi_io_completion()
661  *
662  * Purpose:     Completion processing for block device I/O requests.
663  *
664  * Arguments:   cmd   - command that is finished.
665  *
666  * Lock status: Assumed that no lock is held upon entry.
667  *
668  * Returns:     Nothing
669  *
670  * Notes:       This function is matched in terms of capabilities to
671  *              the function that created the scatter-gather list.
672  *              In other words, if there are no bounce buffers
673  *              (the normal case for most drivers), we don't need
674  *              the logic to deal with cleaning up afterwards.
675  *
676  *              We must do one of several things here:
677  *
678  *              a) Call scsi_end_request.  This will finish off the
679  *                 specified number of sectors.  If we are done, the
680  *                 command block will be released, and the queue
681  *                 function will be goosed.  If we are not done, then
682  *                 scsi_end_request will directly goose the queue.
683  *
684  *              b) We can just use scsi_requeue_command() here.  This would
685  *                 be used if we just wanted to retry, for example.
686  */
687 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes,
688                         unsigned int block_bytes)
689 {
690         int result = cmd->result;
691         int this_count = cmd->bufflen;
692         request_queue_t *q = cmd->device->request_queue;
693         struct request *req = cmd->request;
694         int clear_errors = 1;
695
696         /*
697          * Free up any indirection buffers we allocated for DMA purposes. 
698          * For the case of a READ, we need to copy the data out of the
699          * bounce buffer and into the real buffer.
700          */
701         if (cmd->use_sg)
702                 scsi_free_sgtable(cmd->buffer, cmd->sglist_len);
703         else if (cmd->buffer != req->buffer) {
704                 if (rq_data_dir(req) == READ) {
705                         unsigned long flags;
706                         char *to = bio_kmap_irq(req->bio, &flags);
707                         memcpy(to, cmd->buffer, cmd->bufflen);
708                         bio_kunmap_irq(to, &flags);
709                 }
710                 kfree(cmd->buffer);
711         }
712
713         if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
714                 req->errors = result;
715                 if (result) {
716                         clear_errors = 0;
717                         if (cmd->sense_buffer[0] & 0x70) {
718                                 int len = 8 + cmd->sense_buffer[7];
719
720                                 if (len > SCSI_SENSE_BUFFERSIZE)
721                                         len = SCSI_SENSE_BUFFERSIZE;
722                                 memcpy(req->sense, cmd->sense_buffer,  len);
723                                 req->sense_len = len;
724                         }
725                 } else
726                         req->data_len -= cmd->bufflen;
727         }
728
729         /*
730          * Zero these out.  They now point to freed memory, and it is
731          * dangerous to hang onto the pointers.
732          */
733         cmd->buffer  = NULL;
734         cmd->bufflen = 0;
735         cmd->request_buffer = NULL;
736         cmd->request_bufflen = 0;
737
738         /*
739          * Next deal with any sectors which we were able to correctly
740          * handle.
741          */
742         if (good_bytes >= 0) {
743                 SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n",
744                                               req->nr_sectors, good_bytes));
745                 SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg));
746
747                 if (clear_errors)
748                         req->errors = 0;
749                 /*
750                  * If multiple sectors are requested in one buffer, then
751                  * they will have been finished off by the first command.
752                  * If not, then we have a multi-buffer command.
753                  *
754                  * If block_bytes != 0, it means we had a medium error
755                  * of some sort, and that we want to mark some number of
756                  * sectors as not uptodate.  Thus we want to inhibit
757                  * requeueing right here - we will requeue down below
758                  * when we handle the bad sectors.
759                  */
760                 cmd = scsi_end_request(cmd, 1, good_bytes, result == 0);
761
762                 /*
763                  * If the command completed without error, then either finish off the
764                  * rest of the command, or start a new one.
765                  */
766                 if (result == 0 || cmd == NULL ) {
767                         return;
768                 }
769         }
770         /*
771          * Now, if we were good little boys and girls, Santa left us a request
772          * sense buffer.  We can extract information from this, so we
773          * can choose a block to remap, etc.
774          */
775         if (driver_byte(result) != 0) {
776                 if ((cmd->sense_buffer[0] & 0x7f) == 0x70) {
777                         /*
778                          * If the device is in the process of becoming ready,
779                          * retry.
780                          */
781                         if (cmd->sense_buffer[12] == 0x04 &&
782                             cmd->sense_buffer[13] == 0x01) {
783                                 scsi_requeue_command(q, cmd);
784                                 return;
785                         }
786                         if ((cmd->sense_buffer[2] & 0xf) == UNIT_ATTENTION) {
787                                 if (cmd->device->removable) {
788                                         /* detected disc change.  set a bit 
789                                          * and quietly refuse further access.
790                                          */
791                                         cmd->device->changed = 1;
792                                         cmd = scsi_end_request(cmd, 0,
793                                                         this_count, 1);
794                                         return;
795                                 } else {
796                                         /*
797                                         * Must have been a power glitch, or a
798                                         * bus reset.  Could not have been a
799                                         * media change, so we just retry the
800                                         * request and see what happens.  
801                                         */
802                                         scsi_requeue_command(q, cmd);
803                                         return;
804                                 }
805                         }
806                 }
807                 /*
808                  * If we had an ILLEGAL REQUEST returned, then we may have
809                  * performed an unsupported command.  The only thing this
810                  * should be would be a ten byte read where only a six byte
811                  * read was supported.  Also, on a system where READ CAPACITY
812                  * failed, we may have read past the end of the disk.
813                  */
814
815                 switch (cmd->sense_buffer[2]) {
816                 case ILLEGAL_REQUEST:
817                         if (cmd->device->use_10_for_rw &&
818                             (cmd->cmnd[0] == READ_10 ||
819                              cmd->cmnd[0] == WRITE_10)) {
820                                 cmd->device->use_10_for_rw = 0;
821                                 /*
822                                  * This will cause a retry with a 6-byte
823                                  * command.
824                                  */
825                                 scsi_requeue_command(q, cmd);
826                                 result = 0;
827                         } else {
828                                 cmd = scsi_end_request(cmd, 0, this_count, 1);
829                                 return;
830                         }
831                         break;
832                 case NOT_READY:
833                         printk(KERN_INFO "Device %s not ready.\n",
834                                req->rq_disk ? req->rq_disk->disk_name : "");
835                         cmd = scsi_end_request(cmd, 0, this_count, 1);
836                         return;
837                         break;
838                 case MEDIUM_ERROR:
839                 case VOLUME_OVERFLOW:
840                         printk("scsi%d: ERROR on channel %d, id %d, lun %d, CDB: ",
841                                cmd->device->host->host_no, (int) cmd->device->channel,
842                                (int) cmd->device->id, (int) cmd->device->lun);
843                         __scsi_print_command(cmd->data_cmnd);
844                         scsi_print_sense("", cmd);
845                         cmd = scsi_end_request(cmd, 0, block_bytes, 1);
846                         return;
847                 default:
848                         break;
849                 }
850         }                       /* driver byte != 0 */
851         if (host_byte(result) == DID_RESET) {
852                 /*
853                  * Third party bus reset or reset for error
854                  * recovery reasons.  Just retry the request
855                  * and see what happens.  
856                  */
857                 scsi_requeue_command(q, cmd);
858                 return;
859         }
860         if (result) {
861                 printk("SCSI error : <%d %d %d %d> return code = 0x%x\n",
862                        cmd->device->host->host_no,
863                        cmd->device->channel,
864                        cmd->device->id,
865                        cmd->device->lun, result);
866
867                 if (driver_byte(result) & DRIVER_SENSE)
868                         scsi_print_sense("", cmd);
869                 /*
870                  * Mark a single buffer as not uptodate.  Queue the remainder.
871                  * We sometimes get this cruft in the event that a medium error
872                  * isn't properly reported.
873                  */
874                 block_bytes = req->hard_cur_sectors << 9;
875                 if (!block_bytes)
876                         block_bytes = req->data_len;
877                 cmd = scsi_end_request(cmd, 0, block_bytes, 1);
878         }
879 }
880
881 /*
882  * Function:    scsi_init_io()
883  *
884  * Purpose:     SCSI I/O initialize function.
885  *
886  * Arguments:   cmd   - Command descriptor we wish to initialize
887  *
888  * Returns:     0 on success
889  *              BLKPREP_DEFER if the failure is retryable
890  *              BLKPREP_KILL if the failure is fatal
891  */
892 static int scsi_init_io(struct scsi_cmnd *cmd)
893 {
894         struct request     *req = cmd->request;
895         struct scatterlist *sgpnt;
896         int                count;
897
898         /*
899          * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
900          */
901         if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
902                 cmd->request_bufflen = req->data_len;
903                 cmd->request_buffer = req->data;
904                 req->buffer = req->data;
905                 cmd->use_sg = 0;
906                 return 0;
907         }
908
909         /*
910          * we used to not use scatter-gather for single segment request,
911          * but now we do (it makes highmem I/O easier to support without
912          * kmapping pages)
913          */
914         cmd->use_sg = req->nr_phys_segments;
915
916         /*
917          * if sg table allocation fails, requeue request later.
918          */
919         sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC);
920         if (unlikely(!sgpnt)) {
921                 req->flags |= REQ_SPECIAL;
922                 return BLKPREP_DEFER;
923         }
924
925         cmd->request_buffer = (char *) sgpnt;
926         cmd->request_bufflen = req->nr_sectors << 9;
927         if (blk_pc_request(req))
928                 cmd->request_bufflen = req->data_len;
929         req->buffer = NULL;
930
931         /* 
932          * Next, walk the list, and fill in the addresses and sizes of
933          * each segment.
934          */
935         count = blk_rq_map_sg(req->q, req, cmd->request_buffer);
936
937         /*
938          * mapped well, send it off
939          */
940         if (likely(count <= cmd->use_sg)) {
941                 cmd->use_sg = count;
942                 return 0;
943         }
944
945         printk(KERN_ERR "Incorrect number of segments after building list\n");
946         printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg);
947         printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors,
948                         req->current_nr_sectors);
949
950         /* release the command and kill it */
951         scsi_release_buffers(cmd);
952         scsi_put_command(cmd);
953         return BLKPREP_KILL;
954 }
955
956 static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk,
957                                sector_t *error_sector)
958 {
959         struct scsi_device *sdev = q->queuedata;
960         struct scsi_driver *drv;
961
962         if (sdev->sdev_state != SDEV_RUNNING)
963                 return -ENXIO;
964
965         drv = *(struct scsi_driver **) disk->private_data;
966         if (drv->issue_flush)
967                 return drv->issue_flush(&sdev->sdev_gendev, error_sector);
968
969         return -EOPNOTSUPP;
970 }
971
972 static int scsi_prep_fn(struct request_queue *q, struct request *req)
973 {
974         struct scsi_device *sdev = q->queuedata;
975         struct scsi_cmnd *cmd;
976         int specials_only = 0;
977
978         /*
979          * Just check to see if the device is online.  If it isn't, we
980          * refuse to process any commands.  The device must be brought
981          * online before trying any recovery commands
982          */
983         if (unlikely(!scsi_device_online(sdev))) {
984                 printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
985                        sdev->host->host_no, sdev->id, sdev->lun);
986                 return BLKPREP_KILL;
987         }
988         if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
989                 /* OK, we're not in a running state don't prep
990                  * user commands */
991                 if (sdev->sdev_state == SDEV_DEL) {
992                         /* Device is fully deleted, no commands
993                          * at all allowed down */
994                         printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to dead device\n",
995                                sdev->host->host_no, sdev->id, sdev->lun);
996                         return BLKPREP_KILL;
997                 }
998                 /* OK, we only allow special commands (i.e. not
999                  * user initiated ones */
1000                 specials_only = sdev->sdev_state;
1001         }
1002
1003         /*
1004          * Find the actual device driver associated with this command.
1005          * The SPECIAL requests are things like character device or
1006          * ioctls, which did not originate from ll_rw_blk.  Note that
1007          * the special field is also used to indicate the cmd for
1008          * the remainder of a partially fulfilled request that can 
1009          * come up when there is a medium error.  We have to treat
1010          * these two cases differently.  We differentiate by looking
1011          * at request->cmd, as this tells us the real story.
1012          */
1013         if (req->flags & REQ_SPECIAL) {
1014                 struct scsi_request *sreq = req->special;
1015
1016                 if (sreq->sr_magic == SCSI_REQ_MAGIC) {
1017                         cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC);
1018                         if (unlikely(!cmd))
1019                                 goto defer;
1020                         scsi_init_cmd_from_req(cmd, sreq);
1021                 } else
1022                         cmd = req->special;
1023         } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
1024
1025                 if(unlikely(specials_only)) {
1026                         if(specials_only == SDEV_QUIESCE ||
1027                                         specials_only == SDEV_BLOCK)
1028                                 return BLKPREP_DEFER;
1029                         
1030                         printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to device being removed\n",
1031                                sdev->host->host_no, sdev->id, sdev->lun);
1032                         return BLKPREP_KILL;
1033                 }
1034                         
1035                         
1036                 /*
1037                  * Now try and find a command block that we can use.
1038                  */
1039                 if (!req->special) {
1040                         cmd = scsi_get_command(sdev, GFP_ATOMIC);
1041                         if (unlikely(!cmd))
1042                                 goto defer;
1043                 } else
1044                         cmd = req->special;
1045                 
1046                 /* pull a tag out of the request if we have one */
1047                 cmd->tag = req->tag;
1048         } else {
1049                 blk_dump_rq_flags(req, "SCSI bad req");
1050                 return BLKPREP_KILL;
1051         }
1052         
1053         /* note the overloading of req->special.  When the tag
1054          * is active it always means cmd.  If the tag goes
1055          * back for re-queueing, it may be reset */
1056         req->special = cmd;
1057         cmd->request = req;
1058         
1059         /*
1060          * FIXME: drop the lock here because the functions below
1061          * expect to be called without the queue lock held.  Also,
1062          * previously, we dequeued the request before dropping the
1063          * lock.  We hope REQ_STARTED prevents anything untoward from
1064          * happening now.
1065          */
1066         if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) {
1067                 struct scsi_driver *drv;
1068                 int ret;
1069
1070                 /*
1071                  * This will do a couple of things:
1072                  *  1) Fill in the actual SCSI command.
1073                  *  2) Fill in any other upper-level specific fields
1074                  * (timeout).
1075                  *
1076                  * If this returns 0, it means that the request failed
1077                  * (reading past end of disk, reading offline device,
1078                  * etc).   This won't actually talk to the device, but
1079                  * some kinds of consistency checking may cause the     
1080                  * request to be rejected immediately.
1081                  */
1082
1083                 /* 
1084                  * This sets up the scatter-gather table (allocating if
1085                  * required).
1086                  */
1087                 ret = scsi_init_io(cmd);
1088                 if (ret)        /* BLKPREP_KILL return also releases the command */
1089                         return ret;
1090                 
1091                 /*
1092                  * Initialize the actual SCSI command for this request.
1093                  */
1094                 drv = *(struct scsi_driver **)req->rq_disk->private_data;
1095                 if (unlikely(!drv->init_command(cmd))) {
1096                         scsi_release_buffers(cmd);
1097                         scsi_put_command(cmd);
1098                         return BLKPREP_KILL;
1099                 }
1100         }
1101
1102         /*
1103          * The request is now prepped, no need to come back here
1104          */
1105         req->flags |= REQ_DONTPREP;
1106         return BLKPREP_OK;
1107
1108  defer:
1109         /* If we defer, the elv_next_request() returns NULL, but the
1110          * queue must be restarted, so we plug here if no returning
1111          * command will automatically do that. */
1112         if (sdev->device_busy == 0)
1113                 blk_plug_device(q);
1114         return BLKPREP_DEFER;
1115 }
1116
1117 /*
1118  * scsi_dev_queue_ready: if we can send requests to sdev, return 1 else
1119  * return 0.
1120  *
1121  * Called with the queue_lock held.
1122  */
1123 static inline int scsi_dev_queue_ready(struct request_queue *q,
1124                                   struct scsi_device *sdev)
1125 {
1126         if (sdev->device_busy >= sdev->queue_depth)
1127                 return 0;
1128         if (sdev->device_busy == 0 && sdev->device_blocked) {
1129                 /*
1130                  * unblock after device_blocked iterates to zero
1131                  */
1132                 if (--sdev->device_blocked == 0) {
1133                         SCSI_LOG_MLQUEUE(3,
1134                                 printk("scsi%d (%d:%d) unblocking device at"
1135                                        " zero depth\n", sdev->host->host_no,
1136                                        sdev->id, sdev->lun));
1137                 } else {
1138                         blk_plug_device(q);
1139                         return 0;
1140                 }
1141         }
1142         if (sdev->device_blocked)
1143                 return 0;
1144
1145         return 1;
1146 }
1147
1148 /*
1149  * scsi_host_queue_ready: if we can send requests to shost, return 1 else
1150  * return 0. We must end up running the queue again whenever 0 is
1151  * returned, else IO can hang.
1152  *
1153  * Called with host_lock held.
1154  */
1155 static inline int scsi_host_queue_ready(struct request_queue *q,
1156                                    struct Scsi_Host *shost,
1157                                    struct scsi_device *sdev)
1158 {
1159         if (test_bit(SHOST_RECOVERY, &shost->shost_state))
1160                 return 0;
1161         if (shost->host_busy == 0 && shost->host_blocked) {
1162                 /*
1163                  * unblock after host_blocked iterates to zero
1164                  */
1165                 if (--shost->host_blocked == 0) {
1166                         SCSI_LOG_MLQUEUE(3,
1167                                 printk("scsi%d unblocking host at zero depth\n",
1168                                         shost->host_no));
1169                 } else {
1170                         blk_plug_device(q);
1171                         return 0;
1172                 }
1173         }
1174         if ((shost->can_queue > 0 && shost->host_busy >= shost->can_queue) ||
1175             shost->host_blocked || shost->host_self_blocked) {
1176                 if (list_empty(&sdev->starved_entry))
1177                         list_add_tail(&sdev->starved_entry, &shost->starved_list);
1178                 return 0;
1179         }
1180
1181         /* We're OK to process the command, so we can't be starved */
1182         if (!list_empty(&sdev->starved_entry))
1183                 list_del_init(&sdev->starved_entry);
1184
1185         return 1;
1186 }
1187
1188 /*
1189  * Function:    scsi_request_fn()
1190  *
1191  * Purpose:     Main strategy routine for SCSI.
1192  *
1193  * Arguments:   q       - Pointer to actual queue.
1194  *
1195  * Returns:     Nothing
1196  *
1197  * Lock status: IO request lock assumed to be held when called.
1198  */
1199 static void scsi_request_fn(struct request_queue *q)
1200 {
1201         struct scsi_device *sdev = q->queuedata;
1202         struct Scsi_Host *shost = sdev->host;
1203         struct scsi_cmnd *cmd;
1204         struct request *req;
1205
1206         if(!get_device(&sdev->sdev_gendev))
1207                 /* We must be tearing the block queue down already */
1208                 return;
1209
1210         /*
1211          * To start with, we keep looping until the queue is empty, or until
1212          * the host is no longer able to accept any more requests.
1213          */
1214         while (!blk_queue_plugged(q)) {
1215                 int rtn;
1216                 /*
1217                  * get next queueable request.  We do this early to make sure
1218                  * that the request is fully prepared even if we cannot 
1219                  * accept it.
1220                  */
1221                 req = elv_next_request(q);
1222                 if (!req || !scsi_dev_queue_ready(q, sdev))
1223                         break;
1224
1225                 if (unlikely(!scsi_device_online(sdev))) {
1226                         printk(KERN_ERR "scsi%d (%d:%d): rejecting I/O to offline device\n",
1227                                sdev->host->host_no, sdev->id, sdev->lun);
1228                         blkdev_dequeue_request(req);
1229                         req->flags |= REQ_QUIET;
1230                         while (end_that_request_first(req, 0, req->nr_sectors))
1231                                 ;
1232                         end_that_request_last(req);
1233                         continue;
1234                 }
1235
1236
1237                 /*
1238                  * Remove the request from the request list.
1239                  */
1240                 if (!(blk_queue_tagged(q) && !blk_queue_start_tag(q, req)))
1241                         blkdev_dequeue_request(req);
1242                 sdev->device_busy++;
1243
1244                 spin_unlock(q->queue_lock);
1245                 spin_lock(shost->host_lock);
1246
1247                 if (!scsi_host_queue_ready(q, shost, sdev))
1248                         goto not_ready;
1249                 if (sdev->single_lun) {
1250                         if (scsi_target(sdev)->starget_sdev_user &&
1251                             scsi_target(sdev)->starget_sdev_user != sdev)
1252                                 goto not_ready;
1253                         scsi_target(sdev)->starget_sdev_user = sdev;
1254                 }
1255                 shost->host_busy++;
1256
1257                 /*
1258                  * XXX(hch): This is rather suboptimal, scsi_dispatch_cmd will
1259                  *              take the lock again.
1260                  */
1261                 spin_unlock_irq(shost->host_lock);
1262
1263                 cmd = req->special;
1264                 if (unlikely(cmd == NULL)) {
1265                         printk(KERN_CRIT "impossible request in %s.\n"
1266                                          "please mail a stack trace to "
1267                                          "linux-scsi@vger.kernel.org",
1268                                          __FUNCTION__);
1269                         BUG();
1270                 }
1271
1272                 /*
1273                  * Finally, initialize any error handling parameters, and set up
1274                  * the timers for timeouts.
1275                  */
1276                 scsi_init_cmd_errh(cmd);
1277
1278                 /*
1279                  * Dispatch the command to the low-level driver.
1280                  */
1281                 rtn = scsi_dispatch_cmd(cmd);
1282                 spin_lock_irq(q->queue_lock);
1283                 if(rtn) {
1284                         /* we're refusing the command; because of
1285                          * the way locks get dropped, we need to 
1286                          * check here if plugging is required */
1287                         if(sdev->device_busy == 0)
1288                                 blk_plug_device(q);
1289
1290                         break;
1291                 }
1292         }
1293
1294         goto out;
1295
1296  not_ready:
1297         spin_unlock_irq(shost->host_lock);
1298
1299         /*
1300          * lock q, handle tag, requeue req, and decrement device_busy. We
1301          * must return with queue_lock held.
1302          *
1303          * Decrementing device_busy without checking it is OK, as all such
1304          * cases (host limits or settings) should run the queue at some
1305          * later time.
1306          */
1307         spin_lock_irq(q->queue_lock);
1308         blk_requeue_request(q, req);
1309         sdev->device_busy--;
1310         if(sdev->device_busy == 0)
1311                 blk_plug_device(q);
1312  out:
1313         /* must be careful here...if we trigger the ->remove() function
1314          * we cannot be holding the q lock */
1315         spin_unlock_irq(q->queue_lock);
1316         put_device(&sdev->sdev_gendev);
1317         spin_lock_irq(q->queue_lock);
1318 }
1319
1320 u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
1321 {
1322         struct device *host_dev;
1323         u64 bounce_limit = 0xffffffff;
1324
1325         if (shost->unchecked_isa_dma)
1326                 return BLK_BOUNCE_ISA;
1327         /*
1328          * Platforms with virtual-DMA translation
1329          * hardware have no practical limit.
1330          */
1331         if (!PCI_DMA_BUS_IS_PHYS)
1332                 return BLK_BOUNCE_ANY;
1333
1334         host_dev = scsi_get_device(shost);
1335         if (host_dev && host_dev->dma_mask)
1336                 bounce_limit = *host_dev->dma_mask;
1337
1338         return bounce_limit;
1339 }
1340
1341 struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
1342 {
1343         struct Scsi_Host *shost = sdev->host;
1344         struct request_queue *q;
1345
1346         q = blk_init_queue(scsi_request_fn, &sdev->sdev_lock);
1347         if (!q)
1348                 return NULL;
1349
1350         blk_queue_prep_rq(q, scsi_prep_fn);
1351
1352         blk_queue_max_hw_segments(q, shost->sg_tablesize);
1353         blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS);
1354         blk_queue_max_sectors(q, shost->max_sectors);
1355         blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost));
1356         blk_queue_segment_boundary(q, shost->dma_boundary);
1357         blk_queue_issue_flush_fn(q, scsi_issue_flush_fn);
1358
1359         if (!shost->use_clustering)
1360                 clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
1361         return q;
1362 }
1363
/*
 * scsi_free_queue - release a request queue by passing it to
 * blk_cleanup_queue().
 */
void scsi_free_queue(struct request_queue *q)
{
        blk_cleanup_queue(q);
}
1368
1369 /*
1370  * Function:    scsi_block_requests()
1371  *
1372  * Purpose:     Utility function used by low-level drivers to prevent further
1373  *              commands from being queued to the device.
1374  *
1375  * Arguments:   shost       - Host in question
1376  *
1377  * Returns:     Nothing
1378  *
1379  * Lock status: No locks are assumed held.
1380  *
1381  * Notes:       There is no timer nor any other means by which the requests
1382  *              get unblocked other than the low-level driver calling
1383  *              scsi_unblock_requests().
1384  */
1385 void scsi_block_requests(struct Scsi_Host *shost)
1386 {
1387         shost->host_self_blocked = 1;
1388 }
1389
1390 /*
1391  * Function:    scsi_unblock_requests()
1392  *
1393  * Purpose:     Utility function used by low-level drivers to allow further
1394  *              commands from being queued to the device.
1395  *
1396  * Arguments:   shost       - Host in question
1397  *
1398  * Returns:     Nothing
1399  *
1400  * Lock status: No locks are assumed held.
1401  *
1402  * Notes:       There is no timer nor any other means by which the requests
1403  *              get unblocked other than the low-level driver calling
1404  *              scsi_unblock_requests().
1405  *
1406  *              This is done as an API function so that changes to the
1407  *              internals of the scsi mid-layer won't require wholesale
1408  *              changes to drivers that use this feature.
1409  */
1410 void scsi_unblock_requests(struct Scsi_Host *shost)
1411 {
1412         shost->host_self_blocked = 0;
1413         scsi_run_host_queues(shost);
1414 }
1415
1416 int __init scsi_init_queue(void)
1417 {
1418         int i;
1419
1420         for (i = 0; i < SG_MEMPOOL_NR; i++) {
1421                 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1422                 int size = sgp->size * sizeof(struct scatterlist);
1423
1424                 sgp->slab = kmem_cache_create(sgp->name, size, 0,
1425                                 SLAB_HWCACHE_ALIGN, NULL, NULL);
1426                 if (!sgp->slab) {
1427                         printk(KERN_ERR "SCSI: can't init sg slab %s\n",
1428                                         sgp->name);
1429                 }
1430
1431                 sgp->pool = mempool_create(SG_MEMPOOL_SIZE,
1432                                 mempool_alloc_slab, mempool_free_slab,
1433                                 sgp->slab);
1434                 if (!sgp->pool) {
1435                         printk(KERN_ERR "SCSI: can't init sg mempool %s\n",
1436                                         sgp->name);
1437                 }
1438         }
1439
1440         return 0;
1441 }
1442
1443 void scsi_exit_queue(void)
1444 {
1445         int i;
1446
1447         for (i = 0; i < SG_MEMPOOL_NR; i++) {
1448                 struct scsi_host_sg_pool *sgp = scsi_sg_pools + i;
1449                 mempool_destroy(sgp->pool);
1450                 kmem_cache_destroy(sgp->slab);
1451         }
1452 }
1453 /**
1454  *      __scsi_mode_sense - issue a mode sense, falling back from 10 to 
1455  *              six bytes if necessary.
1456  *      @sreq:  SCSI request to fill in with the MODE_SENSE
1457  *      @dbd:   set if mode sense will allow block descriptors to be returned
1458  *      @modepage: mode page being requested
1459  *      @buffer: request buffer (may not be smaller than eight bytes)
1460  *      @len:   length of request buffer.
1461  *      @timeout: command timeout
1462  *      @retries: number of retries before failing
1463  *      @data: returns a structure abstracting the mode header data
1464  *
1465  *      Returns zero if unsuccessful, or the header offset (either 4
1466  *      or 8 depending on whether a six or ten byte command was
1467  *      issued) if successful.
1468  **/
1469 int
1470 __scsi_mode_sense(struct scsi_request *sreq, int dbd, int modepage,
1471                   unsigned char *buffer, int len, int timeout, int retries,
1472                   struct scsi_mode_data *data) {
1473         unsigned char cmd[12];
1474         int use_10_for_ms;
1475         int header_length;
1476
1477         memset(data, 0, sizeof(*data));
1478         memset(&cmd[0], 0, 12);
1479         cmd[1] = dbd & 0x18;    /* allows DBD and LLBA bits */
1480         cmd[2] = modepage;
1481
1482  retry:
1483         use_10_for_ms = sreq->sr_device->use_10_for_ms;
1484
1485         if (use_10_for_ms) {
1486                 if (len < 8)
1487                         len = 8;
1488
1489                 cmd[0] = MODE_SENSE_10;
1490                 cmd[8] = len;
1491                 header_length = 8;
1492         } else {
1493                 if (len < 4)
1494                         len = 4;
1495
1496                 cmd[0] = MODE_SENSE;
1497                 cmd[4] = len;
1498                 header_length = 4;
1499         }
1500
1501         sreq->sr_cmd_len = 0;
1502         sreq->sr_sense_buffer[0] = 0;
1503         sreq->sr_sense_buffer[2] = 0;
1504         sreq->sr_data_direction = DMA_FROM_DEVICE;
1505
1506         memset(buffer, 0, len);
1507
1508         scsi_wait_req(sreq, cmd, buffer, len, timeout, retries);
1509
1510         /* This code looks awful: what it's doing is making sure an
1511          * ILLEGAL REQUEST sense return identifies the actual command
1512          * byte as the problem.  MODE_SENSE commands can return
1513          * ILLEGAL REQUEST if the code page isn't supported */
1514         if (use_10_for_ms && ! scsi_status_is_good(sreq->sr_result) &&
1515             (driver_byte(sreq->sr_result) & DRIVER_SENSE) &&
1516             sreq->sr_sense_buffer[2] == ILLEGAL_REQUEST &&
1517             (sreq->sr_sense_buffer[4] & 0x40) == 0x40 &&
1518             sreq->sr_sense_buffer[5] == 0 &&
1519             sreq->sr_sense_buffer[6] == 0 ) {
1520                 sreq->sr_device->use_10_for_ms = 0;
1521                 goto retry;
1522         }
1523
1524         if(scsi_status_is_good(sreq->sr_result)) {
1525                 data->header_length = header_length;
1526                 if(use_10_for_ms) {
1527                         data->length = buffer[0]*256 + buffer[1] + 2;
1528                         data->medium_type = buffer[2];
1529                         data->device_specific = buffer[3];
1530                         data->longlba = buffer[4] & 0x01;
1531                         data->block_descriptor_length = buffer[6]*256
1532                                 + buffer[7];
1533                 } else {
1534                         data->length = buffer[0] + 1;
1535                         data->medium_type = buffer[1];
1536                         data->device_specific = buffer[2];
1537                         data->block_descriptor_length = buffer[3];
1538                 }
1539         }
1540
1541         return sreq->sr_result;
1542 }
1543
1544 /**
1545  *      scsi_mode_sense - issue a mode sense, falling back from 10 to 
1546  *              six bytes if necessary.
1547  *      @sdev:  scsi device to send command to.
1548  *      @dbd:   set if mode sense will disable block descriptors in the return
1549  *      @modepage: mode page being requested
1550  *      @buffer: request buffer (may not be smaller than eight bytes)
1551  *      @len:   length of request buffer.
1552  *      @timeout: command timeout
1553  *      @retries: number of retries before failing
1554  *
1555  *      Returns zero if unsuccessful, or the header offset (either 4
1556  *      or 8 depending on whether a six or ten byte command was
1557  *      issued) if successful.
1558  **/
1559 int
1560 scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage,
1561                 unsigned char *buffer, int len, int timeout, int retries,
1562                 struct scsi_mode_data *data)
1563 {
1564         struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1565         int ret;
1566
1567         if (!sreq)
1568                 return -1;
1569
1570         ret = __scsi_mode_sense(sreq, dbd, modepage, buffer, len,
1571                                 timeout, retries, data);
1572
1573         scsi_release_request(sreq);
1574
1575         return ret;
1576 }
1577
1578 int
1579 scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries)
1580 {
1581         struct scsi_request *sreq;
1582         char cmd[] = {
1583                 TEST_UNIT_READY, 0, 0, 0, 0, 0,
1584         };
1585         int result;
1586         
1587         sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1588         if (!sreq)
1589                 return -ENOMEM;
1590
1591         sreq->sr_data_direction = DMA_NONE;
1592         scsi_wait_req(sreq, cmd, NULL, 0, timeout, retries);
1593
1594         if ((driver_byte(sreq->sr_result) & DRIVER_SENSE) &&
1595             ((sreq->sr_sense_buffer[2] & 0x0f) == UNIT_ATTENTION ||
1596              (sreq->sr_sense_buffer[2] & 0x0f) == NOT_READY) &&
1597             sdev->removable) {
1598                 sdev->changed = 1;
1599                 sreq->sr_result = 0;
1600         }
1601         result = sreq->sr_result;
1602         scsi_release_request(sreq);
1603         return result;
1604 }
1605 EXPORT_SYMBOL(scsi_test_unit_ready);
1606
1607 /**
1608  *      scsi_device_set_state - Take the given device through the device
1609  *              state model.
1610  *      @sdev:  scsi device to change the state of.
1611  *      @state: state to change to.
1612  *
1613  *      Returns zero if unsuccessful or an error if the requested 
1614  *      transition is illegal.
1615  **/
1616 int
1617 scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
1618 {
1619         enum scsi_device_state oldstate = sdev->sdev_state;
1620
1621         if (state == oldstate)
1622                 return 0;
1623
1624         switch (state) {
1625         case SDEV_CREATED:
1626                 /* There are no legal states that come back to
1627                  * created.  This is the manually initialised start
1628                  * state */
1629                 goto illegal;
1630                         
1631         case SDEV_RUNNING:
1632                 switch (oldstate) {
1633                 case SDEV_CREATED:
1634                 case SDEV_OFFLINE:
1635                 case SDEV_QUIESCE:
1636                 case SDEV_BLOCK:
1637                         break;
1638                 default:
1639                         goto illegal;
1640                 }
1641                 break;
1642
1643         case SDEV_QUIESCE:
1644                 switch (oldstate) {
1645                 case SDEV_RUNNING:
1646                 case SDEV_OFFLINE:
1647                         break;
1648                 default:
1649                         goto illegal;
1650                 }
1651                 break;
1652
1653         case SDEV_OFFLINE:
1654                 switch (oldstate) {
1655                 case SDEV_CREATED:
1656                 case SDEV_RUNNING:
1657                 case SDEV_QUIESCE:
1658                 case SDEV_BLOCK:
1659                         break;
1660                 default:
1661                         goto illegal;
1662                 }
1663                 break;
1664
1665         case SDEV_BLOCK:
1666                 switch (oldstate) {
1667                 case SDEV_CREATED:
1668                 case SDEV_RUNNING:
1669                         break;
1670                 default:
1671                         goto illegal;
1672                 }
1673                 break;
1674
1675         case SDEV_CANCEL:
1676                 switch (oldstate) {
1677                 case SDEV_CREATED:
1678                 case SDEV_RUNNING:
1679                 case SDEV_OFFLINE:
1680                 case SDEV_BLOCK:
1681                         break;
1682                 default:
1683                         goto illegal;
1684                 }
1685                 break;
1686
1687         case SDEV_DEL:
1688                 switch (oldstate) {
1689                 case SDEV_CANCEL:
1690                         break;
1691                 default:
1692                         goto illegal;
1693                 }
1694                 break;
1695
1696         }
1697         sdev->sdev_state = state;
1698         return 0;
1699
1700  illegal:
1701         dev_printk(KERN_ERR, &sdev->sdev_gendev,
1702                    "Illegal state transition %s->%s\n",
1703                    scsi_device_state_name(oldstate),
1704                    scsi_device_state_name(state));
1705         WARN_ON(1);
1706         return -EINVAL;
1707 }
1708 EXPORT_SYMBOL(scsi_device_set_state);
1709
1710 /**
1711  *      scsi_device_quiesce - Block user issued commands.
1712  *      @sdev:  scsi device to quiesce.
1713  *
1714  *      This works by trying to transition to the SDEV_QUIESCE state
1715  *      (which must be a legal transition).  When the device is in this
1716  *      state, only special requests will be accepted, all others will
1717  *      be deferred.  Since special requests may also be requeued requests,
1718  *      a successful return doesn't guarantee the device will be 
1719  *      totally quiescent.
1720  *
1721  *      Must be called with user context, may sleep.
1722  *
1723  *      Returns zero if unsuccessful or an error if not.
1724  **/
1725 int
1726 scsi_device_quiesce(struct scsi_device *sdev)
1727 {
1728         int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
1729         if (err)
1730                 return err;
1731
1732         scsi_run_queue(sdev->request_queue);
1733         while (sdev->device_busy) {
1734                 schedule_timeout(HZ/5);
1735                 scsi_run_queue(sdev->request_queue);
1736         }
1737         return 0;
1738 }
1739 EXPORT_SYMBOL(scsi_device_quiesce);
1740
1741 /**
1742  *      scsi_device_resume - Restart user issued commands to a quiesced device.
1743  *      @sdev:  scsi device to resume.
1744  *
1745  *      Moves the device from quiesced back to running and restarts the
1746  *      queues.
1747  *
1748  *      Must be called with user context, may sleep.
1749  **/
1750 void
1751 scsi_device_resume(struct scsi_device *sdev)
1752 {
1753         if(scsi_device_set_state(sdev, SDEV_RUNNING))
1754                 return;
1755         scsi_run_queue(sdev->request_queue);
1756 }
1757 EXPORT_SYMBOL(scsi_device_resume);
1758
/*
 * device_for_each_child() callback: quiesce the scsi device behind @dev.
 * @data is unused.  The result of scsi_device_quiesce() is deliberately
 * dropped and zero is always returned.
 */
static int
device_quiesce_fn(struct device *dev, void *data)
{
	struct scsi_device *sdev = to_scsi_device(dev);

	scsi_device_quiesce(sdev);
	return 0;
}
1765
1766 void
1767 scsi_target_quiesce(struct scsi_target *starget)
1768 {
1769         device_for_each_child(&starget->dev, NULL, device_quiesce_fn);
1770 }
1771 EXPORT_SYMBOL(scsi_target_quiesce);
1772
/*
 * device_for_each_child() callback: resume the scsi device behind @dev.
 * @data is unused.  Always returns zero.
 */
static int
device_resume_fn(struct device *dev, void *data)
{
	struct scsi_device *sdev = to_scsi_device(dev);

	scsi_device_resume(sdev);
	return 0;
}
1779
1780 void
1781 scsi_target_resume(struct scsi_target *starget)
1782 {
1783         device_for_each_child(&starget->dev, NULL, device_resume_fn);
1784 }
1785 EXPORT_SYMBOL(scsi_target_resume);
1786
1787 /**
1788  * scsi_internal_device_block - internal function to put a device
1789  *                              temporarily into the SDEV_BLOCK state
1790  * @sdev:       device to block
1791  *
1792  * Block request made by scsi lld's to temporarily stop all
1793  * scsi commands on the specified device.  Called from interrupt
1794  * or normal process context.
1795  *
1796  * Returns zero if successful or error if not
1797  *
1798  * Notes:       
1799  *      This routine transitions the device to the SDEV_BLOCK state
1800  *      (which must be a legal transition).  When the device is in this
1801  *      state, all commands are deferred until the scsi lld reenables
1802  *      the device with scsi_device_unblock or device_block_tmo fires.
1803  *      This routine assumes the host_lock is held on entry.
1804  *
1805  *      As the LLDD/Transport that is calling this function doesn't
1806  *      actually know what the device state is, the function may be
1807  *      called at an inappropriate time. Therefore, before requesting
1808  *      the state change, the function validates that the transition is
1809  *      valid.
1810  **/
1811 int
1812 scsi_internal_device_block(struct scsi_device *sdev)
1813 {
1814         request_queue_t *q = sdev->request_queue;
1815         unsigned long flags;
1816         int err = 0;
1817
1818         if ((sdev->sdev_state != SDEV_CREATED) &&
1819             (sdev->sdev_state != SDEV_RUNNING))
1820                 return 0;
1821
1822         err = scsi_device_set_state(sdev, SDEV_BLOCK);
1823         if (err)
1824                 return err;
1825
1826         /* 
1827          * The device has transitioned to SDEV_BLOCK.  Stop the
1828          * block layer from calling the midlayer with this device's
1829          * request queue. 
1830          */
1831         spin_lock_irqsave(q->queue_lock, flags);
1832         blk_stop_queue(q);
1833         spin_unlock_irqrestore(q->queue_lock, flags);
1834
1835         return 0;
1836 }
1837 EXPORT_SYMBOL_GPL(scsi_internal_device_block);
1838  
1839 /**
1840  * scsi_internal_device_unblock - resume a device after a block request
1841  * @sdev:       device to resume
1842  *
1843  * Called by scsi lld's or the midlayer to restart the device queue
1844  * for the previously suspended scsi device.  Called from interrupt or
1845  * normal process context.
1846  *
1847  * Returns zero if successful or error if not.
1848  *
1849  * Notes:       
1850  *      This routine transitions the device to the SDEV_RUNNING state
1851  *      (which must be a legal transition) allowing the midlayer to
1852  *      goose the queue for this device.  This routine assumes the 
1853  *      host_lock is held upon entry.
1854  *
1855  *      As the LLDD/Transport that is calling this function doesn't
1856  *      actually know what the device state is, the function may be
1857  *      called at an inappropriate time. Therefore, before requesting
1858  *      the state change, the function validates that the transition is
1859  *      valid.
1860  **/
1861 int
1862 scsi_internal_device_unblock(struct scsi_device *sdev)
1863 {
1864         request_queue_t *q = sdev->request_queue; 
1865         int err;
1866         unsigned long flags;
1867         
1868         if (sdev->sdev_state != SDEV_BLOCK)
1869                 return 0;
1870         
1871         /* 
1872          * Try to transition the scsi device to SDEV_RUNNING
1873          * and goose the device queue if successful.  
1874          */
1875         err = scsi_device_set_state(sdev, SDEV_RUNNING);
1876         if (err)
1877                 return err;
1878
1879         spin_lock_irqsave(q->queue_lock, flags);
1880         blk_start_queue(q);
1881         spin_unlock_irqrestore(q->queue_lock, flags);
1882
1883         return 0;
1884 }
1885 EXPORT_SYMBOL_GPL(scsi_internal_device_unblock);