ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / drivers / scsi / scsi_error.c
1 /*
2  *  scsi_error.c Copyright (C) 1997 Eric Youngdale
3  *
4  *  SCSI error/timeout handling
5  *      Initial versions: Eric Youngdale.  Based upon conversations with
6  *                        Leonard Zubkoff and David Miller at Linux Expo, 
7  *                        ideas originating from all over the place.
8  *
9  *      Restructured scsi_unjam_host and associated functions.
10  *      September 04, 2002 Mike Anderson (andmike@us.ibm.com)
11  *
12  *      Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
13  *      minor  cleanups.
14  *      September 30, 2002 Mike Anderson (andmike@us.ibm.com)
15  */
16
17 #include <linux/module.h>
18 #include <linux/sched.h>
19 #include <linux/timer.h>
20 #include <linux/string.h>
21 #include <linux/slab.h>
22 #include <linux/kernel.h>
23 #include <linux/interrupt.h>
24 #include <linux/blkdev.h>
25 #include <linux/smp_lock.h>
26 #include <scsi/scsi_ioctl.h>
27
28 #include "scsi.h"
29 #include "hosts.h"
30
31 #include "scsi_priv.h"
32 #include "scsi_logging.h"
33
/*
 * Timeouts (in jiffies) for commands issued by the error handler itself.
 * DEBUG builds use SCSI_TIMEOUT for sense requests instead of 10 seconds.
 */
#ifdef DEBUG
#define SENSE_TIMEOUT SCSI_TIMEOUT
#else
#define SENSE_TIMEOUT (10*HZ)
#endif

#define START_UNIT_TIMEOUT (30*HZ)

/*
 * These should *probably* be handled by the host itself.
 * Since it is allowed to sleep, it probably should.
 *
 * The expansions are parenthesized so that each macro stays a single
 * operand when it is used inside a larger expression.
 */
#define BUS_RESET_SETTLE_TIME   (10*HZ)
#define HOST_RESET_SETTLE_TIME  (10*HZ)
48
49 /* called with shost->host_lock held */
50 void scsi_eh_wakeup(struct Scsi_Host *shost)
51 {
52         if (shost->host_busy == shost->host_failed) {
53                 up(shost->eh_wait);
54                 SCSI_LOG_ERROR_RECOVERY(5,
55                                 printk("Waking error handler thread\n"));
56         }
57 }
58
59 /**
60  * scsi_eh_scmd_add - add scsi cmd to error handling.
61  * @scmd:       scmd to run eh on.
62  * @eh_flag:    optional SCSI_EH flag.
63  *
64  * Return value:
65  *      0 on failure.
66  **/
67 int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
68 {
69         struct Scsi_Host *shost = scmd->device->host;
70         unsigned long flags;
71
72         if (shost->eh_wait == NULL)
73                 return 0;
74
75         spin_lock_irqsave(shost->host_lock, flags);
76
77         scsi_eh_eflags_set(scmd, eh_flag);
78         /*
79          * FIXME: Can we stop setting owner and state.
80          */
81         scmd->owner = SCSI_OWNER_ERROR_HANDLER;
82         scmd->state = SCSI_STATE_FAILED;
83         /*
84          * Set the serial_number_at_timeout to the current
85          * serial_number
86          */
87         scmd->serial_number_at_timeout = scmd->serial_number;
88         list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
89         set_bit(SHOST_RECOVERY, &shost->shost_state);
90         shost->host_failed++;
91         scsi_eh_wakeup(shost);
92         spin_unlock_irqrestore(shost->host_lock, flags);
93         return 1;
94 }
95
96 /**
97  * scsi_add_timer - Start timeout timer for a single scsi command.
98  * @scmd:       scsi command that is about to start running.
99  * @timeout:    amount of time to allow this command to run.
100  * @complete:   timeout function to call if timer isn't canceled.
101  *
102  * Notes:
103  *    This should be turned into an inline function.  Each scsi command
104  *    has its own timer, and as it is added to the queue, we set up the
105  *    timer.  When the command completes, we cancel the timer.
106  **/
107 void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
108                     void (*complete)(struct scsi_cmnd *))
109 {
110
111         /*
112          * If the clock was already running for this command, then
113          * first delete the timer.  The timer handling code gets rather
114          * confused if we don't do this.
115          */
116         if (scmd->eh_timeout.function)
117                 del_timer(&scmd->eh_timeout);
118
119         scmd->eh_timeout.data = (unsigned long)scmd;
120         scmd->eh_timeout.expires = jiffies + timeout;
121         scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
122
123         SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
124                                           " %d, (%p)\n", __FUNCTION__,
125                                           scmd, timeout, complete));
126
127         add_timer(&scmd->eh_timeout);
128 }
129
130 /**
131  * scsi_delete_timer - Delete/cancel timer for a given function.
132  * @scmd:       Cmd that we are canceling timer for
133  *
134  * Notes:
135  *     This should be turned into an inline function.
136  *
137  * Return value:
138  *     1 if we were able to detach the timer.  0 if we blew it, and the
139  *     timer function has already started to run.
140  **/
141 int scsi_delete_timer(struct scsi_cmnd *scmd)
142 {
143         int rtn;
144
145         rtn = del_timer(&scmd->eh_timeout);
146
147         SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
148                                          " rtn: %d\n", __FUNCTION__,
149                                          scmd, rtn));
150
151         scmd->eh_timeout.data = (unsigned long)NULL;
152         scmd->eh_timeout.function = NULL;
153
154         return rtn;
155 }
156
157 /**
158  * scsi_times_out - Timeout function for normal scsi commands.
159  * @scmd:       Cmd that is timing out.
160  *
161  * Notes:
162  *     We do not need to lock this.  There is the potential for a race
163  *     only in that the normal completion handling might run, but if the
164  *     normal completion function determines that the timer has already
165  *     fired, then it mustn't do anything.
166  **/
167 void scsi_times_out(struct scsi_cmnd *scmd)
168 {
169         scsi_log_completion(scmd, TIMEOUT_ERROR);
170         if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
171                 panic("Error handler thread not present at %p %p %s %d",
172                       scmd, scmd->device->host, __FILE__, __LINE__);
173         }
174 }
175
176 /**
177  * scsi_block_when_processing_errors - Prevent cmds from being queued.
178  * @sdev:       Device on which we are performing recovery.
179  *
180  * Description:
181  *     We block until the host is out of error recovery, and then check to
182  *     see whether the host or the device is offline.
183  *
184  * Return value:
185  *     0 when dev was taken offline by error recovery. 1 OK to proceed.
186  **/
187 int scsi_block_when_processing_errors(struct scsi_device *sdev)
188 {
189         int online;
190
191         wait_event(sdev->host->host_wait, (!test_bit(SHOST_RECOVERY, &sdev->host->shost_state)));
192
193         online = scsi_device_online(sdev);
194
195         SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
196                                           online));
197
198         return online;
199 }
200
#ifdef CONFIG_SCSI_LOGGING
/**
 * scsi_eh_prt_fail_stats - log per-device and total failure counts.
 * @shost:	scsi host being recovered.
 * @work_q:	Queue of scsi cmds to process.
 *
 * Description:
 *    For every device of @shost, counts the commands on @work_q that
 *    belong to it, split into those flagged SCSI_EH_CANCEL_CMD and the
 *    rest, and logs one line per device that has any.  A summary line
 *    with the totals is logged at the end.
 **/
static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
					  struct list_head *work_q)
{
	struct scsi_device *sdev;
	struct scsi_cmnd *scmd;
	int nr_cmds = 0;
	int nr_devices = 0;

	shost_for_each_device(sdev, shost) {
		int nr_failed = 0;
		int nr_cancel = 0;

		list_for_each_entry(scmd, work_q, eh_entry) {
			if (scmd->device != sdev)
				continue;

			++nr_cmds;
			if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD))
				++nr_cancel;
			else
				++nr_failed;
		}

		/* Nothing queued for this device: no line to print. */
		if (!nr_cancel && !nr_failed)
			continue;

		SCSI_LOG_ERROR_RECOVERY(3,
			printk("%s: %d:%d:%d:%d cmds failed: %d, cancel: %d\n",
			       __FUNCTION__, shost->host_no,
			       sdev->channel, sdev->id, sdev->lun,
			       nr_failed, nr_cancel));
		++nr_devices;
	}

	SCSI_LOG_ERROR_RECOVERY(2,
		printk("Total of %d commands on %d devices require eh work\n",
		       nr_cmds, nr_devices));
}
#endif
247
248 /**
249  * scsi_check_sense - Examine scsi cmd sense
250  * @scmd:       Cmd to have sense checked.
251  *
252  * Return value:
253  *      SUCCESS or FAILED or NEEDS_RETRY
254  **/
255 static int scsi_check_sense(struct scsi_cmnd *scmd)
256 {
257         if (!SCSI_SENSE_VALID(scmd))
258                 return FAILED;
259
260         if (scmd->sense_buffer[2] & 0xe0)
261                 return SUCCESS;
262
263         switch (scmd->sense_buffer[2] & 0xf) {
264         case NO_SENSE:
265                 return SUCCESS;
266         case RECOVERED_ERROR:
267                 return /* soft_error */ SUCCESS;
268
269         case ABORTED_COMMAND:
270                 return NEEDS_RETRY;
271         case NOT_READY:
272         case UNIT_ATTENTION:
273                 /*
274                  * if we are expecting a cc/ua because of a bus reset that we
275                  * performed, treat this just as a retry.  otherwise this is
276                  * information that we should pass up to the upper-level driver
277                  * so that we can deal with it there.
278                  */
279                 if (scmd->device->expecting_cc_ua) {
280                         scmd->device->expecting_cc_ua = 0;
281                         return NEEDS_RETRY;
282                 }
283                 /*
284                  * if the device is in the process of becoming ready, we 
285                  * should retry.
286                  */
287                 if ((scmd->sense_buffer[12] == 0x04) &&
288                         (scmd->sense_buffer[13] == 0x01)) {
289                         return NEEDS_RETRY;
290                 }
291                 /*
292                  * if the device is not started, we need to wake
293                  * the error handler to start the motor
294                  */
295                 if (scmd->device->allow_restart &&
296                     (scmd->sense_buffer[12] == 0x04) &&
297                     (scmd->sense_buffer[13] == 0x02)) {
298                         return FAILED;
299                 }
300                 return SUCCESS;
301
302                 /* these three are not supported */
303         case COPY_ABORTED:
304         case VOLUME_OVERFLOW:
305         case MISCOMPARE:
306                 return SUCCESS;
307
308         case MEDIUM_ERROR:
309                 return NEEDS_RETRY;
310
311         case ILLEGAL_REQUEST:
312         case BLANK_CHECK:
313         case DATA_PROTECT:
314         case HARDWARE_ERROR:
315         default:
316                 return SUCCESS;
317         }
318 }
319
320 /**
321  * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
322  * @scmd:       SCSI cmd to examine.
323  *
324  * Notes:
325  *    This is *only* called when we are examining the status of commands
326  *    queued during error recovery.  the main difference here is that we
327  *    don't allow for the possibility of retries here, and we are a lot
328  *    more restrictive about what we consider acceptable.
329  **/
330 static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
331 {
332         /*
333          * first check the host byte, to see if there is anything in there
334          * that would indicate what we need to do.
335          */
336         if (host_byte(scmd->result) == DID_RESET) {
337                 /*
338                  * rats.  we are already in the error handler, so we now
339                  * get to try and figure out what to do next.  if the sense
340                  * is valid, we have a pretty good idea of what to do.
341                  * if not, we mark it as FAILED.
342                  */
343                 return scsi_check_sense(scmd);
344         }
345         if (host_byte(scmd->result) != DID_OK)
346                 return FAILED;
347
348         /*
349          * next, check the message byte.
350          */
351         if (msg_byte(scmd->result) != COMMAND_COMPLETE)
352                 return FAILED;
353
354         /*
355          * now, check the status byte to see if this indicates
356          * anything special.
357          */
358         switch (status_byte(scmd->result)) {
359         case GOOD:
360         case COMMAND_TERMINATED:
361                 return SUCCESS;
362         case CHECK_CONDITION:
363                 return scsi_check_sense(scmd);
364         case CONDITION_GOOD:
365         case INTERMEDIATE_GOOD:
366         case INTERMEDIATE_C_GOOD:
367                 /*
368                  * who knows?  FIXME(eric)
369                  */
370                 return SUCCESS;
371         case BUSY:
372         case QUEUE_FULL:
373         case RESERVATION_CONFLICT:
374         default:
375                 return FAILED;
376         }
377         return FAILED;
378 }
379
380 /**
381  * scsi_eh_times_out - timeout function for error handling.
382  * @scmd:       Cmd that is timing out.
383  *
384  * Notes:
385  *    During error handling, the kernel thread will be sleeping waiting
386  *    for some action to complete on the device.  our only job is to
387  *    record that it timed out, and to wake up the thread.
388  **/
389 static void scsi_eh_times_out(struct scsi_cmnd *scmd)
390 {
391         scsi_eh_eflags_set(scmd, SCSI_EH_REC_TIMEOUT);
392         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
393                                           scmd));
394
395         if (scmd->device->host->eh_action)
396                 up(scmd->device->host->eh_action);
397 }
398
399 /**
400  * scsi_eh_done - Completion function for error handling.
401  * @scmd:       Cmd that is done.
402  **/
403 static void scsi_eh_done(struct scsi_cmnd *scmd)
404 {
405         /*
406          * if the timeout handler is already running, then just set the
407          * flag which says we finished late, and return.  we have no
408          * way of stopping the timeout handler from running, so we must
409          * always defer to it.
410          */
411         if (del_timer(&scmd->eh_timeout)) {
412                 scmd->request->rq_status = RQ_SCSI_DONE;
413                 scmd->owner = SCSI_OWNER_ERROR_HANDLER;
414
415                 SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
416                                            __FUNCTION__, scmd, scmd->result));
417
418                 if (scmd->device->host->eh_action)
419                         up(scmd->device->host->eh_action);
420         }
421 }
422
423 /**
424  * scsi_send_eh_cmnd  - send a cmd to a device as part of error recovery.
425  * @scmd:       SCSI Cmd to send.
426  * @timeout:    Timeout for cmd.
427  *
428  * Notes:
429  *    The initialization of the structures is quite a bit different in
430  *    this case, and furthermore, there is a different completion handler
431  *    vs scsi_dispatch_cmd.
432  * Return value:
433  *    SUCCESS or FAILED or NEEDS_RETRY
434  **/
435 static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
436 {
437         struct Scsi_Host *host = scmd->device->host;
438         DECLARE_MUTEX_LOCKED(sem);
439         unsigned long flags;
440         int rtn = SUCCESS;
441
442         /*
443          * we will use a queued command if possible, otherwise we will
444          * emulate the queuing and calling of completion function ourselves.
445          */
446         scmd->owner = SCSI_OWNER_LOWLEVEL;
447
448         if (scmd->device->scsi_level <= SCSI_2)
449                 scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) |
450                         (scmd->device->lun << 5 & 0xe0);
451
452         scsi_add_timer(scmd, timeout, scsi_eh_times_out);
453
454         /*
455          * set up the semaphore so we wait for the command to complete.
456          */
457         scmd->device->host->eh_action = &sem;
458         scmd->request->rq_status = RQ_SCSI_BUSY;
459
460         spin_lock_irqsave(scmd->device->host->host_lock, flags);
461         scsi_log_send(scmd);
462         host->hostt->queuecommand(scmd, scsi_eh_done);
463         spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
464
465         down(&sem);
466         scsi_log_completion(scmd, SUCCESS);
467
468         scmd->device->host->eh_action = NULL;
469
470         /*
471          * see if timeout.  if so, tell the host to forget about it.
472          * in other words, we don't want a callback any more.
473          */
474         if (scsi_eh_eflags_chk(scmd, SCSI_EH_REC_TIMEOUT)) {
475                 scsi_eh_eflags_clr(scmd,  SCSI_EH_REC_TIMEOUT);
476                 scmd->owner = SCSI_OWNER_LOWLEVEL;
477
478                 /*
479                  * as far as the low level driver is
480                  * concerned, this command is still active, so
481                  * we must give the low level driver a chance
482                  * to abort it. (db) 
483                  *
484                  * FIXME(eric) - we are not tracking whether we could
485                  * abort a timed out command or not.  not sure how
486                  * we should treat them differently anyways.
487                  */
488                 spin_lock_irqsave(scmd->device->host->host_lock, flags);
489                 if (scmd->device->host->hostt->eh_abort_handler)
490                         scmd->device->host->hostt->eh_abort_handler(scmd);
491                 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
492                         
493                 scmd->request->rq_status = RQ_SCSI_DONE;
494                 scmd->owner = SCSI_OWNER_ERROR_HANDLER;
495                         
496                 rtn = FAILED;
497         }
498
499         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
500                                           __FUNCTION__, scmd, rtn));
501
502         /*
503          * now examine the actual status codes to see whether the command
504          * actually did complete normally.
505          */
506         if (rtn == SUCCESS) {
507                 rtn = scsi_eh_completed_normally(scmd);
508                 SCSI_LOG_ERROR_RECOVERY(3,
509                         printk("%s: scsi_eh_completed_normally %x\n",
510                                __FUNCTION__, rtn));
511                 switch (rtn) {
512                 case SUCCESS:
513                 case NEEDS_RETRY:
514                 case FAILED:
515                         break;
516                 default:
517                         rtn = FAILED;
518                         break;
519                 }
520         }
521
522         return rtn;
523 }
524
525 /**
526  * scsi_request_sense - Request sense data from a particular target.
527  * @scmd:       SCSI cmd for request sense.
528  *
529  * Notes:
530  *    Some hosts automatically obtain this information, others require
531  *    that we obtain it on our own. This function will *not* return until
532  *    the command either times out, or it completes.
533  **/
534 static int scsi_request_sense(struct scsi_cmnd *scmd)
535 {
536         static unsigned char generic_sense[6] =
537         {REQUEST_SENSE, 0, 0, 0, 252, 0};
538         unsigned char *scsi_result;
539         int saved_result;
540         int rtn;
541
542         memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
543
544         scsi_result = kmalloc(252, GFP_ATOMIC | (scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0);
545
546
547         if (unlikely(!scsi_result)) {
548                 printk(KERN_ERR "%s: cannot allocate scsi_result.\n",
549                        __FUNCTION__);
550                 return FAILED;
551         }
552
553         /*
554          * zero the sense buffer.  some host adapters automatically always
555          * request sense, so it is not a good idea that
556          * scmd->request_buffer and scmd->sense_buffer point to the same
557          * address (db).  0 is not a valid sense code. 
558          */
559         memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
560         memset(scsi_result, 0, 252);
561
562         saved_result = scmd->result;
563         scmd->request_buffer = scsi_result;
564         scmd->request_bufflen = 252;
565         scmd->use_sg = 0;
566         scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
567         scmd->sc_data_direction = DMA_FROM_DEVICE;
568         scmd->underflow = 0;
569
570         rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
571
572         /* last chance to have valid sense data */
573         if(!SCSI_SENSE_VALID(scmd)) {
574                 memcpy(scmd->sense_buffer, scmd->request_buffer,
575                        sizeof(scmd->sense_buffer));
576         }
577
578         kfree(scsi_result);
579
580         /*
581          * when we eventually call scsi_finish, we really wish to complete
582          * the original request, so let's restore the original data. (db)
583          */
584         scsi_setup_cmd_retry(scmd);
585         scmd->result = saved_result;
586         return rtn;
587 }
588
589 /**
590  * scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
591  * @scmd:       Original SCSI cmd that eh has finished.
592  * @done_q:     Queue for processed commands.
593  *
594  * Notes:
595  *    We don't want to use the normal command completion while we are are
596  *    still handling errors - it may cause other commands to be queued,
597  *    and that would disturb what we are doing.  thus we really want to
598  *    keep a list of pending commands for final completion, and once we
599  *    are ready to leave error handling we handle completion for real.
600  **/
601 static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
602                                struct list_head *done_q)
603 {
604         scmd->device->host->host_failed--;
605         scmd->state = SCSI_STATE_BHQUEUE;
606
607         scsi_eh_eflags_clr_all(scmd);
608
609         /*
610          * set this back so that the upper level can correctly free up
611          * things.
612          */
613         scsi_setup_cmd_retry(scmd);
614         list_move_tail(&scmd->eh_entry, done_q);
615 }
616
617 /**
618  * scsi_eh_get_sense - Get device sense data.
619  * @work_q:     Queue of commands to process.
620  * @done_q:     Queue of proccessed commands..
621  *
622  * Description:
623  *    See if we need to request sense information.  if so, then get it
624  *    now, so we have a better idea of what to do.  
625  *
626  * Notes:
627  *    This has the unfortunate side effect that if a shost adapter does
628  *    not automatically request sense information, that we end up shutting
629  *    it down before we request it.  All shosts should be doing this
630  *    anyways, so for now all I have to say is tough noogies if you end up
631  *    in here.  On second thought, this is probably a good idea.  We
632  *    *really* want to give authors an incentive to automatically request
633  *    this.
634  *
635  *    In 2.5 this capability will be going away.
636  *
637  *    Really?  --hch
638  **/
639 static int scsi_eh_get_sense(struct list_head *work_q,
640                              struct list_head *done_q)
641 {
642         struct list_head *lh, *lh_sf;
643         struct scsi_cmnd *scmd;
644         int rtn;
645
646         list_for_each_safe(lh, lh_sf, work_q) {
647                 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
648                 if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD) ||
649                     SCSI_SENSE_VALID(scmd))
650                         continue;
651
652                 SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
653                                                   " for id: %d\n",
654                                                   current->comm,
655                                                   scmd->device->id));
656                 rtn = scsi_request_sense(scmd);
657                 if (rtn != SUCCESS)
658                         continue;
659
660                 SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
661                                                   " result %x\n", scmd,
662                                                   scmd->result));
663                 SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", scmd));
664
665                 rtn = scsi_decide_disposition(scmd);
666
667                 /*
668                  * if the result was normal, then just pass it along to the
669                  * upper level.
670                  */
671                 if (rtn == SUCCESS)
672                         /* we don't want this command reissued, just
673                          * finished with the sense data, so set
674                          * retries to the max allowed to ensure it
675                          * won't get reissued */
676                         scmd->retries = scmd->allowed;
677                 else if (rtn != NEEDS_RETRY)
678                         continue;
679
680                 scsi_eh_finish_cmd(scmd, done_q);
681         }
682
683         return list_empty(work_q);
684 }
685
686 /**
687  * scsi_try_to_abort_cmd - Ask host to abort a running command.
688  * @scmd:       SCSI cmd to abort from Lower Level.
689  *
690  * Notes:
691  *    This function will not return until the user's completion function
692  *    has been called.  there is no timeout on this operation.  if the
693  *    author of the low-level driver wishes this operation to be timed,
694  *    they can provide this facility themselves.  helper functions in
695  *    scsi_error.c can be supplied to make this easier to do.
696  **/
697 static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
698 {
699         unsigned long flags;
700         int rtn = FAILED;
701
702         if (!scmd->device->host->hostt->eh_abort_handler)
703                 return rtn;
704
705         /*
706          * scsi_done was called just after the command timed out and before
707          * we had a chance to process it. (db)
708          */
709         if (scmd->serial_number == 0)
710                 return SUCCESS;
711
712         scmd->owner = SCSI_OWNER_LOWLEVEL;
713
714         spin_lock_irqsave(scmd->device->host->host_lock, flags);
715         rtn = scmd->device->host->hostt->eh_abort_handler(scmd);
716         spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
717
718         return rtn;
719 }
720
721 /**
722  * scsi_eh_tur - Send TUR to device.
723  * @scmd:       Scsi cmd to send TUR
724  *
725  * Return value:
726  *    0 - Device is ready. 1 - Device NOT ready.
727  **/
728 static int scsi_eh_tur(struct scsi_cmnd *scmd)
729 {
730         static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
731         int retry_cnt = 1, rtn;
732
733 retry_tur:
734         memcpy(scmd->cmnd, tur_command, sizeof(tur_command));
735
736         /*
737          * zero the sense buffer.  the scsi spec mandates that any
738          * untransferred sense data should be interpreted as being zero.
739          */
740         memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
741
742         scmd->request_buffer = NULL;
743         scmd->request_bufflen = 0;
744         scmd->use_sg = 0;
745         scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
746         scmd->underflow = 0;
747         scmd->sc_data_direction = DMA_NONE;
748
749         rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
750
751         /*
752          * when we eventually call scsi_finish, we really wish to complete
753          * the original request, so let's restore the original data. (db)
754          */
755         scsi_setup_cmd_retry(scmd);
756
757         /*
758          * hey, we are done.  let's look to see what happened.
759          */
760         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
761                 __FUNCTION__, scmd, rtn));
762         if (rtn == SUCCESS)
763                 return 0;
764         else if (rtn == NEEDS_RETRY)
765                 if (retry_cnt--)
766                         goto retry_tur;
767         return 1;
768 }
769
770 /**
771  * scsi_eh_abort_cmds - abort canceled commands.
772  * @shost:      scsi host being recovered.
773  * @eh_done_q:  list_head for processed commands.
774  *
775  * Decription:
776  *    Try and see whether or not it makes sense to try and abort the
777  *    running command.  this only works out to be the case if we have one
778  *    command that has timed out.  if the command simply failed, it makes
779  *    no sense to try and abort the command, since as far as the shost
780  *    adapter is concerned, it isn't running.
781  **/
782 static int scsi_eh_abort_cmds(struct list_head *work_q,
783                               struct list_head *done_q)
784 {
785         struct list_head *lh, *lh_sf;
786         struct scsi_cmnd *scmd;
787         int rtn;
788
789         list_for_each_safe(lh, lh_sf, work_q) {
790                 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
791                 if (!scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD))
792                         continue;
793                 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
794                                                   "0x%p\n", current->comm,
795                                                   scmd));
796                 rtn = scsi_try_to_abort_cmd(scmd);
797                 if (rtn == SUCCESS) {
798                         scsi_eh_eflags_clr(scmd,  SCSI_EH_CANCEL_CMD);
799                         if (!scsi_device_online(scmd->device) ||
800                             !scsi_eh_tur(scmd)) {
801                                 scsi_eh_finish_cmd(scmd, done_q);
802                         }
803                                 
804                 } else
805                         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
806                                                           " cmd failed:"
807                                                           "0x%p\n",
808                                                           current->comm,
809                                                           scmd));
810         }
811
812         return list_empty(work_q);
813 }
814
815 /**
816  * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
817  * @scmd:       SCSI cmd used to send BDR       
818  *
819  * Notes:
820  *    There is no timeout for this operation.  if this operation is
821  *    unreliable for a given host, then the host itself needs to put a
822  *    timer on it, and set the host back to a consistent state prior to
823  *    returning.
824  **/
825 static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
826 {
827         unsigned long flags;
828         int rtn = FAILED;
829
830         if (!scmd->device->host->hostt->eh_device_reset_handler)
831                 return rtn;
832
833         scmd->owner = SCSI_OWNER_LOWLEVEL;
834
835         spin_lock_irqsave(scmd->device->host->host_lock, flags);
836         rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
837         spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
838
839         if (rtn == SUCCESS) {
840                 scmd->device->was_reset = 1;
841                 scmd->device->expecting_cc_ua = 1;
842         }
843
844         return rtn;
845 }
846
847 /**
848  * scsi_eh_try_stu - Send START_UNIT to device.
849  * @scmd:       Scsi cmd to send START_UNIT
850  *
851  * Return value:
852  *    0 - Device is ready. 1 - Device NOT ready.
853  **/
854 static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
855 {
856         static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
857         int rtn;
858
859         if (!scmd->device->allow_restart)
860                 return 1;
861
862         memcpy(scmd->cmnd, stu_command, sizeof(stu_command));
863
864         /*
865          * zero the sense buffer.  the scsi spec mandates that any
866          * untransferred sense data should be interpreted as being zero.
867          */
868         memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
869
870         scmd->request_buffer = NULL;
871         scmd->request_bufflen = 0;
872         scmd->use_sg = 0;
873         scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
874         scmd->underflow = 0;
875         scmd->sc_data_direction = DMA_NONE;
876
877         rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT);
878
879         /*
880          * when we eventually call scsi_finish, we really wish to complete
881          * the original request, so let's restore the original data. (db)
882          */
883         scsi_setup_cmd_retry(scmd);
884
885         /*
886          * hey, we are done.  let's look to see what happened.
887          */
888         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
889                 __FUNCTION__, scmd, rtn));
890         if (rtn == SUCCESS)
891                 return 0;
892         return 1;
893 }
894
895  /**
896  * scsi_eh_stu - send START_UNIT if needed
897  * @shost:      scsi host being recovered.
898  * @eh_done_q:  list_head for processed commands.
899  *
900  * Notes:
901  *    If commands are failing due to not ready, initializing command required,
902  *      try revalidating the device, which will end up sending a start unit. 
903  **/
904 static int scsi_eh_stu(struct Scsi_Host *shost,
905                               struct list_head *work_q,
906                               struct list_head *done_q)
907 {
908         struct list_head *lh, *lh_sf;
909         struct scsi_cmnd *scmd, *stu_scmd;
910         struct scsi_device *sdev;
911
912         shost_for_each_device(sdev, shost) {
913                 stu_scmd = NULL;
914                 list_for_each_entry(scmd, work_q, eh_entry)
915                         if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
916                             scsi_check_sense(scmd) == FAILED ) {
917                                 stu_scmd = scmd;
918                                 break;
919                         }
920
921                 if (!stu_scmd)
922                         continue;
923
924                 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
925                                                   " 0x%p\n", current->comm, sdev));
926
927                 if (!scsi_eh_try_stu(stu_scmd)) {
928                         if (!scsi_device_online(sdev) ||
929                             !scsi_eh_tur(stu_scmd)) {
930                                 list_for_each_safe(lh, lh_sf, work_q) {
931                                         scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
932                                         if (scmd->device == sdev)
933                                                 scsi_eh_finish_cmd(scmd, done_q);
934                                 }
935                         }
936                 } else {
937                         SCSI_LOG_ERROR_RECOVERY(3,
938                                                 printk("%s: START_UNIT failed to sdev:"
939                                                        " 0x%p\n", current->comm, sdev));
940                 }
941         }
942
943         return list_empty(work_q);
944 }
945
946
947 /**
948  * scsi_eh_bus_device_reset - send bdr if needed
949  * @shost:      scsi host being recovered.
950  * @eh_done_q:  list_head for processed commands.
951  *
952  * Notes:
953  *    Try a bus device reset.  still, look to see whether we have multiple
954  *    devices that are jammed or not - if we have multiple devices, it
955  *    makes no sense to try bus_device_reset - we really would need to try
956  *    a bus_reset instead. 
957  **/
958 static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
959                                     struct list_head *work_q,
960                                     struct list_head *done_q)
961 {
962         struct list_head *lh, *lh_sf;
963         struct scsi_cmnd *scmd, *bdr_scmd;
964         struct scsi_device *sdev;
965         int rtn;
966
967         shost_for_each_device(sdev, shost) {
968                 bdr_scmd = NULL;
969                 list_for_each_entry(scmd, work_q, eh_entry)
970                         if (scmd->device == sdev) {
971                                 bdr_scmd = scmd;
972                                 break;
973                         }
974
975                 if (!bdr_scmd)
976                         continue;
977
978                 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
979                                                   " 0x%p\n", current->comm,
980                                                   sdev));
981                 rtn = scsi_try_bus_device_reset(bdr_scmd);
982                 if (rtn == SUCCESS) {
983                         if (!scsi_device_online(sdev) ||
984                             !scsi_eh_tur(bdr_scmd)) {
985                                 list_for_each_safe(lh, lh_sf,
986                                                    work_q) {
987                                         scmd = list_entry(lh, struct
988                                                           scsi_cmnd,
989                                                           eh_entry);
990                                         if (scmd->device == sdev)
991                                                 scsi_eh_finish_cmd(scmd,
992                                                                    done_q);
993                                 }
994                         }
995                 } else {
996                         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
997                                                           " failed sdev:"
998                                                           "0x%p\n",
999                                                           current->comm,
1000                                                            sdev));
1001                 }
1002         }
1003
1004         return list_empty(work_q);
1005 }
1006
1007 /**
1008  * scsi_try_bus_reset - ask host to perform a bus reset
1009  * @scmd:       SCSI cmd to send bus reset.
1010  **/
1011 static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
1012 {
1013         unsigned long flags;
1014         int rtn;
1015
1016         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
1017                                           __FUNCTION__));
1018         scmd->owner = SCSI_OWNER_LOWLEVEL;
1019         scmd->serial_number_at_timeout = scmd->serial_number;
1020
1021         if (!scmd->device->host->hostt->eh_bus_reset_handler)
1022                 return FAILED;
1023
1024         spin_lock_irqsave(scmd->device->host->host_lock, flags);
1025         rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
1026         spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1027
1028         if (rtn == SUCCESS) {
1029                 scsi_sleep(BUS_RESET_SETTLE_TIME);
1030                 spin_lock_irqsave(scmd->device->host->host_lock, flags);
1031                 scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
1032                 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1033         }
1034
1035         return rtn;
1036 }
1037
1038 /**
1039  * scsi_try_host_reset - ask host adapter to reset itself
1040  * @scmd:       SCSI cmd to send hsot reset.
1041  **/
1042 static int scsi_try_host_reset(struct scsi_cmnd *scmd)
1043 {
1044         unsigned long flags;
1045         int rtn;
1046
1047         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
1048                                           __FUNCTION__));
1049         scmd->owner = SCSI_OWNER_LOWLEVEL;
1050         scmd->serial_number_at_timeout = scmd->serial_number;
1051
1052         if (!scmd->device->host->hostt->eh_host_reset_handler)
1053                 return FAILED;
1054
1055         spin_lock_irqsave(scmd->device->host->host_lock, flags);
1056         rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
1057         spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1058
1059         if (rtn == SUCCESS) {
1060                 scsi_sleep(HOST_RESET_SETTLE_TIME);
1061                 spin_lock_irqsave(scmd->device->host->host_lock, flags);
1062                 scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
1063                 spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
1064         }
1065
1066         return rtn;
1067 }
1068
1069 /**
1070  * scsi_eh_bus_reset - send a bus reset 
1071  * @shost:      scsi host being recovered.
1072  * @eh_done_q:  list_head for processed commands.
1073  **/
1074 static int scsi_eh_bus_reset(struct Scsi_Host *shost,
1075                              struct list_head *work_q,
1076                              struct list_head *done_q)
1077 {
1078         struct list_head *lh, *lh_sf;
1079         struct scsi_cmnd *scmd;
1080         struct scsi_cmnd *chan_scmd;
1081         unsigned int channel;
1082         int rtn;
1083
1084         /*
1085          * we really want to loop over the various channels, and do this on
1086          * a channel by channel basis.  we should also check to see if any
1087          * of the failed commands are on soft_reset devices, and if so, skip
1088          * the reset.  
1089          */
1090
1091         for (channel = 0; channel <= shost->max_channel; channel++) {
1092                 chan_scmd = NULL;
1093                 list_for_each_entry(scmd, work_q, eh_entry) {
1094                         if (channel == scmd->device->channel) {
1095                                 chan_scmd = scmd;
1096                                 break;
1097                                 /*
1098                                  * FIXME add back in some support for
1099                                  * soft_reset devices.
1100                                  */
1101                         }
1102                 }
1103
1104                 if (!chan_scmd)
1105                         continue;
1106                 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
1107                                                   " %d\n", current->comm,
1108                                                   channel));
1109                 rtn = scsi_try_bus_reset(chan_scmd);
1110                 if (rtn == SUCCESS) {
1111                         list_for_each_safe(lh, lh_sf, work_q) {
1112                                 scmd = list_entry(lh, struct scsi_cmnd,
1113                                                   eh_entry);
1114                                 if (channel == scmd->device->channel)
1115                                         if (!scsi_device_online(scmd->device) ||
1116                                             !scsi_eh_tur(scmd))
1117                                                 scsi_eh_finish_cmd(scmd,
1118                                                                    done_q);
1119                         }
1120                 } else {
1121                         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
1122                                                           " failed chan: %d\n",
1123                                                           current->comm,
1124                                                           channel));
1125                 }
1126         }
1127         return list_empty(work_q);
1128 }
1129
1130 /**
1131  * scsi_eh_host_reset - send a host reset 
1132  * @work_q:     list_head for processed commands.
1133  * @done_q:     list_head for processed commands.
1134  **/
1135 static int scsi_eh_host_reset(struct list_head *work_q,
1136                               struct list_head *done_q)
1137 {
1138         int rtn;
1139         struct list_head *lh, *lh_sf;
1140         struct scsi_cmnd *scmd;
1141
1142         if (!list_empty(work_q)) {
1143                 scmd = list_entry(work_q->next,
1144                                   struct scsi_cmnd, eh_entry);
1145
1146                 SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
1147                                                   , current->comm));
1148
1149                 rtn = scsi_try_host_reset(scmd);
1150                 if (rtn == SUCCESS) {
1151                         list_for_each_safe(lh, lh_sf, work_q) {
1152                                 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
1153                                 if (!scsi_device_online(scmd->device) ||
1154                                     (!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) ||
1155                                     !scsi_eh_tur(scmd))
1156                                         scsi_eh_finish_cmd(scmd, done_q);
1157                         }
1158                 } else {
1159                         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
1160                                                           " failed\n",
1161                                                           current->comm));
1162                 }
1163         }
1164         return list_empty(work_q);
1165 }
1166
1167 /**
1168  * scsi_eh_offline_sdevs - offline scsi devices that fail to recover
1169  * @work_q:     list_head for processed commands.
1170  * @done_q:     list_head for processed commands.
1171  *
1172  **/
1173 static void scsi_eh_offline_sdevs(struct list_head *work_q,
1174                                   struct list_head *done_q)
1175 {
1176         struct list_head *lh, *lh_sf;
1177         struct scsi_cmnd *scmd;
1178
1179         list_for_each_safe(lh, lh_sf, work_q) {
1180                 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
1181                 printk(KERN_INFO "scsi: Device offlined - not"
1182                                 " ready after error recovery: host"
1183                                 " %d channel %d id %d lun %d\n",
1184                                 scmd->device->host->host_no,
1185                                 scmd->device->channel,
1186                                 scmd->device->id,
1187                                 scmd->device->lun);
1188                 scsi_device_set_state(scmd->device, SDEV_OFFLINE);
1189                 if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
1190                         /*
1191                          * FIXME: Handle lost cmds.
1192                          */
1193                 }
1194                 scsi_eh_finish_cmd(scmd, done_q);
1195         }
1196         return;
1197 }
1198
/**
 * scsi_sleep_done - timer function for scsi_sleep
 * @data:	address of the semaphore to signal, passed as the timer
 *		cookie; a zero value is ignored.
 *
 **/
static void scsi_sleep_done(unsigned long data)
{
	struct semaphore *sem = (struct semaphore *)data;

	if (!sem)
		return;

	up(sem);
}
1211
1212 /**
1213  * scsi_sleep - sleep for specified timeout
1214  * @timeout:    timeout value
1215  *
1216  **/
1217 void scsi_sleep(int timeout)
1218 {
1219         DECLARE_MUTEX_LOCKED(sem);
1220         struct timer_list timer;
1221
1222         init_timer(&timer);
1223         timer.data = (unsigned long)&sem;
1224         timer.expires = jiffies + timeout;
1225         timer.function = (void (*)(unsigned long))scsi_sleep_done;
1226
1227         SCSI_LOG_ERROR_RECOVERY(5, printk("sleeping for timer tics %d\n",
1228                                           timeout));
1229
1230         add_timer(&timer);
1231
1232         down(&sem);
1233         del_timer(&timer);
1234 }
1235
1236 /**
1237  * scsi_decide_disposition - Disposition a cmd on return from LLD.
1238  * @scmd:       SCSI cmd to examine.
1239  *
1240  * Notes:
1241  *    This is *only* called when we are examining the status after sending
1242  *    out the actual data command.  any commands that are queued for error
1243  *    recovery (e.g. test_unit_ready) do *not* come through here.
1244  *
1245  *    When this routine returns failed, it means the error handler thread
1246  *    is woken.  In cases where the error code indicates an error that
1247  *    doesn't require the error handler read (i.e. we don't need to
1248  *    abort/reset), this function should return SUCCESS.
1249  **/
1250 int scsi_decide_disposition(struct scsi_cmnd *scmd)
1251 {
1252         int rtn;
1253
1254         /*
1255          * if the device is offline, then we clearly just pass the result back
1256          * up to the top level.
1257          */
1258         if (!scsi_device_online(scmd->device)) {
1259                 SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
1260                                                   " as SUCCESS\n",
1261                                                   __FUNCTION__));
1262                 return SUCCESS;
1263         }
1264
1265         /*
1266          * first check the host byte, to see if there is anything in there
1267          * that would indicate what we need to do.
1268          */
1269         switch (host_byte(scmd->result)) {
1270         case DID_PASSTHROUGH:
1271                 /*
1272                  * no matter what, pass this through to the upper layer.
1273                  * nuke this special code so that it looks like we are saying
1274                  * did_ok.
1275                  */
1276                 scmd->result &= 0xff00ffff;
1277                 return SUCCESS;
1278         case DID_OK:
1279                 /*
1280                  * looks good.  drop through, and check the next byte.
1281                  */
1282                 break;
1283         case DID_NO_CONNECT:
1284         case DID_BAD_TARGET:
1285         case DID_ABORT:
1286                 /*
1287                  * note - this means that we just report the status back
1288                  * to the top level driver, not that we actually think
1289                  * that it indicates SUCCESS.
1290                  */
1291                 return SUCCESS;
1292                 /*
1293                  * when the low level driver returns did_soft_error,
1294                  * it is responsible for keeping an internal retry counter 
1295                  * in order to avoid endless loops (db)
1296                  *
1297                  * actually this is a bug in this function here.  we should
1298                  * be mindful of the maximum number of retries specified
1299                  * and not get stuck in a loop.
1300                  */
1301         case DID_SOFT_ERROR:
1302                 goto maybe_retry;
1303         case DID_IMM_RETRY:
1304                 return NEEDS_RETRY;
1305
1306         case DID_ERROR:
1307                 if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
1308                     status_byte(scmd->result) == RESERVATION_CONFLICT)
1309                         /*
1310                          * execute reservation conflict processing code
1311                          * lower down
1312                          */
1313                         break;
1314                 /* fallthrough */
1315
1316         case DID_BUS_BUSY:
1317         case DID_PARITY:
1318                 goto maybe_retry;
1319         case DID_TIME_OUT:
1320                 /*
1321                  * when we scan the bus, we get timeout messages for
1322                  * these commands if there is no device available.
1323                  * other hosts report did_no_connect for the same thing.
1324                  */
1325                 if ((scmd->cmnd[0] == TEST_UNIT_READY ||
1326                      scmd->cmnd[0] == INQUIRY)) {
1327                         return SUCCESS;
1328                 } else {
1329                         return FAILED;
1330                 }
1331         case DID_RESET:
1332                 return SUCCESS;
1333         default:
1334                 return FAILED;
1335         }
1336
1337         /*
1338          * next, check the message byte.
1339          */
1340         if (msg_byte(scmd->result) != COMMAND_COMPLETE)
1341                 return FAILED;
1342
1343         /*
1344          * check the status byte to see if this indicates anything special.
1345          */
1346         switch (status_byte(scmd->result)) {
1347         case QUEUE_FULL:
1348                 /*
1349                  * the case of trying to send too many commands to a
1350                  * tagged queueing device.
1351                  */
1352         case BUSY:
1353                 /*
1354                  * device can't talk to us at the moment.  Should only
1355                  * occur (SAM-3) when the task queue is empty, so will cause
1356                  * the empty queue handling to trigger a stall in the
1357                  * device.
1358                  */
1359                 return ADD_TO_MLQUEUE;
1360         case GOOD:
1361         case COMMAND_TERMINATED:
1362                 return SUCCESS;
1363         case CHECK_CONDITION:
1364                 rtn = scsi_check_sense(scmd);
1365                 if (rtn == NEEDS_RETRY)
1366                         goto maybe_retry;
1367                 /* if rtn == FAILED, we have no sense information;
1368                  * returning FAILED will wake the error handler thread
1369                  * to collect the sense and redo the decide
1370                  * disposition */
1371                 return rtn;
1372         case CONDITION_GOOD:
1373         case INTERMEDIATE_GOOD:
1374         case INTERMEDIATE_C_GOOD:
1375                 /*
1376                  * who knows?  FIXME(eric)
1377                  */
1378                 return SUCCESS;
1379
1380         case RESERVATION_CONFLICT:
1381                 printk("scsi%d (%d,%d,%d) : reservation conflict\n",
1382                        scmd->device->host->host_no, scmd->device->channel,
1383                        scmd->device->id, scmd->device->lun);
1384                 return SUCCESS; /* causes immediate i/o error */
1385         default:
1386                 return FAILED;
1387         }
1388         return FAILED;
1389
1390       maybe_retry:
1391
1392         /* we requeue for retry because the error was retryable, and
1393          * the request was not marked fast fail.  Note that above,
1394          * even if the request is marked fast fail, we still requeue
1395          * for queue congestion conditions (QUEUE_FULL or BUSY) */
1396         if ((++scmd->retries) < scmd->allowed 
1397             && !blk_noretry_request(scmd->request)) {
1398                 return NEEDS_RETRY;
1399         } else {
1400                 /*
1401                  * no more retries - report this one back to upper level.
1402                  */
1403                 return SUCCESS;
1404         }
1405 }
1406
1407 /**
1408  * scsi_eh_lock_done - done function for eh door lock request
1409  * @scmd:       SCSI command block for the door lock request
1410  *
1411  * Notes:
1412  *      We completed the asynchronous door lock request, and it has either
1413  *      locked the door or failed.  We must free the command structures
1414  *      associated with this request.
1415  **/
1416 static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
1417 {
1418         struct scsi_request *sreq = scmd->sc_request;
1419
1420         scsi_release_request(sreq);
1421 }
1422
1423
1424 /**
1425  * scsi_eh_lock_door - Prevent medium removal for the specified device
1426  * @sdev:       SCSI device to prevent medium removal
1427  *
1428  * Locking:
1429  *      We must be called from process context; scsi_allocate_request()
1430  *      may sleep.
1431  *
1432  * Notes:
1433  *      We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
1434  *      head of the devices request queue, and continue.
1435  *
1436  * Bugs:
1437  *      scsi_allocate_request() may sleep waiting for existing requests to
1438  *      be processed.  However, since we haven't kicked off any request
1439  *      processing for this host, this may deadlock.
1440  *
1441  *      If scsi_allocate_request() fails for what ever reason, we
1442  *      completely forget to lock the door.
1443  **/
1444 static void scsi_eh_lock_door(struct scsi_device *sdev)
1445 {
1446         struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
1447
1448         if (unlikely(!sreq)) {
1449                 printk(KERN_ERR "%s: request allocate failed,"
1450                        "prevent media removal cmd not sent\n", __FUNCTION__);
1451                 return;
1452         }
1453
1454         sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
1455         sreq->sr_cmnd[1] = 0;
1456         sreq->sr_cmnd[2] = 0;
1457         sreq->sr_cmnd[3] = 0;
1458         sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
1459         sreq->sr_cmnd[5] = 0;
1460         sreq->sr_data_direction = DMA_NONE;
1461         sreq->sr_bufflen = 0;
1462         sreq->sr_buffer = NULL;
1463         sreq->sr_allowed = 5;
1464         sreq->sr_done = scsi_eh_lock_done;
1465         sreq->sr_timeout_per_command = 10 * HZ;
1466         sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
1467
1468         scsi_insert_special_req(sreq, 1);
1469 }
1470
1471
1472 /**
1473  * scsi_restart_operations - restart io operations to the specified host.
1474  * @shost:      Host we are restarting.
1475  *
1476  * Notes:
1477  *    When we entered the error handler, we blocked all further i/o to
1478  *    this device.  we need to 'reverse' this process.
1479  **/
1480 static void scsi_restart_operations(struct Scsi_Host *shost)
1481 {
1482         struct scsi_device *sdev;
1483
1484         /*
1485          * If the door was locked, we need to insert a door lock request
1486          * onto the head of the SCSI request queue for the device.  There
1487          * is no point trying to lock the door of an off-line device.
1488          */
1489         shost_for_each_device(sdev, shost) {
1490                 if (scsi_device_online(sdev) && sdev->locked)
1491                         scsi_eh_lock_door(sdev);
1492         }
1493
1494         /*
1495          * next free up anything directly waiting upon the host.  this
1496          * will be requests for character device operations, and also for
1497          * ioctls to queued block devices.
1498          */
1499         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
1500                                           __FUNCTION__));
1501
1502         clear_bit(SHOST_RECOVERY, &shost->shost_state);
1503
1504         wake_up(&shost->host_wait);
1505
1506         /*
1507          * finally we need to re-initiate requests that may be pending.  we will
1508          * have had everything blocked while error handling is taking place, and
1509          * now that error recovery is done, we will need to ensure that these
1510          * requests are started.
1511          */
1512         scsi_run_host_queues(shost);
1513 }
1514
/**
 * scsi_eh_ready_devs - check device ready state and recover if not.
 * @shost:	host to be recovered.
 * @work_q:	list_head of commands still awaiting recovery.
 * @done_q:	list_head for processed commands.
 *
 * Notes:
 *    Escalates through START_UNIT, bus device reset, bus reset and host
 *    reset, stopping at the first stage that returns non-zero.  If none
 *    of them does, the devices of the remaining commands are taken
 *    offline.
 **/
static void scsi_eh_ready_devs(struct Scsi_Host *shost,
			       struct list_head *work_q,
			       struct list_head *done_q)
{
	if (scsi_eh_stu(shost, work_q, done_q))
		return;

	if (scsi_eh_bus_device_reset(shost, work_q, done_q))
		return;

	if (scsi_eh_bus_reset(shost, work_q, done_q))
		return;

	if (scsi_eh_host_reset(work_q, done_q))
		return;

	scsi_eh_offline_sdevs(work_q, done_q);
}
1531
1532 /**
1533  * scsi_eh_flush_done_q - finish processed commands or retry them.
1534  * @done_q:     list_head of processed commands.
1535  *
1536  **/
1537 static void scsi_eh_flush_done_q(struct list_head *done_q)
1538 {
1539         struct list_head *lh, *lh_sf;
1540         struct scsi_cmnd *scmd;
1541
1542         list_for_each_safe(lh, lh_sf, done_q) {
1543                 scmd = list_entry(lh, struct scsi_cmnd, eh_entry);
1544                 list_del_init(lh);
1545                 if (scsi_device_online(scmd->device) &&
1546                     !blk_noretry_request(scmd->request) &&
1547                     (++scmd->retries < scmd->allowed)) {
1548                         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
1549                                                           " retry cmd: %p\n",
1550                                                           current->comm,
1551                                                           scmd));
1552                                 scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
1553                 } else {
1554                         if (!scmd->result)
1555                                 scmd->result |= (DRIVER_TIMEOUT << 24);
1556                         SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
1557                                                         " cmd: %p\n",
1558                                                         current->comm, scmd));
1559                         scsi_finish_command(scmd);
1560                 }
1561         }
1562 }
1563
1564 /**
1565  * scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
1566  * @shost:      Host to unjam.
1567  *
1568  * Notes:
1569  *    When we come in here, we *know* that all commands on the bus have
1570  *    either completed, failed or timed out.  we also know that no further
1571  *    commands are being sent to the host, so things are relatively quiet
1572  *    and we have freedom to fiddle with things as we wish.
1573  *
1574  *    This is only the *default* implementation.  it is possible for
1575  *    individual drivers to supply their own version of this function, and
1576  *    if the maintainer wishes to do this, it is strongly suggested that
1577  *    this function be taken as a template and modified.  this function
1578  *    was designed to correctly handle problems for about 95% of the
1579  *    different cases out there, and it should always provide at least a
1580  *    reasonable amount of error recovery.
1581  *
1582  *    Any command marked 'failed' or 'timeout' must eventually have
1583  *    scsi_finish_cmd() called for it.  we do all of the retry stuff
1584  *    here, so when we restart the host after we return it should have an
1585  *    empty queue.
1586  **/
1587 static void scsi_unjam_host(struct Scsi_Host *shost)
1588 {
1589         unsigned long flags;
1590         LIST_HEAD(eh_work_q);
1591         LIST_HEAD(eh_done_q);
1592
1593         spin_lock_irqsave(shost->host_lock, flags);
1594         list_splice_init(&shost->eh_cmd_q, &eh_work_q);
1595         spin_unlock_irqrestore(shost->host_lock, flags);
1596
1597         SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
1598
1599         if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
1600                 if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
1601                         scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
1602
1603         scsi_eh_flush_done_q(&eh_done_q);
1604 }
1605
1606 /**
1607  * scsi_error_handler - Handle errors/timeouts of SCSI cmds.
1608  * @data:       Host for which we are running.
1609  *
1610  * Notes:
1611  *    This is always run in the context of a kernel thread.  The idea is
1612  *    that we start this thing up when the kernel starts up (one per host
1613  *    that we detect), and it immediately goes to sleep and waits for some
1614  *    event (i.e. failure).  When this takes place, we have the job of
1615  *    trying to unjam the bus and restarting things.
1616  **/
1617 int scsi_error_handler(void *data)
1618 {
1619         struct Scsi_Host *shost = (struct Scsi_Host *) data;
1620         int rtn;
1621         DECLARE_MUTEX_LOCKED(sem);
1622
1623         lock_kernel();
1624
1625         /*
1626          *    Flush resources
1627          */
1628
1629         daemonize("scsi_eh_%d", shost->host_no);
1630
1631         current->flags |= PF_NOFREEZE;
1632
1633         shost->eh_wait = &sem;
1634         shost->ehandler = current;
1635
1636         unlock_kernel();
1637
1638         /*
1639          * Wake up the thread that created us.
1640          */
1641         SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
1642                                           " scsi_eh_%d\n",shost->host_no));
1643
1644         complete(shost->eh_notify);
1645
1646         while (1) {
1647                 /*
1648                  * If we get a signal, it means we are supposed to go
1649                  * away and die.  This typically happens if the user is
1650                  * trying to unload a module.
1651                  */
1652                 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
1653                                                   " scsi_eh_%d"
1654                                                   " sleeping\n",shost->host_no));
1655
1656                 /*
1657                  * Note - we always use down_interruptible with the semaphore
1658                  * even if the module was loaded as part of the kernel.  The
1659                  * reason is that down() will cause this thread to be counted
1660                  * in the load average as a running process, and down
1661                  * interruptible doesn't.  Given that we need to allow this
1662                  * thread to die if the driver was loaded as a module, using
1663                  * semaphores isn't unreasonable.
1664                  */
1665                 down_interruptible(&sem);
1666                 if (shost->eh_kill)
1667                         break;
1668
1669                 SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
1670                                                   " scsi_eh_%d waking"
1671                                                   " up\n",shost->host_no));
1672
1673                 shost->eh_active = 1;
1674
1675                 /*
1676                  * We have a host that is failing for some reason.  Figure out
1677                  * what we need to do to get it up and online again (if we can).
1678                  * If we fail, we end up taking the thing offline.
1679                  */
1680                 if (shost->hostt->eh_strategy_handler) 
1681                         rtn = shost->hostt->eh_strategy_handler(shost);
1682                 else
1683                         scsi_unjam_host(shost);
1684
1685                 shost->eh_active = 0;
1686
1687                 /*
1688                  * Note - if the above fails completely, the action is to take
1689                  * individual devices offline and flush the queue of any
1690                  * outstanding requests that may have been pending.  When we
1691                  * restart, we restart any I/O to any other devices on the bus
1692                  * which are still online.
1693                  */
1694                 scsi_restart_operations(shost);
1695
1696         }
1697
1698         SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
1699                                           " exiting\n",shost->host_no));
1700
1701         /*
1702          * Make sure that nobody tries to wake us up again.
1703          */
1704         shost->eh_wait = NULL;
1705
1706         /*
1707          * Knock this down too.  From this point on, the host is flying
1708          * without a pilot.  If this is because the module is being unloaded,
1709          * that's fine.  If the user sent a signal to this thing, we are
1710          * potentially in real danger.
1711          */
1712         shost->eh_active = 0;
1713         shost->ehandler = NULL;
1714
1715         /*
1716          * If anyone is waiting for us to exit (i.e. someone trying to unload
1717          * a driver), then wake up that process to let them know we are on
1718          * the way out the door.
1719          */
1720         complete_and_exit(shost->eh_notify, 0);
1721         return 0;
1722 }
1723
1724 /*
1725  * Function:    scsi_report_bus_reset()
1726  *
1727  * Purpose:     Utility function used by low-level drivers to report that
1728  *              they have observed a bus reset on the bus being handled.
1729  *
1730  * Arguments:   shost       - Host in question
1731  *              channel     - channel on which reset was observed.
1732  *
1733  * Returns:     Nothing
1734  *
1735  * Lock status: Host lock must be held.
1736  *
1737  * Notes:       This only needs to be called if the reset is one which
1738  *              originates from an unknown location.  Resets originated
1739  *              by the mid-level itself don't need to call this, but there
1740  *              should be no harm.
1741  *
1742  *              The main purpose of this is to make sure that a CHECK_CONDITION
1743  *              is properly treated.
1744  */
1745 void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
1746 {
1747         struct scsi_device *sdev;
1748
1749         __shost_for_each_device(sdev, shost) {
1750                 if (channel == sdev->channel) {
1751                         sdev->was_reset = 1;
1752                         sdev->expecting_cc_ua = 1;
1753                 }
1754         }
1755 }
1756
1757 /*
1758  * Function:    scsi_report_device_reset()
1759  *
1760  * Purpose:     Utility function used by low-level drivers to report that
1761  *              they have observed a device reset on the device being handled.
1762  *
1763  * Arguments:   shost       - Host in question
1764  *              channel     - channel on which reset was observed
1765  *              target      - target on which reset was observed
1766  *
1767  * Returns:     Nothing
1768  *
1769  * Lock status: Host lock must be held
1770  *
1771  * Notes:       This only needs to be called if the reset is one which
1772  *              originates from an unknown location.  Resets originated
1773  *              by the mid-level itself don't need to call this, but there
1774  *              should be no harm.
1775  *
1776  *              The main purpose of this is to make sure that a CHECK_CONDITION
1777  *              is properly treated.
1778  */
1779 void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
1780 {
1781         struct scsi_device *sdev;
1782
1783         __shost_for_each_device(sdev, shost) {
1784                 if (channel == sdev->channel &&
1785                     target == sdev->id) {
1786                         sdev->was_reset = 1;
1787                         sdev->expecting_cc_ua = 1;
1788                 }
1789         }
1790 }
1791
/*
 * Completion callback installed as scmd->scsi_done by scsi_reset_provider().
 * It intentionally does nothing.
 */
static void scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
{
	/* nothing to do */
}
1796
1797 /*
1798  * Function:    scsi_reset_provider
1799  *
1800  * Purpose:     Send requested reset to a bus or device at any phase.
1801  *
1802  * Arguments:   device  - device to send reset to
1803  *              flag - reset type (see scsi.h)
1804  *
1805  * Returns:     SUCCESS/FAILURE.
1806  *
1807  * Notes:       This is used by the SCSI Generic driver to provide
1808  *              Bus/Device reset capability.
1809  */
1810 int
1811 scsi_reset_provider(struct scsi_device *dev, int flag)
1812 {
1813         struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
1814         struct request req;
1815         int rtn;
1816
1817         scmd->request = &req;
1818         memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
1819         scmd->request->rq_status        = RQ_SCSI_BUSY;
1820         scmd->state                     = SCSI_STATE_INITIALIZING;
1821         scmd->owner                     = SCSI_OWNER_MIDLEVEL;
1822     
1823         memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
1824     
1825         scmd->scsi_done         = scsi_reset_provider_done_command;
1826         scmd->done                      = NULL;
1827         scmd->buffer                    = NULL;
1828         scmd->bufflen                   = 0;
1829         scmd->request_buffer            = NULL;
1830         scmd->request_bufflen           = 0;
1831         scmd->internal_timeout          = NORMAL_TIMEOUT;
1832         scmd->abort_reason              = DID_ABORT;
1833
1834         scmd->cmd_len                   = 0;
1835
1836         scmd->sc_data_direction         = DMA_BIDIRECTIONAL;
1837         scmd->sc_request                = NULL;
1838         scmd->sc_magic                  = SCSI_CMND_MAGIC;
1839
1840         init_timer(&scmd->eh_timeout);
1841
1842         /*
1843          * Sometimes the command can get back into the timer chain,
1844          * so use the pid as an identifier.
1845          */
1846         scmd->pid                       = 0;
1847
1848         switch (flag) {
1849         case SCSI_TRY_RESET_DEVICE:
1850                 rtn = scsi_try_bus_device_reset(scmd);
1851                 if (rtn == SUCCESS)
1852                         break;
1853                 /* FALLTHROUGH */
1854         case SCSI_TRY_RESET_BUS:
1855                 rtn = scsi_try_bus_reset(scmd);
1856                 if (rtn == SUCCESS)
1857                         break;
1858                 /* FALLTHROUGH */
1859         case SCSI_TRY_RESET_HOST:
1860                 rtn = scsi_try_host_reset(scmd);
1861                 break;
1862         default:
1863                 rtn = FAILED;
1864         }
1865
1866         scsi_delete_timer(scmd);
1867         scsi_next_command(scmd);
1868         return rtn;
1869 }