vserver 1.9.3

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ff5b16a..d20cd1e 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
 #include <asm/io.h>
 #include <asm/bitops.h>
 
-/**
- *     ide_end_request         -       complete an IDE I/O
- *     @drive: IDE device for the I/O
- *     @uptodate: 
- *     @nr_sectors: number of sectors completed
- *
- *     This is our end_request wrapper function. We complete the I/O
- *     update random number input and dequeue the request, which if
- *     it was tagged may be out of order.
+static void ide_fill_flush_cmd(ide_drive_t *drive, struct request *rq)
+{
+       char *buf = rq->cmd;
+
+       /*
+        * reuse cdb space for ata command
+        */
+       memset(buf, 0, sizeof(rq->cmd));
+
+       rq->flags |= REQ_DRIVE_TASK | REQ_STARTED;
+       rq->buffer = buf;
+       rq->buffer[0] = WIN_FLUSH_CACHE;
+
+       if (ide_id_has_flush_cache_ext(drive->id) &&
+           (drive->capacity64 >= (1UL << 28)))
+               rq->buffer[0] = WIN_FLUSH_CACHE_EXT;
+}
+
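For reference, the opcode choice above follows the ATA convention that FLUSH CACHE EXT (0xEA) only pays off on drives that support it and address at least 2^28 sectors; everything else gets plain FLUSH CACHE (0xE7). A minimal stand-alone sketch of that decision, with an illustrative helper name that is not part of the patch:

    #include <stdint.h>

    #define ATA_CMD_FLUSH      0xE7        /* WIN_FLUSH_CACHE */
    #define ATA_CMD_FLUSH_EXT  0xEA        /* WIN_FLUSH_CACHE_EXT */

    /* pick the flush opcode the way ide_fill_flush_cmd() does */
    static uint8_t select_flush_opcode(int has_flush_cache_ext, uint64_t capacity_sectors)
    {
            /* the EXT form only matters once the drive addresses more than 28 bits of LBA */
            if (has_flush_cache_ext && capacity_sectors >= (1ULL << 28))
                    return ATA_CMD_FLUSH_EXT;
            return ATA_CMD_FLUSH;
    }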
+/*
+ * preempt pending requests, and store this cache flush for immediate
+ * execution
  */
-int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
+static struct request *ide_queue_flush_cmd(ide_drive_t *drive,
+                                          struct request *rq, int post)
 {
-       struct request *rq;
-       unsigned long flags;
-       int ret = 1;
+       struct request *flush_rq = &HWGROUP(drive)->wrq;
 
-       spin_lock_irqsave(&ide_lock, flags);
-       rq = HWGROUP(drive)->rq;
+       /*
+        * write cache disabled, clear the barrier bit and treat it like
+        * an ordinary write
+        */
+       if (!drive->wcache) {
+               rq->flags |= REQ_BAR_PREFLUSH;
+               return rq;
+       }
 
-       BUG_ON(!(rq->flags & REQ_STARTED));
+       ide_init_drive_cmd(flush_rq);
+       ide_fill_flush_cmd(drive, flush_rq);
 
-       if (!nr_sectors)
-               nr_sectors = rq->hard_cur_sectors;
+       flush_rq->special = rq;
+       flush_rq->nr_sectors = rq->nr_sectors;
+
+       if (!post) {
+               drive->doing_barrier = 1;
+               flush_rq->flags |= REQ_BAR_PREFLUSH;
+               blkdev_dequeue_request(rq);
+       } else
+               flush_rq->flags |= REQ_BAR_POSTFLUSH;
+
+       __elv_add_request(drive->queue, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+       HWGROUP(drive)->rq = NULL;
+       return flush_rq;
+}
+
+static int __ide_end_request(ide_drive_t *drive, struct request *rq,
+                            int uptodate, int nr_sectors)
+{
+       int ret = 1;
+
+       BUG_ON(!(rq->flags & REQ_STARTED));
 
        /*
         * if failfast is set on a request, override number of sectors and
         * complete the whole request right now
         */
-       if (blk_noretry_request(rq) && !uptodate)
+       if (blk_noretry_request(rq) && end_io_error(uptodate))
                nr_sectors = rq->hard_nr_sectors;
 
+       if (!blk_fs_request(rq) && end_io_error(uptodate) && !rq->errors)
+               rq->errors = -EIO;
+
        /*
         * decide whether to reenable DMA -- 3 is a random magic for now,
         * if we DMA timeout more than 3 times, just stay in PIO
@@ -97,15 +136,56 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
 
        if (!end_that_request_first(rq, uptodate, nr_sectors)) {
                add_disk_randomness(rq->rq_disk);
+
+               if (blk_rq_tagged(rq))
+                       blk_queue_end_tag(drive->queue, rq);
+
                blkdev_dequeue_request(rq);
                HWGROUP(drive)->rq = NULL;
                end_that_request_last(rq);
                ret = 0;
        }
-       spin_unlock_irqrestore(&ide_lock, flags);
        return ret;
 }
 
+/**
+ *     ide_end_request         -       complete an IDE I/O
+ *     @drive: IDE device for the I/O
+ *     @uptodate:
+ *     @nr_sectors: number of sectors completed
+ *
+ *     This is our end_request wrapper function. We complete the I/O
+ *     update random number input and dequeue the request, which if
+ *     it was tagged may be out of order.
+ */
+
+int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors)
+{
+       struct request *rq;
+       unsigned long flags;
+       int ret = 1;
+
+       spin_lock_irqsave(&ide_lock, flags);
+       rq = HWGROUP(drive)->rq;
+
+       if (!nr_sectors)
+               nr_sectors = rq->hard_cur_sectors;
+
+       if (!blk_barrier_rq(rq) || !drive->wcache)
+               ret = __ide_end_request(drive, rq, uptodate, nr_sectors);
+       else {
+               struct request *flush_rq = &HWGROUP(drive)->wrq;
+
+               flush_rq->nr_sectors -= nr_sectors;
+               if (!flush_rq->nr_sectors) {
+                       ide_queue_flush_cmd(drive, rq, 1);
+                       ret = 0;
+               }
+       }
+
+       spin_unlock_irqrestore(&ide_lock, flags);
+       return ret;
+}
 EXPORT_SYMBOL(ide_end_request);
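The barrier write itself completes in two steps here: completed sectors are subtracted from flush_rq->nr_sectors, and only when that counter reaches zero is the post-flush queued via ide_queue_flush_cmd(drive, rq, 1). A rough stand-alone model of that countdown, using made-up names rather than the driver's own structures:

    /* toy model of the accounting ide_end_request() does for a barrier write */
    struct barrier_countdown {
            unsigned long sectors_left;     /* plays the role of flush_rq->nr_sectors */
    };

    /* returns 1 when the whole write is accounted for and a post-flush should be issued */
    static int barrier_account(struct barrier_countdown *b, unsigned long done_sectors)
    {
            if (done_sectors > b->sectors_left)
                    done_sectors = b->sectors_left;
            b->sectors_left -= done_sectors;
            return b->sectors_left == 0;    /* stands in for ide_queue_flush_cmd(..., 1) */
    }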
 
 /**
@@ -137,6 +217,113 @@ static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
        spin_unlock_irqrestore(&ide_lock, flags);
 }
 
+/*
+ * FIXME: probably move this somewhere else, name is bad too :)
+ */
+u64 ide_get_error_location(ide_drive_t *drive, char *args)
+{
+       u32 high, low;
+       u8 hcyl, lcyl, sect;
+       u64 sector;
+
+       high = 0;
+       hcyl = args[5];
+       lcyl = args[4];
+       sect = args[3];
+
+       if (ide_id_has_flush_cache_ext(drive->id)) {
+               low = (hcyl << 16) | (lcyl << 8) | sect;
+               HWIF(drive)->OUTB(drive->ctl|0x80, IDE_CONTROL_REG);
+               high = ide_read_24(drive);
+       } else {
+               u8 cur = HWIF(drive)->INB(IDE_SELECT_REG);
+               if (cur & 0x40)
+                       low = (hcyl << 16) | (lcyl << 8) | sect;
+               else {
+                       low = hcyl * drive->head * drive->sect;
+                       low += lcyl * drive->sect;
+                       low += sect - 1;
+               }
+       }
+
+       sector = ((u64) high << 24) | low;
+       return sector;
+}
+EXPORT_SYMBOL(ide_get_error_location);
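ide_get_error_location() rebuilds the failing sector number from the taskfile: the LBA-high/mid/low registers (hcyl/lcyl/sect) give the low 24 bits, and on drives with FLUSH CACHE EXT the upper bytes are read back from the HOB registers. The bit assembly on its own, as a sketch with an invented helper name:

    #include <stdint.h>

    /*
     * combine the high-order HOB bytes with the low 24 bits the way the
     * end of ide_get_error_location() does: sector = (high << 24) | low
     */
    static uint64_t lba_from_taskfile(uint32_t high24, uint8_t hcyl, uint8_t lcyl, uint8_t sect)
    {
            uint32_t low24 = ((uint32_t)hcyl << 16) | ((uint32_t)lcyl << 8) | sect;

            return ((uint64_t)high24 << 24) | low24;
    }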
+
+static void ide_complete_barrier(ide_drive_t *drive, struct request *rq,
+                                int error)
+{
+       struct request *real_rq = rq->special;
+       int good_sectors, bad_sectors;
+       sector_t sector;
+
+       if (!error) {
+               if (blk_barrier_postflush(rq)) {
+                       /*
+                        * this completes the barrier write
+                        */
+                       __ide_end_request(drive, real_rq, 1, real_rq->hard_nr_sectors);
+                       drive->doing_barrier = 0;
+               } else {
+                       /*
+                        * just indicate that we did the pre flush
+                        */
+                       real_rq->flags |= REQ_BAR_PREFLUSH;
+                       elv_requeue_request(drive->queue, real_rq);
+               }
+               /*
+                * all is fine, return
+                */
+               return;
+       }
+
+       /*
+        * we need to end real_rq, but it's not on the queue currently.
+        * put it back on the queue, so we don't have to special case
+        * anything else for completing it
+        */
+       if (!blk_barrier_postflush(rq))
+               elv_requeue_request(drive->queue, real_rq);
+
+       /*
+        * drive aborted flush command, assume FLUSH_CACHE_* doesn't
+        * work and disable barrier support
+        */
+       if (error & ABRT_ERR) {
+               printk(KERN_ERR "%s: barrier support doesn't work\n", drive->name);
+               __ide_end_request(drive, real_rq, -EOPNOTSUPP, real_rq->hard_nr_sectors);
+               blk_queue_ordered(drive->queue, 0);
+               blk_queue_issue_flush_fn(drive->queue, NULL);
+       } else {
+               /*
+                * find out what part of the request failed
+                */
+               good_sectors = 0;
+               if (blk_barrier_postflush(rq)) {
+                       sector = ide_get_error_location(drive, rq->buffer);
+
+                       if ((sector >= real_rq->hard_sector) &&
+                           (sector < real_rq->hard_sector + real_rq->hard_nr_sectors))
+                               good_sectors = sector - real_rq->hard_sector;
+               } else
+                       sector = real_rq->hard_sector;
+
+               bad_sectors = real_rq->hard_nr_sectors - good_sectors;
+               if (good_sectors)
+                       __ide_end_request(drive, real_rq, 1, good_sectors);
+               if (bad_sectors)
+                       __ide_end_request(drive, real_rq, 0, bad_sectors);
+
+               printk(KERN_ERR "%s: failed barrier write: "
+                               "sector=%Lx(good=%d/bad=%d)\n",
+                               drive->name, (unsigned long long)sector,
+                               good_sectors, bad_sectors);
+       }
+
+       drive->doing_barrier = 0;
+}
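On a failed post-flush with a usable error address, the function above splits the original write around that sector: sectors before it are completed successfully, the rest are failed. The same arithmetic in isolation, as an illustrative helper rather than driver code:

    #include <stdint.h>

    /* split an nr_sectors request around the first bad sector, as in ide_complete_barrier() */
    static void split_on_error(uint64_t start, unsigned int nr_sectors, uint64_t error_sector,
                               unsigned int *good, unsigned int *bad)
    {
            *good = 0;
            /* only trust the reported sector if it falls inside the request */
            if (error_sector >= start && error_sector < start + nr_sectors)
                    *good = (unsigned int)(error_sector - start);
            *bad = nr_sectors - *good;
    }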
+
 /**
  *     ide_end_drive_cmd       -       end an explicit drive command
  *     @drive: command 
@@ -197,6 +384,8 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
                                args->hobRegister[IDE_DATA_OFFSET]      = (data >> 8) & 0xFF;
                        }
                        args->tfRegister[IDE_ERROR_OFFSET]   = err;
+                       /* be sure we're looking at the low order bits */
+                       hwif->OUTB(drive->ctl & ~0x80, IDE_CONTROL_REG);
                        args->tfRegister[IDE_NSECTOR_OFFSET] = hwif->INB(IDE_NSECTOR_REG);
                        args->tfRegister[IDE_SECTOR_OFFSET]  = hwif->INB(IDE_SECTOR_REG);
                        args->tfRegister[IDE_LCYL_OFFSET]    = hwif->INB(IDE_LCYL_REG);
@@ -226,6 +415,10 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 
        spin_lock_irqsave(&ide_lock, flags);
        blkdev_dequeue_request(rq);
+
+       if (blk_barrier_preflush(rq) || blk_barrier_postflush(rq))
+               ide_complete_barrier(drive, rq, err);
+
        HWGROUP(drive)->rq = NULL;
        end_that_request_last(rq);
        spin_unlock_irqrestore(&ide_lock, flags);
@@ -501,7 +694,9 @@ ide_startstop_t execute_drive_cmd (ide_drive_t *drive, struct request *rq)
  
                if (!args)
                        goto done;
+
+               hwif->data_phase = args->data_phase;
+
                if (args->tf_out_flags.all != 0) 
                        return flagged_taskfile(drive, args);
                return do_rw_taskfile(drive, args);
@@ -712,6 +907,22 @@ static inline ide_drive_t *choose_drive (ide_hwgroup_t *hwgroup)
 repeat:        
        best = NULL;
        drive = hwgroup->drive;
+
+       /*
+        * drive is doing pre-flush, ordered write, post-flush sequence. even
+        * though that is 3 requests, it must be seen as a single transaction.
+        * we must not preempt this drive until that is complete
+        */
+       if (drive->doing_barrier) {
+               /*
+                * small race where queue could get replugged during
+                * the 3-request flush cycle, just yank the plug since
+                * we want it to finish asap
+                */
+               blk_remove_plug(drive->queue);
+               return drive;
+       }
+
        do {
                if ((!drive->sleep || time_after_eq(jiffies, drive->sleep))
                    && !elv_queue_empty(drive->queue)) {
@@ -867,6 +1078,13 @@ void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
                        break;
                }
 
+               /*
+                * if rq is a barrier write, issue pre cache flush if not
+                * already done
+                */
+               if (blk_barrier_rq(rq) && !blk_barrier_preflush(rq))
+                       rq = ide_queue_flush_cmd(drive, rq, 0);
+
                /*
                 * Sanity: don't accept a request that isn't a PM request
                 * if we are currently power managed. This is very important as
@@ -917,7 +1135,9 @@ EXPORT_SYMBOL(ide_do_request);
  */
 void do_ide_request(request_queue_t *q)
 {
-       ide_do_request(q->queuedata, IDE_NO_IRQ);
+       ide_drive_t *drive = q->queuedata;
+
+       ide_do_request(HWGROUP(drive), IDE_NO_IRQ);
 }
 
 /*
@@ -1286,6 +1506,7 @@ void ide_init_drive_cmd (struct request *rq)
 {
        memset(rq, 0, sizeof(*rq));
        rq->flags = REQ_DRIVE_CMD;
+       rq->ref_count = 1;
 }
 
 EXPORT_SYMBOL(ide_init_drive_cmd);