fedora core 6 1.2949 + vserver 2.2.0
[linux-2.6.git] / drivers / scsi / sd.c
index 8fd4590..b781a90 100644 (file)
@@ -32,7 +32,6 @@
  *     than the level indicated above to trigger output.       
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/kernel.h>
 #include <linux/genhd.h>
 #include <linux/hdreg.h>
 #include <linux/errno.h>
+#include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
-#include <linux/kref.h>
+#include <linux/delay.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_eh.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_ioctl.h>
-#include <scsi/scsi_request.h>
 #include <scsi/scsicam.h>
 
 #include "scsi_logging.h"
 
-
 /*
- * Remaining dev_t-handling stuff
+ * More than enough for everybody ;)  The huge number of majors
+ * is a leftover from 16bit dev_t days, we don't really need that
+ * much numberspace.
  */
 #define SD_MAJORS      16
-#define SD_DISKS       32768   /* anything between 256 and 262144 */
+
+MODULE_AUTHOR("Eric Youngdale");
+MODULE_DESCRIPTION("SCSI disk (sd) driver");
+MODULE_LICENSE("GPL");
+
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK0_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK1_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK2_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK3_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK4_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK5_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK6_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK7_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK8_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK9_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK10_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK11_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK12_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK13_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK14_MAJOR);
+MODULE_ALIAS_BLOCKDEV_MAJOR(SCSI_DISK15_MAJOR);
+
+/*
+ * This is limited by the naming scheme enforced in sd_probe,
+ * add another character to it if you really need more disks.
+ */
+#define SD_MAX_DISKS   (((26 * 26) + 26 + 1) * 26)
 
 /*
  * Time out in seconds for disks and Magneto-opticals (which are slower).
  * Number of allowed retries
  */
 #define SD_MAX_RETRIES         5
+#define SD_PASSTHROUGH_RETRIES 1
 
-static void scsi_disk_release(struct kref *kref);
+/*
+ * Size of the initial data buffer for mode and read capacity data
+ */
+#define SD_BUF_SIZE            512
 
 struct scsi_disk {
        struct scsi_driver *driver;     /* always &sd_template */
        struct scsi_device *device;
-       struct kref     kref;
+       struct class_device cdev;
        struct gendisk  *disk;
        unsigned int    openers;        /* protected by BKL for now, yuck */
        sector_t        capacity;       /* size in 512-byte sectors */
@@ -94,16 +125,17 @@ struct scsi_disk {
        u8              write_prot;
        unsigned        WCE : 1;        /* state of disk WCE bit */
        unsigned        RCD : 1;        /* state of disk RCD bit, unused */
+       unsigned        DPOFUA : 1;     /* state of disk DPOFUA bit */
 };
+#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,cdev)
 
-
-static unsigned long sd_index_bits[SD_DISKS / BITS_PER_LONG];
-static spinlock_t sd_index_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_IDR(sd_index_idr);
+static DEFINE_SPINLOCK(sd_index_lock);
 
 /* This mutex is used to mediate the 0->1 reference get in the
  * face of object destruction (i.e. we can't allow a get on an
  * object after last put) */
-static DECLARE_MUTEX(sd_ref_sem);
+static DEFINE_MUTEX(sd_ref_mutex);
 
 static int sd_revalidate_disk(struct gendisk *disk);
 static void sd_rw_intr(struct scsi_cmnd * SCpnt);
@@ -113,8 +145,122 @@ static int sd_remove(struct device *);
 static void sd_shutdown(struct device *dev);
 static void sd_rescan(struct device *);
 static int sd_init_command(struct scsi_cmnd *);
+static int sd_issue_flush(struct device *, sector_t *);
+static void sd_prepare_flush(request_queue_t *, struct request *);
 static void sd_read_capacity(struct scsi_disk *sdkp, char *diskname,
-                struct scsi_request *SRpnt, unsigned char *buffer);
+                            unsigned char *buffer);
+static void scsi_disk_release(struct class_device *cdev);
+
+/*
+ * Human-readable cache modes.  sd_show_cache_type() indexes this table
+ * with RCD + 2 * WCE, and sd_store_cache_type() maps a matched index
+ * back to those two bits.
+ */
+static const char *sd_cache_types[] = {
+       [0] = "write through",
+       [1] = "none",
+       [2] = "write back",
+       [3] = "write back, no read (daft)",
+};
+
+/*
+ * sysfs store handler for the "cache_type" attribute.
+ *
+ * @buf must contain one of the sd_cache_types[] strings followed by a
+ * newline.  The index of the matching string encodes the requested
+ * bits (bit 0 = RCD, bit 1 = WCE).  Mode page 0x08 is read with
+ * scsi_mode_sense(), patched, written back with scsi_mode_select(),
+ * and the disk is revalidated.
+ *
+ * Returns @count on success and -EINVAL on any failure.
+ */
+static ssize_t sd_store_cache_type(struct class_device *cdev, const char *buf,
+                                  size_t count)
+{
+       int i, ct = -1, rcd, wce, sp;
+       struct scsi_disk *sdkp = to_scsi_disk(cdev);
+       struct scsi_device *sdp = sdkp->device;
+       char buffer[64];
+       char *buffer_data;
+       struct scsi_mode_data data;
+       struct scsi_sense_hdr sshdr;
+       unsigned int offset;
+       int len;
+
+       if (sdp->type != TYPE_DISK)
+               /* no cache control on RBC devices; theoretically they
+                * can do it, but there's probably so many exceptions
+                * it's not worth the risk */
+               return -EINVAL;
+
+       for (i = 0; i < ARRAY_SIZE(sd_cache_types); i++) {
+               /* distinct name: must not shadow the payload length 'len' */
+               const int name_len = strlen(sd_cache_types[i]);
+
+               if (strncmp(sd_cache_types[i], buf, name_len) == 0 &&
+                   buf[name_len] == '\n') {
+                       ct = i;
+                       break;
+               }
+       }
+       if (ct < 0)
+               return -EINVAL;
+       rcd = ct & 0x01 ? 1 : 0;
+       wce = ct & 0x02 ? 1 : 0;
+       if (scsi_mode_sense(sdp, 0x08, 8, buffer, sizeof(buffer), SD_TIMEOUT,
+                           SD_MAX_RETRIES, &data, NULL))
+               return -EINVAL;
+
+       /*
+        * The page follows the mode parameter header and any block
+        * descriptors.  Bytes 0..2 of the page are touched below, so
+        * reject a reply that places them outside the local buffer or
+        * beyond the reported data length, and never hand MODE SELECT
+        * more bytes than remain in the buffer after the offset.
+        */
+       offset = data.header_length + data.block_descriptor_length;
+       if (offset + 3 > sizeof(buffer) || data.length < offset + 3)
+               return -EINVAL;
+       len = min_t(size_t, sizeof(buffer) - offset, data.length - offset);
+       buffer_data = buffer + offset;
+       buffer_data[2] &= ~0x05;
+       buffer_data[2] |= wce << 2 | rcd;
+       sp = buffer_data[0] & 0x80 ? 1 : 0;
+       /* PS is reserved in MODE SELECT data; clear it before sending */
+       buffer_data[0] &= ~0x80;
+
+       if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT,
+                            SD_MAX_RETRIES, &data, &sshdr)) {
+               if (scsi_sense_valid(&sshdr))
+                       scsi_print_sense_hdr(sdkp->disk->disk_name, &sshdr);
+               return -EINVAL;
+       }
+       sd_revalidate_disk(sdkp->disk);
+       return count;
+}
+
+/*
+ * sysfs store handler for "allow_restart": parses @buf as a base-10
+ * number and stores it in the device's allow_restart field.
+ *
+ * Returns -EACCES without CAP_SYS_ADMIN, -EINVAL when the device is not
+ * TYPE_DISK, and @count otherwise.
+ */
+static ssize_t sd_store_allow_restart(struct class_device *cdev, const char *buf,
+                                     size_t count)
+{
+       struct scsi_device *sdev;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+
+       sdev = to_scsi_disk(cdev)->device;
+       if (sdev->type != TYPE_DISK)
+               return -EINVAL;
+
+       sdev->allow_restart = simple_strtoul(buf, NULL, 10);
+       return count;
+}
+
+/*
+ * sysfs show handler for "cache_type": prints the sd_cache_types[]
+ * entry selected by the disk's RCD bit (index bit 0) and WCE bit
+ * (index bit 1), followed by a newline.
+ */
+static ssize_t sd_show_cache_type(struct class_device *cdev, char *buf)
+{
+       struct scsi_disk *disk_info = to_scsi_disk(cdev);
+       int idx = disk_info->RCD;
+
+       if (disk_info->WCE)
+               idx += 2;
+
+       return snprintf(buf, 40, "%s\n", sd_cache_types[idx]);
+}
+
+/* sysfs show handler for "FUA": prints the disk's DPOFUA bit as 0 or 1. */
+static ssize_t sd_show_fua(struct class_device *cdev, char *buf)
+{
+       return snprintf(buf, 20, "%u\n", to_scsi_disk(cdev)->DPOFUA);
+}
+
+/*
+ * sysfs show handler for "allow_restart": prints the allow_restart
+ * value of the SCSI device behind this disk.
+ */
+static ssize_t sd_show_allow_restart(struct class_device *cdev, char *buf)
+{
+       struct scsi_device *sdev = to_scsi_disk(cdev)->device;
+
+       return snprintf(buf, 40, "%d\n", sdev->allow_restart);
+}
+
+/*
+ * Attributes attached to sd_disk_class devices: cache_type and
+ * allow_restart are readable by all and writable by the owner, FUA is
+ * read-only.  The list is terminated by __ATTR_NULL.
+ */
+static struct class_device_attribute sd_disk_attrs[] = {
+       __ATTR(cache_type, S_IWUSR | S_IRUGO,
+              sd_show_cache_type, sd_store_cache_type),
+       __ATTR(FUA, S_IRUGO,
+              sd_show_fua, NULL),
+       __ATTR(allow_restart, S_IWUSR | S_IRUGO,
+              sd_show_allow_restart, sd_store_allow_restart),
+       __ATTR_NULL,
+};
+
+/*
+ * The "scsi_disk" device class: carries the sd_disk_attrs attributes
+ * and uses scsi_disk_release() as its release callback.
+ */
+static struct class sd_disk_class = {
+       .owner           = THIS_MODULE,
+       .name            = "scsi_disk",
+       .class_dev_attrs = sd_disk_attrs,
+       .release         = scsi_disk_release,
+};
 
 static struct scsi_driver sd_template = {
        .owner                  = THIS_MODULE,
@@ -126,9 +272,11 @@ static struct scsi_driver sd_template = {
        },
        .rescan                 = sd_rescan,
        .init_command           = sd_init_command,
+       .issue_flush            = sd_issue_flush,
 };
 
-/* Device no to disk mapping:
+/*
+ * Device no to disk mapping:
  * 
  *       major         disc2     disc  p1
  *   |............|.............|....|....| <- dev_t
@@ -141,7 +289,6 @@ static struct scsi_driver sd_template = {
  * As we stay compatible with our numbering scheme, we can reuse 
  * the well-known SCSI majors 8, 65--71, 136--143.
  */
-
 static int sd_major(int major_idx)
 {
        switch (major_idx) {
@@ -157,51 +304,55 @@ static int sd_major(int major_idx)
        }
 }
 
-static unsigned int make_sd_dev(unsigned int sd_nr, unsigned int part)
+static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
 {
-       return  (part & 0xf) | ((sd_nr & 0xf) << 4) |
-               (sd_major((sd_nr & 0xf0) >> 4) << 20) | (sd_nr & 0xfff00);
+       return container_of(disk->private_data, struct scsi_disk, driver);
 }
 
-/* reverse mapping dev -> (sd_nr, part) not currently needed */
-
-#define to_scsi_disk(obj) container_of(obj,struct scsi_disk,kref)
-
-static inline struct scsi_disk *scsi_disk(struct gendisk *disk)
+static struct scsi_disk *__scsi_disk_get(struct gendisk *disk)
 {
-       return container_of(disk->private_data, struct scsi_disk, driver);
+       struct scsi_disk *sdkp = NULL;
+
+       if (disk->private_data) {
+               sdkp = scsi_disk(disk);
+               if (scsi_device_get(sdkp->device) == 0)
+                       class_device_get(&sdkp->cdev);
+               else
+                       sdkp = NULL;
+       }
+       return sdkp;
 }
 
 static struct scsi_disk *scsi_disk_get(struct gendisk *disk)
 {
-       struct scsi_disk *sdkp = NULL;
+       struct scsi_disk *sdkp;
 
-       down(&sd_ref_sem);
-       if (disk->private_data == NULL)
-               goto out;
-       sdkp = scsi_disk(disk);
-       if (!kref_get(&sdkp->kref))
-               goto out_sdkp;
-       if (scsi_device_get(sdkp->device))
-               goto out_put;
-       up(&sd_ref_sem);
+       mutex_lock(&sd_ref_mutex);
+       sdkp = __scsi_disk_get(disk);
+       mutex_unlock(&sd_ref_mutex);
        return sdkp;
+}
 
- out_put:
-       kref_put(&sdkp->kref);
- out_sdkp:
-       sdkp = NULL;
- out:
-       up(&sd_ref_sem);
+static struct scsi_disk *scsi_disk_get_from_dev(struct device *dev)
+{
+       struct scsi_disk *sdkp;
+
+       mutex_lock(&sd_ref_mutex);
+       sdkp = dev_get_drvdata(dev);
+       if (sdkp)
+               sdkp = __scsi_disk_get(sdkp->disk);
+       mutex_unlock(&sd_ref_mutex);
        return sdkp;
 }
 
 static void scsi_disk_put(struct scsi_disk *sdkp)
 {
-       down(&sd_ref_sem);
-       scsi_device_put(sdkp->device);
-       kref_put(&sdkp->kref);
-       up(&sd_ref_sem);
+       struct scsi_device *sdev = sdkp->device;
+
+       mutex_lock(&sd_ref_mutex);
+       class_device_put(&sdkp->cdev);
+       scsi_device_put(sdev);
+       mutex_unlock(&sd_ref_mutex);
 }
 
 /**
@@ -214,55 +365,21 @@ static void scsi_disk_put(struct scsi_disk *sdkp)
  **/
 static int sd_init_command(struct scsi_cmnd * SCpnt)
 {
-       unsigned int this_count, timeout;
-       struct gendisk *disk;
-       sector_t block;
        struct scsi_device *sdp = SCpnt->device;
-
-       timeout = sdp->timeout;
-
-       /*
-        * these are already setup, just copy cdb basically
-        */
-       if (SCpnt->request->flags & REQ_BLOCK_PC) {
-               struct request *rq = SCpnt->request;
-
-               if (sizeof(rq->cmd) > sizeof(SCpnt->cmnd))
-                       return 0;
-
-               memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd));
-               if (rq_data_dir(rq) == WRITE)
-                       SCpnt->sc_data_direction = DMA_TO_DEVICE;
-               else if (rq->data_len)
-                       SCpnt->sc_data_direction = DMA_FROM_DEVICE;
-               else
-                       SCpnt->sc_data_direction = DMA_NONE;
-
-               this_count = rq->data_len;
-               if (rq->timeout)
-                       timeout = rq->timeout;
-
-               SCpnt->transfersize = rq->data_len;
-               goto queue;
-       }
-
-       /*
-        * we only do REQ_CMD and REQ_BLOCK_PC
-        */
-       if (!(SCpnt->request->flags & REQ_CMD))
-               return 0;
-
-       disk = SCpnt->request->rq_disk;
-       block = SCpnt->request->sector;
-       this_count = SCpnt->request_bufflen >> 9;
+       struct request *rq = SCpnt->request;
+       struct gendisk *disk = rq->rq_disk;
+       sector_t block = rq->sector;
+       unsigned int this_count = SCpnt->request_bufflen >> 9;
+       unsigned int timeout = sdp->timeout;
 
        SCSI_LOG_HLQUEUE(1, printk("sd_init_command: disk=%s, block=%llu, "
-                           "count=%d\n", disk->disk_name, (unsigned long long)block, this_count));
+                           "count=%d\n", disk->disk_name,
+                        (unsigned long long)block, this_count));
 
        if (!sdp || !scsi_device_online(sdp) ||
-           block + SCpnt->request->nr_sectors > get_capacity(disk)) {
+           block + rq->nr_sectors > get_capacity(disk)) {
                SCSI_LOG_HLQUEUE(2, printk("Finishing %ld sectors\n", 
-                                SCpnt->request->nr_sectors));
+                                rq->nr_sectors));
                SCSI_LOG_HLQUEUE(2, printk("Retry with 0x%p\n", SCpnt));
                return 0;
        }
@@ -290,7 +407,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
         * for this.
         */
        if (sdp->sector_size == 1024) {
-               if ((block & 1) || (SCpnt->request->nr_sectors & 1)) {
+               if ((block & 1) || (rq->nr_sectors & 1)) {
                        printk(KERN_ERR "sd: Bad block number requested");
                        return 0;
                } else {
@@ -299,7 +416,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                }
        }
        if (sdp->sector_size == 2048) {
-               if ((block & 3) || (SCpnt->request->nr_sectors & 3)) {
+               if ((block & 3) || (rq->nr_sectors & 3)) {
                        printk(KERN_ERR "sd: Bad block number requested");
                        return 0;
                } else {
@@ -308,7 +425,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                }
        }
        if (sdp->sector_size == 4096) {
-               if ((block & 7) || (SCpnt->request->nr_sectors & 7)) {
+               if ((block & 7) || (rq->nr_sectors & 7)) {
                        printk(KERN_ERR "sd: Bad block number requested");
                        return 0;
                } else {
@@ -316,30 +433,29 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                        this_count = this_count >> 3;
                }
        }
-       if (rq_data_dir(SCpnt->request) == WRITE) {
+       if (rq_data_dir(rq) == WRITE) {
                if (!sdp->writeable) {
                        return 0;
                }
                SCpnt->cmnd[0] = WRITE_6;
                SCpnt->sc_data_direction = DMA_TO_DEVICE;
-       } else if (rq_data_dir(SCpnt->request) == READ) {
+       } else if (rq_data_dir(rq) == READ) {
                SCpnt->cmnd[0] = READ_6;
                SCpnt->sc_data_direction = DMA_FROM_DEVICE;
        } else {
-               printk(KERN_ERR "sd: Unknown command %lx\n", 
-                      SCpnt->request->flags);
-/* overkill    panic("Unknown sd command %lx\n", SCpnt->request->flags); */
+               printk(KERN_ERR "sd: Unknown command %x\n", rq->cmd_flags);
                return 0;
        }
 
        SCSI_LOG_HLQUEUE(2, printk("%s : %s %d/%ld 512 byte blocks.\n", 
-               disk->disk_name, (rq_data_dir(SCpnt->request) == WRITE) ? 
-               "writing" : "reading", this_count, SCpnt->request->nr_sectors));
+               disk->disk_name, (rq_data_dir(rq) == WRITE) ? 
+               "writing" : "reading", this_count, rq->nr_sectors));
 
        SCpnt->cmnd[1] = 0;
        
        if (block > 0xffffffff) {
                SCpnt->cmnd[0] += READ_16 - READ_6;
+               SCpnt->cmnd[1] |= blk_fua_rq(rq) ? 0x8 : 0;
                SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
                SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
                SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
@@ -359,6 +475,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                        this_count = 0xffff;
 
                SCpnt->cmnd[0] += READ_10 - READ_6;
+               SCpnt->cmnd[1] |= blk_fua_rq(rq) ? 0x8 : 0;
                SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
                SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
                SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
@@ -367,8 +484,16 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
                SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
        } else {
-               if (this_count > 0xff)
-                       this_count = 0xff;
+               if (unlikely(blk_fua_rq(rq))) {
+                       /*
+                        * This happens only if this drive failed
+                        * 10byte rw command with ILLEGAL_REQUEST
+                        * during operation and thus turned off
+                        * use_10_for_rw.
+                        */
+                       printk(KERN_ERR "sd: FUA write on READ/WRITE(6) drive\n");
+                       return 0;
+               }
 
                SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f);
                SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff);
@@ -376,8 +501,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
                SCpnt->cmnd[4] = (unsigned char) this_count;
                SCpnt->cmnd[5] = 0;
        }
-       SCpnt->request_bufflen = SCpnt->bufflen =
-                       this_count * sdp->sector_size;
+       SCpnt->request_bufflen = this_count * sdp->sector_size;
 
        /*
         * We shouldn't disconnect in the middle of a sector, so with a dumb
@@ -386,8 +510,6 @@ static int sd_init_command(struct scsi_cmnd * SCpnt)
         */
        SCpnt->transfersize = sdp->sector_size;
        SCpnt->underflow = this_count << 9;
-
-queue:
        SCpnt->allowed = SD_MAX_RETRIES;
        SCpnt->timeout_per_command = timeout;
 
@@ -513,7 +635,7 @@ static int sd_release(struct inode *inode, struct file *filp)
        return 0;
 }
 
-static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *loc)
+static int sd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
        struct scsi_disk *sdkp = scsi_disk(bdev->bd_disk);
        struct scsi_device *sdp = sdkp->device;
@@ -531,15 +653,9 @@ static int sd_hdio_getgeo(struct block_device *bdev, struct hd_geometry __user *
        else
                scsicam_bios_param(bdev, sdkp->capacity, diskinfo);
 
-       if (put_user(diskinfo[0], &loc->heads))
-               return -EFAULT;
-       if (put_user(diskinfo[1], &loc->sectors))
-               return -EFAULT;
-       if (put_user(diskinfo[2], &loc->cylinders))
-               return -EFAULT;
-       if (put_user((unsigned)get_start_sect(bdev),
-                    (unsigned long __user *)&loc->start))
-               return -EFAULT;
+       geo->heads = diskinfo[0];
+       geo->sectors = diskinfo[1];
+       geo->cylinders = diskinfo[2];
        return 0;
 }
 
@@ -575,14 +691,9 @@ static int sd_ioctl(struct inode * inode, struct file * filp,
         * may try and take the device offline, in which case all further
         * access to the device is prohibited.
         */
-       if (!scsi_block_when_processing_errors(sdp))
-               return -ENODEV;
-
-       if (cmd == HDIO_GETGEO) {
-               if (!arg)
-                       return -EINVAL;
-               return sd_hdio_getgeo(bdev, p);
-       }
+       error = scsi_nonblockable_ioctl(sdp, cmd, p, filp);
+       if (!scsi_block_when_processing_errors(sdp) || !error)
+               return error;
 
        /*
         * Send SCSI addressing ioctls directly to mid level, send other
@@ -648,7 +759,7 @@ static int sd_media_changed(struct gendisk *disk)
         */
        retval = -ENODEV;
        if (scsi_block_when_processing_errors(sdp))
-               retval = scsi_ioctl(sdp, SCSI_IOCTL_TEST_UNIT_READY, NULL);
+               retval = scsi_test_unit_ready(sdp, SD_TIMEOUT, SD_MAX_RETRIES);
 
        /*
         * Unable to test, unit probably not ready.   This usually
@@ -676,17 +787,119 @@ not_present:
        return 1;
 }
 
+/*
+ * Issue SYNCHRONIZE CACHE to @sdp, making up to three attempts.
+ *
+ * Returns -ENODEV if the device is offline, 0 as soon as one attempt
+ * succeeds, or the result of the last failed attempt after logging its
+ * status bytes (and the sense header when DRIVER_SENSE is set).
+ */
+static int sd_sync_cache(struct scsi_device *sdp)
+{
+       struct scsi_sense_hdr sshdr;
+       int attempts_left = 3;
+       int res;
+
+       if (!scsi_device_online(sdp))
+               return -ENODEV;
+
+       do {
+               /*
+                * Everything after the opcode stays zero, which asks
+                * the device to flush everything.
+                */
+               unsigned char cmd[10] = { SYNCHRONIZE_CACHE };
+
+               res = scsi_execute_req(sdp, cmd, DMA_NONE, NULL, 0, &sshdr,
+                                      SD_TIMEOUT, SD_MAX_RETRIES);
+       } while (res != 0 && --attempts_left > 0);
+
+       if (res == 0)
+               return 0;
+
+       printk(KERN_WARNING "FAILED\n  status = %x, message = %02x, "
+                           "host = %d, driver = %02x\n  ",
+                           status_byte(res), msg_byte(res),
+                           host_byte(res), driver_byte(res));
+       if (driver_byte(res) & DRIVER_SENSE)
+               scsi_print_sense_hdr("sd", &sshdr);
+
+       return res;
+}
+
+/*
+ * Flush the write cache of the disk bound to @dev.
+ *
+ * Takes a reference on the scsi_disk for the duration of the call and
+ * returns -ENODEV when none can be obtained.  The cache is synchronized
+ * only when the disk's WCE bit is set; otherwise 0 is returned.
+ * @error_sector is not used.
+ */
+static int sd_issue_flush(struct device *dev, sector_t *error_sector)
+{
+       struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
+       int ret;
+
+       if (!sdkp)
+               return -ENODEV;
+
+       ret = sdkp->WCE ? sd_sync_cache(to_scsi_device(dev)) : 0;
+       scsi_disk_put(sdkp);
+       return ret;
+}
+
+/*
+ * Turn @rq into a REQ_TYPE_BLOCK_PC request carrying a 10-byte
+ * SYNCHRONIZE CACHE command with all other command bytes zeroed and
+ * the SD_TIMEOUT timeout.  @q is not used.
+ */
+static void sd_prepare_flush(request_queue_t *q, struct request *rq)
+{
+       rq->cmd_type = REQ_TYPE_BLOCK_PC;
+       rq->timeout = SD_TIMEOUT;
+
+       memset(rq->cmd, 0, sizeof(rq->cmd));
+       rq->cmd[0] = SYNCHRONIZE_CACHE;
+       rq->cmd_len = 10;
+}
+
 static void sd_rescan(struct device *dev)
 {
-       struct scsi_disk *sdkp = dev_get_drvdata(dev);
-       sd_revalidate_disk(sdkp->disk);
+       struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
+
+       if (sdkp) {
+               sd_revalidate_disk(sdkp->disk);
+               scsi_disk_put(sdkp);
+       }
 }
 
+
+#ifdef CONFIG_COMPAT
+/*
+ * Called directly from the VFS.  An ioctl is passed to the host
+ * template's compat_ioctl handler when one exists; otherwise
+ * -ENOIOCTLCMD is returned so the other translation paths are tried.
+ */
+static long sd_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       struct block_device *bdev = file->f_path.dentry->d_inode->i_bdev;
+       struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device;
+
+       /*
+        * While error recovery is in progress nobody else may use the
+        * device, and if recovery fails the device may be taken
+        * offline, after which all further access is prohibited.
+        */
+       if (!scsi_block_when_processing_errors(sdev))
+               return -ENODEV;
+
+       /* No host handler: let the static ioctl translation table decide. */
+       if (!sdev->host->hostt->compat_ioctl)
+               return -ENOIOCTLCMD;
+
+       return sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg);
+}
+#endif
+
 static struct block_device_operations sd_fops = {
        .owner                  = THIS_MODULE,
        .open                   = sd_open,
        .release                = sd_release,
        .ioctl                  = sd_ioctl,
+       .getgeo                 = sd_getgeo,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl           = sd_compat_ioctl,
+#endif
        .media_changed          = sd_media_changed,
        .revalidate_disk        = sd_revalidate_disk,
 };
@@ -701,115 +914,108 @@ static struct block_device_operations sd_fops = {
 static void sd_rw_intr(struct scsi_cmnd * SCpnt)
 {
        int result = SCpnt->result;
-       int this_count = SCpnt->bufflen;
-       int good_bytes = (result == 0 ? this_count : 0);
-       sector_t block_sectors = 1;
-       sector_t error_sector;
+       unsigned int xfer_size = SCpnt->request_bufflen;
+       unsigned int good_bytes = result ? 0 : xfer_size;
+       u64 start_lba = SCpnt->request->sector;
+       u64 bad_lba;
+       struct scsi_sense_hdr sshdr;
+       int sense_valid = 0;
+       int sense_deferred = 0;
+       int info_valid;
+
+       if (result) {
+               sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
+               if (sense_valid)
+                       sense_deferred = scsi_sense_is_deferred(&sshdr);
+       }
 #ifdef CONFIG_SCSI_LOGGING
        SCSI_LOG_HLCOMPLETE(1, printk("sd_rw_intr: %s: res=0x%x\n", 
                                SCpnt->request->rq_disk->disk_name, result));
-       if (0 != result) {
-               SCSI_LOG_HLCOMPLETE(1, printk("sd_rw_intr: sb[0,2,asc,ascq]"
-                               "=%x,%x,%x,%x\n", SCpnt->sense_buffer[0],
-                       SCpnt->sense_buffer[2], SCpnt->sense_buffer[12],
-                       SCpnt->sense_buffer[13]));
+       if (sense_valid) {
+               SCSI_LOG_HLCOMPLETE(1, printk("sd_rw_intr: sb[respc,sk,asc,"
+                               "ascq]=%x,%x,%x,%x\n", sshdr.response_code,
+                               sshdr.sense_key, sshdr.asc, sshdr.ascq));
        }
 #endif
-       /*
-          Handle MEDIUM ERRORs that indicate partial success.  Since this is a
-          relatively rare error condition, no care is taken to avoid
-          unnecessary additional work such as memcpy's that could be avoided.
-        */
-
-       /* An error occurred */
-       if (driver_byte(result) != 0 &&         /* An error occurred */
-           (SCpnt->sense_buffer[0] & 0x7f) == 0x70) { /* Sense current */
-               switch (SCpnt->sense_buffer[2]) {
-               case MEDIUM_ERROR:
-                       if (!(SCpnt->sense_buffer[0] & 0x80))
-                               break;
-                       if (!blk_fs_request(SCpnt->request))
-                               break;
-                       error_sector = (SCpnt->sense_buffer[3] << 24) |
-                       (SCpnt->sense_buffer[4] << 16) |
-                       (SCpnt->sense_buffer[5] << 8) |
-                       SCpnt->sense_buffer[6];
-                       if (SCpnt->request->bio != NULL)
-                               block_sectors = bio_sectors(SCpnt->request->bio);
-                       switch (SCpnt->device->sector_size) {
-                       case 1024:
-                               error_sector <<= 1;
-                               if (block_sectors < 2)
-                                       block_sectors = 2;
-                               break;
-                       case 2048:
-                               error_sector <<= 2;
-                               if (block_sectors < 4)
-                                       block_sectors = 4;
-                               break;
-                       case 4096:
-                               error_sector <<=3;
-                               if (block_sectors < 8)
-                                       block_sectors = 8;
-                               break;
-                       case 256:
-                               error_sector >>= 1;
-                               break;
-                       default:
-                               break;
-                       }
+       if (driver_byte(result) != DRIVER_SENSE &&
+           (!sense_valid || sense_deferred))
+               goto out;
 
-                       error_sector &= ~(block_sectors - 1);
-                       good_bytes = (error_sector - SCpnt->request->sector) << 9;
-                       if (good_bytes < 0 || good_bytes >= this_count)
-                               good_bytes = 0;
+       switch (sshdr.sense_key) {
+       case HARDWARE_ERROR:
+       case MEDIUM_ERROR:
+               if (!blk_fs_request(SCpnt->request))
+                       goto out;
+               info_valid = scsi_get_sense_info_fld(SCpnt->sense_buffer,
+                                                    SCSI_SENSE_BUFFERSIZE,
+                                                    &bad_lba);
+               if (!info_valid)
+                       goto out;
+               if (xfer_size <= SCpnt->device->sector_size)
+                       goto out;
+               switch (SCpnt->device->sector_size) {
+               case 256:
+                       start_lba <<= 1;
                        break;
-
-               case RECOVERED_ERROR: /* an error occurred, but it recovered */
-               case NO_SENSE: /* LLDD got sense data */
-                       /*
-                        * Inform the user, but make sure that it's not treated
-                        * as a hard error.
-                        */
-                       scsi_print_sense("sd", SCpnt);
-                       SCpnt->result = 0;
-                       SCpnt->sense_buffer[0] = 0x0;
-                       good_bytes = this_count;
+               case 512:
                        break;
-
-               case ILLEGAL_REQUEST:
-                       if (SCpnt->device->use_10_for_rw &&
-                           (SCpnt->cmnd[0] == READ_10 ||
-                            SCpnt->cmnd[0] == WRITE_10))
-                               SCpnt->device->use_10_for_rw = 0;
-                       if (SCpnt->device->use_10_for_ms &&
-                           (SCpnt->cmnd[0] == MODE_SENSE_10 ||
-                            SCpnt->cmnd[0] == MODE_SELECT_10))
-                               SCpnt->device->use_10_for_ms = 0;
+               case 1024:
+                       start_lba >>= 1;
+                       break;
+               case 2048:
+                       start_lba >>= 2;
+                       break;
+               case 4096:
+                       start_lba >>= 3;
                        break;
-
                default:
+                       /* Print something here with limiting frequency. */
+                       goto out;
                        break;
                }
+               /* This computation should always be done in terms of
+                * the resolution of the device's medium.
+                */
+               good_bytes = (bad_lba - start_lba)*SCpnt->device->sector_size;
+               break;
+       case RECOVERED_ERROR:
+       case NO_SENSE:
+               /* Inform the user, but make sure that it's not treated
+                * as a hard error.
+                */
+               scsi_print_sense("sd", SCpnt);
+               SCpnt->result = 0;
+               memset(SCpnt->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+               good_bytes = xfer_size;
+               break;
+       case ILLEGAL_REQUEST:
+               if (SCpnt->device->use_10_for_rw &&
+                   (SCpnt->cmnd[0] == READ_10 ||
+                    SCpnt->cmnd[0] == WRITE_10))
+                       SCpnt->device->use_10_for_rw = 0;
+               if (SCpnt->device->use_10_for_ms &&
+                   (SCpnt->cmnd[0] == MODE_SENSE_10 ||
+                    SCpnt->cmnd[0] == MODE_SELECT_10))
+                       SCpnt->device->use_10_for_ms = 0;
+               break;
+       default:
+               break;
        }
-       /*
-        * This calls the generic completion function, now that we know
-        * how many actual sectors finished, and how many sectors we need
-        * to say have failed.
-        */
-       scsi_io_completion(SCpnt, good_bytes, block_sectors << 9);
+ out:
+       scsi_io_completion(SCpnt, good_bytes);
 }
 
-static int media_not_present(struct scsi_disk *sdkp, struct scsi_request *srp)
+static int media_not_present(struct scsi_disk *sdkp,
+                            struct scsi_sense_hdr *sshdr)
 {
-       if (!srp->sr_result)
+
+       if (!scsi_sense_valid(sshdr))
                return 0;
-       if (!(driver_byte(srp->sr_result) & DRIVER_SENSE))
+       /* not invoked for commands that could return deferred errors */
+       if (sshdr->sense_key != NOT_READY &&
+           sshdr->sense_key != UNIT_ATTENTION)
                return 0;
-       if (srp->sr_sense_buffer[2] != NOT_READY &&
-           srp->sr_sense_buffer[2] != UNIT_ATTENTION)
-               return 0;
-       if (srp->sr_sense_buffer[12] != 0x3A) /* medium not present */
+       if (sshdr->asc != 0x3A) /* medium not present */
                return 0;
 
        set_media_not_present(sdkp);
@@ -820,12 +1026,14 @@ static int media_not_present(struct scsi_disk *sdkp, struct scsi_request *srp)
  * spinup disk - called only in sd_revalidate_disk()
  */
 static void
-sd_spinup_disk(struct scsi_disk *sdkp, char *diskname,
-              struct scsi_request *SRpnt, unsigned char *buffer) {
+sd_spinup_disk(struct scsi_disk *sdkp, char *diskname)
+{
        unsigned char cmd[10];
-       unsigned long spintime_value = 0;
+       unsigned long spintime_expire = 0;
        int retries, spintime;
        unsigned int the_result;
+       struct scsi_sense_hdr sshdr;
+       int sense_valid = 0;
 
        spintime = 0;
 
@@ -838,34 +1046,33 @@ sd_spinup_disk(struct scsi_disk *sdkp, char *diskname,
                        cmd[0] = TEST_UNIT_READY;
                        memset((void *) &cmd[1], 0, 9);
 
-                       SRpnt->sr_cmd_len = 0;
-                       SRpnt->sr_sense_buffer[0] = 0;
-                       SRpnt->sr_sense_buffer[2] = 0;
-                       SRpnt->sr_data_direction = DMA_NONE;
+                       the_result = scsi_execute_req(sdkp->device, cmd,
+                                                     DMA_NONE, NULL, 0,
+                                                     &sshdr, SD_TIMEOUT,
+                                                     SD_MAX_RETRIES);
 
-                       scsi_wait_req (SRpnt, (void *) cmd, (void *) buffer,
-                                      0/*512*/, SD_TIMEOUT, SD_MAX_RETRIES);
+                       /*
+                        * If the drive has indicated to us that it
+                        * doesn't have any media in it, don't bother
+                        * with any more polling.
+                        */
+                       if (media_not_present(sdkp, &sshdr))
+                               return;
 
-                       the_result = SRpnt->sr_result;
+                       if (the_result)
+                               sense_valid = scsi_sense_valid(&sshdr);
                        retries++;
                } while (retries < 3 && 
                         (!scsi_status_is_good(the_result) ||
                          ((driver_byte(the_result) & DRIVER_SENSE) &&
-                          SRpnt->sr_sense_buffer[2] == UNIT_ATTENTION)));
-
-               /*
-                * If the drive has indicated to us that it doesn't have
-                * any media in it, don't bother with any of the rest of
-                * this crap.
-                */
-               if (media_not_present(sdkp, SRpnt))
-                       return;
+                         sense_valid && sshdr.sense_key == UNIT_ATTENTION)));
 
                if ((driver_byte(the_result) & DRIVER_SENSE) == 0) {
                        /* no sense, TUR either succeeded or failed
                         * with a status error */
                        if(!spintime && !scsi_status_is_good(the_result))
-                               printk(KERN_NOTICE "%s: Unit Not Ready, error = 0x%x\n", diskname, the_result);
+                               printk(KERN_NOTICE "%s: Unit Not Ready, "
+                                      "error = 0x%x\n", diskname, the_result);
                        break;
                }
                                        
@@ -880,16 +1087,15 @@ sd_spinup_disk(struct scsi_disk *sdkp, char *diskname,
                 * If manual intervention is required, or this is an
                 * absent USB storage device, a spinup is meaningless.
                 */
-               if (SRpnt->sr_sense_buffer[2] == NOT_READY &&
-                   SRpnt->sr_sense_buffer[12] == 4 /* not ready */ &&
-                   SRpnt->sr_sense_buffer[13] == 3) {
+               if (sense_valid &&
+                   sshdr.sense_key == NOT_READY &&
+                   sshdr.asc == 4 && sshdr.ascq == 3) {
                        break;          /* manual intervention required */
 
                /*
                 * Issue command to spin up drive when not ready
                 */
-               } else if (SRpnt->sr_sense_buffer[2] == NOT_READY) {
-                       unsigned long time1;
+               } else if (sense_valid && sshdr.sense_key == NOT_READY) {
                        if (!spintime) {
                                printk(KERN_NOTICE "%s: Spinning up disk...",
                                       diskname);
@@ -897,36 +1103,42 @@ sd_spinup_disk(struct scsi_disk *sdkp, char *diskname,
                                cmd[1] = 1;     /* Return immediately */
                                memset((void *) &cmd[2], 0, 8);
                                cmd[4] = 1;     /* Start spin cycle */
-                               SRpnt->sr_cmd_len = 0;
-                               SRpnt->sr_sense_buffer[0] = 0;
-                               SRpnt->sr_sense_buffer[2] = 0;
-
-                               SRpnt->sr_data_direction = DMA_NONE;
-                               scsi_wait_req(SRpnt, (void *)cmd, 
-                                             (void *) buffer, 0/*512*/, 
-                                             SD_TIMEOUT, SD_MAX_RETRIES);
-                               spintime_value = jiffies;
+                               scsi_execute_req(sdkp->device, cmd, DMA_NONE,
+                                                NULL, 0, &sshdr,
+                                                SD_TIMEOUT, SD_MAX_RETRIES);
+                               spintime_expire = jiffies + 100 * HZ;
+                               spintime = 1;
                        }
-                       spintime = 1;
-                       time1 = HZ;
                        /* Wait 1 second for next try */
-                       do {
-                               current->state = TASK_UNINTERRUPTIBLE;
-                               time1 = schedule_timeout(time1);
-                       } while(time1);
+                       msleep(1000);
                        printk(".");
+
+               /*
+                * Wait for USB flash devices with slow firmware.
+                * Yes, this sense key/ASC combination shouldn't
+                * occur here.  It's characteristic of these devices.
+                */
+               } else if (sense_valid &&
+                               sshdr.sense_key == UNIT_ATTENTION &&
+                               sshdr.asc == 0x28) {
+                       if (!spintime) {
+                               spintime_expire = jiffies + 5 * HZ;
+                               spintime = 1;
+                       }
+                       /* Wait 1 second for next try */
+                       msleep(1000);
                } else {
                        /* we don't understand the sense code, so it's
                         * probably pointless to loop */
                        if(!spintime) {
-                               printk(KERN_NOTICE "%s: Unit Not Ready, sense:\n", diskname);
-                               scsi_print_req_sense("", SRpnt);
+                               printk(KERN_NOTICE "%s: Unit Not Ready, "
+                                       "sense:\n", diskname);
+                               scsi_print_sense_hdr("", &sshdr);
                        }
                        break;
                }
                                
-       } while (spintime &&
-                time_after(spintime_value + 100 * HZ, jiffies));
+       } while (spintime && time_before_eq(jiffies, spintime_expire));
 
        if (spintime) {
                if (scsi_status_is_good(the_result))
@@ -941,12 +1153,15 @@ sd_spinup_disk(struct scsi_disk *sdkp, char *diskname,
  */
 static void
 sd_read_capacity(struct scsi_disk *sdkp, char *diskname,
-                struct scsi_request *SRpnt, unsigned char *buffer) {
+                unsigned char *buffer)
+{
        unsigned char cmd[16];
-       struct scsi_device *sdp = sdkp->device;
        int the_result, retries;
        int sector_size = 0;
        int longrc = 0;
+       struct scsi_sense_hdr sshdr;
+       int sense_valid = 0;
+       struct scsi_device *sdp = sdkp->device;
 
 repeat:
        retries = 3;
@@ -963,18 +1178,15 @@ repeat:
                        memset((void *) buffer, 0, 8);
                }
                
-               SRpnt->sr_cmd_len = 0;
-               SRpnt->sr_sense_buffer[0] = 0;
-               SRpnt->sr_sense_buffer[2] = 0;
-               SRpnt->sr_data_direction = DMA_FROM_DEVICE;
-
-               scsi_wait_req(SRpnt, (void *) cmd, (void *) buffer,
-                             longrc ? 12 : 8, SD_TIMEOUT, SD_MAX_RETRIES);
+               the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
+                                             buffer, longrc ? 12 : 8, &sshdr,
+                                             SD_TIMEOUT, SD_MAX_RETRIES);
 
-               if (media_not_present(sdkp, SRpnt))
+               if (media_not_present(sdkp, &sshdr))
                        return;
 
-               the_result = SRpnt->sr_result;
+               if (the_result)
+                       sense_valid = scsi_sense_valid(&sshdr);
                retries--;
 
        } while (the_result && retries);
@@ -989,20 +1201,20 @@ repeat:
                       driver_byte(the_result));
 
                if (driver_byte(the_result) & DRIVER_SENSE)
-                       scsi_print_req_sense("sd", SRpnt);
+                       scsi_print_sense_hdr("sd", &sshdr);
                else
                        printk("%s : sense not available. \n", diskname);
 
                /* Set dirty bit for removable devices if not ready -
                 * sometimes drives will not report this properly. */
                if (sdp->removable &&
-                   SRpnt->sr_sense_buffer[2] == NOT_READY)
+                   sense_valid && sshdr.sense_key == NOT_READY)
                        sdp->changed = 1;
 
                /* Either no media are present but the drive didn't tell us,
                   or they are present but the read capacity command fails */
                /* sdkp->media_present = 0; -- not always correct */
-               sdkp->capacity = 0x200000; /* 1 GB - random */
+               sdkp->capacity = 0; /* unknown mapped to zero - as usual */
 
                return;
        } else if (the_result && longrc) {
@@ -1031,9 +1243,12 @@ repeat:
                                       " READ CAPACITY(16).\n", diskname);
                                longrc = 1;
                                goto repeat;
-                       } else {
-                               printk(KERN_ERR "%s: too big for kernel.  Assuming maximum 2Tb\n", diskname);
                        }
+                       printk(KERN_ERR "%s: too big for this kernel.  Use a "
+                              "kernel compiled with support for large block "
+                              "devices.\n", diskname);
+                       sdkp->capacity = 0;
+                       goto got_data;
                }
                sdkp->capacity = 1 + (((sector_t)buffer[0] << 24) |
                        (buffer[1] << 16) |
@@ -1053,6 +1268,11 @@ repeat:
                        (buffer[9] << 16) | (buffer[10] << 8) | buffer[11];
        }       
 
+       /* Some devices return the total number of sectors, not the
+        * highest sector number.  Make the necessary adjustment. */
+       if (sdp->fix_capacity)
+               --sdkp->capacity;
+
 got_data:
        if (sector_size == 0) {
                sector_size = 512;
@@ -1074,6 +1294,13 @@ got_data:
                 * For this reason, we leave the thing in the table.
                 */
                sdkp->capacity = 0;
+               /*
+                * set a bogus sector size so the normal read/write
+                * logic in the block layer will eventually refuse any
+                * request on this device without tripping over power
+                * of two sector size assumptions
+                */
+               sector_size = 512;
        }
        {
                /*
@@ -1082,14 +1309,13 @@ got_data:
                 * Jacques Gelinas (Jacques@solucorp.qc.ca)
                 */
                int hard_sector = sector_size;
-               sector_t sz = sdkp->capacity * (hard_sector/256);
+               sector_t sz = (sdkp->capacity/2) * (hard_sector/256);
                request_queue_t *queue = sdp->request_queue;
-               sector_t mb;
+               sector_t mb = sz;
 
                blk_queue_hardsect_size(queue, hard_sector);
                /* avoid 64-bit division on 32-bit platforms */
-               mb = sz >> 1;
-               sector_div(sz, 1250);
+               sector_div(sz, 625);
                mb -= sz - 974;
                sector_div(mb, 1950);
 
@@ -1114,38 +1340,42 @@ got_data:
 
-/* called with buffer of length 512 */
+/* called with buffer of length SD_BUF_SIZE */
 static inline int
-sd_do_mode_sense(struct scsi_request *SRpnt, int dbd, int modepage,
-                unsigned char *buffer, int len, struct scsi_mode_data *data)
+sd_do_mode_sense(struct scsi_device *sdp, int dbd, int modepage,
+                unsigned char *buffer, int len, struct scsi_mode_data *data,
+                struct scsi_sense_hdr *sshdr)
 {
-       return __scsi_mode_sense(SRpnt, dbd, modepage, buffer, len,
-                                SD_TIMEOUT, SD_MAX_RETRIES, data);
+       return scsi_mode_sense(sdp, dbd, modepage, buffer, len,
+                              SD_TIMEOUT, SD_MAX_RETRIES, data,
+                              sshdr);
 }
 
 /*
  * read write protect setting, if possible - called only in sd_revalidate_disk()
- * called with buffer of length 512
+ * called with buffer of length SD_BUF_SIZE
  */
 static void
 sd_read_write_protect_flag(struct scsi_disk *sdkp, char *diskname,
-                  struct scsi_request *SRpnt, unsigned char *buffer) {
+                          unsigned char *buffer)
+{
        int res;
+       struct scsi_device *sdp = sdkp->device;
        struct scsi_mode_data data;
 
        set_disk_ro(sdkp->disk, 0);
-       if (sdkp->device->skip_ms_page_3f) {
+       if (sdp->skip_ms_page_3f) {
                printk(KERN_NOTICE "%s: assuming Write Enabled\n", diskname);
                return;
        }
 
-       if (sdkp->device->use_192_bytes_for_3f) {
-               res = sd_do_mode_sense(SRpnt, 0, 0x3F, buffer, 192, &data);
+       if (sdp->use_192_bytes_for_3f) {
+               res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 192, &data, NULL);
        } else {
                /*
                 * First attempt: ask for all pages (0x3F), but only 4 bytes.
                 * We have to start carefully: some devices hang if we ask
                 * for more than is available.
                 */
-               res = sd_do_mode_sense(SRpnt, 0, 0x3F, buffer, 4, &data);
+               res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 4, &data, NULL);
 
                /*
                 * Second attempt: ask for page 0 When only page 0 is
@@ -1154,14 +1384,14 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, char *diskname,
                 * CDB.
                 */
                if (!scsi_status_is_good(res))
-                       res = sd_do_mode_sense(SRpnt, 0, 0, buffer, 4, &data);
+                       res = sd_do_mode_sense(sdp, 0, 0, buffer, 4, &data, NULL);
 
                /*
                 * Third attempt: ask 255 bytes, as we did earlier.
                 */
                if (!scsi_status_is_good(res))
-                       res = sd_do_mode_sense(SRpnt, 0, 0x3F, buffer, 255,
-                                              &data);
+                       res = sd_do_mode_sense(sdp, 0, 0x3F, buffer, 255,
+                                              &data, NULL);
        }
 
        if (!scsi_status_is_good(res)) {
@@ -1179,26 +1409,43 @@ sd_read_write_protect_flag(struct scsi_disk *sdkp, char *diskname,
 
 /*
  * sd_read_cache_type - called only from sd_revalidate_disk()
- * called with buffer of length 512
+ * called with buffer of length SD_BUF_SIZE
  */
 static void
 sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
-                  struct scsi_request *SRpnt, unsigned char *buffer) {
+                  unsigned char *buffer)
+{
        int len = 0, res;
+       struct scsi_device *sdp = sdkp->device;
 
-       const int dbd = 0;         /* DBD */
-       const int modepage = 0x08; /* current values, cache page */
+       int dbd;
+       int modepage;
        struct scsi_mode_data data;
+       struct scsi_sense_hdr sshdr;
 
-       if (sdkp->device->skip_ms_page_8)
+       if (sdp->skip_ms_page_8)
                goto defaults;
 
+       if (sdp->type == TYPE_RBC) {
+               modepage = 6;
+               dbd = 8;
+       } else {
+               modepage = 8;
+               dbd = 0;
+       }
+
        /* cautiously ask */
-       res = sd_do_mode_sense(SRpnt, dbd, modepage, buffer, 4, &data);
+       res = sd_do_mode_sense(sdp, dbd, modepage, buffer, 4, &data, &sshdr);
 
        if (!scsi_status_is_good(res))
                goto bad_sense;
 
+       if (!data.header_length) {
+               modepage = 6;
+               printk(KERN_ERR "%s: missing header in MODE_SENSE response\n",
+                      diskname);
+       }
+
        /* that went OK, now ask for the proper length */
        len = data.length;
 
@@ -1213,48 +1460,68 @@ sd_read_cache_type(struct scsi_disk *sdkp, char *diskname,
 
        /* Take headers and block descriptors into account */
        len += data.header_length + data.block_descriptor_length;
+       if (len > SD_BUF_SIZE)
+               goto bad_sense;
 
        /* Get the data */
-       res = sd_do_mode_sense(SRpnt, dbd, modepage, buffer, len, &data);
+       res = sd_do_mode_sense(sdp, dbd, modepage, buffer, len, &data, &sshdr);
 
        if (scsi_status_is_good(res)) {
-               const char *types[] = {
-                       "write through", "none", "write back",
-                       "write back, no read (daft)"
-               };
-               int ct = 0;
-               int offset = data.header_length +
-                       data.block_descriptor_length + 2;
+               int offset = data.header_length + data.block_descriptor_length;
 
-               sdkp->WCE = ((buffer[offset] & 0x04) != 0);
-               sdkp->RCD = ((buffer[offset] & 0x01) != 0);
+               if (offset >= SD_BUF_SIZE - 2) {
+                       printk(KERN_ERR "%s: malformed MODE SENSE response",
+                               diskname);
+                       goto defaults;
+               }
+
+               if ((buffer[offset] & 0x3f) != modepage) {
+                       printk(KERN_ERR "%s: got wrong page\n", diskname);
+                       goto defaults;
+               }
 
-               ct =  sdkp->RCD + 2*sdkp->WCE;
+               if (modepage == 8) {
+                       sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
+                       sdkp->RCD = ((buffer[offset + 2] & 0x01) != 0);
+               } else {
+                       sdkp->WCE = ((buffer[offset + 2] & 0x01) == 0);
+                       sdkp->RCD = 0;
+               }
+
+               sdkp->DPOFUA = (data.device_specific & 0x10) != 0;
+               if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) {
+                       printk(KERN_NOTICE "SCSI device %s: uses "
+                              "READ/WRITE(6), disabling FUA\n", diskname);
+                       sdkp->DPOFUA = 0;
+               }
 
-               printk(KERN_NOTICE "SCSI device %s: drive cache: %s\n",
-                      diskname, types[ct]);
+               printk(KERN_NOTICE "SCSI device %s: "
+                      "write cache: %s, read cache: %s, %s\n",
+                      diskname,
+                      sdkp->WCE ? "enabled" : "disabled",
+                      sdkp->RCD ? "disabled" : "enabled",
+                      sdkp->DPOFUA ? "supports DPO and FUA"
+                      : "doesn't support DPO or FUA");
 
                return;
        }
 
 bad_sense:
-       if ((SRpnt->sr_sense_buffer[0] & 0x70) == 0x70
-            && (SRpnt->sr_sense_buffer[2] & 0x0f) == ILLEGAL_REQUEST
-            /* ASC 0x24 ASCQ 0x00: Invalid field in CDB */
-            && SRpnt->sr_sense_buffer[12] == 0x24
-            && SRpnt->sr_sense_buffer[13] == 0x00) {
+       if (scsi_sense_valid(&sshdr) &&
+           sshdr.sense_key == ILLEGAL_REQUEST &&
+           sshdr.asc == 0x24 && sshdr.ascq == 0x0)
                printk(KERN_NOTICE "%s: cache data unavailable\n",
-                      diskname);
-       } else {
+                      diskname);       /* Invalid field in CDB */
+       else
                printk(KERN_ERR "%s: asking for cache data failed\n",
                       diskname);
-       }
 
 defaults:
        printk(KERN_ERR "%s: assuming drive cache: write through\n",
               diskname);
        sdkp->WCE = 0;
        sdkp->RCD = 0;
+       sdkp->DPOFUA = 0;
 }
 
 /**
@@ -1266,8 +1533,8 @@ static int sd_revalidate_disk(struct gendisk *disk)
 {
        struct scsi_disk *sdkp = scsi_disk(disk);
        struct scsi_device *sdp = sdkp->device;
-       struct scsi_request *sreq;
        unsigned char *buffer;
+       unsigned ordered;
 
        SCSI_LOG_HLQUEUE(3, printk("sd_revalidate_disk: disk=%s\n", disk->disk_name));
 
@@ -1278,18 +1545,11 @@ static int sd_revalidate_disk(struct gendisk *disk)
        if (!scsi_device_online(sdp))
                goto out;
 
-       sreq = scsi_allocate_request(sdp, GFP_KERNEL);
-       if (!sreq) {
-               printk(KERN_WARNING "(sd_revalidate_disk:) Request allocation "
-                      "failure.\n");
-               goto out;
-       }
-
-       buffer = kmalloc(512, GFP_KERNEL | __GFP_DMA);
+       buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA);
        if (!buffer) {
                printk(KERN_WARNING "(sd_revalidate_disk:) Memory allocation "
                       "failure.\n");
-               goto out_release_request;
+               goto out;
        }
 
        /* defaults, until the device tells us otherwise */
@@ -1300,25 +1560,35 @@ static int sd_revalidate_disk(struct gendisk *disk)
        sdkp->WCE = 0;
        sdkp->RCD = 0;
 
-       sd_spinup_disk(sdkp, disk->disk_name, sreq, buffer);
+       sd_spinup_disk(sdkp, disk->disk_name);
 
        /*
         * Without media there is no reason to ask; moreover, some devices
         * react badly if we do.
         */
        if (sdkp->media_present) {
-               sd_read_capacity(sdkp, disk->disk_name, sreq, buffer);
-               if (sdp->removable)
-                       sd_read_write_protect_flag(sdkp, disk->disk_name,
-                                       sreq, buffer);
-               sd_read_cache_type(sdkp, disk->disk_name, sreq, buffer);
+               sd_read_capacity(sdkp, disk->disk_name, buffer);
+               sd_read_write_protect_flag(sdkp, disk->disk_name, buffer);
+               sd_read_cache_type(sdkp, disk->disk_name, buffer);
        }
-               
+
+       /*
+        * We now have all cache related info, determine how we deal
+        * with ordered requests.  Note that as the current SCSI
+        * dispatch function can alter request order, we cannot use
+        * QUEUE_ORDERED_TAG_* even when ordered tag is supported.
+        */
+       if (sdkp->WCE)
+               ordered = sdkp->DPOFUA
+                       ? QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH;
+       else
+               ordered = QUEUE_ORDERED_DRAIN;
+
+       blk_queue_ordered(sdkp->disk->queue, ordered, sd_prepare_flush);
+
        set_capacity(disk, sdkp->capacity);
        kfree(buffer);
 
- out_release_request: 
-       scsi_release_request(sreq);
  out:
        return 0;
 }
@@ -1347,42 +1617,35 @@ static int sd_probe(struct device *dev)
        struct scsi_disk *sdkp;
        struct gendisk *gd;
        u32 index;
-       int error, devno;
+       int error;
 
        error = -ENODEV;
-       if ((sdp->type != TYPE_DISK) && (sdp->type != TYPE_MOD))
+       if (sdp->type != TYPE_DISK && sdp->type != TYPE_MOD && sdp->type != TYPE_RBC)
                goto out;
 
-       SCSI_LOG_HLQUEUE(3, printk("sd_attach: scsi device: <%d,%d,%d,%d>\n", 
-                        sdp->host->host_no, sdp->channel, sdp->id, sdp->lun));
+       SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
+                                       "sd_attach\n"));
 
        error = -ENOMEM;
-       sdkp = kmalloc(sizeof(*sdkp), GFP_KERNEL);
+       sdkp = kzalloc(sizeof(*sdkp), GFP_KERNEL);
        if (!sdkp)
                goto out;
 
-       memset (sdkp, 0, sizeof(*sdkp));
-       kref_init(&sdkp->kref, scsi_disk_release);
-
-       /* Note: We can accomodate 64 partitions, but the genhd code
-        * assumes partitions allocate consecutive minors, which they don't.
-        * So for now stay with max 16 partitions and leave two spare bits. 
-        * Later, we may change the genhd code and the alloc_disk() call
-        * and the ->minors assignment here.    KG, 2004-02-10
-        */ 
        gd = alloc_disk(16);
        if (!gd)
                goto out_free;
 
+       if (!idr_pre_get(&sd_index_idr, GFP_KERNEL))
+               goto out_put;
+
        spin_lock(&sd_index_lock);
-       index = find_first_zero_bit(sd_index_bits, SD_DISKS);
-       if (index == SD_DISKS) {
-               spin_unlock(&sd_index_lock);
+       error = idr_get_new(&sd_index_idr, NULL, &index);
+       spin_unlock(&sd_index_lock);
+
+       if (index >= SD_MAX_DISKS)
                error = -EBUSY;
+       if (error)
                goto out_put;
-       }
-       __set_bit(index, sd_index_bits);
-       spin_unlock(&sd_index_lock);
 
        sdkp->device = sdp;
        sdkp->driver = &sd_template;
@@ -1391,21 +1654,30 @@ static int sd_probe(struct device *dev)
        sdkp->openers = 0;
 
        if (!sdp->timeout) {
-               if (sdp->type == TYPE_DISK)
+               if (sdp->type != TYPE_MOD)
                        sdp->timeout = SD_TIMEOUT;
                else
                        sdp->timeout = SD_MOD_TIMEOUT;
        }
 
-       devno = make_sd_dev(index, 0);
-       gd->major = MAJOR(devno);
-       gd->first_minor = MINOR(devno);
+       class_device_initialize(&sdkp->cdev);
+       sdkp->cdev.dev = &sdp->sdev_gendev;
+       sdkp->cdev.class = &sd_disk_class;
+       strncpy(sdkp->cdev.class_id, sdp->sdev_gendev.bus_id, BUS_ID_SIZE);
+
+       if (class_device_add(&sdkp->cdev))
+               goto out_put;
+
+       get_device(&sdp->sdev_gendev);
+
+       gd->major = sd_major((index & 0xf0) >> 4);
+       gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00);
        gd->minors = 16;
        gd->fops = &sd_fops;
 
        if (index < 26) {
                sprintf(gd->disk_name, "sd%c", 'a' + index % 26);
-       } else if (index < (26*27)) {
+       } else if (index < (26 + 1) * 26) {
                sprintf(gd->disk_name, "sd%c%c",
                        'a' + index / 26 - 1,'a' + index % 26);
        } else {
@@ -1416,9 +1688,8 @@ static int sd_probe(struct device *dev)
                        'a' + m1, 'a' + m2, 'a' + m3);
        }
 
-       strcpy(gd->devfs_name, sdp->devfs_name);
-
        gd->private_data = &sdkp->driver;
+       gd->queue = sdkp->device->request_queue;
 
        sd_revalidate_disk(gd);
 
@@ -1426,23 +1697,20 @@ static int sd_probe(struct device *dev)
        gd->flags = GENHD_FL_DRIVERFS;
        if (sdp->removable)
                gd->flags |= GENHD_FL_REMOVABLE;
-       gd->queue = sdkp->device->request_queue;
 
        dev_set_drvdata(dev, sdkp);
        add_disk(gd);
 
-       printk(KERN_NOTICE "Attached scsi %sdisk %s at scsi%d, channel %d, "
-              "id %d, lun %d\n", sdp->removable ? "removable " : "",
-              gd->disk_name, sdp->host->host_no, sdp->channel,
-              sdp->id, sdp->lun);
+       sdev_printk(KERN_NOTICE, sdp, "Attached scsi %sdisk %s\n",
+                   sdp->removable ? "removable " : "", gd->disk_name);
 
        return 0;
 
-out_put:
+ out_put:
        put_disk(gd);
-out_free:
+ out_free:
        kfree(sdkp);
-out:
+ out:
        return error;
 }
 
@@ -1461,36 +1729,39 @@ static int sd_remove(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
 
+       class_device_del(&sdkp->cdev);
        del_gendisk(sdkp->disk);
        sd_shutdown(dev);
-       down(&sd_ref_sem);
-       kref_put(&sdkp->kref);
-       up(&sd_ref_sem);
+
+       mutex_lock(&sd_ref_mutex);
+       dev_set_drvdata(dev, NULL);
+       class_device_put(&sdkp->cdev);
+       mutex_unlock(&sd_ref_mutex);
 
        return 0;
 }
 
 /**
  *     scsi_disk_release - Called to free the scsi_disk structure
- *     @kref: pointer to embedded kref
+ *     @cdev: pointer to embedded class device
  *
- *     sd_ref_sem must be held entering this routine.  Because it is
+ *     sd_ref_mutex must be held entering this routine.  Because it is
  *     called on last put, you should always use the scsi_disk_get()
- *     scsi_disk_put() helpers which manipulate the semaphore directly
+ *     scsi_disk_put() helpers which manipulate the mutex directly
- *     and never do a direct kref_put().
+ *     and never do a direct class_device_put().
  **/
-static void scsi_disk_release(struct kref *kref)
+static void scsi_disk_release(struct class_device *cdev)
 {
-       struct scsi_disk *sdkp = to_scsi_disk(kref);
+       struct scsi_disk *sdkp = to_scsi_disk(cdev);
        struct gendisk *disk = sdkp->disk;
        
        spin_lock(&sd_index_lock);
-       clear_bit(sdkp->index, sd_index_bits);
+       idr_remove(&sd_index_idr, sdkp->index);
        spin_unlock(&sd_index_lock);
 
        disk->private_data = NULL;
-
        put_disk(disk);
+       put_device(&sdkp->device->sdev_gendev);
 
        kfree(sdkp);
 }
@@ -1503,53 +1774,18 @@ static void scsi_disk_release(struct kref *kref)
 static void sd_shutdown(struct device *dev)
 {
        struct scsi_device *sdp = to_scsi_device(dev);
-       struct scsi_disk *sdkp;
-       struct scsi_request *sreq;
-       int retries, res;
+       struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
 
-       sdkp = dev_get_drvdata(dev);
        if (!sdkp)
-               return;         /* this can happen */
-
-       if (!scsi_device_online(sdp) || !sdkp->WCE)
-               return;
-
-       printk(KERN_NOTICE "Synchronizing SCSI cache for disk %s: ",
-                       sdkp->disk->disk_name);
+               return;         /* this can happen */
 
-       sreq = scsi_allocate_request(sdp, GFP_KERNEL);
-       if (!sreq) {
-               printk("FAILED\n  No memory for request\n");
-               return;
+       if (sdkp->WCE) {
+               printk(KERN_NOTICE "Synchronizing SCSI cache for disk %s: \n",
+                               sdkp->disk->disk_name);
+               sd_sync_cache(sdp);
        }
-
-       sreq->sr_data_direction = DMA_NONE;
-       for (retries = 3; retries > 0; --retries) {
-               unsigned char cmd[10] = { 0 };
-
-               cmd[0] = SYNCHRONIZE_CACHE;
-               /*
-                * Leave the rest of the command zero to indicate
-                * flush everything.
-                */
-               scsi_wait_req(sreq, cmd, NULL, 0, SD_TIMEOUT, SD_MAX_RETRIES);
-               if (sreq->sr_result == 0)
-                       break;
-       }
-
-       res = sreq->sr_result;
-       if (res) {
-               printk(KERN_WARNING "FAILED\n  status = %x, message = %02x, "
-                                   "host = %d, driver = %02x\n  ",
-                                   status_byte(res), msg_byte(res),
-                                   host_byte(res), driver_byte(res));
-                       if (driver_byte(res) & DRIVER_SENSE)
-                               scsi_print_req_sense("sd", sreq);
-       }
-       
-       scsi_release_request(sreq);
-       printk("\n");
-}      
+       scsi_disk_put(sdkp);
+}
 
 /**
  *     init_sd - entry point for this driver (both when built in or when
@@ -1559,7 +1795,7 @@ static void sd_shutdown(struct device *dev)
  **/
 static int __init init_sd(void)
 {
-       int majors = 0, i;
+       int majors = 0, i, err;
 
        SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n"));
 
@@ -1570,7 +1806,22 @@ static int __init init_sd(void)
        if (!majors)
                return -ENODEV;
 
-       return scsi_register_driver(&sd_template.gendrv);
+       err = class_register(&sd_disk_class);
+       if (err)
+               goto err_out;
+
+       err = scsi_register_driver(&sd_template.gendrv);
+       if (err)
+               goto err_out_class;
+
+       return 0;
+
+err_out_class:
+       class_unregister(&sd_disk_class);
+err_out:
+       for (i = 0; i < SD_MAJORS; i++)
+               unregister_blkdev(sd_major(i), "sd");
+       return err;
 }
 
 /**
@@ -1585,13 +1836,11 @@ static void __exit exit_sd(void)
        SCSI_LOG_HLQUEUE(3, printk("exit_sd: exiting sd driver\n"));
 
        scsi_unregister_driver(&sd_template.gendrv);
+       class_unregister(&sd_disk_class);
+
        for (i = 0; i < SD_MAJORS; i++)
                unregister_blkdev(sd_major(i), "sd");
 }
 
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Eric Youngdale");
-MODULE_DESCRIPTION("SCSI disk (sd) driver");
-
 module_init(init_sd);
 module_exit(exit_sd);