Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / drivers / block / aoe / aoecmd.c
index 9ac1a58..39da28d 100644 (file)
@@ -8,6 +8,8 @@
 #include <linux/blkdev.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/genhd.h>
+#include <asm/unaligned.h>
 #include "aoe.h"
 
 #define TIMERTICK (HZ / 10)
@@ -27,6 +29,7 @@ new_skb(struct net_device *if_dev, ulong len)
                skb->protocol = __constant_htons(ETH_P_AOE);
                skb->priority = 0;
                skb_put(skb, len);
+               memset(skb->head, 0, len);
                skb->next = skb->prev = NULL;
 
                /* tell the network layer not to perform IP checksums
@@ -90,19 +93,16 @@ newtag(struct aoedev *d)
 static int
 aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
 {
-       u16 type = __constant_cpu_to_be16(ETH_P_AOE);
-       u16 aoemajor = __cpu_to_be16(d->aoemajor);
        u32 host_tag = newtag(d);
-       u32 tag = __cpu_to_be32(host_tag);
 
        memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
        memcpy(h->dst, d->addr, sizeof h->dst);
-       memcpy(h->type, &type, sizeof type);
+       h->type = __constant_cpu_to_be16(ETH_P_AOE);    /* type, major and tag are stored big-endian in the header */
        h->verfl = AOE_HVER;
-       memcpy(h->major, &aoemajor, sizeof aoemajor);
+       h->major = cpu_to_be16(d->aoemajor);    /* direct assignment replaces the memcpy from a temporary */
        h->minor = d->aoeminor;
        h->cmd = AOECMD_ATA;
-       memcpy(h->tag, &tag, sizeof tag);
+       h->tag = cpu_to_be32(host_tag); /* header gets the big-endian tag; the host-order tag is returned */
 
        return host_tag;
 }
@@ -181,17 +181,76 @@ aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 
        skb = skb_prepare(d, f);
        if (skb) {
-               skb->next = d->skblist;
-               d->skblist = skb;
+               skb->next = NULL;
+               if (d->sendq_hd)
+                       d->sendq_tl->next = skb;
+               else
+                       d->sendq_hd = skb;
+               d->sendq_tl = skb;
        }
 }
 
+/* Build, without sending, a chain of broadcast AOECMD_CFG packets: one per
+ * interface accepted by is_aoe_netif().  If tail is non-NULL, *tail gets the
+ * chain's last skb.  Callers that cannot sleep transmit later, when interrupts are on. */
+static struct sk_buff *
+aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff **tail)
+{
+       struct aoe_hdr *h;
+       struct aoe_cfghdr *ch;  /* only used for sizeof *ch in this function */
+       struct sk_buff *skb, *sl, *sl_tail;
+       struct net_device *ifp;
+
+       sl = sl_tail = NULL;    /* empty chain: NULL is returned if no skb gets built */
+
+       read_lock(&dev_base_lock);
+       for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {      /* the dev_put() step also runs after each continue */
+               dev_hold(ifp);
+               if (!is_aoe_netif(ifp))
+                       continue;
+
+               skb = new_skb(ifp, sizeof *h + sizeof *ch);
+               if (skb == NULL) {
+                       printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
+                       continue;       /* skip this interface, keep building for the others */
+               }
+               if (sl_tail == NULL)    /* the first skb built stays last, since later ones are prepended */
+                       sl_tail = skb;
+               h = (struct aoe_hdr *) skb->mac.raw;
+               memset(h, 0, sizeof *h + sizeof *ch);
+
+               memset(h->dst, 0xff, sizeof h->dst);    /* all-ones (broadcast) destination address */
+               memcpy(h->src, ifp->dev_addr, sizeof h->src);
+               h->type = __constant_cpu_to_be16(ETH_P_AOE);
+               h->verfl = AOE_HVER;
+               h->major = cpu_to_be16(aoemajor);
+               h->minor = aoeminor;
+               h->cmd = AOECMD_CFG;
+
+               skb->next = sl; /* prepend to the chain */
+               sl = skb;
+       }
+       read_unlock(&dev_base_lock);
+
+       if (tail != NULL)
+               *tail = sl_tail;        /* NULL when the chain is empty */
+       return sl;
+}
+
 /* enters with d->lock held */
 void
 aoecmd_work(struct aoedev *d)
 {
        struct frame *f;
        struct buf *buf;
+
+       if (d->flags & DEVFL_PAUSE) {
+               if (!aoedev_isbusy(d))
+                       d->sendq_hd = aoecmd_cfg_pkts(d->aoemajor,
+                                               d->aoeminor, &d->sendq_tl);
+               return;
+       }
+
 loop:
        f = getframe(d, FREETAG);
        if (f == NULL)
@@ -215,7 +274,6 @@ rexmit(struct aoedev *d, struct frame *f)
        struct aoe_hdr *h;
        char buf[128];
        u32 n;
-       u32 net_tag;
 
        n = newtag(d);
 
@@ -227,13 +285,18 @@ rexmit(struct aoedev *d, struct frame *f)
 
        h = (struct aoe_hdr *) f->data;
        f->tag = n;
-       net_tag = __cpu_to_be32(n);
-       memcpy(h->tag, &net_tag, sizeof net_tag);
+       h->tag = cpu_to_be32(n);
+       memcpy(h->dst, d->addr, sizeof h->dst);
+       memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
 
        skb = skb_prepare(d, f);
        if (skb) {
-               skb->next = d->skblist;
-               d->skblist = skb;
+               skb->next = NULL;
+               if (d->sendq_hd)
+                       d->sendq_tl->next = skb;
+               else
+                       d->sendq_hd = skb;
+               d->sendq_tl = skb;
        }
 }
 
@@ -268,7 +331,7 @@ rexmit_timer(ulong vp)
        spin_lock_irqsave(&d->lock, flags);
 
        if (d->flags & DEVFL_TKILL) {
-tdie:          spin_unlock_irqrestore(&d->lock, flags);
+               spin_unlock_irqrestore(&d->lock, flags);
                return;
        }
        f = d->frames;
@@ -279,14 +342,14 @@ tdie:             spin_unlock_irqrestore(&d->lock, flags);
                        n /= HZ;
                        if (n > MAXWAIT) { /* waited too long.  device failure. */
                                aoedev_downdev(d);
-                               goto tdie;
+                               break;
                        }
                        rexmit(d, f);
                }
        }
 
-       sl = d->skblist;
-       d->skblist = NULL;
+       sl = d->sendq_hd;
+       d->sendq_hd = d->sendq_tl = NULL;
        if (sl) {
                n = d->rttavg <<= 1;
                if (n > MAXTIMER)
@@ -301,6 +364,37 @@ tdie:              spin_unlock_irqrestore(&d->lock, flags);
        aoenet_xmit(sl);
 }
 
+/* Performs work deferred until sleeping is OK: calls aoeblk_gdalloc() when
+ * DEVFL_GDALLOC is set; on DEVFL_NEWSIZE, writes the disk size to the bdev inode. */
+void
+aoecmd_sleepwork(void *vp)
+{
+       struct aoedev *d = (struct aoedev *) vp;        /* vp carries the aoedev */
+
+       if (d->flags & DEVFL_GDALLOC)
+               aoeblk_gdalloc(d);
+
+       if (d->flags & DEVFL_NEWSIZE) {
+               struct block_device *bd;
+               unsigned long flags;
+               u64 ssize;
+
+               ssize = d->gd->capacity;        /* scaled by <<9 below to get the inode size */
+               bd = bdget_disk(d->gd, 0);      /* NOTE(review): presumably the whole-disk bdev (index 0) -- confirm */
+
+               if (bd) {
+                       mutex_lock(&bd->bd_inode->i_mutex);     /* inode size is written under i_mutex */
+                       i_size_write(bd->bd_inode, (loff_t)ssize<<9);
+                       mutex_unlock(&bd->bd_inode->i_mutex);
+                       bdput(bd);
+               }
+               spin_lock_irqsave(&d->lock, flags);     /* flag updates happen under d->lock */
+               d->flags |= DEVFL_UP;   /* marked up even if bdget_disk() returned NULL */
+               d->flags &= ~DEVFL_NEWSIZE;
+               spin_unlock_irqrestore(&d->lock, flags);
+       }
+}
+
 static void
 ataid_complete(struct aoedev *d, unsigned char *id)
 {
@@ -308,16 +402,16 @@ ataid_complete(struct aoedev *d, unsigned char *id)
        u16 n;
 
        /* word 83: command set supported */
-       n = __le16_to_cpu(*((u16 *) &id[83<<1]));
+       n = le16_to_cpu(get_unaligned((__le16 *) &id[83<<1]));
 
        /* word 86: command set/feature enabled */
-       n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
+       n |= le16_to_cpu(get_unaligned((__le16 *) &id[86<<1]));
 
        if (n & (1<<10)) {      /* bit 10: LBA 48 */
                d->flags |= DEVFL_EXT;
 
                /* word 100: number lba48 sectors */
-               ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
+               ssize = le64_to_cpu(get_unaligned((__le64 *) &id[100<<1]));
 
                /* set as in ide-disk.c:init_idedisk_capacity */
                d->geo.cylinders = ssize;
@@ -328,28 +422,36 @@ ataid_complete(struct aoedev *d, unsigned char *id)
                d->flags &= ~DEVFL_EXT;
 
                /* number lba28 sectors */
-               ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
+               ssize = le32_to_cpu(get_unaligned((__le32 *) &id[60<<1]));
 
                /* NOTE: obsolete in ATA 6 */
-               d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
-               d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
-               d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
+               d->geo.cylinders = le16_to_cpu(get_unaligned((__le16 *) &id[54<<1]));
+               d->geo.heads = le16_to_cpu(get_unaligned((__le16 *) &id[55<<1]));
+               d->geo.sectors = le16_to_cpu(get_unaligned((__le16 *) &id[56<<1]));
        }
+
+       if (d->ssize != ssize)
+               printk(KERN_INFO "aoe: %012llx e%lu.%lu v%04x has %llu "
+                       "sectors\n", (unsigned long long)mac_addr(d->addr),
+                       d->aoemajor, d->aoeminor,
+                       d->fw_ver, (long long)ssize);
        d->ssize = ssize;
        d->geo.start = 0;
        if (d->gd != NULL) {
                d->gd->capacity = ssize;
-               d->flags |= DEVFL_UP;
-               return;
-       }
-       if (d->flags & DEVFL_WORKON) {
-               printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on!  "
-                       "(This really shouldn't happen).\n");
-               return;
+               d->flags |= DEVFL_NEWSIZE;
+       } else {
+               if (d->flags & DEVFL_GDALLOC) {
+                       printk(KERN_INFO "aoe: %s: %s e%lu.%lu, %s\n",
+                              __FUNCTION__,
+                              "can't schedule work for",
+                              d->aoemajor, d->aoeminor,
+                              "it's already on! (This really shouldn't happen).\n");
+                       return;
+               }
+               d->flags |= DEVFL_GDALLOC;
        }
-       INIT_WORK(&d->work, aoeblk_gdalloc, d);
        schedule_work(&d->work);
-       d->flags |= DEVFL_WORKON;
 }
 
 static void
@@ -380,29 +482,30 @@ aoecmd_ata_rsp(struct sk_buff *skb)
        register long n;
        ulong flags;
        char ebuf[128];
-       
+       u16 aoemajor;
+
        hin = (struct aoe_hdr *) skb->mac.raw;
-       d = aoedev_bymac(hin->src);
+       aoemajor = be16_to_cpu(hin->major);
+       d = aoedev_by_aoeaddr(aoemajor, hin->minor);
        if (d == NULL) {
                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
                        "for unknown device %d.%d\n",
-                        __be16_to_cpu(*((u16 *) hin->major)),
-                       hin->minor);
+                        aoemajor, hin->minor);
                aoechr_error(ebuf);
                return;
        }
 
        spin_lock_irqsave(&d->lock, flags);
 
-       f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
+       f = getframe(d, be32_to_cpu(hin->tag));
        if (f == NULL) {
                spin_unlock_irqrestore(&d->lock, flags);
                snprintf(ebuf, sizeof ebuf,
                        "%15s e%d.%d    tag=%08x@%08lx\n",
                        "unexpected rsp",
-                       __be16_to_cpu(*((u16 *) hin->major)),
+                       be16_to_cpu(hin->major),
                        hin->minor,
-                       __be32_to_cpu(*((u32 *) hin->tag)),
+                       be32_to_cpu(hin->tag),
                        jiffies);
                aoechr_error(ebuf);
                return;
@@ -414,9 +517,13 @@ aoecmd_ata_rsp(struct sk_buff *skb)
        ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
        buf = f->buf;
 
+       if (ahout->cmdstat == WIN_IDENTIFY)
+               d->flags &= ~DEVFL_PAUSE;
        if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
                printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
-                       "stat=%2.2Xh\n", ahout->cmdstat, ahin->cmdstat);
+                       "stat=%2.2Xh from e%ld.%ld\n", 
+                       ahout->cmdstat, ahin->cmdstat,
+                       d->aoemajor, d->aoeminor);
                if (buf)
                        buf->flags |= BUFFL_FAIL;
        } else {
@@ -444,13 +551,12 @@ aoecmd_ata_rsp(struct sk_buff *skb)
                                return;
                        }
                        ataid_complete(d, (char *) (ahin+1));
-                       /* d->flags |= DEVFL_WC_UPDATE; */
                        break;
                default:
                        printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
                               "outbound ata command %2.2Xh for %d.%d\n", 
                               ahout->cmdstat,
-                              __be16_to_cpu(*((u16 *) hin->major)),
+                              be16_to_cpu(hin->major),
                               hin->minor);
                }
        }
@@ -458,8 +564,17 @@ aoecmd_ata_rsp(struct sk_buff *skb)
        if (buf) {
                buf->nframesout -= 1;
                if (buf->nframesout == 0 && buf->resid == 0) {
-                       n = !(buf->flags & BUFFL_FAIL);
-                       bio_endio(buf->bio, buf->bio->bi_size, 0);
+                       unsigned long duration = jiffies - buf->start_time;
+                       unsigned long n_sect = buf->bio->bi_size >> 9;
+                       struct gendisk *disk = d->gd;
+                       const int rw = bio_data_dir(buf->bio);
+
+                       disk_stat_inc(disk, ios[rw]);
+                       disk_stat_add(disk, ticks[rw], duration);
+                       disk_stat_add(disk, sectors[rw], n_sect);
+                       disk_stat_add(disk, io_ticks, duration);
+                       n = (buf->flags & BUFFL_FAIL) ? -EIO : 0;
+                       bio_endio(buf->bio, buf->bio->bi_size, n);
                        mempool_free(buf, d->bufpool);
                }
        }
@@ -468,60 +583,26 @@ aoecmd_ata_rsp(struct sk_buff *skb)
        f->tag = FREETAG;
 
        aoecmd_work(d);
-
-       sl = d->skblist;
-       d->skblist = NULL;
+       sl = d->sendq_hd;
+       d->sendq_hd = d->sendq_tl = NULL;
 
        spin_unlock_irqrestore(&d->lock, flags);
-
        aoenet_xmit(sl);
 }
 
 void
 aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 {
-       struct aoe_hdr *h;
-       struct aoe_cfghdr *ch;
-       struct sk_buff *skb, *sl;
-       struct net_device *ifp;
-       u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
-       u16 net_aoemajor = __cpu_to_be16(aoemajor);
-
-       sl = NULL;
-
-       read_lock(&dev_base_lock);
-       for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
-               dev_hold(ifp);
-               if (!is_aoe_netif(ifp))
-                       continue;
-
-               skb = new_skb(ifp, sizeof *h + sizeof *ch);
-               if (skb == NULL) {
-                       printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
-                       continue;
-               }
-               h = (struct aoe_hdr *) skb->mac.raw;
-               memset(h, 0, sizeof *h + sizeof *ch);
-
-               memset(h->dst, 0xff, sizeof h->dst);
-               memcpy(h->src, ifp->dev_addr, sizeof h->src);
-               memcpy(h->type, &aoe_type, sizeof aoe_type);
-               h->verfl = AOE_HVER;
-               memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
-               h->minor = aoeminor;
-               h->cmd = AOECMD_CFG;
+       struct sk_buff *sl;     /* chain of config packets to transmit */
 
-               skb->next = sl;
-               sl = skb;
-       }
-       read_unlock(&dev_base_lock);
+       sl = aoecmd_cfg_pkts(aoemajor, aoeminor, NULL); /* packet building now lives in the helper; NULL: tail not needed */
 
        aoenet_xmit(sl);
 }
  
 /*
  * Since we only call this in one place (and it only prepares one frame)
- * we just return the skb.  Usually we'd chain it up to the d->skblist.
+ * we just return the skb.  Usually we'd chain it up to the aoedev sendq.
  */
 static struct sk_buff *
 aoecmd_ata_id(struct aoedev *d)
@@ -547,9 +628,6 @@ aoecmd_ata_id(struct aoedev *d)
        f->waited = 0;
        f->writedatalen = 0;
 
-       /* this message initializes the device, so we reset the rttavg */
-       d->rttavg = MAXTIMER;
-
        /* set up ata header */
        ah->scnt = 1;
        ah->cmdstat = WIN_IDENTIFY;
@@ -557,12 +635,8 @@ aoecmd_ata_id(struct aoedev *d)
 
        skb = skb_prepare(d, f);
 
-       /* we now want to start the rexmit tracking */
-       d->flags &= ~DEVFL_TKILL;
-       d->timer.data = (ulong) d;
+       d->rttavg = MAXTIMER;
        d->timer.function = rexmit_timer;
-       d->timer.expires = jiffies + TIMERTICK;
-       add_timer(&d->timer);
 
        return skb;
 }
@@ -573,9 +647,10 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
        struct aoedev *d;
        struct aoe_hdr *h;
        struct aoe_cfghdr *ch;
-       ulong flags, bufcnt, sysminor, aoemajor;
+       ulong flags, sysminor, aoemajor;
+       u16 bufcnt;
        struct sk_buff *sl;
-       enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };
+       enum { MAXFRAMES = 16 };
 
        h = (struct aoe_hdr *) skb->mac.raw;
        ch = (struct aoe_cfghdr *) (h+1);
@@ -584,7 +659,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
         * Enough people have their dip switches set backwards to
         * warrant a loud message for this special case.
         */
-       aoemajor = __be16_to_cpu(*((u16 *) h->major));
+       aoemajor = be16_to_cpu(h->major);
        if (aoemajor == 0xfff) {
                printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
                        "address is all ones.  Check shelf dip switches\n");
@@ -592,33 +667,39 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
        }
 
        sysminor = SYSMINOR(aoemajor, h->minor);
-       if (sysminor > MAXSYSMINOR) {
-               printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
-                       "large\n", sysminor);
+       if (sysminor * AOE_PARTITIONS + AOE_PARTITIONS > MINORMASK) {
+               printk(KERN_INFO
+                       "aoe: e%ld.%d: minor number too large\n", 
+                       aoemajor, (int) h->minor);
                return;
        }
 
-       bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
+       bufcnt = be16_to_cpu(ch->bufcnt);
        if (bufcnt > MAXFRAMES) /* keep it reasonable */
                bufcnt = MAXFRAMES;
 
-       d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
+       d = aoedev_by_sysminor_m(sysminor, bufcnt);
        if (d == NULL) {
-               printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
+               printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device sysminor_m failure\n");
                return;
        }
 
        spin_lock_irqsave(&d->lock, flags);
 
-       if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
+       /* permit device to migrate mac and network interface */
+       d->ifp = skb->dev;
+       memcpy(d->addr, h->src, sizeof d->addr);
+
+       /* don't change users' perspective */
+       if (d->nopen && !(d->flags & DEVFL_PAUSE)) {
                spin_unlock_irqrestore(&d->lock, flags);
                return;
        }
+       d->flags |= DEVFL_PAUSE;        /* force pause */
+       d->fw_ver = be16_to_cpu(ch->fwver);
 
-       d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
-
-       /* we get here only if the device is new */
-       sl = aoecmd_ata_id(d);
+       /* check for already outstanding ataid */
+       sl = aoedev_isbusy(d) == 0 ? aoecmd_ata_id(d) : NULL;
 
        spin_unlock_irqrestore(&d->lock, flags);