vserver 1.9.3
[linux-2.6.git] / drivers / md / md.c
index c451c8d..745f7e7 100644 (file)
@@ -154,6 +154,39 @@ static spinlock_t all_mddevs_lock = SPIN_LOCK_UNLOCKED;
                tmp = tmp->next;})                                      \
                )
 
+int md_flush_mddev(mddev_t *mddev, sector_t *error_sector)
+{
+       struct list_head *pos;
+       mdk_rdev_t *rdev;
+       int first_err = 0;
+
+       /*
+        * Ask every member device to flush; only the first error is kept.
+        * NOTE: this list iteration is done without any locking in md?!
+        */
+       ITERATE_RDEV(mddev, rdev, pos) {
+               request_queue_t *q = bdev_get_queue(rdev->bdev);
+               int err = -EOPNOTSUPP;  /* result when the queue has no flush hook */
+
+               if (q->issue_flush_fn)
+                       err = q->issue_flush_fn(q, rdev->bdev->bd_disk,
+                                               error_sector);
+
+               if (!first_err)
+                       first_err = err;
+       }
+
+       return first_err;
+}
+
+/* issue_flush_fn hook for an md queue: flush every member of the array. */
+static int md_flush_all(request_queue_t *q, struct gendisk *disk,
+                        sector_t *error_sector)
+{
+       /* the mddev is read back from q->queuedata; 'disk' is not used */
+       return md_flush_mddev(q->queuedata, error_sector);
+}
+
 static int md_fail_request (request_queue_t *q, struct bio *bio)
 {
        bio_io_error(bio, bio->bi_size);
@@ -373,7 +406,7 @@ static int read_disk_sb(mdk_rdev_t * rdev)
        return 0;
 
 fail:
-       printk(KERN_ERR "md: disabled device %s, could not read superblock.\n",
+       printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
                bdevname(rdev->bdev,b));
        return -EINVAL;
 }
@@ -439,6 +472,31 @@ static unsigned int calc_sb_csum(mdp_super_t * sb)
        return csum;
 }
 
+/* csum_partial is not consistent between different architectures.
+ * Some (i386) do a 32bit csum.  Some (alpha) do 16 bit.
+ * This makes it hard for user-space to know what to do.
+ * So calc_sb_csum is still used to set the checksum (so older
+ * kernels keep working), while calc_sb_csum_common may also be
+ * used when checking whether a checksum is correct, to make life
+ * easier for user-space tools that might write a superblock.
+ * Sums MD_SB_BYTES/4 32-bit words with sb_csum treated as zero.
+ */
+static unsigned int calc_sb_csum_common(mdp_super_t *super)
+{
+       unsigned int  disk_csum = super->sb_csum;
+       unsigned long long newcsum = 0;
+       unsigned int csum;
+       int i;
+       unsigned int *superc = (unsigned int *) super;
+       super->sb_csum = 0;     /* the checksum field itself counts as zero */
+
+       for (i=0; i<MD_SB_BYTES/4; i++)
+               newcsum+= superc[i];
+       csum = (newcsum& 0xffffffff) + (newcsum>>32); /* fold carries back in */
+       super->sb_csum = disk_csum;     /* restore the caller's value */
+       return csum;
+}
+
 /*
  * Handle superblock details.
  * We want to be able to handle multiple superblock formats
@@ -521,7 +579,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
        if (sb->raid_disks <= 0)
                goto abort;
 
-       if (calc_sb_csum(sb) != sb->sb_csum) {
+       if (calc_sb_csum(sb) != sb->sb_csum &&
+               calc_sb_csum_common(sb) != sb->sb_csum) {
                printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
                        b);
                goto abort;
@@ -745,11 +804,21 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb)
 {
        unsigned int disk_csum, csum;
+       unsigned long long newcsum;     /* 64-bit so carries past bit 31 are kept */
        int size = 256 + sb->max_dev*2;
+       unsigned int *isuper = (unsigned int*)sb;
+       int i;
 
        disk_csum = sb->sb_csum;
        sb->sb_csum = 0;
-       csum = csum_partial((void *)sb, size, 0);
+       newcsum = 0;
+       for (i=0; size>=4; size -= 4 )  /* sum the whole 32-bit words */
+               newcsum += le32_to_cpu(*isuper++);
+
+       if (size == 2)  /* two bytes left over after the 32-bit words */
+               newcsum += le16_to_cpu(*(unsigned short*) isuper);
+
+       csum = (newcsum & 0xffffffff) + (newcsum >> 32);  /* fold carries back in */
        sb->sb_csum = disk_csum;
        return csum;
 }
@@ -1042,20 +1111,24 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 /*
  * prevent the device from being mounted, repartitioned or
  * otherwise reused by a RAID array (or any other kernel
- * subsystem), by opening the device. [simply getting an
- * inode is not enough, the SCSI module usage code needs
- * an explicit open() on the device]
+ * subsystem), by bd_claiming the device.
  */
 static int lock_rdev(mdk_rdev_t *rdev, dev_t dev)
 {
        int err = 0;
        struct block_device *bdev;
+       char b[BDEVNAME_SIZE];
 
        bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
-       if (IS_ERR(bdev))
+       if (IS_ERR(bdev)) {
+               printk(KERN_ERR "md: could not open %s.\n",
+                       __bdevname(dev, b));
                return PTR_ERR(bdev);
+       }
        err = bd_claim(bdev, rdev);
        if (err) {
+               printk(KERN_ERR "md: could not bd_claim %s.\n",
+                       bdevname(bdev, b));
                blkdev_put(bdev);
                return err;
        }
@@ -1117,10 +1190,7 @@ static void export_array(mddev_t *mddev)
 
 static void print_desc(mdp_disk_t *desc)
 {
-       char b[BDEVNAME_SIZE];
-
-       printk(" DISK<N:%d,%s(%d,%d),R:%d,S:%d>\n", desc->number,
-               __bdevname(MKDEV(desc->major, desc->minor), b),
+       printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number, /* (major,minor) only */
                desc->major,desc->minor,desc->raid_disk,desc->state);
 }
 
@@ -1312,8 +1382,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 
        rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL);
        if (!rdev) {
-               printk(KERN_ERR "md: could not alloc mem for %s!\n", 
-                       __bdevname(newdev, b));
+               printk(KERN_ERR "md: could not alloc mem for new device!\n");
                return ERR_PTR(-ENOMEM);
        }
        memset(rdev, 0, sizeof(*rdev));
@@ -1322,11 +1391,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
                goto abort_free;
 
        err = lock_rdev(rdev, newdev);
-       if (err) {
-               printk(KERN_ERR "md: could not lock %s.\n",
-                       __bdevname(newdev, b));
+       if (err)
                goto abort_free;
-       }
+
        rdev->desc_nr = -1;
        rdev->faulty = 0;
        rdev->in_sync = 0;
@@ -1607,7 +1674,7 @@ static int do_md_run(mddev_t * mddev)
        spin_lock(&pers_lock);
        if (!pers[pnum] || !try_module_get(pers[pnum]->owner)) {
                spin_unlock(&pers_lock);
-               printk(KERN_ERR "md: personality %d is not loaded!\n",
+               printk(KERN_WARNING "md: personality %d is not loaded!\n",
                       pnum);
                return -EINVAL;
        }
@@ -1615,6 +1682,8 @@ static int do_md_run(mddev_t * mddev)
        mddev->pers = pers[pnum];
        spin_unlock(&pers_lock);
 
+       mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
+
        err = mddev->pers->run(mddev);
        if (err) {
                printk(KERN_ERR "md: pers->run() failed ...\n");
@@ -1645,6 +1714,7 @@ static int do_md_run(mddev_t * mddev)
         */
        mddev->queue->queuedata = mddev;
        mddev->queue->make_request_fn = mddev->pers->make_request;
+       mddev->queue->issue_flush_fn = md_flush_all;
 
        mddev->changed = 1;
        return 0;
@@ -1881,11 +1951,9 @@ static int autostart_array(dev_t startdev)
        mdk_rdev_t *start_rdev = NULL, *rdev;
 
        start_rdev = md_import_device(startdev, 0, 0);
-       if (IS_ERR(start_rdev)) {
-               printk(KERN_WARNING "md: could not import %s!\n",
-                       __bdevname(startdev, b));
+       if (IS_ERR(start_rdev))
                return err;
-       }
+
 
        /* NOTE: this can only work for 0.90.0 superblocks */
        sb = (mdp_super_t*)page_address(start_rdev->sb_page);
@@ -1916,12 +1984,9 @@ static int autostart_array(dev_t startdev)
                if (MAJOR(dev) != desc->major || MINOR(dev) != desc->minor)
                        continue;
                rdev = md_import_device(dev, 0, 0);
-               if (IS_ERR(rdev)) {
-                       printk(KERN_WARNING "md: could not import %s,"
-                               " trying to run array nevertheless.\n",
-                               __bdevname(dev, b));
+               if (IS_ERR(rdev))
                        continue;
-               }
+
                list_add(&rdev->same_set, &pending_raid_disks);
        }
 
@@ -1934,7 +1999,7 @@ static int autostart_array(dev_t startdev)
 }
 
 
-static int get_version(void * arg)
+static int get_version(void __user * arg)
 {
        mdu_version_t ver;
 
@@ -1948,7 +2013,7 @@ static int get_version(void * arg)
        return 0;
 }
 
-static int get_array_info(mddev_t * mddev, void * arg)
+static int get_array_info(mddev_t * mddev, void __user * arg)
 {
        mdu_array_info_t info;
        int nr,working,active,failed,spare;
@@ -1998,7 +2063,7 @@ static int get_array_info(mddev_t * mddev, void * arg)
        return 0;
 }
 
-static int get_disk_info(mddev_t * mddev, void * arg)
+static int get_disk_info(mddev_t * mddev, void __user * arg)
 {
        mdu_disk_info_t info;
        unsigned int nr;
@@ -2153,42 +2218,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
        return 0;
 }
 
-static int hot_generate_error(mddev_t * mddev, dev_t dev)
-{
-       char b[BDEVNAME_SIZE];
-       struct request_queue *q;
-       mdk_rdev_t *rdev;
-
-       if (!mddev->pers)
-               return -ENODEV;
-
-       printk(KERN_INFO "md: trying to generate %s error in %s ... \n",
-               __bdevname(dev, b), mdname(mddev));
-
-       rdev = find_rdev(mddev, dev);
-       if (!rdev) {
-               /* MD_BUG(); */ /* like hell - it's not a driver bug */
-               return -ENXIO;
-       }
-
-       if (rdev->desc_nr == -1) {
-               MD_BUG();
-               return -EINVAL;
-       }
-       if (!rdev->in_sync)
-               return -ENODEV;
-
-       q = bdev_get_queue(rdev->bdev);
-       if (!q) {
-               MD_BUG();
-               return -ENODEV;
-       }
-       printk(KERN_INFO "md: okay, generating error!\n");
-//     q->oneshot_error = 1; // disabled for now
-
-       return 0;
-}
-
 static int hot_remove_disk(mddev_t * mddev, dev_t dev)
 {
        char b[BDEVNAME_SIZE];
@@ -2197,9 +2226,6 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev)
        if (!mddev->pers)
                return -ENODEV;
 
-       printk(KERN_INFO "md: trying to remove %s from %s ... \n",
-               __bdevname(dev, b), mdname(mddev));
-
        rdev = find_rdev(mddev, dev);
        if (!rdev)
                return -ENXIO;
@@ -2227,9 +2253,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
        if (!mddev->pers)
                return -ENODEV;
 
-       printk(KERN_INFO "md: trying to hot-add %s to %s ... \n",
-               __bdevname(dev, b), mdname(mddev));
-
        if (mddev->major_version != 0) {
                printk(KERN_WARNING "%s: HOT_ADD may only be used with"
                        " version-0 superblocks.\n",
@@ -2251,7 +2274,12 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
                return -EINVAL;
        }
 
-       rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
+       if (mddev->persistent)
+               rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
+       else
+               rdev->sb_offset =
+                       rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
+
        size = calc_dev_size(rdev, mddev->chunk_size);
        rdev->size = size;
 
@@ -2372,6 +2400,103 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
        return 0;
 }
 
+/*
+ * update_array_info is used to change the configuration of an
+ * on-line array.
+ * The version, ctime, level, size, raid_disks, not_persistent, layout and
+ * chunk_size fields in the info are checked against the array.
+ * Any difference that cannot be handled causes -EINVAL; only size or
+ * raid_disks may differ, and only one of them per call.
+ */
+static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
+{
+       int rv = 0;
+       int cnt = 0;
+
+       if (mddev->major_version != info->major_version ||
+           mddev->minor_version != info->minor_version ||
+/*         mddev->patch_version != info->patch_version || */
+           mddev->ctime         != info->ctime         ||
+           mddev->level         != info->level         ||
+           mddev->layout        != info->layout        ||
+           !mddev->persistent   != info->not_persistent||
+           mddev->chunk_size    != info->chunk_size    )
+               return -EINVAL;
+       /* Check there is only one change */
+       if (mddev->size != info->size) cnt++;
+       if (mddev->raid_disks != info->raid_disks) cnt++;
+       if (cnt == 0) return 0;
+       if (cnt > 1) return -EINVAL;
+
+       if (mddev->size != info->size) {
+               mdk_rdev_t * rdev;
+               struct list_head *tmp;
+               int fit = (info->size == 0);    /* latched once, before the loop */
+               if (mddev->pers->resize == NULL)
+                       return -EINVAL;
+               /* The "size" is the amount of each device that is used.
+                * This can only make sense for arrays with redundancy.
+                * linear and raid0 always use whatever space is available
+                * We can only consider changing the size if no resync
+                * or reconstruction is happening, and if the new size
+                * is acceptable. It must fit before the sb_offset or,
+                * if that is <data_offset, it must fit before the
+                * size of each device.
+                * If size is zero, we find the largest size that fits.
+                */
+               if (mddev->sync_thread)
+                       return -EBUSY;
+               ITERATE_RDEV(mddev,rdev,tmp) {
+                       sector_t avail;
+                       if (rdev->sb_offset > rdev->data_offset)
+                               avail = (rdev->sb_offset*2) - rdev->data_offset;
+                       else
+                               avail = get_capacity(rdev->bdev->bd_disk)
+                                       - rdev->data_offset;
+                       if (fit && (info->size == 0 || info->size > avail/2))
+                               info->size = avail/2;
+                       if (avail < ((sector_t)info->size << 1))
+                               return -ENOSPC;
+               }
+               rv = mddev->pers->resize(mddev, (sector_t)info->size *2);
+               if (!rv) {      /* resized: update i_size of the md block device */
+                       struct block_device *bdev;
+
+                       bdev = bdget_disk(mddev->gendisk, 0);
+                       if (bdev) {
+                               down(&bdev->bd_inode->i_sem);
+                               i_size_write(bdev->bd_inode, mddev->array_size << 10);
+                               up(&bdev->bd_inode->i_sem);
+                               bdput(bdev);
+                       }
+               }
+       }
+       if (mddev->raid_disks    != info->raid_disks) {
+               /* change the number of raid disks */
+               if (mddev->pers->reshape == NULL)
+                       return -EINVAL;
+               if (info->raid_disks <= 0 ||
+                   info->raid_disks >= mddev->max_disks)
+                       return -EINVAL;
+               if (mddev->sync_thread)
+                       return -EBUSY;
+               rv = mddev->pers->reshape(mddev, info->raid_disks);
+               if (!rv) {      /* reshaped: update i_size of the md block device */
+                       struct block_device *bdev;
+
+                       bdev = bdget_disk(mddev->gendisk, 0);
+                       if (bdev) {
+                               down(&bdev->bd_inode->i_sem);
+                               i_size_write(bdev->bd_inode, mddev->array_size << 10);
+                               up(&bdev->bd_inode->i_sem);
+                               bdput(bdev);
+                       }
+               }
+       }
+       md_update_sb(mddev);
+       return rv;
+}
+
 static int set_disk_faulty(mddev_t *mddev, dev_t dev)
 {
        mdk_rdev_t *rdev;
@@ -2387,9 +2512,9 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev)
 static int md_ioctl(struct inode *inode, struct file *file,
                        unsigned int cmd, unsigned long arg)
 {
-       char b[BDEVNAME_SIZE];
        int err = 0;
-       struct hd_geometry *loc = (struct hd_geometry *) arg;
+       void __user *argp = (void __user *)arg;
+       struct hd_geometry __user *loc = argp;
        mddev_t *mddev = NULL;
 
        if (!capable(CAP_SYS_ADMIN))
@@ -2402,7 +2527,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
        switch (cmd)
        {
                case RAID_VERSION:
-                       err = get_version((void *)arg);
+                       err = get_version(argp);
                        goto done;
 
                case PRINT_RAID_DEBUG:
@@ -2445,8 +2570,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
                }
                err = autostart_array(new_decode_dev(arg));
                if (err) {
-                       printk(KERN_WARNING "md: autostart %s failed!\n",
-                               __bdevname(arg, b));
+                       printk(KERN_WARNING "md: autostart failed!\n");
                        goto abort;
                }
                goto done;
@@ -2463,33 +2587,41 @@ static int md_ioctl(struct inode *inode, struct file *file,
        switch (cmd)
        {
                case SET_ARRAY_INFO:
-
-                       if (!list_empty(&mddev->disks)) {
-                               printk(KERN_WARNING 
-                                       "md: array %s already has disks!\n",
-                                       mdname(mddev));
-                               err = -EBUSY;
-                               goto abort_unlock;
-                       }
-                       if (mddev->raid_disks) {
-                               printk(KERN_WARNING 
-                                       "md: array %s already initialised!\n",
-                                       mdname(mddev));
-                               err = -EBUSY;
-                               goto abort_unlock;
-                       }
                        {
                                mdu_array_info_t info;
                                if (!arg)
                                        memset(&info, 0, sizeof(info));
-                               else if (copy_from_user(&info, (void*)arg, sizeof(info))) {
+                               else if (copy_from_user(&info, argp, sizeof(info))) {
                                        err = -EFAULT;
                                        goto abort_unlock;
                                }
+                               if (mddev->pers) {
+                                       err = update_array_info(mddev, &info);
+                                       if (err) {
+                                               printk(KERN_WARNING "md: couldn't update"
+                                                      " array info. %d\n", err);
+                                               goto abort_unlock;
+                                       }
+                                       goto done_unlock;
+                               }
+                               if (!list_empty(&mddev->disks)) {
+                                       printk(KERN_WARNING
+                                              "md: array %s already has disks!\n",
+                                              mdname(mddev));
+                                       err = -EBUSY;
+                                       goto abort_unlock;
+                               }
+                               if (mddev->raid_disks) {
+                                       printk(KERN_WARNING
+                                              "md: array %s already initialised!\n",
+                                              mdname(mddev));
+                                       err = -EBUSY;
+                                       goto abort_unlock;
+                               }
                                err = set_array_info(mddev, &info);
                                if (err) {
                                        printk(KERN_WARNING "md: couldn't set"
-                                               " array info. %d\n", err);
+                                              " array info. %d\n", err);
                                        goto abort_unlock;
                                }
                        }
@@ -2513,11 +2645,11 @@ static int md_ioctl(struct inode *inode, struct file *file,
        switch (cmd)
        {
                case GET_ARRAY_INFO:
-                       err = get_array_info(mddev, (void *)arg);
+                       err = get_array_info(mddev, argp);
                        goto done_unlock;
 
                case GET_DISK_INFO:
-                       err = get_disk_info(mddev, (void *)arg);
+                       err = get_disk_info(mddev, argp);
                        goto done_unlock;
 
                case RESTART_ARRAY_RW:
@@ -2543,18 +2675,18 @@ static int md_ioctl(struct inode *inode, struct file *file,
                                err = -EINVAL;
                                goto abort_unlock;
                        }
-                       err = put_user (2, (char *) &loc->heads);
+                       err = put_user (2, (char __user *) &loc->heads);
                        if (err)
                                goto abort_unlock;
-                       err = put_user (4, (char *) &loc->sectors);
+                       err = put_user (4, (char __user *) &loc->sectors);
                        if (err)
                                goto abort_unlock;
                        err = put_user(get_capacity(mddev->gendisk)/8,
-                                               (short *) &loc->cylinders);
+                                       (short __user *) &loc->cylinders);
                        if (err)
                                goto abort_unlock;
                        err = put_user (get_start_sect(inode->i_bdev),
-                                               (long *) &loc->start);
+                                               (long __user *) &loc->start);
                        goto done_unlock;
        }
 
@@ -2573,15 +2705,13 @@ static int md_ioctl(struct inode *inode, struct file *file,
                case ADD_NEW_DISK:
                {
                        mdu_disk_info_t info;
-                       if (copy_from_user(&info, (void*)arg, sizeof(info)))
+                       if (copy_from_user(&info, argp, sizeof(info)))
                                err = -EFAULT;
                        else
                                err = add_new_disk(mddev, &info);
                        goto done_unlock;
                }
-               case HOT_GENERATE_ERROR:
-                       err = hot_generate_error(mddev, new_decode_dev(arg));
-                       goto done_unlock;
+
                case HOT_REMOVE_DISK:
                        err = hot_remove_disk(mddev, new_decode_dev(arg));
                        goto done_unlock;
@@ -2765,7 +2895,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
        return thread;
 }
 
-void md_interrupt_thread(mdk_thread_t *thread)
+static void md_interrupt_thread(mdk_thread_t *thread)
 {
        if (!thread->tsk) {
                MD_BUG();
@@ -2808,6 +2938,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
        if (!mddev->pers->error_handler)
                return;
        mddev->pers->error_handler(mddev,rdev);
+       set_bit(MD_RECOVERY_INTR, &mddev->recovery);
        set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
        md_wakeup_thread(mddev->thread);
 }
@@ -2840,7 +2971,11 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
        unsigned long max_blocks, resync, res, dt, db, rt;
 
        resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
-       max_blocks = mddev->size;
+
+       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+               max_blocks = mddev->resync_max_sectors >> 1;
+       else
+               max_blocks = mddev->size;
 
        /*
         * Should not happen.
@@ -3076,11 +3211,6 @@ int unregister_md_personality(int pnum)
        return 0;
 }
 
-void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors)
-{
-       rdev->bdev->bd_contains->bd_disk->sync_io += nr_sectors;
-}
-
 static int is_mddev_idle(mddev_t *mddev)
 {
        mdk_rdev_t * rdev;
@@ -3093,8 +3223,12 @@ static int is_mddev_idle(mddev_t *mddev)
                struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
                curr_events = disk_stat_read(disk, read_sectors) + 
                                disk_stat_read(disk, write_sectors) - 
-                               disk->sync_io;
-               if ((curr_events - rdev->last_events) > 32) {
+                               atomic_read(&disk->sync_io);
+               /* Allow some slack between value of curr_events and last_events,
+                * as there are some uninteresting races.
+                * Note: the following is an unsigned comparison.
+                */
+               if ((curr_events - rdev->last_events + 32) > 64) {
                        rdev->last_events = curr_events;
                        idle = 0;
                }
@@ -3228,7 +3362,14 @@ static void md_do_sync(mddev_t *mddev)
                }
        } while (mddev->curr_resync < 2);
 
-       max_sectors = mddev->size << 1;
+       if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+               /* resync follows the size requested by the personality,
+                * which defaults to physical size, but can be virtual size
+                */
+               max_sectors = mddev->resync_max_sectors;
+       else
+               /* recovery follows the physical size of devices */
+               max_sectors = mddev->size << 1;
 
        printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
        printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
@@ -3278,7 +3419,7 @@ static void md_do_sync(mddev_t *mddev)
                j += sectors;
                if (j>1) mddev->curr_resync = j;
 
-               if (last_check + window > j)
+               if (last_check + window > j || j == max_sectors)
                        continue;
 
                last_check = j;
@@ -3444,8 +3585,8 @@ void md_check_recovery(mddev_t *mddev)
                        if (rdev->raid_disk >= 0 &&
                            rdev->faulty &&
                            atomic_read(&rdev->nr_pending)==0) {
-                               mddev->pers->hot_remove_disk(mddev, rdev->raid_disk);
-                               rdev->raid_disk = -1;
+                               if (mddev->pers->hot_remove_disk(mddev, rdev->raid_disk)==0)
+                                       rdev->raid_disk = -1;
                        }
                        if (!rdev->faulty && rdev->raid_disk >= 0 && !rdev->in_sync)
                                spares++;
@@ -3586,7 +3727,6 @@ void md_autodetect_dev(dev_t dev)
 
 static void autostart_arrays(int part)
 {
-       char b[BDEVNAME_SIZE];
        mdk_rdev_t *rdev;
        int i;
 
@@ -3596,11 +3736,9 @@ static void autostart_arrays(int part)
                dev_t dev = detected_devices[i];
 
                rdev = md_import_device(dev,0, 0);
-               if (IS_ERR(rdev)) {
-                       printk(KERN_ALERT "md: could not import %s!\n",
-                               __bdevname(dev, b));
+               if (IS_ERR(rdev))
                        continue;
-               }
+
                if (rdev->faulty) {
                        MD_BUG();
                        continue;
@@ -3651,7 +3789,6 @@ module_exit(md_exit)
 EXPORT_SYMBOL(register_md_personality);
 EXPORT_SYMBOL(unregister_md_personality);
 EXPORT_SYMBOL(md_error);
-EXPORT_SYMBOL(md_sync_acct);
 EXPORT_SYMBOL(md_done_sync);
 EXPORT_SYMBOL(md_write_start);
 EXPORT_SYMBOL(md_write_end);
@@ -3660,6 +3797,5 @@ EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_wakeup_thread);
 EXPORT_SYMBOL(md_print_devices);
-EXPORT_SYMBOL(md_interrupt_thread);
 EXPORT_SYMBOL(md_check_recovery);
 MODULE_LICENSE("GPL");