X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fmd%2Fmd.c;h=745f7e7bd3ee445f0782285355d811023ae9ca70;hb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;hp=77cd6e9f3acdfd59249218296690f98a093f1b5f;hpb=a2c21200f1c81b08cb55e417b68150bba439b646;p=linux-2.6.git diff --git a/drivers/md/md.c b/drivers/md/md.c index 77cd6e9f3..745f7e7bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -154,6 +154,39 @@ static spinlock_t all_mddevs_lock = SPIN_LOCK_UNLOCKED; tmp = tmp->next;}) \ ) +int md_flush_mddev(mddev_t *mddev, sector_t *error_sector) +{ + struct list_head *tmp; + mdk_rdev_t *rdev; + int ret = 0; + + /* + * this list iteration is done without any locking in md?! + */ + ITERATE_RDEV(mddev, rdev, tmp) { + request_queue_t *r_queue = bdev_get_queue(rdev->bdev); + int err; + + if (!r_queue->issue_flush_fn) + err = -EOPNOTSUPP; + else + err = r_queue->issue_flush_fn(r_queue, rdev->bdev->bd_disk, error_sector); + + if (!ret) + ret = err; + } + + return ret; +} + +static int md_flush_all(request_queue_t *q, struct gendisk *disk, + sector_t *error_sector) +{ + mddev_t *mddev = q->queuedata; + + return md_flush_mddev(mddev, error_sector); +} + static int md_fail_request (request_queue_t *q, struct bio *bio) { bio_io_error(bio, bio->bi_size); @@ -373,7 +406,7 @@ static int read_disk_sb(mdk_rdev_t * rdev) return 0; fail: - printk(KERN_ERR "md: disabled device %s, could not read superblock.\n", + printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n", bdevname(rdev->bdev,b)); return -EINVAL; } @@ -439,6 +472,31 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) return csum; } +/* csum_partial is not consistent between different architectures. + * Some (i386) do a 32bit csum. Some (alpha) do 16 bit. + * This makes it hard for user-space to know what to do. 
+ * So we use calc_sb_csum to set the checksum to allow working + * with older kernels, but allow calc_sb_csum_common to + * be used when checking if a checksum is correct, to + * make life easier for user-space tools that might write + * a superblock. + */ +static unsigned int calc_sb_csum_common(mdp_super_t *super) +{ + unsigned int disk_csum = super->sb_csum; + unsigned long long newcsum = 0; + unsigned int csum; + int i; + unsigned int *superc = (int*) super; + super->sb_csum = 0; + + for (i=0; i<MD_SB_BYTES/4; i++) + newcsum+= superc[i]; + csum = (newcsum& 0xffffffff) + (newcsum>>32); + super->sb_csum = disk_csum; + return csum; +} + /* * Handle superblock details. * We want to be able to handle multiple superblock formats @@ -521,7 +579,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version if (sb->raid_disks <= 0) goto abort; - if (calc_sb_csum(sb) != sb->sb_csum) { + if (calc_sb_csum(sb) != sb->sb_csum && + calc_sb_csum_common(sb) != sb->sb_csum) { printk(KERN_WARNING "md: invalid superblock checksum on %s\n", b); goto abort; @@ -745,11 +804,21 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb) { unsigned int disk_csum, csum; + unsigned long long newcsum; int size = 256 + sb->max_dev*2; + unsigned int *isuper = (unsigned int*)sb; + int i; disk_csum = sb->sb_csum; sb->sb_csum = 0; - csum = csum_partial((void *)sb, size, 0); + newcsum = 0; + for (i=0; size>=4; size -= 4 ) + newcsum += le32_to_cpu(*isuper++); + + if (size == 2) + newcsum += le16_to_cpu(*(unsigned short*) isuper); + + csum = (newcsum & 0xffffffff) + (newcsum >> 32); sb->sb_csum = disk_csum; return csum; } @@ -1042,20 +1111,24 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) /* * prevent the device from being mounted, repartitioned or * otherwise reused by a RAID array (or any other kernel - * subsystem), by opening the device. 
[simply getting an - * inode is not enough, the SCSI module usage code needs - * an explicit open() on the device] + * subsystem), by bd_claiming the device. */ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev) { int err = 0; struct block_device *bdev; + char b[BDEVNAME_SIZE]; bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); - if (IS_ERR(bdev)) + if (IS_ERR(bdev)) { + printk(KERN_ERR "md: could not open %s.\n", + __bdevname(dev, b)); return PTR_ERR(bdev); + } err = bd_claim(bdev, rdev); if (err) { + printk(KERN_ERR "md: could not bd_claim %s.\n", + bdevname(bdev, b)); blkdev_put(bdev); return err; } @@ -1117,10 +1190,7 @@ static void export_array(mddev_t *mddev) static void print_desc(mdp_disk_t *desc) { - char b[BDEVNAME_SIZE]; - - printk(" DISK<N:%d,%s(%d,%d),R:%d,S:%d>\n", desc->number, - __bdevname(MKDEV(desc->major, desc->minor), b), + printk(" DISK<N:%d,(%d,%d),R:%d,S:%d>\n", desc->number, desc->major,desc->minor,desc->raid_disk,desc->state); } @@ -1312,8 +1382,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi rdev = (mdk_rdev_t *) kmalloc(sizeof(*rdev), GFP_KERNEL); if (!rdev) { - printk(KERN_ERR "md: could not alloc mem for %s!\n", - __bdevname(newdev, b)); + printk(KERN_ERR "md: could not alloc mem for new device!\n"); return ERR_PTR(-ENOMEM); } memset(rdev, 0, sizeof(*rdev)); @@ -1322,11 +1391,9 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi goto abort_free; err = lock_rdev(rdev, newdev); - if (err) { - printk(KERN_ERR "md: could not lock %s.\n", - __bdevname(newdev, b)); + if (err) goto abort_free; - } + rdev->desc_nr = -1; rdev->faulty = 0; rdev->in_sync = 0; @@ -1615,6 +1682,8 @@ static int do_md_run(mddev_t * mddev) mddev->pers = pers[pnum]; spin_unlock(&pers_lock); + mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ + err = mddev->pers->run(mddev); if (err) { printk(KERN_ERR "md: pers->run() failed ...\n"); @@ -1645,6 +1714,7 @@ static int do_md_run(mddev_t * mddev) */ 
mddev->queue->queuedata = mddev; mddev->queue->make_request_fn = mddev->pers->make_request; + mddev->queue->issue_flush_fn = md_flush_all; mddev->changed = 1; return 0; @@ -1881,11 +1951,9 @@ static int autostart_array(dev_t startdev) mdk_rdev_t *start_rdev = NULL, *rdev; start_rdev = md_import_device(startdev, 0, 0); - if (IS_ERR(start_rdev)) { - printk(KERN_WARNING "md: could not import %s!\n", - __bdevname(startdev, b)); + if (IS_ERR(start_rdev)) return err; - } + /* NOTE: this can only work for 0.90.0 superblocks */ sb = (mdp_super_t*)page_address(start_rdev->sb_page); @@ -1916,12 +1984,9 @@ static int autostart_array(dev_t startdev) if (MAJOR(dev) != desc->major || MINOR(dev) != desc->minor) continue; rdev = md_import_device(dev, 0, 0); - if (IS_ERR(rdev)) { - printk(KERN_WARNING "md: could not import %s," - " trying to run array nevertheless.\n", - __bdevname(dev, b)); + if (IS_ERR(rdev)) continue; - } + list_add(&rdev->same_set, &pending_raid_disks); } @@ -2153,42 +2218,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) return 0; } -static int hot_generate_error(mddev_t * mddev, dev_t dev) -{ - char b[BDEVNAME_SIZE]; - struct request_queue *q; - mdk_rdev_t *rdev; - - if (!mddev->pers) - return -ENODEV; - - printk(KERN_INFO "md: trying to generate %s error in %s ... 
\n", - __bdevname(dev, b), mdname(mddev)); - - rdev = find_rdev(mddev, dev); - if (!rdev) { - /* MD_BUG(); */ /* like hell - it's not a driver bug */ - return -ENXIO; - } - - if (rdev->desc_nr == -1) { - MD_BUG(); - return -EINVAL; - } - if (!rdev->in_sync) - return -ENODEV; - - q = bdev_get_queue(rdev->bdev); - if (!q) { - MD_BUG(); - return -ENODEV; - } - printk(KERN_INFO "md: okay, generating error!\n"); -// q->oneshot_error = 1; // disabled for now - - return 0; -} - static int hot_remove_disk(mddev_t * mddev, dev_t dev) { char b[BDEVNAME_SIZE]; @@ -2197,9 +2226,6 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev) if (!mddev->pers) return -ENODEV; - printk(KERN_INFO "md: trying to remove %s from %s ... \n", - __bdevname(dev, b), mdname(mddev)); - rdev = find_rdev(mddev, dev); if (!rdev) return -ENXIO; @@ -2227,9 +2253,6 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev) if (!mddev->pers) return -ENODEV; - printk(KERN_INFO "md: trying to hot-add %s to %s ... \n", - __bdevname(dev, b), mdname(mddev)); - if (mddev->major_version != 0) { printk(KERN_WARNING "%s: HOT_ADD may only be used with" " version-0 superblocks.\n", @@ -2489,7 +2512,6 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) static int md_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { - char b[BDEVNAME_SIZE]; int err = 0; void __user *argp = (void __user *)arg; struct hd_geometry __user *loc = argp; @@ -2548,8 +2570,7 @@ static int md_ioctl(struct inode *inode, struct file *file, } err = autostart_array(new_decode_dev(arg)); if (err) { - printk(KERN_WARNING "md: autostart %s failed!\n", - __bdevname(arg, b)); + printk(KERN_WARNING "md: autostart failed!\n"); goto abort; } goto done; @@ -2690,9 +2711,7 @@ static int md_ioctl(struct inode *inode, struct file *file, err = add_new_disk(mddev, &info); goto done_unlock; } - case HOT_GENERATE_ERROR: - err = hot_generate_error(mddev, new_decode_dev(arg)); - goto done_unlock; + case HOT_REMOVE_DISK: err = 
hot_remove_disk(mddev, new_decode_dev(arg)); goto done_unlock; @@ -2876,7 +2895,7 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, return thread; } -void md_interrupt_thread(mdk_thread_t *thread) +static void md_interrupt_thread(mdk_thread_t *thread) { if (!thread->tsk) { MD_BUG(); @@ -2919,6 +2938,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) if (!mddev->pers->error_handler) return; mddev->pers->error_handler(mddev,rdev); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); } @@ -2951,7 +2971,11 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) unsigned long max_blocks, resync, res, dt, db, rt; resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; - max_blocks = mddev->size; + + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + max_blocks = mddev->resync_max_sectors >> 1; + else + max_blocks = mddev->size; /* * Should not happen. @@ -3187,11 +3211,6 @@ int unregister_md_personality(int pnum) return 0; } -void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors) -{ - rdev->bdev->bd_contains->bd_disk->sync_io += nr_sectors; -} - static int is_mddev_idle(mddev_t *mddev) { mdk_rdev_t * rdev; @@ -3204,8 +3223,12 @@ static int is_mddev_idle(mddev_t *mddev) struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; curr_events = disk_stat_read(disk, read_sectors) + disk_stat_read(disk, write_sectors) - - disk->sync_io; - if ((curr_events - rdev->last_events) > 32) { + atomic_read(&disk->sync_io); + /* Allow some slack between value of curr_events and last_events, + * as there are some uninteresting races. + * Note: the following is an unsigned comparison. 
+ */ + if ((curr_events - rdev->last_events + 32) > 64) { rdev->last_events = curr_events; idle = 0; } @@ -3339,7 +3362,14 @@ static void md_do_sync(mddev_t *mddev) } } while (mddev->curr_resync < 2); - max_sectors = mddev->size << 1; + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) + /* resync follows the size requested by the personality, + * which default to physical size, but can be virtual size + */ + max_sectors = mddev->resync_max_sectors; + else + /* recovery follows the physical size of devices */ + max_sectors = mddev->size << 1; printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" @@ -3697,7 +3727,6 @@ void md_autodetect_dev(dev_t dev) static void autostart_arrays(int part) { - char b[BDEVNAME_SIZE]; mdk_rdev_t *rdev; int i; @@ -3707,11 +3736,9 @@ static void autostart_arrays(int part) dev_t dev = detected_devices[i]; rdev = md_import_device(dev,0, 0); - if (IS_ERR(rdev)) { - printk(KERN_ALERT "md: could not import %s!\n", - __bdevname(dev, b)); + if (IS_ERR(rdev)) continue; - } + if (rdev->faulty) { MD_BUG(); continue; @@ -3762,7 +3789,6 @@ module_exit(md_exit) EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(unregister_md_personality); EXPORT_SYMBOL(md_error); -EXPORT_SYMBOL(md_sync_acct); EXPORT_SYMBOL(md_done_sync); EXPORT_SYMBOL(md_write_start); EXPORT_SYMBOL(md_write_end); @@ -3771,6 +3797,5 @@ EXPORT_SYMBOL(md_register_thread); EXPORT_SYMBOL(md_unregister_thread); EXPORT_SYMBOL(md_wakeup_thread); EXPORT_SYMBOL(md_print_devices); -EXPORT_SYMBOL(md_interrupt_thread); EXPORT_SYMBOL(md_check_recovery); MODULE_LICENSE("GPL");