X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fmd%2Fmd.c;h=2fd2cd4470a678a657a8ca1e7f578c3e966692e5;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=f19b874753a9c4aec3802b557ab05100f387ff61;hpb=f7ed79d23a47594e7834d66a8f14449796d4f3e6;p=linux-2.6.git diff --git a/drivers/md/md.c b/drivers/md/md.c index f19b87475..2fd2cd447 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -43,7 +43,6 @@ #include /* for invalidate_bdev */ #include #include -#include #include @@ -159,18 +158,7 @@ static int start_readonly; */ static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters); static atomic_t md_event_count; -void md_new_event(mddev_t *mddev) -{ - atomic_inc(&md_event_count); - wake_up(&md_event_waiters); - sysfs_notify(&mddev->kobj, NULL, "sync_action"); -} -EXPORT_SYMBOL_GPL(md_new_event); - -/* Alternate version that can be called from interrupts - * when calling sysfs_notify isn't needed. - */ -void md_new_event_inintr(mddev_t *mddev) +static void md_new_event(mddev_t *mddev) { atomic_inc(&md_event_count); wake_up(&md_event_waiters); @@ -225,11 +213,10 @@ static void mddev_put(mddev_t *mddev) return; if (!mddev->raid_disks && list_empty(&mddev->disks)) { list_del(&mddev->all_mddevs); - spin_unlock(&all_mddevs_lock); - blk_cleanup_queue(mddev->queue); + blk_put_queue(mddev->queue); kobject_unregister(&mddev->kobj); - } else - spin_unlock(&all_mddevs_lock); + } + spin_unlock(&all_mddevs_lock); } static mddev_t * mddev_find(dev_t unit) @@ -263,7 +250,7 @@ static mddev_t * mddev_find(dev_t unit) else new->md_minor = MINOR(unit) >> MdpMinorShift; - mutex_init(&new->reconfig_mutex); + init_MUTEX(&new->reconfig_sem); INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); @@ -276,7 +263,6 @@ static mddev_t * mddev_find(dev_t unit) kfree(new); return NULL; } - set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags); blk_queue_make_request(new->queue, md_fail_request); @@ -285,17 +271,22 @@ static mddev_t * mddev_find(dev_t unit) static inline int mddev_lock(mddev_t * mddev) { - return mutex_lock_interruptible(&mddev->reconfig_mutex); + return down_interruptible(&mddev->reconfig_sem); +} + +static inline void mddev_lock_uninterruptible(mddev_t * mddev) +{ + down(&mddev->reconfig_sem); } static inline int mddev_trylock(mddev_t * mddev) { - return mutex_trylock(&mddev->reconfig_mutex); + return down_trylock(&mddev->reconfig_sem); } static inline void mddev_unlock(mddev_t * mddev) { - mutex_unlock(&mddev->reconfig_mutex); + up(&mddev->reconfig_sem); md_wakeup_thread(mddev->thread); } @@ -666,8 +657,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version } if (sb->major_version != 0 || - sb->minor_version < 90 || - sb->minor_version > 91) { + sb->minor_version != 90) { printk(KERN_WARNING "Bad version number %d.%d on %s\n", sb->major_version, sb->minor_version, b); @@ -752,20 +742,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->bitmap_offset = 0; mddev->default_bitmap_offset = MD_SB_BYTES >> 9; - if (mddev->minor_version >= 91) { - mddev->reshape_position = sb->reshape_position; - mddev->delta_disks = sb->delta_disks; - mddev->new_level = sb->new_level; - mddev->new_layout = sb->new_layout; - mddev->new_chunk = sb->new_chunk; - } else { - mddev->reshape_position = MaxSector; - mddev->delta_disks = 0; - mddev->new_level = mddev->level; - mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; - } - if (sb->state & (1<recovery_cp = MaxSector; else { @@ -785,8 +761,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (sb->state & (1<bitmap_file == NULL) { - if (mddev->level != 1 && mddev->level != 4 - && mddev->level != 5 && mddev->level != 6 + if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6 && mddev->level != 10) { /* FIXME use a better test */ printk(KERN_WARNING "md: bitmaps not supported for this level.\n"); @@ -860,6 +835,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->md_magic = MD_SB_MAGIC; sb->major_version = mddev->major_version; + sb->minor_version = mddev->minor_version; sb->patch_version = mddev->patch_version; sb->gvalid_words = 0; /* ignored */ memcpy(&sb->set_uuid0, mddev->uuid+0, 4); @@ -878,17 +854,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->events_hi = (mddev->events>>32); sb->events_lo = (u32)mddev->events; - if (mddev->reshape_position == MaxSector) - sb->minor_version = 90; - else { - sb->minor_version = 91; - sb->reshape_position = mddev->reshape_position; - sb->new_level = mddev->new_level; - sb->delta_disks = mddev->delta_disks; - sb->new_layout = mddev->new_layout; - sb->new_chunk = mddev->new_chunk; - } - mddev->minor_version = sb->minor_version; if (mddev->in_sync) { sb->recovery_cp = mddev->recovery_cp; @@ -925,9 +890,10 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) d->raid_disk = rdev2->raid_disk; else d->raid_disk = rdev2->desc_nr; /* compatibility */ - if (test_bit(Faulty, &rdev2->flags)) + if (test_bit(Faulty, &rdev2->flags)) { d->state = (1<flags)) { + failed++; + } else if (test_bit(In_sync, &rdev2->flags)) { d->state = (1<state |= (1<bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); } - if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { - mddev->reshape_position = le64_to_cpu(sb->reshape_position); - mddev->delta_disks = le32_to_cpu(sb->delta_disks); - mddev->new_level = le32_to_cpu(sb->new_level); - mddev->new_layout = le32_to_cpu(sb->new_layout); - mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; - } else { - mddev->reshape_position = MaxSector; - mddev->delta_disks = 0; - mddev->new_level = mddev->level; - mddev->new_layout = mddev->layout; - mddev->new_chunk = mddev->chunk_size; - } - } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling */ __u64 ev1 = le64_to_cpu(sb->events); @@ -1218,14 +1170,6 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); } - if (mddev->reshape_position != MaxSector) { - sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); - sb->reshape_position = cpu_to_le64(mddev->reshape_position); - sb->new_layout = cpu_to_le32(mddev->new_layout); - sb->delta_disks = cpu_to_le32(mddev->delta_disks); - sb->new_level = cpu_to_le32(mddev->new_level); - sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); - } max_dev = 0; ITERATE_RDEV(mddev,rdev2,tmp) @@ -1354,7 +1298,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) else ko = &rdev->bdev->bd_disk->kobj; sysfs_create_link(&rdev->kobj, ko, "block"); - bd_claim_by_disk(rdev->bdev, rdev, mddev->gendisk); return 0; } @@ -1365,7 +1308,6 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } - bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); list_del_init(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; @@ -1548,7 +1490,7 @@ static void sync_sbs(mddev_t * mddev) } } -void md_update_sb(mddev_t * mddev) +static void md_update_sb(mddev_t * mddev) { int err; struct list_head *tmp; @@ -1625,7 +1567,6 @@ repeat: wake_up(&mddev->sb_wait); } -EXPORT_SYMBOL_GPL(md_update_sb); /* words written to sysfs files may, or my not, be \n terminated. * We want to accept with case. For this we use cmd_match. @@ -2218,9 +2159,7 @@ action_show(mddev_t *mddev, char *page) char *type = "idle"; if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) { - if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - type = "reshape"; - else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) type = "resync"; else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) @@ -2251,17 +2190,10 @@ action_store(mddev_t *mddev, const char *page, size_t len) return -EBUSY; else if (cmd_match(page, "resync") || cmd_match(page, "recover")) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - else if (cmd_match(page, "reshape")) { - int err; - if (mddev->pers->start_reshape == NULL) - return -EINVAL; - err = mddev->pers->start_reshape(mddev); - if (err) - return err; - } else { + else { if (cmd_match(page, "check")) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); - else if (!cmd_match(page, "repair")) + else if (cmd_match(page, "repair")) return -EINVAL; set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); set_bit(MD_RECOVERY_SYNC, &mddev->recovery); @@ -2369,63 +2301,6 @@ sync_completed_show(mddev_t *mddev, char *page) static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); -static ssize_t -suspend_lo_show(mddev_t *mddev, char *page) -{ - return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo); -} - -static ssize_t -suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - - if (mddev->pers->quiesce == NULL) - return -EINVAL; - if (buf == e || (*e && *e != '\n')) - return -EINVAL; - if (new >= mddev->suspend_hi || - (new > mddev->suspend_lo && new < mddev->suspend_hi)) { - mddev->suspend_lo = new; - mddev->pers->quiesce(mddev, 2); - return len; - } else - return -EINVAL; -} -static struct md_sysfs_entry md_suspend_lo = -__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); - - -static ssize_t -suspend_hi_show(mddev_t *mddev, char *page) -{ - return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi); -} - -static ssize_t -suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) -{ - char *e; - unsigned long long new = simple_strtoull(buf, &e, 10); - - if (mddev->pers->quiesce == NULL) - return -EINVAL; - if (buf == e || (*e && *e != '\n')) - return -EINVAL; - if ((new <= mddev->suspend_lo && mddev->suspend_lo >= mddev->suspend_hi) || - (new > mddev->suspend_lo && new > mddev->suspend_hi)) { - mddev->suspend_hi = new; - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - return len; - } else - return -EINVAL; -} -static struct md_sysfs_entry md_suspend_hi = -__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); - - static struct attribute *md_default_attrs[] = { &md_level.attr, &md_raid_disks.attr, @@ -2443,8 +2318,6 @@ static struct attribute *md_redundancy_attrs[] = { &md_sync_max.attr, &md_sync_speed.attr, &md_sync_completed.attr, - &md_suspend_lo.attr, - &md_suspend_hi.attr, NULL, }; static struct attribute_group md_redundancy_group = { @@ -2462,11 +2335,9 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; - rv = mddev_lock(mddev); - if (!rv) { - rv = entry->show(mddev, page); - mddev_unlock(mddev); - } + mddev_lock(mddev); + rv = entry->show(mddev, page); + mddev_unlock(mddev); return rv; } @@ -2480,11 +2351,9 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, if (!entry->store) return -EIO; - rv = mddev_lock(mddev); - if (!rv) { - rv = entry->store(mddev, page, length); - mddev_unlock(mddev); - } + mddev_lock(mddev); + rv = entry->store(mddev, page, length); + mddev_unlock(mddev); return rv; } @@ -2508,7 +2377,7 @@ int mdp_major = 0; static struct kobject *md_probe(dev_t dev, int *part, void *data) { - static DEFINE_MUTEX(disks_mutex); + static DECLARE_MUTEX(disks_sem); mddev_t *mddev = mddev_find(dev); struct gendisk *disk; int partitioned = (MAJOR(dev) != MD_MAJOR); @@ -2518,15 +2387,15 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) if (!mddev) return NULL; - mutex_lock(&disks_mutex); + down(&disks_sem); if (mddev->gendisk) { - mutex_unlock(&disks_mutex); + up(&disks_sem); mddev_put(mddev); return NULL; } disk = alloc_disk(1 << shift); if (!disk) { - mutex_unlock(&disks_mutex); + up(&disks_sem); mddev_put(mddev); return NULL; } @@ -2544,7 +2413,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - mutex_unlock(&disks_mutex); + up(&disks_sem); mddev->kobj.parent = &disk->kobj; mddev->kobj.k_name = NULL; snprintf(mddev->kobj.name, KOBJ_NAME_LEN, "%s", "md"); @@ -2667,14 +2536,6 @@ static int do_md_run(mddev_t * mddev) mddev->level = pers->level; strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); - if (mddev->reshape_position != MaxSector && - pers->start_reshape == NULL) { - /* This personality cannot handle reshaping... */ - mddev->pers = NULL; - module_put(pers->owner); - return -EINVAL; - } - mddev->recovery = 0; mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ mddev->barriers_work = 1; @@ -2908,6 +2769,7 @@ static void autorun_array(mddev_t *mddev) */ static void autorun_devices(int part) { + struct list_head candidates; struct list_head *tmp; mdk_rdev_t *rdev0, *rdev; mddev_t *mddev; @@ -2916,7 +2778,6 @@ static void autorun_devices(int part) printk(KERN_INFO "md: autorun ...\n"); while (!list_empty(&pending_raid_disks)) { dev_t dev; - LIST_HEAD(candidates); rdev0 = list_entry(pending_raid_disks.next, mdk_rdev_t, same_set); @@ -3563,18 +3424,11 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) mddev->default_bitmap_offset = MD_SB_BYTES >> 9; mddev->bitmap_offset = 0; - mddev->reshape_position = MaxSector; - /* * Generate a 128 bit UUID */ get_random_bytes(mddev->uuid, 16); - mddev->new_level = mddev->level; - mddev->new_chunk = mddev->chunk_size; - mddev->new_layout = mddev->layout; - mddev->delta_disks = 0; - return 0; } @@ -3630,16 +3484,14 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) { int rv; /* change the number of raid disks */ - if (mddev->pers->check_reshape == NULL) + if (mddev->pers->reshape == NULL) return -EINVAL; if (raid_disks <= 0 || raid_disks >= mddev->max_disks) return -EINVAL; - if (mddev->sync_thread || mddev->reshape_position != MaxSector) + if (mddev->sync_thread) return -EBUSY; - mddev->delta_disks = raid_disks - mddev->raid_disks; - - rv = mddev->pers->check_reshape(mddev); + rv = mddev->pers->reshape(mddev, raid_disks); return rv; } @@ -3810,7 +3662,7 @@ static int md_ioctl(struct inode *inode, struct file *file, if (cnt > 0 ) { printk(KERN_WARNING "md: %s(pid %d) used deprecated START_ARRAY ioctl. " - "This will not be supported beyond July 2006\n", + "START_ARRAY is removed in kernel 2.6.19 and above.\n", current->comm, current->pid); cnt--; } @@ -4158,7 +4010,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) set_bit(MD_RECOVERY_INTR, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); - md_new_event_inintr(mddev); + md_new_event(mddev); } /* seq_file implementation /proc/mdstat */ @@ -4186,10 +4038,7 @@ static void status_unused(struct seq_file *seq) static void status_resync(struct seq_file *seq, mddev_t * mddev) { - sector_t max_blocks, resync, res; - unsigned long dt, db, rt; - int scale; - unsigned int per_milli; + unsigned long max_blocks, resync, res, dt, db, rt; resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; @@ -4205,22 +4054,9 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) MD_BUG(); return; } - /* Pick 'scale' such that (resync>>scale)*1000 will fit - * in a sector_t, and (max_blocks>>scale) will fit in a - * u32, as those are the requirements for sector_div. - * Thus 'scale' must be at least 10 - */ - scale = 10; - if (sizeof(sector_t) > sizeof(unsigned long)) { - while ( max_blocks/2 > (1ULL<<(scale+32))) - scale++; - } - res = (resync>>scale)*1000; - sector_div(res, (u32)((max_blocks>>scale)+1)); - - per_milli = res; + res = (resync/1024)*1000/(max_blocks/1024 + 1); { - int i, x = per_milli/50, y = 20-x; + int i, x = res/50, y = 20-x; seq_printf(seq, "["); for (i = 0; i < x; i++) seq_printf(seq, "="); @@ -4229,14 +4065,10 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) seq_printf(seq, "."); seq_printf(seq, "] "); } - seq_printf(seq, " %s =%3u.%u%% (%llu/%llu)", - (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)? - "reshape" : + seq_printf(seq, " %s =%3lu.%lu%% (%lu/%lu)", (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? - "resync" : "recovery")), - per_milli/10, per_milli % 10, - (unsigned long long) resync, - (unsigned long long) max_blocks); + "resync" : "recovery"), + res/10, res % 10, resync, max_blocks); /* * We do not want to overflow, so the order of operands and @@ -4250,7 +4082,7 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) dt = ((jiffies - mddev->resync_mark) / HZ); if (!dt) dt++; db = resync - (mddev->resync_mark_cnt/2); - rt = (dt * ((unsigned long)(max_blocks-resync) / (db/100+1)))/100; + rt = (dt * ((max_blocks-resync) / (db/100+1)))/100; seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); @@ -4349,9 +4181,8 @@ static int md_seq_show(struct seq_file *seq, void *v) return 0; } - if (mddev_lock(mddev) < 0) + if (mddev_lock(mddev)!=0) return -EINTR; - if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { seq_printf(seq, "%s : %sactive", mdname(mddev), mddev->pers ? "" : "in"); @@ -4608,7 +4439,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait); #define SYNC_MARKS 10 #define SYNC_MARK_STEP (3*HZ) -void md_do_sync(mddev_t *mddev) +static void md_do_sync(mddev_t *mddev) { mddev_t *mddev2; unsigned int currspeed = 0, @@ -4688,9 +4519,7 @@ void md_do_sync(mddev_t *mddev) */ max_sectors = mddev->resync_max_sectors; mddev->resync_mismatches = 0; - } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) - max_sectors = mddev->size << 1; - else + } else /* recovery follows the physical size of devices */ max_sectors = mddev->size << 1; @@ -4826,8 +4655,6 @@ void md_do_sync(mddev_t *mddev) mddev->pers->sync_request(mddev, max_sectors, &skipped, 1); if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) && - test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && - !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && mddev->curr_resync > 2 && mddev->curr_resync >= mddev->recovery_cp) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { @@ -4845,7 +4672,6 @@ void md_do_sync(mddev_t *mddev) set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); } -EXPORT_SYMBOL_GPL(md_do_sync); /* @@ -4901,7 +4727,7 @@ void md_check_recovery(mddev_t *mddev) )) return; - if (mddev_trylock(mddev)) { + if (mddev_trylock(mddev)==0) { int spares =0; spin_lock_irq(&mddev->write_lock); @@ -5037,10 +4863,8 @@ static int md_notify_reboot(struct notifier_block *this, printk(KERN_INFO "md: stopping all md devices.\n"); ITERATE_MDDEV(mddev,tmp) - if (mddev_trylock(mddev)) { + if (mddev_trylock(mddev)==0) do_md_stop (mddev, 1); - mddev_unlock(mddev); - } /* * certain more exotic SCSI devices are known to be * volatile wrt too early system reboots. While the