vserver 1.9.5.x5
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 9d79b2a..63f81fd 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -36,8 +36,6 @@
 
 
 static mdk_personality_t multipath_personality;
-static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
-struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
 
 
 static void *mp_pool_alloc(int gfp_flags, void *data)
@@ -63,16 +61,16 @@ static int multipath_map (multipath_conf_t *conf)
         * now we use the first available disk.
         */
 
-       spin_lock_irq(&conf->device_lock);
+       rcu_read_lock();
        for (i = 0; i < disks; i++) {
                mdk_rdev_t *rdev = conf->multipaths[i].rdev;
                if (rdev && rdev->in_sync) {
                        atomic_inc(&rdev->nr_pending);
-                       spin_unlock_irq(&conf->device_lock);
+                       rcu_read_unlock();
                        return i;
                }
        }
-       spin_unlock_irq(&conf->device_lock);
+       rcu_read_unlock();
 
        printk(KERN_ERR "multipath_map(): no more operational IO paths?\n");
        return (-1);
@@ -82,14 +80,11 @@ static void multipath_reschedule_retry (struct multipath_bh *mp_bh)
 {
        unsigned long flags;
        mddev_t *mddev = mp_bh->mddev;
+       multipath_conf_t *conf = mddev_to_conf(mddev);
 
-       spin_lock_irqsave(&retry_list_lock, flags);
-       if (multipath_retry_list == NULL)
-               multipath_retry_tail = &multipath_retry_list;
-       *multipath_retry_tail = mp_bh;
-       multipath_retry_tail = &mp_bh->next_mp;
-       mp_bh->next_mp = NULL;
-       spin_unlock_irqrestore(&retry_list_lock, flags);
+       spin_lock_irqsave(&conf->device_lock, flags);
+       list_add(&mp_bh->retry_list, &conf->retry_list);
+       spin_unlock_irqrestore(&conf->device_lock, flags);
        md_wakeup_thread(mddev->thread);
 }
 
@@ -140,26 +135,26 @@ static void unplug_slaves(mddev_t *mddev)
 {
        multipath_conf_t *conf = mddev_to_conf(mddev);
        int i;
-       unsigned long flags;
 
-       spin_lock_irqsave(&conf->device_lock, flags);
+       rcu_read_lock();
        for (i=0; i<mddev->raid_disks; i++) {
                mdk_rdev_t *rdev = conf->multipaths[i].rdev;
-               if (rdev && !rdev->faulty) {
+               if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
                        request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 
                        atomic_inc(&rdev->nr_pending);
-                       spin_unlock_irqrestore(&conf->device_lock, flags);
+                       rcu_read_unlock();
 
                        if (r_queue->unplug_fn)
                                r_queue->unplug_fn(r_queue);
 
-                       spin_lock_irqsave(&conf->device_lock, flags);
-                       atomic_dec(&rdev->nr_pending);
+                       rdev_dec_pending(rdev, mddev);
+                       rcu_read_lock();
                }
        }
-       spin_unlock_irqrestore(&conf->device_lock, flags);
+       rcu_read_unlock();
 }
+
 static void multipath_unplug(request_queue_t *q)
 {
        unplug_slaves(q->queuedata);
@@ -195,6 +190,7 @@ static int multipath_make_request (request_queue_t *q, struct bio * bio)
        multipath = conf->multipaths + mp_bh->path;
 
        mp_bh->bio = *bio;
+       mp_bh->bio.bi_sector += multipath->rdev->data_offset;
        mp_bh->bio.bi_bdev = multipath->rdev->bdev;
        mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST);
        mp_bh->bio.bi_end_io = multipath_end_request;
@@ -224,22 +220,26 @@ static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk,
        multipath_conf_t *conf = mddev_to_conf(mddev);
        int i, ret = 0;
 
-       for (i=0; i<mddev->raid_disks; i++) {
+       rcu_read_lock();
+       for (i=0; i<mddev->raid_disks && ret == 0; i++) {
                mdk_rdev_t *rdev = conf->multipaths[i].rdev;
                if (rdev && !rdev->faulty) {
                        struct block_device *bdev = rdev->bdev;
                        request_queue_t *r_queue = bdev_get_queue(bdev);
 
-                       if (!r_queue->issue_flush_fn) {
+                       if (!r_queue->issue_flush_fn)
                                ret = -EOPNOTSUPP;
-                               break;
+                       else {
+                               atomic_inc(&rdev->nr_pending);
+                               rcu_read_unlock();
+                               ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+                                                             error_sector);
+                               rdev_dec_pending(rdev, mddev);
+                               rcu_read_lock();
                        }
-
-                       ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk, error_sector);
-                       if (ret)
-                               break;
                }
        }
+       rcu_read_unlock();
        return ret;
 }
 
@@ -310,10 +310,9 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        struct multipath_info *p;
 
        print_multipath_conf(conf);
-       spin_lock_irq(&conf->device_lock);
+
        for (path=0; path<mddev->raid_disks; path++) 
                if ((p=conf->multipaths+path)->rdev == NULL) {
-                       p->rdev = rdev;
                        blk_queue_stack_limits(mddev->queue,
                                               rdev->bdev->bd_disk->queue);
 
@@ -330,9 +329,9 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                        conf->working_disks++;
                        rdev->raid_disk = path;
                        rdev->in_sync = 1;
+                       p->rdev = rdev;
                        found = 1;
                }
-       spin_unlock_irq(&conf->device_lock);
 
        print_multipath_conf(conf);
        return found;
@@ -341,26 +340,29 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 static int multipath_remove_disk(mddev_t *mddev, int number)
 {
        multipath_conf_t *conf = mddev->private;
-       int err = 1;
+       int err = 0;
+       mdk_rdev_t *rdev;
        struct multipath_info *p = conf->multipaths + number;
 
        print_multipath_conf(conf);
-       spin_lock_irq(&conf->device_lock);
 
-       if (p->rdev) {
-               if (p->rdev->in_sync ||
-                   atomic_read(&p->rdev->nr_pending)) {
+       rdev = p->rdev;
+       if (rdev) {
+               if (rdev->in_sync ||
+                   atomic_read(&rdev->nr_pending)) {
                        printk(KERN_ERR "hot-remove-disk, slot %d is identified"                                " but is still operational!\n", number);
                        err = -EBUSY;
                        goto abort;
                }
                p->rdev = NULL;
-               err = 0;
+               synchronize_kernel();
+               if (atomic_read(&rdev->nr_pending)) {
+                       /* lost the race, try later */
+                       err = -EBUSY;
+                       p->rdev = rdev;
+               }
        }
-       if (err)
-               MD_BUG();
 abort:
-       spin_unlock_irq(&conf->device_lock);
 
        print_multipath_conf(conf);
        return err;
@@ -382,18 +384,18 @@ static void multipathd (mddev_t *mddev)
        struct bio *bio;
        unsigned long flags;
        multipath_conf_t *conf = mddev_to_conf(mddev);
+       struct list_head *head = &conf->retry_list;
 
        md_check_recovery(mddev);
        for (;;) {
                char b[BDEVNAME_SIZE];
-               spin_lock_irqsave(&retry_list_lock, flags);
-               mp_bh = multipath_retry_list;
-               if (!mp_bh)
+               spin_lock_irqsave(&conf->device_lock, flags);
+               if (list_empty(head))
                        break;
-               multipath_retry_list = mp_bh->next_mp;
-               spin_unlock_irqrestore(&retry_list_lock, flags);
+               mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
+               list_del(head->prev);
+               spin_unlock_irqrestore(&conf->device_lock, flags);
 
-               mddev = mp_bh->mddev;
                bio = &mp_bh->bio;
                bio->bi_sector = mp_bh->master_bio->bi_sector;
                
@@ -409,6 +411,7 @@ static void multipathd (mddev_t *mddev)
                                bdevname(bio->bi_bdev,b),
                                (unsigned long long)bio->bi_sector);
                        *bio = *(mp_bh->master_bio);
+                       bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset;
                        bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
                        bio->bi_rw |= (1 << BIO_RW_FAILFAST);
                        bio->bi_end_io = multipath_end_request;
@@ -416,7 +419,7 @@ static void multipathd (mddev_t *mddev)
                        generic_make_request(bio);
                }
        }
-       spin_unlock_irqrestore(&retry_list_lock, flags);
+       spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
 static int multipath_run (mddev_t *mddev)
@@ -488,7 +491,8 @@ static int multipath_run (mddev_t *mddev)
        conf->raid_disks = mddev->raid_disks;
        mddev->sb_dirty = 1;
        conf->mddev = mddev;
-       conf->device_lock = SPIN_LOCK_UNLOCKED;
+       spin_lock_init(&conf->device_lock);
+       INIT_LIST_HEAD(&conf->retry_list);
 
        if (!conf->working_disks) {
                printk(KERN_ERR "multipath: no operational IO paths for %s\n",
@@ -543,6 +547,7 @@ static int multipath_stop (mddev_t *mddev)
 
        md_unregister_thread(mddev->thread);
        mddev->thread = NULL;
+       blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
        mempool_destroy(conf->pool);
        kfree(conf->multipaths);
        kfree(conf);
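
Taken together, the hunks above drop the global retry_list_lock/multipath_retry_list pair in favour of a per-conf retry_list protected by conf->device_lock, and switch per-path rdev lookups from conf->device_lock to RCU, with rdev->nr_pending pinning a path across the unlocked region and synchronize_kernel() closing the hot-remove race. Below is a condensed sketch of the two sides of that pattern, assembled from the patched multipath_map() and multipath_remove_disk() bodies; it is illustrative only, not a standalone compilable unit, and helpers such as rdev_dec_pending() come from the md core.

        /* Reader side: pick an operational path without taking conf->device_lock. */
        rcu_read_lock();
        for (i = 0; i < disks; i++) {
                mdk_rdev_t *rdev = conf->multipaths[i].rdev;
                if (rdev && rdev->in_sync) {
                        atomic_inc(&rdev->nr_pending); /* pin the path before leaving the read side */
                        rcu_read_unlock();
                        return i;
                }
        }
        rcu_read_unlock();

        /* Writer side: detach a path, wait for readers, then re-check the pin count. */
        p->rdev = NULL;
        synchronize_kernel();          /* no reader can still see the old pointer */
        if (atomic_read(&rdev->nr_pending)) {
                /* a reader pinned the path first: restore it and report busy */
                p->rdev = rdev;
                err = -EBUSY;
        }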