static mdk_personality_t multipath_personality;
-static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
-struct multipath_bh *multipath_retry_list = NULL, **multipath_retry_tail;
static void *mp_pool_alloc(int gfp_flags, void *data)
* now we use the first available disk.
*/
- spin_lock_irq(&conf->device_lock);
+ rcu_read_lock();
for (i = 0; i < disks; i++) {
mdk_rdev_t *rdev = conf->multipaths[i].rdev;
if (rdev && rdev->in_sync) {
atomic_inc(&rdev->nr_pending);
- spin_unlock_irq(&conf->device_lock);
+ rcu_read_unlock();
return i;
}
}
- spin_unlock_irq(&conf->device_lock);
+ rcu_read_unlock();
printk(KERN_ERR "multipath_map(): no more operational IO paths?\n");
return (-1);
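The hunk above is the standard RCU read-side pattern: find a live path inside the read-side critical section, pin it by bumping nr_pending, and only then drop rcu_read_lock(), so the rdev cannot be torn down while the caller still uses it. A minimal sketch of the pattern, with obj/slot/refcnt as hypothetical stand-ins for rdev/multipaths[i]/nr_pending:

	/* Sketch only, not part of the patch: pin-before-unlock under RCU. */
	rcu_read_lock();
	obj = slot->ptr;
	if (obj && obj->alive) {
		atomic_inc(&obj->refcnt);	/* pin while still a reader */
		rcu_read_unlock();
		return obj;			/* caller drops the pin later */
	}
	rcu_read_unlock();
	return NULL;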
{
unsigned long flags;
mddev_t *mddev = mp_bh->mddev;
+ multipath_conf_t *conf = mddev_to_conf(mddev);
- spin_lock_irqsave(&retry_list_lock, flags);
- if (multipath_retry_list == NULL)
- multipath_retry_tail = &multipath_retry_list;
- *multipath_retry_tail = mp_bh;
- multipath_retry_tail = &mp_bh->next_mp;
- mp_bh->next_mp = NULL;
- spin_unlock_irqrestore(&retry_list_lock, flags);
+ spin_lock_irqsave(&conf->device_lock, flags);
+ list_add(&mp_bh->retry_list, &conf->retry_list);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
md_wakeup_thread(mddev->thread);
}
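This hunk assumes matching structure changes in multipath.h: multipath_bh trades its next_mp pointer for a list_head, and multipath_conf_t gains the list it chains onto, with device_lock now also covering the retry list. A sketch of the assumed additions (surrounding members are illustrative, not a verbatim header):

	struct multipath_bh {
		mddev_t			*mddev;
		struct bio		*master_bio;
		struct bio		bio;
		int			path;
		struct list_head	retry_list;	/* was: next_mp */
	};

	struct multipath_private_data {
		/* ... existing members (mddev, multipaths, raid_disks, pool) ... */
		spinlock_t		device_lock;	/* now guards retry_list too */
		struct list_head	retry_list;	/* replaces the global list */
	};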
* operation and are ready to return a success/failure code to the buffer
* cache layer.
*/
-static void multipath_end_bh_io (struct multipath_bh *mp_bh, int uptodate)
+static void multipath_end_bh_io(struct multipath_bh *mp_bh, int err)
{
struct bio *bio = mp_bh->master_bio;
multipath_conf_t *conf = mddev_to_conf(mp_bh->mddev);
- bio_endio(bio, bio->bi_size, uptodate ? 0 : -EIO);
+ bio_endio(bio, bio->bi_size, err);
mempool_free(mp_bh, conf->pool);
}
return 1;
if (uptodate)
- multipath_end_bh_io(mp_bh, uptodate);
- else {
+ multipath_end_bh_io(mp_bh, 0);
+ else if (!bio_rw_ahead(bio)) {
/*
* oops, IO error:
*/
bdevname(rdev->bdev,b),
(unsigned long long)bio->bi_sector);
multipath_reschedule_retry(mp_bh);
- }
+ } else
+ multipath_end_bh_io(mp_bh, error);
rdev_dec_pending(rdev, conf->mddev);
return 0;
}
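The new else-if keeps advisory read-ahead failures from triggering a path failover: a failed read-ahead completes immediately with the error, while a real read error is requeued for retry on another path. The test is a one-line macro in <linux/bio.h> of this era (quoted from memory; verify against your tree):

	#define bio_rw_ahead(bio)	((bio)->bi_rw & (1 << BIO_RW_AHEAD))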
{
multipath_conf_t *conf = mddev_to_conf(mddev);
int i;
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
+ rcu_read_lock();
for (i=0; i<mddev->raid_disks; i++) {
mdk_rdev_t *rdev = conf->multipaths[i].rdev;
- if (rdev && !rdev->faulty) {
+ if (rdev && !rdev->faulty && atomic_read(&rdev->nr_pending)) {
request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
atomic_inc(&rdev->nr_pending);
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ rcu_read_unlock();
if (r_queue->unplug_fn)
r_queue->unplug_fn(r_queue);
- spin_lock_irqsave(&conf->device_lock, flags);
- atomic_dec(&rdev->nr_pending);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
}
}
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ rcu_read_unlock();
}
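unplug_slaves() uses the same pin/drop/re-lock dance as multipath_map(): nr_pending holds the rdev across the unplug_fn call, which may block, and rdev_dec_pending() drops the pin afterwards. Roughly what that helper does in the md headers of this era (a sketch from memory, not part of the patch):

	static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
	{
		if (atomic_dec_and_test(&rdev->nr_pending) && rdev->faulty) {
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			md_wakeup_thread(mddev->thread);
		}
	}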
+
static void multipath_unplug(request_queue_t *q)
{
unplug_slaves(q->queuedata);
multipath = conf->multipaths + mp_bh->path;
mp_bh->bio = *bio;
+ mp_bh->bio.bi_sector += multipath->rdev->data_offset;
mp_bh->bio.bi_bdev = multipath->rdev->bdev;
mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST);
mp_bh->bio.bi_end_io = multipath_end_request;
seq_printf (seq, "]");
}
+static int multipath_issue_flush(request_queue_t *q, struct gendisk *disk,
+ sector_t *error_sector)
+{
+ mddev_t *mddev = q->queuedata;
+ multipath_conf_t *conf = mddev_to_conf(mddev);
+ int i, ret = 0;
+
+ rcu_read_lock();
+ for (i=0; i<mddev->raid_disks && ret == 0; i++) {
+ mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+ if (rdev && !rdev->faulty) {
+ struct block_device *bdev = rdev->bdev;
+ request_queue_t *r_queue = bdev_get_queue(bdev);
+
+ if (!r_queue->issue_flush_fn)
+ ret = -EOPNOTSUPP;
+ else {
+ atomic_inc(&rdev->nr_pending);
+ rcu_read_unlock();
+ ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
+ error_sector);
+ rdev_dec_pending(rdev, mddev);
+ rcu_read_lock();
+ }
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
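The hook is reached via blkdev_issue_flush(), which calls the queue's issue_flush_fn; a flush against the array now fans out to every operational path, and the first path lacking flush support fails the whole call. A caller-side sketch (md_bdev is a hypothetical name for the array's block device):

	sector_t error_sector;
	int err = blkdev_issue_flush(md_bdev, &error_sector);
	if (err == -EOPNOTSUPP)
		;	/* some path cannot flush; fall back to draining I/O */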
/*
* Careful, this can execute in IRQ contexts as well!
struct multipath_info *p;
print_multipath_conf(conf);
- spin_lock_irq(&conf->device_lock);
+
for (path=0; path<mddev->raid_disks; path++)
if ((p=conf->multipaths+path)->rdev == NULL) {
- p->rdev = rdev;
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
mddev->queue->max_sectors > (PAGE_SIZE>>9))
- mddev->queue->max_sectors = (PAGE_SIZE>>9);
+ blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
conf->working_disks++;
rdev->raid_disk = path;
rdev->in_sync = 1;
+ p->rdev = rdev;
found = 1;
}
- spin_unlock_irq(&conf->device_lock);
print_multipath_conf(conf);
return found;
static int multipath_remove_disk(mddev_t *mddev, int number)
{
multipath_conf_t *conf = mddev->private;
- int err = 1;
+ int err = 0;
+ mdk_rdev_t *rdev;
struct multipath_info *p = conf->multipaths + number;
print_multipath_conf(conf);
- spin_lock_irq(&conf->device_lock);
- if (p->rdev) {
- if (p->rdev->in_sync ||
- atomic_read(&p->rdev->nr_pending)) {
+ rdev = p->rdev;
+ if (rdev) {
+ if (rdev->in_sync ||
+ atomic_read(&rdev->nr_pending)) {
printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number);
err = -EBUSY;
goto abort;
}
p->rdev = NULL;
- err = 0;
+ synchronize_kernel();
+ if (atomic_read(&rdev->nr_pending)) {
+ /* lost the race, try later */
+ err = -EBUSY;
+ p->rdev = rdev;
+ }
}
- if (err)
- MD_BUG();
abort:
- spin_unlock_irq(&conf->device_lock);
print_multipath_conf(conf);
return err;
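Removal now follows the classic RCU unpublish sequence: clear the published pointer, wait a grace period (synchronize_kernel(), later renamed synchronize_rcu()), then check whether a reader pinned the device before the clear became visible, republishing on -EBUSY. The same sequence reduced to a skeleton with hypothetical names:

	slot->ptr = NULL;			/* 1. unpublish */
	synchronize_kernel();			/* 2. wait out current readers */
	if (atomic_read(&obj->refcnt)) {	/* 3. someone pinned it first */
		slot->ptr = obj;		/*    republish, retry later */
		return -EBUSY;
	}
	/* no reader can hold obj any more; safe to tear down */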
struct bio *bio;
unsigned long flags;
multipath_conf_t *conf = mddev_to_conf(mddev);
+ struct list_head *head = &conf->retry_list;
md_check_recovery(mddev);
for (;;) {
char b[BDEVNAME_SIZE];
- spin_lock_irqsave(&retry_list_lock, flags);
- mp_bh = multipath_retry_list;
- if (!mp_bh)
+ spin_lock_irqsave(&conf->device_lock, flags);
+ if (list_empty(head))
break;
- multipath_retry_list = mp_bh->next_mp;
- spin_unlock_irqrestore(&retry_list_lock, flags);
+ mp_bh = list_entry(head->prev, struct multipath_bh, retry_list);
+ list_del(head->prev);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
- mddev = mp_bh->mddev;
bio = &mp_bh->bio;
bio->bi_sector = mp_bh->master_bio->bi_sector;
" error for block %llu\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_sector);
- multipath_end_bh_io(mp_bh, 0);
+ multipath_end_bh_io(mp_bh, -EIO);
} else {
printk(KERN_ERR "multipath: %s: redirecting sector %llu"
" to another IO path\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_sector);
+ *bio = *(mp_bh->master_bio);
+ bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset;
bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
+ bio->bi_rw |= (1 << BIO_RW_FAILFAST);
+ bio->bi_end_io = multipath_end_request;
+ bio->bi_private = mp_bh;
generic_make_request(bio);
}
}
- spin_unlock_irqrestore(&retry_list_lock, flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
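Producers list_add() at the head under device_lock while multipathd consumes from the tail (head->prev), so retries drain in FIFO order. A hypothetical helper, not in the patch, isolating the dequeue step:

	static struct multipath_bh *multipath_retry_dequeue(multipath_conf_t *conf)
	{
		struct multipath_bh *mp_bh = NULL;
		unsigned long flags;

		spin_lock_irqsave(&conf->device_lock, flags);
		if (!list_empty(&conf->retry_list)) {
			mp_bh = list_entry(conf->retry_list.prev,
					   struct multipath_bh, retry_list);
			list_del(&mp_bh->retry_list);
		}
		spin_unlock_irqrestore(&conf->device_lock, flags);
		return mp_bh;
	}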
static int multipath_run (mddev_t *mddev)
mddev->queue->unplug_fn = multipath_unplug;
+ mddev->queue->issue_flush_fn = multipath_issue_flush;
+
conf->working_disks = 0;
ITERATE_RDEV(mddev,rdev,tmp) {
disk_idx = rdev->raid_disk;
* a merge_bvec_fn to be involved in multipath */
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
mddev->queue->max_sectors > (PAGE_SIZE>>9))
- mddev->queue->max_sectors = (PAGE_SIZE>>9);
+ blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!rdev->faulty)
conf->working_disks++;
conf->raid_disks = mddev->raid_disks;
mddev->sb_dirty = 1;
conf->mddev = mddev;
- conf->device_lock = SPIN_LOCK_UNLOCKED;
+ spin_lock_init(&conf->device_lock);
+ INIT_LIST_HEAD(&conf->retry_list);
if (!conf->working_disks) {
printk(KERN_ERR "multipath: no operational IO paths for %s\n",
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
+ blk_sync_queue(mddev->queue); /* the unplug fn references 'conf' */
mempool_destroy(conf->pool);
kfree(conf->multipaths);
kfree(conf);