X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid1.c;h=f0b7caa4a878b08b4c4ea0af2cbaf60d4c9d79c1;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=3b4d69c0562301b18039657ac0d47261a4eed6af;hpb=3944158a6d33f94668dbd6bdc32ff5c67bb53ec2;p=linux-2.6.git diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3b4d69c05..f0b7caa4a 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -315,11 +315,10 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int if (r1_bio->bios[mirror] == bio) break; - if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { + if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); set_bit(R1BIO_BarrierRetry, &r1_bio->state); r1_bio->mddev->barriers_work = 0; - /* Don't rdev_dec_pending in this branch - keep it for the retry */ } else { /* * this branch is our 'one mirror IO has finished' event handler: @@ -366,7 +365,6 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int } } } - rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } /* * @@ -374,26 +372,30 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int * already. */ if (atomic_dec_and_test(&r1_bio->remaining)) { - if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) + if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { reschedule_retry(r1_bio); - else { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = bio->bi_vcnt; - while (i--) - safe_put_page(bio->bi_io_vec[i].bv_page); - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - behind); - md_write_end(r1_bio->mddev); - raid_end_bio_io(r1_bio); + /* Don't dec_pending yet, we want to hold + * the reference over the retry + */ + goto out; } + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + /* free extra copy of the data pages */ + int i = bio->bi_vcnt; + while (i--) + safe_put_page(bio->bi_io_vec[i].bv_page); + } + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + behind); + md_write_end(r1_bio->mddev); + raid_end_bio_io(r1_bio); } + rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); + out: if (to_put) bio_put(to_put); @@ -751,24 +753,18 @@ static int make_request(request_queue_t *q, struct bio * bio) const int rw = bio_data_dir(bio); int do_barriers; + if (unlikely(!mddev->barriers_work && bio_barrier(bio))) { + bio_endio(bio, bio->bi_size, -EOPNOTSUPP); + return 0; + } + /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. * Continue immediately if no resync is active currently. - * We test barriers_work *after* md_write_start as md_write_start - * may cause the first superblock write, and that will check out - * if barriers work. */ - md_write_start(mddev, bio); /* wait on superblock update early */ - if (unlikely(!mddev->barriers_work && bio_barrier(bio))) { - if (rw == WRITE) - md_write_end(mddev); - bio_endio(bio, bio->bi_size, -EOPNOTSUPP); - return 0; - } - wait_barrier(conf); disk_stat_inc(mddev->gendisk, ios[rw]); @@ -930,13 +926,10 @@ static void status(struct seq_file *seq, mddev_t *mddev) seq_printf(seq, " [%d/%d] [", conf->raid_disks, conf->working_disks); - rcu_read_lock(); - for (i = 0; i < conf->raid_disks; i++) { - mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); + for (i = 0; i < conf->raid_disks; i++) seq_printf(seq, "%s", - rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); - } - rcu_read_unlock(); + conf->mirrors[i].rdev && + test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_"); seq_printf(seq, "]"); } @@ -978,6 +971,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) static void print_conf(conf_t *conf) { int i; + mirror_info_t *tmp; printk("RAID1 conf printout:\n"); if (!conf) { @@ -987,17 +981,14 @@ static void print_conf(conf_t *conf) printk(" --- wd:%d rd:%d\n", conf->working_disks, conf->raid_disks); - rcu_read_lock(); for (i = 0; i < conf->raid_disks; i++) { char b[BDEVNAME_SIZE]; - mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); - if (rdev) + tmp = conf->mirrors + i; + if (tmp->rdev) printk(" disk %d, wo:%d, o:%d, dev:%s\n", - i, !test_bit(In_sync, &rdev->flags), - !test_bit(Faulty, &rdev->flags), - bdevname(rdev->bdev,b)); + i, !test_bit(In_sync, &tmp->rdev->flags), !test_bit(Faulty, &tmp->rdev->flags), + bdevname(tmp->rdev->bdev,b)); } - rcu_read_unlock(); } static void close_sync(conf_t *conf) @@ -1013,20 +1004,20 @@ static int raid1_spare_active(mddev_t *mddev) { int i; conf_t *conf = mddev->private; + mirror_info_t *tmp; /* * Find all failed disks within the RAID1 configuration - * and mark them readable. - * Called under mddev lock, so rcu protection not needed. + * and mark them readable */ for (i = 0; i < conf->raid_disks; i++) { - mdk_rdev_t *rdev = conf->mirrors[i].rdev; - if (rdev - && !test_bit(Faulty, &rdev->flags) - && !test_bit(In_sync, &rdev->flags)) { + tmp = conf->mirrors + i; + if (tmp->rdev + && !test_bit(Faulty, &tmp->rdev->flags) + && !test_bit(In_sync, &tmp->rdev->flags)) { conf->working_disks++; mddev->degraded--; - set_bit(In_sync, &rdev->flags); + set_bit(In_sync, &tmp->rdev->flags); } } @@ -1144,19 +1135,8 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) mirror = i; break; } - if (!uptodate) { - int sync_blocks = 0; - sector_t s = r1_bio->sector; - long sectors_to_go = r1_bio->sectors; - /* make sure these bits doesn't get cleared. */ - do { - bitmap_end_sync(mddev->bitmap, s, - &sync_blocks, 1); - s += sync_blocks; - sectors_to_go -= sync_blocks; - } while (sectors_to_go > 0); + if (!uptodate) md_error(mddev, conf->mirrors[mirror].rdev); - } update_head_pos(mirror, r1_bio); @@ -1242,7 +1222,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) /* ouch - failed to read all of that. * Try some synchronous reads of other devices to get * good data, much like with normal read errors. Only - * read into the pages we already have so we don't + * read into the pages we already have so they we don't * need to re-issue the read request. * We don't need to freeze the array, because being in an * active sync request, there is no normal IO, and @@ -1262,10 +1242,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) s = PAGE_SIZE >> 9; do { if (r1_bio->bios[d]->bi_end_io == end_sync_read) { - /* No rcu protection needed here devices - * can only be removed when no resync is - * active, and resync is currently active - */ rdev = conf->mirrors[d].rdev; if (sync_page_io(rdev->bdev, sect + rdev->data_offset, @@ -1417,18 +1393,14 @@ static void raid1d(mddev_t *mddev) unplug = 1; } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { /* some requests in the r1bio were BIO_RW_BARRIER - * requests which failed with -EOPNOTSUPP. Hohumm.. + * requests which failed with -ENOTSUPP. Hohumm.. * Better resubmit without the barrier. * We know which devices to resubmit for, because * all others have had their bios[] entry cleared. - * We already have a nr_pending reference on these rdevs. */ int i; clear_bit(R1BIO_BarrierRetry, &r1_bio->state); clear_bit(R1BIO_Barrier, &r1_bio->state); - for (i=0; i < conf->raid_disks; i++) - if (r1_bio->bios[i]) - atomic_inc(&r1_bio->remaining); for (i=0; i < conf->raid_disks; i++) if (r1_bio->bios[i]) { struct bio_vec *bvec; @@ -1472,11 +1444,6 @@ static void raid1d(mddev_t *mddev) s = PAGE_SIZE >> 9; do { - /* Note: no rcu protection needed here - * as this is synchronous in the raid1d thread - * which is the thread that might remove - * a device. If raid1d ever becomes multi-threaded.... - */ rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags) && @@ -1522,11 +1489,9 @@ static void raid1d(mddev_t *mddev) s<<9, conf->tmppage, READ) == 0) /* Well, this device is dead */ md_error(mddev, rdev); - else { + else atomic_add(s, &rdev->corrected_errors); - printk(KERN_INFO "raid1:%s: read error corrected (%d sectors at %llu on %s)\n", - mdname(mddev), s, (unsigned long long)(sect + rdev->data_offset), bdevname(rdev->bdev, b)); - } + } } } else { @@ -1581,7 +1546,8 @@ static int init_resync(conf_t *conf) int buffs; buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; - BUG_ON(conf->r1buf_pool); + if (conf->r1buf_pool) + BUG(); conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free, conf->poolinfo); if (!conf->r1buf_pool) @@ -1640,13 +1606,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return 0; } - if (mddev->bitmap == NULL && - mddev->recovery_cp == MaxSector && - !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && - conf->fullsync == 0) { - *skipped = 1; - return max_sector - sector_nr; - } /* before building a request, check if we can skip these blocks.. * This call the bitmap_start_sync doesn't actually record anything */ @@ -1761,7 +1720,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) break; - BUG_ON(sync_blocks < (PAGE_SIZE>>9)); + if (sync_blocks < (PAGE_SIZE>>9)) + BUG(); if (len > (sync_blocks<<9)) len = sync_blocks<<9; } @@ -1802,17 +1762,19 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i for (i=0; iraid_disks; i++) { bio = r1_bio->bios[i]; if (bio->bi_end_io == end_sync_read) { - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors); generic_make_request(bio); } } } else { atomic_set(&r1_bio->remaining, 1); bio = r1_bio->bios[r1_bio->read_disk]; - md_sync_acct(bio->bi_bdev, nr_sectors); + md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, + nr_sectors); generic_make_request(bio); } + return nr_sectors; } @@ -1829,11 +1791,6 @@ static int run(mddev_t *mddev) mdname(mddev), mddev->level); goto out; } - if (mddev->reshape_position != MaxSector) { - printk("raid1: %s: reshape_position set but not supported\n", - mdname(mddev)); - goto out; - } /* * copy the already verified devices into our private RAID1 * bookkeeping area. [whatever we allocate in run(), @@ -1911,8 +1868,7 @@ static int run(mddev_t *mddev) disk = conf->mirrors + i; - if (!disk->rdev || - !test_bit(In_sync, &disk->rdev->flags)) { + if (!disk->rdev) { disk->head_position = 0; mddev->degraded++; } @@ -2017,7 +1973,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) return 0; } -static int raid1_reshape(mddev_t *mddev) +static int raid1_reshape(mddev_t *mddev, int raid_disks) { /* We need to: * 1/ resize the r1bio_pool @@ -2034,22 +1990,10 @@ static int raid1_reshape(mddev_t *mddev) struct pool_info *newpoolinfo; mirror_info_t *newmirrors; conf_t *conf = mddev_to_conf(mddev); - int cnt, raid_disks; + int cnt; int d, d2; - /* Cannot change chunk_size, layout, or level */ - if (mddev->chunk_size != mddev->new_chunk || - mddev->layout != mddev->new_layout || - mddev->level != mddev->new_level) { - mddev->new_chunk = mddev->chunk_size; - mddev->new_layout = mddev->layout; - mddev->new_level = mddev->level; - return -EINVAL; - } - - raid_disks = mddev->raid_disks + mddev->delta_disks; - if (raid_disks < conf->raid_disks) { cnt=0; for (d= 0; d < conf->raid_disks; d++) @@ -2096,7 +2040,6 @@ static int raid1_reshape(mddev_t *mddev) mddev->degraded += (raid_disks - conf->raid_disks); conf->raid_disks = mddev->raid_disks = raid_disks; - mddev->delta_disks = 0; conf->last_used = 0; /* just make sure it is in-range */ lower_barrier(conf); @@ -2138,7 +2081,7 @@ static struct mdk_personality raid1_personality = .spare_active = raid1_spare_active, .sync_request = sync_request, .resize = raid1_resize, - .check_reshape = raid1_reshape, + .reshape = raid1_reshape, .quiesce = raid1_quiesce, };