X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fmd%2Fraid1.c;h=f0b7caa4a878b08b4c4ea0af2cbaf60d4c9d79c1;hb=987b0145d94eecf292d8b301228356f44611ab7c;hp=3b4d69c0562301b18039657ac0d47261a4eed6af;hpb=3944158a6d33f94668dbd6bdc32ff5c67bb53ec2;p=linux-2.6.git

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3b4d69c05..f0b7caa4a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -315,11 +315,10 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
+	if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
 		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
 		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
 		r1_bio->mddev->barriers_work = 0;
-		/* Don't rdev_dec_pending in this branch - keep it for the retry */
 	} else {
 		/*
 		 * this branch is our 'one mirror IO has finished' event handler:
@@ -366,7 +365,6 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 				}
 			}
 		}
-		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 	}
 	/*
 	 *
@@ -374,26 +372,30 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state))
+		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 			reschedule_retry(r1_bio);
-		else {
-			/* it really is the end of this request */
-			if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-				/* free extra copy of the data pages */
-				int i = bio->bi_vcnt;
-				while (i--)
-					safe_put_page(bio->bi_io_vec[i].bv_page);
-			}
-			/* clear the bitmap if all writes complete successfully */
-			bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-					r1_bio->sectors,
-					!test_bit(R1BIO_Degraded, &r1_bio->state),
-					behind);
-			md_write_end(r1_bio->mddev);
-			raid_end_bio_io(r1_bio);
+			/* Don't dec_pending yet, we want to hold
+			 * the reference over the retry
+			 */
+			goto out;
 		}
+		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+			/* free extra copy of the data pages */
+			int i = bio->bi_vcnt;
+			while (i--)
+				safe_put_page(bio->bi_io_vec[i].bv_page);
+		}
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+				r1_bio->sectors,
+				!test_bit(R1BIO_Degraded, &r1_bio->state),
+				behind);
+		md_write_end(r1_bio->mddev);
+		raid_end_bio_io(r1_bio);
 	}
 
+	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+ out:
 	if (to_put)
 		bio_put(to_put);
 
@@ -751,24 +753,18 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	int do_barriers;
 
+	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
+		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+		return 0;
+	}
+
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
-	 * We test barriers_work *after* md_write_start as md_write_start
-	 * may cause the first superblock write, and that will check out
-	 * if barriers work.
 	 */
-
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
-		if (rw == WRITE)
-			md_write_end(mddev);
-		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
-		return 0;
-	}
-
 	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
@@ -930,13 +926,10 @@ static void status(struct seq_file *seq, mddev_t *mddev)
 
 	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
 						conf->working_disks);
-	rcu_read_lock();
-	for (i = 0; i < conf->raid_disks; i++) {
-		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+	for (i = 0; i < conf->raid_disks; i++)
 		seq_printf(seq, "%s",
-			   rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
-	}
-	rcu_read_unlock();
+			      conf->mirrors[i].rdev &&
+			      test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
 	seq_printf(seq, "]");
 }
 
@@ -978,6 +971,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 static void print_conf(conf_t *conf)
 {
 	int i;
+	mirror_info_t *tmp;
 
 	printk("RAID1 conf printout:\n");
 	if (!conf) {
@@ -987,17 +981,14 @@ static void print_conf(conf_t *conf)
 	printk(" --- wd:%d rd:%d\n", conf->working_disks,
 		conf->raid_disks);
 
-	rcu_read_lock();
 	for (i = 0; i < conf->raid_disks; i++) {
 		char b[BDEVNAME_SIZE];
-		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		if (rdev)
+		tmp = conf->mirrors + i;
+		if (tmp->rdev)
 			printk(" disk %d, wo:%d, o:%d, dev:%s\n",
-			       i, !test_bit(In_sync, &rdev->flags),
-			       !test_bit(Faulty, &rdev->flags),
-			       bdevname(rdev->bdev,b));
+				i, !test_bit(In_sync, &tmp->rdev->flags), !test_bit(Faulty, &tmp->rdev->flags),
+				bdevname(tmp->rdev->bdev,b));
 	}
-	rcu_read_unlock();
 }
 
 static void close_sync(conf_t *conf)
@@ -1013,20 +1004,20 @@ static int raid1_spare_active(mddev_t *mddev)
 {
 	int i;
 	conf_t *conf = mddev->private;
+	mirror_info_t *tmp;
 
 	/*
 	 * Find all failed disks within the RAID1 configuration 
-	 * and mark them readable.
-	 * Called under mddev lock, so rcu protection not needed.
+	 * and mark them readable
 	 */
 	for (i = 0; i < conf->raid_disks; i++) {
-		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
-		if (rdev
-		    && !test_bit(Faulty, &rdev->flags)
-		    && !test_bit(In_sync, &rdev->flags)) {
+		tmp = conf->mirrors + i;
+		if (tmp->rdev 
+		    && !test_bit(Faulty, &tmp->rdev->flags)
+		    && !test_bit(In_sync, &tmp->rdev->flags)) {
 			conf->working_disks++;
 			mddev->degraded--;
-			set_bit(In_sync, &rdev->flags);
+			set_bit(In_sync, &tmp->rdev->flags);
 		}
 	}
 
@@ -1144,19 +1135,8 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
 			mirror = i;
 			break;
 		}
-	if (!uptodate) {
-		int sync_blocks = 0;
-		sector_t s = r1_bio->sector;
-		long sectors_to_go = r1_bio->sectors;
-		/* make sure these bits doesn't get cleared. */
-		do {
-			bitmap_end_sync(mddev->bitmap, s,
-					&sync_blocks, 1);
-			s += sync_blocks;
-			sectors_to_go -= sync_blocks;
-		} while (sectors_to_go > 0);
+	if (!uptodate)
 		md_error(mddev, conf->mirrors[mirror].rdev);
-	}
 
 	update_head_pos(mirror, r1_bio);
 
@@ -1242,7 +1222,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		/* ouch - failed to read all of that.
 		 * Try some synchronous reads of other devices to get
 		 * good data, much like with normal read errors.  Only
-		 * read into the pages we already have so we don't
+		 * read into the pages we already have so they we don't
 		 * need to re-issue the read request.
 		 * We don't need to freeze the array, because being in an
 		 * active sync request, there is no normal IO, and
@@ -1262,10 +1242,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 				s = PAGE_SIZE >> 9;
 			do {
 				if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
-					/* No rcu protection needed here devices
-					 * can only be removed when no resync is
-					 * active, and resync is currently active
-					 */
 					rdev = conf->mirrors[d].rdev;
 					if (sync_page_io(rdev->bdev,
 							 sect + rdev->data_offset,
@@ -1417,18 +1393,14 @@ static void raid1d(mddev_t *mddev)
 			unplug = 1;
 		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 			/* some requests in the r1bio were BIO_RW_BARRIER
-			 * requests which failed with -EOPNOTSUPP.  Hohumm..
+			 * requests which failed with -ENOTSUPP.  Hohumm..
 			 * Better resubmit without the barrier.
 			 * We know which devices to resubmit for, because
 			 * all others have had their bios[] entry cleared.
-			 * We already have a nr_pending reference on these rdevs.
 			 */
 			int i;
 			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
 			clear_bit(R1BIO_Barrier, &r1_bio->state);
-			for (i=0; i < conf->raid_disks; i++)
-				if (r1_bio->bios[i])
-					atomic_inc(&r1_bio->remaining);
 			for (i=0; i < conf->raid_disks; i++)
 				if (r1_bio->bios[i]) {
 					struct bio_vec *bvec;
@@ -1472,11 +1444,6 @@ static void raid1d(mddev_t *mddev)
 					s = PAGE_SIZE >> 9;
 
 				do {
-					/* Note: no rcu protection needed here
-					 * as this is synchronous in the raid1d thread
-					 * which is the thread that might remove
-					 * a device.  If raid1d ever becomes multi-threaded....
-					 */
 					rdev = conf->mirrors[d].rdev;
 					if (rdev &&
 					    test_bit(In_sync, &rdev->flags) &&
@@ -1522,11 +1489,9 @@ static void raid1d(mddev_t *mddev)
 									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
-							else {
+							else
 								atomic_add(s, &rdev->corrected_errors);
-								printk(KERN_INFO "raid1:%s: read error corrected (%d sectors at %llu on %s)\n",
-								       mdname(mddev), s, (unsigned long long)(sect + rdev->data_offset), bdevname(rdev->bdev, b));
-							}
+
 						}
 					}
 				} else {
@@ -1581,7 +1546,8 @@ static int init_resync(conf_t *conf)
 	int buffs;
 
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
-	BUG_ON(conf->r1buf_pool);
+	if (conf->r1buf_pool)
+		BUG();
 	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
 					  conf->poolinfo);
 	if (!conf->r1buf_pool)
@@ -1640,13 +1606,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return 0;
 	}
 
-	if (mddev->bitmap == NULL &&
-	    mddev->recovery_cp == MaxSector &&
-	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
-	    conf->fullsync == 0) {
-		*skipped = 1;
-		return max_sector - sector_nr;
-	}
 	/* before building a request, check if we can skip these blocks..
 	 * This call the bitmap_start_sync doesn't actually record anything
 	 */
@@ -1761,7 +1720,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 			    !conf->fullsync &&
 			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 				break;
-			BUG_ON(sync_blocks < (PAGE_SIZE>>9));
+			if (sync_blocks < (PAGE_SIZE>>9))
+				BUG();
 			if (len > (sync_blocks<<9))
 				len = sync_blocks<<9;
 		}
@@ -1802,17 +1762,19 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		for (i=0; i<conf->raid_disks; i++) {
 			bio = r1_bio->bios[i];
 			if (bio->bi_end_io == end_sync_read) {
-				md_sync_acct(bio->bi_bdev, nr_sectors);
+				md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
 				generic_make_request(bio);
 			}
 		}
 	} else {
 		atomic_set(&r1_bio->remaining, 1);
 		bio = r1_bio->bios[r1_bio->read_disk];
-		md_sync_acct(bio->bi_bdev, nr_sectors);
+		md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
+			     nr_sectors);
 		generic_make_request(bio);
 
 	}
+
 	return nr_sectors;
 }
 
@@ -1829,11 +1791,6 @@ static int run(mddev_t *mddev)
 		       mdname(mddev), mddev->level);
 		goto out;
 	}
-	if (mddev->reshape_position != MaxSector) {
-		printk("raid1: %s: reshape_position set but not supported\n",
-		       mdname(mddev));
-		goto out;
-	}
 	/*
 	 * copy the already verified devices into our private RAID1
 	 * bookkeeping area. [whatever we allocate in run(),
@@ -1911,8 +1868,7 @@ static int run(mddev_t *mddev)
 
 		disk = conf->mirrors + i;
 
-		if (!disk->rdev ||
-		    !test_bit(In_sync, &disk->rdev->flags)) {
+		if (!disk->rdev) {
 			disk->head_position = 0;
 			mddev->degraded++;
 		}
@@ -2017,7 +1973,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
 	return 0;
 }
 
-static int raid1_reshape(mddev_t *mddev)
+static int raid1_reshape(mddev_t *mddev, int raid_disks)
 {
 	/* We need to:
 	 * 1/ resize the r1bio_pool
@@ -2034,22 +1990,10 @@ static int raid1_reshape(mddev_t *mddev)
 	struct pool_info *newpoolinfo;
 	mirror_info_t *newmirrors;
 	conf_t *conf = mddev_to_conf(mddev);
-	int cnt, raid_disks;
+	int cnt;
 
 	int d, d2;
 
-	/* Cannot change chunk_size, layout, or level */
-	if (mddev->chunk_size != mddev->new_chunk ||
-	    mddev->layout != mddev->new_layout ||
-	    mddev->level != mddev->new_level) {
-		mddev->new_chunk = mddev->chunk_size;
-		mddev->new_layout = mddev->layout;
-		mddev->new_level = mddev->level;
-		return -EINVAL;
-	}
-
-	raid_disks = mddev->raid_disks + mddev->delta_disks;
-
 	if (raid_disks < conf->raid_disks) {
 		cnt=0;
 		for (d= 0; d < conf->raid_disks; d++)
@@ -2096,7 +2040,6 @@ static int raid1_reshape(mddev_t *mddev)
 
 	mddev->degraded += (raid_disks - conf->raid_disks);
 	conf->raid_disks = mddev->raid_disks = raid_disks;
-	mddev->delta_disks = 0;
 
 	conf->last_used = 0; /* just make sure it is in-range */
 	lower_barrier(conf);
@@ -2138,7 +2081,7 @@ static struct mdk_personality raid1_personality =
 	.spare_active	= raid1_spare_active,
 	.sync_request	= sync_request,
 	.resize		= raid1_resize,
-	.check_reshape	= raid1_reshape,
+	.reshape	= raid1_reshape,
 	.quiesce	= raid1_quiesce,
 };