X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fblock%2Fll_rw_blk.c;h=3994af7e555b140ab0b658db8b4bf1d3741f7363;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=26fdf6be6bd0ceb299c5e45f8d9154bc068cc3bf;hpb=87fc8d1bb10cd459024a742c6a10961fefcef18f;p=linux-2.6.git

diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
index 26fdf6be6..3994af7e5 100644
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -100,7 +100,7 @@ static void blk_queue_congestion_threshold(struct request_queue *q)
 		nr = q->nr_requests;
 	q->nr_congestion_on = nr;
 
-	nr = q->nr_requests - (q->nr_requests / 8) - 1;
+	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
 	if (nr < 1)
 		nr = 1;
 	q->nr_congestion_off = nr;
@@ -243,6 +243,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	blk_queue_hardsect_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
+	q->nr_batching = BLK_BATCH_REQ;
 
 	q->unplug_thresh = 4;		/* hmm */
 	q->unplug_delay = (3 * HZ) / 1000;	/* 3 milliseconds */
@@ -260,6 +261,8 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
 
 	blk_queue_activity_fn(q, NULL, NULL);
+
+	INIT_LIST_HEAD(&q->drain_list);
 }
 
 EXPORT_SYMBOL(blk_queue_make_request);
@@ -1354,9 +1357,29 @@ void blk_stop_queue(request_queue_t *q)
 	blk_remove_plug(q);
 	set_bit(QUEUE_FLAG_STOPPED, &q->queue_flags);
 }
-
 EXPORT_SYMBOL(blk_stop_queue);
 
+/**
+ * blk_sync_queue - cancel any pending callbacks on a queue
+ * @q: the queue
+ *
+ * Description:
+ *     The block layer may perform asynchronous callback activity
+ *     on a queue, such as calling the unplug function after a timeout.
+ *     A block device may call blk_sync_queue to ensure that any
+ *     such activity is cancelled, thus allowing it to release resources
+ *     the callbacks might use. The caller must already have made sure
+ *     that its ->make_request_fn will not re-add plugging prior to calling
+ *     this function.
+ *
+ */
+void blk_sync_queue(struct request_queue *q)
+{
+	del_timer_sync(&q->unplug_timer);
+	kblockd_flush();
+}
+EXPORT_SYMBOL(blk_sync_queue);
+
 /**
  * blk_run_queue - run a single device queue
  * @q: The queue to run
@@ -1370,7 +1393,6 @@ void blk_run_queue(struct request_queue *q)
 		q->request_fn(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
-
 EXPORT_SYMBOL(blk_run_queue);
 
 /**
@@ -1395,10 +1417,10 @@ void blk_cleanup_queue(request_queue_t * q)
 	if (!atomic_dec_and_test(&q->refcnt))
 		return;
 
-	elevator_exit(q);
+	if (q->elevator)
+		elevator_exit(q->elevator);
 
-	del_timer_sync(&q->unplug_timer);
-	kblockd_flush();
+	blk_sync_queue(q);
 
 	if (rl->rq_pool)
 		mempool_destroy(rl->rq_pool);
@@ -1416,8 +1438,10 @@ static int blk_init_free_list(request_queue_t *q)
 	struct request_list *rl = &q->rq;
 
 	rl->count[READ] = rl->count[WRITE] = 0;
+	rl->starved[READ] = rl->starved[WRITE] = 0;
 	init_waitqueue_head(&rl->wait[READ]);
 	init_waitqueue_head(&rl->wait[WRITE]);
+	init_waitqueue_head(&rl->drain);
 
 	rl->rq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab,
 				mempool_free_slab, request_cachep);
@@ -1429,45 +1453,6 @@ static int blk_init_free_list(request_queue_t *q)
 
 static int __make_request(request_queue_t *, struct bio *);
 
-static elevator_t *chosen_elevator =
-#if defined(CONFIG_IOSCHED_AS)
-	&iosched_as;
-#elif defined(CONFIG_IOSCHED_DEADLINE)
-	&iosched_deadline;
-#elif defined(CONFIG_IOSCHED_CFQ)
-	&iosched_cfq;
-#elif defined(CONFIG_IOSCHED_NOOP)
-	&elevator_noop;
-#else
-	NULL;
-#error "You must have at least 1 I/O scheduler selected"
-#endif
-
-#if defined(CONFIG_IOSCHED_AS) || defined(CONFIG_IOSCHED_DEADLINE) || defined (CONFIG_IOSCHED_NOOP)
-static int __init elevator_setup(char *str)
-{
-#ifdef CONFIG_IOSCHED_DEADLINE
-	if (!strcmp(str, "deadline"))
-		chosen_elevator = &iosched_deadline;
-#endif
-#ifdef CONFIG_IOSCHED_AS
-	if (!strcmp(str, "as"))
-		chosen_elevator = &iosched_as;
-#endif
-#ifdef CONFIG_IOSCHED_CFQ
-	if (!strcmp(str, "cfq"))
-		chosen_elevator = &iosched_cfq;
-#endif
-#ifdef CONFIG_IOSCHED_NOOP
-	if (!strcmp(str, "noop"))
-		chosen_elevator = &elevator_noop;
-#endif
-	return 1;
-}
-
-__setup("elevator=", elevator_setup);
-#endif /* CONFIG_IOSCHED_AS || CONFIG_IOSCHED_DEADLINE || CONFIG_IOSCHED_NOOP */
-
 request_queue_t *blk_alloc_queue(int gfp_mask)
 {
 	request_queue_t *q = kmem_cache_alloc(requestq_cachep, gfp_mask);
@@ -1520,21 +1505,14 @@ EXPORT_SYMBOL(blk_alloc_queue);
  **/
 request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 {
-	request_queue_t *q;
-	static int printed;
+	request_queue_t *q = blk_alloc_queue(GFP_KERNEL);
 
-	q = blk_alloc_queue(GFP_KERNEL);
 	if (!q)
 		return NULL;
 
 	if (blk_init_free_list(q))
 		goto out_init;
 
-	if (!printed) {
-		printed = 1;
-		printk("Using %s io scheduler\n", chosen_elevator->elevator_name);
-	}
-
 	q->request_fn = rfn;
 	q->back_merge_fn = ll_back_merge_fn;
 	q->front_merge_fn = ll_front_merge_fn;
@@ -1555,8 +1533,10 @@ request_queue_t *blk_init_queue(request_fn_proc *rfn, spinlock_t *lock)
 	/*
 	 * all done
 	 */
-	if (!elevator_init(q, chosen_elevator))
+	if (!elevator_init(q, NULL)) {
+		blk_queue_congestion_threshold(q);
 		return q;
+	}
 
 	blk_cleanup_queue(q);
 out_init:
@@ -1584,13 +1564,20 @@ static inline void blk_free_request(request_queue_t *q, struct request *rq)
 	mempool_free(rq, q->rq.rq_pool);
 }
 
-static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
+static inline struct request *blk_alloc_request(request_queue_t *q, int rw,
+						int gfp_mask)
 {
 	struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
 
 	if (!rq)
 		return NULL;
 
+	/*
+	 * first three bits are identical in rq->flags and bio->bi_rw,
+	 * see bio.h and blkdev.h
+	 */
+	rq->flags = rw;
+
 	if (!elv_set_request(q, rq, gfp_mask))
 		return rq;
 
@@ -1602,7 +1589,7 @@ static inline struct request *blk_alloc_request(request_queue_t *q,int gfp_mask)
  * ioc_batching returns true if the ioc is a valid batching request and
  * should be given priority access to a request.
  */
-static inline int ioc_batching(struct io_context *ioc)
+static inline int ioc_batching(request_queue_t *q, struct io_context *ioc)
 {
 	if (!ioc)
 		return 0;
@@ -1612,7 +1599,7 @@ static inline int ioc_batching(struct io_context *ioc)
 	 * even if the batch times out, otherwise we could theoretically
 	 * lose wakeups.
 	 */
-	return ioc->nr_batch_requests == BLK_BATCH_REQ ||
+	return ioc->nr_batch_requests == q->nr_batching ||
 		(ioc->nr_batch_requests > 0 &&
 		time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
 }
@@ -1623,15 +1610,31 @@ static inline int ioc_batching(struct io_context *ioc)
  * is the behaviour we want though - once it gets a wakeup it should be given
 * a nice run.
 */
-void ioc_set_batching(struct io_context *ioc)
+void ioc_set_batching(request_queue_t *q, struct io_context *ioc)
 {
-	if (!ioc || ioc_batching(ioc))
+	if (!ioc || ioc_batching(q, ioc))
 		return;
 
-	ioc->nr_batch_requests = BLK_BATCH_REQ;
+	ioc->nr_batch_requests = q->nr_batching;
 	ioc->last_waited = jiffies;
 }
 
+static void __freed_request(request_queue_t *q, int rw)
+{
+	struct request_list *rl = &q->rq;
+
+	if (rl->count[rw] < queue_congestion_off_threshold(q))
+		clear_queue_congested(q, rw);
+
+	if (rl->count[rw] + 1 <= q->nr_requests) {
+		smp_mb();
+		if (waitqueue_active(&rl->wait[rw]))
+			wake_up(&rl->wait[rw]);
+
+		blk_clear_queue_full(q, rw);
+	}
+}
+
 /*
 * A request has just been released. Account for it, update the full and
 * congestion status, wake up any waiters. Called under q->queue_lock.
@@ -1641,13 +1644,16 @@ static void freed_request(request_queue_t *q, int rw)
 	struct request_list *rl = &q->rq;
 
 	rl->count[rw]--;
-	if (rl->count[rw] < queue_congestion_off_threshold(q))
-		clear_queue_congested(q, rw);
-	if (rl->count[rw]+1 <= q->nr_requests) {
-		if (waitqueue_active(&rl->wait[rw]))
-			wake_up(&rl->wait[rw]);
-		if (!waitqueue_active(&rl->wait[rw]))
-			blk_clear_queue_full(q, rw);
+
+	__freed_request(q, rw);
+
+	if (unlikely(rl->starved[rw ^ 1]))
+		__freed_request(q, rw ^ 1);
+
+	if (!rl->count[READ] && !rl->count[WRITE]) {
+		smp_mb();
+		if (unlikely(waitqueue_active(&rl->drain)))
+			wake_up(&rl->drain);
 	}
 }
 
@@ -1661,6 +1667,9 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = get_io_context(gfp_mask);
 
+	if (unlikely(test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)))
+		goto out;
+
 	spin_lock_irq(q->queue_lock);
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
@@ -1670,13 +1679,21 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		 * will be blocked.
 		 */
 		if (!blk_queue_full(q, rw)) {
-			ioc_set_batching(ioc);
+			ioc_set_batching(q, ioc);
 			blk_set_queue_full(q, rw);
 		}
 	}
 
-	if (blk_queue_full(q, rw)
-			&& !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
+	switch (elv_may_queue(q, rw)) {
+		case ELV_MQUEUE_NO:
+			goto rq_starved;
+		case ELV_MQUEUE_MAY:
+			break;
+		case ELV_MQUEUE_MUST:
+			goto get_rq;
+	}
+
+	if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) {
 		/*
 		 * The queue is full and the allocating process is not a
 		 * "batcher", and not exempted by the IO scheduler
@@ -1685,12 +1702,14 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		goto out;
 	}
 
+get_rq:
 	rl->count[rw]++;
+	rl->starved[rw] = 0;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
 	spin_unlock_irq(q->queue_lock);
 
-	rq = blk_alloc_request(q, gfp_mask);
+	rq = blk_alloc_request(q, rw, gfp_mask);
 	if (!rq) {
 		/*
 		 * Allocation failed presumably due to memory. Undo anything
@@ -1701,21 +1720,27 @@ static struct request *get_request(request_queue_t *q, int rw, int gfp_mask)
 		 */
 		spin_lock_irq(q->queue_lock);
 		freed_request(q, rw);
+
+		/*
+		 * in the very unlikely event that allocation failed and no
+		 * requests for this direction were pending, mark us starved
+		 * so that freeing of a request in the other direction will
+		 * notice us. another possible fix would be to split the
+		 * rq mempool into READ and WRITE
+		 */
+rq_starved:
+		if (unlikely(rl->count[rw] == 0))
+			rl->starved[rw] = 1;
+
 		spin_unlock_irq(q->queue_lock);
 		goto out;
 	}
 
-	if (ioc_batching(ioc))
+	if (ioc_batching(q, ioc))
 		ioc->nr_batch_requests--;
 
 	INIT_LIST_HEAD(&rq->queuelist);
 
-	/*
-	 * first three bits are identical in rq->flags and bio->bi_rw,
-	 * see bio.h and blkdev.h
-	 */
-	rq->flags = rw;
-
 	rq->errors = 0;
 	rq->rq_status = RQ_ACTIVE;
 	rq->bio = rq->biotail = NULL;
@@ -1764,7 +1789,7 @@ static struct request *get_request_wait(request_queue_t *q, int rw)
 			 * See ioc_batching, ioc_set_batching
 			 */
 			ioc = get_io_context(GFP_NOIO);
-			ioc_set_batching(ioc);
+			ioc_set_batching(q, ioc);
 			put_io_context(ioc);
 		}
 		finish_wait(&rl->wait[rw], &wait);
@@ -2082,13 +2107,13 @@ void drive_stat_acct(struct request *rq, int nr_sectors, int new_io)
 		return;
 
 	if (rw == READ) {
-		disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
+		__disk_stat_add(rq->rq_disk, read_sectors, nr_sectors);
 		if (!new_io)
-			disk_stat_inc(rq->rq_disk, read_merges);
+			__disk_stat_inc(rq->rq_disk, read_merges);
 	} else if (rw == WRITE) {
-		disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
+		__disk_stat_add(rq->rq_disk, write_sectors, nr_sectors);
 		if (!new_io)
-			disk_stat_inc(rq->rq_disk, write_merges);
+			__disk_stat_inc(rq->rq_disk, write_merges);
 	}
 	if (new_io) {
 		disk_round_stats(rq->rq_disk);
@@ -2134,12 +2159,12 @@ void disk_round_stats(struct gendisk *disk)
 {
 	unsigned long now = jiffies;
 
-	disk_stat_add(disk, time_in_queue,
+	__disk_stat_add(disk, time_in_queue,
 			disk->in_flight * (now - disk->stamp));
 	disk->stamp = now;
 
 	if (disk->in_flight)
-		disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
+		__disk_stat_add(disk, io_ticks, (now - disk->stamp_idle));
 	disk->stamp_idle = now;
 }
 
@@ -2384,9 +2409,7 @@ again:
 			break;
 
 		bio->bi_next = req->bio;
-		req->cbio = req->bio = bio;
-		req->nr_cbio_segments = bio_segments(bio);
-		req->nr_cbio_sectors = bio_sectors(bio);
+		req->bio = bio;
 
 		/*
 		 * may not be valid. if the low level driver said
@@ -2458,11 +2481,9 @@ get_rq:
 	req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors;
 	req->nr_phys_segments = bio_phys_segments(q, bio);
 	req->nr_hw_segments = bio_hw_segments(q, bio);
-	req->nr_cbio_segments = bio_segments(bio);
-	req->nr_cbio_sectors = bio_sectors(bio);
 	req->buffer = bio_data(bio);	/* see ->buffer comment above */
 	req->waiting = NULL;
-	req->cbio = req->bio = req->biotail = bio;
+	req->bio = req->biotail = bio;
 	req->rq_disk = bio->bi_bdev->bd_disk;
 	req->start_time = jiffies;
 
@@ -2506,6 +2527,106 @@ static inline void blk_partition_remap(struct bio *bio)
 	}
 }
 
+void blk_finish_queue_drain(request_queue_t *q)
+{
+	struct request_list *rl = &q->rq;
+	struct request *rq;
+
+	spin_lock_irq(q->queue_lock);
+	clear_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
+
+	while (!list_empty(&q->drain_list)) {
+		rq = list_entry_rq(q->drain_list.next);
+
+		list_del_init(&rq->queuelist);
+		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
+	}
+
+	spin_unlock_irq(q->queue_lock);
+
+	wake_up(&rl->wait[0]);
+	wake_up(&rl->wait[1]);
+	wake_up(&rl->drain);
+}
+
+static int wait_drain(request_queue_t *q, struct request_list *rl, int dispatch)
+{
+	int wait = rl->count[READ] + rl->count[WRITE];
+
+	if (dispatch)
+		wait += !list_empty(&q->queue_head);
+
+	return wait;
+}
+
+/*
+ * We rely on the fact that only requests allocated through blk_alloc_request()
+ * have io scheduler private data structures associated with them. Any other
+ * type of request (allocated on stack or through kmalloc()) should not go
+ * to the io scheduler core, but be attached to the queue head instead.
+ */
+void blk_wait_queue_drained(request_queue_t *q, int wait_dispatch)
+{
+	struct request_list *rl = &q->rq;
+	DEFINE_WAIT(wait);
+
+	spin_lock_irq(q->queue_lock);
+	set_bit(QUEUE_FLAG_DRAIN, &q->queue_flags);
+
+	while (wait_drain(q, rl, wait_dispatch)) {
+		prepare_to_wait(&rl->drain, &wait, TASK_UNINTERRUPTIBLE);
+
+		if (wait_drain(q, rl, wait_dispatch)) {
+			__generic_unplug_device(q);
+			spin_unlock_irq(q->queue_lock);
+			io_schedule();
+			spin_lock_irq(q->queue_lock);
+		}
+
+		finish_wait(&rl->drain, &wait);
+	}
+
+	spin_unlock_irq(q->queue_lock);
+}
+
+/*
+ * block waiting for the io scheduler being started again.
+ */
+static inline void block_wait_queue_running(request_queue_t *q)
+{
+	DEFINE_WAIT(wait);
+
+	while (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags)) {
+		struct request_list *rl = &q->rq;
+
+		prepare_to_wait_exclusive(&rl->drain, &wait,
+				TASK_UNINTERRUPTIBLE);
+
+		/*
+		 * re-check the condition. avoids using prepare_to_wait()
+		 * in the fast path (queue is running)
+		 */
+		if (test_bit(QUEUE_FLAG_DRAIN, &q->queue_flags))
+			io_schedule();
+
+		finish_wait(&rl->drain, &wait);
+	}
+}
+
+static void handle_bad_sector(struct bio *bio)
+{
+	char b[BDEVNAME_SIZE];
+
+	printk(KERN_INFO "attempt to access beyond end of device\n");
+	printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
+			bdevname(bio->bi_bdev, b),
+			bio->bi_rw,
+			(unsigned long long)bio->bi_sector + bio_sectors(bio),
+			(long long)(bio->bi_bdev->bd_inode->i_size >> 9));
+
+	set_bit(BIO_EOF, &bio->bi_flags);
+}
+
 /**
 * generic_make_request: hand a buffer to its device driver for I/O
 * @bio: The bio describing the location in memory and on the device.
@@ -2542,21 +2663,13 @@ void generic_make_request(struct bio *bio)
 	if (maxsector) {
 		sector_t sector = bio->bi_sector;
 
-		if (maxsector < nr_sectors ||
-		    maxsector - nr_sectors < sector) {
-			char b[BDEVNAME_SIZE];
-			/* This may well happen - the kernel calls
-			 * bread() without checking the size of the
-			 * device, e.g., when mounting a device. */
-			printk(KERN_INFO
-			       "attempt to access beyond end of device\n");
-			printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
-				bdevname(bio->bi_bdev, b),
-				bio->bi_rw,
-				(unsigned long long) sector + nr_sectors,
-				(long long) maxsector);
-
-			set_bit(BIO_EOF, &bio->bi_flags);
+		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
+			/*
+			 * This may well happen - the kernel calls bread()
+			 * without checking the size of the device, e.g., when
+			 * mounting a device.
+			 */
+			handle_bad_sector(bio);
 			goto end_io;
 		}
 	}
@@ -2595,6 +2708,8 @@ end_io:
 		if (test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))
 			goto end_io;
 
+		block_wait_queue_running(q);
+
 		/*
 		 * If this device has partitions, remap block n
 		 * of partition p to block n+start(p) of the disk.
@@ -2643,103 +2758,40 @@ void submit_bio(int rw, struct bio *bio)
 
 EXPORT_SYMBOL(submit_bio);
 
-/**
- * blk_rq_next_segment
- * @rq: the request being processed
- *
- * Description:
- *	Points to the next segment in the request if the current segment
- *	is complete. Leaves things unchanged if this segment is not over
- *	or if no more segments are left in this request.
- *
- *	Meant to be used for bio traversal during I/O submission
- *	Does not affect any I/O completions or update completion state
- *	in the request, and does not modify any bio fields.
- *
- *	Decrementing rq->nr_sectors, rq->current_nr_sectors and
- *	rq->nr_cbio_sectors as data is transferred is the caller's
- *	responsibility and should be done before calling this routine.
- **/
-void blk_rq_next_segment(struct request *rq)
-{
-	if (rq->current_nr_sectors > 0)
-		return;
-
-	if (rq->nr_cbio_sectors > 0) {
-		--rq->nr_cbio_segments;
-		rq->current_nr_sectors = blk_rq_vec(rq)->bv_len >> 9;
-	} else {
-		if ((rq->cbio = rq->cbio->bi_next)) {
-			rq->nr_cbio_segments = bio_segments(rq->cbio);
-			rq->nr_cbio_sectors = bio_sectors(rq->cbio);
-			rq->current_nr_sectors = bio_cur_sectors(rq->cbio);
-		}
-	}
-
-	/* remember the size of this segment before we start I/O */
-	rq->hard_cur_sectors = rq->current_nr_sectors;
-}
-
-/**
- * process_that_request_first - process partial request submission
- * @req: the request being processed
- * @nr_sectors: number of sectors I/O has been submitted on
- *
- * Description:
- *	May be used for processing bio's while submitting I/O without
- *	signalling completion. Fails if more data is requested than is
- *	available in the request in which case it doesn't advance any
- *	pointers.
- *
- *	Assumes a request is correctly set up. No sanity checks.
- *
- * Return:
- *	0 - no more data left to submit (not processed)
- *	1 - data available to submit for this request (processed)
- **/
-int process_that_request_first(struct request *req, unsigned int nr_sectors)
-{
-	unsigned int nsect;
-
-	if (req->nr_sectors < nr_sectors)
-		return 0;
-
-	req->nr_sectors -= nr_sectors;
-	req->sector += nr_sectors;
-	while (nr_sectors) {
-		nsect = min_t(unsigned, req->current_nr_sectors, nr_sectors);
-		req->current_nr_sectors -= nsect;
-		nr_sectors -= nsect;
-		if (req->cbio) {
-			req->nr_cbio_sectors -= nsect;
-			blk_rq_next_segment(req);
-		}
-	}
-	return 1;
-}
-
-EXPORT_SYMBOL(process_that_request_first);
-
 void blk_recalc_rq_segments(struct request *rq)
 {
 	struct bio *bio, *prevbio = NULL;
 	int nr_phys_segs, nr_hw_segs;
+	unsigned int phys_size, hw_size;
+	request_queue_t *q = rq->q;
 
 	if (!rq->bio)
 		return;
 
-	nr_phys_segs = nr_hw_segs = 0;
+	phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0;
 	rq_for_each_bio(bio, rq) {
 		/* Force bio hw/phys segs to be recalculated. */
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
-		nr_phys_segs += bio_phys_segments(rq->q, bio);
-		nr_hw_segs += bio_hw_segments(rq->q, bio);
+		nr_phys_segs += bio_phys_segments(q, bio);
+		nr_hw_segs += bio_hw_segments(q, bio);
 		if (prevbio) {
-			if (blk_phys_contig_segment(rq->q, prevbio, bio))
+			int pseg = phys_size + prevbio->bi_size + bio->bi_size;
+			int hseg = hw_size + prevbio->bi_size + bio->bi_size;
+
+			if (blk_phys_contig_segment(q, prevbio, bio) &&
+			    pseg <= q->max_segment_size) {
 				nr_phys_segs--;
-			if (blk_hw_contig_segment(rq->q, prevbio, bio))
+				phys_size += prevbio->bi_size + bio->bi_size;
+			} else
+				phys_size = 0;
+
+			if (blk_hw_contig_segment(q, prevbio, bio) &&
+			    hseg <= q->max_segment_size) {
 				nr_hw_segs--;
+				hw_size += prevbio->bi_size + bio->bi_size;
+			} else
+				hw_size = 0;
 		}
 		prevbio = bio;
 	}
@@ -2755,8 +2807,7 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect)
 	rq->hard_nr_sectors -= nsect;
 
 	/*
-	 * Move the I/O submission pointers ahead if required,
-	 * i.e. for drivers not aware of rq->cbio.
+	 * Move the I/O submission pointers ahead if required.
 	 */
 	if ((rq->nr_sectors >= rq->hard_nr_sectors) &&
 	    (rq->sector <= rq->hard_sector)) {
@@ -2764,11 +2815,7 @@
 		rq->nr_sectors = rq->hard_nr_sectors;
 		rq->hard_cur_sectors = bio_cur_sectors(rq->bio);
 		rq->current_nr_sectors = rq->hard_cur_sectors;
-		rq->nr_cbio_segments = bio_segments(rq->bio);
-		rq->nr_cbio_sectors = bio_sectors(rq->bio);
 		rq->buffer = bio_data(rq->bio);
-
-		rq->cbio = rq->bio;
 	}
 
 	/*
@@ -2940,12 +2987,12 @@ void end_that_request_last(struct request *req)
 		unsigned long duration = jiffies - req->start_time;
 		switch (rq_data_dir(req)) {
 		case WRITE:
-			disk_stat_inc(disk, writes);
-			disk_stat_add(disk, write_ticks, duration);
+			__disk_stat_inc(disk, writes);
+			__disk_stat_add(disk, write_ticks, duration);
 			break;
 		case READ:
-			disk_stat_inc(disk, reads);
-			disk_stat_add(disk, read_ticks, duration);
+			__disk_stat_inc(disk, reads);
+			__disk_stat_add(disk, read_ticks, duration);
 			break;
 		}
 		disk_round_stats(disk);
@@ -2980,33 +3027,13 @@ void blk_rq_bio_prep(request_queue_t *q, struct request *rq, struct bio *bio)
 	rq->current_nr_sectors = bio_cur_sectors(bio);
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
-	rq->nr_cbio_segments = bio_segments(bio);
-	rq->nr_cbio_sectors = bio_sectors(bio);
 	rq->buffer = bio_data(bio);
 
-	rq->cbio = rq->bio = rq->biotail = bio;
+	rq->bio = rq->biotail = bio;
 }
 
 EXPORT_SYMBOL(blk_rq_bio_prep);
 
-void blk_rq_prep_restart(struct request *rq)
-{
-	struct bio *bio;
-
-	bio = rq->cbio = rq->bio;
-	if (bio) {
-		rq->nr_cbio_segments = bio_segments(bio);
-		rq->nr_cbio_sectors = bio_sectors(bio);
-		rq->hard_cur_sectors = bio_cur_sectors(bio);
-		rq->buffer = bio_data(bio);
-	}
-	rq->sector = rq->hard_sector;
-	rq->nr_sectors = rq->hard_nr_sectors;
-	rq->current_nr_sectors = rq->hard_cur_sectors;
-}
-
-EXPORT_SYMBOL(blk_rq_prep_restart);
-
 int kblockd_schedule_work(struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
@@ -3018,6 +3045,7 @@ void kblockd_flush(void)
 {
 	flush_workqueue(kblockd_workqueue);
 }
+EXPORT_SYMBOL(kblockd_flush);
 
 int __init blk_dev_init(void)
 {
@@ -3036,6 +3064,7 @@ int __init blk_dev_init(void)
 
 	blk_max_low_pfn = max_low_pfn;
 	blk_max_pfn = max_pfn;
+
 	return 0;
 }
 
@@ -3052,9 +3081,13 @@ void put_io_context(struct io_context *ioc)
 	if (atomic_dec_and_test(&ioc->refcount)) {
 		if (ioc->aic && ioc->aic->dtor)
 			ioc->aic->dtor(ioc->aic);
+		if (ioc->cic && ioc->cic->dtor)
+			ioc->cic->dtor(ioc->cic);
+
 		kmem_cache_free(iocontext_cachep, ioc);
 	}
 }
+EXPORT_SYMBOL(put_io_context);
 
 /* Called by the exiting task */
 void exit_io_context(void)
@@ -3064,14 +3097,15 @@ void exit_io_context(void)
 
 	local_irq_save(flags);
 	ioc = current->io_context;
-	if (ioc) {
-		if (ioc->aic && ioc->aic->exit)
-			ioc->aic->exit(ioc->aic);
-		put_io_context(ioc);
-		current->io_context = NULL;
-	} else
-		WARN_ON(1);
+	current->io_context = NULL;
 	local_irq_restore(flags);
+
+	if (ioc->aic && ioc->aic->exit)
+		ioc->aic->exit(ioc->aic);
+	if (ioc->cic && ioc->cic->exit)
+		ioc->cic->exit(ioc->cic);
+
+	put_io_context(ioc);
 }
 
 /*
@@ -3090,22 +3124,42 @@ struct io_context *get_io_context(int gfp_flags)
 
 	local_irq_save(flags);
 	ret = tsk->io_context;
-	if (ret == NULL) {
-		ret = kmem_cache_alloc(iocontext_cachep, GFP_ATOMIC);
-		if (ret) {
-			atomic_set(&ret->refcount, 1);
-			ret->pid = tsk->pid;
-			ret->last_waited = jiffies; /* doesn't matter... */
-			ret->nr_batch_requests = 0; /* because this is 0 */
-			ret->aic = NULL;
+	if (ret)
+		goto out;
+
+	local_irq_restore(flags);
+
+	ret = kmem_cache_alloc(iocontext_cachep, gfp_flags);
+	if (ret) {
+		atomic_set(&ret->refcount, 1);
+		ret->pid = tsk->pid;
+		ret->last_waited = jiffies; /* doesn't matter... */
+		ret->nr_batch_requests = 0; /* because this is 0 */
+		ret->aic = NULL;
+		ret->cic = NULL;
+		spin_lock_init(&ret->lock);
+
+		local_irq_save(flags);
+
+		/*
+		 * very unlikely, someone raced with us in setting up the task
+		 * io context. free new context and just grab a reference.
+		 */
+		if (!tsk->io_context)
 			tsk->io_context = ret;
+		else {
+			kmem_cache_free(iocontext_cachep, ret);
+			ret = tsk->io_context;
 		}
-	}
-	if (ret)
+
+out:
 		atomic_inc(&ret->refcount);
-	local_irq_restore(flags);
+		local_irq_restore(flags);
+	}
+
 	return ret;
 }
@@ -3119,6 +3173,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc)
 		*pdst = src;
 	}
 }
+EXPORT_SYMBOL(copy_io_context);
 
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
 {
@@ -3127,7 +3182,7 @@ void swap_io_context(struct io_context **ioc1, struct io_context **ioc2)
 	*ioc1 = *ioc2;
 	*ioc2 = temp;
 }
-
+EXPORT_SYMBOL(swap_io_context);
 
 /*
 * sysfs parts below
@@ -3285,11 +3340,18 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
 	.show = queue_max_hw_sectors_show,
 };
 
+static struct queue_sysfs_entry queue_iosched_entry = {
+	.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+	.show = elv_iosched_show,
+	.store = elv_iosched_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
 	&queue_max_hw_sectors_entry.attr,
 	&queue_max_sectors_entry.attr,
+	&queue_iosched_entry.attr,
 	NULL,
 };