/*
 *  linux/drivers/block/cfq-iosched.c
 *
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>
28 static int cfq_quantum = 4;
29 static int cfq_queued = 8;
31 #define CFQ_QHASH_SHIFT 6
32 #define CFQ_QHASH_ENTRIES (1 << CFQ_QHASH_SHIFT)
33 #define list_entry_qhash(entry) list_entry((entry), struct cfq_queue, cfq_hash)
35 #define CFQ_MHASH_SHIFT 8
36 #define CFQ_MHASH_BLOCK(sec) ((sec) >> 3)
37 #define CFQ_MHASH_ENTRIES (1 << CFQ_MHASH_SHIFT)
38 #define CFQ_MHASH_FN(sec) (hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT))
39 #define ON_MHASH(crq) !list_empty(&(crq)->hash)
40 #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors)
41 #define list_entry_hash(ptr) list_entry((ptr), struct cfq_rq, hash)
43 #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list)
45 #define RQ_DATA(rq) ((struct cfq_rq *) (rq)->elevator_private)
47 static kmem_cache_t *crq_pool;
48 static kmem_cache_t *cfq_pool;
49 static mempool_t *cfq_mpool;
52 struct list_head rr_list;
53 struct list_head *dispatch;
54 struct list_head *cfq_hash;
56 struct list_head *crq_hash;
58 unsigned int busy_queues;
59 unsigned int max_queued;
65 struct list_head cfq_hash;
66 struct list_head cfq_list;
67 struct rb_root sort_list;
72 * with a simple addition like this, we can do io priorities. almost.
73 * does need a split request free list, too.
80 struct rb_node rb_node;
83 struct request *request;
85 struct cfq_queue *cfq_queue;
87 struct list_head hash;
/* forward declarations for routines used before their definitions */
static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq);
95 * lots of deadline iosched dupes, can be abstracted later...
97 static inline void __cfq_del_crq_hash(struct cfq_rq *crq)
99 list_del_init(&crq->hash);
/* unlink crq from the merge hash, if it is hashed at all */
static inline void cfq_del_crq_hash(struct cfq_rq *crq)
{
	if (ON_MHASH(crq))
		__cfq_del_crq_hash(crq);
}
108 static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
110 cfq_del_crq_hash(crq);
112 if (q->last_merge == crq->request)
113 q->last_merge = NULL;
116 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
118 struct request *rq = crq->request;
120 BUG_ON(ON_MHASH(crq));
122 list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]);
125 static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
127 struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
128 struct list_head *entry, *next = hash_list->next;
130 while ((entry = next) != hash_list) {
131 struct cfq_rq *crq = list_entry_hash(entry);
132 struct request *__rq = crq->request;
136 BUG_ON(!ON_MHASH(crq));
138 if (!rq_mergeable(__rq)) {
139 __cfq_del_crq_hash(crq);
143 if (rq_hash_key(__rq) == offset)
/*
 * rb tree support functions
 */
#define RB_EMPTY(node)		((node)->rb_node == NULL)
#define RB_CLEAR(node)		((node)->rb_color = RB_NONE)
#define RB_CLEAR_ROOT(root)	((root)->rb_node = NULL)
#define ON_RB(node)		((node)->rb_color != RB_NONE)
#define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq)		(rq)->sector
161 static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
163 if (ON_RB(&crq->rb_node)) {
164 cfqq->queued[rq_data_dir(crq->request)]--;
165 rb_erase(&crq->rb_node, &cfqq->sort_list);
166 crq->cfq_queue = NULL;
170 static struct cfq_rq *
171 __cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
173 struct rb_node **p = &cfqq->sort_list.rb_node;
174 struct rb_node *parent = NULL;
175 struct cfq_rq *__crq;
179 __crq = rb_entry_crq(parent);
181 if (crq->rb_key < __crq->rb_key)
183 else if (crq->rb_key > __crq->rb_key)
189 rb_link_node(&crq->rb_node, parent, p);
194 cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq)
196 struct request *rq = crq->request;
197 struct cfq_rq *__alias;
199 crq->rb_key = rq_rb_key(rq);
200 cfqq->queued[rq_data_dir(rq)]++;
202 __alias = __cfq_add_crq_rb(cfqq, crq);
204 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
205 crq->cfq_queue = cfqq;
209 cfq_del_crq_rb(cfqq, __alias);
210 cfq_dispatch_sort(cfqd->dispatch, __alias);
214 static struct request *
215 cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
217 struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
223 n = cfqq->sort_list.rb_node;
225 struct cfq_rq *crq = rb_entry_crq(n);
227 if (sector < crq->rb_key)
229 else if (sector > crq->rb_key)
239 static void cfq_remove_request(request_queue_t *q, struct request *rq)
241 struct cfq_data *cfqd = q->elevator.elevator_data;
242 struct cfq_rq *crq = RQ_DATA(rq);
245 struct cfq_queue *cfqq = crq->cfq_queue;
247 cfq_remove_merge_hints(q, crq);
248 list_del_init(&rq->queuelist);
251 cfq_del_crq_rb(cfqq, crq);
253 if (RB_EMPTY(&cfqq->sort_list))
254 cfq_put_queue(cfqd, cfqq);
260 cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
262 struct cfq_data *cfqd = q->elevator.elevator_data;
263 struct request *__rq;
266 ret = elv_try_last_merge(q, bio);
267 if (ret != ELEVATOR_NO_MERGE) {
268 __rq = q->last_merge;
272 __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
274 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
276 if (elv_rq_merge_ok(__rq, bio)) {
277 ret = ELEVATOR_BACK_MERGE;
282 __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
284 if (elv_rq_merge_ok(__rq, bio)) {
285 ret = ELEVATOR_FRONT_MERGE;
290 return ELEVATOR_NO_MERGE;
292 q->last_merge = __rq;
298 static void cfq_merged_request(request_queue_t *q, struct request *req)
300 struct cfq_data *cfqd = q->elevator.elevator_data;
301 struct cfq_rq *crq = RQ_DATA(req);
303 cfq_del_crq_hash(crq);
304 cfq_add_crq_hash(cfqd, crq);
306 if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
307 struct cfq_queue *cfqq = crq->cfq_queue;
309 cfq_del_crq_rb(cfqq, crq);
310 cfq_add_crq_rb(cfqd, cfqq, crq);
317 cfq_merged_requests(request_queue_t *q, struct request *req,
318 struct request *next)
320 cfq_merged_request(q, req);
321 cfq_remove_request(q, next);
324 static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq)
326 struct list_head *entry = head;
327 struct request *__rq;
329 if (!list_empty(head)) {
330 __rq = list_entry_rq(head->next);
332 if (crq->request->sector < __rq->sector) {
338 while ((entry = entry->prev) != head) {
339 __rq = list_entry_rq(entry);
341 if (crq->request->sector <= __rq->sector)
346 list_add_tail(&crq->request->queuelist, entry);
350 __cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
351 struct cfq_queue *cfqq)
353 struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list));
355 cfq_del_crq_rb(cfqq, crq);
356 cfq_remove_merge_hints(q, crq);
357 cfq_dispatch_sort(cfqd->dispatch, crq);
360 static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd)
362 struct cfq_queue *cfqq;
363 struct list_head *entry, *tmp;
364 int ret, queued, good_queues;
366 if (list_empty(&cfqd->rr_list))
372 list_for_each_safe(entry, tmp, &cfqd->rr_list) {
373 cfqq = list_entry_cfqq(cfqd->rr_list.next);
375 BUG_ON(RB_EMPTY(&cfqq->sort_list));
377 __cfq_dispatch_requests(q, cfqd, cfqq);
379 if (RB_EMPTY(&cfqq->sort_list))
380 cfq_put_queue(cfqd, cfqq);
388 if ((queued < cfq_quantum) && good_queues)
394 static struct request *cfq_next_request(request_queue_t *q)
396 struct cfq_data *cfqd = q->elevator.elevator_data;
399 if (!list_empty(cfqd->dispatch)) {
402 rq = list_entry_rq(cfqd->dispatch->next);
406 cfq_remove_merge_hints(q, crq);
411 if (cfq_dispatch_requests(q, cfqd))
417 static inline struct cfq_queue *
418 __cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval)
420 struct list_head *hash_list = &cfqd->cfq_hash[hashval];
421 struct list_head *entry;
423 list_for_each(entry, hash_list) {
424 struct cfq_queue *__cfqq = list_entry_qhash(entry);
426 if (__cfqq->pid == pid)
433 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid)
435 const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
437 return __cfq_find_cfq_hash(cfqd, pid, hashval);
440 static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
443 list_del(&cfqq->cfq_list);
444 list_del(&cfqq->cfq_hash);
445 mempool_free(cfqq, cfq_mpool);
448 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid)
450 const int hashval = hash_long(current->tgid, CFQ_QHASH_SHIFT);
451 struct cfq_queue *cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval);
454 cfqq = mempool_alloc(cfq_mpool, GFP_NOIO);
456 INIT_LIST_HEAD(&cfqq->cfq_hash);
457 INIT_LIST_HEAD(&cfqq->cfq_list);
458 RB_CLEAR_ROOT(&cfqq->sort_list);
461 cfqq->queued[0] = cfqq->queued[1] = 0;
462 list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
468 static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
470 struct cfq_queue *cfqq;
472 cfqq = cfq_get_queue(cfqd, current->tgid);
474 cfq_add_crq_rb(cfqd, cfqq, crq);
476 if (list_empty(&cfqq->cfq_list)) {
477 list_add(&cfqq->cfq_list, &cfqd->rr_list);
483 cfq_insert_request(request_queue_t *q, struct request *rq, int where)
485 struct cfq_data *cfqd = q->elevator.elevator_data;
486 struct cfq_rq *crq = RQ_DATA(rq);
489 case ELEVATOR_INSERT_BACK:
490 while (cfq_dispatch_requests(q, cfqd))
492 list_add_tail(&rq->queuelist, cfqd->dispatch);
494 case ELEVATOR_INSERT_FRONT:
495 list_add(&rq->queuelist, cfqd->dispatch);
497 case ELEVATOR_INSERT_SORT:
498 BUG_ON(!blk_fs_request(rq));
499 cfq_enqueue(cfqd, crq);
502 printk("%s: bad insert point %d\n", __FUNCTION__,where);
506 if (rq_mergeable(rq)) {
507 cfq_add_crq_hash(cfqd, crq);
514 static int cfq_queue_empty(request_queue_t *q)
516 struct cfq_data *cfqd = q->elevator.elevator_data;
518 if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list))
524 static struct request *
525 cfq_former_request(request_queue_t *q, struct request *rq)
527 struct cfq_rq *crq = RQ_DATA(rq);
528 struct rb_node *rbprev = rb_prev(&crq->rb_node);
531 return rb_entry_crq(rbprev)->request;
536 static struct request *
537 cfq_latter_request(request_queue_t *q, struct request *rq)
539 struct cfq_rq *crq = RQ_DATA(rq);
540 struct rb_node *rbnext = rb_next(&crq->rb_node);
543 return rb_entry_crq(rbnext)->request;
548 static int cfq_may_queue(request_queue_t *q, int rw)
550 struct cfq_data *cfqd = q->elevator.elevator_data;
551 struct cfq_queue *cfqq;
554 if (!cfqd->busy_queues)
557 cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
559 int limit = (q->nr_requests - cfq_queued) / cfqd->busy_queues;
563 else if (limit > cfqd->max_queued)
564 limit = cfqd->max_queued;
566 if (cfqq->queued[rw] > limit)
573 static void cfq_put_request(request_queue_t *q, struct request *rq)
575 struct cfq_data *cfqd = q->elevator.elevator_data;
576 struct cfq_rq *crq = RQ_DATA(rq);
579 BUG_ON(q->last_merge == rq);
580 BUG_ON(ON_MHASH(crq));
582 mempool_free(crq, cfqd->crq_pool);
583 rq->elevator_private = NULL;
587 static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
589 struct cfq_data *cfqd = q->elevator.elevator_data;
590 struct cfq_rq *crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
593 RB_CLEAR(&crq->rb_node);
595 crq->cfq_queue = NULL;
596 INIT_LIST_HEAD(&crq->hash);
597 rq->elevator_private = crq;
604 static void cfq_exit(request_queue_t *q, elevator_t *e)
606 struct cfq_data *cfqd = e->elevator_data;
608 e->elevator_data = NULL;
609 mempool_destroy(cfqd->crq_pool);
610 kfree(cfqd->crq_hash);
611 kfree(cfqd->cfq_hash);
615 static int cfq_init(request_queue_t *q, elevator_t *e)
617 struct cfq_data *cfqd;
620 cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
624 memset(cfqd, 0, sizeof(*cfqd));
625 INIT_LIST_HEAD(&cfqd->rr_list);
627 cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
631 cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
635 cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
639 for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
640 INIT_LIST_HEAD(&cfqd->crq_hash[i]);
641 for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
642 INIT_LIST_HEAD(&cfqd->cfq_hash[i]);
644 cfqd->dispatch = &q->queue_head;
645 e->elevator_data = cfqd;
648 * just set it to some high value, we want anyone to be able to queue
649 * some requests. fairness is handled differently
651 cfqd->max_queued = q->nr_requests;
652 q->nr_requests = 8192;
656 kfree(cfqd->cfq_hash);
658 kfree(cfqd->crq_hash);
664 static int __init cfq_slab_setup(void)
666 crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
670 panic("cfq_iosched: can't init crq pool\n");
672 cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
676 panic("cfq_iosched: can't init cfq pool\n");
678 cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);
681 panic("cfq_iosched: can't init cfq mpool\n");
686 subsys_initcall(cfq_slab_setup);
688 elevator_t iosched_cfq = {
689 .elevator_name = "cfq",
690 .elevator_merge_fn = cfq_merge,
691 .elevator_merged_fn = cfq_merged_request,
692 .elevator_merge_req_fn = cfq_merged_requests,
693 .elevator_next_req_fn = cfq_next_request,
694 .elevator_add_req_fn = cfq_insert_request,
695 .elevator_remove_req_fn = cfq_remove_request,
696 .elevator_queue_empty_fn = cfq_queue_empty,
697 .elevator_former_req_fn = cfq_former_request,
698 .elevator_latter_req_fn = cfq_latter_request,
699 .elevator_set_req_fn = cfq_set_request,
700 .elevator_put_req_fn = cfq_put_request,
701 .elevator_may_queue_fn = cfq_may_queue,
702 .elevator_init_fn = cfq_init,
703 .elevator_exit_fn = cfq_exit,
706 EXPORT_SYMBOL(iosched_cfq);