/*
 *  linux/drivers/block/cfq-iosched.c
 *
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io scheduler
 *  (round robin per-process disk scheduling) and from Andrea Arcangeli.
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>

/*
 * tunables
 */
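/*
 * cfq_quantum bounds how many requests one dispatch round moves to the
 * dispatch list; cfq_queued is the slack subtracted from q->nr_requests
 * when computing the per-queue allotment in cfq_may_queue().
 */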
static int cfq_quantum = 4;
static int cfq_queued = 8;

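/*
 * two hashes: cfq_queue lookup by process id (CFQ_QHASH), and request
 * lookup by end sector for back merging (CFQ_MHASH)
 */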
#define CFQ_QHASH_SHIFT         6
#define CFQ_QHASH_ENTRIES       (1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry) list_entry((entry), struct cfq_queue, cfq_hash)

#define CFQ_MHASH_SHIFT         8
#define CFQ_MHASH_BLOCK(sec)    ((sec) >> 3)
#define CFQ_MHASH_ENTRIES       (1 << CFQ_MHASH_SHIFT)
#define CFQ_MHASH_FN(sec)       (hash_long(CFQ_MHASH_BLOCK((sec)), CFQ_MHASH_SHIFT))
#define ON_MHASH(crq)           !list_empty(&(crq)->hash)
#define rq_hash_key(rq)         ((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)    list_entry((ptr), struct cfq_rq, hash)

#define list_entry_cfqq(ptr)    list_entry((ptr), struct cfq_queue, cfq_list)

#define RQ_DATA(rq)             ((struct cfq_rq *) (rq)->elevator_private)

static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static mempool_t *cfq_mpool;

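/*
 * per-device scheduler instance data, hung off the elevator
 */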
struct cfq_data {
        struct list_head rr_list;
        struct list_head *dispatch;
        struct list_head *cfq_hash;

        struct list_head *crq_hash;

        unsigned int busy_queues;
        unsigned int max_queued;

        mempool_t *crq_pool;
};

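/*
 * per-process queue: holds one process' requests, sector sorted in an
 * rb tree, and is linked on cfqd->rr_list while it has requests queued
 */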
struct cfq_queue {
        struct list_head cfq_hash;
        struct list_head cfq_list;
        struct rb_root sort_list;
        int pid;
        int queued[2];
#if 0
        /*
         * with a simple addition like this, we can do io priorities. almost.
         * does need a split request free list, too.
         */
        int io_prio;
#endif
};

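/*
 * per-request scheduler data, hung off rq->elevator_private
 */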
struct cfq_rq {
        struct rb_node rb_node;
        sector_t rb_key;

        struct request *request;

        struct cfq_queue *cfq_queue;

        struct list_head hash;
};

static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq);

/*
 * lots of deadline iosched dupes, can be abstracted later...
 */
static inline void __cfq_del_crq_hash(struct cfq_rq *crq)
{
        list_del_init(&crq->hash);
}

static inline void cfq_del_crq_hash(struct cfq_rq *crq)
{
        if (ON_MHASH(crq))
                __cfq_del_crq_hash(crq);
}

static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
{
        cfq_del_crq_hash(crq);

        if (q->last_merge == crq->request)
                q->last_merge = NULL;
}

static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
        struct request *rq = crq->request;

        BUG_ON(ON_MHASH(crq));

        list_add(&crq->hash, &cfqd->crq_hash[CFQ_MHASH_FN(rq_hash_key(rq))]);
}

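/*
 * look up a request whose end sector matches @offset, for back merging;
 * requests that are no longer mergeable are lazily dropped from the hash
 */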
static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
{
        struct list_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
        struct list_head *entry, *next = hash_list->next;

        while ((entry = next) != hash_list) {
                struct cfq_rq *crq = list_entry_hash(entry);
                struct request *__rq = crq->request;

                next = entry->next;

                BUG_ON(!ON_MHASH(crq));

                if (!rq_mergeable(__rq)) {
                        __cfq_del_crq_hash(crq);
                        continue;
                }

                if (rq_hash_key(__rq) == offset)
                        return __rq;
        }

        return NULL;
}

/*
 * rb tree support functions
 */
#define RB_NONE         (2)
#define RB_EMPTY(node)  ((node)->rb_node == NULL)
#define RB_CLEAR(node)  ((node)->rb_color = RB_NONE)
#define RB_CLEAR_ROOT(root)     ((root)->rb_node = NULL)
#define ON_RB(node)     ((node)->rb_color != RB_NONE)
#define rb_entry_crq(node)      rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq)           (rq)->sector

static inline void cfq_del_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
{
        if (ON_RB(&crq->rb_node)) {
                cfqq->queued[rq_data_dir(crq->request)]--;
                rb_erase(&crq->rb_node, &cfqq->sort_list);
                crq->cfq_queue = NULL;
        }
}

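/*
 * link crq into the queue's sort tree; returns an existing crq with the
 * same rb_key (an alias) instead of inserting, NULL on success
 */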
static struct cfq_rq *
__cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
{
        struct rb_node **p = &cfqq->sort_list.rb_node;
        struct rb_node *parent = NULL;
        struct cfq_rq *__crq;

        while (*p) {
                parent = *p;
                __crq = rb_entry_crq(parent);

                if (crq->rb_key < __crq->rb_key)
                        p = &(*p)->rb_left;
                else if (crq->rb_key > __crq->rb_key)
                        p = &(*p)->rb_right;
                else
                        return __crq;
        }

        rb_link_node(&crq->rb_node, parent, p);
        return NULL;
}

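/*
 * add crq to the queue's sort tree; if a request with the same sector is
 * already there, kick the alias out to the dispatch list and retry
 */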
static void
cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq)
{
        struct request *rq = crq->request;
        struct cfq_rq *__alias;

        crq->rb_key = rq_rb_key(rq);
        cfqq->queued[rq_data_dir(rq)]++;
retry:
        __alias = __cfq_add_crq_rb(cfqq, crq);
        if (!__alias) {
                rb_insert_color(&crq->rb_node, &cfqq->sort_list);
                crq->cfq_queue = cfqq;
                return;
        }

        cfq_del_crq_rb(cfqq, __alias);
        cfq_dispatch_sort(cfqd->dispatch, __alias);
        goto retry;
}

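/*
 * find a request starting at @sector in the current process' sort tree,
 * for front merging
 */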
static struct request *
cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
{
        struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
        struct rb_node *n;

        if (!cfqq)
                goto out;

        n = cfqq->sort_list.rb_node;
        while (n) {
                struct cfq_rq *crq = rb_entry_crq(n);

                if (sector < crq->rb_key)
                        n = n->rb_left;
                else if (sector > crq->rb_key)
                        n = n->rb_right;
                else
                        return crq->request;
        }

out:
        return NULL;
}

static void cfq_remove_request(request_queue_t *q, struct request *rq)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct cfq_rq *crq = RQ_DATA(rq);

        if (crq) {
                struct cfq_queue *cfqq = crq->cfq_queue;

                cfq_remove_merge_hints(q, crq);
                list_del_init(&rq->queuelist);

                if (cfqq) {
                        cfq_del_crq_rb(cfqq, crq);

                        if (RB_EMPTY(&cfqq->sort_list))
                                cfq_put_queue(cfqd, cfqq);
                }
        }
}

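/*
 * see if bio merges with an existing request: try the cached last_merge
 * hint first, then the merge hash for a back merge, then the sort tree
 * for a front merge
 */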
static int
cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct request *__rq;
        int ret;

        ret = elv_try_last_merge(q, bio);
        if (ret != ELEVATOR_NO_MERGE) {
                __rq = q->last_merge;
                goto out_insert;
        }

        __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
        if (__rq) {
                BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);

                if (elv_rq_merge_ok(__rq, bio)) {
                        ret = ELEVATOR_BACK_MERGE;
                        goto out;
                }
        }

        __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
        if (__rq) {
                if (elv_rq_merge_ok(__rq, bio)) {
                        ret = ELEVATOR_FRONT_MERGE;
                        goto out;
                }
        }

        return ELEVATOR_NO_MERGE;
out:
        q->last_merge = __rq;
out_insert:
        *req = __rq;
        return ret;
}

static void cfq_merged_request(request_queue_t *q, struct request *req)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct cfq_rq *crq = RQ_DATA(req);

        cfq_del_crq_hash(crq);
        cfq_add_crq_hash(cfqd, crq);

        if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
                struct cfq_queue *cfqq = crq->cfq_queue;

                cfq_del_crq_rb(cfqq, crq);
                cfq_add_crq_rb(cfqd, cfqq, crq);
        }

        q->last_merge = req;
}

static void
cfq_merged_requests(request_queue_t *q, struct request *req,
                    struct request *next)
{
        cfq_merged_request(q, req);
        cfq_remove_request(q, next);
}

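/*
 * sort crq into the (roughly sector ordered) dispatch list; a request
 * below the sector of the current list head is queued near the back,
 * for the next sweep
 */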
static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq)
{
        struct list_head *entry = head;
        struct request *__rq;

        if (!list_empty(head)) {
                __rq = list_entry_rq(head->next);

                if (crq->request->sector < __rq->sector) {
                        entry = head->prev;
                        goto link;
                }
        }

        while ((entry = entry->prev) != head) {
                __rq = list_entry_rq(entry);

                if (crq->request->sector <= __rq->sector)
                        break;
        }

link:
        list_add_tail(&crq->request->queuelist, entry);
}

static inline void
__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
                        struct cfq_queue *cfqq)
{
        struct cfq_rq *crq = rb_entry_crq(rb_first(&cfqq->sort_list));

        cfq_del_crq_rb(cfqq, crq);
        cfq_remove_merge_hints(q, crq);
        cfq_dispatch_sort(cfqd->dispatch, crq);
}

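/*
 * service the busy queues, moving requests from the rr_list to the
 * dispatch list until at least cfq_quantum requests have been moved or
 * no queue has requests left
 */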
static int cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd)
{
        struct cfq_queue *cfqq;
        struct list_head *entry, *tmp;
        int ret, queued, good_queues;

        if (list_empty(&cfqd->rr_list))
                return 0;

        queued = ret = 0;
restart:
        good_queues = 0;
        list_for_each_safe(entry, tmp, &cfqd->rr_list) {
                cfqq = list_entry_cfqq(cfqd->rr_list.next);

                BUG_ON(RB_EMPTY(&cfqq->sort_list));

                __cfq_dispatch_requests(q, cfqd, cfqq);

                if (RB_EMPTY(&cfqq->sort_list))
                        cfq_put_queue(cfqd, cfqq);
                else
                        good_queues++;

                queued++;
                ret = 1;
        }

        if ((queued < cfq_quantum) && good_queues)
                goto restart;

        return ret;
}

static struct request *cfq_next_request(request_queue_t *q)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct request *rq;

        if (!list_empty(cfqd->dispatch)) {
                struct cfq_rq *crq;
dispatch:
                rq = list_entry_rq(cfqd->dispatch->next);

                crq = RQ_DATA(rq);
                if (crq)
                        cfq_remove_merge_hints(q, crq);

                return rq;
        }

        if (cfq_dispatch_requests(q, cfqd))
                goto dispatch;

        return NULL;
}

static inline struct cfq_queue *
__cfq_find_cfq_hash(struct cfq_data *cfqd, int pid, const int hashval)
{
        struct list_head *hash_list = &cfqd->cfq_hash[hashval];
        struct list_head *entry;

        list_for_each(entry, hash_list) {
                struct cfq_queue *__cfqq = list_entry_qhash(entry);

                if (__cfqq->pid == pid)
                        return __cfqq;
        }

        return NULL;
}

static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid)
{
        const int hashval = hash_long(pid, CFQ_QHASH_SHIFT);

        return __cfq_find_cfq_hash(cfqd, pid, hashval);
}

static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
        cfqd->busy_queues--;
        list_del(&cfqq->cfq_list);
        list_del(&cfqq->cfq_hash);
        mempool_free(cfqq, cfq_mpool);
}

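/*
 * find the cfq_queue for this pid, allocating and hashing a new one if
 * none exists yet
 */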
static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int pid)
{
        const int hashval = hash_long(pid, CFQ_QHASH_SHIFT);
        struct cfq_queue *cfqq = __cfq_find_cfq_hash(cfqd, pid, hashval);

        if (!cfqq) {
                cfqq = mempool_alloc(cfq_mpool, GFP_NOIO);

                INIT_LIST_HEAD(&cfqq->cfq_hash);
                INIT_LIST_HEAD(&cfqq->cfq_list);
                RB_CLEAR_ROOT(&cfqq->sort_list);

                cfqq->pid = pid;
                cfqq->queued[0] = cfqq->queued[1] = 0;
                list_add(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
        }

        return cfqq;
}

static void cfq_enqueue(struct cfq_data *cfqd, struct cfq_rq *crq)
{
        struct cfq_queue *cfqq;

        cfqq = cfq_get_queue(cfqd, current->tgid);

        cfq_add_crq_rb(cfqd, cfqq, crq);

        if (list_empty(&cfqq->cfq_list)) {
                list_add(&cfqq->cfq_list, &cfqd->rr_list);
                cfqd->busy_queues++;
        }
}

static void
cfq_insert_request(request_queue_t *q, struct request *rq, int where)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct cfq_rq *crq = RQ_DATA(rq);

        switch (where) {
                case ELEVATOR_INSERT_BACK:
                        while (cfq_dispatch_requests(q, cfqd))
                                ;
                        list_add_tail(&rq->queuelist, cfqd->dispatch);
                        break;
                case ELEVATOR_INSERT_FRONT:
                        list_add(&rq->queuelist, cfqd->dispatch);
                        break;
                case ELEVATOR_INSERT_SORT:
                        BUG_ON(!blk_fs_request(rq));
                        cfq_enqueue(cfqd, crq);
                        break;
                default:
                        printk("%s: bad insert point %d\n", __FUNCTION__, where);
                        return;
        }

        if (rq_mergeable(rq)) {
                cfq_add_crq_hash(cfqd, crq);

                if (!q->last_merge)
                        q->last_merge = rq;
        }
}

static int cfq_queue_empty(request_queue_t *q)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;

        if (list_empty(cfqd->dispatch) && list_empty(&cfqd->rr_list))
                return 1;

        return 0;
}

static struct request *
cfq_former_request(request_queue_t *q, struct request *rq)
{
        struct cfq_rq *crq = RQ_DATA(rq);
        struct rb_node *rbprev = rb_prev(&crq->rb_node);

        if (rbprev)
                return rb_entry_crq(rbprev)->request;

        return NULL;
}

static struct request *
cfq_latter_request(request_queue_t *q, struct request *rq)
{
        struct cfq_rq *crq = RQ_DATA(rq);
        struct rb_node *rbnext = rb_next(&crq->rb_node);

        if (rbnext)
                return rb_entry_crq(rbnext)->request;

        return NULL;
}

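/*
 * allow a process to queue if it has fewer requests pending than its
 * fair share of q->nr_requests, clamped to [3, max_queued]
 */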
static int cfq_may_queue(request_queue_t *q, int rw)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct cfq_queue *cfqq;
        int ret = 1;

        if (!cfqd->busy_queues)
                goto out;

        cfqq = cfq_find_cfq_hash(cfqd, current->tgid);
        if (cfqq) {
                int limit = (q->nr_requests - cfq_queued) / cfqd->busy_queues;

                if (limit < 3)
                        limit = 3;
                else if (limit > cfqd->max_queued)
                        limit = cfqd->max_queued;

                if (cfqq->queued[rw] > limit)
                        ret = 0;
        }
out:
        return ret;
}

static void cfq_put_request(request_queue_t *q, struct request *rq)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct cfq_rq *crq = RQ_DATA(rq);

        if (crq) {
                BUG_ON(q->last_merge == rq);
                BUG_ON(ON_MHASH(crq));

                mempool_free(crq, cfqd->crq_pool);
                rq->elevator_private = NULL;
        }
}

static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
{
        struct cfq_data *cfqd = q->elevator.elevator_data;
        struct cfq_rq *crq = mempool_alloc(cfqd->crq_pool, gfp_mask);

        if (crq) {
                RB_CLEAR(&crq->rb_node);
                crq->request = rq;
                crq->cfq_queue = NULL;
                INIT_LIST_HEAD(&crq->hash);
                rq->elevator_private = crq;
                return 0;
        }

        return 1;
}

static void cfq_exit(request_queue_t *q, elevator_t *e)
{
        struct cfq_data *cfqd = e->elevator_data;

        e->elevator_data = NULL;
        mempool_destroy(cfqd->crq_pool);
        kfree(cfqd->crq_hash);
        kfree(cfqd->cfq_hash);
        kfree(cfqd);
}

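/*
 * allocate and initialize the per-queue scheduler data: the two hash
 * tables, the crq mempool, and the round robin list
 */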
static int cfq_init(request_queue_t *q, elevator_t *e)
{
        struct cfq_data *cfqd;
        int i;

        cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
        if (!cfqd)
                return -ENOMEM;

        memset(cfqd, 0, sizeof(*cfqd));
        INIT_LIST_HEAD(&cfqd->rr_list);

        cfqd->crq_hash = kmalloc(sizeof(struct list_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
        if (!cfqd->crq_hash)
                goto out_crqhash;

        cfqd->cfq_hash = kmalloc(sizeof(struct list_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
        if (!cfqd->cfq_hash)
                goto out_cfqhash;

        cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
        if (!cfqd->crq_pool)
                goto out_crqpool;

        for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
                INIT_LIST_HEAD(&cfqd->crq_hash[i]);
        for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
                INIT_LIST_HEAD(&cfqd->cfq_hash[i]);

        cfqd->dispatch = &q->queue_head;
        e->elevator_data = cfqd;

        /*
         * just set it to some high value, we want anyone to be able to queue
         * some requests. fairness is handled differently
         */
        cfqd->max_queued = q->nr_requests;
        q->nr_requests = 8192;

        return 0;
out_crqpool:
        kfree(cfqd->cfq_hash);
out_cfqhash:
        kfree(cfqd->crq_hash);
out_crqhash:
        kfree(cfqd);
        return -ENOMEM;
}

static int __init cfq_slab_setup(void)
{
        crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
                                        NULL, NULL);

        if (!crq_pool)
                panic("cfq_iosched: can't init crq pool\n");

        cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
                                        NULL, NULL);

        if (!cfq_pool)
                panic("cfq_iosched: can't init cfq pool\n");

        cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);

        if (!cfq_mpool)
                panic("cfq_iosched: can't init cfq mpool\n");

        return 0;
}

subsys_initcall(cfq_slab_setup);

elevator_t iosched_cfq = {
        .elevator_name =                "cfq",
        .elevator_merge_fn =            cfq_merge,
        .elevator_merged_fn =           cfq_merged_request,
        .elevator_merge_req_fn =        cfq_merged_requests,
        .elevator_next_req_fn =         cfq_next_request,
        .elevator_add_req_fn =          cfq_insert_request,
        .elevator_remove_req_fn =       cfq_remove_request,
        .elevator_queue_empty_fn =      cfq_queue_empty,
        .elevator_former_req_fn =       cfq_former_request,
        .elevator_latter_req_fn =       cfq_latter_request,
        .elevator_set_req_fn =          cfq_set_request,
        .elevator_put_req_fn =          cfq_put_request,
        .elevator_may_queue_fn =        cfq_may_queue,
        .elevator_init_fn =             cfq_init,
        .elevator_exit_fn =             cfq_exit,
};

EXPORT_SYMBOL(iosched_cfq);