1 /*
2  *  linux/drivers/block/cfq-iosched.c
3  *
4  *  CFQ, or complete fairness queueing, disk scheduler.
5  *
6  *  Based on ideas from a previously unfinished io
7  *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
8  *
9  *  IO priorities are supported, from 0% to 100% in 5% increments. Both of
10  *  those values have special meaning - the 0% class is only allowed to do io if
11  *  no one else wants to use the disk. 100% is considered real-time io, and it
12  *  always gets priority. The default process io rate is 95%. In the absence of other
13  *  io, a class may consume 100% of the disk bandwidth regardless. Within a class,
14  *  bandwidth is distributed equally among the citizens.
15  *
16  * TODO:
17  *      - cfq_select_requests() needs some work for 5-95% io
18  *      - barriers not supported
19  *      - export grace periods in ms, not jiffies
20  *
21  *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
22  */
23 #include <linux/kernel.h>
24 #include <linux/fs.h>
25 #include <linux/blkdev.h>
26 #include <linux/elevator.h>
27 #include <linux/bio.h>
28 #include <linux/config.h>
29 #include <linux/module.h>
30 #include <linux/slab.h>
31 #include <linux/init.h>
32 #include <linux/compiler.h>
33 #include <linux/hash.h>
34 #include <linux/rbtree.h>
35 #include <linux/mempool.h>
36
37 #if IOPRIO_NR > BITS_PER_LONG
38 #error Cannot support this many io priority levels
39 #endif
40
41 /*
42  * tunables
43  */
44 static int cfq_quantum = 6;
45 static int cfq_quantum_io = 256;
46 static int cfq_idle_quantum = 1;
47 static int cfq_idle_quantum_io = 64;
48 static int cfq_queued = 4;
49 static int cfq_grace_rt = HZ / 100 ?: 1;
50 static int cfq_grace_idle = HZ / 10;
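
/*
 * Rough feel for what the quanta above mean per priority level (see
 * cfq_select_requests() below); this assumes IOPRIO_NR is 21, i.e.
 * levels p0..p20 as in the sysfs entries at the bottom of this file.
 * A class at level i starts a dispatch round with a budget of roughly
 *
 *      cfq_quantum    * (i + 1) / IOPRIO_NR   requests
 *      cfq_quantum_io * (i + 1) / IOPRIO_NR   sectors
 *
 * so with the defaults a level 10 (~50%) class may dispatch up to
 * 6 * 11 / 21 = 3 requests and 256 * 11 / 21 = 134 sectors per round
 * (the budget is then scaled by other classes' load and averaged with
 * the previous round for fairness).
 */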
51
52 #define CFQ_QHASH_SHIFT         6
53 #define CFQ_QHASH_ENTRIES       (1 << CFQ_QHASH_SHIFT)
54 #define list_entry_qhash(entry) hlist_entry((entry), struct cfq_queue, cfq_hash)
55
56 #define CFQ_MHASH_SHIFT         8
57 #define CFQ_MHASH_BLOCK(sec)    ((sec) >> 3)
58 #define CFQ_MHASH_ENTRIES       (1 << CFQ_MHASH_SHIFT)
59 #define CFQ_MHASH_FN(sec)       (hash_long(CFQ_MHASH_BLOCK((sec)),CFQ_MHASH_SHIFT))
60 #define rq_hash_key(rq)         ((rq)->sector + (rq)->nr_sectors)
61 #define list_entry_hash(ptr)    hlist_entry((ptr), struct cfq_rq, hash)
62
63 #define list_entry_cfqq(ptr)    list_entry((ptr), struct cfq_queue, cfq_list)
64 #define list_entry_prio(ptr)    list_entry((ptr), struct cfq_rq, prio_list)
65
66 #define cfq_account_io(crq)     \
67         ((crq)->ioprio != IOPRIO_IDLE && (crq)->ioprio != IOPRIO_RT)
68
69 /*
70  * defines how we distribute bandwidth (can be tgid, uid, etc)
71  */
72
73 /* FIXME: change hash_key to be sizeof(void *) rather than sizeof(int) 
74  * otherwise the cast of cki_tsk_icls will not work reliably on 64-bit arches.
75  * OR, change cki_tsk_icls to return ints (will need another id space to be 
76  * managed)
77  */
78
79 #if defined(CONFIG_CKRM_RES_BLKIO) || defined(CONFIG_CKRM_RES_BLKIO_MODULE)
80 extern inline void *cki_hash_key(struct task_struct *tsk);
81 extern inline int cki_ioprio(struct task_struct *tsk);
82 #define cfq_hash_key(current)   ((int)cki_hash_key((current)))
83 #define cfq_ioprio(current)     (cki_ioprio((current)))
84
85 #else
86 #define cfq_hash_key(current)   ((current)->tgid)
87 /*
88  * move to io_context
89  */
90 #define cfq_ioprio(current)     ((current)->ioprio)
91 #endif
92
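/*
 * flag bits in cfq_data->flags - set while a grace period is pending
 * after real-time (CFQ_WAIT_RT) or normal (CFQ_WAIT_NORM) io has been
 * scheduled, see cfq_remove_request() and cfq_select_requests()
 */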
93 #define CFQ_WAIT_RT     0
94 #define CFQ_WAIT_NORM   1
95
96 static kmem_cache_t *crq_pool;
97 static kmem_cache_t *cfq_pool;
98 static mempool_t *cfq_mpool;
99
100 /*
101  * defines an io priority level
102  */
103 struct io_prio_data {
104         struct list_head rr_list;
105         int busy_queues;
106         int busy_rq;
107         unsigned long busy_sectors;
108         
109         /* Statistics on requests, sectors and queues 
110          * added to (in) and dispatched from (out) 
111          * this priority level. Reinsertion of previously
112          * dispatched crq's into cfq's results in double counting
113          * which is ignored for now as in-out should 
114          * still be accurate.
115          */
116         atomic_t cum_rq_in,cum_rq_out;              
117         atomic_t cum_sectors_in,cum_sectors_out;    
118         atomic_t cum_queues_in,cum_queues_out;
119       
120         struct list_head prio_list;
121         int last_rq;
122         int last_sectors;
123 };
124
125 /*
126  * per-request queue structure
127  */
128 struct cfq_data {
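        /*
         * dispatch points at the device's queue_head; crq_hash is the
         * back merge hash (keyed on end sector), cfq_hash maps
         * cfq_hash_key() (tgid by default) to per-process queues
         */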
129         struct list_head *dispatch;
130         struct hlist_head *cfq_hash;
131         struct hlist_head *crq_hash;
132         mempool_t *crq_pool;
133
134         struct io_prio_data cid[IOPRIO_NR];
135
136         /*
137          * total number of busy queues and requests
138          */
139         int busy_rq;
140         int busy_queues;
141         unsigned long busy_sectors;
142
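        /*
         * one bit per priority level that was marked congested, set in
         * cfq_queue_congested() and checked in cfq_may_queue()
         */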
143         unsigned long rq_starved_mask;
144
145         /*
146          * grace period handling
147          */
148         struct timer_list timer;
149         unsigned long wait_end;
150         unsigned long flags;
151         struct work_struct work;
152
153         /*
154          * tunables
155          */
156         unsigned int cfq_quantum;
157         unsigned int cfq_quantum_io;
158         unsigned int cfq_idle_quantum;
159         unsigned int cfq_idle_quantum_io;
160         unsigned int cfq_queued;
161         unsigned int cfq_grace_rt;
162         unsigned int cfq_grace_idle;
163 };
164
165 /*
166  * per-class structure
167  */
168 struct cfq_queue {
169         struct list_head cfq_list;
170         struct hlist_node cfq_hash;
171         int hash_key;
172         struct rb_root sort_list;
173         int queued[2];
174         int ioprio;
175 };
176
177 /*
178  * per-request structure
179  */
180 struct cfq_rq {
181         struct cfq_queue *cfq_queue;
182         struct rb_node rb_node;
183         struct hlist_node hash;
184         sector_t rb_key;
185
186         struct request *request;
187
188         struct list_head prio_list;
189         unsigned long nr_sectors;
190         int ioprio;
191 };
192
193 static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
194 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
195 static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq);
196
197 /*
198  * lots of deadline iosched dupes, can be abstracted later...
199  */
200 static inline void cfq_del_crq_hash(struct cfq_rq *crq)
201 {
202         hlist_del_init(&crq->hash);
203 }
204
205 static inline void
206 cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
207 {
208         cfq_del_crq_hash(crq);
209
210         if (q->last_merge == crq->request)
211                 q->last_merge = NULL;
212 }
213
214 static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
215 {
216         struct request *rq = crq->request;
217         const int hash_idx = CFQ_MHASH_FN(rq_hash_key(rq));
218
219         BUG_ON(!hlist_unhashed(&crq->hash));
220
221         hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
222 }
223
224 static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
225 {
226         struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
227         struct hlist_node *entry, *next;
228
229         hlist_for_each_safe(entry, next, hash_list) {
230                 struct cfq_rq *crq = list_entry_hash(entry);
231                 struct request *__rq = crq->request;
232
233                 BUG_ON(hlist_unhashed(&crq->hash));
234
235                 if (!rq_mergeable(__rq)) {
236                         cfq_del_crq_hash(crq);
237                         continue;
238                 }
239
240                 if (rq_hash_key(__rq) == offset)
241                         return __rq;
242         }
243
244         return NULL;
245 }
246
247 /*
248  * rb tree support functions
249  */
250 #define RB_EMPTY(node)          ((node)->rb_node == NULL)
251 #define rb_entry_crq(node)      rb_entry((node), struct cfq_rq, rb_node)
252 #define rq_rb_key(rq)           (rq)->sector
253
254 static void
255 cfq_del_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq)
256 {
257         if (crq->cfq_queue) {
258                 crq->cfq_queue = NULL;
259
260                 if (cfq_account_io(crq)) {
261                         cfqd->busy_rq--;
262                         cfqd->busy_sectors -= crq->nr_sectors;
263                         cfqd->cid[crq->ioprio].busy_rq--;
264                         atomic_inc(&(cfqd->cid[crq->ioprio].cum_rq_out));
265                         cfqd->cid[crq->ioprio].busy_sectors -= crq->nr_sectors;
266                         atomic_add(crq->nr_sectors,&(cfqd->cid[crq->ioprio].cum_sectors_out));
267                 }
268
269                 cfqq->queued[rq_data_dir(crq->request)]--;
270                 rb_erase(&crq->rb_node, &cfqq->sort_list);
271         }
272 }
273
274 static struct cfq_rq *
275 __cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
276 {
277         struct rb_node **p = &cfqq->sort_list.rb_node;
278         struct rb_node *parent = NULL;
279         struct cfq_rq *__crq;
280
281         while (*p) {
282                 parent = *p;
283                 __crq = rb_entry_crq(parent);
284
285                 if (crq->rb_key < __crq->rb_key)
286                         p = &(*p)->rb_left;
287                 else if (crq->rb_key > __crq->rb_key)
288                         p = &(*p)->rb_right;
289                 else
290                         return __crq;
291         }
292
293         rb_link_node(&crq->rb_node, parent, p);
294         return NULL;
295 }
296
297 static void
298 cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq,struct cfq_rq *crq)
299 {
300         struct request *rq = crq->request;
301         struct cfq_rq *__alias;
302
303         cfqq->queued[rq_data_dir(rq)]++;
304         if (cfq_account_io(crq)) {
305                 cfqd->busy_rq++;
306                 cfqd->busy_sectors += crq->nr_sectors;
307                 cfqd->cid[crq->ioprio].busy_rq++;
308                 atomic_inc(&(cfqd->cid[crq->ioprio].cum_rq_in));                
309                 cfqd->cid[crq->ioprio].busy_sectors += crq->nr_sectors;
310                 atomic_add(crq->nr_sectors,&(cfqd->cid[crq->ioprio].cum_sectors_in));
311         }
312 retry:
313         __alias = __cfq_add_crq_rb(cfqq, crq);
314         if (!__alias) {
315                 rb_insert_color(&crq->rb_node, &cfqq->sort_list);
316                 crq->rb_key = rq_rb_key(rq);
317                 crq->cfq_queue = cfqq;
318                 return;
319         }
320
321         cfq_del_crq_rb(cfqd, cfqq, __alias);
322         cfq_dispatch_sort(cfqd->dispatch, __alias);
323         goto retry;
324 }
325
326 static struct request *
327 cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
328 {
329         struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current));
330         struct rb_node *n;
331
332         if (!cfqq)
333                 goto out;
334
335         n = cfqq->sort_list.rb_node;
336         while (n) {
337                 struct cfq_rq *crq = rb_entry_crq(n);
338
339                 if (sector < crq->rb_key)
340                         n = n->rb_left;
341                 else if (sector > crq->rb_key)
342                         n = n->rb_right;
343                 else
344                         return crq->request;
345         }
346
347 out:
348         return NULL;
349 }
350
351 static void cfq_remove_request(request_queue_t *q, struct request *rq)
352 {
353         struct cfq_data *cfqd = q->elevator.elevator_data;
354         struct cfq_rq *crq = RQ_ELV_DATA(rq);
355
356         if (crq) {
357                 cfq_remove_merge_hints(q, crq);
358                 list_del_init(&crq->prio_list);
359                 list_del_init(&rq->queuelist);
360
361                 /*
362                  * set a grace period timer to allow realtime io to make real
363                  * progress if we released an rt request. for a normal request,
364                  * set a timer so idle io doesn't interfere with other io
365                  */
366                 if (crq->ioprio == IOPRIO_RT) {
367                         set_bit(CFQ_WAIT_RT, &cfqd->flags);
368                         cfqd->wait_end = jiffies + cfqd->cfq_grace_rt;
369                 } else if (crq->ioprio != IOPRIO_IDLE) {
370                         set_bit(CFQ_WAIT_NORM, &cfqd->flags);
371                         cfqd->wait_end = jiffies + cfqd->cfq_grace_idle;
372                 }
373
374                 if (crq->cfq_queue) {
375                         struct cfq_queue *cfqq = crq->cfq_queue;
376
377                         cfq_del_crq_rb(cfqd, cfqq, crq);
378
379                         if (RB_EMPTY(&cfqq->sort_list))
380                                 cfq_put_queue(cfqd, cfqq);
381                 }
382         }
383 }
384
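/*
 * see if bio can be merged: try the last_merge hint first, then the merge
 * hash for a back merge and the submitter's sort tree for a front merge
 */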
385 static int
386 cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
387 {
388         struct cfq_data *cfqd = q->elevator.elevator_data;
389         struct request *__rq;
390         int ret;
391
392         ret = elv_try_last_merge(q, bio);
393         if (ret != ELEVATOR_NO_MERGE) {
394                 __rq = q->last_merge;
395                 goto out_insert;
396         }
397
398         __rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
399         if (__rq) {
400                 BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);
401
402                 if (elv_rq_merge_ok(__rq, bio)) {
403                         ret = ELEVATOR_BACK_MERGE;
404                         goto out;
405                 }
406         }
407
408         __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));
409         if (__rq) {
410                 if (elv_rq_merge_ok(__rq, bio)) {
411                         ret = ELEVATOR_FRONT_MERGE;
412                         goto out;
413                 }
414         }
415
416         return ELEVATOR_NO_MERGE;
417 out:
418         q->last_merge = __rq;
419 out_insert:
420         *req = __rq;
421         return ret;
422 }
423
424 static void cfq_merged_request(request_queue_t *q, struct request *req)
425 {
426         struct cfq_data *cfqd = q->elevator.elevator_data;
427         struct cfq_rq *crq = RQ_ELV_DATA(req);
428
429         cfq_del_crq_hash(crq);
430         cfq_add_crq_hash(cfqd, crq);
431
432         if (crq->cfq_queue && (rq_rb_key(req) != crq->rb_key)) {
433                 struct cfq_queue *cfqq = crq->cfq_queue;
434
435                 cfq_del_crq_rb(cfqd, cfqq, crq);
436                 cfq_add_crq_rb(cfqd, cfqq, crq);
437         }
438
439         cfqd->busy_sectors += req->hard_nr_sectors - crq->nr_sectors;
440         cfqd->cid[crq->ioprio].busy_sectors += req->hard_nr_sectors - crq->nr_sectors;
441         crq->nr_sectors = req->hard_nr_sectors;
442
443         q->last_merge = req;
444 }
445
446 static void
447 cfq_merged_requests(request_queue_t *q, struct request *req,
448                     struct request *next)
449 {
450         cfq_merged_request(q, req);
451         cfq_remove_request(q, next);
452 }
453
454 /*
455  * sort into dispatch list, in optimal ascending order
456  */
457 static void cfq_dispatch_sort(struct list_head *head, struct cfq_rq *crq)
458 {
459         struct list_head *entry = head;
460         struct request *__rq;
461
462         if (!list_empty(head)) {
463                 __rq = list_entry_rq(head->next);
464
465                 if (crq->request->sector < __rq->sector) {
466                         entry = head->prev;
467                         goto link;
468                 }
469         }
470
471         while ((entry = entry->prev) != head) {
472                 __rq = list_entry_rq(entry);
473
474                 if (crq->request->sector <= __rq->sector)
475                         break;
476         }
477
478 link:
479         list_add_tail(&crq->request->queuelist, entry);
480 }
481
482 /*
483  * remove from io scheduler core and put on dispatch list for service
484  */
485 static inline int
486 __cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
487                         struct cfq_queue *cfqq)
488 {
489         struct cfq_rq *crq;
490
491         crq = rb_entry_crq(rb_first(&cfqq->sort_list));
492
493         cfq_del_crq_rb(cfqd, cfqq, crq);
494         cfq_remove_merge_hints(q, crq);
495         cfq_dispatch_sort(cfqd->dispatch, crq);
496
497         /*
498          * technically, for IOPRIO_RT we don't need to add it to the list.
499          */
500         list_add_tail(&crq->prio_list, &cfqd->cid[cfqq->ioprio].prio_list);
501         return crq->nr_sectors;
502 }
503
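/*
 * service the queues at the given priority level round robin, moving one
 * request from each to the dispatch list until max_rq requests or
 * max_sectors sectors have been moved
 */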
504 static int
505 cfq_dispatch_requests(request_queue_t *q, int prio, int max_rq, int max_sectors)
506 {
507         struct cfq_data *cfqd = q->elevator.elevator_data;
508         struct list_head *plist = &cfqd->cid[prio].rr_list;
509         struct list_head *entry, *nxt;
510         int q_rq, q_io;
511
512         /*
513          * for each queue at this prio level, dispatch a request
514          */
515         q_rq = q_io = 0;
516         list_for_each_safe(entry, nxt, plist) {
517                 struct cfq_queue *cfqq = list_entry_cfqq(entry);
518
519                 BUG_ON(RB_EMPTY(&cfqq->sort_list));
520
521                 q_io += __cfq_dispatch_requests(q, cfqd, cfqq);
522                 q_rq++;
523
524                 if (RB_EMPTY(&cfqq->sort_list))
525                         cfq_put_queue(cfqd, cfqq);
526
527                 /*
528                  * if we hit the queue limit, put the string of serviced
529                  * queues at the back of the pending list
530                  */
531                 if (q_io >= max_sectors || q_rq >= max_rq) {
532                         struct list_head *prv = nxt->prev;
533
534                         if (prv != plist) {
535                                 list_del(plist);
536                                 list_add(plist, prv);
537                         }
538                         break;
539                 }
540         }
541
542         cfqd->cid[prio].last_rq = q_rq;
543         cfqd->cid[prio].last_sectors = q_io;
544         return q_rq;
545 }
546
547 /*
548  * try to move some requests to the dispatch list. return 0 on success
549  */
550 static int cfq_select_requests(request_queue_t *q, struct cfq_data *cfqd)
551 {
552         int queued, busy_rq, busy_sectors, i;
553
554         /*
555          * if there's any realtime io, only schedule that
556          */
557         if (cfq_dispatch_requests(q, IOPRIO_RT, cfqd->cfq_quantum, cfqd->cfq_quantum_io))
558                 return 1;
559
560         /*
561          * if RT io was last serviced and grace time hasn't expired,
562          * arm the timer to restart queueing if no other RT io has been
563          * submitted in the meantime
564          */
565         if (test_bit(CFQ_WAIT_RT, &cfqd->flags)) {
566                 if (time_before(jiffies, cfqd->wait_end)) {
567                         mod_timer(&cfqd->timer, cfqd->wait_end);
568                         return 0;
569                 }
570                 clear_bit(CFQ_WAIT_RT, &cfqd->flags);
571         }
572
573         /*
574          * for each priority level, calculate number of requests we
575          * are allowed to put into service.
576          */
577         queued = 0;
578         busy_rq = cfqd->busy_rq;
579         busy_sectors = cfqd->busy_sectors;
580         for (i = IOPRIO_RT - 1; i > IOPRIO_IDLE; i--) {
581                 const int o_rq = busy_rq - cfqd->cid[i].busy_rq;
582                 const int o_sectors = busy_sectors - cfqd->cid[i].busy_sectors;
583                 int q_rq = cfqd->cfq_quantum * (i + 1) / IOPRIO_NR;
584                 int q_io = cfqd->cfq_quantum_io * (i + 1) / IOPRIO_NR;
585
586                 /*
587                  * no need to keep iterating the list, if there are no
588                  * requests pending anymore
589                  */
590                 if (!cfqd->busy_rq)
591                         break;
592
593                 /*
594                  * find out how many requests and sectors we are allowed to
595                  * service
596                  */
597                 if (o_rq)
598                         q_rq = o_rq * (i + 1) / IOPRIO_NR;
599                 if (q_rq > cfqd->cfq_quantum)
600                         q_rq = cfqd->cfq_quantum;
601
602                 if (o_sectors)
603                         q_io = o_sectors * (i + 1) / IOPRIO_NR;
604                 if (q_io > cfqd->cfq_quantum_io)
605                         q_io = cfqd->cfq_quantum_io;
606
607                 /*
608                  * average with last dispatched for fairness
609                  */
610                 if (cfqd->cid[i].last_rq != -1)
611                         q_rq = (cfqd->cid[i].last_rq + q_rq) / 2;
612                 if (cfqd->cid[i].last_sectors != -1)
613                         q_io = (cfqd->cid[i].last_sectors + q_io) / 2;
614
615                 queued += cfq_dispatch_requests(q, i, q_rq, q_io);
616         }
617
618         if (queued)
619                 return 1;
620
621         /*
622          * only allow dispatch of idle io, if the queue has been idle from
623          * servicing RT or normal io for the grace period
624          */
625         if (test_bit(CFQ_WAIT_NORM, &cfqd->flags)) {
626                 if (time_before(jiffies, cfqd->wait_end)) {
627                         mod_timer(&cfqd->timer, cfqd->wait_end);
628                         return 0;
629                 }
630                 clear_bit(CFQ_WAIT_NORM, &cfqd->flags);
631         }
632
633         /*
634          * if we found nothing to do, allow idle io to be serviced
635          */
636         if (cfq_dispatch_requests(q, IOPRIO_IDLE, cfqd->cfq_idle_quantum, cfqd->cfq_idle_quantum_io))
637                 return 1;
638
639         return 0;
640 }
641
642 static struct request *cfq_next_request(request_queue_t *q)
643 {
644         struct cfq_data *cfqd = q->elevator.elevator_data;
645         struct request *rq;
646
647         if (!list_empty(cfqd->dispatch)) {
648                 struct cfq_rq *crq;
649 dispatch:
650                 /*
651                  * end grace period, we are servicing a request
652                  */
653                 del_timer(&cfqd->timer);
654                 clear_bit(CFQ_WAIT_RT, &cfqd->flags);
655                 clear_bit(CFQ_WAIT_NORM, &cfqd->flags);
656
657                 BUG_ON(list_empty(cfqd->dispatch));
658                 rq = list_entry_rq(cfqd->dispatch->next);
659
660                 BUG_ON(q->last_merge == rq);
661                 crq = RQ_ELV_DATA(rq);
662                 if (crq) {
663                         BUG_ON(!hlist_unhashed(&crq->hash));
664                         list_del_init(&crq->prio_list);
665                 }
666
667                 return rq;
668         }
669
670         /*
671          * we moved requests to the dispatch list, go back and serve one
672          */
673         if (cfq_select_requests(q, cfqd))
674                 goto dispatch;
675
676         return NULL;
677 }
678
679 static inline struct cfq_queue *
680 __cfq_find_cfq_hash(struct cfq_data *cfqd, int hashkey, const int hashval)
681 {
682         struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
683         struct hlist_node *entry;
684
685         hlist_for_each(entry, hash_list) {
686                 struct cfq_queue *__cfqq = list_entry_qhash(entry);
687
688                 if (__cfqq->hash_key == hashkey)
689                         return __cfqq;
690         }
691
692         return NULL;
693 }
694
695 static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int hashkey)
696 {
697         const int hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);
698
699         return __cfq_find_cfq_hash(cfqd, hashkey, hashval);
700 }
701
702 static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
703 {
704         cfqd->busy_queues--;
705         WARN_ON(cfqd->busy_queues < 0);
706
707         cfqd->cid[cfqq->ioprio].busy_queues--;
708         WARN_ON(cfqd->cid[cfqq->ioprio].busy_queues < 0);
709         atomic_inc(&(cfqd->cid[cfqq->ioprio].cum_queues_out));
710
711         list_del(&cfqq->cfq_list);
712         hlist_del(&cfqq->cfq_hash);
713         mempool_free(cfqq, cfq_mpool);
714 }
715
716 static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int hashkey)
717 {
718         const int hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);
719         struct cfq_queue *cfqq, *new_cfqq = NULL;
720
721  retry:
722         cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval);
723
724         if (!cfqq) {
725                 if (new_cfqq) {
726                         cfqq = new_cfqq;
727                         new_cfqq = NULL;
728                 } else {
729                         new_cfqq = mempool_alloc(cfq_mpool, GFP_ATOMIC);
730                         /* MEF: I think cfq-iosched.c needs further fixing
731                          * to avoid this BUG_ON(). Shailabh will be sending
732                          * a new patch for this soon.
733                          */
734                         BUG_ON(new_cfqq == NULL);
735                         goto retry;
736                 }
737                 
738                 memset(cfqq, 0, sizeof(*cfqq));
739                 INIT_HLIST_NODE(&cfqq->cfq_hash);
740                 INIT_LIST_HEAD(&cfqq->cfq_list);
741
742                 cfqq->hash_key = cfq_hash_key(current);
743                 cfqq->ioprio = cfq_ioprio(current);
744                 hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);
745         }
746
747         if (new_cfqq) {
748                 mempool_free(new_cfqq, cfq_mpool);
749         }
750
751         return cfqq;
752 }
753
754 static void
755 __cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq)
756 {
757         const int prio = crq->ioprio;
758         struct cfq_queue *cfqq;
759
760         cfqq = cfq_get_queue(cfqd, cfq_hash_key(current));
761
762         /*
763          * not too good: the queue was created at a lower prio, so bump it up
764          */
765         if (prio > cfqq->ioprio) {
766                 printk("prio hash collision %d %d\n", prio, cfqq->ioprio);
767                 if (!list_empty(&cfqq->cfq_list)) {
768                         cfqd->cid[cfqq->ioprio].busy_queues--;
769                         WARN_ON(cfqd->cid[cfqq->ioprio].busy_queues < 0);
770                         atomic_inc(&(cfqd->cid[cfqq->ioprio].cum_queues_out));
771                         cfqd->cid[prio].busy_queues++;
772                         atomic_inc(&(cfqd->cid[prio].cum_queues_in));
773                         list_move_tail(&cfqq->cfq_list, &cfqd->cid[prio].rr_list);
774                 }
775                 cfqq->ioprio = prio;
776         }
777
778         cfq_add_crq_rb(cfqd, cfqq, crq);
779
780         if (list_empty(&cfqq->cfq_list)) {
781                 list_add_tail(&cfqq->cfq_list, &cfqd->cid[prio].rr_list);
782                 cfqd->cid[prio].busy_queues++;
783                 atomic_inc(&(cfqd->cid[prio].cum_queues_in));
784                 cfqd->busy_queues++;
785         }
786
787         if (rq_mergeable(crq->request)) {
788                 cfq_add_crq_hash(cfqd, crq);
789
790                 if (!q->last_merge)
791                         q->last_merge = crq->request;
792         }
793
794 }
795
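/*
 * pull requests of the given priority back off the dispatch list (via the
 * per-level prio_list) and feed them through __cfq_enqueue() again
 */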
796 static void cfq_reenqueue(request_queue_t *q, struct cfq_data *cfqd, int prio)
797 {
798         struct list_head *prio_list = &cfqd->cid[prio].prio_list;
799         struct list_head *entry, *tmp;
800
801         list_for_each_safe(entry, tmp, prio_list) {
802                 struct cfq_rq *crq = list_entry_prio(entry);
803
804                 list_del_init(entry);
805                 list_del_init(&crq->request->queuelist);
806                 __cfq_enqueue(q, cfqd, crq);
807         }
808 }
809
810 static void
811 cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq)
812 {
813         const int prio = cfq_ioprio(current);
814
815         crq->ioprio = prio;
816         crq->nr_sectors = crq->request->hard_nr_sectors;
817         __cfq_enqueue(q, cfqd, crq);
818
819         if (prio == IOPRIO_RT) {
820                 int i;
821
822                 /*
823                  * realtime io gets priority, move all other io back
824                  */
825                 for (i = IOPRIO_IDLE; i < IOPRIO_RT; i++)
826                         cfq_reenqueue(q, cfqd, i);
827         } else if (prio != IOPRIO_IDLE) {
828                 /*
829                  * check if we need to move idle io back into queue
830                  */
831                 cfq_reenqueue(q, cfqd, IOPRIO_IDLE);
832         }
833 }
834
835 static void
836 cfq_insert_request(request_queue_t *q, struct request *rq, int where)
837 {
838         struct cfq_data *cfqd = q->elevator.elevator_data;
839         struct cfq_rq *crq = RQ_ELV_DATA(rq);
840
841         switch (where) {
842                 case ELEVATOR_INSERT_BACK:
843 #if 0
844                         while (cfq_dispatch_requests(q, cfqd))
845                                 ;
846 #endif
847                         list_add_tail(&rq->queuelist, cfqd->dispatch);
848                         break;
849                 case ELEVATOR_INSERT_FRONT:
850                         list_add(&rq->queuelist, cfqd->dispatch);
851                         break;
852                 case ELEVATOR_INSERT_SORT:
853                         BUG_ON(!blk_fs_request(rq));
854                         cfq_enqueue(q, cfqd, crq);
855                         break;
856                 default:
857                         printk("%s: bad insert point %d\n", __FUNCTION__, where);
858                         return;
859         }
860 }
861
862 static int cfq_queue_empty(request_queue_t *q)
863 {
864         struct cfq_data *cfqd = q->elevator.elevator_data;
865
866         if (list_empty(cfqd->dispatch) && !cfqd->busy_queues)
867                 return 1;
868
869         return 0;
870 }
871
872 static struct request *
873 cfq_former_request(request_queue_t *q, struct request *rq)
874 {
875         struct cfq_rq *crq = RQ_ELV_DATA(rq);
876         struct rb_node *rbprev = rb_prev(&crq->rb_node);
877
878         if (rbprev)
879                 return rb_entry_crq(rbprev)->request;
880
881         return NULL;
882 }
883
884 static struct request *
885 cfq_latter_request(request_queue_t *q, struct request *rq)
886 {
887         struct cfq_rq *crq = RQ_ELV_DATA(rq);
888         struct rb_node *rbnext = rb_next(&crq->rb_node);
889
890         if (rbnext)
891                 return rb_entry_crq(rbnext)->request;
892
893         return NULL;
894 }
895
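/*
 * the block layer marked the queue congested for current; remember the
 * task's priority level so cfq_may_queue() can factor it in
 */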
896 static void cfq_queue_congested(request_queue_t *q)
897 {
898         struct cfq_data *cfqd = q->elevator.elevator_data;
899
900         set_bit(cfq_ioprio(current), &cfqd->rq_starved_mask);
901 }
902
903 static int cfq_may_queue(request_queue_t *q, int rw)
904 {
905         struct cfq_data *cfqd = q->elevator.elevator_data;
906         struct cfq_queue *cfqq;
907         const int prio = cfq_ioprio(current);
908         int limit, ret = 1;
909
910         if (!cfqd->busy_queues)
911                 goto out;
912
913         cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current));
914         if (!cfqq)
915                 goto out;
916
921         /*
922          * if higher or equal prio io is sleeping waiting for a request, don't
923          * allow this one to allocate one. as long as ll_rw_blk does fifo
924          * waitqueue wakeups this should work...
925          */
926         if (cfqd->rq_starved_mask & ~((1 << prio) - 1))
927                 goto out;
928
929         if (cfqq->queued[rw] < cfqd->cfq_queued || !cfqd->cid[prio].busy_queues)
930                 goto out;
931
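        /*
         * each queue gets a share of the request pool proportional to its
         * priority level, split between the busy queues at that level
         */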
932         limit = q->nr_requests * (prio + 1) / IOPRIO_NR;
933         limit /= cfqd->cid[prio].busy_queues;
934         if (cfqq->queued[rw] > limit)
935                 ret = 0;
936
937 out:
938         return ret;
939 }
940
941 static void cfq_put_request(request_queue_t *q, struct request *rq)
942 {
943         struct cfq_data *cfqd = q->elevator.elevator_data;
944         struct cfq_rq *crq = RQ_ELV_DATA(rq);
945
946         if (crq) {
947                 BUG_ON(q->last_merge == rq);
948                 BUG_ON(!hlist_unhashed(&crq->hash));
949
950                 mempool_free(crq, cfqd->crq_pool);
951                 rq->elevator_private = NULL;
952         }
953 }
954
955 static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
956 {
957         struct cfq_data *cfqd = q->elevator.elevator_data;
958         struct cfq_rq *crq = mempool_alloc(cfqd->crq_pool, gfp_mask);
959
960         if (crq) {
961                 /*
962                  * process now has one request
963                  */
964                 clear_bit(cfq_ioprio(current), &cfqd->rq_starved_mask);
965
966                 memset(crq, 0, sizeof(*crq));
967                 crq->request = rq;
968                 INIT_HLIST_NODE(&crq->hash);
969                 INIT_LIST_HEAD(&crq->prio_list);
970                 rq->elevator_private = crq;
971                 return 0;
972         }
973
974         return 1;
975 }
976
977 static void cfq_exit(request_queue_t *q, elevator_t *e)
978 {
979         struct cfq_data *cfqd = e->elevator_data;
980
981         e->elevator_data = NULL;
982         mempool_destroy(cfqd->crq_pool);
983         kfree(cfqd->crq_hash);
984         kfree(cfqd->cfq_hash);
985         kfree(cfqd);
986 }
987
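/*
 * grace period timer expired - drop the wait bits and restart dispatching
 * from process context via kblockd
 */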
988 static void cfq_timer(unsigned long data)
989 {
990         struct cfq_data *cfqd = (struct cfq_data *) data;
991
992         clear_bit(CFQ_WAIT_RT, &cfqd->flags);
993         clear_bit(CFQ_WAIT_NORM, &cfqd->flags);
994         kblockd_schedule_work(&cfqd->work);
995 }
996
997 static void cfq_work(void *data)
998 {
999         request_queue_t *q = data;
1000         unsigned long flags;
1001
1002         spin_lock_irqsave(q->queue_lock, flags);
1003         if (cfq_next_request(q))
1004                 q->request_fn(q);
1005         spin_unlock_irqrestore(q->queue_lock, flags);
1006 }
1007
1008 static int cfq_init(request_queue_t *q, elevator_t *e)
1009 {
1010         struct cfq_data *cfqd;
1011         int i;
1012
1013         cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);
1014         if (!cfqd)
1015                 return -ENOMEM;
1016
1017         memset(cfqd, 0, sizeof(*cfqd));
1018
1019         init_timer(&cfqd->timer);
1020         cfqd->timer.function = cfq_timer;
1021         cfqd->timer.data = (unsigned long) cfqd;
1022
1023         INIT_WORK(&cfqd->work, cfq_work, q);
1024
1025         for (i = 0; i < IOPRIO_NR; i++) {
1026                 struct io_prio_data *cid = &cfqd->cid[i];
1027
1028                 INIT_LIST_HEAD(&cid->rr_list);
1029                 INIT_LIST_HEAD(&cid->prio_list);
1030                 cid->last_rq = -1;
1031                 cid->last_sectors = -1;
1032
1033                 atomic_set(&cid->cum_rq_in,0);          
1034                 atomic_set(&cid->cum_rq_out,0);
1035                 atomic_set(&cid->cum_sectors_in,0);
1036                 atomic_set(&cid->cum_sectors_out,0);            
1037                 atomic_set(&cid->cum_queues_in,0);
1038                 atomic_set(&cid->cum_queues_out,0);
1039         }
1040
1041         cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES, GFP_KERNEL);
1042         if (!cfqd->crq_hash)
1043                 goto out_crqhash;
1044
1045         cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES, GFP_KERNEL);
1046         if (!cfqd->cfq_hash)
1047                 goto out_cfqhash;
1048
1049         cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, crq_pool);
1050         if (!cfqd->crq_pool)
1051                 goto out_crqpool;
1052
1053         for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
1054                 INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
1055         for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
1056                 INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);
1057
1058         cfqd->cfq_queued = cfq_queued;
1059         cfqd->cfq_quantum = cfq_quantum;
1060         cfqd->cfq_quantum_io = cfq_quantum_io;
1061         cfqd->cfq_idle_quantum = cfq_idle_quantum;
1062         cfqd->cfq_idle_quantum_io = cfq_idle_quantum_io;
1063         cfqd->cfq_grace_rt = cfq_grace_rt;
1064         cfqd->cfq_grace_idle = cfq_grace_idle;
1065
1066         q->nr_requests <<= 2;
1067
1068         cfqd->dispatch = &q->queue_head;
1069         e->elevator_data = cfqd;
1070
1071         return 0;
1072 out_crqpool:
1073         kfree(cfqd->cfq_hash);
1074 out_cfqhash:
1075         kfree(cfqd->crq_hash);
1076 out_crqhash:
1077         kfree(cfqd);
1078         return -ENOMEM;
1079 }
1080
1081 static int __init cfq_slab_setup(void)
1082 {
1083         crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
1084                                         NULL, NULL);
1085
1086         if (!crq_pool)
1087                 panic("cfq_iosched: can't init crq pool\n");
1088
1089         cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
1090                                         NULL, NULL);
1091
1092         if (!cfq_pool)
1093                 panic("cfq_iosched: can't init cfq pool\n");
1094
1095         cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);
1096
1097         if (!cfq_mpool)
1098                 panic("cfq_iosched: can't init cfq mpool\n");
1099
1100         return 0;
1101 }
1102
1103 subsys_initcall(cfq_slab_setup);
1104
1105 /*
1106  * sysfs parts below -->
1107  */
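/*
 * Example of poking the tunables once this scheduler is active on a queue
 * (the exact path depends on the device; "hda" here is just illustrative):
 *
 *      # cat /sys/block/hda/queue/iosched/quantum
 *      6
 *      # echo 8 > /sys/block/hda/queue/iosched/quantum
 *
 * The per-priority entries p0..p20 below report cumulative request, sector
 * and queue counts as "(in,out)" pairs; writing anything to them resets the
 * counters (see cfq_prio_store()).
 */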
1108 struct cfq_fs_entry {
1109         struct attribute attr;
1110         ssize_t (*show)(struct cfq_data *, char *);
1111         ssize_t (*store)(struct cfq_data *, const char *, size_t);
1112 };
1113
1114 static ssize_t
1115 cfq_var_show(unsigned int var, char *page)
1116 {
1117         return sprintf(page, "%d\n", var);
1118 }
1119
1120 static ssize_t
1121 cfq_var_store(unsigned int *var, const char *page, size_t count)
1122 {
1123         char *p = (char *) page;
1124
1125         *var = simple_strtoul(p, &p, 10);
1126         return count;
1127 }
1128
1129 #define SHOW_FUNCTION(__FUNC, __VAR)                                    \
1130 static ssize_t __FUNC(struct cfq_data *cfqd, char *page)                \
1131 {                                                                       \
1132         return cfq_var_show(__VAR, (page));                             \
1133 }
1134 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum);
1135 SHOW_FUNCTION(cfq_quantum_io_show, cfqd->cfq_quantum_io);
1136 SHOW_FUNCTION(cfq_idle_quantum_show, cfqd->cfq_idle_quantum);
1137 SHOW_FUNCTION(cfq_idle_quantum_io_show, cfqd->cfq_idle_quantum_io);
1138 SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued);
1139 SHOW_FUNCTION(cfq_grace_rt_show, cfqd->cfq_grace_rt);
1140 SHOW_FUNCTION(cfq_grace_idle_show, cfqd->cfq_grace_idle);
1141 #undef SHOW_FUNCTION
1142
1143 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)                         \
1144 static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count)    \
1145 {                                                                       \
1146         int ret = cfq_var_store(__PTR, (page), count);                  \
1147         if (*(__PTR) < (MIN))                                           \
1148                 *(__PTR) = (MIN);                                       \
1149         else if (*(__PTR) > (MAX))                                      \
1150                 *(__PTR) = (MAX);                                       \
1151         return ret;                                                     \
1152 }
1153 STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, INT_MAX);
1154 STORE_FUNCTION(cfq_quantum_io_store, &cfqd->cfq_quantum_io, 4, INT_MAX);
1155 STORE_FUNCTION(cfq_idle_quantum_store, &cfqd->cfq_idle_quantum, 1, INT_MAX);
1156 STORE_FUNCTION(cfq_idle_quantum_io_store, &cfqd->cfq_idle_quantum_io, 4, INT_MAX);
1157 STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, INT_MAX);
1158 STORE_FUNCTION(cfq_grace_rt_store, &cfqd->cfq_grace_rt, 0, INT_MAX);
1159 STORE_FUNCTION(cfq_grace_idle_store, &cfqd->cfq_grace_idle, 0, INT_MAX);
1160 #undef STORE_FUNCTION
1161
1162
1163 /* Additional entries to get priority level data */
1164 static ssize_t
1165 cfq_prio_show(struct cfq_data *cfqd, char *page, unsigned int priolvl)
1166 {
1167         int r1,r2,s1,s2,q1,q2;
1168
1169         if (!(priolvl >= IOPRIO_IDLE && priolvl <= IOPRIO_RT)) 
1170                 return 0;
1171         
1172         r1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_in));
1173         r2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_out));
1174         s1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_in));
1175         s2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_out));
1176         q1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_in)); 
1177         q2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_out));
1178         
1179
1180         /*
1181           return sprintf(page,"rq %d (%d,%d) sec %d (%d,%d) q %d (%d,%d)\n",
1182                       r1-r2,r1,r2,
1183                       s1-s2,s1,s2,
1184                       q1-q2,q1,q2);
1185         */
1186
1187         return sprintf(page,"rq (%d,%d) sec (%d,%d) q (%d,%d)\n",
1188                       r1,r2,
1189                       s1,s2,
1190                       q1,q2);
1191
1192 }
1193
1194 #define SHOW_PRIO_DATA(__PRIOLVL)                                               \
1195 static ssize_t cfq_prio_##__PRIOLVL##_show(struct cfq_data *cfqd, char *page)   \
1196 {                                                                               \
1197         return cfq_prio_show(cfqd,page,__PRIOLVL);                              \
1198 }
1199 SHOW_PRIO_DATA(0);
1200 SHOW_PRIO_DATA(1);
1201 SHOW_PRIO_DATA(2);
1202 SHOW_PRIO_DATA(3);
1203 SHOW_PRIO_DATA(4);
1204 SHOW_PRIO_DATA(5);
1205 SHOW_PRIO_DATA(6);
1206 SHOW_PRIO_DATA(7);
1207 SHOW_PRIO_DATA(8);
1208 SHOW_PRIO_DATA(9);
1209 SHOW_PRIO_DATA(10);
1210 SHOW_PRIO_DATA(11);
1211 SHOW_PRIO_DATA(12);
1212 SHOW_PRIO_DATA(13);
1213 SHOW_PRIO_DATA(14);
1214 SHOW_PRIO_DATA(15);
1215 SHOW_PRIO_DATA(16);
1216 SHOW_PRIO_DATA(17);
1217 SHOW_PRIO_DATA(18);
1218 SHOW_PRIO_DATA(19);
1219 SHOW_PRIO_DATA(20);
1220 #undef SHOW_PRIO_DATA
1221
1222
1223 static ssize_t cfq_prio_store(struct cfq_data *cfqd, const char *page, size_t count, int priolvl)
1224 {       
1225         atomic_set(&(cfqd->cid[priolvl].cum_rq_in),0);
1226         atomic_set(&(cfqd->cid[priolvl].cum_rq_out),0);
1227         atomic_set(&(cfqd->cid[priolvl].cum_sectors_in),0);
1228         atomic_set(&(cfqd->cid[priolvl].cum_sectors_out),0);
1229         atomic_set(&(cfqd->cid[priolvl].cum_queues_in),0);
1230         atomic_set(&(cfqd->cid[priolvl].cum_queues_out),0);
1231
1232         return count;
1233 }
1234
1235
1236 #define STORE_PRIO_DATA(__PRIOLVL)                                                                 \
1237 static ssize_t cfq_prio_##__PRIOLVL##_store(struct cfq_data *cfqd, const char *page, size_t count) \
1238 {                                                                                                  \
1239         return cfq_prio_store(cfqd,page,count,__PRIOLVL);                                          \
1240 }                  
1241 STORE_PRIO_DATA(0);     
1242 STORE_PRIO_DATA(1);
1243 STORE_PRIO_DATA(2);
1244 STORE_PRIO_DATA(3);
1245 STORE_PRIO_DATA(4);
1246 STORE_PRIO_DATA(5);
1247 STORE_PRIO_DATA(6);
1248 STORE_PRIO_DATA(7);
1249 STORE_PRIO_DATA(8);
1250 STORE_PRIO_DATA(9);
1251 STORE_PRIO_DATA(10);
1252 STORE_PRIO_DATA(11);
1253 STORE_PRIO_DATA(12);
1254 STORE_PRIO_DATA(13);
1255 STORE_PRIO_DATA(14);
1256 STORE_PRIO_DATA(15);
1257 STORE_PRIO_DATA(16);
1258 STORE_PRIO_DATA(17);
1259 STORE_PRIO_DATA(18);
1260 STORE_PRIO_DATA(19);
1261 STORE_PRIO_DATA(20);
1262 #undef STORE_PRIO_DATA
1263
1264
1265
1266 static struct cfq_fs_entry cfq_quantum_entry = {
1267         .attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR },
1268         .show = cfq_quantum_show,
1269         .store = cfq_quantum_store,
1270 };
1271 static struct cfq_fs_entry cfq_quantum_io_entry = {
1272         .attr = {.name = "quantum_io", .mode = S_IRUGO | S_IWUSR },
1273         .show = cfq_quantum_io_show,
1274         .store = cfq_quantum_io_store,
1275 };
1276 static struct cfq_fs_entry cfq_idle_quantum_entry = {
1277         .attr = {.name = "idle_quantum", .mode = S_IRUGO | S_IWUSR },
1278         .show = cfq_idle_quantum_show,
1279         .store = cfq_idle_quantum_store,
1280 };
1281 static struct cfq_fs_entry cfq_idle_quantum_io_entry = {
1282         .attr = {.name = "idle_quantum_io", .mode = S_IRUGO | S_IWUSR },
1283         .show = cfq_idle_quantum_io_show,
1284         .store = cfq_idle_quantum_io_store,
1285 };
1286 static struct cfq_fs_entry cfq_queued_entry = {
1287         .attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR },
1288         .show = cfq_queued_show,
1289         .store = cfq_queued_store,
1290 };
1291 static struct cfq_fs_entry cfq_grace_rt_entry = {
1292         .attr = {.name = "grace_rt", .mode = S_IRUGO | S_IWUSR },
1293         .show = cfq_grace_rt_show,
1294         .store = cfq_grace_rt_store,
1295 };
1296 static struct cfq_fs_entry cfq_grace_idle_entry = {
1297         .attr = {.name = "grace_idle", .mode = S_IRUGO | S_IWUSR },
1298         .show = cfq_grace_idle_show,
1299         .store = cfq_grace_idle_store,
1300 };
1301
1302 #define P_0_STR   "p0"
1303 #define P_1_STR   "p1"
1304 #define P_2_STR   "p2"
1305 #define P_3_STR   "p3"
1306 #define P_4_STR   "p4"
1307 #define P_5_STR   "p5"
1308 #define P_6_STR   "p6"
1309 #define P_7_STR   "p7"
1310 #define P_8_STR   "p8"
1311 #define P_9_STR   "p9"
1312 #define P_10_STR  "p10"
1313 #define P_11_STR  "p11"
1314 #define P_12_STR  "p12"
1315 #define P_13_STR  "p13"
1316 #define P_14_STR  "p14"
1317 #define P_15_STR  "p15"
1318 #define P_16_STR  "p16"
1319 #define P_17_STR  "p17"
1320 #define P_18_STR  "p18"
1321 #define P_19_STR  "p19"
1322 #define P_20_STR  "p20"
1323
1324
1325 #define CFQ_PRIO_SYSFS_ENTRY(__PRIOLVL)                                    \
1326 static struct cfq_fs_entry cfq_prio_##__PRIOLVL##_entry = {                \
1327         .attr = {.name = P_##__PRIOLVL##_STR, .mode = S_IRUGO | S_IWUSR }, \
1328         .show = cfq_prio_##__PRIOLVL##_show,                               \
1329         .store = cfq_prio_##__PRIOLVL##_store,                             \
1330 };
1331 CFQ_PRIO_SYSFS_ENTRY(0);
1332 CFQ_PRIO_SYSFS_ENTRY(1);
1333 CFQ_PRIO_SYSFS_ENTRY(2);
1334 CFQ_PRIO_SYSFS_ENTRY(3);
1335 CFQ_PRIO_SYSFS_ENTRY(4);
1336 CFQ_PRIO_SYSFS_ENTRY(5);
1337 CFQ_PRIO_SYSFS_ENTRY(6);
1338 CFQ_PRIO_SYSFS_ENTRY(7);
1339 CFQ_PRIO_SYSFS_ENTRY(8);
1340 CFQ_PRIO_SYSFS_ENTRY(9);
1341 CFQ_PRIO_SYSFS_ENTRY(10);
1342 CFQ_PRIO_SYSFS_ENTRY(11);
1343 CFQ_PRIO_SYSFS_ENTRY(12);
1344 CFQ_PRIO_SYSFS_ENTRY(13);
1345 CFQ_PRIO_SYSFS_ENTRY(14);
1346 CFQ_PRIO_SYSFS_ENTRY(15);
1347 CFQ_PRIO_SYSFS_ENTRY(16);
1348 CFQ_PRIO_SYSFS_ENTRY(17);
1349 CFQ_PRIO_SYSFS_ENTRY(18);
1350 CFQ_PRIO_SYSFS_ENTRY(19);
1351 CFQ_PRIO_SYSFS_ENTRY(20);
1352 #undef CFQ_PRIO_SYSFS_ENTRY
1353
1354
1355 static struct attribute *default_attrs[] = {
1356         &cfq_quantum_entry.attr,
1357         &cfq_quantum_io_entry.attr,
1358         &cfq_idle_quantum_entry.attr,
1359         &cfq_idle_quantum_io_entry.attr,
1360         &cfq_queued_entry.attr,
1361         &cfq_grace_rt_entry.attr,
1362         &cfq_grace_idle_entry.attr,
1363         &cfq_prio_0_entry.attr,
1364         &cfq_prio_1_entry.attr,
1365         &cfq_prio_2_entry.attr,
1366         &cfq_prio_3_entry.attr,
1367         &cfq_prio_4_entry.attr,
1368         &cfq_prio_5_entry.attr,
1369         &cfq_prio_6_entry.attr,
1370         &cfq_prio_7_entry.attr,
1371         &cfq_prio_8_entry.attr,
1372         &cfq_prio_9_entry.attr,
1373         &cfq_prio_10_entry.attr,
1374         &cfq_prio_11_entry.attr,
1375         &cfq_prio_12_entry.attr,
1376         &cfq_prio_13_entry.attr,
1377         &cfq_prio_14_entry.attr,
1378         &cfq_prio_15_entry.attr,
1379         &cfq_prio_16_entry.attr,
1380         &cfq_prio_17_entry.attr,
1381         &cfq_prio_18_entry.attr,
1382         &cfq_prio_19_entry.attr,
1383         &cfq_prio_20_entry.attr,
1384         NULL,
1385 };
1386
1387 #define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr)
1388
1389 static ssize_t
1390 cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
1391 {
1392         elevator_t *e = container_of(kobj, elevator_t, kobj);
1393         struct cfq_fs_entry *entry = to_cfq(attr);
1394
1395         if (!entry->show)
1396                 return 0;
1397
1398         return entry->show(e->elevator_data, page);
1399 }
1400
1401 static ssize_t
1402 cfq_attr_store(struct kobject *kobj, struct attribute *attr,
1403                const char *page, size_t length)
1404 {
1405         elevator_t *e = container_of(kobj, elevator_t, kobj);
1406         struct cfq_fs_entry *entry = to_cfq(attr);
1407
1408         if (!entry->store)
1409                 return -EINVAL;
1410
1411         return entry->store(e->elevator_data, page, length);
1412 }
1413
1414 static struct sysfs_ops cfq_sysfs_ops = {
1415         .show   = cfq_attr_show,
1416         .store  = cfq_attr_store,
1417 };
1418
1419 struct kobj_type cfq_ktype = {
1420         .sysfs_ops      = &cfq_sysfs_ops,
1421         .default_attrs  = default_attrs,
1422 };
1423
1424 elevator_t iosched_cfq = {
1425         .elevator_name =                "cfq",
1426         .elevator_ktype =               &cfq_ktype,
1427         .elevator_merge_fn =            cfq_merge,
1428         .elevator_merged_fn =           cfq_merged_request,
1429         .elevator_merge_req_fn =        cfq_merged_requests,
1430         .elevator_next_req_fn =         cfq_next_request,
1431         .elevator_add_req_fn =          cfq_insert_request,
1432         .elevator_remove_req_fn =       cfq_remove_request,
1433         .elevator_queue_empty_fn =      cfq_queue_empty,
1434         .elevator_former_req_fn =       cfq_former_request,
1435         .elevator_latter_req_fn =       cfq_latter_request,
1436         .elevator_set_req_fn =          cfq_set_request,
1437         .elevator_put_req_fn =          cfq_put_request,
1438         .elevator_may_queue_fn =        cfq_may_queue,
1439         .elevator_set_congested_fn =    cfq_queue_congested,
1440         .elevator_init_fn =             cfq_init,
1441         .elevator_exit_fn =             cfq_exit,
1442 };
1443
1444 EXPORT_SYMBOL(iosched_cfq);