/*
 *  linux/drivers/block/cfq-iosched.c
 *
 *  CFQ, or complete fairness queueing, disk scheduler.
 *
 *  Based on ideas from a previously unfinished io
 *  scheduler (round robin per-process disk scheduling) and Andrea Arcangeli.
 *
 *  IO priorities are supported, from 0% to 100% in 5% increments. Both of
 *  those values have special meaning - the 0% class is only allowed to do io
 *  if no one else wants to use the disk. 100% is considered real-time io, and
 *  always gets priority. The default process io rate is 95%. In the absence of
 *  other io, a class may consume 100% of the disk bandwidth regardless. Within
 *  a class, bandwidth is distributed equally among the citizens.
 *
 *  TODO:
 *  - cfq_select_requests() needs some work for 5-95% io
 *  - barriers not supported
 *  - export grace periods in ms, not jiffies
 *
 *  Copyright (C) 2003 Jens Axboe <axboe@suse.de>
 */
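/*
 * Orientation note: with 5% increments there are 21 priority levels, exposed
 * as p0..p20 in sysfs further below.  Assuming IOPRIO_IDLE == 0 and
 * IOPRIO_RT == IOPRIO_NR - 1 == 20 (which is what the dispatch loops below
 * suggest), level i corresponds roughly to i * 5%, so the 95% default maps
 * to level 19.
 */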
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/hash.h>
#include <linux/rbtree.h>
#include <linux/mempool.h>
#include <asm/div64.h>
#if IOPRIO_NR > BITS_PER_LONG
#error Cannot support this many io priority levels
#endif
static int cfq_quantum = 6;
static int cfq_quantum_io = 256;
static int cfq_idle_quantum = 1;
static int cfq_idle_quantum_io = 64;
static int cfq_queued = 4;
static int cfq_grace_rt = HZ / 100 ?: 1;
static int cfq_grace_idle = HZ / 10;
#define CFQ_EPOCH	1000000000
#define CFQ_SECTORATE	1000
#define CFQ_HMAX_PCT	80
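/*
 * CFQ_EPOCH is in nanoseconds (one second), CFQ_SECTORATE is the default
 * per-queue dispatch budget enforced by __cfq_check_limit() below, and
 * CFQ_HMAX_PCT controls how far the running average must decay before a
 * paused queue is resumed.
 */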
#define CFQ_QHASH_SHIFT		6
#define CFQ_QHASH_ENTRIES	(1 << CFQ_QHASH_SHIFT)
#define list_entry_qhash(entry)	hlist_entry((entry), struct cfq_queue, cfq_hash)

#define CFQ_MHASH_SHIFT		8
#define CFQ_MHASH_BLOCK(sec)	((sec) >> 3)
#define CFQ_MHASH_ENTRIES	(1 << CFQ_MHASH_SHIFT)
#define CFQ_MHASH_FN(sec)	(hash_long(CFQ_MHASH_BLOCK((sec)), CFQ_MHASH_SHIFT))
#define rq_hash_key(rq)		((rq)->sector + (rq)->nr_sectors)
#define list_entry_hash(ptr)	hlist_entry((ptr), struct cfq_rq, hash)
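/*
 * The merge hash is keyed on a request's *end* sector (rq_hash_key above),
 * so a new bio starting at sector S finds a back-merge candidate with a
 * single lookup of S in cfq_find_rq_hash(); front merges instead go through
 * the per-queue rbtree keyed on the start sector (rq_rb_key below).
 */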
#define list_entry_cfqq(ptr)	list_entry((ptr), struct cfq_queue, cfq_list)
#define list_entry_prio(ptr)	list_entry((ptr), struct cfq_rq, prio_list)

#define cfq_account_io(crq) \
	((crq)->ioprio != IOPRIO_IDLE && (crq)->ioprio != IOPRIO_RT)
/*
 * defines how we distribute bandwidth (can be tgid, uid, etc)
 */

/* FIXME: change hash_key to be sizeof(void *) rather than sizeof(int)
 * otherwise the cast of cki_tsk_icls will not work reliably on 64-bit arches.
 * OR, change cki_tsk_icls to return ints (will need another id space to be
 */

#if defined(CONFIG_CKRM_RES_BLKIO) || defined(CONFIG_CKRM_RES_BLKIO_MODULE)
extern void *cki_hash_key(struct task_struct *tsk);
extern int cki_ioprio(struct task_struct *tsk);
extern void *cki_cfqpriv(struct task_struct *tsk);

#define cfq_hash_key(tsk)	((int)cki_hash_key((tsk)))
#define cfq_ioprio(tsk)		(cki_ioprio((tsk)))
#define cfq_cfqpriv(cfqd, tsk)	(cki_cfqpriv((tsk)))

#else

#define cfq_hash_key(tsk)	((tsk)->tgid)
#define cfq_cfqpriv(cfqd, tsk)	(&(((cfqd)->cid[(tsk)->ioprio]).cfqpriv))
#define cfq_ioprio(tsk)		((tsk)->ioprio)

#endif
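/*
 * The three macros above are the only place where the fairness domain is
 * chosen; everything else just hashes on cfq_hash_key().  A hypothetical
 * per-user (rather than per-process) policy would only need something like
 *
 *	#define cfq_hash_key(tsk)	((tsk)->uid)
 *
 * as the "can be tgid, uid, etc" comment above suggests.
 */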
#define CFQ_WAIT_RT	0
#define CFQ_WAIT_NORM	1

static kmem_cache_t *crq_pool;
static kmem_cache_t *cfq_pool;
static mempool_t *cfq_mpool;
/*
 * defines an io priority level
 */
struct io_prio_data {
	struct list_head rr_list;

	unsigned long busy_sectors;

	/* requests, sectors and queues
	 * added(in), dispatched/deleted(out)
	 * at this priority level.
	 */
	atomic_t cum_rq_in, cum_rq_out;
	atomic_t cum_sectors_in, cum_sectors_out;
	atomic_t cum_queues_in, cum_queues_out;

	cfqlim_t cfqpriv;	/* data for enforcing limits */

	struct list_head prio_list;
/*
 * per-request queue structure
 */
struct cfq_data {
	struct list_head rr_list;
	struct list_head *dispatch;
	struct hlist_head *cfq_hash;
	struct hlist_head *crq_hash;

	struct io_prio_data cid[IOPRIO_NR];

	/*
	 * total number of busy queues and requests
	 */
	unsigned long busy_sectors;

	request_queue_t *queue;
	unsigned long rq_starved_mask;

	/*
	 * grace period handling
	 */
	struct timer_list timer;
	unsigned long wait_end;

	struct work_struct work;

	unsigned int cfq_quantum;
	unsigned int cfq_quantum_io;
	unsigned int cfq_idle_quantum;
	unsigned int cfq_idle_quantum_io;
	unsigned int cfq_queued;
	unsigned int cfq_grace_rt;
	unsigned int cfq_grace_idle;

	unsigned int cfq_epoch;
	unsigned int cfq_hmax_pct;
	unsigned int cfq_qsectorate;
/*
 * per-class structure
 */
struct cfq_queue {
	struct list_head cfq_list;
	struct hlist_node cfq_hash;

	struct rb_root sort_list;

	/* limit related settings/stats obtained
	 * either from io_prio_data or ckrm I/O class
	 */
	struct cfqlim *cfqpriv;

	u64 epstart;		/* current epoch's starting timestamp (ns) */
	u64 epsector[2];	/* Total sectors dispatched in [0] previous
				 * and [1] current epoch
				 */

	unsigned long avsec;		/* avg sectors dispatched/epoch */
//	unsigned long long lastime;	/* timestamp of last request served */
//	unsigned long sectorate;	/* limit for sectors served/epoch */
	int skipped;			/* queue skipped at last dispatch ? */

	/* Per queue timer to suspend/resume queue from processing */
	struct timer_list timer;
	unsigned long wait_end;

	struct work_struct work;

	struct cfq_data *cfqd;
/*
 * Per-request structure
 */
struct cfq_rq {
	struct cfq_queue *cfq_queue;
	struct rb_node rb_node;
	struct hlist_node hash;

	struct request *request;

	struct list_head prio_list;
	unsigned long nr_sectors;
static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq);
static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int pid);
static void cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq,
			      struct cfq_rq *crq);
/*
 * lots of deadline iosched dupes, can be abstracted later...
 */
static inline void cfq_del_crq_hash(struct cfq_rq *crq)
	hlist_del_init(&crq->hash);

cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
	cfq_del_crq_hash(crq);

	if (q->last_merge == crq->request)
		q->last_merge = NULL;
static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
	struct request *rq = crq->request;
	const int hash_idx = CFQ_MHASH_FN(rq_hash_key(rq));

	BUG_ON(!hlist_unhashed(&crq->hash));

	hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]);
static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
	struct hlist_head *hash_list = &cfqd->crq_hash[CFQ_MHASH_FN(offset)];
	struct hlist_node *entry, *next;

	hlist_for_each_safe(entry, next, hash_list) {
		struct cfq_rq *crq = list_entry_hash(entry);
		struct request *__rq = crq->request;

		BUG_ON(hlist_unhashed(&crq->hash));

		if (!rq_mergeable(__rq)) {
			cfq_del_crq_hash(crq);

		if (rq_hash_key(__rq) == offset)
/*
 * rb tree support functions
 */
#define RB_EMPTY(node)		((node)->rb_node == NULL)
#define rb_entry_crq(node)	rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq)		(rq)->sector
cfq_del_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq)
	if (crq->cfq_queue) {
		crq->cfq_queue = NULL;

		if (cfq_account_io(crq)) {
			cfqd->busy_sectors -= crq->nr_sectors;
			cfqd->cid[crq->ioprio].busy_rq--;
			cfqd->cid[crq->ioprio].busy_sectors -= crq->nr_sectors;

		atomic_inc(&(cfqd->cid[crq->ioprio].cum_rq_out));
		atomic_add(crq->nr_sectors,
			   &(cfqd->cid[crq->ioprio].cum_sectors_out));

		cfqq->queued[rq_data_dir(crq->request)]--;
		rb_erase(&crq->rb_node, &cfqq->sort_list);
static struct cfq_rq *
__cfq_add_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
	struct rb_node **p = &cfqq->sort_list.rb_node;
	struct rb_node *parent = NULL;
	struct cfq_rq *__crq;

	__crq = rb_entry_crq(parent);

	if (crq->rb_key < __crq->rb_key)
	else if (crq->rb_key > __crq->rb_key)

	rb_link_node(&crq->rb_node, parent, p);
cfq_add_crq_rb(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq)
	struct request *rq = crq->request;
	struct cfq_rq *__alias;

	cfqq->queued[rq_data_dir(rq)]++;
	if (cfq_account_io(crq)) {
		cfqd->busy_sectors += crq->nr_sectors;
		cfqd->cid[crq->ioprio].busy_rq++;
		cfqd->cid[crq->ioprio].busy_sectors += crq->nr_sectors;

	atomic_inc(&(cfqd->cid[crq->ioprio].cum_rq_in));
	atomic_add(crq->nr_sectors,
		   &(cfqd->cid[crq->ioprio].cum_sectors_in));

	__alias = __cfq_add_crq_rb(cfqq, crq);

	rb_insert_color(&crq->rb_node, &cfqq->sort_list);
	crq->rb_key = rq_rb_key(rq);
	crq->cfq_queue = cfqq;

	cfq_dispatch_sort(cfqd, cfqq, __alias);
static struct request *
cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector)
	struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current));

	n = cfqq->sort_list.rb_node;
	struct cfq_rq *crq = rb_entry_crq(n);

	if (sector < crq->rb_key)
	else if (sector > crq->rb_key)
static void cfq_remove_request(request_queue_t *q, struct request *rq)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct cfq_rq *crq = RQ_ELV_DATA(rq);

	cfq_remove_merge_hints(q, crq);
	list_del_init(&crq->prio_list);
	list_del_init(&rq->queuelist);

	/*
	 * set a grace period timer to allow realtime io to make real
	 * progress, if we release an rt request. for a normal request,
	 * set the timer so idle io doesn't interfere with other io
	 */
	if (crq->ioprio == IOPRIO_RT) {
		set_bit(CFQ_WAIT_RT, &cfqd->flags);
		cfqd->wait_end = jiffies + cfqd->cfq_grace_rt;
	} else if (crq->ioprio != IOPRIO_IDLE) {
		set_bit(CFQ_WAIT_NORM, &cfqd->flags);
		cfqd->wait_end = jiffies + cfqd->cfq_grace_idle;

	if (crq->cfq_queue) {
		struct cfq_queue *cfqq = crq->cfq_queue;

		cfq_del_crq_rb(cfqd, cfqq, crq);

		if (RB_EMPTY(&cfqq->sort_list))
			cfq_put_queue(cfqd, cfqq);
cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct request *__rq;

	ret = elv_try_last_merge(q, bio);
	if (ret != ELEVATOR_NO_MERGE) {
		__rq = q->last_merge;

	__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
		BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector);

		if (elv_rq_merge_ok(__rq, bio)) {
			ret = ELEVATOR_BACK_MERGE;

	__rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio));

		if (elv_rq_merge_ok(__rq, bio)) {
			ret = ELEVATOR_FRONT_MERGE;

	return ELEVATOR_NO_MERGE;

	q->last_merge = __rq;
static void cfq_merged_request(request_queue_t *q, struct request *req)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct cfq_rq *crq = RQ_ELV_DATA(req);

	cfq_del_crq_hash(crq);
	cfq_add_crq_hash(cfqd, crq);

	if (crq->cfq_queue && (rq_rb_key(req) != crq->rb_key)) {
		struct cfq_queue *cfqq = crq->cfq_queue;

		cfq_del_crq_rb(cfqd, cfqq, crq);
		cfq_add_crq_rb(cfqd, cfqq, crq);

	tmp = req->hard_nr_sectors - crq->nr_sectors;
	cfqd->busy_sectors += tmp;
	cfqd->cid[crq->ioprio].busy_sectors += tmp;
	atomic_add(tmp, &(cfqd->cid[crq->ioprio].cum_sectors_in));

	crq->nr_sectors = req->hard_nr_sectors;
cfq_merged_requests(request_queue_t *q, struct request *req,
		    struct request *next)
	cfq_merged_request(q, req);
	cfq_remove_request(q, next);
/*
 * sort into dispatch list, in optimal ascending order
 */
cfq_dispatch_sort(struct cfq_data *cfqd, struct cfq_queue *cfqq,
		  struct cfq_rq *crq)
	struct list_head *head = cfqd->dispatch, *entry = head;
	struct request *__rq;

	cfq_del_crq_rb(cfqd, cfqq, crq);
	cfq_remove_merge_hints(cfqd->queue, crq);

	if (!list_empty(head)) {
		__rq = list_entry_rq(head->next);

		if (crq->request->sector < __rq->sector) {

	while ((entry = entry->prev) != head) {
		__rq = list_entry_rq(entry);

		if (crq->request->sector <= __rq->sector)

	list_add_tail(&crq->request->queuelist, entry);
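/*
 * Note on the insertion above: the dispatch list is kept in ascending
 * sector order, and the scan walks backwards from the tail on the
 * assumption that a freshly dispatched request usually sorts near the
 * end of the list, so the common case terminates quickly.
 */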
	struct cfq_queue *dcfqq;

/* Over how many ns is sectorate defined */
#define NS4SCALE	(100000000)
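/*
 * Sketch of the accounting done in __cfq_check_limit() (illustrative numbers
 * only): avsec is the number of sectors dispatched per NS4SCALE (100 ms) of
 * elapsed epoch time, smoothed over the previous and current epoch.  With the
 * default CFQ_SECTORATE of 1000 and 512-byte sectors that allows roughly
 * 512 KiB per 100 ms before the queue is scheduled to be paused; the wait
 * computed at the end of the function is the time needed for avsec to decay
 * back to cfq_hmax_pct (80%) of its current value.
 */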
__cfq_check_limit(struct cfq_data *cfqd, struct cfq_queue *cfqq, int dontskip)
	unsigned long long ts, gap, epoch, tmp;
	unsigned long newavsec, sectorate;

	crq = rb_entry_crq(rb_first(&cfqq->sort_list));

	gap = ts - cfqq->epstart;
	epoch = cfqd->cfq_epoch;

	sectorate = atomic_read(&cfqq->cfqpriv->sectorate);
//	sectorate = atomic_read(&(cfqd->cid[crq->ioprio].sectorate));

	if ((gap >= epoch) || (gap < 0)) {
		if (gap >= (epoch << 1)) {
			cfqq->epsector[0] = 0;

			cfqq->epsector[0] = cfqq->epsector[1];
			cfqq->epstart += epoch;

		cfqq->epsector[1] = 0;
		gap = ts - cfqq->epstart;

		tmp = (cfqq->epsector[0] + crq->nr_sectors) * NS4SCALE;
		do_div(tmp, epoch + gap);
		cfqq->avsec = (unsigned long)tmp;

		cfqq->epsector[1] += crq->nr_sectors;

		cfqq->cfqpriv->navsec = cfqq->avsec;
		cfqq->cfqpriv->sec[0] = cfqq->epsector[0];
		cfqq->cfqpriv->sec[1] = cfqq->epsector[1];
		cfqq->cfqpriv->timedout++;

		cfqd->cid[crq->ioprio].navsec = cfqq->avsec;
		cfqd->cid[crq->ioprio].sec[0] = cfqq->epsector[0];
		cfqd->cid[crq->ioprio].sec[1] = cfqq->epsector[1];
		cfqd->cid[crq->ioprio].timedout++;

	tmp = (cfqq->epsector[0] + cfqq->epsector[1] + crq->nr_sectors)
		* NS4SCALE;
	do_div(tmp, epoch + gap);
	newavsec = (unsigned long)tmp;

	if ((newavsec < sectorate) || dontskip) {
		cfqq->avsec = newavsec;
		cfqq->epsector[1] += crq->nr_sectors;
		cfqq->cfqpriv->navsec = cfqq->avsec;
		cfqq->cfqpriv->sec[1] = cfqq->epsector[1];

		cfqd->cid[crq->ioprio].navsec = cfqq->avsec;
		cfqd->cid[crq->ioprio].sec[1] = cfqq->epsector[1];

		/* pause q's processing till avsec drops to
		 * cfq_hmax_pct % of its value */
		tmp = (epoch + gap) * (100 - cfqd->cfq_hmax_pct);
		do_div(tmp, 1000000 * cfqd->cfq_hmax_pct);
		cfqq->wait_end = jiffies + msecs_to_jiffies(tmp);
/*
 * remove from io scheduler core and put on dispatch list for service
 */
__cfq_dispatch_requests(request_queue_t *q, struct cfq_data *cfqd,
			struct cfq_queue *cfqq)
	crq = rb_entry_crq(rb_first(&cfqq->sort_list));

	cfq_dispatch_sort(cfqd, cfqq, crq);

	/*
	 * technically, for IOPRIO_RT we don't need to add it to the list.
	 */
	list_add_tail(&crq->prio_list, &cfqd->cid[cfqq->ioprio].prio_list);
	return crq->nr_sectors;
cfq_dispatch_requests(request_queue_t *q, int prio, int max_rq, int max_sectors)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct list_head *plist = &cfqd->cid[prio].rr_list;
	struct cfq_queue *cfqq;
	struct list_head *entry, *nxt;
	int first_round, busy_queues, busy_unlimited;

	/*
	 * for each queue at this prio level, dispatch a request
	 */
	list_for_each_safe(entry, nxt, plist) {
		cfqq = list_entry_cfqq(entry);

		BUG_ON(RB_EMPTY(&cfqq->sort_list));

		if (first_round || busy_unlimited)
			__cfq_check_limit(cfqd, cfqq, 0);
		else
			__cfq_check_limit(cfqd, cfqq, 1);

		cfqq->cfqpriv->nskip++;
		/* cfqd->cid[prio].nskip++; */

		if (time_before(jiffies, cfqq->wait_end)) {
			list_del(&cfqq->cfq_list);
			mod_timer(&cfqq->timer, cfqq->wait_end);

		q_io += __cfq_dispatch_requests(q, cfqd, cfqq);

		if (RB_EMPTY(&cfqq->sort_list)) {
			cfq_put_queue(cfqd, cfqq);

		if (q_io >= max_sectors || q_rq >= max_rq) {
			struct list_head *prv = nxt->prev;

			list_add(plist, prv);

	if ((q_io < max_sectors) && (q_rq < max_rq) &&
	    (busy_queues || first_round))

	/*
	 * if we hit the queue limit, put the string of serviced
	 * queues at the back of the pending list
	 */
	struct list_head *prv = nxt->prev;

	list_add(plist, prv);

	cfqd->cid[prio].last_rq = q_rq;
	cfqd->cid[prio].last_sectors = q_io;
/*
 * try to move some requests to the dispatch list. return 0 on success
 */
static int cfq_select_requests(request_queue_t *q, struct cfq_data *cfqd)
	int queued, busy_rq, busy_sectors, i;

	/*
	 * if there's any realtime io, only schedule that
	 */
	if (cfq_dispatch_requests(q, IOPRIO_RT, cfqd->cfq_quantum, cfqd->cfq_quantum_io))

	/*
	 * if RT io was last serviced and grace time hasn't expired,
	 * arm the timer to restart queueing if no other RT io has been
	 * submitted in the meantime
	 */
	if (test_bit(CFQ_WAIT_RT, &cfqd->flags)) {
		if (time_before(jiffies, cfqd->wait_end)) {
			mod_timer(&cfqd->timer, cfqd->wait_end);

		clear_bit(CFQ_WAIT_RT, &cfqd->flags);

	/*
	 * for each priority level, calculate number of requests we
	 * are allowed to put into service.
	 */
	busy_rq = cfqd->busy_rq;
	busy_sectors = cfqd->busy_sectors;
	for (i = IOPRIO_RT - 1; i > IOPRIO_IDLE; i--) {
		const int o_rq = busy_rq - cfqd->cid[i].busy_rq;
		const int o_sectors = busy_sectors - cfqd->cid[i].busy_sectors;
		int q_rq = cfqd->cfq_quantum * (i + 1) / IOPRIO_NR;
		int q_io = cfqd->cfq_quantum_io * (i + 1) / IOPRIO_NR;

		/*
		 * no need to keep iterating the list, if there are no
		 * requests pending anymore
		 */

		/*
		 * find out how many requests and sectors we are allowed to
		 * service
		 */
			q_rq = o_sectors * (i + 1) / IOPRIO_NR;
		if (q_rq > cfqd->cfq_quantum)
			q_rq = cfqd->cfq_quantum;

			q_io = o_sectors * (i + 1) / IOPRIO_NR;
		if (q_io > cfqd->cfq_quantum_io)
			q_io = cfqd->cfq_quantum_io;

		/*
		 * average with last dispatched for fairness
		 */
		if (cfqd->cid[i].last_rq != -1)
			q_rq = (cfqd->cid[i].last_rq + q_rq) / 2;
		if (cfqd->cid[i].last_sectors != -1)
			q_io = (cfqd->cid[i].last_sectors + q_io) / 2;

		queued += cfq_dispatch_requests(q, i, q_rq, q_io);

	/*
	 * only allow dispatch of idle io, if the queue has been idle from
	 * servicing RT or normal io for the grace period
	 */
	if (test_bit(CFQ_WAIT_NORM, &cfqd->flags)) {
		if (time_before(jiffies, cfqd->wait_end)) {
			mod_timer(&cfqd->timer, cfqd->wait_end);

		clear_bit(CFQ_WAIT_NORM, &cfqd->flags);

	/*
	 * if we found nothing to do, allow idle io to be serviced
	 */
	if (cfq_dispatch_requests(q, IOPRIO_IDLE, cfqd->cfq_idle_quantum, cfqd->cfq_idle_quantum_io))
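/*
 * Example of the quantum scaling above, assuming IOPRIO_NR == 21 and the
 * default cfq_quantum of 6: a pass over priority 19 may move up to
 * 6 * 20 / 21 = 5 requests, priority 10 up to 6 * 11 / 21 = 3, and
 * priority 1 only 6 * 2 / 21 = 0, i.e. the lowest levels rely on the
 * "average with last dispatched" step and on higher levels being idle to
 * make progress - presumably part of what the 5-95% TODO in the file
 * header refers to.
 */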
static struct request *cfq_next_request(request_queue_t *q)
	struct cfq_data *cfqd = q->elevator.elevator_data;

	if (!list_empty(cfqd->dispatch)) {

		/*
		 * end grace period, we are servicing a request
		 */
		del_timer(&cfqd->timer);
		clear_bit(CFQ_WAIT_RT, &cfqd->flags);
		clear_bit(CFQ_WAIT_NORM, &cfqd->flags);

		BUG_ON(list_empty(cfqd->dispatch));
		rq = list_entry_rq(cfqd->dispatch->next);

		BUG_ON(q->last_merge == rq);
		crq = RQ_ELV_DATA(rq);

		BUG_ON(!hlist_unhashed(&crq->hash));
		list_del_init(&crq->prio_list);

	/*
	 * we moved requests to the dispatch list, go back and serve one
	 */
	if (cfq_select_requests(q, cfqd))
static inline struct cfq_queue *
__cfq_find_cfq_hash(struct cfq_data *cfqd, int hashkey, const int hashval)
	struct hlist_head *hash_list = &cfqd->cfq_hash[hashval];
	struct hlist_node *entry;

	hlist_for_each(entry, hash_list) {
		struct cfq_queue *__cfqq = list_entry_qhash(entry);

		if (__cfqq->hash_key == hashkey)

static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *cfqd, int hashkey)
	const int hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);

	return __cfq_find_cfq_hash(cfqd, hashkey, hashval);
static void cfq_put_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
	WARN_ON(cfqd->busy_queues < 0);

	cfqd->cid[cfqq->ioprio].busy_queues--;
	WARN_ON(cfqd->cid[cfqq->ioprio].busy_queues < 0);
	atomic_inc(&(cfqd->cid[cfqq->ioprio].cum_queues_out));

	list_del(&cfqq->cfq_list);
	hlist_del(&cfqq->cfq_hash);
	mempool_free(cfqq, cfq_mpool);
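/*
 * Pause/resume path: when __cfq_check_limit() decides a queue has exceeded
 * its sectorate, cfq_dispatch_requests() takes it off the round-robin list
 * and arms cfqq->timer with the computed wait_end.  The timer fires in
 * interrupt context, so it only schedules the work item below, which
 * re-adds the queue to its priority level's rr_list under the queue lock
 * and kicks request processing again.
 */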
static void cfq_pauseq_timer(unsigned long data)
	struct cfq_queue *cfqq = (struct cfq_queue *) data;

	kblockd_schedule_work(&cfqq->work);

static void cfq_pauseq_work(void *data)
	struct cfq_queue *cfqq = (struct cfq_queue *) data;
	struct cfq_data *cfqd = cfqq->cfqd;
	request_queue_t *q = cfqd->queue;

	spin_lock_irqsave(q->queue_lock, flags);
	list_add_tail(&cfqq->cfq_list, &cfqd->cid[cfqq->ioprio].rr_list);

	if (cfq_next_request(q))
	spin_unlock_irqrestore(q->queue_lock, flags);

//	del_timer(&cfqq->timer);
static struct cfq_queue *__cfq_get_queue(struct cfq_data *cfqd, int hashkey,
					 int gfp_mask)
	const int hashval = hash_long(hashkey, CFQ_QHASH_SHIFT);
	struct cfq_queue *cfqq, *new_cfqq = NULL;
	request_queue_t *q = cfqd->queue;

	cfqq = __cfq_find_cfq_hash(cfqd, hashkey, hashval);

	} else if (gfp_mask & __GFP_WAIT) {
		spin_unlock_irq(q->queue_lock);
		new_cfqq = mempool_alloc(cfq_mpool, gfp_mask);
		spin_lock_irq(q->queue_lock);

	memset(cfqq, 0, sizeof(*cfqq));
	INIT_HLIST_NODE(&cfqq->cfq_hash);
	INIT_LIST_HEAD(&cfqq->cfq_list);
	cfqq->hash_key = cfq_hash_key(current);
	cfqq->ioprio = cfq_ioprio(current);

	cfqq->cfqpriv = cfq_cfqpriv(cfqd, current);

	cfqq->cfqpriv = &((cfqd->cid[cfqq->ioprio]).cfqpriv);

	cfqq->epstart = sched_clock();
	/* epsector, avsec, skipped initialized to zero by memset */

	init_timer(&cfqq->timer);
	cfqq->timer.function = cfq_pauseq_timer;
	cfqq->timer.data = (unsigned long) cfqq;

	INIT_WORK(&cfqq->work, cfq_pauseq_work, cfqq);

	hlist_add_head(&cfqq->cfq_hash, &cfqd->cfq_hash[hashval]);

	mempool_free(new_cfqq, cfq_mpool);
static struct cfq_queue *cfq_get_queue(struct cfq_data *cfqd, int hashkey,
				       int gfp_mask)
	request_queue_t *q = cfqd->queue;
	struct cfq_queue *cfqq;

	spin_lock_irq(q->queue_lock);
	cfqq = __cfq_get_queue(cfqd, hashkey, gfp_mask);
	spin_unlock_irq(q->queue_lock);
__cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq)
	const int prio = crq->ioprio;
	struct cfq_queue *cfqq;

	cfqq = __cfq_get_queue(cfqd, cfq_hash_key(current), GFP_ATOMIC);

	if (prio > cfqq->ioprio) {
		printk("prio hash collision %d %d\n",
		if (!list_empty(&cfqq->cfq_list)) {
			cfqd->cid[cfqq->ioprio].busy_queues--;
			WARN_ON(cfqd->cid[cfqq->ioprio].busy_queues < 0);
			atomic_inc(&(cfqd->cid[cfqq->ioprio].cum_queues_out));
			cfqd->cid[prio].busy_queues++;
			atomic_inc(&(cfqd->cid[prio].cum_queues_in));
			list_move_tail(&cfqq->cfq_list,
				       &cfqd->cid[prio].rr_list);

		cfqq->ioprio = prio;

	cfq_add_crq_rb(cfqd, cfqq, crq);

	if (list_empty(&cfqq->cfq_list)) {
		list_add_tail(&cfqq->cfq_list,
			      &cfqd->cid[prio].rr_list);
		cfqd->cid[prio].busy_queues++;
		atomic_inc(&(cfqd->cid[prio].cum_queues_in));
		cfqd->busy_queues++;

	if (rq_mergeable(crq->request)) {
		cfq_add_crq_hash(cfqd, crq);

		q->last_merge = crq->request;

	/*
	 * this can only happen if the request wasn't allocated
	 * through blk_alloc_request(), eg stack requests from ide-cd
	 * (those should be removed) _and_ we are in OOM.
	 */
	list_add_tail(&crq->request->queuelist, cfqd->dispatch);
static void cfq_reenqueue(request_queue_t *q, struct cfq_data *cfqd, int prio)
	struct list_head *prio_list = &cfqd->cid[prio].prio_list;
	struct list_head *entry, *tmp;

	list_for_each_safe(entry, tmp, prio_list) {
		struct cfq_rq *crq = list_entry_prio(entry);

		list_del_init(entry);
		list_del_init(&crq->request->queuelist);
		__cfq_enqueue(q, cfqd, crq);
cfq_enqueue(request_queue_t *q, struct cfq_data *cfqd, struct cfq_rq *crq)
	const int prio = cfq_ioprio(current);

	crq->nr_sectors = crq->request->hard_nr_sectors;
	__cfq_enqueue(q, cfqd, crq);

	if (prio == IOPRIO_RT) {

		/*
		 * realtime io gets priority, move all other io back
		 */
		for (i = IOPRIO_IDLE; i < IOPRIO_RT; i++)
			cfq_reenqueue(q, cfqd, i);
	} else if (prio != IOPRIO_IDLE) {
		/*
		 * check if we need to move idle io back into queue
		 */
		cfq_reenqueue(q, cfqd, IOPRIO_IDLE);
cfq_insert_request(request_queue_t *q, struct request *rq, int where)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct cfq_rq *crq = RQ_ELV_DATA(rq);

	case ELEVATOR_INSERT_BACK:
		while (cfq_dispatch_requests(q, cfqd))
		list_add_tail(&rq->queuelist, cfqd->dispatch);
	case ELEVATOR_INSERT_FRONT:
		list_add(&rq->queuelist, cfqd->dispatch);
	case ELEVATOR_INSERT_SORT:
		BUG_ON(!blk_fs_request(rq));
		cfq_enqueue(q, cfqd, crq);
		printk("%s: bad insert point %d\n",
		       __FUNCTION__, where);
static int cfq_queue_empty(request_queue_t *q)
	struct cfq_data *cfqd = q->elevator.elevator_data;

	if (list_empty(cfqd->dispatch) && !cfqd->busy_queues)
static struct request *
cfq_former_request(request_queue_t *q, struct request *rq)
	struct cfq_rq *crq = RQ_ELV_DATA(rq);
	struct rb_node *rbprev = rb_prev(&crq->rb_node);

		return rb_entry_crq(rbprev)->request;

static struct request *
cfq_latter_request(request_queue_t *q, struct request *rq)
	struct cfq_rq *crq = RQ_ELV_DATA(rq);
	struct rb_node *rbnext = rb_next(&crq->rb_node);

		return rb_entry_crq(rbnext)->request;
static void cfq_queue_congested(request_queue_t *q)
	struct cfq_data *cfqd = q->elevator.elevator_data;

	set_bit(cfq_ioprio(current), &cfqd->rq_starved_mask);
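/*
 * cfq_may_queue() below throttles request allocation per cfq_queue (per
 * process with the default tgid hash key): beyond cfq_queued outstanding
 * requests in a direction, the allowance scales with priority and is split
 * among the busy queues at that level.  For example, assuming IOPRIO_NR ==
 * 21, q->nr_requests == 512 (after the << 2 in cfq_init()), priority 19 and
 * 4 busy queues at that level, the limit works out to 512 * 20 / 21 / 4 =
 * 121 requests per queue and direction; lower priorities get proportionally
 * less.
 */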
static int cfq_may_queue(request_queue_t *q, int rw)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct cfq_queue *cfqq;
	const int prio = cfq_ioprio(current);

	if (!cfqd->busy_queues)

	cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current));

	cfqq = cfq_find_cfq_hash(cfqd, cfq_hash_key(current));

	/*
	 * if higher or equal prio io is sleeping waiting for a request, don't
	 * allow this one to allocate one. as long as ll_rw_blk does fifo
	 * waitqueue wakeups this should work...
	 */
	if (cfqd->rq_starved_mask & ~((1 << prio) - 1))

	if (cfqq->queued[rw] < cfqd->cfq_queued || !cfqd->cid[prio].busy_queues)

	limit = q->nr_requests * (prio + 1) / IOPRIO_NR;
	limit /= cfqd->cid[prio].busy_queues;
	if (cfqq->queued[rw] > limit)
static void cfq_put_request(request_queue_t *q, struct request *rq)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct cfq_rq *crq = RQ_ELV_DATA(rq);
	struct request_list *rl;

	BUG_ON(q->last_merge == rq);
	BUG_ON(!hlist_unhashed(&crq->hash));

	mempool_free(crq, cfqd->crq_pool);
	rq->elevator_private = NULL;

	/*
	 * work-around for may_queue "bug": if a read gets issued and refused
	 * to queue because writes ate all the allowed slots and no other
	 * reads are pending for this queue, it could get stuck infinitely
	 * since freed_request() only checks the waitqueue for writes when
	 * freeing them. or vice versa for a single write vs many reads.
	 * so check here whether "the other" data direction might be able
	 * to queue and wake them
	 */
	other_rw = rq_data_dir(rq) ^ 1;
	if (rl->count[other_rw] <= q->nr_requests) {

		if (waitqueue_active(&rl->wait[other_rw]))
			wake_up(&rl->wait[other_rw]);
static int cfq_set_request(request_queue_t *q, struct request *rq, int gfp_mask)
	struct cfq_data *cfqd = q->elevator.elevator_data;
	struct cfq_queue *cfqq;

	/*
	 * prepare a queue up front, so cfq_enqueue() doesn't have to
	 */
	cfqq = cfq_get_queue(cfqd, cfq_hash_key(current), gfp_mask);

	crq = mempool_alloc(cfqd->crq_pool, gfp_mask);

	/*
	 * process now has one request
	 */
	clear_bit(cfq_ioprio(current), &cfqd->rq_starved_mask);

	memset(crq, 0, sizeof(*crq));

	INIT_HLIST_NODE(&crq->hash);
	INIT_LIST_HEAD(&crq->prio_list);
	rq->elevator_private = crq;
static void cfq_exit(request_queue_t *q, elevator_t *e)
	struct cfq_data *cfqd = e->elevator_data;

	e->elevator_data = NULL;
	mempool_destroy(cfqd->crq_pool);
	kfree(cfqd->crq_hash);
	kfree(cfqd->cfq_hash);
static void cfq_timer(unsigned long data)
	struct cfq_data *cfqd = (struct cfq_data *) data;

	clear_bit(CFQ_WAIT_RT, &cfqd->flags);
	clear_bit(CFQ_WAIT_NORM, &cfqd->flags);
	kblockd_schedule_work(&cfqd->work);

static void cfq_work(void *data)
	request_queue_t *q = data;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (cfq_next_request(q))
	spin_unlock_irqrestore(q->queue_lock, flags);
static int cfq_init(request_queue_t *q, elevator_t *e)
	struct cfq_data *cfqd;

	cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL);

	memset(cfqd, 0, sizeof(*cfqd));
	init_timer(&cfqd->timer);
	cfqd->timer.function = cfq_timer;
	cfqd->timer.data = (unsigned long) cfqd;

	INIT_WORK(&cfqd->work, cfq_work, q);

	for (i = 0; i < IOPRIO_NR; i++) {
		struct io_prio_data *cid = &cfqd->cid[i];

		INIT_LIST_HEAD(&cid->rr_list);
		INIT_LIST_HEAD(&cid->prio_list);
		cid->last_sectors = -1;

		atomic_set(&cid->cum_rq_in, 0);
		atomic_set(&cid->cum_rq_out, 0);
		atomic_set(&cid->cum_sectors_in, 0);
		atomic_set(&cid->cum_sectors_out, 0);
		atomic_set(&cid->cum_queues_in, 0);
		atomic_set(&cid->cum_queues_out, 0);

		atomic_set(&((cid->cfqpriv).sectorate), CFQ_SECTORATE);
		(cid->cfqpriv).nskip = 0;
		(cid->cfqpriv).navsec = 0;
		(cid->cfqpriv).timedout = 0;

	cfqd->crq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_MHASH_ENTRIES,
	if (!cfqd->crq_hash)

	cfqd->cfq_hash = kmalloc(sizeof(struct hlist_head) * CFQ_QHASH_ENTRIES,
	if (!cfqd->cfq_hash)

	cfqd->crq_pool = mempool_create(BLKDEV_MIN_RQ, mempool_alloc_slab,
					mempool_free_slab, crq_pool);
	if (!cfqd->crq_pool)

	for (i = 0; i < CFQ_MHASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&cfqd->crq_hash[i]);
	for (i = 0; i < CFQ_QHASH_ENTRIES; i++)
		INIT_HLIST_HEAD(&cfqd->cfq_hash[i]);

	cfqd->cfq_queued = cfq_queued;
	cfqd->cfq_quantum = cfq_quantum;
	cfqd->cfq_quantum_io = cfq_quantum_io;
	cfqd->cfq_idle_quantum = cfq_idle_quantum;
	cfqd->cfq_idle_quantum_io = cfq_idle_quantum_io;
	cfqd->cfq_grace_rt = cfq_grace_rt;
	cfqd->cfq_grace_idle = cfq_grace_idle;

	cfqd->cfq_epoch = CFQ_EPOCH;
	cfqd->cfq_hmax_pct = CFQ_HMAX_PCT;

	q->nr_requests <<= 2;

	cfqd->dispatch = &q->queue_head;
	e->elevator_data = cfqd;

	kfree(cfqd->cfq_hash);

	kfree(cfqd->crq_hash);
static int __init cfq_slab_setup(void)
	crq_pool = kmem_cache_create("crq_pool", sizeof(struct cfq_rq), 0, 0,
		panic("cfq_iosched: can't init crq pool\n");

	cfq_pool = kmem_cache_create("cfq_pool", sizeof(struct cfq_queue), 0, 0,
		panic("cfq_iosched: can't init cfq pool\n");

	cfq_mpool = mempool_create(64, mempool_alloc_slab, mempool_free_slab, cfq_pool);
		panic("cfq_iosched: can't init cfq mpool\n");

subsys_initcall(cfq_slab_setup);
/*
 * sysfs parts below -->
 */
struct cfq_fs_entry {
	struct attribute attr;
	ssize_t (*show)(struct cfq_data *, char *);
	ssize_t (*store)(struct cfq_data *, const char *, size_t);

cfq_var_show(unsigned int var, char *page)
	return sprintf(page, "%d\n", var);

cfq_var_store(unsigned int *var, const char *page, size_t count)
	char *p = (char *) page;

	*var = simple_strtoul(p, &p, 10);
#define SHOW_FUNCTION(__FUNC, __VAR) \
static ssize_t __FUNC(struct cfq_data *cfqd, char *page) \
	return cfq_var_show(__VAR, (page)); \
SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum);
SHOW_FUNCTION(cfq_quantum_io_show, cfqd->cfq_quantum_io);
SHOW_FUNCTION(cfq_idle_quantum_show, cfqd->cfq_idle_quantum);
SHOW_FUNCTION(cfq_idle_quantum_io_show, cfqd->cfq_idle_quantum_io);
SHOW_FUNCTION(cfq_queued_show, cfqd->cfq_queued);
SHOW_FUNCTION(cfq_grace_rt_show, cfqd->cfq_grace_rt);
SHOW_FUNCTION(cfq_grace_idle_show, cfqd->cfq_grace_idle);
SHOW_FUNCTION(cfq_epoch_show, cfqd->cfq_epoch);
SHOW_FUNCTION(cfq_hmax_pct_show, cfqd->cfq_hmax_pct);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \
static ssize_t __FUNC(struct cfq_data *cfqd, const char *page, size_t count) \
	int ret = cfq_var_store(__PTR, (page), count); \
	if (*(__PTR) < (MIN)) \
	else if (*(__PTR) > (MAX)) \
STORE_FUNCTION(cfq_quantum_store, &cfqd->cfq_quantum, 1, INT_MAX);
STORE_FUNCTION(cfq_quantum_io_store, &cfqd->cfq_quantum_io, 4, INT_MAX);
STORE_FUNCTION(cfq_idle_quantum_store, &cfqd->cfq_idle_quantum, 1, INT_MAX);
STORE_FUNCTION(cfq_idle_quantum_io_store, &cfqd->cfq_idle_quantum_io, 4, INT_MAX);
STORE_FUNCTION(cfq_queued_store, &cfqd->cfq_queued, 1, INT_MAX);
STORE_FUNCTION(cfq_grace_rt_store, &cfqd->cfq_grace_rt, 0, INT_MAX);
STORE_FUNCTION(cfq_grace_idle_store, &cfqd->cfq_grace_idle, 0, INT_MAX);
STORE_FUNCTION(cfq_epoch_store, &cfqd->cfq_epoch, 0, INT_MAX);
STORE_FUNCTION(cfq_hmax_pct_store, &cfqd->cfq_hmax_pct, 1, 100);
#undef STORE_FUNCTION
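/*
 * The attributes above appear per device once this elevator is selected,
 * e.g. (paths assumed, not verified here):
 *
 *	# cat /sys/block/hda/queue/iosched/quantum
 *	# echo 8 > /sys/block/hda/queue/iosched/quantum
 *
 * The per-priority entries p0..p20 defined below report the skip/sector
 * statistics and accept a new sectorate value on write.
 */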
/* Additional entries to get priority level data */
cfq_prio_show(struct cfq_data *cfqd, char *page, unsigned int priolvl)
	//int r1,r2,s1,s2,q1,q2;

	if (!(priolvl >= IOPRIO_IDLE && priolvl <= IOPRIO_RT))

	r1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_in));
	r2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_rq_out));
	s1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_in));
	s2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_sectors_out));
	q1 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_in));
	q2 = (int)atomic_read(&(cfqd->cid[priolvl].cum_queues_out));

	return sprintf(page, "skip %d timedout %d avsec %lu rate %ld "
		       " sec0 %lu sec1 %lu\n",
		       cfqd->cid[priolvl].cfqpriv.nskip,
		       cfqd->cid[priolvl].cfqpriv.timedout,
		       cfqd->cid[priolvl].cfqpriv.navsec,
		       atomic_read(&(cfqd->cid[priolvl].cfqpriv.sectorate)),
		       (unsigned long)cfqd->cid[priolvl].cfqpriv.sec[0],
		       (unsigned long)cfqd->cid[priolvl].cfqpriv.sec[1]);
#define SHOW_PRIO_DATA(__PRIOLVL) \
static ssize_t cfq_prio_##__PRIOLVL##_show(struct cfq_data *cfqd, char *page) \
	return cfq_prio_show(cfqd, page, __PRIOLVL); \
#undef SHOW_PRIO_DATA
static ssize_t cfq_prio_store(struct cfq_data *cfqd, const char *page, size_t count, int priolvl)
	char *p = (char *) page;

	val = (int) simple_strtoul(p, &p, 10);

	atomic_set(&(cfqd->cid[priolvl].cfqpriv.sectorate), val);
	cfqd->cid[priolvl].cfqpriv.nskip = 0;
	cfqd->cid[priolvl].cfqpriv.navsec = 0;
	cfqd->cid[priolvl].cfqpriv.timedout = 0;

	atomic_set(&(cfqd->cid[priolvl].cum_rq_in), 0);
	atomic_set(&(cfqd->cid[priolvl].cum_rq_out), 0);
	atomic_set(&(cfqd->cid[priolvl].cum_sectors_in), 0);
	atomic_set(&(cfqd->cid[priolvl].cum_sectors_out), 0);
	atomic_set(&(cfqd->cid[priolvl].cum_queues_in), 0);
	atomic_set(&(cfqd->cid[priolvl].cum_queues_out), 0);
#define STORE_PRIO_DATA(__PRIOLVL) \
static ssize_t cfq_prio_##__PRIOLVL##_store(struct cfq_data *cfqd, const char *page, size_t count) \
	return cfq_prio_store(cfqd, page, count, __PRIOLVL); \
STORE_PRIO_DATA(10);
STORE_PRIO_DATA(11);
STORE_PRIO_DATA(12);
STORE_PRIO_DATA(13);
STORE_PRIO_DATA(14);
STORE_PRIO_DATA(15);
STORE_PRIO_DATA(16);
STORE_PRIO_DATA(17);
STORE_PRIO_DATA(18);
STORE_PRIO_DATA(19);
STORE_PRIO_DATA(20);
#undef STORE_PRIO_DATA
static struct cfq_fs_entry cfq_quantum_entry = {
	.attr = {.name = "quantum", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_quantum_show,
	.store = cfq_quantum_store,
};
static struct cfq_fs_entry cfq_quantum_io_entry = {
	.attr = {.name = "quantum_io", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_quantum_io_show,
	.store = cfq_quantum_io_store,
};
static struct cfq_fs_entry cfq_idle_quantum_entry = {
	.attr = {.name = "idle_quantum", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_idle_quantum_show,
	.store = cfq_idle_quantum_store,
};
static struct cfq_fs_entry cfq_idle_quantum_io_entry = {
	.attr = {.name = "idle_quantum_io", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_idle_quantum_io_show,
	.store = cfq_idle_quantum_io_store,
};
static struct cfq_fs_entry cfq_queued_entry = {
	.attr = {.name = "queued", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_queued_show,
	.store = cfq_queued_store,
};
static struct cfq_fs_entry cfq_grace_rt_entry = {
	.attr = {.name = "grace_rt", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_grace_rt_show,
	.store = cfq_grace_rt_store,
};
static struct cfq_fs_entry cfq_grace_idle_entry = {
	.attr = {.name = "grace_idle", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_grace_idle_show,
	.store = cfq_grace_idle_store,
};
static struct cfq_fs_entry cfq_epoch_entry = {
	.attr = {.name = "epoch", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_epoch_show,
	.store = cfq_epoch_store,
};
static struct cfq_fs_entry cfq_hmax_pct_entry = {
	.attr = {.name = "hmaxpct", .mode = S_IRUGO | S_IWUSR },
	.show = cfq_hmax_pct_show,
	.store = cfq_hmax_pct_store,
};
#define P_0_STR		"p0"
#define P_1_STR		"p1"
#define P_2_STR		"p2"
#define P_3_STR		"p3"
#define P_4_STR		"p4"
#define P_5_STR		"p5"
#define P_6_STR		"p6"
#define P_7_STR		"p7"
#define P_8_STR		"p8"
#define P_9_STR		"p9"
#define P_10_STR	"p10"
#define P_11_STR	"p11"
#define P_12_STR	"p12"
#define P_13_STR	"p13"
#define P_14_STR	"p14"
#define P_15_STR	"p15"
#define P_16_STR	"p16"
#define P_17_STR	"p17"
#define P_18_STR	"p18"
#define P_19_STR	"p19"
#define P_20_STR	"p20"
#define CFQ_PRIO_SYSFS_ENTRY(__PRIOLVL) \
static struct cfq_fs_entry cfq_prio_##__PRIOLVL##_entry = { \
	.attr = {.name = P_##__PRIOLVL##_STR, .mode = S_IRUGO | S_IWUSR }, \
	.show = cfq_prio_##__PRIOLVL##_show, \
	.store = cfq_prio_##__PRIOLVL##_store, \
};
CFQ_PRIO_SYSFS_ENTRY(0);
CFQ_PRIO_SYSFS_ENTRY(1);
CFQ_PRIO_SYSFS_ENTRY(2);
CFQ_PRIO_SYSFS_ENTRY(3);
CFQ_PRIO_SYSFS_ENTRY(4);
CFQ_PRIO_SYSFS_ENTRY(5);
CFQ_PRIO_SYSFS_ENTRY(6);
CFQ_PRIO_SYSFS_ENTRY(7);
CFQ_PRIO_SYSFS_ENTRY(8);
CFQ_PRIO_SYSFS_ENTRY(9);
CFQ_PRIO_SYSFS_ENTRY(10);
CFQ_PRIO_SYSFS_ENTRY(11);
CFQ_PRIO_SYSFS_ENTRY(12);
CFQ_PRIO_SYSFS_ENTRY(13);
CFQ_PRIO_SYSFS_ENTRY(14);
CFQ_PRIO_SYSFS_ENTRY(15);
CFQ_PRIO_SYSFS_ENTRY(16);
CFQ_PRIO_SYSFS_ENTRY(17);
CFQ_PRIO_SYSFS_ENTRY(18);
CFQ_PRIO_SYSFS_ENTRY(19);
CFQ_PRIO_SYSFS_ENTRY(20);
#undef CFQ_PRIO_SYSFS_ENTRY
static struct attribute *default_attrs[] = {
	&cfq_quantum_entry.attr,
	&cfq_quantum_io_entry.attr,
	&cfq_idle_quantum_entry.attr,
	&cfq_idle_quantum_io_entry.attr,
	&cfq_queued_entry.attr,
	&cfq_grace_rt_entry.attr,
	&cfq_grace_idle_entry.attr,
	&cfq_epoch_entry.attr,
	&cfq_hmax_pct_entry.attr,
	&cfq_prio_0_entry.attr,
	&cfq_prio_1_entry.attr,
	&cfq_prio_2_entry.attr,
	&cfq_prio_3_entry.attr,
	&cfq_prio_4_entry.attr,
	&cfq_prio_5_entry.attr,
	&cfq_prio_6_entry.attr,
	&cfq_prio_7_entry.attr,
	&cfq_prio_8_entry.attr,
	&cfq_prio_9_entry.attr,
	&cfq_prio_10_entry.attr,
	&cfq_prio_11_entry.attr,
	&cfq_prio_12_entry.attr,
	&cfq_prio_13_entry.attr,
	&cfq_prio_14_entry.attr,
	&cfq_prio_15_entry.attr,
	&cfq_prio_16_entry.attr,
	&cfq_prio_17_entry.attr,
	&cfq_prio_18_entry.attr,
	&cfq_prio_19_entry.attr,
	&cfq_prio_20_entry.attr,
#define to_cfq(atr) container_of((atr), struct cfq_fs_entry, attr)

cfq_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct cfq_fs_entry *entry = to_cfq(attr);

	return entry->show(e->elevator_data, page);

cfq_attr_store(struct kobject *kobj, struct attribute *attr,
	       const char *page, size_t length)
	elevator_t *e = container_of(kobj, elevator_t, kobj);
	struct cfq_fs_entry *entry = to_cfq(attr);

	return entry->store(e->elevator_data, page, length);

static struct sysfs_ops cfq_sysfs_ops = {
	.show	= cfq_attr_show,
	.store	= cfq_attr_store,
};

struct kobj_type cfq_ktype = {
	.sysfs_ops	= &cfq_sysfs_ops,
	.default_attrs	= default_attrs,
};
elevator_t iosched_cfq = {
	.elevator_name			= "cfq",
	.elevator_ktype			= &cfq_ktype,
	.elevator_merge_fn		= cfq_merge,
	.elevator_merged_fn		= cfq_merged_request,
	.elevator_merge_req_fn		= cfq_merged_requests,
	.elevator_next_req_fn		= cfq_next_request,
	.elevator_add_req_fn		= cfq_insert_request,
	.elevator_remove_req_fn		= cfq_remove_request,
	.elevator_queue_empty_fn	= cfq_queue_empty,
	.elevator_former_req_fn		= cfq_former_request,
	.elevator_latter_req_fn		= cfq_latter_request,
	.elevator_set_req_fn		= cfq_set_request,
	.elevator_put_req_fn		= cfq_put_request,
	.elevator_may_queue_fn		= cfq_may_queue,
	.elevator_set_congested_fn	= cfq_queue_congested,
	.elevator_init_fn		= cfq_init,
	.elevator_exit_fn		= cfq_exit,
};

EXPORT_SYMBOL(iosched_cfq);