2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
37 #include <net/pkt_sched.h>
40 /* Class-Based Queueing (CBQ) algorithm.
41 =======================================
43 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 Management Models for Packet Networks",
45 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
47 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
49 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
52 [4] Sally Floyd and Michael Speer, "Experimental Results
53 for Class-Based Queueing", 1998, not published.
55 -----------------------------------------------------------------------
57 Algorithm skeleton was taken from NS simulator cbq.cc.
58 If someone wants to check this code against the LBL version,
59 they should take into account that ONLY the skeleton was borrowed;
60 the implementation is different. In particular:
62 --- The WRR algorithm is different. Our version looks more
63 reasonable (I hope) and works when quanta are allowed to be
64 less than MTU, which is always the case when real time classes
65 have small rates. Note that the statement in [3] is
66 incomplete: delay may actually be estimated even if class
67 per-round allotment is less than MTU. Namely, if per-round
68 allotment is W*r_i, and r_1+...+r_k = r < 1
70 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
72 In the worst case we have IntServ estimate with D = W*r+k*MTU
73 and C = MTU*r. The proof (if correct at all) is trivial.
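 A worked instance of this bound, for illustration (not taken from
 [1]-[4]; read W as the per-round allotment in bytes, r_i as class i's
 share of the link, k as the number of classes, B as the link
 bandwidth in bytes/sec): take MTU = 1500 bytes, W = 10000 bytes,
 r_i = 0.05 (so W*r_i = 500 bytes), r = 0.9 (so W*r = 9000 bytes),
 k = 4 and B = 1.25 Mbyte/sec. Then

 delay_i <= (ceil(1500/500)*9000 + 9000 + 4*1500)/B
          = 42000 bytes / 1.25 Mbyte/sec ~= 34 msec,

 which is finite even though the per-round allotment (500 bytes) is
 well below the MTU -- exactly the point made above.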
76 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 interpret some places, which look like incorrect translations
78 from NS. Anyone is invited to find these differences
79 and explain to me why I am wrong 8).
81 --- Linux has no EOI event, so we cannot estimate the true class
82 idle time. The workaround is to treat the next dequeue event
83 as a sign that the previous packet has finished. This is wrong because of
84 internal device queueing, but on a permanently loaded link it is true.
85 Moreover, combined with clock integrator, this scheme looks
86 very close to an ideal solution. */
88 struct cbq_sched_data;
93 struct cbq_class *next; /* hash table link */
94 struct cbq_class *next_alive; /* next class with backlog in this priority band */
98 unsigned char priority; /* class priority */
99 unsigned char priority2; /* priority to be used after overlimit */
100 unsigned char ewma_log; /* time constant for idle time calculation */
101 unsigned char ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 unsigned char police;
108 /* Link-sharing scheduler parameters */
109 long maxidle; /* Class parameters: see below. */
113 struct qdisc_rate_table *R_tab;
115 /* Overlimit strategy parameters */
116 void (*overlimit)(struct cbq_class *cl);
119 /* General scheduler (WRR) parameters */
121 long quantum; /* Allotment per WRR round */
122 long weight; /* Relative allotment: see below */
124 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
125 struct cbq_class *split; /* Ptr to split node */
126 struct cbq_class *share; /* Ptr to LS parent in the class tree */
127 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
128 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
130 struct cbq_class *sibling; /* Sibling chain */
131 struct cbq_class *children; /* Pointer to children chain */
133 struct Qdisc *q; /* Elementary queueing discipline */
137 unsigned char cpriority; /* Effective priority */
138 unsigned char delayed;
139 unsigned char level; /* level of the class in hierarchy:
140 0 for leaf classes, and maximal
141 level of children + 1 for nodes.
144 psched_time_t last; /* Last end of service */
145 psched_time_t undertime;
147 long deficit; /* Saved deficit for WRR */
148 unsigned long penalized;
149 struct tc_stats stats;
150 struct tc_cbq_xstats xstats;
152 struct tcf_proto *filter_list;
157 struct cbq_class *defaults[TC_PRIO_MAX+1];
160 struct cbq_sched_data
162 struct cbq_class *classes[16]; /* Hash table of all classes */
163 int nclasses[TC_CBQ_MAXPRIO+1];
164 unsigned quanta[TC_CBQ_MAXPRIO+1];
166 struct cbq_class link;
169 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 #ifdef CONFIG_NET_CLS_POLICE
173 struct cbq_class *rx_class;
175 struct cbq_class *tx_class;
176 struct cbq_class *tx_borrowed;
178 psched_time_t now; /* Cached timestamp */
179 psched_time_t now_rt; /* Cached real time */
182 struct timer_list delay_timer;
183 struct timer_list wd_timer; /* Watchdog timer,
193 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
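/* For illustration (a userspace sketch with made-up names): the rate table
   indexed by L2T holds, per cell of 1<<cell_log bytes, the time needed to
   transmit that many bytes at the configured rate, so the lookup amounts to

	unsigned int len_to_time(const unsigned int *tab, int cell_log,
				 unsigned int len)
	{
		return tab[len >> cell_log];	-- e.g. cell_log=3: 8-byte cells
	}

   The table itself is computed in user space by tc and passed down in the
   TCA_CBQ_RTAB attribute. */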
196 static __inline__ unsigned cbq_hash(u32 h)
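/* (A sketch of the idea: q->classes[] has 16 buckets, so any fold of the
   32-bit classid into the range 0..15 will do, for example

	h ^= h >> 8;
	h ^= h >> 4;
	return h & 0xF;

   -- illustrative only, not necessarily the exact expression used here.) */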
203 static __inline__ struct cbq_class *
204 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 struct cbq_class *cl;
208 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
209 if (cl->classid == classid)
214 #ifdef CONFIG_NET_CLS_POLICE
216 static struct cbq_class *
217 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 struct cbq_class *cl, *new;
221 for (cl = this->tparent; cl; cl = cl->tparent)
222 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
230 /* Classify packet. The procedure is pretty complicated, but
231 it allows us to combine link sharing and priority scheduling
234 Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
235 so that it resolves to split nodes. Then packets are classified
236 by logical priority, or a more specific classifier may be attached
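
   For example (illustrative setup): a route-based filter at the root can
   resolve to an interior split node, whose defmap then routes packets by
   skb->priority -- say, TC_PRIO_INTERACTIVE traffic to one leaf class and
   everything else to that node's TC_PRIO_BESTEFFORT default.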
240 static struct cbq_class *
241 cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
243 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
244 struct cbq_class *head = &q->link;
245 struct cbq_class **defmap;
246 struct cbq_class *cl = NULL;
247 u32 prio = skb->priority;
248 struct tcf_result res;
251 * Step 1. If skb->priority points to one of our classes, use it.
253 if (TC_H_MAJ(prio^sch->handle) == 0 &&
254 (cl = cbq_class_lookup(q, prio)) != NULL)
260 defmap = head->defaults;
263 * Step 2+n. Apply classifier.
265 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 if ((cl = (void*)res.class) == NULL) {
269 if (TC_H_MAJ(res.classid))
270 cl = cbq_class_lookup(q, res.classid);
271 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
272 cl = defmap[TC_PRIO_BESTEFFORT];
274 if (cl == NULL || cl->level >= head->level)
278 #ifdef CONFIG_NET_CLS_POLICE
280 case TC_POLICE_RECLASSIFY:
281 return cbq_reclassify(skb, cl);
292 * Step 3+n. If classifier selected a link sharing class,
293 * apply the agency-specific classifier.
294 * Repeat this procedure until we hit a leaf node.
303 * Step 4. No success...
305 if (TC_H_MAJ(prio) == 0 &&
306 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
307 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
314 A packet has just been enqueued into an empty class.
315 cbq_activate_class adds it to the tail of active class list
316 of its priority band.
319 static __inline__ void cbq_activate_class(struct cbq_class *cl)
321 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
322 int prio = cl->cpriority;
323 struct cbq_class *cl_tail;
325 cl_tail = q->active[prio];
326 q->active[prio] = cl;
328 if (cl_tail != NULL) {
329 cl->next_alive = cl_tail->next_alive;
330 cl_tail->next_alive = cl;
333 q->activemask |= (1<<prio);
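/* Note on the list discipline used above (descriptive, matching the code):
   q->active[prio] always points at the TAIL of a circular singly-linked
   list of backlogged classes in this band, and tail->next_alive is the
   head. Activating A, B, C in that order therefore yields

	q->active[prio] -> C,  with next_alive links C -> A -> B -> C

   so the round robin in cbq_dequeue_prio() serves classes in activation
   order. When the band was empty, the class becomes a one-element ring
   (cl->next_alive == cl) and the band's bit is set in q->activemask. */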
338 Unlink class from active chain.
339 Note that this same procedure is done directly in cbq_dequeue*
340 during the round-robin procedure.
343 static void cbq_deactivate_class(struct cbq_class *this)
345 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
346 int prio = this->cpriority;
347 struct cbq_class *cl;
348 struct cbq_class *cl_prev = q->active[prio];
351 cl = cl_prev->next_alive;
353 cl_prev->next_alive = cl->next_alive;
354 cl->next_alive = NULL;
356 if (cl == q->active[prio]) {
357 q->active[prio] = cl_prev;
358 if (cl == q->active[prio]) {
359 q->active[prio] = NULL;
360 q->activemask &= ~(1<<prio);
365 cl = cl_prev->next_alive;
368 } while ((cl_prev = cl) != q->active[prio]);
372 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
374 int toplevel = q->toplevel;
376 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
380 PSCHED_GET_TIME(now);
381 incr = PSCHED_TDIFF(now, q->now_rt);
382 PSCHED_TADD2(q->now, incr, now);
385 if (PSCHED_TLESS(cl->undertime, now)) {
386 q->toplevel = cl->level;
389 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
394 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
396 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
397 struct cbq_class *cl = cbq_classify(skb, sch);
399 int ret = NET_XMIT_POLICED;
401 #ifdef CONFIG_NET_CLS_POLICE
405 #ifdef CONFIG_NET_CLS_POLICE
406 cl->q->__parent = sch;
408 if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
410 sch->stats.packets++;
411 sch->stats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
414 cbq_activate_class(cl);
423 cbq_mark_toplevel(q, cl);
430 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
432 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
433 struct cbq_class *cl;
436 if ((cl = q->tx_class) == NULL) {
443 cbq_mark_toplevel(q, cl);
445 #ifdef CONFIG_NET_CLS_POLICE
447 cl->q->__parent = sch;
449 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
452 cbq_activate_class(cl);
460 /* Overlimit actions */
462 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
464 static void cbq_ovl_classic(struct cbq_class *cl)
466 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
467 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
470 delay += cl->offtime;
473 The class goes to sleep, so that it will have no
474 chance to work off its avgidle. Let's forgive it 8)
476 BTW cbq-2.0 has a bug in this
477 place; apparently they forgot to shift it by cl->ewma_log.
480 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
481 if (cl->avgidle < cl->minidle)
482 cl->avgidle = cl->minidle;
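/* Worked example of the compensation above (illustrative numbers): with
   ewma_log = 5 (W = 1/32) and a scaled avgidle of -3200, the sleep delay
   is shortened by

	(-avgidle) - ((-avgidle) >> ewma_log) = 3200 - 100 = 3100

   time units, i.e. (1 - W)*3200 -- the idle time the class would otherwise
   be charged for while asleep and unable to earn avgidle back. */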
485 PSCHED_TADD2(q->now, delay, cl->undertime);
487 cl->xstats.overactions++;
490 if (q->wd_expires == 0 || q->wd_expires > delay)
491 q->wd_expires = delay;
493 /* Dirty work! We must schedule wakeups based on
494 real available rate, rather than leaf rate,
495 which may be tiny (even zero).
497 if (q->toplevel == TC_CBQ_MAXLEVEL) {
499 psched_tdiff_t base_delay = q->wd_expires;
501 for (b = cl->borrow; b; b = b->borrow) {
502 delay = PSCHED_TDIFF(b->undertime, q->now);
503 if (delay < base_delay) {
510 q->wd_expires = base_delay;
514 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
518 static void cbq_ovl_rclassic(struct cbq_class *cl)
520 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
521 struct cbq_class *this = cl;
524 if (cl->level > q->toplevel) {
528 } while ((cl = cl->borrow) != NULL);
535 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
537 static void cbq_ovl_delay(struct cbq_class *cl)
539 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
540 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
543 unsigned long sched = jiffies;
545 delay += cl->offtime;
547 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
548 if (cl->avgidle < cl->minidle)
549 cl->avgidle = cl->minidle;
550 PSCHED_TADD2(q->now, delay, cl->undertime);
553 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
554 cl->penalized = sched;
555 cl->cpriority = TC_CBQ_MAXPRIO;
556 q->pmask |= (1<<TC_CBQ_MAXPRIO);
557 if (del_timer(&q->delay_timer) &&
558 (long)(q->delay_timer.expires - sched) > 0)
559 q->delay_timer.expires = sched;
560 add_timer(&q->delay_timer);
562 cl->xstats.overactions++;
567 if (q->wd_expires == 0 || q->wd_expires > delay)
568 q->wd_expires = delay;
571 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
573 static void cbq_ovl_lowprio(struct cbq_class *cl)
575 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
577 cl->penalized = jiffies + cl->penalty;
579 if (cl->cpriority != cl->priority2) {
580 cl->cpriority = cl->priority2;
581 q->pmask |= (1<<cl->cpriority);
582 cl->xstats.overactions++;
587 /* TC_CBQ_OVL_DROP: penalize class by dropping */
589 static void cbq_ovl_drop(struct cbq_class *cl)
591 if (cl->q->ops->drop)
592 if (cl->q->ops->drop(cl->q))
594 cl->xstats.overactions++;
598 static void cbq_watchdog(unsigned long arg)
600 struct Qdisc *sch = (struct Qdisc*)arg;
602 sch->flags &= ~TCQ_F_THROTTLED;
603 netif_schedule(sch->dev);
606 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
608 struct cbq_class *cl;
609 struct cbq_class *cl_prev = q->active[prio];
610 unsigned long now = jiffies;
611 unsigned long sched = now;
617 cl = cl_prev->next_alive;
618 if ((long)(now - cl->penalized) > 0) {
619 cl_prev->next_alive = cl->next_alive;
620 cl->next_alive = NULL;
621 cl->cpriority = cl->priority;
623 cbq_activate_class(cl);
625 if (cl == q->active[prio]) {
626 q->active[prio] = cl_prev;
627 if (cl == q->active[prio]) {
628 q->active[prio] = NULL;
633 cl = cl_prev->next_alive;
634 } else if ((long)(sched - cl->penalized) > 0)
635 sched = cl->penalized;
636 } while ((cl_prev = cl) != q->active[prio]);
638 return (long)(sched - now);
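/* The (long)(a - b) > 0 comparisons above are the standard wrap-safe way
   to order jiffies values (the same idea as time_after()): e.g. with
   32-bit jiffies, now = 0xfffffff0 and penalized = 0x00000010 gives
   (long)(now - penalized) = -32 < 0, i.e. "not yet due", which is the
   intended answer across the counter wrap. */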
641 static void cbq_undelay(unsigned long arg)
643 struct Qdisc *sch = (struct Qdisc*)arg;
644 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
652 int prio = ffz(~pmask);
657 tmp = cbq_undelay_prio(q, prio);
660 if (tmp < delay || delay == 0)
666 q->delay_timer.expires = jiffies + delay;
667 add_timer(&q->delay_timer);
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
675 #ifdef CONFIG_NET_CLS_POLICE
677 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
680 struct Qdisc *sch = child->__parent;
681 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
682 struct cbq_class *cl = q->rx_class;
686 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688 cbq_mark_toplevel(q, cl);
691 cl->q->__parent = sch;
693 if (cl->q->enqueue(skb, cl->q) == 0) {
695 sch->stats.packets++;
696 sch->stats.bytes+=len;
698 cbq_activate_class(cl);
711 This is a mission-critical procedure.
713 We "regenerate" the toplevel cutoff if the transmitting class
714 has backlog and is not regulated. This is not part of the
715 original CBQ description, but it looks more reasonable.
716 It may well be wrong; this question needs further investigation.
719 static __inline__ void
720 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
721 struct cbq_class *borrowed)
723 if (cl && q->toplevel >= borrowed->level) {
724 if (cl->q->q.qlen > 1) {
726 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
727 q->toplevel = borrowed->level;
730 } while ((borrowed=borrowed->borrow) != NULL);
733 /* This is not necessary now. Uncommenting it
734 would save CPU cycles, but decrease fairness.
736 q->toplevel = TC_CBQ_MAXLEVEL;
742 cbq_update(struct cbq_sched_data *q)
744 struct cbq_class *this = q->tx_class;
745 struct cbq_class *cl = this;
750 for ( ; cl; cl = cl->share) {
751 long avgidle = cl->avgidle;
755 cl->stats.bytes += len;
758 (now - last) is total time between packet right edges.
759 (last_pktlen/rate) is "virtual" busy time, so that
761 idle = (now - last) - last_pktlen/rate
764 idle = PSCHED_TDIFF(q->now, cl->last);
765 if ((unsigned long)idle > 128*1024*1024) {
766 avgidle = cl->maxidle;
768 idle -= L2T(cl, len);
770 /* true_avgidle := (1-W)*true_avgidle + W*idle,
771 where W=2^{-ewma_log}. But cl->avgidle is scaled:
772 cl->avgidle == true_avgidle/W,
775 avgidle += idle - (avgidle>>cl->ewma_log);
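/* Worked example of the scaled update (illustrative numbers): with
   ewma_log = 5 (W = 1/32), avgidle = 640 (true_avgidle = 20) and a new
   idle sample of 8,

	avgidle += 8 - (640 >> 5) = 8 - 20   =>   avgidle = 628,

   i.e. true_avgidle = 628/32 = 19.625, which matches the unscaled rule
   (1 - 1/32)*20 + (1/32)*8 = 19.625 exactly. */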
779 /* Overlimit or at-limit */
781 if (avgidle < cl->minidle)
782 avgidle = cl->minidle;
784 cl->avgidle = avgidle;
786 /* Calculate the expected time when this class
787 will be allowed to send.
789 (1-W)*true_avgidle + W*delay = 0, i.e.
790 idle = (1/W - 1)*(-true_avgidle)
792 idle = (1 - W)*(-cl->avgidle);
794 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
798 To maintain the rate allocated to the class,
799 we add to undertime the amount of virtual clock time
800 necessary to complete the transmitted packet.
801 (len/phys_bandwidth has already elapsed by
802 the time cbq_update is called.)
805 idle -= L2T(&q->link, len);
806 idle += L2T(cl, len);
808 PSCHED_AUDIT_TDIFF(idle);
810 PSCHED_TADD2(q->now, idle, cl->undertime);
814 PSCHED_SET_PASTPERFECT(cl->undertime);
815 if (avgidle > cl->maxidle)
816 cl->avgidle = cl->maxidle;
818 cl->avgidle = avgidle;
823 cbq_update_toplevel(q, this, q->tx_borrowed);
826 static __inline__ struct cbq_class *
827 cbq_under_limit(struct cbq_class *cl)
829 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
830 struct cbq_class *this_cl = cl;
832 if (cl->tparent == NULL)
835 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
836 !PSCHED_TLESS(q->now, cl->undertime)) {
842 /* This is a very suspicious place. Currently, the overlimit
843 action is generated for non-bounded classes
844 only if the link is completely congested.
845 Though this agrees with the ancestor-only paradigm,
846 it looks very stupid. In particular,
847 it means that this chunk of code will either
848 never be called or result in strong amplification
849 of burstiness. Dangerous, silly, but
850 no other solution exists.
852 if ((cl = cl->borrow) == NULL) {
853 this_cl->stats.overlimits++;
854 this_cl->overlimit(this_cl);
857 if (cl->level > q->toplevel)
859 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
860 PSCHED_TLESS(q->now, cl->undertime));
866 static __inline__ struct sk_buff *
867 cbq_dequeue_prio(struct Qdisc *sch, int prio)
869 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
870 struct cbq_class *cl_tail, *cl_prev, *cl;
874 cl_tail = cl_prev = q->active[prio];
875 cl = cl_prev->next_alive;
882 struct cbq_class *borrow = cl;
885 (borrow = cbq_under_limit(cl)) == NULL)
888 if (cl->deficit <= 0) {
889 /* Class exhausted its allotment per
890 this round. Switch to the next one.
893 cl->deficit += cl->quantum;
897 skb = cl->q->dequeue(cl->q);
899 /* The class did not give us any skb :-(
900 This can occur even if cl->q->q.qlen != 0,
901 e.g. if cl->q == "tbf"
906 cl->deficit -= skb->len;
908 q->tx_borrowed = borrow;
910 #ifndef CBQ_XSTATS_BORROWS_BYTES
911 borrow->xstats.borrows++;
912 cl->xstats.borrows++;
914 borrow->xstats.borrows += skb->len;
915 cl->xstats.borrows += skb->len;
918 q->tx_len = skb->len;
920 if (cl->deficit <= 0) {
921 q->active[prio] = cl;
923 cl->deficit += cl->quantum;
928 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
929 /* Class is empty or penalized.
930 Unlink it from active chain.
932 cl_prev->next_alive = cl->next_alive;
933 cl->next_alive = NULL;
935 /* Did cl_tail point to it? */
940 /* Was it the last class in this band? */
943 q->active[prio] = NULL;
944 q->activemask &= ~(1<<prio);
946 cbq_activate_class(cl);
950 q->active[prio] = cl_tail;
953 cbq_activate_class(cl);
961 } while (cl_prev != cl_tail);
964 q->active[prio] = cl_prev;
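/* The weighted round robin above, reduced to its core (a userspace sketch
   with made-up helpers; the real loop interleaves borrowing, penalties and
   list surgery):

	struct wrr_class { long deficit, quantum; struct pkt_queue q; };

	static struct sk_buff *wrr_dequeue_one(void)
	{
		struct wrr_class *cl;
		struct sk_buff *skb;

		while ((cl = next_backlogged_class()) != NULL) {
			if (cl->deficit <= 0) {
				cl->deficit += cl->quantum;  -- refill, move to next class
				continue;
			}
			skb = pkt_dequeue(&cl->q);
			cl->deficit -= skb->len;             -- charge what was sent
			return skb;
		}
		return NULL;
	}

   Unlike textbook DRR, a packet is sent whenever the deficit is still
   positive, even if the packet is larger than what remains; the overdraft
   is carried into later rounds, which is what makes quanta smaller than
   the MTU usable (see the WRR discussion at the top of this file). */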
969 static __inline__ struct sk_buff *
970 cbq_dequeue_1(struct Qdisc *sch)
972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
976 activemask = q->activemask&0xFF;
978 int prio = ffz(~activemask);
979 activemask &= ~(1<<prio);
980 skb = cbq_dequeue_prio(sch, prio);
987 static struct sk_buff *
988 cbq_dequeue(struct Qdisc *sch)
991 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
995 PSCHED_GET_TIME(now);
996 incr = PSCHED_TDIFF(now, q->now_rt);
999 psched_tdiff_t incr2;
1000 /* Time integrator. We calculate EOS time
1001 by adding the expected packet transmission time.
1002 If real time is greater, we warp the artificial clock, so that
1005 cbq_time = max(real_time, work);
1007 incr2 = L2T(&q->link, q->tx_len);
1008 PSCHED_TADD(q->now, incr2);
1010 if ((incr -= incr2) < 0)
1013 PSCHED_TADD(q->now, incr);
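/* Worked example of the integrator (illustrative numbers): suppose the
   previous packet was expected to occupy the link for incr2 = 100 usec
   but only incr = 60 usec of real time have elapsed (the device buffered
   it). q->now still advances by the full 100 usec and incr is clamped to
   0, so the artificial clock runs 40 usec ahead of real time; had 150
   usec elapsed instead, q->now would advance by 100 + 50 = 150 usec,
   snapping back to real time -- hence cbq_time = max(real_time, work). */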
1019 skb = cbq_dequeue_1(sch);
1022 sch->flags &= ~TCQ_F_THROTTLED;
1026 /* All the classes are overlimit.
1030 1. The scheduler is empty.
1031 2. The toplevel cutoff inhibited borrowing.
1032 3. The root class is overlimit.
1034 Reset the 2nd and 3rd conditions and retry.
1036 Note that NS and cbq-2.0 are buggy here: peeking at
1037 an arbitrary class is appropriate for ancestor-only
1038 sharing, but not for the top-level algorithm.
1040 Our version is better but slower, because it requires
1041 two passes; this is unavoidable with top-level sharing.
1044 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1045 PSCHED_IS_PASTPERFECT(q->link.undertime))
1048 q->toplevel = TC_CBQ_MAXLEVEL;
1049 PSCHED_SET_PASTPERFECT(q->link.undertime);
1052 /* No packets in scheduler or nobody wants to give them to us :-(
1053 Sigh... start watchdog timer in the last case. */
1056 sch->stats.overlimits++;
1057 if (q->wd_expires && !netif_queue_stopped(sch->dev)) {
1058 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1061 mod_timer(&q->wd_timer, jiffies + delay);
1062 sch->flags |= TCQ_F_THROTTLED;
1068 /* CBQ class maintenance routines */
1070 static void cbq_adjust_levels(struct cbq_class *this)
1077 struct cbq_class *cl;
1079 if ((cl = this->children) != NULL) {
1081 if (cl->level > level)
1083 } while ((cl = cl->sibling) != this->children);
1085 this->level = level+1;
1086 } while ((this = this->tparent) != NULL);
1089 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1091 struct cbq_class *cl;
1094 if (q->quanta[prio] == 0)
1097 for (h=0; h<16; h++) {
1098 for (cl = q->classes[h]; cl; cl = cl->next) {
1099 /* BUGGGG... Beware! This expression suffers from
1100 arithmetic overflows!
1102 if (cl->priority == prio) {
1103 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1106 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1107 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1108 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
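/* Worked example (illustrative numbers): with two classes at this
   priority (nclasses = 2), allot = 1514 and weights 3 and 1 (so
   quanta[prio] = 4), the formula yields quanta of (3*1514*2)/4 = 2271
   and (1*1514*2)/4 = 757 bytes -- the mean quantum stays equal to allot
   while per-round service is split 3:1. The overflow warned about above
   comes from the product weight*allot*nclasses: the default weight is
   the class rate in bytes/sec, so the product easily exceeds what a
   32-bit long can hold. */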
1114 static void cbq_sync_defmap(struct cbq_class *cl)
1116 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
1117 struct cbq_class *split = cl->split;
1124 for (i=0; i<=TC_PRIO_MAX; i++) {
1125 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1126 split->defaults[i] = NULL;
1129 for (i=0; i<=TC_PRIO_MAX; i++) {
1130 int level = split->level;
1132 if (split->defaults[i])
1135 for (h=0; h<16; h++) {
1136 struct cbq_class *c;
1138 for (c = q->classes[h]; c; c = c->next) {
1139 if (c->split == split && c->level < level &&
1141 split->defaults[i] = c;
1149 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1151 struct cbq_class *split = NULL;
1154 if ((split = cl->split) == NULL)
1156 splitid = split->classid;
1159 if (split == NULL || split->classid != splitid) {
1160 for (split = cl->tparent; split; split = split->tparent)
1161 if (split->classid == splitid)
1168 if (cl->split != split) {
1170 cbq_sync_defmap(cl);
1172 cl->defmap = def&mask;
1174 cl->defmap = (cl->defmap&~mask)|(def&mask);
1176 cbq_sync_defmap(cl);
1179 static void cbq_unlink_class(struct cbq_class *this)
1181 struct cbq_class *cl, **clp;
1182 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1184 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1192 if (this->tparent) {
1201 } while ((cl = *clp) != this->sibling);
1203 if (this->tparent->children == this) {
1204 this->tparent->children = this->sibling;
1205 if (this->sibling == this)
1206 this->tparent->children = NULL;
1209 BUG_TRAP(this->sibling == this);
1213 static void cbq_link_class(struct cbq_class *this)
1215 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1216 unsigned h = cbq_hash(this->classid);
1217 struct cbq_class *parent = this->tparent;
1219 this->sibling = this;
1220 this->next = q->classes[h];
1221 q->classes[h] = this;
1226 if (parent->children == NULL) {
1227 parent->children = this;
1229 this->sibling = parent->children->sibling;
1230 parent->children->sibling = this;
1234 static unsigned int cbq_drop(struct Qdisc* sch)
1236 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1237 struct cbq_class *cl, *cl_head;
1241 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1242 if ((cl_head = q->active[prio]) == NULL)
1247 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1251 } while ((cl = cl->next_alive) != cl_head);
1257 cbq_reset(struct Qdisc* sch)
1259 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1260 struct cbq_class *cl;
1267 q->tx_borrowed = NULL;
1268 del_timer(&q->wd_timer);
1269 del_timer(&q->delay_timer);
1270 q->toplevel = TC_CBQ_MAXLEVEL;
1271 PSCHED_GET_TIME(q->now);
1274 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1275 q->active[prio] = NULL;
1277 for (h = 0; h < 16; h++) {
1278 for (cl = q->classes[h]; cl; cl = cl->next) {
1281 cl->next_alive = NULL;
1282 PSCHED_SET_PASTPERFECT(cl->undertime);
1283 cl->avgidle = cl->maxidle;
1284 cl->deficit = cl->quantum;
1285 cl->cpriority = cl->priority;
1292 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1294 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1295 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1296 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1298 if (lss->change&TCF_CBQ_LSS_EWMA)
1299 cl->ewma_log = lss->ewma_log;
1300 if (lss->change&TCF_CBQ_LSS_AVPKT)
1301 cl->avpkt = lss->avpkt;
1302 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1303 cl->minidle = -(long)lss->minidle;
1304 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1305 cl->maxidle = lss->maxidle;
1306 cl->avgidle = lss->maxidle;
1308 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1309 cl->offtime = lss->offtime;
1313 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1315 q->nclasses[cl->priority]--;
1316 q->quanta[cl->priority] -= cl->weight;
1317 cbq_normalize_quanta(q, cl->priority);
1320 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1322 q->nclasses[cl->priority]++;
1323 q->quanta[cl->priority] += cl->weight;
1324 cbq_normalize_quanta(q, cl->priority);
1327 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1329 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
1332 cl->allot = wrr->allot;
1334 cl->weight = wrr->weight;
1335 if (wrr->priority) {
1336 cl->priority = wrr->priority-1;
1337 cl->cpriority = cl->priority;
1338 if (cl->priority >= cl->priority2)
1339 cl->priority2 = TC_CBQ_MAXPRIO-1;
1346 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1348 switch (ovl->strategy) {
1349 case TC_CBQ_OVL_CLASSIC:
1350 cl->overlimit = cbq_ovl_classic;
1352 case TC_CBQ_OVL_DELAY:
1353 cl->overlimit = cbq_ovl_delay;
1355 case TC_CBQ_OVL_LOWPRIO:
1356 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1357 ovl->priority2-1 <= cl->priority)
1359 cl->priority2 = ovl->priority2-1;
1360 cl->overlimit = cbq_ovl_lowprio;
1362 case TC_CBQ_OVL_DROP:
1363 cl->overlimit = cbq_ovl_drop;
1365 case TC_CBQ_OVL_RCLASSIC:
1366 cl->overlimit = cbq_ovl_rclassic;
1371 cl->penalty = (ovl->penalty*HZ)/1000;
1375 #ifdef CONFIG_NET_CLS_POLICE
1376 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1378 cl->police = p->police;
1380 if (cl->q->handle) {
1381 if (p->police == TC_POLICE_RECLASSIFY)
1382 cl->q->reshape_fail = cbq_reshape_fail;
1384 cl->q->reshape_fail = NULL;
1390 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1392 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1396 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1398 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1399 struct rtattr *tb[TCA_CBQ_MAX];
1400 struct tc_ratespec *r;
1402 if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1403 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1404 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1407 if (tb[TCA_CBQ_LSSOPT-1] &&
1408 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1411 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1413 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1417 q->link.sibling = &q->link;
1418 q->link.classid = sch->handle;
1419 q->link.qdisc = sch;
1420 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1421 q->link.q = &noop_qdisc;
1423 q->link.priority = TC_CBQ_MAXPRIO-1;
1424 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1425 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1426 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1427 q->link.overlimit = cbq_ovl_classic;
1428 q->link.allot = psched_mtu(sch->dev);
1429 q->link.quantum = q->link.allot;
1430 q->link.weight = q->link.R_tab->rate.rate;
1432 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1433 q->link.avpkt = q->link.allot/2;
1434 q->link.minidle = -0x7FFFFFFF;
1435 q->link.stats.lock = &sch->dev->queue_lock;
1437 init_timer(&q->wd_timer);
1438 q->wd_timer.data = (unsigned long)sch;
1439 q->wd_timer.function = cbq_watchdog;
1440 init_timer(&q->delay_timer);
1441 q->delay_timer.data = (unsigned long)sch;
1442 q->delay_timer.function = cbq_undelay;
1443 q->toplevel = TC_CBQ_MAXLEVEL;
1444 PSCHED_GET_TIME(q->now);
1447 cbq_link_class(&q->link);
1449 if (tb[TCA_CBQ_LSSOPT-1])
1450 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1452 cbq_addprio(q, &q->link);
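/* A typical user-space configuration of this qdisc, for illustration
   (see tc-cbq(8) for the authoritative syntax):

	tc qdisc add dev eth0 root handle 1: cbq \
		bandwidth 10Mbit avpkt 1000 cell 8
	tc class add dev eth0 parent 1: classid 1:1 cbq \
		bandwidth 10Mbit rate 1Mbit allot 1514 prio 5 \
		avpkt 1000 bounded

   "rate" arrives here as TCA_CBQ_RATE plus the TCA_CBQ_RTAB table,
   "allot"/"prio"/"weight" via TCA_CBQ_WRROPT, and "bounded"/"isolated"
   as flags in the TCA_CBQ_LSSOPT options parsed above. */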
1456 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1458 unsigned char *b = skb->tail;
1460 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1464 skb_trim(skb, b - skb->data);
1468 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1470 unsigned char *b = skb->tail;
1471 struct tc_cbq_lssopt opt;
1474 if (cl->borrow == NULL)
1475 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1476 if (cl->share == NULL)
1477 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1478 opt.ewma_log = cl->ewma_log;
1479 opt.level = cl->level;
1480 opt.avpkt = cl->avpkt;
1481 opt.maxidle = cl->maxidle;
1482 opt.minidle = (u32)(-cl->minidle);
1483 opt.offtime = cl->offtime;
1485 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1489 skb_trim(skb, b - skb->data);
1493 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1495 unsigned char *b = skb->tail;
1496 struct tc_cbq_wrropt opt;
1499 opt.allot = cl->allot;
1500 opt.priority = cl->priority+1;
1501 opt.cpriority = cl->cpriority+1;
1502 opt.weight = cl->weight;
1503 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1507 skb_trim(skb, b - skb->data);
1511 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1513 unsigned char *b = skb->tail;
1514 struct tc_cbq_ovl opt;
1516 opt.strategy = cl->ovl_strategy;
1517 opt.priority2 = cl->priority2+1;
1518 opt.penalty = (cl->penalty*1000)/HZ;
1519 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1523 skb_trim(skb, b - skb->data);
1527 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1529 unsigned char *b = skb->tail;
1530 struct tc_cbq_fopt opt;
1532 if (cl->split || cl->defmap) {
1533 opt.split = cl->split ? cl->split->classid : 0;
1534 opt.defmap = cl->defmap;
1536 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1541 skb_trim(skb, b - skb->data);
1545 #ifdef CONFIG_NET_CLS_POLICE
1546 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1548 unsigned char *b = skb->tail;
1549 struct tc_cbq_police opt;
1552 opt.police = cl->police;
1553 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1558 skb_trim(skb, b - skb->data);
1563 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1565 if (cbq_dump_lss(skb, cl) < 0 ||
1566 cbq_dump_rate(skb, cl) < 0 ||
1567 cbq_dump_wrr(skb, cl) < 0 ||
1568 cbq_dump_ovl(skb, cl) < 0 ||
1569 #ifdef CONFIG_NET_CLS_POLICE
1570 cbq_dump_police(skb, cl) < 0 ||
1572 cbq_dump_fopt(skb, cl) < 0)
1577 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1579 RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1587 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1589 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1590 unsigned char *b = skb->tail;
1593 rta = (struct rtattr*)b;
1594 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1595 if (cbq_dump_attr(skb, &q->link) < 0)
1596 goto rtattr_failure;
1597 rta->rta_len = skb->tail - b;
1598 spin_lock_bh(&sch->dev->queue_lock);
1599 q->link.xstats.avgidle = q->link.avgidle;
1600 if (cbq_copy_xstats(skb, &q->link.xstats)) {
1601 spin_unlock_bh(&sch->dev->queue_lock);
1602 goto rtattr_failure;
1604 spin_unlock_bh(&sch->dev->queue_lock);
1608 skb_trim(skb, b - skb->data);
1613 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1614 struct sk_buff *skb, struct tcmsg *tcm)
1616 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1617 struct cbq_class *cl = (struct cbq_class*)arg;
1618 unsigned char *b = skb->tail;
1622 tcm->tcm_parent = cl->tparent->classid;
1624 tcm->tcm_parent = TC_H_ROOT;
1625 tcm->tcm_handle = cl->classid;
1626 tcm->tcm_info = cl->q->handle;
1628 rta = (struct rtattr*)b;
1629 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1630 if (cbq_dump_attr(skb, cl) < 0)
1631 goto rtattr_failure;
1632 rta->rta_len = skb->tail - b;
1633 cl->stats.qlen = cl->q->q.qlen;
1634 if (qdisc_copy_stats(skb, &cl->stats))
1635 goto rtattr_failure;
1636 spin_lock_bh(&sch->dev->queue_lock);
1637 cl->xstats.avgidle = cl->avgidle;
1638 cl->xstats.undertime = 0;
1639 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1640 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1641 q->link.xstats.avgidle = q->link.avgidle;
1642 if (cbq_copy_xstats(skb, &cl->xstats)) {
1643 spin_unlock_bh(&sch->dev->queue_lock);
1644 goto rtattr_failure;
1646 spin_unlock_bh(&sch->dev->queue_lock);
1651 skb_trim(skb, b - skb->data);
1655 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1658 struct cbq_class *cl = (struct cbq_class*)arg;
1662 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1665 #ifdef CONFIG_NET_CLS_POLICE
1666 if (cl->police == TC_POLICE_RECLASSIFY)
1667 new->reshape_fail = cbq_reshape_fail;
1673 sch->q.qlen -= (*old)->q.qlen;
1675 sch_tree_unlock(sch);
1682 static struct Qdisc *
1683 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1685 struct cbq_class *cl = (struct cbq_class*)arg;
1687 return cl ? cl->q : NULL;
1690 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1692 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1693 struct cbq_class *cl = cbq_class_lookup(q, classid);
1697 return (unsigned long)cl;
1702 static void cbq_destroy_filters(struct cbq_class *cl)
1704 struct tcf_proto *tp;
1706 while ((tp = cl->filter_list) != NULL) {
1707 cl->filter_list = tp->next;
1712 static void cbq_destroy_class(struct cbq_class *cl)
1714 cbq_destroy_filters(cl);
1715 qdisc_destroy(cl->q);
1716 qdisc_put_rtab(cl->R_tab);
1717 #ifdef CONFIG_NET_ESTIMATOR
1718 qdisc_kill_estimator(&cl->stats);
1724 cbq_destroy(struct Qdisc* sch)
1726 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1727 struct cbq_class *cl;
1730 #ifdef CONFIG_NET_CLS_POLICE
1733 for (h = 0; h < 16; h++) {
1734 for (cl = q->classes[h]; cl; cl = cl->next)
1735 cbq_destroy_filters(cl);
1738 for (h = 0; h < 16; h++) {
1739 struct cbq_class *next;
1741 for (cl = q->classes[h]; cl; cl = next) {
1744 cbq_destroy_class(cl);
1748 qdisc_put_rtab(q->link.R_tab);
1751 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1753 struct cbq_class *cl = (struct cbq_class*)arg;
1755 if (--cl->refcnt == 0) {
1756 #ifdef CONFIG_NET_CLS_POLICE
1757 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1759 spin_lock_bh(&sch->dev->queue_lock);
1760 if (q->rx_class == cl)
1762 spin_unlock_bh(&sch->dev->queue_lock);
1765 cbq_destroy_class(cl);
1770 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1774 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1775 struct cbq_class *cl = (struct cbq_class*)*arg;
1776 struct rtattr *opt = tca[TCA_OPTIONS-1];
1777 struct rtattr *tb[TCA_CBQ_MAX];
1778 struct cbq_class *parent;
1779 struct qdisc_rate_table *rtab = NULL;
1782 rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1785 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1786 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1789 if (tb[TCA_CBQ_FOPT-1] &&
1790 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1793 if (tb[TCA_CBQ_RATE-1] &&
1794 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1797 if (tb[TCA_CBQ_LSSOPT-1] &&
1798 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1801 if (tb[TCA_CBQ_WRROPT-1] &&
1802 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1805 #ifdef CONFIG_NET_CLS_POLICE
1806 if (tb[TCA_CBQ_POLICE-1] &&
1807 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1814 if (cl->tparent && cl->tparent->classid != parentid)
1816 if (!cl->tparent && parentid != TC_H_ROOT)
1820 if (tb[TCA_CBQ_RATE-1]) {
1821 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1826 /* Change class parameters */
1829 if (cl->next_alive != NULL)
1830 cbq_deactivate_class(cl);
1833 rtab = xchg(&cl->R_tab, rtab);
1834 qdisc_put_rtab(rtab);
1837 if (tb[TCA_CBQ_LSSOPT-1])
1838 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1840 if (tb[TCA_CBQ_WRROPT-1]) {
1842 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1845 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1846 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1848 #ifdef CONFIG_NET_CLS_POLICE
1849 if (tb[TCA_CBQ_POLICE-1])
1850 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1853 if (tb[TCA_CBQ_FOPT-1])
1854 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1857 cbq_activate_class(cl);
1859 sch_tree_unlock(sch);
1861 #ifdef CONFIG_NET_ESTIMATOR
1862 if (tca[TCA_RATE-1]) {
1863 qdisc_kill_estimator(&cl->stats);
1864 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1870 if (parentid == TC_H_ROOT)
1873 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1874 tb[TCA_CBQ_LSSOPT-1] == NULL)
1877 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1883 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1887 classid = TC_H_MAKE(sch->handle,0x8000);
1889 for (i=0; i<0x8000; i++) {
1890 if (++q->hgenerator >= 0x8000)
1892 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1898 classid = classid|q->hgenerator;
1903 parent = cbq_class_lookup(q, parentid);
1910 cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1913 memset(cl, 0, sizeof(*cl));
1917 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1918 cl->q = &noop_qdisc;
1919 cl->classid = classid;
1920 cl->tparent = parent;
1922 cl->allot = parent->allot;
1923 cl->quantum = cl->allot;
1924 cl->weight = cl->R_tab->rate.rate;
1925 cl->stats.lock = &sch->dev->queue_lock;
1929 cl->borrow = cl->tparent;
1930 if (cl->tparent != &q->link)
1931 cl->share = cl->tparent;
1932 cbq_adjust_levels(parent);
1933 cl->minidle = -0x7FFFFFFF;
1934 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1935 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1936 if (cl->ewma_log==0)
1937 cl->ewma_log = q->link.ewma_log;
1939 cl->maxidle = q->link.maxidle;
1941 cl->avpkt = q->link.avpkt;
1942 cl->overlimit = cbq_ovl_classic;
1943 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1944 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1945 #ifdef CONFIG_NET_CLS_POLICE
1946 if (tb[TCA_CBQ_POLICE-1])
1947 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1949 if (tb[TCA_CBQ_FOPT-1])
1950 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1951 sch_tree_unlock(sch);
1953 #ifdef CONFIG_NET_ESTIMATOR
1954 if (tca[TCA_RATE-1])
1955 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1958 *arg = (unsigned long)cl;
1962 qdisc_put_rtab(rtab);
1966 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1968 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1969 struct cbq_class *cl = (struct cbq_class*)arg;
1971 if (cl->filters || cl->children || cl == &q->link)
1977 cbq_deactivate_class(cl);
1979 if (q->tx_borrowed == cl)
1980 q->tx_borrowed = q->tx_class;
1981 if (q->tx_class == cl) {
1983 q->tx_borrowed = NULL;
1985 #ifdef CONFIG_NET_CLS_POLICE
1986 if (q->rx_class == cl)
1990 cbq_unlink_class(cl);
1991 cbq_adjust_levels(cl->tparent);
1993 cbq_sync_defmap(cl);
1996 sch_tree_unlock(sch);
1998 if (--cl->refcnt == 0)
1999 cbq_destroy_class(cl);
2004 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2006 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2007 struct cbq_class *cl = (struct cbq_class *)arg;
2012 return &cl->filter_list;
2015 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2018 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2019 struct cbq_class *p = (struct cbq_class*)parent;
2020 struct cbq_class *cl = cbq_class_lookup(q, classid);
2023 if (p && p->level <= cl->level)
2026 return (unsigned long)cl;
2031 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2033 struct cbq_class *cl = (struct cbq_class*)arg;
2038 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2040 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2046 for (h = 0; h < 16; h++) {
2047 struct cbq_class *cl;
2049 for (cl = q->classes[h]; cl; cl = cl->next) {
2050 if (arg->count < arg->skip) {
2054 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2063 static struct Qdisc_class_ops cbq_class_ops = {
2068 .change = cbq_change_class,
2069 .delete = cbq_delete,
2071 .tcf_chain = cbq_find_tcf,
2072 .bind_tcf = cbq_bind_filter,
2073 .unbind_tcf = cbq_unbind_filter,
2074 .dump = cbq_dump_class,
2077 static struct Qdisc_ops cbq_qdisc_ops = {
2079 .cl_ops = &cbq_class_ops,
2081 .priv_size = sizeof(struct cbq_sched_data),
2082 .enqueue = cbq_enqueue,
2083 .dequeue = cbq_dequeue,
2084 .requeue = cbq_requeue,
2088 .destroy = cbq_destroy,
2091 .owner = THIS_MODULE,
2094 static int __init cbq_module_init(void)
2096 return register_qdisc(&cbq_qdisc_ops);
2098 static void __exit cbq_module_exit(void)
2100 unregister_qdisc(&cbq_qdisc_ops);
2102 module_init(cbq_module_init)
2103 module_exit(cbq_module_exit)
2104 MODULE_LICENSE("GPL");