2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <linux/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
37 #include <net/pkt_sched.h>
40 /* Class-Based Queueing (CBQ) algorithm.
41 =======================================
43 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 Management Models for Packet Networks",
45 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
47 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
49 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
52 [4] Sally Floyd and Michael Speer, "Experimental Results
53 for Class-Based Queueing", 1998, not published.
55 -----------------------------------------------------------------------
57 Algorithm skeleton was taken from NS simulator cbq.cc.
58 If someone wants to check this code against the LBL version,
59 they should take into account that ONLY the skeleton was borrowed;
60 the implementation is different. In particular:
62 --- The WRR algorithm is different. Our version looks more
63 reasonable (I hope) and works when quanta are allowed to be
64 less than MTU, which is always the case when real time classes
65 have small rates. Note that the statement in [3] is
66 incomplete: the delay may actually be estimated even if the class
67 per-round allotment is less than the MTU. Namely, if the per-round
68 allotment is W*r_i, and r_1+...+r_k = r < 1,
70 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
72 In the worst case we have the IntServ estimate with D = W*r+k*MTU
73 and C = MTU*r. The proof (if correct at all) is trivial; worked numbers follow this comment.
76 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 interpret some places, which look like wrong translations
78 from NS. Anyone is advised to find these differences
79 and explain to me why I am wrong 8).
81 --- Linux has no EOI event, so we cannot estimate the true class
82 idle time. The workaround is to consider the next dequeue event
83 as a sign that the previous packet has finished. This is wrong because of
84 internal device queueing, but it holds on a permanently loaded link.
85 Moreover, combined with the clock integrator, this scheme looks
86 very close to an ideal solution. */
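/* Purely illustrative numbers for the bound above (not from the original
   text; [.] is read as a ceiling): with MTU = 1500 bytes, link bandwidth
   B = 12.5 Mbyte/s (100 Mbit/s), k = 4 real-time classes with total share
   r = 0.5, one class with share r_i = 0.1 and a round size W = 3000 bytes,
   the per-round allotment W*r_i = 300 bytes is well below the MTU, yet

	delay_i <= (ceil(1500/300)*1500 + 1500 + 4*1500)/12.5e6 = 1.2 ms,

   so the delay remains bounded. */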
88 struct cbq_sched_data;
93 struct cbq_class *next; /* hash table link */
94 struct cbq_class *next_alive; /* next class with backlog in this priority band */
98 unsigned char priority; /* class priority */
99 unsigned char priority2; /* priority to be used after overlimit */
100 unsigned char ewma_log; /* time constant for idle time calculation */
101 unsigned char ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 unsigned char police;
108 /* Link-sharing scheduler parameters */
109 long maxidle; /* Class parameters: see below. */
113 struct qdisc_rate_table *R_tab;
115 /* Overlimit strategy parameters */
116 void (*overlimit)(struct cbq_class *cl);
119 /* General scheduler (WRR) parameters */
121 long quantum; /* Allotment per WRR round */
122 long weight; /* Relative allotment: see below */
124 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
125 struct cbq_class *split; /* Ptr to split node */
126 struct cbq_class *share; /* Ptr to LS parent in the class tree */
127 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
128 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
130 struct cbq_class *sibling; /* Sibling chain */
131 struct cbq_class *children; /* Pointer to children chain */
133 struct Qdisc *q; /* Elementary queueing discipline */
137 unsigned char cpriority; /* Effective priority */
138 unsigned char delayed;
139 unsigned char level; /* level of the class in hierarchy:
140 0 for leaf classes, and maximal
141 level of children + 1 for nodes.
144 psched_time_t last; /* Last end of service */
145 psched_time_t undertime;
147 long deficit; /* Saved deficit for WRR */
148 unsigned long penalized;
149 struct gnet_stats_basic bstats;
150 struct gnet_stats_queue qstats;
151 struct gnet_stats_rate_est rate_est;
152 spinlock_t *stats_lock;
153 struct tc_cbq_xstats xstats;
155 struct tcf_proto *filter_list;
160 struct cbq_class *defaults[TC_PRIO_MAX+1];
163 struct cbq_sched_data
165 struct cbq_class *classes[16]; /* Hash table of all classes */
166 int nclasses[TC_CBQ_MAXPRIO+1];
167 unsigned quanta[TC_CBQ_MAXPRIO+1];
169 struct cbq_class link;
172 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
175 #ifdef CONFIG_NET_CLS_POLICE
176 struct cbq_class *rx_class;
178 struct cbq_class *tx_class;
179 struct cbq_class *tx_borrowed;
181 psched_time_t now; /* Cached timestamp */
182 psched_time_t now_rt; /* Cached real time */
185 struct timer_list delay_timer;
186 struct timer_list wd_timer; /* Watchdog timer,
196 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
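/* The L2T() macro above ("length to time") converts a packet length into
   its transmission time at the class's configured rate: the rate table
   attached with TCA_CBQ_RTAB holds one precomputed time value per
   2^cell_log bytes of packet length. */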
199 static __inline__ unsigned cbq_hash(u32 h)
206 static __inline__ struct cbq_class *
207 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
209 struct cbq_class *cl;
211 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
212 if (cl->classid == classid)
217 #ifdef CONFIG_NET_CLS_POLICE
219 static struct cbq_class *
220 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
222 struct cbq_class *cl, *new;
224 for (cl = this->tparent; cl; cl = cl->tparent)
225 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
233 /* Classify a packet. The procedure is pretty complicated, but
234 it allows us to combine link sharing and priority scheduling.
237 Namely, you can put link-sharing rules (e.g. route based) at the root of the CBQ,
238 so that it resolves to split nodes. Then packets are classified
239 by logical priority, or a more specific classifier may be attached to the split node. */
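/* A concrete, purely illustrative reading of the above: a route-based
   filter at the root resolves traffic to an interior "split" node rather
   than a leaf. cbq_classify() then restarts at that node: it consults the
   node's own filter_list if one is attached, and otherwise falls back to
   the node's defaults[] table (filled in from the TCA_CBQ_FOPT defmap via
   cbq_sync_defmap()), indexed by the logical priority, with
   TC_PRIO_BESTEFFORT as the final fallback. */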
243 static struct cbq_class *
244 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
246 struct cbq_sched_data *q = qdisc_priv(sch);
247 struct cbq_class *head = &q->link;
248 struct cbq_class **defmap;
249 struct cbq_class *cl = NULL;
250 u32 prio = skb->priority;
251 struct tcf_result res;
254 * Step 1. If skb->priority points to one of our classes, use it.
256 if (TC_H_MAJ(prio^sch->handle) == 0 &&
257 (cl = cbq_class_lookup(q, prio)) != NULL)
262 #ifdef CONFIG_NET_CLS_ACT
265 defmap = head->defaults;
268 * Step 2+n. Apply classifier.
270 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
273 if ((cl = (void*)res.class) == NULL) {
274 if (TC_H_MAJ(res.classid))
275 cl = cbq_class_lookup(q, res.classid);
276 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
277 cl = defmap[TC_PRIO_BESTEFFORT];
279 if (cl == NULL || cl->level >= head->level)
283 #ifdef CONFIG_NET_CLS_ACT
285 case TC_ACT_SHOT: /* Stop and kfree */
286 *qres = NET_XMIT_DROP;
293 case TC_ACT_RECLASSIFY: /* Things look good */
305 #ifdef CONFIG_NET_CLS_POLICE
307 case TC_POLICE_RECLASSIFY:
308 return cbq_reclassify(skb, cl);
320 * Step 3+n. If the classifier selected a link-sharing class,
321 * apply the agency-specific classifier.
322 * Repeat this procedure until we hit a leaf node.
331 * Step 4. No success...
333 if (TC_H_MAJ(prio) == 0 &&
334 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
335 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
342 A packet has just been enqueued on an empty class.
343 cbq_activate_class adds the class to the tail of the active class list
344 of its priority band.
347 static __inline__ void cbq_activate_class(struct cbq_class *cl)
349 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
350 int prio = cl->cpriority;
351 struct cbq_class *cl_tail;
353 cl_tail = q->active[prio];
354 q->active[prio] = cl;
356 if (cl_tail != NULL) {
357 cl->next_alive = cl_tail->next_alive;
358 cl_tail->next_alive = cl;
361 q->activemask |= (1<<prio);
366 Unlink the class from the active chain.
367 Note that the same thing is done directly in cbq_dequeue*
368 during the round-robin loop.
371 static void cbq_deactivate_class(struct cbq_class *this)
373 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
374 int prio = this->cpriority;
375 struct cbq_class *cl;
376 struct cbq_class *cl_prev = q->active[prio];
379 cl = cl_prev->next_alive;
381 cl_prev->next_alive = cl->next_alive;
382 cl->next_alive = NULL;
384 if (cl == q->active[prio]) {
385 q->active[prio] = cl_prev;
386 if (cl == q->active[prio]) {
387 q->active[prio] = NULL;
388 q->activemask &= ~(1<<prio);
393 cl = cl_prev->next_alive;
396 } while ((cl_prev = cl) != q->active[prio]);
400 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
402 int toplevel = q->toplevel;
404 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
408 PSCHED_GET_TIME(now);
409 incr = PSCHED_TDIFF(now, q->now_rt);
410 PSCHED_TADD2(q->now, incr, now);
413 if (PSCHED_TLESS(cl->undertime, now)) {
414 q->toplevel = cl->level;
417 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
422 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
424 struct cbq_sched_data *q = qdisc_priv(sch);
426 int ret = NET_XMIT_SUCCESS;
427 struct cbq_class *cl = cbq_classify(skb, sch,&ret);
429 #ifdef CONFIG_NET_CLS_POLICE
433 #ifdef CONFIG_NET_CLS_POLICE
434 cl->q->__parent = sch;
436 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
438 sch->bstats.packets++;
439 sch->bstats.bytes += len;
440 cbq_mark_toplevel(q, cl);
442 cbq_activate_class(cl);
447 #ifndef CONFIG_NET_CLS_ACT
452 cbq_mark_toplevel(q, cl);
456 if (ret == NET_XMIT_DROP) {
461 cbq_mark_toplevel(q, cl);
469 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
471 struct cbq_sched_data *q = qdisc_priv(sch);
472 struct cbq_class *cl;
475 if ((cl = q->tx_class) == NULL) {
482 cbq_mark_toplevel(q, cl);
484 #ifdef CONFIG_NET_CLS_POLICE
486 cl->q->__parent = sch;
488 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
490 sch->qstats.requeues++;
492 cbq_activate_class(cl);
500 /* Overlimit actions */
502 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
504 static void cbq_ovl_classic(struct cbq_class *cl)
506 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
507 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
510 delay += cl->offtime;
513 The class goes to sleep, so it will have no
514 chance to work its avgidle back up. Let's forgive it 8)
516 BTW cbq-2.0 has a bug in this
517 place; apparently they forgot to shift it by cl->ewma_log.
520 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
521 if (cl->avgidle < cl->minidle)
522 cl->avgidle = cl->minidle;
525 PSCHED_TADD2(q->now, delay, cl->undertime);
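/* undertime = q->now + delay: until the virtual clock reaches that point,
   cbq_under_limit() keeps treating the class as overlimit, so it must
   borrow from an ancestor or trigger its overlimit action again. */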
527 cl->xstats.overactions++;
530 if (q->wd_expires == 0 || q->wd_expires > delay)
531 q->wd_expires = delay;
533 /* Dirty work! We must schedule wakeups based on the
534 real available rate, rather than the leaf rate,
535 which may be tiny (or even zero).
537 if (q->toplevel == TC_CBQ_MAXLEVEL) {
539 psched_tdiff_t base_delay = q->wd_expires;
541 for (b = cl->borrow; b; b = b->borrow) {
542 delay = PSCHED_TDIFF(b->undertime, q->now);
543 if (delay < base_delay) {
550 q->wd_expires = base_delay;
554 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
558 static void cbq_ovl_rclassic(struct cbq_class *cl)
560 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
561 struct cbq_class *this = cl;
564 if (cl->level > q->toplevel) {
568 } while ((cl = cl->borrow) != NULL);
575 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
577 static void cbq_ovl_delay(struct cbq_class *cl)
579 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
580 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
583 unsigned long sched = jiffies;
585 delay += cl->offtime;
587 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
588 if (cl->avgidle < cl->minidle)
589 cl->avgidle = cl->minidle;
590 PSCHED_TADD2(q->now, delay, cl->undertime);
593 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
594 cl->penalized = sched;
595 cl->cpriority = TC_CBQ_MAXPRIO;
596 q->pmask |= (1<<TC_CBQ_MAXPRIO);
597 if (del_timer(&q->delay_timer) &&
598 (long)(q->delay_timer.expires - sched) > 0)
599 q->delay_timer.expires = sched;
600 add_timer(&q->delay_timer);
602 cl->xstats.overactions++;
607 if (q->wd_expires == 0 || q->wd_expires > delay)
608 q->wd_expires = delay;
611 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
613 static void cbq_ovl_lowprio(struct cbq_class *cl)
615 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
617 cl->penalized = jiffies + cl->penalty;
619 if (cl->cpriority != cl->priority2) {
620 cl->cpriority = cl->priority2;
621 q->pmask |= (1<<cl->cpriority);
622 cl->xstats.overactions++;
627 /* TC_CBQ_OVL_DROP: penalize class by dropping */
629 static void cbq_ovl_drop(struct cbq_class *cl)
631 if (cl->q->ops->drop)
632 if (cl->q->ops->drop(cl->q))
634 cl->xstats.overactions++;
638 static void cbq_watchdog(unsigned long arg)
640 struct Qdisc *sch = (struct Qdisc*)arg;
642 sch->flags &= ~TCQ_F_THROTTLED;
643 netif_schedule(sch->dev);
646 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
648 struct cbq_class *cl;
649 struct cbq_class *cl_prev = q->active[prio];
650 unsigned long now = jiffies;
651 unsigned long sched = now;
657 cl = cl_prev->next_alive;
658 if ((long)(now - cl->penalized) > 0) {
659 cl_prev->next_alive = cl->next_alive;
660 cl->next_alive = NULL;
661 cl->cpriority = cl->priority;
663 cbq_activate_class(cl);
665 if (cl == q->active[prio]) {
666 q->active[prio] = cl_prev;
667 if (cl == q->active[prio]) {
668 q->active[prio] = NULL;
673 cl = cl_prev->next_alive;
674 } else if ((long)(sched - cl->penalized) > 0)
675 sched = cl->penalized;
676 } while ((cl_prev = cl) != q->active[prio]);
678 return (long)(sched - now);
681 static void cbq_undelay(unsigned long arg)
683 struct Qdisc *sch = (struct Qdisc*)arg;
684 struct cbq_sched_data *q = qdisc_priv(sch);
692 int prio = ffz(~pmask);
697 tmp = cbq_undelay_prio(q, prio);
700 if (tmp < delay || delay == 0)
706 q->delay_timer.expires = jiffies + delay;
707 add_timer(&q->delay_timer);
710 sch->flags &= ~TCQ_F_THROTTLED;
711 netif_schedule(sch->dev);
715 #ifdef CONFIG_NET_CLS_POLICE
717 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
720 struct Qdisc *sch = child->__parent;
721 struct cbq_sched_data *q = qdisc_priv(sch);
722 struct cbq_class *cl = q->rx_class;
726 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
728 cbq_mark_toplevel(q, cl);
731 cl->q->__parent = sch;
733 if (cl->q->enqueue(skb, cl->q) == 0) {
735 sch->bstats.packets++;
736 sch->bstats.bytes += len;
738 cbq_activate_class(cl);
751 This is a mission-critical procedure.
753 We "regenerate" the toplevel cutoff if the transmitting class
754 has backlog and is not regulated. This is not part of the
755 original CBQ description, but it looks more reasonable.
756 It may well be wrong; this question needs further investigation.
759 static __inline__ void
760 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
761 struct cbq_class *borrowed)
763 if (cl && q->toplevel >= borrowed->level) {
764 if (cl->q->q.qlen > 1) {
766 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
767 q->toplevel = borrowed->level;
770 } while ((borrowed=borrowed->borrow) != NULL);
773 /* It is not necessary now. Uncommenting it
774 will save CPU cycles, but decrease fairness.
776 q->toplevel = TC_CBQ_MAXLEVEL;
782 cbq_update(struct cbq_sched_data *q)
784 struct cbq_class *this = q->tx_class;
785 struct cbq_class *cl = this;
790 for ( ; cl; cl = cl->share) {
791 long avgidle = cl->avgidle;
794 cl->bstats.packets++;
795 cl->bstats.bytes += len;
798 (now - last) is the total time between packet right edges.
799 (last_pktlen/rate) is the "virtual" busy time, so that
801 idle = (now - last) - last_pktlen/rate
804 idle = PSCHED_TDIFF(q->now, cl->last);
805 if ((unsigned long)idle > 128*1024*1024) {
806 avgidle = cl->maxidle;
808 idle -= L2T(cl, len);
810 /* true_avgidle := (1-W)*true_avgidle + W*idle,
811 where W=2^{-ewma_log}. But cl->avgidle is scaled:
812 cl->avgidle == true_avgidle/W,
815 avgidle += idle - (avgidle>>cl->ewma_log);
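/* (Dividing that recurrence by W gives
   avgidle' = avgidle + idle - W*avgidle, and with W = 2^{-ewma_log}
   the term W*avgidle is avgidle >> ewma_log, up to integer truncation,
   which is exactly the update above.) */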
819 /* Overlimit or at-limit */
821 if (avgidle < cl->minidle)
822 avgidle = cl->minidle;
824 cl->avgidle = avgidle;
826 /* Calculate the expected time when this class
827 will be allowed to send.
829 (1-W)*true_avgidle + W*delay = 0, i.e.
830 idle = (1/W - 1)*(-true_avgidle)
832 idle = (1 - W)*(-cl->avgidle);
834 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
838 To maintain the rate allocated to the class,
839 we add to undertime the virtual clock time
840 needed to complete the transmitted packet.
841 (len/phys_bandwidth has already been added to q->now
842 by the time cbq_update runs.)
845 idle -= L2T(&q->link, len);
846 idle += L2T(cl, len);
848 PSCHED_AUDIT_TDIFF(idle);
850 PSCHED_TADD2(q->now, idle, cl->undertime);
854 PSCHED_SET_PASTPERFECT(cl->undertime);
855 if (avgidle > cl->maxidle)
856 cl->avgidle = cl->maxidle;
858 cl->avgidle = avgidle;
863 cbq_update_toplevel(q, this, q->tx_borrowed);
866 static __inline__ struct cbq_class *
867 cbq_under_limit(struct cbq_class *cl)
869 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
870 struct cbq_class *this_cl = cl;
872 if (cl->tparent == NULL)
875 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
876 !PSCHED_TLESS(q->now, cl->undertime)) {
882 /* This is a very suspicious place. Currently the overlimit
883 action is generated for non-bounded classes
884 only if the link is completely congested.
885 Though this agrees with the ancestor-only paradigm,
886 it looks very stupid. In particular,
887 it means that this chunk of code will either
888 never be called or result in a strong amplification
889 of burstiness. Dangerous, silly, and yet
890 no other solution exists.
892 if ((cl = cl->borrow) == NULL) {
893 this_cl->qstats.overlimits++;
894 this_cl->overlimit(this_cl);
897 if (cl->level > q->toplevel)
899 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
900 PSCHED_TLESS(q->now, cl->undertime));
906 static __inline__ struct sk_buff *
907 cbq_dequeue_prio(struct Qdisc *sch, int prio)
909 struct cbq_sched_data *q = qdisc_priv(sch);
910 struct cbq_class *cl_tail, *cl_prev, *cl;
914 cl_tail = cl_prev = q->active[prio];
915 cl = cl_prev->next_alive;
922 struct cbq_class *borrow = cl;
925 (borrow = cbq_under_limit(cl)) == NULL)
928 if (cl->deficit <= 0) {
929 /* The class has exhausted its allotment for
930 this round. Switch to the next one.
933 cl->deficit += cl->quantum;
937 skb = cl->q->dequeue(cl->q);
939 /* The class did not give us any skb :-(
940 This can occur even if cl->q->q.qlen != 0,
941 e.g. if cl->q is a tbf qdisc.
946 cl->deficit -= skb->len;
948 q->tx_borrowed = borrow;
950 #ifndef CBQ_XSTATS_BORROWS_BYTES
951 borrow->xstats.borrows++;
952 cl->xstats.borrows++;
954 borrow->xstats.borrows += skb->len;
955 cl->xstats.borrows += skb->len;
958 q->tx_len = skb->len;
960 if (cl->deficit <= 0) {
961 q->active[prio] = cl;
963 cl->deficit += cl->quantum;
968 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
969 /* The class is empty or penalized.
970 Unlink it from the active chain.
972 cl_prev->next_alive = cl->next_alive;
973 cl->next_alive = NULL;
975 /* Did cl_tail point to it? */
980 /* Was it the last class in this band? */
983 q->active[prio] = NULL;
984 q->activemask &= ~(1<<prio);
986 cbq_activate_class(cl);
990 q->active[prio] = cl_tail;
993 cbq_activate_class(cl);
1000 cl = cl->next_alive;
1001 } while (cl_prev != cl_tail);
1004 q->active[prio] = cl_prev;
1009 static __inline__ struct sk_buff *
1010 cbq_dequeue_1(struct Qdisc *sch)
1012 struct cbq_sched_data *q = qdisc_priv(sch);
1013 struct sk_buff *skb;
1014 unsigned activemask;
1016 activemask = q->activemask&0xFF;
1017 while (activemask) {
1018 int prio = ffz(~activemask);
1019 activemask &= ~(1<<prio);
1020 skb = cbq_dequeue_prio(sch, prio);
1027 static struct sk_buff *
1028 cbq_dequeue(struct Qdisc *sch)
1030 struct sk_buff *skb;
1031 struct cbq_sched_data *q = qdisc_priv(sch);
1033 psched_tdiff_t incr;
1035 PSCHED_GET_TIME(now);
1036 incr = PSCHED_TDIFF(now, q->now_rt);
1039 psched_tdiff_t incr2;
1040 /* Time integrator. We calculate the EOS time
1041 by adding the expected packet transmission time.
1042 If real time is greater, we warp the artificial clock,
1045 cbq_time = max(real_time, work);
1047 incr2 = L2T(&q->link, q->tx_len);
1048 PSCHED_TADD(q->now, incr2);
1050 if ((incr -= incr2) < 0)
1053 PSCHED_TADD(q->now, incr);
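/* Example of the integrator at work: if the previous packet needed, say,
   120 psched ticks at the link rate (incr2) but 300 ticks of real time
   have elapsed since the last dequeue, q->now advances by the full 300;
   if only 80 ticks have elapsed, it still advances by 120 -- i.e. q->now
   moves by max(incr, incr2), matching the comment above. */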
1059 skb = cbq_dequeue_1(sch);
1062 sch->flags &= ~TCQ_F_THROTTLED;
1066 /* All the classes are overlimit.
1070 1. The scheduler is empty.
1071 2. The toplevel cutoff inhibited borrowing.
1072 3. The root class is overlimit.
1074 Reset the 2nd and 3rd conditions and retry.
1076 Note that NS and cbq-2.0 are buggy here: peeking at
1077 an arbitrary class is appropriate for ancestor-only
1078 sharing, but not for the toplevel algorithm.
1080 Our version is better but slower, because it requires
1081 two passes; that is unavoidable with top-level sharing.
1084 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1085 PSCHED_IS_PASTPERFECT(q->link.undertime))
1088 q->toplevel = TC_CBQ_MAXLEVEL;
1089 PSCHED_SET_PASTPERFECT(q->link.undertime);
1092 /* No packets in the scheduler, or nobody wants to give them to us :-(
1093 Sigh... start the watchdog timer in the latter case. */
1096 sch->qstats.overlimits++;
1097 if (q->wd_expires) {
1098 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1101 mod_timer(&q->wd_timer, jiffies + delay);
1102 sch->flags |= TCQ_F_THROTTLED;
1108 /* CBQ class maintenance routines */
1110 static void cbq_adjust_levels(struct cbq_class *this)
1117 struct cbq_class *cl;
1119 if ((cl = this->children) != NULL) {
1121 if (cl->level > level)
1123 } while ((cl = cl->sibling) != this->children);
1125 this->level = level+1;
1126 } while ((this = this->tparent) != NULL);
1129 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1131 struct cbq_class *cl;
1134 if (q->quanta[prio] == 0)
1137 for (h=0; h<16; h++) {
1138 for (cl = q->classes[h]; cl; cl = cl->next) {
1139 /* BUGGGG... Beware! This expression suffers from
1140 arithmetic overflow!
1142 if (cl->priority == prio) {
1143 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1146 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1147 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1148 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
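/* Illustrative instance of the formula above: three classes in one
   priority band, all with allot = 1514 and weights 1, 2 and 3, give
   q->nclasses[prio] = 3 and q->quanta[prio] = 1+2+3 = 6, hence quanta of
   1514*1*3/6 = 757, 1514*2*3/6 = 1514 and 1514*3*3/6 = 2271 bytes per
   WRR round. */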
1154 static void cbq_sync_defmap(struct cbq_class *cl)
1156 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1157 struct cbq_class *split = cl->split;
1164 for (i=0; i<=TC_PRIO_MAX; i++) {
1165 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1166 split->defaults[i] = NULL;
1169 for (i=0; i<=TC_PRIO_MAX; i++) {
1170 int level = split->level;
1172 if (split->defaults[i])
1175 for (h=0; h<16; h++) {
1176 struct cbq_class *c;
1178 for (c = q->classes[h]; c; c = c->next) {
1179 if (c->split == split && c->level < level &&
1181 split->defaults[i] = c;
1189 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1191 struct cbq_class *split = NULL;
1194 if ((split = cl->split) == NULL)
1196 splitid = split->classid;
1199 if (split == NULL || split->classid != splitid) {
1200 for (split = cl->tparent; split; split = split->tparent)
1201 if (split->classid == splitid)
1208 if (cl->split != split) {
1210 cbq_sync_defmap(cl);
1212 cl->defmap = def&mask;
1214 cl->defmap = (cl->defmap&~mask)|(def&mask);
1216 cbq_sync_defmap(cl);
1219 static void cbq_unlink_class(struct cbq_class *this)
1221 struct cbq_class *cl, **clp;
1222 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1224 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1232 if (this->tparent) {
1241 } while ((cl = *clp) != this->sibling);
1243 if (this->tparent->children == this) {
1244 this->tparent->children = this->sibling;
1245 if (this->sibling == this)
1246 this->tparent->children = NULL;
1249 BUG_TRAP(this->sibling == this);
1253 static void cbq_link_class(struct cbq_class *this)
1255 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1256 unsigned h = cbq_hash(this->classid);
1257 struct cbq_class *parent = this->tparent;
1259 this->sibling = this;
1260 this->next = q->classes[h];
1261 q->classes[h] = this;
1266 if (parent->children == NULL) {
1267 parent->children = this;
1269 this->sibling = parent->children->sibling;
1270 parent->children->sibling = this;
1274 static unsigned int cbq_drop(struct Qdisc* sch)
1276 struct cbq_sched_data *q = qdisc_priv(sch);
1277 struct cbq_class *cl, *cl_head;
1281 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1282 if ((cl_head = q->active[prio]) == NULL)
1287 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1291 } while ((cl = cl->next_alive) != cl_head);
1297 cbq_reset(struct Qdisc* sch)
1299 struct cbq_sched_data *q = qdisc_priv(sch);
1300 struct cbq_class *cl;
1307 q->tx_borrowed = NULL;
1308 del_timer(&q->wd_timer);
1309 del_timer(&q->delay_timer);
1310 q->toplevel = TC_CBQ_MAXLEVEL;
1311 PSCHED_GET_TIME(q->now);
1314 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1315 q->active[prio] = NULL;
1317 for (h = 0; h < 16; h++) {
1318 for (cl = q->classes[h]; cl; cl = cl->next) {
1321 cl->next_alive = NULL;
1322 PSCHED_SET_PASTPERFECT(cl->undertime);
1323 cl->avgidle = cl->maxidle;
1324 cl->deficit = cl->quantum;
1325 cl->cpriority = cl->priority;
1332 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1334 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1335 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1336 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1338 if (lss->change&TCF_CBQ_LSS_EWMA)
1339 cl->ewma_log = lss->ewma_log;
1340 if (lss->change&TCF_CBQ_LSS_AVPKT)
1341 cl->avpkt = lss->avpkt;
1342 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1343 cl->minidle = -(long)lss->minidle;
1344 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1345 cl->maxidle = lss->maxidle;
1346 cl->avgidle = lss->maxidle;
1348 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1349 cl->offtime = lss->offtime;
1353 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1355 q->nclasses[cl->priority]--;
1356 q->quanta[cl->priority] -= cl->weight;
1357 cbq_normalize_quanta(q, cl->priority);
1360 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1362 q->nclasses[cl->priority]++;
1363 q->quanta[cl->priority] += cl->weight;
1364 cbq_normalize_quanta(q, cl->priority);
1367 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1369 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1372 cl->allot = wrr->allot;
1374 cl->weight = wrr->weight;
1375 if (wrr->priority) {
1376 cl->priority = wrr->priority-1;
1377 cl->cpriority = cl->priority;
1378 if (cl->priority >= cl->priority2)
1379 cl->priority2 = TC_CBQ_MAXPRIO-1;
1386 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1388 switch (ovl->strategy) {
1389 case TC_CBQ_OVL_CLASSIC:
1390 cl->overlimit = cbq_ovl_classic;
1392 case TC_CBQ_OVL_DELAY:
1393 cl->overlimit = cbq_ovl_delay;
1395 case TC_CBQ_OVL_LOWPRIO:
1396 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1397 ovl->priority2-1 <= cl->priority)
1399 cl->priority2 = ovl->priority2-1;
1400 cl->overlimit = cbq_ovl_lowprio;
1402 case TC_CBQ_OVL_DROP:
1403 cl->overlimit = cbq_ovl_drop;
1405 case TC_CBQ_OVL_RCLASSIC:
1406 cl->overlimit = cbq_ovl_rclassic;
1411 cl->penalty = (ovl->penalty*HZ)/1000;
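/* The conversion above (and its inverse in cbq_dump_ovl()) treats
   ovl->penalty as a value in milliseconds, stored internally in jiffies. */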
1415 #ifdef CONFIG_NET_CLS_POLICE
1416 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1418 cl->police = p->police;
1420 if (cl->q->handle) {
1421 if (p->police == TC_POLICE_RECLASSIFY)
1422 cl->q->reshape_fail = cbq_reshape_fail;
1424 cl->q->reshape_fail = NULL;
1430 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1432 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1436 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1438 struct cbq_sched_data *q = qdisc_priv(sch);
1439 struct rtattr *tb[TCA_CBQ_MAX];
1440 struct tc_ratespec *r;
1442 if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1443 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1444 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1447 if (tb[TCA_CBQ_LSSOPT-1] &&
1448 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1451 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1453 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1457 q->link.sibling = &q->link;
1458 q->link.classid = sch->handle;
1459 q->link.qdisc = sch;
1460 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1461 q->link.q = &noop_qdisc;
1463 q->link.priority = TC_CBQ_MAXPRIO-1;
1464 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1465 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1466 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1467 q->link.overlimit = cbq_ovl_classic;
1468 q->link.allot = psched_mtu(sch->dev);
1469 q->link.quantum = q->link.allot;
1470 q->link.weight = q->link.R_tab->rate.rate;
1472 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1473 q->link.avpkt = q->link.allot/2;
1474 q->link.minidle = -0x7FFFFFFF;
1475 q->link.stats_lock = &sch->dev->queue_lock;
1477 init_timer(&q->wd_timer);
1478 q->wd_timer.data = (unsigned long)sch;
1479 q->wd_timer.function = cbq_watchdog;
1480 init_timer(&q->delay_timer);
1481 q->delay_timer.data = (unsigned long)sch;
1482 q->delay_timer.function = cbq_undelay;
1483 q->toplevel = TC_CBQ_MAXLEVEL;
1484 PSCHED_GET_TIME(q->now);
1487 cbq_link_class(&q->link);
1489 if (tb[TCA_CBQ_LSSOPT-1])
1490 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1492 cbq_addprio(q, &q->link);
1496 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1498 unsigned char *b = skb->tail;
1500 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1504 skb_trim(skb, b - skb->data);
1508 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1510 unsigned char *b = skb->tail;
1511 struct tc_cbq_lssopt opt;
1514 if (cl->borrow == NULL)
1515 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1516 if (cl->share == NULL)
1517 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1518 opt.ewma_log = cl->ewma_log;
1519 opt.level = cl->level;
1520 opt.avpkt = cl->avpkt;
1521 opt.maxidle = cl->maxidle;
1522 opt.minidle = (u32)(-cl->minidle);
1523 opt.offtime = cl->offtime;
1525 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1529 skb_trim(skb, b - skb->data);
1533 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1535 unsigned char *b = skb->tail;
1536 struct tc_cbq_wrropt opt;
1539 opt.allot = cl->allot;
1540 opt.priority = cl->priority+1;
1541 opt.cpriority = cl->cpriority+1;
1542 opt.weight = cl->weight;
1543 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1547 skb_trim(skb, b - skb->data);
1551 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1553 unsigned char *b = skb->tail;
1554 struct tc_cbq_ovl opt;
1556 opt.strategy = cl->ovl_strategy;
1557 opt.priority2 = cl->priority2+1;
1558 opt.penalty = (cl->penalty*1000)/HZ;
1559 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1563 skb_trim(skb, b - skb->data);
1567 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1569 unsigned char *b = skb->tail;
1570 struct tc_cbq_fopt opt;
1572 if (cl->split || cl->defmap) {
1573 opt.split = cl->split ? cl->split->classid : 0;
1574 opt.defmap = cl->defmap;
1576 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1581 skb_trim(skb, b - skb->data);
1585 #ifdef CONFIG_NET_CLS_POLICE
1586 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1588 unsigned char *b = skb->tail;
1589 struct tc_cbq_police opt;
1592 opt.police = cl->police;
1593 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1598 skb_trim(skb, b - skb->data);
1603 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1605 if (cbq_dump_lss(skb, cl) < 0 ||
1606 cbq_dump_rate(skb, cl) < 0 ||
1607 cbq_dump_wrr(skb, cl) < 0 ||
1608 cbq_dump_ovl(skb, cl) < 0 ||
1609 #ifdef CONFIG_NET_CLS_POLICE
1610 cbq_dump_police(skb, cl) < 0 ||
1612 cbq_dump_fopt(skb, cl) < 0)
1617 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1619 struct cbq_sched_data *q = qdisc_priv(sch);
1620 unsigned char *b = skb->tail;
1623 rta = (struct rtattr*)b;
1624 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1625 if (cbq_dump_attr(skb, &q->link) < 0)
1626 goto rtattr_failure;
1627 rta->rta_len = skb->tail - b;
1631 skb_trim(skb, b - skb->data);
1636 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1638 struct cbq_sched_data *q = qdisc_priv(sch);
1640 q->link.xstats.avgidle = q->link.avgidle;
1641 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1645 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1646 struct sk_buff *skb, struct tcmsg *tcm)
1648 struct cbq_class *cl = (struct cbq_class*)arg;
1649 unsigned char *b = skb->tail;
1653 tcm->tcm_parent = cl->tparent->classid;
1655 tcm->tcm_parent = TC_H_ROOT;
1656 tcm->tcm_handle = cl->classid;
1657 tcm->tcm_info = cl->q->handle;
1659 rta = (struct rtattr*)b;
1660 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1661 if (cbq_dump_attr(skb, cl) < 0)
1662 goto rtattr_failure;
1663 rta->rta_len = skb->tail - b;
1667 skb_trim(skb, b - skb->data);
1672 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1673 struct gnet_dump *d)
1675 struct cbq_sched_data *q = qdisc_priv(sch);
1676 struct cbq_class *cl = (struct cbq_class*)arg;
1678 cl->qstats.qlen = cl->q->q.qlen;
1679 cl->xstats.avgidle = cl->avgidle;
1680 cl->xstats.undertime = 0;
1682 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1683 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1685 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1686 #ifdef CONFIG_NET_ESTIMATOR
1687 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1689 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1692 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1695 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1698 struct cbq_class *cl = (struct cbq_class*)arg;
1702 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1705 #ifdef CONFIG_NET_CLS_POLICE
1706 if (cl->police == TC_POLICE_RECLASSIFY)
1707 new->reshape_fail = cbq_reshape_fail;
1713 sch->q.qlen -= (*old)->q.qlen;
1715 sch_tree_unlock(sch);
1722 static struct Qdisc *
1723 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1725 struct cbq_class *cl = (struct cbq_class*)arg;
1727 return cl ? cl->q : NULL;
1730 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1732 struct cbq_sched_data *q = qdisc_priv(sch);
1733 struct cbq_class *cl = cbq_class_lookup(q, classid);
1737 return (unsigned long)cl;
1742 static void cbq_destroy_filters(struct cbq_class *cl)
1744 struct tcf_proto *tp;
1746 while ((tp = cl->filter_list) != NULL) {
1747 cl->filter_list = tp->next;
1752 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1756 BUG_TRAP(!cl->filters);
1758 cbq_destroy_filters(cl);
1759 qdisc_destroy(cl->q);
1760 qdisc_put_rtab(cl->R_tab);
1761 #ifdef CONFIG_NET_ESTIMATOR
1762 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1769 cbq_destroy(struct Qdisc* sch)
1771 struct cbq_sched_data *q = qdisc_priv(sch);
1772 struct cbq_class *cl;
1775 #ifdef CONFIG_NET_CLS_POLICE
1779 * Filters must be destroyed first because we don't destroy the
1780 * classes from root to leaves, which means that filters can still
1781 * be bound to classes which have already been destroyed. --TGR '04
1783 for (h = 0; h < 16; h++)
1784 for (cl = q->classes[h]; cl; cl = cl->next)
1785 cbq_destroy_filters(cl);
1787 for (h = 0; h < 16; h++) {
1788 struct cbq_class *next;
1790 for (cl = q->classes[h]; cl; cl = next) {
1792 cbq_destroy_class(sch, cl);
1797 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1799 struct cbq_class *cl = (struct cbq_class*)arg;
1801 if (--cl->refcnt == 0) {
1802 #ifdef CONFIG_NET_CLS_POLICE
1803 struct cbq_sched_data *q = qdisc_priv(sch);
1805 spin_lock_bh(&sch->dev->queue_lock);
1806 if (q->rx_class == cl)
1808 spin_unlock_bh(&sch->dev->queue_lock);
1811 cbq_destroy_class(sch, cl);
1816 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1820 struct cbq_sched_data *q = qdisc_priv(sch);
1821 struct cbq_class *cl = (struct cbq_class*)*arg;
1822 struct rtattr *opt = tca[TCA_OPTIONS-1];
1823 struct rtattr *tb[TCA_CBQ_MAX];
1824 struct cbq_class *parent;
1825 struct qdisc_rate_table *rtab = NULL;
1828 rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1831 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1832 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1835 if (tb[TCA_CBQ_FOPT-1] &&
1836 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1839 if (tb[TCA_CBQ_RATE-1] &&
1840 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1843 if (tb[TCA_CBQ_LSSOPT-1] &&
1844 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1847 if (tb[TCA_CBQ_WRROPT-1] &&
1848 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1851 #ifdef CONFIG_NET_CLS_POLICE
1852 if (tb[TCA_CBQ_POLICE-1] &&
1853 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1860 if (cl->tparent && cl->tparent->classid != parentid)
1862 if (!cl->tparent && parentid != TC_H_ROOT)
1866 if (tb[TCA_CBQ_RATE-1]) {
1867 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1872 /* Change class parameters */
1875 if (cl->next_alive != NULL)
1876 cbq_deactivate_class(cl);
1879 rtab = xchg(&cl->R_tab, rtab);
1880 qdisc_put_rtab(rtab);
1883 if (tb[TCA_CBQ_LSSOPT-1])
1884 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1886 if (tb[TCA_CBQ_WRROPT-1]) {
1888 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1891 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1892 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1894 #ifdef CONFIG_NET_CLS_POLICE
1895 if (tb[TCA_CBQ_POLICE-1])
1896 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1899 if (tb[TCA_CBQ_FOPT-1])
1900 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1903 cbq_activate_class(cl);
1905 sch_tree_unlock(sch);
1907 #ifdef CONFIG_NET_ESTIMATOR
1908 if (tca[TCA_RATE-1])
1909 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1910 cl->stats_lock, tca[TCA_RATE-1]);
1915 if (parentid == TC_H_ROOT)
1918 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1919 tb[TCA_CBQ_LSSOPT-1] == NULL)
1922 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1928 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1932 classid = TC_H_MAKE(sch->handle,0x8000);
1934 for (i=0; i<0x8000; i++) {
1935 if (++q->hgenerator >= 0x8000)
1937 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1943 classid = classid|q->hgenerator;
1948 parent = cbq_class_lookup(q, parentid);
1955 cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1958 memset(cl, 0, sizeof(*cl));
1962 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1963 cl->q = &noop_qdisc;
1964 cl->classid = classid;
1965 cl->tparent = parent;
1967 cl->allot = parent->allot;
1968 cl->quantum = cl->allot;
1969 cl->weight = cl->R_tab->rate.rate;
1970 cl->stats_lock = &sch->dev->queue_lock;
1974 cl->borrow = cl->tparent;
1975 if (cl->tparent != &q->link)
1976 cl->share = cl->tparent;
1977 cbq_adjust_levels(parent);
1978 cl->minidle = -0x7FFFFFFF;
1979 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1980 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1981 if (cl->ewma_log == 0)
1982 cl->ewma_log = q->link.ewma_log;
1984 cl->maxidle = q->link.maxidle;
1986 cl->avpkt = q->link.avpkt;
1987 cl->overlimit = cbq_ovl_classic;
1988 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1989 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1990 #ifdef CONFIG_NET_CLS_POLICE
1991 if (tb[TCA_CBQ_POLICE-1])
1992 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1994 if (tb[TCA_CBQ_FOPT-1])
1995 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1996 sch_tree_unlock(sch);
1998 #ifdef CONFIG_NET_ESTIMATOR
1999 if (tca[TCA_RATE-1])
2000 gen_new_estimator(&cl->bstats, &cl->rate_est,
2001 cl->stats_lock, tca[TCA_RATE-1]);
2004 *arg = (unsigned long)cl;
2008 qdisc_put_rtab(rtab);
2012 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
2014 struct cbq_sched_data *q = qdisc_priv(sch);
2015 struct cbq_class *cl = (struct cbq_class*)arg;
2017 if (cl->filters || cl->children || cl == &q->link)
2023 cbq_deactivate_class(cl);
2025 if (q->tx_borrowed == cl)
2026 q->tx_borrowed = q->tx_class;
2027 if (q->tx_class == cl) {
2029 q->tx_borrowed = NULL;
2031 #ifdef CONFIG_NET_CLS_POLICE
2032 if (q->rx_class == cl)
2036 cbq_unlink_class(cl);
2037 cbq_adjust_levels(cl->tparent);
2039 cbq_sync_defmap(cl);
2042 sch_tree_unlock(sch);
2044 if (--cl->refcnt == 0)
2045 cbq_destroy_class(sch, cl);
2050 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2052 struct cbq_sched_data *q = qdisc_priv(sch);
2053 struct cbq_class *cl = (struct cbq_class *)arg;
2058 return &cl->filter_list;
2061 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2064 struct cbq_sched_data *q = qdisc_priv(sch);
2065 struct cbq_class *p = (struct cbq_class*)parent;
2066 struct cbq_class *cl = cbq_class_lookup(q, classid);
2069 if (p && p->level <= cl->level)
2072 return (unsigned long)cl;
2077 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2079 struct cbq_class *cl = (struct cbq_class*)arg;
2084 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2086 struct cbq_sched_data *q = qdisc_priv(sch);
2092 for (h = 0; h < 16; h++) {
2093 struct cbq_class *cl;
2095 for (cl = q->classes[h]; cl; cl = cl->next) {
2096 if (arg->count < arg->skip) {
2100 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2109 static struct Qdisc_class_ops cbq_class_ops = {
2114 .change = cbq_change_class,
2115 .delete = cbq_delete,
2117 .tcf_chain = cbq_find_tcf,
2118 .bind_tcf = cbq_bind_filter,
2119 .unbind_tcf = cbq_unbind_filter,
2120 .dump = cbq_dump_class,
2121 .dump_stats = cbq_dump_class_stats,
2124 static struct Qdisc_ops cbq_qdisc_ops = {
2126 .cl_ops = &cbq_class_ops,
2128 .priv_size = sizeof(struct cbq_sched_data),
2129 .enqueue = cbq_enqueue,
2130 .dequeue = cbq_dequeue,
2131 .requeue = cbq_requeue,
2135 .destroy = cbq_destroy,
2138 .dump_stats = cbq_dump_stats,
2139 .owner = THIS_MODULE,
2142 static int __init cbq_module_init(void)
2144 return register_qdisc(&cbq_qdisc_ops);
2146 static void __exit cbq_module_exit(void)
2148 unregister_qdisc(&cbq_qdisc_ops);
2150 module_init(cbq_module_init)
2151 module_exit(cbq_module_exit)
2152 MODULE_LICENSE("GPL");