/*
 * net/sched/sch_cbq.c	Class-Based Queueing discipline.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
/*	Class-Based Queueing (CBQ) algorithm.
	=======================================

	Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
		 Management Models for Packet Networks",
		 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995

		 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995

		 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting

		 [4] Sally Floyd and Michael Speer, "Experimental Results
		 for Class-Based Queueing", 1998, not published.

	-----------------------------------------------------------------------

	The algorithm skeleton was taken from the NS simulator cbq.cc.
	If someone wants to check this code against the LBL version,
	they should take into account that ONLY the skeleton was borrowed;
	the implementation is different. In particular:

	--- The WRR algorithm is different. Our version looks more
	reasonable (I hope) and works when quanta are allowed to be
	less than the MTU, which is always the case when real-time classes
	have small rates. Note that the statement in [3] is
	incomplete: delay may actually be estimated even if the class's
	per-round allotment is less than the MTU. Namely, if the per-round
	allotment is W*r_i, and r_1+...+r_k = r < 1

	delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

	In the worst case we have an IntServ estimate with D = W*r+k*MTU
	and C = MTU*r. The proof (if correct at all) is trivial.
	(A worked numeric example follows this comment block.)

	--- It seems that cbq-2.0 is not very accurate. At least, I cannot
	interpret some places, which look like wrong translations
	from NS. Anyone is advised to find these differences
	and explain to me why I am wrong 8).

	--- Linux has no EOI event, so we cannot estimate the true class
	idle time. The workaround is to consider the next dequeue event
	as a sign that the previous packet has finished. This is wrong
	because of internal device queueing, but on a permanently loaded
	link it is true. Moreover, combined with the clock integrator,
	this scheme looks very close to an ideal solution.  */
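/* Worked example of the bound above (all numbers are illustrative only, not
   taken from any real configuration): let MTU = 1500 bytes, k = 2 real-time
   classes with r_1 = r_2 = 0.25 (so r = 0.5), and a WRR scale W = 2000, so
   that each class's per-round allotment W*r_i = 500 bytes is below the MTU.
   Then [MTU/(W*r_i)] = 3 and W*r = 1000 bytes, giving

	delay_i <= (3*1000 + 1000 + 2*1500)/B = 7000 bytes / B

   With B = 10 Mbit/s (1.25e6 bytes/s) this is about 5.6 ms, i.e. a useful
   delay bound still exists even though the allotment is smaller than the
   MTU. */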
struct cbq_sched_data;

struct cbq_class
{
	struct cbq_class *next;		/* hash table link */
	struct cbq_class *next_alive;	/* next class with backlog in this priority band */

	unsigned char priority;		/* class priority */
	unsigned char priority2;	/* priority to be used after overlimit */
	unsigned char ewma_log;		/* time constant for idle time calculation */
	unsigned char ovl_strategy;
#ifdef CONFIG_NET_CLS_POLICE
	unsigned char police;
#endif

	/* Link-sharing scheduler parameters */
	long maxidle;			/* Class parameters: see below. */

	struct qdisc_rate_table *R_tab;

	/* Overlimit strategy parameters */
	void (*overlimit)(struct cbq_class *cl);

	/* General scheduler (WRR) parameters */
	long quantum;			/* Allotment per WRR round */
	long weight;			/* Relative allotment: see below */

	struct Qdisc *qdisc;		/* Ptr to CBQ discipline */
	struct cbq_class *split;	/* Ptr to split node */
	struct cbq_class *share;	/* Ptr to LS parent in the class tree */
	struct cbq_class *tparent;	/* Ptr to tree parent in the class tree */
	struct cbq_class *borrow;	/* NULL if class is bandwidth limited */
	struct cbq_class *sibling;	/* Sibling chain */
	struct cbq_class *children;	/* Pointer to children chain */

	struct Qdisc *q;		/* Elementary queueing discipline */

	unsigned char cpriority;	/* Effective priority */
	unsigned char delayed;
	unsigned char level;		/* level of the class in hierarchy:
					   0 for leaf classes, and maximal
					   level of children + 1 for nodes. */

	psched_time_t last;		/* Last end of service */
	psched_time_t undertime;
	long deficit;			/* Saved deficit for WRR */
	unsigned long penalized;
	struct tc_stats stats;
	spinlock_t *stats_lock;
	struct tc_cbq_xstats xstats;

	struct tcf_proto *filter_list;

	struct cbq_class *defaults[TC_PRIO_MAX+1];
struct cbq_sched_data
{
	struct cbq_class *classes[16];		/* Hash table of all classes */
	int nclasses[TC_CBQ_MAXPRIO+1];
	unsigned quanta[TC_CBQ_MAXPRIO+1];

	struct cbq_class link;

	struct cbq_class *active[TC_CBQ_MAXPRIO+1];	/* List of all classes
							   with backlog */

#ifdef CONFIG_NET_CLS_POLICE
	struct cbq_class *rx_class;
#endif
	struct cbq_class *tx_class;
	struct cbq_class *tx_borrowed;
	psched_time_t now;			/* Cached timestamp */
	psched_time_t now_rt;			/* Cached real time */

	struct timer_list delay_timer;
	struct timer_list wd_timer;		/* Watchdog timer,
						   started when the qdisc
						   is throttled */
#define L2T(cl,len)	((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
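/* L2T ("length to time") converts a packet length into its transmission time
   at this class's configured rate, using the rate table supplied by the
   userspace tc utility and attached via qdisc_get_rtab().  The table is
   indexed by len >> cell_log, so one entry covers a "cell" of 2^cell_log
   bytes.  Illustrative numbers only (not from this file): with cell_log = 3,
   an 800-byte packet selects entry 800 >> 3 = 100, which holds roughly the
   time needed to send 800 bytes at the configured rate, expressed in
   scheduler clock (PSCHED) units. */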
static __inline__ unsigned cbq_hash(u32 h)

static __inline__ struct cbq_class *
cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
	struct cbq_class *cl;

	for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
		if (cl->classid == classid)

#ifdef CONFIG_NET_CLS_POLICE

static struct cbq_class *
cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
	struct cbq_class *cl, *new;

	for (cl = this->tparent; cl; cl = cl->tparent)
		if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
/* Classify packet. The procedure is pretty complicated, but
   it allows us to combine link sharing and priority scheduling
   transparently.

   Namely, you can put link-sharing rules (e.g. route-based) at the root of
   CBQ, so that it resolves to split nodes. Then packets are classified
   by logical priority, or a more specific classifier may be attached.
 */

static struct cbq_class *
cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *head = &q->link;
	struct cbq_class **defmap;
	struct cbq_class *cl = NULL;
	u32 prio = skb->priority;
	struct tcf_result res;

	/*
	 *  Step 1. If skb->priority points to one of our classes, use it.
	 */
	if (TC_H_MAJ(prio^sch->handle) == 0 &&
	    (cl = cbq_class_lookup(q, prio)) != NULL)

#ifdef CONFIG_NET_CLS_ACT

		defmap = head->defaults;

		/*
		 * Step 2+n. Apply classifier.
		 */
		if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)

		if ((cl = (void*)res.class) == NULL) {
			if (TC_H_MAJ(res.classid))
				cl = cbq_class_lookup(q, res.classid);
			else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
				cl = defmap[TC_PRIO_BESTEFFORT];

			if (cl == NULL || cl->level >= head->level)

#ifdef CONFIG_NET_CLS_ACT
		case TC_ACT_SHOT: /* Stop and kfree */
			*qres = NET_XMIT_DROP;

		case TC_ACT_RECLASSIFY: /* Things look good */

#ifdef CONFIG_NET_CLS_POLICE
		case TC_POLICE_RECLASSIFY:
			return cbq_reclassify(skb, cl);

		/*
		 * Step 3+n. If the classifier selected a link-sharing class,
		 *	     apply the agency-specific classifier.
		 *	     Repeat this procedure until we hit a leaf node.
		 */

	/*
	 * Step 4. No success...
	 */
	if (TC_H_MAJ(prio) == 0 &&
	    !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
	    !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
/*
   A packet has just been enqueued on an empty class.
   cbq_activate_class adds it to the tail of the active class list
   of its priority band.
 */

static __inline__ void cbq_activate_class(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	int prio = cl->cpriority;
	struct cbq_class *cl_tail;

	cl_tail = q->active[prio];
	q->active[prio] = cl;

	if (cl_tail != NULL) {
		cl->next_alive = cl_tail->next_alive;
		cl_tail->next_alive = cl;

		q->activemask |= (1<<prio);

/*
   Unlink the class from the active chain.
   Note that this same procedure is done directly in cbq_dequeue*
   during the round-robin procedure.
 */

static void cbq_deactivate_class(struct cbq_class *this)
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	int prio = this->cpriority;
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];

		cl = cl_prev->next_alive;

			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);

			cl = cl_prev->next_alive;

	} while ((cl_prev = cl) != q->active[prio]);
cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
	int toplevel = q->toplevel;

	if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
		PSCHED_GET_TIME(now);
		incr = PSCHED_TDIFF(now, q->now_rt);
		PSCHED_TADD2(q->now, incr, now);

			if (PSCHED_TLESS(cl->undertime, now)) {
				q->toplevel = cl->level;

		} while ((cl = cl->borrow) != NULL && toplevel > cl->level);

cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	int ret = NET_XMIT_SUCCESS;
	struct cbq_class *cl = cbq_classify(skb, sch, &ret);

#ifdef CONFIG_NET_CLS_POLICE
#ifdef CONFIG_NET_CLS_POLICE
		cl->q->__parent = sch;
		if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
			sch->stats.packets++;
			sch->stats.bytes += len;
			cbq_mark_toplevel(q, cl);
				cbq_activate_class(cl);

#ifndef CONFIG_NET_CLS_ACT
		cbq_mark_toplevel(q, cl);

	if (ret == NET_XMIT_DROP) {
		cbq_mark_toplevel(q, cl);

cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

	if ((cl = q->tx_class) == NULL) {
		cbq_mark_toplevel(q, cl);
#ifdef CONFIG_NET_CLS_POLICE
		cl->q->__parent = sch;
	if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
			cbq_activate_class(cl);
/* Overlimit actions */

/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */

static void cbq_ovl_classic(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

		delay += cl->offtime;

		/*
		   The class goes to sleep, so it will have no chance
		   to work its avgidle off. Let's forgive it 8)

		   BTW cbq-2.0 has a bug here; apparently they forgot
		   to shift it by cl->ewma_log.
		 */
		delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;

		PSCHED_TADD2(q->now, delay, cl->undertime);

		cl->xstats.overactions++;

	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

	/* Dirty work! We must schedule wakeups based on the
	   real available rate, rather than the leaf rate,
	   which may be tiny (even zero).
	 */
	if (q->toplevel == TC_CBQ_MAXLEVEL) {
		psched_tdiff_t base_delay = q->wd_expires;

		for (b = cl->borrow; b; b = b->borrow) {
			delay = PSCHED_TDIFF(b->undertime, q->now);
			if (delay < base_delay) {

		q->wd_expires = base_delay;
/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when ... */

static void cbq_ovl_rclassic(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this = cl;

		if (cl->level > q->toplevel) {

	} while ((cl = cl->borrow) != NULL);

/* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */

static void cbq_ovl_delay(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);

		unsigned long sched = jiffies;

		delay += cl->offtime;
		delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
		if (cl->avgidle < cl->minidle)
			cl->avgidle = cl->minidle;
		PSCHED_TADD2(q->now, delay, cl->undertime);

			sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
			cl->penalized = sched;
			cl->cpriority = TC_CBQ_MAXPRIO;
			q->pmask |= (1<<TC_CBQ_MAXPRIO);
			if (del_timer(&q->delay_timer) &&
			    (long)(q->delay_timer.expires - sched) > 0)
				q->delay_timer.expires = sched;
			add_timer(&q->delay_timer);

		cl->xstats.overactions++;

	if (q->wd_expires == 0 || q->wd_expires > delay)
		q->wd_expires = delay;

/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */

static void cbq_ovl_lowprio(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->penalized = jiffies + cl->penalty;

	if (cl->cpriority != cl->priority2) {
		cl->cpriority = cl->priority2;
		q->pmask |= (1<<cl->cpriority);
		cl->xstats.overactions++;

/* TC_CBQ_OVL_DROP: penalize class by dropping */

static void cbq_ovl_drop(struct cbq_class *cl)
	if (cl->q->ops->drop)
		if (cl->q->ops->drop(cl->q))
	cl->xstats.overactions++;
static void cbq_watchdog(unsigned long arg)
	struct Qdisc *sch = (struct Qdisc*)arg;

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);

static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
	struct cbq_class *cl;
	struct cbq_class *cl_prev = q->active[prio];
	unsigned long now = jiffies;
	unsigned long sched = now;

		cl = cl_prev->next_alive;
		if ((long)(now - cl->penalized) > 0) {
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;
			cl->cpriority = cl->priority;
				cbq_activate_class(cl);

			if (cl == q->active[prio]) {
				q->active[prio] = cl_prev;
				if (cl == q->active[prio]) {
					q->active[prio] = NULL;

			cl = cl_prev->next_alive;
		} else if ((long)(sched - cl->penalized) > 0)
			sched = cl->penalized;
	} while ((cl_prev = cl) != q->active[prio]);

	return (long)(sched - now);

static void cbq_undelay(unsigned long arg)
	struct Qdisc *sch = (struct Qdisc*)arg;
	struct cbq_sched_data *q = qdisc_priv(sch);

		int prio = ffz(~pmask);

		tmp = cbq_undelay_prio(q, prio);

			if (tmp < delay || delay == 0)

		q->delay_timer.expires = jiffies + delay;
		add_timer(&q->delay_timer);

	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
#ifdef CONFIG_NET_CLS_POLICE

static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
	struct Qdisc *sch = child->__parent;
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = q->rx_class;

	if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {

		cbq_mark_toplevel(q, cl);

		cl->q->__parent = sch;
		if (cl->q->enqueue(skb, cl->q) == 0) {
			sch->stats.packets++;
			sch->stats.bytes += len;
				cbq_activate_class(cl);
/*
   This is a mission-critical procedure.

   We "regenerate" the toplevel cutoff if the transmitting class
   has backlog and is not regulated. This is not part of the
   original CBQ description, but it looks more reasonable.
   Probably it is wrong; this question needs further investigation.
 */

static __inline__ void
cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
		    struct cbq_class *borrowed)
	if (cl && q->toplevel >= borrowed->level) {
		if (cl->q->q.qlen > 1) {
				if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
					q->toplevel = borrowed->level;
			} while ((borrowed = borrowed->borrow) != NULL);

		/* It is not necessary now. Uncommenting it
		   will save CPU cycles, but decrease fairness.

		q->toplevel = TC_CBQ_MAXLEVEL;
cbq_update(struct cbq_sched_data *q)
	struct cbq_class *this = q->tx_class;
	struct cbq_class *cl = this;

	for ( ; cl; cl = cl->share) {
		long avgidle = cl->avgidle;

		cl->stats.bytes += len;

		/*
		   (now - last) is the total time between packet right edges.
		   (last_pktlen/rate) is the "virtual" busy time, so that

		   idle = (now - last) - last_pktlen/rate
		 */

		idle = PSCHED_TDIFF(q->now, cl->last);
		if ((unsigned long)idle > 128*1024*1024) {
			avgidle = cl->maxidle;
			idle -= L2T(cl, len);

		/* true_avgidle := (1-W)*true_avgidle + W*idle,
		   where W=2^{-ewma_log}. But cl->avgidle is scaled:
		   cl->avgidle == true_avgidle/W,
		 */
		avgidle += idle - (avgidle>>cl->ewma_log);
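		/* Worked example of the scaled update (illustrative numbers
		   only): with ewma_log = 5 (W = 1/32), a scaled avgidle of
		   3200 corresponds to a true average idle of 100.  If the
		   measured idle for this packet is -800 (the class was 800
		   ticks "too busy"), the line above yields
		   3200 + (-800) - (3200 >> 5) = 2300, which equals
		   ((1 - 1/32)*100 + (1/32)*(-800)) / (1/32), i.e. the same
		   EWMA expressed in the scaled units the code keeps. */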
		/* Overlimit or at-limit */

		if (avgidle < cl->minidle)
			avgidle = cl->minidle;

		cl->avgidle = avgidle;

		/* Calculate the expected time when this class
		   will be allowed to send, i.e. when
		   (1-W)*true_avgidle + W*delay = 0, so that
		   idle = (1/W - 1)*(-true_avgidle)
		   idle = (1 - W)*(-cl->avgidle);
		 */
		idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
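		/* Continuing the illustrative numbers above: if the scaled
		   avgidle ends up at -1600 (true avgidle = -50 with W = 1/32),
		   the class must stay idle for 1600 - (1600 >> 5) = 1550
		   ticks before its average returns to zero; after the
		   per-packet adjustments below, that is the amount added to
		   q->now to form cl->undertime. */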
		/*
		   To maintain the rate allocated to the class,
		   we add to undertime the virtual clock needed
		   to complete the transmitted packet.
		   (len/phys_bandwidth has already elapsed by the
		   time cbq_update is called.)
		 */

		idle -= L2T(&q->link, len);
		idle += L2T(cl, len);

		PSCHED_AUDIT_TDIFF(idle);

		PSCHED_TADD2(q->now, idle, cl->undertime);

		PSCHED_SET_PASTPERFECT(cl->undertime);
		if (avgidle > cl->maxidle)
			cl->avgidle = cl->maxidle;
		else
			cl->avgidle = avgidle;

	cbq_update_toplevel(q, this, q->tx_borrowed);
static __inline__ struct cbq_class *
cbq_under_limit(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *this_cl = cl;

	if (cl->tparent == NULL)

	if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
	    !PSCHED_TLESS(q->now, cl->undertime)) {

		/* This is a very suspicious place. Now the overlimit
		   action is generated for non-bounded classes only if
		   the link is completely congested.
		   Though this agrees with the ancestor-only paradigm,
		   it looks very stupid. In particular, it means that
		   this chunk of code will either never be called or
		   will result in a strong amplification of burstiness.
		   Dangerous, silly, and, however, no other solution exists.
		 */
		if ((cl = cl->borrow) == NULL) {
			this_cl->stats.overlimits++;
			this_cl->overlimit(this_cl);

		if (cl->level > q->toplevel)

	} while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
		 PSCHED_TLESS(q->now, cl->undertime));
static __inline__ struct sk_buff *
cbq_dequeue_prio(struct Qdisc *sch, int prio)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl_tail, *cl_prev, *cl;

	cl_tail = cl_prev = q->active[prio];
	cl = cl_prev->next_alive;

			struct cbq_class *borrow = cl;

			    (borrow = cbq_under_limit(cl)) == NULL)

			if (cl->deficit <= 0) {
				/* The class exhausted its allotment for
				   this round. Switch to the next one.
				 */
				cl->deficit += cl->quantum;
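				/* Deficit-style WRR, with made-up numbers for
				   illustration: a class with quantum 1500 that
				   finished its previous turn at deficit -200 is
				   passed over here and credited its quantum,
				   re-entering the rotation with deficit 1300.
				   It may then send until the deficit drops to
				   zero or below, so its long-run share stays
				   proportional to its quantum even when single
				   packets exceed the quantum. */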
			skb = cl->q->dequeue(cl->q);

			/* The class did not give us any skb :-(
			   This can occur even if cl->q->q.qlen != 0,
			   e.g. if cl->q is a "tbf".
			 */

			cl->deficit -= skb->len;

			q->tx_borrowed = borrow;

#ifndef CBQ_XSTATS_BORROWS_BYTES
				borrow->xstats.borrows++;
				cl->xstats.borrows++;
#else
				borrow->xstats.borrows += skb->len;
				cl->xstats.borrows += skb->len;
#endif
			q->tx_len = skb->len;

			if (cl->deficit <= 0) {
				q->active[prio] = cl;
				cl->deficit += cl->quantum;

		if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
			/* The class is empty or penalized.
			   Unlink it from the active chain.
			 */
			cl_prev->next_alive = cl->next_alive;
			cl->next_alive = NULL;

			/* Did cl_tail point to it? */

				/* Was it the last class in this band? */

					q->active[prio] = NULL;
					q->activemask &= ~(1<<prio);

					cbq_activate_class(cl);

			q->active[prio] = cl_tail;

				cbq_activate_class(cl);

	} while (cl_prev != cl_tail);

	q->active[prio] = cl_prev;

static __inline__ struct sk_buff *
cbq_dequeue_1(struct Qdisc *sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	unsigned activemask;

	activemask = q->activemask&0xFF;
	while (activemask) {
		int prio = ffz(~activemask);
		activemask &= ~(1<<prio);
		skb = cbq_dequeue_prio(sch, prio);
static struct sk_buff *
cbq_dequeue(struct Qdisc *sch)
	struct sk_buff *skb;
	struct cbq_sched_data *q = qdisc_priv(sch);
	psched_tdiff_t incr;

	PSCHED_GET_TIME(now);
	incr = PSCHED_TDIFF(now, q->now_rt);

		psched_tdiff_t incr2;
		/* Time integrator. We calculate the EOS time
		   by adding the expected packet transmission time.
		   If real time is greater, we warp the artificial clock,
		   so that cbq_time = max(real_time, work);
		 */
		incr2 = L2T(&q->link, q->tx_len);
		PSCHED_TADD(q->now, incr2);
		if ((incr -= incr2) < 0)
	PSCHED_TADD(q->now, incr);
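	/* Illustration of the integrator (made-up numbers): if the previous
	   packet was q->tx_len = 1500 bytes and the link rate table says its
	   transmission takes incr2 = 1200 scheduler-time units, q->now first
	   advances by those 1200 units of "work".  If the wall clock (incr)
	   advanced by 2000 units since the last dequeue, the remaining 800
	   are added too, so q->now never falls behind real time; if the wall
	   clock advanced by only 500, nothing extra is added and q->now runs
	   ahead, i.e. cbq_time = max(real_time, work). */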
	skb = cbq_dequeue_1(sch);

		sch->flags &= ~TCQ_F_THROTTLED;

	/* All the classes are overlimit.

	   1. The scheduler is empty.
	   2. The toplevel cutoff inhibited borrowing.
	   3. The root class is overlimit.

	   Reset the 2nd and 3rd conditions and retry.

	   Note that NS and cbq-2.0 are buggy here: picking an
	   arbitrary class is appropriate for ancestor-only
	   sharing, but not for the toplevel algorithm.

	   Our version is better but slower, because it requires
	   two passes; this is unavoidable with top-level sharing.
	 */

	if (q->toplevel == TC_CBQ_MAXLEVEL &&
	    PSCHED_IS_PASTPERFECT(q->link.undertime))

	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_SET_PASTPERFECT(q->link.undertime);

	/* No packets in the scheduler or nobody wants to give them to us :-(
	   Sigh... start the watchdog timer in the latter case. */

	sch->stats.overlimits++;
	if (q->wd_expires) {
		long delay = PSCHED_US2JIFFIE(q->wd_expires);

		mod_timer(&q->wd_timer, jiffies + delay);
		sch->flags |= TCQ_F_THROTTLED;
/* CBQ class maintenance routines */

static void cbq_adjust_levels(struct cbq_class *this)
		struct cbq_class *cl;

		if ((cl = this->children) != NULL) {
				if (cl->level > level)
			} while ((cl = cl->sibling) != this->children);

		this->level = level+1;
	} while ((this = this->tparent) != NULL);

static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
	struct cbq_class *cl;

	if (q->quanta[prio] == 0)

	for (h=0; h<16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {
			/* BUGGGG... Beware! This expression suffers from
			   arithmetic overflows!
			 */
			if (cl->priority == prio) {
				cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
			if (cl->quantum <= 0 || cl->quantum > 32*cl->qdisc->dev->mtu) {
				printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
				cl->quantum = cl->qdisc->dev->mtu/2 + 1;
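	/* Example of the normalization (hypothetical values, and assuming the
	   elided divisor is the accumulated weight sum q->quanta[prio] that
	   cbq_addprio()/cbq_rmprio() maintain): three classes in one priority
	   band, allot = 1514 each, weights 1, 1 and 2, so nclasses = 3 and the
	   weight sum is 4.  The weight-2 class gets quantum = (2*1514*3)/4 =
	   2271 and the others 1135 each, keeping the 1:1:2 ratio while
	   averaging roughly one allot per class per round. */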
static void cbq_sync_defmap(struct cbq_class *cl)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
	struct cbq_class *split = cl->split;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
			split->defaults[i] = NULL;

	for (i=0; i<=TC_PRIO_MAX; i++) {
		int level = split->level;

		if (split->defaults[i])

		for (h=0; h<16; h++) {
			struct cbq_class *c;

			for (c = q->classes[h]; c; c = c->next) {
				if (c->split == split && c->level < level &&
					split->defaults[i] = c;

static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
	struct cbq_class *split = NULL;

	if ((split = cl->split) == NULL)

		splitid = split->classid;

	if (split == NULL || split->classid != splitid) {
		for (split = cl->tparent; split; split = split->tparent)
			if (split->classid == splitid)

	if (cl->split != split) {

		cbq_sync_defmap(cl);

		cl->defmap = def&mask;

		cl->defmap = (cl->defmap&~mask)|(def&mask);

	cbq_sync_defmap(cl);
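/* Illustration of the defmap bits (hypothetical values): defaults[] of a
   split node is indexed by skb->priority & TC_PRIO_MAX, and bit i of a
   class's defmap means "make me the default for priority i on my split
   node".  Calling cbq_change_defmap(cl, split, def = 0x0A, mask = 0x0A)
   therefore sets bits 1 and 3, and cbq_sync_defmap() then installs cl as
   split->defaults[1] and split->defaults[3], unless a lower-level class
   under the same split also claims those slots. */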
static void cbq_unlink_class(struct cbq_class *this)
	struct cbq_class *cl, **clp;
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);

	for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {

	if (this->tparent) {

		} while ((cl = *clp) != this->sibling);

		if (this->tparent->children == this) {
			this->tparent->children = this->sibling;
			if (this->sibling == this)
				this->tparent->children = NULL;

		BUG_TRAP(this->sibling == this);

static void cbq_link_class(struct cbq_class *this)
	struct cbq_sched_data *q = qdisc_priv(this->qdisc);
	unsigned h = cbq_hash(this->classid);
	struct cbq_class *parent = this->tparent;

	this->sibling = this;
	this->next = q->classes[h];
	q->classes[h] = this;

	if (parent->children == NULL) {
		parent->children = this;
		this->sibling = parent->children->sibling;
		parent->children->sibling = this;
static unsigned int cbq_drop(struct Qdisc* sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl, *cl_head;

	for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
		if ((cl_head = q->active[prio]) == NULL)

			if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {

		} while ((cl = cl->next_alive) != cl_head);

cbq_reset(struct Qdisc* sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

	q->tx_borrowed = NULL;
	del_timer(&q->wd_timer);
	del_timer(&q->delay_timer);
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);

	for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
		q->active[prio] = NULL;

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next) {

			cl->next_alive = NULL;
			PSCHED_SET_PASTPERFECT(cl->undertime);
			cl->avgidle = cl->maxidle;
			cl->deficit = cl->quantum;
			cl->cpriority = cl->priority;
static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
	if (lss->change&TCF_CBQ_LSS_FLAGS) {
		cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
		cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
	if (lss->change&TCF_CBQ_LSS_EWMA)
		cl->ewma_log = lss->ewma_log;
	if (lss->change&TCF_CBQ_LSS_AVPKT)
		cl->avpkt = lss->avpkt;
	if (lss->change&TCF_CBQ_LSS_MINIDLE)
		cl->minidle = -(long)lss->minidle;
	if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
		cl->maxidle = lss->maxidle;
		cl->avgidle = lss->maxidle;
	if (lss->change&TCF_CBQ_LSS_OFFTIME)
		cl->offtime = lss->offtime;

static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
	q->nclasses[cl->priority]--;
	q->quanta[cl->priority] -= cl->weight;
	cbq_normalize_quanta(q, cl->priority);

static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
	q->nclasses[cl->priority]++;
	q->quanta[cl->priority] += cl->weight;
	cbq_normalize_quanta(q, cl->priority);

static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
	struct cbq_sched_data *q = qdisc_priv(cl->qdisc);

	cl->allot = wrr->allot;
	cl->weight = wrr->weight;
	if (wrr->priority) {
		cl->priority = wrr->priority-1;
		cl->cpriority = cl->priority;
		if (cl->priority >= cl->priority2)
			cl->priority2 = TC_CBQ_MAXPRIO-1;

static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
	switch (ovl->strategy) {
	case TC_CBQ_OVL_CLASSIC:
		cl->overlimit = cbq_ovl_classic;
	case TC_CBQ_OVL_DELAY:
		cl->overlimit = cbq_ovl_delay;
	case TC_CBQ_OVL_LOWPRIO:
		if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
		    ovl->priority2-1 <= cl->priority)
		cl->priority2 = ovl->priority2-1;
		cl->overlimit = cbq_ovl_lowprio;
	case TC_CBQ_OVL_DROP:
		cl->overlimit = cbq_ovl_drop;
	case TC_CBQ_OVL_RCLASSIC:
		cl->overlimit = cbq_ovl_rclassic;
	cl->penalty = (ovl->penalty*HZ)/1000;

#ifdef CONFIG_NET_CLS_POLICE
static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
	cl->police = p->police;

	if (cl->q->handle) {
		if (p->police == TC_POLICE_RECLASSIFY)
			cl->q->reshape_fail = cbq_reshape_fail;
		else
			cl->q->reshape_fail = NULL;

static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
	cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct rtattr *tb[TCA_CBQ_MAX];
	struct tc_ratespec *r;

	if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
	    tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))

	r = RTA_DATA(tb[TCA_CBQ_RATE-1]);

	if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)

	q->link.sibling = &q->link;
	q->link.classid = sch->handle;
	q->link.qdisc = sch;
	if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		q->link.q = &noop_qdisc;

	q->link.priority = TC_CBQ_MAXPRIO-1;
	q->link.priority2 = TC_CBQ_MAXPRIO-1;
	q->link.cpriority = TC_CBQ_MAXPRIO-1;
	q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
	q->link.overlimit = cbq_ovl_classic;
	q->link.allot = psched_mtu(sch->dev);
	q->link.quantum = q->link.allot;
	q->link.weight = q->link.R_tab->rate.rate;
	q->link.ewma_log = TC_CBQ_DEF_EWMA;
	q->link.avpkt = q->link.allot/2;
	q->link.minidle = -0x7FFFFFFF;
	q->link.stats_lock = &sch->dev->queue_lock;

	init_timer(&q->wd_timer);
	q->wd_timer.data = (unsigned long)sch;
	q->wd_timer.function = cbq_watchdog;
	init_timer(&q->delay_timer);
	q->delay_timer.data = (unsigned long)sch;
	q->delay_timer.function = cbq_undelay;
	q->toplevel = TC_CBQ_MAXLEVEL;
	PSCHED_GET_TIME(q->now);

	cbq_link_class(&q->link);

	if (tb[TCA_CBQ_LSSOPT-1])
		cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

	cbq_addprio(q, &q->link);
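/* For reference, a typical userspace setup exercising this qdisc might look
   like the following (illustrative only; the option spellings belong to the
   tc(8) utility, not to this file):

	tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
		rate 1Mbit allot 1514 prio 5 avpkt 1000 bounded

   The rate is turned by tc into the rate table consumed by L2T(), allot and
   weight feed the WRR quanta computed in cbq_normalize_quanta(), and
   "bounded" corresponds to a NULL ->borrow pointer (TCF_CBQ_LSS_BOUNDED in
   cbq_set_lss()). */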
static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
	unsigned char *b = skb->tail;

	RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
	unsigned char *b = skb->tail;
	struct tc_cbq_lssopt opt;

	if (cl->borrow == NULL)
		opt.flags |= TCF_CBQ_LSS_BOUNDED;
	if (cl->share == NULL)
		opt.flags |= TCF_CBQ_LSS_ISOLATED;
	opt.ewma_log = cl->ewma_log;
	opt.level = cl->level;
	opt.avpkt = cl->avpkt;
	opt.maxidle = cl->maxidle;
	opt.minidle = (u32)(-cl->minidle);
	opt.offtime = cl->offtime;

	RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
	unsigned char *b = skb->tail;
	struct tc_cbq_wrropt opt;

	opt.allot = cl->allot;
	opt.priority = cl->priority+1;
	opt.cpriority = cl->cpriority+1;
	opt.weight = cl->weight;
	RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
	unsigned char *b = skb->tail;
	struct tc_cbq_ovl opt;

	opt.strategy = cl->ovl_strategy;
	opt.priority2 = cl->priority2+1;
	opt.penalty = (cl->penalty*1000)/HZ;
	RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
	unsigned char *b = skb->tail;
	struct tc_cbq_fopt opt;

	if (cl->split || cl->defmap) {
		opt.split = cl->split ? cl->split->classid : 0;
		opt.defmap = cl->defmap;

		RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

#ifdef CONFIG_NET_CLS_POLICE
static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
	unsigned char *b = skb->tail;
	struct tc_cbq_police opt;

	opt.police = cl->police;
	RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);

	skb_trim(skb, b - skb->data);

static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
	if (cbq_dump_lss(skb, cl) < 0 ||
	    cbq_dump_rate(skb, cl) < 0 ||
	    cbq_dump_wrr(skb, cl) < 0 ||
	    cbq_dump_ovl(skb, cl) < 0 ||
#ifdef CONFIG_NET_CLS_POLICE
	    cbq_dump_police(skb, cl) < 0 ||
#endif
	    cbq_dump_fopt(skb, cl) < 0)
int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
	RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);

static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
	struct cbq_sched_data *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, &q->link) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	spin_lock_bh(&sch->dev->queue_lock);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &q->link.xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	spin_unlock_bh(&sch->dev->queue_lock);

	skb_trim(skb, b - skb->data);

cbq_dump_class(struct Qdisc *sch, unsigned long arg,
	       struct sk_buff *skb, struct tcmsg *tcm)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;
	unsigned char *b = skb->tail;

		tcm->tcm_parent = cl->tparent->classid;
		tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle = cl->classid;
	tcm->tcm_info = cl->q->handle;

	rta = (struct rtattr*)b;
	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
	if (cbq_dump_attr(skb, cl) < 0)
		goto rtattr_failure;
	rta->rta_len = skb->tail - b;
	cl->stats.qlen = cl->q->q.qlen;
	if (qdisc_copy_stats(skb, &cl->stats, cl->stats_lock))
		goto rtattr_failure;
	spin_lock_bh(&sch->dev->queue_lock);
	cl->xstats.avgidle = cl->avgidle;
	cl->xstats.undertime = 0;
	if (!PSCHED_IS_PASTPERFECT(cl->undertime))
		cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
	q->link.xstats.avgidle = q->link.avgidle;
	if (cbq_copy_xstats(skb, &cl->xstats)) {
		spin_unlock_bh(&sch->dev->queue_lock);
		goto rtattr_failure;
	spin_unlock_bh(&sch->dev->queue_lock);

	skb_trim(skb, b - skb->data);
static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
	struct cbq_class *cl = (struct cbq_class*)arg;

		if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)

#ifdef CONFIG_NET_CLS_POLICE
		if (cl->police == TC_POLICE_RECLASSIFY)
			new->reshape_fail = cbq_reshape_fail;

		sch->q.qlen -= (*old)->q.qlen;
		sch_tree_unlock(sch);

static struct Qdisc *
cbq_leaf(struct Qdisc *sch, unsigned long arg)
	struct cbq_class *cl = (struct cbq_class*)arg;

	return cl ? cl->q : NULL;

static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = cbq_class_lookup(q, classid);

		return (unsigned long)cl;

static void cbq_destroy_filters(struct cbq_class *cl)
	struct tcf_proto *tp;

	while ((tp = cl->filter_list) != NULL) {
		cl->filter_list = tp->next;

static void cbq_destroy_class(struct cbq_class *cl)
	cbq_destroy_filters(cl);
	qdisc_destroy(cl->q);
	qdisc_put_rtab(cl->R_tab);
#ifdef CONFIG_NET_ESTIMATOR
	qdisc_kill_estimator(&cl->stats);
cbq_destroy(struct Qdisc* sch)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl;

#ifdef CONFIG_NET_CLS_POLICE

	for (h = 0; h < 16; h++) {
		for (cl = q->classes[h]; cl; cl = cl->next)
			cbq_destroy_filters(cl);

	for (h = 0; h < 16; h++) {
		struct cbq_class *next;

		for (cl = q->classes[h]; cl; cl = next) {

			cbq_destroy_class(cl);

	qdisc_put_rtab(q->link.R_tab);

static void cbq_put(struct Qdisc *sch, unsigned long arg)
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (--cl->refcnt == 0) {
#ifdef CONFIG_NET_CLS_POLICE
		struct cbq_sched_data *q = qdisc_priv(sch);

		spin_lock_bh(&sch->dev->queue_lock);
		if (q->rx_class == cl)
		spin_unlock_bh(&sch->dev->queue_lock);

		cbq_destroy_class(cl);
cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)*arg;
	struct rtattr *opt = tca[TCA_OPTIONS-1];
	struct rtattr *tb[TCA_CBQ_MAX];
	struct cbq_class *parent;
	struct qdisc_rate_table *rtab = NULL;

	    rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))

	if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))

	if (tb[TCA_CBQ_FOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))

	if (tb[TCA_CBQ_RATE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))

	if (tb[TCA_CBQ_LSSOPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))

	if (tb[TCA_CBQ_WRROPT-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))

#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1] &&
	    RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))

		if (cl->tparent && cl->tparent->classid != parentid)
		if (!cl->tparent && parentid != TC_H_ROOT)

		if (tb[TCA_CBQ_RATE-1]) {
			rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);

		/* Change class parameters */

		if (cl->next_alive != NULL)
			cbq_deactivate_class(cl);

			rtab = xchg(&cl->R_tab, rtab);
			qdisc_put_rtab(rtab);

		if (tb[TCA_CBQ_LSSOPT-1])
			cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));

		if (tb[TCA_CBQ_WRROPT-1]) {
			cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));

		if (tb[TCA_CBQ_OVL_STRATEGY-1])
			cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));

#ifdef CONFIG_NET_CLS_POLICE
		if (tb[TCA_CBQ_POLICE-1])
			cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));

		if (tb[TCA_CBQ_FOPT-1])
			cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));

			cbq_activate_class(cl);

		sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
		if (tca[TCA_RATE-1]) {
			qdisc_kill_estimator(&cl->stats);
			qdisc_new_estimator(&cl->stats, cl->stats_lock,

	if (parentid == TC_H_ROOT)

	if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
	    tb[TCA_CBQ_LSSOPT-1] == NULL)

	rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);

	if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))

		classid = TC_H_MAKE(sch->handle,0x8000);

		for (i=0; i<0x8000; i++) {
			if (++q->hgenerator >= 0x8000)
			if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)

		classid = classid|q->hgenerator;

	parent = cbq_class_lookup(q, parentid);

	cl = kmalloc(sizeof(*cl), GFP_KERNEL);

	memset(cl, 0, sizeof(*cl));

	if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
		cl->q = &noop_qdisc;
	cl->classid = classid;
	cl->tparent = parent;

	cl->allot = parent->allot;
	cl->quantum = cl->allot;
	cl->weight = cl->R_tab->rate.rate;
	cl->stats_lock = &sch->dev->queue_lock;

	cl->borrow = cl->tparent;
	if (cl->tparent != &q->link)
		cl->share = cl->tparent;
	cbq_adjust_levels(parent);
	cl->minidle = -0x7FFFFFFF;
	cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
	cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
	if (cl->ewma_log == 0)
		cl->ewma_log = q->link.ewma_log;
		cl->maxidle = q->link.maxidle;
		cl->avpkt = q->link.avpkt;
	cl->overlimit = cbq_ovl_classic;
	if (tb[TCA_CBQ_OVL_STRATEGY-1])
		cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
#ifdef CONFIG_NET_CLS_POLICE
	if (tb[TCA_CBQ_POLICE-1])
		cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
	if (tb[TCA_CBQ_FOPT-1])
		cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
	sch_tree_unlock(sch);

#ifdef CONFIG_NET_ESTIMATOR
	if (tca[TCA_RATE-1])
		qdisc_new_estimator(&cl->stats, cl->stats_lock,

	*arg = (unsigned long)cl;

	qdisc_put_rtab(rtab);
static int cbq_delete(struct Qdisc *sch, unsigned long arg)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class*)arg;

	if (cl->filters || cl->children || cl == &q->link)

		cbq_deactivate_class(cl);

	if (q->tx_borrowed == cl)
		q->tx_borrowed = q->tx_class;
	if (q->tx_class == cl) {
		q->tx_borrowed = NULL;
#ifdef CONFIG_NET_CLS_POLICE
	if (q->rx_class == cl)

	cbq_unlink_class(cl);
	cbq_adjust_levels(cl->tparent);

	cbq_sync_defmap(cl);

	sch_tree_unlock(sch);

	if (--cl->refcnt == 0)
		cbq_destroy_class(cl);
static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *cl = (struct cbq_class *)arg;

	return &cl->filter_list;

static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
	struct cbq_sched_data *q = qdisc_priv(sch);
	struct cbq_class *p = (struct cbq_class*)parent;
	struct cbq_class *cl = cbq_class_lookup(q, classid);

		if (p && p->level <= cl->level)

		return (unsigned long)cl;

static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
	struct cbq_class *cl = (struct cbq_class*)arg;

static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
	struct cbq_sched_data *q = qdisc_priv(sch);

	for (h = 0; h < 16; h++) {
		struct cbq_class *cl;

		for (cl = q->classes[h]; cl; cl = cl->next) {
			if (arg->count < arg->skip) {

			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
static struct Qdisc_class_ops cbq_class_ops = {
	.change		=	cbq_change_class,
	.delete		=	cbq_delete,
	.tcf_chain	=	cbq_find_tcf,
	.bind_tcf	=	cbq_bind_filter,
	.unbind_tcf	=	cbq_unbind_filter,
	.dump		=	cbq_dump_class,

static struct Qdisc_ops cbq_qdisc_ops = {
	.cl_ops		=	&cbq_class_ops,
	.priv_size	=	sizeof(struct cbq_sched_data),
	.enqueue	=	cbq_enqueue,
	.dequeue	=	cbq_dequeue,
	.requeue	=	cbq_requeue,
	.destroy	=	cbq_destroy,
	.owner		=	THIS_MODULE,

static int __init cbq_module_init(void)
	return register_qdisc(&cbq_qdisc_ops);

static void __exit cbq_module_exit(void)
	unregister_qdisc(&cbq_qdisc_ops);

module_init(cbq_module_init)
module_exit(cbq_module_exit)
MODULE_LICENSE("GPL");