1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /*      Class-Based Queueing (CBQ) algorithm.
41         =======================================
42
43         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44                  Management Models for Packet Networks",
45                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50                  Parameters", 1996
51
52                  [4] Sally Floyd and Michael Speer, "Experimental Results
53                  for Class-Based Queueing", 1998, not published.
54
55         -----------------------------------------------------------------------
56
57         The algorithm skeleton was taken from the NS simulator's cbq.cc.
58         If someone wants to check this code against the LBL version,
59         they should take into account that ONLY the skeleton was borrowed;
60         the implementation is different. In particular:
61
62         --- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than the MTU, which is always the case when real-time classes
65         have small rates. Note that the statement in [3] is
66         incomplete; the delay may actually be estimated even if the class
67         per-round allotment is less than the MTU. Namely, if the per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1, then
69
70         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72         In the worst case we have IntServ estimate with D = W*r+k*MTU
73         and C = MTU*r. The proof (if correct at all) is trivial.
74
75
76         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77         interpret some places, which look like incorrect translations
78         from NS. Anyone is welcome to find these differences
79         and explain to me why I am wrong 8).
80
81         --- Linux has no EOI event, so we cannot estimate the true class
82         idle time. The workaround is to consider the next dequeue event
83         as a sign that the previous packet has finished. This is wrong
84         because of internal device queueing, but on a permanently loaded
85         link it holds. Moreover, combined with the clock integrator, this
86         scheme looks very close to an ideal solution.  */
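/*      As a rough worked instance of the bound above (numbers chosen for
        illustration only): take MTU = 1500 bytes, k = 2 classes with
        r_1 = r_2 = 0.25 (so r = 0.5), and W = 2000 bytes, so that the
        per-round allotment W*r_i = 500 bytes is below the MTU. Then
        [MTU/(W*r_i)] = 3 and W*r = 1000 bytes, giving

        delay_1 <= (3*1000 + 1000 + 2*1500)/B = 7000 bytes / B,

        i.e. about 7000 byte-times on a link of bandwidth B, even though
        each round grants the class only a third of an MTU.  */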
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93         struct cbq_class        *next;          /* hash table link */
94         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
95
96 /* Parameters */
97         u32                     classid;
98         unsigned char           priority;       /* class priority */
99         unsigned char           priority2;      /* priority to be used after overlimit */
100         unsigned char           ewma_log;       /* time constant for idle time calculation */
101         unsigned char           ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103         unsigned char           police;
104 #endif
105
106         u32                     defmap;
107
108         /* Link-sharing scheduler parameters */
109         long                    maxidle;        /* Class parameters: see below. */
110         long                    offtime;
111         long                    minidle;
112         u32                     avpkt;
113         struct qdisc_rate_table *R_tab;
114
115         /* Overlimit strategy parameters */
116         void                    (*overlimit)(struct cbq_class *cl);
117         long                    penalty;
118
119         /* General scheduler (WRR) parameters */
120         long                    allot;
121         long                    quantum;        /* Allotment per WRR round */
122         long                    weight;         /* Relative allotment: see below */
123
124         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
125         struct cbq_class        *split;         /* Ptr to split node */
126         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
127         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
128         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
129                                                    parent otherwise */
130         struct cbq_class        *sibling;       /* Sibling chain */
131         struct cbq_class        *children;      /* Pointer to children chain */
132
133         struct Qdisc            *q;             /* Elementary queueing discipline */
134
135
136 /* Variables */
137         unsigned char           cpriority;      /* Effective priority */
138         unsigned char           delayed;
139         unsigned char           level;          /* level of the class in hierarchy:
140                                                    0 for leaf classes, and maximal
141                                                    level of children + 1 for nodes.
142                                                  */
143
144         psched_time_t           last;           /* Last end of service */
145         psched_time_t           undertime;
146         long                    avgidle;
147         long                    deficit;        /* Saved deficit for WRR */
148         unsigned long           penalized;
149         struct tc_stats         stats;
150         spinlock_t              *stats_lock;
151         struct tc_cbq_xstats    xstats;
152
153         struct tcf_proto        *filter_list;
154
155         int                     refcnt;
156         int                     filters;
157
158         struct cbq_class        *defaults[TC_PRIO_MAX+1];
159 };
160
161 struct cbq_sched_data
162 {
163         struct cbq_class        *classes[16];           /* Hash table of all classes */
164         int                     nclasses[TC_CBQ_MAXPRIO+1];
165         unsigned                quanta[TC_CBQ_MAXPRIO+1];
166
167         struct cbq_class        link;
168
169         unsigned                activemask;
170         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
171                                                                    with backlog */
172
173 #ifdef CONFIG_NET_CLS_POLICE
174         struct cbq_class        *rx_class;
175 #endif
176         struct cbq_class        *tx_class;
177         struct cbq_class        *tx_borrowed;
178         int                     tx_len;
179         psched_time_t           now;            /* Cached timestamp */
180         psched_time_t           now_rt;         /* Cached real time */
181         unsigned                pmask;
182
183         struct timer_list       delay_timer;
184         struct timer_list       wd_timer;       /* Watchdog timer,
185                                                    started when CBQ has
186                                                    backlog, but cannot
187                                                    transmit just now */
188         long                    wd_expires;
189         int                     toplevel;
190         u32                     hgenerator;
191 };
192
193
194 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
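/* L2T ("length to time", our reading of the name): look up in the class's
   rate table the time needed to transmit 'len' bytes at the class's
   configured rate; the table is indexed by len >> cell_log. */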
195
196
197 static __inline__ unsigned cbq_hash(u32 h)
198 {
199         h ^= h>>8;
200         h ^= h>>4;
201         return h&0xF;
202 }
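/* Fold a 32-bit classid down to a 4-bit bucket index for the 16-entry
   classes[] hash table in cbq_sched_data. */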
203
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 {
207         struct cbq_class *cl;
208
209         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210                 if (cl->classid == classid)
211                         return cl;
212         return NULL;
213 }
214
215 #ifdef CONFIG_NET_CLS_POLICE
216
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 {
220         struct cbq_class *cl, *new;
221
222         for (cl = this->tparent; cl; cl = cl->tparent)
223                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224                         return new;
225
226         return NULL;
227 }
228
229 #endif
230
231 /* Classify a packet. The procedure is pretty complicated, but
232    it allows us to combine link sharing and priority scheduling
233    transparently.
234
235    Namely, you can put link-sharing rules (e.g. route based) at the root
236    of the CBQ, so that they resolve to split nodes. Then packets are
237    classified by logical priority, or a more specific classifier may be
238    attached to the split node.
239  */
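/* Concretely: if a filter resolves the packet to a non-leaf (split) class,
   the loop below repeats with that class as the new head and applies its
   own filter_list; when no filter matches, the head's defaults[] table,
   indexed by the logical priority (TC_PRIO_*), picks the class, falling
   back to the TC_PRIO_BESTEFFORT slot. */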
240
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
243 {
244         struct cbq_sched_data *q = qdisc_priv(sch);
245         struct cbq_class *head = &q->link;
246         struct cbq_class **defmap;
247         struct cbq_class *cl = NULL;
248         u32 prio = skb->priority;
249         struct tcf_result res;
250
251         /*
252          *  Step 1. If skb->priority points to one of our classes, use it.
253          */
254         if (TC_H_MAJ(prio^sch->handle) == 0 &&
255             (cl = cbq_class_lookup(q, prio)) != NULL)
256                         return cl;
257
258         for (;;) {
259                 int result = 0;
260 #ifdef CONFIG_NET_CLS_ACT
261                 int terminal = 0;
262 #endif
263                 defmap = head->defaults;
264
265                 /*
266                  * Step 2+n. Apply classifier.
267                  */
268                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269                         goto fallback;
270
271                 if ((cl = (void*)res.class) == NULL) {
272                         if (TC_H_MAJ(res.classid))
273                                 cl = cbq_class_lookup(q, res.classid);
274                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
275                                 cl = defmap[TC_PRIO_BESTEFFORT];
276
277                         if (cl == NULL || cl->level >= head->level)
278                                 goto fallback;
279                 }
280
281 #ifdef CONFIG_NET_CLS_ACT
282                 switch (result) {
283                 case TC_ACT_SHOT: /* Stop and kfree */
284                         *qres = NET_XMIT_DROP;
285                         terminal = 1;
286                         break;
287                 case TC_ACT_QUEUED:
288                 case TC_ACT_STOLEN: 
289                         terminal = 1;
290                         break;
291                 case TC_ACT_RECLASSIFY:  /* Things look good */
292                 case TC_ACT_OK: 
293                 case TC_ACT_UNSPEC:
294                 default:
295                         break;
296                 }
297
298                 if (terminal) {
299                         kfree_skb(skb);
300                         return NULL;
301                 }
302 #else
303 #ifdef CONFIG_NET_CLS_POLICE
304                 switch (result) {
305                 case TC_POLICE_RECLASSIFY:
306                         return cbq_reclassify(skb, cl);
307                 case TC_POLICE_SHOT:
308                         return NULL;
309                 default:
310                         break;
311                 }
312 #endif
313 #endif
314                 if (cl->level == 0)
315                         return cl;
316
317                 /*
318                  * Step 3+n. If the classifier selected a link-sharing class,
319                  *         apply its agency-specific classifier.
320                  *         Repeat this procedure until we hit a leaf node.
321                  */
322                 head = cl;
323         }
324
325 fallback:
326         cl = head;
327
328         /*
329          * Step 4. No success...
330          */
331         if (TC_H_MAJ(prio) == 0 &&
332             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
333             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
334                 return head;
335
336         return cl;
337 }
338
339 /*
340    A packet has just been enqueued on an empty class.
341    cbq_activate_class adds it to the tail of the active class list
342    of its priority band.
343  */
344
345 static __inline__ void cbq_activate_class(struct cbq_class *cl)
346 {
347         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
348         int prio = cl->cpriority;
349         struct cbq_class *cl_tail;
350
351         cl_tail = q->active[prio];
352         q->active[prio] = cl;
353
354         if (cl_tail != NULL) {
355                 cl->next_alive = cl_tail->next_alive;
356                 cl_tail->next_alive = cl;
357         } else {
358                 cl->next_alive = cl;
359                 q->activemask |= (1<<prio);
360         }
361 }
362
363 /*
364    Unlink the class from the active chain.
365    Note that the same procedure is done directly in cbq_dequeue*
366    during the round-robin procedure.
367  */
368
369 static void cbq_deactivate_class(struct cbq_class *this)
370 {
371         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
372         int prio = this->cpriority;
373         struct cbq_class *cl;
374         struct cbq_class *cl_prev = q->active[prio];
375
376         do {
377                 cl = cl_prev->next_alive;
378                 if (cl == this) {
379                         cl_prev->next_alive = cl->next_alive;
380                         cl->next_alive = NULL;
381
382                         if (cl == q->active[prio]) {
383                                 q->active[prio] = cl_prev;
384                                 if (cl == q->active[prio]) {
385                                         q->active[prio] = NULL;
386                                         q->activemask &= ~(1<<prio);
387                                         return;
388                                 }
389                         }
390
391                         cl = cl_prev->next_alive;
392                         return;
393                 }
394         } while ((cl_prev = cl) != q->active[prio]);
395 }
396
397 static void
398 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
399 {
400         int toplevel = q->toplevel;
401
402         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
403                 psched_time_t now;
404                 psched_tdiff_t incr;
405
406                 PSCHED_GET_TIME(now);
407                 incr = PSCHED_TDIFF(now, q->now_rt);
408                 PSCHED_TADD2(q->now, incr, now);
409
410                 do {
411                         if (PSCHED_TLESS(cl->undertime, now)) {
412                                 q->toplevel = cl->level;
413                                 return;
414                         }
415                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
416         }
417 }
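/* Called on the enqueue paths (skipped if the class's own queue is
   throttled): walk the class and its borrow ancestors and lower
   q->toplevel to the level of the first one that is already under limit
   (undertime in the past). This tightens the top-level cutoff, so that
   while an under-limit class has traffic queued, other classes may not
   borrow from ancestors above that level. */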
418
419 static int
420 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
421 {
422         struct cbq_sched_data *q = qdisc_priv(sch);
423         int len = skb->len;
424         int ret = NET_XMIT_SUCCESS;
425         struct cbq_class *cl = cbq_classify(skb, sch,&ret);
426
427 #ifdef CONFIG_NET_CLS_POLICE
428         q->rx_class = cl;
429 #endif
430         if (cl) {
431 #ifdef CONFIG_NET_CLS_POLICE
432                 cl->q->__parent = sch;
433 #endif
434                 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
435                         sch->q.qlen++;
436                         sch->stats.packets++;
437                         sch->stats.bytes+=len;
438                         cbq_mark_toplevel(q, cl);
439                         if (!cl->next_alive)
440                                 cbq_activate_class(cl);
441                         return ret;
442                 }
443         }
444
445 #ifndef CONFIG_NET_CLS_ACT
446         sch->stats.drops++;
447         if (cl == NULL)
448                 kfree_skb(skb);
449         else {
450                 cbq_mark_toplevel(q, cl);
451                 cl->stats.drops++;
452         }
453 #else
454         if ( NET_XMIT_DROP == ret) {
455                 sch->stats.drops++;
456         }
457
458         if (cl != NULL) {
459                 cbq_mark_toplevel(q, cl);
460                 cl->stats.drops++;
461         }
462 #endif
463         return ret;
464 }
465
466 static int
467 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
468 {
469         struct cbq_sched_data *q = qdisc_priv(sch);
470         struct cbq_class *cl;
471         int ret;
472
473         if ((cl = q->tx_class) == NULL) {
474                 kfree_skb(skb);
475                 sch->stats.drops++;
476                 return NET_XMIT_CN;
477         }
478         q->tx_class = NULL;
479
480         cbq_mark_toplevel(q, cl);
481
482 #ifdef CONFIG_NET_CLS_POLICE
483         q->rx_class = cl;
484         cl->q->__parent = sch;
485 #endif
486         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
487                 sch->q.qlen++;
488                 if (!cl->next_alive)
489                         cbq_activate_class(cl);
490                 return 0;
491         }
492         sch->stats.drops++;
493         cl->stats.drops++;
494         return ret;
495 }
496
497 /* Overlimit actions */
498
499 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
500
501 static void cbq_ovl_classic(struct cbq_class *cl)
502 {
503         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
504         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
505
506         if (!cl->delayed) {
507                 delay += cl->offtime;
508
509                 /*
510                    The class goes to sleep, so that it will have no
511                    chance to work off avgidle. Let's forgive it 8)
512
513                    BTW cbq-2.0 has a bug in this place; apparently
514                    they forgot to shift by cl->ewma_log.
515                  */
516                 if (cl->avgidle < 0)
517                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
518                 if (cl->avgidle < cl->minidle)
519                         cl->avgidle = cl->minidle;
520                 if (delay <= 0)
521                         delay = 1;
522                 PSCHED_TADD2(q->now, delay, cl->undertime);
523
524                 cl->xstats.overactions++;
525                 cl->delayed = 1;
526         }
527         if (q->wd_expires == 0 || q->wd_expires > delay)
528                 q->wd_expires = delay;
529
530         /* Dirty work! We must schedule wakeups based on
531            real available rate, rather than leaf rate,
532            which may be tiny (even zero).
533          */
534         if (q->toplevel == TC_CBQ_MAXLEVEL) {
535                 struct cbq_class *b;
536                 psched_tdiff_t base_delay = q->wd_expires;
537
538                 for (b = cl->borrow; b; b = b->borrow) {
539                         delay = PSCHED_TDIFF(b->undertime, q->now);
540                         if (delay < base_delay) {
541                                 if (delay <= 0)
542                                         delay = 1;
543                                 base_delay = delay;
544                         }
545                 }
546
547                 q->wd_expires = base_delay;
548         }
549 }
550
551 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime
552    when they go overlimit
553  */
554
555 static void cbq_ovl_rclassic(struct cbq_class *cl)
556 {
557         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
558         struct cbq_class *this = cl;
559
560         do {
561                 if (cl->level > q->toplevel) {
562                         cl = NULL;
563                         break;
564                 }
565         } while ((cl = cl->borrow) != NULL);
566
567         if (cl == NULL)
568                 cl = this;
569         cbq_ovl_classic(cl);
570 }
571
572 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
573
574 static void cbq_ovl_delay(struct cbq_class *cl)
575 {
576         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
577         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
578
579         if (!cl->delayed) {
580                 unsigned long sched = jiffies;
581
582                 delay += cl->offtime;
583                 if (cl->avgidle < 0)
584                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
585                 if (cl->avgidle < cl->minidle)
586                         cl->avgidle = cl->minidle;
587                 PSCHED_TADD2(q->now, delay, cl->undertime);
588
589                 if (delay > 0) {
590                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
591                         cl->penalized = sched;
592                         cl->cpriority = TC_CBQ_MAXPRIO;
593                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
594                         if (del_timer(&q->delay_timer) &&
595                             (long)(q->delay_timer.expires - sched) > 0)
596                                 q->delay_timer.expires = sched;
597                         add_timer(&q->delay_timer);
598                         cl->delayed = 1;
599                         cl->xstats.overactions++;
600                         return;
601                 }
602                 delay = 1;
603         }
604         if (q->wd_expires == 0 || q->wd_expires > delay)
605                 q->wd_expires = delay;
606 }
607
608 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
609
610 static void cbq_ovl_lowprio(struct cbq_class *cl)
611 {
612         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
613
614         cl->penalized = jiffies + cl->penalty;
615
616         if (cl->cpriority != cl->priority2) {
617                 cl->cpriority = cl->priority2;
618                 q->pmask |= (1<<cl->cpriority);
619                 cl->xstats.overactions++;
620         }
621         cbq_ovl_classic(cl);
622 }
623
624 /* TC_CBQ_OVL_DROP: penalize class by dropping */
625
626 static void cbq_ovl_drop(struct cbq_class *cl)
627 {
628         if (cl->q->ops->drop)
629                 if (cl->q->ops->drop(cl->q))
630                         cl->qdisc->q.qlen--;
631         cl->xstats.overactions++;
632         cbq_ovl_classic(cl);
633 }
634
635 static void cbq_watchdog(unsigned long arg)
636 {
637         struct Qdisc *sch = (struct Qdisc*)arg;
638
639         sch->flags &= ~TCQ_F_THROTTLED;
640         netif_schedule(sch->dev);
641 }
642
643 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
644 {
645         struct cbq_class *cl;
646         struct cbq_class *cl_prev = q->active[prio];
647         unsigned long now = jiffies;
648         unsigned long sched = now;
649
650         if (cl_prev == NULL)
651                 return now;
652
653         do {
654                 cl = cl_prev->next_alive;
655                 if ((long)(now - cl->penalized) > 0) {
656                         cl_prev->next_alive = cl->next_alive;
657                         cl->next_alive = NULL;
658                         cl->cpriority = cl->priority;
659                         cl->delayed = 0;
660                         cbq_activate_class(cl);
661
662                         if (cl == q->active[prio]) {
663                                 q->active[prio] = cl_prev;
664                                 if (cl == q->active[prio]) {
665                                         q->active[prio] = NULL;
666                                         return 0;
667                                 }
668                         }
669
670                         cl = cl_prev->next_alive;
671                 } else if ((long)(sched - cl->penalized) > 0)
672                         sched = cl->penalized;
673         } while ((cl_prev = cl) != q->active[prio]);
674
675         return (long)(sched - now);
676 }
677
678 static void cbq_undelay(unsigned long arg)
679 {
680         struct Qdisc *sch = (struct Qdisc*)arg;
681         struct cbq_sched_data *q = qdisc_priv(sch);
682         long delay = 0;
683         unsigned pmask;
684
685         pmask = q->pmask;
686         q->pmask = 0;
687
688         while (pmask) {
689                 int prio = ffz(~pmask);
690                 long tmp;
691
692                 pmask &= ~(1<<prio);
693
694                 tmp = cbq_undelay_prio(q, prio);
695                 if (tmp > 0) {
696                         q->pmask |= 1<<prio;
697                         if (tmp < delay || delay == 0)
698                                 delay = tmp;
699                 }
700         }
701
702         if (delay) {
703                 q->delay_timer.expires = jiffies + delay;
704                 add_timer(&q->delay_timer);
705         }
706
707         sch->flags &= ~TCQ_F_THROTTLED;
708         netif_schedule(sch->dev);
709 }
710
711
712 #ifdef CONFIG_NET_CLS_POLICE
713
714 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
715 {
716         int len = skb->len;
717         struct Qdisc *sch = child->__parent;
718         struct cbq_sched_data *q = qdisc_priv(sch);
719         struct cbq_class *cl = q->rx_class;
720
721         q->rx_class = NULL;
722
723         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
724
725                 cbq_mark_toplevel(q, cl);
726
727                 q->rx_class = cl;
728                 cl->q->__parent = sch;
729
730                 if (cl->q->enqueue(skb, cl->q) == 0) {
731                         sch->q.qlen++;
732                         sch->stats.packets++;
733                         sch->stats.bytes+=len;
734                         if (!cl->next_alive)
735                                 cbq_activate_class(cl);
736                         return 0;
737                 }
738                 sch->stats.drops++;
739                 return 0;
740         }
741
742         sch->stats.drops++;
743         return -1;
744 }
745 #endif
746
747 /*
748    This is a mission-critical procedure.
749
750    We "regenerate" the toplevel cutoff if the transmitting class
751    has backlog and is not regulated. This is not part of the
752    original CBQ description, but it looks more reasonable.
753    It may well be wrong; the question needs further investigation.
754 */
755
756 static __inline__ void
757 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
758                     struct cbq_class *borrowed)
759 {
760         if (cl && q->toplevel >= borrowed->level) {
761                 if (cl->q->q.qlen > 1) {
762                         do {
763                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
764                                         q->toplevel = borrowed->level;
765                                         return;
766                                 }
767                         } while ((borrowed=borrowed->borrow) != NULL);
768                 }
769 #if 0   
770         /* It is not necessary now. Uncommenting it
771            will save CPU cycles, but decrease fairness.
772          */
773                 q->toplevel = TC_CBQ_MAXLEVEL;
774 #endif
775         }
776 }
777
778 static void
779 cbq_update(struct cbq_sched_data *q)
780 {
781         struct cbq_class *this = q->tx_class;
782         struct cbq_class *cl = this;
783         int len = q->tx_len;
784
785         q->tx_class = NULL;
786
787         for ( ; cl; cl = cl->share) {
788                 long avgidle = cl->avgidle;
789                 long idle;
790
791                 cl->stats.packets++;
792                 cl->stats.bytes += len;
793
794                 /*
795                    (now - last) is total time between packet right edges.
796                    (last_pktlen/rate) is "virtual" busy time, so that
797
798                          idle = (now - last) - last_pktlen/rate
799                  */
800
801                 idle = PSCHED_TDIFF(q->now, cl->last);
802                 if ((unsigned long)idle > 128*1024*1024) {
803                         avgidle = cl->maxidle;
804                 } else {
805                         idle -= L2T(cl, len);
806
807                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
808                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
809                    cl->avgidle == true_avgidle/W,
810                    hence:
811                  */
812                         avgidle += idle - (avgidle>>cl->ewma_log);
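                        /* For example, with ewma_log = 5 (W = 1/32) this is
                           avgidle += idle - avgidle/32 (up to the rounding of
                           the shift), i.e. the scaled form of
                           true_avgidle := (31/32)*true_avgidle + idle/32. */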
813                 }
814
815                 if (avgidle <= 0) {
816                         /* Overlimit or at-limit */
817
818                         if (avgidle < cl->minidle)
819                                 avgidle = cl->minidle;
820
821                         cl->avgidle = avgidle;
822
823                         /* Calculate the expected time when this class
824                            will be allowed to send.
825                            It will occur when:
826                            (1-W)*true_avgidle + W*delay = 0, i.e.
827                            idle = (1/W - 1)*(-true_avgidle)
828                            or
829                            idle = (1 - W)*(-cl->avgidle);
830                          */
831                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
832
833                         /*
834                            That is not all.
835                            To maintain the rate allocated to the class,
836                            we add to undertime the virtual clock time
837                            necessary to complete the transmitted packet.
838                            (len/phys_bandwidth has already elapsed by
839                            the time cbq_update is called.)
840                          */
841
842                         idle -= L2T(&q->link, len);
843                         idle += L2T(cl, len);
844
845                         PSCHED_AUDIT_TDIFF(idle);
846
847                         PSCHED_TADD2(q->now, idle, cl->undertime);
848                 } else {
849                         /* Underlimit */
850
851                         PSCHED_SET_PASTPERFECT(cl->undertime);
852                         if (avgidle > cl->maxidle)
853                                 cl->avgidle = cl->maxidle;
854                         else
855                                 cl->avgidle = avgidle;
856                 }
857                 cl->last = q->now;
858         }
859
860         cbq_update_toplevel(q, this, q->tx_borrowed);
861 }
862
863 static __inline__ struct cbq_class *
864 cbq_under_limit(struct cbq_class *cl)
865 {
866         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
867         struct cbq_class *this_cl = cl;
868
869         if (cl->tparent == NULL)
870                 return cl;
871
872         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
873             !PSCHED_TLESS(q->now, cl->undertime)) {
874                 cl->delayed = 0;
875                 return cl;
876         }
877
878         do {
879                 /* This is a very suspicious place. Currently the overlimit
880                    action is generated for non-bounded classes
881                    only if the link is completely congested.
882                    Though this agrees with the ancestor-only paradigm,
883                    it looks very stupid. In particular,
884                    it means that this chunk of code will either
885                    never be called or will result in strong amplification
886                    of burstiness. Dangerous and silly, but
887                    no other solution exists.
888                  */
889                 if ((cl = cl->borrow) == NULL) {
890                         this_cl->stats.overlimits++;
891                         this_cl->overlimit(this_cl);
892                         return NULL;
893                 }
894                 if (cl->level > q->toplevel)
895                         return NULL;
896         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
897                  PSCHED_TLESS(q->now, cl->undertime));
898
899         cl->delayed = 0;
900         return cl;
901 }
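/* Returns the class itself if it may send now, otherwise the nearest
   borrow ancestor that is under limit; returns NULL if no ancestor can
   lend (in which case the class's overlimit action has been invoked) or
   if the top-level cutoff forbids borrowing from that ancestor. */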
902
903 static __inline__ struct sk_buff *
904 cbq_dequeue_prio(struct Qdisc *sch, int prio)
905 {
906         struct cbq_sched_data *q = qdisc_priv(sch);
907         struct cbq_class *cl_tail, *cl_prev, *cl;
908         struct sk_buff *skb;
909         int deficit;
910
911         cl_tail = cl_prev = q->active[prio];
912         cl = cl_prev->next_alive;
913
914         do {
915                 deficit = 0;
916
917                 /* Start round */
918                 do {
919                         struct cbq_class *borrow = cl;
920
921                         if (cl->q->q.qlen &&
922                             (borrow = cbq_under_limit(cl)) == NULL)
923                                 goto skip_class;
924
925                         if (cl->deficit <= 0) {
926                                 /* The class exhausted its allotment for
927                                    this round. Switch to the next one.
928                                  */
929                                 deficit = 1;
930                                 cl->deficit += cl->quantum;
931                                 goto next_class;
932                         }
933
934                         skb = cl->q->dequeue(cl->q);
935
936                         /* The class did not give us any skb :-(
937                            This can occur even if cl->q->q.qlen != 0,
938                            e.g. if cl->q == "tbf".
939                          */
940                         if (skb == NULL)
941                                 goto skip_class;
942
943                         cl->deficit -= skb->len;
944                         q->tx_class = cl;
945                         q->tx_borrowed = borrow;
946                         if (borrow != cl) {
947 #ifndef CBQ_XSTATS_BORROWS_BYTES
948                                 borrow->xstats.borrows++;
949                                 cl->xstats.borrows++;
950 #else
951                                 borrow->xstats.borrows += skb->len;
952                                 cl->xstats.borrows += skb->len;
953 #endif
954                         }
955                         q->tx_len = skb->len;
956
957                         if (cl->deficit <= 0) {
958                                 q->active[prio] = cl;
959                                 cl = cl->next_alive;
960                                 cl->deficit += cl->quantum;
961                         }
962                         return skb;
963
964 skip_class:
965                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
966                                 /* Class is empty or penalized.
967                                    Unlink it from active chain.
968                                  */
969                                 cl_prev->next_alive = cl->next_alive;
970                                 cl->next_alive = NULL;
971
972                                 /* Did cl_tail point to it? */
973                                 if (cl == cl_tail) {
974                                         /* Repair it! */
975                                         cl_tail = cl_prev;
976
977                                         /* Was it the last class in this band? */
978                                         if (cl == cl_tail) {
979                                                 /* Kill the band! */
980                                                 q->active[prio] = NULL;
981                                                 q->activemask &= ~(1<<prio);
982                                                 if (cl->q->q.qlen)
983                                                         cbq_activate_class(cl);
984                                                 return NULL;
985                                         }
986
987                                         q->active[prio] = cl_tail;
988                                 }
989                                 if (cl->q->q.qlen)
990                                         cbq_activate_class(cl);
991
992                                 cl = cl_prev;
993                         }
994
995 next_class:
996                         cl_prev = cl;
997                         cl = cl->next_alive;
998                 } while (cl_prev != cl_tail);
999         } while (deficit);
1000
1001         q->active[prio] = cl_prev;
1002
1003         return NULL;
1004 }
1005
1006 static __inline__ struct sk_buff *
1007 cbq_dequeue_1(struct Qdisc *sch)
1008 {
1009         struct cbq_sched_data *q = qdisc_priv(sch);
1010         struct sk_buff *skb;
1011         unsigned activemask;
1012
1013         activemask = q->activemask&0xFF;
1014         while (activemask) {
1015                 int prio = ffz(~activemask);
1016                 activemask &= ~(1<<prio);
1017                 skb = cbq_dequeue_prio(sch, prio);
1018                 if (skb)
1019                         return skb;
1020         }
1021         return NULL;
1022 }
1023
1024 static struct sk_buff *
1025 cbq_dequeue(struct Qdisc *sch)
1026 {
1027         struct sk_buff *skb;
1028         struct cbq_sched_data *q = qdisc_priv(sch);
1029         psched_time_t now;
1030         psched_tdiff_t incr;
1031
1032         PSCHED_GET_TIME(now);
1033         incr = PSCHED_TDIFF(now, q->now_rt);
1034
1035         if (q->tx_class) {
1036                 psched_tdiff_t incr2;
1037                 /* Time integrator. We calculate the EOS time
1038                    by adding the expected packet transmission time.
1039                    If real time is greater, we warp the artificial clock,
1040                    so that:
1041
1042                    cbq_time = max(real_time, work);
1043                  */
1044                 incr2 = L2T(&q->link, q->tx_len);
1045                 PSCHED_TADD(q->now, incr2);
1046                 cbq_update(q);
1047                 if ((incr -= incr2) < 0)
1048                         incr = 0;
1049         }
1050         PSCHED_TADD(q->now, incr);
1051         q->now_rt = now;
1052
1053         for (;;) {
1054                 q->wd_expires = 0;
1055
1056                 skb = cbq_dequeue_1(sch);
1057                 if (skb) {
1058                         sch->q.qlen--;
1059                         sch->flags &= ~TCQ_F_THROTTLED;
1060                         return skb;
1061                 }
1062
1063                 /* All the classes are overlimit.
1064
1065                    This is possible if:
1066
1067                    1. The scheduler is empty.
1068                    2. The toplevel cutoff inhibited borrowing.
1069                    3. The root class is overlimit.
1070
1071                    Reset the 2nd and 3rd conditions and retry.
1072
1073                    Note that NS and cbq-2.0 are buggy: peeking
1074                    an arbitrary class is appropriate for ancestor-only
1075                    sharing, but not for the toplevel algorithm.
1076
1077                    Our version is better but slower, because it requires
1078                    two passes; this is unavoidable with top-level sharing.
1079                 */
1080
1081                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1082                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1083                         break;
1084
1085                 q->toplevel = TC_CBQ_MAXLEVEL;
1086                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1087         }
1088
1089         /* No packets in the scheduler, or nobody wants to give them to us :-(
1090            Sigh... start the watchdog timer in the latter case. */
1091
1092         if (sch->q.qlen) {
1093                 sch->stats.overlimits++;
1094                 if (q->wd_expires) {
1095                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1096                         if (delay <= 0)
1097                                 delay = 1;
1098                         mod_timer(&q->wd_timer, jiffies + delay);
1099                         sch->flags |= TCQ_F_THROTTLED;
1100                 }
1101         }
1102         return NULL;
1103 }
1104
1105 /* CBQ class maintenance routines */
1106
1107 static void cbq_adjust_levels(struct cbq_class *this)
1108 {
1109         if (this == NULL)
1110                 return;
1111
1112         do {
1113                 int level = 0;
1114                 struct cbq_class *cl;
1115
1116                 if ((cl = this->children) != NULL) {
1117                         do {
1118                                 if (cl->level > level)
1119                                         level = cl->level;
1120                         } while ((cl = cl->sibling) != this->children);
1121                 }
1122                 this->level = level+1;
1123         } while ((this = this->tparent) != NULL);
1124 }
1125
1126 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1127 {
1128         struct cbq_class *cl;
1129         unsigned h;
1130
1131         if (q->quanta[prio] == 0)
1132                 return;
1133
1134         for (h=0; h<16; h++) {
1135                 for (cl = q->classes[h]; cl; cl = cl->next) {
1136                         /* BUGGGG... Beware! This expression suffers from
1137                            arithmetic overflow!
1138                          */
1139                         if (cl->priority == prio) {
1140                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1141                                         q->quanta[prio];
1142                         }
1143                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1144                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1145                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1146                         }
1147                 }
1148         }
1149 }
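/* Worked example of the expression above: two classes of equal weight w
   and allot 1514 in one priority band give q->nclasses[prio] = 2 and
   q->quanta[prio] = 2*w, so each quantum = (w*1514*2)/(2*w) = 1514,
   i.e. its own allot.  The overflow warning is real: cbq_init() sets
   q->link.weight to the link rate in bytes/sec, so on 32-bit machines
   weight*allot*nclasses can easily overflow a long. */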
1150
1151 static void cbq_sync_defmap(struct cbq_class *cl)
1152 {
1153         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1154         struct cbq_class *split = cl->split;
1155         unsigned h;
1156         int i;
1157
1158         if (split == NULL)
1159                 return;
1160
1161         for (i=0; i<=TC_PRIO_MAX; i++) {
1162                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1163                         split->defaults[i] = NULL;
1164         }
1165
1166         for (i=0; i<=TC_PRIO_MAX; i++) {
1167                 int level = split->level;
1168
1169                 if (split->defaults[i])
1170                         continue;
1171
1172                 for (h=0; h<16; h++) {
1173                         struct cbq_class *c;
1174
1175                         for (c = q->classes[h]; c; c = c->next) {
1176                                 if (c->split == split && c->level < level &&
1177                                     c->defmap&(1<<i)) {
1178                                         split->defaults[i] = c;
1179                                         level = c->level;
1180                                 }
1181                         }
1182                 }
1183         }
1184 }
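/* Rebuild the split node's defaults[] table: first drop entries that still
   point at this class but are no longer covered by its defmap, then fill
   every empty priority slot with the lowest-level class bound to the same
   split whose defmap covers that priority. */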
1185
1186 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1187 {
1188         struct cbq_class *split = NULL;
1189
1190         if (splitid == 0) {
1191                 if ((split = cl->split) == NULL)
1192                         return;
1193                 splitid = split->classid;
1194         }
1195
1196         if (split == NULL || split->classid != splitid) {
1197                 for (split = cl->tparent; split; split = split->tparent)
1198                         if (split->classid == splitid)
1199                                 break;
1200         }
1201
1202         if (split == NULL)
1203                 return;
1204
1205         if (cl->split != split) {
1206                 cl->defmap = 0;
1207                 cbq_sync_defmap(cl);
1208                 cl->split = split;
1209                 cl->defmap = def&mask;
1210         } else
1211                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1212
1213         cbq_sync_defmap(cl);
1214 }
1215
1216 static void cbq_unlink_class(struct cbq_class *this)
1217 {
1218         struct cbq_class *cl, **clp;
1219         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1220
1221         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1222                 if (cl == this) {
1223                         *clp = cl->next;
1224                         cl->next = NULL;
1225                         break;
1226                 }
1227         }
1228
1229         if (this->tparent) {
1230                 clp=&this->sibling;
1231                 cl = *clp;
1232                 do {
1233                         if (cl == this) {
1234                                 *clp = cl->sibling;
1235                                 break;
1236                         }
1237                         clp = &cl->sibling;
1238                 } while ((cl = *clp) != this->sibling);
1239
1240                 if (this->tparent->children == this) {
1241                         this->tparent->children = this->sibling;
1242                         if (this->sibling == this)
1243                                 this->tparent->children = NULL;
1244                 }
1245         } else {
1246                 BUG_TRAP(this->sibling == this);
1247         }
1248 }
1249
1250 static void cbq_link_class(struct cbq_class *this)
1251 {
1252         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1253         unsigned h = cbq_hash(this->classid);
1254         struct cbq_class *parent = this->tparent;
1255
1256         this->sibling = this;
1257         this->next = q->classes[h];
1258         q->classes[h] = this;
1259
1260         if (parent == NULL)
1261                 return;
1262
1263         if (parent->children == NULL) {
1264                 parent->children = this;
1265         } else {
1266                 this->sibling = parent->children->sibling;
1267                 parent->children->sibling = this;
1268         }
1269 }
1270
1271 static unsigned int cbq_drop(struct Qdisc* sch)
1272 {
1273         struct cbq_sched_data *q = qdisc_priv(sch);
1274         struct cbq_class *cl, *cl_head;
1275         int prio;
1276         unsigned int len;
1277
1278         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1279                 if ((cl_head = q->active[prio]) == NULL)
1280                         continue;
1281
1282                 cl = cl_head;
1283                 do {
1284                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1285                                 sch->q.qlen--;
1286                                 return len;
1287                         }
1288                 } while ((cl = cl->next_alive) != cl_head);
1289         }
1290         return 0;
1291 }
1292
1293 static void
1294 cbq_reset(struct Qdisc* sch)
1295 {
1296         struct cbq_sched_data *q = qdisc_priv(sch);
1297         struct cbq_class *cl;
1298         int prio;
1299         unsigned h;
1300
1301         q->activemask = 0;
1302         q->pmask = 0;
1303         q->tx_class = NULL;
1304         q->tx_borrowed = NULL;
1305         del_timer(&q->wd_timer);
1306         del_timer(&q->delay_timer);
1307         q->toplevel = TC_CBQ_MAXLEVEL;
1308         PSCHED_GET_TIME(q->now);
1309         q->now_rt = q->now;
1310
1311         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1312                 q->active[prio] = NULL;
1313
1314         for (h = 0; h < 16; h++) {
1315                 for (cl = q->classes[h]; cl; cl = cl->next) {
1316                         qdisc_reset(cl->q);
1317
1318                         cl->next_alive = NULL;
1319                         PSCHED_SET_PASTPERFECT(cl->undertime);
1320                         cl->avgidle = cl->maxidle;
1321                         cl->deficit = cl->quantum;
1322                         cl->cpriority = cl->priority;
1323                 }
1324         }
1325         sch->q.qlen = 0;
1326 }
1327
1328
1329 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1330 {
1331         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1332                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1333                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1334         }
1335         if (lss->change&TCF_CBQ_LSS_EWMA)
1336                 cl->ewma_log = lss->ewma_log;
1337         if (lss->change&TCF_CBQ_LSS_AVPKT)
1338                 cl->avpkt = lss->avpkt;
1339         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1340                 cl->minidle = -(long)lss->minidle;
1341         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1342                 cl->maxidle = lss->maxidle;
1343                 cl->avgidle = lss->maxidle;
1344         }
1345         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1346                 cl->offtime = lss->offtime;
1347         return 0;
1348 }
1349
1350 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1351 {
1352         q->nclasses[cl->priority]--;
1353         q->quanta[cl->priority] -= cl->weight;
1354         cbq_normalize_quanta(q, cl->priority);
1355 }
1356
1357 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1358 {
1359         q->nclasses[cl->priority]++;
1360         q->quanta[cl->priority] += cl->weight;
1361         cbq_normalize_quanta(q, cl->priority);
1362 }
1363
1364 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1365 {
1366         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1367
1368         if (wrr->allot)
1369                 cl->allot = wrr->allot;
1370         if (wrr->weight)
1371                 cl->weight = wrr->weight;
1372         if (wrr->priority) {
1373                 cl->priority = wrr->priority-1;
1374                 cl->cpriority = cl->priority;
1375                 if (cl->priority >= cl->priority2)
1376                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1377         }
1378
1379         cbq_addprio(q, cl);
1380         return 0;
1381 }
1382
1383 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1384 {
1385         switch (ovl->strategy) {
1386         case TC_CBQ_OVL_CLASSIC:
1387                 cl->overlimit = cbq_ovl_classic;
1388                 break;
1389         case TC_CBQ_OVL_DELAY:
1390                 cl->overlimit = cbq_ovl_delay;
1391                 break;
1392         case TC_CBQ_OVL_LOWPRIO:
1393                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1394                     ovl->priority2-1 <= cl->priority)
1395                         return -EINVAL;
1396                 cl->priority2 = ovl->priority2-1;
1397                 cl->overlimit = cbq_ovl_lowprio;
1398                 break;
1399         case TC_CBQ_OVL_DROP:
1400                 cl->overlimit = cbq_ovl_drop;
1401                 break;
1402         case TC_CBQ_OVL_RCLASSIC:
1403                 cl->overlimit = cbq_ovl_rclassic;
1404                 break;
1405         default:
1406                 return -EINVAL;
1407         }
1408         cl->penalty = (ovl->penalty*HZ)/1000;
1409         return 0;
1410 }
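/* Note that the user-supplied penalty is scaled by HZ/1000 into jiffies
   here (and back again in cbq_dump_ovl()), i.e. it is effectively
   interpreted as a value in milliseconds. */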
1411
1412 #ifdef CONFIG_NET_CLS_POLICE
1413 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1414 {
1415         cl->police = p->police;
1416
1417         if (cl->q->handle) {
1418                 if (p->police == TC_POLICE_RECLASSIFY)
1419                         cl->q->reshape_fail = cbq_reshape_fail;
1420                 else
1421                         cl->q->reshape_fail = NULL;
1422         }
1423         return 0;
1424 }
1425 #endif
1426
1427 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1428 {
1429         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1430         return 0;
1431 }
1432
1433 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1434 {
1435         struct cbq_sched_data *q = qdisc_priv(sch);
1436         struct rtattr *tb[TCA_CBQ_MAX];
1437         struct tc_ratespec *r;
1438
1439         if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1440             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1441             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1442                 return -EINVAL;
1443
1444         if (tb[TCA_CBQ_LSSOPT-1] &&
1445             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1446                 return -EINVAL;
1447
1448         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1449
1450         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1451                 return -EINVAL;
1452
1453         q->link.refcnt = 1;
1454         q->link.sibling = &q->link;
1455         q->link.classid = sch->handle;
1456         q->link.qdisc = sch;
1457         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1458                 q->link.q = &noop_qdisc;
1459
1460         q->link.priority = TC_CBQ_MAXPRIO-1;
1461         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1462         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1463         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1464         q->link.overlimit = cbq_ovl_classic;
1465         q->link.allot = psched_mtu(sch->dev);
1466         q->link.quantum = q->link.allot;
1467         q->link.weight = q->link.R_tab->rate.rate;
1468
1469         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1470         q->link.avpkt = q->link.allot/2;
1471         q->link.minidle = -0x7FFFFFFF;
1472         q->link.stats_lock = &sch->dev->queue_lock;
1473
1474         init_timer(&q->wd_timer);
1475         q->wd_timer.data = (unsigned long)sch;
1476         q->wd_timer.function = cbq_watchdog;
1477         init_timer(&q->delay_timer);
1478         q->delay_timer.data = (unsigned long)sch;
1479         q->delay_timer.function = cbq_undelay;
1480         q->toplevel = TC_CBQ_MAXLEVEL;
1481         PSCHED_GET_TIME(q->now);
1482         q->now_rt = q->now;
1483
1484         cbq_link_class(&q->link);
1485
1486         if (tb[TCA_CBQ_LSSOPT-1])
1487                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1488
1489         cbq_addprio(q, &q->link);
1490         return 0;
1491 }
1492
1493 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1494 {
1495         unsigned char    *b = skb->tail;
1496
1497         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1498         return skb->len;
1499
1500 rtattr_failure:
1501         skb_trim(skb, b - skb->data);
1502         return -1;
1503 }
1504
1505 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1506 {
1507         unsigned char    *b = skb->tail;
1508         struct tc_cbq_lssopt opt;
1509
1510         opt.flags = 0;
1511         if (cl->borrow == NULL)
1512                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1513         if (cl->share == NULL)
1514                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1515         opt.ewma_log = cl->ewma_log;
1516         opt.level = cl->level;
1517         opt.avpkt = cl->avpkt;
1518         opt.maxidle = cl->maxidle;
1519         opt.minidle = (u32)(-cl->minidle);
1520         opt.offtime = cl->offtime;
1521         opt.change = ~0;
1522         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1523         return skb->len;
1524
1525 rtattr_failure:
1526         skb_trim(skb, b - skb->data);
1527         return -1;
1528 }
1529
1530 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1531 {
1532         unsigned char    *b = skb->tail;
1533         struct tc_cbq_wrropt opt;
1534
1535         opt.flags = 0;
1536         opt.allot = cl->allot;
1537         opt.priority = cl->priority+1;
1538         opt.cpriority = cl->cpriority+1;
1539         opt.weight = cl->weight;
1540         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1541         return skb->len;
1542
1543 rtattr_failure:
1544         skb_trim(skb, b - skb->data);
1545         return -1;
1546 }
1547
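/* Overlimit strategy; the penalty is converted from jiffies back to
 * milliseconds for userspace. */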
1548 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1549 {
1550         unsigned char    *b = skb->tail;
1551         struct tc_cbq_ovl opt;
1552
1553         opt.strategy = cl->ovl_strategy;
1554         opt.priority2 = cl->priority2+1;
1555         opt.penalty = (cl->penalty*1000)/HZ;
1556         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1557         return skb->len;
1558
1559 rtattr_failure:
1560         skb_trim(skb, b - skb->data);
1561         return -1;
1562 }
1563
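/* Split/defmap (default class mapping) options, emitted only when the
 * class actually uses them. */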
1564 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1565 {
1566         unsigned char    *b = skb->tail;
1567         struct tc_cbq_fopt opt;
1568
1569         if (cl->split || cl->defmap) {
1570                 opt.split = cl->split ? cl->split->classid : 0;
1571                 opt.defmap = cl->defmap;
1572                 opt.defchange = ~0;
1573                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1574         }
1575         return skb->len;
1576
1577 rtattr_failure:
1578         skb_trim(skb, b - skb->data);
1579         return -1;
1580 }
1581
1582 #ifdef CONFIG_NET_CLS_POLICE
1583 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1584 {
1585         unsigned char    *b = skb->tail;
1586         struct tc_cbq_police opt;
1587
1588         if (cl->police) {
1589                 opt.police = cl->police;
1590                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1591         }
1592         return skb->len;
1593
1594 rtattr_failure:
1595         skb_trim(skb, b - skb->data);
1596         return -1;
1597 }
1598 #endif
1599
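/* Emit the complete attribute set for one class: LSS, rate, WRR,
 * overlimit strategy, optional policing and filter options. */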
1600 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1601 {
1602         if (cbq_dump_lss(skb, cl) < 0 ||
1603             cbq_dump_rate(skb, cl) < 0 ||
1604             cbq_dump_wrr(skb, cl) < 0 ||
1605             cbq_dump_ovl(skb, cl) < 0 ||
1606 #ifdef CONFIG_NET_CLS_POLICE
1607             cbq_dump_police(skb, cl) < 0 ||
1608 #endif
1609             cbq_dump_fopt(skb, cl) < 0)
1610                 return -1;
1611         return 0;
1612 }
1613
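/* Append the CBQ-specific extended statistics as a TCA_XSTATS attribute. */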
1614 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1615 {
1616         RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1617         return 0;
1618
1619 rtattr_failure:
1620         return -1;
1621 }
1622
1623
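/* Dump the qdisc itself: the attributes of the root "link" class plus
 * its extended statistics, sampled under the device queue lock. */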
1624 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1625 {
1626         struct cbq_sched_data *q = qdisc_priv(sch);
1627         unsigned char    *b = skb->tail;
1628         struct rtattr *rta;
1629
1630         rta = (struct rtattr*)b;
1631         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1632         if (cbq_dump_attr(skb, &q->link) < 0)
1633                 goto rtattr_failure;
1634         rta->rta_len = skb->tail - b;
1635         spin_lock_bh(&sch->dev->queue_lock);
1636         q->link.xstats.avgidle = q->link.avgidle;
1637         if (cbq_copy_xstats(skb, &q->link.xstats)) {
1638                 spin_unlock_bh(&sch->dev->queue_lock);
1639                 goto rtattr_failure;
1640         }
1641         spin_unlock_bh(&sch->dev->queue_lock);
1642         return skb->len;
1643
1644 rtattr_failure:
1645         skb_trim(skb, b - skb->data);
1646         return -1;
1647 }
1648
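/* Dump one class: identifiers, the attribute set, generic stats and the
 * extended stats (avgidle and undertime relative to q->now). */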
1649 static int
1650 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1651                struct sk_buff *skb, struct tcmsg *tcm)
1652 {
1653         struct cbq_sched_data *q = qdisc_priv(sch);
1654         struct cbq_class *cl = (struct cbq_class*)arg;
1655         unsigned char    *b = skb->tail;
1656         struct rtattr *rta;
1657
1658         if (cl->tparent)
1659                 tcm->tcm_parent = cl->tparent->classid;
1660         else
1661                 tcm->tcm_parent = TC_H_ROOT;
1662         tcm->tcm_handle = cl->classid;
1663         tcm->tcm_info = cl->q->handle;
1664
1665         rta = (struct rtattr*)b;
1666         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1667         if (cbq_dump_attr(skb, cl) < 0)
1668                 goto rtattr_failure;
1669         rta->rta_len = skb->tail - b;
1670         cl->stats.qlen = cl->q->q.qlen;
1671         if (qdisc_copy_stats(skb, &cl->stats, cl->stats_lock))
1672                 goto rtattr_failure;
1673         spin_lock_bh(&sch->dev->queue_lock);
1674         cl->xstats.avgidle = cl->avgidle;
1675         cl->xstats.undertime = 0;
1676         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1677                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1678         q->link.xstats.avgidle = q->link.avgidle;
1679         if (cbq_copy_xstats(skb, &cl->xstats)) {
1680                 spin_unlock_bh(&sch->dev->queue_lock);
1681                 goto rtattr_failure;
1682         }
1683         spin_unlock_bh(&sch->dev->queue_lock);
1684
1685         return skb->len;
1686
1687 rtattr_failure:
1688         skb_trim(skb, b - skb->data);
1689         return -1;
1690 }
1691
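/* Replace the leaf qdisc of a class.  A NULL replacement installs a
 * default pfifo; the displaced qdisc is reset and returned via *old. */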
1692 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1693                      struct Qdisc **old)
1694 {
1695         struct cbq_class *cl = (struct cbq_class*)arg;
1696
1697         if (cl) {
1698                 if (new == NULL) {
1699                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1700                                 return -ENOBUFS;
1701                 } else {
1702 #ifdef CONFIG_NET_CLS_POLICE
1703                         if (cl->police == TC_POLICE_RECLASSIFY)
1704                                 new->reshape_fail = cbq_reshape_fail;
1705 #endif
1706                 }
1707                 sch_tree_lock(sch);
1708                 *old = cl->q;
1709                 cl->q = new;
1710                 sch->q.qlen -= (*old)->q.qlen;
1711                 qdisc_reset(*old);
1712                 sch_tree_unlock(sch);
1713
1714                 return 0;
1715         }
1716         return -ENOENT;
1717 }
1718
1719 static struct Qdisc *
1720 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1721 {
1722         struct cbq_class *cl = (struct cbq_class*)arg;
1723
1724         return cl ? cl->q : NULL;
1725 }
1726
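/* Look up a class by classid and take a reference on it. */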
1727 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1728 {
1729         struct cbq_sched_data *q = qdisc_priv(sch);
1730         struct cbq_class *cl = cbq_class_lookup(q, classid);
1731
1732         if (cl) {
1733                 cl->refcnt++;
1734                 return (unsigned long)cl;
1735         }
1736         return 0;
1737 }
1738
1739 static void cbq_destroy_filters(struct cbq_class *cl)
1740 {
1741         struct tcf_proto *tp;
1742
1743         while ((tp = cl->filter_list) != NULL) {
1744                 cl->filter_list = tp->next;
1745                 tcf_destroy(tp);
1746         }
1747 }
1748
1749 static void cbq_destroy_class(struct cbq_class *cl)
1750 {
1751         cbq_destroy_filters(cl);
1752         qdisc_destroy(cl->q);
1753         qdisc_put_rtab(cl->R_tab);
1754 #ifdef CONFIG_NET_ESTIMATOR
1755         qdisc_kill_estimator(&cl->stats);
1756 #endif
1757         kfree(cl);
1758 }
1759
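/* Tear down the whole qdisc.  Filters are destroyed for every class in
 * a first pass because classes are not destroyed root-to-leaf, so a
 * filter could otherwise still be bound to an already freed class. */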
1760 static void
1761 cbq_destroy(struct Qdisc* sch)
1762 {
1763         struct cbq_sched_data *q = qdisc_priv(sch);
1764         struct cbq_class *cl;
1765         unsigned h;
1766
1767 #ifdef CONFIG_NET_CLS_POLICE
1768         q->rx_class = NULL;
1769 #endif
1770         for (h = 0; h < 16; h++) {
1771                 for (cl = q->classes[h]; cl; cl = cl->next)
1772                         cbq_destroy_filters(cl);
1773         }
1774
1775         for (h = 0; h < 16; h++) {
1776                 struct cbq_class *next;
1777
1778                 for (cl = q->classes[h]; cl; cl = next) {
1779                         next = cl->next;
1780                         if (cl != &q->link)
1781                                 cbq_destroy_class(cl);
1782                 }
1783         }
1784
1785         qdisc_put_rtab(q->link.R_tab);
1786 }
1787
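/* Release a reference taken by cbq_get(); on the last put the class is
 * destroyed, after clearing the cached rx_class pointer if policing is
 * compiled in. */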
1788 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1789 {
1790         struct cbq_class *cl = (struct cbq_class*)arg;
1791
1792         if (--cl->refcnt == 0) {
1793 #ifdef CONFIG_NET_CLS_POLICE
1794                 struct cbq_sched_data *q = qdisc_priv(sch);
1795
1796                 spin_lock_bh(&sch->dev->queue_lock);
1797                 if (q->rx_class == cl)
1798                         q->rx_class = NULL;
1799                 spin_unlock_bh(&sch->dev->queue_lock);
1800 #endif
1801
1802                 cbq_destroy_class(cl);
1803         }
1804 }
1805
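/* Create a new class or modify an existing one.  All supplied TCA_CBQ_*
 * attributes are length-checked first; for a new class a free minor
 * classid may be generated from q->hgenerator when none was given. */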
1806 static int
1807 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1808                  unsigned long *arg)
1809 {
1810         int err;
1811         struct cbq_sched_data *q = qdisc_priv(sch);
1812         struct cbq_class *cl = (struct cbq_class*)*arg;
1813         struct rtattr *opt = tca[TCA_OPTIONS-1];
1814         struct rtattr *tb[TCA_CBQ_MAX];
1815         struct cbq_class *parent;
1816         struct qdisc_rate_table *rtab = NULL;
1817
1818         if (opt==NULL ||
1819             rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1820                 return -EINVAL;
1821
1822         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1823             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1824                 return -EINVAL;
1825
1826         if (tb[TCA_CBQ_FOPT-1] &&
1827             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1828                 return -EINVAL;
1829
1830         if (tb[TCA_CBQ_RATE-1] &&
1831             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1832                 return -EINVAL;
1833
1834         if (tb[TCA_CBQ_LSSOPT-1] &&
1835             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1836                 return -EINVAL;
1837
1838         if (tb[TCA_CBQ_WRROPT-1] &&
1839             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1840                 return -EINVAL;
1841
1842 #ifdef CONFIG_NET_CLS_POLICE
1843         if (tb[TCA_CBQ_POLICE-1] &&
1844             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1845                 return -EINVAL;
1846 #endif
1847
1848         if (cl) {
1849                 /* Check parent */
1850                 if (parentid) {
1851                         if (cl->tparent && cl->tparent->classid != parentid)
1852                                 return -EINVAL;
1853                         if (!cl->tparent && parentid != TC_H_ROOT)
1854                                 return -EINVAL;
1855                 }
1856
1857                 if (tb[TCA_CBQ_RATE-1]) {
1858                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1859                         if (rtab == NULL)
1860                                 return -EINVAL;
1861                 }
1862
1863                 /* Change class parameters */
1864                 sch_tree_lock(sch);
1865
1866                 if (cl->next_alive != NULL)
1867                         cbq_deactivate_class(cl);
1868
1869                 if (rtab) {
1870                         rtab = xchg(&cl->R_tab, rtab);
1871                         qdisc_put_rtab(rtab);
1872                 }
1873
1874                 if (tb[TCA_CBQ_LSSOPT-1])
1875                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1876
1877                 if (tb[TCA_CBQ_WRROPT-1]) {
1878                         cbq_rmprio(q, cl);
1879                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1880                 }
1881
1882                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1883                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1884
1885 #ifdef CONFIG_NET_CLS_POLICE
1886                 if (tb[TCA_CBQ_POLICE-1])
1887                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1888 #endif
1889
1890                 if (tb[TCA_CBQ_FOPT-1])
1891                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1892
1893                 if (cl->q->q.qlen)
1894                         cbq_activate_class(cl);
1895
1896                 sch_tree_unlock(sch);
1897
1898 #ifdef CONFIG_NET_ESTIMATOR
1899                 if (tca[TCA_RATE-1]) {
1900                         qdisc_kill_estimator(&cl->stats);
1901                         qdisc_new_estimator(&cl->stats, cl->stats_lock,
1902                                             tca[TCA_RATE-1]);
1903                 }
1904 #endif
1905                 return 0;
1906         }
1907
1908         if (parentid == TC_H_ROOT)
1909                 return -EINVAL;
1910
1911         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1912             tb[TCA_CBQ_LSSOPT-1] == NULL)
1913                 return -EINVAL;
1914
1915         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1916         if (rtab == NULL)
1917                 return -EINVAL;
1918
1919         if (classid) {
1920                 err = -EINVAL;
1921                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1922                         goto failure;
1923         } else {
1924                 int i;
1925                 classid = TC_H_MAKE(sch->handle,0x8000);
1926
1927                 for (i=0; i<0x8000; i++) {
1928                         if (++q->hgenerator >= 0x8000)
1929                                 q->hgenerator = 1;
1930                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1931                                 break;
1932                 }
1933                 err = -ENOSR;
1934                 if (i >= 0x8000)
1935                         goto failure;
1936                 classid = classid|q->hgenerator;
1937         }
1938
1939         parent = &q->link;
1940         if (parentid) {
1941                 parent = cbq_class_lookup(q, parentid);
1942                 err = -EINVAL;
1943                 if (parent == NULL)
1944                         goto failure;
1945         }
1946
1947         err = -ENOBUFS;
1948         cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1949         if (cl == NULL)
1950                 goto failure;
1951         memset(cl, 0, sizeof(*cl));
1952         cl->R_tab = rtab;
1953         rtab = NULL;
1954         cl->refcnt = 1;
1955         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1956                 cl->q = &noop_qdisc;
1957         cl->classid = classid;
1958         cl->tparent = parent;
1959         cl->qdisc = sch;
1960         cl->allot = parent->allot;
1961         cl->quantum = cl->allot;
1962         cl->weight = cl->R_tab->rate.rate;
1963         cl->stats_lock = &sch->dev->queue_lock;
1964
1965         sch_tree_lock(sch);
1966         cbq_link_class(cl);
1967         cl->borrow = cl->tparent;
1968         if (cl->tparent != &q->link)
1969                 cl->share = cl->tparent;
1970         cbq_adjust_levels(parent);
1971         cl->minidle = -0x7FFFFFFF;
1972         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1973         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1974         if (cl->ewma_log==0)
1975                 cl->ewma_log = q->link.ewma_log;
1976         if (cl->maxidle==0)
1977                 cl->maxidle = q->link.maxidle;
1978         if (cl->avpkt==0)
1979                 cl->avpkt = q->link.avpkt;
1980         cl->overlimit = cbq_ovl_classic;
1981         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1982                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1983 #ifdef CONFIG_NET_CLS_POLICE
1984         if (tb[TCA_CBQ_POLICE-1])
1985                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1986 #endif
1987         if (tb[TCA_CBQ_FOPT-1])
1988                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1989         sch_tree_unlock(sch);
1990
1991 #ifdef CONFIG_NET_ESTIMATOR
1992         if (tca[TCA_RATE-1])
1993                 qdisc_new_estimator(&cl->stats, cl->stats_lock,
1994                                     tca[TCA_RATE-1]);
1995 #endif
1996
1997         *arg = (unsigned long)cl;
1998         return 0;
1999
2000 failure:
2001         qdisc_put_rtab(rtab);
2002         return err;
2003 }
2004
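/* Delete a class.  Refused while filters or child classes still refer
 * to it, and always refused for the built-in root "link" class. */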
2005 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
2006 {
2007         struct cbq_sched_data *q = qdisc_priv(sch);
2008         struct cbq_class *cl = (struct cbq_class*)arg;
2009
2010         if (cl->filters || cl->children || cl == &q->link)
2011                 return -EBUSY;
2012
2013         sch_tree_lock(sch);
2014
2015         if (cl->next_alive)
2016                 cbq_deactivate_class(cl);
2017
2018         if (q->tx_borrowed == cl)
2019                 q->tx_borrowed = q->tx_class;
2020         if (q->tx_class == cl) {
2021                 q->tx_class = NULL;
2022                 q->tx_borrowed = NULL;
2023         }
2024 #ifdef CONFIG_NET_CLS_POLICE
2025         if (q->rx_class == cl)
2026                 q->rx_class = NULL;
2027 #endif
2028
2029         cbq_unlink_class(cl);
2030         cbq_adjust_levels(cl->tparent);
2031         cl->defmap = 0;
2032         cbq_sync_defmap(cl);
2033
2034         cbq_rmprio(q, cl);
2035         sch_tree_unlock(sch);
2036
2037         if (--cl->refcnt == 0)
2038                 cbq_destroy_class(cl);
2039
2040         return 0;
2041 }
2042
2043 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2044 {
2045         struct cbq_sched_data *q = qdisc_priv(sch);
2046         struct cbq_class *cl = (struct cbq_class *)arg;
2047
2048         if (cl == NULL)
2049                 cl = &q->link;
2050
2051         return &cl->filter_list;
2052 }
2053
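/* Bind a filter residing at class "parent" to the target class; the
 * target must have a strictly lower level than the parent. */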
2054 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2055                                      u32 classid)
2056 {
2057         struct cbq_sched_data *q = qdisc_priv(sch);
2058         struct cbq_class *p = (struct cbq_class*)parent;
2059         struct cbq_class *cl = cbq_class_lookup(q, classid);
2060
2061         if (cl) {
2062                 if (p && p->level <= cl->level)
2063                         return 0;
2064                 cl->filters++;
2065                 return (unsigned long)cl;
2066         }
2067         return 0;
2068 }
2069
2070 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2071 {
2072         struct cbq_class *cl = (struct cbq_class*)arg;
2073
2074         cl->filters--;
2075 }
2076
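/* Walk all classes in the hash table, honouring the walker's
 * skip/count/stop protocol. */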
2077 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2078 {
2079         struct cbq_sched_data *q = qdisc_priv(sch);
2080         unsigned h;
2081
2082         if (arg->stop)
2083                 return;
2084
2085         for (h = 0; h < 16; h++) {
2086                 struct cbq_class *cl;
2087
2088                 for (cl = q->classes[h]; cl; cl = cl->next) {
2089                         if (arg->count < arg->skip) {
2090                                 arg->count++;
2091                                 continue;
2092                         }
2093                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2094                                 arg->stop = 1;
2095                                 return;
2096                         }
2097                         arg->count++;
2098                 }
2099         }
2100 }
2101
2102 static struct Qdisc_class_ops cbq_class_ops = {
2103         .graft          =       cbq_graft,
2104         .leaf           =       cbq_leaf,
2105         .get            =       cbq_get,
2106         .put            =       cbq_put,
2107         .change         =       cbq_change_class,
2108         .delete         =       cbq_delete,
2109         .walk           =       cbq_walk,
2110         .tcf_chain      =       cbq_find_tcf,
2111         .bind_tcf       =       cbq_bind_filter,
2112         .unbind_tcf     =       cbq_unbind_filter,
2113         .dump           =       cbq_dump_class,
2114 };
2115
2116 static struct Qdisc_ops cbq_qdisc_ops = {
2117         .next           =       NULL,
2118         .cl_ops         =       &cbq_class_ops,
2119         .id             =       "cbq",
2120         .priv_size      =       sizeof(struct cbq_sched_data),
2121         .enqueue        =       cbq_enqueue,
2122         .dequeue        =       cbq_dequeue,
2123         .requeue        =       cbq_requeue,
2124         .drop           =       cbq_drop,
2125         .init           =       cbq_init,
2126         .reset          =       cbq_reset,
2127         .destroy        =       cbq_destroy,
2128         .change         =       NULL,
2129         .dump           =       cbq_dump,
2130         .owner          =       THIS_MODULE,
2131 };
2132
2133 static int __init cbq_module_init(void)
2134 {
2135         return register_qdisc(&cbq_qdisc_ops);
2136 }
2137 static void __exit cbq_module_exit(void)
2138 {
2139         unregister_qdisc(&cbq_qdisc_ops);
2140 }
2141 module_init(cbq_module_init)
2142 module_exit(cbq_module_exit)
2143 MODULE_LICENSE("GPL");