1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <linux/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /*      Class-Based Queueing (CBQ) algorithm.
41         =======================================
42
43         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44                  Management Models for Packet Networks",
45                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50                  Parameters", 1996
51
52                  [4] Sally Floyd and Michael Speer, "Experimental Results
53                  for Class-Based Queueing", 1998, not published.
54
55         -----------------------------------------------------------------------
56
57         The algorithm skeleton was taken from the NS simulator's cbq.cc.
58         If someone wants to check this code against the LBL version,
59         they should take into account that ONLY the skeleton was borrowed;
60         the implementation is different. In particular:
61
62         --- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than MTU, which is always the case when real time classes
65         have small rates. Note that the statement in [3] is
66         incomplete; delay may actually be estimated even if the class
67         per-round allotment is less than MTU. Namely, if per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1
69
70         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72         In the worst case we have IntServ estimate with D = W*r+k*MTU
73         and C = MTU*r. The proof (if correct at all) is trivial.
74
75
76         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77         interpret some places, which look like wrong translations
78         from NS. Anyone is advised to find these differences
79         and explain to me why I am wrong 8).
80
81         --- Linux has no EOI event, so we cannot estimate true class
82         idle time. The workaround is to consider the next dequeue event
83         as a sign that the previous packet has finished. This is wrong because
84         of internal device queueing, but on a permanently loaded link it is true.
85         Moreover, combined with the clock integrator, this scheme looks
86         very close to an ideal solution.  */
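
/*      An illustrative instance of the delay bound above (made-up figures,
        just to show the arithmetic): take MTU = 1500 bytes, k = 2 classes,
        a per-round allotment W*r_1 = 500 bytes for class 1 with r_1 = 0.25
        (so W = 2000, r = 0.5 and W*r = 1000 bytes), on a link of
        B = 1.25e6 bytes/sec (10 Mbit/s):

        delay_1 <= ([1500/500]*1000 + 1000 + 2*1500)/1.25e6
                 = (3000 + 1000 + 3000)/1.25e6 sec  ~= 5.6 msec
 */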
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93         struct cbq_class        *next;          /* hash table link */
94         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
95
96 /* Parameters */
97         u32                     classid;
98         unsigned char           priority;       /* class priority */
99         unsigned char           priority2;      /* priority to be used after overlimit */
100         unsigned char           ewma_log;       /* time constant for idle time calculation */
101         unsigned char           ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103         unsigned char           police;
104 #endif
105
106         u32                     defmap;
107
108         /* Link-sharing scheduler parameters */
109         long                    maxidle;        /* Class parameters: see below. */
110         long                    offtime;
111         long                    minidle;
112         u32                     avpkt;
113         struct qdisc_rate_table *R_tab;
114
115         /* Overlimit strategy parameters */
116         void                    (*overlimit)(struct cbq_class *cl);
117         long                    penalty;
118
119         /* General scheduler (WRR) parameters */
120         long                    allot;
121         long                    quantum;        /* Allotment per WRR round */
122         long                    weight;         /* Relative allotment: see below */
123
124         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
125         struct cbq_class        *split;         /* Ptr to split node */
126         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
127         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
128         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
129                                                    parent otherwise */
130         struct cbq_class        *sibling;       /* Sibling chain */
131         struct cbq_class        *children;      /* Pointer to children chain */
132
133         struct Qdisc            *q;             /* Elementary queueing discipline */
134
135
136 /* Variables */
137         unsigned char           cpriority;      /* Effective priority */
138         unsigned char           delayed;
139         unsigned char           level;          /* level of the class in hierarchy:
140                                                    0 for leaf classes, and maximal
141                                                    level of children + 1 for nodes.
142                                                  */
143
144         psched_time_t           last;           /* Last end of service */
145         psched_time_t           undertime;
146         long                    avgidle;
147         long                    deficit;        /* Saved deficit for WRR */
148         unsigned long           penalized;
149         struct gnet_stats_basic bstats;
150         struct gnet_stats_queue qstats;
151         struct gnet_stats_rate_est rate_est;
152         spinlock_t              *stats_lock;
153         struct tc_cbq_xstats    xstats;
154
155         struct tcf_proto        *filter_list;
156
157         int                     refcnt;
158         int                     filters;
159
160         struct cbq_class        *defaults[TC_PRIO_MAX+1];
161 };
162
163 struct cbq_sched_data
164 {
165         struct cbq_class        *classes[16];           /* Hash table of all classes */
166         int                     nclasses[TC_CBQ_MAXPRIO+1];
167         unsigned                quanta[TC_CBQ_MAXPRIO+1];
168
169         struct cbq_class        link;
170
171         unsigned                activemask;
172         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
173                                                                    with backlog */
174
175 #ifdef CONFIG_NET_CLS_POLICE
176         struct cbq_class        *rx_class;
177 #endif
178         struct cbq_class        *tx_class;
179         struct cbq_class        *tx_borrowed;
180         int                     tx_len;
181         psched_time_t           now;            /* Cached timestamp */
182         psched_time_t           now_rt;         /* Cached real time */
183         unsigned                pmask;
184
185         struct timer_list       delay_timer;
186         struct timer_list       wd_timer;       /* Watchdog timer,
187                                                    started when CBQ has
188                                                    backlog, but cannot
189                                                    transmit just now */
190         long                    wd_expires;
191         int                     toplevel;
192         u32                     hgenerator;
193 };
194
195
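/* L2T ("length to time"): look up, in the rate table computed by userspace tc
   for this class's configured rate, the time needed to transmit a packet of
   the given length. The length is bucketed by cell_log: e.g. with
   cell_log == 3, a 1500 byte packet indexes slot 1500>>3 == 187, and that
   entry already holds the transmission time in scheduler clock units. */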
196 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
197
198
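/* Fold a 32-bit classid into one of 16 hash buckets. For example,
   classid 0x00010002 hashes as 0x00010002 ^ 0x00000100 = 0x00010102,
   then ^ 0x00001010 = 0x00011112, masked down to bucket 0x2. */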
199 static __inline__ unsigned cbq_hash(u32 h)
200 {
201         h ^= h>>8;
202         h ^= h>>4;
203         return h&0xF;
204 }
205
206 static __inline__ struct cbq_class *
207 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
208 {
209         struct cbq_class *cl;
210
211         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
212                 if (cl->classid == classid)
213                         return cl;
214         return NULL;
215 }
216
217 #ifdef CONFIG_NET_CLS_POLICE
218
219 static struct cbq_class *
220 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
221 {
222         struct cbq_class *cl, *new;
223
224         for (cl = this->tparent; cl; cl = cl->tparent)
225                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
226                         return new;
227
228         return NULL;
229 }
230
231 #endif
232
233 /* Classify packet. The procedure is pretty complicated, but
234    it allows us to combine link sharing and priority scheduling
235    transparently.
236
237    Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
238    so that they resolve to split nodes. Then packets are classified
239    by logical priority, or a more specific classifier may be attached
240    to the split node.
241  */
242
243 static struct cbq_class *
244 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
245 {
246         struct cbq_sched_data *q = qdisc_priv(sch);
247         struct cbq_class *head = &q->link;
248         struct cbq_class **defmap;
249         struct cbq_class *cl = NULL;
250         u32 prio = skb->priority;
251         struct tcf_result res;
252
253         /*
254          *  Step 1. If skb->priority points to one of our classes, use it.
255          */
256         if (TC_H_MAJ(prio^sch->handle) == 0 &&
257             (cl = cbq_class_lookup(q, prio)) != NULL)
258                 return cl;
259
260         *qerr = NET_XMIT_DROP;
261         for (;;) {
262                 int result = 0;
263                 defmap = head->defaults;
264
265                 /*
266                  * Step 2+n. Apply classifier.
267                  */
268                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269                         goto fallback;
270
271                 if ((cl = (void*)res.class) == NULL) {
272                         if (TC_H_MAJ(res.classid))
273                                 cl = cbq_class_lookup(q, res.classid);
274                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
275                                 cl = defmap[TC_PRIO_BESTEFFORT];
276
277                         if (cl == NULL || cl->level >= head->level)
278                                 goto fallback;
279                 }
280
281 #ifdef CONFIG_NET_CLS_ACT
282                 switch (result) {
283                 case TC_ACT_QUEUED:
284                 case TC_ACT_STOLEN: 
285                         *qerr = NET_XMIT_SUCCESS;
286                 case TC_ACT_SHOT:
287                         return NULL;
288                 }
289 #elif defined(CONFIG_NET_CLS_POLICE)
290                 switch (result) {
291                 case TC_POLICE_RECLASSIFY:
292                         return cbq_reclassify(skb, cl);
293                 case TC_POLICE_SHOT:
294                         return NULL;
295                 default:
296                         break;
297                 }
298 #endif
299                 if (cl->level == 0)
300                         return cl;
301
302                 /*
303                  * Step 3+n. If classifier selected a link sharing class,
304                  *         apply agency specific classifier.
305                  *         Repeat this procedure until we hit a leaf node.
306                  */
307                 head = cl;
308         }
309
310 fallback:
311         cl = head;
312
313         /*
314          * Step 4. No success...
315          */
316         if (TC_H_MAJ(prio) == 0 &&
317             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
318             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
319                 return head;
320
321         return cl;
322 }
323
324 /*
325    A packet has just been enqueued on an empty class.
326    cbq_activate_class adds the class to the tail of the active class list
327    of its priority band.
328  */
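/* (The per-priority active list is circular: q->active[prio] points at the
   tail of the band and q->active[prio]->next_alive is its head. A class with
   next_alive == NULL is not on any active list.) */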
329
330 static __inline__ void cbq_activate_class(struct cbq_class *cl)
331 {
332         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
333         int prio = cl->cpriority;
334         struct cbq_class *cl_tail;
335
336         cl_tail = q->active[prio];
337         q->active[prio] = cl;
338
339         if (cl_tail != NULL) {
340                 cl->next_alive = cl_tail->next_alive;
341                 cl_tail->next_alive = cl;
342         } else {
343                 cl->next_alive = cl;
344                 q->activemask |= (1<<prio);
345         }
346 }
347
348 /*
349    Unlink class from active chain.
350    Note that this same procedure is done directly in cbq_dequeue*
351    during the round-robin procedure.
352  */
353
354 static void cbq_deactivate_class(struct cbq_class *this)
355 {
356         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
357         int prio = this->cpriority;
358         struct cbq_class *cl;
359         struct cbq_class *cl_prev = q->active[prio];
360
361         do {
362                 cl = cl_prev->next_alive;
363                 if (cl == this) {
364                         cl_prev->next_alive = cl->next_alive;
365                         cl->next_alive = NULL;
366
367                         if (cl == q->active[prio]) {
368                                 q->active[prio] = cl_prev;
369                                 if (cl == q->active[prio]) {
370                                         q->active[prio] = NULL;
371                                         q->activemask &= ~(1<<prio);
372                                         return;
373                                 }
374                         }
375
376                         cl = cl_prev->next_alive;
377                         return;
378                 }
379         } while ((cl_prev = cl) != q->active[prio]);
380 }
381
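/* cbq_mark_toplevel maintains what [1] describes as the top-level
   link-sharing guideline: q->toplevel caps borrowing, since cbq_under_limit()
   refuses to borrow from an ancestor whose level exceeds q->toplevel. When an
   enqueue finds an underlimit class (or an underlimit ancestor on its borrow
   chain), toplevel is lowered to that node's level. */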
382 static void
383 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
384 {
385         int toplevel = q->toplevel;
386
387         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
388                 psched_time_t now;
389                 psched_tdiff_t incr;
390
391                 PSCHED_GET_TIME(now);
392                 incr = PSCHED_TDIFF(now, q->now_rt);
393                 PSCHED_TADD2(q->now, incr, now);
394
395                 do {
396                         if (PSCHED_TLESS(cl->undertime, now)) {
397                                 q->toplevel = cl->level;
398                                 return;
399                         }
400                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
401         }
402 }
403
404 static int
405 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
406 {
407         struct cbq_sched_data *q = qdisc_priv(sch);
408         int len = skb->len;
409         int ret;
410         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
411
412 #ifdef CONFIG_NET_CLS_POLICE
413         q->rx_class = cl;
414 #endif
415         if (cl == NULL) {
416                 if (ret == NET_XMIT_DROP)
417                         sch->qstats.drops++;
418                 kfree_skb(skb);
419                 return ret;
420         }
421
422 #ifdef CONFIG_NET_CLS_POLICE
423         cl->q->__parent = sch;
424 #endif
425         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
426                 sch->q.qlen++;
427                 sch->bstats.packets++;
428                 sch->bstats.bytes+=len;
429                 cbq_mark_toplevel(q, cl);
430                 if (!cl->next_alive)
431                         cbq_activate_class(cl);
432                 return ret;
433         }
434
435         sch->qstats.drops++;
436         cbq_mark_toplevel(q, cl);
437         cl->qstats.drops++;
438         return ret;
439 }
440
441 static int
442 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
443 {
444         struct cbq_sched_data *q = qdisc_priv(sch);
445         struct cbq_class *cl;
446         int ret;
447
448         if ((cl = q->tx_class) == NULL) {
449                 kfree_skb(skb);
450                 sch->qstats.drops++;
451                 return NET_XMIT_CN;
452         }
453         q->tx_class = NULL;
454
455         cbq_mark_toplevel(q, cl);
456
457 #ifdef CONFIG_NET_CLS_POLICE
458         q->rx_class = cl;
459         cl->q->__parent = sch;
460 #endif
461         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
462                 sch->q.qlen++;
463                 sch->qstats.requeues++;
464                 if (!cl->next_alive)
465                         cbq_activate_class(cl);
466                 return 0;
467         }
468         sch->qstats.drops++;
469         cl->qstats.drops++;
470         return ret;
471 }
472
473 /* Overlimit actions */
474
475 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
476
477 static void cbq_ovl_classic(struct cbq_class *cl)
478 {
479         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
480         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
481
482         if (!cl->delayed) {
483                 delay += cl->offtime;
484
485                 /* 
486                    Class goes to sleep, so that it will have no
487                    chance to work avgidle. Let's forgive it 8)
488
489                    BTW cbq-2.0 has a bug in this
490                    place; apparently they forgot to shift it by cl->ewma_log.
491                  */
492                 if (cl->avgidle < 0)
493                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
494                 if (cl->avgidle < cl->minidle)
495                         cl->avgidle = cl->minidle;
496                 if (delay <= 0)
497                         delay = 1;
498                 PSCHED_TADD2(q->now, delay, cl->undertime);
499
500                 cl->xstats.overactions++;
501                 cl->delayed = 1;
502         }
503         if (q->wd_expires == 0 || q->wd_expires > delay)
504                 q->wd_expires = delay;
505
506         /* Dirty work! We must schedule wakeups based on
507            real available rate, rather than leaf rate,
508            which may be tiny (even zero).
509          */
510         if (q->toplevel == TC_CBQ_MAXLEVEL) {
511                 struct cbq_class *b;
512                 psched_tdiff_t base_delay = q->wd_expires;
513
514                 for (b = cl->borrow; b; b = b->borrow) {
515                         delay = PSCHED_TDIFF(b->undertime, q->now);
516                         if (delay < base_delay) {
517                                 if (delay <= 0)
518                                         delay = 1;
519                                 base_delay = delay;
520                         }
521                 }
522
523                 q->wd_expires = base_delay;
524         }
525 }
526
527 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
528    they go overlimit
529  */
530
531 static void cbq_ovl_rclassic(struct cbq_class *cl)
532 {
533         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
534         struct cbq_class *this = cl;
535
536         do {
537                 if (cl->level > q->toplevel) {
538                         cl = NULL;
539                         break;
540                 }
541         } while ((cl = cl->borrow) != NULL);
542
543         if (cl == NULL)
544                 cl = this;
545         cbq_ovl_classic(cl);
546 }
547
548 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
549
550 static void cbq_ovl_delay(struct cbq_class *cl)
551 {
552         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
553         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
554
555         if (!cl->delayed) {
556                 unsigned long sched = jiffies;
557
558                 delay += cl->offtime;
559                 if (cl->avgidle < 0)
560                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
561                 if (cl->avgidle < cl->minidle)
562                         cl->avgidle = cl->minidle;
563                 PSCHED_TADD2(q->now, delay, cl->undertime);
564
565                 if (delay > 0) {
566                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
567                         cl->penalized = sched;
568                         cl->cpriority = TC_CBQ_MAXPRIO;
569                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
570                         if (del_timer(&q->delay_timer) &&
571                             (long)(q->delay_timer.expires - sched) > 0)
572                                 q->delay_timer.expires = sched;
573                         add_timer(&q->delay_timer);
574                         cl->delayed = 1;
575                         cl->xstats.overactions++;
576                         return;
577                 }
578                 delay = 1;
579         }
580         if (q->wd_expires == 0 || q->wd_expires > delay)
581                 q->wd_expires = delay;
582 }
583
584 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
585
586 static void cbq_ovl_lowprio(struct cbq_class *cl)
587 {
588         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
589
590         cl->penalized = jiffies + cl->penalty;
591
592         if (cl->cpriority != cl->priority2) {
593                 cl->cpriority = cl->priority2;
594                 q->pmask |= (1<<cl->cpriority);
595                 cl->xstats.overactions++;
596         }
597         cbq_ovl_classic(cl);
598 }
599
600 /* TC_CBQ_OVL_DROP: penalize class by dropping */
601
602 static void cbq_ovl_drop(struct cbq_class *cl)
603 {
604         if (cl->q->ops->drop)
605                 if (cl->q->ops->drop(cl->q))
606                         cl->qdisc->q.qlen--;
607         cl->xstats.overactions++;
608         cbq_ovl_classic(cl);
609 }
610
611 static void cbq_watchdog(unsigned long arg)
612 {
613         struct Qdisc *sch = (struct Qdisc*)arg;
614
615         sch->flags &= ~TCQ_F_THROTTLED;
616         netif_schedule(sch->dev);
617 }
618
619 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
620 {
621         struct cbq_class *cl;
622         struct cbq_class *cl_prev = q->active[prio];
623         unsigned long now = jiffies;
624         unsigned long sched = now;
625
626         if (cl_prev == NULL)
627                 return now;
628
629         do {
630                 cl = cl_prev->next_alive;
631                 if ((long)(now - cl->penalized) > 0) {
632                         cl_prev->next_alive = cl->next_alive;
633                         cl->next_alive = NULL;
634                         cl->cpriority = cl->priority;
635                         cl->delayed = 0;
636                         cbq_activate_class(cl);
637
638                         if (cl == q->active[prio]) {
639                                 q->active[prio] = cl_prev;
640                                 if (cl == q->active[prio]) {
641                                         q->active[prio] = NULL;
642                                         return 0;
643                                 }
644                         }
645
646                         cl = cl_prev->next_alive;
647                 } else if ((long)(sched - cl->penalized) > 0)
648                         sched = cl->penalized;
649         } while ((cl_prev = cl) != q->active[prio]);
650
651         return (long)(sched - now);
652 }
653
654 static void cbq_undelay(unsigned long arg)
655 {
656         struct Qdisc *sch = (struct Qdisc*)arg;
657         struct cbq_sched_data *q = qdisc_priv(sch);
658         long delay = 0;
659         unsigned pmask;
660
661         pmask = q->pmask;
662         q->pmask = 0;
663
664         while (pmask) {
665                 int prio = ffz(~pmask);
666                 long tmp;
667
668                 pmask &= ~(1<<prio);
669
670                 tmp = cbq_undelay_prio(q, prio);
671                 if (tmp > 0) {
672                         q->pmask |= 1<<prio;
673                         if (tmp < delay || delay == 0)
674                                 delay = tmp;
675                 }
676         }
677
678         if (delay) {
679                 q->delay_timer.expires = jiffies + delay;
680                 add_timer(&q->delay_timer);
681         }
682
683         sch->flags &= ~TCQ_F_THROTTLED;
684         netif_schedule(sch->dev);
685 }
686
687
688 #ifdef CONFIG_NET_CLS_POLICE
689
690 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
691 {
692         int len = skb->len;
693         struct Qdisc *sch = child->__parent;
694         struct cbq_sched_data *q = qdisc_priv(sch);
695         struct cbq_class *cl = q->rx_class;
696
697         q->rx_class = NULL;
698
699         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
700
701                 cbq_mark_toplevel(q, cl);
702
703                 q->rx_class = cl;
704                 cl->q->__parent = sch;
705
706                 if (cl->q->enqueue(skb, cl->q) == 0) {
707                         sch->q.qlen++;
708                         sch->bstats.packets++;
709                         sch->bstats.bytes+=len;
710                         if (!cl->next_alive)
711                                 cbq_activate_class(cl);
712                         return 0;
713                 }
714                 sch->qstats.drops++;
715                 return 0;
716         }
717
718         sch->qstats.drops++;
719         return -1;
720 }
721 #endif
722
723 /* 
724    This is a mission-critical procedure.
725
726    We "regenerate" the toplevel cutoff if the transmitting class
727    has backlog and is not regulated. It is not part of the
728    original CBQ description, but it looks more reasonable.
729    Probably it is wrong. This question needs further investigation.
730 */
731
732 static __inline__ void
733 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
734                     struct cbq_class *borrowed)
735 {
736         if (cl && q->toplevel >= borrowed->level) {
737                 if (cl->q->q.qlen > 1) {
738                         do {
739                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
740                                         q->toplevel = borrowed->level;
741                                         return;
742                                 }
743                         } while ((borrowed=borrowed->borrow) != NULL);
744                 }
745 #if 0   
746         /* It is not necessary now. Uncommenting it
747            will save CPU cycles, but decrease fairness.
748          */
749                 q->toplevel = TC_CBQ_MAXLEVEL;
750 #endif
751         }
752 }
753
754 static void
755 cbq_update(struct cbq_sched_data *q)
756 {
757         struct cbq_class *this = q->tx_class;
758         struct cbq_class *cl = this;
759         int len = q->tx_len;
760
761         q->tx_class = NULL;
762
763         for ( ; cl; cl = cl->share) {
764                 long avgidle = cl->avgidle;
765                 long idle;
766
767                 cl->bstats.packets++;
768                 cl->bstats.bytes += len;
769
770                 /*
771                    (now - last) is total time between packet right edges.
772                    (last_pktlen/rate) is "virtual" busy time, so that
773
774                          idle = (now - last) - last_pktlen/rate
775                  */
776
777                 idle = PSCHED_TDIFF(q->now, cl->last);
778                 if ((unsigned long)idle > 128*1024*1024) {
779                         avgidle = cl->maxidle;
780                 } else {
781                         idle -= L2T(cl, len);
782
783                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
784                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
785                    cl->avgidle == true_avgidle/W,
786                    hence:
787                  */
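                        /* Spelled out: with a == cl->avgidle == true_avgidle/W,
                           a' = ((1-W)*true_avgidle + W*idle)/W
                              = a - W*a + idle
                              = a + idle - (a >> ewma_log),
                           which is exactly the update below.
                         */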
788                         avgidle += idle - (avgidle>>cl->ewma_log);
789                 }
790
791                 if (avgidle <= 0) {
792                         /* Overlimit or at-limit */
793
794                         if (avgidle < cl->minidle)
795                                 avgidle = cl->minidle;
796
797                         cl->avgidle = avgidle;
798
799                         /* Calculate expected time, when this class
800                            will be allowed to send.
801                            It will occur, when:
802                            (1-W)*true_avgidle + W*delay = 0, i.e.
803                            idle = (1/W - 1)*(-true_avgidle)
804                            or
805                            idle = (1 - W)*(-cl->avgidle);
806                          */
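                        /* E.g. with ewma_log == 5 (W == 1/32) and a scaled
                           avgidle of -3200, this yields
                           idle = 3200 - (3200 >> 5) = 3100 = (1 - 1/32)*3200. */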
807                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
808
809                         /*
810                            That is not all.
811                            To maintain the rate allocated to the class,
812                            we add to undertime the virtual clock time
813                            needed to complete the transmitted packet.
814                            (len/phys_bandwidth has already elapsed by
815                            the moment cbq_update is called.)
816                          */
817
818                         idle -= L2T(&q->link, len);
819                         idle += L2T(cl, len);
820
821                         PSCHED_AUDIT_TDIFF(idle);
822
823                         PSCHED_TADD2(q->now, idle, cl->undertime);
824                 } else {
825                         /* Underlimit */
826
827                         PSCHED_SET_PASTPERFECT(cl->undertime);
828                         if (avgidle > cl->maxidle)
829                                 cl->avgidle = cl->maxidle;
830                         else
831                                 cl->avgidle = avgidle;
832                 }
833                 cl->last = q->now;
834         }
835
836         cbq_update_toplevel(q, this, q->tx_borrowed);
837 }
838
839 static __inline__ struct cbq_class *
840 cbq_under_limit(struct cbq_class *cl)
841 {
842         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843         struct cbq_class *this_cl = cl;
844
845         if (cl->tparent == NULL)
846                 return cl;
847
848         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
849             !PSCHED_TLESS(q->now, cl->undertime)) {
850                 cl->delayed = 0;
851                 return cl;
852         }
853
854         do {
855                 /* This is a very suspicious place. Currently the overlimit
856                    action is generated for non-bounded classes
857                    only if the link is completely congested.
858                    Though this agrees with the ancestor-only paradigm,
859                    it looks very stupid. In particular,
860                    it means that this chunk of code will either
861                    never be called or result in strong amplification
862                    of burstiness. Dangerous and silly, but
863                    no other solution exists.
864                  */
865                 if ((cl = cl->borrow) == NULL) {
866                         this_cl->qstats.overlimits++;
867                         this_cl->overlimit(this_cl);
868                         return NULL;
869                 }
870                 if (cl->level > q->toplevel)
871                         return NULL;
872         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
873                  PSCHED_TLESS(q->now, cl->undertime));
874
875         cl->delayed = 0;
876         return cl;
877 }
878
879 static __inline__ struct sk_buff *
880 cbq_dequeue_prio(struct Qdisc *sch, int prio)
881 {
882         struct cbq_sched_data *q = qdisc_priv(sch);
883         struct cbq_class *cl_tail, *cl_prev, *cl;
884         struct sk_buff *skb;
885         int deficit;
886
887         cl_tail = cl_prev = q->active[prio];
888         cl = cl_prev->next_alive;
889
890         do {
891                 deficit = 0;
892
893                 /* Start round */
894                 do {
895                         struct cbq_class *borrow = cl;
896
897                         if (cl->q->q.qlen &&
898                             (borrow = cbq_under_limit(cl)) == NULL)
899                                 goto skip_class;
900
901                         if (cl->deficit <= 0) {
902                                 /* Class exhausted its allotment per
903                                    this round. Switch to the next one.
904                                  */
905                                 deficit = 1;
906                                 cl->deficit += cl->quantum;
907                                 goto next_class;
908                         }
909
910                         skb = cl->q->dequeue(cl->q);
911
912                         /* The class did not give us any skb :-(
913                            This can occur even if cl->q->q.qlen != 0,
914                            e.g. if cl->q == "tbf"
915                          */
916                         if (skb == NULL)
917                                 goto skip_class;
918
919                         cl->deficit -= skb->len;
920                         q->tx_class = cl;
921                         q->tx_borrowed = borrow;
922                         if (borrow != cl) {
923 #ifndef CBQ_XSTATS_BORROWS_BYTES
924                                 borrow->xstats.borrows++;
925                                 cl->xstats.borrows++;
926 #else
927                                 borrow->xstats.borrows += skb->len;
928                                 cl->xstats.borrows += skb->len;
929 #endif
930                         }
931                         q->tx_len = skb->len;
932
933                         if (cl->deficit <= 0) {
934                                 q->active[prio] = cl;
935                                 cl = cl->next_alive;
936                                 cl->deficit += cl->quantum;
937                         }
938                         return skb;
939
940 skip_class:
941                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
942                                 /* Class is empty or penalized.
943                                    Unlink it from active chain.
944                                  */
945                                 cl_prev->next_alive = cl->next_alive;
946                                 cl->next_alive = NULL;
947
948                                 /* Did cl_tail point to it? */
949                                 if (cl == cl_tail) {
950                                         /* Repair it! */
951                                         cl_tail = cl_prev;
952
953                                         /* Was it the last class in this band? */
954                                         if (cl == cl_tail) {
955                                                 /* Kill the band! */
956                                                 q->active[prio] = NULL;
957                                                 q->activemask &= ~(1<<prio);
958                                                 if (cl->q->q.qlen)
959                                                         cbq_activate_class(cl);
960                                                 return NULL;
961                                         }
962
963                                         q->active[prio] = cl_tail;
964                                 }
965                                 if (cl->q->q.qlen)
966                                         cbq_activate_class(cl);
967
968                                 cl = cl_prev;
969                         }
970
971 next_class:
972                         cl_prev = cl;
973                         cl = cl->next_alive;
974                 } while (cl_prev != cl_tail);
975         } while (deficit);
976
977         q->active[prio] = cl_prev;
978
979         return NULL;
980 }
981
982 static __inline__ struct sk_buff *
983 cbq_dequeue_1(struct Qdisc *sch)
984 {
985         struct cbq_sched_data *q = qdisc_priv(sch);
986         struct sk_buff *skb;
987         unsigned activemask;
988
989         activemask = q->activemask&0xFF;
990         while (activemask) {
991                 int prio = ffz(~activemask);
992                 activemask &= ~(1<<prio);
993                 skb = cbq_dequeue_prio(sch, prio);
994                 if (skb)
995                         return skb;
996         }
997         return NULL;
998 }
999
1000 static struct sk_buff *
1001 cbq_dequeue(struct Qdisc *sch)
1002 {
1003         struct sk_buff *skb;
1004         struct cbq_sched_data *q = qdisc_priv(sch);
1005         psched_time_t now;
1006         psched_tdiff_t incr;
1007
1008         PSCHED_GET_TIME(now);
1009         incr = PSCHED_TDIFF(now, q->now_rt);
1010
1011         if (q->tx_class) {
1012                 psched_tdiff_t incr2;
1013                 /* Time integrator. We calculate the EOS time
1014                    by adding the expected packet transmission time.
1015                    If real time is greater, we warp the artificial clock,
1016                    so that:
1017
1018                    cbq_time = max(real_time, work);
1019                  */
1020                 incr2 = L2T(&q->link, q->tx_len);
1021                 PSCHED_TADD(q->now, incr2);
1022                 cbq_update(q);
1023                 if ((incr -= incr2) < 0)
1024                         incr = 0;
1025         }
1026         PSCHED_TADD(q->now, incr);
1027         q->now_rt = now;
1028
1029         for (;;) {
1030                 q->wd_expires = 0;
1031
1032                 skb = cbq_dequeue_1(sch);
1033                 if (skb) {
1034                         sch->q.qlen--;
1035                         sch->flags &= ~TCQ_F_THROTTLED;
1036                         return skb;
1037                 }
1038
1039                 /* All the classes are overlimit.
1040
1041                    This is possible if:
1042
1043                    1. The scheduler is empty.
1044                    2. The toplevel cutoff inhibited borrowing.
1045                    3. The root class is overlimit.
1046
1047                    Reset conditions 2 and 3 and retry.
1048
1049                    Note that NS and cbq-2.0 are buggy: peeking at
1050                    an arbitrary class is appropriate for ancestor-only
1051                    sharing, but not for the toplevel algorithm.
1052
1053                    Our version is better but slower, because it requires
1054                    two passes; this is unavoidable with top-level sharing.
1055                 */
1056
1057                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1058                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1059                         break;
1060
1061                 q->toplevel = TC_CBQ_MAXLEVEL;
1062                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1063         }
1064
1065         /* No packets in the scheduler, or nobody wants to give them to us :-(
1066            Sigh... start the watchdog timer in the latter case. */
1067
1068         if (sch->q.qlen) {
1069                 sch->qstats.overlimits++;
1070                 if (q->wd_expires) {
1071                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1072                         if (delay <= 0)
1073                                 delay = 1;
1074                         mod_timer(&q->wd_timer, jiffies + delay);
1075                         sch->flags |= TCQ_F_THROTTLED;
1076                 }
1077         }
1078         return NULL;
1079 }
1080
1081 /* CBQ class maintenance routines */
1082
1083 static void cbq_adjust_levels(struct cbq_class *this)
1084 {
1085         if (this == NULL)
1086                 return;
1087
1088         do {
1089                 int level = 0;
1090                 struct cbq_class *cl;
1091
1092                 if ((cl = this->children) != NULL) {
1093                         do {
1094                                 if (cl->level > level)
1095                                         level = cl->level;
1096                         } while ((cl = cl->sibling) != this->children);
1097                 }
1098                 this->level = level+1;
1099         } while ((this = this->tparent) != NULL);
1100 }
1101
1102 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1103 {
1104         struct cbq_class *cl;
1105         unsigned h;
1106
1107         if (q->quanta[prio] == 0)
1108                 return;
1109
1110         for (h=0; h<16; h++) {
1111                 for (cl = q->classes[h]; cl; cl = cl->next) {
1112                         /* BUGGGG... Beware! This expression suffers from
1113                            arithmetic overflows!
1114                          */
1115                         if (cl->priority == prio) {
1116                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1117                                         q->quanta[prio];
1118                         }
1119                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1120                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1121                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1122                         }
1123                 }
1124         }
1125 }
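
/* The quantum computation above can overflow 32-bit arithmetic, as the
   comment in cbq_normalize_quanta() warns. The fragment below is only an
   illustrative sketch (kept disabled, hypothetical helper) of how the
   intermediate product could be widened to 64 bits using the kernel's
   do_div() helper from <asm/div64.h>; it is not used by this qdisc. */
#if 0
static long cbq_safe_quantum(struct cbq_sched_data *q, struct cbq_class *cl,
			     int prio)
{
	u64 tmp = (u64)cl->weight * cl->allot * q->nclasses[prio];

	do_div(tmp, q->quanta[prio]);	/* 64-by-32 bit division */
	return (long)tmp;
}
#endif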
1126
1127 static void cbq_sync_defmap(struct cbq_class *cl)
1128 {
1129         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1130         struct cbq_class *split = cl->split;
1131         unsigned h;
1132         int i;
1133
1134         if (split == NULL)
1135                 return;
1136
1137         for (i=0; i<=TC_PRIO_MAX; i++) {
1138                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1139                         split->defaults[i] = NULL;
1140         }
1141
1142         for (i=0; i<=TC_PRIO_MAX; i++) {
1143                 int level = split->level;
1144
1145                 if (split->defaults[i])
1146                         continue;
1147
1148                 for (h=0; h<16; h++) {
1149                         struct cbq_class *c;
1150
1151                         for (c = q->classes[h]; c; c = c->next) {
1152                                 if (c->split == split && c->level < level &&
1153                                     c->defmap&(1<<i)) {
1154                                         split->defaults[i] = c;
1155                                         level = c->level;
1156                                 }
1157                         }
1158                 }
1159         }
1160 }
1161
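/* Example of how defmap changes propagate: a call such as
   cbq_change_defmap(cl, split->classid, 0x0c, 0x0c) sets bits 2 and 3 in
   cl->defmap, making cl a candidate default class for packets of logical
   priority 2 and 3 under that split node; cbq_sync_defmap() then rebuilds
   the split node's defaults[] table, preferring the lowest-level match. */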
1162 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1163 {
1164         struct cbq_class *split = NULL;
1165
1166         if (splitid == 0) {
1167                 if ((split = cl->split) == NULL)
1168                         return;
1169                 splitid = split->classid;
1170         }
1171
1172         if (split == NULL || split->classid != splitid) {
1173                 for (split = cl->tparent; split; split = split->tparent)
1174                         if (split->classid == splitid)
1175                                 break;
1176         }
1177
1178         if (split == NULL)
1179                 return;
1180
1181         if (cl->split != split) {
1182                 cl->defmap = 0;
1183                 cbq_sync_defmap(cl);
1184                 cl->split = split;
1185                 cl->defmap = def&mask;
1186         } else
1187                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1188
1189         cbq_sync_defmap(cl);
1190 }
1191
1192 static void cbq_unlink_class(struct cbq_class *this)
1193 {
1194         struct cbq_class *cl, **clp;
1195         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1196
1197         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1198                 if (cl == this) {
1199                         *clp = cl->next;
1200                         cl->next = NULL;
1201                         break;
1202                 }
1203         }
1204
1205         if (this->tparent) {
1206                 clp=&this->sibling;
1207                 cl = *clp;
1208                 do {
1209                         if (cl == this) {
1210                                 *clp = cl->sibling;
1211                                 break;
1212                         }
1213                         clp = &cl->sibling;
1214                 } while ((cl = *clp) != this->sibling);
1215
1216                 if (this->tparent->children == this) {
1217                         this->tparent->children = this->sibling;
1218                         if (this->sibling == this)
1219                                 this->tparent->children = NULL;
1220                 }
1221         } else {
1222                 BUG_TRAP(this->sibling == this);
1223         }
1224 }
1225
1226 static void cbq_link_class(struct cbq_class *this)
1227 {
1228         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1229         unsigned h = cbq_hash(this->classid);
1230         struct cbq_class *parent = this->tparent;
1231
1232         this->sibling = this;
1233         this->next = q->classes[h];
1234         q->classes[h] = this;
1235
1236         if (parent == NULL)
1237                 return;
1238
1239         if (parent->children == NULL) {
1240                 parent->children = this;
1241         } else {
1242                 this->sibling = parent->children->sibling;
1243                 parent->children->sibling = this;
1244         }
1245 }
1246
1247 static unsigned int cbq_drop(struct Qdisc* sch)
1248 {
1249         struct cbq_sched_data *q = qdisc_priv(sch);
1250         struct cbq_class *cl, *cl_head;
1251         int prio;
1252         unsigned int len;
1253
1254         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1255                 if ((cl_head = q->active[prio]) == NULL)
1256                         continue;
1257
1258                 cl = cl_head;
1259                 do {
1260                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1261                                 sch->q.qlen--;
1262                                 return len;
1263                         }
1264                 } while ((cl = cl->next_alive) != cl_head);
1265         }
1266         return 0;
1267 }
1268
1269 static void
1270 cbq_reset(struct Qdisc* sch)
1271 {
1272         struct cbq_sched_data *q = qdisc_priv(sch);
1273         struct cbq_class *cl;
1274         int prio;
1275         unsigned h;
1276
1277         q->activemask = 0;
1278         q->pmask = 0;
1279         q->tx_class = NULL;
1280         q->tx_borrowed = NULL;
1281         del_timer(&q->wd_timer);
1282         del_timer(&q->delay_timer);
1283         q->toplevel = TC_CBQ_MAXLEVEL;
1284         PSCHED_GET_TIME(q->now);
1285         q->now_rt = q->now;
1286
1287         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1288                 q->active[prio] = NULL;
1289
1290         for (h = 0; h < 16; h++) {
1291                 for (cl = q->classes[h]; cl; cl = cl->next) {
1292                         qdisc_reset(cl->q);
1293
1294                         cl->next_alive = NULL;
1295                         PSCHED_SET_PASTPERFECT(cl->undertime);
1296                         cl->avgidle = cl->maxidle;
1297                         cl->deficit = cl->quantum;
1298                         cl->cpriority = cl->priority;
1299                 }
1300         }
1301         sch->q.qlen = 0;
1302 }
1303
1304
1305 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1306 {
1307         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1308                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1309                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1310         }
1311         if (lss->change&TCF_CBQ_LSS_EWMA)
1312                 cl->ewma_log = lss->ewma_log;
1313         if (lss->change&TCF_CBQ_LSS_AVPKT)
1314                 cl->avpkt = lss->avpkt;
1315         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1316                 cl->minidle = -(long)lss->minidle;
1317         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1318                 cl->maxidle = lss->maxidle;
1319                 cl->avgidle = lss->maxidle;
1320         }
1321         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1322                 cl->offtime = lss->offtime;
1323         return 0;
1324 }
1325
1326 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1327 {
1328         q->nclasses[cl->priority]--;
1329         q->quanta[cl->priority] -= cl->weight;
1330         cbq_normalize_quanta(q, cl->priority);
1331 }
1332
1333 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1334 {
1335         q->nclasses[cl->priority]++;
1336         q->quanta[cl->priority] += cl->weight;
1337         cbq_normalize_quanta(q, cl->priority);
1338 }
1339
1340 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1341 {
1342         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1343
1344         if (wrr->allot)
1345                 cl->allot = wrr->allot;
1346         if (wrr->weight)
1347                 cl->weight = wrr->weight;
1348         if (wrr->priority) {
1349                 cl->priority = wrr->priority-1;
1350                 cl->cpriority = cl->priority;
1351                 if (cl->priority >= cl->priority2)
1352                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1353         }
1354
1355         cbq_addprio(q, cl);
1356         return 0;
1357 }
1358
1359 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1360 {
1361         switch (ovl->strategy) {
1362         case TC_CBQ_OVL_CLASSIC:
1363                 cl->overlimit = cbq_ovl_classic;
1364                 break;
1365         case TC_CBQ_OVL_DELAY:
1366                 cl->overlimit = cbq_ovl_delay;
1367                 break;
1368         case TC_CBQ_OVL_LOWPRIO:
1369                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1370                     ovl->priority2-1 <= cl->priority)
1371                         return -EINVAL;
1372                 cl->priority2 = ovl->priority2-1;
1373                 cl->overlimit = cbq_ovl_lowprio;
1374                 break;
1375         case TC_CBQ_OVL_DROP:
1376                 cl->overlimit = cbq_ovl_drop;
1377                 break;
1378         case TC_CBQ_OVL_RCLASSIC:
1379                 cl->overlimit = cbq_ovl_rclassic;
1380                 break;
1381         default:
1382                 return -EINVAL;
1383         }
1384         cl->penalty = (ovl->penalty*HZ)/1000;
1385         return 0;
1386 }
1387
1388 #ifdef CONFIG_NET_CLS_POLICE
1389 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1390 {
1391         cl->police = p->police;
1392
1393         if (cl->q->handle) {
1394                 if (p->police == TC_POLICE_RECLASSIFY)
1395                         cl->q->reshape_fail = cbq_reshape_fail;
1396                 else
1397                         cl->q->reshape_fail = NULL;
1398         }
1399         return 0;
1400 }
1401 #endif
1402
1403 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1404 {
1405         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1406         return 0;
1407 }
1408
1409 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1410 {
1411         struct cbq_sched_data *q = qdisc_priv(sch);
1412         struct rtattr *tb[TCA_CBQ_MAX];
1413         struct tc_ratespec *r;
1414
1415         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1416             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1417             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1418                 return -EINVAL;
1419
1420         if (tb[TCA_CBQ_LSSOPT-1] &&
1421             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1422                 return -EINVAL;
1423
1424         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1425
1426         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1427                 return -EINVAL;
1428
1429         q->link.refcnt = 1;
1430         q->link.sibling = &q->link;
1431         q->link.classid = sch->handle;
1432         q->link.qdisc = sch;
1433         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1434                 q->link.q = &noop_qdisc;
1435
1436         q->link.priority = TC_CBQ_MAXPRIO-1;
1437         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440         q->link.overlimit = cbq_ovl_classic;
1441         q->link.allot = psched_mtu(sch->dev);
1442         q->link.quantum = q->link.allot;
1443         q->link.weight = q->link.R_tab->rate.rate;
1444
1445         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446         q->link.avpkt = q->link.allot/2;
1447         q->link.minidle = -0x7FFFFFFF;
1448         q->link.stats_lock = &sch->dev->queue_lock;
1449
1450         init_timer(&q->wd_timer);
1451         q->wd_timer.data = (unsigned long)sch;
1452         q->wd_timer.function = cbq_watchdog;
1453         init_timer(&q->delay_timer);
1454         q->delay_timer.data = (unsigned long)sch;
1455         q->delay_timer.function = cbq_undelay;
1456         q->toplevel = TC_CBQ_MAXLEVEL;
1457         PSCHED_GET_TIME(q->now);
1458         q->now_rt = q->now;
1459
1460         cbq_link_class(&q->link);
1461
1462         if (tb[TCA_CBQ_LSSOPT-1])
1463                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464
1465         cbq_addprio(q, &q->link);
1466         return 0;
1467 }
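
/* For reference, a CBQ hierarchy served by this qdisc is normally configured
   from userspace with the tc utility, roughly as follows (illustrative device
   and rate values):

       tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
       tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
               rate 1Mbit allot 1514 prio 5 avpkt 1000 bounded

   The resulting parameters reach cbq_init() (and the class-change handlers)
   as TCA_CBQ_RATE, TCA_CBQ_LSSOPT, TCA_CBQ_WRROPT, ... netlink attributes. */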
1468
1469 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 {
1471         unsigned char    *b = skb->tail;
1472
1473         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1474         return skb->len;
1475
1476 rtattr_failure:
1477         skb_trim(skb, b - skb->data);
1478         return -1;
1479 }
1480
1481 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 {
1483         unsigned char    *b = skb->tail;
1484         struct tc_cbq_lssopt opt;
1485
1486         opt.flags = 0;
1487         if (cl->borrow == NULL)
1488                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1489         if (cl->share == NULL)
1490                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1491         opt.ewma_log = cl->ewma_log;
1492         opt.level = cl->level;
1493         opt.avpkt = cl->avpkt;
1494         opt.maxidle = cl->maxidle;
1495         opt.minidle = (u32)(-cl->minidle);
1496         opt.offtime = cl->offtime;
1497         opt.change = ~0;
1498         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1499         return skb->len;
1500
1501 rtattr_failure:
1502         skb_trim(skb, b - skb->data);
1503         return -1;
1504 }
1505
1506 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 {
1508         unsigned char    *b = skb->tail;
1509         struct tc_cbq_wrropt opt;
1510
1511         opt.flags = 0;
1512         opt.allot = cl->allot;
1513         opt.priority = cl->priority+1;
1514         opt.cpriority = cl->cpriority+1;
1515         opt.weight = cl->weight;
1516         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1517         return skb->len;
1518
1519 rtattr_failure:
1520         skb_trim(skb, b - skb->data);
1521         return -1;
1522 }
1523
1524 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         unsigned char    *b = skb->tail;
1527         struct tc_cbq_ovl opt;
1528
1529         opt.strategy = cl->ovl_strategy;
1530         opt.priority2 = cl->priority2+1;
1531         opt.penalty = (cl->penalty*1000)/HZ;
1532         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1533         return skb->len;
1534
1535 rtattr_failure:
1536         skb_trim(skb, b - skb->data);
1537         return -1;
1538 }
1539
1540 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1541 {
1542         unsigned char    *b = skb->tail;
1543         struct tc_cbq_fopt opt;
1544
1545         if (cl->split || cl->defmap) {
1546                 opt.split = cl->split ? cl->split->classid : 0;
1547                 opt.defmap = cl->defmap;
1548                 opt.defchange = ~0;
1549                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1550         }
1551         return skb->len;
1552
1553 rtattr_failure:
1554         skb_trim(skb, b - skb->data);
1555         return -1;
1556 }
1557
1558 #ifdef CONFIG_NET_CLS_POLICE
1559 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1560 {
1561         unsigned char    *b = skb->tail;
1562         struct tc_cbq_police opt;
1563         memset(&opt, 0, sizeof(opt));   /* clear the reserved fields; don't copy stack garbage to userspace */
1564         if (cl->police) {
1565                 opt.police = cl->police;
1566                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1567         }
1568         return skb->len;
1569
1570 rtattr_failure:
1571         skb_trim(skb, b - skb->data);
1572         return -1;
1573 }
1574 #endif
1575
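/* Emit every attribute group for a class; a failure in any helper aborts the dump. */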
1576 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1577 {
1578         if (cbq_dump_lss(skb, cl) < 0 ||
1579             cbq_dump_rate(skb, cl) < 0 ||
1580             cbq_dump_wrr(skb, cl) < 0 ||
1581             cbq_dump_ovl(skb, cl) < 0 ||
1582 #ifdef CONFIG_NET_CLS_POLICE
1583             cbq_dump_police(skb, cl) < 0 ||
1584 #endif
1585             cbq_dump_fopt(skb, cl) < 0)
1586                 return -1;
1587         return 0;
1588 }
1589
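/* Dump the qdisc itself: the root "link" class parameters, nested in TCA_OPTIONS. */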
1590 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1591 {
1592         struct cbq_sched_data *q = qdisc_priv(sch);
1593         unsigned char    *b = skb->tail;
1594         struct rtattr *rta;
1595
1596         rta = (struct rtattr*)b;
1597         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1598         if (cbq_dump_attr(skb, &q->link) < 0)
1599                 goto rtattr_failure;
1600         rta->rta_len = skb->tail - b;
1601         return skb->len;
1602
1603 rtattr_failure:
1604         skb_trim(skb, b - skb->data);
1605         return -1;
1606 }
1607
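/* Qdisc-level extended stats: export the link class's current avgidle. */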
1608 static int
1609 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1610 {
1611         struct cbq_sched_data *q = qdisc_priv(sch);
1612
1613         q->link.xstats.avgidle = q->link.avgidle;
1614         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1615 }
1616
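/* Dump one class: fill in the tcmsg header and nest its attributes in TCA_OPTIONS. */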
1617 static int
1618 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1619                struct sk_buff *skb, struct tcmsg *tcm)
1620 {
1621         struct cbq_class *cl = (struct cbq_class*)arg;
1622         unsigned char    *b = skb->tail;
1623         struct rtattr *rta;
1624
1625         if (cl->tparent)
1626                 tcm->tcm_parent = cl->tparent->classid;
1627         else
1628                 tcm->tcm_parent = TC_H_ROOT;
1629         tcm->tcm_handle = cl->classid;
1630         tcm->tcm_info = cl->q->handle;
1631
1632         rta = (struct rtattr*)b;
1633         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1634         if (cbq_dump_attr(skb, cl) < 0)
1635                 goto rtattr_failure;
1636         rta->rta_len = skb->tail - b;
1637         return skb->len;
1638
1639 rtattr_failure:
1640         skb_trim(skb, b - skb->data);
1641         return -1;
1642 }
1643
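/*
 * Per-class statistics: snapshot the current queue length, avgidle and
 * undertime into xstats before copying the generic counters out.
 */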
1644 static int
1645 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1646         struct gnet_dump *d)
1647 {
1648         struct cbq_sched_data *q = qdisc_priv(sch);
1649         struct cbq_class *cl = (struct cbq_class*)arg;
1650
1651         cl->qstats.qlen = cl->q->q.qlen;
1652         cl->xstats.avgidle = cl->avgidle;
1653         cl->xstats.undertime = 0;
1654
1655         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1656                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1657
1658         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1659 #ifdef CONFIG_NET_ESTIMATOR
1660             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1661 #endif
1662             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1663                 return -1;
1664
1665         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1666 }
1667
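/*
 * Replace the leaf qdisc of a class.  A NULL replacement attaches a fresh
 * default pfifo; the previous qdisc is reset and handed back through *old.
 */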
1668 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1669                      struct Qdisc **old)
1670 {
1671         struct cbq_class *cl = (struct cbq_class*)arg;
1672
1673         if (cl) {
1674                 if (new == NULL) {
1675                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1676                                 return -ENOBUFS;
1677                 } else {
1678 #ifdef CONFIG_NET_CLS_POLICE
1679                         if (cl->police == TC_POLICE_RECLASSIFY)
1680                                 new->reshape_fail = cbq_reshape_fail;
1681 #endif
1682                 }
1683                 sch_tree_lock(sch);
1684                 *old = cl->q;
1685                 cl->q = new;
1686                 sch->q.qlen -= (*old)->q.qlen;
1687                 qdisc_reset(*old);
1688                 sch_tree_unlock(sch);
1689
1690                 return 0;
1691         }
1692         return -ENOENT;
1693 }
1694
1695 static struct Qdisc *
1696 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1697 {
1698         struct cbq_class *cl = (struct cbq_class*)arg;
1699
1700         return cl ? cl->q : NULL;
1701 }
1702
1703 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1704 {
1705         struct cbq_sched_data *q = qdisc_priv(sch);
1706         struct cbq_class *cl = cbq_class_lookup(q, classid);
1707
1708         if (cl) {
1709                 cl->refcnt++;
1710                 return (unsigned long)cl;
1711         }
1712         return 0;
1713 }
1714
1715 static void cbq_destroy_filters(struct cbq_class *cl)
1716 {
1717         struct tcf_proto *tp;
1718
1719         while ((tp = cl->filter_list) != NULL) {
1720                 cl->filter_list = tp->next;
1721                 tcf_destroy(tp);
1722         }
1723 }
1724
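/* Free a class and everything it owns; the embedded link class itself is never kfree()d. */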
1725 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1726 {
1727         struct cbq_sched_data *q = qdisc_priv(sch);
1728
1729         BUG_TRAP(!cl->filters);
1730
1731         cbq_destroy_filters(cl);
1732         qdisc_destroy(cl->q);
1733         qdisc_put_rtab(cl->R_tab);
1734 #ifdef CONFIG_NET_ESTIMATOR
1735         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1736 #endif
1737         if (cl != &q->link)
1738                 kfree(cl);
1739 }
1740
1741 static void
1742 cbq_destroy(struct Qdisc* sch)
1743 {
1744         struct cbq_sched_data *q = qdisc_priv(sch);
1745         struct cbq_class *cl;
1746         unsigned h;
1747
1748 #ifdef CONFIG_NET_CLS_POLICE
1749         q->rx_class = NULL;
1750 #endif
1751         /*
1752          * Filters must be destroyed first because classes are not destroyed
1753          * from root to leaves, so filters may still be bound to classes that
1754          * have already been destroyed. --TGR '04
1755          */
1756         for (h = 0; h < 16; h++)
1757                 for (cl = q->classes[h]; cl; cl = cl->next)
1758                         cbq_destroy_filters(cl);
1759
1760         for (h = 0; h < 16; h++) {
1761                 struct cbq_class *next;
1762
1763                 for (cl = q->classes[h]; cl; cl = next) {
1764                         next = cl->next;
1765                         cbq_destroy_class(sch, cl);
1766                 }
1767         }
1768 }
1769
1770 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1771 {
1772         struct cbq_class *cl = (struct cbq_class*)arg;
1773
1774         if (--cl->refcnt == 0) {
1775 #ifdef CONFIG_NET_CLS_POLICE
1776                 struct cbq_sched_data *q = qdisc_priv(sch);
1777
1778                 spin_lock_bh(&sch->dev->queue_lock);
1779                 if (q->rx_class == cl)
1780                         q->rx_class = NULL;
1781                 spin_unlock_bh(&sch->dev->queue_lock);
1782 #endif
1783
1784                 cbq_destroy_class(sch, cl);
1785         }
1786 }
1787
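/*
 * Create a new class or change an existing one.  All supplied attributes are
 * length-checked first; an existing class is updated under the tree lock,
 * otherwise a new class is allocated and linked into the tree below.
 */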
1788 static int
1789 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1790                  unsigned long *arg)
1791 {
1792         int err;
1793         struct cbq_sched_data *q = qdisc_priv(sch);
1794         struct cbq_class *cl = (struct cbq_class*)*arg;
1795         struct rtattr *opt = tca[TCA_OPTIONS-1];
1796         struct rtattr *tb[TCA_CBQ_MAX];
1797         struct cbq_class *parent;
1798         struct qdisc_rate_table *rtab = NULL;
1799
1800         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1801                 return -EINVAL;
1802
1803         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1804             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1805                 return -EINVAL;
1806
1807         if (tb[TCA_CBQ_FOPT-1] &&
1808             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1809                 return -EINVAL;
1810
1811         if (tb[TCA_CBQ_RATE-1] &&
1812             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1813                 return -EINVAL;
1814
1815         if (tb[TCA_CBQ_LSSOPT-1] &&
1816             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1817                 return -EINVAL;
1818
1819         if (tb[TCA_CBQ_WRROPT-1] &&
1820             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1821                 return -EINVAL;
1822
1823 #ifdef CONFIG_NET_CLS_POLICE
1824         if (tb[TCA_CBQ_POLICE-1] &&
1825             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1826                 return -EINVAL;
1827 #endif
1828
1829         if (cl) {
1830                 /* Check parent */
1831                 if (parentid) {
1832                         if (cl->tparent && cl->tparent->classid != parentid)
1833                                 return -EINVAL;
1834                         if (!cl->tparent && parentid != TC_H_ROOT)
1835                                 return -EINVAL;
1836                 }
1837
1838                 if (tb[TCA_CBQ_RATE-1]) {
1839                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1840                         if (rtab == NULL)
1841                                 return -EINVAL;
1842                 }
1843
1844                 /* Change class parameters */
1845                 sch_tree_lock(sch);
1846
1847                 if (cl->next_alive != NULL)
1848                         cbq_deactivate_class(cl);
1849
1850                 if (rtab) {
1851                         rtab = xchg(&cl->R_tab, rtab);
1852                         qdisc_put_rtab(rtab);
1853                 }
1854
1855                 if (tb[TCA_CBQ_LSSOPT-1])
1856                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1857
1858                 if (tb[TCA_CBQ_WRROPT-1]) {
1859                         cbq_rmprio(q, cl);
1860                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1861                 }
1862
1863                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1864                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1865
1866 #ifdef CONFIG_NET_CLS_POLICE
1867                 if (tb[TCA_CBQ_POLICE-1])
1868                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1869 #endif
1870
1871                 if (tb[TCA_CBQ_FOPT-1])
1872                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1873
1874                 if (cl->q->q.qlen)
1875                         cbq_activate_class(cl);
1876
1877                 sch_tree_unlock(sch);
1878
1879 #ifdef CONFIG_NET_ESTIMATOR
1880                 if (tca[TCA_RATE-1])
1881                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1882                                 cl->stats_lock, tca[TCA_RATE-1]);
1883 #endif
1884                 return 0;
1885         }
1886
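        /*
         * No existing class: create one.  TC_H_ROOT is not accepted as a parent;
         * a parentid of zero attaches the new class directly under the link class.
         */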
1887         if (parentid == TC_H_ROOT)
1888                 return -EINVAL;
1889
1890         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1891             tb[TCA_CBQ_LSSOPT-1] == NULL)
1892                 return -EINVAL;
1893
1894         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1895         if (rtab == NULL)
1896                 return -EINVAL;
1897
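        /* Use the caller-supplied classid if valid and free, otherwise auto-allocate a minor in the 0x8000 range. */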
1898         if (classid) {
1899                 err = -EINVAL;
1900                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1901                         goto failure;
1902         } else {
1903                 int i;
1904                 classid = TC_H_MAKE(sch->handle,0x8000);
1905
1906                 for (i=0; i<0x8000; i++) {
1907                         if (++q->hgenerator >= 0x8000)
1908                                 q->hgenerator = 1;
1909                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1910                                 break;
1911                 }
1912                 err = -ENOSR;
1913                 if (i >= 0x8000)
1914                         goto failure;
1915                 classid = classid|q->hgenerator;
1916         }
1917
1918         parent = &q->link;
1919         if (parentid) {
1920                 parent = cbq_class_lookup(q, parentid);
1921                 err = -EINVAL;
1922                 if (parent == NULL)
1923                         goto failure;
1924         }
1925
1926         err = -ENOBUFS;
1927         cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1928         if (cl == NULL)
1929                 goto failure;
1930         memset(cl, 0, sizeof(*cl));
1931         cl->R_tab = rtab;
1932         rtab = NULL;
1933         cl->refcnt = 1;
1934         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1935                 cl->q = &noop_qdisc;
1936         cl->classid = classid;
1937         cl->tparent = parent;
1938         cl->qdisc = sch;
1939         cl->allot = parent->allot;
1940         cl->quantum = cl->allot;
1941         cl->weight = cl->R_tab->rate.rate;
1942         cl->stats_lock = &sch->dev->queue_lock;
1943
1944         sch_tree_lock(sch);
1945         cbq_link_class(cl);
1946         cl->borrow = cl->tparent;
1947         if (cl->tparent != &q->link)
1948                 cl->share = cl->tparent;
1949         cbq_adjust_levels(parent);
1950         cl->minidle = -0x7FFFFFFF;
1951         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1952         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1953         if (cl->ewma_log==0)
1954                 cl->ewma_log = q->link.ewma_log;
1955         if (cl->maxidle==0)
1956                 cl->maxidle = q->link.maxidle;
1957         if (cl->avpkt==0)
1958                 cl->avpkt = q->link.avpkt;
1959         cl->overlimit = cbq_ovl_classic;
1960         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1961                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1962 #ifdef CONFIG_NET_CLS_POLICE
1963         if (tb[TCA_CBQ_POLICE-1])
1964                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1965 #endif
1966         if (tb[TCA_CBQ_FOPT-1])
1967                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1968         sch_tree_unlock(sch);
1969
1970 #ifdef CONFIG_NET_ESTIMATOR
1971         if (tca[TCA_RATE-1])
1972                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1973                         cl->stats_lock, tca[TCA_RATE-1]);
1974 #endif
1975
1976         *arg = (unsigned long)cl;
1977         return 0;
1978
1979 failure:
1980         qdisc_put_rtab(rtab);
1981         return err;
1982 }
1983
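/*
 * Delete a class.  Refuse while filters or child classes still reference it,
 * or when it is the built-in link class; otherwise unlink it, fix up levels
 * and defmaps, and free it once the last reference is dropped.
 */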
1984 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1985 {
1986         struct cbq_sched_data *q = qdisc_priv(sch);
1987         struct cbq_class *cl = (struct cbq_class*)arg;
1988
1989         if (cl->filters || cl->children || cl == &q->link)
1990                 return -EBUSY;
1991
1992         sch_tree_lock(sch);
1993
1994         if (cl->next_alive)
1995                 cbq_deactivate_class(cl);
1996
1997         if (q->tx_borrowed == cl)
1998                 q->tx_borrowed = q->tx_class;
1999         if (q->tx_class == cl) {
2000                 q->tx_class = NULL;
2001                 q->tx_borrowed = NULL;
2002         }
2003 #ifdef CONFIG_NET_CLS_POLICE
2004         if (q->rx_class == cl)
2005                 q->rx_class = NULL;
2006 #endif
2007
2008         cbq_unlink_class(cl);
2009         cbq_adjust_levels(cl->tparent);
2010         cl->defmap = 0;
2011         cbq_sync_defmap(cl);
2012
2013         cbq_rmprio(q, cl);
2014         sch_tree_unlock(sch);
2015
2016         if (--cl->refcnt == 0)
2017                 cbq_destroy_class(sch, cl);
2018
2019         return 0;
2020 }
2021
2022 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2023 {
2024         struct cbq_sched_data *q = qdisc_priv(sch);
2025         struct cbq_class *cl = (struct cbq_class *)arg;
2026
2027         if (cl == NULL)
2028                 cl = &q->link;
2029
2030         return &cl->filter_list;
2031 }
2032
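/*
 * Filter binding: a filter attached to class 'parent' may only select classes
 * deeper in the tree (cl->level below the parent's level).  cl->filters counts
 * bindings so a class with live filters cannot be deleted.
 */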
2033 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2034                                      u32 classid)
2035 {
2036         struct cbq_sched_data *q = qdisc_priv(sch);
2037         struct cbq_class *p = (struct cbq_class*)parent;
2038         struct cbq_class *cl = cbq_class_lookup(q, classid);
2039
2040         if (cl) {
2041                 if (p && p->level <= cl->level)
2042                         return 0;
2043                 cl->filters++;
2044                 return (unsigned long)cl;
2045         }
2046         return 0;
2047 }
2048
2049 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2050 {
2051         struct cbq_class *cl = (struct cbq_class*)arg;
2052
2053         cl->filters--;
2054 }
2055
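/* Walk all classes in the 16-bucket hash, honouring the walker's skip/count/stop protocol. */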
2056 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2057 {
2058         struct cbq_sched_data *q = qdisc_priv(sch);
2059         unsigned h;
2060
2061         if (arg->stop)
2062                 return;
2063
2064         for (h = 0; h < 16; h++) {
2065                 struct cbq_class *cl;
2066
2067                 for (cl = q->classes[h]; cl; cl = cl->next) {
2068                         if (arg->count < arg->skip) {
2069                                 arg->count++;
2070                                 continue;
2071                         }
2072                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2073                                 arg->stop = 1;
2074                                 return;
2075                         }
2076                         arg->count++;
2077                 }
2078         }
2079 }
2080
2081 static struct Qdisc_class_ops cbq_class_ops = {
2082         .graft          =       cbq_graft,
2083         .leaf           =       cbq_leaf,
2084         .get            =       cbq_get,
2085         .put            =       cbq_put,
2086         .change         =       cbq_change_class,
2087         .delete         =       cbq_delete,
2088         .walk           =       cbq_walk,
2089         .tcf_chain      =       cbq_find_tcf,
2090         .bind_tcf       =       cbq_bind_filter,
2091         .unbind_tcf     =       cbq_unbind_filter,
2092         .dump           =       cbq_dump_class,
2093         .dump_stats     =       cbq_dump_class_stats,
2094 };
2095
2096 static struct Qdisc_ops cbq_qdisc_ops = {
2097         .next           =       NULL,
2098         .cl_ops         =       &cbq_class_ops,
2099         .id             =       "cbq",
2100         .priv_size      =       sizeof(struct cbq_sched_data),
2101         .enqueue        =       cbq_enqueue,
2102         .dequeue        =       cbq_dequeue,
2103         .requeue        =       cbq_requeue,
2104         .drop           =       cbq_drop,
2105         .init           =       cbq_init,
2106         .reset          =       cbq_reset,
2107         .destroy        =       cbq_destroy,
2108         .change         =       NULL,
2109         .dump           =       cbq_dump,
2110         .dump_stats     =       cbq_dump_stats,
2111         .owner          =       THIS_MODULE,
2112 };
2113
2114 static int __init cbq_module_init(void)
2115 {
2116         return register_qdisc(&cbq_qdisc_ops);
2117 }
2118 static void __exit cbq_module_exit(void)
2119 {
2120         unregister_qdisc(&cbq_qdisc_ops);
2121 }
2122 module_init(cbq_module_init)
2123 module_exit(cbq_module_exit)
2124 MODULE_LICENSE("GPL");