1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <linux/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /*      Class-Based Queueing (CBQ) algorithm.
41         =======================================
42
43         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44                  Management Models for Packet Networks",
45                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50                  Parameters", 1996
51
52                  [4] Sally Floyd and Michael Speer, "Experimental Results
53                  for Class-Based Queueing", 1998, not published.
54
55         -----------------------------------------------------------------------
56
57         The algorithm skeleton was taken from the NS simulator's cbq.cc.
58         If someone wants to check this code against the LBL version,
59         they should take into account that ONLY the skeleton was borrowed;
60         the implementation is different. In particular:
61
62         --- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than MTU, which is always the case when real time classes
65         have small rates. Note that the statement in [3] is incomplete:
66         the delay may actually be estimated even if the per-round class
67         allotment is less than the MTU. Namely, if the per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1, then
69
70         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72         In the worst case we have IntServ estimate with D = W*r+k*MTU
73         and C = MTU*r. The proof (if correct at all) is trivial.
74
75
76         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77         interpret some places, which look like wrong translations
78         from NS. Anyone is welcome to find these differences
79         and explain to me why I am wrong 8).
80
81         --- Linux has no EOI event, so we cannot estimate true class
82         idle time. The workaround is to consider the next dequeue event
83         as a sign that the previous packet has finished. This is wrong because of
84         internal device queueing, but on a permanently loaded link it holds.
85         Moreover, combined with the clock integrator, this scheme looks
86         very close to an ideal solution.  */
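
/* Illustrative sketch (added comment, not from the original source): a CBQ
   hierarchy is set up from userspace with the tc(8) utility, roughly along
   these lines; the exact option syntax is defined by iproute2's tc-cbq,
   not by this file:

       tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000 cell 8
       tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
               rate 6Mbit allot 1514 prio 5 avpkt 1000 bounded

   Each "tc class" maps onto a struct cbq_class below; "bounded"/"isolated"
   control the borrow/share pointers, and "prio"/"allot" feed the WRR
   scheduler.  */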
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93         struct cbq_class        *next;          /* hash table link */
94         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
95
96 /* Parameters */
97         u32                     classid;
98         unsigned char           priority;       /* class priority */
99         unsigned char           priority2;      /* priority to be used after overlimit */
100         unsigned char           ewma_log;       /* time constant for idle time calculation */
101         unsigned char           ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103         unsigned char           police;
104 #endif
105
106         u32                     defmap;
107
108         /* Link-sharing scheduler parameters */
109         long                    maxidle;        /* Class parameters: see below. */
110         long                    offtime;
111         long                    minidle;
112         u32                     avpkt;
113         struct qdisc_rate_table *R_tab;
114
115         /* Overlimit strategy parameters */
116         void                    (*overlimit)(struct cbq_class *cl);
117         long                    penalty;
118
119         /* General scheduler (WRR) parameters */
120         long                    allot;
121         long                    quantum;        /* Allotment per WRR round */
122         long                    weight;         /* Relative allotment: see below */
123
124         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
125         struct cbq_class        *split;         /* Ptr to split node */
126         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
127         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
128         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
129                                                    parent otherwise */
130         struct cbq_class        *sibling;       /* Sibling chain */
131         struct cbq_class        *children;      /* Pointer to children chain */
132
133         struct Qdisc            *q;             /* Elementary queueing discipline */
134
135
136 /* Variables */
137         unsigned char           cpriority;      /* Effective priority */
138         unsigned char           delayed;
139         unsigned char           level;          /* level of the class in hierarchy:
140                                                    0 for leaf classes, and maximal
141                                                    level of children + 1 for nodes.
142                                                  */
143
144         psched_time_t           last;           /* Last end of service */
145         psched_time_t           undertime;
146         long                    avgidle;
147         long                    deficit;        /* Saved deficit for WRR */
148         unsigned long           penalized;
149         struct gnet_stats_basic bstats;
150         struct gnet_stats_queue qstats;
151         struct gnet_stats_rate_est rate_est;
152         spinlock_t              *stats_lock;
153         struct tc_cbq_xstats    xstats;
154
155         struct tcf_proto        *filter_list;
156
157         int                     refcnt;
158         int                     filters;
159
160         struct cbq_class        *defaults[TC_PRIO_MAX+1];
161 };
162
163 struct cbq_sched_data
164 {
165         struct cbq_class        *classes[16];           /* Hash table of all classes */
166         int                     nclasses[TC_CBQ_MAXPRIO+1];
167         unsigned                quanta[TC_CBQ_MAXPRIO+1];
168
169         struct cbq_class        link;
170
171         unsigned                activemask;
172         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
173                                                                    with backlog */
174
175 #ifdef CONFIG_NET_CLS_POLICE
176         struct cbq_class        *rx_class;
177 #endif
178         struct cbq_class        *tx_class;
179         struct cbq_class        *tx_borrowed;
180         int                     tx_len;
181         psched_time_t           now;            /* Cached timestamp */
182         psched_time_t           now_rt;         /* Cached real time */
183         unsigned                pmask;
184
185         struct timer_list       delay_timer;
186         struct timer_list       wd_timer;       /* Watchdog timer,
187                                                    started when CBQ has
188                                                    backlog, but cannot
189                                                    transmit just now */
190         long                    wd_expires;
191         int                     toplevel;
192         u32                     hgenerator;
193 };
194
195
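/* L2T ("length to time"): look up, in the class's rate table computed by
   userspace tc, the time needed to transmit 'len' bytes at the class's
   configured rate.  The table is indexed by len >> cell_log.  */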
196 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
197
198
199 static __inline__ unsigned cbq_hash(u32 h)
200 {
201         h ^= h>>8;
202         h ^= h>>4;
203         return h&0xF;
204 }
205
206 static __inline__ struct cbq_class *
207 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
208 {
209         struct cbq_class *cl;
210
211         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
212                 if (cl->classid == classid)
213                         return cl;
214         return NULL;
215 }
216
217 #ifdef CONFIG_NET_CLS_POLICE
218
219 static struct cbq_class *
220 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
221 {
222         struct cbq_class *cl, *new;
223
224         for (cl = this->tparent; cl; cl = cl->tparent)
225                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
226                         return new;
227
228         return NULL;
229 }
230
231 #endif
232
233 /* Classify packet. The procedure is pretty complicated, but
234    it allows us to combine link sharing and priority scheduling
235    transparently.
236
237    Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
238    so that classification resolves to split nodes. Then packets are classified
239    by logical priority, or a more specific classifier may be attached
240    to the split node.
241  */
242
243 static struct cbq_class *
244 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
245 {
246         struct cbq_sched_data *q = qdisc_priv(sch);
247         struct cbq_class *head = &q->link;
248         struct cbq_class **defmap;
249         struct cbq_class *cl = NULL;
250         u32 prio = skb->priority;
251         struct tcf_result res;
252
253         /*
254          *  Step 1. If skb->priority points to one of our classes, use it.
255          */
256         if (TC_H_MAJ(prio^sch->handle) == 0 &&
257             (cl = cbq_class_lookup(q, prio)) != NULL)
258                         return cl;
259
260         for (;;) {
261                 int result = 0;
262 #ifdef CONFIG_NET_CLS_ACT
263                 int terminal = 0;
264 #endif
265                 defmap = head->defaults;
266
267                 /*
268                  * Step 2+n. Apply classifier.
269                  */
270                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
271                         goto fallback;
272
273                 if ((cl = (void*)res.class) == NULL) {
274                         if (TC_H_MAJ(res.classid))
275                                 cl = cbq_class_lookup(q, res.classid);
276                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
277                                 cl = defmap[TC_PRIO_BESTEFFORT];
278
279                         if (cl == NULL || cl->level >= head->level)
280                                 goto fallback;
281                 }
282
283 #ifdef CONFIG_NET_CLS_ACT
284                 switch (result) {
285                 case TC_ACT_SHOT: /* Stop and kfree */
286                         *qres = NET_XMIT_DROP;
287                         terminal = 1;
288                         break;
289                 case TC_ACT_QUEUED:
290                 case TC_ACT_STOLEN: 
291                         terminal = 1;
292                         break;
293                 case TC_ACT_RECLASSIFY:  /* Things look good */
294                 case TC_ACT_OK: 
295                 case TC_ACT_UNSPEC:
296                 default:
297                         break;
298                 }
299
300                 if (terminal) {
301                         kfree_skb(skb);
302                         return NULL;
303                 }
304 #else
305 #ifdef CONFIG_NET_CLS_POLICE
306                 switch (result) {
307                 case TC_POLICE_RECLASSIFY:
308                         return cbq_reclassify(skb, cl);
309                 case TC_POLICE_SHOT:
310                         return NULL;
311                 default:
312                         break;
313                 }
314 #endif
315 #endif
316                 if (cl->level == 0)
317                         return cl;
318
319                 /*
320                  * Step 3+n. If the classifier selected a link-sharing class,
321                  *         apply the agency-specific classifier.
322                  *         Repeat this procedure until we hit a leaf node.
323                  */
324                 head = cl;
325         }
326
327 fallback:
328         cl = head;
329
330         /*
331          * Step 4. No success...
332          */
333         if (TC_H_MAJ(prio) == 0 &&
334             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
335             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
336                 return head;
337
338         return cl;
339 }
340
341 /*
342    A packet has just been enqueued to an empty class.
343    cbq_activate_class adds the class to the tail of the active class list
344    of its priority band.
345  */
346
347 static __inline__ void cbq_activate_class(struct cbq_class *cl)
348 {
349         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
350         int prio = cl->cpriority;
351         struct cbq_class *cl_tail;
352
353         cl_tail = q->active[prio];
354         q->active[prio] = cl;
355
356         if (cl_tail != NULL) {
357                 cl->next_alive = cl_tail->next_alive;
358                 cl_tail->next_alive = cl;
359         } else {
360                 cl->next_alive = cl;
361                 q->activemask |= (1<<prio);
362         }
363 }
364
365 /*
366    Unlink the class from its active chain.
367    Note that the same unlinking is done directly in cbq_dequeue*
368    during the round-robin procedure.
369  */
370
371 static void cbq_deactivate_class(struct cbq_class *this)
372 {
373         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
374         int prio = this->cpriority;
375         struct cbq_class *cl;
376         struct cbq_class *cl_prev = q->active[prio];
377
378         do {
379                 cl = cl_prev->next_alive;
380                 if (cl == this) {
381                         cl_prev->next_alive = cl->next_alive;
382                         cl->next_alive = NULL;
383
384                         if (cl == q->active[prio]) {
385                                 q->active[prio] = cl_prev;
386                                 if (cl == q->active[prio]) {
387                                         q->active[prio] = NULL;
388                                         q->activemask &= ~(1<<prio);
389                                         return;
390                                 }
391                         }
392
393                         cl = cl_prev->next_alive;
394                         return;
395                 }
396         } while ((cl_prev = cl) != q->active[prio]);
397 }
398
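/* On enqueue to class 'cl': if some class on cl's borrow chain (starting with
   cl itself) is already underlimit, lower the toplevel cutoff to that class's
   level so that the freshly backlogged class may borrow on the next dequeue.  */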
399 static void
400 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
401 {
402         int toplevel = q->toplevel;
403
404         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
405                 psched_time_t now;
406                 psched_tdiff_t incr;
407
408                 PSCHED_GET_TIME(now);
409                 incr = PSCHED_TDIFF(now, q->now_rt);
410                 PSCHED_TADD2(q->now, incr, now);
411
412                 do {
413                         if (PSCHED_TLESS(cl->undertime, now)) {
414                                 q->toplevel = cl->level;
415                                 return;
416                         }
417                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
418         }
419 }
420
421 static int
422 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
423 {
424         struct cbq_sched_data *q = qdisc_priv(sch);
425         int len = skb->len;
426         int ret = NET_XMIT_SUCCESS;
427         struct cbq_class *cl = cbq_classify(skb, sch,&ret);
428
429 #ifdef CONFIG_NET_CLS_POLICE
430         q->rx_class = cl;
431 #endif
432         if (cl) {
433 #ifdef CONFIG_NET_CLS_POLICE
434                 cl->q->__parent = sch;
435 #endif
436                 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
437                         sch->q.qlen++;
438                         sch->bstats.packets++;
439                         sch->bstats.bytes+=len;
440                         cbq_mark_toplevel(q, cl);
441                         if (!cl->next_alive)
442                                 cbq_activate_class(cl);
443                         return ret;
444                 }
445         }
446
447 #ifndef CONFIG_NET_CLS_ACT
448         sch->qstats.drops++;
449         if (cl == NULL)
450                 kfree_skb(skb);
451         else {
452                 cbq_mark_toplevel(q, cl);
453                 cl->qstats.drops++;
454         }
455 #else
456         if ( NET_XMIT_DROP == ret) {
457                 sch->qstats.drops++;
458         }
459
460         if (cl != NULL) {
461                 cbq_mark_toplevel(q, cl);
462                 cl->qstats.drops++;
463         }
464 #endif
465         return ret;
466 }
467
468 static int
469 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
470 {
471         struct cbq_sched_data *q = qdisc_priv(sch);
472         struct cbq_class *cl;
473         int ret;
474
475         if ((cl = q->tx_class) == NULL) {
476                 kfree_skb(skb);
477                 sch->qstats.drops++;
478                 return NET_XMIT_CN;
479         }
480         q->tx_class = NULL;
481
482         cbq_mark_toplevel(q, cl);
483
484 #ifdef CONFIG_NET_CLS_POLICE
485         q->rx_class = cl;
486         cl->q->__parent = sch;
487 #endif
488         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
489                 sch->q.qlen++;
490                 sch->qstats.requeues++;
491                 if (!cl->next_alive)
492                         cbq_activate_class(cl);
493                 return 0;
494         }
495         sch->qstats.drops++;
496         cl->qstats.drops++;
497         return ret;
498 }
499
500 /* Overlimit actions */
501
502 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
503
504 static void cbq_ovl_classic(struct cbq_class *cl)
505 {
506         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
507         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
508
509         if (!cl->delayed) {
510                 delay += cl->offtime;
511
512                 /*
513                    The class goes to sleep, so it will have no
514                    chance to work off its avgidle. Let's forgive it 8)
515
516                    BTW cbq-2.0 has a bug in this place:
517                    apparently they forgot to shift it by cl->ewma_log.
518                  */
519                 if (cl->avgidle < 0)
520                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
521                 if (cl->avgidle < cl->minidle)
522                         cl->avgidle = cl->minidle;
523                 if (delay <= 0)
524                         delay = 1;
525                 PSCHED_TADD2(q->now, delay, cl->undertime);
526
527                 cl->xstats.overactions++;
528                 cl->delayed = 1;
529         }
530         if (q->wd_expires == 0 || q->wd_expires > delay)
531                 q->wd_expires = delay;
532
533         /* Dirty work! We must schedule wakeups based on the
534            real available rate, rather than the leaf rate,
535            which may be tiny (even zero).
536          */
537         if (q->toplevel == TC_CBQ_MAXLEVEL) {
538                 struct cbq_class *b;
539                 psched_tdiff_t base_delay = q->wd_expires;
540
541                 for (b = cl->borrow; b; b = b->borrow) {
542                         delay = PSCHED_TDIFF(b->undertime, q->now);
543                         if (delay < base_delay) {
544                                 if (delay <= 0)
545                                         delay = 1;
546                                 base_delay = delay;
547                         }
548                 }
549
550                 q->wd_expires = base_delay;
551         }
552 }
553
554 /* TC_CBQ_OVL_RCLASSIC: penalize, by offtime, classes in the hierarchy
555    when they go overlimit
556  */
557
558 static void cbq_ovl_rclassic(struct cbq_class *cl)
559 {
560         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
561         struct cbq_class *this = cl;
562
563         do {
564                 if (cl->level > q->toplevel) {
565                         cl = NULL;
566                         break;
567                 }
568         } while ((cl = cl->borrow) != NULL);
569
570         if (cl == NULL)
571                 cl = this;
572         cbq_ovl_classic(cl);
573 }
574
575 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit again */
576
577 static void cbq_ovl_delay(struct cbq_class *cl)
578 {
579         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
580         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
581
582         if (!cl->delayed) {
583                 unsigned long sched = jiffies;
584
585                 delay += cl->offtime;
586                 if (cl->avgidle < 0)
587                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
588                 if (cl->avgidle < cl->minidle)
589                         cl->avgidle = cl->minidle;
590                 PSCHED_TADD2(q->now, delay, cl->undertime);
591
592                 if (delay > 0) {
593                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
594                         cl->penalized = sched;
595                         cl->cpriority = TC_CBQ_MAXPRIO;
596                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
597                         if (del_timer(&q->delay_timer) &&
598                             (long)(q->delay_timer.expires - sched) > 0)
599                                 q->delay_timer.expires = sched;
600                         add_timer(&q->delay_timer);
601                         cl->delayed = 1;
602                         cl->xstats.overactions++;
603                         return;
604                 }
605                 delay = 1;
606         }
607         if (q->wd_expires == 0 || q->wd_expires > delay)
608                 q->wd_expires = delay;
609 }
610
611 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
612
613 static void cbq_ovl_lowprio(struct cbq_class *cl)
614 {
615         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
616
617         cl->penalized = jiffies + cl->penalty;
618
619         if (cl->cpriority != cl->priority2) {
620                 cl->cpriority = cl->priority2;
621                 q->pmask |= (1<<cl->cpriority);
622                 cl->xstats.overactions++;
623         }
624         cbq_ovl_classic(cl);
625 }
626
627 /* TC_CBQ_OVL_DROP: penalize class by dropping */
628
629 static void cbq_ovl_drop(struct cbq_class *cl)
630 {
631         if (cl->q->ops->drop)
632                 if (cl->q->ops->drop(cl->q))
633                         cl->qdisc->q.qlen--;
634         cl->xstats.overactions++;
635         cbq_ovl_classic(cl);
636 }
637
638 static void cbq_watchdog(unsigned long arg)
639 {
640         struct Qdisc *sch = (struct Qdisc*)arg;
641
642         sch->flags &= ~TCQ_F_THROTTLED;
643         netif_schedule(sch->dev);
644 }
645
646 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
647 {
648         struct cbq_class *cl;
649         struct cbq_class *cl_prev = q->active[prio];
650         unsigned long now = jiffies;
651         unsigned long sched = now;
652
653         if (cl_prev == NULL)
654                 return now;
655
656         do {
657                 cl = cl_prev->next_alive;
658                 if ((long)(now - cl->penalized) > 0) {
659                         cl_prev->next_alive = cl->next_alive;
660                         cl->next_alive = NULL;
661                         cl->cpriority = cl->priority;
662                         cl->delayed = 0;
663                         cbq_activate_class(cl);
664
665                         if (cl == q->active[prio]) {
666                                 q->active[prio] = cl_prev;
667                                 if (cl == q->active[prio]) {
668                                         q->active[prio] = NULL;
669                                         return 0;
670                                 }
671                         }
672
673                         cl = cl_prev->next_alive;
674                 } else if ((long)(sched - cl->penalized) > 0)
675                         sched = cl->penalized;
676         } while ((cl_prev = cl) != q->active[prio]);
677
678         return (long)(sched - now);
679 }
680
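/* delay_timer handler: for every priority band flagged in q->pmask, move the
   classes whose TC_CBQ_OVL_DELAY/LOWPRIO penalty has expired back to their
   normal priority band, re-arm the timer for the earliest remaining penalty,
   and kick the device so transmission can resume.  */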
681 static void cbq_undelay(unsigned long arg)
682 {
683         struct Qdisc *sch = (struct Qdisc*)arg;
684         struct cbq_sched_data *q = qdisc_priv(sch);
685         long delay = 0;
686         unsigned pmask;
687
688         pmask = q->pmask;
689         q->pmask = 0;
690
691         while (pmask) {
692                 int prio = ffz(~pmask);
693                 long tmp;
694
695                 pmask &= ~(1<<prio);
696
697                 tmp = cbq_undelay_prio(q, prio);
698                 if (tmp > 0) {
699                         q->pmask |= 1<<prio;
700                         if (tmp < delay || delay == 0)
701                                 delay = tmp;
702                 }
703         }
704
705         if (delay) {
706                 q->delay_timer.expires = jiffies + delay;
707                 add_timer(&q->delay_timer);
708         }
709
710         sch->flags &= ~TCQ_F_THROTTLED;
711         netif_schedule(sch->dev);
712 }
713
714
715 #ifdef CONFIG_NET_CLS_POLICE
716
717 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
718 {
719         int len = skb->len;
720         struct Qdisc *sch = child->__parent;
721         struct cbq_sched_data *q = qdisc_priv(sch);
722         struct cbq_class *cl = q->rx_class;
723
724         q->rx_class = NULL;
725
726         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
727
728                 cbq_mark_toplevel(q, cl);
729
730                 q->rx_class = cl;
731                 cl->q->__parent = sch;
732
733                 if (cl->q->enqueue(skb, cl->q) == 0) {
734                         sch->q.qlen++;
735                         sch->bstats.packets++;
736                         sch->bstats.bytes+=len;
737                         if (!cl->next_alive)
738                                 cbq_activate_class(cl);
739                         return 0;
740                 }
741                 sch->qstats.drops++;
742                 return 0;
743         }
744
745         sch->qstats.drops++;
746         return -1;
747 }
748 #endif
749
750 /*
751    This is a mission-critical procedure.
752
753    We "regenerate" the toplevel cutoff if the transmitting class
754    has backlog and is not regulated. This is not part of the
755    original CBQ description, but it looks more reasonable.
756    Probably it is wrong. This question needs further investigation.
757 */
758
759 static __inline__ void
760 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
761                     struct cbq_class *borrowed)
762 {
763         if (cl && q->toplevel >= borrowed->level) {
764                 if (cl->q->q.qlen > 1) {
765                         do {
766                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
767                                         q->toplevel = borrowed->level;
768                                         return;
769                                 }
770                         } while ((borrowed=borrowed->borrow) != NULL);
771                 }
772 #if 0   
773         /* It is not necessary now. Uncommenting it
774            will save CPU cycles, but decrease fairness.
775          */
776                 q->toplevel = TC_CBQ_MAXLEVEL;
777 #endif
778         }
779 }
780
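/* Called from cbq_dequeue() once the previously transmitted packet is deemed
   complete: charge q->tx_len to the transmitting class and to every ancestor
   on its 'share' chain, updating each class's idle-time EWMA (avgidle) and,
   for overlimit classes, the time at which the class becomes underlimit
   again (undertime).  */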
781 static void
782 cbq_update(struct cbq_sched_data *q)
783 {
784         struct cbq_class *this = q->tx_class;
785         struct cbq_class *cl = this;
786         int len = q->tx_len;
787
788         q->tx_class = NULL;
789
790         for ( ; cl; cl = cl->share) {
791                 long avgidle = cl->avgidle;
792                 long idle;
793
794                 cl->bstats.packets++;
795                 cl->bstats.bytes += len;
796
797                 /*
798                    (now - last) is total time between packet right edges.
799                    (last_pktlen/rate) is "virtual" busy time, so that
800
801                          idle = (now - last) - last_pktlen/rate
802                  */
803
804                 idle = PSCHED_TDIFF(q->now, cl->last);
805                 if ((unsigned long)idle > 128*1024*1024) {
806                         avgidle = cl->maxidle;
807                 } else {
808                         idle -= L2T(cl, len);
809
810                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
811                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
812                    cl->avgidle == true_avgidle/W,
813                    hence:
814                  */
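                /* Illustrative example (added comment, not in the original):
                   with ewma_log = 5 (W = 1/32) and idle = 0, the statement
                   below shrinks the scaled avgidle by 1/32 per packet,
                   e.g. 3200 -> 3200 - (3200 >> 5) = 3100.  */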
815                         avgidle += idle - (avgidle>>cl->ewma_log);
816                 }
817
818                 if (avgidle <= 0) {
819                         /* Overlimit or at-limit */
820
821                         if (avgidle < cl->minidle)
822                                 avgidle = cl->minidle;
823
824                         cl->avgidle = avgidle;
825
826                         /* Calculate the expected time when this class
827                            will be allowed to send.
828                            It will occur when:
829                            (1-W)*true_avgidle + W*delay = 0, i.e.
830                            idle = (1/W - 1)*(-true_avgidle)
831                            or
832                            idle = (1 - W)*(-cl->avgidle);
833                          */
834                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
835
836                         /*
837                            That is not all.
838                            To maintain the rate allocated to the class,
839                            we add to undertime the virtual clock time
840                            needed to complete the transmitted packet.
841                            (len/phys_bandwidth has already elapsed by
842                            the time cbq_update is called.)
843                          */
844
845                         idle -= L2T(&q->link, len);
846                         idle += L2T(cl, len);
847
848                         PSCHED_AUDIT_TDIFF(idle);
849
850                         PSCHED_TADD2(q->now, idle, cl->undertime);
851                 } else {
852                         /* Underlimit */
853
854                         PSCHED_SET_PASTPERFECT(cl->undertime);
855                         if (avgidle > cl->maxidle)
856                                 cl->avgidle = cl->maxidle;
857                         else
858                                 cl->avgidle = avgidle;
859                 }
860                 cl->last = q->now;
861         }
862
863         cbq_update_toplevel(q, this, q->tx_borrowed);
864 }
865
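/* Return the class that may actually transmit now: 'cl' itself if it is the
   root or currently underlimit, otherwise the nearest underlimit ancestor on
   the borrow chain that does not lie above the toplevel cutoff.  Returns NULL
   when borrowing is impossible; the overlimit action fires only when the
   whole borrow chain has been exhausted.  */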
866 static __inline__ struct cbq_class *
867 cbq_under_limit(struct cbq_class *cl)
868 {
869         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
870         struct cbq_class *this_cl = cl;
871
872         if (cl->tparent == NULL)
873                 return cl;
874
875         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
876             !PSCHED_TLESS(q->now, cl->undertime)) {
877                 cl->delayed = 0;
878                 return cl;
879         }
880
881         do {
882                 /* This is a very suspicious place. Currently the overlimit
883                    action is generated for non-bounded classes
884                    only if the link is completely congested.
885                    Though this agrees with the ancestor-only paradigm,
886                    it looks very stupid. In particular,
887                    it means that this chunk of code will either
888                    never be called or result in strong amplification
889                    of burstiness. Dangerous, silly, and yet
890                    no other solution exists.
891                  */
892                 if ((cl = cl->borrow) == NULL) {
893                         this_cl->qstats.overlimits++;
894                         this_cl->overlimit(this_cl);
895                         return NULL;
896                 }
897                 if (cl->level > q->toplevel)
898                         return NULL;
899         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
900                  PSCHED_TLESS(q->now, cl->undertime));
901
902         cl->delayed = 0;
903         return cl;
904 }
905
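/* Weighted round robin over the classes of one priority band.  A class may
   send while cl->deficit > 0; the deficit is decreased by each transmitted
   packet's length, and a class found exhausted has cl->quantum added back and
   is revisited on the next pass.  Classes that are empty, penalized, or
   unable to borrow are skipped (empty ones are unlinked from the active
   chain).  */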
906 static __inline__ struct sk_buff *
907 cbq_dequeue_prio(struct Qdisc *sch, int prio)
908 {
909         struct cbq_sched_data *q = qdisc_priv(sch);
910         struct cbq_class *cl_tail, *cl_prev, *cl;
911         struct sk_buff *skb;
912         int deficit;
913
914         cl_tail = cl_prev = q->active[prio];
915         cl = cl_prev->next_alive;
916
917         do {
918                 deficit = 0;
919
920                 /* Start round */
921                 do {
922                         struct cbq_class *borrow = cl;
923
924                         if (cl->q->q.qlen &&
925                             (borrow = cbq_under_limit(cl)) == NULL)
926                                 goto skip_class;
927
928                         if (cl->deficit <= 0) {
929                                 /* Class exhausted its allotment per
930                                    this round. Switch to the next one.
931                                  */
932                                 deficit = 1;
933                                 cl->deficit += cl->quantum;
934                                 goto next_class;
935                         }
936
937                         skb = cl->q->dequeue(cl->q);
938
939                         /* The class did not give us any skb :-(
940                            This may occur even if cl->q->q.qlen != 0,
941                            e.g. if cl->q is a "tbf" qdisc.
942                          */
943                         if (skb == NULL)
944                                 goto skip_class;
945
946                         cl->deficit -= skb->len;
947                         q->tx_class = cl;
948                         q->tx_borrowed = borrow;
949                         if (borrow != cl) {
950 #ifndef CBQ_XSTATS_BORROWS_BYTES
951                                 borrow->xstats.borrows++;
952                                 cl->xstats.borrows++;
953 #else
954                                 borrow->xstats.borrows += skb->len;
955                                 cl->xstats.borrows += skb->len;
956 #endif
957                         }
958                         q->tx_len = skb->len;
959
960                         if (cl->deficit <= 0) {
961                                 q->active[prio] = cl;
962                                 cl = cl->next_alive;
963                                 cl->deficit += cl->quantum;
964                         }
965                         return skb;
966
967 skip_class:
968                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
969                                 /* Class is empty or penalized.
970                                    Unlink it from active chain.
971                                  */
972                                 cl_prev->next_alive = cl->next_alive;
973                                 cl->next_alive = NULL;
974
975                                 /* Did cl_tail point to it? */
976                                 if (cl == cl_tail) {
977                                         /* Repair it! */
978                                         cl_tail = cl_prev;
979
980                                         /* Was it the last class in this band? */
981                                         if (cl == cl_tail) {
982                                                 /* Kill the band! */
983                                                 q->active[prio] = NULL;
984                                                 q->activemask &= ~(1<<prio);
985                                                 if (cl->q->q.qlen)
986                                                         cbq_activate_class(cl);
987                                                 return NULL;
988                                         }
989
990                                         q->active[prio] = cl_tail;
991                                 }
992                                 if (cl->q->q.qlen)
993                                         cbq_activate_class(cl);
994
995                                 cl = cl_prev;
996                         }
997
998 next_class:
999                         cl_prev = cl;
1000                         cl = cl->next_alive;
1001                 } while (cl_prev != cl_tail);
1002         } while (deficit);
1003
1004         q->active[prio] = cl_prev;
1005
1006         return NULL;
1007 }
1008
1009 static __inline__ struct sk_buff *
1010 cbq_dequeue_1(struct Qdisc *sch)
1011 {
1012         struct cbq_sched_data *q = qdisc_priv(sch);
1013         struct sk_buff *skb;
1014         unsigned activemask;
1015
1016         activemask = q->activemask&0xFF;
1017         while (activemask) {
1018                 int prio = ffz(~activemask);
1019                 activemask &= ~(1<<prio);
1020                 skb = cbq_dequeue_prio(sch, prio);
1021                 if (skb)
1022                         return skb;
1023         }
1024         return NULL;
1025 }
1026
1027 static struct sk_buff *
1028 cbq_dequeue(struct Qdisc *sch)
1029 {
1030         struct sk_buff *skb;
1031         struct cbq_sched_data *q = qdisc_priv(sch);
1032         psched_time_t now;
1033         psched_tdiff_t incr;
1034
1035         PSCHED_GET_TIME(now);
1036         incr = PSCHED_TDIFF(now, q->now_rt);
1037
1038         if (q->tx_class) {
1039                 psched_tdiff_t incr2;
1040                 /* Time integrator. We calculate the EOS (end of service) time
1041                    by adding the expected packet transmission time.
1042                    If real time is greater, we warp the artificial clock,
1043                    so that:
1044
1045                    cbq_time = max(real_time, work);
1046                  */
1047                 incr2 = L2T(&q->link, q->tx_len);
1048                 PSCHED_TADD(q->now, incr2);
1049                 cbq_update(q);
1050                 if ((incr -= incr2) < 0)
1051                         incr = 0;
1052         }
1053         PSCHED_TADD(q->now, incr);
1054         q->now_rt = now;
1055
1056         for (;;) {
1057                 q->wd_expires = 0;
1058
1059                 skb = cbq_dequeue_1(sch);
1060                 if (skb) {
1061                         sch->q.qlen--;
1062                         sch->flags &= ~TCQ_F_THROTTLED;
1063                         return skb;
1064                 }
1065
1066                 /* All the classes are overlimit.
1067
1068                    This is possible if:
1069
1070                    1. The scheduler is empty.
1071                    2. The toplevel cutoff inhibited borrowing.
1072                    3. The root class is overlimit.
1073
1074                    Reset the 2nd and 3rd conditions and retry.
1075
1076                    Note that NS and cbq-2.0 are buggy here: picking
1077                    an arbitrary class is appropriate for ancestor-only
1078                    sharing, but not for the toplevel algorithm.
1079
1080                    Our version is better, but slower, because it requires
1081                    two passes; this is unavoidable with top-level sharing.
1082                 */
1083
1084                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1085                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1086                         break;
1087
1088                 q->toplevel = TC_CBQ_MAXLEVEL;
1089                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1090         }
1091
1092         /* No packets in the scheduler, or nobody wants to give them to us :-(
1093            Sigh... start the watchdog timer in the latter case. */
1094
1095         if (sch->q.qlen) {
1096                 sch->qstats.overlimits++;
1097                 if (q->wd_expires) {
1098                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1099                         if (delay <= 0)
1100                                 delay = 1;
1101                         mod_timer(&q->wd_timer, jiffies + delay);
1102                         sch->flags |= TCQ_F_THROTTLED;
1103                 }
1104         }
1105         return NULL;
1106 }
1107
1108 /* CBQ class maintenance routines */
1109
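/* Recompute this->level (0 for leaves, maximum child level + 1 otherwise) and
   propagate the change up through all ancestors in the class tree.  */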
1110 static void cbq_adjust_levels(struct cbq_class *this)
1111 {
1112         if (this == NULL)
1113                 return;
1114
1115         do {
1116                 int level = 0;
1117                 struct cbq_class *cl;
1118
1119                 if ((cl = this->children) != NULL) {
1120                         do {
1121                                 if (cl->level > level)
1122                                         level = cl->level;
1123                         } while ((cl = cl->sibling) != this->children);
1124                 }
1125                 this->level = level+1;
1126         } while ((this = this->tparent) != NULL);
1127 }
1128
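/* Recompute the WRR quantum of every class of the given priority:
   quantum = weight * allot * nclasses[prio] / sum_of_weights[prio],
   i.e. a class receives allot scaled by its weight relative to the
   average weight in its band.  */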
1129 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1130 {
1131         struct cbq_class *cl;
1132         unsigned h;
1133
1134         if (q->quanta[prio] == 0)
1135                 return;
1136
1137         for (h=0; h<16; h++) {
1138                 for (cl = q->classes[h]; cl; cl = cl->next) {
1139                         /* BUGGGG... Beware! This expression suffers from
1140                            arithmetic overflows!
1141                          */
1142                         if (cl->priority == prio) {
1143                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1144                                         q->quanta[prio];
1145                         }
1146                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1147                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1148                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1149                         }
1150                 }
1151         }
1152 }
1153
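/* Rebuild the defaults[] table of cl's split node: for each TC_PRIO_* value,
   drop stale entries that still point at cl, then install the lowest-level
   class whose defmap covers that priority.  */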
1154 static void cbq_sync_defmap(struct cbq_class *cl)
1155 {
1156         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1157         struct cbq_class *split = cl->split;
1158         unsigned h;
1159         int i;
1160
1161         if (split == NULL)
1162                 return;
1163
1164         for (i=0; i<=TC_PRIO_MAX; i++) {
1165                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1166                         split->defaults[i] = NULL;
1167         }
1168
1169         for (i=0; i<=TC_PRIO_MAX; i++) {
1170                 int level = split->level;
1171
1172                 if (split->defaults[i])
1173                         continue;
1174
1175                 for (h=0; h<16; h++) {
1176                         struct cbq_class *c;
1177
1178                         for (c = q->classes[h]; c; c = c->next) {
1179                                 if (c->split == split && c->level < level &&
1180                                     c->defmap&(1<<i)) {
1181                                         split->defaults[i] = c;
1182                                         level = c->level;
1183                                 }
1184                         }
1185                 }
1186         }
1187 }
1188
1189 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1190 {
1191         struct cbq_class *split = NULL;
1192
1193         if (splitid == 0) {
1194                 if ((split = cl->split) == NULL)
1195                         return;
1196                 splitid = split->classid;
1197         }
1198
1199         if (split == NULL || split->classid != splitid) {
1200                 for (split = cl->tparent; split; split = split->tparent)
1201                         if (split->classid == splitid)
1202                                 break;
1203         }
1204
1205         if (split == NULL)
1206                 return;
1207
1208         if (cl->split != split) {
1209                 cl->defmap = 0;
1210                 cbq_sync_defmap(cl);
1211                 cl->split = split;
1212                 cl->defmap = def&mask;
1213         } else
1214                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1215
1216         cbq_sync_defmap(cl);
1217 }
1218
1219 static void cbq_unlink_class(struct cbq_class *this)
1220 {
1221         struct cbq_class *cl, **clp;
1222         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1223
1224         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1225                 if (cl == this) {
1226                         *clp = cl->next;
1227                         cl->next = NULL;
1228                         break;
1229                 }
1230         }
1231
1232         if (this->tparent) {
1233                 clp=&this->sibling;
1234                 cl = *clp;
1235                 do {
1236                         if (cl == this) {
1237                                 *clp = cl->sibling;
1238                                 break;
1239                         }
1240                         clp = &cl->sibling;
1241                 } while ((cl = *clp) != this->sibling);
1242
1243                 if (this->tparent->children == this) {
1244                         this->tparent->children = this->sibling;
1245                         if (this->sibling == this)
1246                                 this->tparent->children = NULL;
1247                 }
1248         } else {
1249                 BUG_TRAP(this->sibling == this);
1250         }
1251 }
1252
1253 static void cbq_link_class(struct cbq_class *this)
1254 {
1255         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1256         unsigned h = cbq_hash(this->classid);
1257         struct cbq_class *parent = this->tparent;
1258
1259         this->sibling = this;
1260         this->next = q->classes[h];
1261         q->classes[h] = this;
1262
1263         if (parent == NULL)
1264                 return;
1265
1266         if (parent->children == NULL) {
1267                 parent->children = this;
1268         } else {
1269                 this->sibling = parent->children->sibling;
1270                 parent->children->sibling = this;
1271         }
1272 }
1273
1274 static unsigned int cbq_drop(struct Qdisc* sch)
1275 {
1276         struct cbq_sched_data *q = qdisc_priv(sch);
1277         struct cbq_class *cl, *cl_head;
1278         int prio;
1279         unsigned int len;
1280
1281         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1282                 if ((cl_head = q->active[prio]) == NULL)
1283                         continue;
1284
1285                 cl = cl_head;
1286                 do {
1287                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1288                                 sch->q.qlen--;
1289                                 return len;
1290                         }
1291                 } while ((cl = cl->next_alive) != cl_head);
1292         }
1293         return 0;
1294 }
1295
1296 static void
1297 cbq_reset(struct Qdisc* sch)
1298 {
1299         struct cbq_sched_data *q = qdisc_priv(sch);
1300         struct cbq_class *cl;
1301         int prio;
1302         unsigned h;
1303
1304         q->activemask = 0;
1305         q->pmask = 0;
1306         q->tx_class = NULL;
1307         q->tx_borrowed = NULL;
1308         del_timer(&q->wd_timer);
1309         del_timer(&q->delay_timer);
1310         q->toplevel = TC_CBQ_MAXLEVEL;
1311         PSCHED_GET_TIME(q->now);
1312         q->now_rt = q->now;
1313
1314         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1315                 q->active[prio] = NULL;
1316
1317         for (h = 0; h < 16; h++) {
1318                 for (cl = q->classes[h]; cl; cl = cl->next) {
1319                         qdisc_reset(cl->q);
1320
1321                         cl->next_alive = NULL;
1322                         PSCHED_SET_PASTPERFECT(cl->undertime);
1323                         cl->avgidle = cl->maxidle;
1324                         cl->deficit = cl->quantum;
1325                         cl->cpriority = cl->priority;
1326                 }
1327         }
1328         sch->q.qlen = 0;
1329 }
1330
1331
1332 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1333 {
1334         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1335                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1336                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1337         }
1338         if (lss->change&TCF_CBQ_LSS_EWMA)
1339                 cl->ewma_log = lss->ewma_log;
1340         if (lss->change&TCF_CBQ_LSS_AVPKT)
1341                 cl->avpkt = lss->avpkt;
1342         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1343                 cl->minidle = -(long)lss->minidle;
1344         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1345                 cl->maxidle = lss->maxidle;
1346                 cl->avgidle = lss->maxidle;
1347         }
1348         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1349                 cl->offtime = lss->offtime;
1350         return 0;
1351 }
1352
1353 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1354 {
1355         q->nclasses[cl->priority]--;
1356         q->quanta[cl->priority] -= cl->weight;
1357         cbq_normalize_quanta(q, cl->priority);
1358 }
1359
1360 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1361 {
1362         q->nclasses[cl->priority]++;
1363         q->quanta[cl->priority] += cl->weight;
1364         cbq_normalize_quanta(q, cl->priority);
1365 }
1366
1367 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1368 {
1369         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1370
1371         if (wrr->allot)
1372                 cl->allot = wrr->allot;
1373         if (wrr->weight)
1374                 cl->weight = wrr->weight;
1375         if (wrr->priority) {
1376                 cl->priority = wrr->priority-1;
1377                 cl->cpriority = cl->priority;
1378                 if (cl->priority >= cl->priority2)
1379                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1380         }
1381
1382         cbq_addprio(q, cl);
1383         return 0;
1384 }
1385
1386 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1387 {
1388         switch (ovl->strategy) {
1389         case TC_CBQ_OVL_CLASSIC:
1390                 cl->overlimit = cbq_ovl_classic;
1391                 break;
1392         case TC_CBQ_OVL_DELAY:
1393                 cl->overlimit = cbq_ovl_delay;
1394                 break;
1395         case TC_CBQ_OVL_LOWPRIO:
1396                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1397                     ovl->priority2-1 <= cl->priority)
1398                         return -EINVAL;
1399                 cl->priority2 = ovl->priority2-1;
1400                 cl->overlimit = cbq_ovl_lowprio;
1401                 break;
1402         case TC_CBQ_OVL_DROP:
1403                 cl->overlimit = cbq_ovl_drop;
1404                 break;
1405         case TC_CBQ_OVL_RCLASSIC:
1406                 cl->overlimit = cbq_ovl_rclassic;
1407                 break;
1408         default:
1409                 return -EINVAL;
1410         }
1411         cl->penalty = (ovl->penalty*HZ)/1000;
1412         return 0;
1413 }
1414
1415 #ifdef CONFIG_NET_CLS_POLICE
1416 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1417 {
1418         cl->police = p->police;
1419
1420         if (cl->q->handle) {
1421                 if (p->police == TC_POLICE_RECLASSIFY)
1422                         cl->q->reshape_fail = cbq_reshape_fail;
1423                 else
1424                         cl->q->reshape_fail = NULL;
1425         }
1426         return 0;
1427 }
1428 #endif
1429
1430 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1431 {
1432         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1433         return 0;
1434 }
1435
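/* Qdisc setup: parse the TCA_CBQ_RATE/RTAB (and optional LSSOPT) attributes
   and initialize q->link, the built-in root class representing the physical
   link itself, from which all other classes descend.  */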
1436 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1437 {
1438         struct cbq_sched_data *q = qdisc_priv(sch);
1439         struct rtattr *tb[TCA_CBQ_MAX];
1440         struct tc_ratespec *r;
1441
1442         if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1443             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1444             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1445                 return -EINVAL;
1446
1447         if (tb[TCA_CBQ_LSSOPT-1] &&
1448             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1449                 return -EINVAL;
1450
1451         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1452
1453         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1454                 return -EINVAL;
1455
1456         q->link.refcnt = 1;
1457         q->link.sibling = &q->link;
1458         q->link.classid = sch->handle;
1459         q->link.qdisc = sch;
1460         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1461                 q->link.q = &noop_qdisc;
1462
1463         q->link.priority = TC_CBQ_MAXPRIO-1;
1464         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1465         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1466         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1467         q->link.overlimit = cbq_ovl_classic;
1468         q->link.allot = psched_mtu(sch->dev);
1469         q->link.quantum = q->link.allot;
1470         q->link.weight = q->link.R_tab->rate.rate;
1471
1472         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1473         q->link.avpkt = q->link.allot/2;
1474         q->link.minidle = -0x7FFFFFFF;
1475         q->link.stats_lock = &sch->dev->queue_lock;
1476
1477         init_timer(&q->wd_timer);
1478         q->wd_timer.data = (unsigned long)sch;
1479         q->wd_timer.function = cbq_watchdog;
1480         init_timer(&q->delay_timer);
1481         q->delay_timer.data = (unsigned long)sch;
1482         q->delay_timer.function = cbq_undelay;
1483         q->toplevel = TC_CBQ_MAXLEVEL;
1484         PSCHED_GET_TIME(q->now);
1485         q->now_rt = q->now;
1486
1487         cbq_link_class(&q->link);
1488
1489         if (tb[TCA_CBQ_LSSOPT-1])
1490                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1491
1492         cbq_addprio(q, &q->link);
1493         return 0;
1494 }
1495
1496 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1497 {
1498         unsigned char    *b = skb->tail;
1499
1500         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1501         return skb->len;
1502
1503 rtattr_failure:
1504         skb_trim(skb, b - skb->data);
1505         return -1;
1506 }
1507
1508 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1509 {
1510         unsigned char    *b = skb->tail;
1511         struct tc_cbq_lssopt opt;
1512
1513         opt.flags = 0;
1514         if (cl->borrow == NULL)
1515                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1516         if (cl->share == NULL)
1517                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1518         opt.ewma_log = cl->ewma_log;
1519         opt.level = cl->level;
1520         opt.avpkt = cl->avpkt;
1521         opt.maxidle = cl->maxidle;
1522         opt.minidle = (u32)(-cl->minidle);
1523         opt.offtime = cl->offtime;
1524         opt.change = ~0;
1525         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1526         return skb->len;
1527
1528 rtattr_failure:
1529         skb_trim(skb, b - skb->data);
1530         return -1;
1531 }
1532
1533 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1534 {
1535         unsigned char    *b = skb->tail;
1536         struct tc_cbq_wrropt opt;
1537
1538         opt.flags = 0;
1539         opt.allot = cl->allot;
1540         opt.priority = cl->priority+1;
1541         opt.cpriority = cl->cpriority+1;
1542         opt.weight = cl->weight;
1543         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1544         return skb->len;
1545
1546 rtattr_failure:
1547         skb_trim(skb, b - skb->data);
1548         return -1;
1549 }
1550
1551 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1552 {
1553         unsigned char    *b = skb->tail;
1554         struct tc_cbq_ovl opt;
1555
1556         opt.strategy = cl->ovl_strategy;
1557         opt.priority2 = cl->priority2+1;
1558         opt.penalty = (cl->penalty*1000)/HZ;
1559         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1560         return skb->len;
1561
1562 rtattr_failure:
1563         skb_trim(skb, b - skb->data);
1564         return -1;
1565 }
1566
1567 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1568 {
1569         unsigned char    *b = skb->tail;
1570         struct tc_cbq_fopt opt;
1571
1572         if (cl->split || cl->defmap) {
1573                 opt.split = cl->split ? cl->split->classid : 0;
1574                 opt.defmap = cl->defmap;
1575                 opt.defchange = ~0;
1576                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1577         }
1578         return skb->len;
1579
1580 rtattr_failure:
1581         skb_trim(skb, b - skb->data);
1582         return -1;
1583 }
1584
1585 #ifdef CONFIG_NET_CLS_POLICE
1586 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1587 {
1588         unsigned char    *b = skb->tail;
1589         struct tc_cbq_police opt;
1590
1591         if (cl->police) {
1592                 opt.police = cl->police;
1593                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1594         }
1595         return skb->len;
1596
1597 rtattr_failure:
1598         skb_trim(skb, b - skb->data);
1599         return -1;
1600 }
1601 #endif
1602
1603 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1604 {
1605         if (cbq_dump_lss(skb, cl) < 0 ||
1606             cbq_dump_rate(skb, cl) < 0 ||
1607             cbq_dump_wrr(skb, cl) < 0 ||
1608             cbq_dump_ovl(skb, cl) < 0 ||
1609 #ifdef CONFIG_NET_CLS_POLICE
1610             cbq_dump_police(skb, cl) < 0 ||
1611 #endif
1612             cbq_dump_fopt(skb, cl) < 0)
1613                 return -1;
1614         return 0;
1615 }
1616
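/* Dump the qdisc itself: the root link class parameters are nested
 * inside a single TCA_OPTIONS attribute.
 */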
1617 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1618 {
1619         struct cbq_sched_data *q = qdisc_priv(sch);
1620         unsigned char    *b = skb->tail;
1621         struct rtattr *rta;
1622
1623         rta = (struct rtattr*)b;
1624         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1625         if (cbq_dump_attr(skb, &q->link) < 0)
1626                 goto rtattr_failure;
1627         rta->rta_len = skb->tail - b;
1628         return skb->len;
1629
1630 rtattr_failure:
1631         skb_trim(skb, b - skb->data);
1632         return -1;
1633 }
1634
1635 static int
1636 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1637 {
1638         struct cbq_sched_data *q = qdisc_priv(sch);
1639
1640         q->link.xstats.avgidle = q->link.avgidle;
1641         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1642 }
1643
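/* Dump one class: fill in the tcmsg header (parent, handle, child qdisc
 * handle) and nest the class parameters under TCA_OPTIONS via
 * cbq_dump_attr().
 */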
1644 static int
1645 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1646                struct sk_buff *skb, struct tcmsg *tcm)
1647 {
1648         struct cbq_class *cl = (struct cbq_class*)arg;
1649         unsigned char    *b = skb->tail;
1650         struct rtattr *rta;
1651
1652         if (cl->tparent)
1653                 tcm->tcm_parent = cl->tparent->classid;
1654         else
1655                 tcm->tcm_parent = TC_H_ROOT;
1656         tcm->tcm_handle = cl->classid;
1657         tcm->tcm_info = cl->q->handle;
1658
1659         rta = (struct rtattr*)b;
1660         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1661         if (cbq_dump_attr(skb, cl) < 0)
1662                 goto rtattr_failure;
1663         rta->rta_len = skb->tail - b;
1664         return skb->len;
1665
1666 rtattr_failure:
1667         skb_trim(skb, b - skb->data);
1668         return -1;
1669 }
1670
1671 static int
1672 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1673         struct gnet_dump *d)
1674 {
1675         struct cbq_sched_data *q = qdisc_priv(sch);
1676         struct cbq_class *cl = (struct cbq_class*)arg;
1677
1678         cl->qstats.qlen = cl->q->q.qlen;
1679         cl->xstats.avgidle = cl->avgidle;
1680         cl->xstats.undertime = 0;
1681
1682         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1683                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1684
1685         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1686 #ifdef CONFIG_NET_ESTIMATOR
1687             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1688 #endif
1689             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1690                 return -1;
1691
1692         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1693 }
1694
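/* Class operations.
 *
 * cbq_graft() replaces the child qdisc of a class.  A NULL replacement
 * means "restore the default", so a fresh pfifo is created; the old
 * qdisc is reset and handed back to the caller, and the root queue
 * length is adjusted under the tree lock.
 */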
1695 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1696                      struct Qdisc **old)
1697 {
1698         struct cbq_class *cl = (struct cbq_class*)arg;
1699
1700         if (cl) {
1701                 if (new == NULL) {
1702                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1703                                 return -ENOBUFS;
1704                 } else {
1705 #ifdef CONFIG_NET_CLS_POLICE
1706                         if (cl->police == TC_POLICE_RECLASSIFY)
1707                                 new->reshape_fail = cbq_reshape_fail;
1708 #endif
1709                 }
1710                 sch_tree_lock(sch);
1711                 *old = cl->q;
1712                 cl->q = new;
1713                 sch->q.qlen -= (*old)->q.qlen;
1714                 qdisc_reset(*old);
1715                 sch_tree_unlock(sch);
1716
1717                 return 0;
1718         }
1719         return -ENOENT;
1720 }
1721
1722 static struct Qdisc *
1723 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1724 {
1725         struct cbq_class *cl = (struct cbq_class*)arg;
1726
1727         return cl ? cl->q : NULL;
1728 }
1729
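/* get/put implement reference counting on classes for the class API;
 * the reference taken here is dropped by cbq_put(), which destroys the
 * class once the count reaches zero.
 */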
1730 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1731 {
1732         struct cbq_sched_data *q = qdisc_priv(sch);
1733         struct cbq_class *cl = cbq_class_lookup(q, classid);
1734
1735         if (cl) {
1736                 cl->refcnt++;
1737                 return (unsigned long)cl;
1738         }
1739         return 0;
1740 }
1741
1742 static void cbq_destroy_filters(struct cbq_class *cl)
1743 {
1744         struct tcf_proto *tp;
1745
1746         while ((tp = cl->filter_list) != NULL) {
1747                 cl->filter_list = tp->next;
1748                 tcf_destroy(tp);
1749         }
1750 }
1751
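/* Free everything attached to a class: filters, child qdisc, rate table
 * and (if configured) the rate estimator.  The root link class is
 * embedded in cbq_sched_data, so it is never kfree()d here.
 */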
1752 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1753 {
1754         struct cbq_sched_data *q = qdisc_priv(sch);
1755
1756         BUG_TRAP(!cl->filters);
1757
1758         cbq_destroy_filters(cl);
1759         qdisc_destroy(cl->q);
1760         qdisc_put_rtab(cl->R_tab);
1761 #ifdef CONFIG_NET_ESTIMATOR
1762         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1763 #endif
1764         if (cl != &q->link)
1765                 kfree(cl);
1766 }
1767
1768 static void
1769 cbq_destroy(struct Qdisc* sch)
1770 {
1771         struct cbq_sched_data *q = qdisc_priv(sch);
1772         struct cbq_class *cl;
1773         unsigned h;
1774
1775 #ifdef CONFIG_NET_CLS_POLICE
1776         q->rx_class = NULL;
1777 #endif
1778         /*
1779          * Filters must be destroyed first because we do not destroy the
1780          * classes from root to leaves, which means that filters could still
1781          * be bound to classes that have already been destroyed. --TGR '04
1782          */
1783         for (h = 0; h < 16; h++)
1784                 for (cl = q->classes[h]; cl; cl = cl->next)
1785                         cbq_destroy_filters(cl);
1786
1787         for (h = 0; h < 16; h++) {
1788                 struct cbq_class *next;
1789
1790                 for (cl = q->classes[h]; cl; cl = next) {
1791                         next = cl->next;
1792                         cbq_destroy_class(sch, cl);
1793                 }
1794         }
1795 }
1796
1797 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1798 {
1799         struct cbq_class *cl = (struct cbq_class*)arg;
1800
1801         if (--cl->refcnt == 0) {
1802 #ifdef CONFIG_NET_CLS_POLICE
1803                 struct cbq_sched_data *q = qdisc_priv(sch);
1804
1805                 spin_lock_bh(&sch->dev->queue_lock);
1806                 if (q->rx_class == cl)
1807                         q->rx_class = NULL;
1808                 spin_unlock_bh(&sch->dev->queue_lock);
1809 #endif
1810
1811                 cbq_destroy_class(sch, cl);
1812         }
1813 }
1814
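/* Create a new class, or change an existing one when cl != NULL.  All
 * attribute payloads are length-checked up front; new classes get a
 * default pfifo child, inherit allot/ewma_log/maxidle/avpkt from the
 * root link and, when no classid was supplied, receive an automatically
 * generated minor number in the 0x8000+ range via q->hgenerator.
 *
 * From userspace this corresponds to something like (illustrative
 * iproute2 usage, not part of this file):
 *
 *	tc class add dev eth0 parent 1: classid 1:1 cbq \
 *		bandwidth 100Mbit rate 1Mbit allot 1514 \
 *		prio 5 avpkt 1000 bounded
 *
 * where rate, allot/prio and avpkt/bounded end up as the TCA_CBQ_RATE,
 * TCA_CBQ_WRROPT and TCA_CBQ_LSSOPT attributes parsed here.
 */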
1815 static int
1816 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1817                  unsigned long *arg)
1818 {
1819         int err;
1820         struct cbq_sched_data *q = qdisc_priv(sch);
1821         struct cbq_class *cl = (struct cbq_class*)*arg;
1822         struct rtattr *opt = tca[TCA_OPTIONS-1];
1823         struct rtattr *tb[TCA_CBQ_MAX];
1824         struct cbq_class *parent;
1825         struct qdisc_rate_table *rtab = NULL;
1826
1827         if (opt==NULL ||
1828             rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1829                 return -EINVAL;
1830
1831         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1832             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1833                 return -EINVAL;
1834
1835         if (tb[TCA_CBQ_FOPT-1] &&
1836             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1837                 return -EINVAL;
1838
1839         if (tb[TCA_CBQ_RATE-1] &&
1840             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1841                 return -EINVAL;
1842
1843         if (tb[TCA_CBQ_LSSOPT-1] &&
1844             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1845                 return -EINVAL;
1846
1847         if (tb[TCA_CBQ_WRROPT-1] &&
1848             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1849                 return -EINVAL;
1850
1851 #ifdef CONFIG_NET_CLS_POLICE
1852         if (tb[TCA_CBQ_POLICE-1] &&
1853             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1854                 return -EINVAL;
1855 #endif
1856
1857         if (cl) {
1858                 /* Check parent */
1859                 if (parentid) {
1860                         if (cl->tparent && cl->tparent->classid != parentid)
1861                                 return -EINVAL;
1862                         if (!cl->tparent && parentid != TC_H_ROOT)
1863                                 return -EINVAL;
1864                 }
1865
1866                 if (tb[TCA_CBQ_RATE-1]) {
1867                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1868                         if (rtab == NULL)
1869                                 return -EINVAL;
1870                 }
1871
1872                 /* Change class parameters */
1873                 sch_tree_lock(sch);
1874
1875                 if (cl->next_alive != NULL)
1876                         cbq_deactivate_class(cl);
1877
1878                 if (rtab) {
1879                         rtab = xchg(&cl->R_tab, rtab);
1880                         qdisc_put_rtab(rtab);
1881                 }
1882
1883                 if (tb[TCA_CBQ_LSSOPT-1])
1884                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1885
1886                 if (tb[TCA_CBQ_WRROPT-1]) {
1887                         cbq_rmprio(q, cl);
1888                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1889                 }
1890
1891                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1892                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1893
1894 #ifdef CONFIG_NET_CLS_POLICE
1895                 if (tb[TCA_CBQ_POLICE-1])
1896                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1897 #endif
1898
1899                 if (tb[TCA_CBQ_FOPT-1])
1900                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1901
1902                 if (cl->q->q.qlen)
1903                         cbq_activate_class(cl);
1904
1905                 sch_tree_unlock(sch);
1906
1907 #ifdef CONFIG_NET_ESTIMATOR
1908                 if (tca[TCA_RATE-1])
1909                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1910                                 cl->stats_lock, tca[TCA_RATE-1]);
1911 #endif
1912                 return 0;
1913         }
1914
1915         if (parentid == TC_H_ROOT)
1916                 return -EINVAL;
1917
1918         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1919             tb[TCA_CBQ_LSSOPT-1] == NULL)
1920                 return -EINVAL;
1921
1922         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1923         if (rtab == NULL)
1924                 return -EINVAL;
1925
1926         if (classid) {
1927                 err = -EINVAL;
1928                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1929                         goto failure;
1930         } else {
1931                 int i;
1932                 classid = TC_H_MAKE(sch->handle,0x8000);
1933
1934                 for (i=0; i<0x8000; i++) {
1935                         if (++q->hgenerator >= 0x8000)
1936                                 q->hgenerator = 1;
1937                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1938                                 break;
1939                 }
1940                 err = -ENOSR;
1941                 if (i >= 0x8000)
1942                         goto failure;
1943                 classid = classid|q->hgenerator;
1944         }
1945
1946         parent = &q->link;
1947         if (parentid) {
1948                 parent = cbq_class_lookup(q, parentid);
1949                 err = -EINVAL;
1950                 if (parent == NULL)
1951                         goto failure;
1952         }
1953
1954         err = -ENOBUFS;
1955         cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1956         if (cl == NULL)
1957                 goto failure;
1958         memset(cl, 0, sizeof(*cl));
1959         cl->R_tab = rtab;
1960         rtab = NULL;
1961         cl->refcnt = 1;
1962         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1963                 cl->q = &noop_qdisc;
1964         cl->classid = classid;
1965         cl->tparent = parent;
1966         cl->qdisc = sch;
1967         cl->allot = parent->allot;
1968         cl->quantum = cl->allot;
1969         cl->weight = cl->R_tab->rate.rate;
1970         cl->stats_lock = &sch->dev->queue_lock;
1971
1972         sch_tree_lock(sch);
1973         cbq_link_class(cl);
1974         cl->borrow = cl->tparent;
1975         if (cl->tparent != &q->link)
1976                 cl->share = cl->tparent;
1977         cbq_adjust_levels(parent);
1978         cl->minidle = -0x7FFFFFFF;
1979         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1980         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1981         if (cl->ewma_log==0)
1982                 cl->ewma_log = q->link.ewma_log;
1983         if (cl->maxidle==0)
1984                 cl->maxidle = q->link.maxidle;
1985         if (cl->avpkt==0)
1986                 cl->avpkt = q->link.avpkt;
1987         cl->overlimit = cbq_ovl_classic;
1988         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1989                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1990 #ifdef CONFIG_NET_CLS_POLICE
1991         if (tb[TCA_CBQ_POLICE-1])
1992                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1993 #endif
1994         if (tb[TCA_CBQ_FOPT-1])
1995                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1996         sch_tree_unlock(sch);
1997
1998 #ifdef CONFIG_NET_ESTIMATOR
1999         if (tca[TCA_RATE-1])
2000                 gen_new_estimator(&cl->bstats, &cl->rate_est,
2001                         cl->stats_lock, tca[TCA_RATE-1]);
2002 #endif
2003
2004         *arg = (unsigned long)cl;
2005         return 0;
2006
2007 failure:
2008         qdisc_put_rtab(rtab);
2009         return err;
2010 }
2011
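/* Delete a class.  Refused while filters are still bound to it, while
 * it has children, or for the root link.  Otherwise the class is
 * unlinked from the hash, the WRR rings and any default maps, and is
 * actually destroyed once its last reference is dropped.
 */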
2012 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
2013 {
2014         struct cbq_sched_data *q = qdisc_priv(sch);
2015         struct cbq_class *cl = (struct cbq_class*)arg;
2016
2017         if (cl->filters || cl->children || cl == &q->link)
2018                 return -EBUSY;
2019
2020         sch_tree_lock(sch);
2021
2022         if (cl->next_alive)
2023                 cbq_deactivate_class(cl);
2024
2025         if (q->tx_borrowed == cl)
2026                 q->tx_borrowed = q->tx_class;
2027         if (q->tx_class == cl) {
2028                 q->tx_class = NULL;
2029                 q->tx_borrowed = NULL;
2030         }
2031 #ifdef CONFIG_NET_CLS_POLICE
2032         if (q->rx_class == cl)
2033                 q->rx_class = NULL;
2034 #endif
2035
2036         cbq_unlink_class(cl);
2037         cbq_adjust_levels(cl->tparent);
2038         cl->defmap = 0;
2039         cbq_sync_defmap(cl);
2040
2041         cbq_rmprio(q, cl);
2042         sch_tree_unlock(sch);
2043
2044         if (--cl->refcnt == 0)
2045                 cbq_destroy_class(sch, cl);
2046
2047         return 0;
2048 }
2049
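/* Filter (classifier) hooks: tcf_chain returns the per-class filter
 * list (the root link when no class is given), while bind/unbind keep
 * cl->filters up to date so that a class with active filters cannot be
 * deleted.  Binding to a class whose level is at or above its parent's
 * level is rejected.
 */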
2050 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2051 {
2052         struct cbq_sched_data *q = qdisc_priv(sch);
2053         struct cbq_class *cl = (struct cbq_class *)arg;
2054
2055         if (cl == NULL)
2056                 cl = &q->link;
2057
2058         return &cl->filter_list;
2059 }
2060
2061 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2062                                      u32 classid)
2063 {
2064         struct cbq_sched_data *q = qdisc_priv(sch);
2065         struct cbq_class *p = (struct cbq_class*)parent;
2066         struct cbq_class *cl = cbq_class_lookup(q, classid);
2067
2068         if (cl) {
2069                 if (p && p->level <= cl->level)
2070                         return 0;
2071                 cl->filters++;
2072                 return (unsigned long)cl;
2073         }
2074         return 0;
2075 }
2076
2077 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2078 {
2079         struct cbq_class *cl = (struct cbq_class*)arg;
2080
2081         cl->filters--;
2082 }
2083
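/* Walk all classes in hash order, honouring the walker's skip/count
 * bookkeeping and stopping early if the callback asks for it.  Used
 * for class dumps.
 */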
2084 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2085 {
2086         struct cbq_sched_data *q = qdisc_priv(sch);
2087         unsigned h;
2088
2089         if (arg->stop)
2090                 return;
2091
2092         for (h = 0; h < 16; h++) {
2093                 struct cbq_class *cl;
2094
2095                 for (cl = q->classes[h]; cl; cl = cl->next) {
2096                         if (arg->count < arg->skip) {
2097                                 arg->count++;
2098                                 continue;
2099                         }
2100                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2101                                 arg->stop = 1;
2102                                 return;
2103                         }
2104                         arg->count++;
2105                 }
2106         }
2107 }
2108
2109 static struct Qdisc_class_ops cbq_class_ops = {
2110         .graft          =       cbq_graft,
2111         .leaf           =       cbq_leaf,
2112         .get            =       cbq_get,
2113         .put            =       cbq_put,
2114         .change         =       cbq_change_class,
2115         .delete         =       cbq_delete,
2116         .walk           =       cbq_walk,
2117         .tcf_chain      =       cbq_find_tcf,
2118         .bind_tcf       =       cbq_bind_filter,
2119         .unbind_tcf     =       cbq_unbind_filter,
2120         .dump           =       cbq_dump_class,
2121         .dump_stats     =       cbq_dump_class_stats,
2122 };
2123
2124 static struct Qdisc_ops cbq_qdisc_ops = {
2125         .next           =       NULL,
2126         .cl_ops         =       &cbq_class_ops,
2127         .id             =       "cbq",
2128         .priv_size      =       sizeof(struct cbq_sched_data),
2129         .enqueue        =       cbq_enqueue,
2130         .dequeue        =       cbq_dequeue,
2131         .requeue        =       cbq_requeue,
2132         .drop           =       cbq_drop,
2133         .init           =       cbq_init,
2134         .reset          =       cbq_reset,
2135         .destroy        =       cbq_destroy,
2136         .change         =       NULL,
2137         .dump           =       cbq_dump,
2138         .dump_stats     =       cbq_dump_stats,
2139         .owner          =       THIS_MODULE,
2140 };
2141
2142 static int __init cbq_module_init(void)
2143 {
2144         return register_qdisc(&cbq_qdisc_ops);
2145 }
2146 static void __exit cbq_module_exit(void)
2147 {
2148         unregister_qdisc(&cbq_qdisc_ops);
2149 }
2150 module_init(cbq_module_init)
2151 module_exit(cbq_module_exit)
2152 MODULE_LICENSE("GPL");