vserver 1.9.3
[linux-2.6.git] / net / sched / sch_cbq.c
1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /*      Class-Based Queueing (CBQ) algorithm.
41         =======================================
42
43         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44                  Management Models for Packet Networks",
45                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50                  Parameters", 1996
51
52                  [4] Sally Floyd and Michael Speer, "Experimental Results
53                  for Class-Based Queueing", 1998, not published.
54
55         -----------------------------------------------------------------------
56
57         Algorithm skeleton was taken from NS simulator cbq.cc.
58         If someone wants to check this code against the LBL version,
59         he should take into account that ONLY the skeleton was borrowed,
60         the implementation is different. Particularly:
61
62         --- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than MTU, which is always the case when real time classes
65         have small rates. Note, that the statement of [3] is
66         incomplete, delay may actually be estimated even if class
67         per-round allotment is less than MTU. Namely, if per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1
69
70         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72         In the worst case we have IntServ estimate with D = W*r+k*MTU
73         and C = MTU*r. The proof (if correct at all) is trivial.
74
75
76         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77         interpret some places, which look like wrong translations
78         from NS. Anyone is advised to find these differences
79         and explain to me, why I am wrong 8).
80
81         --- Linux has no EOI event, so that we cannot estimate true class
82         idle time. Workaround is to consider the next dequeue event
83         as sign that previous packet is finished. This is wrong because of
84         internal device queueing, but on a permanently loaded link it is true.
85         Moreover, combined with clock integrator, this scheme looks
86         very close to an ideal solution.  */
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93         struct cbq_class        *next;          /* hash table link */
94         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
95
96 /* Parameters */
97         u32                     classid;
98         unsigned char           priority;       /* class priority */
99         unsigned char           priority2;      /* priority to be used after overlimit */
100         unsigned char           ewma_log;       /* time constant for idle time calculation */
101         unsigned char           ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103         unsigned char           police;
104 #endif
105
106         u32                     defmap;
107
108         /* Link-sharing scheduler parameters */
109         long                    maxidle;        /* Class parameters: see below. */
110         long                    offtime;
111         long                    minidle;
112         u32                     avpkt;
113         struct qdisc_rate_table *R_tab;
114
115         /* Overlimit strategy parameters */
116         void                    (*overlimit)(struct cbq_class *cl);
117         long                    penalty;
118
119         /* General scheduler (WRR) parameters */
120         long                    allot;
121         long                    quantum;        /* Allotment per WRR round */
122         long                    weight;         /* Relative allotment: see below */
123
124         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
125         struct cbq_class        *split;         /* Ptr to split node */
126         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
127         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
128         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
129                                                    parent otherwise */
130         struct cbq_class        *sibling;       /* Sibling chain */
131         struct cbq_class        *children;      /* Pointer to children chain */
132
133         struct Qdisc            *q;             /* Elementary queueing discipline */
134
135
136 /* Variables */
137         unsigned char           cpriority;      /* Effective priority */
138         unsigned char           delayed;
139         unsigned char           level;          /* level of the class in hierarchy:
140                                                    0 for leaf classes, and maximal
141                                                    level of children + 1 for nodes.
142                                                  */
143
144         psched_time_t           last;           /* Last end of service */
145         psched_time_t           undertime;
146         long                    avgidle;
147         long                    deficit;        /* Saved deficit for WRR */
148         unsigned long           penalized;
149         struct tc_stats         stats;
150         spinlock_t              *stats_lock;
151         struct tc_cbq_xstats    xstats;
152
153         struct tcf_proto        *filter_list;
154
155         int                     refcnt;
156         int                     filters;
157
158         struct cbq_class        *defaults[TC_PRIO_MAX+1];
159 };
160
161 struct cbq_sched_data
162 {
163         struct cbq_class        *classes[16];           /* Hash table of all classes */
164         int                     nclasses[TC_CBQ_MAXPRIO+1];
165         unsigned                quanta[TC_CBQ_MAXPRIO+1];
166
167         struct cbq_class        link;
168
169         unsigned                activemask;
170         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
171                                                                    with backlog */
172
173 #ifdef CONFIG_NET_CLS_POLICE
174         struct cbq_class        *rx_class;
175 #endif
176         struct cbq_class        *tx_class;
177         struct cbq_class        *tx_borrowed;
178         int                     tx_len;
179         psched_time_t           now;            /* Cached timestamp */
180         psched_time_t           now_rt;         /* Cached real time */
181         unsigned                pmask;
182
183         struct timer_list       delay_timer;
184         struct timer_list       wd_timer;       /* Watchdog timer,
185                                                    started when CBQ has
186                                                    backlog, but cannot
187                                                    transmit just now */
188         long                    wd_expires;
189         int                     toplevel;
190         u32                     hgenerator;
191 };
192
193
194 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
195
196
197 static __inline__ unsigned cbq_hash(u32 h)
198 {
199         h ^= h>>8;
200         h ^= h>>4;
201         return h&0xF;
202 }
203
204 static __inline__ struct cbq_class *
205 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 {
207         struct cbq_class *cl;
208
209         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
210                 if (cl->classid == classid)
211                         return cl;
212         return NULL;
213 }
214
215 #ifdef CONFIG_NET_CLS_POLICE
216
217 static struct cbq_class *
218 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 {
220         struct cbq_class *cl, *new;
221
222         for (cl = this->tparent; cl; cl = cl->tparent)
223                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
224                         return new;
225
226         return NULL;
227 }
228
229 #endif
230
231 /* Classify packet. The procedure is pretty complicated, but
232    it allows us to combine link sharing and priority scheduling
233    transparently.
234
235    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
236    so that it resolves to split nodes. Then packets are classified
237    by logical priority, or a more specific classifier may be attached
238    to the split node.
239  */
240
241 static struct cbq_class *
242 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
243 {
244         struct cbq_sched_data *q = qdisc_priv(sch);
245         struct cbq_class *head = &q->link;
246         struct cbq_class **defmap;
247         struct cbq_class *cl = NULL;
248         u32 prio = skb->priority;
249         struct tcf_result res;
250
251         /*
252          *  Step 1. If skb->priority points to one of our classes, use it.
253          */
254         if (TC_H_MAJ(prio^sch->handle) == 0 &&
255             (cl = cbq_class_lookup(q, prio)) != NULL)
256                         return cl;
257
258         for (;;) {
259                 int result = 0;
260 #ifdef CONFIG_NET_CLS_ACT
261                 int terminal = 0;
262 #endif
263                 defmap = head->defaults;
264
265                 /*
266                  * Step 2+n. Apply classifier.
267                  */
268                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269                         goto fallback;
270
271                 if ((cl = (void*)res.class) == NULL) {
272                         if (TC_H_MAJ(res.classid))
273                                 cl = cbq_class_lookup(q, res.classid);
274                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
275                                 cl = defmap[TC_PRIO_BESTEFFORT];
276
277                         if (cl == NULL || cl->level >= head->level)
278                                 goto fallback;
279                 }
280
281 #ifdef CONFIG_NET_CLS_ACT
282                 switch (result) {
283                 case TC_ACT_SHOT: /* Stop and kfree */
284                         *qres = NET_XMIT_DROP;
285                         terminal = 1;
286                         break;
287                 case TC_ACT_QUEUED:
288                 case TC_ACT_STOLEN: 
289                         terminal = 1;
290                         break;
291                 case TC_ACT_RECLASSIFY:  /* Things look good */
292                 case TC_ACT_OK: 
293                 case TC_ACT_UNSPEC:
294                 default:
295                         break;
296                 }
297
298                 if (terminal) {
299                         kfree_skb(skb);
300                         return NULL;
301                 }
302 #else
303 #ifdef CONFIG_NET_CLS_POLICE
304                 switch (result) {
305                 case TC_POLICE_RECLASSIFY:
306                         return cbq_reclassify(skb, cl);
307                 case TC_POLICE_SHOT:
308                         return NULL;
309                 default:
310                         break;
311                 }
312 #endif
313 #endif
314                 if (cl->level == 0)
315                         return cl;
316
317                 /*
318                  * Step 3+n. If classifier selected a link sharing class,
319                  *         apply agency specific classifier.
320                  *         Repeat this procdure until we hit a leaf node.
321                  */
322                 head = cl;
323         }
324
325 fallback:
326         cl = head;
327
328         /*
329          * Step 4. No success...
330          */
331         if (TC_H_MAJ(prio) == 0 &&
332             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
333             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
334                 return head;
335
336         return cl;
337 }
338
339 /*
340    A packet has just been enqueued on the empty class.
341    cbq_activate_class adds it to the tail of active class list
342    of its priority band.
343  */
344
345 static __inline__ void cbq_activate_class(struct cbq_class *cl)
346 {
347         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
348         int prio = cl->cpriority;
349         struct cbq_class *cl_tail;
350
351         cl_tail = q->active[prio];
352         q->active[prio] = cl;
353
354         if (cl_tail != NULL) {
355                 cl->next_alive = cl_tail->next_alive;
356                 cl_tail->next_alive = cl;
357         } else {
358                 cl->next_alive = cl;
359                 q->activemask |= (1<<prio);
360         }
361 }
362
363 /*
364    Unlink class from active chain.
365    Note that this same procedure is done directly in cbq_dequeue*
366    during round-robin procedure.
367  */
368
369 static void cbq_deactivate_class(struct cbq_class *this)
370 {
371         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
372         int prio = this->cpriority;
373         struct cbq_class *cl;
374         struct cbq_class *cl_prev = q->active[prio];
375
376         do {
377                 cl = cl_prev->next_alive;
378                 if (cl == this) {
379                         cl_prev->next_alive = cl->next_alive;
380                         cl->next_alive = NULL;
381
382                         if (cl == q->active[prio]) {
383                                 q->active[prio] = cl_prev;
384                                 if (cl == q->active[prio]) {
385                                         q->active[prio] = NULL;
386                                         q->activemask &= ~(1<<prio);
387                                         return;
388                                 }
389                         }
390
391                         cl = cl_prev->next_alive;
392                         return;
393                 }
394         } while ((cl_prev = cl) != q->active[prio]);
395 }
396
397 static void
398 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
399 {
400         int toplevel = q->toplevel;
401
402         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
403                 psched_time_t now;
404                 psched_tdiff_t incr;
405
406                 PSCHED_GET_TIME(now);
407                 incr = PSCHED_TDIFF(now, q->now_rt);
408                 PSCHED_TADD2(q->now, incr, now);
409
410                 do {
411                         if (PSCHED_TLESS(cl->undertime, now)) {
412                                 q->toplevel = cl->level;
413                                 return;
414                         }
415                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
416         }
417 }
418
419 static int
420 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
421 {
422         struct cbq_sched_data *q = qdisc_priv(sch);
423         int len = skb->len;
424         int ret = NET_XMIT_SUCCESS;
425         struct cbq_class *cl = cbq_classify(skb, sch,&ret);
426
427 #ifdef CONFIG_NET_CLS_POLICE
428         q->rx_class = cl;
429 #endif
430         if (cl) {
431 #ifdef CONFIG_NET_CLS_POLICE
432                 cl->q->__parent = sch;
433 #endif
434                 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
435                         sch->q.qlen++;
436                         sch->stats.packets++;
437                         sch->stats.bytes+=len;
438                         cbq_mark_toplevel(q, cl);
439                         if (!cl->next_alive)
440                                 cbq_activate_class(cl);
441                         return ret;
442                 }
443         }
444
445 #ifndef CONFIG_NET_CLS_ACT
446         sch->stats.drops++;
447         if (cl == NULL)
448                 kfree_skb(skb);
449         else {
450                 cbq_mark_toplevel(q, cl);
451                 cl->stats.drops++;
452         }
453 #else
454         if ( NET_XMIT_DROP == ret) {
455                 sch->stats.drops++;
456         }
457
458         if (cl != NULL) {
459                 cbq_mark_toplevel(q, cl);
460                 cl->stats.drops++;
461         }
462 #endif
463         return ret;
464 }
465
466 static int
467 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
468 {
469         struct cbq_sched_data *q = qdisc_priv(sch);
470         struct cbq_class *cl;
471         int ret;
472
473         if ((cl = q->tx_class) == NULL) {
474                 kfree_skb(skb);
475                 sch->stats.drops++;
476                 return NET_XMIT_CN;
477         }
478         q->tx_class = NULL;
479
480         cbq_mark_toplevel(q, cl);
481
482 #ifdef CONFIG_NET_CLS_POLICE
483         q->rx_class = cl;
484         cl->q->__parent = sch;
485 #endif
486         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
487                 sch->q.qlen++;
488                 if (!cl->next_alive)
489                         cbq_activate_class(cl);
490                 return 0;
491         }
492         sch->stats.drops++;
493         cl->stats.drops++;
494         return ret;
495 }
496
497 /* Overlimit actions */
498
499 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
500
501 static void cbq_ovl_classic(struct cbq_class *cl)
502 {
503         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
504         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
505
506         if (!cl->delayed) {
507                 delay += cl->offtime;
508
509                 /* 
510                    Class goes to sleep, so that it will have no
511                    chance to work avgidle. Let's forgive it 8)
512
513                    BTW cbq-2.0 has a crap in this
514                    place, apparently they forgot to shift it by cl->ewma_log.
515                  */
516                 if (cl->avgidle < 0)
517                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
518                 if (cl->avgidle < cl->minidle)
519                         cl->avgidle = cl->minidle;
520                 if (delay <= 0)
521                         delay = 1;
522                 PSCHED_TADD2(q->now, delay, cl->undertime);
523
524                 cl->xstats.overactions++;
525                 cl->delayed = 1;
526         }
527         if (q->wd_expires == 0 || q->wd_expires > delay)
528                 q->wd_expires = delay;
529
530         /* Dirty work! We must schedule wakeups based on
531            real available rate, rather than leaf rate,
532            which may be tiny (even zero).
533          */
534         if (q->toplevel == TC_CBQ_MAXLEVEL) {
535                 struct cbq_class *b;
536                 psched_tdiff_t base_delay = q->wd_expires;
537
538                 for (b = cl->borrow; b; b = b->borrow) {
539                         delay = PSCHED_TDIFF(b->undertime, q->now);
540                         if (delay < base_delay) {
541                                 if (delay <= 0)
542                                         delay = 1;
543                                 base_delay = delay;
544                         }
545                 }
546
547                 q->wd_expires = base_delay;
548         }
549 }
550
551 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
552    they go overlimit
553  */
554
555 static void cbq_ovl_rclassic(struct cbq_class *cl)
556 {
557         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
558         struct cbq_class *this = cl;
559
560         do {
561                 if (cl->level > q->toplevel) {
562                         cl = NULL;
563                         break;
564                 }
565         } while ((cl = cl->borrow) != NULL);
566
567         if (cl == NULL)
568                 cl = this;
569         cbq_ovl_classic(cl);
570 }
571
572 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
573
574 static void cbq_ovl_delay(struct cbq_class *cl)
575 {
576         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
577         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
578
579         if (!cl->delayed) {
580                 unsigned long sched = jiffies;
581
582                 delay += cl->offtime;
583                 if (cl->avgidle < 0)
584                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
585                 if (cl->avgidle < cl->minidle)
586                         cl->avgidle = cl->minidle;
587                 PSCHED_TADD2(q->now, delay, cl->undertime);
588
589                 if (delay > 0) {
590                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
591                         cl->penalized = sched;
592                         cl->cpriority = TC_CBQ_MAXPRIO;
593                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
594                         if (del_timer(&q->delay_timer) &&
595                             (long)(q->delay_timer.expires - sched) > 0)
596                                 q->delay_timer.expires = sched;
597                         add_timer(&q->delay_timer);
598                         cl->delayed = 1;
599                         cl->xstats.overactions++;
600                         return;
601                 }
602                 delay = 1;
603         }
604         if (q->wd_expires == 0 || q->wd_expires > delay)
605                 q->wd_expires = delay;
606 }
607
608 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
609
610 static void cbq_ovl_lowprio(struct cbq_class *cl)
611 {
612         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
613
614         cl->penalized = jiffies + cl->penalty;
615
616         if (cl->cpriority != cl->priority2) {
617                 cl->cpriority = cl->priority2;
618                 q->pmask |= (1<<cl->cpriority);
619                 cl->xstats.overactions++;
620         }
621         cbq_ovl_classic(cl);
622 }
623
624 /* TC_CBQ_OVL_DROP: penalize class by dropping */
625
626 static void cbq_ovl_drop(struct cbq_class *cl)
627 {
628         if (cl->q->ops->drop)
629                 if (cl->q->ops->drop(cl->q))
630                         cl->qdisc->q.qlen--;
631         cl->xstats.overactions++;
632         cbq_ovl_classic(cl);
633 }
634
635 static void cbq_watchdog(unsigned long arg)
636 {
637         struct Qdisc *sch = (struct Qdisc*)arg;
638
639         sch->flags &= ~TCQ_F_THROTTLED;
640         netif_schedule(sch->dev);
641 }
642
643 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
644 {
645         struct cbq_class *cl;
646         struct cbq_class *cl_prev = q->active[prio];
647         unsigned long now = jiffies;
648         unsigned long sched = now;
649
650         if (cl_prev == NULL)
651                 return now;
652
653         do {
654                 cl = cl_prev->next_alive;
655                 if ((long)(now - cl->penalized) > 0) {
656                         cl_prev->next_alive = cl->next_alive;
657                         cl->next_alive = NULL;
658                         cl->cpriority = cl->priority;
659                         cl->delayed = 0;
660                         cbq_activate_class(cl);
661
662                         if (cl == q->active[prio]) {
663                                 q->active[prio] = cl_prev;
664                                 if (cl == q->active[prio]) {
665                                         q->active[prio] = NULL;
666                                         return 0;
667                                 }
668                         }
669
670                         cl = cl_prev->next_alive;
671                 } else if ((long)(sched - cl->penalized) > 0)
672                         sched = cl->penalized;
673         } while ((cl_prev = cl) != q->active[prio]);
674
675         return (long)(sched - now);
676 }
677
678 static void cbq_undelay(unsigned long arg)
679 {
680         struct Qdisc *sch = (struct Qdisc*)arg;
681         struct cbq_sched_data *q = qdisc_priv(sch);
682         long delay = 0;
683         unsigned pmask;
684
685         pmask = q->pmask;
686         q->pmask = 0;
687
688         while (pmask) {
689                 int prio = ffz(~pmask);
690                 long tmp;
691
692                 pmask &= ~(1<<prio);
693
694                 tmp = cbq_undelay_prio(q, prio);
695                 if (tmp > 0) {
696                         q->pmask |= 1<<prio;
697                         if (tmp < delay || delay == 0)
698                                 delay = tmp;
699                 }
700         }
701
702         if (delay) {
703                 q->delay_timer.expires = jiffies + delay;
704                 add_timer(&q->delay_timer);
705         }
706
707         sch->flags &= ~TCQ_F_THROTTLED;
708         netif_schedule(sch->dev);
709 }
710
711
712 #ifdef CONFIG_NET_CLS_POLICE
713
714 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
715 {
716         int len = skb->len;
717         struct Qdisc *sch = child->__parent;
718         struct cbq_sched_data *q = qdisc_priv(sch);
719         struct cbq_class *cl = q->rx_class;
720
721         q->rx_class = NULL;
722
723         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
724
725                 cbq_mark_toplevel(q, cl);
726
727                 q->rx_class = cl;
728                 cl->q->__parent = sch;
729
730                 if (cl->q->enqueue(skb, cl->q) == 0) {
731                         sch->q.qlen++;
732                         sch->stats.packets++;
733                         sch->stats.bytes+=len;
734                         if (!cl->next_alive)
735                                 cbq_activate_class(cl);
736                         return 0;
737                 }
738                 sch->stats.drops++;
739                 return 0;
740         }
741
742         sch->stats.drops++;
743         return -1;
744 }
745 #endif
746
747 /* 
748    It is mission critical procedure.
749
750    We "regenerate" toplevel cutoff, if transmitting class
751    has backlog and it is not regulated. It is not part of
752    original CBQ description, but looks more reasonable.
753    Probably, it is wrong. This question needs further investigation.
754 */
755
756 static __inline__ void
757 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
758                     struct cbq_class *borrowed)
759 {
760         if (cl && q->toplevel >= borrowed->level) {
761                 if (cl->q->q.qlen > 1) {
762                         do {
763                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
764                                         q->toplevel = borrowed->level;
765                                         return;
766                                 }
767                         } while ((borrowed=borrowed->borrow) != NULL);
768                 }
769 #if 0   
770         /* It is not necessary now. Uncommenting it
771            will save CPU cycles, but decrease fairness.
772          */
773                 q->toplevel = TC_CBQ_MAXLEVEL;
774 #endif
775         }
776 }
777
778 static void
779 cbq_update(struct cbq_sched_data *q)
780 {
781         struct cbq_class *this = q->tx_class;
782         struct cbq_class *cl = this;
783         int len = q->tx_len;
784
785         q->tx_class = NULL;
786
787         for ( ; cl; cl = cl->share) {
788                 long avgidle = cl->avgidle;
789                 long idle;
790
791                 cl->stats.packets++;
792                 cl->stats.bytes += len;
793
794                 /*
795                    (now - last) is total time between packet right edges.
796                    (last_pktlen/rate) is "virtual" busy time, so that
797
798                          idle = (now - last) - last_pktlen/rate
799                  */
800
801                 idle = PSCHED_TDIFF(q->now, cl->last);
802                 if ((unsigned long)idle > 128*1024*1024) {
803                         avgidle = cl->maxidle;
804                 } else {
805                         idle -= L2T(cl, len);
806
807                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
808                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
809                    cl->avgidle == true_avgidle/W,
810                    hence:
811                  */
812                         avgidle += idle - (avgidle>>cl->ewma_log);
813                 }
814
815                 if (avgidle <= 0) {
816                         /* Overlimit or at-limit */
817
818                         if (avgidle < cl->minidle)
819                                 avgidle = cl->minidle;
820
821                         cl->avgidle = avgidle;
822
823                         /* Calculate expected time, when this class
824                            will be allowed to send.
825                            It will occur, when:
826                            (1-W)*true_avgidle + W*delay = 0, i.e.
827                            idle = (1/W - 1)*(-true_avgidle)
828                            or
829                            idle = (1 - W)*(-cl->avgidle);
830                          */
831                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
832
833                         /*
834                            That is not all.
835                            To maintain the rate allocated to the class,
836                            we add to undertime virtual clock,
837                            necessary to complete transmitted packet.
838                            (len/phys_bandwidth has been already passed
839                            to the moment of cbq_update)
840                          */
841
842                         idle -= L2T(&q->link, len);
843                         idle += L2T(cl, len);
844
845                         PSCHED_AUDIT_TDIFF(idle);
846
847                         PSCHED_TADD2(q->now, idle, cl->undertime);
848                 } else {
849                         /* Underlimit */
850
851                         PSCHED_SET_PASTPERFECT(cl->undertime);
852                         if (avgidle > cl->maxidle)
853                                 cl->avgidle = cl->maxidle;
854                         else
855                                 cl->avgidle = avgidle;
856                 }
857                 cl->last = q->now;
858         }
859
860         cbq_update_toplevel(q, this, q->tx_borrowed);
861 }
862
863 static __inline__ struct cbq_class *
864 cbq_under_limit(struct cbq_class *cl)
865 {
866         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
867         struct cbq_class *this_cl = cl;
868
869         if (cl->tparent == NULL)
870                 return cl;
871
872         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
873             !PSCHED_TLESS(q->now, cl->undertime)) {
874                 cl->delayed = 0;
875                 return cl;
876         }
877
878         do {
879                 /* It is very suspicious place. Now overlimit
880                    action is generated for not bounded classes
881                    only if link is completely congested.
882                    Though it is in agree with ancestor-only paradigm,
883                    it looks very stupid. Particularly,
884                    it means that this chunk of code will either
885                    never be called or result in strong amplification
886                    of burstiness. Dangerous, silly, and, however,
887                    no another solution exists.
888                  */
889                 if ((cl = cl->borrow) == NULL) {
890                         this_cl->stats.overlimits++;
891                         this_cl->overlimit(this_cl);
892                         return NULL;
893                 }
894                 if (cl->level > q->toplevel)
895                         return NULL;
896         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
897                  PSCHED_TLESS(q->now, cl->undertime));
898
899         cl->delayed = 0;
900         return cl;
901 }
902
903 static __inline__ struct sk_buff *
904 cbq_dequeue_prio(struct Qdisc *sch, int prio)
905 {
906         struct cbq_sched_data *q = qdisc_priv(sch);
907         struct cbq_class *cl_tail, *cl_prev, *cl;
908         struct sk_buff *skb;
909         int deficit;
910
911         cl_tail = cl_prev = q->active[prio];
912         cl = cl_prev->next_alive;
913
914         do {
915                 deficit = 0;
916
917                 /* Start round */
918                 do {
919                         struct cbq_class *borrow = cl;
920
921                         if (cl->q->q.qlen &&
922                             (borrow = cbq_under_limit(cl)) == NULL)
923                                 goto skip_class;
924
925                         if (cl->deficit <= 0) {
926                                 /* Class exhausted its allotment per
927                                    this round. Switch to the next one.
928                                  */
929                                 deficit = 1;
930                                 cl->deficit += cl->quantum;
931                                 goto next_class;
932                         }
933
934                         skb = cl->q->dequeue(cl->q);
935
936                         /* Class did not give us any skb :-(
937                            It could occur even if cl->q->q.qlen != 0 
938                            f.e. if cl->q == "tbf"
939                          */
940                         if (skb == NULL)
941                                 goto skip_class;
942
943                         cl->deficit -= skb->len;
944                         q->tx_class = cl;
945                         q->tx_borrowed = borrow;
946                         if (borrow != cl) {
947 #ifndef CBQ_XSTATS_BORROWS_BYTES
948                                 borrow->xstats.borrows++;
949                                 cl->xstats.borrows++;
950 #else
951                                 borrow->xstats.borrows += skb->len;
952                                 cl->xstats.borrows += skb->len;
953 #endif
954                         }
955                         q->tx_len = skb->len;
956
957                         if (cl->deficit <= 0) {
958                                 q->active[prio] = cl;
959                                 cl = cl->next_alive;
960                                 cl->deficit += cl->quantum;
961                         }
962                         return skb;
963
964 skip_class:
965                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
966                                 /* Class is empty or penalized.
967                                    Unlink it from active chain.
968                                  */
969                                 cl_prev->next_alive = cl->next_alive;
970                                 cl->next_alive = NULL;
971
972                                 /* Did cl_tail point to it? */
973                                 if (cl == cl_tail) {
974                                         /* Repair it! */
975                                         cl_tail = cl_prev;
976
977                                         /* Was it the last class in this band? */
978                                         if (cl == cl_tail) {
979                                                 /* Kill the band! */
980                                                 q->active[prio] = NULL;
981                                                 q->activemask &= ~(1<<prio);
982                                                 if (cl->q->q.qlen)
983                                                         cbq_activate_class(cl);
984                                                 return NULL;
985                                         }
986
987                                         q->active[prio] = cl_tail;
988                                 }
989                                 if (cl->q->q.qlen)
990                                         cbq_activate_class(cl);
991
992                                 cl = cl_prev;
993                         }
994
995 next_class:
996                         cl_prev = cl;
997                         cl = cl->next_alive;
998                 } while (cl_prev != cl_tail);
999         } while (deficit);
1000
1001         q->active[prio] = cl_prev;
1002
1003         return NULL;
1004 }
1005
1006 static __inline__ struct sk_buff *
1007 cbq_dequeue_1(struct Qdisc *sch)
1008 {
1009         struct cbq_sched_data *q = qdisc_priv(sch);
1010         struct sk_buff *skb;
1011         unsigned activemask;
1012
1013         activemask = q->activemask&0xFF;
1014         while (activemask) {
1015                 int prio = ffz(~activemask);
1016                 activemask &= ~(1<<prio);
1017                 skb = cbq_dequeue_prio(sch, prio);
1018                 if (skb)
1019                         return skb;
1020         }
1021         return NULL;
1022 }
1023
1024 static struct sk_buff *
1025 cbq_dequeue(struct Qdisc *sch)
1026 {
1027         struct sk_buff *skb;
1028         struct cbq_sched_data *q = qdisc_priv(sch);
1029         psched_time_t now;
1030         psched_tdiff_t incr;
1031
1032         PSCHED_GET_TIME(now);
1033         incr = PSCHED_TDIFF(now, q->now_rt);
1034
1035         if (q->tx_class) {
1036                 psched_tdiff_t incr2;
1037                 /* Time integrator. We calculate EOS time
1038                    by adding expected packet transmission time.
1039                    If real time is greater, we warp artificial clock,
1040                    so that:
1041
1042                    cbq_time = max(real_time, work);
1043                  */
1044                 incr2 = L2T(&q->link, q->tx_len);
1045                 PSCHED_TADD(q->now, incr2);
1046                 cbq_update(q);
1047                 if ((incr -= incr2) < 0)
1048                         incr = 0;
1049         }
1050         PSCHED_TADD(q->now, incr);
1051         q->now_rt = now;
1052
1053         for (;;) {
1054                 q->wd_expires = 0;
1055
1056                 skb = cbq_dequeue_1(sch);
1057                 if (skb) {
1058                         sch->q.qlen--;
1059                         sch->flags &= ~TCQ_F_THROTTLED;
1060                         return skb;
1061                 }
1062
1063                 /* All the classes are overlimit.
1064
1065                    It is possible, if:
1066
1067                    1. Scheduler is empty.
1068                    2. Toplevel cutoff inhibited borrowing.
1069                    3. Root class is overlimit.
1070
1071                    Reset 2d and 3d conditions and retry.
1072
1073                    Note, that NS and cbq-2.0 are buggy, peeking
1074                    an arbitrary class is appropriate for ancestor-only
1075                    sharing, but not for toplevel algorithm.
1076
1077                    Our version is better, but slower, because it requires
1078                    two passes, but it is unavoidable with top-level sharing.
1079                 */
1080
1081                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1082                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1083                         break;
1084
1085                 q->toplevel = TC_CBQ_MAXLEVEL;
1086                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1087         }
1088
1089         /* No packets in scheduler or nobody wants to give them to us :-(
1090            Sigh... start watchdog timer in the last case. */
1091
1092         if (sch->q.qlen) {
1093                 sch->stats.overlimits++;
1094                 if (q->wd_expires) {
1095                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1096                         if (delay <= 0)
1097                                 delay = 1;
1098                         mod_timer(&q->wd_timer, jiffies + delay);
1099                         sch->flags |= TCQ_F_THROTTLED;
1100                 }
1101         }
1102         return NULL;
1103 }
1104
1105 /* CBQ class maintanance routines */
1106
1107 static void cbq_adjust_levels(struct cbq_class *this)
1108 {
1109         if (this == NULL)
1110                 return;
1111
1112         do {
1113                 int level = 0;
1114                 struct cbq_class *cl;
1115
1116                 if ((cl = this->children) != NULL) {
1117                         do {
1118                                 if (cl->level > level)
1119                                         level = cl->level;
1120                         } while ((cl = cl->sibling) != this->children);
1121                 }
1122                 this->level = level+1;
1123         } while ((this = this->tparent) != NULL);
1124 }
1125
1126 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1127 {
1128         struct cbq_class *cl;
1129         unsigned h;
1130
1131         if (q->quanta[prio] == 0)
1132                 return;
1133
1134         for (h=0; h<16; h++) {
1135                 for (cl = q->classes[h]; cl; cl = cl->next) {
1136                         /* BUGGGG... Beware! This expression suffer of
1137                            arithmetic overflows!
1138                          */
1139                         if (cl->priority == prio) {
1140                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1141                                         q->quanta[prio];
1142                         }
1143                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1144                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1145                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1146                         }
1147                 }
1148         }
1149 }
1150
1151 static void cbq_sync_defmap(struct cbq_class *cl)
1152 {
1153         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1154         struct cbq_class *split = cl->split;
1155         unsigned h;
1156         int i;
1157
1158         if (split == NULL)
1159                 return;
1160
1161         for (i=0; i<=TC_PRIO_MAX; i++) {
1162                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1163                         split->defaults[i] = NULL;
1164         }
1165
1166         for (i=0; i<=TC_PRIO_MAX; i++) {
1167                 int level = split->level;
1168
1169                 if (split->defaults[i])
1170                         continue;
1171
1172                 for (h=0; h<16; h++) {
1173                         struct cbq_class *c;
1174
1175                         for (c = q->classes[h]; c; c = c->next) {
1176                                 if (c->split == split && c->level < level &&
1177                                     c->defmap&(1<<i)) {
1178                                         split->defaults[i] = c;
1179                                         level = c->level;
1180                                 }
1181                         }
1182                 }
1183         }
1184 }
1185
1186 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1187 {
1188         struct cbq_class *split = NULL;
1189
1190         if (splitid == 0) {
1191                 if ((split = cl->split) == NULL)
1192                         return;
1193                 splitid = split->classid;
1194         }
1195
1196         if (split == NULL || split->classid != splitid) {
1197                 for (split = cl->tparent; split; split = split->tparent)
1198                         if (split->classid == splitid)
1199                                 break;
1200         }
1201
1202         if (split == NULL)
1203                 return;
1204
1205         if (cl->split != split) {
1206                 cl->defmap = 0;
1207                 cbq_sync_defmap(cl);
1208                 cl->split = split;
1209                 cl->defmap = def&mask;
1210         } else
1211                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1212
1213         cbq_sync_defmap(cl);
1214 }
1215
1216 static void cbq_unlink_class(struct cbq_class *this)
1217 {
1218         struct cbq_class *cl, **clp;
1219         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1220
1221         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1222                 if (cl == this) {
1223                         *clp = cl->next;
1224                         cl->next = NULL;
1225                         break;
1226                 }
1227         }
1228
1229         if (this->tparent) {
1230                 clp=&this->sibling;
1231                 cl = *clp;
1232                 do {
1233                         if (cl == this) {
1234                                 *clp = cl->sibling;
1235                                 break;
1236                         }
1237                         clp = &cl->sibling;
1238                 } while ((cl = *clp) != this->sibling);
1239
1240                 if (this->tparent->children == this) {
1241                         this->tparent->children = this->sibling;
1242                         if (this->sibling == this)
1243                                 this->tparent->children = NULL;
1244                 }
1245         } else {
1246                 BUG_TRAP(this->sibling == this);
1247         }
1248 }
1249
1250 static void cbq_link_class(struct cbq_class *this)
1251 {
1252         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1253         unsigned h = cbq_hash(this->classid);
1254         struct cbq_class *parent = this->tparent;
1255
1256         this->sibling = this;
1257         this->next = q->classes[h];
1258         q->classes[h] = this;
1259
1260         if (parent == NULL)
1261                 return;
1262
1263         if (parent->children == NULL) {
1264                 parent->children = this;
1265         } else {
1266                 this->sibling = parent->children->sibling;
1267                 parent->children->sibling = this;
1268         }
1269 }
1270
1271 static unsigned int cbq_drop(struct Qdisc* sch)
1272 {
1273         struct cbq_sched_data *q = qdisc_priv(sch);
1274         struct cbq_class *cl, *cl_head;
1275         int prio;
1276         unsigned int len;
1277
1278         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1279                 if ((cl_head = q->active[prio]) == NULL)
1280                         continue;
1281
1282                 cl = cl_head;
1283                 do {
1284                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1285                                 sch->q.qlen--;
1286                                 return len;
1287                         }
1288                 } while ((cl = cl->next_alive) != cl_head);
1289         }
1290         return 0;
1291 }
1292
1293 static void
1294 cbq_reset(struct Qdisc* sch)
1295 {
1296         struct cbq_sched_data *q = qdisc_priv(sch);
1297         struct cbq_class *cl;
1298         int prio;
1299         unsigned h;
1300
1301         q->activemask = 0;
1302         q->pmask = 0;
1303         q->tx_class = NULL;
1304         q->tx_borrowed = NULL;
1305         del_timer(&q->wd_timer);
1306         del_timer(&q->delay_timer);
1307         q->toplevel = TC_CBQ_MAXLEVEL;
1308         PSCHED_GET_TIME(q->now);
1309         q->now_rt = q->now;
1310
1311         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1312                 q->active[prio] = NULL;
1313
1314         for (h = 0; h < 16; h++) {
1315                 for (cl = q->classes[h]; cl; cl = cl->next) {
1316                         qdisc_reset(cl->q);
1317
1318                         cl->next_alive = NULL;
1319                         PSCHED_SET_PASTPERFECT(cl->undertime);
1320                         cl->avgidle = cl->maxidle;
1321                         cl->deficit = cl->quantum;
1322                         cl->cpriority = cl->priority;
1323                 }
1324         }
1325         sch->q.qlen = 0;
1326 }
1327
1328
1329 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1330 {
1331         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1332                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1333                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1334         }
1335         if (lss->change&TCF_CBQ_LSS_EWMA)
1336                 cl->ewma_log = lss->ewma_log;
1337         if (lss->change&TCF_CBQ_LSS_AVPKT)
1338                 cl->avpkt = lss->avpkt;
1339         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1340                 cl->minidle = -(long)lss->minidle;
1341         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1342                 cl->maxidle = lss->maxidle;
1343                 cl->avgidle = lss->maxidle;
1344         }
1345         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1346                 cl->offtime = lss->offtime;
1347         return 0;
1348 }
1349
1350 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1351 {
1352         q->nclasses[cl->priority]--;
1353         q->quanta[cl->priority] -= cl->weight;
1354         cbq_normalize_quanta(q, cl->priority);
1355 }
1356
1357 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1358 {
1359         q->nclasses[cl->priority]++;
1360         q->quanta[cl->priority] += cl->weight;
1361         cbq_normalize_quanta(q, cl->priority);
1362 }
1363
1364 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1365 {
1366         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1367
1368         if (wrr->allot)
1369                 cl->allot = wrr->allot;
1370         if (wrr->weight)
1371                 cl->weight = wrr->weight;
1372         if (wrr->priority) {
1373                 cl->priority = wrr->priority-1;
1374                 cl->cpriority = cl->priority;
1375                 if (cl->priority >= cl->priority2)
1376                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1377         }
1378
1379         cbq_addprio(q, cl);
1380         return 0;
1381 }
1382
1383 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1384 {
1385         switch (ovl->strategy) {
1386         case TC_CBQ_OVL_CLASSIC:
1387                 cl->overlimit = cbq_ovl_classic;
1388                 break;
1389         case TC_CBQ_OVL_DELAY:
1390                 cl->overlimit = cbq_ovl_delay;
1391                 break;
1392         case TC_CBQ_OVL_LOWPRIO:
1393                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1394                     ovl->priority2-1 <= cl->priority)
1395                         return -EINVAL;
1396                 cl->priority2 = ovl->priority2-1;
1397                 cl->overlimit = cbq_ovl_lowprio;
1398                 break;
1399         case TC_CBQ_OVL_DROP:
1400                 cl->overlimit = cbq_ovl_drop;
1401                 break;
1402         case TC_CBQ_OVL_RCLASSIC:
1403                 cl->overlimit = cbq_ovl_rclassic;
1404                 break;
1405         default:
1406                 return -EINVAL;
1407         }
1408         cl->penalty = (ovl->penalty*HZ)/1000;
1409         return 0;
1410 }
1411
1412 #ifdef CONFIG_NET_CLS_POLICE
1413 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1414 {
1415         cl->police = p->police;
1416
1417         if (cl->q->handle) {
1418                 if (p->police == TC_POLICE_RECLASSIFY)
1419                         cl->q->reshape_fail = cbq_reshape_fail;
1420                 else
1421                         cl->q->reshape_fail = NULL;
1422         }
1423         return 0;
1424 }
1425 #endif
1426
1427 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1428 {
1429         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1430         return 0;
1431 }
1432
1433 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1434 {
1435         struct cbq_sched_data *q = qdisc_priv(sch);
1436         struct rtattr *tb[TCA_CBQ_MAX];
1437         struct tc_ratespec *r;
1438
1439         if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1440             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1441             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1442                 return -EINVAL;
1443
1444         if (tb[TCA_CBQ_LSSOPT-1] &&
1445             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1446                 return -EINVAL;
1447
1448         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1449
1450         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1451                 return -EINVAL;
1452
1453         q->link.refcnt = 1;
1454         q->link.sibling = &q->link;
1455         q->link.classid = sch->handle;
1456         q->link.qdisc = sch;
1457         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1458                 q->link.q = &noop_qdisc;
1459
1460         q->link.priority = TC_CBQ_MAXPRIO-1;
1461         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1462         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1463         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1464         q->link.overlimit = cbq_ovl_classic;
1465         q->link.allot = psched_mtu(sch->dev);
1466         q->link.quantum = q->link.allot;
1467         q->link.weight = q->link.R_tab->rate.rate;
1468
1469         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1470         q->link.avpkt = q->link.allot/2;
1471         q->link.minidle = -0x7FFFFFFF;
1472         q->link.stats_lock = &sch->dev->queue_lock;
1473
1474         init_timer(&q->wd_timer);
1475         q->wd_timer.data = (unsigned long)sch;
1476         q->wd_timer.function = cbq_watchdog;
1477         init_timer(&q->delay_timer);
1478         q->delay_timer.data = (unsigned long)sch;
1479         q->delay_timer.function = cbq_undelay;
1480         q->toplevel = TC_CBQ_MAXLEVEL;
1481         PSCHED_GET_TIME(q->now);
1482         q->now_rt = q->now;
1483
1484         cbq_link_class(&q->link);
1485
1486         if (tb[TCA_CBQ_LSSOPT-1])
1487                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1488
1489         cbq_addprio(q, &q->link);
1490         return 0;
1491 }
1492
1493 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1494 {
1495         unsigned char    *b = skb->tail;
1496
1497         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1498         return skb->len;
1499
1500 rtattr_failure:
1501         skb_trim(skb, b - skb->data);
1502         return -1;
1503 }
1504
1505 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1506 {
1507         unsigned char    *b = skb->tail;
1508         struct tc_cbq_lssopt opt;
1509
1510         opt.flags = 0;
1511         if (cl->borrow == NULL)
1512                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1513         if (cl->share == NULL)
1514                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1515         opt.ewma_log = cl->ewma_log;
1516         opt.level = cl->level;
1517         opt.avpkt = cl->avpkt;
1518         opt.maxidle = cl->maxidle;
1519         opt.minidle = (u32)(-cl->minidle);
1520         opt.offtime = cl->offtime;
1521         opt.change = ~0;
1522         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1523         return skb->len;
1524
1525 rtattr_failure:
1526         skb_trim(skb, b - skb->data);
1527         return -1;
1528 }
1529
1530 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1531 {
1532         unsigned char    *b = skb->tail;
1533         struct tc_cbq_wrropt opt;
1534
1535         opt.flags = 0;
1536         opt.allot = cl->allot;
1537         opt.priority = cl->priority+1;
1538         opt.cpriority = cl->cpriority+1;
1539         opt.weight = cl->weight;
1540         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1541         return skb->len;
1542
1543 rtattr_failure:
1544         skb_trim(skb, b - skb->data);
1545         return -1;
1546 }
1547
1548 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1549 {
1550         unsigned char    *b = skb->tail;
1551         struct tc_cbq_ovl opt;
1552
1553         opt.strategy = cl->ovl_strategy;
1554         opt.priority2 = cl->priority2+1;
1555         opt.penalty = (cl->penalty*1000)/HZ;
1556         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1557         return skb->len;
1558
1559 rtattr_failure:
1560         skb_trim(skb, b - skb->data);
1561         return -1;
1562 }
1563
1564 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1565 {
1566         unsigned char    *b = skb->tail;
1567         struct tc_cbq_fopt opt;
1568
1569         if (cl->split || cl->defmap) {
1570                 opt.split = cl->split ? cl->split->classid : 0;
1571                 opt.defmap = cl->defmap;
1572                 opt.defchange = ~0;
1573                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1574         }
1575         return skb->len;
1576
1577 rtattr_failure:
1578         skb_trim(skb, b - skb->data);
1579         return -1;
1580 }
1581
1582 #ifdef CONFIG_NET_CLS_POLICE
1583 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1584 {
1585         unsigned char    *b = skb->tail;
1586         struct tc_cbq_police opt;
1587
1588         if (cl->police) {
1589                 opt.police = cl->police;
1590                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1591         }
1592         return skb->len;
1593
1594 rtattr_failure:
1595         skb_trim(skb, b - skb->data);
1596         return -1;
1597 }
1598 #endif
1599
1600 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1601 {
1602         if (cbq_dump_lss(skb, cl) < 0 ||
1603             cbq_dump_rate(skb, cl) < 0 ||
1604             cbq_dump_wrr(skb, cl) < 0 ||
1605             cbq_dump_ovl(skb, cl) < 0 ||
1606 #ifdef CONFIG_NET_CLS_POLICE
1607             cbq_dump_police(skb, cl) < 0 ||
1608 #endif
1609             cbq_dump_fopt(skb, cl) < 0)
1610                 return -1;
1611         return 0;
1612 }
1613
1614 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1615 {
1616         RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1617         return 0;
1618
1619 rtattr_failure:
1620         return -1;
1621 }
1622
1623
1624 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1625 {
1626         struct cbq_sched_data *q = qdisc_priv(sch);
1627         unsigned char    *b = skb->tail;
1628         struct rtattr *rta;
1629
1630         rta = (struct rtattr*)b;
1631         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1632         if (cbq_dump_attr(skb, &q->link) < 0)
1633                 goto rtattr_failure;
1634         rta->rta_len = skb->tail - b;
1635         spin_lock_bh(&sch->dev->queue_lock);
1636         q->link.xstats.avgidle = q->link.avgidle;
1637         if (cbq_copy_xstats(skb, &q->link.xstats)) {
1638                 spin_unlock_bh(&sch->dev->queue_lock);
1639                 goto rtattr_failure;
1640         }
1641         spin_unlock_bh(&sch->dev->queue_lock);
1642         return skb->len;
1643
1644 rtattr_failure:
1645         skb_trim(skb, b - skb->data);
1646         return -1;
1647 }
1648
1649 static int
1650 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1651                struct sk_buff *skb, struct tcmsg *tcm)
1652 {
1653         struct cbq_sched_data *q = qdisc_priv(sch);
1654         struct cbq_class *cl = (struct cbq_class*)arg;
1655         unsigned char    *b = skb->tail;
1656         struct rtattr *rta;
1657
1658         if (cl->tparent)
1659                 tcm->tcm_parent = cl->tparent->classid;
1660         else
1661                 tcm->tcm_parent = TC_H_ROOT;
1662         tcm->tcm_handle = cl->classid;
1663         tcm->tcm_info = cl->q->handle;
1664
1665         rta = (struct rtattr*)b;
1666         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1667         if (cbq_dump_attr(skb, cl) < 0)
1668                 goto rtattr_failure;
1669         rta->rta_len = skb->tail - b;
1670         cl->stats.qlen = cl->q->q.qlen;
1671         if (qdisc_copy_stats(skb, &cl->stats, cl->stats_lock))
1672                 goto rtattr_failure;
1673         spin_lock_bh(&sch->dev->queue_lock);
1674         cl->xstats.avgidle = cl->avgidle;
1675         cl->xstats.undertime = 0;
1676         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1677                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1678         if (cbq_copy_xstats(skb, &cl->xstats)) {
1679                 spin_unlock_bh(&sch->dev->queue_lock);
1680                 goto rtattr_failure;
1681         }
1682         spin_unlock_bh(&sch->dev->queue_lock);
1683
1684         return skb->len;
1685
1686 rtattr_failure:
1687         skb_trim(skb, b - skb->data);
1688         return -1;
1689 }
1690
1691 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1692                      struct Qdisc **old)
1693 {
1694         struct cbq_class *cl = (struct cbq_class*)arg;
1695
1696         if (cl) {
1697                 if (new == NULL) {
1698                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1699                                 return -ENOBUFS;
1700                 } else {
1701 #ifdef CONFIG_NET_CLS_POLICE
1702                         if (cl->police == TC_POLICE_RECLASSIFY)
1703                                 new->reshape_fail = cbq_reshape_fail;
1704 #endif
1705                 }
1706                 sch_tree_lock(sch);
1707                 *old = cl->q;
1708                 cl->q = new;
1709                 sch->q.qlen -= (*old)->q.qlen;
1710                 qdisc_reset(*old);
1711                 sch_tree_unlock(sch);
1712
1713                 return 0;
1714         }
1715         return -ENOENT;
1716 }
1717
1718 static struct Qdisc *
1719 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1720 {
1721         struct cbq_class *cl = (struct cbq_class*)arg;
1722
1723         return cl ? cl->q : NULL;
1724 }
1725
1726 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1727 {
1728         struct cbq_sched_data *q = qdisc_priv(sch);
1729         struct cbq_class *cl = cbq_class_lookup(q, classid);
1730
1731         if (cl) {
1732                 cl->refcnt++;
1733                 return (unsigned long)cl;
1734         }
1735         return 0;
1736 }
1737
1738 static void cbq_destroy_filters(struct cbq_class *cl)
1739 {
1740         struct tcf_proto *tp;
1741
1742         while ((tp = cl->filter_list) != NULL) {
1743                 cl->filter_list = tp->next;
1744                 tcf_destroy(tp);
1745         }
1746 }
1747
1748 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1749 {
1750         struct cbq_sched_data *q = qdisc_priv(sch);
1751
1752         cbq_destroy_filters(cl);
1753         qdisc_destroy(cl->q);
1754         qdisc_put_rtab(cl->R_tab);
1755 #ifdef CONFIG_NET_ESTIMATOR
1756         qdisc_kill_estimator(&cl->stats);
1757 #endif
1758         if (cl != &q->link)
1759                 kfree(cl);
1760 }
1761
1762 static void
1763 cbq_destroy(struct Qdisc* sch)
1764 {
1765         struct cbq_sched_data *q = qdisc_priv(sch);
1766         struct cbq_class *cl;
1767         unsigned h;
1768
1769 #ifdef CONFIG_NET_CLS_POLICE
1770         q->rx_class = NULL;
1771 #endif
1772
1773         for (h = 0; h < 16; h++) {
1774                 struct cbq_class *next;
1775
1776                 for (cl = q->classes[h]; cl; cl = next) {
1777                         next = cl->next;
1778                         cbq_destroy_class(sch, cl);
1779                 }
1780         }
1781 }
1782
1783 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1784 {
1785         struct cbq_class *cl = (struct cbq_class*)arg;
1786
1787         if (--cl->refcnt == 0) {
1788 #ifdef CONFIG_NET_CLS_POLICE
1789                 struct cbq_sched_data *q = qdisc_priv(sch);
1790
1791                 spin_lock_bh(&sch->dev->queue_lock);
1792                 if (q->rx_class == cl)
1793                         q->rx_class = NULL;
1794                 spin_unlock_bh(&sch->dev->queue_lock);
1795 #endif
1796
1797                 cbq_destroy_class(sch, cl);
1798         }
1799 }
1800
1801 static int
1802 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1803                  unsigned long *arg)
1804 {
1805         int err;
1806         struct cbq_sched_data *q = qdisc_priv(sch);
1807         struct cbq_class *cl = (struct cbq_class*)*arg;
1808         struct rtattr *opt = tca[TCA_OPTIONS-1];
1809         struct rtattr *tb[TCA_CBQ_MAX];
1810         struct cbq_class *parent;
1811         struct qdisc_rate_table *rtab = NULL;
1812
1813         if (opt==NULL ||
1814             rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1815                 return -EINVAL;
1816
1817         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1818             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1819                 return -EINVAL;
1820
1821         if (tb[TCA_CBQ_FOPT-1] &&
1822             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1823                 return -EINVAL;
1824
1825         if (tb[TCA_CBQ_RATE-1] &&
1826             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1827                         return -EINVAL;
1828
1829         if (tb[TCA_CBQ_LSSOPT-1] &&
1830             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1831                         return -EINVAL;
1832
1833         if (tb[TCA_CBQ_WRROPT-1] &&
1834             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1835                         return -EINVAL;
1836
1837 #ifdef CONFIG_NET_CLS_POLICE
1838         if (tb[TCA_CBQ_POLICE-1] &&
1839             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1840                         return -EINVAL;
1841 #endif
1842
1843         if (cl) {
1844                 /* Check parent */
1845                 if (parentid) {
1846                         if (cl->tparent && cl->tparent->classid != parentid)
1847                                 return -EINVAL;
1848                         if (!cl->tparent && parentid != TC_H_ROOT)
1849                                 return -EINVAL;
1850                 }
1851
1852                 if (tb[TCA_CBQ_RATE-1]) {
1853                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1854                         if (rtab == NULL)
1855                                 return -EINVAL;
1856                 }
1857
1858                 /* Change class parameters */
1859                 sch_tree_lock(sch);
1860
1861                 if (cl->next_alive != NULL)
1862                         cbq_deactivate_class(cl);
1863
1864                 if (rtab) {
1865                         rtab = xchg(&cl->R_tab, rtab);
1866                         qdisc_put_rtab(rtab);
1867                 }
1868
1869                 if (tb[TCA_CBQ_LSSOPT-1])
1870                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1871
1872                 if (tb[TCA_CBQ_WRROPT-1]) {
1873                         cbq_rmprio(q, cl);
1874                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1875                 }
1876
1877                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1878                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1879
1880 #ifdef CONFIG_NET_CLS_POLICE
1881                 if (tb[TCA_CBQ_POLICE-1])
1882                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1883 #endif
1884
1885                 if (tb[TCA_CBQ_FOPT-1])
1886                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1887
1888                 if (cl->q->q.qlen)
1889                         cbq_activate_class(cl);
1890
1891                 sch_tree_unlock(sch);
1892
1893 #ifdef CONFIG_NET_ESTIMATOR
1894                 if (tca[TCA_RATE-1]) {
1895                         qdisc_kill_estimator(&cl->stats);
1896                         qdisc_new_estimator(&cl->stats, cl->stats_lock,
1897                                             tca[TCA_RATE-1]);
1898                 }
1899 #endif
1900                 return 0;
1901         }
1902
1903         if (parentid == TC_H_ROOT)
1904                 return -EINVAL;
1905
1906         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1907             tb[TCA_CBQ_LSSOPT-1] == NULL)
1908                 return -EINVAL;
1909
1910         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1911         if (rtab == NULL)
1912                 return -EINVAL;
1913
1914         if (classid) {
1915                 err = -EINVAL;
1916                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1917                         goto failure;
1918         } else {
1919                 int i;
1920                 classid = TC_H_MAKE(sch->handle,0x8000);
1921
1922                 for (i=0; i<0x8000; i++) {
1923                         if (++q->hgenerator >= 0x8000)
1924                                 q->hgenerator = 1;
1925                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1926                                 break;
1927                 }
1928                 err = -ENOSR;
1929                 if (i >= 0x8000)
1930                         goto failure;
1931                 classid = classid|q->hgenerator;
1932         }
1933
1934         parent = &q->link;
1935         if (parentid) {
1936                 parent = cbq_class_lookup(q, parentid);
1937                 err = -EINVAL;
1938                 if (parent == NULL)
1939                         goto failure;
1940         }
1941
1942         err = -ENOBUFS;
1943         cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1944         if (cl == NULL)
1945                 goto failure;
1946         memset(cl, 0, sizeof(*cl));
1947         cl->R_tab = rtab;
1948         rtab = NULL;
1949         cl->refcnt = 1;
1950         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1951                 cl->q = &noop_qdisc;
1952         cl->classid = classid;
1953         cl->tparent = parent;
1954         cl->qdisc = sch;
1955         cl->allot = parent->allot;
1956         cl->quantum = cl->allot;
1957         cl->weight = cl->R_tab->rate.rate;
1958         cl->stats_lock = &sch->dev->queue_lock;
1959
1960         sch_tree_lock(sch);
1961         cbq_link_class(cl);
1962         cl->borrow = cl->tparent;
1963         if (cl->tparent != &q->link)
1964                 cl->share = cl->tparent;
1965         cbq_adjust_levels(parent);
1966         cl->minidle = -0x7FFFFFFF;
1967         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1968         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1969         if (cl->ewma_log==0)
1970                 cl->ewma_log = q->link.ewma_log;
1971         if (cl->maxidle==0)
1972                 cl->maxidle = q->link.maxidle;
1973         if (cl->avpkt==0)
1974                 cl->avpkt = q->link.avpkt;
1975         cl->overlimit = cbq_ovl_classic;
1976         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1977                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1978 #ifdef CONFIG_NET_CLS_POLICE
1979         if (tb[TCA_CBQ_POLICE-1])
1980                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1981 #endif
1982         if (tb[TCA_CBQ_FOPT-1])
1983                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1984         sch_tree_unlock(sch);
1985
1986 #ifdef CONFIG_NET_ESTIMATOR
1987         if (tca[TCA_RATE-1])
1988                 qdisc_new_estimator(&cl->stats, cl->stats_lock,
1989                                     tca[TCA_RATE-1]);
1990 #endif
1991
1992         *arg = (unsigned long)cl;
1993         return 0;
1994
1995 failure:
1996         qdisc_put_rtab(rtab);
1997         return err;
1998 }
1999
2000 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
2001 {
2002         struct cbq_sched_data *q = qdisc_priv(sch);
2003         struct cbq_class *cl = (struct cbq_class*)arg;
2004
2005         if (cl->filters || cl->children || cl == &q->link)
2006                 return -EBUSY;
2007
2008         sch_tree_lock(sch);
2009
2010         if (cl->next_alive)
2011                 cbq_deactivate_class(cl);
2012
2013         if (q->tx_borrowed == cl)
2014                 q->tx_borrowed = q->tx_class;
2015         if (q->tx_class == cl) {
2016                 q->tx_class = NULL;
2017                 q->tx_borrowed = NULL;
2018         }
2019 #ifdef CONFIG_NET_CLS_POLICE
2020         if (q->rx_class == cl)
2021                 q->rx_class = NULL;
2022 #endif
2023
2024         cbq_unlink_class(cl);
2025         cbq_adjust_levels(cl->tparent);
2026         cl->defmap = 0;
2027         cbq_sync_defmap(cl);
2028
2029         cbq_rmprio(q, cl);
2030         sch_tree_unlock(sch);
2031
2032         if (--cl->refcnt == 0)
2033                 cbq_destroy_class(sch, cl);
2034
2035         return 0;
2036 }
2037
2038 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2039 {
2040         struct cbq_sched_data *q = qdisc_priv(sch);
2041         struct cbq_class *cl = (struct cbq_class *)arg;
2042
2043         if (cl == NULL)
2044                 cl = &q->link;
2045
2046         return &cl->filter_list;
2047 }
2048
2049 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2050                                      u32 classid)
2051 {
2052         struct cbq_sched_data *q = qdisc_priv(sch);
2053         struct cbq_class *p = (struct cbq_class*)parent;
2054         struct cbq_class *cl = cbq_class_lookup(q, classid);
2055
2056         if (cl) {
2057                 if (p && p->level <= cl->level)
2058                         return 0;
2059                 cl->filters++;
2060                 return (unsigned long)cl;
2061         }
2062         return 0;
2063 }
2064
2065 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2066 {
2067         struct cbq_class *cl = (struct cbq_class*)arg;
2068
2069         cl->filters--;
2070 }
2071
2072 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2073 {
2074         struct cbq_sched_data *q = qdisc_priv(sch);
2075         unsigned h;
2076
2077         if (arg->stop)
2078                 return;
2079
2080         for (h = 0; h < 16; h++) {
2081                 struct cbq_class *cl;
2082
2083                 for (cl = q->classes[h]; cl; cl = cl->next) {
2084                         if (arg->count < arg->skip) {
2085                                 arg->count++;
2086                                 continue;
2087                         }
2088                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2089                                 arg->stop = 1;
2090                                 return;
2091                         }
2092                         arg->count++;
2093                 }
2094         }
2095 }
2096
2097 static struct Qdisc_class_ops cbq_class_ops = {
2098         .graft          =       cbq_graft,
2099         .leaf           =       cbq_leaf,
2100         .get            =       cbq_get,
2101         .put            =       cbq_put,
2102         .change         =       cbq_change_class,
2103         .delete         =       cbq_delete,
2104         .walk           =       cbq_walk,
2105         .tcf_chain      =       cbq_find_tcf,
2106         .bind_tcf       =       cbq_bind_filter,
2107         .unbind_tcf     =       cbq_unbind_filter,
2108         .dump           =       cbq_dump_class,
2109 };
2110
2111 static struct Qdisc_ops cbq_qdisc_ops = {
2112         .next           =       NULL,
2113         .cl_ops         =       &cbq_class_ops,
2114         .id             =       "cbq",
2115         .priv_size      =       sizeof(struct cbq_sched_data),
2116         .enqueue        =       cbq_enqueue,
2117         .dequeue        =       cbq_dequeue,
2118         .requeue        =       cbq_requeue,
2119         .drop           =       cbq_drop,
2120         .init           =       cbq_init,
2121         .reset          =       cbq_reset,
2122         .destroy        =       cbq_destroy,
2123         .change         =       NULL,
2124         .dump           =       cbq_dump,
2125         .owner          =       THIS_MODULE,
2126 };
2127
2128 static int __init cbq_module_init(void)
2129 {
2130         return register_qdisc(&cbq_qdisc_ops);
2131 }
2132 static void __exit cbq_module_exit(void) 
2133 {
2134         unregister_qdisc(&cbq_qdisc_ops);
2135 }
2136 module_init(cbq_module_init)
2137 module_exit(cbq_module_exit)
2138 MODULE_LICENSE("GPL");