2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <asm/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
37 #include <net/pkt_sched.h>
40 /* Class-Based Queueing (CBQ) algorithm.
41 =======================================
43 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44 Management Models for Packet Networks",
45 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
47 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
49 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
52 [4] Sally Floyd and Michael Speer, "Experimental Results
53 for Class-Based Queueing", 1998, not published.
55 -----------------------------------------------------------------------
57 Algorithm skeleton was taken from NS simulator cbq.cc.
58 If someone wants to check this code against the LBL version,
59 they should take into account that ONLY the skeleton was borrowed;
60 the implementation is different. In particular:
62 --- The WRR algorithm is different. Our version looks more
63 reasonable (I hope) and works when quanta are allowed to be
64 less than MTU, which is always the case when real time classes
65 have small rates. Note that the statement in [3] is
66 incomplete: delay may actually be estimated even if class
67 per-round allotment is less than MTU. Namely, if per-round
68 allotment is W*r_i, and r_1+...+r_k = r < 1
70 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
72 In the worst case we have IntServ estimate with D = W*r+k*MTU
73 and C = MTU*r. The proof (if correct at all) is trivial.
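 A worked instance of this bound, for illustration (not taken from
 [1]-[4]; read W as the per-round allotment in bytes, r_i as class i's
 share of the link, k as the number of classes, B as the link
 bandwidth in bytes/sec): take MTU = 1500 bytes, W = 10000 bytes,
 r_i = 0.05 (so W*r_i = 500 bytes), r = 0.9 (so W*r = 9000 bytes),
 k = 4 and B = 1.25 Mbyte/sec. Then

 delay_i <= (ceil(1500/500)*9000 + 9000 + 4*1500)/B
          = 42000 bytes / 1.25 Mbyte/sec ~= 34 msec,

 which is finite even though the per-round allotment (500 bytes) is
 well below the MTU -- exactly the point made above.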
76 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77 interpret some places, which look like incorrect translations
78 from NS. Anyone is invited to find these differences
79 and explain to me why I am wrong 8).
81 --- Linux has no EOI event, so we cannot estimate the true class
82 idle time. The workaround is to treat the next dequeue event
83 as a sign that the previous packet has finished. This is wrong because of
84 internal device queueing, but on a permanently loaded link it is true.
85 Moreover, combined with clock integrator, this scheme looks
86 very close to an ideal solution. */
88 struct cbq_sched_data;
93 struct cbq_class *next; /* hash table link */
94 struct cbq_class *next_alive; /* next class with backlog in this priority band */
98 unsigned char priority; /* class priority */
99 unsigned char priority2; /* priority to be used after overlimit */
100 unsigned char ewma_log; /* time constant for idle time calculation */
101 unsigned char ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103 unsigned char police;
108 /* Link-sharing scheduler parameters */
109 long maxidle; /* Class parameters: see below. */
113 struct qdisc_rate_table *R_tab;
115 /* Overlimit strategy parameters */
116 void (*overlimit)(struct cbq_class *cl);
119 /* General scheduler (WRR) parameters */
121 long quantum; /* Allotment per WRR round */
122 long weight; /* Relative allotment: see below */
124 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
125 struct cbq_class *split; /* Ptr to split node */
126 struct cbq_class *share; /* Ptr to LS parent in the class tree */
127 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
128 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
130 struct cbq_class *sibling; /* Sibling chain */
131 struct cbq_class *children; /* Pointer to children chain */
133 struct Qdisc *q; /* Elementary queueing discipline */
137 unsigned char cpriority; /* Effective priority */
138 unsigned char delayed;
139 unsigned char level; /* level of the class in hierarchy:
140 0 for leaf classes, and maximal
141 level of children + 1 for nodes.
144 psched_time_t last; /* Last end of service */
145 psched_time_t undertime;
147 long deficit; /* Saved deficit for WRR */
148 unsigned long penalized;
149 struct tc_stats stats;
150 struct tc_cbq_xstats xstats;
152 struct tcf_proto *filter_list;
157 struct cbq_class *defaults[TC_PRIO_MAX+1];
160 struct cbq_sched_data
162 struct cbq_class *classes[16]; /* Hash table of all classes */
163 int nclasses[TC_CBQ_MAXPRIO+1];
164 unsigned quanta[TC_CBQ_MAXPRIO+1];
166 struct cbq_class link;
169 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
172 #ifdef CONFIG_NET_CLS_POLICE
173 struct cbq_class *rx_class;
175 struct cbq_class *tx_class;
176 struct cbq_class *tx_borrowed;
178 psched_time_t now; /* Cached timestamp */
179 psched_time_t now_rt; /* Cached real time */
182 struct timer_list delay_timer;
183 struct timer_list wd_timer; /* Watchdog timer,
193 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
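/* For illustration (a userspace sketch with made-up names): the rate table
   indexed by L2T holds, per cell of 1<<cell_log bytes, the time needed to
   transmit that many bytes at the configured rate, so the lookup amounts to

	unsigned int len_to_time(const unsigned int *tab, int cell_log,
				 unsigned int len)
	{
		return tab[len >> cell_log];	-- e.g. cell_log=3: 8-byte cells
	}

   The table itself is computed in user space by tc and passed down in the
   TCA_CBQ_RTAB attribute. */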
196 static __inline__ unsigned cbq_hash(u32 h)
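/* (A sketch of the idea: q->classes[] has 16 buckets, so any fold of the
   32-bit classid into the range 0..15 will do, for example

	h ^= h >> 8;
	h ^= h >> 4;
	return h & 0xF;

   -- illustrative only, not necessarily the exact expression used here.) */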
203 static __inline__ struct cbq_class *
204 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
206 struct cbq_class *cl;
208 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
209 if (cl->classid == classid)
214 #ifdef CONFIG_NET_CLS_POLICE
216 static struct cbq_class *
217 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
219 struct cbq_class *cl, *new;
221 for (cl = this->tparent; cl; cl = cl->tparent)
222 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
230 /* Classify packet. The procedure is pretty complicated, but
231 it allows us to combine link sharing and priority scheduling
234 Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
235 so that it resolves to split nodes. Then packets are classified
236 by logical priority, or a more specific classifier may be attached
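
   For example (illustrative setup): a route-based filter at the root can
   resolve to an interior split node, whose defmap then routes packets by
   skb->priority -- say, TC_PRIO_INTERACTIVE traffic to one leaf class and
   everything else to that node's TC_PRIO_BESTEFFORT default.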
240 static struct cbq_class *
241 cbq_classify(struct sk_buff *skb, struct Qdisc *sch)
243 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
244 struct cbq_class *head = &q->link;
245 struct cbq_class **defmap;
246 struct cbq_class *cl = NULL;
247 u32 prio = skb->priority;
248 struct tcf_result res;
251 * Step 1. If skb->priority points to one of our classes, use it.
253 if (TC_H_MAJ(prio^sch->handle) == 0 &&
254 (cl = cbq_class_lookup(q, prio)) != NULL)
260 defmap = head->defaults;
263 * Step 2+n. Apply classifier.
265 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268 if ((cl = (void*)res.class) == NULL) {
269 if (TC_H_MAJ(res.classid))
270 cl = cbq_class_lookup(q, res.classid);
271 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
272 cl = defmap[TC_PRIO_BESTEFFORT];
274 if (cl == NULL || cl->level >= head->level)
278 #ifdef CONFIG_NET_CLS_POLICE
280 case TC_POLICE_RECLASSIFY:
281 return cbq_reclassify(skb, cl);
292 * Step 3+n. If classifier selected a link sharing class,
293 * apply the agency-specific classifier.
294 * Repeat this procedure until we hit a leaf node.
303 * Step 4. No success...
305 if (TC_H_MAJ(prio) == 0 &&
306 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
307 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
314 A packet has just been enqueued into an empty class.
315 cbq_activate_class adds it to the tail of active class list
316 of its priority band.
319 static __inline__ void cbq_activate_class(struct cbq_class *cl)
321 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
322 int prio = cl->cpriority;
323 struct cbq_class *cl_tail;
325 cl_tail = q->active[prio];
326 q->active[prio] = cl;
328 if (cl_tail != NULL) {
329 cl->next_alive = cl_tail->next_alive;
330 cl_tail->next_alive = cl;
333 q->activemask |= (1<<prio);
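/* Note on the list discipline used above (descriptive, matching the code):
   q->active[prio] always points at the TAIL of a circular singly-linked
   list of backlogged classes in this band, and tail->next_alive is the
   head. Activating A, B, C in that order therefore yields

	q->active[prio] -> C,  with next_alive links C -> A -> B -> C

   so the round robin in cbq_dequeue_prio() serves classes in activation
   order. When the band was empty, the class becomes a one-element ring
   (cl->next_alive == cl) and the band's bit is set in q->activemask. */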
338 Unlink class from active chain.
339 Note that this same procedure is done directly in cbq_dequeue*
340 during the round-robin procedure.
343 static void cbq_deactivate_class(struct cbq_class *this)
345 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
346 int prio = this->cpriority;
347 struct cbq_class *cl;
348 struct cbq_class *cl_prev = q->active[prio];
351 cl = cl_prev->next_alive;
353 cl_prev->next_alive = cl->next_alive;
354 cl->next_alive = NULL;
356 if (cl == q->active[prio]) {
357 q->active[prio] = cl_prev;
358 if (cl == q->active[prio]) {
359 q->active[prio] = NULL;
360 q->activemask &= ~(1<<prio);
365 cl = cl_prev->next_alive;
368 } while ((cl_prev = cl) != q->active[prio]);
372 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
374 int toplevel = q->toplevel;
376 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
380 PSCHED_GET_TIME(now);
381 incr = PSCHED_TDIFF(now, q->now_rt);
382 PSCHED_TADD2(q->now, incr, now);
385 if (PSCHED_TLESS(cl->undertime, now)) {
386 q->toplevel = cl->level;
389 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
394 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
396 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
397 struct cbq_class *cl = cbq_classify(skb, sch);
399 int ret = NET_XMIT_POLICED;
401 #ifdef CONFIG_NET_CLS_POLICE
405 #ifdef CONFIG_NET_CLS_POLICE
406 cl->q->__parent = sch;
408 if ((ret = cl->q->enqueue(skb, cl->q)) == 0) {
410 sch->stats.packets++;
411 sch->stats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
414 cbq_activate_class(cl);
423 cbq_mark_toplevel(q, cl);
430 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
432 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
433 struct cbq_class *cl;
436 if ((cl = q->tx_class) == NULL) {
443 cbq_mark_toplevel(q, cl);
445 #ifdef CONFIG_NET_CLS_POLICE
447 cl->q->__parent = sch;
449 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
452 cbq_activate_class(cl);
460 /* Overlimit actions */
462 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
464 static void cbq_ovl_classic(struct cbq_class *cl)
466 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
467 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
470 delay += cl->offtime;
473 The class goes to sleep, so that it will have no
474 chance to work off its avgidle. Let's forgive it 8)
476 BTW cbq-2.0 has a bug in this
477 place; apparently they forgot to shift it by cl->ewma_log.
480 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
481 if (cl->avgidle < cl->minidle)
482 cl->avgidle = cl->minidle;
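/* Worked example of the compensation above (illustrative numbers): with
   ewma_log = 5 (W = 1/32) and a scaled avgidle of -3200, the sleep delay
   is shortened by

	(-avgidle) - ((-avgidle) >> ewma_log) = 3200 - 100 = 3100

   time units, i.e. (1 - W)*3200 -- the idle time the class would otherwise
   be charged for while asleep and unable to earn avgidle back. */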
485 PSCHED_TADD2(q->now, delay, cl->undertime);
487 cl->xstats.overactions++;
490 if (q->wd_expires == 0 || q->wd_expires > delay)
491 q->wd_expires = delay;
493 /* Dirty work! We must schedule wakeups based on
494 real available rate, rather than leaf rate,
495 which may be tiny (even zero).
497 if (q->toplevel == TC_CBQ_MAXLEVEL) {
499 psched_tdiff_t base_delay = q->wd_expires;
501 for (b = cl->borrow; b; b = b->borrow) {
502 delay = PSCHED_TDIFF(b->undertime, q->now);
503 if (delay < base_delay) {
510 q->wd_expires = base_delay;
514 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
518 static void cbq_ovl_rclassic(struct cbq_class *cl)
520 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
521 struct cbq_class *this = cl;
524 if (cl->level > q->toplevel) {
528 } while ((cl = cl->borrow) != NULL);
535 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
537 static void cbq_ovl_delay(struct cbq_class *cl)
539 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
540 psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
543 unsigned long sched = jiffies;
545 delay += cl->offtime;
547 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
548 if (cl->avgidle < cl->minidle)
549 cl->avgidle = cl->minidle;
550 PSCHED_TADD2(q->now, delay, cl->undertime);
553 sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
554 cl->penalized = sched;
555 cl->cpriority = TC_CBQ_MAXPRIO;
556 q->pmask |= (1<<TC_CBQ_MAXPRIO);
557 if (del_timer(&q->delay_timer) &&
558 (long)(q->delay_timer.expires - sched) > 0)
559 q->delay_timer.expires = sched;
560 add_timer(&q->delay_timer);
562 cl->xstats.overactions++;
567 if (q->wd_expires == 0 || q->wd_expires > delay)
568 q->wd_expires = delay;
571 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
573 static void cbq_ovl_lowprio(struct cbq_class *cl)
575 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
577 cl->penalized = jiffies + cl->penalty;
579 if (cl->cpriority != cl->priority2) {
580 cl->cpriority = cl->priority2;
581 q->pmask |= (1<<cl->cpriority);
582 cl->xstats.overactions++;
587 /* TC_CBQ_OVL_DROP: penalize class by dropping */
589 static void cbq_ovl_drop(struct cbq_class *cl)
591 if (cl->q->ops->drop)
592 if (cl->q->ops->drop(cl->q))
594 cl->xstats.overactions++;
598 static void cbq_watchdog(unsigned long arg)
600 struct Qdisc *sch = (struct Qdisc*)arg;
602 sch->flags &= ~TCQ_F_THROTTLED;
603 netif_schedule(sch->dev);
606 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
608 struct cbq_class *cl;
609 struct cbq_class *cl_prev = q->active[prio];
610 unsigned long now = jiffies;
611 unsigned long sched = now;
617 cl = cl_prev->next_alive;
618 if ((long)(now - cl->penalized) > 0) {
619 cl_prev->next_alive = cl->next_alive;
620 cl->next_alive = NULL;
621 cl->cpriority = cl->priority;
623 cbq_activate_class(cl);
625 if (cl == q->active[prio]) {
626 q->active[prio] = cl_prev;
627 if (cl == q->active[prio]) {
628 q->active[prio] = NULL;
633 cl = cl_prev->next_alive;
634 } else if ((long)(sched - cl->penalized) > 0)
635 sched = cl->penalized;
636 } while ((cl_prev = cl) != q->active[prio]);
638 return (long)(sched - now);
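/* The (long)(a - b) > 0 comparisons above are the standard wrap-safe way
   to order jiffies values (the same idea as time_after()): e.g. with
   32-bit jiffies, now = 0xfffffff0 and penalized = 0x00000010 gives
   (long)(now - penalized) = -32 < 0, i.e. "not yet due", which is the
   intended answer across the counter wrap. */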
641 static void cbq_undelay(unsigned long arg)
643 struct Qdisc *sch = (struct Qdisc*)arg;
644 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
652 int prio = ffz(~pmask);
657 tmp = cbq_undelay_prio(q, prio);
660 if (tmp < delay || delay == 0)
666 q->delay_timer.expires = jiffies + delay;
667 add_timer(&q->delay_timer);
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
675 #ifdef CONFIG_NET_CLS_POLICE
677 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
680 struct Qdisc *sch = child->__parent;
681 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
682 struct cbq_class *cl = q->rx_class;
686 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688 cbq_mark_toplevel(q, cl);
691 cl->q->__parent = sch;
693 if (cl->q->enqueue(skb, cl->q) == 0) {
695 sch->stats.packets++;
696 sch->stats.bytes+=len;
698 cbq_activate_class(cl);
711 This is a mission-critical procedure.
713 We "regenerate" the toplevel cutoff if the transmitting class
714 has backlog and is not regulated. This is not part of the
715 original CBQ description, but it looks more reasonable.
716 It may well be wrong; this question needs further investigation.
719 static __inline__ void
720 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
721 struct cbq_class *borrowed)
723 if (cl && q->toplevel >= borrowed->level) {
724 if (cl->q->q.qlen > 1) {
726 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
727 q->toplevel = borrowed->level;
730 } while ((borrowed=borrowed->borrow) != NULL);
733 /* This is not necessary now. Uncommenting it
734 would save CPU cycles, but decrease fairness.
736 q->toplevel = TC_CBQ_MAXLEVEL;
742 cbq_update(struct cbq_sched_data *q)
744 struct cbq_class *this = q->tx_class;
745 struct cbq_class *cl = this;
750 for ( ; cl; cl = cl->share) {
751 long avgidle = cl->avgidle;
755 cl->stats.bytes += len;
758 (now - last) is total time between packet right edges.
759 (last_pktlen/rate) is "virtual" busy time, so that
761 idle = (now - last) - last_pktlen/rate
764 idle = PSCHED_TDIFF(q->now, cl->last);
765 if ((unsigned long)idle > 128*1024*1024) {
766 avgidle = cl->maxidle;
768 idle -= L2T(cl, len);
770 /* true_avgidle := (1-W)*true_avgidle + W*idle,
771 where W=2^{-ewma_log}. But cl->avgidle is scaled:
772 cl->avgidle == true_avgidle/W,
775 avgidle += idle - (avgidle>>cl->ewma_log);
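/* Worked example of the scaled update (illustrative numbers): with
   ewma_log = 5 (W = 1/32), avgidle = 640 (true_avgidle = 20) and a new
   idle sample of 8,

	avgidle += 8 - (640 >> 5) = 8 - 20   =>   avgidle = 628,

   i.e. true_avgidle = 628/32 = 19.625, which matches the unscaled rule
   (1 - 1/32)*20 + (1/32)*8 = 19.625 exactly. */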
779 /* Overlimit or at-limit */
781 if (avgidle < cl->minidle)
782 avgidle = cl->minidle;
784 cl->avgidle = avgidle;
786 /* Calculate the expected time when this class
787 will be allowed to send.
789 (1-W)*true_avgidle + W*delay = 0, i.e.
790 idle = (1/W - 1)*(-true_avgidle)
792 idle = (1 - W)*(-cl->avgidle);
794 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
798 To maintain the rate allocated to the class,
799 we add to undertime the amount of virtual clock time
800 necessary to complete the transmitted packet.
801 (len/phys_bandwidth has already elapsed by
802 the time cbq_update is called.)
805 idle -= L2T(&q->link, len);
806 idle += L2T(cl, len);
808 PSCHED_AUDIT_TDIFF(idle);
810 PSCHED_TADD2(q->now, idle, cl->undertime);
814 PSCHED_SET_PASTPERFECT(cl->undertime);
815 if (avgidle > cl->maxidle)
816 cl->avgidle = cl->maxidle;
818 cl->avgidle = avgidle;
823 cbq_update_toplevel(q, this, q->tx_borrowed);
826 static __inline__ struct cbq_class *
827 cbq_under_limit(struct cbq_class *cl)
829 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
830 struct cbq_class *this_cl = cl;
832 if (cl->tparent == NULL)
835 if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
836 !PSCHED_TLESS(q->now, cl->undertime)) {
842 /* This is a very suspicious place. Currently, the overlimit
843 action is generated for non-bounded classes
844 only if the link is completely congested.
845 Though this agrees with the ancestor-only paradigm,
846 it looks very stupid. In particular,
847 it means that this chunk of code will either
848 never be called or result in strong amplification
849 of burstiness. Dangerous, silly, but
850 no other solution exists.
852 if ((cl = cl->borrow) == NULL) {
853 this_cl->stats.overlimits++;
854 this_cl->overlimit(this_cl);
857 if (cl->level > q->toplevel)
859 } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
860 PSCHED_TLESS(q->now, cl->undertime));
866 static __inline__ struct sk_buff *
867 cbq_dequeue_prio(struct Qdisc *sch, int prio)
869 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
870 struct cbq_class *cl_tail, *cl_prev, *cl;
874 cl_tail = cl_prev = q->active[prio];
875 cl = cl_prev->next_alive;
882 struct cbq_class *borrow = cl;
885 (borrow = cbq_under_limit(cl)) == NULL)
888 if (cl->deficit <= 0) {
889 /* Class exhausted its allotment per
890 this round. Switch to the next one.
893 cl->deficit += cl->quantum;
897 skb = cl->q->dequeue(cl->q);
899 /* The class did not give us any skb :-(
900 This can occur even if cl->q->q.qlen != 0,
901 e.g. if cl->q == "tbf"
906 cl->deficit -= skb->len;
908 q->tx_borrowed = borrow;
910 #ifndef CBQ_XSTATS_BORROWS_BYTES
911 borrow->xstats.borrows++;
912 cl->xstats.borrows++;
914 borrow->xstats.borrows += skb->len;
915 cl->xstats.borrows += skb->len;
918 q->tx_len = skb->len;
920 if (cl->deficit <= 0) {
921 q->active[prio] = cl;
923 cl->deficit += cl->quantum;
928 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
929 /* Class is empty or penalized.
930 Unlink it from active chain.
932 cl_prev->next_alive = cl->next_alive;
933 cl->next_alive = NULL;
935 /* Did cl_tail point to it? */
940 /* Was it the last class in this band? */
943 q->active[prio] = NULL;
944 q->activemask &= ~(1<<prio);
946 cbq_activate_class(cl);
950 q->active[prio] = cl_tail;
953 cbq_activate_class(cl);
961 } while (cl_prev != cl_tail);
964 q->active[prio] = cl_prev;
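/* The weighted round robin above, reduced to its core (a userspace sketch
   with made-up helpers; the real loop interleaves borrowing, penalties and
   list surgery):

	struct wrr_class { long deficit, quantum; struct pkt_queue q; };

	static struct sk_buff *wrr_dequeue_one(void)
	{
		struct wrr_class *cl;
		struct sk_buff *skb;

		while ((cl = next_backlogged_class()) != NULL) {
			if (cl->deficit <= 0) {
				cl->deficit += cl->quantum;  -- refill, move to next class
				continue;
			}
			skb = pkt_dequeue(&cl->q);
			cl->deficit -= skb->len;             -- charge what was sent
			return skb;
		}
		return NULL;
	}

   Unlike textbook DRR, a packet is sent whenever the deficit is still
   positive, even if the packet is larger than what remains; the overdraft
   is carried into later rounds, which is what makes quanta smaller than
   the MTU usable (see the WRR discussion at the top of this file). */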
969 static __inline__ struct sk_buff *
970 cbq_dequeue_1(struct Qdisc *sch)
972 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
976 activemask = q->activemask&0xFF;
978 int prio = ffz(~activemask);
979 activemask &= ~(1<<prio);
980 skb = cbq_dequeue_prio(sch, prio);
987 static struct sk_buff *
988 cbq_dequeue(struct Qdisc *sch)
991 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
995 PSCHED_GET_TIME(now);
996 incr = PSCHED_TDIFF(now, q->now_rt);
999 psched_tdiff_t incr2;
1000 /* Time integrator. We calculate EOS time
1001 by adding the expected packet transmission time.
1002 If real time is greater, we warp the artificial clock, so that
1005 cbq_time = max(real_time, work);
1007 incr2 = L2T(&q->link, q->tx_len);
1008 PSCHED_TADD(q->now, incr2);
1010 if ((incr -= incr2) < 0)
1013 PSCHED_TADD(q->now, incr);
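/* Worked example of the integrator (illustrative numbers): suppose the
   previous packet was expected to occupy the link for incr2 = 100 usec
   but only incr = 60 usec of real time have elapsed (the device buffered
   it). q->now still advances by the full 100 usec and incr is clamped to
   0, so the artificial clock runs 40 usec ahead of real time; had 150
   usec elapsed instead, q->now would advance by 100 + 50 = 150 usec,
   snapping back to real time -- hence cbq_time = max(real_time, work). */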
1019 skb = cbq_dequeue_1(sch);
1022 sch->flags &= ~TCQ_F_THROTTLED;
1026 /* All the classes are overlimit.
1030 1. The scheduler is empty.
1031 2. The toplevel cutoff inhibited borrowing.
1032 3. The root class is overlimit.
1034 Reset the 2nd and 3rd conditions and retry.
1036 Note that NS and cbq-2.0 are buggy here: peeking at
1037 an arbitrary class is appropriate for ancestor-only
1038 sharing, but not for the top-level algorithm.
1040 Our version is better but slower, because it requires
1041 two passes; this is unavoidable with top-level sharing.
1044 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1045 PSCHED_IS_PASTPERFECT(q->link.undertime))
1048 q->toplevel = TC_CBQ_MAXLEVEL;
1049 PSCHED_SET_PASTPERFECT(q->link.undertime);
1052 /* No packets in scheduler or nobody wants to give them to us :-(
1053 Sigh... start watchdog timer in the last case. */
1056 sch->stats.overlimits++;
1057 if (q->wd_expires && !netif_queue_stopped(sch->dev)) {
1058 long delay = PSCHED_US2JIFFIE(q->wd_expires);
1061 mod_timer(&q->wd_timer, jiffies + delay);
1062 sch->flags |= TCQ_F_THROTTLED;
1068 /* CBQ class maintenance routines */
1070 static void cbq_adjust_levels(struct cbq_class *this)
1077 struct cbq_class *cl;
1079 if ((cl = this->children) != NULL) {
1081 if (cl->level > level)
1083 } while ((cl = cl->sibling) != this->children);
1085 this->level = level+1;
1086 } while ((this = this->tparent) != NULL);
1089 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1091 struct cbq_class *cl;
1094 if (q->quanta[prio] == 0)
1097 for (h=0; h<16; h++) {
1098 for (cl = q->classes[h]; cl; cl = cl->next) {
1099 /* BUGGGG... Beware! This expression suffers from
1100 arithmetic overflows!
1102 if (cl->priority == prio) {
1103 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1106 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1107 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1108 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
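/* Worked example (illustrative numbers): with two classes at this
   priority (nclasses = 2), allot = 1514 and weights 3 and 1 (so
   quanta[prio] = 4), the formula yields quanta of (3*1514*2)/4 = 2271
   and (1*1514*2)/4 = 757 bytes -- the mean quantum stays equal to allot
   while per-round service is split 3:1. The overflow warned about above
   comes from the product weight*allot*nclasses: the default weight is
   the class rate in bytes/sec, so the product easily exceeds what a
   32-bit long can hold. */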
1114 static void cbq_sync_defmap(struct cbq_class *cl)
1116 struct cbq_sched_data *q = (struct cbq_sched_data*)cl->qdisc->data;
1117 struct cbq_class *split = cl->split;
1124 for (i=0; i<=TC_PRIO_MAX; i++) {
1125 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1126 split->defaults[i] = NULL;
1129 for (i=0; i<=TC_PRIO_MAX; i++) {
1130 int level = split->level;
1132 if (split->defaults[i])
1135 for (h=0; h<16; h++) {
1136 struct cbq_class *c;
1138 for (c = q->classes[h]; c; c = c->next) {
1139 if (c->split == split && c->level < level &&
1141 split->defaults[i] = c;
1149 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1151 struct cbq_class *split = NULL;
1154 if ((split = cl->split) == NULL)
1156 splitid = split->classid;
1159 if (split == NULL || split->classid != splitid) {
1160 for (split = cl->tparent; split; split = split->tparent)
1161 if (split->classid == splitid)
1168 if (cl->split != split) {
1170 cbq_sync_defmap(cl);
1172 cl->defmap = def&mask;
1174 cl->defmap = (cl->defmap&~mask)|(def&mask);
1176 cbq_sync_defmap(cl);
1179 static void cbq_unlink_class(struct cbq_class *this)
1181 struct cbq_class *cl, **clp;
1182 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1184 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1192 if (this->tparent) {
1201 } while ((cl = *clp) != this->sibling);
1203 if (this->tparent->children == this) {
1204 this->tparent->children = this->sibling;
1205 if (this->sibling == this)
1206 this->tparent->children = NULL;
1209 BUG_TRAP(this->sibling == this);
1213 static void cbq_link_class(struct cbq_class *this)
1215 struct cbq_sched_data *q = (struct cbq_sched_data*)this->qdisc->data;
1216 unsigned h = cbq_hash(this->classid);
1217 struct cbq_class *parent = this->tparent;
1219 this->sibling = this;
1220 this->next = q->classes[h];
1221 q->classes[h] = this;
1226 if (parent->children == NULL) {
1227 parent->children = this;
1229 this->sibling = parent->children->sibling;
1230 parent->children->sibling = this;
1234 static unsigned int cbq_drop(struct Qdisc* sch)
1236 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1237 struct cbq_class *cl, *cl_head;
1241 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1242 if ((cl_head = q->active[prio]) == NULL)
1247 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1251 } while ((cl = cl->next_alive) != cl_head);
1257 cbq_reset(struct Qdisc* sch)
1259 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1260 struct cbq_class *cl;
1267 q->tx_borrowed = NULL;
1268 del_timer(&q->wd_timer);
1269 del_timer(&q->delay_timer);
1270 q->toplevel = TC_CBQ_MAXLEVEL;
1271 PSCHED_GET_TIME(q->now);
1274 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1275 q->active[prio] = NULL;
1277 for (h = 0; h < 16; h++) {
1278 for (cl = q->classes[h]; cl; cl = cl->next) {
1281 cl->next_alive = NULL;
1282 PSCHED_SET_PASTPERFECT(cl->undertime);
1283 cl->avgidle = cl->maxidle;
1284 cl->deficit = cl->quantum;
1285 cl->cpriority = cl->priority;
1292 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1294 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1295 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1296 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1298 if (lss->change&TCF_CBQ_LSS_EWMA)
1299 cl->ewma_log = lss->ewma_log;
1300 if (lss->change&TCF_CBQ_LSS_AVPKT)
1301 cl->avpkt = lss->avpkt;
1302 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1303 cl->minidle = -(long)lss->minidle;
1304 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1305 cl->maxidle = lss->maxidle;
1306 cl->avgidle = lss->maxidle;
1308 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1309 cl->offtime = lss->offtime;
1313 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1315 q->nclasses[cl->priority]--;
1316 q->quanta[cl->priority] -= cl->weight;
1317 cbq_normalize_quanta(q, cl->priority);
1320 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1322 q->nclasses[cl->priority]++;
1323 q->quanta[cl->priority] += cl->weight;
1324 cbq_normalize_quanta(q, cl->priority);
1327 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1329 struct cbq_sched_data *q = (struct cbq_sched_data *)cl->qdisc->data;
1332 cl->allot = wrr->allot;
1334 cl->weight = wrr->weight;
1335 if (wrr->priority) {
1336 cl->priority = wrr->priority-1;
1337 cl->cpriority = cl->priority;
1338 if (cl->priority >= cl->priority2)
1339 cl->priority2 = TC_CBQ_MAXPRIO-1;
1346 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1348 switch (ovl->strategy) {
1349 case TC_CBQ_OVL_CLASSIC:
1350 cl->overlimit = cbq_ovl_classic;
1352 case TC_CBQ_OVL_DELAY:
1353 cl->overlimit = cbq_ovl_delay;
1355 case TC_CBQ_OVL_LOWPRIO:
1356 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1357 ovl->priority2-1 <= cl->priority)
1359 cl->priority2 = ovl->priority2-1;
1360 cl->overlimit = cbq_ovl_lowprio;
1362 case TC_CBQ_OVL_DROP:
1363 cl->overlimit = cbq_ovl_drop;
1365 case TC_CBQ_OVL_RCLASSIC:
1366 cl->overlimit = cbq_ovl_rclassic;
1371 cl->penalty = (ovl->penalty*HZ)/1000;
1375 #ifdef CONFIG_NET_CLS_POLICE
1376 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1378 cl->police = p->police;
1380 if (cl->q->handle) {
1381 if (p->police == TC_POLICE_RECLASSIFY)
1382 cl->q->reshape_fail = cbq_reshape_fail;
1384 cl->q->reshape_fail = NULL;
1390 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1392 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1396 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1398 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1399 struct rtattr *tb[TCA_CBQ_MAX];
1400 struct tc_ratespec *r;
1402 if (rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0 ||
1403 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1404 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1407 if (tb[TCA_CBQ_LSSOPT-1] &&
1408 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1411 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1413 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1417 q->link.sibling = &q->link;
1418 q->link.classid = sch->handle;
1419 q->link.qdisc = sch;
1420 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1421 q->link.q = &noop_qdisc;
1423 q->link.priority = TC_CBQ_MAXPRIO-1;
1424 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1425 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1426 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1427 q->link.overlimit = cbq_ovl_classic;
1428 q->link.allot = psched_mtu(sch->dev);
1429 q->link.quantum = q->link.allot;
1430 q->link.weight = q->link.R_tab->rate.rate;
1432 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1433 q->link.avpkt = q->link.allot/2;
1434 q->link.minidle = -0x7FFFFFFF;
1435 q->link.stats.lock = &sch->dev->queue_lock;
1437 init_timer(&q->wd_timer);
1438 q->wd_timer.data = (unsigned long)sch;
1439 q->wd_timer.function = cbq_watchdog;
1440 init_timer(&q->delay_timer);
1441 q->delay_timer.data = (unsigned long)sch;
1442 q->delay_timer.function = cbq_undelay;
1443 q->toplevel = TC_CBQ_MAXLEVEL;
1444 PSCHED_GET_TIME(q->now);
1447 cbq_link_class(&q->link);
1449 if (tb[TCA_CBQ_LSSOPT-1])
1450 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1452 cbq_addprio(q, &q->link);
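/* A typical user-space configuration of this qdisc, for illustration
   (see tc-cbq(8) for the authoritative syntax):

	tc qdisc add dev eth0 root handle 1: cbq \
		bandwidth 10Mbit avpkt 1000 cell 8
	tc class add dev eth0 parent 1: classid 1:1 cbq \
		bandwidth 10Mbit rate 1Mbit allot 1514 prio 5 \
		avpkt 1000 bounded

   "rate" arrives here as TCA_CBQ_RATE plus the TCA_CBQ_RTAB table,
   "allot"/"prio"/"weight" via TCA_CBQ_WRROPT, and "bounded"/"isolated"
   as flags in the TCA_CBQ_LSSOPT options parsed above. */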
1456 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1458 unsigned char *b = skb->tail;
1460 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1464 skb_trim(skb, b - skb->data);
1468 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1470 unsigned char *b = skb->tail;
1471 struct tc_cbq_lssopt opt;
1474 if (cl->borrow == NULL)
1475 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1476 if (cl->share == NULL)
1477 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1478 opt.ewma_log = cl->ewma_log;
1479 opt.level = cl->level;
1480 opt.avpkt = cl->avpkt;
1481 opt.maxidle = cl->maxidle;
1482 opt.minidle = (u32)(-cl->minidle);
1483 opt.offtime = cl->offtime;
1485 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1489 skb_trim(skb, b - skb->data);
1493 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1495 unsigned char *b = skb->tail;
1496 struct tc_cbq_wrropt opt;
1499 opt.allot = cl->allot;
1500 opt.priority = cl->priority+1;
1501 opt.cpriority = cl->cpriority+1;
1502 opt.weight = cl->weight;
1503 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1507 skb_trim(skb, b - skb->data);
1511 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1513 unsigned char *b = skb->tail;
1514 struct tc_cbq_ovl opt;
1516 opt.strategy = cl->ovl_strategy;
1517 opt.priority2 = cl->priority2+1;
1518 opt.penalty = (cl->penalty*1000)/HZ;
1519 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1523 skb_trim(skb, b - skb->data);
1527 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1529 unsigned char *b = skb->tail;
1530 struct tc_cbq_fopt opt;
1532 if (cl->split || cl->defmap) {
1533 opt.split = cl->split ? cl->split->classid : 0;
1534 opt.defmap = cl->defmap;
1536 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1541 skb_trim(skb, b - skb->data);
1545 #ifdef CONFIG_NET_CLS_POLICE
1546 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1548 unsigned char *b = skb->tail;
1549 struct tc_cbq_police opt;
1552 opt.police = cl->police;
1553 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1558 skb_trim(skb, b - skb->data);
1563 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1565 if (cbq_dump_lss(skb, cl) < 0 ||
1566 cbq_dump_rate(skb, cl) < 0 ||
1567 cbq_dump_wrr(skb, cl) < 0 ||
1568 cbq_dump_ovl(skb, cl) < 0 ||
1569 #ifdef CONFIG_NET_CLS_POLICE
1570 cbq_dump_police(skb, cl) < 0 ||
1572 cbq_dump_fopt(skb, cl) < 0)
1577 int cbq_copy_xstats(struct sk_buff *skb, struct tc_cbq_xstats *st)
1579 RTA_PUT(skb, TCA_XSTATS, sizeof(*st), st);
1587 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1589 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1590 unsigned char *b = skb->tail;
1593 rta = (struct rtattr*)b;
1594 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1595 if (cbq_dump_attr(skb, &q->link) < 0)
1596 goto rtattr_failure;
1597 rta->rta_len = skb->tail - b;
1598 spin_lock_bh(&sch->dev->queue_lock);
1599 q->link.xstats.avgidle = q->link.avgidle;
1600 if (cbq_copy_xstats(skb, &q->link.xstats)) {
1601 spin_unlock_bh(&sch->dev->queue_lock);
1602 goto rtattr_failure;
1604 spin_unlock_bh(&sch->dev->queue_lock);
1608 skb_trim(skb, b - skb->data);
1613 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1614 struct sk_buff *skb, struct tcmsg *tcm)
1616 struct cbq_sched_data *q = (struct cbq_sched_data*)sch->data;
1617 struct cbq_class *cl = (struct cbq_class*)arg;
1618 unsigned char *b = skb->tail;
1622 tcm->tcm_parent = cl->tparent->classid;
1624 tcm->tcm_parent = TC_H_ROOT;
1625 tcm->tcm_handle = cl->classid;
1626 tcm->tcm_info = cl->q->handle;
1628 rta = (struct rtattr*)b;
1629 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1630 if (cbq_dump_attr(skb, cl) < 0)
1631 goto rtattr_failure;
1632 rta->rta_len = skb->tail - b;
1633 cl->stats.qlen = cl->q->q.qlen;
1634 if (qdisc_copy_stats(skb, &cl->stats))
1635 goto rtattr_failure;
1636 spin_lock_bh(&sch->dev->queue_lock);
1637 cl->xstats.avgidle = cl->avgidle;
1638 cl->xstats.undertime = 0;
1639 if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1640 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1641 q->link.xstats.avgidle = q->link.avgidle;
1642 if (cbq_copy_xstats(skb, &cl->xstats)) {
1643 spin_unlock_bh(&sch->dev->queue_lock);
1644 goto rtattr_failure;
1646 spin_unlock_bh(&sch->dev->queue_lock);
1651 skb_trim(skb, b - skb->data);
1655 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1658 struct cbq_class *cl = (struct cbq_class*)arg;
1662 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1665 #ifdef CONFIG_NET_CLS_POLICE
1666 if (cl->police == TC_POLICE_RECLASSIFY)
1667 new->reshape_fail = cbq_reshape_fail;
1673 sch->q.qlen -= (*old)->q.qlen;
1675 sch_tree_unlock(sch);
1682 static struct Qdisc *
1683 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1685 struct cbq_class *cl = (struct cbq_class*)arg;
1687 return cl ? cl->q : NULL;
1690 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1692 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1693 struct cbq_class *cl = cbq_class_lookup(q, classid);
1697 return (unsigned long)cl;
1702 static void cbq_destroy_filters(struct cbq_class *cl)
1704 struct tcf_proto *tp;
1706 while ((tp = cl->filter_list) != NULL) {
1707 cl->filter_list = tp->next;
1712 static void cbq_destroy_class(struct cbq_class *cl)
1714 cbq_destroy_filters(cl);
1715 qdisc_destroy(cl->q);
1716 qdisc_put_rtab(cl->R_tab);
1717 #ifdef CONFIG_NET_ESTIMATOR
1718 qdisc_kill_estimator(&cl->stats);
1724 cbq_destroy(struct Qdisc* sch)
1726 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1727 struct cbq_class *cl;
1730 #ifdef CONFIG_NET_CLS_POLICE
1733 for (h = 0; h < 16; h++) {
1734 for (cl = q->classes[h]; cl; cl = cl->next)
1735 cbq_destroy_filters(cl);
1738 for (h = 0; h < 16; h++) {
1739 struct cbq_class *next;
1741 for (cl = q->classes[h]; cl; cl = next) {
1744 cbq_destroy_class(cl);
1748 qdisc_put_rtab(q->link.R_tab);
1751 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1753 struct cbq_class *cl = (struct cbq_class*)arg;
1755 if (--cl->refcnt == 0) {
1756 #ifdef CONFIG_NET_CLS_POLICE
1757 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1759 spin_lock_bh(&sch->dev->queue_lock);
1760 if (q->rx_class == cl)
1762 spin_unlock_bh(&sch->dev->queue_lock);
1765 cbq_destroy_class(cl);
1770 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1774 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1775 struct cbq_class *cl = (struct cbq_class*)*arg;
1776 struct rtattr *opt = tca[TCA_OPTIONS-1];
1777 struct rtattr *tb[TCA_CBQ_MAX];
1778 struct cbq_class *parent;
1779 struct qdisc_rate_table *rtab = NULL;
1782 rtattr_parse(tb, TCA_CBQ_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1785 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1786 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1789 if (tb[TCA_CBQ_FOPT-1] &&
1790 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1793 if (tb[TCA_CBQ_RATE-1] &&
1794 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1797 if (tb[TCA_CBQ_LSSOPT-1] &&
1798 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1801 if (tb[TCA_CBQ_WRROPT-1] &&
1802 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1805 #ifdef CONFIG_NET_CLS_POLICE
1806 if (tb[TCA_CBQ_POLICE-1] &&
1807 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1814 if (cl->tparent && cl->tparent->classid != parentid)
1816 if (!cl->tparent && parentid != TC_H_ROOT)
1820 if (tb[TCA_CBQ_RATE-1]) {
1821 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1826 /* Change class parameters */
1829 if (cl->next_alive != NULL)
1830 cbq_deactivate_class(cl);
1833 rtab = xchg(&cl->R_tab, rtab);
1834 qdisc_put_rtab(rtab);
1837 if (tb[TCA_CBQ_LSSOPT-1])
1838 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1840 if (tb[TCA_CBQ_WRROPT-1]) {
1842 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1845 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1846 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1848 #ifdef CONFIG_NET_CLS_POLICE
1849 if (tb[TCA_CBQ_POLICE-1])
1850 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1853 if (tb[TCA_CBQ_FOPT-1])
1854 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1857 cbq_activate_class(cl);
1859 sch_tree_unlock(sch);
1861 #ifdef CONFIG_NET_ESTIMATOR
1862 if (tca[TCA_RATE-1]) {
1863 qdisc_kill_estimator(&cl->stats);
1864 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1870 if (parentid == TC_H_ROOT)
1873 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1874 tb[TCA_CBQ_LSSOPT-1] == NULL)
1877 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1883 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1887 classid = TC_H_MAKE(sch->handle,0x8000);
1889 for (i=0; i<0x8000; i++) {
1890 if (++q->hgenerator >= 0x8000)
1892 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1898 classid = classid|q->hgenerator;
1903 parent = cbq_class_lookup(q, parentid);
1910 cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1913 memset(cl, 0, sizeof(*cl));
1917 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1918 cl->q = &noop_qdisc;
1919 cl->classid = classid;
1920 cl->tparent = parent;
1922 cl->allot = parent->allot;
1923 cl->quantum = cl->allot;
1924 cl->weight = cl->R_tab->rate.rate;
1925 cl->stats.lock = &sch->dev->queue_lock;
1929 cl->borrow = cl->tparent;
1930 if (cl->tparent != &q->link)
1931 cl->share = cl->tparent;
1932 cbq_adjust_levels(parent);
1933 cl->minidle = -0x7FFFFFFF;
1934 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1935 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1936 if (cl->ewma_log==0)
1937 cl->ewma_log = q->link.ewma_log;
1939 cl->maxidle = q->link.maxidle;
1941 cl->avpkt = q->link.avpkt;
1942 cl->overlimit = cbq_ovl_classic;
1943 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1944 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1945 #ifdef CONFIG_NET_CLS_POLICE
1946 if (tb[TCA_CBQ_POLICE-1])
1947 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1949 if (tb[TCA_CBQ_FOPT-1])
1950 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1951 sch_tree_unlock(sch);
1953 #ifdef CONFIG_NET_ESTIMATOR
1954 if (tca[TCA_RATE-1])
1955 qdisc_new_estimator(&cl->stats, tca[TCA_RATE-1]);
1958 *arg = (unsigned long)cl;
1962 qdisc_put_rtab(rtab);
1966 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1968 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
1969 struct cbq_class *cl = (struct cbq_class*)arg;
1971 if (cl->filters || cl->children || cl == &q->link)
1977 cbq_deactivate_class(cl);
1979 if (q->tx_borrowed == cl)
1980 q->tx_borrowed = q->tx_class;
1981 if (q->tx_class == cl) {
1983 q->tx_borrowed = NULL;
1985 #ifdef CONFIG_NET_CLS_POLICE
1986 if (q->rx_class == cl)
1990 cbq_unlink_class(cl);
1991 cbq_adjust_levels(cl->tparent);
1993 cbq_sync_defmap(cl);
1996 sch_tree_unlock(sch);
1998 if (--cl->refcnt == 0)
1999 cbq_destroy_class(cl);
2004 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2006 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2007 struct cbq_class *cl = (struct cbq_class *)arg;
2012 return &cl->filter_list;
2015 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2018 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2019 struct cbq_class *p = (struct cbq_class*)parent;
2020 struct cbq_class *cl = cbq_class_lookup(q, classid);
2023 if (p && p->level <= cl->level)
2026 return (unsigned long)cl;
2031 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2033 struct cbq_class *cl = (struct cbq_class*)arg;
2038 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2040 struct cbq_sched_data *q = (struct cbq_sched_data *)sch->data;
2046 for (h = 0; h < 16; h++) {
2047 struct cbq_class *cl;
2049 for (cl = q->classes[h]; cl; cl = cl->next) {
2050 if (arg->count < arg->skip) {
2054 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2063 static struct Qdisc_class_ops cbq_class_ops = {
2068 .change = cbq_change_class,
2069 .delete = cbq_delete,
2071 .tcf_chain = cbq_find_tcf,
2072 .bind_tcf = cbq_bind_filter,
2073 .unbind_tcf = cbq_unbind_filter,
2074 .dump = cbq_dump_class,
2077 static struct Qdisc_ops cbq_qdisc_ops = {
2079 .cl_ops = &cbq_class_ops,
2081 .priv_size = sizeof(struct cbq_sched_data),
2082 .enqueue = cbq_enqueue,
2083 .dequeue = cbq_dequeue,
2084 .requeue = cbq_requeue,
2088 .destroy = cbq_destroy,
2091 .owner = THIS_MODULE,
2094 static int __init cbq_module_init(void)
2096 return register_qdisc(&cbq_qdisc_ops);
2098 static void __exit cbq_module_exit(void)
2100 unregister_qdisc(&cbq_qdisc_ops);
2102 module_init(cbq_module_init)
2103 module_exit(cbq_module_exit)
2104 MODULE_LICENSE("GPL");