/* vim: ts=8 sw=8
 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *              Stef Coene <stef.coene@docum.org>
 *                      HTB support at LARTC mailing list
 *              Ondrej Kraus, <krauso@barr.cz>
 *                      found missing INIT_QDISC(htb)
 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *                      helped a lot to locate nasty class stall bug
 *              Andi Kleen, Jamal Hadi, Bert Hubert
 *                      code review and helpful comments on shaping
 *              Tomasz Wrona, <tw@eter.tym.pl>
 *                      created test case so that I was able to fix nasty bug
 *              Wilfried Weissmann
 *                      spotted bug in dequeue code and helped with fix
 *              Jiri Fojtasek
 *                      fixed requeue routine
 *              and many others. thanks.
 *
 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
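
/* Illustration (added; the values follow from the comment above): with
   TC_HTB_MAXDEPTH == 8 a root class sits at level 7 and an inner class
   directly under it at level 6, while any leaf, however deep, is always
   at level 0. The level is later used to index the per-level "row" and
   "wait_pq" arrays of struct htb_sched. */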

#define HTB_HSIZE 16    /* classid hash size */
#define HTB_EWMAC 2     /* rate average over HTB_EWMAC*HTB_HSIZE sec */
#define HTB_DEBUG 1     /* compile debugging support (activated by tc tool) */
#define HTB_RATECM 1    /* whether to use rate computer */
#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
#define HTB_VER 0x30010 /* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* debugging support; S is subsystem, these are defined:
  0 - netlink messages
  1 - enqueue
  2 - drop & requeue
  3 - dequeue main
  4 - dequeue one prio DRR part
  5 - dequeue class accounting
  6 - class overlimit status computation
  7 - hint tree
  8 - event queue
 10 - rate estimator
 11 - classifier
 12 - fast dequeue cache

 L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
 the q->debug uint32 contains 16 2-bit fields, one per subsystem, starting
 from the LSB
 */
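
/* Example (illustrative): to get "detailed" (level 2) messages from
   subsystem 3 (dequeue main) only, set q->debug = 2 << (2*3), i.e. 0x80;
   fields for several subsystems can be OR-ed together. */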
#ifdef HTB_DEBUG
#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
        printk(KERN_DEBUG FMT,##ARG)
#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
#define HTB_PASSQ q,
#define HTB_ARGQ struct htb_sched *q,
#define static
#undef __inline__
#define __inline__
#undef inline
#define inline
#define HTB_CMAGIC 0xFEFAFEF1
#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
                if ((N)->rb_color == -1) break; \
                rb_erase(N,R); \
                (N)->rb_color = -1; } while (0)
#else
#define HTB_DBG_COND(S,L) (0)
#define HTB_DBG(S,L,FMT,ARG...)
#define HTB_PASSQ
#define HTB_ARGQ
#define HTB_CHCL(cl)
#define htb_safe_rb_erase(N,R) rb_erase(N,R)
#endif


/* used internally to keep the status of a single class */
enum htb_cmode {
    HTB_CANT_SEND,              /* class can't send and can't borrow */
    HTB_MAY_BORROW,             /* class can't send but may borrow */
    HTB_CAN_SEND                /* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class
{
#ifdef HTB_DEBUG
        unsigned magic;
#endif
    /* general class parameters */
    u32 classid;
    struct tc_stats     stats;  /* generic stats */
    struct tc_htb_xstats xstats;/* our special stats */
    int refcnt;                 /* usage count of this class */

#ifdef HTB_RATECM
    /* rate measurement counters */
    unsigned long rate_bytes,sum_bytes;
    unsigned long rate_packets,sum_packets;
#endif

    /* topology */
    int level;                  /* our level (see above) */
    struct htb_class *parent;   /* parent class */
    struct list_head hlist;     /* classid hash list item */
    struct list_head sibling;   /* sibling list item */
    struct list_head children;  /* children list */

    union {
            struct htb_class_leaf {
                    struct Qdisc *q;
                    int prio;
                    int aprio;
                    int quantum;
                    int deficit[TC_HTB_MAXDEPTH];
                    struct list_head drop_list;
            } leaf;
            struct htb_class_inner {
                    struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
                    struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
            } inner;
    } un;
    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
    struct rb_node pq_node;              /* node for event queue */
    unsigned long pq_key;       /* the same type as jiffies global */

    int prio_activity;          /* for which prios are we active */
    enum htb_cmode cmode;       /* current mode of the class */

    /* class attached filters */
    struct tcf_proto *filter_list;
    int filter_cnt;

    int warned;         /* only one warning about non work conserving .. */

    /* token bucket parameters */
    struct qdisc_rate_table *rate;      /* rate table of the class itself */
    struct qdisc_rate_table *ceil;      /* ceiling rate (limits borrows too) */
    long buffer,cbuffer;                /* token bucket depth/rate */
    long mbuffer;                       /* max wait time */
    long tokens,ctokens;                /* current number of tokens */
    psched_time_t t_c;                  /* checkpoint time */
};

/* TODO: maybe compute rate when size is too large .. or drop ? */
static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
        int size)
{
    int slot = size >> rate->rate.cell_log;
    if (slot > 255) {
        cl->xstats.giants++;
        slot = 255;
    }
    return rate->data[slot];
}
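
/* Example (illustrative, not in the original source): with
   rate->rate.cell_log == 3 a 1500 byte packet falls into slot
   1500 >> 3 == 187, and rate->data[187] gives the time in scheduler
   ticks needed to send a packet of that size at the configured rate,
   i.e. how many tokens the packet costs the class. */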

struct htb_sched
{
    struct list_head root;                      /* root classes list */
    struct list_head hash[HTB_HSIZE];           /* hashed by classid */
    struct list_head drops[TC_HTB_NUMPRIO];     /* active leaves (for drops) */

    /* self list - roots of self generating tree */
    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
    int row_mask[TC_HTB_MAXDEPTH];
    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

    /* self wait list - roots of wait PQs per row */
    struct rb_root wait_pq[TC_HTB_MAXDEPTH];

    /* time of nearest event per level (row) */
    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];

    /* cached value of jiffies in dequeue */
    unsigned long jiffies;

    /* whether we hit non-work conserving class during this dequeue; we use */
    int nwc_hit;        /* this to disable mindelay complaint in dequeue */

    int defcls;         /* class where unclassified flows go to */
    u32 debug;          /* subsystem debug levels */

    /* filters for qdisc itself */
    struct tcf_proto *filter_list;
    int filter_cnt;

    int rate2quantum;           /* quant = rate / rate2quantum */
    psched_time_t now;          /* cached dequeue time */
    struct timer_list timer;    /* send delay timer */
#ifdef HTB_RATECM
    struct timer_list rttim;    /* rate computer timer */
    int recmp_bucket;           /* which hash bucket to recompute next */
#endif

    /* non shaped skbs; let them go directly thru */
    struct sk_buff_head direct_queue;
    int direct_qlen;  /* max qlen of above */

    long direct_pkts;
};

/* compute hash of size HTB_HSIZE for given handle */
static __inline__ int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
 #error "Declare new hash for your HTB_HSIZE"
#endif
    h ^= h>>8;  /* stolen from cbq_hash */
    h ^= h>>4;
    return h & 0xf;
}
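
/* Worked example (illustrative): classid 1:10 is handle 0x00010010;
   0x00010010 ^ 0x00000100 = 0x00010110, then 0x00010110 ^ 0x00001011 =
   0x00011101, and 0x00011101 & 0xf == 1, so the class lands in hash
   bucket 1. */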

/* find class in global hash table using given handle */
static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct list_head *p;
        if (TC_H_MAJ(handle) != sch->handle)
                return NULL;

        list_for_each (p,q->hash+htb_hash(handle)) {
                struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                if (cl->classid == handle)
                        return cl;
        }
        return NULL;
}

/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by classid in priority. Then we examine
 * filters in qdisc and in inner nodes (if higher filter points to the inner
 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
 * internal fifo (direct). These packets then go directly thru. If we still
 * have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
 * we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1
static inline u32 htb_classid(struct htb_class *cl)
{
        return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl;
        struct tcf_result res;
        struct tcf_proto *tcf;
        int result;

        /* allow selecting the class by setting skb->priority to a valid
           classid; note that nfmark can be used too by attaching filter fw
           with no rules in it */
        if (skb->priority == sch->handle)
                return HTB_DIRECT;  /* X:0 (direct flow) selected */
        if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
                return cl;

        tcf = q->filter_list;
        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_POLICE
                if (result == TC_POLICE_SHOT)
                        return NULL;
#endif
                if ((cl = (void*)res.class) == NULL) {
                        if (res.classid == sch->handle)
                                return HTB_DIRECT;  /* X:0 (direct flow) */
                        if ((cl = htb_find(res.classid,sch)) == NULL)
                                break; /* filter selected invalid classid */
                }
                if (!cl->level)
                        return cl; /* we hit leaf; return it */

                /* we have got inner class; apply inner filter chain */
                tcf = cl->filter_list;
        }
        /* classification failed; try to use default class */
        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
        if (!cl || cl->level)
                return HTB_DIRECT; /* bad default .. this is safe bet */
        return cl;
}
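
/* Usage note (an assumption added for illustration, not part of the
 * original file): because htb_classify honours skb->priority first, a
 * local application can steer its traffic into leaf 1:10 without any tc
 * filter by setting its socket priority to the raw handle:
 *
 *      int prio = 0x00010010;                  // classid 1:10
 *      setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));
 */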

#ifdef HTB_DEBUG
static void htb_next_rb_node(struct rb_node **n);
#define HTB_DUMTREE(root,memb) if(root) { \
        struct rb_node *n = (root)->rb_node; \
        while (n->rb_left) n = n->rb_left; \
        while (n) { \
                struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
                printk(" %x",cl->classid); htb_next_rb_node (&n); \
        } }

static void htb_debug_dump (struct htb_sched *q)
{
        int i,p;
        printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
        /* rows */
        for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
                printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
                for (p=0;p<TC_HTB_NUMPRIO;p++) {
                        if (!q->row[i][p].rb_node) continue;
                        printk(" p%d:",p);
                        HTB_DUMTREE(q->row[i]+p,node[p]);
                }
                printk("\n");
        }
        /* classes */
        for (i = 0; i < HTB_HSIZE; i++) {
                struct list_head *l;
                list_for_each (l,q->hash+i) {
                        struct htb_class *cl = list_entry(l,struct htb_class,hlist);
                        long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
                        printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
                                        "pa=%x f:",
                                cl->classid,cl->cmode,cl->tokens,cl->ctokens,
                                cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
                                cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
                        if (cl->level)
                        for (p=0;p<TC_HTB_NUMPRIO;p++) {
                                if (!cl->un.inner.feed[p].rb_node) continue;
                                printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
                                HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
                        }
                        printk("\n");
                }
        }
}
#endif
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds the class to the list (actually tree) sorted by classid.
 * Make sure that the class is not already on such a list for the given prio.
 */
static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
                struct htb_class *cl,int prio)
{
        struct rb_node **p = &root->rb_node, *parent = NULL;
        HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
#ifdef HTB_DEBUG
        if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
        HTB_CHCL(cl);
        if (*p) {
                struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
                HTB_CHCL(x);
        }
#endif
        while (*p) {
                struct htb_class *c; parent = *p;
                c = rb_entry(parent, struct htb_class, node[prio]);
                HTB_CHCL(c);
                if (cl->classid > c->classid)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->node[prio], parent, p);
        rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to the priority event queue to indicate that the class
 * will change its mode in cl->pq_key microseconds. Make sure that the class
 * is not already in the queue.
 */
static void htb_add_to_wait_tree (struct htb_sched *q,
                struct htb_class *cl,long delay,int debug_hint)
{
        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
        HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
#ifdef HTB_DEBUG
        if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
        HTB_CHCL(cl);
        if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
                printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
#endif
        cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
        if (cl->pq_key == q->jiffies)
                cl->pq_key++;

        /* update the nearest event cache */
        if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
                q->near_ev_cache[cl->level] = cl->pq_key;

        while (*p) {
                struct htb_class *c; parent = *p;
                c = rb_entry(parent, struct htb_class, pq_node);
                if (time_after_eq(cl->pq_key, c->pq_key))
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->pq_node, parent, p);
        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static void htb_next_rb_node(struct rb_node **n)
{
        *n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
                struct htb_class *cl,int mask)
{
        HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
                        cl->classid,mask,q->row_mask[cl->level]);
        HTB_CHCL(cl);
        q->row_mask[cl->level] |= mask;
        while (mask) {
                int prio = ffz(~mask);
                mask &= ~(1 << prio);
                htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
        }
}

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
                struct htb_class *cl,int mask)
{
        int m = 0;
        HTB_CHCL(cl);
        while (mask) {
                int prio = ffz(~mask);
                mask &= ~(1 << prio);
                if (q->ptr[cl->level][prio] == cl->node+prio)
                        htb_next_rb_node(q->ptr[cl->level]+prio);
                htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
                if (!q->row[cl->level][prio].rb_node)
                        m |= 1 << prio;
        }
        HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
                        cl->classid,mask,q->row_mask[cl->level],m);
        q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
{
        struct htb_class *p = cl->parent;
        long m,mask = cl->prio_activity;
        HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
        HTB_CHCL(cl);

        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
                HTB_CHCL(p);
                m = mask; while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);

                        if (p->un.inner.feed[prio].rb_node)
                                /* parent already has its feed in use so
                                   reset the bit in mask, the parent is
                                   already ok */
                                mask &= ~(1 << prio);

                        htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
                }
                HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
                                p->classid,p->prio_activity,mask,p->cmode);
                p->prio_activity |= mask;
                cl = p; p = cl->parent;
                HTB_CHCL(cl);
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
                htb_add_class_to_row(q,cl,mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
        struct htb_class *p = cl->parent;
        long m,mask = cl->prio_activity;
        HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
        HTB_CHCL(cl);

        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
                m = mask; mask = 0;
                while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);

                        if (p->un.inner.ptr[prio] == cl->node+prio)
                                htb_next_rb_node(p->un.inner.ptr + prio);

                        htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);

                        if (!p->un.inner.feed[prio].rb_node)
                                mask |= 1 << prio;
                }
                HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
                                p->classid,p->prio_activity,mask,p->cmode);
                p->prio_activity &= ~mask;
                cl = p; p = cl->parent;
                HTB_CHCL(cl);
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
                htb_remove_class_from_row(q,cl,mask);
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to the time difference
 * from now to the time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but rather there is a hysteresis over the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static __inline__ enum htb_cmode
htb_class_mode(struct htb_class *cl,long *diff)
{
    long toks;

    if ((toks = (cl->ctokens + *diff)) < (
#if HTB_HYSTERESIS
            cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
#endif
            0)) {
            *diff = -toks;
            return HTB_CANT_SEND;
    }
    if ((toks = (cl->tokens + *diff)) >= (
#if HTB_HYSTERESIS
            cl->cmode == HTB_CAN_SEND ? -cl->buffer :
#endif
            0))
            return HTB_CAN_SEND;

    *diff = -toks;
    return HTB_MAY_BORROW;
}
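
/* Worked example (illustrative): with HTB_HYSTERESIS and
   cl->buffer == 10000 ticks, a class already in HTB_CAN_SEND keeps that
   mode until cl->tokens + *diff drops below -10000, whereas a class in
   HTB_MAY_BORROW must climb back to cl->tokens + *diff >= 0 before it
   becomes HTB_CAN_SEND again; the asymmetric thresholds are what damp
   rapid mode flapping around zero. */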

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed lists linkage, change
 * the mode and add the class to the wait event queue if appropriate.
 * The new mode should be different from the old one and cl->pq_key has
 * to be valid if changing to a mode other than HTB_CAN_SEND
 * (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
        enum htb_cmode new_mode = htb_class_mode(cl,diff);

        HTB_CHCL(cl);
        HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);

        if (new_mode == cl->cmode)
                return;

        if (cl->prio_activity) { /* not necessary: speed optimization */
                if (cl->cmode != HTB_CANT_SEND)
                        htb_deactivate_prios(q,cl);
                cl->cmode = new_mode;
                if (new_mode != HTB_CANT_SEND)
                        htb_activate_prios(q,cl);
        } else
                cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns the (new) priority of the leaf and activates the feed
 * chain for the prio. It can be safely called on an already active leaf.
 * It also adds the leaf into the droplist.
 */
static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
{
        BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
        HTB_CHCL(cl);
        if (!cl->prio_activity) {
                cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
                htb_activate_prios(q,cl);
                list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
        }
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that the leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes the class from the drop list.
 */
static __inline__ void
htb_deactivate(struct htb_sched *q,struct htb_class *cl)
{
        BUG_TRAP(cl->prio_activity);
        HTB_CHCL(cl);
        htb_deactivate_prios(q,cl);
        cl->prio_activity = 0;
        list_del_init(&cl->un.leaf.drop_list);
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_classify(skb,sch);

    if (cl == HTB_DIRECT || !cl) {
        /* enqueue to helper queue */
        if (q->direct_queue.qlen < q->direct_qlen && cl) {
            __skb_queue_tail(&q->direct_queue, skb);
            q->direct_pkts++;
        } else {
            kfree_skb (skb);
            sch->stats.drops++;
            return NET_XMIT_DROP;
        }
    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
        sch->stats.drops++;
        cl->stats.drops++;
        return NET_XMIT_DROP;
    } else {
        cl->stats.packets++; cl->stats.bytes += skb->len;
        htb_activate (q,cl);
    }

    sch->q.qlen++;
    sch->stats.packets++; sch->stats.bytes += skb->len;
    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
    return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_classify(skb,sch);
    struct sk_buff *tskb;

    if (cl == HTB_DIRECT || !cl) {
        /* enqueue to helper queue */
        if (q->direct_queue.qlen < q->direct_qlen && cl) {
            __skb_queue_head(&q->direct_queue, skb);
        } else {
            __skb_queue_head(&q->direct_queue, skb);
            tskb = __skb_dequeue_tail(&q->direct_queue);
            kfree_skb (tskb);
            sch->stats.drops++;
            return NET_XMIT_CN;
        }
    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
        sch->stats.drops++;
        cl->stats.drops++;
        return NET_XMIT_DROP;
    } else
            htb_activate (q,cl);

    sch->q.qlen++;
    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
    return NET_XMIT_SUCCESS;
}

static void htb_timer(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc*)arg;
    sch->flags &= ~TCQ_F_THROTTLED;
    wmb();
    netif_schedule(sch->dev);
}

#ifdef HTB_RATECM
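/* A note on the macro below (added commentary, not in the original):
   it is an exponentially weighted moving average with weight 1/HTB_EWMAC;
   once a second the accumulated sum D is folded into the running rate R
   as R += D - R/HTB_EWMAC, then D is cleared for the next interval. */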
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
{
        struct Qdisc *sch = (struct Qdisc*)arg;
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct list_head *p;

        /* lock queue so that we can muck with it */
        HTB_QLOCK(sch);
        HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);

        q->rttim.expires = jiffies + HZ;
        add_timer(&q->rttim);

        /* scan and recompute one bucket at a time */
        if (++q->recmp_bucket >= HTB_HSIZE)
                q->recmp_bucket = 0;
        list_for_each (p,q->hash+q->recmp_bucket) {
                struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
                                cl->classid,cl->sum_bytes,cl->sum_packets);
                RT_GEN (cl->sum_bytes,cl->rate_bytes);
                RT_GEN (cl->sum_packets,cl->rate_packets);
        }
        HTB_QUNLOCK(sch);
}
#endif

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
                int level,int bytes)
{
        long toks,diff;
        enum htb_cmode old_mode;
        HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);

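/* Added commentary (not in the original): HTB_ACCNT below is the classic
   token bucket update. It tops the bucket up by the elapsed time "diff",
   clamps it to the bucket depth B, subtracts the transmission time of
   this packet via L2T(), and clamps underflow to roughly -mbuffer so a
   single huge debt cannot stall the class forever. */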
#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
        if (toks > cl->B) toks = cl->B; \
        toks -= L2T(cl, cl->R, bytes); \
        if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
        cl->T = toks

        while (cl) {
                HTB_CHCL(cl);
                diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
#ifdef HTB_DEBUG
                if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
                        if (net_ratelimit())
                                printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
                                       cl->classid, diff,
                                       (unsigned long long) q->now,
                                       (unsigned long long) cl->t_c,
                                       q->jiffies);
                        diff = 1000;
                }
#endif
                if (cl->level >= level) {
                        if (cl->level == level) cl->xstats.lends++;
                        HTB_ACCNT (tokens,buffer,rate);
                } else {
                        cl->xstats.borrows++;
                        cl->tokens += diff; /* we moved t_c; update tokens */
                }
                HTB_ACCNT (ctokens,cbuffer,ceil);
                cl->t_c = q->now;
                HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);

                old_mode = cl->cmode; diff = 0;
                htb_change_class_mode(q,cl,&diff);
                if (old_mode != cl->cmode) {
                        if (old_mode != HTB_CAN_SEND)
                                htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
                        if (cl->cmode != HTB_CAN_SEND)
                                htb_add_to_wait_tree (q,cl,diff,1);
                }

#ifdef HTB_RATECM
                /* update rate counters */
                cl->sum_bytes += bytes; cl->sum_packets++;
#endif

                /* update byte stats except for leaves which are already updated */
                if (cl->level) {
                        cl->stats.bytes += bytes;
                        cl->stats.packets++;
                }
                cl = cl->parent;
        }
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans the event queue for pending events and applies them. Returns
 * jiffies to the next pending event (0 for no event in pq).
 * Note: applied are events that have cl->pq_key <= jiffies.
 */
static long htb_do_events(struct htb_sched *q,int level)
{
        int i;
        HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
                        level,q->wait_pq[level].rb_node,q->row_mask[level]);
        for (i = 0; i < 500; i++) {
                struct htb_class *cl;
                long diff;
                struct rb_node *p = q->wait_pq[level].rb_node;
                if (!p) return 0;
                while (p->rb_left) p = p->rb_left;

                cl = rb_entry(p, struct htb_class, pq_node);
                if (time_after(cl->pq_key, q->jiffies)) {
                        HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
                        return cl->pq_key - q->jiffies;
                }
                htb_safe_rb_erase(p,q->wait_pq+level);
                diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
#ifdef HTB_DEBUG
                if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
                        if (net_ratelimit())
                                printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
                                       cl->classid, diff,
                                       (unsigned long long) q->now,
                                       (unsigned long long) cl->t_c,
                                       q->jiffies);
                        diff = 1000;
                }
#endif
                htb_change_class_mode(q,cl,&diff);
                if (cl->cmode != HTB_CAN_SEND)
                        htb_add_to_wait_tree (q,cl,diff,2);
        }
        if (net_ratelimit())
                printk(KERN_WARNING "htb: too many events !\n");
        return HZ/10;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf where the current feed pointer points to.
 */
static struct htb_class *
htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr)
{
        int i;
        struct {
                struct rb_node *root;
                struct rb_node **pptr;
        } stk[TC_HTB_MAXDEPTH],*sp = stk;

        BUG_TRAP(tree->rb_node);
        sp->root = tree->rb_node;
        sp->pptr = pptr;

        for (i = 0; i < 65535; i++) {
                if (!*sp->pptr) { /* we are at right end; rewind & go up */
                        *sp->pptr = sp->root;
                        while ((*sp->pptr)->rb_left)
                                *sp->pptr = (*sp->pptr)->rb_left;
                        if (sp > stk) {
                                sp--;
                                BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
                                htb_next_rb_node (sp->pptr);
                        }
                } else {
                        struct htb_class *cl;
                        cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
                        HTB_CHCL(cl);
                        if (!cl->level)
                                return cl;
                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
                        sp->pptr = cl->un.inner.ptr+prio;
                }
        }
        BUG_TRAP(0);
        return NULL;
}

/* dequeues packet at given priority and level; call only if
   you are sure that there is active class at prio/level */
static struct sk_buff *
htb_dequeue_tree(struct htb_sched *q,int prio,int level)
{
        struct sk_buff *skb = NULL;
        struct htb_class *cl,*start;
        /* look initial class up in the row */
        start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);

        do {
next:
                BUG_TRAP(cl);
                if (!cl) return NULL;
                HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
                                prio,level,cl->classid,cl->un.leaf.deficit[level]);

                /* class can be empty - it is unlikely but can be true if leaf
                   qdisc drops packets in enqueue routine or if someone used
                   graft operation on the leaf since last dequeue;
                   simply deactivate and skip such class */
                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
                        struct htb_class *next;
                        htb_deactivate(q,cl);

                        /* row/level might become empty */
                        if ((q->row_mask[level] & (1 << prio)) == 0)
                                return NULL;

                        next = htb_lookup_leaf (q->row[level]+prio,
                                        prio,q->ptr[level]+prio);
                        if (cl == start) /* fix start if we just deleted it */
                                start = next;
                        cl = next;
                        goto next;
                }

                if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
                        break;
                if (!cl->warned) {
                        printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
                        cl->warned = 1;
                }
                q->nwc_hit++;
                htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
                cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
        } while (cl != start);

        if (likely(skb != NULL)) {
                if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
                        HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
                                level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
                        cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
                        htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
                }
                /* this used to be after charge_class but this constellation
                   gives us slightly better performance */
                if (!cl->un.leaf.q->q.qlen)
                        htb_deactivate (q,cl);
                htb_charge_class (q,cl,level,skb->len);
        }
        return skb;
}

static void htb_delay_by(struct Qdisc *sch,long delay)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        if (netif_queue_stopped(sch->dev)) return;
        if (delay <= 0) delay = 1;
        if (unlikely(delay > 5*HZ)) {
                if (net_ratelimit())
                        printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
                delay = 5*HZ;
        }
        /* why not use jiffies here? because expires can be in the past */
        mod_timer(&q->timer, q->jiffies + delay);
        sch->flags |= TCQ_F_THROTTLED;
        sch->stats.overlimits++;
        HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb = NULL;
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int level;
        long min_delay;
#ifdef HTB_DEBUG
        int evs_used = 0;
#endif

        q->jiffies = jiffies;
        HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
                        sch->q.qlen);

        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
        if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
                sch->flags &= ~TCQ_F_THROTTLED;
                sch->q.qlen--;
                return skb;
        }

        if (!sch->q.qlen) goto fin;
        PSCHED_GET_TIME(q->now);

        min_delay = LONG_MAX;
        q->nwc_hit = 0;
        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
                /* common case optimization - skip event handler quickly */
                int m;
                long delay;
                if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
                        delay = htb_do_events(q,level);
                        q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
#ifdef HTB_DEBUG
                        evs_used++;
#endif
                } else
                        delay = q->near_ev_cache[level] - q->jiffies;

                if (delay && min_delay > delay)
                        min_delay = delay;
                m = ~q->row_mask[level];
                while (m != (int)(-1)) {
                        int prio = ffz (m);
                        m |= 1 << prio;
                        skb = htb_dequeue_tree(q,prio,level);
                        if (likely(skb != NULL)) {
                                sch->q.qlen--;
                                sch->flags &= ~TCQ_F_THROTTLED;
                                goto fin;
                        }
                }
        }
#ifdef HTB_DEBUG
        if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
                if (min_delay == LONG_MAX) {
                        printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
                                        evs_used,q->jiffies,jiffies);
                        htb_debug_dump(q);
                } else
                        printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
                                        "too small rate\n",min_delay);
        }
#endif
        htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
fin:
        HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
        return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc* sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int prio;

        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
                struct list_head *p;
                list_for_each (p,q->drops+prio) {
                        struct htb_class *cl = list_entry(p, struct htb_class,
                                                          un.leaf.drop_list);
                        unsigned int len;
                        if (cl->un.leaf.q->ops->drop &&
                                (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
                                sch->q.qlen--;
                                if (!cl->un.leaf.q->q.qlen)
                                        htb_deactivate (q,cl);
                                return len;
                        }
                }
        }
        return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc* sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int i;
        HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);

        for (i = 0; i < HTB_HSIZE; i++) {
                struct list_head *p;
                list_for_each (p,q->hash+i) {
                        struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                        if (cl->level)
                                memset(&cl->un.inner,0,sizeof(cl->un.inner));
                        else {
                                if (cl->un.leaf.q)
                                        qdisc_reset(cl->un.leaf.q);
                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                        }
                        cl->prio_activity = 0;
                        cl->cmode = HTB_CAN_SEND;
#ifdef HTB_DEBUG
                        cl->pq_node.rb_color = -1;
                        memset(cl->node,255,sizeof(cl->node));
#endif

                }
        }
        sch->flags &= ~TCQ_F_THROTTLED;
        del_timer(&q->timer);
        __skb_queue_purge(&q->direct_queue);
        sch->q.qlen = 0;
        memset(q->row,0,sizeof(q->row));
        memset(q->row_mask,0,sizeof(q->row_mask));
        memset(q->wait_pq,0,sizeof(q->wait_pq));
        memset(q->ptr,0,sizeof(q->ptr));
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
                INIT_LIST_HEAD(q->drops+i);
}

static int htb_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct htb_sched *q = (struct htb_sched*)sch->data;
        struct rtattr *tb[TCA_HTB_INIT];
        struct tc_htb_glob *gopt;
        int i;
#ifdef HTB_DEBUG
        printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
                          HTB_VER >> 16,HTB_VER & 0xffff);
#endif
        if (!opt || rtattr_parse(tb, TCA_HTB_INIT, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
                        tb[TCA_HTB_INIT-1] == NULL ||
                        RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
                return -EINVAL;
        }
        gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
        if (gopt->version != HTB_VER >> 16) {
                printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
                                HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
                return -EINVAL;
        }
        memset(q,0,sizeof(*q));
        q->debug = gopt->debug;
        HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);

        INIT_LIST_HEAD(&q->root);
        for (i = 0; i < HTB_HSIZE; i++)
                INIT_LIST_HEAD(q->hash+i);
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
                INIT_LIST_HEAD(q->drops+i);

        init_timer(&q->timer);
        skb_queue_head_init(&q->direct_queue);

        q->direct_qlen = sch->dev->tx_queue_len;
        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
                q->direct_qlen = 2;
        q->timer.function = htb_timer;
        q->timer.data = (unsigned long)sch;

#ifdef HTB_RATECM
        init_timer(&q->rttim);
        q->rttim.function = htb_rate_timer;
        q->rttim.data = (unsigned long)sch;
        q->rttim.expires = jiffies + HZ;
        add_timer(&q->rttim);
#endif
        if ((q->rate2quantum = gopt->rate2quantum) < 1)
                q->rate2quantum = 1;
        q->defcls = gopt->defcls;

        return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct htb_sched *q = (struct htb_sched*)sch->data;
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
        struct tc_htb_glob gopt;
        HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
        /* stats */
        HTB_QLOCK(sch);
        gopt.direct_pkts = q->direct_pkts;

#ifdef HTB_DEBUG
        if (HTB_DBG_COND(0,2))
                htb_debug_dump(q);
#endif
        gopt.version = HTB_VER;
        gopt.rate2quantum = q->rate2quantum;
        gopt.defcls = q->defcls;
        gopt.debug = q->debug;
        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
        RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
        rta->rta_len = skb->tail - b;
        sch->stats.qlen = sch->q.qlen;
        RTA_PUT(skb, TCA_STATS, sizeof(sch->stats), &sch->stats);
        HTB_QUNLOCK(sch);
        return skb->len;
rtattr_failure:
        HTB_QUNLOCK(sch);
        skb_trim(skb, skb->tail - skb->data);
        return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
        struct sk_buff *skb, struct tcmsg *tcm)
{
#ifdef HTB_DEBUG
        struct htb_sched *q = (struct htb_sched*)sch->data;
#endif
        struct htb_class *cl = (struct htb_class*)arg;
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
        struct tc_htb_opt opt;

        HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);

        HTB_QLOCK(sch);
        tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
        tcm->tcm_handle = cl->classid;
        if (!cl->level && cl->un.leaf.q) {
                tcm->tcm_info = cl->un.leaf.q->handle;
                cl->stats.qlen = cl->un.leaf.q->q.qlen;
        }

        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

        memset (&opt,0,sizeof(opt));

        opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
        opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
        opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
        opt.level = cl->level;
        RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
        rta->rta_len = skb->tail - b;

#ifdef HTB_RATECM
        cl->stats.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
        cl->stats.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
#endif

        cl->xstats.tokens = cl->tokens;
        cl->xstats.ctokens = cl->ctokens;
        RTA_PUT(skb, TCA_STATS, sizeof(cl->stats), &cl->stats);
        RTA_PUT(skb, TCA_XSTATS, sizeof(cl->xstats), &cl->xstats);
        HTB_QUNLOCK(sch);
        return skb->len;
rtattr_failure:
        HTB_QUNLOCK(sch);
        skb_trim(skb, b - skb->data);
        return -1;
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
        struct Qdisc **old)
{
        struct htb_class *cl = (struct htb_class*)arg;

        if (cl && !cl->level) {
                if (new == NULL && (new = qdisc_create_dflt(sch->dev,
                                        &pfifo_qdisc_ops)) == NULL)
                                        return -ENOBUFS;
                sch_tree_lock(sch);
                if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
                        if (cl->prio_activity)
                                htb_deactivate ((struct htb_sched*)sch->data,cl);

                        /* TODO: is it correct? why doesn't CBQ do it? */
                        sch->q.qlen -= (*old)->q.qlen;
                        qdisc_reset(*old);
                }
                sch_tree_unlock(sch);
                return 0;
        }
        return -ENOENT;
}

static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct htb_class *cl = (struct htb_class*)arg;
        return (cl && !cl->level) ? cl->un.leaf.q : NULL;
}

static unsigned long htb_get(struct Qdisc *sch, u32 classid)
{
#ifdef HTB_DEBUG
        struct htb_sched *q = (struct htb_sched *)sch->data;
#endif
        struct htb_class *cl = htb_find(classid,sch);
        HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
        if (cl)
                cl->refcnt++;
        return (unsigned long)cl;
}

static void htb_destroy_filters(struct tcf_proto **fl)
{
        struct tcf_proto *tp;

        while ((tp = *fl) != NULL) {
                *fl = tp->next;
                tcf_destroy(tp);
        }
}

static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
        if (!cl->level) {
                BUG_TRAP(cl->un.leaf.q);
                sch->q.qlen -= cl->un.leaf.q->q.qlen;
                qdisc_destroy(cl->un.leaf.q);
        }
        qdisc_put_rtab(cl->rate);
        qdisc_put_rtab(cl->ceil);

#ifdef CONFIG_NET_ESTIMATOR
        qdisc_kill_estimator(&cl->stats);
#endif
        htb_destroy_filters (&cl->filter_list);

        while (!list_empty(&cl->children))
                htb_destroy_class (sch,list_entry(cl->children.next,
                                        struct htb_class,sibling));

        /* note: this delete may happen twice (see htb_delete) */
        list_del(&cl->hlist);
        list_del(&cl->sibling);

        if (cl->prio_activity)
                htb_deactivate (q,cl);

        if (cl->cmode != HTB_CAN_SEND)
                htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);

        kfree(cl);
}
1382
/* always called under BH & queue lock */
static void htb_destroy(struct Qdisc* sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        HTB_DBG(0,1,"htb_destroy q=%p\n",q);

        del_timer_sync(&q->timer);
#ifdef HTB_RATECM
        del_timer_sync(&q->rttim);
#endif
        /* This line used to be after the htb_destroy_class call below,
           and surprisingly it worked in 2.4. But it must precede it
           because filters need their target class alive to be able to
           call unbind_filter on it (without an Oops). */
        htb_destroy_filters(&q->filter_list);

        while (!list_empty(&q->root))
                htb_destroy_class(sch,list_entry(q->root.next,
                                        struct htb_class,sibling));

        __skb_queue_purge(&q->direct_queue);
}

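/* Delete a class on behalf of userspace: refuse while it still has
 * children or bound filters, unhash and deactivate it, and free it once
 * the last reference is dropped. */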
static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl = (struct htb_class*)arg;
        HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);

        // TODO: why not allow deleting a subtree? references? does the
        // tc subsystem guarantee us that in htb_destroy it holds no class
        // refs, so that we can remove children safely there?
        if (!list_empty(&cl->children) || cl->filter_cnt)
                return -EBUSY;

        sch_tree_lock(sch);

        /* delete from hash and active; the remainder is done in destroy_class */
        list_del_init(&cl->hlist);
        if (cl->prio_activity)
                htb_deactivate(q,cl);

        if (--cl->refcnt == 0)
                htb_destroy_class(sch,cl);

        sch_tree_unlock(sch);
        return 0;
}

1431
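/* Drop a reference taken by htb_get; the class is destroyed when the
 * count reaches zero. */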
static void htb_put(struct Qdisc *sch, unsigned long arg)
{
#ifdef HTB_DEBUG
        struct htb_sched *q = (struct htb_sched *)sch->data;
#endif
        struct htb_class *cl = (struct htb_class*)arg;
        HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);

        if (--cl->refcnt == 0)
                htb_destroy_class(sch,cl);
}

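/* Create a new class or change the parameters of an existing one. This
 * is where a "tc class add/change ... htb ..." request ends up; an
 * illustrative invocation (example only, not taken from this file) is
 *
 *      tc class add dev eth0 parent 1: classid 1:10 \
 *              htb rate 1mbit ceil 2mbit burst 15k
 *
 * which arrives here as a TCA_HTB_PARMS attribute plus the precomputed
 * rate/ceil tables in TCA_HTB_RTAB/TCA_HTB_CTAB. */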
static int htb_change_class(struct Qdisc *sch, u32 classid,
                u32 parentid, struct rtattr **tca, unsigned long *arg)
{
        int err = -EINVAL;
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl = (struct htb_class*)*arg,*parent;
        struct rtattr *opt = tca[TCA_OPTIONS-1];
        struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
        struct rtattr *tb[TCA_HTB_RTAB];
        struct tc_htb_opt *hopt;

        /* extract all subattrs from opt attr */
        if (!opt || rtattr_parse(tb, TCA_HTB_RTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
                        tb[TCA_HTB_PARMS-1] == NULL ||
                        RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
                goto failure;

        parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid,sch);

        hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
        HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
        rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
        ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
        if (!rtab || !ctab)
                goto failure;

        if (!cl) { /* new class */
                struct Qdisc *new_q;
                /* check for valid classid */
                if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
                        goto failure;

                /* check maximal depth */
                if (parent && parent->parent && parent->parent->level < 2) {
                        printk(KERN_ERR "htb: tree is too deep\n");
                        goto failure;
                }
                err = -ENOBUFS;
                if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
                        goto failure;

                memset(cl, 0, sizeof(*cl));
                cl->refcnt = 1;
                INIT_LIST_HEAD(&cl->sibling);
                INIT_LIST_HEAD(&cl->hlist);
                INIT_LIST_HEAD(&cl->children);
                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
#ifdef HTB_DEBUG
                cl->magic = HTB_CMAGIC;
#endif

                /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
                   which can't be used inside sch_tree_lock
                   -- thanks to Karlis Peisenieks */
                new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
                sch_tree_lock(sch);
                if (parent && !parent->level) {
                        /* turn the parent into an inner node */
                        sch->q.qlen -= parent->un.leaf.q->q.qlen;
                        qdisc_destroy(parent->un.leaf.q);
                        if (parent->prio_activity)
                                htb_deactivate(q,parent);

                        /* remove from the event list because of the level change */
                        if (parent->cmode != HTB_CAN_SEND) {
                                htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
                                parent->cmode = HTB_CAN_SEND;
                        }
                        parent->level = (parent->parent ? parent->parent->level
                                        : TC_HTB_MAXDEPTH) - 1;
                        memset(&parent->un.inner,0,sizeof(parent->un.inner));
                }
                /* the leaf (us) needs an elementary qdisc */
                cl->un.leaf.q = new_q ? new_q : &noop_qdisc;

                cl->classid = classid;
                cl->parent = parent;

                /* set the class to be in the HTB_CAN_SEND state */
                cl->tokens = hopt->buffer;
                cl->ctokens = hopt->cbuffer;
                cl->mbuffer = 60000000; /* 1min */
                PSCHED_GET_TIME(cl->t_c);
                cl->cmode = HTB_CAN_SEND;

                /* attach to the hash list and the parent's family */
                list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
                list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
#ifdef HTB_DEBUG
                {
                        int i;
                        for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
                        cl->pq_node.rb_color = -1;
                }
#endif
        } else
                sch_tree_lock(sch);

        /* there used to be a nasty bug here: we have to check that the
           node is really a leaf before changing cl->un.leaf! */
        if (!cl->level) {
                cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
                if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
                        printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
                        cl->un.leaf.quantum = 1000;
                }
                if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
                        printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
                        cl->un.leaf.quantum = 200000;
                }
                if (hopt->quantum)
                        cl->un.leaf.quantum = hopt->quantum;
                if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
                        cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
        }

        cl->buffer = hopt->buffer;
        cl->cbuffer = hopt->cbuffer;
        if (cl->rate)
                qdisc_put_rtab(cl->rate);
        cl->rate = rtab;
        if (cl->ceil)
                qdisc_put_rtab(cl->ceil);
        cl->ceil = ctab;
        sch_tree_unlock(sch);

        *arg = (unsigned long)cl;
        return 0;

failure:
        if (rtab)
                qdisc_put_rtab(rtab);
        if (ctab)
                qdisc_put_rtab(ctab);
        return err;
}

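/* Return the filter chain anchored at a class, or the qdisc-wide chain
 * when arg is 0. */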
static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl = (struct htb_class *)arg;
        struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
        HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
        return fl;
}

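/* A filter is being attached with this class as its target; count it so
 * that htb_delete can refuse removal while filters still point here. */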
static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
        u32 classid)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl = htb_find(classid,sch);
        HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
        /*if (cl && !cl->level) return 0;
          The line above used to be there to prevent attaching filters to
          leaves. But at least the tc_index filter uses this just to get
          the class for other reasons, so we have to allow it.
          ----
          19.6.2002 As Werner explained, it is OK - bind filter is just
          another way to "lock" the class - unlike "get", this lock can
          be broken by the class during destroy, IIUC.
         */
        if (cl)
                cl->filter_cnt++;
        else
                q->filter_cnt++;
        return (unsigned long)cl;
}

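/* Counterpart of htb_bind_filter: drop the filter count again. */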
static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl = (struct htb_class *)arg;
        HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
        if (cl)
                cl->filter_cnt--;
        else
                q->filter_cnt--;
}

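/* Iterate over all classes in the hash for dump requests, honouring the
 * skip/count bookkeeping in struct qdisc_walker. */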
static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int i;

        if (arg->stop)
                return;

        for (i = 0; i < HTB_HSIZE; i++) {
                struct list_head *p;
                list_for_each (p,q->hash+i) {
                        struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                        if (arg->count < arg->skip) {
                                arg->count++;
                                continue;
                        }
                        if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
                                arg->stop = 1;
                                return;
                        }
                        arg->count++;
                }
        }
}

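/* Class operations exported to the tc class machinery. */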
static struct Qdisc_class_ops htb_class_ops = {
        .graft          =       htb_graft,
        .leaf           =       htb_leaf,
        .get            =       htb_get,
        .put            =       htb_put,
        .change         =       htb_change_class,
        .delete         =       htb_delete,
        .walk           =       htb_walk,
        .tcf_chain      =       htb_find_tcf,
        .bind_tcf       =       htb_bind_filter,
        .unbind_tcf     =       htb_unbind_filter,
        .dump           =       htb_dump_class,
};

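/* Top-level qdisc operations registered with the packet scheduler core. */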
static struct Qdisc_ops htb_qdisc_ops = {
        .next           =       NULL,
        .cl_ops         =       &htb_class_ops,
        .id             =       "htb",
        .priv_size      =       sizeof(struct htb_sched),
        .enqueue        =       htb_enqueue,
        .dequeue        =       htb_dequeue,
        .requeue        =       htb_requeue,
        .drop           =       htb_drop,
        .init           =       htb_init,
        .reset          =       htb_reset,
        .destroy        =       htb_destroy,
        .change         =       NULL /* htb_change */,
        .dump           =       htb_dump,
        .owner          =       THIS_MODULE,
};

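/* Module plumbing: register the qdisc ops on load, unregister on unload. */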
static int __init htb_module_init(void)
{
        return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
        unregister_qdisc(&htb_qdisc_ops);
}
module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");