/* vim: ts=8 sw=8
 * net/sched/sch_htb.c  Hierarchical token bucket, feed tree version
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *              Stef Coene <stef.coene@docum.org>
 *                      HTB support at LARTC mailing list
 *              Ondrej Kraus, <krauso@barr.cz>
 *                      found missing INIT_QDISC(htb)
 *              Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *                      helped a lot to locate nasty class stall bug
 *              Andi Kleen, Jamal Hadi, Bert Hubert
 *                      code review and helpful comments on shaping
 *              Tomasz Wrona, <tw@eter.tym.pl>
 *                      created test case so that I was able to fix nasty bug
 *              Wilfried Weissmann
 *                      spotted bug in dequeue code and helped with fix
 *              Jiri Fojtasek
 *                      fixed requeue routine
 *              and many others. thanks.
 *
 * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rbtree.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
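
/* An illustrative hierarchy (not part of the build, classids hypothetical):
 * with TC_HTB_MAXDEPTH == 8, a tree
 *
 *      1:1 (root, level 7) -> 1:10 (inner, level 6) -> 1:101, 1:102 (leaves, level 0)
 *
 * keeps the leaves at level 0 while the root sits at TC_HTB_MAXDEPTH-1,
 * so the per-(level,prio) rows below exist regardless of tree shape.
 */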
71
72 #define HTB_HSIZE 16    /* classid hash size */
73 #define HTB_EWMAC 2     /* rate average over HTB_EWMAC*HTB_HSIZE sec */
74 #define HTB_DEBUG 1     /* compile debugging support (activated by tc tool) */
75 #define HTB_RATECM 1    /* whether to use rate computer */
76 #define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */
77 #define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock)
78 #define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock)
79 #define HTB_VER 0x30010 /* major must be matched with number suplied by TC as version */
80
81 #if HTB_VER >> 16 != TC_HTB_PROTOVER
82 #error "Mismatched sch_htb.c and pkt_sch.h"
83 #endif
84
85 /* debugging support; S is subsystem, these are defined:
86   0 - netlink messages
87   1 - enqueue
88   2 - drop & requeue
89   3 - dequeue main
90   4 - dequeue one prio DRR part
91   5 - dequeue class accounting
92   6 - class overlimit status computation
93   7 - hint tree
94   8 - event queue
95  10 - rate estimator
96  11 - classifier 
97  12 - fast dequeue cache
98
99  L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full
100  q->debug uint32 contains 16 2-bit fields one for subsystem starting
101  from LSB
102  */
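/* A minimal sketch (not built) of how that debug word is laid out: to get
   detailed logging (L=2) for the event queue subsystem (S=8) only, tc
   would hand down a debug value like this: */
#if 0
        u32 debug = 2 << (2 * 8); /* 2-bit field of subsystem 8 set to level 2 */
        /* HTB_DBG_COND(8,2) is then true while HTB_DBG_COND(8,3) stays false */
#endif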
#ifdef HTB_DEBUG
#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L)
#define HTB_DBG(S,L,FMT,ARG...) if (HTB_DBG_COND(S,L)) \
        printk(KERN_DEBUG FMT,##ARG)
#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC)
#define HTB_PASSQ q,
#define HTB_ARGQ struct htb_sched *q,
#define static
#undef __inline__
#define __inline__
#undef inline
#define inline
#define HTB_CMAGIC 0xFEFAFEF1
#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \
                if ((N)->rb_color == -1) break; \
                rb_erase(N,R); \
                (N)->rb_color = -1; } while (0)
#else
#define HTB_DBG_COND(S,L) (0)
#define HTB_DBG(S,L,FMT,ARG...)
#define HTB_PASSQ
#define HTB_ARGQ
#define HTB_CHCL(cl)
#define htb_safe_rb_erase(N,R) rb_erase(N,R)
#endif


/* used internally to keep the status of a single class */
enum htb_cmode {
    HTB_CANT_SEND,              /* class can't send and can't borrow */
    HTB_MAY_BORROW,             /* class can't send but may borrow */
    HTB_CAN_SEND                /* class can send */
};

/* interior & leaf nodes; props specific to leaves are marked L: */
struct htb_class
{
#ifdef HTB_DEBUG
        unsigned magic;
#endif
    /* general class parameters */
    u32 classid;
    struct tc_stats     stats;  /* generic stats */
    struct tc_htb_xstats xstats;/* our special stats */
    int refcnt;                 /* usage count of this class */

#ifdef HTB_RATECM
    /* rate measurement counters */
    unsigned long rate_bytes,sum_bytes;
    unsigned long rate_packets,sum_packets;
#endif

    /* topology */
    int level;                  /* our level (see above) */
    struct htb_class *parent;   /* parent class */
    struct list_head hlist;     /* classid hash list item */
    struct list_head sibling;   /* sibling list item */
    struct list_head children;  /* children list */

    union {
            struct htb_class_leaf {
                    struct Qdisc *q;
                    int prio;
                    int aprio;
                    int quantum;
                    int deficit[TC_HTB_MAXDEPTH];
                    struct list_head drop_list;
            } leaf;
            struct htb_class_inner {
                    struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
                    struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
            } inner;
    } un;
    struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
    struct rb_node pq_node;              /* node for event queue */
    unsigned long pq_key;       /* the same type as jiffies global */

    int prio_activity;          /* for which prios are we active */
    enum htb_cmode cmode;       /* current mode of the class */

    /* class attached filters */
    struct tcf_proto *filter_list;
    int filter_cnt;

    int warned;         /* only one warning about non-work-conserving class .. */

    /* token bucket parameters */
    struct qdisc_rate_table *rate;      /* rate table of the class itself */
    struct qdisc_rate_table *ceil;      /* ceiling rate (limits borrows too) */
    long buffer,cbuffer;                /* token bucket depth/rate */
    long mbuffer;                       /* max wait time */
    long tokens,ctokens;                /* current number of tokens */
    psched_time_t t_c;                  /* checkpoint time */
};

/* TODO: maybe compute rate when size is too large .. or drop ? */
static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate,
        int size)
{
    int slot = size >> rate->rate.cell_log;
    if (slot > 255) {
        cl->xstats.giants++;
        slot = 255;
    }
    return rate->data[slot];
}
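
/* Usage sketch (not built): L2T() maps a packet length to the precomputed
   transmission time stored in the rate table. E.g. with cell_log == 3 a
   100-byte packet falls into slot 100 >> 3 == 12: */
#if 0
        long tx_time = L2T(cl, cl->rate, skb->len); /* psched ticks to send skb */
        cl->tokens -= tx_time;                      /* what HTB_ACCNT below pays */
#endif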

struct htb_sched
{
    struct list_head root;                      /* root classes list */
    struct list_head hash[HTB_HSIZE];           /* hashed by classid */
    struct list_head drops[TC_HTB_NUMPRIO];     /* active leaves (for drops) */

    /* self list - roots of self generating tree */
    struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];
    int row_mask[TC_HTB_MAXDEPTH];
    struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO];

    /* self wait list - roots of wait PQs per row */
    struct rb_root wait_pq[TC_HTB_MAXDEPTH];

    /* time of nearest event per level (row) */
    unsigned long near_ev_cache[TC_HTB_MAXDEPTH];

    /* cached value of jiffies in dequeue */
    unsigned long jiffies;

    /* whether we hit a non-work-conserving class during this dequeue; used
       to suppress the mindelay complaint in dequeue */
    int nwc_hit;

    int defcls;         /* class where unclassified flows go to */
    u32 debug;          /* subsystem debug levels */

    /* filters for qdisc itself */
    struct tcf_proto *filter_list;
    int filter_cnt;

    int rate2quantum;           /* quant = rate / rate2quantum */
    psched_time_t now;          /* cached dequeue time */
    struct timer_list timer;    /* send delay timer */
#ifdef HTB_RATECM
    struct timer_list rttim;    /* rate computer timer */
    int recmp_bucket;           /* which hash bucket to recompute next */
#endif

    /* non-shaped skbs; let them go directly through */
    struct sk_buff_head direct_queue;
    int direct_qlen;  /* max qlen of above */

    long direct_pkts;
};

/* compute hash of size HTB_HSIZE for given handle */
static __inline__ int htb_hash(u32 h)
{
#if HTB_HSIZE != 16
 #error "Declare new hash for your HTB_HSIZE"
#endif
    h ^= h>>8;  /* stolen from cbq_hash */
    h ^= h>>4;
    return h & 0xf;
}
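
/* Worked example (not built): classid 1:1 is handle 0x00010001 */
#if 0
        u32 h = 0x00010001;
        h ^= h >> 8;    /* 0x00010101 */
        h ^= h >> 4;    /* 0x00011111 */
        /* h & 0xf == 1, so class 1:1 lands in hash bucket 1 */
#endif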

/* find class in global hash table using given handle */
static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct list_head *p;
        if (TC_H_MAJ(handle) != sch->handle)
                return NULL;

        list_for_each (p,q->hash+htb_hash(handle)) {
                struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                if (cl->classid == handle)
                        return cl;
        }
        return NULL;
}

/**
 * htb_classify - classify a packet into a class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly through. In all other cases a leaf class is
 * returned. We allow direct class selection by classid in skb->priority
 * first. Then we examine filters in the qdisc and in inner nodes (if a
 * higher-level filter points to an inner node). If we end up with a
 * classid of MAJOR:0 we enqueue the skb into the special internal fifo
 * (direct); these packets then go directly through. If we still have no
 * valid leaf we try to use the MAJOR:default leaf. If that is still
 * unsuccessful, we finish and return the direct queue.
 */
#define HTB_DIRECT (struct htb_class*)-1
static inline u32 htb_classid(struct htb_class *cl)
{
        return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct htb_class *cl;
        struct tcf_result res;
        struct tcf_proto *tcf;
        int result;

        /* allow class selection by setting skb->priority to a valid classid;
           note that the nfmark can be used too, by attaching an fw filter
           with no rules in it */
        if (skb->priority == sch->handle)
                return HTB_DIRECT;  /* X:0 (direct flow) selected */
        if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
                return cl;

        tcf = q->filter_list;
        while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_POLICE
                if (result == TC_POLICE_SHOT)
                        return NULL;
#endif
                if ((cl = (void*)res.class) == NULL) {
                        if (res.classid == sch->handle)
                                return HTB_DIRECT;  /* X:0 (direct flow) */
                        if ((cl = htb_find(res.classid,sch)) == NULL)
                                break; /* filter selected invalid classid */
                }
                if (!cl->level)
                        return cl; /* we hit leaf; return it */

                /* we have got inner class; apply inner filter chain */
                tcf = cl->filter_list;
        }
        /* classification failed; try to use default class */
        cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch);
        if (!cl || cl->level)
                return HTB_DIRECT; /* bad default .. this is safe bet */
        return cl;
}
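
/* A minimal usage sketch (assumes a qdisc with handle 1: and an existing
   leaf 1:10 - both hypothetical; not built). Writing a valid classid into
   skb->priority picks the leaf directly, before any filter is consulted: */
#if 0
        struct htb_class *cl;
        skb->priority = TC_H_MAKE(1 << 16, 0x10); /* classid 1:10 */
        cl = htb_classify(skb, sch);              /* returns the 1:10 leaf */
#endif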

#ifdef HTB_DEBUG
static void htb_next_rb_node(struct rb_node **n);
#define HTB_DUMTREE(root,memb) if(root) { \
        struct rb_node *n = (root)->rb_node; \
        while (n->rb_left) n = n->rb_left; \
        while (n) { \
                struct htb_class *cl = rb_entry(n, struct htb_class, memb); \
                printk(" %x",cl->classid); htb_next_rb_node (&n); \
        } }

static void htb_debug_dump (struct htb_sched *q)
{
        int i,p;
        printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies);
        /* rows */
        for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) {
                printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]);
                for (p=0;p<TC_HTB_NUMPRIO;p++) {
                        if (!q->row[i][p].rb_node) continue;
                        printk(" p%d:",p);
                        HTB_DUMTREE(q->row[i]+p,node[p]);
                }
                printk("\n");
        }
        /* classes */
        for (i = 0; i < HTB_HSIZE; i++) {
                struct list_head *l;
                list_for_each (l,q->hash+i) {
                        struct htb_class *cl = list_entry(l,struct htb_class,hlist);
                        long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
                        printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d "
                                        "pa=%x f:",
                                cl->classid,cl->cmode,cl->tokens,cl->ctokens,
                                cl->pq_node.rb_color==-1?0:cl->pq_key,diff,
                                cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity);
                        if (cl->level)
                        for (p=0;p<TC_HTB_NUMPRIO;p++) {
                                if (!cl->un.inner.feed[p].rb_node) continue;
                                printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0);
                                HTB_DUMTREE(cl->un.inner.feed+p,node[p]);
                        }
                        printk("\n");
                }
        }
}
#endif
/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that the class is not already on such a list for the given prio.
 */
static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root,
                struct htb_class *cl,int prio)
{
        struct rb_node **p = &root->rb_node, *parent = NULL;
        HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio);
#ifdef HTB_DEBUG
        if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; }
        HTB_CHCL(cl);
        if (*p) {
                struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]);
                HTB_CHCL(x);
        }
#endif
        while (*p) {
                struct htb_class *c; parent = *p;
                c = rb_entry(parent, struct htb_class, node[prio]);
                HTB_CHCL(c);
                if (cl->classid > c->classid)
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->node[prio], parent, p);
        rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to the priority event queue to indicate that the class
 * will change its mode in "delay" microseconds (cl->pq_key then holds the
 * target time in jiffies). Make sure that the class is not already in the
 * queue.
 */
static void htb_add_to_wait_tree (struct htb_sched *q,
                struct htb_class *cl,long delay,int debug_hint)
{
        struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL;
        HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key);
#ifdef HTB_DEBUG
        if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; }
        HTB_CHCL(cl);
        if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit())
                printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint);
#endif
        cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay);
        if (cl->pq_key == q->jiffies)
                cl->pq_key++;

        /* update the nearest event cache */
        if (time_after(q->near_ev_cache[cl->level], cl->pq_key))
                q->near_ev_cache[cl->level] = cl->pq_key;

        while (*p) {
                struct htb_class *c; parent = *p;
                c = rb_entry(parent, struct htb_class, pq_node);
                if (time_after_eq(cl->pq_key, c->pq_key))
                        p = &parent->rb_right;
                else
                        p = &parent->rb_left;
        }
        rb_link_node(&cl->pq_node, parent, p);
        rb_insert_color(&cl->pq_node, &q->wait_pq[cl->level]);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past the last key, *n is set to NULL.
 * Average complexity is 2 steps per call.
 */
static void htb_next_rb_node(struct rb_node **n)
{
        *n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to the row at the priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
                struct htb_class *cl,int mask)
{
        HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n",
                        cl->classid,mask,q->row_mask[cl->level]);
        HTB_CHCL(cl);
        q->row_mask[cl->level] |= mask;
        while (mask) {
                int prio = ffz(~mask);
                mask &= ~(1 << prio);
                htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio);
        }
}
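
/* Worked example (not built) of the ffz(~mask) idiom used above and in
   several routines below: it extracts set bits from lowest to highest,
   so mask == 0x5 visits prio 0 and then prio 2: */
#if 0
        int mask = 0x5;
        while (mask) {
                int prio = ffz(~mask);  /* lowest set bit of mask: 0, then 2 */
                mask &= ~(1 << prio);   /* clear it and continue */
        }
#endif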

/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from the row at the priorities marked in mask.
 * It does nothing if mask == 0.
 */
static __inline__ void htb_remove_class_from_row(struct htb_sched *q,
                struct htb_class *cl,int mask)
{
        int m = 0;
        HTB_CHCL(cl);
        while (mask) {
                int prio = ffz(~mask);
                mask &= ~(1 << prio);
                if (q->ptr[cl->level][prio] == cl->node+prio)
                        htb_next_rb_node(q->ptr[cl->level]+prio);
                htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio);
                if (!q->row[cl->level][prio].rb_node)
                        m |= 1 << prio;
        }
        HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n",
                        cl->classid,mask,q->row_mask[cl->level],m);
        q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates an active class's feed chain
 *
 * The class is connected to its ancestors and/or the appropriate rows
 * for the priorities it participates in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl)
{
        struct htb_class *p = cl->parent;
        long m,mask = cl->prio_activity;
        HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
        HTB_CHCL(cl);

        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
                HTB_CHCL(p);
                m = mask; while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);

                        if (p->un.inner.feed[prio].rb_node)
                                /* parent already has this feed in use, so
                                   reset the bit in mask - parent is ok */
                                mask &= ~(1 << prio);

                        htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio);
                }
                HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
                                p->classid,p->prio_activity,mask,p->cmode);
                p->prio_activity |= mask;
                cl = p; p = cl->parent;
                HTB_CHCL(cl);
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
                htb_add_class_to_row(q,cl,mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. The class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
        struct htb_class *p = cl->parent;
        long m,mask = cl->prio_activity;
        HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode);
        HTB_CHCL(cl);

        while (cl->cmode == HTB_MAY_BORROW && p && mask) {
                m = mask; mask = 0;
                while (m) {
                        int prio = ffz(~m);
                        m &= ~(1 << prio);

                        if (p->un.inner.ptr[prio] == cl->node+prio)
                                htb_next_rb_node(p->un.inner.ptr + prio);

                        htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio);

                        if (!p->un.inner.feed[prio].rb_node)
                                mask |= 1 << prio;
                }
                HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n",
                                p->classid,p->prio_activity,mask,p->cmode);
                p->prio_activity &= ~mask;
                cl = p; p = cl->parent;
                HTB_CHCL(cl);
        }
        if (cl->cmode == HTB_CAN_SEND && mask)
                htb_remove_class_from_row(q,cl,mask);
}

/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If the mode
 * is not HTB_CAN_SEND then *diff is set to the time difference from now
 * to the time when cl will change its state (used by callers to compute
 * cl->pq_key).
 * Also it is worth noting that class mode doesn't change simply at
 * cl->{c,}tokens == 0; rather there can be hysteresis over the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static __inline__ enum htb_cmode
htb_class_mode(struct htb_class *cl,long *diff)
{
    long toks;

    if ((toks = (cl->ctokens + *diff)) < (
#if HTB_HYSTERESIS
            cl->cmode != HTB_CANT_SEND ? -cl->cbuffer :
#endif
            0)) {
            *diff = -toks;
            return HTB_CANT_SEND;
    }
    if ((toks = (cl->tokens + *diff)) >= (
#if HTB_HYSTERESIS
            cl->cmode == HTB_CAN_SEND ? -cl->buffer :
#endif
            0))
            return HTB_CAN_SEND;

    *diff = -toks;
    return HTB_MAY_BORROW;
}
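
/* Illustrative check (not built): the hysteresis keeps a CAN_SEND class
   in CAN_SEND until its tokens fall below -buffer, instead of below 0,
   so a class oscillating around zero tokens doesn't flap modes: */
#if 0
        long diff = 0;
        cl->cmode = HTB_CAN_SEND;
        cl->ctokens = cl->cbuffer;      /* ceil bucket full */
        cl->tokens = -cl->buffer + 1;   /* deep in debt but still >= -buffer */
        /* htb_class_mode(cl, &diff) still returns HTB_CAN_SEND here; with
           HTB_HYSTERESIS 0 it would already return HTB_MAY_BORROW */
#endif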

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed list linkage, change
 * the mode and add the class to the wait event queue if appropriate.
 * The new mode should be different from the old one and cl->pq_key has
 * to be valid if changing to a mode other than HTB_CAN_SEND (see
 * htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff)
{
        enum htb_cmode new_mode = htb_class_mode(cl,diff);

        HTB_CHCL(cl);
        HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid);

        if (new_mode == cl->cmode)
                return;

        if (cl->prio_activity) { /* not necessary: speed optimization */
                if (cl->cmode != HTB_CANT_SEND)
                        htb_deactivate_prios(q,cl);
                cl->cmode = new_mode;
                if (new_mode != HTB_CANT_SEND)
                        htb_activate_prios(q,cl);
        } else
                cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns the (new) priority of the leaf and activates the feed
 * chain for that prio. It can be called safely on an already active leaf.
 * It also adds the leaf into the droplist.
 */
static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl)
{
        BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen);
        HTB_CHCL(cl);
        if (!cl->prio_activity) {
                cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio);
                htb_activate_prios(q,cl);
                list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio);
        }
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that the leaf is active. In other words, it can't be called
 * with a non-active leaf. It also removes the class from the drop list.
 */
static __inline__ void
htb_deactivate(struct htb_sched *q,struct htb_class *cl)
{
        BUG_TRAP(cl->prio_activity);
        HTB_CHCL(cl);
        htb_deactivate_prios(q,cl);
        cl->prio_activity = 0;
        list_del_init(&cl->un.leaf.drop_list);
}
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_classify(skb,sch);

    if (cl == HTB_DIRECT || !cl) {
        /* enqueue to helper queue */
        if (q->direct_queue.qlen < q->direct_qlen && cl) {
            __skb_queue_tail(&q->direct_queue, skb);
            q->direct_pkts++;
        } else {
            kfree_skb (skb);
            sch->stats.drops++;
            return NET_XMIT_DROP;
        }
    } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
        sch->stats.drops++;
        cl->stats.drops++;
        return NET_XMIT_DROP;
    } else {
        cl->stats.packets++; cl->stats.bytes += skb->len;
        htb_activate (q,cl);
    }

    sch->q.qlen++;
    sch->stats.packets++; sch->stats.bytes += skb->len;
    HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
    return NET_XMIT_SUCCESS;
}

/* TODO: requeuing packet charges it to policers again !! */
static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
    struct htb_sched *q = (struct htb_sched *)sch->data;
    struct htb_class *cl = htb_classify(skb,sch);
    struct sk_buff *tskb;

    if (cl == HTB_DIRECT || !cl) {
        /* enqueue to helper queue */
        if (q->direct_queue.qlen < q->direct_qlen && cl) {
            __skb_queue_head(&q->direct_queue, skb);
        } else {
            __skb_queue_head(&q->direct_queue, skb);
            tskb = __skb_dequeue_tail(&q->direct_queue);
            kfree_skb (tskb);
            sch->stats.drops++;
            return NET_XMIT_CN;
        }
    } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
        sch->stats.drops++;
        cl->stats.drops++;
        return NET_XMIT_DROP;
    } else
            htb_activate (q,cl);

    sch->q.qlen++;
    HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb);
    return NET_XMIT_SUCCESS;
}

static void htb_timer(unsigned long arg)
{
    struct Qdisc *sch = (struct Qdisc*)arg;
    sch->flags &= ~TCQ_F_THROTTLED;
    wmb();
    netif_schedule(sch->dev);
}

#ifdef HTB_RATECM
#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0
static void htb_rate_timer(unsigned long arg)
{
        struct Qdisc *sch = (struct Qdisc*)arg;
        struct htb_sched *q = (struct htb_sched *)sch->data;
        struct list_head *p;

        /* lock queue so that we can muck with it */
        HTB_QLOCK(sch);
        HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies);

        q->rttim.expires = jiffies + HZ;
        add_timer(&q->rttim);

        /* scan and recompute one bucket at a time */
        if (++q->recmp_bucket >= HTB_HSIZE)
                q->recmp_bucket = 0;
        list_for_each (p,q->hash+q->recmp_bucket) {
                struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n",
                                cl->classid,cl->sum_bytes,cl->sum_packets);
                RT_GEN (cl->sum_bytes,cl->rate_bytes);
                RT_GEN (cl->sum_packets,cl->rate_packets);
        }
        HTB_QUNLOCK(sch);
}
#endif
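
/* RT_GEN is an exponential moving average with weight 1/HTB_EWMAC; the
   timer fires every second but each hash bucket is revisited only every
   HTB_HSIZE seconds, so D holds the bytes of the whole interval.
   Steady-state check (illustrative, not built) for a constant 1000 B/s
   class with HTB_EWMAC == 2 and HTB_HSIZE == 16: */
#if 0
        unsigned long D = 16000;        /* bytes accumulated over 16 seconds */
        unsigned long R = 32000;        /* the fixed point of R += D - R/2 */
        RT_GEN(D, R);                   /* R stays at 32000, D resets to 0 */
        /* htb_dump_class then reports R/(HTB_EWMAC*HTB_HSIZE) == 1000 B/s */
#endif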

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that a packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to the ceil leaky bucket for
 * the leaf and all ancestors and to the rate bucket for ancestors at levels
 * "level" and higher. It also handles a possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q,struct htb_class *cl,
                int level,int bytes)
{
        long toks,diff;
        enum htb_cmode old_mode;
        HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes);

#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
        if (toks > cl->B) toks = cl->B; \
        toks -= L2T(cl, cl->R, bytes); \
        if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
        cl->T = toks

        while (cl) {
                HTB_CHCL(cl);
                diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
#ifdef HTB_DEBUG
                if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
                        if (net_ratelimit())
                                printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
                                       cl->classid, diff,
                                       (unsigned long long) q->now,
                                       (unsigned long long) cl->t_c,
                                       q->jiffies);
                        diff = 1000;
                }
#endif
                if (cl->level >= level) {
                        if (cl->level == level) cl->xstats.lends++;
                        HTB_ACCNT (tokens,buffer,rate);
                } else {
                        cl->xstats.borrows++;
                        cl->tokens += diff; /* we moved t_c; update tokens */
                }
                HTB_ACCNT (ctokens,cbuffer,ceil);
                cl->t_c = q->now;
                HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens);

                old_mode = cl->cmode; diff = 0;
                htb_change_class_mode(q,cl,&diff);
                if (old_mode != cl->cmode) {
                        if (old_mode != HTB_CAN_SEND)
                                htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
                        if (cl->cmode != HTB_CAN_SEND)
                                htb_add_to_wait_tree (q,cl,diff,1);
                }

#ifdef HTB_RATECM
                /* update rate counters */
                cl->sum_bytes += bytes; cl->sum_packets++;
#endif

                /* update byte stats except for leaves which are already updated */
                if (cl->level) {
                        cl->stats.bytes += bytes;
                        cl->stats.packets++;
                }
                cl = cl->parent;
        }
}
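
/* One HTB_ACCNT(tokens,buffer,rate) step written out (illustrative only):
   refill by the elapsed time, clamp to the bucket depth, then pay the
   packet's transmission time: */
#if 0
        toks = diff + cl->tokens;               /* refill: diff = psched ticks since t_c */
        if (toks > cl->buffer)
                toks = cl->buffer;              /* can't store more than one burst */
        toks -= L2T(cl, cl->rate, bytes);       /* charge the packet */
        if (toks <= -cl->mbuffer)
                toks = 1 - cl->mbuffer;         /* bound the debt */
        cl->tokens = toks;
#endif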

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans the event queue for pending events and applies them. Returns the
 * number of jiffies to the next pending event (0 for no event in pq).
 * Note: applied are events that have cl->pq_key <= jiffies.
 */
static long htb_do_events(struct htb_sched *q,int level)
{
        int i;
        HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n",
                        level,q->wait_pq[level].rb_node,q->row_mask[level]);
        for (i = 0; i < 500; i++) {
                struct htb_class *cl;
                long diff;
                struct rb_node *p = q->wait_pq[level].rb_node;
                if (!p) return 0;
                while (p->rb_left) p = p->rb_left;

                cl = rb_entry(p, struct htb_class, pq_node);
                if (time_after(cl->pq_key, q->jiffies)) {
                        HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies);
                        return cl->pq_key - q->jiffies;
                }
                htb_safe_rb_erase(p,q->wait_pq+level);
                diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer, 0);
#ifdef HTB_DEBUG
                if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) {
                        if (net_ratelimit())
                                printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n",
                                       cl->classid, diff,
                                       (unsigned long long) q->now,
                                       (unsigned long long) cl->t_c,
                                       q->jiffies);
                        diff = 1000;
                }
#endif
                htb_change_class_mode(q,cl,&diff);
                if (cl->cmode != HTB_CAN_SEND)
                        htb_add_to_wait_tree (q,cl,diff,2);
        }
        if (net_ratelimit())
                printk(KERN_WARNING "htb: too many events !\n");
        return HZ/10;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find the leaf the current feed pointers point to.
 */
static struct htb_class *
htb_lookup_leaf(struct rb_root *tree,int prio,struct rb_node **pptr)
{
        int i;
        struct {
                struct rb_node *root;
                struct rb_node **pptr;
        } stk[TC_HTB_MAXDEPTH],*sp = stk;

        BUG_TRAP(tree->rb_node);
        sp->root = tree->rb_node;
        sp->pptr = pptr;

        for (i = 0; i < 65535; i++) {
                if (!*sp->pptr) { /* we are at right end; rewind & go up */
                        *sp->pptr = sp->root;
                        while ((*sp->pptr)->rb_left)
                                *sp->pptr = (*sp->pptr)->rb_left;
                        if (sp > stk) {
                                sp--;
                                BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL;
                                htb_next_rb_node (sp->pptr);
                        }
                } else {
                        struct htb_class *cl;
                        cl = rb_entry(*sp->pptr,struct htb_class,node[prio]);
                        HTB_CHCL(cl);
                        if (!cl->level)
                                return cl;
                        (++sp)->root = cl->un.inner.feed[prio].rb_node;
                        sp->pptr = cl->un.inner.ptr+prio;
                }
        }
        BUG_TRAP(0);
        return NULL;
}

/* dequeues a packet at given priority and level; call only if
   you are sure that there is an active class at prio/level */
static struct sk_buff *
htb_dequeue_tree(struct htb_sched *q,int prio,int level)
{
        struct sk_buff *skb = NULL;
        struct htb_class *cl,*start;
        /* look initial class up in the row */
        start = cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);

        do {
next:
                BUG_TRAP(cl);
                if (!cl) return NULL;
                HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n",
                                prio,level,cl->classid,cl->un.leaf.deficit[level]);

                /* class can be empty - it is unlikely but can be true if the
                   leaf qdisc drops packets in its enqueue routine or if
                   someone used the graft operation on the leaf since the
                   last dequeue; simply deactivate and skip such a class */
                if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
                        struct htb_class *next;
                        htb_deactivate(q,cl);

                        /* row/level might become empty */
                        if ((q->row_mask[level] & (1 << prio)) == 0)
                                return NULL;

                        next = htb_lookup_leaf (q->row[level]+prio,
                                        prio,q->ptr[level]+prio);
                        if (cl == start) /* fix start if we just deleted it */
                                start = next;
                        cl = next;
                        goto next;
                }

                if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL))
                        break;
                if (!cl->warned) {
                        printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid);
                        cl->warned = 1;
                }
                q->nwc_hit++;
                htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
                cl = htb_lookup_leaf (q->row[level]+prio,prio,q->ptr[level]+prio);
        } while (cl != start);

        if (likely(skb != NULL)) {
                if ((cl->un.leaf.deficit[level] -= skb->len) < 0) {
                        HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n",
                                level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum);
                        cl->un.leaf.deficit[level] += cl->un.leaf.quantum;
                        htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio);
                }
                /* this used to be after charge_class but this arrangement
                   gives us slightly better performance */
                if (!cl->un.leaf.q->q.qlen)
                        htb_deactivate (q,cl);
                htb_charge_class (q,cl,level,skb->len);
        }
        return skb;
}

static void htb_delay_by(struct Qdisc *sch,long delay)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        if (delay <= 0) delay = 1;
        if (unlikely(delay > 5*HZ)) {
                if (net_ratelimit())
                        printk(KERN_INFO "HTB delay %ld > 5sec\n", delay);
                delay = 5*HZ;
        }
        /* why not use jiffies here? because expires can be in the past */
        mod_timer(&q->timer, q->jiffies + delay);
        sch->flags |= TCQ_F_THROTTLED;
        sch->stats.overlimits++;
        HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay);
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb = NULL;
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int level;
        long min_delay;
#ifdef HTB_DEBUG
        int evs_used = 0;
#endif

        q->jiffies = jiffies;
        HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue),
                        sch->q.qlen);

        /* try to dequeue direct packets as high prio (!) to minimize cpu work */
        if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) {
                sch->flags &= ~TCQ_F_THROTTLED;
                sch->q.qlen--;
                return skb;
        }

        if (!sch->q.qlen) goto fin;
        PSCHED_GET_TIME(q->now);

        min_delay = LONG_MAX;
        q->nwc_hit = 0;
        for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
                /* common case optimization - skip event handler quickly */
                int m;
                long delay;
                if (time_after_eq(q->jiffies, q->near_ev_cache[level])) {
                        delay = htb_do_events(q,level);
                        q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ);
#ifdef HTB_DEBUG
                        evs_used++;
#endif
                } else
                        delay = q->near_ev_cache[level] - q->jiffies;

                if (delay && min_delay > delay)
                        min_delay = delay;
                m = ~q->row_mask[level];
                while (m != (int)(-1)) {
                        int prio = ffz (m);
                        m |= 1 << prio;
                        skb = htb_dequeue_tree(q,prio,level);
                        if (likely(skb != NULL)) {
                                sch->q.qlen--;
                                sch->flags &= ~TCQ_F_THROTTLED;
                                goto fin;
                        }
                }
        }
#ifdef HTB_DEBUG
        if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) {
                if (min_delay == LONG_MAX) {
                        printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n",
                                        evs_used,q->jiffies,jiffies);
                        htb_debug_dump(q);
                } else
                        printk(KERN_WARNING "HTB: mindelay=%ld, some class has "
                                        "too small rate\n",min_delay);
        }
#endif
        htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay);
fin:
        HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb);
        return skb;
}

/* try to drop from each class (by prio) until one succeeds */
static unsigned int htb_drop(struct Qdisc* sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int prio;

        for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
                struct list_head *p;
                list_for_each (p,q->drops+prio) {
                        struct htb_class *cl = list_entry(p, struct htb_class,
                                                          un.leaf.drop_list);
                        unsigned int len;
                        if (cl->un.leaf.q->ops->drop &&
                                (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
                                sch->q.qlen--;
                                if (!cl->un.leaf.q->q.qlen)
                                        htb_deactivate (q,cl);
                                return len;
                        }
                }
        }
        return 0;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc* sch)
{
        struct htb_sched *q = (struct htb_sched *)sch->data;
        int i;
        HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle);

        for (i = 0; i < HTB_HSIZE; i++) {
                struct list_head *p;
                list_for_each (p,q->hash+i) {
                        struct htb_class *cl = list_entry(p,struct htb_class,hlist);
                        if (cl->level)
                                memset(&cl->un.inner,0,sizeof(cl->un.inner));
                        else {
                                if (cl->un.leaf.q)
                                        qdisc_reset(cl->un.leaf.q);
                                INIT_LIST_HEAD(&cl->un.leaf.drop_list);
                        }
                        cl->prio_activity = 0;
                        cl->cmode = HTB_CAN_SEND;
#ifdef HTB_DEBUG
                        cl->pq_node.rb_color = -1;
                        memset(cl->node,255,sizeof(cl->node));
#endif

                }
        }
        sch->flags &= ~TCQ_F_THROTTLED;
        del_timer(&q->timer);
        __skb_queue_purge(&q->direct_queue);
        sch->q.qlen = 0;
        memset(q->row,0,sizeof(q->row));
        memset(q->row_mask,0,sizeof(q->row_mask));
        memset(q->wait_pq,0,sizeof(q->wait_pq));
        memset(q->ptr,0,sizeof(q->ptr));
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
                INIT_LIST_HEAD(q->drops+i);
}

static int htb_init(struct Qdisc *sch, struct rtattr *opt)
{
        struct htb_sched *q = (struct htb_sched*)sch->data;
        struct rtattr *tb[TCA_HTB_INIT];
        struct tc_htb_glob *gopt;
        int i;
#ifdef HTB_DEBUG
        printk(KERN_INFO "HTB init, kernel part version %d.%d\n",
                          HTB_VER >> 16,HTB_VER & 0xffff);
#endif
        if (!opt || rtattr_parse(tb, TCA_HTB_INIT, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
                        tb[TCA_HTB_INIT-1] == NULL ||
                        RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) {
                printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n");
                return -EINVAL;
        }
        gopt = RTA_DATA(tb[TCA_HTB_INIT-1]);
        if (gopt->version != HTB_VER >> 16) {
                printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n",
                                HTB_VER >> 16,HTB_VER & 0xffff,gopt->version);
                return -EINVAL;
        }
        memset(q,0,sizeof(*q));
        q->debug = gopt->debug;
        HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum);

        INIT_LIST_HEAD(&q->root);
        for (i = 0; i < HTB_HSIZE; i++)
                INIT_LIST_HEAD(q->hash+i);
        for (i = 0; i < TC_HTB_NUMPRIO; i++)
                INIT_LIST_HEAD(q->drops+i);

        init_timer(&q->timer);
        skb_queue_head_init(&q->direct_queue);

        q->direct_qlen = sch->dev->tx_queue_len;
        if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
                q->direct_qlen = 2;
        q->timer.function = htb_timer;
        q->timer.data = (unsigned long)sch;

#ifdef HTB_RATECM
        init_timer(&q->rttim);
        q->rttim.function = htb_rate_timer;
        q->rttim.data = (unsigned long)sch;
        q->rttim.expires = jiffies + HZ;
        add_timer(&q->rttim);
#endif
        if ((q->rate2quantum = gopt->rate2quantum) < 1)
                q->rate2quantum = 1;
        q->defcls = gopt->defcls;

        return 0;
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct htb_sched *q = (struct htb_sched*)sch->data;
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
        struct tc_htb_glob gopt;
        HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle);
        /* stats */
        HTB_QLOCK(sch);
        gopt.direct_pkts = q->direct_pkts;

#ifdef HTB_DEBUG
        if (HTB_DBG_COND(0,2))
                htb_debug_dump(q);
#endif
        gopt.version = HTB_VER;
        gopt.rate2quantum = q->rate2quantum;
        gopt.defcls = q->defcls;
        gopt.debug = q->debug;
        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
        RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt);
        rta->rta_len = skb->tail - b;
        sch->stats.qlen = sch->q.qlen;
        RTA_PUT(skb, TCA_STATS, sizeof(sch->stats), &sch->stats);
        HTB_QUNLOCK(sch);
        return skb->len;
rtattr_failure:
        HTB_QUNLOCK(sch);
        skb_trim(skb, skb->tail - skb->data);
        return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
        struct sk_buff *skb, struct tcmsg *tcm)
{
#ifdef HTB_DEBUG
        struct htb_sched *q = (struct htb_sched*)sch->data;
#endif
        struct htb_class *cl = (struct htb_class*)arg;
        unsigned char    *b = skb->tail;
        struct rtattr *rta;
        struct tc_htb_opt opt;

        HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid);

        HTB_QLOCK(sch);
        tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT;
        tcm->tcm_handle = cl->classid;
        if (!cl->level && cl->un.leaf.q) {
                tcm->tcm_info = cl->un.leaf.q->handle;
                cl->stats.qlen = cl->un.leaf.q->q.qlen;
        }

        rta = (struct rtattr*)b;
        RTA_PUT(skb, TCA_OPTIONS, 0, NULL);

        memset (&opt,0,sizeof(opt));

        opt.rate = cl->rate->rate; opt.buffer = cl->buffer;
        opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer;
        opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio;
        opt.level = cl->level;
        RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
        rta->rta_len = skb->tail - b;

#ifdef HTB_RATECM
        cl->stats.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE);
        cl->stats.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE);
#endif

        cl->xstats.tokens = cl->tokens;
        cl->xstats.ctokens = cl->ctokens;
        RTA_PUT(skb, TCA_STATS, sizeof(cl->stats), &cl->stats);
        RTA_PUT(skb, TCA_XSTATS, sizeof(cl->xstats), &cl->xstats);
        HTB_QUNLOCK(sch);
        return skb->len;
rtattr_failure:
        HTB_QUNLOCK(sch);
        skb_trim(skb, b - skb->data);
        return -1;
}
1295
1296 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1297         struct Qdisc **old)
1298 {
1299         struct htb_class *cl = (struct htb_class*)arg;
1300
1301         if (cl && !cl->level) {
1302                 if (new == NULL && (new = qdisc_create_dflt(sch->dev, 
1303                                         &pfifo_qdisc_ops)) == NULL)
1304                                         return -ENOBUFS;
1305                 sch_tree_lock(sch);
1306                 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) {
1307                         if (cl->prio_activity)
1308                                 htb_deactivate ((struct htb_sched*)sch->data,cl);
1309
1310                         /* TODO: is it correct ? Why CBQ doesn't do it ? */
1311                         sch->q.qlen -= (*old)->q.qlen;  
1312                         qdisc_reset(*old);
1313                 }
1314                 sch_tree_unlock(sch);
1315                 return 0;
1316         }
1317         return -ENOENT;
1318 }
1319
1320 static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg)
1321 {
1322         struct htb_class *cl = (struct htb_class*)arg;
1323         return (cl && !cl->level) ? cl->un.leaf.q : NULL;
1324 }
1325
1326 static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1327 {
1328 #ifdef HTB_DEBUG
1329         struct htb_sched *q = (struct htb_sched *)sch->data;
1330 #endif
1331         struct htb_class *cl = htb_find(classid,sch);
1332         HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0);
1333         if (cl) 
1334                 cl->refcnt++;
1335         return (unsigned long)cl;
1336 }
1337
1338 static void htb_destroy_filters(struct tcf_proto **fl)
1339 {
1340         struct tcf_proto *tp;
1341
1342         while ((tp = *fl) != NULL) {
1343                 *fl = tp->next;
1344                 tcf_destroy(tp);
1345         }
1346 }
1347
1348 static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl)
1349 {
1350         struct htb_sched *q = (struct htb_sched *)sch->data;
1351         HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0);
1352         if (!cl->level) {
1353                 BUG_TRAP(cl->un.leaf.q);
1354                 sch->q.qlen -= cl->un.leaf.q->q.qlen;
1355                 qdisc_destroy(cl->un.leaf.q);
1356         }
1357         qdisc_put_rtab(cl->rate);
1358         qdisc_put_rtab(cl->ceil);
1359         
1360 #ifdef CONFIG_NET_ESTIMATOR
1361         qdisc_kill_estimator(&cl->stats);
1362 #endif
1363         htb_destroy_filters (&cl->filter_list);
1364         
1365         while (!list_empty(&cl->children)) 
1366                 htb_destroy_class (sch,list_entry(cl->children.next,
1367                                         struct htb_class,sibling));
1368
1369         /* note: this delete may happen twice (see htb_delete) */
1370         list_del(&cl->hlist);
1371         list_del(&cl->sibling);
1372         
1373         if (cl->prio_activity)
1374                 htb_deactivate (q,cl);
1375         
1376         if (cl->cmode != HTB_CAN_SEND)
1377                 htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level);
1378         
1379         kfree(cl);
1380 }
1381
1382 /* always called under BH & queue lock */
1383 static void htb_destroy(struct Qdisc* sch)
1384 {
1385         struct htb_sched *q = (struct htb_sched *)sch->data;
1386         HTB_DBG(0,1,"htb_destroy q=%p\n",q);
1387
1388         del_timer_sync (&q->timer);
1389 #ifdef HTB_RATECM
1390         del_timer_sync (&q->rttim);
1391 #endif
1392         /* This line used to be after the htb_destroy_class call below,
1393            and surprisingly it worked in 2.4. But it must precede that call,
1394            because a filter needs its target class alive to be able to call
1395            unbind_filter on it (without an Oops). */
1396         htb_destroy_filters(&q->filter_list);
1397         
1398         while (!list_empty(&q->root)) 
1399                 htb_destroy_class (sch,list_entry(q->root.next,
1400                                         struct htb_class,sibling));
1401
1402         __skb_queue_purge(&q->direct_queue);
1403 }
1404
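/* Illustrative example (names arbitrary): `tc class del dev eth0 classid 1:10`
   ends up here; if 1:10 still has child classes or bound filters, the request
   fails with EBUSY, which tc reports as "RTNETLINK answers: Device or
   resource busy". */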
1405 static int htb_delete(struct Qdisc *sch, unsigned long arg)
1406 {
1407         struct htb_sched *q = (struct htb_sched *)sch->data;
1408         struct htb_class *cl = (struct htb_class*)arg;
1409         HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
1410
1411         // TODO: why don't we allow deleting a subtree? references? does the
1412         // tc subsystem guarantee that no class refs are held in htb_destroy,
1413         // so that we could remove children safely there?
1414         if (!list_empty(&cl->children) || cl->filter_cnt)
1415                 return -EBUSY;
1416         
1417         sch_tree_lock(sch);
1418         
1419         /* delete from hash and active; remainder in destroy_class */
1420         list_del_init(&cl->hlist);
1421         if (cl->prio_activity)
1422                 htb_deactivate (q,cl);
1423
1424         if (--cl->refcnt == 0)
1425                 htb_destroy_class(sch,cl);
1426
1427         sch_tree_unlock(sch);
1428         return 0;
1429 }
1430
1431 static void htb_put(struct Qdisc *sch, unsigned long arg)
1432 {
1433 #ifdef HTB_DEBUG
1434         struct htb_sched *q = (struct htb_sched *)sch->data;
1435 #endif
1436         struct htb_class *cl = (struct htb_class*)arg;
1437         HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0);
1438
1439         if (--cl->refcnt == 0)
1440                 htb_destroy_class(sch,cl);
1441 }
1442
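/* Illustrative example (device, ids and rates are arbitrary): this handler
   serves both `tc class add` and `tc class change`, e.g.
        tc qdisc add dev eth0 root handle 1: htb
        tc class add dev eth0 parent 1: classid 1:1 htb rate 100kbit ceil 200kbit
        tc class change dev eth0 classid 1:1 htb rate 150kbit ceil 300kbit
   tc packs the numeric parameters into TCA_HTB_PARMS (struct tc_htb_opt)
   together with the rate/ceil conversion tables parsed below. */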
1443 static int htb_change_class(struct Qdisc *sch, u32 classid, 
1444                 u32 parentid, struct rtattr **tca, unsigned long *arg)
1445 {
1446         int err = -EINVAL;
1447         struct htb_sched *q = (struct htb_sched *)sch->data;
1448         struct htb_class *cl = (struct htb_class*)*arg,*parent;
1449         struct rtattr *opt = tca[TCA_OPTIONS-1];
1450         struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1451         struct rtattr *tb[TCA_HTB_RTAB];
1452         struct tc_htb_opt *hopt;
1453
1454         /* extract all subattrs from opt attr */
1455         if (!opt || rtattr_parse(tb, TCA_HTB_RTAB, RTA_DATA(opt), RTA_PAYLOAD(opt)) ||
1456                         tb[TCA_HTB_PARMS-1] == NULL ||
1457                         RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt))
1458                 goto failure;
1459         
1460         parent = parentid == TC_H_ROOT ? NULL : htb_find (parentid,sch);
1461
1462         hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]);
1463         HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum);
1464         rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]);
1465         ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]);
1466         if (!rtab || !ctab) goto failure;
1467
1468         if (!cl) { /* new class */
1469                 struct Qdisc *new_q;
1470                 /* check for valid classid */
1471                 if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch))
1472                         goto failure;
1473
1474                 /* check maximal depth */
1475                 if (parent && parent->parent && parent->parent->level < 2) {
1476                         printk(KERN_ERR "htb: tree is too deep\n");
1477                         goto failure;
1478                 }
1479                 err = -ENOBUFS;
1480                 if ((cl = kmalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1481                         goto failure;
1482                 
1483                 memset(cl, 0, sizeof(*cl));
1484                 cl->refcnt = 1;
1485                 INIT_LIST_HEAD(&cl->sibling);
1486                 INIT_LIST_HEAD(&cl->hlist);
1487                 INIT_LIST_HEAD(&cl->children);
1488                 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
1489 #ifdef HTB_DEBUG
1490                 cl->magic = HTB_CMAGIC;
1491 #endif
1492
1493                 /* create the leaf qdisc early because it uses kmalloc(GFP_KERNEL),
1494                    which can't be used inside sch_tree_lock
1495                    -- thanks to Karlis Peisenieks */
1496                 new_q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1497                 sch_tree_lock(sch);
1498                 if (parent && !parent->level) {
1499                         /* turn parent into inner node */
1500                         sch->q.qlen -= parent->un.leaf.q->q.qlen;
1501                         qdisc_destroy (parent->un.leaf.q);
1502                         if (parent->prio_activity) 
1503                                 htb_deactivate (q,parent);
1504
1505                         /* remove from evt list because of level change */
1506                         if (parent->cmode != HTB_CAN_SEND) {
1507                                 htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/);
1508                                 parent->cmode = HTB_CAN_SEND;
1509                         }
1510                         parent->level = (parent->parent ? parent->parent->level
1511                                         : TC_HTB_MAXDEPTH) - 1;
1512                         memset (&parent->un.inner,0,sizeof(parent->un.inner));
1513                 }
1514                 /* the leaf (i.e. this class) needs an elementary qdisc */
1515                 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1516
1517                 cl->classid = classid; cl->parent = parent;
1518
1519                 /* set class to be in HTB_CAN_SEND state */
1520                 cl->tokens = hopt->buffer;
1521                 cl->ctokens = hopt->cbuffer;
1522                 cl->mbuffer = 60000000; /* 1min */
1523                 PSCHED_GET_TIME(cl->t_c);
1524                 cl->cmode = HTB_CAN_SEND;
1525
1526                 /* attach to the hash list and parent's family */
1527                 list_add_tail(&cl->hlist, q->hash+htb_hash(classid));
1528                 list_add_tail(&cl->sibling, parent ? &parent->children : &q->root);
1529 #ifdef HTB_DEBUG
1530                 { 
1531                         int i;
1532                         for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1;
1533                         cl->pq_node.rb_color = -1;
1534                 }
1535 #endif
1536         } else sch_tree_lock(sch);
1537
1538         /* there used to be a nasty bug here: we have to check that the node
1539            is really a leaf before changing cl->un.leaf! */
1540         if (!cl->level) {
1541                 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum;
1542                 if (!hopt->quantum && cl->un.leaf.quantum < 1000) {
1543                         printk(KERN_WARNING "HTB: quantum of class %X is small. Consider r2q change.\n", cl->classid);
1544                         cl->un.leaf.quantum = 1000;
1545                 }
1546                 if (!hopt->quantum && cl->un.leaf.quantum > 200000) {
1547                         printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid);
1548                         cl->un.leaf.quantum = 200000;
1549                 }
1550                 if (hopt->quantum)
1551                         cl->un.leaf.quantum = hopt->quantum;
1552                 if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO)
1553                         cl->un.leaf.prio = TC_HTB_NUMPRIO - 1;
1554         }
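        /* Worked example of the defaulting above (numbers illustrative,
           assuming the usual rate2quantum default of 10): a class with rate
           800kbit (100000 bytes/s) gets quantum = 100000/10 = 10000 bytes,
           while a 64kbit class (8000 bytes/s) would get 800, which is clamped
           to 1000 with a warning unless an explicit quantum was supplied. */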
1555
1556         cl->buffer = hopt->buffer;
1557         cl->cbuffer = hopt->cbuffer;
1558         if (cl->rate) qdisc_put_rtab(cl->rate);
             cl->rate = rtab;
1559         if (cl->ceil) qdisc_put_rtab(cl->ceil);
             cl->ceil = ctab;
1560         sch_tree_unlock(sch);
1561
1562         *arg = (unsigned long)cl;
1563         return 0;
1564
1565 failure:
1566         if (rtab) qdisc_put_rtab(rtab);
1567         if (ctab) qdisc_put_rtab(ctab);
1568         return err;
1569 }
1570
1571 static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1572 {
1573         struct htb_sched *q = (struct htb_sched *)sch->data;
1574         struct htb_class *cl = (struct htb_class *)arg;
1575         struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
1576         HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl);
1577         return fl;
1578 }
1579
1580 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1581         u32 classid)
1582 {
1583         struct htb_sched *q = (struct htb_sched *)sch->data;
1584         struct htb_class *cl = htb_find (classid,sch);
1585         HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt);
1586         /*if (cl && !cl->level) return 0;
1587           The line above used to be there to prevent attaching filters to
1588           leaves. But at least the tc_index filter uses this just to get the
1589           class for other reasons, so we have to allow it.
1590           ----
1591           19.6.2002 As Werner explained, it is ok - bind_filter is just
1592           another way to "lock" the class - unlike "get", this lock can
1593           be broken by the class during destroy, IIUC.
1594          */
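        /* Illustrative example (selector and ids arbitrary): a classifier
           such as
                tc filter add dev eth0 parent 1: protocol ip u32 \
                        match ip dport 80 0xffff flowid 1:10
           binds to class 1:10 through this function; the filter_cnt bump
           below is what makes htb_delete refuse (-EBUSY) to remove a class
           that filters still point at. */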
1595         if (cl) 
1596                 cl->filter_cnt++; 
1597         else 
1598                 q->filter_cnt++;
1599         return (unsigned long)cl;
1600 }
1601
1602 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1603 {
1604         struct htb_sched *q = (struct htb_sched *)sch->data;
1605         struct htb_class *cl = (struct htb_class *)arg;
1606         HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt);
1607         if (cl) 
1608                 cl->filter_cnt--; 
1609         else 
1610                 q->filter_cnt--;
1611 }
1612
1613 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1614 {
1615         struct htb_sched *q = (struct htb_sched *)sch->data;
1616         int i;
1617
1618         if (arg->stop)
1619                 return;
1620
1621         for (i = 0; i < HTB_HSIZE; i++) {
1622                 struct list_head *p;
1623                 list_for_each (p,q->hash+i) {
1624                         struct htb_class *cl = list_entry(p,struct htb_class,hlist);
1625                         if (arg->count < arg->skip) {
1626                                 arg->count++;
1627                                 continue;
1628                         }
1629                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1630                                 arg->stop = 1;
1631                                 return;
1632                         }
1633                         arg->count++;
1634                 }
1635         }
1636 }
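/* Note: this walk is what services class dumps such as `tc -s class show dev
   eth0`; arg->skip and arg->count make the iteration restartable when a dump
   spans multiple netlink messages. */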
1637
1638 static struct Qdisc_class_ops htb_class_ops = {
1639         .graft          =       htb_graft,
1640         .leaf           =       htb_leaf,
1641         .get            =       htb_get,
1642         .put            =       htb_put,
1643         .change         =       htb_change_class,
1644         .delete         =       htb_delete,
1645         .walk           =       htb_walk,
1646         .tcf_chain      =       htb_find_tcf,
1647         .bind_tcf       =       htb_bind_filter,
1648         .unbind_tcf     =       htb_unbind_filter,
1649         .dump           =       htb_dump_class,
1650 };
1651
1652 static struct Qdisc_ops htb_qdisc_ops = {
1653         .next           =       NULL,
1654         .cl_ops         =       &htb_class_ops,
1655         .id             =       "htb",
1656         .priv_size      =       sizeof(struct htb_sched),
1657         .enqueue        =       htb_enqueue,
1658         .dequeue        =       htb_dequeue,
1659         .requeue        =       htb_requeue,
1660         .drop           =       htb_drop,
1661         .init           =       htb_init,
1662         .reset          =       htb_reset,
1663         .destroy        =       htb_destroy,
1664         .change         =       NULL /* htb_change */,
1665         .dump           =       htb_dump,
1666         .owner          =       THIS_MODULE,
1667 };
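/* Illustrative usage: once register_qdisc (below) has run - typically via
   module autoload when the user types
        tc qdisc add dev eth0 root handle 1: htb default 20
   - this ops table dispatches every qdisc- and class-level operation for the
   "htb" discipline. */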
1668
1669 static int __init htb_module_init(void)
1670 {
1671         return register_qdisc(&htb_qdisc_ops);
1672 }
1673 static void __exit htb_module_exit(void) 
1674 {
1675         unregister_qdisc(&htb_qdisc_ops);
1676 }
1677 module_init(htb_module_init)
1678 module_exit(htb_module_exit)
1679 MODULE_LICENSE("GPL");