1 /*
2  * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net>
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * 2003-10-17 - Ported from altq
10  */
11 /*
12  * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
13  *
14  * Permission to use, copy, modify, and distribute this software and
15  * its documentation is hereby granted (including for commercial or
16  * for-profit use), provided that both the copyright notice and this
17  * permission notice appear in all copies of the software, derivative
18  * works, or modified versions, and any portions thereof.
19  *
20  * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
21  * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
22  * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
23  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25  * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
28  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
29  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
32  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
33  * DAMAGE.
34  *
35  * Carnegie Mellon encourages (but does not require) users of this
36  * software to return any improvements or extensions that they make,
37  * and to grant Carnegie Mellon the rights to redistribute these
38  * changes without encumbrance.
39  */
40 /*
41  * H-FSC is described in Proceedings of SIGCOMM'97,
42  * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
43  * Real-Time and Priority Service"
44  * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
45  *
46  * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
47  * when a class has an upperlimit, the fit-time is computed from the
48  * upperlimit service curve.  the link-sharing scheduler does not schedule
49  * a class whose fit-time exceeds the current time.
50  */
51
52 #include <linux/kernel.h>
53 #include <linux/config.h>
54 #include <linux/module.h>
55 #include <linux/types.h>
56 #include <linux/errno.h>
57 #include <linux/jiffies.h>
58 #include <linux/compiler.h>
59 #include <linux/spinlock.h>
60 #include <linux/skbuff.h>
61 #include <linux/string.h>
62 #include <linux/slab.h>
63 #include <linux/timer.h>
64 #include <linux/list.h>
65 #include <linux/init.h>
66 #include <linux/netdevice.h>
67 #include <linux/rtnetlink.h>
68 #include <linux/pkt_sched.h>
69 #include <net/pkt_sched.h>
70 #include <net/pkt_cls.h>
71 #include <asm/system.h>
72 #include <asm/div64.h>
73
74 #define HFSC_DEBUG 1
75
76 /*
77  * kernel internal service curve representation:
78  *   coordinates are given by 64 bit unsigned integers.
79  *   x-axis: unit is clock count.
80  *   y-axis: unit is byte.
81  *
82  *   The service curve parameters are converted to the internal
83  *   representation. The slope values are scaled to avoid overflow.
84  *   The inverse slope values as well as the y-projection of the 1st
85  *   segment are kept in order to avoid 64-bit divide operations
86  *   that are expensive on 32-bit architectures.
87  */
88
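/*
 * A note on the layout (summarized from sc2isc()/rtsc_init() below): both
 * structures describe a two-piece linear curve, slope sm1 for the first dx
 * clock ticks (rising by dy bytes) and slope sm2 thereafter, mirroring the
 * (m1, d, m2) service curve triple passed from userspace; runtime_sc is the
 * same curve anchored at a starting point (x, y).
 */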
89 struct internal_sc
90 {
91         u64     sm1;    /* scaled slope of the 1st segment */
92         u64     ism1;   /* scaled inverse-slope of the 1st segment */
93         u64     dx;     /* the x-projection of the 1st segment */
94         u64     dy;     /* the y-projection of the 1st segment */
95         u64     sm2;    /* scaled slope of the 2nd segment */
96         u64     ism2;   /* scaled inverse-slope of the 2nd segment */
97 };
98
99 /* runtime service curve */
100 struct runtime_sc
101 {
102         u64     x;      /* current starting position on x-axis */
103         u64     y;      /* current starting position on y-axis */
104         u64     sm1;    /* scaled slope of the 1st segment */
105         u64     ism1;   /* scaled inverse-slope of the 1st segment */
106         u64     dx;     /* the x-projection of the 1st segment */
107         u64     dy;     /* the y-projection of the 1st segment */
108         u64     sm2;    /* scaled slope of the 2nd segment */
109         u64     ism2;   /* scaled inverse-slope of the 2nd segment */
110 };
111
112 enum hfsc_class_flags
113 {
114         HFSC_RSC = 0x1,
115         HFSC_FSC = 0x2,
116         HFSC_USC = 0x4
117 };
118
119 struct hfsc_class
120 {
121         u32             classid;        /* class id */
122         unsigned int    refcnt;         /* usage count */
123
124         struct tc_stats stats;          /* generic statistics */
125         spinlock_t      *stats_lock;
126         unsigned int    level;          /* class level in hierarchy */
127         struct tcf_proto *filter_list;  /* filter list */
128         unsigned int    filter_cnt;     /* filter count */
129
130         struct hfsc_sched *sched;       /* scheduler data */
131         struct hfsc_class *cl_parent;   /* parent class */
132         struct list_head siblings;      /* sibling classes */
133         struct list_head children;      /* child classes */
134         struct Qdisc    *qdisc;         /* leaf qdisc */
135
136         struct list_head actlist;       /* active children list */
137         struct list_head alist;         /* active children list member */
138         struct list_head ellist;        /* eligible list member */
139         struct list_head hlist;         /* hash list member */
140         struct list_head dlist;         /* drop list member */
141
142         u64     cl_total;               /* total work in bytes */
143         u64     cl_cumul;               /* cumulative work in bytes done by
144                                            real-time criteria */
145
146         u64     cl_d;                   /* deadline */
147         u64     cl_e;                   /* eligible time */
148         u64     cl_vt;                  /* virtual time */
149         u64     cl_f;                   /* time when this class will fit for
150                                            link-sharing, max(myf, cfmin) */
151         u64     cl_myf;                 /* my fit-time (calculated from this
152                                            class's own upperlimit curve) */
153         u64     cl_myfadj;              /* my fit-time adjustment (to cancel
154                                            history dependence) */
155         u64     cl_cfmin;               /* earliest children's fit-time (used
156                                            with cl_myf to obtain cl_f) */
157         u64     cl_cvtmin;              /* minimal virtual time among the
158                                            children fit for link-sharing
159                                            (monotonic within a period) */
160         u64     cl_vtadj;               /* intra-period cumulative vt
161                                            adjustment */
162         u64     cl_vtoff;               /* inter-period cumulative vt offset */
163         u64     cl_cvtmax;              /* max child's vt in the last period */
164
165         struct internal_sc cl_rsc;      /* internal real-time service curve */
166         struct internal_sc cl_fsc;      /* internal fair service curve */
167         struct internal_sc cl_usc;      /* internal upperlimit service curve */
168         struct runtime_sc cl_deadline;  /* deadline curve */
169         struct runtime_sc cl_eligible;  /* eligible curve */
170         struct runtime_sc cl_virtual;   /* virtual curve */
171         struct runtime_sc cl_ulimit;    /* upperlimit curve */
172
173         unsigned long   cl_flags;       /* which curves are valid */
174         unsigned long   cl_vtperiod;    /* vt period sequence number */
175         unsigned long   cl_parentperiod;/* parent's vt period sequence number*/
176         unsigned long   cl_nactive;     /* number of active children */
177 };
178
179 #define HFSC_HSIZE      16
180
181 struct hfsc_sched
182 {
183         u16     defcls;                         /* default class id */
184         struct hfsc_class root;                 /* root class */
185         struct list_head clhash[HFSC_HSIZE];    /* class hash */
186         struct list_head eligible;              /* eligible list */
187         struct list_head droplist;              /* active leaf class list (for
188                                                    dropping) */
189         struct sk_buff_head requeue;            /* requeued packet */
190         struct timer_list wd_timer;             /* watchdog timer */
191 };
192
193 /*
194  * macros
195  */
196 #ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
197 #include <linux/time.h>
198 #undef PSCHED_GET_TIME
199 #define PSCHED_GET_TIME(stamp)                                          \
200 do {                                                                    \
201         struct timeval tv;                                              \
202         do_gettimeofday(&tv);                                           \
203         (stamp) = 1000000ULL * tv.tv_sec + tv.tv_usec;                  \
204 } while (0)
205 #endif
206
207 #if HFSC_DEBUG
208 #define ASSERT(cond)                                                    \
209 do {                                                                    \
210         if (unlikely(!(cond)))                                          \
211                 printk("assertion %s failed at %s:%i (%s)\n",           \
212                        #cond, __FILE__, __LINE__, __FUNCTION__);        \
213 } while (0)
214 #else
215 #define ASSERT(cond)
216 #endif /* HFSC_DEBUG */
217
218 #define HT_INFINITY     0xffffffffffffffffULL   /* infinite time value */
219
220
221 /*
222  * eligible list holds backlogged classes sorted by their eligible times.
223  * there is one eligible list per hfsc instance.
224  */
225
226 static void
227 ellist_insert(struct hfsc_class *cl)
228 {
229         struct list_head *head = &cl->sched->eligible;
230         struct hfsc_class *p;
231
232         /* check the last entry first */
233         if (list_empty(head) ||
234             ((p = list_entry(head->prev, struct hfsc_class, ellist)) &&
235              p->cl_e <= cl->cl_e)) {
236                 list_add_tail(&cl->ellist, head);
237                 return;
238         }
239
240         list_for_each_entry(p, head, ellist) {
241                 if (cl->cl_e < p->cl_e) {
242                         /* insert cl before p */
243                         list_add_tail(&cl->ellist, &p->ellist);
244                         return;
245                 }
246         }
247         ASSERT(0); /* should not reach here */
248 }
249
250 static inline void
251 ellist_remove(struct hfsc_class *cl)
252 {
253         list_del(&cl->ellist);
254 }
255
256 static void
257 ellist_update(struct hfsc_class *cl)
258 {
259         struct list_head *head = &cl->sched->eligible;
260         struct hfsc_class *p, *last;
261
262         /*
263          * the eligible time of a class increases monotonically.
264          * if the next entry has a larger eligible time, nothing to do.
265          */
266         if (cl->ellist.next == head ||
267             ((p = list_entry(cl->ellist.next, struct hfsc_class, ellist)) &&
268              cl->cl_e <= p->cl_e))
269                 return;
270
271         /* check the last entry */
272         last = list_entry(head->prev, struct hfsc_class, ellist);
273         if (last->cl_e <= cl->cl_e) {
274                 list_move_tail(&cl->ellist, head);
275                 return;
276         }
277
278         /*
279          * the new position must be between the next entry
280          * and the last entry
281          */
282         list_for_each_entry_continue(p, head, ellist) {
283                 if (cl->cl_e < p->cl_e) {
284                         list_move_tail(&cl->ellist, &p->ellist);
285                         return;
286                 }
287         }
288         ASSERT(0); /* should not reach here */
289 }
290
291 /* find the class with the minimum deadline among the eligible classes */
292 static inline struct hfsc_class *
293 ellist_get_mindl(struct list_head *head, u64 cur_time)
294 {
295         struct hfsc_class *p, *cl = NULL;
296
297         list_for_each_entry(p, head, ellist) {
298                 if (p->cl_e > cur_time)
299                         break;
300                 if (cl == NULL || p->cl_d < cl->cl_d)
301                         cl = p;
302         }
303         return cl;
304 }
305
306 /* find the class with minimum eligible time among the eligible classes */
307 static inline struct hfsc_class *
308 ellist_get_minel(struct list_head *head)
309 {
310         if (list_empty(head))
311                 return NULL;
312         return list_entry(head->next, struct hfsc_class, ellist);
313 }
314
315 /*
316  * active children list holds backlogged child classes sorted
317  * by their virtual time. each intermediate class has one active
318  * children list.
319  */
320 static void
321 actlist_insert(struct hfsc_class *cl)
322 {
323         struct list_head *head = &cl->cl_parent->actlist;
324         struct hfsc_class *p;
325
326         /* check the last entry first */
327         if (list_empty(head) ||
328             ((p = list_entry(head->prev, struct hfsc_class, alist)) &&
329              p->cl_vt <= cl->cl_vt)) {
330                 list_add_tail(&cl->alist, head);
331                 return;
332         }
333
334         list_for_each_entry(p, head, alist) {
335                 if (cl->cl_vt < p->cl_vt) {
336                         /* insert cl before p */
337                         list_add_tail(&cl->alist, &p->alist);
338                         return;
339                 }
340         }
341         ASSERT(0); /* should not reach here */
342 }
343
344 static inline void
345 actlist_remove(struct hfsc_class *cl)
346 {
347         list_del(&cl->alist);
348 }
349
350 static void
351 actlist_update(struct hfsc_class *cl)
352 {
353         struct list_head *head = &cl->cl_parent->actlist;
354         struct hfsc_class *p, *last;
355
356         /*
357          * the virtual time of a class increases monotonically.
358          * if the next entry has a larger virtual time, nothing to do.
359          */
360         if (cl->alist.next == head ||
361             ((p = list_entry(cl->alist.next, struct hfsc_class, alist)) &&
362              cl->cl_vt <= p->cl_vt))
363                 return;
364
365         /* check the last entry */
366         last = list_entry(head->prev, struct hfsc_class, alist);
367         if (last->cl_vt <= cl->cl_vt) {
368                 list_move_tail(&cl->alist, head);
369                 return;
370         }
371
372         /*
373          * the new position must be between the next entry
374          * and the last entry
375          */
376         list_for_each_entry_continue(p, head, alist) {
377                 if (cl->cl_vt < p->cl_vt) {
378                         list_move_tail(&cl->alist, &p->alist);
379                         return;
380                 }
381         }
382         ASSERT(0); /* should not reach here */
383 }
384
385 static inline struct hfsc_class *
386 actlist_firstfit(struct hfsc_class *cl, u64 cur_time)
387 {
388         struct hfsc_class *p;
389
390         list_for_each_entry(p, &cl->actlist, alist) {
391                 if (p->cl_f <= cur_time) {
392                         return p;
393                 }
394         }
395         return NULL;
396 }
397
398 /*
399  * get the leaf class with the minimum vt in the hierarchy
400  */
401 static struct hfsc_class *
402 actlist_get_minvt(struct hfsc_class *cl, u64 cur_time)
403 {
404         /* if the root class's cfmin is bigger than cur_time, nothing to do */
405         if (cl->cl_cfmin > cur_time)
406                 return NULL;
407
408         while (cl->level > 0) {
409                 cl = actlist_firstfit(cl, cur_time);
410                 if (cl == NULL)
411                         return NULL;
412                 /*
413                  * update parent's cl_cvtmin.
414                  */
415                 if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
416                         cl->cl_parent->cl_cvtmin = cl->cl_vt;
417         }
418         return cl;
419 }
420
421 /*
422  * service curve support functions
423  *
424  *  external service curve parameters
425  *      m: bps
426  *      d: us
427  *  internal service curve parameters
428  *      sm: (bytes/psched_us) << SM_SHIFT
429  *      ism: (psched_us/byte) << ISM_SHIFT
430  *      dx: psched_us
431  *
432  * Clock source resolution (CONFIG_NET_SCH_CLK_*)
433  *  JIFFIES: for 48<=HZ<=1534 resolution is between 0.63us and 1.27us.
434  *  CPU: resolution is between 0.5us and 1us.
435  *  GETTIMEOFDAY: resolution is exactly 1us.
436  *
437  * sm and ism are scaled in order to keep effective digits.
438  * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective
439  * digits in decimal using the following table.
440  *
441  * Note: We can afford the additional accuracy (altq hfsc keeps at most
442  * 3 effective digits) thanks to the fact that the Linux clock is bounded
443  * much more tightly.
444  *
445  *  bits/sec      100Kbps     1Mbps     10Mbps     100Mbps    1Gbps
446  *  ------------+-------------------------------------------------------
447  *  bytes/0.5us   6.25e-3    62.5e-3    625e-3     6250e-3    62500e-3
448  *  bytes/us      12.5e-3    125e-3     1250e-3    12500e-3   125000e-3
449  *  bytes/1.27us  15.875e-3  158.75e-3  1587.5e-3  15875e-3   158750e-3
450  *
451  *  0.5us/byte    160        16         1.6        0.16       0.016
452  *  us/byte       80         8          0.8        0.08       0.008
453  *  1.27us/byte   63         6.3        0.63       0.063      0.0063
454  */
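/*
 * Worked example (a sketch, assuming the 1us GETTIMEOFDAY clock): at 1Mbps
 * the table above gives 125e-3 bytes/us and 8 us/byte, so the stored
 * fixed-point values are sm = 0.125 * 2^SM_SHIFT = 131072 and
 * ism = 8 * 2^ISM_SHIFT = 2097152.
 */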
455 #define SM_SHIFT        20
456 #define ISM_SHIFT       18
457
458 #define SM_MASK         ((1ULL << SM_SHIFT) - 1)
459 #define ISM_MASK        ((1ULL << ISM_SHIFT) - 1)
460
461 static inline u64
462 seg_x2y(u64 x, u64 sm)
463 {
464         u64 y;
465
466         /*
467          * compute
468          *      y = x * sm >> SM_SHIFT
469          * but divide it for the upper and lower bits to avoid overflow
470          */
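        /*
         * splitting x into its high and low SM_SHIFT bits avoids forming the
         * full x * sm product (which could overflow 64 bits); only the low
         * part needs the final right shift, so the result still equals
         * x * sm >> SM_SHIFT exactly.
         */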
471         y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
472         return y;
473 }
474
475 static inline u64
476 seg_y2x(u64 y, u64 ism)
477 {
478         u64 x;
479
480         if (y == 0)
481                 x = 0;
482         else if (ism == HT_INFINITY)
483                 x = HT_INFINITY;
484         else {
485                 x = (y >> ISM_SHIFT) * ism
486                     + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
487         }
488         return x;
489 }
490
491 /* Convert m (bps) into sm (bytes/psched us) */
492 static u64
493 m2sm(u32 m)
494 {
495         u64 sm;
496
497         sm = ((u64)m << SM_SHIFT);
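        /* round up (ceiling division), so a non-zero rate never yields sm == 0 */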
498         sm += PSCHED_JIFFIE2US(HZ) - 1;
499         do_div(sm, PSCHED_JIFFIE2US(HZ));
500         return sm;
501 }
502
503 /* convert m (bps) into ism (psched us/byte) */
504 static u64
505 m2ism(u32 m)
506 {
507         u64 ism;
508
509         if (m == 0)
510                 ism = HT_INFINITY;
511         else {
512                 ism = ((u64)PSCHED_JIFFIE2US(HZ) << ISM_SHIFT);
513                 ism += m - 1;
514                 do_div(ism, m);
515         }
516         return ism;
517 }
518
519 /* convert d (us) into dx (psched us) */
520 static u64
521 d2dx(u32 d)
522 {
523         u64 dx;
524
525         dx = ((u64)d * PSCHED_JIFFIE2US(HZ));
526         dx += 1000000 - 1;
527         do_div(dx, 1000000);
528         return dx;
529 }
530
531 /* convert sm (bytes/psched us) into m (bps) */
532 static u32
533 sm2m(u64 sm)
534 {
535         u64 m;
536
537         m = (sm * PSCHED_JIFFIE2US(HZ)) >> SM_SHIFT;
538         return (u32)m;
539 }
540
541 /* convert dx (psched us) into d (us) */
542 static u32
543 dx2d(u64 dx)
544 {
545         u64 d;
546
547         d = dx * 1000000;
548         do_div(d, PSCHED_JIFFIE2US(HZ));
549         return (u32)d;
550 }
551
552 static void
553 sc2isc(struct tc_service_curve *sc, struct internal_sc *isc)
554 {
555         isc->sm1  = m2sm(sc->m1);
556         isc->ism1 = m2ism(sc->m1);
557         isc->dx   = d2dx(sc->d);
558         isc->dy   = seg_x2y(isc->dx, isc->sm1);
559         isc->sm2  = m2sm(sc->m2);
560         isc->ism2 = m2ism(sc->m2);
561 }
562
563 /*
564  * initialize the runtime service curve with the given internal
565  * service curve starting at (x, y).
566  */
567 static void
568 rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
569 {
570         rtsc->x    = x;
571         rtsc->y    = y;
572         rtsc->sm1  = isc->sm1;
573         rtsc->ism1 = isc->ism1;
574         rtsc->dx   = isc->dx;
575         rtsc->dy   = isc->dy;
576         rtsc->sm2  = isc->sm2;
577         rtsc->ism2 = isc->ism2;
578 }
579
580 /*
581  * calculate the x-projection of the runtime service curve for the
582  * given y-projection value
583  */
584 static u64
585 rtsc_y2x(struct runtime_sc *rtsc, u64 y)
586 {
587         u64 x;
588
589         if (y < rtsc->y)
590                 x = rtsc->x;
591         else if (y <= rtsc->y + rtsc->dy) {
592                 /* x belongs to the 1st segment */
593                 if (rtsc->dy == 0)
594                         x = rtsc->x + rtsc->dx;
595                 else
596                         x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
597         } else {
598                 /* x belongs to the 2nd segment */
599                 x = rtsc->x + rtsc->dx
600                     + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
601         }
602         return x;
603 }
604
605 static u64
606 rtsc_x2y(struct runtime_sc *rtsc, u64 x)
607 {
608         u64 y;
609
610         if (x <= rtsc->x)
611                 y = rtsc->y;
612         else if (x <= rtsc->x + rtsc->dx)
613                 /* y belongs to the 1st segment */
614                 y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
615         else
616                 /* y belongs to the 2nd segment */
617                 y = rtsc->y + rtsc->dy
618                     + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
619         return y;
620 }
621
622 /*
623  * update the runtime service curve by taking the minimum of the current
624  * runtime service curve and the service curve starting at (x, y).
625  */
626 static void
627 rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
628 {
629         u64 y1, y2, dx, dy;
630         u32 dsm;
631
632         if (isc->sm1 <= isc->sm2) {
633                 /* service curve is convex */
634                 y1 = rtsc_x2y(rtsc, x);
635                 if (y1 < y)
636                         /* the current rtsc is smaller */
637                         return;
638                 rtsc->x = x;
639                 rtsc->y = y;
640                 return;
641         }
642
643         /*
644          * service curve is concave
645          * compute the two y values of the current rtsc
646          *      y1: at x
647          *      y2: at (x + dx)
648          */
649         y1 = rtsc_x2y(rtsc, x);
650         if (y1 <= y) {
651                 /* rtsc is below isc, no change to rtsc */
652                 return;
653         }
654
655         y2 = rtsc_x2y(rtsc, x + isc->dx);
656         if (y2 >= y + isc->dy) {
657                 /* rtsc is above isc, replace rtsc by isc */
658                 rtsc->x = x;
659                 rtsc->y = y;
660                 rtsc->dx = isc->dx;
661                 rtsc->dy = isc->dy;
662                 return;
663         }
664
665         /*
666          * the two curves intersect
667          * compute the offsets (dx, dy) using the reverse
668          * function of seg_x2y()
669          *      seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
670          */
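        /*
         * i.e. dx * sm1 == dx * sm2 + (y1 - y), hence
         * dx = (y1 - y) / (sm1 - sm2); shifting (y1 - y) left by SM_SHIFT
         * cancels the SM_SHIFT scaling of the slopes before the division.
         */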
671         dx = (y1 - y) << SM_SHIFT;
672         dsm = isc->sm1 - isc->sm2;
673         do_div(dx, dsm);
674         /*
675          * check if (x, y1) belongs to the 1st segment of rtsc.
676          * if so, add the offset.
677          */
678         if (rtsc->x + rtsc->dx > x)
679                 dx += rtsc->x + rtsc->dx - x;
680         dy = seg_x2y(dx, isc->sm1);
681
682         rtsc->x = x;
683         rtsc->y = y;
684         rtsc->dx = dx;
685         rtsc->dy = dy;
686         return;
687 }
688
689 static void
690 init_ed(struct hfsc_class *cl, unsigned int next_len)
691 {
692         u64 cur_time;
693
694         PSCHED_GET_TIME(cur_time);
695
696         /* update the deadline curve */
697         rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
698
699         /*
700          * update the eligible curve.
701          * for concave, it is equal to the deadline curve.
702          * for convex, it is a linear curve with slope m2.
703          */
704         cl->cl_eligible = cl->cl_deadline;
705         if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
706                 cl->cl_eligible.dx = 0;
707                 cl->cl_eligible.dy = 0;
708         }
709
710         /* compute e and d */
711         cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
712         cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
713
714         ellist_insert(cl);
715 }
716
717 static void
718 update_ed(struct hfsc_class *cl, unsigned int next_len)
719 {
720         cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
721         cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
722
723         ellist_update(cl);
724 }
725
726 static inline void
727 update_d(struct hfsc_class *cl, unsigned int next_len)
728 {
729         cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
730 }
731
732 static void
733 update_cfmin(struct hfsc_class *cl)
734 {
735         struct hfsc_class *p;
736         u64 cfmin;
737
738         if (list_empty(&cl->actlist)) {
739                 cl->cl_cfmin = 0;
740                 return;
741         }
742         cfmin = HT_INFINITY;
743         list_for_each_entry(p, &cl->actlist, alist) {
744                 if (p->cl_f == 0) {
745                         cl->cl_cfmin = 0;
746                         return;
747                 }
748                 if (p->cl_f < cfmin)
749                         cfmin = p->cl_f;
750         }
751         cl->cl_cfmin = cfmin;
752 }
753
754 static void
755 init_vf(struct hfsc_class *cl, unsigned int len)
756 {
757         struct hfsc_class *max_cl, *p;
758         u64 vt, f, cur_time;
759         int go_active;
760
761         cur_time = 0;
762         go_active = 1;
763         for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
764                 if (go_active && cl->cl_nactive++ == 0)
765                         go_active = 1;
766                 else
767                         go_active = 0;
768
769                 if (go_active) {
770                         if (!list_empty(&cl->cl_parent->actlist)) {
771                                 max_cl = list_entry(cl->cl_parent->actlist.prev,
772                                                     struct hfsc_class, alist);
773                                 /*
774                                  * set vt to the average of the min and max
775                                  * classes.  if the parent's period didn't
776                                  * change, don't decrease vt of the class.
777                                  */
778                                 vt = max_cl->cl_vt;
779                                 if (cl->cl_parent->cl_cvtmin != 0)
780                                         vt = (cl->cl_parent->cl_cvtmin + vt)/2;
781
782                                 if (cl->cl_parent->cl_vtperiod !=
783                                     cl->cl_parentperiod || vt > cl->cl_vt)
784                                         cl->cl_vt = vt;
785                         } else {
786                                 /*
787                                  * first child for a new parent backlog period.
788                                  * add parent's cvtmax to vtoff of children
789                                  * to make a new vt (vtoff + vt) larger than
790                                  * the vt in the last period for all children.
791                                  */
792                                 vt = cl->cl_parent->cl_cvtmax;
793                                 list_for_each_entry(p, &cl->cl_parent->children,
794                                                                        siblings)
795                                         p->cl_vtoff += vt;
796                                 cl->cl_vt = 0;
797                                 cl->cl_parent->cl_cvtmax = 0;
798                                 cl->cl_parent->cl_cvtmin = 0;
799                         }
800
801                         /* update the virtual curve */
802                         vt = cl->cl_vt + cl->cl_vtoff;
803                         rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt,
804                                                       cl->cl_total);
805                         if (cl->cl_virtual.x == vt) {
806                                 cl->cl_virtual.x -= cl->cl_vtoff;
807                                 cl->cl_vtoff = 0;
808                         }
809                         cl->cl_vtadj = 0;
810
811                         cl->cl_vtperiod++;  /* increment vt period */
812                         cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
813                         if (cl->cl_parent->cl_nactive == 0)
814                                 cl->cl_parentperiod++;
815                         cl->cl_f = 0;
816
817                         actlist_insert(cl);
818
819                         if (cl->cl_flags & HFSC_USC) {
820                                 /* class has upper limit curve */
821                                 if (cur_time == 0)
822                                         PSCHED_GET_TIME(cur_time);
823
824                                 /* update the ulimit curve */
825                                 rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time,
826                                          cl->cl_total);
827                                 /* compute myf */
828                                 cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
829                                                       cl->cl_total);
830                                 cl->cl_myfadj = 0;
831                         }
832                 }
833
834                 f = max(cl->cl_myf, cl->cl_cfmin);
835                 if (f != cl->cl_f) {
836                         cl->cl_f = f;
837                         update_cfmin(cl->cl_parent);
838                 }
839         }
840 }
841
842 static void
843 update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time)
844 {
845         u64 f; /* , myf_bound, delta; */
846         int go_passive = 0;
847
848         if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC)
849                 go_passive = 1;
850
851         for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
852                 cl->cl_total += len;
853
854                 if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0)
855                         continue;
856
857                 if (go_passive && --cl->cl_nactive == 0)
858                         go_passive = 1;
859                 else
860                         go_passive = 0;
861
862                 if (go_passive) {
863                         /* no more active child, going passive */
864
865                         /* update cvtmax of the parent class */
866                         if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
867                                 cl->cl_parent->cl_cvtmax = cl->cl_vt;
868
869                         /* remove this class from the vt list */
870                         actlist_remove(cl);
871
872                         update_cfmin(cl->cl_parent);
873
874                         continue;
875                 }
876
877                 /*
878                  * update vt and f
879                  */
880                 cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
881                             - cl->cl_vtoff + cl->cl_vtadj;
882
883                 /*
884                  * if vt of the class is smaller than cvtmin,
885                  * the class was skipped in the past due to non-fit.
886                  * if so, we need to adjust vtadj.
887                  */
888                 if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
889                         cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
890                         cl->cl_vt = cl->cl_parent->cl_cvtmin;
891                 }
892
893                 /* update the vt list */
894                 actlist_update(cl);
895
896                 if (cl->cl_flags & HFSC_USC) {
897                         cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit,
898                                                               cl->cl_total);
899 #if 0
900                         /*
901                          * This code causes classes to stay way under their
902                          * limit when multiple classes are used at gigabit
903                          * speed. needs investigation. -kaber
904                          */
905                         /*
906                          * if myf lags behind by more than one clock tick
907                          * from the current time, adjust myfadj to prevent
908                          * a rate-limited class from going greedy.
909                          * in a steady state under rate-limiting, myf
910                          * fluctuates within one clock tick.
911                          */
912                         myf_bound = cur_time - PSCHED_JIFFIE2US(1);
913                         if (cl->cl_myf < myf_bound) {
914                                 delta = cur_time - cl->cl_myf;
915                                 cl->cl_myfadj += delta;
916                                 cl->cl_myf += delta;
917                         }
918 #endif
919                 }
920
921                 f = max(cl->cl_myf, cl->cl_cfmin);
922                 if (f != cl->cl_f) {
923                         cl->cl_f = f;
924                         update_cfmin(cl->cl_parent);
925                 }
926         }
927 }
928
929 static void
930 set_active(struct hfsc_class *cl, unsigned int len)
931 {
932         if (cl->cl_flags & HFSC_RSC)
933                 init_ed(cl, len);
934         if (cl->cl_flags & HFSC_FSC)
935                 init_vf(cl, len);
936
937         list_add_tail(&cl->dlist, &cl->sched->droplist);
938 }
939
940 static void
941 set_passive(struct hfsc_class *cl)
942 {
943         if (cl->cl_flags & HFSC_RSC)
944                 ellist_remove(cl);
945
946         list_del(&cl->dlist);
947
948         /*
949          * actlist removal is now handled in update_vf(), so update_vf(cl, 0, 0)
950          * must be called explicitly to remove a class from the actlist
951          */
952 }
953
954 /*
955  * hack to get length of first packet in queue.
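 * there is no peek operation in this qdisc interface, so the head packet
 * is dequeued and immediately requeued below just to read its length.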
956  */
957 static unsigned int
958 qdisc_peek_len(struct Qdisc *sch)
959 {
960         struct sk_buff *skb;
961         unsigned int len;
962
963         skb = sch->dequeue(sch);
964         if (skb == NULL) {
965                 if (net_ratelimit())
966                         printk("qdisc_peek_len: non work-conserving qdisc ?\n");
967                 return 0;
968         }
969         len = skb->len;
970         if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) {
971                 if (net_ratelimit())
972                         printk("qdisc_peek_len: failed to requeue\n");
973                 return 0;
974         }
975         return len;
976 }
977
978 static void
979 hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
980 {
981         unsigned int len = cl->qdisc->q.qlen;
982
983         qdisc_reset(cl->qdisc);
984         if (len > 0) {
985                 update_vf(cl, 0, 0);
986                 set_passive(cl);
987                 sch->q.qlen -= len;
988         }
989 }
990
991 static void
992 hfsc_adjust_levels(struct hfsc_class *cl)
993 {
994         struct hfsc_class *p;
995         unsigned int level;
996
997         do {
998                 level = 0;
999                 list_for_each_entry(p, &cl->children, siblings) {
1000                         if (p->level > level)
1001                                 level = p->level;
1002                 }
1003                 cl->level = level + 1;
1004         } while ((cl = cl->cl_parent) != NULL);
1005 }
1006
1007 static inline unsigned int
1008 hfsc_hash(u32 h)
1009 {
1010         h ^= h >> 8;
1011         h ^= h >> 4;
1012
1013         return h & (HFSC_HSIZE - 1);
1014 }
1015
1016 static inline struct hfsc_class *
1017 hfsc_find_class(u32 classid, struct Qdisc *sch)
1018 {
1019         struct hfsc_sched *q = qdisc_priv(sch);
1020         struct hfsc_class *cl;
1021
1022         list_for_each_entry(cl, &q->clhash[hfsc_hash(classid)], hlist) {
1023                 if (cl->classid == classid)
1024                         return cl;
1025         }
1026         return NULL;
1027 }
1028
1029 static void
1030 hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc,
1031                 u64 cur_time)
1032 {
1033         sc2isc(rsc, &cl->cl_rsc);
1034         rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul);
1035         cl->cl_eligible = cl->cl_deadline;
1036         if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) {
1037                 cl->cl_eligible.dx = 0;
1038                 cl->cl_eligible.dy = 0;
1039         }
1040         cl->cl_flags |= HFSC_RSC;
1041 }
1042
1043 static void
1044 hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc)
1045 {
1046         sc2isc(fsc, &cl->cl_fsc);
1047         rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
1048         cl->cl_flags |= HFSC_FSC;
1049 }
1050
1051 static void
1052 hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
1053                 u64 cur_time)
1054 {
1055         sc2isc(usc, &cl->cl_usc);
1056         rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total);
1057         cl->cl_flags |= HFSC_USC;
1058 }
1059
1060 static int
1061 hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1062                   struct rtattr **tca, unsigned long *arg)
1063 {
1064         struct hfsc_sched *q = qdisc_priv(sch);
1065         struct hfsc_class *cl = (struct hfsc_class *)*arg;
1066         struct hfsc_class *parent = NULL;
1067         struct rtattr *opt = tca[TCA_OPTIONS-1];
1068         struct rtattr *tb[TCA_HFSC_MAX];
1069         struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL;
1070         u64 cur_time;
1071
1072         if (opt == NULL ||
1073             rtattr_parse(tb, TCA_HFSC_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)))
1074                 return -EINVAL;
1075
1076         if (tb[TCA_HFSC_RSC-1]) {
1077                 if (RTA_PAYLOAD(tb[TCA_HFSC_RSC-1]) < sizeof(*rsc))
1078                         return -EINVAL;
1079                 rsc = RTA_DATA(tb[TCA_HFSC_RSC-1]);
1080                 if (rsc->m1 == 0 && rsc->m2 == 0)
1081                         rsc = NULL;
1082         }
1083
1084         if (tb[TCA_HFSC_FSC-1]) {
1085                 if (RTA_PAYLOAD(tb[TCA_HFSC_FSC-1]) < sizeof(*fsc))
1086                         return -EINVAL;
1087                 fsc = RTA_DATA(tb[TCA_HFSC_FSC-1]);
1088                 if (fsc->m1 == 0 && fsc->m2 == 0)
1089                         fsc = NULL;
1090         }
1091
1092         if (tb[TCA_HFSC_USC-1]) {
1093                 if (RTA_PAYLOAD(tb[TCA_HFSC_USC-1]) < sizeof(*usc))
1094                         return -EINVAL;
1095                 usc = RTA_DATA(tb[TCA_HFSC_USC-1]);
1096                 if (usc->m1 == 0 && usc->m2 == 0)
1097                         usc = NULL;
1098         }
1099
1100         if (cl != NULL) {
1101                 if (parentid) {
1102                         if (cl->cl_parent && cl->cl_parent->classid != parentid)
1103                                 return -EINVAL;
1104                         if (cl->cl_parent == NULL && parentid != TC_H_ROOT)
1105                                 return -EINVAL;
1106                 }
1107                 PSCHED_GET_TIME(cur_time);
1108
1109                 sch_tree_lock(sch);
1110                 if (rsc != NULL)
1111                         hfsc_change_rsc(cl, rsc, cur_time);
1112                 if (fsc != NULL)
1113                         hfsc_change_fsc(cl, fsc);
1114                 if (usc != NULL)
1115                         hfsc_change_usc(cl, usc, cur_time);
1116
1117                 if (cl->qdisc->q.qlen != 0) {
1118                         if (cl->cl_flags & HFSC_RSC)
1119                                 update_ed(cl, qdisc_peek_len(cl->qdisc));
1120                         if (cl->cl_flags & HFSC_FSC)
1121                                 update_vf(cl, 0, cur_time);
1122                 }
1123                 sch_tree_unlock(sch);
1124
1125 #ifdef CONFIG_NET_ESTIMATOR
1126                 if (tca[TCA_RATE-1]) {
1127                         qdisc_kill_estimator(&cl->stats);
1128                         qdisc_new_estimator(&cl->stats, cl->stats_lock,
1129                                             tca[TCA_RATE-1]);
1130                 }
1131 #endif
1132                 return 0;
1133         }
1134
1135         if (parentid == TC_H_ROOT)
1136                 return -EEXIST;
1137
1138         parent = &q->root;
1139         if (parentid) {
1140                 parent = hfsc_find_class(parentid, sch);
1141                 if (parent == NULL)
1142                         return -ENOENT;
1143         }
1144
1145         if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
1146                 return -EINVAL;
1147         if (hfsc_find_class(classid, sch))
1148                 return -EEXIST;
1149
1150         if (rsc == NULL && fsc == NULL)
1151                 return -EINVAL;
1152
1153         cl = kmalloc(sizeof(struct hfsc_class), GFP_KERNEL);
1154         if (cl == NULL)
1155                 return -ENOBUFS;
1156         memset(cl, 0, sizeof(struct hfsc_class));
1157
1158         if (rsc != NULL)
1159                 hfsc_change_rsc(cl, rsc, 0);
1160         if (fsc != NULL)
1161                 hfsc_change_fsc(cl, fsc);
1162         if (usc != NULL)
1163                 hfsc_change_usc(cl, usc, 0);
1164
1165         cl->refcnt    = 1;
1166         cl->classid   = classid;
1167         cl->sched     = q;
1168         cl->cl_parent = parent;
1169         cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1170         if (cl->qdisc == NULL)
1171                 cl->qdisc = &noop_qdisc;
1172         cl->stats_lock = &sch->dev->queue_lock;
1173         INIT_LIST_HEAD(&cl->children);
1174         INIT_LIST_HEAD(&cl->actlist);
1175
1176         sch_tree_lock(sch);
1177         list_add_tail(&cl->hlist, &q->clhash[hfsc_hash(classid)]);
1178         list_add_tail(&cl->siblings, &parent->children);
1179         if (parent->level == 0)
1180                 hfsc_purge_queue(sch, parent);
1181         hfsc_adjust_levels(parent);
1182         sch_tree_unlock(sch);
1183
1184 #ifdef CONFIG_NET_ESTIMATOR
1185         if (tca[TCA_RATE-1])
1186                 qdisc_new_estimator(&cl->stats, cl->stats_lock,
1187                                     tca[TCA_RATE-1]);
1188 #endif
1189         *arg = (unsigned long)cl;
1190         return 0;
1191 }
1192
1193 static void
1194 hfsc_destroy_filters(struct tcf_proto **fl)
1195 {
1196         struct tcf_proto *tp;
1197
1198         while ((tp = *fl) != NULL) {
1199                 *fl = tp->next;
1200                 tcf_destroy(tp);
1201         }
1202 }
1203
1204 static void
1205 hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
1206 {
1207         struct hfsc_sched *q = qdisc_priv(sch);
1208
1209         hfsc_destroy_filters(&cl->filter_list);
1210         qdisc_destroy(cl->qdisc);
1211 #ifdef CONFIG_NET_ESTIMATOR
1212         qdisc_kill_estimator(&cl->stats);
1213 #endif
1214         if (cl != &q->root)
1215                 kfree(cl);
1216 }
1217
1218 static int
1219 hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
1220 {
1221         struct hfsc_sched *q = qdisc_priv(sch);
1222         struct hfsc_class *cl = (struct hfsc_class *)arg;
1223
1224         if (cl->level > 0 || cl->filter_cnt > 0 || cl == &q->root)
1225                 return -EBUSY;
1226
1227         sch_tree_lock(sch);
1228
1229         list_del(&cl->hlist);
1230         list_del(&cl->siblings);
1231         hfsc_adjust_levels(cl->cl_parent);
1232         hfsc_purge_queue(sch, cl);
1233         if (--cl->refcnt == 0)
1234                 hfsc_destroy_class(sch, cl);
1235
1236         sch_tree_unlock(sch);
1237         return 0;
1238 }
1239
1240 static struct hfsc_class *
1241 hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
1242 {
1243         struct hfsc_sched *q = qdisc_priv(sch);
1244         struct hfsc_class *cl;
1245         struct tcf_result res;
1246         struct tcf_proto *tcf;
1247         int result;
1248
1249         if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 &&
1250             (cl = hfsc_find_class(skb->priority, sch)) != NULL)
1251                 if (cl->level == 0)
1252                         return cl;
1253
1254         tcf = q->root.filter_list;
1255         while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
1256 #ifdef CONFIG_NET_CLS_ACT
1257                 int terminal = 0;
1258                 switch (result) {
1259                 case TC_ACT_SHOT: 
1260                         *qres = NET_XMIT_DROP;
1261                         terminal = 1;
1262                         break;
1263                 case TC_ACT_QUEUED:
1264                 case TC_ACT_STOLEN: 
1265                         terminal = 1;
1266                         break;
1267                 case TC_ACT_RECLASSIFY: 
1268                 case TC_ACT_OK:
1269                 case TC_ACT_UNSPEC:
1270                 default:
1271                         break;
1272                 }
1273
1274                 if (terminal) {
1275                         kfree_skb(skb);
1276                         return NULL;
1277                 }
1278 #else
1279 #ifdef CONFIG_NET_CLS_POLICE
1280                 if (result == TC_POLICE_SHOT)
1281                         return NULL;
1282 #endif
1283 #endif
1284                 if ((cl = (struct hfsc_class *)res.class) == NULL) {
1285                         if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
1286                                 break; /* filter selected invalid classid */
1287                 }
1288
1289                 if (cl->level == 0)
1290                         return cl; /* hit leaf class */
1291
1292                 /* apply inner filter chain */
1293                 tcf = cl->filter_list;
1294         }
1295
1296         /* classification failed, try default class */
1297         cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1298         if (cl == NULL || cl->level > 0)
1299                 return NULL;
1300
1301         return cl;
1302 }
1303
1304 static int
1305 hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1306                  struct Qdisc **old)
1307 {
1308         struct hfsc_class *cl = (struct hfsc_class *)arg;
1309
1310         if (cl == NULL)
1311                 return -ENOENT;
1312         if (cl->level > 0)
1313                 return -EINVAL;
1314         if (new == NULL) {
1315                 new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1316                 if (new == NULL)
1317                         new = &noop_qdisc;
1318         }
1319
1320         sch_tree_lock(sch);
1321         hfsc_purge_queue(sch, cl);
1322         *old = xchg(&cl->qdisc, new);
1323         sch_tree_unlock(sch);
1324         return 0;
1325 }
1326
1327 static struct Qdisc *
1328 hfsc_class_leaf(struct Qdisc *sch, unsigned long arg)
1329 {
1330         struct hfsc_class *cl = (struct hfsc_class *)arg;
1331
1332         if (cl != NULL && cl->level == 0)
1333                 return cl->qdisc;
1334
1335         return NULL;
1336 }
1337
1338 static unsigned long
1339 hfsc_get_class(struct Qdisc *sch, u32 classid)
1340 {
1341         struct hfsc_class *cl = hfsc_find_class(classid, sch);
1342
1343         if (cl != NULL)
1344                 cl->refcnt++;
1345
1346         return (unsigned long)cl;
1347 }
1348
1349 static void
1350 hfsc_put_class(struct Qdisc *sch, unsigned long arg)
1351 {
1352         struct hfsc_class *cl = (struct hfsc_class *)arg;
1353
1354         if (--cl->refcnt == 0)
1355                 hfsc_destroy_class(sch, cl);
1356 }
1357
1358 static unsigned long
1359 hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid)
1360 {
1361         struct hfsc_class *p = (struct hfsc_class *)parent;
1362         struct hfsc_class *cl = hfsc_find_class(classid, sch);
1363
1364         if (cl != NULL) {
1365                 if (p != NULL && p->level <= cl->level)
1366                         return 0;
1367                 cl->filter_cnt++;
1368         }
1369
1370         return (unsigned long)cl;
1371 }
1372
1373 static void
1374 hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
1375 {
1376         struct hfsc_class *cl = (struct hfsc_class *)arg;
1377
1378         cl->filter_cnt--;
1379 }
1380
1381 static struct tcf_proto **
1382 hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg)
1383 {
1384         struct hfsc_sched *q = qdisc_priv(sch);
1385         struct hfsc_class *cl = (struct hfsc_class *)arg;
1386
1387         if (cl == NULL)
1388                 cl = &q->root;
1389
1390         return &cl->filter_list;
1391 }
1392
1393 static int
1394 hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1395 {
1396         struct tc_service_curve tsc;
1397
1398         tsc.m1 = sm2m(sc->sm1);
1399         tsc.d  = dx2d(sc->dx);
1400         tsc.m2 = sm2m(sc->sm2);
1401         RTA_PUT(skb, attr, sizeof(tsc), &tsc);
1402
1403         return skb->len;
1404
1405  rtattr_failure:
1406         return -1;
1407 }
1408
1409 static inline int
1410 hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1411 {
1412         if ((cl->cl_flags & HFSC_RSC) &&
1413             (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0))
1414                 goto rtattr_failure;
1415
1416         if ((cl->cl_flags & HFSC_FSC) &&
1417             (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0))
1418                 goto rtattr_failure;
1419
1420         if ((cl->cl_flags & HFSC_USC) &&
1421             (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0))
1422                 goto rtattr_failure;
1423
1424         return skb->len;
1425
1426  rtattr_failure:
1427         return -1;
1428 }
1429
1430 static inline int
1431 hfsc_dump_stats(struct sk_buff *skb, struct hfsc_class *cl)
1432 {
1433         cl->stats.qlen = cl->qdisc->q.qlen;
1434         if (qdisc_copy_stats(skb, &cl->stats, cl->stats_lock) < 0)
1435                 goto rtattr_failure;
1436
1437         return skb->len;
1438
1439  rtattr_failure:
1440         return -1;
1441 }
1442
1443 static inline int
1444 hfsc_dump_xstats(struct sk_buff *skb, struct hfsc_class *cl)
1445 {
1446         struct tc_hfsc_stats xstats;
1447
1448         xstats.level  = cl->level;
1449         xstats.period = cl->cl_vtperiod;
1450         xstats.work   = cl->cl_total;
1451         xstats.rtwork = cl->cl_cumul;
1452         RTA_PUT(skb, TCA_XSTATS, sizeof(xstats), &xstats);
1453
1454         return skb->len;
1455
1456  rtattr_failure:
1457         return -1;
1458 }
1459
1460 static int
1461 hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
1462                 struct tcmsg *tcm)
1463 {
1464         struct hfsc_class *cl = (struct hfsc_class *)arg;
1465         unsigned char *b = skb->tail;
1466         struct rtattr *rta = (struct rtattr *)b;
1467
1468         tcm->tcm_parent = cl->cl_parent ? cl->cl_parent->classid : TC_H_ROOT;
1469         tcm->tcm_handle = cl->classid;
1470         if (cl->level == 0)
1471                 tcm->tcm_info = cl->qdisc->handle;
1472
1473         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1474         if (hfsc_dump_curves(skb, cl) < 0)
1475                 goto rtattr_failure;
1476         rta->rta_len = skb->tail - b;
1477
1478         if ((hfsc_dump_stats(skb, cl) < 0) ||
1479             (hfsc_dump_xstats(skb, cl) < 0))
1480                 goto rtattr_failure;
1481
1482         return skb->len;
1483
1484  rtattr_failure:
1485         skb_trim(skb, b - skb->data);
1486         return -1;
1487 }
1488
1489 static void
1490 hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1491 {
1492         struct hfsc_sched *q = qdisc_priv(sch);
1493         struct hfsc_class *cl;
1494         unsigned int i;
1495
1496         if (arg->stop)
1497                 return;
1498
1499         for (i = 0; i < HFSC_HSIZE; i++) {
1500                 list_for_each_entry(cl, &q->clhash[i], hlist) {
1501                         if (arg->count < arg->skip) {
1502                                 arg->count++;
1503                                 continue;
1504                         }
1505                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1506                                 arg->stop = 1;
1507                                 return;
1508                         }
1509                         arg->count++;
1510                 }
1511         }
1512 }
1513
1514 static void
1515 hfsc_watchdog(unsigned long arg)
1516 {
1517         struct Qdisc *sch = (struct Qdisc *)arg;
1518
1519         sch->flags &= ~TCQ_F_THROTTLED;
1520         netif_schedule(sch->dev);
1521 }
1522
1523 static void
1524 hfsc_schedule_watchdog(struct Qdisc *sch, u64 cur_time)
1525 {
1526         struct hfsc_sched *q = qdisc_priv(sch);
1527         struct hfsc_class *cl;
1528         u64 next_time = 0;
1529         long delay;
1530
1531         if ((cl = ellist_get_minel(&q->eligible)) != NULL)
1532                 next_time = cl->cl_e;
1533         if (q->root.cl_cfmin != 0) {
1534                 if (next_time == 0 || next_time > q->root.cl_cfmin)
1535                         next_time = q->root.cl_cfmin;
1536         }
1537         ASSERT(next_time != 0);
1538         delay = next_time - cur_time;
1539         delay = PSCHED_US2JIFFIE(delay);
1540
1541         sch->flags |= TCQ_F_THROTTLED;
1542         mod_timer(&q->wd_timer, jiffies + delay);
1543 }
1544
1545 static int
1546 hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt)
1547 {
1548         struct hfsc_sched *q = qdisc_priv(sch);
1549         struct tc_hfsc_qopt *qopt;
1550         unsigned int i;
1551
1552         if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
1553                 return -EINVAL;
1554         qopt = RTA_DATA(opt);
1555
1556         memset(q, 0, sizeof(struct hfsc_sched));
1557         sch->stats_lock = &sch->dev->queue_lock;
1558
1559         q->defcls = qopt->defcls;
1560         for (i = 0; i < HFSC_HSIZE; i++)
1561                 INIT_LIST_HEAD(&q->clhash[i]);
1562         INIT_LIST_HEAD(&q->eligible);
1563         INIT_LIST_HEAD(&q->droplist);
1564         skb_queue_head_init(&q->requeue);
1565
1566         q->root.refcnt  = 1;
1567         q->root.classid = sch->handle;
1568         q->root.sched   = q;
1569         q->root.qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1570         if (q->root.qdisc == NULL)
1571                 q->root.qdisc = &noop_qdisc;
1572         q->root.stats_lock = &sch->dev->queue_lock;
1573         INIT_LIST_HEAD(&q->root.children);
1574         INIT_LIST_HEAD(&q->root.actlist);
1575
1576         list_add(&q->root.hlist, &q->clhash[hfsc_hash(q->root.classid)]);
1577
1578         init_timer(&q->wd_timer);
1579         q->wd_timer.function = hfsc_watchdog;
1580         q->wd_timer.data = (unsigned long)sch;
1581
1582         return 0;
1583 }
1584
static int
hfsc_change_qdisc(struct Qdisc *sch, struct rtattr *opt)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct tc_hfsc_qopt *qopt;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;
	qopt = RTA_DATA(opt);

	sch_tree_lock(sch);
	q->defcls = qopt->defcls;
	sch_tree_unlock(sch);

	return 0;
}

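/*
 * Return a class to its initial scheduling state: clear all real-time,
 * virtual-time and fit-time bookkeeping, reset the child qdisc and
 * rewind the runtime service curves to the origin.
 */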
static void
hfsc_reset_class(struct hfsc_class *cl)
{
	cl->cl_total        = 0;
	cl->cl_cumul        = 0;
	cl->cl_d            = 0;
	cl->cl_e            = 0;
	cl->cl_vt           = 0;
	cl->cl_vtadj        = 0;
	cl->cl_vtoff        = 0;
	cl->cl_cvtmin       = 0;
	cl->cl_cvtmax       = 0;
	cl->cl_vtperiod     = 0;
	cl->cl_parentperiod = 0;
	cl->cl_f            = 0;
	cl->cl_myf          = 0;
	cl->cl_myfadj       = 0;
	cl->cl_cfmin        = 0;
	cl->cl_nactive      = 0;
	INIT_LIST_HEAD(&cl->actlist);
	qdisc_reset(cl->qdisc);

	if (cl->cl_flags & HFSC_RSC)
		rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0);
	if (cl->cl_flags & HFSC_FSC)
		rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0);
	if (cl->cl_flags & HFSC_USC)
		rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0);
}

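/*
 * Reset the whole qdisc: reset every class, drop any requeued packets,
 * empty the eligible and drop lists and stop the watchdog.
 */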
static void
hfsc_reset_qdisc(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	unsigned int i;

	for (i = 0; i < HFSC_HSIZE; i++) {
		list_for_each_entry(cl, &q->clhash[i], hlist)
			hfsc_reset_class(cl);
	}
	__skb_queue_purge(&q->requeue);
	INIT_LIST_HEAD(&q->eligible);
	INIT_LIST_HEAD(&q->droplist);
	del_timer(&q->wd_timer);
	sch->flags &= ~TCQ_F_THROTTLED;
	sch->q.qlen = 0;
}

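/*
 * Tear down the qdisc: destroy every class (including the built-in
 * root), free requeued packets and stop the watchdog timer.
 */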
static void
hfsc_destroy_qdisc(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl, *next;
	unsigned int i;

	for (i = 0; i < HFSC_HSIZE; i++) {
		list_for_each_entry_safe(cl, next, &q->clhash[i], hlist)
			hfsc_destroy_class(sch, cl);
	}
	__skb_queue_purge(&q->requeue);
	del_timer(&q->wd_timer);
}

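/*
 * Dump the qdisc-level options (the default class id) and the generic
 * statistics to a netlink message.
 */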
static int
hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	unsigned char *b = skb->tail;
	struct tc_hfsc_qopt qopt;

	qopt.defcls = q->defcls;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	sch->stats.qlen = sch->q.qlen;
	if (qdisc_copy_stats(skb, &sch->stats, sch->stats_lock) < 0)
		goto rtattr_failure;

	return skb->len;

 rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

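/*
 * Enqueue: classify the packet into a leaf class and hand it to that
 * class' child qdisc.  When the leaf's first packet arrives the class
 * becomes active and joins the scheduling lists; packet and byte
 * counters are updated on success.
 */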
static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret = NET_XMIT_SUCCESS;
	struct hfsc_class *cl = hfsc_classify(skb, sch, &ret);
	unsigned int len = skb->len;
	int err;

#ifdef CONFIG_NET_CLS_ACT
	if (cl == NULL) {
		if (NET_XMIT_DROP == ret) {
			sch->stats.drops++;
		}
		return ret;
	}
#else
	if (cl == NULL) {
		kfree_skb(skb);
		sch->stats.drops++;
		return NET_XMIT_DROP;
	}
#endif

	err = cl->qdisc->enqueue(skb, cl->qdisc);
	if (unlikely(err != NET_XMIT_SUCCESS)) {
		cl->stats.drops++;
		sch->stats.drops++;
		return err;
	}

	if (cl->qdisc->q.qlen == 1)
		set_active(cl, len);

	cl->stats.packets++;
	cl->stats.bytes += len;
	sch->stats.packets++;
	sch->stats.bytes += len;
	sch->q.qlen++;

	return NET_XMIT_SUCCESS;
}

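/*
 * Dequeue: packets pushed back via ->requeue() are served first.  Then,
 * following the H-FSC algorithm, the real-time criterion takes
 * precedence: among the eligible classes pick the one with the minimum
 * deadline; only when no class is eligible is the link-sharing
 * criterion used, picking the minimum virtual time down the hierarchy.
 * If nothing may be sent yet, the watchdog is armed for the next event.
 */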
static struct sk_buff *
hfsc_dequeue(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	struct sk_buff *skb;
	u64 cur_time;
	unsigned int next_len;
	int realtime = 0;

	if (sch->q.qlen == 0)
		return NULL;
	if ((skb = __skb_dequeue(&q->requeue)))
		goto out;

	PSCHED_GET_TIME(cur_time);

	/*
	 * if there are eligible classes, use real-time criteria.
	 * find the class with the minimum deadline among
	 * the eligible classes.
	 */
	if ((cl = ellist_get_mindl(&q->eligible, cur_time)) != NULL) {
		realtime = 1;
	} else {
		/*
		 * use link-sharing criteria
		 * get the class with the minimum vt in the hierarchy
		 */
		cl = actlist_get_minvt(&q->root, cur_time);
		if (cl == NULL) {
			sch->stats.overlimits++;
			hfsc_schedule_watchdog(sch, cur_time);
			return NULL;
		}
	}

	skb = cl->qdisc->dequeue(cl->qdisc);
	if (skb == NULL) {
		if (net_ratelimit())
			printk("HFSC: Non-work-conserving qdisc ?\n");
		return NULL;
	}

	update_vf(cl, skb->len, cur_time);
	if (realtime)
		cl->cl_cumul += skb->len;

	if (cl->qdisc->q.qlen != 0) {
		if (cl->cl_flags & HFSC_RSC) {
			/* update ed */
			next_len = qdisc_peek_len(cl->qdisc);
			if (realtime)
				update_ed(cl, next_len);
			else
				update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		set_passive(cl);
	}

 out:
	sch->flags &= ~TCQ_F_THROTTLED;
	sch->q.qlen--;

	return skb;
}

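/*
 * Requeue: the lower layer handed the packet back; place it on a
 * private requeue list so it is served ahead of the scheduled classes
 * on the next dequeue.
 */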
static int
hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);

	__skb_queue_head(&q->requeue, skb);
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

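/*
 * Drop: ask the active classes, in drop-list order, to discard one
 * packet.  A class that becomes empty turns passive; otherwise it is
 * moved to the tail of the drop list so subsequent drops rotate across
 * classes.
 */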
static unsigned int
hfsc_drop(struct Qdisc *sch)
{
	struct hfsc_sched *q = qdisc_priv(sch);
	struct hfsc_class *cl;
	unsigned int len;

	list_for_each_entry(cl, &q->droplist, dlist) {
		if (cl->qdisc->ops->drop != NULL &&
		    (len = cl->qdisc->ops->drop(cl->qdisc)) > 0) {
			if (cl->qdisc->q.qlen == 0) {
				update_vf(cl, 0, 0);
				set_passive(cl);
			} else {
				list_move_tail(&cl->dlist, &q->droplist);
			}
			cl->stats.drops++;
			sch->stats.drops++;
			sch->q.qlen--;
			return len;
		}
	}
	return 0;
}

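/*
 * Class and qdisc operation tables wiring the functions above into the
 * packet scheduler core; priv_size reserves room for struct hfsc_sched
 * in every hfsc qdisc instance.
 */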
static struct Qdisc_class_ops hfsc_class_ops = {
	.change		= hfsc_change_class,
	.delete		= hfsc_delete_class,
	.graft		= hfsc_graft_class,
	.leaf		= hfsc_class_leaf,
	.get		= hfsc_get_class,
	.put		= hfsc_put_class,
	.bind_tcf	= hfsc_bind_tcf,
	.unbind_tcf	= hfsc_unbind_tcf,
	.tcf_chain	= hfsc_tcf_chain,
	.dump		= hfsc_dump_class,
	.walk		= hfsc_walk
};

static struct Qdisc_ops hfsc_qdisc_ops = {
	.id		= "hfsc",
	.init		= hfsc_init_qdisc,
	.change		= hfsc_change_qdisc,
	.reset		= hfsc_reset_qdisc,
	.destroy	= hfsc_destroy_qdisc,
	.dump		= hfsc_dump_qdisc,
	.enqueue	= hfsc_enqueue,
	.dequeue	= hfsc_dequeue,
	.requeue	= hfsc_requeue,
	.drop		= hfsc_drop,
	.cl_ops		= &hfsc_class_ops,
	.priv_size	= sizeof(struct hfsc_sched),
	.owner		= THIS_MODULE
};

static int __init
hfsc_init(void)
{
	return register_qdisc(&hfsc_qdisc_ops);
}

static void __exit
hfsc_cleanup(void)
{
	unregister_qdisc(&hfsc_qdisc_ops);
}

MODULE_LICENSE("GPL");
module_init(hfsc_init);
module_exit(hfsc_cleanup);
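
/*
 * Example configuration sketch (illustrative only: the device name,
 * class ids and rates below are placeholders, and the exact syntax
 * depends on the tc/iproute2 version in use):
 *
 *   tc qdisc add dev eth0 root handle 1: hfsc default 10
 *   tc class add dev eth0 parent 1:  classid 1:1  hfsc sc rate 100mbit ul rate 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:10 hfsc sc rate 30mbit  ul rate 100mbit
 *   tc class add dev eth0 parent 1:1 classid 1:20 hfsc rt m1 80mbit d 10ms m2 20mbit ls rate 50mbit
 */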