ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / sched / cls_tcindex.c
1 /*
2  * net/sched/cls_tcindex.c      Packet classifier for skb->tc_index
3  *
4  * Written 1998,1999 by Werner Almesberger, EPFL ICA
5  */
6
7 #include <linux/config.h>
8 #include <linux/module.h>
9 #include <linux/types.h>
10 #include <linux/kernel.h>
11 #include <linux/skbuff.h>
12 #include <linux/errno.h>
13 #include <linux/netdevice.h>
14 #include <net/ip.h>
15 #include <net/pkt_sched.h>
16 #include <net/route.h>
17
18
19 /*
20  * Not quite sure if we need all the xchgs Alexey uses when accessing things.
21  * Can always add them later ... :)
22  */
23
24 /*
25  * Passing parameters to the root seems to be done more awkwardly than really
26  * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
27  * verified. FIXME.
28  */
29
30 #define PERFECT_HASH_THRESHOLD  64      /* use perfect hash if not bigger */
31 #define DEFAULT_HASH_SIZE       64      /* optimized for diffserv */
32
33
34 #if 1 /* control */
35 #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
36 #else
37 #define DPRINTK(format,args...)
38 #endif
39
40 #if 0 /* data */
41 #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
42 #else
43 #define D2PRINTK(format,args...)
44 #endif
45
46
47 #define PRIV(tp)        ((struct tcindex_data *) (tp)->root)
48
49
50 struct tcindex_filter_result {
51         struct tcf_police *police;
52         struct tcf_result res;
53 };
54
55 struct tcindex_filter {
56         __u16 key;
57         struct tcindex_filter_result result;
58         struct tcindex_filter *next;
59 };
60
61
62 struct tcindex_data {
63         struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
64         struct tcindex_filter **h; /* imperfect hash; only used if !perfect;
65                                       NULL if unused */
66         __u16 mask;             /* AND key with mask */
67         int shift;              /* shift ANDed key to the right */
68         int hash;               /* hash table size; 0 if undefined */
69         int alloc_hash;         /* allocated size */
70         int fall_through;       /* 0: only classify if explicit match */
71 };
72
73
74 static struct tcindex_filter_result *lookup(struct tcindex_data *p,__u16 key)
75 {
76         struct tcindex_filter *f;
77
78         if (p->perfect)
79                 return p->perfect[key].res.class ? p->perfect+key : NULL;
80         if (!p->h)
81                 return NULL;
82         for (f = p->h[key % p->hash]; f; f = f->next) {
83                 if (f->key == key)
84                         return &f->result;
85         }
86         return NULL;
87 }
88
89
90 static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp,
91                           struct tcf_result *res)
92 {
93         struct tcindex_data *p = PRIV(tp);
94         struct tcindex_filter_result *f;
95
96         D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p);
97
98         f = lookup(p,(skb->tc_index & p->mask) >> p->shift);
99         if (!f) {
100                 if (!p->fall_through)
101                         return -1;
102                 res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle),
103                     (skb->tc_index& p->mask) >> p->shift);
104                 res->class = 0;
105                 D2PRINTK("alg 0x%x\n",res->classid);
106                 return 0;
107         }
108         *res = f->res;
109         D2PRINTK("map 0x%x\n",res->classid);
110 #ifdef CONFIG_NET_CLS_POLICE
111         if (f->police) {
112                 int result;
113
114                 result = tcf_police(skb,f->police);
115                 D2PRINTK("police %d\n",res);
116                 return result;
117         }
118 #endif
119         return 0;
120 }
121
122
123 static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle)
124 {
125         struct tcindex_data *p = PRIV(tp);
126         struct tcindex_filter_result *r;
127
128         DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle);
129         if (p->perfect && handle >= p->alloc_hash)
130                 return 0;
131         r = lookup(PRIV(tp),handle);
132         return r && r->res.class ? (unsigned long) r : 0;
133 }
134
135
136 static void tcindex_put(struct tcf_proto *tp, unsigned long f)
137 {
138         DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
139 }
140
141
142 static int tcindex_init(struct tcf_proto *tp)
143 {
144         struct tcindex_data *p;
145
146         DPRINTK("tcindex_init(tp %p)\n",tp);
147         p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL);
148         if (!p)
149                 return -ENOMEM;
150
151         tp->root = p;
152         p->perfect = NULL;
153         p->h = NULL;
154         p->hash = 0;
155         p->mask = 0xffff;
156         p->shift = 0;
157         p->fall_through = 1;
158         return 0;
159 }
160
161
162 static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
163 {
164         struct tcindex_data *p = PRIV(tp);
165         struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
166         struct tcindex_filter *f = NULL;
167         unsigned long cl;
168
169         DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f);
170         if (p->perfect) {
171                 if (!r->res.class)
172                         return -ENOENT;
173         } else {
174                 int i;
175                 struct tcindex_filter **walk = NULL;
176
177                 for (i = 0; i < p->hash; i++)
178                         for (walk = p->h+i; *walk; walk = &(*walk)->next)
179                                 if (&(*walk)->result == r)
180                                         goto found;
181                 return -ENOENT;
182
183 found:
184                 f = *walk;
185                 tcf_tree_lock(tp); 
186                 *walk = f->next;
187                 tcf_tree_unlock(tp);
188         }
189         cl = __cls_set_class(&r->res.class,0);
190         if (cl)
191                 tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
192 #ifdef CONFIG_NET_CLS_POLICE
193         tcf_police_release(r->police);
194 #endif
195         if (f)
196                 kfree(f);
197         return 0;
198 }
199
200
201 /*
202  * There are no parameters for tcindex_init, so we overload tcindex_change
203  */
204
205
206 static int tcindex_change(struct tcf_proto *tp,unsigned long base,u32 handle,
207     struct rtattr **tca,unsigned long *arg)
208 {
209         struct tcindex_filter_result new_filter_result = {
210                 NULL,           /* no policing */
211                 { 0,0 },        /* no classification */
212         };
213         struct rtattr *opt = tca[TCA_OPTIONS-1];
214         struct rtattr *tb[TCA_TCINDEX_MAX];
215         struct tcindex_data *p = PRIV(tp);
216         struct tcindex_filter *f;
217         struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg;
218         struct tcindex_filter **walk;
219         int hash,shift;
220         __u16 mask;
221
222         DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
223             "p %p,r %p\n",tp,handle,tca,arg,opt,p,r);
224         if (arg)
225                 DPRINTK("*arg = 0x%lx\n",*arg);
226         if (!opt)
227                 return 0;
228         if (rtattr_parse(tb,TCA_TCINDEX_MAX,RTA_DATA(opt),RTA_PAYLOAD(opt)) < 0)
229                 return -EINVAL;
230         if (!tb[TCA_TCINDEX_HASH-1]) {
231                 hash = p->hash;
232         } else {
233                 if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(int))
234                         return -EINVAL;
235                 hash = *(int *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]);
236         }
237         if (!tb[TCA_TCINDEX_MASK-1]) {
238                 mask = p->mask;
239         } else {
240                 if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(__u16))
241                         return -EINVAL;
242                 mask = *(__u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]);
243         }
244         if (!tb[TCA_TCINDEX_SHIFT-1])
245                 shift = p->shift;
246         else {
247                 if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(__u16))
248                         return -EINVAL;
249                 shift = *(int *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]);
250         }
251         if (p->perfect && hash <= (mask >> shift))
252                 return -EBUSY;
253         if (p->perfect && hash > p->alloc_hash)
254                 return -EBUSY;
255         if (p->h && hash != p->alloc_hash)
256                 return -EBUSY;
257         p->hash = hash;
258         p->mask = mask;
259         p->shift = shift;
260         if (tb[TCA_TCINDEX_FALL_THROUGH-1]) {
261                 if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(int))
262                         return -EINVAL;
263                 p->fall_through =
264                     *(int *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]);
265         }
266         DPRINTK("classid/police %p/%p\n",tb[TCA_TCINDEX_CLASSID-1],
267             tb[TCA_TCINDEX_POLICE-1]);
268         if (!tb[TCA_TCINDEX_CLASSID-1] && !tb[TCA_TCINDEX_POLICE-1])
269                 return 0;
270         if (!hash) {
271                 if ((mask >> shift) < PERFECT_HASH_THRESHOLD) {
272                         p->hash = (mask >> shift)+1;
273                 } else {
274                         p->hash = DEFAULT_HASH_SIZE;
275                 }
276         }
277         if (!p->perfect && !p->h) {
278                 p->alloc_hash = p->hash;
279                 DPRINTK("hash %d mask %d\n",p->hash,p->mask);
280                 if (p->hash > (mask >> shift)) {
281                         p->perfect = kmalloc(p->hash*
282                             sizeof(struct tcindex_filter_result),GFP_KERNEL);
283                         if (!p->perfect)
284                                 return -ENOMEM;
285                         memset(p->perfect, 0,
286                                p->hash * sizeof(struct tcindex_filter_result));
287                 } else {
288                         p->h = kmalloc(p->hash*sizeof(struct tcindex_filter *),
289                             GFP_KERNEL);
290                         if (!p->h)
291                                 return -ENOMEM;
292                         memset(p->h, 0, p->hash*sizeof(struct tcindex_filter *));
293                 }
294         }
295         /*
296          * Note: this could be as restrictive as
297          * if (handle & ~(mask >> shift))
298          * but then, we'd fail handles that may become valid after some
299          * future mask change. While this is extremely unlikely to ever
300          * matter, the check below is safer (and also more
301          * backwards-compatible).
302          */
303         if (p->perfect && handle >= p->alloc_hash)
304                 return -EINVAL;
305         if (p->perfect) {
306                 r = p->perfect+handle;
307         } else {
308                 r = lookup(p,handle);
309                 DPRINTK("r=%p\n",r);
310                 if (!r)
311                         r = &new_filter_result;
312         }
313         DPRINTK("r=%p\n",r);
314         if (tb[TCA_TCINDEX_CLASSID-1]) {
315                 unsigned long cl = cls_set_class(tp,&r->res.class,0);
316
317                 if (cl)
318                         tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
319                 r->res.classid = *(__u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]);
320                 r->res.class = tp->q->ops->cl_ops->bind_tcf(tp->q,base,
321                                                             r->res.classid);
322                 if (!r->res.class) {
323                         r->res.classid = 0;
324                         return -ENOENT;
325                 }
326         }
327 #ifdef CONFIG_NET_CLS_POLICE
328         {
329                 struct tcf_police *police;
330
331                 police = tb[TCA_TCINDEX_POLICE-1] ?
332                     tcf_police_locate(tb[TCA_TCINDEX_POLICE-1],NULL) : NULL;
333                 tcf_tree_lock(tp);
334                 police = xchg(&r->police,police);
335                 tcf_tree_unlock(tp);
336                 tcf_police_release(police);
337         }
338 #endif
339         if (r != &new_filter_result)
340                 return 0;
341         f = kmalloc(sizeof(struct tcindex_filter),GFP_KERNEL);
342         if (!f)
343                 return -ENOMEM;
344         f->key = handle;
345         f->result = new_filter_result;
346         f->next = NULL;
347         for (walk = p->h+(handle % p->hash); *walk; walk = &(*walk)->next)
348                 /* nothing */;
349         wmb();
350         *walk = f;
351         return 0;
352 }
353
354
355 static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
356 {
357         struct tcindex_data *p = PRIV(tp);
358         struct tcindex_filter *f,*next;
359         int i;
360
361         DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p);
362         if (p->perfect) {
363                 for (i = 0; i < p->hash; i++) {
364                         if (!p->perfect[i].res.class)
365                                 continue;
366                         if (walker->count >= walker->skip) {
367                                 if (walker->fn(tp,
368                                     (unsigned long) (p->perfect+i), walker)
369                                      < 0) {
370                                         walker->stop = 1;
371                                         return;
372                                 }
373                         }
374                         walker->count++;
375                 }
376         }
377         if (!p->h)
378                 return;
379         for (i = 0; i < p->hash; i++) {
380                 for (f = p->h[i]; f; f = next) {
381                         next = f->next;
382                         if (walker->count >= walker->skip) {
383                                 if (walker->fn(tp,(unsigned long) &f->result,
384                                     walker) < 0) {
385                                         walker->stop = 1;
386                                         return;
387                                 }
388                         }
389                         walker->count++;
390                 }
391         }
392 }
393
394
395 static int tcindex_destroy_element(struct tcf_proto *tp,
396     unsigned long arg, struct tcf_walker *walker)
397 {
398         return tcindex_delete(tp,arg);
399 }
400
401
402 static void tcindex_destroy(struct tcf_proto *tp)
403 {
404         struct tcindex_data *p = PRIV(tp);
405         struct tcf_walker walker;
406
407         DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p);
408         walker.count = 0;
409         walker.skip = 0;
410         walker.fn = &tcindex_destroy_element;
411         tcindex_walk(tp,&walker);
412         if (p->perfect)
413                 kfree(p->perfect);
414         if (p->h)
415                 kfree(p->h);
416         kfree(p);
417         tp->root = NULL;
418 }
419
420
421 static int tcindex_dump(struct tcf_proto *tp, unsigned long fh,
422     struct sk_buff *skb, struct tcmsg *t)
423 {
424         struct tcindex_data *p = PRIV(tp);
425         struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh;
426         unsigned char *b = skb->tail;
427         struct rtattr *rta;
428
429         DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n",
430             tp,fh,skb,t,p,r,b);
431         DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h);
432         rta = (struct rtattr *) b;
433         RTA_PUT(skb,TCA_OPTIONS,0,NULL);
434         if (!fh) {
435                 t->tcm_handle = ~0; /* whatever ... */
436                 RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash);
437                 RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask);
438                 RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift);
439                 RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through),
440                     &p->fall_through);
441         } else {
442                 if (p->perfect) {
443                         t->tcm_handle = r-p->perfect;
444                 } else {
445                         struct tcindex_filter *f;
446                         int i;
447
448                         t->tcm_handle = 0;
449                         for (i = 0; !t->tcm_handle && i < p->hash; i++) {
450                                 for (f = p->h[i]; !t->tcm_handle && f;
451                                      f = f->next) {
452                                         if (&f->result == r)
453                                                 t->tcm_handle = f->key;
454                                 }
455                         }
456                 }
457                 DPRINTK("handle = %d\n",t->tcm_handle);
458                 if (r->res.class)
459                         RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid);
460 #ifdef CONFIG_NET_CLS_POLICE
461                 if (r->police) {
462                         struct rtattr *p_rta = (struct rtattr *) skb->tail;
463
464                         RTA_PUT(skb,TCA_TCINDEX_POLICE,0,NULL);
465                         if (tcf_police_dump(skb,r->police) < 0)
466                                 goto rtattr_failure;
467                         p_rta->rta_len = skb->tail-(u8 *) p_rta;
468                 }
469 #endif
470         }
471         rta->rta_len = skb->tail-b;
472         return skb->len;
473
474 rtattr_failure:
475         skb_trim(skb, b - skb->data);
476         return -1;
477 }
478
479 static struct tcf_proto_ops cls_tcindex_ops = {
480         .next           =       NULL,
481         .kind           =       "tcindex",
482         .classify       =       tcindex_classify,
483         .init           =       tcindex_init,
484         .destroy        =       tcindex_destroy,
485         .get            =       tcindex_get,
486         .put            =       tcindex_put,
487         .change         =       tcindex_change,
488         .delete         =       tcindex_delete,
489         .walk           =       tcindex_walk,
490         .dump           =       tcindex_dump,
491         .owner          =       THIS_MODULE,
492 };
493
494 static int __init init_tcindex(void)
495 {
496         return register_tcf_proto_ops(&cls_tcindex_ops);
497 }
498
499 static void __exit exit_tcindex(void) 
500 {
501         unregister_tcf_proto_ops(&cls_tcindex_ops);
502 }
503
504 module_init(init_tcindex)
505 module_exit(exit_tcindex)
506 MODULE_LICENSE("GPL");