upgrade to linux 2.6.10-1.12_FC2
[linux-2.6.git] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
41    only one "Generalized Port Identifier". So that for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68 #include <linux/config.h>
69
70 struct rsvp_head
71 {
72         u32                     tmap[256/32];
73         u32                     hgenerator;
74         u8                      tgenerator;
75         struct rsvp_session     *ht[256];
76 };
77
78 struct rsvp_session
79 {
80         struct rsvp_session     *next;
81         u32                     dst[RSVP_DST_LEN];
82         struct tc_rsvp_gpi      dpi;
83         u8                      protocol;
84         u8                      tunnelid;
85         /* 16 (src,sport) hash slots, and one wildcard source slot */
86         struct rsvp_filter      *ht[16+1];
87 };
88
89
90 struct rsvp_filter
91 {
92         struct rsvp_filter      *next;
93         u32                     src[RSVP_DST_LEN];
94         struct tc_rsvp_gpi      spi;
95         u8                      tunnelhdr;
96
97         struct tcf_result       res;
98 #ifdef CONFIG_NET_CLS_POLICE
99         struct tcf_police       *police;
100 #endif
101
102         u32                     handle;
103         struct rsvp_session     *sess;
104 };
105
106 static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
107 {
108         unsigned h = dst[RSVP_DST_LEN-1];
109         h ^= h>>16;
110         h ^= h>>8;
111         return (h ^ protocol ^ tunnelid) & 0xFF;
112 }
113
114 static __inline__ unsigned hash_src(u32 *src)
115 {
116         unsigned h = src[RSVP_DST_LEN-1];
117         h ^= h>>16;
118         h ^= h>>8;
119         h ^= h>>4;
120         return h & 0xF;
121 }
122
/*
 * RSVP_POLICE() - run the policer attached to the current filter, if any.
 *
 * Must be expanded inside a loop over filters in a function returning int,
 * with `f` (the filter) and `skb` in scope.  A negative tcf_police()
 * result skips to the next filter via `continue`; a non-zero positive
 * result is returned from the enclosing function; zero falls through.
 * Expands to nothing when CONFIG_NET_CLS_POLICE is not set.
 */
#ifdef CONFIG_NET_CLS_POLICE
#define RSVP_POLICE() \
if (f->police) { \
	int police_verdict = tcf_police(skb, f->police); \
	if (police_verdict < 0) \
		continue; \
	if (police_verdict != 0) \
		return police_verdict; \
}
#else
#define RSVP_POLICE()
#endif
133
134
135 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
136                          struct tcf_result *res)
137 {
138         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
139         struct rsvp_session *s;
140         struct rsvp_filter *f;
141         unsigned h1, h2;
142         u32 *dst, *src;
143         u8 protocol;
144         u8 tunnelid = 0;
145         u8 *xprt;
146 #if RSVP_DST_LEN == 4
147         struct ipv6hdr *nhptr = skb->nh.ipv6h;
148 #else
149         struct iphdr *nhptr = skb->nh.iph;
150 #endif
151
152 restart:
153
154 #if RSVP_DST_LEN == 4
155         src = &nhptr->saddr.s6_addr32[0];
156         dst = &nhptr->daddr.s6_addr32[0];
157         protocol = nhptr->nexthdr;
158         xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
159 #else
160         src = &nhptr->saddr;
161         dst = &nhptr->daddr;
162         protocol = nhptr->protocol;
163         xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
164         if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
165                 return -1;
166 #endif
167
168         h1 = hash_dst(dst, protocol, tunnelid);
169         h2 = hash_src(src);
170
171         for (s = sht[h1]; s; s = s->next) {
172                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
173                     protocol == s->protocol &&
174                     !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
175 #if RSVP_DST_LEN == 4
176                     && dst[0] == s->dst[0]
177                     && dst[1] == s->dst[1]
178                     && dst[2] == s->dst[2]
179 #endif
180                     && tunnelid == s->tunnelid) {
181
182                         for (f = s->ht[h2]; f; f = f->next) {
183                                 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
184                                     !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
185 #if RSVP_DST_LEN == 4
186                                     && src[0] == f->src[0]
187                                     && src[1] == f->src[1]
188                                     && src[2] == f->src[2]
189 #endif
190                                     ) {
191                                         *res = f->res;
192
193                                         RSVP_POLICE();
194
195 matched:
196                                         if (f->tunnelhdr == 0)
197                                                 return 0;
198
199                                         tunnelid = f->res.classid;
200                                         nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
201                                         goto restart;
202                                 }
203                         }
204
205                         /* And wildcard bucket... */
206                         for (f = s->ht[16]; f; f = f->next) {
207                                 *res = f->res;
208                                 RSVP_POLICE();
209                                 goto matched;
210                         }
211                         return -1;
212                 }
213         }
214         return -1;
215 }
216
217 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
218 {
219         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
220         struct rsvp_session *s;
221         struct rsvp_filter *f;
222         unsigned h1 = handle&0xFF;
223         unsigned h2 = (handle>>8)&0xFF;
224
225         if (h2 > 16)
226                 return 0;
227
228         for (s = sht[h1]; s; s = s->next) {
229                 for (f = s->ht[h2]; f; f = f->next) {
230                         if (f->handle == handle)
231                                 return (unsigned long)f;
232                 }
233         }
234         return 0;
235 }
236
/*
 * rsvp_put - counterpart of rsvp_get().  rsvp_get() takes no reference,
 * so there is nothing to release here.
 */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* intentionally empty */
}
240
241 static int rsvp_init(struct tcf_proto *tp)
242 {
243         struct rsvp_head *data;
244
245         data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
246         if (data) {
247                 memset(data, 0, sizeof(struct rsvp_head));
248                 tp->root = data;
249                 return 0;
250         }
251         return -ENOBUFS;
252 }
253
254 static void rsvp_destroy(struct tcf_proto *tp)
255 {
256         struct rsvp_head *data = xchg(&tp->root, NULL);
257         struct rsvp_session **sht;
258         int h1, h2;
259
260         if (data == NULL)
261                 return;
262
263         sht = data->ht;
264
265         for (h1=0; h1<256; h1++) {
266                 struct rsvp_session *s;
267
268                 while ((s = sht[h1]) != NULL) {
269                         sht[h1] = s->next;
270
271                         for (h2=0; h2<=16; h2++) {
272                                 struct rsvp_filter *f;
273
274                                 while ((f = s->ht[h2]) != NULL) {
275                                         s->ht[h2] = f->next;
276                                         tcf_unbind_filter(tp, &f->res);
277 #ifdef CONFIG_NET_CLS_POLICE
278                                         tcf_police_release(f->police,TCA_ACT_UNBIND);
279 #endif
280                                         kfree(f);
281                                 }
282                         }
283                         kfree(s);
284                 }
285         }
286         kfree(data);
287 }
288
289 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
290 {
291         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
292         unsigned h = f->handle;
293         struct rsvp_session **sp;
294         struct rsvp_session *s = f->sess;
295         int i;
296
297         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
298                 if (*fp == f) {
299                         tcf_tree_lock(tp);
300                         *fp = f->next;
301                         tcf_tree_unlock(tp);
302                         tcf_unbind_filter(tp, &f->res);
303 #ifdef CONFIG_NET_CLS_POLICE
304                         tcf_police_release(f->police,TCA_ACT_UNBIND);
305 #endif
306
307                         kfree(f);
308
309                         /* Strip tree */
310
311                         for (i=0; i<=16; i++)
312                                 if (s->ht[i])
313                                         return 0;
314
315                         /* OK, session has no flows */
316                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
317                              *sp; sp = &(*sp)->next) {
318                                 if (*sp == s) {
319                                         tcf_tree_lock(tp);
320                                         *sp = s->next;
321                                         tcf_tree_unlock(tp);
322
323                                         kfree(s);
324                                         return 0;
325                                 }
326                         }
327
328                         return 0;
329                 }
330         }
331         return 0;
332 }
333
334 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
335 {
336         struct rsvp_head *data = tp->root;
337         int i = 0xFFFF;
338
339         while (i-- > 0) {
340                 u32 h;
341                 if ((data->hgenerator += 0x10000) == 0)
342                         data->hgenerator = 0x10000;
343                 h = data->hgenerator|salt;
344                 if (rsvp_get(tp, h) == 0)
345                         return h;
346         }
347         return 0;
348 }
349
350 static int tunnel_bts(struct rsvp_head *data)
351 {
352         int n = data->tgenerator>>5;
353         u32 b = 1<<(data->tgenerator&0x1F);
354         
355         if (data->tmap[n]&b)
356                 return 0;
357         data->tmap[n] |= b;
358         return 1;
359 }
360
361 static void tunnel_recycle(struct rsvp_head *data)
362 {
363         struct rsvp_session **sht = data->ht;
364         u32 tmap[256/32];
365         int h1, h2;
366
367         memset(tmap, 0, sizeof(tmap));
368
369         for (h1=0; h1<256; h1++) {
370                 struct rsvp_session *s;
371                 for (s = sht[h1]; s; s = s->next) {
372                         for (h2=0; h2<=16; h2++) {
373                                 struct rsvp_filter *f;
374
375                                 for (f = s->ht[h2]; f; f = f->next) {
376                                         if (f->tunnelhdr == 0)
377                                                 continue;
378                                         data->tgenerator = f->res.classid;
379                                         tunnel_bts(data);
380                                 }
381                         }
382                 }
383         }
384
385         memcpy(data->tmap, tmap, sizeof(tmap));
386 }
387
388 static u32 gen_tunnel(struct rsvp_head *data)
389 {
390         int i, k;
391
392         for (k=0; k<2; k++) {
393                 for (i=255; i>0; i--) {
394                         if (++data->tgenerator == 0)
395                                 data->tgenerator = 1;
396                         if (tunnel_bts(data))
397                                 return data->tgenerator;
398                 }
399                 tunnel_recycle(data);
400         }
401         return 0;
402 }
403
404 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
405                        u32 handle,
406                        struct rtattr **tca,
407                        unsigned long *arg)
408 {
409         struct rsvp_head *data = tp->root;
410         struct rsvp_filter *f, **fp;
411         struct rsvp_session *s, **sp;
412         struct tc_rsvp_pinfo *pinfo = NULL;
413         struct rtattr *opt = tca[TCA_OPTIONS-1];
414         struct rtattr *tb[TCA_RSVP_MAX];
415         unsigned h1, h2;
416         u32 *dst;
417         int err;
418
419         if (opt == NULL)
420                 return handle ? -EINVAL : 0;
421
422         if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
423                 return -EINVAL;
424
425         if ((f = (struct rsvp_filter*)*arg) != NULL) {
426                 /* Node exists: adjust only classid */
427
428                 if (f->handle != handle && handle)
429                         return -EINVAL;
430                 if (tb[TCA_RSVP_CLASSID-1]) {
431                         f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
432                         tcf_bind_filter(tp, &f->res, base);
433                 }
434 #ifdef CONFIG_NET_CLS_POLICE
435                 if (tb[TCA_RSVP_POLICE-1]) {
436                         err = tcf_change_police(tp, &f->police,
437                                 tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
438                         if (err < 0)
439                                 return err;
440                 }
441 #endif
442                 return 0;
443         }
444
445         /* Now more serious part... */
446         if (handle)
447                 return -EINVAL;
448         if (tb[TCA_RSVP_DST-1] == NULL)
449                 return -EINVAL;
450
451         f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
452         if (f == NULL)
453                 return -ENOBUFS;
454
455         memset(f, 0, sizeof(*f));
456         h2 = 16;
457         if (tb[TCA_RSVP_SRC-1]) {
458                 err = -EINVAL;
459                 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
460                         goto errout;
461                 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
462                 h2 = hash_src(f->src);
463         }
464         if (tb[TCA_RSVP_PINFO-1]) {
465                 err = -EINVAL;
466                 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
467                         goto errout;
468                 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
469                 f->spi = pinfo->spi;
470                 f->tunnelhdr = pinfo->tunnelhdr;
471         }
472         if (tb[TCA_RSVP_CLASSID-1]) {
473                 err = -EINVAL;
474                 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
475                         goto errout;
476                 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
477         }
478
479         err = -EINVAL;
480         if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
481                 goto errout;
482         dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
483         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
484
485         err = -ENOMEM;
486         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
487                 goto errout;
488
489         if (f->tunnelhdr) {
490                 err = -EINVAL;
491                 if (f->res.classid > 255)
492                         goto errout;
493
494                 err = -ENOMEM;
495                 if (f->res.classid == 0 &&
496                     (f->res.classid = gen_tunnel(data)) == 0)
497                         goto errout;
498         }
499
500         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
501                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
502                     pinfo && pinfo->protocol == s->protocol &&
503                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
504 #if RSVP_DST_LEN == 4
505                     && dst[0] == s->dst[0]
506                     && dst[1] == s->dst[1]
507                     && dst[2] == s->dst[2]
508 #endif
509                     && pinfo->tunnelid == s->tunnelid) {
510
511 insert:
512                         /* OK, we found appropriate session */
513
514                         fp = &s->ht[h2];
515
516                         f->sess = s;
517                         if (f->tunnelhdr == 0)
518                                 tcf_bind_filter(tp, &f->res, base);
519 #ifdef CONFIG_NET_CLS_POLICE
520                         if (tb[TCA_RSVP_POLICE-1])
521                                 tcf_change_police(tp, &f->police, tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
522 #endif
523
524                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
525                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
526                                         break;
527                         f->next = *fp;
528                         wmb();
529                         *fp = f;
530
531                         *arg = (unsigned long)f;
532                         return 0;
533                 }
534         }
535
536         /* No session found. Create new one. */
537
538         err = -ENOBUFS;
539         s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
540         if (s == NULL)
541                 goto errout;
542         memset(s, 0, sizeof(*s));
543         memcpy(s->dst, dst, sizeof(s->dst));
544
545         if (pinfo) {
546                 s->dpi = pinfo->dpi;
547                 s->protocol = pinfo->protocol;
548                 s->tunnelid = pinfo->tunnelid;
549         }
550         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
551                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
552                         break;
553         }
554         s->next = *sp;
555         wmb();
556         *sp = s;
557         
558         goto insert;
559
560 errout:
561         if (f)
562                 kfree(f);
563         return err;
564 }
565
566 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
567 {
568         struct rsvp_head *head = tp->root;
569         unsigned h, h1;
570
571         if (arg->stop)
572                 return;
573
574         for (h = 0; h < 256; h++) {
575                 struct rsvp_session *s;
576
577                 for (s = head->ht[h]; s; s = s->next) {
578                         for (h1 = 0; h1 <= 16; h1++) {
579                                 struct rsvp_filter *f;
580
581                                 for (f = s->ht[h1]; f; f = f->next) {
582                                         if (arg->count < arg->skip) {
583                                                 arg->count++;
584                                                 continue;
585                                         }
586                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
587                                                 arg->stop = 1;
588                                                 return;
589                                         }
590                                         arg->count++;
591                                 }
592                         }
593                 }
594         }
595 }
596
597 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
598                      struct sk_buff *skb, struct tcmsg *t)
599 {
600         struct rsvp_filter *f = (struct rsvp_filter*)fh;
601         struct rsvp_session *s;
602         unsigned char    *b = skb->tail;
603         struct rtattr *rta;
604         struct tc_rsvp_pinfo pinfo;
605
606         if (f == NULL)
607                 return skb->len;
608         s = f->sess;
609
610         t->tcm_handle = f->handle;
611
612
613         rta = (struct rtattr*)b;
614         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
615
616         RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
617         pinfo.dpi = s->dpi;
618         pinfo.spi = f->spi;
619         pinfo.protocol = s->protocol;
620         pinfo.tunnelid = s->tunnelid;
621         pinfo.tunnelhdr = f->tunnelhdr;
622         RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
623         if (f->res.classid)
624                 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
625         if (((f->handle>>8)&0xFF) != 16)
626                 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
627 #ifdef CONFIG_NET_CLS_POLICE
628         if (tcf_dump_police(skb, f->police, TCA_RSVP_POLICE) < 0)
629                 goto rtattr_failure;
630 #endif
631
632         rta->rta_len = skb->tail - b;
633 #ifdef CONFIG_NET_CLS_POLICE
634         if (f->police)
635                 if (tcf_police_dump_stats(skb, f->police) < 0)
636                         goto rtattr_failure;
637 #endif
638         return skb->len;
639
640 rtattr_failure:
641         skb_trim(skb, b - skb->data);
642         return -1;
643 }
644
645 static struct tcf_proto_ops RSVP_OPS = {
646         .next           =       NULL,
647         .kind           =       RSVP_ID,
648         .classify       =       rsvp_classify,
649         .init           =       rsvp_init,
650         .destroy        =       rsvp_destroy,
651         .get            =       rsvp_get,
652         .put            =       rsvp_put,
653         .change         =       rsvp_change,
654         .delete         =       rsvp_delete,
655         .walk           =       rsvp_walk,
656         .dump           =       rsvp_dump,
657         .owner          =       THIS_MODULE,
658 };
659
660 static int __init init_rsvp(void)
661 {
662         return register_tcf_proto_ops(&RSVP_OPS);
663 }
664
665 static void __exit exit_rsvp(void) 
666 {
667         unregister_tcf_proto_ops(&RSVP_OPS);
668 }
669
670 module_init(init_rsvp)
671 module_exit(exit_rsvp)