vserver 1.9.5.x5
[linux-2.6.git] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
41    only one "Generalized Port Identifier". So that for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68 #include <linux/config.h>
69
70 struct rsvp_head
71 {
72         u32                     tmap[256/32];
73         u32                     hgenerator;
74         u8                      tgenerator;
75         struct rsvp_session     *ht[256];
76 };
77
78 struct rsvp_session
79 {
80         struct rsvp_session     *next;
81         u32                     dst[RSVP_DST_LEN];
82         struct tc_rsvp_gpi      dpi;
83         u8                      protocol;
84         u8                      tunnelid;
85         /* 16 (src,sport) hash slots, and one wildcard source slot */
86         struct rsvp_filter      *ht[16+1];
87 };
88
89
90 struct rsvp_filter
91 {
92         struct rsvp_filter      *next;
93         u32                     src[RSVP_DST_LEN];
94         struct tc_rsvp_gpi      spi;
95         u8                      tunnelhdr;
96
97         struct tcf_result       res;
98         struct tcf_exts         exts;
99
100         u32                     handle;
101         struct rsvp_session     *sess;
102 };
103
104 static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
105 {
106         unsigned h = dst[RSVP_DST_LEN-1];
107         h ^= h>>16;
108         h ^= h>>8;
109         return (h ^ protocol ^ tunnelid) & 0xFF;
110 }
111
112 static __inline__ unsigned hash_src(u32 *src)
113 {
114         unsigned h = src[RSVP_DST_LEN-1];
115         h ^= h>>16;
116         h ^= h>>8;
117         h ^= h>>4;
118         return h & 0xF;
119 }
120
121 static struct tcf_ext_map rsvp_ext_map = {
122         .police = TCA_RSVP_POLICE,
123         .action = TCA_RSVP_ACT
124 };
125
/*
 * Run the extensions of filter `f` on `skb`.  Expands inside the filter
 * loops of rsvp_classify() and uses that function's locals skb, f and res.
 * A positive result is returned to the caller, a negative one skips to the
 * next filter, zero falls through.
 *
 * This has to stay a bare block: inside do { } while (0) the `continue`
 * would bind to that wrapper instead of the enclosing filter loop.
 */
#define RSVP_APPLY_RESULT()                                     \
{                                                               \
        int verdict = tcf_exts_exec(skb, &f->exts, res);        \
        if (verdict > 0)                                        \
                return verdict;                                 \
        if (verdict < 0)                                        \
                continue;                                       \
}
134         
/*
 * Classify one packet against the two-level RSVP tables.
 *
 * Returns 0 with *res filled in when a non-tunnel filter matches, a
 * positive value when a filter's extensions produce one (see
 * RSVP_APPLY_RESULT), and -1 when nothing matches or, in the IPv4 build,
 * when the packet is a fragment.  *res may already have been overwritten
 * when -1 is returned after a tunnel restart.
 */
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
                         struct tcf_result *res)
{
        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
        struct rsvp_session *s;
        struct rsvp_filter *f;
        unsigned h1, h2;
        u32 *dst, *src;
        u8 protocol;
        u8 tunnelid = 0;
        u8 *xprt;
#if RSVP_DST_LEN == 4
        struct ipv6hdr *nhptr = skb->nh.ipv6h;
#else
        struct iphdr *nhptr = skb->nh.iph;
#endif

        /*
         * Re-entered after a tunnel filter matched, with nhptr moved further
         * into the packet and tunnelid set to that filter's classid.
         */
restart:

#if RSVP_DST_LEN == 4
        src = &nhptr->saddr.s6_addr32[0];
        dst = &nhptr->daddr.s6_addr32[0];
        protocol = nhptr->nexthdr;
        xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
#else
        src = &nhptr->saddr;
        dst = &nhptr->daddr;
        protocol = nhptr->protocol;
        xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
        /* Fragments (MF set or non-zero offset) are not classified. */
        if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
                return -1;
#endif

        h1 = hash_dst(dst, protocol, tunnelid);
        h2 = hash_src(src);

        /*
         * Session match: destination, protocol, tunnel ID, and the DPI test
         * (the 32-bit word at xprt+offset must equal key under mask).
         * NOTE(review): nothing visible here checks that xprt+offset lies
         * inside the packet data or is aligned -- confirm against callers.
         */
        for (s = sht[h1]; s; s = s->next) {
                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
                    protocol == s->protocol &&
                    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
#if RSVP_DST_LEN == 4
                    && dst[0] == s->dst[0]
                    && dst[1] == s->dst[1]
                    && dst[2] == s->dst[2]
#endif
                    && tunnelid == s->tunnelid) {

                        /* Exact-source flows in the hashed slot first. */
                        for (f = s->ht[h2]; f; f = f->next) {
                                if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
                                    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
#if RSVP_DST_LEN == 4
                                    && src[0] == f->src[0]
                                    && src[1] == f->src[1]
                                    && src[2] == f->src[2]
#endif
                                    ) {
                                        *res = f->res;
                                        /* May `continue` this loop or return. */
                                        RSVP_APPLY_RESULT();

                                        /* Also entered by goto from the wildcard loop below. */
matched:
                                        if (f->tunnelhdr == 0)
                                                return 0;

                                        /*
                                         * Tunnel filter: classid is the tunnel ID.  Move
                                         * nhptr to tunnelhdr bytes past xprt, less the
                                         * size of one network header, and look up again.
                                         */
                                        tunnelid = f->res.classid;
                                        nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
                                        goto restart;
                                }
                        }

                        /* And wildcard bucket... */
                        /* The first wildcard filter not skipped by its extensions wins. */
                        for (f = s->ht[16]; f; f = f->next) {
                                *res = f->res;
                                RSVP_APPLY_RESULT();
                                goto matched;
                        }
                        /* Only the first matching session is consulted. */
                        return -1;
                }
        }
        return -1;
}
215
216 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
217 {
218         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
219         struct rsvp_session *s;
220         struct rsvp_filter *f;
221         unsigned h1 = handle&0xFF;
222         unsigned h2 = (handle>>8)&0xFF;
223
224         if (h2 > 16)
225                 return 0;
226
227         for (s = sht[h1]; s; s = s->next) {
228                 for (f = s->ht[h2]; f; f = f->next) {
229                         if (f->handle == handle)
230                                 return (unsigned long)f;
231                 }
232         }
233         return 0;
234 }
235
/*
 * Counterpart of rsvp_get() in the ops table.  rsvp_get() only returns a
 * pointer, so there is nothing to undo here.
 */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
        /* intentionally empty */
}
239
240 static int rsvp_init(struct tcf_proto *tp)
241 {
242         struct rsvp_head *data;
243
244         data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
245         if (data) {
246                 memset(data, 0, sizeof(struct rsvp_head));
247                 tp->root = data;
248                 return 0;
249         }
250         return -ENOBUFS;
251 }
252
253 static inline void
254 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
255 {
256         tcf_unbind_filter(tp, &f->res);
257         tcf_exts_destroy(tp, &f->exts);
258         kfree(f);
259 }
260
261 static void rsvp_destroy(struct tcf_proto *tp)
262 {
263         struct rsvp_head *data = xchg(&tp->root, NULL);
264         struct rsvp_session **sht;
265         int h1, h2;
266
267         if (data == NULL)
268                 return;
269
270         sht = data->ht;
271
272         for (h1=0; h1<256; h1++) {
273                 struct rsvp_session *s;
274
275                 while ((s = sht[h1]) != NULL) {
276                         sht[h1] = s->next;
277
278                         for (h2=0; h2<=16; h2++) {
279                                 struct rsvp_filter *f;
280
281                                 while ((f = s->ht[h2]) != NULL) {
282                                         s->ht[h2] = f->next;
283                                         rsvp_delete_filter(tp, f);
284                                 }
285                         }
286                         kfree(s);
287                 }
288         }
289         kfree(data);
290 }
291
292 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
293 {
294         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
295         unsigned h = f->handle;
296         struct rsvp_session **sp;
297         struct rsvp_session *s = f->sess;
298         int i;
299
300         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
301                 if (*fp == f) {
302                         tcf_tree_lock(tp);
303                         *fp = f->next;
304                         tcf_tree_unlock(tp);
305                         rsvp_delete_filter(tp, f);
306
307                         /* Strip tree */
308
309                         for (i=0; i<=16; i++)
310                                 if (s->ht[i])
311                                         return 0;
312
313                         /* OK, session has no flows */
314                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
315                              *sp; sp = &(*sp)->next) {
316                                 if (*sp == s) {
317                                         tcf_tree_lock(tp);
318                                         *sp = s->next;
319                                         tcf_tree_unlock(tp);
320
321                                         kfree(s);
322                                         return 0;
323                                 }
324                         }
325
326                         return 0;
327                 }
328         }
329         return 0;
330 }
331
332 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
333 {
334         struct rsvp_head *data = tp->root;
335         int i = 0xFFFF;
336
337         while (i-- > 0) {
338                 u32 h;
339                 if ((data->hgenerator += 0x10000) == 0)
340                         data->hgenerator = 0x10000;
341                 h = data->hgenerator|salt;
342                 if (rsvp_get(tp, h) == 0)
343                         return h;
344         }
345         return 0;
346 }
347
348 static int tunnel_bts(struct rsvp_head *data)
349 {
350         int n = data->tgenerator>>5;
351         u32 b = 1<<(data->tgenerator&0x1F);
352         
353         if (data->tmap[n]&b)
354                 return 0;
355         data->tmap[n] |= b;
356         return 1;
357 }
358
359 static void tunnel_recycle(struct rsvp_head *data)
360 {
361         struct rsvp_session **sht = data->ht;
362         u32 tmap[256/32];
363         int h1, h2;
364
365         memset(tmap, 0, sizeof(tmap));
366
367         for (h1=0; h1<256; h1++) {
368                 struct rsvp_session *s;
369                 for (s = sht[h1]; s; s = s->next) {
370                         for (h2=0; h2<=16; h2++) {
371                                 struct rsvp_filter *f;
372
373                                 for (f = s->ht[h2]; f; f = f->next) {
374                                         if (f->tunnelhdr == 0)
375                                                 continue;
376                                         data->tgenerator = f->res.classid;
377                                         tunnel_bts(data);
378                                 }
379                         }
380                 }
381         }
382
383         memcpy(data->tmap, tmap, sizeof(tmap));
384 }
385
386 static u32 gen_tunnel(struct rsvp_head *data)
387 {
388         int i, k;
389
390         for (k=0; k<2; k++) {
391                 for (i=255; i>0; i--) {
392                         if (++data->tgenerator == 0)
393                                 data->tgenerator = 1;
394                         if (tunnel_bts(data))
395                                 return data->tgenerator;
396                 }
397                 tunnel_recycle(data);
398         }
399         return 0;
400 }
401
402 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
403                        u32 handle,
404                        struct rtattr **tca,
405                        unsigned long *arg)
406 {
407         struct rsvp_head *data = tp->root;
408         struct rsvp_filter *f, **fp;
409         struct rsvp_session *s, **sp;
410         struct tc_rsvp_pinfo *pinfo = NULL;
411         struct rtattr *opt = tca[TCA_OPTIONS-1];
412         struct rtattr *tb[TCA_RSVP_MAX];
413         struct tcf_exts e;
414         unsigned h1, h2;
415         u32 *dst;
416         int err;
417
418         if (opt == NULL)
419                 return handle ? -EINVAL : 0;
420
421         if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
422                 return -EINVAL;
423
424         err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
425         if (err < 0)
426                 return err;
427
428         if ((f = (struct rsvp_filter*)*arg) != NULL) {
429                 /* Node exists: adjust only classid */
430
431                 if (f->handle != handle && handle)
432                         goto errout2;
433                 if (tb[TCA_RSVP_CLASSID-1]) {
434                         f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
435                         tcf_bind_filter(tp, &f->res, base);
436                 }
437
438                 tcf_exts_change(tp, &f->exts, &e);
439                 return 0;
440         }
441
442         /* Now more serious part... */
443         err = -EINVAL;
444         if (handle)
445                 goto errout2;
446         if (tb[TCA_RSVP_DST-1] == NULL)
447                 goto errout2;
448
449         err = -ENOBUFS;
450         f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
451         if (f == NULL)
452                 goto errout2;
453
454         memset(f, 0, sizeof(*f));
455         h2 = 16;
456         if (tb[TCA_RSVP_SRC-1]) {
457                 err = -EINVAL;
458                 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
459                         goto errout;
460                 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
461                 h2 = hash_src(f->src);
462         }
463         if (tb[TCA_RSVP_PINFO-1]) {
464                 err = -EINVAL;
465                 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
466                         goto errout;
467                 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
468                 f->spi = pinfo->spi;
469                 f->tunnelhdr = pinfo->tunnelhdr;
470         }
471         if (tb[TCA_RSVP_CLASSID-1]) {
472                 err = -EINVAL;
473                 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
474                         goto errout;
475                 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
476         }
477
478         err = -EINVAL;
479         if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
480                 goto errout;
481         dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
482         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
483
484         err = -ENOMEM;
485         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
486                 goto errout;
487
488         if (f->tunnelhdr) {
489                 err = -EINVAL;
490                 if (f->res.classid > 255)
491                         goto errout;
492
493                 err = -ENOMEM;
494                 if (f->res.classid == 0 &&
495                     (f->res.classid = gen_tunnel(data)) == 0)
496                         goto errout;
497         }
498
499         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
500                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
501                     pinfo && pinfo->protocol == s->protocol &&
502                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
503 #if RSVP_DST_LEN == 4
504                     && dst[0] == s->dst[0]
505                     && dst[1] == s->dst[1]
506                     && dst[2] == s->dst[2]
507 #endif
508                     && pinfo->tunnelid == s->tunnelid) {
509
510 insert:
511                         /* OK, we found appropriate session */
512
513                         fp = &s->ht[h2];
514
515                         f->sess = s;
516                         if (f->tunnelhdr == 0)
517                                 tcf_bind_filter(tp, &f->res, base);
518
519                         tcf_exts_change(tp, &f->exts, &e);
520
521                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
522                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
523                                         break;
524                         f->next = *fp;
525                         wmb();
526                         *fp = f;
527
528                         *arg = (unsigned long)f;
529                         return 0;
530                 }
531         }
532
533         /* No session found. Create new one. */
534
535         err = -ENOBUFS;
536         s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
537         if (s == NULL)
538                 goto errout;
539         memset(s, 0, sizeof(*s));
540         memcpy(s->dst, dst, sizeof(s->dst));
541
542         if (pinfo) {
543                 s->dpi = pinfo->dpi;
544                 s->protocol = pinfo->protocol;
545                 s->tunnelid = pinfo->tunnelid;
546         }
547         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
548                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
549                         break;
550         }
551         s->next = *sp;
552         wmb();
553         *sp = s;
554         
555         goto insert;
556
557 errout:
558         if (f)
559                 kfree(f);
560 errout2:
561         tcf_exts_destroy(tp, &e);
562         return err;
563 }
564
565 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
566 {
567         struct rsvp_head *head = tp->root;
568         unsigned h, h1;
569
570         if (arg->stop)
571                 return;
572
573         for (h = 0; h < 256; h++) {
574                 struct rsvp_session *s;
575
576                 for (s = head->ht[h]; s; s = s->next) {
577                         for (h1 = 0; h1 <= 16; h1++) {
578                                 struct rsvp_filter *f;
579
580                                 for (f = s->ht[h1]; f; f = f->next) {
581                                         if (arg->count < arg->skip) {
582                                                 arg->count++;
583                                                 continue;
584                                         }
585                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
586                                                 arg->stop = 1;
587                                                 return;
588                                         }
589                                         arg->count++;
590                                 }
591                         }
592                 }
593         }
594 }
595
596 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
597                      struct sk_buff *skb, struct tcmsg *t)
598 {
599         struct rsvp_filter *f = (struct rsvp_filter*)fh;
600         struct rsvp_session *s;
601         unsigned char    *b = skb->tail;
602         struct rtattr *rta;
603         struct tc_rsvp_pinfo pinfo;
604
605         if (f == NULL)
606                 return skb->len;
607         s = f->sess;
608
609         t->tcm_handle = f->handle;
610
611
612         rta = (struct rtattr*)b;
613         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
614
615         RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
616         pinfo.dpi = s->dpi;
617         pinfo.spi = f->spi;
618         pinfo.protocol = s->protocol;
619         pinfo.tunnelid = s->tunnelid;
620         pinfo.tunnelhdr = f->tunnelhdr;
621         RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
622         if (f->res.classid)
623                 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
624         if (((f->handle>>8)&0xFF) != 16)
625                 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
626
627         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
628                 goto rtattr_failure;
629
630         rta->rta_len = skb->tail - b;
631
632         if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
633                 goto rtattr_failure;
634         return skb->len;
635
636 rtattr_failure:
637         skb_trim(skb, b - skb->data);
638         return -1;
639 }
640
641 static struct tcf_proto_ops RSVP_OPS = {
642         .next           =       NULL,
643         .kind           =       RSVP_ID,
644         .classify       =       rsvp_classify,
645         .init           =       rsvp_init,
646         .destroy        =       rsvp_destroy,
647         .get            =       rsvp_get,
648         .put            =       rsvp_put,
649         .change         =       rsvp_change,
650         .delete         =       rsvp_delete,
651         .walk           =       rsvp_walk,
652         .dump           =       rsvp_dump,
653         .owner          =       THIS_MODULE,
654 };
655
656 static int __init init_rsvp(void)
657 {
658         return register_tcf_proto_ops(&RSVP_OPS);
659 }
660
661 static void __exit exit_rsvp(void) 
662 {
663         unregister_tcf_proto_ops(&RSVP_OPS);
664 }
665
/* Hook the registration and unregistration functions into module load/unload. */
module_init(init_rsvp)
module_exit(exit_rsvp)