ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
41    only one "Generalized Port Identifier". So that for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68 #include <linux/config.h>
69
70 struct rsvp_head
71 {
72         u32                     tmap[256/32];
73         u32                     hgenerator;
74         u8                      tgenerator;
75         struct rsvp_session     *ht[256];
76 };
77
78 struct rsvp_session
79 {
80         struct rsvp_session     *next;
81         u32                     dst[RSVP_DST_LEN];
82         struct tc_rsvp_gpi      dpi;
83         u8                      protocol;
84         u8                      tunnelid;
85         /* 16 (src,sport) hash slots, and one wildcard source slot */
86         struct rsvp_filter      *ht[16+1];
87 };
88
89
90 struct rsvp_filter
91 {
92         struct rsvp_filter      *next;
93         u32                     src[RSVP_DST_LEN];
94         struct tc_rsvp_gpi      spi;
95         u8                      tunnelhdr;
96
97         struct tcf_result       res;
98 #ifdef CONFIG_NET_CLS_POLICE
99         struct tcf_police       *police;
100 #endif
101
102         u32                     handle;
103         struct rsvp_session     *sess;
104 };
105
106 static __inline__ unsigned hash_dst(u32 *dst, u8 protocol, u8 tunnelid)
107 {
108         unsigned h = dst[RSVP_DST_LEN-1];
109         h ^= h>>16;
110         h ^= h>>8;
111         return (h ^ protocol ^ tunnelid) & 0xFF;
112 }
113
114 static __inline__ unsigned hash_src(u32 *src)
115 {
116         unsigned h = src[RSVP_DST_LEN-1];
117         h ^= h>>16;
118         h ^= h>>8;
119         h ^= h>>4;
120         return h & 0xF;
121 }
122
#ifdef CONFIG_NET_CLS_POLICE
/*
 * Run the policer attached to the current filter, if there is one.
 *
 * The macro relies on the names `f' and `skb' being in scope at the
 * point of use and must be expanded directly inside the loop that
 * walks the filters:
 *   - a negative verdict skips to the next filter via `continue';
 *   - a non-zero verdict is returned from the enclosing function;
 *   - a zero verdict falls through to the code after the macro.
 *
 * Because of the `continue', the body must not be wrapped in
 * do { } while (0): the `continue' would then bind to that wrapper
 * instead of the caller's loop.
 */
#define RSVP_POLICE() \
if (f->police) { \
	int verdict = tcf_police(skb, f->police); \
	if (verdict < 0) \
		continue; \
	if (verdict) \
		return verdict; \
}
#else
/* Policing is compiled out: the macro expands to nothing. */
#define RSVP_POLICE()
#endif
133
134
135 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
136                          struct tcf_result *res)
137 {
138         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
139         struct rsvp_session *s;
140         struct rsvp_filter *f;
141         unsigned h1, h2;
142         u32 *dst, *src;
143         u8 protocol;
144         u8 tunnelid = 0;
145         u8 *xprt;
146 #if RSVP_DST_LEN == 4
147         struct ipv6hdr *nhptr = skb->nh.ipv6h;
148 #else
149         struct iphdr *nhptr = skb->nh.iph;
150 #endif
151
152 restart:
153
154 #if RSVP_DST_LEN == 4
155         src = &nhptr->saddr.s6_addr32[0];
156         dst = &nhptr->daddr.s6_addr32[0];
157         protocol = nhptr->nexthdr;
158         xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
159 #else
160         src = &nhptr->saddr;
161         dst = &nhptr->daddr;
162         protocol = nhptr->protocol;
163         xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
164         if (nhptr->frag_off&__constant_htons(IP_MF|IP_OFFSET))
165                 return -1;
166 #endif
167
168         h1 = hash_dst(dst, protocol, tunnelid);
169         h2 = hash_src(src);
170
171         for (s = sht[h1]; s; s = s->next) {
172                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
173                     protocol == s->protocol &&
174                     !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
175 #if RSVP_DST_LEN == 4
176                     && dst[0] == s->dst[0]
177                     && dst[1] == s->dst[1]
178                     && dst[2] == s->dst[2]
179 #endif
180                     && tunnelid == s->tunnelid) {
181
182                         for (f = s->ht[h2]; f; f = f->next) {
183                                 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
184                                     !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
185 #if RSVP_DST_LEN == 4
186                                     && src[0] == f->src[0]
187                                     && src[1] == f->src[1]
188                                     && src[2] == f->src[2]
189 #endif
190                                     ) {
191                                         *res = f->res;
192
193                                         RSVP_POLICE();
194
195 matched:
196                                         if (f->tunnelhdr == 0)
197                                                 return 0;
198
199                                         tunnelid = f->res.classid;
200                                         nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
201                                         goto restart;
202                                 }
203                         }
204
205                         /* And wildcard bucket... */
206                         for (f = s->ht[16]; f; f = f->next) {
207                                 *res = f->res;
208                                 RSVP_POLICE();
209                                 goto matched;
210                         }
211                         return -1;
212                 }
213         }
214         return -1;
215 }
216
217 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
218 {
219         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
220         struct rsvp_session *s;
221         struct rsvp_filter *f;
222         unsigned h1 = handle&0xFF;
223         unsigned h2 = (handle>>8)&0xFF;
224
225         if (h2 > 16)
226                 return 0;
227
228         for (s = sht[h1]; s; s = s->next) {
229                 for (f = s->ht[h2]; f; f = f->next) {
230                         if (f->handle == handle)
231                                 return (unsigned long)f;
232                 }
233         }
234         return 0;
235 }
236
/*
 * Counterpart of rsvp_get().  rsvp_get() takes no reference, so there
 * is nothing to release here.
 */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* intentionally empty */
}
240
241 static int rsvp_init(struct tcf_proto *tp)
242 {
243         struct rsvp_head *data;
244
245         data = kmalloc(sizeof(struct rsvp_head), GFP_KERNEL);
246         if (data) {
247                 memset(data, 0, sizeof(struct rsvp_head));
248                 tp->root = data;
249                 return 0;
250         }
251         return -ENOBUFS;
252 }
253
254 static void rsvp_destroy(struct tcf_proto *tp)
255 {
256         struct rsvp_head *data = xchg(&tp->root, NULL);
257         struct rsvp_session **sht;
258         int h1, h2;
259
260         if (data == NULL)
261                 return;
262
263         sht = data->ht;
264
265         for (h1=0; h1<256; h1++) {
266                 struct rsvp_session *s;
267
268                 while ((s = sht[h1]) != NULL) {
269                         sht[h1] = s->next;
270
271                         for (h2=0; h2<=16; h2++) {
272                                 struct rsvp_filter *f;
273
274                                 while ((f = s->ht[h2]) != NULL) {
275                                         unsigned long cl;
276
277                                         s->ht[h2] = f->next;
278                                         if ((cl = __cls_set_class(&f->res.class, 0)) != 0)
279                                                 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
280 #ifdef CONFIG_NET_CLS_POLICE
281                                         tcf_police_release(f->police);
282 #endif
283                                         kfree(f);
284                                 }
285                         }
286                         kfree(s);
287                 }
288         }
289         kfree(data);
290 }
291
292 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
293 {
294         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
295         unsigned h = f->handle;
296         struct rsvp_session **sp;
297         struct rsvp_session *s = f->sess;
298         int i;
299
300         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
301                 if (*fp == f) {
302                         unsigned long cl;
303
304
305                         tcf_tree_lock(tp);
306                         *fp = f->next;
307                         tcf_tree_unlock(tp);
308
309                         if ((cl = cls_set_class(tp, &f->res.class, 0)) != 0)
310                                 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
311
312 #ifdef CONFIG_NET_CLS_POLICE
313                         tcf_police_release(f->police);
314 #endif
315
316                         kfree(f);
317
318                         /* Strip tree */
319
320                         for (i=0; i<=16; i++)
321                                 if (s->ht[i])
322                                         return 0;
323
324                         /* OK, session has no flows */
325                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
326                              *sp; sp = &(*sp)->next) {
327                                 if (*sp == s) {
328                                         tcf_tree_lock(tp);
329                                         *sp = s->next;
330                                         tcf_tree_unlock(tp);
331
332                                         kfree(s);
333                                         return 0;
334                                 }
335                         }
336
337                         return 0;
338                 }
339         }
340         return 0;
341 }
342
343 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
344 {
345         struct rsvp_head *data = tp->root;
346         int i = 0xFFFF;
347
348         while (i-- > 0) {
349                 u32 h;
350                 if ((data->hgenerator += 0x10000) == 0)
351                         data->hgenerator = 0x10000;
352                 h = data->hgenerator|salt;
353                 if (rsvp_get(tp, h) == 0)
354                         return h;
355         }
356         return 0;
357 }
358
359 static int tunnel_bts(struct rsvp_head *data)
360 {
361         int n = data->tgenerator>>5;
362         u32 b = 1<<(data->tgenerator&0x1F);
363         
364         if (data->tmap[n]&b)
365                 return 0;
366         data->tmap[n] |= b;
367         return 1;
368 }
369
370 static void tunnel_recycle(struct rsvp_head *data)
371 {
372         struct rsvp_session **sht = data->ht;
373         u32 tmap[256/32];
374         int h1, h2;
375
376         memset(tmap, 0, sizeof(tmap));
377
378         for (h1=0; h1<256; h1++) {
379                 struct rsvp_session *s;
380                 for (s = sht[h1]; s; s = s->next) {
381                         for (h2=0; h2<=16; h2++) {
382                                 struct rsvp_filter *f;
383
384                                 for (f = s->ht[h2]; f; f = f->next) {
385                                         if (f->tunnelhdr == 0)
386                                                 continue;
387                                         data->tgenerator = f->res.classid;
388                                         tunnel_bts(data);
389                                 }
390                         }
391                 }
392         }
393
394         memcpy(data->tmap, tmap, sizeof(tmap));
395 }
396
397 static u32 gen_tunnel(struct rsvp_head *data)
398 {
399         int i, k;
400
401         for (k=0; k<2; k++) {
402                 for (i=255; i>0; i--) {
403                         if (++data->tgenerator == 0)
404                                 data->tgenerator = 1;
405                         if (tunnel_bts(data))
406                                 return data->tgenerator;
407                 }
408                 tunnel_recycle(data);
409         }
410         return 0;
411 }
412
413 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
414                        u32 handle,
415                        struct rtattr **tca,
416                        unsigned long *arg)
417 {
418         struct rsvp_head *data = tp->root;
419         struct rsvp_filter *f, **fp;
420         struct rsvp_session *s, **sp;
421         struct tc_rsvp_pinfo *pinfo = NULL;
422         struct rtattr *opt = tca[TCA_OPTIONS-1];
423         struct rtattr *tb[TCA_RSVP_MAX];
424         unsigned h1, h2;
425         u32 *dst;
426         int err;
427
428         if (opt == NULL)
429                 return handle ? -EINVAL : 0;
430
431         if (rtattr_parse(tb, TCA_RSVP_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
432                 return -EINVAL;
433
434         if ((f = (struct rsvp_filter*)*arg) != NULL) {
435                 /* Node exists: adjust only classid */
436
437                 if (f->handle != handle && handle)
438                         return -EINVAL;
439                 if (tb[TCA_RSVP_CLASSID-1]) {
440                         unsigned long cl;
441
442                         f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
443                         cl = cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
444                         if (cl)
445                                 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
446                 }
447 #ifdef CONFIG_NET_CLS_POLICE
448                 if (tb[TCA_RSVP_POLICE-1]) {
449                         struct tcf_police *police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
450
451                         tcf_tree_lock(tp);
452                         police = xchg(&f->police, police);
453                         tcf_tree_unlock(tp);
454
455                         tcf_police_release(police);
456                 }
457 #endif
458                 return 0;
459         }
460
461         /* Now more serious part... */
462         if (handle)
463                 return -EINVAL;
464         if (tb[TCA_RSVP_DST-1] == NULL)
465                 return -EINVAL;
466
467         f = kmalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
468         if (f == NULL)
469                 return -ENOBUFS;
470
471         memset(f, 0, sizeof(*f));
472         h2 = 16;
473         if (tb[TCA_RSVP_SRC-1]) {
474                 err = -EINVAL;
475                 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
476                         goto errout;
477                 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
478                 h2 = hash_src(f->src);
479         }
480         if (tb[TCA_RSVP_PINFO-1]) {
481                 err = -EINVAL;
482                 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
483                         goto errout;
484                 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
485                 f->spi = pinfo->spi;
486                 f->tunnelhdr = pinfo->tunnelhdr;
487         }
488         if (tb[TCA_RSVP_CLASSID-1]) {
489                 err = -EINVAL;
490                 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
491                         goto errout;
492                 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
493         }
494
495         err = -EINVAL;
496         if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
497                 goto errout;
498         dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
499         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
500
501         err = -ENOMEM;
502         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
503                 goto errout;
504
505         if (f->tunnelhdr) {
506                 err = -EINVAL;
507                 if (f->res.classid > 255)
508                         goto errout;
509
510                 err = -ENOMEM;
511                 if (f->res.classid == 0 &&
512                     (f->res.classid = gen_tunnel(data)) == 0)
513                         goto errout;
514         }
515
516         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
517                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
518                     pinfo && pinfo->protocol == s->protocol &&
519                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
520 #if RSVP_DST_LEN == 4
521                     && dst[0] == s->dst[0]
522                     && dst[1] == s->dst[1]
523                     && dst[2] == s->dst[2]
524 #endif
525                     && pinfo->tunnelid == s->tunnelid) {
526
527 insert:
528                         /* OK, we found appropriate session */
529
530                         fp = &s->ht[h2];
531
532                         f->sess = s;
533                         if (f->tunnelhdr == 0)
534                                 cls_set_class(tp, &f->res.class, tp->q->ops->cl_ops->bind_tcf(tp->q, base, f->res.classid));
535 #ifdef CONFIG_NET_CLS_POLICE
536                         if (tb[TCA_RSVP_POLICE-1])
537                                 f->police = tcf_police_locate(tb[TCA_RSVP_POLICE-1], tca[TCA_RATE-1]);
538 #endif
539
540                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
541                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
542                                         break;
543                         f->next = *fp;
544                         wmb();
545                         *fp = f;
546
547                         *arg = (unsigned long)f;
548                         return 0;
549                 }
550         }
551
552         /* No session found. Create new one. */
553
554         err = -ENOBUFS;
555         s = kmalloc(sizeof(struct rsvp_session), GFP_KERNEL);
556         if (s == NULL)
557                 goto errout;
558         memset(s, 0, sizeof(*s));
559         memcpy(s->dst, dst, sizeof(s->dst));
560
561         if (pinfo) {
562                 s->dpi = pinfo->dpi;
563                 s->protocol = pinfo->protocol;
564                 s->tunnelid = pinfo->tunnelid;
565         }
566         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
567                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
568                         break;
569         }
570         s->next = *sp;
571         wmb();
572         *sp = s;
573         
574         goto insert;
575
576 errout:
577         if (f)
578                 kfree(f);
579         return err;
580 }
581
582 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
583 {
584         struct rsvp_head *head = tp->root;
585         unsigned h, h1;
586
587         if (arg->stop)
588                 return;
589
590         for (h = 0; h < 256; h++) {
591                 struct rsvp_session *s;
592
593                 for (s = head->ht[h]; s; s = s->next) {
594                         for (h1 = 0; h1 <= 16; h1++) {
595                                 struct rsvp_filter *f;
596
597                                 for (f = s->ht[h1]; f; f = f->next) {
598                                         if (arg->count < arg->skip) {
599                                                 arg->count++;
600                                                 continue;
601                                         }
602                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
603                                                 arg->stop = 1;
604                                                 break;
605                                         }
606                                         arg->count++;
607                                 }
608                         }
609                 }
610         }
611 }
612
613 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
614                      struct sk_buff *skb, struct tcmsg *t)
615 {
616         struct rsvp_filter *f = (struct rsvp_filter*)fh;
617         struct rsvp_session *s;
618         unsigned char    *b = skb->tail;
619         struct rtattr *rta;
620         struct tc_rsvp_pinfo pinfo;
621
622         if (f == NULL)
623                 return skb->len;
624         s = f->sess;
625
626         t->tcm_handle = f->handle;
627
628
629         rta = (struct rtattr*)b;
630         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
631
632         RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
633         pinfo.dpi = s->dpi;
634         pinfo.spi = f->spi;
635         pinfo.protocol = s->protocol;
636         pinfo.tunnelid = s->tunnelid;
637         pinfo.tunnelhdr = f->tunnelhdr;
638         RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
639         if (f->res.classid)
640                 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
641         if (((f->handle>>8)&0xFF) != 16)
642                 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
643 #ifdef CONFIG_NET_CLS_POLICE
644         if (f->police) {
645                 struct rtattr * p_rta = (struct rtattr*)skb->tail;
646
647                 RTA_PUT(skb, TCA_RSVP_POLICE, 0, NULL);
648
649                 if (tcf_police_dump(skb, f->police) < 0)
650                         goto rtattr_failure;
651
652                 p_rta->rta_len = skb->tail - (u8*)p_rta;
653         }
654 #endif
655
656         rta->rta_len = skb->tail - b;
657 #ifdef CONFIG_NET_CLS_POLICE
658         if (f->police) {
659                 if (qdisc_copy_stats(skb, &f->police->stats))
660                         goto rtattr_failure;
661         }
662 #endif
663         return skb->len;
664
665 rtattr_failure:
666         skb_trim(skb, b - skb->data);
667         return -1;
668 }
669
670 static struct tcf_proto_ops RSVP_OPS = {
671         .next           =       NULL,
672         .kind           =       RSVP_ID,
673         .classify       =       rsvp_classify,
674         .init           =       rsvp_init,
675         .destroy        =       rsvp_destroy,
676         .get            =       rsvp_get,
677         .put            =       rsvp_put,
678         .change         =       rsvp_change,
679         .delete         =       rsvp_delete,
680         .walk           =       rsvp_walk,
681         .dump           =       rsvp_dump,
682         .owner          =       THIS_MODULE,
683 };
684
685 static int __init init_rsvp(void)
686 {
687         return register_tcf_proto_ops(&RSVP_OPS);
688 }
689
690 static void __exit exit_rsvp(void) 
691 {
692         unregister_tcf_proto_ops(&RSVP_OPS);
693 }
694
695 module_init(init_rsvp)
696 module_exit(exit_rsvp)