/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *	The filters are packed to hash tables of key nodes
 *	with a set of 32bit key/mask pairs at every node.
 *	Nodes reference next level hash tables etc.
 *
 *	This scheme is the best universal classifier I managed to
 *	invent; it is not super-fast, but it is not slow (provided you
 *	program it correctly), and general enough. And its relative
 *	speed grows as the number of rules becomes larger.
 *
 *	It seems that it represents the best middle point between
 *	speed and manageability both by human and by machine.
 *
 *	It is especially useful for link sharing combined with QoS;
 *	pure RSVP doesn't need such a general approach and can use
 *	much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 *	JHS: We should remove the CONFIG_NET_CLS_IND from here
 *	eventually when the meta match extension is made available
 */
32 #include <asm/uaccess.h>
33 #include <asm/system.h>
34 #include <asm/bitops.h>
35 #include <linux/config.h>
36 #include <linux/module.h>
37 #include <linux/types.h>
38 #include <linux/kernel.h>
39 #include <linux/sched.h>
40 #include <linux/string.h>
42 #include <linux/socket.h>
43 #include <linux/sockios.h>
45 #include <linux/errno.h>
46 #include <linux/interrupt.h>
47 #include <linux/if_ether.h>
48 #include <linux/inet.h>
49 #include <linux/netdevice.h>
50 #include <linux/etherdevice.h>
51 #include <linux/notifier.h>
52 #include <linux/rtnetlink.h>
54 #include <net/route.h>
55 #include <linux/skbuff.h>
57 #include <net/pkt_sched.h>
/*
 * NOTE(review): this span is truncated — the lines below are fragments of
 * three definitions (struct tc_u_knode, struct tc_u_hnode, struct
 * tc_u_common) with the original kernel line numbers fused into the text.
 * The struct keywords, braces, and several members (handle, refcnt,
 * divisor, hgenerator, q, ...) are missing. Recover the complete
 * definitions from net/sched/cls_u32.c before building.
 */
/* -- fragment of struct tc_u_knode: one u32 filter (key node) -- */
62 struct tc_u_knode *next;
64 struct tc_u_hnode *ht_up;
65 #ifdef CONFIG_NET_CLS_ACT
66 struct tc_action *action;
67 #ifdef CONFIG_NET_CLS_IND
71 #ifdef CONFIG_NET_CLS_POLICE
72 struct tcf_police *police;
76 struct tcf_result res;
77 struct tc_u_hnode *ht_down;
78 struct tc_u32_sel sel;
/* -- fragment of struct tc_u_hnode: one hash table of key nodes;
 * ht[1] is the classic trailing variable-length array idiom -- */
83 struct tc_u_hnode *next;
85 struct tc_u_common *tp_c;
89 struct tc_u_knode *ht[1];
/* -- fragment of struct tc_u_common: state shared by all u32 filters
 * attached to one qdisc (see u32_init matching tp_c->q == tp->q) -- */
94 struct tc_u_common *next;
95 struct tc_u_hnode *hlist;
/* Global singly-linked list of all tc_u_common instances. */
101 static struct tc_u_common *u32_list;
103 static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift)
105 unsigned h = (key & sel->hmask)>>fshift;
/*
 * NOTE(review): this function is truncated — the labels (next_ht,
 * next_knode, check_terminal, deadloop), most braces, the stack/off2/sel
 * declarations and several #endif lines are missing, and the original
 * kernel line numbers are fused into the text. Recover the full body from
 * net/sched/cls_u32.c; comments below annotate only what is visible.
 */
/* Classify skb: walk the hash-table hierarchy starting at tp->root,
 * matching each node's key/mask pairs against packet words. */
110 static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
/* Explicit stack for descending linked hash tables, bounded by
 * TC_U32_MAXDEPTH to stop runaway configurations. */
113 struct tc_u_knode *knode;
115 } stack[TC_U32_MAXDEPTH];
117 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root;
/* ptr walks the packet from the network header. */
118 u8 *ptr = skb->nh.raw;
119 struct tc_u_knode *n;
/* Match loop: every key of the selector must match, i.e.
 * (packet_word XOR val) AND mask == 0; off2 is the variable offset. */
130 struct tc_u32_key *key = n->sel.keys;
132 #ifdef CONFIG_CLS_U32_PERF
135 for (i = n->sel.nkeys; i>0; i--, key++) {
137 if ((*(u32*)(ptr+key->off+(off2&key->offmask))^key->val)&key->mask) {
141 #ifdef CONFIG_CLS_U32_PERF
/* Leaf node (no lower hash table): deliver the result if terminal. */
145 if (n->ht_down == NULL) {
147 if (n->sel.flags&TC_U32_TERMINAL) {
150 #ifdef CONFIG_NET_CLS_ACT
151 #ifdef CONFIG_NET_CLS_IND
152 /* yes, i know it sucks but the feature is
153 ** optional dammit! - JHS */
/* Optional ingress-device match: reject if filter names a device and
 * the skb has none, or the names differ. */
154 if (0 != n->indev[0]) {
155 if (NULL == skb->input_dev) {
159 if (0 != strcmp(n->indev, skb->input_dev->name)) {
166 #ifdef CONFIG_CLS_U32_PERF
/* Run attached actions; a classid set by an action overrides res. */
170 int pol_res = tcf_action_exec(skb, n->action);
171 if (skb->tc_classid > 0) {
172 res->classid = skb->tc_classid;
/* Legacy policer path when actions are not compiled in. */
180 #ifdef CONFIG_NET_CLS_POLICE
182 int pol_res = tcf_police(skb, n->police);
/* PUSH: descend into n->ht_down, remembering node and packet ptr. */
195 if (sdepth >= TC_U32_MAXDEPTH)
197 stack[sdepth].knode = n;
198 stack[sdepth].ptr = ptr;
/* Select bucket in the lower table by hashing a packet word. */
204 sel = ht->divisor&u32_hash_fold(*(u32*)(ptr+n->sel.hoff), &n->sel,n->fshift);
206 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT)))
/* Compute the next-header offset: fixed off plus, for VAROFFSET, a
 * masked/shifted 16-bit field from the packet, rounded to 4 bytes. */
209 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) {
210 off2 = n->sel.off + 3;
211 if (n->sel.flags&TC_U32_VAROFFSET)
212 off2 += ntohs(n->sel.offmask & *(u16*)(ptr+n->sel.offoff)) >>n->sel.offshift;
/* EAT: consume the offset by advancing the base pointer. */
215 if (n->sel.flags&TC_U32_EAT) {
/* POP: resume matching at the saved upper-level node. */
226 n = stack[sdepth].knode;
228 ptr = stack[sdepth].ptr;
/* Depth limit exceeded — presumably rate-limited in the full source. */
235 printk("cls_u32: dead loop\n");
239 static __inline__ struct tc_u_hnode *
240 u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
242 struct tc_u_hnode *ht;
244 for (ht = tp_c->hlist; ht; ht = ht->next)
245 if (ht->handle == handle)
251 static __inline__ struct tc_u_knode *
252 u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
255 struct tc_u_knode *n = NULL;
257 sel = TC_U32_HASH(handle);
258 if (sel > ht->divisor)
261 for (n = ht->ht[sel]; n; n = n->next)
262 if (n->handle == handle)
269 static unsigned long u32_get(struct tcf_proto *tp, u32 handle)
271 struct tc_u_hnode *ht;
272 struct tc_u_common *tp_c = tp->data;
274 if (TC_U32_HTID(handle) == TC_U32_ROOT)
277 ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle));
282 if (TC_U32_KEY(handle) == 0)
283 return (unsigned long)ht;
285 return (unsigned long)u32_lookup_key(ht, handle);
/* tcf_proto_ops.put: release a reference obtained via u32_get.
 * u32 keeps no per-reference state, so this is a no-op. */
static void u32_put(struct tcf_proto *tp, unsigned long f)
{
}
292 static u32 gen_new_htid(struct tc_u_common *tp_c)
297 if (++tp_c->hgenerator == 0x7FF)
298 tp_c->hgenerator = 1;
299 } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
301 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
/*
 * NOTE(review): this function is truncated — the braces, the NULL checks
 * after each kmalloc (with their -ENOBUFS error returns), the refcnt
 * increments, and the final tp->root/tp->data assignments and return are
 * missing, and the original kernel line numbers are fused into the text.
 * Recover the full body from net/sched/cls_u32.c.
 */
/* tcf_proto_ops.init: attach the classifier to a qdisc. Reuses an
 * existing tc_u_common if one already serves tp->q, and creates the
 * root hash table (divisor 0 = single bucket). */
304 static int u32_init(struct tcf_proto *tp)
306 struct tc_u_hnode *root_ht;
307 struct tc_u_common *tp_c;
/* Share common state across all u32 instances on the same qdisc. */
309 for (tp_c = u32_list; tp_c; tp_c = tp_c->next)
310 if (tp_c->q == tp->q)
313 root_ht = kmalloc(sizeof(*root_ht), GFP_KERNEL)
317 memset(root_ht, 0, sizeof(*root_ht));
318 root_ht->divisor = 0;
/* First instance on a qdisc gets the fixed root htid 0x80000000;
 * later ones get a freshly generated id. */
320 root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
/* No shared state yet for this qdisc: allocate it (error path for a
 * failed kmalloc is among the missing lines). */
323 tp_c = kmalloc(sizeof(*tp_c), GFP_KERNEL);
328 memset(tp_c, 0, sizeof(*tp_c));
330 tp_c->next = u32_list;
/* Link the root table onto the common hlist. */
335 root_ht->next = tp_c->hlist;
336 tp_c->hlist = root_ht;
337 root_ht->tp_c = tp_c;
344 static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n)
348 if ((cl = __cls_set_class(&n->res.class, 0)) != 0)
349 tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
350 #ifdef CONFIG_NET_CLS_ACT
352 tcf_action_destroy(n->action, TCA_ACT_UNBIND);
355 #ifdef CONFIG_NET_CLS_POLICE
356 tcf_police_release(n->police, TCA_ACT_UNBIND);
360 n->ht_down->refcnt--;
365 static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
367 struct tc_u_knode **kp;
368 struct tc_u_hnode *ht = key->ht_up;
371 for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) {
377 u32_destroy_key(tp, key);
386 static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
388 struct tc_u_knode *n;
391 for (h=0; h<=ht->divisor; h++) {
392 while ((n = ht->ht[h]) != NULL) {
395 u32_destroy_key(tp, n);
400 static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
402 struct tc_u_common *tp_c = tp->data;
403 struct tc_u_hnode **hn;
405 BUG_TRAP(!ht->refcnt);
407 u32_clear_hnode(tp, ht);
409 for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) {
421 static void u32_destroy(struct tcf_proto *tp)
423 struct tc_u_common *tp_c = tp->data;
424 struct tc_u_hnode *root_ht = xchg(&tp->root, NULL);
426 BUG_TRAP(root_ht != NULL);
428 if (root_ht && --root_ht->refcnt == 0)
429 u32_destroy_hnode(tp, root_ht);
431 if (--tp_c->refcnt == 0) {
432 struct tc_u_hnode *ht;
433 struct tc_u_common **tp_cp;
435 for (tp_cp = &u32_list; *tp_cp; tp_cp = &(*tp_cp)->next) {
436 if (*tp_cp == tp_c) {
442 for (ht=tp_c->hlist; ht; ht = ht->next)
443 u32_clear_hnode(tp, ht);
445 while ((ht = tp_c->hlist) != NULL) {
446 tp_c->hlist = ht->next;
448 BUG_TRAP(ht->refcnt == 0);
459 static int u32_delete(struct tcf_proto *tp, unsigned long arg)
461 struct tc_u_hnode *ht = (struct tc_u_hnode*)arg;
466 if (TC_U32_KEY(ht->handle))
467 return u32_delete_key(tp, (struct tc_u_knode*)ht);
472 if (--ht->refcnt == 0)
473 u32_destroy_hnode(tp, ht);
478 static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
480 struct tc_u_knode *n;
483 for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
484 if (i < TC_U32_NODE(n->handle))
485 i = TC_U32_NODE(n->handle);
488 return handle|(i>0xFFF ? 0xFFF : i);
/*
 * NOTE(review): this function is truncated — the est/declaration lines,
 * most braces, error returns (-EINVAL/-ENOBUFS), the tcf_tree_lock
 * sections around each xchg, and the final return are missing, and the
 * original kernel line numbers are fused into the text. Recover the full
 * body from net/sched/cls_u32.c.
 */
/* Apply netlink attributes (tb[]) to key node n: link to a lower hash
 * table, bind a classid, attach actions/policer, record the ingress
 * device name. */
491 static int u32_set_parms(struct Qdisc *q, unsigned long base,
492 struct tc_u_hnode *ht,
493 struct tc_u_knode *n, struct rtattr **tb,
496 #ifdef CONFIG_NET_CLS_ACT
497 struct tc_action *act = NULL;
/* TCA_U32_LINK: point this node at another hash table. A handle with a
 * key part is invalid here; the old ht_down is swapped out via xchg
 * (and presumably unref'd in the missing lines). */
500 if (tb[TCA_U32_LINK-1]) {
501 u32 handle = *(u32*)RTA_DATA(tb[TCA_U32_LINK-1]);
502 struct tc_u_hnode *ht_down = NULL;
504 if (TC_U32_KEY(handle))
508 ht_down = u32_lookup_ht(ht->tp_c, handle);
516 ht_down = xchg(&n->ht_down, ht_down);
/* TCA_U32_CLASSID: bind the node to a class of the owning qdisc and
 * unbind whatever class was previously attached. */
522 if (tb[TCA_U32_CLASSID-1]) {
525 n->res.classid = *(u32*)RTA_DATA(tb[TCA_U32_CLASSID-1]);
527 cl = __cls_set_class(&n->res.class, q->ops->cl_ops->bind_tcf(q, base, n->res.classid));
530 q->ops->cl_ops->unbind_tcf(q, cl);
532 #ifdef CONFIG_NET_CLS_ACT
533 /*backward compatibility */
/* Old-style TCA_U32_POLICE translated into a "police" action marked
 * TCA_OLD_COMPAT; on init failure the fresh action is destroyed
 * (error return among the missing lines). */
534 if (tb[TCA_U32_POLICE-1])
536 act = kmalloc(sizeof(*act),GFP_KERNEL);
540 memset(act,0,sizeof(*act));
541 ret = tcf_action_init_1(tb[TCA_U32_POLICE-1], est,act,"police", TCA_ACT_NOREPLACE, TCA_ACT_BIND);
543 tcf_action_destroy(act, TCA_ACT_UNBIND);
546 act->type = TCA_OLD_COMPAT;
/* Swap in the new action; the displaced one is released. */
549 act = xchg(&n->action, act);
552 tcf_action_destroy(act, TCA_ACT_UNBIND);
/* TCA_U32_ACT: new-style action list, same allocate/init/swap pattern. */
556 if(tb[TCA_U32_ACT-1]) {
557 act = kmalloc(sizeof(*act),GFP_KERNEL);
560 memset(act,0,sizeof(*act));
561 ret = tcf_action_init(tb[TCA_U32_ACT-1], est,act,NULL,TCA_ACT_NOREPLACE, TCA_ACT_BIND);
563 tcf_action_destroy(act, TCA_ACT_UNBIND);
568 act = xchg(&n->action, act);
571 tcf_action_destroy(act, TCA_ACT_UNBIND);
574 #ifdef CONFIG_NET_CLS_IND
/* TCA_U32_INDEV: copy the ingress device name; payload length is
 * checked against IFNAMSIZ before the sprintf. */
576 if(tb[TCA_U32_INDEV-1]) {
577 struct rtattr *input_dev = tb[TCA_U32_INDEV-1];
578 if (RTA_PAYLOAD(input_dev) >= IFNAMSIZ) {
579 printk("cls_u32: bad indev name %s\n",(char*)RTA_DATA(input_dev));
580 /* should we clear state first? */
583 sprintf(n->indev, "%s", (char*)RTA_DATA(input_dev));
/* Non-ACT build: plain policer attach with the same swap/release. */
588 #ifdef CONFIG_NET_CLS_POLICE
589 if (tb[TCA_U32_POLICE-1]) {
590 struct tcf_police *police = tcf_police_locate(tb[TCA_U32_POLICE-1], est);
592 police = xchg(&n->police, police);
594 tcf_police_release(police, TCA_ACT_UNBIND);
/*
 * NOTE(review): this function is truncated — the est/arg parameters,
 * err/htid/fshift declarations, many braces, error returns, the mask
 * computation around the while loop, and the locked list insertion are
 * missing, and the original kernel line numbers are fused into the text.
 * Recover the full body from net/sched/cls_u32.c.
 */
/* tcf_proto_ops.change: create or update a filter (or a hash table when
 * TCA_U32_DIVISOR is given) from netlink attributes. */
602 static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
606 struct tc_u_common *tp_c = tp->data;
607 struct tc_u_hnode *ht;
608 struct tc_u_knode *n;
609 struct tc_u32_sel *s;
610 struct rtattr *opt = tca[TCA_OPTIONS-1];
611 struct rtattr *tb[TCA_U32_MAX];
/* No options: only valid when no specific handle was requested. */
616 return handle ? -EINVAL : 0;
618 if (rtattr_parse(tb, TCA_U32_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
/* Existing node (update path): only key nodes may be re-parameterized. */
621 if ((n = (struct tc_u_knode*)*arg) != NULL) {
622 if (TC_U32_KEY(n->handle) == 0)
625 return u32_set_parms(tp->q, base, n->ht_up, n, tb, tca[TCA_RATE-1]);
/* TCA_U32_DIVISOR: create a new hash table instead of a key node.
 * divisor is stored as bucket_count-1 and limited to 0x100. */
628 if (tb[TCA_U32_DIVISOR-1]) {
629 unsigned divisor = *(unsigned*)RTA_DATA(tb[TCA_U32_DIVISOR-1]);
631 if (--divisor > 0x100)
633 if (TC_U32_KEY(handle))
636 handle = gen_new_htid(tp->data);
640 ht = kmalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL);
643 memset(ht, 0, sizeof(*ht) + divisor*sizeof(void*));
646 ht->divisor = divisor;
648 ht->next = tp_c->hlist;
650 *arg = (unsigned long)ht;
/* TCA_U32_HASH selects the parent table; TC_U32_ROOT means tp->root. */
654 if (tb[TCA_U32_HASH-1]) {
655 htid = *(unsigned*)RTA_DATA(tb[TCA_U32_HASH-1]);
656 if (TC_U32_HTID(htid) == TC_U32_ROOT) {
660 ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid));
669 if (ht->divisor < TC_U32_HASH(htid))
/* A caller-supplied handle must agree with the chosen htid; otherwise
 * a fresh node id is generated. */
673 if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
675 handle = htid | TC_U32_NODE(handle);
677 handle = gen_new_kid(ht, htid);
/* A selector of at least sizeof(tc_u32_sel) is mandatory for key nodes. */
679 if (tb[TCA_U32_SEL-1] == 0 ||
680 RTA_PAYLOAD(tb[TCA_U32_SEL-1]) < sizeof(struct tc_u32_sel))
683 s = RTA_DATA(tb[TCA_U32_SEL-1]);
/* Perf-counter builds need room for the per-key counters too. */
685 #ifdef CONFIG_CLS_U32_PERF
686 if (RTA_PAYLOAD(tb[TCA_U32_SEL-1]) <
687 (s->nkeys*sizeof(struct tc_u32_key)) + sizeof(struct tc_u32_sel)) {
688 printk("Please upgrade your iproute2 tools or compile proper options in!\n");
/* Allocate the node with its trailing key array and copy the selector. */
692 n = kmalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
695 memset(n, 0, sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key));
696 memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
/* Derive fshift from the hash mask (count trailing zero bits). */
703 while (!(mask & 1)) {
710 err = u32_set_parms(tp->q, base, ht, n, tb, tca[TCA_RATE-1]);
/* Insert sorted by node id into the destination bucket. */
712 struct tc_u_knode **ins;
713 for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next)
714 if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle))
721 *arg = (unsigned long)n;
728 static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
730 struct tc_u_common *tp_c = tp->data;
731 struct tc_u_hnode *ht;
732 struct tc_u_knode *n;
738 for (ht = tp_c->hlist; ht; ht = ht->next) {
739 if (arg->count >= arg->skip) {
740 if (arg->fn(tp, (unsigned long)ht, arg) < 0) {
746 for (h = 0; h <= ht->divisor; h++) {
747 for (n = ht->ht[h]; n; n = n->next) {
748 if (arg->count < arg->skip) {
752 if (arg->fn(tp, (unsigned long)n, arg) < 0) {
/*
 * NOTE(review): this function is truncated — the NULL check on fh, the
 * rta declaration, several braces/#endif lines, the rtattr_failure label
 * and its skb_trim rollback, and the final return are missing, and the
 * original kernel line numbers are fused into the text. Recover the full
 * body from net/sched/cls_u32.c.
 */
/* tcf_proto_ops.dump: serialize one filter (or hash table) into a
 * netlink message; RTA_PUT jumps to the (missing) rtattr_failure label
 * when the skb runs out of room. */
762 static int u32_dump(struct tcf_proto *tp, unsigned long fh,
763 struct sk_buff *skb, struct tcmsg *t)
765 struct tc_u_knode *n = (struct tc_u_knode*)fh;
/* Remember the tail so the message can be truncated back on failure. */
766 unsigned char *b = skb->tail;
772 t->tcm_handle = n->handle;
774 rta = (struct rtattr*)b;
775 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
/* Handle without a key part: fh is actually a hash table — dump only
 * its divisor (stored as bucket_count-1, reported +1). */
777 if (TC_U32_KEY(n->handle) == 0) {
778 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh;
779 u32 divisor = ht->divisor+1;
780 RTA_PUT(skb, TCA_U32_DIVISOR, 4, &divisor);
/* Key node: dump selector (with its trailing key array), parent htid,
 * classid, and the lower-table link if present. */
782 RTA_PUT(skb, TCA_U32_SEL,
783 sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key),
786 u32 htid = n->handle & 0xFFFFF000;
787 RTA_PUT(skb, TCA_U32_HASH, 4, &htid);
790 RTA_PUT(skb, TCA_U32_CLASSID, 4, &n->res.classid);
792 RTA_PUT(skb, TCA_U32_LINK, 4, &n->ht_down->handle);
793 #ifdef CONFIG_NET_CLS_ACT
794 /* again for backward compatible mode - we want
795 * to work with both old and new modes of entering
796 * tc data even if iproute2 was newer - jhs
/* New-style actions under TCA_U32_ACT; old-compat ones re-emitted
 * under TCA_U32_POLICE for old userspace. */
799 struct rtattr * p_rta = (struct rtattr*)skb->tail;
801 if (n->action->type != TCA_OLD_COMPAT) {
802 RTA_PUT(skb, TCA_U32_ACT, 0, NULL);
803 if (tcf_action_dump(skb,n->action, 0, 0) < 0) {
807 RTA_PUT(skb, TCA_U32_POLICE, 0, NULL);
808 if (tcf_action_dump_old(skb,n->action,0,0) < 0) {
/* Close the nested attribute by patching its length. */
813 p_rta->rta_len = skb->tail - (u8*)p_rta;
815 #ifdef CONFIG_NET_CLS_IND
816 if(strlen(n->indev)) {
817 struct rtattr * p_rta = (struct rtattr*)skb->tail;
818 RTA_PUT(skb, TCA_U32_INDEV, IFNAMSIZ, n->indev);
819 p_rta->rta_len = skb->tail - (u8*)p_rta;
/* Non-ACT build: dump the policer instead. */
824 #ifdef CONFIG_NET_CLS_POLICE
826 struct rtattr * p_rta = (struct rtattr*)skb->tail;
827 RTA_PUT(skb, TCA_U32_POLICE, 0, NULL);
829 if (tcf_police_dump(skb, n->police) < 0)
832 p_rta->rta_len = skb->tail - (u8*)p_rta;
/* Close the outer TCA_OPTIONS attribute. */
839 rta->rta_len = skb->tail - b;
/* Stats are appended only for key nodes. */
840 #ifdef CONFIG_NET_CLS_ACT
841 if (TC_U32_KEY(n->handle) && n->action && n->action->type == TCA_OLD_COMPAT) {
842 if (tcf_action_copy_stats(skb,n->action))
846 #ifdef CONFIG_NET_CLS_POLICE
847 if (TC_U32_KEY(n->handle) && n->police) {
848 if (qdisc_copy_stats(skb, &n->police->stats,
849 n->police->stats_lock))
/* rtattr_failure rollback: drop everything written by this call. */
857 skb_trim(skb, b - skb->data);
861 static struct tcf_proto_ops cls_u32_ops = {
864 .classify = u32_classify,
866 .destroy = u32_destroy,
869 .change = u32_change,
870 .delete = u32_delete,
873 .owner = THIS_MODULE,
876 static int __init init_u32(void)
878 return register_tcf_proto_ops(&cls_u32_ops);
881 static void __exit exit_u32(void)
883 unregister_tcf_proto_ops(&cls_u32_ops);
886 module_init(init_u32)
887 module_exit(exit_u32)
888 MODULE_LICENSE("GPL");