Merge to kernel-2.6.20-1.2949.fc6.vs2.2.0.1
[linux-2.6.git] / net / ipv4 / netfilter / ip_set_iptree.c
1 /* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation.  
6  */
7
8 /* Kernel module implementing an IP set type: the iptree type */
9
10 #include <linux/version.h>
11 #include <linux/module.h>
12 #include <linux/ip.h>
13 #include <linux/skbuff.h>
14 #include <linux/slab.h>
15 #include <linux/delay.h>
16 #include <linux/netfilter_ipv4/ip_tables.h>
17 #include <linux/netfilter_ipv4/ip_set.h>
18 #include <linux/errno.h>
19 #include <asm/uaccess.h>
20 #include <asm/bitops.h>
21 #include <linux/spinlock.h>
22
/* Backward compatibility: when the kernel headers do not provide the
 * __nocast annotation, define it to nothing so it can still be used
 * in the prototypes below. */
#if !defined(__nocast)
# define __nocast
#endif
27
28 #include <linux/netfilter_ipv4/ip_set_iptree.h>
29
30 static int limit = MAX_RANGE;
31
/* Garbage collection interval in seconds.  The value is parenthesized
 * so that it expands correctly inside larger expressions (for example
 * as a divisor). */
#define IPTREE_GC_TIME          (5*60)
/* Number of milliseconds to sleep before trying again to delete the
 * gc timer when a set is being destroyed or flushed. */
#define IPTREE_DESTROY_SLEEP    100
37
38 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
39 static struct kmem_cache *branch_cachep;
40 static struct kmem_cache *leaf_cachep;
41 #else
42 static kmem_cache_t *branch_cachep;
43 static kmem_cache_t *leaf_cachep;
44 #endif
45
/* Split the host-order IPv4 address that addrp points to into its four
 * octets, a being the most significant one and d the least.
 *
 * The octets are extracted arithmetically instead of by indexing the
 * bytes in memory, so the result does not depend on the byte order of
 * the machine and always agrees with the
 * (a << 24) | (b << 16) | (c << 8) | d reconstruction used when the
 * members of a set are listed.
 *
 * addrp must point to an unsigned 32-bit value (every user in this
 * file passes an ip_set_ip_t pointer). */
#define ABCD(a,b,c,d,addrp) do {                                \
        (a) = (unsigned char)((*(addrp) >> 24) & 0xff);         \
        (b) = (unsigned char)((*(addrp) >> 16) & 0xff);         \
        (c) = (unsigned char)((*(addrp) >> 8) & 0xff);          \
        (d) = (unsigned char)(*(addrp) & 0xff);                 \
} while (0)
52
/* Step one level down the tree while testing an address: load slot
 * elem of (map)->tree into branch.  If the slot is empty the macro
 * makes the ENCLOSING FUNCTION return 0. */
#define TESTIP_WALK(map, elem, branch) do {     \
        if (!(map)->tree[elem])                 \
                return 0;                       \
        branch = (map)->tree[elem];             \
} while (0)
59
60 static inline int
61 __testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
62 {
63         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
64         struct ip_set_iptreeb *btree;
65         struct ip_set_iptreec *ctree;
66         struct ip_set_iptreed *dtree;
67         unsigned char a,b,c,d;
68
69         if (!ip)
70                 return -ERANGE;
71         
72         *hash_ip = ip;
73         ABCD(a, b, c, d, hash_ip);
74         DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout);
75         TESTIP_WALK(map, a, btree);
76         TESTIP_WALK(btree, b, ctree);
77         TESTIP_WALK(ctree, c, dtree);
78         DP("%lu %lu", dtree->expires[d], jiffies);
79         return !!(map->timeout ? (time_after(dtree->expires[d], jiffies))
80                                : dtree->expires[d]);
81 }
82
83 static int
84 testip(struct ip_set *set, const void *data, size_t size,
85        ip_set_ip_t *hash_ip)
86 {
87         struct ip_set_req_iptree *req = 
88             (struct ip_set_req_iptree *) data;
89
90         if (size != sizeof(struct ip_set_req_iptree)) {
91                 ip_set_printk("data length wrong (want %zu, have %zu)",
92                               sizeof(struct ip_set_req_iptree),
93                               size);
94                 return -EINVAL;
95         }
96         return __testip(set, req->ip, hash_ip);
97 }
98
99 static int
100 testip_kernel(struct ip_set *set, 
101               const struct sk_buff *skb,
102               ip_set_ip_t *hash_ip,
103               const u_int32_t *flags,
104               unsigned char index)
105 {
106         int res;
107         
108         DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
109            flags[index] & IPSET_SRC ? "SRC" : "DST",
110            NIPQUAD(skb->nh.iph->saddr),
111            NIPQUAD(skb->nh.iph->daddr));
112
113         res =  __testip(set,
114                         ntohl(flags[index] & IPSET_SRC 
115                                 ? skb->nh.iph->saddr 
116                                 : skb->nh.iph->daddr),
117                         hash_ip);
118         return (res < 0 ? 0 : res);
119 }
120
/* Step one level down the tree while adding an address: load slot elem
 * of (map)->tree into branch.  An empty slot is filled with a zeroed
 * object of the given type allocated from cachep with the given
 * allocation flags.  If that allocation fails the macro makes the
 * ENCLOSING FUNCTION return -ENOMEM. */
#define ADDIP_WALK(map, elem, branch, type, cachep, flags) do { \
        if (!(map)->tree[elem]) {                               \
                branch = (type *)                               \
                        kmem_cache_alloc(cachep, flags);        \
                if (!branch)                                    \
                        return -ENOMEM;                         \
                memset(branch, 0, sizeof(*branch));             \
                (map)->tree[elem] = branch;                     \
                DP("alloc %u", elem);                           \
        } else {                                                \
                DP("found %u", elem);                           \
                branch = (map)->tree[elem];                     \
        }                                                       \
} while (0)
135
136 static inline int
137 __addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout,
138         ip_set_ip_t *hash_ip,
139         unsigned int __nocast flags)
140 {
141         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
142         struct ip_set_iptreeb *btree;
143         struct ip_set_iptreec *ctree;
144         struct ip_set_iptreed *dtree;
145         unsigned char a,b,c,d;
146         int ret = 0;
147         
148         if (!ip || map->elements > limit)
149                 /* We could call the garbage collector
150                  * but it's probably overkill */
151                 return -ERANGE;
152         
153         *hash_ip = ip;
154         ABCD(a, b, c, d, hash_ip);
155         DP("%u %u %u %u timeout %u", a, b, c, d, timeout);
156         ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep, flags);
157         ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep, flags);
158         ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep, flags);
159         if (dtree->expires[d]
160             && (!map->timeout || time_after(dtree->expires[d], jiffies)))
161                 ret = -EEXIST;
162         dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1;
163         /* Lottery */
164         if (dtree->expires[d] == 0)
165                 dtree->expires[d] = 1;
166         DP("%u %lu", d, dtree->expires[d]);
167         if (ret == 0)
168                 map->elements++;
169         return ret;
170 }
171
172 static int
173 addip(struct ip_set *set, const void *data, size_t size,
174       ip_set_ip_t *hash_ip)
175 {
176         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
177         struct ip_set_req_iptree *req = 
178                 (struct ip_set_req_iptree *) data;
179
180         if (size != sizeof(struct ip_set_req_iptree)) {
181                 ip_set_printk("data length wrong (want %zu, have %zu)",
182                               sizeof(struct ip_set_req_iptree),
183                               size);
184                 return -EINVAL;
185         }
186         DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout);
187         return __addip(set, req->ip,
188                        req->timeout ? req->timeout : map->timeout,
189                        hash_ip,
190                        GFP_ATOMIC);
191 }
192
193 static int
194 addip_kernel(struct ip_set *set, 
195              const struct sk_buff *skb,
196              ip_set_ip_t *hash_ip,
197              const u_int32_t *flags,
198              unsigned char index)
199 {
200         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
201
202         return __addip(set,
203                        ntohl(flags[index] & IPSET_SRC 
204                                 ? skb->nh.iph->saddr 
205                                 : skb->nh.iph->daddr),
206                        map->timeout,
207                        hash_ip,
208                        GFP_ATOMIC);
209 }
210
/* Step one level down the tree while deleting an address: load slot
 * elem of (map)->tree into branch.  If the slot is empty the macro
 * makes the ENCLOSING FUNCTION return -EEXIST. */
#define DELIP_WALK(map, elem, branch) do {      \
        if (!(map)->tree[elem])                 \
                return -EEXIST;                 \
        branch = (map)->tree[elem];             \
} while (0)
217
218 static inline int 
219 __delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
220 {
221         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
222         struct ip_set_iptreeb *btree;
223         struct ip_set_iptreec *ctree;
224         struct ip_set_iptreed *dtree;
225         unsigned char a,b,c,d;
226         
227         if (!ip)
228                 return -ERANGE;
229                 
230         *hash_ip = ip;
231         ABCD(a, b, c, d, hash_ip);
232         DELIP_WALK(map, a, btree);
233         DELIP_WALK(btree, b, ctree);
234         DELIP_WALK(ctree, c, dtree);
235
236         if (dtree->expires[d]) {
237                 dtree->expires[d] = 0;
238                 map->elements--;
239                 return 0;
240         }
241         return -EEXIST;
242 }
243
244 static int
245 delip(struct ip_set *set, const void *data, size_t size,
246       ip_set_ip_t *hash_ip)
247 {
248         struct ip_set_req_iptree *req =
249             (struct ip_set_req_iptree *) data;
250
251         if (size != sizeof(struct ip_set_req_iptree)) {
252                 ip_set_printk("data length wrong (want %zu, have %zu)",
253                               sizeof(struct ip_set_req_iptree),
254                               size);
255                 return -EINVAL;
256         }
257         return __delip(set, req->ip, hash_ip);
258 }
259
260 static int
261 delip_kernel(struct ip_set *set, 
262              const struct sk_buff *skb,
263              ip_set_ip_t *hash_ip,
264              const u_int32_t *flags,
265              unsigned char index)
266 {
267         return __delip(set,
268                        ntohl(flags[index] & IPSET_SRC 
269                                 ? skb->nh.iph->saddr 
270                                 : skb->nh.iph->daddr),
271                        hash_ip);
272 }
273
/* Open a loop over all 256 slots of (map)->tree with index i.  Empty
 * slots are skipped; every occupied slot is loaded into branch before
 * the statements following the macro are executed.  The loop has to
 * be closed with LOOP_WALK_END. */
#define LOOP_WALK_BEGIN(map, i, branch)                 \
        for ((i) = 0; (i) < 256; (i)++) {               \
                if (!(map)->tree[i])                    \
                        continue;                       \
                branch = (map)->tree[i]

/* Close a loop opened with LOOP_WALK_BEGIN. */
#define LOOP_WALK_END }
281
282 static void ip_tree_gc(unsigned long ul_set)
283 {
284         struct ip_set *set = (void *) ul_set;
285         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
286         struct ip_set_iptreeb *btree;
287         struct ip_set_iptreec *ctree;
288         struct ip_set_iptreed *dtree;
289         unsigned int a,b,c,d;
290         unsigned char i,j,k;
291
292         i = j = k = 0;
293         DP("gc: %s", set->name);
294         write_lock_bh(&set->lock);
295         LOOP_WALK_BEGIN(map, a, btree);
296         LOOP_WALK_BEGIN(btree, b, ctree);
297         LOOP_WALK_BEGIN(ctree, c, dtree);
298         for (d = 0; d < 256; d++) {
299                 if (dtree->expires[d]) {
300                         DP("gc: %u %u %u %u: expires %lu jiffies %lu",
301                             a, b, c, d,
302                             dtree->expires[d], jiffies);
303                         if (map->timeout
304                             && time_before(dtree->expires[d], jiffies)) {
305                                 dtree->expires[d] = 0;
306                                 map->elements--;
307                         } else
308                                 k = 1;
309                 }
310         }
311         if (k == 0) {
312                 DP("gc: %s: leaf %u %u %u empty",
313                     set->name, a, b, c);
314                 kmem_cache_free(leaf_cachep, dtree);
315                 ctree->tree[c] = NULL;
316         } else {
317                 DP("gc: %s: leaf %u %u %u not empty",
318                     set->name, a, b, c);
319                 j = 1;
320                 k = 0;
321         }
322         LOOP_WALK_END;
323         if (j == 0) {
324                 DP("gc: %s: branch %u %u empty",
325                     set->name, a, b);
326                 kmem_cache_free(branch_cachep, ctree);
327                 btree->tree[b] = NULL;
328         } else {
329                 DP("gc: %s: branch %u %u not empty",
330                     set->name, a, b);
331                 i = 1;
332                 j = k = 0;
333         }
334         LOOP_WALK_END;
335         if (i == 0) {
336                 DP("gc: %s: branch %u empty",
337                     set->name, a);
338                 kmem_cache_free(branch_cachep, btree);
339                 map->tree[a] = NULL;
340         } else {
341                 DP("gc: %s: branch %u not empty",
342                     set->name, a);
343                 i = j = k = 0;
344         }
345         LOOP_WALK_END;
346         write_unlock_bh(&set->lock);
347         
348         map->gc.expires = jiffies + map->gc_interval * HZ;
349         add_timer(&map->gc);
350 }
351
352 static inline void init_gc_timer(struct ip_set *set)
353 {
354         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
355
356         /* Even if there is no timeout for the entries,
357          * we still have to call gc because delete
358          * do not clean up empty branches */
359         map->gc_interval = IPTREE_GC_TIME;
360         init_timer(&map->gc);
361         map->gc.data = (unsigned long) set;
362         map->gc.function = ip_tree_gc;
363         map->gc.expires = jiffies + map->gc_interval * HZ;
364         add_timer(&map->gc);
365 }
366
367 static int create(struct ip_set *set, const void *data, size_t size)
368 {
369         struct ip_set_req_iptree_create *req =
370             (struct ip_set_req_iptree_create *) data;
371         struct ip_set_iptree *map;
372
373         if (size != sizeof(struct ip_set_req_iptree_create)) {
374                 ip_set_printk("data length wrong (want %zu, have %zu)",
375                               sizeof(struct ip_set_req_iptree_create),
376                               size);
377                 return -EINVAL;
378         }
379
380         map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL);
381         if (!map) {
382                 DP("out of memory for %d bytes",
383                    sizeof(struct ip_set_iptree));
384                 return -ENOMEM;
385         }
386         memset(map, 0, sizeof(*map));
387         map->timeout = req->timeout;
388         map->elements = 0;
389         set->data = map;
390
391         init_gc_timer(set);
392
393         return 0;
394 }
395
396 static void __flush(struct ip_set_iptree *map)
397 {
398         struct ip_set_iptreeb *btree;
399         struct ip_set_iptreec *ctree;
400         struct ip_set_iptreed *dtree;
401         unsigned int a,b,c;
402
403         LOOP_WALK_BEGIN(map, a, btree);
404         LOOP_WALK_BEGIN(btree, b, ctree);
405         LOOP_WALK_BEGIN(ctree, c, dtree);
406         kmem_cache_free(leaf_cachep, dtree);
407         LOOP_WALK_END;
408         kmem_cache_free(branch_cachep, ctree);
409         LOOP_WALK_END;
410         kmem_cache_free(branch_cachep, btree);
411         LOOP_WALK_END;
412         map->elements = 0;
413 }
414
415 static void destroy(struct ip_set *set)
416 {
417         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
418
419         /* gc might be running */
420         while (!del_timer(&map->gc))
421                 msleep(IPTREE_DESTROY_SLEEP);
422         __flush(map);
423         kfree(map);
424         set->data = NULL;
425 }
426
427 static void flush(struct ip_set *set)
428 {
429         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
430         unsigned int timeout = map->timeout;
431         
432         /* gc might be running */
433         while (!del_timer(&map->gc))
434                 msleep(IPTREE_DESTROY_SLEEP);
435         __flush(map);
436         memset(map, 0, sizeof(*map));
437         map->timeout = timeout;
438
439         init_gc_timer(set);
440 }
441
442 static void list_header(const struct ip_set *set, void *data)
443 {
444         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
445         struct ip_set_req_iptree_create *header =
446             (struct ip_set_req_iptree_create *) data;
447
448         header->timeout = map->timeout;
449 }
450
451 static int list_members_size(const struct ip_set *set)
452 {
453         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
454         struct ip_set_iptreeb *btree;
455         struct ip_set_iptreec *ctree;
456         struct ip_set_iptreed *dtree;
457         unsigned int a,b,c,d;
458         unsigned int count = 0;
459
460         LOOP_WALK_BEGIN(map, a, btree);
461         LOOP_WALK_BEGIN(btree, b, ctree);
462         LOOP_WALK_BEGIN(ctree, c, dtree);
463         for (d = 0; d < 256; d++) {
464                 if (dtree->expires[d]
465                     && (!map->timeout || time_after(dtree->expires[d], jiffies)))
466                         count++;
467         }
468         LOOP_WALK_END;
469         LOOP_WALK_END;
470         LOOP_WALK_END;
471
472         DP("members %u", count);
473         return (count * sizeof(struct ip_set_req_iptree));
474 }
475
476 static void list_members(const struct ip_set *set, void *data)
477 {
478         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
479         struct ip_set_iptreeb *btree;
480         struct ip_set_iptreec *ctree;
481         struct ip_set_iptreed *dtree;
482         unsigned int a,b,c,d;
483         size_t offset = 0;
484         struct ip_set_req_iptree *entry;
485
486         LOOP_WALK_BEGIN(map, a, btree);
487         LOOP_WALK_BEGIN(btree, b, ctree);
488         LOOP_WALK_BEGIN(ctree, c, dtree);
489         for (d = 0; d < 256; d++) {
490                 if (dtree->expires[d]
491                     && (!map->timeout || time_after(dtree->expires[d], jiffies))) {
492                         entry = (struct ip_set_req_iptree *)(data + offset);
493                         entry->ip = ((a << 24) | (b << 16) | (c << 8) | d);
494                         entry->timeout = !map->timeout ? 0 
495                                 : (dtree->expires[d] - jiffies)/HZ;
496                         offset += sizeof(struct ip_set_req_iptree);
497                 }
498         }
499         LOOP_WALK_END;
500         LOOP_WALK_END;
501         LOOP_WALK_END;
502 }
503
504 static struct ip_set_type ip_set_iptree = {
505         .typename               = SETTYPE_NAME,
506         .features               = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
507         .protocol_version       = IP_SET_PROTOCOL_VERSION,
508         .create                 = &create,
509         .destroy                = &destroy,
510         .flush                  = &flush,
511         .reqsize                = sizeof(struct ip_set_req_iptree),
512         .addip                  = &addip,
513         .addip_kernel           = &addip_kernel,
514         .delip                  = &delip,
515         .delip_kernel           = &delip_kernel,
516         .testip                 = &testip,
517         .testip_kernel          = &testip_kernel,
518         .header_size            = sizeof(struct ip_set_req_iptree_create),
519         .list_header            = &list_header,
520         .list_members_size      = &list_members_size,
521         .list_members           = &list_members,
522         .me                     = THIS_MODULE,
523 };
524
525 MODULE_LICENSE("GPL");
526 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
527 MODULE_DESCRIPTION("iptree type of IP sets");
528 module_param(limit, int, 0600);
529 MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
530
531 static int __init init(void)
532 {
533         int ret;
534         
535         branch_cachep = kmem_cache_create("ip_set_iptreeb",
536                                 sizeof(struct ip_set_iptreeb),
537                                 0, 0, NULL, NULL);
538         if (!branch_cachep) {
539                 printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n");
540                 ret = -ENOMEM;
541                 goto out;
542         }
543         leaf_cachep = kmem_cache_create("ip_set_iptreed",
544                                 sizeof(struct ip_set_iptreed),
545                                 0, 0, NULL, NULL);
546         if (!leaf_cachep) {
547                 printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n");
548                 ret = -ENOMEM;
549                 goto free_branch;
550         }
551         ret = ip_set_register_set_type(&ip_set_iptree);
552         if (ret == 0)
553                 goto out;
554
555         kmem_cache_destroy(leaf_cachep);
556     free_branch:        
557         kmem_cache_destroy(branch_cachep);
558     out:
559         return ret;
560 }
561
562 static void __exit fini(void)
563 {
564         /* FIXME: possible race with ip_set_create() */
565         ip_set_unregister_set_type(&ip_set_iptree);
566         kmem_cache_destroy(leaf_cachep);
567         kmem_cache_destroy(branch_cachep);
568 }
569
570 module_init(init);
571 module_exit(fini);