Merge to Fedora kernel-2.6.18-1.2260_FC5 patched with stable patch-2.6.18.5-vs2.0...
[linux-2.6.git] / net / ipv4 / netfilter / ip_set_iptree.c
1 /* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation.  
6  */
7
8 /* Kernel module implementing an IP set type: the iptree type */
9
10 #include <linux/module.h>
11 #include <linux/ip.h>
12 #include <linux/skbuff.h>
13 #include <linux/slab.h>
14 #include <linux/delay.h>
15 #include <linux/netfilter_ipv4/ip_tables.h>
16 #include <linux/netfilter_ipv4/ip_set.h>
17 #include <linux/errno.h>
18 #include <asm/uaccess.h>
19 #include <asm/bitops.h>
20 #include <linux/spinlock.h>
21
22 /* Backward compatibility */
23 #ifndef __nocast
24 #define __nocast
25 #endif
26
27 #include <linux/netfilter_ipv4/ip_set_iptree.h>
28
29 static int limit = MAX_RANGE;
30
31 /* Garbage collection interval in seconds: */
32 #define IPTREE_GC_TIME          5*60
33 /* Sleep so many milliseconds before trying again 
34  * to delete the gc timer at destroying/flushing a set */ 
35 #define IPTREE_DESTROY_SLEEP    100
36
37 static kmem_cache_t *branch_cachep;
38 static kmem_cache_t *leaf_cachep;
39
40 #define ABCD(a,b,c,d,addrp) do {                \
41         a = ((unsigned char *)addrp)[3];        \
42         b = ((unsigned char *)addrp)[2];        \
43         c = ((unsigned char *)addrp)[1];        \
44         d = ((unsigned char *)addrp)[0];        \
45 } while (0)
46
47 #define TESTIP_WALK(map, elem, branch) do {     \
48         if ((map)->tree[elem]) {                \
49                 branch = (map)->tree[elem];     \
50         } else                                  \
51                 return 0;                       \
52 } while (0)
53
54 static inline int
55 __testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
56 {
57         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
58         struct ip_set_iptreeb *btree;
59         struct ip_set_iptreec *ctree;
60         struct ip_set_iptreed *dtree;
61         unsigned char a,b,c,d;
62
63         if (!ip)
64                 return -ERANGE;
65         
66         *hash_ip = ip;
67         ABCD(a, b, c, d, hash_ip);
68         DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout);
69         TESTIP_WALK(map, a, btree);
70         TESTIP_WALK(btree, b, ctree);
71         TESTIP_WALK(ctree, c, dtree);
72         DP("%lu %lu", dtree->expires[d], jiffies);
73         return !!(map->timeout ? (time_after(dtree->expires[d], jiffies))
74                                : dtree->expires[d]);
75 }
76
77 static int
78 testip(struct ip_set *set, const void *data, size_t size,
79        ip_set_ip_t *hash_ip)
80 {
81         struct ip_set_req_iptree *req = 
82             (struct ip_set_req_iptree *) data;
83
84         if (size != sizeof(struct ip_set_req_iptree)) {
85                 ip_set_printk("data length wrong (want %zu, have %zu)",
86                               sizeof(struct ip_set_req_iptree),
87                               size);
88                 return -EINVAL;
89         }
90         return __testip(set, req->ip, hash_ip);
91 }
92
93 static int
94 testip_kernel(struct ip_set *set, 
95               const struct sk_buff *skb,
96               ip_set_ip_t *hash_ip,
97               const u_int32_t *flags,
98               unsigned char index)
99 {
100         int res;
101         
102         DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
103            flags[index] & IPSET_SRC ? "SRC" : "DST",
104            NIPQUAD(skb->nh.iph->saddr),
105            NIPQUAD(skb->nh.iph->daddr));
106
107         res =  __testip(set,
108                         ntohl(flags[index] & IPSET_SRC 
109                                 ? skb->nh.iph->saddr 
110                                 : skb->nh.iph->daddr),
111                         hash_ip);
112         return (res < 0 ? 0 : res);
113 }
114
115 #define ADDIP_WALK(map, elem, branch, type, cachep, flags) do { \
116         if ((map)->tree[elem]) {                                \
117                 DP("found %u", elem);                           \
118                 branch = (map)->tree[elem];                     \
119         } else {                                                \
120                 branch = (type *)                               \
121                         kmem_cache_alloc(cachep, flags);        \
122                 if (branch == NULL)                             \
123                         return -ENOMEM;                         \
124                 memset(branch, 0, sizeof(*branch));             \
125                 (map)->tree[elem] = branch;                     \
126                 DP("alloc %u", elem);                           \
127         }                                                       \
128 } while (0)     
129
130 static inline int
131 __addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout,
132         ip_set_ip_t *hash_ip,
133         unsigned int __nocast flags)
134 {
135         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
136         struct ip_set_iptreeb *btree;
137         struct ip_set_iptreec *ctree;
138         struct ip_set_iptreed *dtree;
139         unsigned char a,b,c,d;
140         int ret = 0;
141         
142         if (!ip || map->elements > limit)
143                 /* We could call the garbage collector
144                  * but it's probably overkill */
145                 return -ERANGE;
146         
147         *hash_ip = ip;
148         ABCD(a, b, c, d, hash_ip);
149         DP("%u %u %u %u timeout %u", a, b, c, d, timeout);
150         ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep, flags);
151         ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep, flags);
152         ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep, flags);
153         if (dtree->expires[d]
154             && (!map->timeout || time_after(dtree->expires[d], jiffies)))
155                 ret = -EEXIST;
156         dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1;
157         /* Lottery */
158         if (dtree->expires[d] == 0)
159                 dtree->expires[d] = 1;
160         DP("%u %lu", d, dtree->expires[d]);
161         if (ret == 0)
162                 map->elements++;
163         return ret;
164 }
165
166 static int
167 addip(struct ip_set *set, const void *data, size_t size,
168       ip_set_ip_t *hash_ip)
169 {
170         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
171         struct ip_set_req_iptree *req = 
172                 (struct ip_set_req_iptree *) data;
173
174         if (size != sizeof(struct ip_set_req_iptree)) {
175                 ip_set_printk("data length wrong (want %zu, have %zu)",
176                               sizeof(struct ip_set_req_iptree),
177                               size);
178                 return -EINVAL;
179         }
180         DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout);
181         return __addip(set, req->ip,
182                        req->timeout ? req->timeout : map->timeout,
183                        hash_ip,
184                        GFP_ATOMIC);
185 }
186
187 static int
188 addip_kernel(struct ip_set *set, 
189              const struct sk_buff *skb,
190              ip_set_ip_t *hash_ip,
191              const u_int32_t *flags,
192              unsigned char index)
193 {
194         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
195
196         return __addip(set,
197                        ntohl(flags[index] & IPSET_SRC 
198                                 ? skb->nh.iph->saddr 
199                                 : skb->nh.iph->daddr),
200                        map->timeout,
201                        hash_ip,
202                        GFP_ATOMIC);
203 }
204
205 #define DELIP_WALK(map, elem, branch) do {      \
206         if ((map)->tree[elem]) {                \
207                 branch = (map)->tree[elem];     \
208         } else                                  \
209                 return -EEXIST;                 \
210 } while (0)
211
212 static inline int 
213 __delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
214 {
215         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
216         struct ip_set_iptreeb *btree;
217         struct ip_set_iptreec *ctree;
218         struct ip_set_iptreed *dtree;
219         unsigned char a,b,c,d;
220         
221         if (!ip)
222                 return -ERANGE;
223                 
224         *hash_ip = ip;
225         ABCD(a, b, c, d, hash_ip);
226         DELIP_WALK(map, a, btree);
227         DELIP_WALK(btree, b, ctree);
228         DELIP_WALK(ctree, c, dtree);
229
230         if (dtree->expires[d]) {
231                 dtree->expires[d] = 0;
232                 map->elements--;
233                 return 0;
234         }
235         return -EEXIST;
236 }
237
238 static int
239 delip(struct ip_set *set, const void *data, size_t size,
240       ip_set_ip_t *hash_ip)
241 {
242         struct ip_set_req_iptree *req =
243             (struct ip_set_req_iptree *) data;
244
245         if (size != sizeof(struct ip_set_req_iptree)) {
246                 ip_set_printk("data length wrong (want %zu, have %zu)",
247                               sizeof(struct ip_set_req_iptree),
248                               size);
249                 return -EINVAL;
250         }
251         return __delip(set, req->ip, hash_ip);
252 }
253
254 static int
255 delip_kernel(struct ip_set *set, 
256              const struct sk_buff *skb,
257              ip_set_ip_t *hash_ip,
258              const u_int32_t *flags,
259              unsigned char index)
260 {
261         return __delip(set,
262                        ntohl(flags[index] & IPSET_SRC 
263                                 ? skb->nh.iph->saddr 
264                                 : skb->nh.iph->daddr),
265                        hash_ip);
266 }
267
268 #define LOOP_WALK_BEGIN(map, i, branch) \
269         for (i = 0; i < 256; i++) {     \
270                 if (!(map)->tree[i])    \
271                         continue;       \
272                 branch = (map)->tree[i]
273
274 #define LOOP_WALK_END }
275
276 static void ip_tree_gc(unsigned long ul_set)
277 {
278         struct ip_set *set = (void *) ul_set;
279         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
280         struct ip_set_iptreeb *btree;
281         struct ip_set_iptreec *ctree;
282         struct ip_set_iptreed *dtree;
283         unsigned int a,b,c,d;
284         unsigned char i,j,k;
285
286         i = j = k = 0;
287         DP("gc: %s", set->name);
288         write_lock_bh(&set->lock);
289         LOOP_WALK_BEGIN(map, a, btree);
290         LOOP_WALK_BEGIN(btree, b, ctree);
291         LOOP_WALK_BEGIN(ctree, c, dtree);
292         for (d = 0; d < 256; d++) {
293                 if (dtree->expires[d]) {
294                         DP("gc: %u %u %u %u: expires %lu jiffies %lu",
295                             a, b, c, d,
296                             dtree->expires[d], jiffies);
297                         if (map->timeout
298                             && time_before(dtree->expires[d], jiffies)) {
299                                 dtree->expires[d] = 0;
300                                 map->elements--;
301                         } else
302                                 k = 1;
303                 }
304         }
305         if (k == 0) {
306                 DP("gc: %s: leaf %u %u %u empty",
307                     set->name, a, b, c);
308                 kmem_cache_free(leaf_cachep, dtree);
309                 ctree->tree[c] = NULL;
310         } else {
311                 DP("gc: %s: leaf %u %u %u not empty",
312                     set->name, a, b, c);
313                 j = 1;
314                 k = 0;
315         }
316         LOOP_WALK_END;
317         if (j == 0) {
318                 DP("gc: %s: branch %u %u empty",
319                     set->name, a, b);
320                 kmem_cache_free(branch_cachep, ctree);
321                 btree->tree[b] = NULL;
322         } else {
323                 DP("gc: %s: branch %u %u not empty",
324                     set->name, a, b);
325                 i = 1;
326                 j = k = 0;
327         }
328         LOOP_WALK_END;
329         if (i == 0) {
330                 DP("gc: %s: branch %u empty",
331                     set->name, a);
332                 kmem_cache_free(branch_cachep, btree);
333                 map->tree[a] = NULL;
334         } else {
335                 DP("gc: %s: branch %u not empty",
336                     set->name, a);
337                 i = j = k = 0;
338         }
339         LOOP_WALK_END;
340         write_unlock_bh(&set->lock);
341         
342         map->gc.expires = jiffies + map->gc_interval * HZ;
343         add_timer(&map->gc);
344 }
345
346 static inline void init_gc_timer(struct ip_set *set)
347 {
348         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
349
350         /* Even if there is no timeout for the entries,
351          * we still have to call gc because delete
352          * do not clean up empty branches */
353         map->gc_interval = IPTREE_GC_TIME;
354         init_timer(&map->gc);
355         map->gc.data = (unsigned long) set;
356         map->gc.function = ip_tree_gc;
357         map->gc.expires = jiffies + map->gc_interval * HZ;
358         add_timer(&map->gc);
359 }
360
361 static int create(struct ip_set *set, const void *data, size_t size)
362 {
363         struct ip_set_req_iptree_create *req =
364             (struct ip_set_req_iptree_create *) data;
365         struct ip_set_iptree *map;
366
367         if (size != sizeof(struct ip_set_req_iptree_create)) {
368                 ip_set_printk("data length wrong (want %zu, have %zu)",
369                               sizeof(struct ip_set_req_iptree_create),
370                               size);
371                 return -EINVAL;
372         }
373
374         map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL);
375         if (!map) {
376                 DP("out of memory for %d bytes",
377                    sizeof(struct ip_set_iptree));
378                 return -ENOMEM;
379         }
380         memset(map, 0, sizeof(*map));
381         map->timeout = req->timeout;
382         map->elements = 0;
383         set->data = map;
384
385         init_gc_timer(set);
386
387         return 0;
388 }
389
390 static void __flush(struct ip_set_iptree *map)
391 {
392         struct ip_set_iptreeb *btree;
393         struct ip_set_iptreec *ctree;
394         struct ip_set_iptreed *dtree;
395         unsigned int a,b,c;
396
397         LOOP_WALK_BEGIN(map, a, btree);
398         LOOP_WALK_BEGIN(btree, b, ctree);
399         LOOP_WALK_BEGIN(ctree, c, dtree);
400         kmem_cache_free(leaf_cachep, dtree);
401         LOOP_WALK_END;
402         kmem_cache_free(branch_cachep, ctree);
403         LOOP_WALK_END;
404         kmem_cache_free(branch_cachep, btree);
405         LOOP_WALK_END;
406         map->elements = 0;
407 }
408
409 static void destroy(struct ip_set *set)
410 {
411         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
412
413         /* gc might be running */
414         while (!del_timer(&map->gc))
415                 msleep(IPTREE_DESTROY_SLEEP);
416         __flush(map);
417         kfree(map);
418         set->data = NULL;
419 }
420
421 static void flush(struct ip_set *set)
422 {
423         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
424         unsigned int timeout = map->timeout;
425         
426         /* gc might be running */
427         while (!del_timer(&map->gc))
428                 msleep(IPTREE_DESTROY_SLEEP);
429         __flush(map);
430         memset(map, 0, sizeof(*map));
431         map->timeout = timeout;
432
433         init_gc_timer(set);
434 }
435
436 static void list_header(const struct ip_set *set, void *data)
437 {
438         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
439         struct ip_set_req_iptree_create *header =
440             (struct ip_set_req_iptree_create *) data;
441
442         header->timeout = map->timeout;
443 }
444
445 static int list_members_size(const struct ip_set *set)
446 {
447         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
448         struct ip_set_iptreeb *btree;
449         struct ip_set_iptreec *ctree;
450         struct ip_set_iptreed *dtree;
451         unsigned int a,b,c,d;
452         unsigned int count = 0;
453
454         LOOP_WALK_BEGIN(map, a, btree);
455         LOOP_WALK_BEGIN(btree, b, ctree);
456         LOOP_WALK_BEGIN(ctree, c, dtree);
457         for (d = 0; d < 256; d++) {
458                 if (dtree->expires[d]
459                     && (!map->timeout || time_after(dtree->expires[d], jiffies)))
460                         count++;
461         }
462         LOOP_WALK_END;
463         LOOP_WALK_END;
464         LOOP_WALK_END;
465
466         DP("members %u", count);
467         return (count * sizeof(struct ip_set_req_iptree));
468 }
469
470 static void list_members(const struct ip_set *set, void *data)
471 {
472         struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
473         struct ip_set_iptreeb *btree;
474         struct ip_set_iptreec *ctree;
475         struct ip_set_iptreed *dtree;
476         unsigned int a,b,c,d;
477         size_t offset = 0;
478         struct ip_set_req_iptree *entry;
479
480         LOOP_WALK_BEGIN(map, a, btree);
481         LOOP_WALK_BEGIN(btree, b, ctree);
482         LOOP_WALK_BEGIN(ctree, c, dtree);
483         for (d = 0; d < 256; d++) {
484                 if (dtree->expires[d]
485                     && (!map->timeout || time_after(dtree->expires[d], jiffies))) {
486                         entry = (struct ip_set_req_iptree *)(data + offset);
487                         entry->ip = ((a << 24) | (b << 16) | (c << 8) | d);
488                         entry->timeout = !map->timeout ? 0 
489                                 : (dtree->expires[d] - jiffies)/HZ;
490                         offset += sizeof(struct ip_set_req_iptree);
491                 }
492         }
493         LOOP_WALK_END;
494         LOOP_WALK_END;
495         LOOP_WALK_END;
496 }
497
498 static struct ip_set_type ip_set_iptree = {
499         .typename               = SETTYPE_NAME,
500         .features               = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
501         .protocol_version       = IP_SET_PROTOCOL_VERSION,
502         .create                 = &create,
503         .destroy                = &destroy,
504         .flush                  = &flush,
505         .reqsize                = sizeof(struct ip_set_req_iptree),
506         .addip                  = &addip,
507         .addip_kernel           = &addip_kernel,
508         .delip                  = &delip,
509         .delip_kernel           = &delip_kernel,
510         .testip                 = &testip,
511         .testip_kernel          = &testip_kernel,
512         .header_size            = sizeof(struct ip_set_req_iptree_create),
513         .list_header            = &list_header,
514         .list_members_size      = &list_members_size,
515         .list_members           = &list_members,
516         .me                     = THIS_MODULE,
517 };
518
519 MODULE_LICENSE("GPL");
520 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
521 MODULE_DESCRIPTION("iptree type of IP sets");
522 module_param(limit, int, 0600);
523 MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
524
525 static int __init init(void)
526 {
527         int ret;
528         
529         branch_cachep = kmem_cache_create("ip_set_iptreeb",
530                                 sizeof(struct ip_set_iptreeb),
531                                 0, 0, NULL, NULL);
532         if (!branch_cachep) {
533                 printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n");
534                 ret = -ENOMEM;
535                 goto out;
536         }
537         leaf_cachep = kmem_cache_create("ip_set_iptreed",
538                                 sizeof(struct ip_set_iptreed),
539                                 0, 0, NULL, NULL);
540         if (!leaf_cachep) {
541                 printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n");
542                 ret = -ENOMEM;
543                 goto free_branch;
544         }
545         ret = ip_set_register_set_type(&ip_set_iptree);
546         if (ret == 0)
547                 goto out;
548
549         kmem_cache_destroy(leaf_cachep);
550     free_branch:        
551         kmem_cache_destroy(branch_cachep);
552     out:
553         return ret;
554 }
555
556 static void __exit fini(void)
557 {
558         /* FIXME: possible race with ip_set_create() */
559         ip_set_unregister_set_type(&ip_set_iptree);
560         kmem_cache_destroy(leaf_cachep);
561         kmem_cache_destroy(branch_cachep);
562 }
563
564 module_init(init);
565 module_exit(fini);