/* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* Kernel module implementing an IP set type: the iptree type */
10 #include <linux/version.h>
11 #include <linux/module.h>
13 #include <linux/skbuff.h>
14 #include <linux/slab.h>
15 #include <linux/delay.h>
16 #include <linux/netfilter_ipv4/ip_tables.h>
17 #include <linux/netfilter_ipv4/ip_set.h>
18 #include <linux/errno.h>
19 #include <asm/uaccess.h>
20 #include <asm/bitops.h>
21 #include <linux/spinlock.h>
23 /* Backward compatibility */
28 #include <linux/netfilter_ipv4/ip_set_iptree.h>
30 static int limit = MAX_RANGE;
/* Garbage collection interval in seconds.
 * Parenthesized so the macro expands safely inside larger expressions
 * (e.g. x / IPTREE_GC_TIME). */
#define IPTREE_GC_TIME		(5*60)
/* Sleep so many milliseconds before trying again
 * to delete the gc timer at destroying/flushing a set */
#define IPTREE_DESTROY_SLEEP	100
38 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21)
39 static struct kmem_cache *branch_cachep;
40 static struct kmem_cache *leaf_cachep;
42 static kmem_cache_t *branch_cachep;
43 static kmem_cache_t *leaf_cachep;
/*
 * Split the host-order IPv4 address stored at addrp into its four
 * octets, a being the most significant one.
 *
 * The octets are extracted by shifting rather than by indexing the
 * bytes in memory, so the result is the same on little- and big-endian
 * hosts and matches how list_members() reassembles the address
 * ((a << 24) | (b << 16) | (c << 8) | d).
 *
 * Assumes *addrp is an unsigned integer of at least 32 bits.
 */
#define ABCD(a,b,c,d,addrp) do {				\
	(a) = (unsigned char)((*(addrp) >> 24) & 0xff);	\
	(b) = (unsigned char)((*(addrp) >> 16) & 0xff);	\
	(c) = (unsigned char)((*(addrp) >> 8) & 0xff);		\
	(d) = (unsigned char)(*(addrp) & 0xff);			\
} while (0)
/*
 * Descend one level during a lookup: load child 'elem' of 'map' into
 * 'branch', or make the enclosing function return 0 ("not a member")
 * when that child does not exist.
 */
#define TESTIP_WALK(map, elem, branch) do {	\
	if ((map)->tree[elem]) {		\
		branch = (map)->tree[elem];	\
	} else					\
		return 0;			\
} while (0)
61 __testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
63 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
64 struct ip_set_iptreeb *btree;
65 struct ip_set_iptreec *ctree;
66 struct ip_set_iptreed *dtree;
67 unsigned char a,b,c,d;
73 ABCD(a, b, c, d, hash_ip);
74 DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout);
75 TESTIP_WALK(map, a, btree);
76 TESTIP_WALK(btree, b, ctree);
77 TESTIP_WALK(ctree, c, dtree);
78 DP("%lu %lu", dtree->expires[d], jiffies);
79 return !!(map->timeout ? (time_after(dtree->expires[d], jiffies))
84 testip(struct ip_set *set, const void *data, size_t size,
87 struct ip_set_req_iptree *req =
88 (struct ip_set_req_iptree *) data;
90 if (size != sizeof(struct ip_set_req_iptree)) {
91 ip_set_printk("data length wrong (want %zu, have %zu)",
92 sizeof(struct ip_set_req_iptree),
96 return __testip(set, req->ip, hash_ip);
100 testip_kernel(struct ip_set *set,
101 const struct sk_buff *skb,
102 ip_set_ip_t *hash_ip,
103 const u_int32_t *flags,
108 DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
109 flags[index] & IPSET_SRC ? "SRC" : "DST",
110 NIPQUAD(skb->nh.iph->saddr),
111 NIPQUAD(skb->nh.iph->daddr));
114 ntohl(flags[index] & IPSET_SRC
116 : skb->nh.iph->daddr),
118 return (res < 0 ? 0 : res);
/*
 * Descend one level during an insert: load child 'elem' of 'map' into
 * 'branch', allocating and zeroing a new node from 'cachep' (with
 * allocation flags 'flags') when the child does not exist yet.
 * Makes the enclosing function return -ENOMEM if the allocation fails.
 */
#define ADDIP_WALK(map, elem, branch, type, cachep, flags) do {	\
	if ((map)->tree[elem]) {				\
		DP("found %u", elem);				\
		branch = (map)->tree[elem];			\
	} else {						\
		branch = (type *)				\
			kmem_cache_alloc(cachep, flags);	\
		if (branch == NULL)				\
			return -ENOMEM;				\
		memset(branch, 0, sizeof(*branch));		\
		(map)->tree[elem] = branch;			\
		DP("alloc %u", elem);				\
	}							\
} while (0)
137 __addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout,
138 ip_set_ip_t *hash_ip,
139 unsigned int __nocast flags)
141 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
142 struct ip_set_iptreeb *btree;
143 struct ip_set_iptreec *ctree;
144 struct ip_set_iptreed *dtree;
145 unsigned char a,b,c,d;
148 if (!ip || map->elements > limit)
149 /* We could call the garbage collector
150 * but it's probably overkill */
154 ABCD(a, b, c, d, hash_ip);
155 DP("%u %u %u %u timeout %u", a, b, c, d, timeout);
156 ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep, flags);
157 ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep, flags);
158 ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep, flags);
159 if (dtree->expires[d]
160 && (!map->timeout || time_after(dtree->expires[d], jiffies)))
162 dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1;
164 if (dtree->expires[d] == 0)
165 dtree->expires[d] = 1;
166 DP("%u %lu", d, dtree->expires[d]);
173 addip(struct ip_set *set, const void *data, size_t size,
174 ip_set_ip_t *hash_ip)
176 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
177 struct ip_set_req_iptree *req =
178 (struct ip_set_req_iptree *) data;
180 if (size != sizeof(struct ip_set_req_iptree)) {
181 ip_set_printk("data length wrong (want %zu, have %zu)",
182 sizeof(struct ip_set_req_iptree),
186 DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout);
187 return __addip(set, req->ip,
188 req->timeout ? req->timeout : map->timeout,
194 addip_kernel(struct ip_set *set,
195 const struct sk_buff *skb,
196 ip_set_ip_t *hash_ip,
197 const u_int32_t *flags,
200 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
203 ntohl(flags[index] & IPSET_SRC
205 : skb->nh.iph->daddr),
/*
 * Descend one level during a delete: load child 'elem' of 'map' into
 * 'branch', or make the enclosing function return -EEXIST when that
 * child does not exist (the address cannot be a member).
 *
 * NOTE(review): the error code was restored from a damaged listing;
 * confirm against upstream.
 */
#define DELIP_WALK(map, elem, branch) do {	\
	if ((map)->tree[elem]) {		\
		branch = (map)->tree[elem];	\
	} else					\
		return -EEXIST;			\
} while (0)
219 __delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
221 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
222 struct ip_set_iptreeb *btree;
223 struct ip_set_iptreec *ctree;
224 struct ip_set_iptreed *dtree;
225 unsigned char a,b,c,d;
231 ABCD(a, b, c, d, hash_ip);
232 DELIP_WALK(map, a, btree);
233 DELIP_WALK(btree, b, ctree);
234 DELIP_WALK(ctree, c, dtree);
236 if (dtree->expires[d]) {
237 dtree->expires[d] = 0;
245 delip(struct ip_set *set, const void *data, size_t size,
246 ip_set_ip_t *hash_ip)
248 struct ip_set_req_iptree *req =
249 (struct ip_set_req_iptree *) data;
251 if (size != sizeof(struct ip_set_req_iptree)) {
252 ip_set_printk("data length wrong (want %zu, have %zu)",
253 sizeof(struct ip_set_req_iptree),
257 return __delip(set, req->ip, hash_ip);
261 delip_kernel(struct ip_set *set,
262 const struct sk_buff *skb,
263 ip_set_ip_t *hash_ip,
264 const u_int32_t *flags,
268 ntohl(flags[index] & IPSET_SRC
270 : skb->nh.iph->daddr),
/*
 * Open a loop over the 256 child slots of 'map'.  Empty slots are
 * skipped; for every populated slot, 'i' holds its index and 'branch'
 * the child pointer.  The loop body follows the macro invocation and
 * must be closed with LOOP_WALK_END.
 */
#define LOOP_WALK_BEGIN(map, i, branch)		\
	for (i = 0; i < 256; i++) {		\
		if (!(map)->tree[i])		\
			continue;		\
		branch = (map)->tree[i]

/* Close a loop opened with LOOP_WALK_BEGIN. */
#define LOOP_WALK_END	}
282 static void ip_tree_gc(unsigned long ul_set)
284 struct ip_set *set = (void *) ul_set;
285 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
286 struct ip_set_iptreeb *btree;
287 struct ip_set_iptreec *ctree;
288 struct ip_set_iptreed *dtree;
289 unsigned int a,b,c,d;
293 DP("gc: %s", set->name);
294 write_lock_bh(&set->lock);
295 LOOP_WALK_BEGIN(map, a, btree);
296 LOOP_WALK_BEGIN(btree, b, ctree);
297 LOOP_WALK_BEGIN(ctree, c, dtree);
298 for (d = 0; d < 256; d++) {
299 if (dtree->expires[d]) {
300 DP("gc: %u %u %u %u: expires %lu jiffies %lu",
302 dtree->expires[d], jiffies);
304 && time_before(dtree->expires[d], jiffies)) {
305 dtree->expires[d] = 0;
312 DP("gc: %s: leaf %u %u %u empty",
314 kmem_cache_free(leaf_cachep, dtree);
315 ctree->tree[c] = NULL;
317 DP("gc: %s: leaf %u %u %u not empty",
324 DP("gc: %s: branch %u %u empty",
326 kmem_cache_free(branch_cachep, ctree);
327 btree->tree[b] = NULL;
329 DP("gc: %s: branch %u %u not empty",
336 DP("gc: %s: branch %u empty",
338 kmem_cache_free(branch_cachep, btree);
341 DP("gc: %s: branch %u not empty",
346 write_unlock_bh(&set->lock);
348 map->gc.expires = jiffies + map->gc_interval * HZ;
352 static inline void init_gc_timer(struct ip_set *set)
354 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
356 /* Even if there is no timeout for the entries,
357 * we still have to call gc because delete
358 * do not clean up empty branches */
359 map->gc_interval = IPTREE_GC_TIME;
360 init_timer(&map->gc);
361 map->gc.data = (unsigned long) set;
362 map->gc.function = ip_tree_gc;
363 map->gc.expires = jiffies + map->gc_interval * HZ;
367 static int create(struct ip_set *set, const void *data, size_t size)
369 struct ip_set_req_iptree_create *req =
370 (struct ip_set_req_iptree_create *) data;
371 struct ip_set_iptree *map;
373 if (size != sizeof(struct ip_set_req_iptree_create)) {
374 ip_set_printk("data length wrong (want %zu, have %zu)",
375 sizeof(struct ip_set_req_iptree_create),
380 map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL);
382 DP("out of memory for %d bytes",
383 sizeof(struct ip_set_iptree));
386 memset(map, 0, sizeof(*map));
387 map->timeout = req->timeout;
396 static void __flush(struct ip_set_iptree *map)
398 struct ip_set_iptreeb *btree;
399 struct ip_set_iptreec *ctree;
400 struct ip_set_iptreed *dtree;
403 LOOP_WALK_BEGIN(map, a, btree);
404 LOOP_WALK_BEGIN(btree, b, ctree);
405 LOOP_WALK_BEGIN(ctree, c, dtree);
406 kmem_cache_free(leaf_cachep, dtree);
408 kmem_cache_free(branch_cachep, ctree);
410 kmem_cache_free(branch_cachep, btree);
415 static void destroy(struct ip_set *set)
417 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
419 /* gc might be running */
420 while (!del_timer(&map->gc))
421 msleep(IPTREE_DESTROY_SLEEP);
427 static void flush(struct ip_set *set)
429 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
430 unsigned int timeout = map->timeout;
432 /* gc might be running */
433 while (!del_timer(&map->gc))
434 msleep(IPTREE_DESTROY_SLEEP);
436 memset(map, 0, sizeof(*map));
437 map->timeout = timeout;
442 static void list_header(const struct ip_set *set, void *data)
444 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
445 struct ip_set_req_iptree_create *header =
446 (struct ip_set_req_iptree_create *) data;
448 header->timeout = map->timeout;
451 static int list_members_size(const struct ip_set *set)
453 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
454 struct ip_set_iptreeb *btree;
455 struct ip_set_iptreec *ctree;
456 struct ip_set_iptreed *dtree;
457 unsigned int a,b,c,d;
458 unsigned int count = 0;
460 LOOP_WALK_BEGIN(map, a, btree);
461 LOOP_WALK_BEGIN(btree, b, ctree);
462 LOOP_WALK_BEGIN(ctree, c, dtree);
463 for (d = 0; d < 256; d++) {
464 if (dtree->expires[d]
465 && (!map->timeout || time_after(dtree->expires[d], jiffies)))
472 DP("members %u", count);
473 return (count * sizeof(struct ip_set_req_iptree));
476 static void list_members(const struct ip_set *set, void *data)
478 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
479 struct ip_set_iptreeb *btree;
480 struct ip_set_iptreec *ctree;
481 struct ip_set_iptreed *dtree;
482 unsigned int a,b,c,d;
484 struct ip_set_req_iptree *entry;
486 LOOP_WALK_BEGIN(map, a, btree);
487 LOOP_WALK_BEGIN(btree, b, ctree);
488 LOOP_WALK_BEGIN(ctree, c, dtree);
489 for (d = 0; d < 256; d++) {
490 if (dtree->expires[d]
491 && (!map->timeout || time_after(dtree->expires[d], jiffies))) {
492 entry = (struct ip_set_req_iptree *)(data + offset);
493 entry->ip = ((a << 24) | (b << 16) | (c << 8) | d);
494 entry->timeout = !map->timeout ? 0
495 : (dtree->expires[d] - jiffies)/HZ;
496 offset += sizeof(struct ip_set_req_iptree);
504 static struct ip_set_type ip_set_iptree = {
505 .typename = SETTYPE_NAME,
506 .features = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
507 .protocol_version = IP_SET_PROTOCOL_VERSION,
511 .reqsize = sizeof(struct ip_set_req_iptree),
513 .addip_kernel = &addip_kernel,
515 .delip_kernel = &delip_kernel,
517 .testip_kernel = &testip_kernel,
518 .header_size = sizeof(struct ip_set_req_iptree_create),
519 .list_header = &list_header,
520 .list_members_size = &list_members_size,
521 .list_members = &list_members,
525 MODULE_LICENSE("GPL");
526 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
527 MODULE_DESCRIPTION("iptree type of IP sets");
528 module_param(limit, int, 0600);
529 MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
531 static int __init init(void)
535 branch_cachep = kmem_cache_create("ip_set_iptreeb",
536 sizeof(struct ip_set_iptreeb),
538 if (!branch_cachep) {
539 printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n");
543 leaf_cachep = kmem_cache_create("ip_set_iptreed",
544 sizeof(struct ip_set_iptreed),
547 printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n");
551 ret = ip_set_register_set_type(&ip_set_iptree);
555 kmem_cache_destroy(leaf_cachep);
557 kmem_cache_destroy(branch_cachep);
562 static void __exit fini(void)
564 /* FIXME: possible race with ip_set_create() */
565 ip_set_unregister_set_type(&ip_set_iptree);
566 kmem_cache_destroy(leaf_cachep);
567 kmem_cache_destroy(branch_cachep);