1 /* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation.
8 /* Kernel module implementing an IP set type: the iptree type */
10 #include <linux/module.h>
12 #include <linux/skbuff.h>
13 #include <linux/slab.h>
14 #include <linux/delay.h>
15 #include <linux/netfilter_ipv4/ip_tables.h>
16 #include <linux/netfilter_ipv4/ip_set.h>
17 #include <linux/errno.h>
18 #include <asm/uaccess.h>
19 #include <asm/bitops.h>
20 #include <linux/spinlock.h>
22 /* Backward compatibility */
27 #include <linux/netfilter_ipv4/ip_set_iptree.h>
29 static int limit = MAX_RANGE;	/* per-set element cap; module parameter (see module_param at bottom) */
31 /* Garbage collection interval in seconds: */
32 #define IPTREE_GC_TIME 5*60
33 /* Sleep so many milliseconds before trying again
34 * to delete the gc timer at destroying/flushing a set */
35 #define IPTREE_DESTROY_SLEEP 100
/* Slab caches shared by all iptree sets: interior (b/c level) nodes
 * and leaf (d level) nodes; created in init(), destroyed in fini(). */
37 static kmem_cache_t *branch_cachep;
38 static kmem_cache_t *leaf_cachep;
/* Split the 32-bit value at addrp into four octets a..d (a from byte
 * [3], d from byte [0]).  NOTE(review): the octet order seen by callers
 * depends on host endianness and on addrp holding a host-order address
 * -- confirm against the ip_set core's hash_ip handling.  The macro's
 * closing "} while (0)" is not visible in this excerpt. */
40 #define ABCD(a,b,c,d,addrp) do { \
41 a = ((unsigned char *)addrp)[3]; \
42 b = ((unsigned char *)addrp)[2]; \
43 c = ((unsigned char *)addrp)[1]; \
44 d = ((unsigned char *)addrp)[0]; \
/* Lookup helper: descend one tree level -- if slot `elem' of `map' is
 * populated, continue the walk in `branch'.  NOTE(review): the else arm
 * (presumably an early "not found" return) is not visible here. */
47 #define TESTIP_WALK(map, elem, branch) do { \
48 if ((map)->tree[elem]) { \
49 branch = (map)->tree[elem]; \
/* Core membership test: walk the a/b/c levels down to the d-level leaf
 * and inspect expires[d].  With a per-set timeout an entry is live only
 * while its expiry is in the future; without one, any nonzero slot
 * counts (permanent entries are stored as 1, see __addip).
 * NOTE(review): excerpt is missing the return type, braces and the tail
 * of the final return expression. */
55 __testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
57 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
58 struct ip_set_iptreeb *btree;
59 struct ip_set_iptreec *ctree;
60 struct ip_set_iptreed *dtree;
61 unsigned char a,b,c,d;
/* Split the hashed address into per-level indices. */
67 ABCD(a, b, c, d, hash_ip);
68 DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout);
/* Each TESTIP_WALK bails out early if the node is absent. */
69 TESTIP_WALK(map, a, btree);
70 TESTIP_WALK(btree, b, ctree);
71 TESTIP_WALK(ctree, c, dtree);
72 DP("%lu %lu", dtree->expires[d], jiffies);
73 return !!(map->timeout ? (time_after(dtree->expires[d], jiffies))
/* Userspace (getsockopt) entry point for the membership test: validate
 * the request size, then delegate to __testip.  NOTE(review): the error
 * return after the size check is not visible in this excerpt. */
78 testip(struct ip_set *set, const void *data, size_t size,
81 struct ip_set_req_iptree *req =
82 (struct ip_set_req_iptree *) data;
84 if (size != sizeof(struct ip_set_req_iptree)) {
85 ip_set_printk("data length wrong (want %zu, have %zu)",
86 sizeof(struct ip_set_req_iptree),
90 return __testip(set, req->ip, hash_ip);
/* Packet-path membership test: pick the source or destination IPv4
 * address of skb according to flags[index] & IPSET_SRC, convert it to
 * host byte order and test it; a negative result from the core test is
 * reported as "no match" (0). */
94 testip_kernel(struct ip_set *set,
95 const struct sk_buff *skb,
97 const u_int32_t *flags,
102 DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u",
103 flags[index] & IPSET_SRC ? "SRC" : "DST",
104 NIPQUAD(skb->nh.iph->saddr),
105 NIPQUAD(skb->nh.iph->daddr));
108 ntohl(flags[index] & IPSET_SRC
110 : skb->nh.iph->daddr),
/* Clamp error codes to 0: the kernel hook only wants match/no-match. */
112 return (res < 0 ? 0 : res);
/* Insert helper: descend one tree level, allocating and zero-filling a
 * new node of `type' from `cachep' (with allocator `flags') when the
 * slot is empty.  NOTE(review): the else arm and the out-of-memory
 * return after the NULL check are not visible in this excerpt. */
115 #define ADDIP_WALK(map, elem, branch, type, cachep, flags) do { \
116 if ((map)->tree[elem]) { \
117 DP("found %u", elem); \
118 branch = (map)->tree[elem]; \
121 kmem_cache_alloc(cachep, flags); \
122 if (branch == NULL) \
124 memset(branch, 0, sizeof(*branch)); \
125 (map)->tree[elem] = branch; \
126 DP("alloc %u", elem); \
/* Core insert: refuse ip 0 and refuse growth once the element count
 * exceeds `limit', then walk/allocate the a/b/c levels and stamp the
 * leaf slot.  With a set-wide timeout the slot holds an absolute expiry
 * (timeout*HZ + jiffies); without one it holds the sentinel 1.
 * NOTE(review): excerpt is missing the return type, braces, the
 * error-return statements and (presumably) the elements++ accounting. */
131 __addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout,
132 ip_set_ip_t *hash_ip,
133 unsigned int __nocast flags)
135 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
136 struct ip_set_iptreeb *btree;
137 struct ip_set_iptreec *ctree;
138 struct ip_set_iptreed *dtree;
139 unsigned char a,b,c,d;
/* Reject address 0 and full sets (the error return is not visible). */
142 if (!ip || map->elements > limit)
143 /* We could call the garbage collector
144 * but it's probably overkill */
148 ABCD(a, b, c, d, hash_ip);
149 DP("%u %u %u %u timeout %u", a, b, c, d, timeout);
150 ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep, flags);
151 ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep, flags);
152 ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep, flags);
/* A still-live existing entry presumably yields -EEXIST (body not
 * visible in this excerpt). */
153 if (dtree->expires[d]
154 && (!map->timeout || time_after(dtree->expires[d], jiffies)))
156 dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1;
/* jiffies wraparound could yield 0, which means "empty slot" to the
 * rest of this module -- bump it to the next tick. */
158 if (dtree->expires[d] == 0)
159 dtree->expires[d] = 1;
160 DP("%u %lu", d, dtree->expires[d]);
/* Userspace entry point for adding an IP: validate the request size and
 * delegate to __addip, using the per-request timeout when given and the
 * set's default otherwise.  NOTE(review): the error return after the
 * size check and the trailing __addip arguments are not visible. */
167 addip(struct ip_set *set, const void *data, size_t size,
168 ip_set_ip_t *hash_ip)
170 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
171 struct ip_set_req_iptree *req =
172 (struct ip_set_req_iptree *) data;
174 if (size != sizeof(struct ip_set_req_iptree)) {
175 ip_set_printk("data length wrong (want %zu, have %zu)",
176 sizeof(struct ip_set_req_iptree),
180 DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout);
181 return __addip(set, req->ip,
182 req->timeout ? req->timeout : map->timeout,
/* Packet-path add: insert the skb's source or destination address
 * (chosen by flags[index] & IPSET_SRC, converted to host order) via
 * __addip.  NOTE(review): the __addip call head, the timeout argument
 * and the return are not visible in this excerpt. */
188 addip_kernel(struct ip_set *set,
189 const struct sk_buff *skb,
190 ip_set_ip_t *hash_ip,
191 const u_int32_t *flags,
194 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
197 ntohl(flags[index] & IPSET_SRC
199 : skb->nh.iph->daddr),
/* Delete helper: descend one tree level; NOTE(review): the else arm
 * (presumably "entry does not exist" return) is not visible here. */
205 #define DELIP_WALK(map, elem, branch) do { \
206 if ((map)->tree[elem]) { \
207 branch = (map)->tree[elem]; \
/* Core delete: walk down to the leaf and clear expires[d].  Emptied
 * leaves/branches are NOT freed here -- that is left to the periodic
 * gc (see the comment in init_gc_timer).  NOTE(review): return type,
 * braces and the success/failure returns are not visible. */
213 __delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip)
215 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
216 struct ip_set_iptreeb *btree;
217 struct ip_set_iptreec *ctree;
218 struct ip_set_iptreed *dtree;
219 unsigned char a,b,c,d;
225 ABCD(a, b, c, d, hash_ip);
226 DELIP_WALK(map, a, btree);
227 DELIP_WALK(btree, b, ctree);
228 DELIP_WALK(ctree, c, dtree);
/* Zeroing the slot marks it empty; 0 is the "unused" sentinel. */
230 if (dtree->expires[d]) {
231 dtree->expires[d] = 0;
/* Userspace entry point for deleting an IP: validate the request size,
 * then delegate to __delip.  NOTE(review): the error return after the
 * size check is not visible in this excerpt. */
239 delip(struct ip_set *set, const void *data, size_t size,
240 ip_set_ip_t *hash_ip)
242 struct ip_set_req_iptree *req =
243 (struct ip_set_req_iptree *) data;
245 if (size != sizeof(struct ip_set_req_iptree)) {
246 ip_set_printk("data length wrong (want %zu, have %zu)",
247 sizeof(struct ip_set_req_iptree),
251 return __delip(set, req->ip, hash_ip);
/* Packet-path delete: remove the skb's source or destination address
 * (chosen by flags[index] & IPSET_SRC, host order) via __delip.
 * NOTE(review): the __delip call head and return are not visible. */
255 delip_kernel(struct ip_set *set,
256 const struct sk_buff *skb,
257 ip_set_ip_t *hash_ip,
258 const u_int32_t *flags,
262 ntohl(flags[index] & IPSET_SRC
264 : skb->nh.iph->daddr),
/* Full-tree iteration helpers: LOOP_WALK_BEGIN opens a 256-slot loop
 * over one level, skipping empty slots (the `continue' after the NULL
 * test is not visible in this excerpt); LOOP_WALK_END closes the brace
 * the BEGIN macro leaves open. */
268 #define LOOP_WALK_BEGIN(map, i, branch) \
269 for (i = 0; i < 256; i++) { \
270 if (!(map)->tree[i]) \
272 branch = (map)->tree[i]
274 #define LOOP_WALK_END }
/* Periodic garbage collector (timer callback; ul_set is the struct
 * ip_set pointer smuggled through the timer's data field).  Under the
 * set's write lock it walks every leaf slot, zeroes entries whose
 * expiry has passed, frees leaves/branches that became entirely empty,
 * then re-arms itself.  NOTE(review): many lines (braces, empty-node
 * bookkeeping, the add_timer re-arm) are missing from this excerpt. */
276 static void ip_tree_gc(unsigned long ul_set)
278 struct ip_set *set = (void *) ul_set;
279 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
280 struct ip_set_iptreeb *btree;
281 struct ip_set_iptreec *ctree;
282 struct ip_set_iptreed *dtree;
283 unsigned int a,b,c,d;
287 DP("gc: %s", set->name);
/* _bh variant: the same tree is touched from packet-path hooks. */
288 write_lock_bh(&set->lock);
289 LOOP_WALK_BEGIN(map, a, btree);
290 LOOP_WALK_BEGIN(btree, b, ctree);
291 LOOP_WALK_BEGIN(ctree, c, dtree);
292 for (d = 0; d < 256; d++) {
293 if (dtree->expires[d]) {
294 DP("gc: %u %u %u %u: expires %lu jiffies %lu",
296 dtree->expires[d], jiffies);
/* Expired entry: clear the slot (0 == empty sentinel). */
298 && time_before(dtree->expires[d], jiffies)) {
299 dtree->expires[d] = 0;
/* Free a d-level leaf whose 256 slots all went empty ... */
306 DP("gc: %s: leaf %u %u %u empty",
308 kmem_cache_free(leaf_cachep, dtree);
309 ctree->tree[c] = NULL;
311 DP("gc: %s: leaf %u %u %u not empty",
/* ... then an emptied c-level branch ... */
318 DP("gc: %s: branch %u %u empty",
320 kmem_cache_free(branch_cachep, ctree);
321 btree->tree[b] = NULL;
323 DP("gc: %s: branch %u %u not empty",
/* ... and finally an emptied b-level branch. */
330 DP("gc: %s: branch %u empty",
332 kmem_cache_free(branch_cachep, btree);
335 DP("gc: %s: branch %u not empty",
340 write_unlock_bh(&set->lock);
/* Re-arm the timer for the next gc pass (add_timer not visible). */
342 map->gc.expires = jiffies + map->gc_interval * HZ;
/* Arm the per-set gc timer: ip_tree_gc fires every IPTREE_GC_TIME
 * seconds with the set pointer as its data argument.  NOTE(review):
 * the final add_timer call is not visible in this excerpt. */
346 static inline void init_gc_timer(struct ip_set *set)
348 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
350 /* Even if there is no timeout for the entries,
351 * we still have to call gc because delete
352 * do not clean up empty branches */
353 map->gc_interval = IPTREE_GC_TIME;
354 init_timer(&map->gc);
355 map->gc.data = (unsigned long) set;
356 map->gc.function = ip_tree_gc;
357 map->gc.expires = jiffies + map->gc_interval * HZ;
/* Set-type constructor: validate the create request, allocate and
 * zero the ip_set_iptree map and record the default entry timeout.
 * NOTE(review): the set->data assignment, init_gc_timer call and
 * returns are not visible in this excerpt. */
361 static int create(struct ip_set *set, const void *data, size_t size)
363 struct ip_set_req_iptree_create *req =
364 (struct ip_set_req_iptree_create *) data;
365 struct ip_set_iptree *map;
367 if (size != sizeof(struct ip_set_req_iptree_create)) {
368 ip_set_printk("data length wrong (want %zu, have %zu)",
369 sizeof(struct ip_set_req_iptree_create),
374 map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL);
/* NOTE(review): %d with a size_t argument is a format mismatch; the
 * matching ip_set_printk above uses %zu -- should be fixed to %zu. */
376 DP("out of memory for %d bytes",
377 sizeof(struct ip_set_iptree));
380 memset(map, 0, sizeof(*map));
381 map->timeout = req->timeout;
/* Free every allocated node of the tree back to its slab cache
 * (leaves first, then the enclosing branches).  Leaves the top-level
 * map itself intact; callers reset or free it.  NOTE(review): loop
 * index declarations and LOOP_WALK_END lines are missing here. */
390 static void __flush(struct ip_set_iptree *map)
392 struct ip_set_iptreeb *btree;
393 struct ip_set_iptreec *ctree;
394 struct ip_set_iptreed *dtree;
397 LOOP_WALK_BEGIN(map, a, btree);
398 LOOP_WALK_BEGIN(btree, b, ctree);
399 LOOP_WALK_BEGIN(ctree, c, dtree);
400 kmem_cache_free(leaf_cachep, dtree);
402 kmem_cache_free(branch_cachep, ctree);
404 kmem_cache_free(branch_cachep, btree);
/* Set-type destructor: spin until the gc timer is successfully
 * deleted (it may be running concurrently), sleeping between tries.
 * NOTE(review): the subsequent __flush/kfree of the map is not
 * visible in this excerpt. */
409 static void destroy(struct ip_set *set)
411 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
413 /* gc might be running */
414 while (!del_timer(&map->gc))
415 msleep(IPTREE_DESTROY_SLEEP);
/* Empty the set in place: stop the gc timer (retrying while it runs),
 * then reset the map to zero while preserving the configured default
 * timeout.  NOTE(review): the __flush call and the gc re-arm expected
 * between these lines are not visible in this excerpt. */
421 static void flush(struct ip_set *set)
423 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
424 unsigned int timeout = map->timeout;
426 /* gc might be running */
427 while (!del_timer(&map->gc))
428 msleep(IPTREE_DESTROY_SLEEP);
430 memset(map, 0, sizeof(*map));
431 map->timeout = timeout;
/* Serialize the set's create-time parameters (currently just the
 * default timeout) into the userspace header buffer. */
436 static void list_header(const struct ip_set *set, void *data)
438 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
439 struct ip_set_req_iptree_create *header =
440 (struct ip_set_req_iptree_create *) data;
442 header->timeout = map->timeout;
/* Return the buffer size (in bytes) needed to list the set: count the
 * live entries -- with a set timeout only not-yet-expired ones -- and
 * multiply by the per-entry record size.  NOTE(review): count++ and
 * the LOOP_WALK_END lines are not visible in this excerpt. */
445 static int list_members_size(const struct ip_set *set)
447 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
448 struct ip_set_iptreeb *btree;
449 struct ip_set_iptreec *ctree;
450 struct ip_set_iptreed *dtree;
451 unsigned int a,b,c,d;
452 unsigned int count = 0;
454 LOOP_WALK_BEGIN(map, a, btree);
455 LOOP_WALK_BEGIN(btree, b, ctree);
456 LOOP_WALK_BEGIN(ctree, c, dtree);
457 for (d = 0; d < 256; d++) {
/* Same liveness predicate as __testip. */
458 if (dtree->expires[d]
459 && (!map->timeout || time_after(dtree->expires[d], jiffies)))
466 DP("members %u", count);
467 return (count * sizeof(struct ip_set_req_iptree));
/* Serialize every live entry into the userspace buffer at `data':
 * rebuild the host-order IP from the four level indices and express
 * the entry timeout as remaining whole seconds (0 when the set has no
 * timeout).  NOTE(review): the `offset' declaration and LOOP_WALK_END
 * lines are not visible in this excerpt. */
470 static void list_members(const struct ip_set *set, void *data)
472 struct ip_set_iptree *map = (struct ip_set_iptree *) set->data;
473 struct ip_set_iptreeb *btree;
474 struct ip_set_iptreec *ctree;
475 struct ip_set_iptreed *dtree;
476 unsigned int a,b,c,d;
478 struct ip_set_req_iptree *entry;
480 LOOP_WALK_BEGIN(map, a, btree);
481 LOOP_WALK_BEGIN(btree, b, ctree);
482 LOOP_WALK_BEGIN(ctree, c, dtree);
483 for (d = 0; d < 256; d++) {
484 if (dtree->expires[d]
485 && (!map->timeout || time_after(dtree->expires[d], jiffies))) {
486 entry = (struct ip_set_req_iptree *)(data + offset);
487 entry->ip = ((a << 24) | (b << 16) | (c << 8) | d);
488 entry->timeout = !map->timeout ? 0
489 : (dtree->expires[d] - jiffies)/HZ;
490 offset += sizeof(struct ip_set_req_iptree);
/* Set-type descriptor registered with the ip_set core in init().
 * Wires the operations defined above to the framework; reqsize and
 * header_size tell the core how big the per-op userspace structures
 * are.  NOTE(review): several members (create/destroy/flush, the
 * non-kernel addip/delip/testip, THIS_MODULE) are not visible here. */
498 static struct ip_set_type ip_set_iptree = {
499 .typename = SETTYPE_NAME,
500 .features = IPSET_TYPE_IP | IPSET_DATA_SINGLE,
501 .protocol_version = IP_SET_PROTOCOL_VERSION,
505 .reqsize = sizeof(struct ip_set_req_iptree),
507 .addip_kernel = &addip_kernel,
509 .delip_kernel = &delip_kernel,
511 .testip_kernel = &testip_kernel,
512 .header_size = sizeof(struct ip_set_req_iptree_create),
513 .list_header = &list_header,
514 .list_members_size = &list_members_size,
515 .list_members = &list_members,
/* Module metadata; `limit' (mode 0600: root read/write via sysfs)
 * caps the number of elements per set, checked in __addip. */
519 MODULE_LICENSE("GPL");
520 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
521 MODULE_DESCRIPTION("iptree type of IP sets");
522 module_param(limit, int, 0600);
523 MODULE_PARM_DESC(limit, "maximal number of elements stored in the sets");
/* Module init: create the two slab caches, then register the set type
 * with the ip_set core; on failure unwind in reverse order.
 * NOTE(review): kmem_cache_create's remaining arguments, the gotos and
 * the return statements are not visible in this excerpt. */
525 static int __init init(void)
529 branch_cachep = kmem_cache_create("ip_set_iptreeb",
530 sizeof(struct ip_set_iptreeb),
532 if (!branch_cachep) {
533 printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n");
537 leaf_cachep = kmem_cache_create("ip_set_iptreed",
538 sizeof(struct ip_set_iptreed),
541 printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n");
545 ret = ip_set_register_set_type(&ip_set_iptree);
/* Error-unwind path: free caches in reverse creation order. */
549 kmem_cache_destroy(leaf_cachep);
551 kmem_cache_destroy(branch_cachep);
/* Module exit: unregister the set type, then tear down both slab
 * caches (safe only once no set of this type remains). */
556 static void __exit fini(void)
558 /* FIXME: possible race with ip_set_create() */
559 ip_set_unregister_set_type(&ip_set_iptree);
560 kmem_cache_destroy(leaf_cachep);
561 kmem_cache_destroy(branch_cachep);