Merge to kernel-2.6.20-1.2949.fc6.vs2.2.0.1
[linux-2.6.git] / net / ipv4 / netfilter / ip_set.c
1 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2  *                         Patrick Schaaf <bof@bof.de>
3  * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.  
8  */
9
10 /* Kernel module for IP set management */
11
12 #include <linux/version.h>
13 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
14 #include <linux/config.h>
15 #endif
16 #include <linux/module.h>
17 #include <linux/moduleparam.h>
18 #include <linux/kmod.h>
19 #include <linux/ip.h>
20 #include <linux/skbuff.h>
21 #include <linux/random.h>
22 #include <linux/jhash.h>
23 #include <linux/netfilter_ipv4/ip_tables.h>
24 #include <linux/errno.h>
25 #include <asm/uaccess.h>
26 #include <asm/bitops.h>
27 #include <asm/semaphore.h>
28 #include <linux/spinlock.h>
29 #include <linux/vmalloc.h>
30
31 #define ASSERT_READ_LOCK(x)
32 #define ASSERT_WRITE_LOCK(x)
33 #include <linux/netfilter_ipv4/ip_set.h>
34
35 static struct list_head set_type_list;          /* all registered sets */
36 static struct ip_set **ip_set_list;             /* all individual sets */
37 static DEFINE_RWLOCK(ip_set_lock);              /* protects the lists and the hash */
38 static DECLARE_MUTEX(ip_set_app_mutex);         /* serializes user access */
39 static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX;
40 static ip_set_id_t ip_set_bindings_hash_size =  CONFIG_IP_NF_SET_HASHSIZE;
41 static struct list_head *ip_set_hash;           /* hash of bindings */
42 static unsigned int ip_set_hash_random;         /* random seed */
43
44 /*
45  * Sets are identified either by the index in ip_set_list or by id.
46  * The id never changes and is used to find a key in the hash. 
47  * The index may change by swapping and is used in all other places
48  * (set/SET netfilter modules, binding value, etc.)
49  *
50  * Userspace requests are serialized by ip_set_app_mutex and sets can
51  * be deleted only from userspace. Therefore ip_set_list locking 
52  * must obey the following rules:
53  *
54  * - kernel requests: read and write locking mandatory
55  * - user requests: read locking optional, write locking mandatory
56  */
57
58 static inline void
59 __ip_set_get(ip_set_id_t index)
60 {
61         atomic_inc(&ip_set_list[index]->ref);
62 }
63
64 static inline void
65 __ip_set_put(ip_set_id_t index)
66 {
67         atomic_dec(&ip_set_list[index]->ref);
68 }
69
70 /*
71  * Binding routines
72  */
73
74 static inline struct ip_set_hash *
75 __ip_set_find(u_int32_t key, ip_set_id_t id, ip_set_ip_t ip)
76 {
77         struct ip_set_hash *set_hash;
78
79         list_for_each_entry(set_hash, &ip_set_hash[key], list)
80                 if (set_hash->id == id && set_hash->ip == ip)
81                         return set_hash;
82                         
83         return NULL;
84 }
85
86 static ip_set_id_t
87 ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip)
88 {
89         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) 
90                                 % ip_set_bindings_hash_size;
91         struct ip_set_hash *set_hash;
92
93         ASSERT_READ_LOCK(&ip_set_lock);
94         IP_SET_ASSERT(ip_set_list[id]);
95         DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
96         
97         set_hash = __ip_set_find(key, id, ip);
98         
99         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
100            HIPQUAD(ip),
101            set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
102
103         return (set_hash != NULL ? set_hash->binding : IP_SET_INVALID_ID);
104 }
105
106 static inline void 
107 __set_hash_del(struct ip_set_hash *set_hash)
108 {
109         ASSERT_WRITE_LOCK(&ip_set_lock);
110         IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
111
112         __ip_set_put(set_hash->binding);
113         list_del(&set_hash->list);
114         kfree(set_hash);
115 }
116
117 static int
118 ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip)
119 {
120         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
121                                 % ip_set_bindings_hash_size;
122         struct ip_set_hash *set_hash;
123         
124         IP_SET_ASSERT(ip_set_list[id]);
125         DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
126         write_lock_bh(&ip_set_lock);
127         set_hash = __ip_set_find(key, id, ip);
128         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
129            HIPQUAD(ip),
130            set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
131
132         if (set_hash != NULL)
133                 __set_hash_del(set_hash);
134         write_unlock_bh(&ip_set_lock);
135         return 0;
136 }
137
138 static int 
139 ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding)
140 {
141         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
142                                 % ip_set_bindings_hash_size;
143         struct ip_set_hash *set_hash;
144         int ret = 0;
145         
146         IP_SET_ASSERT(ip_set_list[id]);
147         IP_SET_ASSERT(ip_set_list[binding]);
148         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
149            HIPQUAD(ip), ip_set_list[binding]->name);
150         write_lock_bh(&ip_set_lock);
151         set_hash = __ip_set_find(key, id, ip);
152         if (!set_hash) {
153                 set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_ATOMIC);
154                 if (!set_hash) {
155                         ret = -ENOMEM;
156                         goto unlock;
157                 }
158                 INIT_LIST_HEAD(&set_hash->list);
159                 set_hash->id = id;
160                 set_hash->ip = ip;
161                 list_add(&set_hash->list, &ip_set_hash[key]);
162         } else {
163                 IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
164                 DP("overwrite binding: %s",
165                    ip_set_list[set_hash->binding]->name);
166                 __ip_set_put(set_hash->binding);
167         }
168         set_hash->binding = binding;
169         __ip_set_get(set_hash->binding);
170         DP("stored: key %u, id %u (%s), ip %u.%u.%u.%u, binding %u (%s)",
171            key, id, ip_set_list[id]->name,
172            HIPQUAD(ip), binding, ip_set_list[binding]->name);
173     unlock:
174         write_unlock_bh(&ip_set_lock);
175         return ret;
176 }
177
/*
 * Call fn(entry, args...) for every entry in every bucket of the
 * bindings hash. fn must not unlink the entry it is handed; use
 * FOREACH_HASH_RW_DO for that.
 */
#define FOREACH_HASH_DO(fn, args...)						\
({										\
	struct ip_set_hash *__entry;						\
	ip_set_id_t __bucket;							\
										\
	for (__bucket = 0; __bucket < ip_set_bindings_hash_size; __bucket++) {	\
		list_for_each_entry(__entry, &ip_set_hash[__bucket], list)	\
			fn(__entry , ## args);					\
	}									\
})
188
/*
 * Call fn(entry, args...) for every entry in every bucket of the
 * bindings hash, using the _safe list iterator so that fn may unlink
 * and free the entry it is handed. The ASSERT_WRITE_LOCK annotation
 * marks ip_set_lock as required.
 */
#define FOREACH_HASH_RW_DO(fn, args...)						\
({										\
	struct ip_set_hash *__entry, *__next;					\
	ip_set_id_t __bucket;							\
										\
	ASSERT_WRITE_LOCK(&ip_set_lock);					\
	for (__bucket = 0; __bucket < ip_set_bindings_hash_size; __bucket++) {	\
		list_for_each_entry_safe(__entry, __next,			\
					 &ip_set_hash[__bucket], list)		\
			fn(__entry , ## args);					\
	}									\
})
200
201 /* Add, del and test set entries from kernel */
202
/*
 * Pick the next set to visit after @set: first the binding stored in
 * the hash for (@set->id, @ip), otherwise the default binding of @set.
 * Stores the result in @index and evaluates to non-zero when one of
 * the two yielded a valid index.
 */
#define follow_bindings(index, set, ip)					\
(IP_SET_INVALID_ID != (index = ip_set_find_in_hash((set)->id, ip))	\
 || IP_SET_INVALID_ID != (index = (set)->binding))
206
207 int
208 ip_set_testip_kernel(ip_set_id_t index,
209                      const struct sk_buff *skb,
210                      const u_int32_t *flags)
211 {
212         struct ip_set *set;
213         ip_set_ip_t ip;
214         int res;
215         unsigned char i = 0;
216         
217         IP_SET_ASSERT(flags[i]);
218         read_lock_bh(&ip_set_lock);
219         do {
220                 set = ip_set_list[index];
221                 IP_SET_ASSERT(set);
222                 DP("set %s, index %u", set->name, index);
223                 read_lock_bh(&set->lock);
224                 res = set->type->testip_kernel(set, skb, &ip, flags, i++);
225                 read_unlock_bh(&set->lock);
226                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
227         } while (res > 0 
228                  && flags[i] 
229                  && follow_bindings(index, set, ip));
230         read_unlock_bh(&ip_set_lock);
231
232         return res;
233 }
234
235 void
236 ip_set_addip_kernel(ip_set_id_t index,
237                     const struct sk_buff *skb,
238                     const u_int32_t *flags)
239 {
240         struct ip_set *set;
241         ip_set_ip_t ip;
242         int res;
243         unsigned char i = 0;
244
245         IP_SET_ASSERT(flags[i]);
246    retry:
247         read_lock_bh(&ip_set_lock);
248         do {
249                 set = ip_set_list[index];
250                 IP_SET_ASSERT(set);
251                 DP("set %s, index %u", set->name, index);
252                 write_lock_bh(&set->lock);
253                 res = set->type->addip_kernel(set, skb, &ip, flags, i++);
254                 write_unlock_bh(&set->lock);
255                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
256         } while ((res == 0 || res == -EEXIST)
257                  && flags[i] 
258                  && follow_bindings(index, set, ip));
259         read_unlock_bh(&ip_set_lock);
260
261         if (res == -EAGAIN
262             && set->type->retry
263             && (res = set->type->retry(set)) == 0)
264                 goto retry;
265 }
266
267 void
268 ip_set_delip_kernel(ip_set_id_t index,
269                     const struct sk_buff *skb,
270                     const u_int32_t *flags)
271 {
272         struct ip_set *set;
273         ip_set_ip_t ip;
274         int res;
275         unsigned char i = 0;
276
277         IP_SET_ASSERT(flags[i]);
278         read_lock_bh(&ip_set_lock);
279         do {
280                 set = ip_set_list[index];
281                 IP_SET_ASSERT(set);
282                 DP("set %s, index %u", set->name, index);
283                 write_lock_bh(&set->lock);
284                 res = set->type->delip_kernel(set, skb, &ip, flags, i++);
285                 write_unlock_bh(&set->lock);
286                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
287         } while ((res == 0 || res == -EEXIST)
288                  && flags[i] 
289                  && follow_bindings(index, set, ip));
290         read_unlock_bh(&ip_set_lock);
291 }
292
293 /* Register and deregister settype */
294
295 static inline struct ip_set_type *
296 find_set_type(const char *name)
297 {
298         struct ip_set_type *set_type;
299
300         list_for_each_entry(set_type, &set_type_list, list)
301                 if (!strncmp(set_type->typename, name, IP_SET_MAXNAMELEN - 1))
302                         return set_type;
303         return NULL;
304 }
305
306 int 
307 ip_set_register_set_type(struct ip_set_type *set_type)
308 {
309         int ret = 0;
310         
311         if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) {
312                 ip_set_printk("'%s' uses wrong protocol version %u (want %u)",
313                               set_type->typename,
314                               set_type->protocol_version,
315                               IP_SET_PROTOCOL_VERSION);
316                 return -EINVAL;
317         }
318
319         write_lock_bh(&ip_set_lock);
320         if (find_set_type(set_type->typename)) {
321                 /* Duplicate! */
322                 ip_set_printk("'%s' already registered!", 
323                               set_type->typename);
324                 ret = -EINVAL;
325                 goto unlock;
326         }
327         if (!try_module_get(THIS_MODULE)) {
328                 ret = -EFAULT;
329                 goto unlock;
330         }
331         list_add(&set_type->list, &set_type_list);
332         DP("'%s' registered.", set_type->typename);
333    unlock:
334         write_unlock_bh(&ip_set_lock);
335         return ret;
336 }
337
338 void
339 ip_set_unregister_set_type(struct ip_set_type *set_type)
340 {
341         write_lock_bh(&ip_set_lock);
342         if (!find_set_type(set_type->typename)) {
343                 ip_set_printk("'%s' not registered?",
344                               set_type->typename);
345                 goto unlock;
346         }
347         list_del(&set_type->list);
348         module_put(THIS_MODULE);
349         DP("'%s' unregistered.", set_type->typename);
350    unlock:
351         write_unlock_bh(&ip_set_lock);
352
353 }
354
355 /*
356  * Userspace routines
357  */
358
359 /*
360  * Find set by name, reference it once. The reference makes sure the
361  * thing pointed to, does not go away under our feet. Drop the reference
362  * later, using ip_set_put().
363  */
364 ip_set_id_t
365 ip_set_get_byname(const char *name)
366 {
367         ip_set_id_t i, index = IP_SET_INVALID_ID;
368         
369         down(&ip_set_app_mutex);
370         for (i = 0; i < ip_set_max; i++) {
371                 if (ip_set_list[i] != NULL
372                     && strcmp(ip_set_list[i]->name, name) == 0) {
373                         __ip_set_get(i);
374                         index = i;
375                         break;
376                 }
377         }
378         up(&ip_set_app_mutex);
379         return index;
380 }
381
382 /*
383  * Find set by index, reference it once. The reference makes sure the
384  * thing pointed to, does not go away under our feet. Drop the reference
385  * later, using ip_set_put().
386  */
387 ip_set_id_t
388 ip_set_get_byindex(ip_set_id_t index)
389 {
390         down(&ip_set_app_mutex);
391
392         if (index >= ip_set_max)
393                 return IP_SET_INVALID_ID;
394         
395         if (ip_set_list[index])
396                 __ip_set_get(index);
397         else
398                 index = IP_SET_INVALID_ID;
399                 
400         up(&ip_set_app_mutex);
401         return index;
402 }
403
404 /*
405  * If the given set pointer points to a valid set, decrement
406  * reference count by 1. The caller shall not assume the index
407  * to be valid, after calling this function.
408  */
409 void ip_set_put(ip_set_id_t index)
410 {
411         down(&ip_set_app_mutex);
412         if (ip_set_list[index])
413                 __ip_set_put(index);
414         up(&ip_set_app_mutex);
415 }
416
417 /* Find a set by name or index */
418 static ip_set_id_t
419 ip_set_find_byname(const char *name)
420 {
421         ip_set_id_t i, index = IP_SET_INVALID_ID;
422         
423         for (i = 0; i < ip_set_max; i++) {
424                 if (ip_set_list[i] != NULL
425                     && strcmp(ip_set_list[i]->name, name) == 0) {
426                         index = i;
427                         break;
428                 }
429         }
430         return index;
431 }
432
433 static ip_set_id_t
434 ip_set_find_byindex(ip_set_id_t index)
435 {
436         if (index >= ip_set_max || ip_set_list[index] == NULL)
437                 index = IP_SET_INVALID_ID;
438         
439         return index;
440 }
441
442 /*
443  * Add, del, test, bind and unbind
444  */
445
446 static inline int
447 __ip_set_testip(struct ip_set *set,
448                 const void *data,
449                 size_t size,
450                 ip_set_ip_t *ip)
451 {
452         int res;
453
454         read_lock_bh(&set->lock);
455         res = set->type->testip(set, data, size, ip);
456         read_unlock_bh(&set->lock);
457
458         return res;
459 }
460
461 static int
462 __ip_set_addip(ip_set_id_t index,
463                const void *data,
464                size_t size)
465 {
466         struct ip_set *set = ip_set_list[index];
467         ip_set_ip_t ip;
468         int res;
469         
470         IP_SET_ASSERT(set);
471         do {
472                 write_lock_bh(&set->lock);
473                 res = set->type->addip(set, data, size, &ip);
474                 write_unlock_bh(&set->lock);
475         } while (res == -EAGAIN
476                  && set->type->retry
477                  && (res = set->type->retry(set)) == 0);
478
479         return res;
480 }
481
482 static int
483 ip_set_addip(ip_set_id_t index,
484              const void *data,
485              size_t size)
486 {
487
488         return __ip_set_addip(index,
489                               data + sizeof(struct ip_set_req_adt),
490                               size - sizeof(struct ip_set_req_adt));
491 }
492
493 static int
494 ip_set_delip(ip_set_id_t index,
495              const void *data,
496              size_t size)
497 {
498         struct ip_set *set = ip_set_list[index];
499         ip_set_ip_t ip;
500         int res;
501         
502         IP_SET_ASSERT(set);
503         write_lock_bh(&set->lock);
504         res = set->type->delip(set,
505                                data + sizeof(struct ip_set_req_adt),
506                                size - sizeof(struct ip_set_req_adt),
507                                &ip);
508         write_unlock_bh(&set->lock);
509
510         return res;
511 }
512
513 static int
514 ip_set_testip(ip_set_id_t index,
515               const void *data,
516               size_t size)
517 {
518         struct ip_set *set = ip_set_list[index];
519         ip_set_ip_t ip;
520         int res;
521
522         IP_SET_ASSERT(set);
523         res = __ip_set_testip(set,
524                               data + sizeof(struct ip_set_req_adt),
525                               size - sizeof(struct ip_set_req_adt),
526                               &ip);
527
528         return (res > 0 ? -EEXIST : res);
529 }
530
531 static int
532 ip_set_bindip(ip_set_id_t index,
533               const void *data,
534               size_t size)
535 {
536         struct ip_set *set = ip_set_list[index];
537         struct ip_set_req_bind *req_bind;
538         ip_set_id_t binding;
539         ip_set_ip_t ip;
540         int res;
541
542         IP_SET_ASSERT(set);
543         if (size < sizeof(struct ip_set_req_bind))
544                 return -EINVAL;
545                 
546         req_bind = (struct ip_set_req_bind *) data;
547         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
548
549         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
550                 /* Default binding of a set */
551                 char *binding_name;
552                 
553                 if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
554                         return -EINVAL;
555
556                 binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
557                 binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
558
559                 binding = ip_set_find_byname(binding_name);
560                 if (binding == IP_SET_INVALID_ID)
561                         return -ENOENT;
562
563                 write_lock_bh(&ip_set_lock);
564                 /* Sets as binding values are referenced */
565                 if (set->binding != IP_SET_INVALID_ID)
566                         __ip_set_put(set->binding);
567                 set->binding = binding;
568                 __ip_set_get(set->binding);
569                 write_unlock_bh(&ip_set_lock);
570
571                 return 0;
572         }
573         binding = ip_set_find_byname(req_bind->binding);
574         if (binding == IP_SET_INVALID_ID)
575                 return -ENOENT;
576
577         res = __ip_set_testip(set,
578                               data + sizeof(struct ip_set_req_bind),
579                               size - sizeof(struct ip_set_req_bind),
580                               &ip);
581         DP("set %s, ip: %u.%u.%u.%u, binding %s",
582            set->name, HIPQUAD(ip), ip_set_list[binding]->name);
583         
584         if (res >= 0)
585                 res = ip_set_hash_add(set->id, ip, binding);
586
587         return res;
588 }
589
/* Call fn(set, args...) for every occupied slot of ip_set_list. */
#define FOREACH_SET_DO(fn, args...)				\
({								\
	struct ip_set *__cur;					\
	ip_set_id_t __slot;					\
								\
	for (__slot = 0; __slot < ip_set_max; __slot++) {	\
		__cur = ip_set_list[__slot];			\
		if (__cur != NULL)				\
			fn(__cur , ##args);			\
	}							\
})
601
602 static inline void
603 __set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id)
604 {
605         if (set_hash->id == id)
606                 __set_hash_del(set_hash);
607 }
608
609 static inline void
610 __unbind_default(struct ip_set *set)
611 {
612         if (set->binding != IP_SET_INVALID_ID) {
613                 /* Sets as binding values are referenced */
614                 __ip_set_put(set->binding);
615                 set->binding = IP_SET_INVALID_ID;
616         }
617 }
618
619 static int
620 ip_set_unbindip(ip_set_id_t index,
621                 const void *data,
622                 size_t size)
623 {
624         struct ip_set *set;
625         struct ip_set_req_bind *req_bind;
626         ip_set_ip_t ip;
627         int res;
628
629         DP("");
630         if (size < sizeof(struct ip_set_req_bind))
631                 return -EINVAL;
632                 
633         req_bind = (struct ip_set_req_bind *) data;
634         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
635         
636         DP("%u %s", index, req_bind->binding);
637         if (index == IP_SET_INVALID_ID) {
638                 /* unbind :all: */
639                 if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
640                         /* Default binding of sets */
641                         write_lock_bh(&ip_set_lock);
642                         FOREACH_SET_DO(__unbind_default);
643                         write_unlock_bh(&ip_set_lock);
644                         return 0;
645                 } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
646                         /* Flush all bindings of all sets*/
647                         write_lock_bh(&ip_set_lock);
648                         FOREACH_HASH_RW_DO(__set_hash_del);
649                         write_unlock_bh(&ip_set_lock);
650                         return 0;
651                 }
652                 DP("unreachable reached!");
653                 return -EINVAL;
654         }
655         
656         set = ip_set_list[index];
657         IP_SET_ASSERT(set);
658         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
659                 /* Default binding of set */
660                 ip_set_id_t binding = ip_set_find_byindex(set->binding);
661
662                 if (binding == IP_SET_INVALID_ID)
663                         return -ENOENT;
664                         
665                 write_lock_bh(&ip_set_lock);
666                 /* Sets in hash values are referenced */
667                 __ip_set_put(set->binding);
668                 set->binding = IP_SET_INVALID_ID;
669                 write_unlock_bh(&ip_set_lock);
670
671                 return 0;
672         } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
673                 /* Flush all bindings */
674
675                 write_lock_bh(&ip_set_lock);
676                 FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
677                 write_unlock_bh(&ip_set_lock);
678                 return 0;
679         }
680         
681         res = __ip_set_testip(set,
682                               data + sizeof(struct ip_set_req_bind),
683                               size - sizeof(struct ip_set_req_bind),
684                               &ip);
685
686         DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
687         if (res >= 0)
688                 res = ip_set_hash_del(set->id, ip);
689
690         return res;
691 }
692
693 static int
694 ip_set_testbind(ip_set_id_t index,
695                 const void *data,
696                 size_t size)
697 {
698         struct ip_set *set = ip_set_list[index];
699         struct ip_set_req_bind *req_bind;
700         ip_set_id_t binding;
701         ip_set_ip_t ip;
702         int res;
703
704         IP_SET_ASSERT(set);
705         if (size < sizeof(struct ip_set_req_bind))
706                 return -EINVAL;
707                 
708         req_bind = (struct ip_set_req_bind *) data;
709         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
710
711         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
712                 /* Default binding of set */
713                 char *binding_name;
714                 
715                 if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
716                         return -EINVAL;
717
718                 binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
719                 binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
720
721                 binding = ip_set_find_byname(binding_name);
722                 if (binding == IP_SET_INVALID_ID)
723                         return -ENOENT;
724                 
725                 res = (set->binding == binding) ? -EEXIST : 0;
726
727                 return res;
728         }
729         binding = ip_set_find_byname(req_bind->binding);
730         if (binding == IP_SET_INVALID_ID)
731                 return -ENOENT;
732                 
733         
734         res = __ip_set_testip(set,
735                               data + sizeof(struct ip_set_req_bind),
736                               size - sizeof(struct ip_set_req_bind),
737                               &ip);
738         DP("set %s, ip: %u.%u.%u.%u, binding %s",
739            set->name, HIPQUAD(ip), ip_set_list[binding]->name);
740            
741         if (res >= 0)
742                 res = (ip_set_find_in_hash(set->id, ip) == binding)
743                         ? -EEXIST : 0;
744
745         return res;
746 }
747
748 static struct ip_set_type *
749 find_set_type_rlock(const char *typename)
750 {
751         struct ip_set_type *type;
752         
753         read_lock_bh(&ip_set_lock);
754         type = find_set_type(typename);
755         if (type == NULL)
756                 read_unlock_bh(&ip_set_lock);
757
758         return type;
759 }
760
761 static int
762 find_free_id(const char *name,
763              ip_set_id_t *index,
764              ip_set_id_t *id)
765 {
766         ip_set_id_t i;
767
768         *id = IP_SET_INVALID_ID;
769         for (i = 0;  i < ip_set_max; i++) {
770                 if (ip_set_list[i] == NULL) {
771                         if (*id == IP_SET_INVALID_ID)
772                                 *id = *index = i;
773                 } else if (strcmp(name, ip_set_list[i]->name) == 0)
774                         /* Name clash */
775                         return -EEXIST;
776         }
777         if (*id == IP_SET_INVALID_ID)
778                 /* No free slot remained */
779                 return -ERANGE;
780         /* Check that index is usable as id (swapping) */
781     check:      
782         for (i = 0;  i < ip_set_max; i++) {
783                 if (ip_set_list[i] != NULL
784                     && ip_set_list[i]->id == *id) {
785                     *id = i;
786                     goto check;
787                 }
788         }
789         return 0;
790 }
791
792 /*
793  * Create a set
794  */
795 static int
796 ip_set_create(const char *name,
797               const char *typename,
798               ip_set_id_t restore,
799               const void *data,
800               size_t size)
801 {
802         struct ip_set *set;
803         ip_set_id_t index = 0, id;
804         int res = 0;
805
806         DP("setname: %s, typename: %s, id: %u", name, typename, restore);
807         /*
808          * First, and without any locks, allocate and initialize
809          * a normal base set structure.
810          */
811         set = kmalloc(sizeof(struct ip_set), GFP_KERNEL);
812         if (!set)
813                 return -ENOMEM;
814         set->lock = RW_LOCK_UNLOCKED;
815         strncpy(set->name, name, IP_SET_MAXNAMELEN);
816         set->binding = IP_SET_INVALID_ID;
817         atomic_set(&set->ref, 0);
818
819         /*
820          * Next, take the &ip_set_lock, check that we know the type,
821          * and take a reference on the type, to make sure it
822          * stays available while constructing our new set.
823          *
824          * After referencing the type, we drop the &ip_set_lock,
825          * and let the new set construction run without locks.
826          */
827         set->type = find_set_type_rlock(typename);
828         if (set->type == NULL) {
829                 /* Try loading the module */
830                 char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1];
831                 strcpy(modulename, "ip_set_");
832                 strcat(modulename, typename);
833                 DP("try to load %s", modulename);
834                 request_module(modulename);
835                 set->type = find_set_type_rlock(typename);
836         }
837         if (set->type == NULL) {
838                 ip_set_printk("no set type '%s', set '%s' not created",
839                               typename, name);
840                 res = -ENOENT;
841                 goto out;
842         }
843         if (!try_module_get(set->type->me)) {
844                 read_unlock_bh(&ip_set_lock);
845                 res = -EFAULT;
846                 goto out;
847         }
848         read_unlock_bh(&ip_set_lock);
849
850         /*
851          * Without holding any locks, create private part.
852          */
853         res = set->type->create(set, data, size);
854         if (res != 0)
855                 goto put_out;
856
857         /* BTW, res==0 here. */
858
859         /*
860          * Here, we have a valid, constructed set. &ip_set_lock again,
861          * find free id/index and check that it is not already in 
862          * ip_set_list.
863          */
864         write_lock_bh(&ip_set_lock);
865         if ((res = find_free_id(set->name, &index, &id)) != 0) {
866                 DP("no free id!");
867                 goto cleanup;
868         }
869
870         /* Make sure restore gets the same index */
871         if (restore != IP_SET_INVALID_ID && index != restore) {
872                 DP("Can't restore, sets are screwed up");
873                 res = -ERANGE;
874                 goto cleanup;
875         }
876          
877         /*
878          * Finally! Add our shiny new set to the list, and be done.
879          */
880         DP("create: '%s' created with index %u, id %u!", set->name, index, id);
881         set->id = id;
882         ip_set_list[index] = set;
883         write_unlock_bh(&ip_set_lock);
884         return res;
885         
886     cleanup:
887         write_unlock_bh(&ip_set_lock);
888         set->type->destroy(set);
889     put_out:
890         module_put(set->type->me);
891     out:
892         kfree(set);
893         return res;
894 }
895
896 /*
897  * Destroy a given existing set
898  */
899 static void
900 ip_set_destroy_set(ip_set_id_t index)
901 {
902         struct ip_set *set = ip_set_list[index];
903
904         IP_SET_ASSERT(set);
905         DP("set: %s",  set->name);
906         write_lock_bh(&ip_set_lock);
907         FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
908         if (set->binding != IP_SET_INVALID_ID)
909                 __ip_set_put(set->binding);
910         ip_set_list[index] = NULL;
911         write_unlock_bh(&ip_set_lock);
912
913         /* Must call it without holding any lock */
914         set->type->destroy(set);
915         module_put(set->type->me);
916         kfree(set);
917 }
918
919 /*
920  * Destroy a set - or all sets
921  * Sets must not be referenced/used.
922  */
923 static int
924 ip_set_destroy(ip_set_id_t index)
925 {
926         ip_set_id_t i;
927
928         /* ref modification always protected by the mutex */
929         if (index != IP_SET_INVALID_ID) {
930                 if (atomic_read(&ip_set_list[index]->ref))
931                         return -EBUSY;
932                 ip_set_destroy_set(index);
933         } else {
934                 for (i = 0; i < ip_set_max; i++) {
935                         if (ip_set_list[i] != NULL 
936                             && (atomic_read(&ip_set_list[i]->ref)))
937                                 return -EBUSY;
938                 }
939
940                 for (i = 0; i < ip_set_max; i++) {
941                         if (ip_set_list[i] != NULL)
942                                 ip_set_destroy_set(i);
943                 }
944         }
945         return 0;
946 }
947
948 static void
949 ip_set_flush_set(struct ip_set *set)
950 {
951         DP("set: %s %u",  set->name, set->id);
952
953         write_lock_bh(&set->lock);
954         set->type->flush(set);
955         write_unlock_bh(&set->lock);
956 }
957
958 /* 
959  * Flush data in a set - or in all sets
960  */
961 static int
962 ip_set_flush(ip_set_id_t index)
963 {
964         if (index != IP_SET_INVALID_ID) {
965                 IP_SET_ASSERT(ip_set_list[index]);
966                 ip_set_flush_set(ip_set_list[index]);
967         } else
968                 FOREACH_SET_DO(ip_set_flush_set);
969
970         return 0;
971 }
972
973 /* Rename a set */
974 static int
975 ip_set_rename(ip_set_id_t index, const char *name)
976 {
977         struct ip_set *set = ip_set_list[index];
978         ip_set_id_t i;
979         int res = 0;
980
981         DP("set: %s to %s",  set->name, name);
982         write_lock_bh(&ip_set_lock);
983         for (i = 0; i < ip_set_max; i++) {
984                 if (ip_set_list[i] != NULL
985                     && strncmp(ip_set_list[i]->name, 
986                                name,
987                                IP_SET_MAXNAMELEN - 1) == 0) {
988                         res = -EEXIST;
989                         goto unlock;
990                 }
991         }
992         strncpy(set->name, name, IP_SET_MAXNAMELEN);
993     unlock:
994         write_unlock_bh(&ip_set_lock);
995         return res;
996 }
997
998 /*
999  * Swap two sets so that name/index points to the other.
1000  * References are also swapped.
1001  */
1002 static int
1003 ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index)
1004 {
1005         struct ip_set *from = ip_set_list[from_index];
1006         struct ip_set *to = ip_set_list[to_index];
1007         char from_name[IP_SET_MAXNAMELEN];
1008         u_int32_t from_ref;
1009
1010         DP("set: %s to %s",  from->name, to->name);
1011         /* Features must not change. Artifical restriction. */
1012         if (from->type->features != to->type->features)
1013                 return -ENOEXEC;
1014
1015         /* No magic here: ref munging protected by the mutex */ 
1016         write_lock_bh(&ip_set_lock);
1017         strncpy(from_name, from->name, IP_SET_MAXNAMELEN);
1018         from_ref = atomic_read(&from->ref);
1019
1020         strncpy(from->name, to->name, IP_SET_MAXNAMELEN);
1021         atomic_set(&from->ref, atomic_read(&to->ref));
1022         strncpy(to->name, from_name, IP_SET_MAXNAMELEN);
1023         atomic_set(&to->ref, from_ref);
1024         
1025         ip_set_list[from_index] = to;
1026         ip_set_list[to_index] = from;
1027         
1028         write_unlock_bh(&ip_set_lock);
1029         return 0;
1030 }
1031
1032 /*
1033  * List set data
1034  */
1035
1036 static inline void
1037 __set_hash_bindings_size_list(struct ip_set_hash *set_hash,
1038                               ip_set_id_t id, size_t *size)
1039 {
1040         if (set_hash->id == id)
1041                 *size += sizeof(struct ip_set_hash_list);
1042 }
1043
1044 static inline void
1045 __set_hash_bindings_size_save(struct ip_set_hash *set_hash,
1046                               ip_set_id_t id, size_t *size)
1047 {
1048         if (set_hash->id == id)
1049                 *size += sizeof(struct ip_set_hash_save);
1050 }
1051
1052 static inline void
1053 __set_hash_bindings(struct ip_set_hash *set_hash,
1054                     ip_set_id_t id, void *data, int *used)
1055 {
1056         if (set_hash->id == id) {
1057                 struct ip_set_hash_list *hash_list = 
1058                         (struct ip_set_hash_list *)(data + *used);
1059
1060                 hash_list->ip = set_hash->ip;
1061                 hash_list->binding = set_hash->binding;
1062                 *used += sizeof(struct ip_set_hash_list);
1063         }
1064 }
1065
1066 static int ip_set_list_set(ip_set_id_t index,
1067                            void *data,
1068                            int *used,
1069                            int len)
1070 {
1071         struct ip_set *set = ip_set_list[index];
1072         struct ip_set_list *set_list;
1073
1074         /* Pointer to our header */
1075         set_list = (struct ip_set_list *) (data + *used);
1076
1077         DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used);
1078
1079         /* Get and ensure header size */
1080         if (*used + sizeof(struct ip_set_list) > len)
1081                 goto not_enough_mem;
1082         *used += sizeof(struct ip_set_list);
1083
1084         read_lock_bh(&set->lock);
1085         /* Get and ensure set specific header size */
1086         set_list->header_size = set->type->header_size;
1087         if (*used + set_list->header_size > len)
1088                 goto unlock_set;
1089
1090         /* Fill in the header */
1091         set_list->index = index;
1092         set_list->binding = set->binding;
1093         set_list->ref = atomic_read(&set->ref);
1094
1095         /* Fill in set spefific header data */
1096         set->type->list_header(set, data + *used);
1097         *used += set_list->header_size;
1098
1099         /* Get and ensure set specific members size */
1100         set_list->members_size = set->type->list_members_size(set);
1101         if (*used + set_list->members_size > len)
1102                 goto unlock_set;
1103
1104         /* Fill in set spefific members data */
1105         set->type->list_members(set, data + *used);
1106         *used += set_list->members_size;
1107         read_unlock_bh(&set->lock);
1108
1109         /* Bindings */
1110
1111         /* Get and ensure set specific bindings size */
1112         set_list->bindings_size = 0;
1113         FOREACH_HASH_DO(__set_hash_bindings_size_list,
1114                         set->id, &set_list->bindings_size);
1115         if (*used + set_list->bindings_size > len)
1116                 goto not_enough_mem;
1117
1118         /* Fill in set spefific bindings data */
1119         FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used);
1120         
1121         return 0;
1122
1123     unlock_set:
1124         read_unlock_bh(&set->lock);
1125     not_enough_mem:
1126         DP("not enough mem, try again");
1127         return -EAGAIN;
1128 }
1129
1130 /*
1131  * Save sets
1132  */
1133 static int ip_set_save_set(ip_set_id_t index,
1134                            void *data,
1135                            int *used,
1136                            int len)
1137 {
1138         struct ip_set *set;
1139         struct ip_set_save *set_save;
1140
1141         /* Pointer to our header */
1142         set_save = (struct ip_set_save *) (data + *used);
1143
1144         /* Get and ensure header size */
1145         if (*used + sizeof(struct ip_set_save) > len)
1146                 goto not_enough_mem;
1147         *used += sizeof(struct ip_set_save);
1148
1149         set = ip_set_list[index];
1150         DP("set: %s, used: %u(%u) %p %p", set->name, *used, len, 
1151            data, data + *used);
1152
1153         read_lock_bh(&set->lock);
1154         /* Get and ensure set specific header size */
1155         set_save->header_size = set->type->header_size;
1156         if (*used + set_save->header_size > len)
1157                 goto unlock_set;
1158
1159         /* Fill in the header */
1160         set_save->index = index;
1161         set_save->binding = set->binding;
1162
1163         /* Fill in set spefific header data */
1164         set->type->list_header(set, data + *used);
1165         *used += set_save->header_size;
1166
1167         DP("set header filled: %s, used: %u(%u) %p %p", set->name, *used,
1168            set_save->header_size, data, data + *used);
1169         /* Get and ensure set specific members size */
1170         set_save->members_size = set->type->list_members_size(set);
1171         if (*used + set_save->members_size > len)
1172                 goto unlock_set;
1173
1174         /* Fill in set spefific members data */
1175         set->type->list_members(set, data + *used);
1176         *used += set_save->members_size;
1177         read_unlock_bh(&set->lock);
1178         DP("set members filled: %s, used: %u(%u) %p %p", set->name, *used,
1179            set_save->members_size, data, data + *used);
1180         return 0;
1181
1182     unlock_set:
1183         read_unlock_bh(&set->lock);
1184     not_enough_mem:
1185         DP("not enough mem, try again");
1186         return -EAGAIN;
1187 }
1188
1189 static inline void
1190 __set_hash_save_bindings(struct ip_set_hash *set_hash,
1191                          ip_set_id_t id,
1192                          void *data,
1193                          int *used,
1194                          int len,
1195                          int *res)
1196 {
1197         if (*res == 0
1198             && (id == IP_SET_INVALID_ID || set_hash->id == id)) {
1199                 struct ip_set_hash_save *hash_save = 
1200                         (struct ip_set_hash_save *)(data + *used);
1201                 /* Ensure bindings size */
1202                 if (*used + sizeof(struct ip_set_hash_save) > len) {
1203                         *res = -ENOMEM;
1204                         return;
1205                 }
1206                 hash_save->id = set_hash->id;
1207                 hash_save->ip = set_hash->ip;
1208                 hash_save->binding = set_hash->binding;
1209                 *used += sizeof(struct ip_set_hash_save);
1210         }
1211 }
1212
1213 static int ip_set_save_bindings(ip_set_id_t index,
1214                                 void *data,
1215                                 int *used,
1216                                 int len)
1217 {
1218         int res = 0;
1219         struct ip_set_save *set_save;
1220
1221         DP("used %u, len %u", *used, len);
1222         /* Get and ensure header size */
1223         if (*used + sizeof(struct ip_set_save) > len)
1224                 return -ENOMEM;
1225
1226         /* Marker */
1227         set_save = (struct ip_set_save *) (data + *used);
1228         set_save->index = IP_SET_INVALID_ID;
1229         set_save->header_size = 0;
1230         set_save->members_size = 0;
1231         *used += sizeof(struct ip_set_save);
1232
1233         DP("marker added used %u, len %u", *used, len);
1234         /* Fill in bindings data */
1235         if (index != IP_SET_INVALID_ID)
1236                 /* Sets are identified by id in hash */
1237                 index = ip_set_list[index]->id;
1238         FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res);
1239
1240         return res;     
1241 }
1242
1243 /*
1244  * Restore sets
1245  */
1246 static int ip_set_restore(void *data,
1247                           int len)
1248 {
1249         int res = 0;
1250         int line = 0, used = 0, members_size;
1251         struct ip_set *set;
1252         struct ip_set_hash_save *hash_save;
1253         struct ip_set_restore *set_restore;
1254         ip_set_id_t index;
1255
1256         /* Loop to restore sets */
1257         while (1) {
1258                 line++;
1259                 
1260                 DP("%u %u %u", used, sizeof(struct ip_set_restore), len);
1261                 /* Get and ensure header size */
1262                 if (used + sizeof(struct ip_set_restore) > len)
1263                         return line;
1264                 set_restore = (struct ip_set_restore *) (data + used);
1265                 used += sizeof(struct ip_set_restore);
1266
1267                 /* Ensure data size */
1268                 if (used 
1269                     + set_restore->header_size 
1270                     + set_restore->members_size > len)
1271                         return line;
1272
1273                 /* Check marker */
1274                 if (set_restore->index == IP_SET_INVALID_ID) {
1275                         line--;
1276                         goto bindings;
1277                 }
1278                 
1279                 /* Try to create the set */
1280                 DP("restore %s %s", set_restore->name, set_restore->typename);
1281                 res = ip_set_create(set_restore->name,
1282                                     set_restore->typename,
1283                                     set_restore->index,
1284                                     data + used,
1285                                     set_restore->header_size);
1286                 
1287                 if (res != 0)
1288                         return line;
1289                 used += set_restore->header_size;
1290
1291                 index = ip_set_find_byindex(set_restore->index);
1292                 DP("index %u, restore_index %u", index, set_restore->index);
1293                 if (index != set_restore->index)
1294                         return line;
1295                 /* Try to restore members data */
1296                 set = ip_set_list[index];
1297                 members_size = 0;
1298                 DP("members_size %u reqsize %u",
1299                    set_restore->members_size, set->type->reqsize);
1300                 while (members_size + set->type->reqsize <=
1301                        set_restore->members_size) {
1302                         line++;
1303                         DP("members: %u, line %u", members_size, line);
1304                         res = __ip_set_addip(index,
1305                                            data + used + members_size,
1306                                            set->type->reqsize);
1307                         if (!(res == 0 || res == -EEXIST)) 
1308                                 return line;
1309                         members_size += set->type->reqsize;
1310                 }
1311
1312                 DP("members_size %u  %u",
1313                    set_restore->members_size, members_size);
1314                 if (members_size != set_restore->members_size)
1315                         return line++;
1316                 used += set_restore->members_size;              
1317         }
1318         
1319    bindings:
1320         /* Loop to restore bindings */
1321         while (used < len) {
1322                 line++;
1323
1324                 DP("restore binding, line %u", line);           
1325                 /* Get and ensure size */
1326                 if (used + sizeof(struct ip_set_hash_save) > len)
1327                         return line;
1328                 hash_save = (struct ip_set_hash_save *) (data + used);
1329                 used += sizeof(struct ip_set_hash_save);
1330                 
1331                 /* hash_save->id is used to store the index */
1332                 index = ip_set_find_byindex(hash_save->id);
1333                 DP("restore binding index %u, id %u, %u -> %u",
1334                    index, hash_save->id, hash_save->ip, hash_save->binding);            
1335                 if (index != hash_save->id)
1336                         return line;
1337                 if (ip_set_find_byindex(hash_save->binding) == IP_SET_INVALID_ID) {
1338                         DP("corrupt binding set index %u", hash_save->binding);
1339                         return line;
1340                 }
1341                 set = ip_set_list[hash_save->id];
1342                 /* Null valued IP means default binding */
1343                 if (hash_save->ip)
1344                         res = ip_set_hash_add(set->id, 
1345                                               hash_save->ip,
1346                                               hash_save->binding);
1347                 else {
1348                         IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID);
1349                         write_lock_bh(&ip_set_lock);
1350                         set->binding = hash_save->binding;
1351                         __ip_set_get(set->binding);
1352                         write_unlock_bh(&ip_set_lock);
1353                         DP("default binding: %u", set->binding);
1354                 }
1355                 if (res != 0)
1356                         return line;
1357         }
1358         if (used != len)
1359                 return line;
1360         
1361         return 0;       
1362 }
1363
1364 static int
1365 ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len)
1366 {
1367         void *data;
1368         int res = 0;            /* Assume OK */
1369         unsigned *op;
1370         struct ip_set_req_adt *req_adt;
1371         ip_set_id_t index = IP_SET_INVALID_ID;
1372         int (*adtfn)(ip_set_id_t index,
1373                      const void *data, size_t size);
1374         struct fn_table {
1375                 int (*fn)(ip_set_id_t index,
1376                           const void *data, size_t size);
1377         } adtfn_table[] =
1378         { { ip_set_addip }, { ip_set_delip }, { ip_set_testip},
1379           { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind },
1380         };
1381
1382         DP("optval=%d, user=%p, len=%d", optval, user, len);
1383         if (!capable(CAP_NET_ADMIN))
1384                 return -EPERM;
1385         if (optval != SO_IP_SET)
1386                 return -EBADF;
1387         if (len <= sizeof(unsigned)) {
1388                 ip_set_printk("short userdata (want >%zu, got %u)",
1389                               sizeof(unsigned), len);
1390                 return -EINVAL;
1391         }
1392         data = vmalloc(len);
1393         if (!data) {
1394                 DP("out of mem for %u bytes", len);
1395                 return -ENOMEM;
1396         }
1397         if (copy_from_user(data, user, len) != 0) {
1398                 res = -EFAULT;
1399                 goto done;
1400         }
1401         if (down_interruptible(&ip_set_app_mutex)) {
1402                 res = -EINTR;
1403                 goto done;
1404         }
1405
1406         op = (unsigned *)data;
1407         DP("op=%x", *op);
1408         
1409         if (*op < IP_SET_OP_VERSION) {
1410                 /* Check the version at the beginning of operations */
1411                 struct ip_set_req_version *req_version =
1412                         (struct ip_set_req_version *) data;
1413                 if (req_version->version != IP_SET_PROTOCOL_VERSION) {
1414                         res = -EPROTO;
1415                         goto done;
1416                 }
1417         }
1418
1419         switch (*op) {
1420         case IP_SET_OP_CREATE:{
1421                 struct ip_set_req_create *req_create
1422                         = (struct ip_set_req_create *) data;
1423                 
1424                 if (len < sizeof(struct ip_set_req_create)) {
1425                         ip_set_printk("short CREATE data (want >=%zu, got %u)",
1426                                       sizeof(struct ip_set_req_create), len);
1427                         res = -EINVAL;
1428                         goto done;
1429                 }
1430                 req_create->name[IP_SET_MAXNAMELEN - 1] = '\0';
1431                 req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1432                 res = ip_set_create(req_create->name,
1433                                     req_create->typename,
1434                                     IP_SET_INVALID_ID,
1435                                     data + sizeof(struct ip_set_req_create),
1436                                     len - sizeof(struct ip_set_req_create));
1437                 goto done;
1438         }
1439         case IP_SET_OP_DESTROY:{
1440                 struct ip_set_req_std *req_destroy
1441                         = (struct ip_set_req_std *) data;
1442                 
1443                 if (len != sizeof(struct ip_set_req_std)) {
1444                         ip_set_printk("invalid DESTROY data (want %zu, got %u)",
1445                                       sizeof(struct ip_set_req_std), len);
1446                         res = -EINVAL;
1447                         goto done;
1448                 }
1449                 if (strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) {
1450                         /* Destroy all sets */
1451                         index = IP_SET_INVALID_ID;
1452                 } else {
1453                         req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0';
1454                         index = ip_set_find_byname(req_destroy->name);
1455
1456                         if (index == IP_SET_INVALID_ID) {
1457                                 res = -ENOENT;
1458                                 goto done;
1459                         }
1460                 }
1461                         
1462                 res = ip_set_destroy(index);
1463                 goto done;
1464         }
1465         case IP_SET_OP_FLUSH:{
1466                 struct ip_set_req_std *req_flush =
1467                         (struct ip_set_req_std *) data;
1468
1469                 if (len != sizeof(struct ip_set_req_std)) {
1470                         ip_set_printk("invalid FLUSH data (want %zu, got %u)",
1471                                       sizeof(struct ip_set_req_std), len);
1472                         res = -EINVAL;
1473                         goto done;
1474                 }
1475                 if (strcmp(req_flush->name, IPSET_TOKEN_ALL) == 0) {
1476                         /* Flush all sets */
1477                         index = IP_SET_INVALID_ID;
1478                 } else {
1479                         req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0';
1480                         index = ip_set_find_byname(req_flush->name);
1481
1482                         if (index == IP_SET_INVALID_ID) {
1483                                 res = -ENOENT;
1484                                 goto done;
1485                         }
1486                 }
1487                 res = ip_set_flush(index);
1488                 goto done;
1489         }
1490         case IP_SET_OP_RENAME:{
1491                 struct ip_set_req_create *req_rename
1492                         = (struct ip_set_req_create *) data;
1493
1494                 if (len != sizeof(struct ip_set_req_create)) {
1495                         ip_set_printk("invalid RENAME data (want %zu, got %u)",
1496                                       sizeof(struct ip_set_req_create), len);
1497                         res = -EINVAL;
1498                         goto done;
1499                 }
1500
1501                 req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0';
1502                 req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1503                         
1504                 index = ip_set_find_byname(req_rename->name);
1505                 if (index == IP_SET_INVALID_ID) {
1506                         res = -ENOENT;
1507                         goto done;
1508                 }
1509                 res = ip_set_rename(index, req_rename->typename);
1510                 goto done;
1511         }
1512         case IP_SET_OP_SWAP:{
1513                 struct ip_set_req_create *req_swap
1514                         = (struct ip_set_req_create *) data;
1515                 ip_set_id_t to_index;
1516
1517                 if (len != sizeof(struct ip_set_req_create)) {
1518                         ip_set_printk("invalid SWAP data (want %zu, got %u)",
1519                                       sizeof(struct ip_set_req_create), len);
1520                         res = -EINVAL;
1521                         goto done;
1522                 }
1523
1524                 req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0';
1525                 req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1526
1527                 index = ip_set_find_byname(req_swap->name);
1528                 if (index == IP_SET_INVALID_ID) {
1529                         res = -ENOENT;
1530                         goto done;
1531                 }
1532                 to_index = ip_set_find_byname(req_swap->typename);
1533                 if (to_index == IP_SET_INVALID_ID) {
1534                         res = -ENOENT;
1535                         goto done;
1536                 }
1537                 res = ip_set_swap(index, to_index);
1538                 goto done;
1539         }
1540         default: 
1541                 break;  /* Set identified by id */
1542         }
1543         
1544         /* There we may have add/del/test/bind/unbind/test_bind operations */
1545         if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_BIND_SET) {
1546                 res = -EBADMSG;
1547                 goto done;
1548         }
1549         adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn;
1550
1551         if (len < sizeof(struct ip_set_req_adt)) {
1552                 ip_set_printk("short data in adt request (want >=%zu, got %u)",
1553                               sizeof(struct ip_set_req_adt), len);
1554                 res = -EINVAL;
1555                 goto done;
1556         }
1557         req_adt = (struct ip_set_req_adt *) data;
1558
1559         /* -U :all: :all:|:default: uses IP_SET_INVALID_ID */
1560         if (!(*op == IP_SET_OP_UNBIND_SET 
1561               && req_adt->index == IP_SET_INVALID_ID)) {
1562                 index = ip_set_find_byindex(req_adt->index);
1563                 if (index == IP_SET_INVALID_ID) {
1564                         res = -ENOENT;
1565                         goto done;
1566                 }
1567         }
1568         res = adtfn(index, data, len);
1569
1570     done:
1571         up(&ip_set_app_mutex);
1572         vfree(data);
1573         if (res > 0)
1574                 res = 0;
1575         DP("final result %d", res);
1576         return res;
1577 }
1578
1579 static int 
1580 ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len)
1581 {
1582         int res = 0;
1583         unsigned *op;
1584         ip_set_id_t index = IP_SET_INVALID_ID;
1585         void *data;
1586         int copylen = *len;
1587
1588         DP("optval=%d, user=%p, len=%d", optval, user, *len);
1589         if (!capable(CAP_NET_ADMIN))
1590                 return -EPERM;
1591         if (optval != SO_IP_SET)
1592                 return -EBADF;
1593         if (*len < sizeof(unsigned)) {
1594                 ip_set_printk("short userdata (want >=%zu, got %d)",
1595                               sizeof(unsigned), *len);
1596                 return -EINVAL;
1597         }
1598         data = vmalloc(*len);
1599         if (!data) {
1600                 DP("out of mem for %d bytes", *len);
1601                 return -ENOMEM;
1602         }
1603         if (copy_from_user(data, user, *len) != 0) {
1604                 res = -EFAULT;
1605                 goto done;
1606         }
1607         if (down_interruptible(&ip_set_app_mutex)) {
1608                 res = -EINTR;
1609                 goto done;
1610         }
1611
1612         op = (unsigned *) data;
1613         DP("op=%x", *op);
1614
1615         if (*op < IP_SET_OP_VERSION) {
1616                 /* Check the version at the beginning of operations */
1617                 struct ip_set_req_version *req_version =
1618                         (struct ip_set_req_version *) data;
1619                 if (req_version->version != IP_SET_PROTOCOL_VERSION) {
1620                         res = -EPROTO;
1621                         goto done;
1622                 }
1623         }
1624
1625         switch (*op) {
1626         case IP_SET_OP_VERSION: {
1627                 struct ip_set_req_version *req_version =
1628                     (struct ip_set_req_version *) data;
1629
1630                 if (*len != sizeof(struct ip_set_req_version)) {
1631                         ip_set_printk("invalid VERSION (want %zu, got %d)",
1632                                       sizeof(struct ip_set_req_version),
1633                                       *len);
1634                         res = -EINVAL;
1635                         goto done;
1636                 }
1637
1638                 req_version->version = IP_SET_PROTOCOL_VERSION;
1639                 res = copy_to_user(user, req_version,
1640                                    sizeof(struct ip_set_req_version));
1641                 goto done;
1642         }
1643         case IP_SET_OP_GET_BYNAME: {
1644                 struct ip_set_req_get_set *req_get
1645                         = (struct ip_set_req_get_set *) data;
1646
1647                 if (*len != sizeof(struct ip_set_req_get_set)) {
1648                         ip_set_printk("invalid GET_BYNAME (want %zu, got %d)",
1649                                       sizeof(struct ip_set_req_get_set), *len);
1650                         res = -EINVAL;
1651                         goto done;
1652                 }
1653                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1654                 index = ip_set_find_byname(req_get->set.name);
1655                 req_get->set.index = index;
1656                 goto copy;
1657         }
1658         case IP_SET_OP_GET_BYINDEX: {
1659                 struct ip_set_req_get_set *req_get
1660                         = (struct ip_set_req_get_set *) data;
1661
1662                 if (*len != sizeof(struct ip_set_req_get_set)) {
1663                         ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)",
1664                                       sizeof(struct ip_set_req_get_set), *len);
1665                         res = -EINVAL;
1666                         goto done;
1667                 }
1668                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1669                 index = ip_set_find_byindex(req_get->set.index);
1670                 strncpy(req_get->set.name,
1671                         index == IP_SET_INVALID_ID ? ""
1672                         : ip_set_list[index]->name, IP_SET_MAXNAMELEN);
1673                 goto copy;
1674         }
1675         case IP_SET_OP_ADT_GET: {
1676                 struct ip_set_req_adt_get *req_get
1677                         = (struct ip_set_req_adt_get *) data;
1678
1679                 if (*len != sizeof(struct ip_set_req_adt_get)) {
1680                         ip_set_printk("invalid ADT_GET (want %zu, got %d)",
1681                                       sizeof(struct ip_set_req_adt_get), *len);
1682                         res = -EINVAL;
1683                         goto done;
1684                 }
1685                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1686                 index = ip_set_find_byname(req_get->set.name);
1687                 if (index != IP_SET_INVALID_ID) {
1688                         req_get->set.index = index;
1689                         strncpy(req_get->typename,
1690                                 ip_set_list[index]->type->typename,
1691                                 IP_SET_MAXNAMELEN - 1);
1692                 } else {
1693                         res = -ENOENT;
1694                         goto done;
1695                 }
1696                 goto copy;
1697         }
1698         case IP_SET_OP_MAX_SETS: {
1699                 struct ip_set_req_max_sets *req_max_sets
1700                         = (struct ip_set_req_max_sets *) data;
1701                 ip_set_id_t i;
1702
1703                 if (*len != sizeof(struct ip_set_req_max_sets)) {
1704                         ip_set_printk("invalid MAX_SETS (want %zu, got %d)",
1705                                       sizeof(struct ip_set_req_max_sets), *len);
1706                         res = -EINVAL;
1707                         goto done;
1708                 }
1709
1710                 if (strcmp(req_max_sets->set.name, IPSET_TOKEN_ALL) == 0) {
1711                         req_max_sets->set.index = IP_SET_INVALID_ID;
1712                 } else {
1713                         req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1714                         req_max_sets->set.index = 
1715                                 ip_set_find_byname(req_max_sets->set.name);
1716                         if (req_max_sets->set.index == IP_SET_INVALID_ID) {
1717                                 res = -ENOENT;
1718                                 goto done;
1719                         }
1720                 }
1721                 req_max_sets->max_sets = ip_set_max;
1722                 req_max_sets->sets = 0;
1723                 for (i = 0; i < ip_set_max; i++) {
1724                         if (ip_set_list[i] != NULL)
1725                                 req_max_sets->sets++;
1726                 }
1727                 goto copy;
1728         }
1729         case IP_SET_OP_LIST_SIZE: 
1730         case IP_SET_OP_SAVE_SIZE: {
1731                 struct ip_set_req_setnames *req_setnames
1732                         = (struct ip_set_req_setnames *) data;
1733                 struct ip_set_name_list *name_list;
1734                 struct ip_set *set;
1735                 ip_set_id_t i;
1736                 int used;
1737
1738                 if (*len < sizeof(struct ip_set_req_setnames)) {
1739                         ip_set_printk("short LIST_SIZE (want >=%zu, got %d)",
1740                                       sizeof(struct ip_set_req_setnames), *len);
1741                         res = -EINVAL;
1742                         goto done;
1743                 }
1744
1745                 req_setnames->size = 0;
1746                 used = sizeof(struct ip_set_req_setnames);
1747                 for (i = 0; i < ip_set_max; i++) {
1748                         if (ip_set_list[i] == NULL)
1749                                 continue;
1750                         name_list = (struct ip_set_name_list *) 
1751                                 (data + used);
1752                         used += sizeof(struct ip_set_name_list);
1753                         if (used > copylen) {
1754                                 res = -EAGAIN;
1755                                 goto done;
1756                         }
1757                         set = ip_set_list[i];
1758                         /* Fill in index, name, etc. */
1759                         name_list->index = i;
1760                         name_list->id = set->id;
1761                         strncpy(name_list->name,
1762                                 set->name,
1763                                 IP_SET_MAXNAMELEN - 1);
1764                         strncpy(name_list->typename,
1765                                 set->type->typename,
1766                                 IP_SET_MAXNAMELEN - 1);
1767                         DP("filled %s of type %s, index %u\n",
1768                            name_list->name, name_list->typename,
1769                            name_list->index);
1770                         if (!(req_setnames->index == IP_SET_INVALID_ID
1771                               || req_setnames->index == i))
1772                               continue;
1773                         /* Update size */
1774                         switch (*op) {
1775                         case IP_SET_OP_LIST_SIZE: {
1776                                 req_setnames->size += sizeof(struct ip_set_list)
1777                                         + set->type->header_size
1778                                         + set->type->list_members_size(set);
1779                                 /* Sets are identified by id in the hash */
1780                                 FOREACH_HASH_DO(__set_hash_bindings_size_list, 
1781                                                 set->id, &req_setnames->size);
1782                                 break;
1783                         }
1784                         case IP_SET_OP_SAVE_SIZE: {
1785                                 req_setnames->size += sizeof(struct ip_set_save)
1786                                         + set->type->header_size
1787                                         + set->type->list_members_size(set);
1788                                 FOREACH_HASH_DO(__set_hash_bindings_size_save,
1789                                                 set->id, &req_setnames->size);
1790                                 break;
1791                         }
1792                         default:
1793                                 break;
1794                         }
1795                 }
1796                 if (copylen != used) {
1797                         res = -EAGAIN;
1798                         goto done;
1799                 }
1800                 goto copy;
1801         }
1802         case IP_SET_OP_LIST: {
1803                 struct ip_set_req_list *req_list
1804                         = (struct ip_set_req_list *) data;
1805                 ip_set_id_t i;
1806                 int used;
1807
1808                 if (*len < sizeof(struct ip_set_req_list)) {
1809                         ip_set_printk("short LIST (want >=%zu, got %d)",
1810                                       sizeof(struct ip_set_req_list), *len);
1811                         res = -EINVAL;
1812                         goto done;
1813                 }
1814                 index = req_list->index;
1815                 if (index != IP_SET_INVALID_ID
1816                     && ip_set_find_byindex(index) != index) {
1817                         res = -ENOENT;
1818                         goto done;
1819                 }
1820                 used = 0;
1821                 if (index == IP_SET_INVALID_ID) {
1822                         /* List all sets */
1823                         for (i = 0; i < ip_set_max && res == 0; i++) {
1824                                 if (ip_set_list[i] != NULL)
1825                                         res = ip_set_list_set(i, data, &used, *len);
1826                         }
1827                 } else {
1828                         /* List an individual set */
1829                         res = ip_set_list_set(index, data, &used, *len);
1830                 }
1831                 if (res != 0)
1832                         goto done;
1833                 else if (copylen != used) {
1834                         res = -EAGAIN;
1835                         goto done;
1836                 }
1837                 goto copy;
1838         }
1839         case IP_SET_OP_SAVE: {
1840                 struct ip_set_req_list *req_save
1841                         = (struct ip_set_req_list *) data;
1842                 ip_set_id_t i;
1843                 int used;
1844
1845                 if (*len < sizeof(struct ip_set_req_list)) {
1846                         ip_set_printk("short SAVE (want >=%zu, got %d)",
1847                                       sizeof(struct ip_set_req_list), *len);
1848                         res = -EINVAL;
1849                         goto done;
1850                 }
1851                 index = req_save->index;
1852                 if (index != IP_SET_INVALID_ID
1853                     && ip_set_find_byindex(index) != index) {
1854                         res = -ENOENT;
1855                         goto done;
1856                 }
1857                 used = 0;
1858                 if (index == IP_SET_INVALID_ID) {
1859                         /* Save all sets */
1860                         for (i = 0; i < ip_set_max && res == 0; i++) {
1861                                 if (ip_set_list[i] != NULL)
1862                                         res = ip_set_save_set(i, data, &used, *len);
1863                         }
1864                 } else {
1865                         /* Save an individual set */
1866                         res = ip_set_save_set(index, data, &used, *len);
1867                 }
1868                 if (res == 0)
1869                         res = ip_set_save_bindings(index, data, &used, *len);
1870                         
1871                 if (res != 0)
1872                         goto done;
1873                 else if (copylen != used) {
1874                         res = -EAGAIN;
1875                         goto done;
1876                 }
1877                 goto copy;
1878         }
1879         case IP_SET_OP_RESTORE: {
1880                 struct ip_set_req_setnames *req_restore
1881                         = (struct ip_set_req_setnames *) data;
1882                 int line;
1883
1884                 if (*len < sizeof(struct ip_set_req_setnames)
1885                     || *len != req_restore->size) {
1886                         ip_set_printk("invalid RESTORE (want =%zu, got %d)",
1887                                       req_restore->size, *len);
1888                         res = -EINVAL;
1889                         goto done;
1890                 }
1891                 line = ip_set_restore(data + sizeof(struct ip_set_req_setnames),
1892                                       req_restore->size - sizeof(struct ip_set_req_setnames));
1893                 DP("ip_set_restore: %u", line);
1894                 if (line != 0) {
1895                         res = -EAGAIN;
1896                         req_restore->size = line;
1897                         copylen = sizeof(struct ip_set_req_setnames);
1898                         goto copy;
1899                 }
1900                 goto done;
1901         }
1902         default:
1903                 res = -EBADMSG;
1904                 goto done;
1905         }       /* end of switch(op) */
1906
1907     copy:
1908         DP("set %s, copylen %u", index != IP_SET_INVALID_ID
1909                                  && ip_set_list[index]
1910                      ? ip_set_list[index]->name
1911                      : ":all:", copylen);
1912         res = copy_to_user(user, data, copylen);
1913         
1914     done:
1915         up(&ip_set_app_mutex);
1916         vfree(data);
1917         if (res > 0)
1918                 res = 0;
1919         DP("final result %d", res);
1920         return res;
1921 }
1922
1923 static struct nf_sockopt_ops so_set = {
1924         .pf             = PF_INET,
1925         .set_optmin     = SO_IP_SET,
1926         .set_optmax     = SO_IP_SET + 1,
1927         .set            = &ip_set_sockfn_set,
1928         .get_optmin     = SO_IP_SET,
1929         .get_optmax     = SO_IP_SET + 1,
1930         .get            = &ip_set_sockfn_get,
1931         .use            = 0
1932 };
1933
1934 static int max_sets, hash_size;
1935 module_param(max_sets, int, 0600);
1936 MODULE_PARM_DESC(max_sets, "maximal number of sets");
1937 module_param(hash_size, int, 0600);
1938 MODULE_PARM_DESC(hash_size, "hash size for bindings");
1939 MODULE_LICENSE("GPL");
1940 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
1941 MODULE_DESCRIPTION("module implementing core IP set support");
1942
1943 static int __init init(void)
1944 {
1945         int res;
1946         ip_set_id_t i;
1947
1948         get_random_bytes(&ip_set_hash_random, 4);
1949         if (max_sets)
1950                 ip_set_max = max_sets;
1951         ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max);
1952         if (!ip_set_list) {
1953                 printk(KERN_ERR "Unable to create ip_set_list\n");
1954                 return -ENOMEM;
1955         }
1956         memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max);
1957         if (hash_size)
1958                 ip_set_bindings_hash_size = hash_size;
1959         ip_set_hash = vmalloc(sizeof(struct list_head) * ip_set_bindings_hash_size);
1960         if (!ip_set_hash) {
1961                 printk(KERN_ERR "Unable to create ip_set_hash\n");
1962                 vfree(ip_set_list);
1963                 return -ENOMEM;
1964         }
1965         for (i = 0; i < ip_set_bindings_hash_size; i++)
1966                 INIT_LIST_HEAD(&ip_set_hash[i]);
1967
1968         INIT_LIST_HEAD(&set_type_list);
1969
1970         res = nf_register_sockopt(&so_set);
1971         if (res != 0) {
1972                 ip_set_printk("SO_SET registry failed: %d", res);
1973                 vfree(ip_set_list);
1974                 vfree(ip_set_hash);
1975                 return res;
1976         }
1977         return 0;
1978 }
1979
1980 static void __exit fini(void)
1981 {
1982         /* There can't be any existing set or binding */
1983         nf_unregister_sockopt(&so_set);
1984         vfree(ip_set_list);
1985         vfree(ip_set_hash);
1986         DP("these are the famous last words");
1987 }
1988
/* Module entry and exit points */
module_init(init);
module_exit(fini);

/* Set type registration */
EXPORT_SYMBOL(ip_set_register_set_type);
EXPORT_SYMBOL(ip_set_unregister_set_type);

/* Getting and releasing sets by name or index */
EXPORT_SYMBOL(ip_set_get_byname);
EXPORT_SYMBOL(ip_set_get_byindex);
EXPORT_SYMBOL(ip_set_put);

/* In-kernel add/delete/test operations */
EXPORT_SYMBOL(ip_set_addip_kernel);
EXPORT_SYMBOL(ip_set_delip_kernel);
EXPORT_SYMBOL(ip_set_testip_kernel);