Merge to Fedora kernel-2.6.18-1.2260_FC5 patched with stable patch-2.6.18.5-vs2.0...
[linux-2.6.git] / net / ipv4 / netfilter / ip_set.c
1 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2  *                         Patrick Schaaf <bof@bof.de>
3  * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.  
8  */
9
10 /* Kernel module for IP set management */
11
12 #include <linux/version.h>
13 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
14 #include <linux/config.h>
15 #endif
16 #include <linux/module.h>
17 #include <linux/moduleparam.h>
18 #include <linux/kmod.h>
19 #include <linux/ip.h>
20 #include <linux/skbuff.h>
21 #include <linux/random.h>
22 #include <linux/jhash.h>
23 #include <linux/netfilter_ipv4/ip_tables.h>
24 #include <linux/errno.h>
25 #include <asm/uaccess.h>
26 #include <asm/bitops.h>
27 #include <asm/semaphore.h>
28 #include <linux/spinlock.h>
29 #include <linux/vmalloc.h>
30
31 #define ASSERT_READ_LOCK(x)
32 #define ASSERT_WRITE_LOCK(x)
33 #include <linux/netfilter_ipv4/ip_set.h>
34
35 static struct list_head set_type_list;          /* all registered sets */
36 static struct ip_set **ip_set_list;             /* all individual sets */
37 static DEFINE_RWLOCK(ip_set_lock);              /* protects the lists and the hash */
38 static DECLARE_MUTEX(ip_set_app_mutex);         /* serializes user access */
39 static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX;
40 static ip_set_id_t ip_set_bindings_hash_size =  CONFIG_IP_NF_SET_HASHSIZE;
41 static struct list_head *ip_set_hash;           /* hash of bindings */
42 static unsigned int ip_set_hash_random;         /* random seed */
43
44 /*
45  * Sets are identified either by the index in ip_set_list or by id.
46  * The id never changes and is used to find a key in the hash. 
47  * The index may change by swapping and is used at all other places
48  * (set/SET netfilter modules, binding value, etc.)
49  *
50  * Userspace requests are serialized by ip_set_app_mutex and sets can
51  * be deleted only from userspace. Therefore ip_set_list locking 
52  * must obey the following rules:
53  *
54  * - kernel requests: read and write locking mandatory
55  * - user requests: read locking optional, write locking mandatory
56  */
57
58 static inline void
59 __ip_set_get(ip_set_id_t index)
60 {
61         atomic_inc(&ip_set_list[index]->ref);
62 }
63
64 static inline void
65 __ip_set_put(ip_set_id_t index)
66 {
67         atomic_dec(&ip_set_list[index]->ref);
68 }
69
70 /*
71  * Binding routines
72  */
73
74 static inline struct ip_set_hash *
75 __ip_set_find(u_int32_t key, ip_set_id_t id, ip_set_ip_t ip)
76 {
77         struct ip_set_hash *set_hash;
78
79         list_for_each_entry(set_hash, &ip_set_hash[key], list)
80                 if (set_hash->id == id && set_hash->ip == ip)
81                         return set_hash;
82                         
83         return NULL;
84 }
85
86 static ip_set_id_t
87 ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip)
88 {
89         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) 
90                                 % ip_set_bindings_hash_size;
91         struct ip_set_hash *set_hash;
92
93         ASSERT_READ_LOCK(&ip_set_lock);
94         IP_SET_ASSERT(ip_set_list[id]);
95         DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
96         
97         set_hash = __ip_set_find(key, id, ip);
98         
99         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
100            HIPQUAD(ip),
101            set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
102
103         return (set_hash != NULL ? set_hash->binding : IP_SET_INVALID_ID);
104 }
105
106 static inline void 
107 __set_hash_del(struct ip_set_hash *set_hash)
108 {
109         ASSERT_WRITE_LOCK(&ip_set_lock);
110         IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
111
112         __ip_set_put(set_hash->binding);
113         list_del(&set_hash->list);
114         kfree(set_hash);
115 }
116
117 static int
118 ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip)
119 {
120         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
121                                 % ip_set_bindings_hash_size;
122         struct ip_set_hash *set_hash;
123         
124         IP_SET_ASSERT(ip_set_list[id]);
125         DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
126         write_lock_bh(&ip_set_lock);
127         set_hash = __ip_set_find(key, id, ip);
128         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
129            HIPQUAD(ip),
130            set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
131
132         if (set_hash != NULL)
133                 __set_hash_del(set_hash);
134         write_unlock_bh(&ip_set_lock);
135         return 0;
136 }
137
138 static int 
139 ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding)
140 {
141         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
142                                 % ip_set_bindings_hash_size;
143         struct ip_set_hash *set_hash;
144         int ret = 0;
145         
146         IP_SET_ASSERT(ip_set_list[id]);
147         IP_SET_ASSERT(ip_set_list[binding]);
148         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
149            HIPQUAD(ip), ip_set_list[binding]->name);
150         write_lock_bh(&ip_set_lock);
151         set_hash = __ip_set_find(key, id, ip);
152         if (!set_hash) {
153                 set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_KERNEL);
154                 if (!set_hash) {
155                         ret = -ENOMEM;
156                         goto unlock;
157                 }
158                 INIT_LIST_HEAD(&set_hash->list);
159                 set_hash->id = id;
160                 set_hash->ip = ip;
161                 list_add(&ip_set_hash[key], &set_hash->list);
162         } else {
163                 IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
164                 DP("overwrite binding: %s",
165                    ip_set_list[set_hash->binding]->name);
166                 __ip_set_put(set_hash->binding);
167         }
168         set_hash->binding = binding;
169         __ip_set_get(set_hash->binding);
170     unlock:
171         write_unlock_bh(&ip_set_lock);
172         return ret;
173 }
174
/*
 * Call fn(entry, args...) for every binding entry in every bucket of
 * ip_set_hash. Uses the plain list iterator, so fn must not unlink the
 * entry it is handed.
 */
#define FOREACH_HASH_DO(fn, args...)                                            \
({                                                                              \
        struct ip_set_hash *__entry;                                            \
        ip_set_id_t __bucket = 0;                                               \
                                                                                \
        while (__bucket < ip_set_bindings_hash_size) {                          \
                list_for_each_entry(__entry, &ip_set_hash[__bucket], list)      \
                        fn(__entry , ## args);                                  \
                __bucket++;                                                     \
        }                                                                       \
})
185
/*
 * Call fn(entry, args...) for every binding entry in every bucket of
 * ip_set_hash, using the removal-safe list iterator so that fn may
 * unlink and free the entry it is handed.
 */
#define FOREACH_HASH_RW_DO(fn, args...)                                         \
({                                                                              \
        struct ip_set_hash *__entry, *__next;                                   \
        ip_set_id_t __bucket = 0;                                               \
                                                                                \
        ASSERT_WRITE_LOCK(&ip_set_lock);                                        \
        while (__bucket < ip_set_bindings_hash_size) {                          \
                list_for_each_entry_safe(__entry, __next,                       \
                                         &ip_set_hash[__bucket], list)          \
                        fn(__entry , ## args);                                  \
                __bucket++;                                                     \
        }                                                                       \
})
197
198 /* Add, del and test set entries from kernel */
199
/*
 * Store in @index the set to continue with after @set matched @ip:
 * first the binding recorded in the hash for (set->id, ip), otherwise
 * the default binding of @set. Evaluates to non-zero when either one
 * is a valid index.
 */
#define follow_bindings(index, set, ip)                                 \
        (((index) = ip_set_find_in_hash((set)->id, (ip)))               \
                != IP_SET_INVALID_ID                                    \
         || ((index) = (set)->binding) != IP_SET_INVALID_ID)
203
204 int
205 ip_set_testip_kernel(ip_set_id_t index,
206                      const struct sk_buff *skb,
207                      const u_int32_t *flags)
208 {
209         struct ip_set *set;
210         ip_set_ip_t ip;
211         int res;
212         unsigned char i = 0;
213         
214         IP_SET_ASSERT(flags[i]);
215         read_lock_bh(&ip_set_lock);
216         do {
217                 set = ip_set_list[index];
218                 IP_SET_ASSERT(set);
219                 DP("set %s, index %u", set->name, index);
220                 read_lock_bh(&set->lock);
221                 res = set->type->testip_kernel(set, skb, &ip, flags, i++);
222                 read_unlock_bh(&set->lock);
223                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
224         } while (res > 0 
225                  && flags[i] 
226                  && follow_bindings(index, set, ip));
227         read_unlock_bh(&ip_set_lock);
228
229         return res;
230 }
231
232 void
233 ip_set_addip_kernel(ip_set_id_t index,
234                     const struct sk_buff *skb,
235                     const u_int32_t *flags)
236 {
237         struct ip_set *set;
238         ip_set_ip_t ip;
239         int res;
240         unsigned char i = 0;
241
242         IP_SET_ASSERT(flags[i]);
243    retry:
244         read_lock_bh(&ip_set_lock);
245         do {
246                 set = ip_set_list[index];
247                 IP_SET_ASSERT(set);
248                 DP("set %s, index %u", set->name, index);
249                 write_lock_bh(&set->lock);
250                 res = set->type->addip_kernel(set, skb, &ip, flags, i++);
251                 write_unlock_bh(&set->lock);
252                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
253         } while ((res == 0 || res == -EEXIST)
254                  && flags[i] 
255                  && follow_bindings(index, set, ip));
256         read_unlock_bh(&ip_set_lock);
257
258         if (res == -EAGAIN
259             && set->type->retry
260             && (res = set->type->retry(set)) == 0)
261                 goto retry;
262 }
263
264 void
265 ip_set_delip_kernel(ip_set_id_t index,
266                     const struct sk_buff *skb,
267                     const u_int32_t *flags)
268 {
269         struct ip_set *set;
270         ip_set_ip_t ip;
271         int res;
272         unsigned char i = 0;
273
274         IP_SET_ASSERT(flags[i]);
275         read_lock_bh(&ip_set_lock);
276         do {
277                 set = ip_set_list[index];
278                 IP_SET_ASSERT(set);
279                 DP("set %s, index %u", set->name, index);
280                 write_lock_bh(&set->lock);
281                 res = set->type->delip_kernel(set, skb, &ip, flags, i++);
282                 write_unlock_bh(&set->lock);
283                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
284         } while ((res == 0 || res == -EEXIST)
285                  && flags[i] 
286                  && follow_bindings(index, set, ip));
287         read_unlock_bh(&ip_set_lock);
288 }
289
290 /* Register and deregister settype */
291
292 static inline struct ip_set_type *
293 find_set_type(const char *name)
294 {
295         struct ip_set_type *set_type;
296
297         list_for_each_entry(set_type, &set_type_list, list)
298                 if (!strncmp(set_type->typename, name, IP_SET_MAXNAMELEN - 1))
299                         return set_type;
300         return NULL;
301 }
302
303 int 
304 ip_set_register_set_type(struct ip_set_type *set_type)
305 {
306         int ret = 0;
307         
308         if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) {
309                 ip_set_printk("'%s' uses wrong protocol version %u (want %u)",
310                               set_type->typename,
311                               set_type->protocol_version,
312                               IP_SET_PROTOCOL_VERSION);
313                 return -EINVAL;
314         }
315
316         write_lock_bh(&ip_set_lock);
317         if (find_set_type(set_type->typename)) {
318                 /* Duplicate! */
319                 ip_set_printk("'%s' already registered!", 
320                               set_type->typename);
321                 ret = -EINVAL;
322                 goto unlock;
323         }
324         if (!try_module_get(THIS_MODULE)) {
325                 ret = -EFAULT;
326                 goto unlock;
327         }
328         list_add(&set_type->list, &set_type_list);
329         DP("'%s' registered.", set_type->typename);
330    unlock:
331         write_unlock_bh(&ip_set_lock);
332         return ret;
333 }
334
335 void
336 ip_set_unregister_set_type(struct ip_set_type *set_type)
337 {
338         write_lock_bh(&ip_set_lock);
339         if (!find_set_type(set_type->typename)) {
340                 ip_set_printk("'%s' not registered?",
341                               set_type->typename);
342                 goto unlock;
343         }
344         list_del(&set_type->list);
345         module_put(THIS_MODULE);
346         DP("'%s' unregistered.", set_type->typename);
347    unlock:
348         write_unlock_bh(&ip_set_lock);
349
350 }
351
352 /*
353  * Userspace routines
354  */
355
356 /*
357  * Find set by name, reference it once. The reference makes sure the
358  * thing pointed to, does not go away under our feet. Drop the reference
359  * later, using ip_set_put().
360  */
361 ip_set_id_t
362 ip_set_get_byname(const char *name)
363 {
364         ip_set_id_t i, index = IP_SET_INVALID_ID;
365         
366         down(&ip_set_app_mutex);
367         for (i = 0; i < ip_set_max; i++) {
368                 if (ip_set_list[i] != NULL
369                     && strcmp(ip_set_list[i]->name, name) == 0) {
370                         __ip_set_get(i);
371                         index = i;
372                         break;
373                 }
374         }
375         up(&ip_set_app_mutex);
376         return index;
377 }
378
379 /*
380  * Find set by index, reference it once. The reference makes sure the
381  * thing pointed to, does not go away under our feet. Drop the reference
382  * later, using ip_set_put().
383  */
384 ip_set_id_t
385 ip_set_get_byindex(ip_set_id_t index)
386 {
387         down(&ip_set_app_mutex);
388
389         if (index >= ip_set_max)
390                 return IP_SET_INVALID_ID;
391         
392         if (ip_set_list[index])
393                 __ip_set_get(index);
394         else
395                 index = IP_SET_INVALID_ID;
396                 
397         up(&ip_set_app_mutex);
398         return index;
399 }
400
401 /*
402  * If the given set pointer points to a valid set, decrement
403  * reference count by 1. The caller shall not assume the index
404  * to be valid, after calling this function.
405  */
406 void ip_set_put(ip_set_id_t index)
407 {
408         down(&ip_set_app_mutex);
409         if (ip_set_list[index])
410                 __ip_set_put(index);
411         up(&ip_set_app_mutex);
412 }
413
414 /* Find a set by name or index */
415 static ip_set_id_t
416 ip_set_find_byname(const char *name)
417 {
418         ip_set_id_t i, index = IP_SET_INVALID_ID;
419         
420         for (i = 0; i < ip_set_max; i++) {
421                 if (ip_set_list[i] != NULL
422                     && strcmp(ip_set_list[i]->name, name) == 0) {
423                         index = i;
424                         break;
425                 }
426         }
427         return index;
428 }
429
430 static ip_set_id_t
431 ip_set_find_byindex(ip_set_id_t index)
432 {
433         if (index >= ip_set_max || ip_set_list[index] == NULL)
434                 index = IP_SET_INVALID_ID;
435         
436         return index;
437 }
438
439 /*
440  * Add, del, test, bind and unbind
441  */
442
443 static inline int
444 __ip_set_testip(struct ip_set *set,
445                 const void *data,
446                 size_t size,
447                 ip_set_ip_t *ip)
448 {
449         int res;
450
451         read_lock_bh(&set->lock);
452         res = set->type->testip(set, data, size, ip);
453         read_unlock_bh(&set->lock);
454
455         return res;
456 }
457
458 static int
459 __ip_set_addip(ip_set_id_t index,
460                const void *data,
461                size_t size)
462 {
463         struct ip_set *set = ip_set_list[index];
464         ip_set_ip_t ip;
465         int res;
466         
467         IP_SET_ASSERT(set);
468         do {
469                 write_lock_bh(&set->lock);
470                 res = set->type->addip(set, data, size, &ip);
471                 write_unlock_bh(&set->lock);
472         } while (res == -EAGAIN
473                  && set->type->retry
474                  && (res = set->type->retry(set)) == 0);
475
476         return res;
477 }
478
479 static int
480 ip_set_addip(ip_set_id_t index,
481              const void *data,
482              size_t size)
483 {
484
485         return __ip_set_addip(index,
486                               data + sizeof(struct ip_set_req_adt),
487                               size - sizeof(struct ip_set_req_adt));
488 }
489
490 static int
491 ip_set_delip(ip_set_id_t index,
492              const void *data,
493              size_t size)
494 {
495         struct ip_set *set = ip_set_list[index];
496         ip_set_ip_t ip;
497         int res;
498         
499         IP_SET_ASSERT(set);
500         write_lock_bh(&set->lock);
501         res = set->type->delip(set,
502                                data + sizeof(struct ip_set_req_adt),
503                                size - sizeof(struct ip_set_req_adt),
504                                &ip);
505         write_unlock_bh(&set->lock);
506
507         return res;
508 }
509
510 static int
511 ip_set_testip(ip_set_id_t index,
512               const void *data,
513               size_t size)
514 {
515         struct ip_set *set = ip_set_list[index];
516         ip_set_ip_t ip;
517         int res;
518
519         IP_SET_ASSERT(set);
520         res = __ip_set_testip(set,
521                               data + sizeof(struct ip_set_req_adt),
522                               size - sizeof(struct ip_set_req_adt),
523                               &ip);
524
525         return (res > 0 ? -EEXIST : res);
526 }
527
528 static int
529 ip_set_bindip(ip_set_id_t index,
530               const void *data,
531               size_t size)
532 {
533         struct ip_set *set = ip_set_list[index];
534         struct ip_set_req_bind *req_bind;
535         ip_set_id_t binding;
536         ip_set_ip_t ip;
537         int res;
538
539         IP_SET_ASSERT(set);
540         if (size < sizeof(struct ip_set_req_bind))
541                 return -EINVAL;
542                 
543         req_bind = (struct ip_set_req_bind *) data;
544         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
545
546         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
547                 /* Default binding of a set */
548                 char *binding_name;
549                 
550                 if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
551                         return -EINVAL;
552
553                 binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
554                 binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
555
556                 binding = ip_set_find_byname(binding_name);
557                 if (binding == IP_SET_INVALID_ID)
558                         return -ENOENT;
559
560                 write_lock_bh(&ip_set_lock);
561                 /* Sets as binding values are referenced */
562                 if (set->binding != IP_SET_INVALID_ID)
563                         __ip_set_put(set->binding);
564                 set->binding = binding;
565                 __ip_set_get(set->binding);
566                 write_unlock_bh(&ip_set_lock);
567
568                 return 0;
569         }
570         binding = ip_set_find_byname(req_bind->binding);
571         if (binding == IP_SET_INVALID_ID)
572                 return -ENOENT;
573
574         res = __ip_set_testip(set,
575                               data + sizeof(struct ip_set_req_bind),
576                               size - sizeof(struct ip_set_req_bind),
577                               &ip);
578         DP("set %s, ip: %u.%u.%u.%u, binding %s",
579            set->name, HIPQUAD(ip), ip_set_list[binding]->name);
580         
581         if (res >= 0)
582                 res = ip_set_hash_add(set->id, ip, binding);
583
584         return res;
585 }
586
/* Call fn(set, args...) for every non-empty slot of ip_set_list. */
#define FOREACH_SET_DO(fn, args...)                             \
({                                                              \
        struct ip_set *__cur;                                   \
        ip_set_id_t __slot = 0;                                 \
                                                                \
        while (__slot < ip_set_max) {                           \
                __cur = ip_set_list[__slot];                    \
                if (__cur != NULL)                              \
                        fn(__cur , ##args);                     \
                __slot++;                                       \
        }                                                       \
})
598
599 static inline void
600 __set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id)
601 {
602         if (set_hash->id == id)
603                 __set_hash_del(set_hash);
604 }
605
606 static inline void
607 __unbind_default(struct ip_set *set)
608 {
609         if (set->binding != IP_SET_INVALID_ID) {
610                 /* Sets as binding values are referenced */
611                 __ip_set_put(set->binding);
612                 set->binding = IP_SET_INVALID_ID;
613         }
614 }
615
616 static int
617 ip_set_unbindip(ip_set_id_t index,
618                 const void *data,
619                 size_t size)
620 {
621         struct ip_set *set;
622         struct ip_set_req_bind *req_bind;
623         ip_set_ip_t ip;
624         int res;
625
626         DP("");
627         if (size < sizeof(struct ip_set_req_bind))
628                 return -EINVAL;
629                 
630         req_bind = (struct ip_set_req_bind *) data;
631         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
632         
633         DP("%u %s", index, req_bind->binding);
634         if (index == IP_SET_INVALID_ID) {
635                 /* unbind :all: */
636                 if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
637                         /* Default binding of sets */
638                         write_lock_bh(&ip_set_lock);
639                         FOREACH_SET_DO(__unbind_default);
640                         write_unlock_bh(&ip_set_lock);
641                         return 0;
642                 } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
643                         /* Flush all bindings of all sets*/
644                         write_lock_bh(&ip_set_lock);
645                         FOREACH_HASH_RW_DO(__set_hash_del);
646                         write_unlock_bh(&ip_set_lock);
647                         return 0;
648                 }
649                 DP("unreachable reached!");
650                 return -EINVAL;
651         }
652         
653         set = ip_set_list[index];
654         IP_SET_ASSERT(set);
655         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
656                 /* Default binding of set */
657                 ip_set_id_t binding = ip_set_find_byindex(set->binding);
658
659                 if (binding == IP_SET_INVALID_ID)
660                         return -ENOENT;
661                         
662                 write_lock_bh(&ip_set_lock);
663                 /* Sets in hash values are referenced */
664                 __ip_set_put(set->binding);
665                 set->binding = IP_SET_INVALID_ID;
666                 write_unlock_bh(&ip_set_lock);
667
668                 return 0;
669         } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
670                 /* Flush all bindings */
671
672                 write_lock_bh(&ip_set_lock);
673                 FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
674                 write_unlock_bh(&ip_set_lock);
675                 return 0;
676         }
677         
678         res = __ip_set_testip(set,
679                               data + sizeof(struct ip_set_req_bind),
680                               size - sizeof(struct ip_set_req_bind),
681                               &ip);
682
683         DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
684         if (res >= 0)
685                 res = ip_set_hash_del(set->id, ip);
686
687         return res;
688 }
689
690 static int
691 ip_set_testbind(ip_set_id_t index,
692                 const void *data,
693                 size_t size)
694 {
695         struct ip_set *set = ip_set_list[index];
696         struct ip_set_req_bind *req_bind;
697         ip_set_id_t binding;
698         ip_set_ip_t ip;
699         int res;
700
701         IP_SET_ASSERT(set);
702         if (size < sizeof(struct ip_set_req_bind))
703                 return -EINVAL;
704                 
705         req_bind = (struct ip_set_req_bind *) data;
706         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
707
708         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
709                 /* Default binding of set */
710                 char *binding_name;
711                 
712                 if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
713                         return -EINVAL;
714
715                 binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
716                 binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
717
718                 binding = ip_set_find_byname(binding_name);
719                 if (binding == IP_SET_INVALID_ID)
720                         return -ENOENT;
721                 
722                 res = (set->binding == binding) ? -EEXIST : 0;
723
724                 return res;
725         }
726         binding = ip_set_find_byname(req_bind->binding);
727         if (binding == IP_SET_INVALID_ID)
728                 return -ENOENT;
729                 
730         
731         res = __ip_set_testip(set,
732                               data + sizeof(struct ip_set_req_bind),
733                               size - sizeof(struct ip_set_req_bind),
734                               &ip);
735         DP("set %s, ip: %u.%u.%u.%u, binding %s",
736            set->name, HIPQUAD(ip), ip_set_list[binding]->name);
737            
738         if (res >= 0)
739                 res = (ip_set_find_in_hash(set->id, ip) == binding)
740                         ? -EEXIST : 0;
741
742         return res;
743 }
744
745 static struct ip_set_type *
746 find_set_type_rlock(const char *typename)
747 {
748         struct ip_set_type *type;
749         
750         read_lock_bh(&ip_set_lock);
751         type = find_set_type(typename);
752         if (type == NULL)
753                 read_unlock_bh(&ip_set_lock);
754
755         return type;
756 }
757
758 static int
759 find_free_id(const char *name,
760              ip_set_id_t *index,
761              ip_set_id_t *id)
762 {
763         ip_set_id_t i;
764
765         *id = IP_SET_INVALID_ID;
766         for (i = 0;  i < ip_set_max; i++) {
767                 if (ip_set_list[i] == NULL) {
768                         if (*id == IP_SET_INVALID_ID)
769                                 *id = *index = i;
770                 } else if (strcmp(name, ip_set_list[i]->name) == 0)
771                         /* Name clash */
772                         return -EEXIST;
773         }
774         if (*id == IP_SET_INVALID_ID)
775                 /* No free slot remained */
776                 return -ERANGE;
777         /* Check that index is usable as id (swapping) */
778     check:      
779         for (i = 0;  i < ip_set_max; i++) {
780                 if (ip_set_list[i] != NULL
781                     && ip_set_list[i]->id == *id) {
782                     *id = i;
783                     goto check;
784                 }
785         }
786         return 0;
787 }
788
789 /*
790  * Create a set
791  */
792 static int
793 ip_set_create(const char *name,
794               const char *typename,
795               ip_set_id_t restore,
796               const void *data,
797               size_t size)
798 {
799         struct ip_set *set;
800         ip_set_id_t index = 0, id;
801         int res = 0;
802
803         DP("setname: %s, typename: %s, id: %u", name, typename, restore);
804         /*
805          * First, and without any locks, allocate and initialize
806          * a normal base set structure.
807          */
808         set = kmalloc(sizeof(struct ip_set), GFP_KERNEL);
809         if (!set)
810                 return -ENOMEM;
811         set->lock = RW_LOCK_UNLOCKED;
812         strncpy(set->name, name, IP_SET_MAXNAMELEN);
813         set->binding = IP_SET_INVALID_ID;
814         atomic_set(&set->ref, 0);
815
816         /*
817          * Next, take the &ip_set_lock, check that we know the type,
818          * and take a reference on the type, to make sure it
819          * stays available while constructing our new set.
820          *
821          * After referencing the type, we drop the &ip_set_lock,
822          * and let the new set construction run without locks.
823          */
824         set->type = find_set_type_rlock(typename);
825         if (set->type == NULL) {
826                 /* Try loading the module */
827                 char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1];
828                 strcpy(modulename, "ip_set_");
829                 strcat(modulename, typename);
830                 DP("try to load %s", modulename);
831                 request_module(modulename);
832                 set->type = find_set_type_rlock(typename);
833         }
834         if (set->type == NULL) {
835                 ip_set_printk("no set type '%s', set '%s' not created",
836                               typename, name);
837                 res = -ENOENT;
838                 goto out;
839         }
840         if (!try_module_get(set->type->me)) {
841                 read_unlock_bh(&ip_set_lock);
842                 res = -EFAULT;
843                 goto out;
844         }
845         read_unlock_bh(&ip_set_lock);
846
847         /*
848          * Without holding any locks, create private part.
849          */
850         res = set->type->create(set, data, size);
851         if (res != 0)
852                 goto put_out;
853
854         /* BTW, res==0 here. */
855
856         /*
857          * Here, we have a valid, constructed set. &ip_set_lock again,
858          * find free id/index and check that it is not already in 
859          * ip_set_list.
860          */
861         write_lock_bh(&ip_set_lock);
862         if ((res = find_free_id(set->name, &index, &id)) != 0) {
863                 DP("no free id!");
864                 goto cleanup;
865         }
866
867         /* Make sure restore gets the same index */
868         if (restore != IP_SET_INVALID_ID && index != restore) {
869                 DP("Can't restore, sets are screwed up");
870                 res = -ERANGE;
871                 goto cleanup;
872         }
873          
874         /*
875          * Finally! Add our shiny new set to the list, and be done.
876          */
877         DP("create: '%s' created with index %u, id %u!", set->name, index, id);
878         set->id = id;
879         ip_set_list[index] = set;
880         write_unlock_bh(&ip_set_lock);
881         return res;
882         
883     cleanup:
884         write_unlock_bh(&ip_set_lock);
885         set->type->destroy(set);
886     put_out:
887         module_put(set->type->me);
888     out:
889         kfree(set);
890         return res;
891 }
892
893 /*
894  * Destroy a given existing set
895  */
896 static void
897 ip_set_destroy_set(ip_set_id_t index)
898 {
899         struct ip_set *set = ip_set_list[index];
900
901         IP_SET_ASSERT(set);
902         DP("set: %s",  set->name);
903         write_lock_bh(&ip_set_lock);
904         FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
905         if (set->binding != IP_SET_INVALID_ID)
906                 __ip_set_put(set->binding);
907         ip_set_list[index] = NULL;
908         write_unlock_bh(&ip_set_lock);
909
910         /* Must call it without holding any lock */
911         set->type->destroy(set);
912         module_put(set->type->me);
913         kfree(set);
914 }
915
916 /*
917  * Destroy a set - or all sets
918  * Sets must not be referenced/used.
919  */
920 static int
921 ip_set_destroy(ip_set_id_t index)
922 {
923         ip_set_id_t i;
924
925         /* ref modification always protected by the mutex */
926         if (index != IP_SET_INVALID_ID) {
927                 if (atomic_read(&ip_set_list[index]->ref))
928                         return -EBUSY;
929                 ip_set_destroy_set(index);
930         } else {
931                 for (i = 0; i < ip_set_max; i++) {
932                         if (ip_set_list[i] != NULL 
933                             && (atomic_read(&ip_set_list[i]->ref)))
934                                 return -EBUSY;
935                 }
936
937                 for (i = 0; i < ip_set_max; i++) {
938                         if (ip_set_list[i] != NULL)
939                                 ip_set_destroy_set(i);
940                 }
941         }
942         return 0;
943 }
944
945 static void
946 ip_set_flush_set(struct ip_set *set)
947 {
948         DP("set: %s %u",  set->name, set->id);
949
950         write_lock_bh(&set->lock);
951         set->type->flush(set);
952         write_unlock_bh(&set->lock);
953 }
954
955 /* 
956  * Flush data in a set - or in all sets
957  */
958 static int
959 ip_set_flush(ip_set_id_t index)
960 {
961         if (index != IP_SET_INVALID_ID) {
962                 IP_SET_ASSERT(ip_set_list[index]);
963                 ip_set_flush_set(ip_set_list[index]);
964         } else
965                 FOREACH_SET_DO(ip_set_flush_set);
966
967         return 0;
968 }
969
970 /* Rename a set */
971 static int
972 ip_set_rename(ip_set_id_t index, const char *name)
973 {
974         struct ip_set *set = ip_set_list[index];
975         ip_set_id_t i;
976         int res = 0;
977
978         DP("set: %s to %s",  set->name, name);
979         write_lock_bh(&ip_set_lock);
980         for (i = 0; i < ip_set_max; i++) {
981                 if (ip_set_list[i] != NULL
982                     && strncmp(ip_set_list[i]->name, 
983                                name,
984                                IP_SET_MAXNAMELEN - 1) == 0) {
985                         res = -EEXIST;
986                         goto unlock;
987                 }
988         }
989         strncpy(set->name, name, IP_SET_MAXNAMELEN);
990     unlock:
991         write_unlock_bh(&ip_set_lock);
992         return res;
993 }
994
995 /*
996  * Swap two sets so that name/index points to the other.
997  * References are also swapped.
998  */
999 static int
1000 ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index)
1001 {
1002         struct ip_set *from = ip_set_list[from_index];
1003         struct ip_set *to = ip_set_list[to_index];
1004         char from_name[IP_SET_MAXNAMELEN];
1005         u_int32_t from_ref;
1006
1007         DP("set: %s to %s",  from->name, to->name);
1008         /* Features must not change. Artifical restriction. */
1009         if (from->type->features != to->type->features)
1010                 return -ENOEXEC;
1011
1012         /* No magic here: ref munging protected by the mutex */ 
1013         write_lock_bh(&ip_set_lock);
1014         strncpy(from_name, from->name, IP_SET_MAXNAMELEN);
1015         from_ref = atomic_read(&from->ref);
1016
1017         strncpy(from->name, to->name, IP_SET_MAXNAMELEN);
1018         atomic_set(&from->ref, atomic_read(&to->ref));
1019         strncpy(to->name, from_name, IP_SET_MAXNAMELEN);
1020         atomic_set(&to->ref, from_ref);
1021         
1022         ip_set_list[from_index] = to;
1023         ip_set_list[to_index] = from;
1024         
1025         write_unlock_bh(&ip_set_lock);
1026         return 0;
1027 }
1028
1029 /*
1030  * List set data
1031  */
1032
1033 static inline void
1034 __set_hash_bindings_size_list(struct ip_set_hash *set_hash,
1035                               ip_set_id_t id, size_t *size)
1036 {
1037         if (set_hash->id == id)
1038                 *size += sizeof(struct ip_set_hash_list);
1039 }
1040
1041 static inline void
1042 __set_hash_bindings_size_save(struct ip_set_hash *set_hash,
1043                               ip_set_id_t id, size_t *size)
1044 {
1045         if (set_hash->id == id)
1046                 *size += sizeof(struct ip_set_hash_save);
1047 }
1048
1049 static inline void
1050 __set_hash_bindings(struct ip_set_hash *set_hash,
1051                     ip_set_id_t id, void *data, int *used)
1052 {
1053         if (set_hash->id == id) {
1054                 struct ip_set_hash_list *hash_list = 
1055                         (struct ip_set_hash_list *)(data + *used);
1056
1057                 hash_list->ip = set_hash->ip;
1058                 hash_list->binding = set_hash->binding;
1059                 *used += sizeof(struct ip_set_hash_list);
1060         }
1061 }
1062
1063 static int ip_set_list_set(ip_set_id_t index,
1064                            void *data,
1065                            int *used,
1066                            int len)
1067 {
1068         struct ip_set *set = ip_set_list[index];
1069         struct ip_set_list *set_list;
1070
1071         /* Pointer to our header */
1072         set_list = (struct ip_set_list *) (data + *used);
1073
1074         DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used);
1075
1076         /* Get and ensure header size */
1077         if (*used + sizeof(struct ip_set_list) > len)
1078                 goto not_enough_mem;
1079         *used += sizeof(struct ip_set_list);
1080
1081         read_lock_bh(&set->lock);
1082         /* Get and ensure set specific header size */
1083         set_list->header_size = set->type->header_size;
1084         if (*used + set_list->header_size > len)
1085                 goto unlock_set;
1086
1087         /* Fill in the header */
1088         set_list->index = index;
1089         set_list->binding = set->binding;
1090         set_list->ref = atomic_read(&set->ref);
1091
1092         /* Fill in set spefific header data */
1093         set->type->list_header(set, data + *used);
1094         *used += set_list->header_size;
1095
1096         /* Get and ensure set specific members size */
1097         set_list->members_size = set->type->list_members_size(set);
1098         if (*used + set_list->members_size > len)
1099                 goto unlock_set;
1100
1101         /* Fill in set spefific members data */
1102         set->type->list_members(set, data + *used);
1103         *used += set_list->members_size;
1104         read_unlock_bh(&set->lock);
1105
1106         /* Bindings */
1107
1108         /* Get and ensure set specific bindings size */
1109         set_list->bindings_size = 0;
1110         FOREACH_HASH_DO(__set_hash_bindings_size_list,
1111                         set->id, &set_list->bindings_size);
1112         if (*used + set_list->bindings_size > len)
1113                 goto not_enough_mem;
1114
1115         /* Fill in set spefific bindings data */
1116         FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used);
1117         
1118         return 0;
1119
1120     unlock_set:
1121         read_unlock_bh(&set->lock);
1122     not_enough_mem:
1123         DP("not enough mem, try again");
1124         return -EAGAIN;
1125 }
1126
1127 /*
1128  * Save sets
1129  */
1130 static int ip_set_save_set(ip_set_id_t index,
1131                            void *data,
1132                            int *used,
1133                            int len)
1134 {
1135         struct ip_set *set;
1136         struct ip_set_save *set_save;
1137
1138         /* Pointer to our header */
1139         set_save = (struct ip_set_save *) (data + *used);
1140
1141         /* Get and ensure header size */
1142         if (*used + sizeof(struct ip_set_save) > len)
1143                 goto not_enough_mem;
1144         *used += sizeof(struct ip_set_save);
1145
1146         set = ip_set_list[index];
1147         DP("set: %s, used: %u(%u) %p %p", set->name, *used, len, 
1148            data, data + *used);
1149
1150         read_lock_bh(&set->lock);
1151         /* Get and ensure set specific header size */
1152         set_save->header_size = set->type->header_size;
1153         if (*used + set_save->header_size > len)
1154                 goto unlock_set;
1155
1156         /* Fill in the header */
1157         set_save->index = index;
1158         set_save->binding = set->binding;
1159
1160         /* Fill in set spefific header data */
1161         set->type->list_header(set, data + *used);
1162         *used += set_save->header_size;
1163
1164         DP("set header filled: %s, used: %u(%u) %p %p", set->name, *used,
1165            set_save->header_size, data, data + *used);
1166         /* Get and ensure set specific members size */
1167         set_save->members_size = set->type->list_members_size(set);
1168         if (*used + set_save->members_size > len)
1169                 goto unlock_set;
1170
1171         /* Fill in set spefific members data */
1172         set->type->list_members(set, data + *used);
1173         *used += set_save->members_size;
1174         read_unlock_bh(&set->lock);
1175         DP("set members filled: %s, used: %u(%u) %p %p", set->name, *used,
1176            set_save->members_size, data, data + *used);
1177         return 0;
1178
1179     unlock_set:
1180         read_unlock_bh(&set->lock);
1181     not_enough_mem:
1182         DP("not enough mem, try again");
1183         return -EAGAIN;
1184 }
1185
1186 static inline void
1187 __set_hash_save_bindings(struct ip_set_hash *set_hash,
1188                          ip_set_id_t id,
1189                          void *data,
1190                          int *used,
1191                          int len,
1192                          int *res)
1193 {
1194         if (*res == 0
1195             && (id == IP_SET_INVALID_ID || set_hash->id == id)) {
1196                 struct ip_set_hash_save *hash_save = 
1197                         (struct ip_set_hash_save *)(data + *used);
1198                 /* Ensure bindings size */
1199                 if (*used + sizeof(struct ip_set_hash_save) > len) {
1200                         *res = -ENOMEM;
1201                         return;
1202                 }
1203                 hash_save->id = set_hash->id;
1204                 hash_save->ip = set_hash->ip;
1205                 hash_save->binding = set_hash->binding;
1206                 *used += sizeof(struct ip_set_hash_save);
1207         }
1208 }
1209
1210 static int ip_set_save_bindings(ip_set_id_t index,
1211                                 void *data,
1212                                 int *used,
1213                                 int len)
1214 {
1215         int res = 0;
1216         struct ip_set_save *set_save;
1217
1218         DP("used %u, len %u", *used, len);
1219         /* Get and ensure header size */
1220         if (*used + sizeof(struct ip_set_save) > len)
1221                 return -ENOMEM;
1222
1223         /* Marker */
1224         set_save = (struct ip_set_save *) (data + *used);
1225         set_save->index = IP_SET_INVALID_ID;
1226         set_save->header_size = 0;
1227         set_save->members_size = 0;
1228         *used += sizeof(struct ip_set_save);
1229
1230         DP("marker added used %u, len %u", *used, len);
1231         /* Fill in bindings data */
1232         if (index != IP_SET_INVALID_ID)
1233                 /* Sets are identified by id in hash */
1234                 index = ip_set_list[index]->id;
1235         FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res);
1236
1237         return res;     
1238 }
1239
1240 /*
1241  * Restore sets
1242  */
1243 static int ip_set_restore(void *data,
1244                           int len)
1245 {
1246         int res = 0;
1247         int line = 0, used = 0, members_size;
1248         struct ip_set *set;
1249         struct ip_set_hash_save *hash_save;
1250         struct ip_set_restore *set_restore;
1251         ip_set_id_t index;
1252
1253         /* Loop to restore sets */
1254         while (1) {
1255                 line++;
1256                 
1257                 DP("%u %u %u", used, sizeof(struct ip_set_restore), len);
1258                 /* Get and ensure header size */
1259                 if (used + sizeof(struct ip_set_restore) > len)
1260                         return line;
1261                 set_restore = (struct ip_set_restore *) (data + used);
1262                 used += sizeof(struct ip_set_restore);
1263
1264                 /* Ensure data size */
1265                 if (used 
1266                     + set_restore->header_size 
1267                     + set_restore->members_size > len)
1268                         return line;
1269
1270                 /* Check marker */
1271                 if (set_restore->index == IP_SET_INVALID_ID) {
1272                         line--;
1273                         goto bindings;
1274                 }
1275                 
1276                 /* Try to create the set */
1277                 DP("restore %s %s", set_restore->name, set_restore->typename);
1278                 res = ip_set_create(set_restore->name,
1279                                     set_restore->typename,
1280                                     set_restore->index,
1281                                     data + used,
1282                                     set_restore->header_size);
1283                 
1284                 if (res != 0)
1285                         return line;
1286                 used += set_restore->header_size;
1287
1288                 index = ip_set_find_byindex(set_restore->index);
1289                 DP("index %u, restore_index %u", index, set_restore->index);
1290                 if (index != set_restore->index)
1291                         return line;
1292                 /* Try to restore members data */
1293                 set = ip_set_list[index];
1294                 members_size = 0;
1295                 DP("members_size %u reqsize %u",
1296                    set_restore->members_size, set->type->reqsize);
1297                 while (members_size + set->type->reqsize <=
1298                        set_restore->members_size) {
1299                         line++;
1300                         DP("members: %u, line %u", members_size, line);
1301                         res = __ip_set_addip(index,
1302                                            data + used + members_size,
1303                                            set->type->reqsize);
1304                         if (!(res == 0 || res == -EEXIST)) 
1305                                 return line;
1306                         members_size += set->type->reqsize;
1307                 }
1308
1309                 DP("members_size %u  %u",
1310                    set_restore->members_size, members_size);
1311                 if (members_size != set_restore->members_size)
1312                         return line++;
1313                 used += set_restore->members_size;              
1314         }
1315         
1316    bindings:
1317         /* Loop to restore bindings */
1318         while (used < len) {
1319                 line++;
1320
1321                 DP("restore binding, line %u", line);           
1322                 /* Get and ensure size */
1323                 if (used + sizeof(struct ip_set_hash_save) > len)
1324                         return line;
1325                 hash_save = (struct ip_set_hash_save *) (data + used);
1326                 used += sizeof(struct ip_set_hash_save);
1327                 
1328                 /* hash_save->id is used to store the index */
1329                 index = ip_set_find_byindex(hash_save->id);
1330                 DP("restore binding index %u, id %u, %u -> %u",
1331                    index, hash_save->id, hash_save->ip, hash_save->binding);            
1332                 if (index != hash_save->id)
1333                         return line;
1334                         
1335                 set = ip_set_list[hash_save->id];
1336                 /* Null valued IP means default binding */
1337                 if (hash_save->ip)
1338                         res = ip_set_hash_add(set->id, 
1339                                               hash_save->ip,
1340                                               hash_save->binding);
1341                 else {
1342                         IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID);
1343                         write_lock_bh(&ip_set_lock);
1344                         set->binding = hash_save->binding;
1345                         __ip_set_get(set->binding);
1346                         write_unlock_bh(&ip_set_lock);
1347                         DP("default binding: %u", set->binding);
1348                 }
1349                 if (res != 0)
1350                         return line;
1351         }
1352         if (used != len)
1353                 return line;
1354         
1355         return 0;       
1356 }
1357
1358 static int
1359 ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len)
1360 {
1361         void *data;
1362         int res = 0;            /* Assume OK */
1363         unsigned *op;
1364         struct ip_set_req_adt *req_adt;
1365         ip_set_id_t index = IP_SET_INVALID_ID;
1366         int (*adtfn)(ip_set_id_t index,
1367                      const void *data, size_t size);
1368         struct fn_table {
1369                 int (*fn)(ip_set_id_t index,
1370                           const void *data, size_t size);
1371         } adtfn_table[] =
1372         { { ip_set_addip }, { ip_set_delip }, { ip_set_testip},
1373           { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind },
1374         };
1375
1376         DP("optval=%d, user=%p, len=%d", optval, user, len);
1377         if (!capable(CAP_NET_ADMIN))
1378                 return -EPERM;
1379         if (optval != SO_IP_SET)
1380                 return -EBADF;
1381         if (len <= sizeof(unsigned)) {
1382                 ip_set_printk("short userdata (want >%zu, got %u)",
1383                               sizeof(unsigned), len);
1384                 return -EINVAL;
1385         }
1386         data = vmalloc(len);
1387         if (!data) {
1388                 DP("out of mem for %u bytes", len);
1389                 return -ENOMEM;
1390         }
1391         if (copy_from_user(data, user, len) != 0) {
1392                 res = -EFAULT;
1393                 goto done;
1394         }
1395         if (down_interruptible(&ip_set_app_mutex)) {
1396                 res = -EINTR;
1397                 goto done;
1398         }
1399
1400         op = (unsigned *)data;
1401         DP("op=%x", *op);
1402         
1403         if (*op < IP_SET_OP_VERSION) {
1404                 /* Check the version at the beginning of operations */
1405                 struct ip_set_req_version *req_version =
1406                         (struct ip_set_req_version *) data;
1407                 if (req_version->version != IP_SET_PROTOCOL_VERSION) {
1408                         res = -EPROTO;
1409                         goto done;
1410                 }
1411         }
1412
1413         switch (*op) {
1414         case IP_SET_OP_CREATE:{
1415                 struct ip_set_req_create *req_create
1416                         = (struct ip_set_req_create *) data;
1417                 
1418                 if (len < sizeof(struct ip_set_req_create)) {
1419                         ip_set_printk("short CREATE data (want >=%zu, got %u)",
1420                                       sizeof(struct ip_set_req_create), len);
1421                         res = -EINVAL;
1422                         goto done;
1423                 }
1424                 req_create->name[IP_SET_MAXNAMELEN - 1] = '\0';
1425                 req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1426                 res = ip_set_create(req_create->name,
1427                                     req_create->typename,
1428                                     IP_SET_INVALID_ID,
1429                                     data + sizeof(struct ip_set_req_create),
1430                                     len - sizeof(struct ip_set_req_create));
1431                 goto done;
1432         }
1433         case IP_SET_OP_DESTROY:{
1434                 struct ip_set_req_std *req_destroy
1435                         = (struct ip_set_req_std *) data;
1436                 
1437                 if (len != sizeof(struct ip_set_req_std)) {
1438                         ip_set_printk("invalid DESTROY data (want %zu, got %u)",
1439                                       sizeof(struct ip_set_req_std), len);
1440                         res = -EINVAL;
1441                         goto done;
1442                 }
1443                 if (strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) {
1444                         /* Destroy all sets */
1445                         index = IP_SET_INVALID_ID;
1446                 } else {
1447                         req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0';
1448                         index = ip_set_find_byname(req_destroy->name);
1449
1450                         if (index == IP_SET_INVALID_ID) {
1451                                 res = -ENOENT;
1452                                 goto done;
1453                         }
1454                 }
1455                         
1456                 res = ip_set_destroy(index);
1457                 goto done;
1458         }
1459         case IP_SET_OP_FLUSH:{
1460                 struct ip_set_req_std *req_flush =
1461                         (struct ip_set_req_std *) data;
1462
1463                 if (len != sizeof(struct ip_set_req_std)) {
1464                         ip_set_printk("invalid FLUSH data (want %zu, got %u)",
1465                                       sizeof(struct ip_set_req_std), len);
1466                         res = -EINVAL;
1467                         goto done;
1468                 }
1469                 if (strcmp(req_flush->name, IPSET_TOKEN_ALL) == 0) {
1470                         /* Flush all sets */
1471                         index = IP_SET_INVALID_ID;
1472                 } else {
1473                         req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0';
1474                         index = ip_set_find_byname(req_flush->name);
1475
1476                         if (index == IP_SET_INVALID_ID) {
1477                                 res = -ENOENT;
1478                                 goto done;
1479                         }
1480                 }
1481                 res = ip_set_flush(index);
1482                 goto done;
1483         }
1484         case IP_SET_OP_RENAME:{
1485                 struct ip_set_req_create *req_rename
1486                         = (struct ip_set_req_create *) data;
1487
1488                 if (len != sizeof(struct ip_set_req_create)) {
1489                         ip_set_printk("invalid RENAME data (want %zu, got %u)",
1490                                       sizeof(struct ip_set_req_create), len);
1491                         res = -EINVAL;
1492                         goto done;
1493                 }
1494
1495                 req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0';
1496                 req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1497                         
1498                 index = ip_set_find_byname(req_rename->name);
1499                 if (index == IP_SET_INVALID_ID) {
1500                         res = -ENOENT;
1501                         goto done;
1502                 }
1503                 res = ip_set_rename(index, req_rename->typename);
1504                 goto done;
1505         }
1506         case IP_SET_OP_SWAP:{
1507                 struct ip_set_req_create *req_swap
1508                         = (struct ip_set_req_create *) data;
1509                 ip_set_id_t to_index;
1510
1511                 if (len != sizeof(struct ip_set_req_create)) {
1512                         ip_set_printk("invalid SWAP data (want %zu, got %u)",
1513                                       sizeof(struct ip_set_req_create), len);
1514                         res = -EINVAL;
1515                         goto done;
1516                 }
1517
1518                 req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0';
1519                 req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1520
1521                 index = ip_set_find_byname(req_swap->name);
1522                 if (index == IP_SET_INVALID_ID) {
1523                         res = -ENOENT;
1524                         goto done;
1525                 }
1526                 to_index = ip_set_find_byname(req_swap->typename);
1527                 if (to_index == IP_SET_INVALID_ID) {
1528                         res = -ENOENT;
1529                         goto done;
1530                 }
1531                 res = ip_set_swap(index, to_index);
1532                 goto done;
1533         }
1534         default: 
1535                 break;  /* Set identified by id */
1536         }
1537         
1538         /* There we may have add/del/test/bind/unbind/test_bind operations */
1539         if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_BIND_SET) {
1540                 res = -EBADMSG;
1541                 goto done;
1542         }
1543         adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn;
1544
1545         if (len < sizeof(struct ip_set_req_adt)) {
1546                 ip_set_printk("short data in adt request (want >=%zu, got %u)",
1547                               sizeof(struct ip_set_req_adt), len);
1548                 res = -EINVAL;
1549                 goto done;
1550         }
1551         req_adt = (struct ip_set_req_adt *) data;
1552
1553         /* -U :all: :all:|:default: uses IP_SET_INVALID_ID */
1554         if (!(*op == IP_SET_OP_UNBIND_SET 
1555               && req_adt->index == IP_SET_INVALID_ID)) {
1556                 index = ip_set_find_byindex(req_adt->index);
1557                 if (index == IP_SET_INVALID_ID) {
1558                         res = -ENOENT;
1559                         goto done;
1560                 }
1561         }
1562         res = adtfn(index, data, len);
1563
1564     done:
1565         up(&ip_set_app_mutex);
1566         vfree(data);
1567         if (res > 0)
1568                 res = 0;
1569         DP("final result %d", res);
1570         return res;
1571 }
1572
1573 static int 
1574 ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len)
1575 {
1576         int res = 0;
1577         unsigned *op;
1578         ip_set_id_t index = IP_SET_INVALID_ID;
1579         void *data;
1580         int copylen = *len;
1581
1582         DP("optval=%d, user=%p, len=%d", optval, user, *len);
1583         if (!capable(CAP_NET_ADMIN))
1584                 return -EPERM;
1585         if (optval != SO_IP_SET)
1586                 return -EBADF;
1587         if (*len < sizeof(unsigned)) {
1588                 ip_set_printk("short userdata (want >=%zu, got %d)",
1589                               sizeof(unsigned), *len);
1590                 return -EINVAL;
1591         }
1592         data = vmalloc(*len);
1593         if (!data) {
1594                 DP("out of mem for %d bytes", *len);
1595                 return -ENOMEM;
1596         }
1597         if (copy_from_user(data, user, *len) != 0) {
1598                 res = -EFAULT;
1599                 goto done;
1600         }
1601         if (down_interruptible(&ip_set_app_mutex)) {
1602                 res = -EINTR;
1603                 goto done;
1604         }
1605
1606         op = (unsigned *) data;
1607         DP("op=%x", *op);
1608
1609         if (*op < IP_SET_OP_VERSION) {
1610                 /* Check the version at the beginning of operations */
1611                 struct ip_set_req_version *req_version =
1612                         (struct ip_set_req_version *) data;
1613                 if (req_version->version != IP_SET_PROTOCOL_VERSION) {
1614                         res = -EPROTO;
1615                         goto done;
1616                 }
1617         }
1618
1619         switch (*op) {
1620         case IP_SET_OP_VERSION: {
1621                 struct ip_set_req_version *req_version =
1622                     (struct ip_set_req_version *) data;
1623
1624                 if (*len != sizeof(struct ip_set_req_version)) {
1625                         ip_set_printk("invalid VERSION (want %zu, got %d)",
1626                                       sizeof(struct ip_set_req_version),
1627                                       *len);
1628                         res = -EINVAL;
1629                         goto done;
1630                 }
1631
1632                 req_version->version = IP_SET_PROTOCOL_VERSION;
1633                 res = copy_to_user(user, req_version,
1634                                    sizeof(struct ip_set_req_version));
1635                 goto done;
1636         }
1637         case IP_SET_OP_GET_BYNAME: {
1638                 struct ip_set_req_get_set *req_get
1639                         = (struct ip_set_req_get_set *) data;
1640
1641                 if (*len != sizeof(struct ip_set_req_get_set)) {
1642                         ip_set_printk("invalid GET_BYNAME (want %zu, got %d)",
1643                                       sizeof(struct ip_set_req_get_set), *len);
1644                         res = -EINVAL;
1645                         goto done;
1646                 }
1647                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1648                 index = ip_set_find_byname(req_get->set.name);
1649                 req_get->set.index = index;
1650                 goto copy;
1651         }
1652         case IP_SET_OP_GET_BYINDEX: {
1653                 struct ip_set_req_get_set *req_get
1654                         = (struct ip_set_req_get_set *) data;
1655
1656                 if (*len != sizeof(struct ip_set_req_get_set)) {
1657                         ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)",
1658                                       sizeof(struct ip_set_req_get_set), *len);
1659                         res = -EINVAL;
1660                         goto done;
1661                 }
1662                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1663                 index = ip_set_find_byindex(req_get->set.index);
1664                 strncpy(req_get->set.name,
1665                         index == IP_SET_INVALID_ID ? ""
1666                         : ip_set_list[index]->name, IP_SET_MAXNAMELEN);
1667                 goto copy;
1668         }
1669         case IP_SET_OP_ADT_GET: {
1670                 struct ip_set_req_adt_get *req_get
1671                         = (struct ip_set_req_adt_get *) data;
1672
1673                 if (*len != sizeof(struct ip_set_req_adt_get)) {
1674                         ip_set_printk("invalid ADT_GET (want %zu, got %d)",
1675                                       sizeof(struct ip_set_req_adt_get), *len);
1676                         res = -EINVAL;
1677                         goto done;
1678                 }
1679                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1680                 index = ip_set_find_byname(req_get->set.name);
1681                 if (index != IP_SET_INVALID_ID) {
1682                         req_get->set.index = index;
1683                         strncpy(req_get->typename,
1684                                 ip_set_list[index]->type->typename,
1685                                 IP_SET_MAXNAMELEN - 1);
1686                 } else {
1687                         res = -ENOENT;
1688                         goto done;
1689                 }
1690                 goto copy;
1691         }
1692         case IP_SET_OP_MAX_SETS: {
1693                 struct ip_set_req_max_sets *req_max_sets
1694                         = (struct ip_set_req_max_sets *) data;
1695                 ip_set_id_t i;
1696
1697                 if (*len != sizeof(struct ip_set_req_max_sets)) {
1698                         ip_set_printk("invalid MAX_SETS (want %zu, got %d)",
1699                                       sizeof(struct ip_set_req_max_sets), *len);
1700                         res = -EINVAL;
1701                         goto done;
1702                 }
1703
1704                 if (strcmp(req_max_sets->set.name, IPSET_TOKEN_ALL) == 0) {
1705                         req_max_sets->set.index = IP_SET_INVALID_ID;
1706                 } else {
1707                         req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1708                         req_max_sets->set.index = 
1709                                 ip_set_find_byname(req_max_sets->set.name);
1710                         if (req_max_sets->set.index == IP_SET_INVALID_ID) {
1711                                 res = -ENOENT;
1712                                 goto done;
1713                         }
1714                 }
1715                 req_max_sets->max_sets = ip_set_max;
1716                 req_max_sets->sets = 0;
1717                 for (i = 0; i < ip_set_max; i++) {
1718                         if (ip_set_list[i] != NULL)
1719                                 req_max_sets->sets++;
1720                 }
1721                 goto copy;
1722         }
1723         case IP_SET_OP_LIST_SIZE: 
1724         case IP_SET_OP_SAVE_SIZE: {
1725                 struct ip_set_req_setnames *req_setnames
1726                         = (struct ip_set_req_setnames *) data;
1727                 struct ip_set_name_list *name_list;
1728                 struct ip_set *set;
1729                 ip_set_id_t i;
1730                 int used;
1731
1732                 if (*len < sizeof(struct ip_set_req_setnames)) {
1733                         ip_set_printk("short LIST_SIZE (want >=%zu, got %d)",
1734                                       sizeof(struct ip_set_req_setnames), *len);
1735                         res = -EINVAL;
1736                         goto done;
1737                 }
1738
1739                 req_setnames->size = 0;
1740                 used = sizeof(struct ip_set_req_setnames);
1741                 for (i = 0; i < ip_set_max; i++) {
1742                         if (ip_set_list[i] == NULL)
1743                                 continue;
1744                         name_list = (struct ip_set_name_list *) 
1745                                 (data + used);
1746                         used += sizeof(struct ip_set_name_list);
1747                         if (used > copylen) {
1748                                 res = -EAGAIN;
1749                                 goto done;
1750                         }
1751                         set = ip_set_list[i];
1752                         /* Fill in index, name, etc. */
1753                         name_list->index = i;
1754                         name_list->id = set->id;
1755                         strncpy(name_list->name,
1756                                 set->name,
1757                                 IP_SET_MAXNAMELEN - 1);
1758                         strncpy(name_list->typename,
1759                                 set->type->typename,
1760                                 IP_SET_MAXNAMELEN - 1);
1761                         DP("filled %s of type %s, index %u\n",
1762                            name_list->name, name_list->typename,
1763                            name_list->index);
1764                         if (!(req_setnames->index == IP_SET_INVALID_ID
1765                               || req_setnames->index == i))
1766                               continue;
1767                         /* Update size */
1768                         switch (*op) {
1769                         case IP_SET_OP_LIST_SIZE: {
1770                                 req_setnames->size += sizeof(struct ip_set_list)
1771                                         + set->type->header_size
1772                                         + set->type->list_members_size(set);
1773                                 /* Sets are identified by id in the hash */
1774                                 FOREACH_HASH_DO(__set_hash_bindings_size_list, 
1775                                                 set->id, &req_setnames->size);
1776                                 break;
1777                         }
1778                         case IP_SET_OP_SAVE_SIZE: {
1779                                 req_setnames->size += sizeof(struct ip_set_save)
1780                                         + set->type->header_size
1781                                         + set->type->list_members_size(set);
1782                                 FOREACH_HASH_DO(__set_hash_bindings_size_save,
1783                                                 set->id, &req_setnames->size);
1784                                 break;
1785                         }
1786                         default:
1787                                 break;
1788                         }
1789                 }
1790                 if (copylen != used) {
1791                         res = -EAGAIN;
1792                         goto done;
1793                 }
1794                 goto copy;
1795         }
1796         case IP_SET_OP_LIST: {
1797                 struct ip_set_req_list *req_list
1798                         = (struct ip_set_req_list *) data;
1799                 ip_set_id_t i;
1800                 int used;
1801
1802                 if (*len < sizeof(struct ip_set_req_list)) {
1803                         ip_set_printk("short LIST (want >=%zu, got %d)",
1804                                       sizeof(struct ip_set_req_list), *len);
1805                         res = -EINVAL;
1806                         goto done;
1807                 }
1808                 index = req_list->index;
1809                 if (index != IP_SET_INVALID_ID
1810                     && ip_set_find_byindex(index) != index) {
1811                         res = -ENOENT;
1812                         goto done;
1813                 }
1814                 used = 0;
1815                 if (index == IP_SET_INVALID_ID) {
1816                         /* List all sets */
1817                         for (i = 0; i < ip_set_max && res == 0; i++) {
1818                                 if (ip_set_list[i] != NULL)
1819                                         res = ip_set_list_set(i, data, &used, *len);
1820                         }
1821                 } else {
1822                         /* List an individual set */
1823                         res = ip_set_list_set(index, data, &used, *len);
1824                 }
1825                 if (res != 0)
1826                         goto done;
1827                 else if (copylen != used) {
1828                         res = -EAGAIN;
1829                         goto done;
1830                 }
1831                 goto copy;
1832         }
1833         case IP_SET_OP_SAVE: {
1834                 struct ip_set_req_list *req_save
1835                         = (struct ip_set_req_list *) data;
1836                 ip_set_id_t i;
1837                 int used;
1838
1839                 if (*len < sizeof(struct ip_set_req_list)) {
1840                         ip_set_printk("short SAVE (want >=%zu, got %d)",
1841                                       sizeof(struct ip_set_req_list), *len);
1842                         res = -EINVAL;
1843                         goto done;
1844                 }
1845                 index = req_save->index;
1846                 if (index != IP_SET_INVALID_ID
1847                     && ip_set_find_byindex(index) != index) {
1848                         res = -ENOENT;
1849                         goto done;
1850                 }
1851                 used = 0;
1852                 if (index == IP_SET_INVALID_ID) {
1853                         /* Save all sets */
1854                         for (i = 0; i < ip_set_max && res == 0; i++) {
1855                                 if (ip_set_list[i] != NULL)
1856                                         res = ip_set_save_set(i, data, &used, *len);
1857                         }
1858                 } else {
1859                         /* Save an individual set */
1860                         res = ip_set_save_set(index, data, &used, *len);
1861                 }
1862                 if (res == 0)
1863                         res = ip_set_save_bindings(index, data, &used, *len);
1864                         
1865                 if (res != 0)
1866                         goto done;
1867                 else if (copylen != used) {
1868                         res = -EAGAIN;
1869                         goto done;
1870                 }
1871                 goto copy;
1872         }
1873         case IP_SET_OP_RESTORE: {
1874                 struct ip_set_req_setnames *req_restore
1875                         = (struct ip_set_req_setnames *) data;
1876                 int line;
1877
1878                 if (*len < sizeof(struct ip_set_req_setnames)
1879                     || *len != req_restore->size) {
1880                         ip_set_printk("invalid RESTORE (want =%zu, got %d)",
1881                                       req_restore->size, *len);
1882                         res = -EINVAL;
1883                         goto done;
1884                 }
1885                 line = ip_set_restore(data + sizeof(struct ip_set_req_setnames),
1886                                       req_restore->size - sizeof(struct ip_set_req_setnames));
1887                 DP("ip_set_restore: %u", line);
1888                 if (line != 0) {
1889                         res = -EAGAIN;
1890                         req_restore->size = line;
1891                         copylen = sizeof(struct ip_set_req_setnames);
1892                         goto copy;
1893                 }
1894                 goto done;
1895         }
1896         default:
1897                 res = -EBADMSG;
1898                 goto done;
1899         }       /* end of switch(op) */
1900
1901     copy:
1902         DP("set %s, copylen %u", index != IP_SET_INVALID_ID
1903                                  && ip_set_list[index]
1904                      ? ip_set_list[index]->name
1905                      : ":all:", copylen);
1906         res = copy_to_user(user, data, copylen);
1907         
1908     done:
1909         up(&ip_set_app_mutex);
1910         vfree(data);
1911         if (res > 0)
1912                 res = 0;
1913         DP("final result %d", res);
1914         return res;
1915 }
1916
1917 static struct nf_sockopt_ops so_set = {
1918         .pf             = PF_INET,
1919         .set_optmin     = SO_IP_SET,
1920         .set_optmax     = SO_IP_SET + 1,
1921         .set            = &ip_set_sockfn_set,
1922         .get_optmin     = SO_IP_SET,
1923         .get_optmax     = SO_IP_SET + 1,
1924         .get            = &ip_set_sockfn_get,
1925         .use            = 0
1926 };
1927
1928 static int max_sets, hash_size;
1929 module_param(max_sets, int, 0600);
1930 MODULE_PARM_DESC(max_sets, "maximal number of sets");
1931 module_param(hash_size, int, 0600);
1932 MODULE_PARM_DESC(hash_size, "hash size for bindings");
1933 MODULE_LICENSE("GPL");
1934 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
1935 MODULE_DESCRIPTION("module implementing core IP set support");
1936
1937 static int __init init(void)
1938 {
1939         int res;
1940         ip_set_id_t i;
1941
1942         get_random_bytes(&ip_set_hash_random, 4);
1943         if (max_sets)
1944                 ip_set_max = max_sets;
1945         ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max);
1946         if (!ip_set_list) {
1947                 printk(KERN_ERR "Unable to create ip_set_list\n");
1948                 return -ENOMEM;
1949         }
1950         memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max);
1951         if (hash_size)
1952                 ip_set_bindings_hash_size = hash_size;
1953         ip_set_hash = vmalloc(sizeof(struct list_head) * ip_set_bindings_hash_size);
1954         if (!ip_set_hash) {
1955                 printk(KERN_ERR "Unable to create ip_set_hash\n");
1956                 vfree(ip_set_list);
1957                 return -ENOMEM;
1958         }
1959         for (i = 0; i < ip_set_bindings_hash_size; i++)
1960                 INIT_LIST_HEAD(&ip_set_hash[i]);
1961
1962         INIT_LIST_HEAD(&set_type_list);
1963
1964         res = nf_register_sockopt(&so_set);
1965         if (res != 0) {
1966                 ip_set_printk("SO_SET registry failed: %d", res);
1967                 vfree(ip_set_list);
1968                 vfree(ip_set_hash);
1969                 return res;
1970         }
1971         return 0;
1972 }
1973
1974 static void __exit fini(void)
1975 {
1976         /* There can't be any existing set or binding */
1977         nf_unregister_sockopt(&so_set);
1978         vfree(ip_set_list);
1979         vfree(ip_set_hash);
1980         DP("these are the famous last words");
1981 }
1982
/* Set type registration */
EXPORT_SYMBOL(ip_set_register_set_type);
EXPORT_SYMBOL(ip_set_unregister_set_type);

/* Obtaining and releasing a set */
EXPORT_SYMBOL(ip_set_get_byname);
EXPORT_SYMBOL(ip_set_get_byindex);
EXPORT_SYMBOL(ip_set_put);

/* Add/delete/test operations callable from kernel code */
EXPORT_SYMBOL(ip_set_addip_kernel);
EXPORT_SYMBOL(ip_set_delip_kernel);
EXPORT_SYMBOL(ip_set_testip_kernel);

/* Module load/unload hooks */
module_init(init);
module_exit(fini);