This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / net / ipv4 / netfilter / ip_set.c
1 /* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
2  *                         Patrick Schaaf <bof@bof.de>
3  * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation.  
8  */
9
10 /* Kernel module for IP set management */
11
12 #include <linux/config.h>
13 #include <linux/module.h>
14 #include <linux/moduleparam.h>
15 #include <linux/kmod.h>
16 #include <linux/ip.h>
17 #include <linux/skbuff.h>
18 #include <linux/random.h>
19 #include <linux/jhash.h>
20 #include <linux/netfilter_ipv4/ip_tables.h>
21 #include <linux/errno.h>
22 #include <asm/uaccess.h>
23 #include <asm/bitops.h>
24 #include <asm/semaphore.h>
25 #include <linux/spinlock.h>
26 #include <linux/vmalloc.h>
27
28 #define ASSERT_READ_LOCK(x)     /* dont use that */
29 #define ASSERT_WRITE_LOCK(x)
30 #include <linux/netfilter_ipv4/listhelp.h>
31 #include <linux/netfilter_ipv4/ip_set.h>
32
33 static struct list_head set_type_list;          /* all registered sets */
34 static struct ip_set **ip_set_list;             /* all individual sets */
35 static DEFINE_RWLOCK(ip_set_lock);              /* protects the lists and the hash */
36 static DECLARE_MUTEX(ip_set_app_mutex);         /* serializes user access */
37 static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX;
38 static ip_set_id_t ip_set_bindings_hash_size =  CONFIG_IP_NF_SET_HASHSIZE;
39 static struct list_head *ip_set_hash;           /* hash of bindings */
40 static unsigned int ip_set_hash_random;         /* random seed */
41
42 /*
43  * Sets are identified either by the index in ip_set_list or by id.
44  * The id never changes and is used to find a key in the hash. 
45  * The index may change by swapping and is used at all other places
46  * (set/SET netfilter modules, binding value, etc.)
47  *
48  * Userspace requests are serialized by ip_set_app_mutex and sets can
49  * be deleted only from userspace. Therefore ip_set_list locking 
50  * must obey the following rules:
51  *
52  * - kernel requests: read and write locking mandatory
53  * - user requests: read locking optional, write locking mandatory
54  */
55
56 static inline void
57 __ip_set_get(ip_set_id_t index)
58 {
59         atomic_inc(&ip_set_list[index]->ref);
60 }
61
62 static inline void
63 __ip_set_put(ip_set_id_t index)
64 {
65         atomic_dec(&ip_set_list[index]->ref);
66 }
67
68 /*
69  * Binding routines
70  */
71
72 static inline int
73 ip_hash_cmp(const struct ip_set_hash *set_hash,
74             ip_set_id_t id, ip_set_ip_t ip)
75 {
76         return set_hash->id == id && set_hash->ip == ip;
77 }
78
79 static ip_set_id_t
80 ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip)
81 {
82         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) 
83                                 % ip_set_bindings_hash_size;
84         struct ip_set_hash *set_hash;
85
86         ASSERT_READ_LOCK(&ip_set_lock);
87         IP_SET_ASSERT(ip_set_list[id]);
88         DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
89         
90         set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp,
91                              struct ip_set_hash *, id, ip);
92         
93         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
94            HIPQUAD(ip),
95            set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
96
97         return (set_hash != NULL ? set_hash->binding : IP_SET_INVALID_ID);
98 }
99
100 static inline void 
101 __set_hash_del(struct ip_set_hash *set_hash)
102 {
103         ASSERT_WRITE_LOCK(&ip_set_lock);
104         IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
105
106         __ip_set_put(set_hash->binding);
107         list_del(&set_hash->list);
108         kfree(set_hash);
109 }
110
111 static int
112 ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip)
113 {
114         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
115                                 % ip_set_bindings_hash_size;
116         struct ip_set_hash *set_hash;
117         
118         IP_SET_ASSERT(ip_set_list[id]);
119         DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));     
120         write_lock_bh(&ip_set_lock);
121         set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp,
122                              struct ip_set_hash *, id, ip);
123         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
124            HIPQUAD(ip),
125            set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
126
127         if (set_hash != NULL)
128                 __set_hash_del(set_hash);
129         write_unlock_bh(&ip_set_lock);
130         return 0;
131 }
132
133 static int 
134 ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding)
135 {
136         u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
137                                 % ip_set_bindings_hash_size;
138         struct ip_set_hash *set_hash;
139         int ret = 0;
140         
141         IP_SET_ASSERT(ip_set_list[id]);
142         IP_SET_ASSERT(ip_set_list[binding]);
143         DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, 
144            HIPQUAD(ip), ip_set_list[binding]->name);
145         write_lock_bh(&ip_set_lock);
146         set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp,
147                              struct ip_set_hash *, id, ip);
148         if (!set_hash) {
149                 set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_KERNEL);
150                 if (!set_hash) {
151                         ret = -ENOMEM;
152                         goto unlock;
153                 }
154                 INIT_LIST_HEAD(&set_hash->list);
155                 set_hash->id = id;
156                 set_hash->ip = ip;
157                 list_add(&ip_set_hash[key], &set_hash->list);
158         } else {
159                 IP_SET_ASSERT(ip_set_list[set_hash->binding]);  
160                 DP("overwrite binding: %s",
161                    ip_set_list[set_hash->binding]->name);
162                 __ip_set_put(set_hash->binding);
163         }
164         set_hash->binding = binding;
165         __ip_set_get(set_hash->binding);
166     unlock:
167         write_unlock_bh(&ip_set_lock);
168         return ret;
169 }
170
/*
 * Call fn(entry, args...) for every entry in every bucket of the
 * bindings hash.  Uses plain list_for_each_entry(), so fn must not
 * unlink the entry it is handed.
 */
#define FOREACH_HASH_DO(fn, args...)					\
({									\
	ip_set_id_t __bucket;						\
	struct ip_set_hash *__entry;					\
									\
	for (__bucket = 0;						\
	     __bucket < ip_set_bindings_hash_size;			\
	     __bucket++) {						\
		list_for_each_entry(__entry, &ip_set_hash[__bucket], list) \
			fn(__entry , ## args);				\
	}								\
})
181
/*
 * Call fn(entry, args...) for every entry in every bucket of the
 * bindings hash.  Iterates with list_for_each_entry_safe(), so fn may
 * unlink and free the entry it is handed.
 */
#define FOREACH_HASH_RW_DO(fn, args...)					\
({									\
	ip_set_id_t __bucket;						\
	struct ip_set_hash *__entry, *__next;				\
									\
	ASSERT_WRITE_LOCK(&ip_set_lock);				\
	for (__bucket = 0;						\
	     __bucket < ip_set_bindings_hash_size;			\
	     __bucket++) {						\
		list_for_each_entry_safe(__entry, __next,		\
					 &ip_set_hash[__bucket], list)	\
			fn(__entry , ## args);				\
	}								\
})
193
194 /* Add, del and test set entries from kernel */
195
/*
 * Pick the next set to visit after @set matched @ip: first the binding
 * stored in the hash for (set->id, ip), otherwise the default binding
 * of @set.  The chosen index is stored in @index; the expression is
 * non-zero when @index ends up different from IP_SET_INVALID_ID.
 */
#define follow_bindings(index, set, ip)					\
({									\
	(index) = ip_set_find_in_hash((set)->id, ip);			\
	if ((index) == IP_SET_INVALID_ID)				\
		(index) = (set)->binding;				\
	(index) != IP_SET_INVALID_ID;					\
})
199
200 int
201 ip_set_testip_kernel(ip_set_id_t index,
202                      const struct sk_buff *skb,
203                      const u_int32_t *flags)
204 {
205         struct ip_set *set;
206         ip_set_ip_t ip;
207         int res;
208         unsigned char i = 0;
209         
210         IP_SET_ASSERT(flags[i]);
211         read_lock_bh(&ip_set_lock);
212         do {
213                 set = ip_set_list[index];
214                 IP_SET_ASSERT(set);
215                 DP("set %s, index %u", set->name, index);
216                 read_lock_bh(&set->lock);
217                 res = set->type->testip_kernel(set, skb, &ip, flags, i++);
218                 read_unlock_bh(&set->lock);
219                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
220         } while (res > 0 
221                  && flags[i] 
222                  && follow_bindings(index, set, ip));
223         read_unlock_bh(&ip_set_lock);
224
225         return res;
226 }
227
228 void
229 ip_set_addip_kernel(ip_set_id_t index,
230                     const struct sk_buff *skb,
231                     const u_int32_t *flags)
232 {
233         struct ip_set *set;
234         ip_set_ip_t ip;
235         int res;
236         unsigned char i = 0;
237
238         IP_SET_ASSERT(flags[i]);
239    retry:
240         read_lock_bh(&ip_set_lock);
241         do {
242                 set = ip_set_list[index];
243                 IP_SET_ASSERT(set);
244                 DP("set %s, index %u", set->name, index);
245                 write_lock_bh(&set->lock);
246                 res = set->type->addip_kernel(set, skb, &ip, flags, i++);
247                 write_unlock_bh(&set->lock);
248                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
249         } while ((res == 0 || res == -EEXIST)
250                  && flags[i] 
251                  && follow_bindings(index, set, ip));
252         read_unlock_bh(&ip_set_lock);
253
254         if (res == -EAGAIN
255             && set->type->retry
256             && (res = set->type->retry(set)) == 0)
257                 goto retry;
258 }
259
260 void
261 ip_set_delip_kernel(ip_set_id_t index,
262                     const struct sk_buff *skb,
263                     const u_int32_t *flags)
264 {
265         struct ip_set *set;
266         ip_set_ip_t ip;
267         int res;
268         unsigned char i = 0;
269
270         IP_SET_ASSERT(flags[i]);
271         read_lock_bh(&ip_set_lock);
272         do {
273                 set = ip_set_list[index];
274                 IP_SET_ASSERT(set);
275                 DP("set %s, index %u", set->name, index);
276                 write_lock_bh(&set->lock);
277                 res = set->type->delip_kernel(set, skb, &ip, flags, i++);
278                 write_unlock_bh(&set->lock);
279                 i += !!(set->type->features & IPSET_DATA_DOUBLE);
280         } while ((res == 0 || res == -EEXIST)
281                  && flags[i] 
282                  && follow_bindings(index, set, ip));
283         read_unlock_bh(&ip_set_lock);
284 }
285
286 /* Register and deregister settype */
287
288 static inline int
289 set_type_equal(const struct ip_set_type *set_type, const char *str2)
290 {
291         return !strncmp(set_type->typename, str2, IP_SET_MAXNAMELEN - 1);
292 }
293
294 static inline struct ip_set_type *
295 find_set_type(const char *name)
296 {
297         return LIST_FIND(&set_type_list,
298                          set_type_equal,
299                          struct ip_set_type *,
300                          name);
301 }
302
303 int 
304 ip_set_register_set_type(struct ip_set_type *set_type)
305 {
306         int ret = 0;
307         
308         if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) {
309                 ip_set_printk("'%s' uses wrong protocol version %u (want %u)",
310                               set_type->typename,
311                               set_type->protocol_version,
312                               IP_SET_PROTOCOL_VERSION);
313                 return -EINVAL;
314         }
315
316         write_lock_bh(&ip_set_lock);
317         if (find_set_type(set_type->typename)) {
318                 /* Duplicate! */
319                 ip_set_printk("'%s' already registered!", 
320                               set_type->typename);
321                 ret = -EINVAL;
322                 goto unlock;
323         }
324         if (!try_module_get(THIS_MODULE)) {
325                 ret = -EFAULT;
326                 goto unlock;
327         }
328         list_append(&set_type_list, set_type);
329         DP("'%s' registered.", set_type->typename);
330    unlock:
331         write_unlock_bh(&ip_set_lock);
332         return ret;
333 }
334
335 void
336 ip_set_unregister_set_type(struct ip_set_type *set_type)
337 {
338         write_lock_bh(&ip_set_lock);
339         if (!find_set_type(set_type->typename)) {
340                 ip_set_printk("'%s' not registered?",
341                               set_type->typename);
342                 goto unlock;
343         }
344         LIST_DELETE(&set_type_list, set_type);
345         module_put(THIS_MODULE);
346         DP("'%s' unregistered.", set_type->typename);
347    unlock:
348         write_unlock_bh(&ip_set_lock);
349
350 }
351
352 /*
353  * Userspace routines
354  */
355
356 /*
357  * Find set by name, reference it once. The reference makes sure the
358  * thing pointed to, does not go away under our feet. Drop the reference
359  * later, using ip_set_put().
360  */
361 ip_set_id_t
362 ip_set_get_byname(const char *name)
363 {
364         ip_set_id_t i, index = IP_SET_INVALID_ID;
365         
366         down(&ip_set_app_mutex);
367         for (i = 0; i < ip_set_max; i++) {
368                 if (ip_set_list[i] != NULL
369                     && strcmp(ip_set_list[i]->name, name) == 0) {
370                         __ip_set_get(i);
371                         index = i;
372                         break;
373                 }
374         }
375         up(&ip_set_app_mutex);
376         return index;
377 }
378
379 /*
380  * Find set by index, reference it once. The reference makes sure the
381  * thing pointed to, does not go away under our feet. Drop the reference
382  * later, using ip_set_put().
383  */
384 ip_set_id_t
385 ip_set_get_byindex(ip_set_id_t index)
386 {
387         down(&ip_set_app_mutex);
388
389         if (index >= ip_set_max)
390                 return IP_SET_INVALID_ID;
391         
392         if (ip_set_list[index])
393                 __ip_set_get(index);
394         else
395                 index = IP_SET_INVALID_ID;
396                 
397         up(&ip_set_app_mutex);
398         return index;
399 }
400
401 /*
402  * If the given set pointer points to a valid set, decrement
403  * reference count by 1. The caller shall not assume the index
404  * to be valid, after calling this function.
405  */
406 void ip_set_put(ip_set_id_t index)
407 {
408         down(&ip_set_app_mutex);
409         if (ip_set_list[index])
410                 __ip_set_put(index);
411         up(&ip_set_app_mutex);
412 }
413
414 /* Find a set by name or index */
415 static ip_set_id_t
416 ip_set_find_byname(const char *name)
417 {
418         ip_set_id_t i, index = IP_SET_INVALID_ID;
419         
420         for (i = 0; i < ip_set_max; i++) {
421                 if (ip_set_list[i] != NULL
422                     && strcmp(ip_set_list[i]->name, name) == 0) {
423                         index = i;
424                         break;
425                 }
426         }
427         return index;
428 }
429
430 static ip_set_id_t
431 ip_set_find_byindex(ip_set_id_t index)
432 {
433         if (index >= ip_set_max || ip_set_list[index] == NULL)
434                 index = IP_SET_INVALID_ID;
435         
436         return index;
437 }
438
439 /*
440  * Add, del, test, bind and unbind
441  */
442
443 static inline int
444 __ip_set_testip(struct ip_set *set,
445                 const void *data,
446                 size_t size,
447                 ip_set_ip_t *ip)
448 {
449         int res;
450
451         read_lock_bh(&set->lock);
452         res = set->type->testip(set, data, size, ip);
453         read_unlock_bh(&set->lock);
454
455         return res;
456 }
457
458 static int
459 __ip_set_addip(ip_set_id_t index,
460                const void *data,
461                size_t size)
462 {
463         struct ip_set *set = ip_set_list[index];
464         ip_set_ip_t ip;
465         int res;
466         
467         IP_SET_ASSERT(set);
468         do {
469                 write_lock_bh(&set->lock);
470                 res = set->type->addip(set, data, size, &ip);
471                 write_unlock_bh(&set->lock);
472         } while (res == -EAGAIN
473                  && set->type->retry
474                  && (res = set->type->retry(set)) == 0);
475
476         return res;
477 }
478
479 static int
480 ip_set_addip(ip_set_id_t index,
481              const void *data,
482              size_t size)
483 {
484
485         return __ip_set_addip(index,
486                               data + sizeof(struct ip_set_req_adt),
487                               size - sizeof(struct ip_set_req_adt));
488 }
489
490 static int
491 ip_set_delip(ip_set_id_t index,
492              const void *data,
493              size_t size)
494 {
495         struct ip_set *set = ip_set_list[index];
496         ip_set_ip_t ip;
497         int res;
498         
499         IP_SET_ASSERT(set);
500         write_lock_bh(&set->lock);
501         res = set->type->delip(set,
502                                data + sizeof(struct ip_set_req_adt),
503                                size - sizeof(struct ip_set_req_adt),
504                                &ip);
505         write_unlock_bh(&set->lock);
506
507         return res;
508 }
509
510 static int
511 ip_set_testip(ip_set_id_t index,
512               const void *data,
513               size_t size)
514 {
515         struct ip_set *set = ip_set_list[index];
516         ip_set_ip_t ip;
517         int res;
518
519         IP_SET_ASSERT(set);
520         res = __ip_set_testip(set,
521                               data + sizeof(struct ip_set_req_adt),
522                               size - sizeof(struct ip_set_req_adt),
523                               &ip);
524
525         return (res > 0 ? -EEXIST : res);
526 }
527
528 static int
529 ip_set_bindip(ip_set_id_t index,
530               const void *data,
531               size_t size)
532 {
533         struct ip_set *set = ip_set_list[index];
534         struct ip_set_req_bind *req_bind;
535         ip_set_id_t binding;
536         ip_set_ip_t ip;
537         int res;
538
539         IP_SET_ASSERT(set);
540         if (size < sizeof(struct ip_set_req_bind))
541                 return -EINVAL;
542                 
543         req_bind = (struct ip_set_req_bind *) data;
544         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
545
546         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
547                 /* Default binding of a set */
548                 char *binding_name;
549                 
550                 if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
551                         return -EINVAL;
552
553                 binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
554                 binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
555
556                 binding = ip_set_find_byname(binding_name);
557                 if (binding == IP_SET_INVALID_ID)
558                         return -ENOENT;
559
560                 write_lock_bh(&ip_set_lock);
561                 /* Sets as binding values are referenced */
562                 if (set->binding != IP_SET_INVALID_ID)
563                         __ip_set_put(set->binding);
564                 set->binding = binding;
565                 __ip_set_get(set->binding);
566                 write_unlock_bh(&ip_set_lock);
567
568                 return 0;
569         }
570         binding = ip_set_find_byname(req_bind->binding);
571         if (binding == IP_SET_INVALID_ID)
572                 return -ENOENT;
573
574         res = __ip_set_testip(set,
575                               data + sizeof(struct ip_set_req_bind),
576                               size - sizeof(struct ip_set_req_bind),
577                               &ip);
578         DP("set %s, ip: %u.%u.%u.%u, binding %s",
579            set->name, HIPQUAD(ip), ip_set_list[binding]->name);
580         
581         if (res >= 0)
582                 res = ip_set_hash_add(set->id, ip, binding);
583
584         return res;
585 }
586
/* Call fn(set, args...) for every occupied slot of ip_set_list. */
#define FOREACH_SET_DO(fn, args...)				\
({								\
	ip_set_id_t __slot;					\
	struct ip_set *__cur;					\
								\
	for (__slot = 0; __slot < ip_set_max; __slot++) {	\
		__cur = ip_set_list[__slot];			\
		if (__cur == NULL)				\
			continue;				\
		fn(__cur , ##args);				\
	}							\
})
598
599 static inline void
600 __set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id)
601 {
602         if (set_hash->id == id)
603                 __set_hash_del(set_hash);
604 }
605
606 static inline void
607 __unbind_default(struct ip_set *set)
608 {
609         if (set->binding != IP_SET_INVALID_ID) {
610                 /* Sets as binding values are referenced */
611                 __ip_set_put(set->binding);
612                 set->binding = IP_SET_INVALID_ID;
613         }
614 }
615
616 static int
617 ip_set_unbindip(ip_set_id_t index,
618                 const void *data,
619                 size_t size)
620 {
621         struct ip_set *set;
622         struct ip_set_req_bind *req_bind;
623         ip_set_ip_t ip;
624         int res;
625
626         DP("");
627         if (size < sizeof(struct ip_set_req_bind))
628                 return -EINVAL;
629                 
630         req_bind = (struct ip_set_req_bind *) data;
631         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
632         
633         DP("%u %s", index, req_bind->binding);
634         if (index == IP_SET_INVALID_ID) {
635                 /* unbind :all: */
636                 if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
637                         /* Default binding of sets */
638                         write_lock_bh(&ip_set_lock);
639                         FOREACH_SET_DO(__unbind_default);
640                         write_unlock_bh(&ip_set_lock);
641                         return 0;
642                 } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
643                         /* Flush all bindings of all sets*/
644                         write_lock_bh(&ip_set_lock);
645                         FOREACH_HASH_RW_DO(__set_hash_del);
646                         write_unlock_bh(&ip_set_lock);
647                         return 0;
648                 }
649                 DP("unreachable reached!");
650                 return -EINVAL;
651         }
652         
653         set = ip_set_list[index];
654         IP_SET_ASSERT(set);
655         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
656                 /* Default binding of set */
657                 ip_set_id_t binding = ip_set_find_byindex(set->binding);
658
659                 if (binding == IP_SET_INVALID_ID)
660                         return -ENOENT;
661                         
662                 write_lock_bh(&ip_set_lock);
663                 /* Sets in hash values are referenced */
664                 __ip_set_put(set->binding);
665                 set->binding = IP_SET_INVALID_ID;
666                 write_unlock_bh(&ip_set_lock);
667
668                 return 0;
669         } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
670                 /* Flush all bindings */
671
672                 write_lock_bh(&ip_set_lock);
673                 FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
674                 write_unlock_bh(&ip_set_lock);
675                 return 0;
676         }
677         
678         res = __ip_set_testip(set,
679                               data + sizeof(struct ip_set_req_bind),
680                               size - sizeof(struct ip_set_req_bind),
681                               &ip);
682
683         DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
684         if (res >= 0)
685                 res = ip_set_hash_del(set->id, ip);
686
687         return res;
688 }
689
690 static int
691 ip_set_testbind(ip_set_id_t index,
692                 const void *data,
693                 size_t size)
694 {
695         struct ip_set *set = ip_set_list[index];
696         struct ip_set_req_bind *req_bind;
697         ip_set_id_t binding;
698         ip_set_ip_t ip;
699         int res;
700
701         IP_SET_ASSERT(set);
702         if (size < sizeof(struct ip_set_req_bind))
703                 return -EINVAL;
704                 
705         req_bind = (struct ip_set_req_bind *) data;
706         req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
707
708         if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
709                 /* Default binding of set */
710                 char *binding_name;
711                 
712                 if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
713                         return -EINVAL;
714
715                 binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); 
716                 binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
717
718                 binding = ip_set_find_byname(binding_name);
719                 if (binding == IP_SET_INVALID_ID)
720                         return -ENOENT;
721                 
722                 res = (set->binding == binding) ? -EEXIST : 0;
723
724                 return res;
725         }
726         binding = ip_set_find_byname(req_bind->binding);
727         if (binding == IP_SET_INVALID_ID)
728                 return -ENOENT;
729                 
730         
731         res = __ip_set_testip(set,
732                               data + sizeof(struct ip_set_req_bind),
733                               size - sizeof(struct ip_set_req_bind),
734                               &ip);
735         DP("set %s, ip: %u.%u.%u.%u, binding %s",
736            set->name, HIPQUAD(ip), ip_set_list[binding]->name);
737            
738         if (res >= 0)
739                 res = (ip_set_find_in_hash(set->id, ip) == binding)
740                         ? -EEXIST : 0;
741
742         return res;
743 }
744
745 static struct ip_set_type *
746 find_set_type_rlock(const char *typename)
747 {
748         struct ip_set_type *type;
749         
750         read_lock_bh(&ip_set_lock);
751         type = find_set_type(typename);
752         if (type == NULL)
753                 read_unlock_bh(&ip_set_lock);
754
755         return type;
756 }
757
758 static int
759 find_free_id(const char *name,
760              ip_set_id_t *index,
761              ip_set_id_t *id)
762 {
763         ip_set_id_t i;
764
765         *id = IP_SET_INVALID_ID;
766         for (i = 0;  i < ip_set_max; i++) {
767                 if (ip_set_list[i] == NULL) {
768                         if (*id == IP_SET_INVALID_ID)
769                                 *id = *index = i;
770                 } else if (strcmp(name, ip_set_list[i]->name) == 0)
771                         /* Name clash */
772                         return -EEXIST;
773         }
774         if (*id == IP_SET_INVALID_ID)
775                 /* No free slot remained */
776                 return -ERANGE;
777         /* Check that index is usable as id (swapping) */
778     check:      
779         for (i = 0;  i < ip_set_max; i++) {
780                 if (ip_set_list[i] != NULL
781                     && ip_set_list[i]->id == *id) {
782                     *id = i;
783                     goto check;
784                 }
785         }
786         return 0;
787 }
788
789 /*
790  * Create a set
791  */
792 static int
793 ip_set_create(const char *name,
794               const char *typename,
795               ip_set_id_t restore,
796               const void *data,
797               size_t size)
798 {
799         struct ip_set *set;
800         ip_set_id_t index, id;
801         int res = 0;
802
803         DP("setname: %s, typename: %s, id: %u", name, typename, restore);
804         /*
805          * First, and without any locks, allocate and initialize
806          * a normal base set structure.
807          */
808         set = kmalloc(sizeof(struct ip_set), GFP_KERNEL);
809         if (!set)
810                 return -ENOMEM;
811         set->lock = RW_LOCK_UNLOCKED;
812         strncpy(set->name, name, IP_SET_MAXNAMELEN);
813         set->binding = IP_SET_INVALID_ID;
814         atomic_set(&set->ref, 0);
815
816         /*
817          * Next, take the &ip_set_lock, check that we know the type,
818          * and take a reference on the type, to make sure it
819          * stays available while constructing our new set.
820          *
821          * After referencing the type, we drop the &ip_set_lock,
822          * and let the new set construction run without locks.
823          */
824         set->type = find_set_type_rlock(typename);
825         if (set->type == NULL) {
826                 /* Try loading the module */
827                 char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1];
828                 strcpy(modulename, "ip_set_");
829                 strcat(modulename, typename);
830                 DP("try to load %s", modulename);
831                 request_module(modulename);
832                 set->type = find_set_type_rlock(typename);
833         }
834         if (set->type == NULL) {
835                 ip_set_printk("no set type '%s', set '%s' not created",
836                               typename, name);
837                 res = -ENOENT;
838                 goto out;
839         }
840         if (!try_module_get(set->type->me)) {
841                 read_unlock_bh(&ip_set_lock);
842                 res = -EFAULT;
843                 goto out;
844         }
845         read_unlock_bh(&ip_set_lock);
846
847         /*
848          * Without holding any locks, create private part.
849          */
850         res = set->type->create(set, data, size);
851         if (res != 0)
852                 goto put_out;
853
854         /* BTW, res==0 here. */
855
856         /*
857          * Here, we have a valid, constructed set. &ip_set_lock again,
858          * find free id/index and check that it is not already in 
859          * ip_set_list.
860          */
861         write_lock_bh(&ip_set_lock);
862         if ((res = find_free_id(set->name, &index, &id)) != 0) {
863                 DP("no free id!");
864                 goto cleanup;
865         }
866
867         /* Make sure restore gets the same index */
868         if (restore != IP_SET_INVALID_ID && index != restore) {
869                 DP("Can't restore, sets are screwed up");
870                 res = -ERANGE;
871                 goto cleanup;
872         }
873          
874         /*
875          * Finally! Add our shiny new set to the list, and be done.
876          */
877         DP("create: '%s' created with index %u, id %u!", set->name, index, id);
878         set->id = id;
879         ip_set_list[index] = set;
880         write_unlock_bh(&ip_set_lock);
881         return res;
882         
883     cleanup:
884         write_unlock_bh(&ip_set_lock);
885         set->type->destroy(set);
886     put_out:
887         module_put(set->type->me);
888     out:
889         kfree(set);
890         return res;
891 }
892
893 /*
894  * Destroy a given existing set
895  */
896 static void
897 ip_set_destroy_set(ip_set_id_t index)
898 {
899         struct ip_set *set = ip_set_list[index];
900
901         IP_SET_ASSERT(set);
902         DP("set: %s",  set->name);
903         write_lock_bh(&ip_set_lock);
904         FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
905         if (set->binding != IP_SET_INVALID_ID)
906                 __ip_set_put(set->binding);
907         ip_set_list[index] = NULL;
908         write_unlock_bh(&ip_set_lock);
909
910         /* Must call it without holding any lock */
911         set->type->destroy(set);
912         module_put(set->type->me);
913         kfree(set);
914 }
915
916 /*
917  * Destroy a set - or all sets
918  * Sets must not be referenced/used.
919  */
920 static int
921 ip_set_destroy(ip_set_id_t index)
922 {
923         ip_set_id_t i;
924
925         /* ref modification always protected by the mutex */
926         if (index != IP_SET_INVALID_ID) {
927                 if (atomic_read(&ip_set_list[index]->ref))
928                         return -EBUSY;
929                 ip_set_destroy_set(index);
930         } else {
931                 for (i = 0; i < ip_set_max; i++) {
932                         if (ip_set_list[i] != NULL 
933                             && (atomic_read(&ip_set_list[i]->ref)))
934                                 return -EBUSY;
935                 }
936
937                 for (i = 0; i < ip_set_max; i++) {
938                         if (ip_set_list[i] != NULL)
939                                 ip_set_destroy_set(i);
940                 }
941         }
942         return 0;
943 }
944
945 static void
946 ip_set_flush_set(struct ip_set *set)
947 {
948         DP("set: %s %u",  set->name, set->id);
949
950         write_lock_bh(&set->lock);
951         set->type->flush(set);
952         write_unlock_bh(&set->lock);
953 }
954
955 /* 
956  * Flush data in a set - or in all sets
957  */
958 static int
959 ip_set_flush(ip_set_id_t index)
960 {
961         if (index != IP_SET_INVALID_ID) {
962                 IP_SET_ASSERT(ip_set_list[index]);
963                 ip_set_flush_set(ip_set_list[index]);
964         } else
965                 FOREACH_SET_DO(ip_set_flush_set);
966
967         return 0;
968 }
969
970 /* Rename a set */
971 static int
972 ip_set_rename(ip_set_id_t index, const char *name)
973 {
974         struct ip_set *set = ip_set_list[index];
975         ip_set_id_t i;
976         int res = 0;
977
978         DP("set: %s to %s",  set->name, name);
979         write_lock_bh(&ip_set_lock);
980         for (i = 0; i < ip_set_max; i++) {
981                 if (ip_set_list[i] != NULL
982                     && strncmp(ip_set_list[i]->name, 
983                                name,
984                                IP_SET_MAXNAMELEN - 1) == 0) {
985                         res = -EEXIST;
986                         goto unlock;
987                 }
988         }
989         strncpy(set->name, name, IP_SET_MAXNAMELEN);
990     unlock:
991         write_unlock_bh(&ip_set_lock);
992         return res;
993 }
994
995 /*
996  * Swap two sets so that name/index points to the other.
997  * References are also swapped.
998  */
999 static int
1000 ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index)
1001 {
1002         struct ip_set *from = ip_set_list[from_index];
1003         struct ip_set *to = ip_set_list[to_index];
1004         char from_name[IP_SET_MAXNAMELEN];
1005         u_int32_t from_ref;
1006
1007         DP("set: %s to %s",  from->name, to->name);
1008         /* Features must not change. Artifical restriction. */
1009         if (from->type->features != to->type->features)
1010                 return -ENOEXEC;
1011
1012         /* No magic here: ref munging protected by the mutex */ 
1013         write_lock_bh(&ip_set_lock);
1014         strncpy(from_name, from->name, IP_SET_MAXNAMELEN);
1015         from_ref = atomic_read(&from->ref);
1016
1017         strncpy(from->name, to->name, IP_SET_MAXNAMELEN);
1018         atomic_set(&from->ref, atomic_read(&to->ref));
1019         strncpy(to->name, from_name, IP_SET_MAXNAMELEN);
1020         atomic_set(&to->ref, from_ref);
1021         
1022         ip_set_list[from_index] = to;
1023         ip_set_list[to_index] = from;
1024         
1025         write_unlock_bh(&ip_set_lock);
1026         return 0;
1027 }
1028
1029 /*
1030  * List set data
1031  */
1032
1033 static inline void
1034 __set_hash_bindings_size_list(struct ip_set_hash *set_hash,
1035                               ip_set_id_t id, size_t *size)
1036 {
1037         if (set_hash->id == id)
1038                 *size += sizeof(struct ip_set_hash_list);
1039 }
1040
1041 static inline void
1042 __set_hash_bindings_size_save(struct ip_set_hash *set_hash,
1043                               ip_set_id_t id, size_t *size)
1044 {
1045         if (set_hash->id == id)
1046                 *size += sizeof(struct ip_set_hash_save);
1047 }
1048
1049 static inline void
1050 __set_hash_bindings(struct ip_set_hash *set_hash,
1051                     ip_set_id_t id, void *data, int *used)
1052 {
1053         if (set_hash->id == id) {
1054                 struct ip_set_hash_list *hash_list = 
1055                         (struct ip_set_hash_list *)(data + *used);
1056
1057                 hash_list->ip = set_hash->ip;
1058                 hash_list->binding = set_hash->binding;
1059                 *used += sizeof(struct ip_set_hash_list);
1060         }
1061 }
1062
1063 static int ip_set_list_set(ip_set_id_t index,
1064                            void *data,
1065                            int *used,
1066                            int len)
1067 {
1068         struct ip_set *set = ip_set_list[index];
1069         struct ip_set_list *set_list;
1070
1071         /* Pointer to our header */
1072         set_list = (struct ip_set_list *) (data + *used);
1073
1074         DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used);
1075
1076         /* Get and ensure header size */
1077         if (*used + sizeof(struct ip_set_list) > len)
1078                 goto not_enough_mem;
1079         *used += sizeof(struct ip_set_list);
1080
1081         read_lock_bh(&set->lock);
1082         /* Get and ensure set specific header size */
1083         set_list->header_size = set->type->header_size;
1084         if (*used + set_list->header_size > len)
1085                 goto unlock_set;
1086
1087         /* Fill in the header */
1088         set_list->index = index;
1089         set_list->binding = set->binding;
1090         set_list->ref = atomic_read(&set->ref);
1091
1092         /* Fill in set spefific header data */
1093         set->type->list_header(set, data + *used);
1094         *used += set_list->header_size;
1095
1096         /* Get and ensure set specific members size */
1097         set_list->members_size = set->type->list_members_size(set);
1098         if (*used + set_list->members_size > len)
1099                 goto unlock_set;
1100
1101         /* Fill in set spefific members data */
1102         set->type->list_members(set, data + *used);
1103         *used += set_list->members_size;
1104         read_unlock_bh(&set->lock);
1105
1106         /* Bindings */
1107
1108         /* Get and ensure set specific bindings size */
1109         set_list->bindings_size = 0;
1110         FOREACH_HASH_DO(__set_hash_bindings_size_list,
1111                         set->id, &set_list->bindings_size);
1112         if (*used + set_list->bindings_size > len)
1113                 goto not_enough_mem;
1114
1115         /* Fill in set spefific bindings data */
1116         FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used);
1117         
1118         return 0;
1119
1120     unlock_set:
1121         read_unlock_bh(&set->lock);
1122     not_enough_mem:
1123         DP("not enough mem, try again");
1124         return -EAGAIN;
1125 }
1126
1127 /*
1128  * Save sets
1129  */
1130 static int ip_set_save_set(ip_set_id_t index,
1131                            void *data,
1132                            int *used,
1133                            int len)
1134 {
1135         struct ip_set *set;
1136         struct ip_set_save *set_save;
1137
1138         /* Pointer to our header */
1139         set_save = (struct ip_set_save *) (data + *used);
1140
1141         /* Get and ensure header size */
1142         if (*used + sizeof(struct ip_set_save) > len)
1143                 goto not_enough_mem;
1144         *used += sizeof(struct ip_set_save);
1145
1146         set = ip_set_list[index];
1147         DP("set: %s, used: %u(%u) %p %p", set->name, *used, len, 
1148            data, data + *used);
1149
1150         read_lock_bh(&set->lock);
1151         /* Get and ensure set specific header size */
1152         set_save->header_size = set->type->header_size;
1153         if (*used + set_save->header_size > len)
1154                 goto unlock_set;
1155
1156         /* Fill in the header */
1157         set_save->index = index;
1158         set_save->binding = set->binding;
1159
1160         /* Fill in set spefific header data */
1161         set->type->list_header(set, data + *used);
1162         *used += set_save->header_size;
1163
1164         DP("set header filled: %s, used: %u %p %p", set->name, *used,
1165            data, data + *used);
1166         /* Get and ensure set specific members size */
1167         set_save->members_size = set->type->list_members_size(set);
1168         if (*used + set_save->members_size > len)
1169                 goto unlock_set;
1170
1171         /* Fill in set spefific members data */
1172         set->type->list_members(set, data + *used);
1173         *used += set_save->members_size;
1174         read_unlock_bh(&set->lock);
1175         DP("set members filled: %s, used: %u %p %p", set->name, *used,
1176            data, data + *used);
1177         return 0;
1178
1179     unlock_set:
1180         read_unlock_bh(&set->lock);
1181     not_enough_mem:
1182         DP("not enough mem, try again");
1183         return -EAGAIN;
1184 }
1185
1186 static inline void
1187 __set_hash_save_bindings(struct ip_set_hash *set_hash,
1188                          ip_set_id_t id,
1189                          void *data,
1190                          int *used,
1191                          int len,
1192                          int *res)
1193 {
1194         if (*res == 0
1195             && (id == IP_SET_INVALID_ID || set_hash->id == id)) {
1196                 struct ip_set_hash_save *hash_save = 
1197                         (struct ip_set_hash_save *)(data + *used);
1198                 /* Ensure bindings size */
1199                 if (*used + sizeof(struct ip_set_hash_save) > len) {
1200                         *res = -ENOMEM;
1201                         return;
1202                 }
1203                 hash_save->id = set_hash->id;
1204                 hash_save->ip = set_hash->ip;
1205                 hash_save->binding = set_hash->binding;
1206                 *used += sizeof(struct ip_set_hash_save);
1207         }
1208 }
1209
1210 static int ip_set_save_bindings(ip_set_id_t index,
1211                                 void *data,
1212                                 int *used,
1213                                 int len)
1214 {
1215         int res = 0;
1216         struct ip_set_save *set_save;
1217
1218         DP("used %u, len %u", *used, len);
1219         /* Get and ensure header size */
1220         if (*used + sizeof(struct ip_set_save) > len)
1221                 return -ENOMEM;
1222
1223         /* Marker */
1224         set_save = (struct ip_set_save *) (data + *used);
1225         set_save->index = IP_SET_INVALID_ID;
1226         *used += sizeof(struct ip_set_save);
1227
1228         DP("marker added used %u, len %u", *used, len);
1229         /* Fill in bindings data */
1230         if (index != IP_SET_INVALID_ID)
1231                 /* Sets are identified by id in hash */
1232                 index = ip_set_list[index]->id;
1233         FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res);
1234
1235         return res;     
1236 }
1237
1238 /*
1239  * Restore sets
1240  */
1241 static int ip_set_restore(void *data,
1242                           int len)
1243 {
1244         int res = 0;
1245         int line = 0, used = 0, members_size;
1246         struct ip_set *set;
1247         struct ip_set_hash_save *hash_save;
1248         struct ip_set_restore *set_restore;
1249         ip_set_id_t index;
1250
1251         /* Loop to restore sets */
1252         while (1) {
1253                 line++;
1254                 
1255                 DP("%u %u %u", used, sizeof(struct ip_set_restore), len);
1256                 /* Get and ensure header size */
1257                 if (used + sizeof(struct ip_set_restore) > len)
1258                         return line;
1259                 set_restore = (struct ip_set_restore *) (data + used);
1260                 used += sizeof(struct ip_set_restore);
1261
1262                 /* Ensure data size */
1263                 if (used 
1264                     + set_restore->header_size 
1265                     + set_restore->members_size > len)
1266                         return line;
1267
1268                 /* Check marker */
1269                 if (set_restore->index == IP_SET_INVALID_ID) {
1270                         line--;
1271                         goto bindings;
1272                 }
1273                 
1274                 /* Try to create the set */
1275                 DP("restore %s %s", set_restore->name, set_restore->typename);
1276                 res = ip_set_create(set_restore->name,
1277                                     set_restore->typename,
1278                                     set_restore->index,
1279                                     data + used,
1280                                     set_restore->header_size);
1281                 
1282                 if (res != 0)
1283                         return line;
1284                 used += set_restore->header_size;
1285
1286                 index = ip_set_find_byindex(set_restore->index);
1287                 DP("index %u, restore_index %u", index, set_restore->index);
1288                 if (index != set_restore->index)
1289                         return line;
1290                 /* Try to restore members data */
1291                 set = ip_set_list[index];
1292                 members_size = 0;
1293                 DP("members_size %u reqsize %u",
1294                    set_restore->members_size, set->type->reqsize);
1295                 while (members_size + set->type->reqsize <=
1296                        set_restore->members_size) {
1297                         line++;
1298                         DP("members: %u, line %u", members_size, line);
1299                         res = __ip_set_addip(index,
1300                                            data + used + members_size,
1301                                            set->type->reqsize);
1302                         if (!(res == 0 || res == -EEXIST)) 
1303                                 return line;
1304                         members_size += set->type->reqsize;
1305                 }
1306
1307                 DP("members_size %u  %u",
1308                    set_restore->members_size, members_size);
1309                 if (members_size != set_restore->members_size)
1310                         return line++;
1311                 used += set_restore->members_size;              
1312         }
1313         
1314    bindings:
1315         /* Loop to restore bindings */
1316         while (used < len) {
1317                 line++;
1318
1319                 DP("restore binding, line %u", line);           
1320                 /* Get and ensure size */
1321                 if (used + sizeof(struct ip_set_hash_save) > len)
1322                         return line;
1323                 hash_save = (struct ip_set_hash_save *) (data + used);
1324                 used += sizeof(struct ip_set_hash_save);
1325                 
1326                 /* hash_save->id is used to store the index */
1327                 index = ip_set_find_byindex(hash_save->id);
1328                 DP("restore binding index %u, id %u, %u -> %u",
1329                    index, hash_save->id, hash_save->ip, hash_save->binding);            
1330                 if (index != hash_save->id)
1331                         return line;
1332                         
1333                 set = ip_set_list[hash_save->id];
1334                 /* Null valued IP means default binding */
1335                 if (hash_save->ip)
1336                         res = ip_set_hash_add(set->id, 
1337                                               hash_save->ip,
1338                                               hash_save->binding);
1339                 else {
1340                         IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID);
1341                         write_lock_bh(&ip_set_lock);
1342                         set->binding = hash_save->binding;
1343                         __ip_set_get(set->binding);
1344                         write_unlock_bh(&ip_set_lock);
1345                         DP("default binding: %u", set->binding);
1346                 }
1347                 if (res != 0)
1348                         return line;
1349         }
1350         if (used != len)
1351                 return line;
1352         
1353         return 0;       
1354 }
1355
1356 static int
1357 ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len)
1358 {
1359         void *data;
1360         int res = 0;            /* Assume OK */
1361         unsigned *op;
1362         struct ip_set_req_adt *req_adt;
1363         ip_set_id_t index = IP_SET_INVALID_ID;
1364         int (*adtfn)(ip_set_id_t index,
1365                      const void *data, size_t size);
1366         struct fn_table {
1367                 int (*fn)(ip_set_id_t index,
1368                           const void *data, size_t size);
1369         } adtfn_table[] =
1370         { { ip_set_addip }, { ip_set_delip }, { ip_set_testip},
1371           { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind },
1372         };
1373
1374         DP("optval=%d, user=%p, len=%d", optval, user, len);
1375         if (!capable(CAP_NET_ADMIN))
1376                 return -EPERM;
1377         if (optval != SO_IP_SET)
1378                 return -EBADF;
1379         if (len <= sizeof(unsigned)) {
1380                 ip_set_printk("short userdata (want >%zu, got %u)",
1381                               sizeof(unsigned), len);
1382                 return -EINVAL;
1383         }
1384         data = vmalloc(len);
1385         if (!data) {
1386                 DP("out of mem for %u bytes", len);
1387                 return -ENOMEM;
1388         }
1389         if (copy_from_user(data, user, len) != 0) {
1390                 res = -EFAULT;
1391                 goto done;
1392         }
1393         if (down_interruptible(&ip_set_app_mutex)) {
1394                 res = -EINTR;
1395                 goto done;
1396         }
1397
1398         op = (unsigned *)data;
1399         DP("op=%x", *op);
1400         
1401         if (*op < IP_SET_OP_VERSION) {
1402                 /* Check the version at the beginning of operations */
1403                 struct ip_set_req_version *req_version =
1404                         (struct ip_set_req_version *) data;
1405                 if (req_version->version != IP_SET_PROTOCOL_VERSION) {
1406                         res = -EPROTO;
1407                         goto done;
1408                 }
1409         }
1410
1411         switch (*op) {
1412         case IP_SET_OP_CREATE:{
1413                 struct ip_set_req_create *req_create
1414                         = (struct ip_set_req_create *) data;
1415                 
1416                 if (len <= sizeof(struct ip_set_req_create)) {
1417                         ip_set_printk("short CREATE data (want >%zu, got %u)",
1418                                       sizeof(struct ip_set_req_create), len);
1419                         res = -EINVAL;
1420                         goto done;
1421                 }
1422                 req_create->name[IP_SET_MAXNAMELEN - 1] = '\0';
1423                 req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1424                 res = ip_set_create(req_create->name,
1425                                     req_create->typename,
1426                                     IP_SET_INVALID_ID,
1427                                     data + sizeof(struct ip_set_req_create),
1428                                     len - sizeof(struct ip_set_req_create));
1429                 goto done;
1430         }
1431         case IP_SET_OP_DESTROY:{
1432                 struct ip_set_req_std *req_destroy
1433                         = (struct ip_set_req_std *) data;
1434                 
1435                 if (len != sizeof(struct ip_set_req_std)) {
1436                         ip_set_printk("invalid DESTROY data (want %zu, got %u)",
1437                                       sizeof(struct ip_set_req_std), len);
1438                         res = -EINVAL;
1439                         goto done;
1440                 }
1441                 if (strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) {
1442                         /* Destroy all sets */
1443                         index = IP_SET_INVALID_ID;
1444                 } else {
1445                         req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0';
1446                         index = ip_set_find_byname(req_destroy->name);
1447
1448                         if (index == IP_SET_INVALID_ID) {
1449                                 res = -ENOENT;
1450                                 goto done;
1451                         }
1452                 }
1453                         
1454                 res = ip_set_destroy(index);
1455                 goto done;
1456         }
1457         case IP_SET_OP_FLUSH:{
1458                 struct ip_set_req_std *req_flush =
1459                         (struct ip_set_req_std *) data;
1460
1461                 if (len != sizeof(struct ip_set_req_std)) {
1462                         ip_set_printk("invalid FLUSH data (want %zu, got %u)",
1463                                       sizeof(struct ip_set_req_std), len);
1464                         res = -EINVAL;
1465                         goto done;
1466                 }
1467                 if (strcmp(req_flush->name, IPSET_TOKEN_ALL) == 0) {
1468                         /* Flush all sets */
1469                         index = IP_SET_INVALID_ID;
1470                 } else {
1471                         req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0';
1472                         index = ip_set_find_byname(req_flush->name);
1473
1474                         if (index == IP_SET_INVALID_ID) {
1475                                 res = -ENOENT;
1476                                 goto done;
1477                         }
1478                 }
1479                 res = ip_set_flush(index);
1480                 goto done;
1481         }
1482         case IP_SET_OP_RENAME:{
1483                 struct ip_set_req_create *req_rename
1484                         = (struct ip_set_req_create *) data;
1485
1486                 if (len != sizeof(struct ip_set_req_create)) {
1487                         ip_set_printk("invalid RENAME data (want %zu, got %u)",
1488                                       sizeof(struct ip_set_req_create), len);
1489                         res = -EINVAL;
1490                         goto done;
1491                 }
1492
1493                 req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0';
1494                 req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1495                         
1496                 index = ip_set_find_byname(req_rename->name);
1497                 if (index == IP_SET_INVALID_ID) {
1498                         res = -ENOENT;
1499                         goto done;
1500                 }
1501                 res = ip_set_rename(index, req_rename->typename);
1502                 goto done;
1503         }
1504         case IP_SET_OP_SWAP:{
1505                 struct ip_set_req_create *req_swap
1506                         = (struct ip_set_req_create *) data;
1507                 ip_set_id_t to_index;
1508
1509                 if (len != sizeof(struct ip_set_req_create)) {
1510                         ip_set_printk("invalid SWAP data (want %zu, got %u)",
1511                                       sizeof(struct ip_set_req_create), len);
1512                         res = -EINVAL;
1513                         goto done;
1514                 }
1515
1516                 req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0';
1517                 req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0';
1518
1519                 index = ip_set_find_byname(req_swap->name);
1520                 if (index == IP_SET_INVALID_ID) {
1521                         res = -ENOENT;
1522                         goto done;
1523                 }
1524                 to_index = ip_set_find_byname(req_swap->typename);
1525                 if (to_index == IP_SET_INVALID_ID) {
1526                         res = -ENOENT;
1527                         goto done;
1528                 }
1529                 res = ip_set_swap(index, to_index);
1530                 goto done;
1531         }
1532         default: 
1533                 break;  /* Set identified by id */
1534         }
1535         
1536         /* There we may have add/del/test/bind/unbind/test_bind operations */
1537         if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_BIND_SET) {
1538                 res = -EBADMSG;
1539                 goto done;
1540         }
1541         adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn;
1542
1543         if (len < sizeof(struct ip_set_req_adt)) {
1544                 ip_set_printk("short data in adt request (want >=%zu, got %u)",
1545                               sizeof(struct ip_set_req_adt), len);
1546                 res = -EINVAL;
1547                 goto done;
1548         }
1549         req_adt = (struct ip_set_req_adt *) data;
1550
1551         /* -U :all: :all:|:default: uses IP_SET_INVALID_ID */
1552         if (!(*op == IP_SET_OP_UNBIND_SET 
1553               && req_adt->index == IP_SET_INVALID_ID)) {
1554                 index = ip_set_find_byindex(req_adt->index);
1555                 if (index == IP_SET_INVALID_ID) {
1556                         res = -ENOENT;
1557                         goto done;
1558                 }
1559         }
1560         res = adtfn(index, data, len);
1561
1562     done:
1563         up(&ip_set_app_mutex);
1564         vfree(data);
1565         if (res > 0)
1566                 res = 0;
1567         DP("final result %d", res);
1568         return res;
1569 }
1570
1571 static int 
1572 ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len)
1573 {
1574         int res = 0;
1575         unsigned *op;
1576         ip_set_id_t index = IP_SET_INVALID_ID;
1577         void *data;
1578         int copylen = *len;
1579
1580         DP("optval=%d, user=%p, len=%d", optval, user, *len);
1581         if (!capable(CAP_NET_ADMIN))
1582                 return -EPERM;
1583         if (optval != SO_IP_SET)
1584                 return -EBADF;
1585         if (*len < sizeof(unsigned)) {
1586                 ip_set_printk("short userdata (want >=%zu, got %d)",
1587                               sizeof(unsigned), *len);
1588                 return -EINVAL;
1589         }
1590         data = vmalloc(*len);
1591         if (!data) {
1592                 DP("out of mem for %d bytes", *len);
1593                 return -ENOMEM;
1594         }
1595         if (copy_from_user(data, user, *len) != 0) {
1596                 res = -EFAULT;
1597                 goto done;
1598         }
1599         if (down_interruptible(&ip_set_app_mutex)) {
1600                 res = -EINTR;
1601                 goto done;
1602         }
1603
1604         op = (unsigned *) data;
1605         DP("op=%x", *op);
1606
1607         if (*op < IP_SET_OP_VERSION) {
1608                 /* Check the version at the beginning of operations */
1609                 struct ip_set_req_version *req_version =
1610                         (struct ip_set_req_version *) data;
1611                 if (req_version->version != IP_SET_PROTOCOL_VERSION) {
1612                         res = -EPROTO;
1613                         goto done;
1614                 }
1615         }
1616
1617         switch (*op) {
1618         case IP_SET_OP_VERSION: {
1619                 struct ip_set_req_version *req_version =
1620                     (struct ip_set_req_version *) data;
1621
1622                 if (*len != sizeof(struct ip_set_req_version)) {
1623                         ip_set_printk("invalid VERSION (want %zu, got %d)",
1624                                       sizeof(struct ip_set_req_version),
1625                                       *len);
1626                         res = -EINVAL;
1627                         goto done;
1628                 }
1629
1630                 req_version->version = IP_SET_PROTOCOL_VERSION;
1631                 res = copy_to_user(user, req_version,
1632                                    sizeof(struct ip_set_req_version));
1633                 goto done;
1634         }
1635         case IP_SET_OP_GET_BYNAME: {
1636                 struct ip_set_req_get_set *req_get
1637                         = (struct ip_set_req_get_set *) data;
1638
1639                 if (*len != sizeof(struct ip_set_req_get_set)) {
1640                         ip_set_printk("invalid GET_BYNAME (want %zu, got %d)",
1641                                       sizeof(struct ip_set_req_get_set), *len);
1642                         res = -EINVAL;
1643                         goto done;
1644                 }
1645                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1646                 index = ip_set_find_byname(req_get->set.name);
1647                 req_get->set.index = index;
1648                 goto copy;
1649         }
1650         case IP_SET_OP_GET_BYINDEX: {
1651                 struct ip_set_req_get_set *req_get
1652                         = (struct ip_set_req_get_set *) data;
1653
1654                 if (*len != sizeof(struct ip_set_req_get_set)) {
1655                         ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)",
1656                                       sizeof(struct ip_set_req_get_set), *len);
1657                         res = -EINVAL;
1658                         goto done;
1659                 }
1660                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1661                 index = ip_set_find_byindex(req_get->set.index);
1662                 strncpy(req_get->set.name,
1663                         index == IP_SET_INVALID_ID ? ""
1664                         : ip_set_list[index]->name, IP_SET_MAXNAMELEN);
1665                 goto copy;
1666         }
1667         case IP_SET_OP_ADT_GET: {
1668                 struct ip_set_req_adt_get *req_get
1669                         = (struct ip_set_req_adt_get *) data;
1670
1671                 if (*len != sizeof(struct ip_set_req_adt_get)) {
1672                         ip_set_printk("invalid ADT_GET (want %zu, got %d)",
1673                                       sizeof(struct ip_set_req_adt_get), *len);
1674                         res = -EINVAL;
1675                         goto done;
1676                 }
1677                 req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1678                 index = ip_set_find_byname(req_get->set.name);
1679                 if (index != IP_SET_INVALID_ID) {
1680                         req_get->set.index = index;
1681                         strncpy(req_get->typename,
1682                                 ip_set_list[index]->type->typename,
1683                                 IP_SET_MAXNAMELEN - 1);
1684                 } else {
1685                         res = -ENOENT;
1686                         goto done;
1687                 }
1688                 goto copy;
1689         }
1690         case IP_SET_OP_MAX_SETS: {
1691                 struct ip_set_req_max_sets *req_max_sets
1692                         = (struct ip_set_req_max_sets *) data;
1693                 ip_set_id_t i;
1694
1695                 if (*len != sizeof(struct ip_set_req_max_sets)) {
1696                         ip_set_printk("invalid MAX_SETS (want %zu, got %d)",
1697                                       sizeof(struct ip_set_req_max_sets), *len);
1698                         res = -EINVAL;
1699                         goto done;
1700                 }
1701
1702                 if (strcmp(req_max_sets->set.name, IPSET_TOKEN_ALL) == 0) {
1703                         req_max_sets->set.index = IP_SET_INVALID_ID;
1704                 } else {
1705                         req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0';
1706                         req_max_sets->set.index = 
1707                                 ip_set_find_byname(req_max_sets->set.name);
1708                         if (req_max_sets->set.index == IP_SET_INVALID_ID) {
1709                                 res = -ENOENT;
1710                                 goto done;
1711                         }
1712                 }
1713                 req_max_sets->max_sets = ip_set_max;
1714                 req_max_sets->sets = 0;
1715                 for (i = 0; i < ip_set_max; i++) {
1716                         if (ip_set_list[i] != NULL)
1717                                 req_max_sets->sets++;
1718                 }
1719                 goto copy;
1720         }
1721         case IP_SET_OP_LIST_SIZE: 
1722         case IP_SET_OP_SAVE_SIZE: {
1723                 struct ip_set_req_setnames *req_setnames
1724                         = (struct ip_set_req_setnames *) data;
1725                 struct ip_set_name_list *name_list;
1726                 struct ip_set *set;
1727                 ip_set_id_t i;
1728                 int used;
1729
1730                 if (*len < sizeof(struct ip_set_req_setnames)) {
1731                         ip_set_printk("short LIST_SIZE (want >=%zu, got %d)",
1732                                       sizeof(struct ip_set_req_setnames), *len);
1733                         res = -EINVAL;
1734                         goto done;
1735                 }
1736
1737                 req_setnames->size = 0;
1738                 used = sizeof(struct ip_set_req_setnames);
1739                 for (i = 0; i < ip_set_max; i++) {
1740                         if (ip_set_list[i] == NULL)
1741                                 continue;
1742                         name_list = (struct ip_set_name_list *) 
1743                                 (data + used);
1744                         used += sizeof(struct ip_set_name_list);
1745                         if (used > copylen) {
1746                                 res = -EAGAIN;
1747                                 goto done;
1748                         }
1749                         set = ip_set_list[i];
1750                         /* Fill in index, name, etc. */
1751                         name_list->index = i;
1752                         name_list->id = set->id;
1753                         strncpy(name_list->name,
1754                                 set->name,
1755                                 IP_SET_MAXNAMELEN - 1);
1756                         strncpy(name_list->typename,
1757                                 set->type->typename,
1758                                 IP_SET_MAXNAMELEN - 1);
1759                         DP("filled %s of type %s, index %u\n",
1760                            name_list->name, name_list->typename,
1761                            name_list->index);
1762                         if (!(req_setnames->index == IP_SET_INVALID_ID
1763                               || req_setnames->index == i))
1764                               continue;
1765                         /* Update size */
1766                         switch (*op) {
1767                         case IP_SET_OP_LIST_SIZE: {
1768                                 req_setnames->size += sizeof(struct ip_set_list)
1769                                         + set->type->header_size
1770                                         + set->type->list_members_size(set);
1771                                 FOREACH_HASH_DO(__set_hash_bindings_size_list, 
1772                                                 i, &req_setnames->size);
1773                                 break;
1774                         }
1775                         case IP_SET_OP_SAVE_SIZE: {
1776                                 req_setnames->size += sizeof(struct ip_set_save)
1777                                         + set->type->header_size
1778                                         + set->type->list_members_size(set);
1779                                 FOREACH_HASH_DO(__set_hash_bindings_size_save,
1780                                                 i, &req_setnames->size);
1781                                 break;
1782                         }
1783                         default:
1784                                 break;
1785                         }
1786                 }
1787                 if (copylen != used) {
1788                         res = -EAGAIN;
1789                         goto done;
1790                 }
1791                 goto copy;
1792         }
1793         case IP_SET_OP_LIST: {
1794                 struct ip_set_req_list *req_list
1795                         = (struct ip_set_req_list *) data;
1796                 ip_set_id_t i;
1797                 int used;
1798
1799                 if (*len < sizeof(struct ip_set_req_list)) {
1800                         ip_set_printk("short LIST (want >=%zu, got %d)",
1801                                       sizeof(struct ip_set_req_list), *len);
1802                         res = -EINVAL;
1803                         goto done;
1804                 }
1805                 index = req_list->index;
1806                 if (index != IP_SET_INVALID_ID
1807                     && ip_set_find_byindex(index) != index) {
1808                         res = -ENOENT;
1809                         goto done;
1810                 }
1811                 used = 0;
1812                 if (index == IP_SET_INVALID_ID) {
1813                         /* List all sets */
1814                         for (i = 0; i < ip_set_max && res == 0; i++) {
1815                                 if (ip_set_list[i] != NULL)
1816                                         res = ip_set_list_set(i, data, &used, *len);
1817                         }
1818                 } else {
1819                         /* List an individual set */
1820                         res = ip_set_list_set(index, data, &used, *len);
1821                 }
1822                 if (res != 0)
1823                         goto done;
1824                 else if (copylen != used) {
1825                         res = -EAGAIN;
1826                         goto done;
1827                 }
1828                 goto copy;
1829         }
1830         case IP_SET_OP_SAVE: {
1831                 struct ip_set_req_list *req_save
1832                         = (struct ip_set_req_list *) data;
1833                 ip_set_id_t i;
1834                 int used;
1835
1836                 if (*len < sizeof(struct ip_set_req_list)) {
1837                         ip_set_printk("short SAVE (want >=%zu, got %d)",
1838                                       sizeof(struct ip_set_req_list), *len);
1839                         res = -EINVAL;
1840                         goto done;
1841                 }
1842                 index = req_save->index;
1843                 if (index != IP_SET_INVALID_ID
1844                     && ip_set_find_byindex(index) != index) {
1845                         res = -ENOENT;
1846                         goto done;
1847                 }
1848                 used = 0;
1849                 if (index == IP_SET_INVALID_ID) {
1850                         /* Save all sets */
1851                         for (i = 0; i < ip_set_max && res == 0; i++) {
1852                                 if (ip_set_list[i] != NULL)
1853                                         res = ip_set_save_set(i, data, &used, *len);
1854                         }
1855                 } else {
1856                         /* Save an individual set */
1857                         res = ip_set_save_set(index, data, &used, *len);
1858                 }
1859                 if (res == 0)
1860                         res = ip_set_save_bindings(index, data, &used, *len);
1861                         
1862                 if (res != 0)
1863                         goto done;
1864                 else if (copylen != used) {
1865                         res = -EAGAIN;
1866                         goto done;
1867                 }
1868                 goto copy;
1869         }
1870         case IP_SET_OP_RESTORE: {
1871                 struct ip_set_req_setnames *req_restore
1872                         = (struct ip_set_req_setnames *) data;
1873                 int line;
1874
1875                 if (*len < sizeof(struct ip_set_req_setnames)
1876                     || *len != req_restore->size) {
1877                         ip_set_printk("invalid RESTORE (want =%zu, got %d)",
1878                                       req_restore->size, *len);
1879                         res = -EINVAL;
1880                         goto done;
1881                 }
1882                 line = ip_set_restore(data + sizeof(struct ip_set_req_setnames),
1883                                       req_restore->size - sizeof(struct ip_set_req_setnames));
1884                 DP("ip_set_restore: %u", line);
1885                 if (line != 0) {
1886                         res = -EAGAIN;
1887                         req_restore->size = line;
1888                         copylen = sizeof(struct ip_set_req_setnames);
1889                         goto copy;
1890                 }
1891                 goto done;
1892         }
1893         default:
1894                 res = -EBADMSG;
1895                 goto done;
1896         }       /* end of switch(op) */
1897
1898     copy:
1899         DP("set %s, copylen %u", index != IP_SET_INVALID_ID
1900                                  && ip_set_list[index]
1901                      ? ip_set_list[index]->name
1902                      : ":all:", copylen);
1903         res = copy_to_user(user, data, copylen);
1904         
1905     done:
1906         up(&ip_set_app_mutex);
1907         vfree(data);
1908         if (res > 0)
1909                 res = 0;
1910         DP("final result %d", res);
1911         return res;
1912 }
1913
1914 static struct nf_sockopt_ops so_set = {
1915         .pf             = PF_INET,
1916         .set_optmin     = SO_IP_SET,
1917         .set_optmax     = SO_IP_SET + 1,
1918         .set            = &ip_set_sockfn_set,
1919         .get_optmin     = SO_IP_SET,
1920         .get_optmax     = SO_IP_SET + 1,
1921         .get            = &ip_set_sockfn_get,
1922         .use            = 0
1923 };
1924
1925 static int max_sets, hash_size;
1926 module_param(max_sets, int, 0600);
1927 MODULE_PARM_DESC(max_sets, "maximal number of sets");
1928 module_param(hash_size, int, 0600);
1929 MODULE_PARM_DESC(hash_size, "hash size for bindings");
1930 MODULE_LICENSE("GPL");
1931 MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
1932 MODULE_DESCRIPTION("module implementing core IP set support");
1933
1934 static int __init init(void)
1935 {
1936         int res;
1937         ip_set_id_t i;
1938
1939         get_random_bytes(&ip_set_hash_random, 4);
1940         if (max_sets)
1941                 ip_set_max = max_sets;
1942         ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max);
1943         if (!ip_set_list) {
1944                 printk(KERN_ERR "Unable to create ip_set_list\n");
1945                 return -ENOMEM;
1946         }
1947         memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max);
1948         if (hash_size)
1949                 ip_set_bindings_hash_size = hash_size;
1950         ip_set_hash = vmalloc(sizeof(struct list_head) * ip_set_bindings_hash_size);
1951         if (!ip_set_hash) {
1952                 printk(KERN_ERR "Unable to create ip_set_hash\n");
1953                 vfree(ip_set_list);
1954                 return -ENOMEM;
1955         }
1956         for (i = 0; i < ip_set_bindings_hash_size; i++)
1957                 INIT_LIST_HEAD(&ip_set_hash[i]);
1958
1959         INIT_LIST_HEAD(&set_type_list);
1960
1961         res = nf_register_sockopt(&so_set);
1962         if (res != 0) {
1963                 ip_set_printk("SO_SET registry failed: %d", res);
1964                 vfree(ip_set_list);
1965                 vfree(ip_set_hash);
1966                 return res;
1967         }
1968         return 0;
1969 }
1970
1971 static void __exit fini(void)
1972 {
1973         /* There can't be any existing set or binding */
1974         nf_unregister_sockopt(&so_set);
1975         vfree(ip_set_list);
1976         vfree(ip_set_hash);
1977         DP("these are the famous last words");
1978 }
1979
/*
 * Symbols exported to other kernel modules.  The grouping comments
 * below are inferred from the function names only.
 */

/* Registration / removal of set types */
EXPORT_SYMBOL(ip_set_register_set_type);
EXPORT_SYMBOL(ip_set_unregister_set_type);

/* Obtaining a set by name or by index, and releasing it again */
EXPORT_SYMBOL(ip_set_get_byname);
EXPORT_SYMBOL(ip_set_get_byindex);
EXPORT_SYMBOL(ip_set_put);

/* Add / delete / test operations callable from kernel code */
EXPORT_SYMBOL(ip_set_addip_kernel);
EXPORT_SYMBOL(ip_set_delip_kernel);
EXPORT_SYMBOL(ip_set_testip_kernel);

/* Hook init() and fini() up as the module's load and unload handlers */
module_init(init);
module_exit(fini);