vserver 1.9.3
[linux-2.6.git] / net / ipv6 / netfilter / ip6_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
15  *      - new extension header parser code
16  */
17 #include <linux/config.h>
18 #include <linux/skbuff.h>
19 #include <linux/kmod.h>
20 #include <linux/vmalloc.h>
21 #include <linux/netdevice.h>
22 #include <linux/module.h>
23 #include <linux/tcp.h>
24 #include <linux/udp.h>
25 #include <linux/icmpv6.h>
26 #include <net/ip.h>
27 #include <net/ipv6.h>
28 #include <asm/uaccess.h>
29 #include <asm/semaphore.h>
30 #include <linux/proc_fs.h>
31
32 #include <linux/netfilter_ipv6/ip6_tables.h>
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36 MODULE_DESCRIPTION("IPv6 packet filter");
37
/* Byte sizes of the fixed IPv6 header and of the generic extension-header
 * prefix (struct ipv6_opt_hdr), used when walking extension headers. */
38 #define IPV6_HDR_LEN    (sizeof(struct ipv6hdr))
39 #define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
40
/* Compile-time debug switches; uncomment to enable. */
41 /*#define DEBUG_IP_FIREWALL*/
42 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
43 /*#define DEBUG_IP_FIREWALL_USER*/
44
/* dprintf(): debug output from the packet-matching code; expands to nothing
 * unless DEBUG_IP_FIREWALL is defined. */
45 #ifdef DEBUG_IP_FIREWALL
46 #define dprintf(format, args...)  printk(format , ## args)
47 #else
48 #define dprintf(format, args...)
49 #endif
50
/* duprintf(): debug output from the table-checking/lookup code; expands to
 * nothing unless DEBUG_IP_FIREWALL_USER is defined. */
51 #ifdef DEBUG_IP_FIREWALL_USER
52 #define duprintf(format, args...) printk(format , ## args)
53 #else
54 #define duprintf(format, args...)
55 #endif
56
/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, prints function, file and
 * line when x is false (it only logs; execution continues).  Without it the
 * macro expands to nothing, so x is not evaluated. */
57 #ifdef CONFIG_NETFILTER_DEBUG
58 #define IP_NF_ASSERT(x)                                         \
59 do {                                                            \
60         if (!(x))                                               \
61                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
62                        __FUNCTION__, __FILE__, __LINE__);       \
63 } while(0)
64 #else
65 #define IP_NF_ASSERT(x)
66 #endif
/* Round x up to a multiple of SMP_CACHE_BYTES (the mask trick assumes
 * SMP_CACHE_BYTES is a power of two -- TODO confirm). */
67 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
68
/* Mutex passed to the find_*_lock() helpers by the rule-checking code. */
69 static DECLARE_MUTEX(ip6t_mutex);
70
71 /* Must have mutex */
/* Both assertions check that down_trylock() on ip6t_mutex returns non-zero;
 * the argument x is ignored. */
72 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
73 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
74 #include <linux/netfilter_ipv4/lockhelp.h>
75 #include <linux/netfilter_ipv4/listhelp.h>
76
77 #if 0
78 /* All the better to debug you with... */
79 #define static
80 #define inline
81 #endif
82
83 /* Locking is simple: we assume at worst case there will be one packet
84    in user context and one from bottom halves (or soft irq if Alexey's
85    softnet patch was applied).
86
87    We keep a set of rules for each CPU, so we can avoid write-locking
88    them; doing a readlock_bh() stops packets coming through if we're
89    in user context.
90
91    To be cache friendly on SMP, we arrange them like so:
92    [ n-entries ]
93    ... cache-align padding ...
94    [ n-entries ]
95
96    Hence the start of any table is given by TABLE_OFFSET() below.  */
97
98 /* The table itself */
/* Header describing a rule blob; the rules themselves follow in entries[].
 * hook_entry[] and underflow[] hold byte offsets that ip6t_do_table() adds
 * to the base of the current CPU's copy to locate an entry. */
99 struct ip6t_table_info
100 {
101         /* Size per table */
        /* (TABLE_OFFSET() cache-aligns this value to step between copies.) */
102         unsigned int size;
103         /* Number of entries: FIXME. --RR */
104         unsigned int number;
105         /* Initial number of entries. Needed for module usage count */
106         unsigned int initial_entries;
107
108         /* Entry points and underflows */
        /* Indexed by hook number; each value is a byte offset into entries. */
109         unsigned int hook_entry[NF_IP6_NUMHOOKS];
110         unsigned int underflow[NF_IP6_NUMHOOKS];
111
112         /* ip6t_entry tables: one per CPU */
        /* Zero-length trailing array, cache-line aligned. */
113         char entries[0] ____cacheline_aligned;
114 };
115
/* Lists searched by name through find_inlist_lock(): targets, matches and
 * tables respectively. */
116 static LIST_HEAD(ip6t_target);
117 static LIST_HEAD(ip6t_match);
118 static LIST_HEAD(ip6t_tables);
/* Add b bytes and p packets to counter c. */
119 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
120
/* Byte offset of CPU p's copy of the rules inside t->entries: copies are
 * spaced SMP_ALIGN(t->size) apart on SMP; on UP there is a single copy at
 * offset 0. */
121 #ifdef CONFIG_SMP
122 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
123 #else
124 #define TABLE_OFFSET(t,p) 0
125 #endif
126
127 #if 0
128 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
129 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
130 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
131 #endif
132
/*
 * Compare two IPv6 addresses under a mask.
 *
 * Returns 0 when addr1 and addr2 agree on every bit selected by mask,
 * and 1 as soon as a differing masked byte is found.
 */
static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
			      struct in6_addr addr2)
{
	const unsigned char *lhs = addr1.s6_addr;
	const unsigned char *rhs = addr2.s6_addr;
	const unsigned char *sel = mask.s6_addr;
	unsigned int n = 0;

	while (n < 16) {
		/* A set bit here is a bit that differs and is covered by the mask. */
		if ((lhs[n] ^ rhs[n]) & sel[n])
			return 1;
		n++;
	}
	return 0;
}
144
145 /* Check for an extension */
146 int 
147 ip6t_ext_hdr(u8 nexthdr)
148 {
149         return ( (nexthdr == IPPROTO_HOPOPTS)   ||
150                  (nexthdr == IPPROTO_ROUTING)   ||
151                  (nexthdr == IPPROTO_FRAGMENT)  ||
152                  (nexthdr == IPPROTO_ESP)       ||
153                  (nexthdr == IPPROTO_AH)        ||
154                  (nexthdr == IPPROTO_NONE)      ||
155                  (nexthdr == IPPROTO_DSTOPTS) );
156 }
157
158 /* Returns whether matches rule or not. */
159 static inline int
160 ip6_packet_match(const struct sk_buff *skb,
161                  const struct ipv6hdr *ipv6,
162                  const char *indev,
163                  const char *outdev,
164                  const struct ip6t_ip6 *ip6info,
165                  int isfrag)
166 {
167         size_t i;
168         unsigned long ret;
169
170 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
171
172         if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
173                   IP6T_INV_SRCIP)
174             || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
175                      IP6T_INV_DSTIP)) {
176                 dprintf("Source or dest mismatch.\n");
177 /*
178                 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
179                         ipinfo->smsk.s_addr, ipinfo->src.s_addr,
180                         ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
181                 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
182                         ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
183                         ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
184                 return 0;
185         }
186
187         /* Look for ifname matches; this should unroll nicely. */
188         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
189                 ret |= (((const unsigned long *)indev)[i]
190                         ^ ((const unsigned long *)ip6info->iniface)[i])
191                         & ((const unsigned long *)ip6info->iniface_mask)[i];
192         }
193
194         if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
195                 dprintf("VIA in mismatch (%s vs %s).%s\n",
196                         indev, ip6info->iniface,
197                         ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
198                 return 0;
199         }
200
201         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
202                 ret |= (((const unsigned long *)outdev)[i]
203                         ^ ((const unsigned long *)ip6info->outiface)[i])
204                         & ((const unsigned long *)ip6info->outiface_mask)[i];
205         }
206
207         if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
208                 dprintf("VIA out mismatch (%s vs %s).%s\n",
209                         outdev, ip6info->outiface,
210                         ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
211                 return 0;
212         }
213
214 /* ... might want to do something with class and flowlabel here ... */
215
216         /* look for the desired protocol header */
217         if((ip6info->flags & IP6T_F_PROTO)) {
218                 u_int8_t currenthdr = ipv6->nexthdr;
219                 struct ipv6_opt_hdr *hdrptr;
220                 u_int16_t ptr;          /* Header offset in skb */
221                 u_int16_t hdrlen;       /* Header */
222
223                 ptr = IPV6_HDR_LEN;
224
225                 while (ip6t_ext_hdr(currenthdr)) {
226                         /* Is there enough space for the next ext header? */
227                         if (skb->len - ptr < IPV6_OPTHDR_LEN)
228                                 return 0;
229
230                         /* NONE or ESP: there isn't protocol part */
231                         /* If we want to count these packets in '-p all',
232                          * we will change the return 0 to 1*/
233                         if ((currenthdr == IPPROTO_NONE) || 
234                                 (currenthdr == IPPROTO_ESP))
235                                 return 0;
236
237                         hdrptr = (struct ipv6_opt_hdr *)(skb->data + ptr);
238
239                         /* Size calculation */
240                         if (currenthdr == IPPROTO_FRAGMENT) {
241                                 hdrlen = 8;
242                         } else if (currenthdr == IPPROTO_AH)
243                                 hdrlen = (hdrptr->hdrlen+2)<<2;
244                         else
245                                 hdrlen = ipv6_optlen(hdrptr);
246
247                         currenthdr = hdrptr->nexthdr;
248                         ptr += hdrlen;
249                         /* ptr is too large */
250                         if ( ptr > skb->len ) 
251                                 return 0;
252                 }
253
254                 /* currenthdr contains the protocol header */
255
256                 dprintf("Packet protocol %hi ?= %s%hi.\n",
257                                 currenthdr, 
258                                 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
259                                 ip6info->proto);
260
261                 if (ip6info->proto == currenthdr) {
262                         if(ip6info->invflags & IP6T_INV_PROTO) {
263                                 return 0;
264                         }
265                         return 1;
266                 }
267
268                 /* We need match for the '-p all', too! */
269                 if ((ip6info->proto != 0) &&
270                         !(ip6info->invflags & IP6T_INV_PROTO))
271                         return 0;
272         }
273         return 1;
274 }
275
276 /* should be ip6 safe */
277 static inline int 
278 ip6_checkentry(const struct ip6t_ip6 *ipv6)
279 {
280         if (ipv6->flags & ~IP6T_F_MASK) {
281                 duprintf("Unknown flag bits set: %08X\n",
282                          ipv6->flags & ~IP6T_F_MASK);
283                 return 0;
284         }
285         if (ipv6->invflags & ~IP6T_INV_MASK) {
286                 duprintf("Unknown invflag bits set: %08X\n",
287                          ipv6->invflags & ~IP6T_INV_MASK);
288                 return 0;
289         }
290         return 1;
291 }
292
293 static unsigned int
294 ip6t_error(struct sk_buff **pskb,
295           unsigned int hooknum,
296           const struct net_device *in,
297           const struct net_device *out,
298           const void *targinfo,
299           void *userinfo)
300 {
301         if (net_ratelimit())
302                 printk("ip6_tables: error: `%s'\n", (char *)targinfo);
303
304         return NF_DROP;
305 }
306
307 static inline
308 int do_match(struct ip6t_entry_match *m,
309              const struct sk_buff *skb,
310              const struct net_device *in,
311              const struct net_device *out,
312              int offset,
313              const void *hdr,
314              u_int16_t datalen,
315              int *hotdrop)
316 {
317         /* Stop iteration if it doesn't match */
318         if (!m->u.kernel.match->match(skb, in, out, m->data,
319                                       offset, hdr, datalen, hotdrop))
320                 return 1;
321         else
322                 return 0;
323 }
324
/* Return the entry located `offset' bytes past `base'. */
static inline struct ip6t_entry *
get_entry(void *base, unsigned int offset)
{
	char *start = base;

	return (struct ip6t_entry *)(start + offset);
}
330
331 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Run the packet *pskb through the rules of `table' starting at the entry
 * point for `hook'.
 *
 * pskb     - packet; linearized here if it is non-linear
 * hook     - hook number, used to index hook_entry[] / underflow[]
 * in, out  - devices; a NULL device is matched as an all-zero name
 * table    - table to traverse; table->lock is read-held (bottom halves
 *            off) for the whole walk
 * userdata - passed through untouched to target functions
 *
 * Returns NF_DROP if linearizing fails or a match set hotdrop; otherwise
 * the verdict produced by a standard or extension target.
 */
332 unsigned int
333 ip6t_do_table(struct sk_buff **pskb,
334               unsigned int hook,
335               const struct net_device *in,
336               const struct net_device *out,
337               struct ip6t_table *table,
338               void *userdata)
339 {
340         static const char nulldevname[IFNAMSIZ];
341         u_int16_t offset = 0;
342         struct ipv6hdr *ipv6;
343         void *protohdr;
344         u_int16_t datalen;
345         int hotdrop = 0;
346         /* Initializing verdict to NF_DROP keeps gcc happy. */
347         unsigned int verdict = NF_DROP;
348         const char *indev, *outdev;
349         void *table_base;
350         struct ip6t_entry *e, *back;
351
352         /* FIXME: Push down to extensions --RR */
353         if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
354                 return NF_DROP;
355
356         /* Initialization */
357         ipv6 = (*pskb)->nh.ipv6h;
358         protohdr = (u_int32_t *)((char *)ipv6 + IPV6_HDR_LEN);
359         datalen = (*pskb)->len - IPV6_HDR_LEN;
360         indev = in ? in->name : nulldevname;
361         outdev = out ? out->name : nulldevname;
362
363         /* We handle fragments by dealing with the first fragment as
364          * if it was a normal packet.  All other fragments are treated
365          * normally, except that they will NEVER match rules that ask
366          * things we don't know, ie. tcp syn flag or ports).  If the
367          * rule is also a fragment-specific rule, non-fragments won't
368          * match it. */
369
370         read_lock_bh(&table->lock);
371         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
        /* Use this CPU's copy of the rules. */
372         table_base = (void *)table->private->entries
373                 + TABLE_OFFSET(table->private, smp_processor_id());
374         e = get_entry(table_base, table->private->hook_entry[hook]);
375
376 #ifdef CONFIG_NETFILTER_DEBUG
377         /* Check noone else using our table */
        /* The first entry's comefrom is used as an "in use" marker:
         * 0xdead57ac is written when a walk finishes, 0xeeeeeeec while a
         * target function is running, 0x57acc001 while walking. */
378         if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
379             && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
380                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
381                        smp_processor_id(),
382                        table->name,
383                        &((struct ip6t_entry *)table_base)->comefrom,
384                        ((struct ip6t_entry *)table_base)->comefrom);
385         }
386         ((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
387 #endif
388
389         /* For return from builtin chain */
390         back = get_entry(table_base, table->private->underflow[hook]);
391
392         do {
393                 IP_NF_ASSERT(e);
394                 IP_NF_ASSERT(back);
395                 (*pskb)->nfcache |= e->nfcache;
396                 if (ip6_packet_match(*pskb, ipv6, indev, outdev, 
397                         &e->ipv6, offset)) {
398                         struct ip6t_entry_target *t;
399
                        /* Non-zero means some match extension rejected
                         * the packet (do_match() stops the iteration). */
400                         if (IP6T_MATCH_ITERATE(e, do_match,
401                                                *pskb, in, out,
402                                                offset, protohdr,
403                                                datalen, &hotdrop) != 0)
404                                 goto no_match;
405
                        /* Count payload length plus the fixed header. */
406                         ADD_COUNTER(e->counters, ntohs(ipv6->payload_len) + IPV6_HDR_LEN, 1);
407
408                         t = ip6t_get_target(e);
409                         IP_NF_ASSERT(t->u.kernel.target);
410                         /* Standard target? */
411                         if (!t->u.kernel.target->target) {
412                                 int v;
413
414                                 v = ((struct ip6t_standard_target *)t)->verdict;
415                                 if (v < 0) {
416                                         /* Pop from stack? */
                                        /* Any other negative value encodes
                                         * a final verdict as -verdict - 1. */
417                                         if (v != IP6T_RETURN) {
418                                                 verdict = (unsigned)(-v) - 1;
419                                                 break;
420                                         }
                                        /* RETURN: resume at the saved entry and
                                         * reload the previous back pointer from
                                         * its comefrom field. */
421                                         e = back;
422                                         back = get_entry(table_base,
423                                                          back->comefrom);
424                                         continue;
425                                 }
                                /* v >= 0 is a byte offset to continue at.  If
                                 * it is not simply the next entry, this is a
                                 * jump: remember where to come back to. */
426                                 if (table_base + v
427                                     != (void *)e + e->next_offset) {
428                                         /* Save old back ptr in next entry */
429                                         struct ip6t_entry *next
430                                                 = (void *)e + e->next_offset;
431                                         next->comefrom
432                                                 = (void *)back - table_base;
433                                         /* set back pointer to next entry */
434                                         back = next;
435                                 }
436
437                                 e = get_entry(table_base, v);
438                         } else {
439                                 /* Targets which reenter must return
440                                    abs. verdicts */
441 #ifdef CONFIG_NETFILTER_DEBUG
442                                 ((struct ip6t_entry *)table_base)->comefrom
443                                         = 0xeeeeeeec;
444 #endif
445                                 verdict = t->u.kernel.target->target(pskb,
446                                                                      hook,
447                                                                      in, out,
448                                                                      t->data,
449                                                                      userdata);
450
451 #ifdef CONFIG_NETFILTER_DEBUG
                                /* A changed marker means the table was walked
                                 * again from inside the target; continuing
                                 * this walk is then refused. */
452                                 if (((struct ip6t_entry *)table_base)->comefrom
453                                     != 0xeeeeeeec
454                                     && verdict == IP6T_CONTINUE) {
455                                         printk("Target %s reentered!\n",
456                                                t->u.kernel.target->name);
457                                         verdict = NF_DROP;
458                                 }
459                                 ((struct ip6t_entry *)table_base)->comefrom
460                                         = 0x57acc001;
461 #endif
462                                 /* Target might have changed stuff. */
463                                 ipv6 = (*pskb)->nh.ipv6h;
464                                 protohdr = (u_int32_t *)((void *)ipv6 + IPV6_HDR_LEN);
465                                 datalen = (*pskb)->len - IPV6_HDR_LEN;
466
467                                 if (verdict == IP6T_CONTINUE)
468                                         e = (void *)e + e->next_offset;
469                                 else
470                                         /* Verdict */
471                                         break;
472                         }
473                 } else {
474
475                 no_match:
                        /* Fall through to the following rule. */
476                         e = (void *)e + e->next_offset;
477                 }
478         } while (!hotdrop);
479
480 #ifdef CONFIG_NETFILTER_DEBUG
481         ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
482 #endif
483         read_unlock_bh(&table->lock);
484
485 #ifdef DEBUG_ALLOW_ALL
486         return NF_ACCEPT;
487 #else
488         if (hotdrop)
489                 return NF_DROP;
490         else return verdict;
491 #endif
492 }
493
494 /* If it succeeds, returns element and locks mutex */
495 static inline void *
496 find_inlist_lock_noload(struct list_head *head,
497                         const char *name,
498                         int *error,
499                         struct semaphore *mutex)
500 {
501         void *ret;
502
503 #if 1
504         duprintf("find_inlist: searching for `%s' in %s.\n",
505                  name, head == &ip6t_target ? "ip6t_target"
506                  : head == &ip6t_match ? "ip6t_match"
507                  : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
508 #endif
509
510         *error = down_interruptible(mutex);
511         if (*error != 0)
512                 return NULL;
513
514         ret = list_named_find(head, name);
515         if (!ret) {
516                 *error = -ENOENT;
517                 up(mutex);
518         }
519         return ret;
520 }
521
/*
 * find_inlist_lock(): like find_inlist_lock_noload(), but with
 * CONFIG_KMOD a failed lookup triggers request_module("<prefix><name>")
 * followed by one more lookup.  Without CONFIG_KMOD the prefix is
 * ignored and the plain lookup is used.
 */
#ifdef CONFIG_KMOD
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *found = find_inlist_lock_noload(head, name, error, mutex);

	if (found)
		return found;

	/* Not registered (yet): try loading the module, then look again. */
	duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
	request_module("%s%s", prefix, name);

	return find_inlist_lock_noload(head, name, error, mutex);
}
#else
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
544
545 static inline struct ip6t_table *
546 ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
547 {
548         return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex);
549 }
550
551 static inline struct ip6t_match *
552 find_match_lock(const char *name, int *error, struct semaphore *mutex)
553 {
554         return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex);
555 }
556
557 struct ip6t_target *
558 ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex)
559 {
560         return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex);
561 }
562
563 /* All zeroes == unconditional rule. */
564 static inline int
565 unconditional(const struct ip6t_ip6 *ipv6)
566 {
567         unsigned int i;
568
569         for (i = 0; i < sizeof(*ipv6); i++)
570                 if (((char *)ipv6)[i])
571                         break;
572
573         return (i == sizeof(*ipv6));
574 }
575
576 /* Figures out from what hook each rule can be called: returns 0 if
577    there are loops.  Puts hook bitmask in comefrom. */
/*
 * newinfo     - table whose entries are walked (offsets are relative to
 *               newinfo->entries)
 * valid_hooks - bitmask of hooks to walk; other hooks are skipped
 *
 * While an entry is on the path currently being followed, bit
 * NF_IP6_NUMHOOKS of its comefrom is set; meeting an entry with that bit
 * already set means the rules loop.  The bit is cleared again when the
 * walk backtracks over the entry.  Returns 1 when no loop was found.
 */
578 static int
579 mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
580 {
581         unsigned int hook;
582
583         /* No recursion; use packet counter to save back ptrs (reset
584            to 0 as we leave), and comefrom to save source hook bitmask */
585         for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
586                 unsigned int pos = newinfo->hook_entry[hook];
587                 struct ip6t_entry *e
588                         = (struct ip6t_entry *)(newinfo->entries + pos);
589
590                 if (!(valid_hooks & (1 << hook)))
591                         continue;
592
593                 /* Set initial back pointer. */
                /* Pointing at itself marks the start of the walk; see the
                 * "pos == oldpos" test below. */
594                 e->counters.pcnt = pos;
595
596                 for (;;) {
597                         struct ip6t_standard_target *t
598                                 = (void *)ip6t_get_target(e);
599
600                         if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
601                                 printk("iptables: loop hook %u pos %u %08X.\n",
602                                        hook, pos, e->comefrom);
603                                 return 0;
604                         }
                        /* Record the hook and mark the entry as on-path. */
605                         e->comefrom
606                                 |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
607
608                         /* Unconditional return/END. */
                        /* i.e. no match extensions, standard target, negative
                         * verdict and an all-zero IPv6 part. */
609                         if (e->target_offset == sizeof(struct ip6t_entry)
610                             && (strcmp(t->target.u.user.name,
611                                        IP6T_STANDARD_TARGET) == 0)
612                             && t->verdict < 0
613                             && unconditional(&e->ipv6)) {
614                                 unsigned int oldpos, size;
615
616                                 /* Return: backtrack through the last
617                                    big jump. */
618                                 do {
                                        /* Clear the on-path bit set above. */
619                                         e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
620 #ifdef DEBUG_IP_FIREWALL_USER
621                                         if (e->comefrom
622                                             & (1 << NF_IP6_NUMHOOKS)) {
623                                                 duprintf("Back unset "
624                                                          "on hook %u "
625                                                          "rule %u\n",
626                                                          hook, pos);
627                                         }
628 #endif
                                        /* Follow the saved back pointer and
                                         * reset the counter we borrowed. */
629                                         oldpos = pos;
630                                         pos = e->counters.pcnt;
631                                         e->counters.pcnt = 0;
632
633                                         /* We're at the start. */
634                                         if (pos == oldpos)
635                                                 goto next;
636
637                                         e = (struct ip6t_entry *)
638                                                 (newinfo->entries + pos);
                                /* Keep unwinding while we arrived by plain
                                 * fall-through from the previous entry. */
639                                 } while (oldpos == pos + e->next_offset);
640
641                                 /* Move along one */
642                                 size = e->next_offset;
643                                 e = (struct ip6t_entry *)
644                                         (newinfo->entries + pos + size);
645                                 e->counters.pcnt = pos;
646                                 pos += size;
647                         } else {
648                                 int newpos = t->verdict;
649
650                                 if (strcmp(t->target.u.user.name,
651                                            IP6T_STANDARD_TARGET) == 0
652                                     && newpos >= 0) {
653                                         /* This a jump; chase it. */
654                                         duprintf("Jump rule %u -> %u\n",
655                                                  pos, newpos);
656                                 } else {
657                                         /* ... this is a fallthru */
658                                         newpos = pos + e->next_offset;
659                                 }
                                /* Step to the chosen entry, leaving a back
                                 * pointer to where we came from. */
660                                 e = (struct ip6t_entry *)
661                                         (newinfo->entries + newpos);
662                                 e->counters.pcnt = pos;
663                                 pos = newpos;
664                         }
665                 }
666                 next:
667                 duprintf("Finished chain %u\n", hook);
668         }
669         return 1;
670 }
671
672 static inline int
673 cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
674 {
675         if (i && (*i)-- == 0)
676                 return 1;
677
678         if (m->u.kernel.match->destroy)
679                 m->u.kernel.match->destroy(m->data,
680                                            m->u.match_size - sizeof(*m));
681         module_put(m->u.kernel.match->me);
682         return 0;
683 }
684
685 static inline int
686 standard_check(const struct ip6t_entry_target *t,
687                unsigned int max_offset)
688 {
689         struct ip6t_standard_target *targ = (void *)t;
690
691         /* Check standard info. */
692         if (t->u.target_size
693             != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
694                 duprintf("standard_check: target size %u != %u\n",
695                          t->u.target_size,
696                          IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
697                 return 0;
698         }
699
700         if (targ->verdict >= 0
701             && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
702                 duprintf("ip6t_standard_check: bad verdict (%i)\n",
703                          targ->verdict);
704                 return 0;
705         }
706
707         if (targ->verdict < -NF_MAX_VERDICT - 1) {
708                 duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
709                          targ->verdict);
710                 return 0;
711         }
712         return 1;
713 }
714
715 static inline int
716 check_match(struct ip6t_entry_match *m,
717             const char *name,
718             const struct ip6t_ip6 *ipv6,
719             unsigned int hookmask,
720             unsigned int *i)
721 {
722         int ret;
723         struct ip6t_match *match;
724
725         match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex);
726         if (!match) {
727           //            duprintf("check_match: `%s' not found\n", m->u.name);
728                 return ret;
729         }
730         if (!try_module_get(match->me)) {
731                 up(&ip6t_mutex);
732                 return -ENOENT;
733         }
734         m->u.kernel.match = match;
735         up(&ip6t_mutex);
736
737         if (m->u.kernel.match->checkentry
738             && !m->u.kernel.match->checkentry(name, ipv6, m->data,
739                                               m->u.match_size - sizeof(*m),
740                                               hookmask)) {
741                 module_put(m->u.kernel.match->me);
742                 duprintf("ip_tables: check failed for `%s'.\n",
743                          m->u.kernel.match->name);
744                 return -EINVAL;
745         }
746
747         (*i)++;
748         return 0;
749 }
750
751 static struct ip6t_target ip6t_standard_target;
752
753 static inline int
754 check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
755             unsigned int *i)
756 {
757         struct ip6t_entry_target *t;
758         struct ip6t_target *target;
759         int ret;
760         unsigned int j;
761
762         if (!ip6_checkentry(&e->ipv6)) {
763                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
764                 return -EINVAL;
765         }
766
767         j = 0;
768         ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
769         if (ret != 0)
770                 goto cleanup_matches;
771
772         t = ip6t_get_target(e);
773         target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
774         if (!target) {
775                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
776                 goto cleanup_matches;
777         }
778         if (!try_module_get(target->me)) {
779                 up(&ip6t_mutex);
780                 ret = -ENOENT;
781                 goto cleanup_matches;
782         }
783         t->u.kernel.target = target;
784         up(&ip6t_mutex);
785         if (!t->u.kernel.target) {
786                 ret = -EBUSY;
787                 goto cleanup_matches;
788         }
789         if (t->u.kernel.target == &ip6t_standard_target) {
790                 if (!standard_check(t, size)) {
791                         ret = -EINVAL;
792                         goto cleanup_matches;
793                 }
794         } else if (t->u.kernel.target->checkentry
795                    && !t->u.kernel.target->checkentry(name, e, t->data,
796                                                       t->u.target_size
797                                                       - sizeof(*t),
798                                                       e->comefrom)) {
799                 module_put(t->u.kernel.target->me);
800                 duprintf("ip_tables: check failed for `%s'.\n",
801                          t->u.kernel.target->name);
802                 ret = -EINVAL;
803                 goto cleanup_matches;
804         }
805
806         (*i)++;
807         return 0;
808
809  cleanup_matches:
810         IP6T_MATCH_ITERATE(e, cleanup_match, &j);
811         return ret;
812 }
813
814 static inline int
815 check_entry_size_and_hooks(struct ip6t_entry *e,
816                            struct ip6t_table_info *newinfo,
817                            unsigned char *base,
818                            unsigned char *limit,
819                            const unsigned int *hook_entries,
820                            const unsigned int *underflows,
821                            unsigned int *i)
822 {
823         unsigned int h;
824
825         if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
826             || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
827                 duprintf("Bad offset %p\n", e);
828                 return -EINVAL;
829         }
830
831         if (e->next_offset
832             < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
833                 duprintf("checking: element %p size %u\n",
834                          e, e->next_offset);
835                 return -EINVAL;
836         }
837
838         /* Check hooks & underflows */
839         for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
840                 if ((unsigned char *)e - base == hook_entries[h])
841                         newinfo->hook_entry[h] = hook_entries[h];
842                 if ((unsigned char *)e - base == underflows[h])
843                         newinfo->underflow[h] = underflows[h];
844         }
845
846         /* FIXME: underflows must be unconditional, standard verdicts
847            < 0 (not IP6T_RETURN). --RR */
848
849         /* Clear counters and comefrom */
850         e->counters = ((struct ip6t_counters) { 0, 0 });
851         e->comefrom = 0;
852
853         (*i)++;
854         return 0;
855 }
856
857 static inline int
858 cleanup_entry(struct ip6t_entry *e, unsigned int *i)
859 {
860         struct ip6t_entry_target *t;
861
862         if (i && (*i)-- == 0)
863                 return 1;
864
865         /* Cleanup all matches */
866         IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
867         t = ip6t_get_target(e);
868         if (t->u.kernel.target->destroy)
869                 t->u.kernel.target->destroy(t->data,
870                                             t->u.target_size - sizeof(*t));
871         module_put(t->u.kernel.target->me);
872         return 0;
873 }
874
875 /* Checks and translates the user-supplied table segment (held in
876    newinfo) */
877 static int
878 translate_table(const char *name,
879                 unsigned int valid_hooks,
880                 struct ip6t_table_info *newinfo,
881                 unsigned int size,
882                 unsigned int number,
883                 const unsigned int *hook_entries,
884                 const unsigned int *underflows)
885 {
886         unsigned int i;
887         int ret;
888
889         newinfo->size = size;
890         newinfo->number = number;
891
892         /* Init all hooks to impossible value. */
893         for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
894                 newinfo->hook_entry[i] = 0xFFFFFFFF;
895                 newinfo->underflow[i] = 0xFFFFFFFF;
896         }
897
898         duprintf("translate_table: size %u\n", newinfo->size);
899         i = 0;
900         /* Walk through entries, checking offsets. */
901         ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
902                                 check_entry_size_and_hooks,
903                                 newinfo,
904                                 newinfo->entries,
905                                 newinfo->entries + size,
906                                 hook_entries, underflows, &i);
907         if (ret != 0)
908                 return ret;
909
910         if (i != number) {
911                 duprintf("translate_table: %u not %u entries\n",
912                          i, number);
913                 return -EINVAL;
914         }
915
916         /* Check hooks all assigned */
917         for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
918                 /* Only hooks which are valid */
919                 if (!(valid_hooks & (1 << i)))
920                         continue;
921                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
922                         duprintf("Invalid hook entry %u %u\n",
923                                  i, hook_entries[i]);
924                         return -EINVAL;
925                 }
926                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
927                         duprintf("Invalid underflow %u %u\n",
928                                  i, underflows[i]);
929                         return -EINVAL;
930                 }
931         }
932
933         if (!mark_source_chains(newinfo, valid_hooks))
934                 return -ELOOP;
935
936         /* Finally, each sanity check must pass */
937         i = 0;
938         ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
939                                 check_entry, name, size, &i);
940
941         if (ret != 0) {
942                 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
943                                   cleanup_entry, &i);
944                 return ret;
945         }
946
947         /* And one copy for every other CPU */
948         for (i = 1; i < NR_CPUS; i++) {
949                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
950                        newinfo->entries,
951                        SMP_ALIGN(newinfo->size));
952         }
953
954         return ret;
955 }
956
957 static struct ip6t_table_info *
958 replace_table(struct ip6t_table *table,
959               unsigned int num_counters,
960               struct ip6t_table_info *newinfo,
961               int *error)
962 {
963         struct ip6t_table_info *oldinfo;
964
965 #ifdef CONFIG_NETFILTER_DEBUG
966         {
967                 struct ip6t_entry *table_base;
968                 unsigned int i;
969
970                 for (i = 0; i < NR_CPUS; i++) {
971                         table_base =
972                                 (void *)newinfo->entries
973                                 + TABLE_OFFSET(newinfo, i);
974
975                         table_base->comefrom = 0xdead57ac;
976                 }
977         }
978 #endif
979
980         /* Do the substitution. */
981         write_lock_bh(&table->lock);
982         /* Check inside lock: is the old number correct? */
983         if (num_counters != table->private->number) {
984                 duprintf("num_counters != table->private->number (%u/%u)\n",
985                          num_counters, table->private->number);
986                 write_unlock_bh(&table->lock);
987                 *error = -EAGAIN;
988                 return NULL;
989         }
990         oldinfo = table->private;
991         table->private = newinfo;
992         newinfo->initial_entries = oldinfo->initial_entries;
993         write_unlock_bh(&table->lock);
994
995         return oldinfo;
996 }
997
998 /* Gets counters. */
999 static inline int
1000 add_entry_to_counter(const struct ip6t_entry *e,
1001                      struct ip6t_counters total[],
1002                      unsigned int *i)
1003 {
1004         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1005
1006         (*i)++;
1007         return 0;
1008 }
1009
1010 static void
1011 get_counters(const struct ip6t_table_info *t,
1012              struct ip6t_counters counters[])
1013 {
1014         unsigned int cpu;
1015         unsigned int i;
1016
1017         for (cpu = 0; cpu < NR_CPUS; cpu++) {
1018                 i = 0;
1019                 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
1020                                   t->size,
1021                                   add_entry_to_counter,
1022                                   counters,
1023                                   &i);
1024         }
1025 }
1026
1027 static int
1028 copy_entries_to_user(unsigned int total_size,
1029                      struct ip6t_table *table,
1030                      void __user *userptr)
1031 {
1032         unsigned int off, num, countersize;
1033         struct ip6t_entry *e;
1034         struct ip6t_counters *counters;
1035         int ret = 0;
1036
1037         /* We need atomic snapshot of counters: rest doesn't change
1038            (other than comefrom, which userspace doesn't care
1039            about). */
1040         countersize = sizeof(struct ip6t_counters) * table->private->number;
1041         counters = vmalloc(countersize);
1042
1043         if (counters == NULL)
1044                 return -ENOMEM;
1045
1046         /* First, sum counters... */
1047         memset(counters, 0, countersize);
1048         write_lock_bh(&table->lock);
1049         get_counters(table->private, counters);
1050         write_unlock_bh(&table->lock);
1051
1052         /* ... then copy entire thing from CPU 0... */
1053         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1054                 ret = -EFAULT;
1055                 goto free_counters;
1056         }
1057
1058         /* FIXME: use iterator macros --RR */
1059         /* ... then go back and fix counters and names */
1060         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1061                 unsigned int i;
1062                 struct ip6t_entry_match *m;
1063                 struct ip6t_entry_target *t;
1064
1065                 e = (struct ip6t_entry *)(table->private->entries + off);
1066                 if (copy_to_user(userptr + off
1067                                  + offsetof(struct ip6t_entry, counters),
1068                                  &counters[num],
1069                                  sizeof(counters[num])) != 0) {
1070                         ret = -EFAULT;
1071                         goto free_counters;
1072                 }
1073
1074                 for (i = sizeof(struct ip6t_entry);
1075                      i < e->target_offset;
1076                      i += m->u.match_size) {
1077                         m = (void *)e + i;
1078
1079                         if (copy_to_user(userptr + off + i
1080                                          + offsetof(struct ip6t_entry_match,
1081                                                     u.user.name),
1082                                          m->u.kernel.match->name,
1083                                          strlen(m->u.kernel.match->name)+1)
1084                             != 0) {
1085                                 ret = -EFAULT;
1086                                 goto free_counters;
1087                         }
1088                 }
1089
1090                 t = ip6t_get_target(e);
1091                 if (copy_to_user(userptr + off + e->target_offset
1092                                  + offsetof(struct ip6t_entry_target,
1093                                             u.user.name),
1094                                  t->u.kernel.target->name,
1095                                  strlen(t->u.kernel.target->name)+1) != 0) {
1096                         ret = -EFAULT;
1097                         goto free_counters;
1098                 }
1099         }
1100
1101  free_counters:
1102         vfree(counters);
1103         return ret;
1104 }
1105
1106 static int
1107 get_entries(const struct ip6t_get_entries *entries,
1108             struct ip6t_get_entries __user *uptr)
1109 {
1110         int ret;
1111         struct ip6t_table *t;
1112
1113         t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex);
1114         if (t) {
1115                 duprintf("t->private->number = %u\n",
1116                          t->private->number);
1117                 if (entries->size == t->private->size)
1118                         ret = copy_entries_to_user(t->private->size,
1119                                                    t, uptr->entrytable);
1120                 else {
1121                         duprintf("get_entries: I've got %u not %u!\n",
1122                                  t->private->size,
1123                                  entries->size);
1124                         ret = -EINVAL;
1125                 }
1126                 up(&ip6t_mutex);
1127         } else
1128                 duprintf("get_entries: Can't find %s!\n",
1129                          entries->name);
1130
1131         return ret;
1132 }
1133
1134 static int
1135 do_replace(void __user *user, unsigned int len)
1136 {
1137         int ret;
1138         struct ip6t_replace tmp;
1139         struct ip6t_table *t;
1140         struct ip6t_table_info *newinfo, *oldinfo;
1141         struct ip6t_counters *counters;
1142
1143         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1144                 return -EFAULT;
1145
1146         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1147         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1148                 return -ENOMEM;
1149
1150         newinfo = vmalloc(sizeof(struct ip6t_table_info)
1151                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1152         if (!newinfo)
1153                 return -ENOMEM;
1154
1155         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1156                            tmp.size) != 0) {
1157                 ret = -EFAULT;
1158                 goto free_newinfo;
1159         }
1160
1161         counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
1162         if (!counters) {
1163                 ret = -ENOMEM;
1164                 goto free_newinfo;
1165         }
1166         memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1167
1168         ret = translate_table(tmp.name, tmp.valid_hooks,
1169                               newinfo, tmp.size, tmp.num_entries,
1170                               tmp.hook_entry, tmp.underflow);
1171         if (ret != 0)
1172                 goto free_newinfo_counters;
1173
1174         duprintf("ip_tables: Translated table\n");
1175
1176         t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1177         if (!t)
1178                 goto free_newinfo_counters_untrans;
1179
1180         /* You lied! */
1181         if (tmp.valid_hooks != t->valid_hooks) {
1182                 duprintf("Valid hook crap: %08X vs %08X\n",
1183                          tmp.valid_hooks, t->valid_hooks);
1184                 ret = -EINVAL;
1185                 goto free_newinfo_counters_untrans_unlock;
1186         }
1187
1188         /* Get a reference in advance, we're not allowed fail later */
1189         if (!try_module_get(t->me)) {
1190                 ret = -EBUSY;
1191                 goto free_newinfo_counters_untrans_unlock;
1192         }
1193
1194         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1195         if (!oldinfo)
1196                 goto put_module;
1197
1198         /* Update module usage count based on number of rules */
1199         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1200                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1201         if ((oldinfo->number > oldinfo->initial_entries) || 
1202             (newinfo->number <= oldinfo->initial_entries)) 
1203                 module_put(t->me);
1204         if ((oldinfo->number > oldinfo->initial_entries) &&
1205             (newinfo->number <= oldinfo->initial_entries))
1206                 module_put(t->me);
1207
1208         /* Get the old counters. */
1209         get_counters(oldinfo, counters);
1210         /* Decrease module usage counts and free resource */
1211         IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1212         vfree(oldinfo);
1213         /* Silent error: too late now. */
1214         copy_to_user(tmp.counters, counters,
1215                      sizeof(struct ip6t_counters) * tmp.num_counters);
1216         vfree(counters);
1217         up(&ip6t_mutex);
1218         return 0;
1219
1220  put_module:
1221         module_put(t->me);
1222  free_newinfo_counters_untrans_unlock:
1223         up(&ip6t_mutex);
1224  free_newinfo_counters_untrans:
1225         IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1226  free_newinfo_counters:
1227         vfree(counters);
1228  free_newinfo:
1229         vfree(newinfo);
1230         return ret;
1231 }
1232
1233 /* We're lazy, and add to the first CPU; overflow works its fey magic
1234  * and everything is OK. */
1235 static inline int
1236 add_counter_to_entry(struct ip6t_entry *e,
1237                      const struct ip6t_counters addme[],
1238                      unsigned int *i)
1239 {
1240 #if 0
1241         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1242                  *i,
1243                  (long unsigned int)e->counters.pcnt,
1244                  (long unsigned int)e->counters.bcnt,
1245                  (long unsigned int)addme[*i].pcnt,
1246                  (long unsigned int)addme[*i].bcnt);
1247 #endif
1248
1249         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1250
1251         (*i)++;
1252         return 0;
1253 }
1254
1255 static int
1256 do_add_counters(void __user *user, unsigned int len)
1257 {
1258         unsigned int i;
1259         struct ip6t_counters_info tmp, *paddc;
1260         struct ip6t_table *t;
1261         int ret;
1262
1263         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1264                 return -EFAULT;
1265
1266         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
1267                 return -EINVAL;
1268
1269         paddc = vmalloc(len);
1270         if (!paddc)
1271                 return -ENOMEM;
1272
1273         if (copy_from_user(paddc, user, len) != 0) {
1274                 ret = -EFAULT;
1275                 goto free;
1276         }
1277
1278         t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1279         if (!t)
1280                 goto free;
1281
1282         write_lock_bh(&t->lock);
1283         if (t->private->number != paddc->num_counters) {
1284                 ret = -EINVAL;
1285                 goto unlock_up_free;
1286         }
1287
1288         i = 0;
1289         IP6T_ENTRY_ITERATE(t->private->entries,
1290                           t->private->size,
1291                           add_counter_to_entry,
1292                           paddc->counters,
1293                           &i);
1294  unlock_up_free:
1295         write_unlock_bh(&t->lock);
1296         up(&ip6t_mutex);
1297  free:
1298         vfree(paddc);
1299
1300         return ret;
1301 }
1302
1303 static int
1304 do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1305 {
1306         int ret;
1307
1308         if (!capable(CAP_NET_ADMIN))
1309                 return -EPERM;
1310
1311         switch (cmd) {
1312         case IP6T_SO_SET_REPLACE:
1313                 ret = do_replace(user, len);
1314                 break;
1315
1316         case IP6T_SO_SET_ADD_COUNTERS:
1317                 ret = do_add_counters(user, len);
1318                 break;
1319
1320         default:
1321                 duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
1322                 ret = -EINVAL;
1323         }
1324
1325         return ret;
1326 }
1327
1328 static int
1329 do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1330 {
1331         int ret;
1332
1333         if (!capable(CAP_NET_ADMIN))
1334                 return -EPERM;
1335
1336         switch (cmd) {
1337         case IP6T_SO_GET_INFO: {
1338                 char name[IP6T_TABLE_MAXNAMELEN];
1339                 struct ip6t_table *t;
1340
1341                 if (*len != sizeof(struct ip6t_getinfo)) {
1342                         duprintf("length %u != %u\n", *len,
1343                                  sizeof(struct ip6t_getinfo));
1344                         ret = -EINVAL;
1345                         break;
1346                 }
1347
1348                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1349                         ret = -EFAULT;
1350                         break;
1351                 }
1352                 name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
1353                 t = ip6t_find_table_lock(name, &ret, &ip6t_mutex);
1354                 if (t) {
1355                         struct ip6t_getinfo info;
1356
1357                         info.valid_hooks = t->valid_hooks;
1358                         memcpy(info.hook_entry, t->private->hook_entry,
1359                                sizeof(info.hook_entry));
1360                         memcpy(info.underflow, t->private->underflow,
1361                                sizeof(info.underflow));
1362                         info.num_entries = t->private->number;
1363                         info.size = t->private->size;
1364                         strcpy(info.name, name);
1365
1366                         if (copy_to_user(user, &info, *len) != 0)
1367                                 ret = -EFAULT;
1368                         else
1369                                 ret = 0;
1370
1371                         up(&ip6t_mutex);
1372                 }
1373         }
1374         break;
1375
1376         case IP6T_SO_GET_ENTRIES: {
1377                 struct ip6t_get_entries get;
1378
1379                 if (*len < sizeof(get)) {
1380                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1381                         ret = -EINVAL;
1382                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1383                         ret = -EFAULT;
1384                 } else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
1385                         duprintf("get_entries: %u != %u\n", *len,
1386                                  sizeof(struct ip6t_get_entries) + get.size);
1387                         ret = -EINVAL;
1388                 } else
1389                         ret = get_entries(&get, user);
1390                 break;
1391         }
1392
1393         default:
1394                 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1395                 ret = -EINVAL;
1396         }
1397
1398         return ret;
1399 }
1400
1401 /* Registration hooks for targets. */
1402 int
1403 ip6t_register_target(struct ip6t_target *target)
1404 {
1405         int ret;
1406
1407         ret = down_interruptible(&ip6t_mutex);
1408         if (ret != 0)
1409                 return ret;
1410
1411         if (!list_named_insert(&ip6t_target, target)) {
1412                 duprintf("ip6t_register_target: `%s' already in list!\n",
1413                          target->name);
1414                 ret = -EINVAL;
1415         }
1416         up(&ip6t_mutex);
1417         return ret;
1418 }
1419
1420 void
1421 ip6t_unregister_target(struct ip6t_target *target)
1422 {
1423         down(&ip6t_mutex);
1424         LIST_DELETE(&ip6t_target, target);
1425         up(&ip6t_mutex);
1426 }
1427
1428 int
1429 ip6t_register_match(struct ip6t_match *match)
1430 {
1431         int ret;
1432
1433         ret = down_interruptible(&ip6t_mutex);
1434         if (ret != 0)
1435                 return ret;
1436
1437         if (!list_named_insert(&ip6t_match, match)) {
1438                 duprintf("ip6t_register_match: `%s' already in list!\n",
1439                          match->name);
1440                 ret = -EINVAL;
1441         }
1442         up(&ip6t_mutex);
1443
1444         return ret;
1445 }
1446
1447 void
1448 ip6t_unregister_match(struct ip6t_match *match)
1449 {
1450         down(&ip6t_mutex);
1451         LIST_DELETE(&ip6t_match, match);
1452         up(&ip6t_mutex);
1453 }
1454
1455 int ip6t_register_table(struct ip6t_table *table)
1456 {
1457         int ret;
1458         struct ip6t_table_info *newinfo;
1459         static struct ip6t_table_info bootstrap
1460                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1461
1462         newinfo = vmalloc(sizeof(struct ip6t_table_info)
1463                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1464         if (!newinfo)
1465                 return -ENOMEM;
1466
1467         memcpy(newinfo->entries, table->table->entries, table->table->size);
1468
1469         ret = translate_table(table->name, table->valid_hooks,
1470                               newinfo, table->table->size,
1471                               table->table->num_entries,
1472                               table->table->hook_entry,
1473                               table->table->underflow);
1474         if (ret != 0) {
1475                 vfree(newinfo);
1476                 return ret;
1477         }
1478
1479         ret = down_interruptible(&ip6t_mutex);
1480         if (ret != 0) {
1481                 vfree(newinfo);
1482                 return ret;
1483         }
1484
1485         /* Don't autoload: we'd eat our tail... */
1486         if (list_named_find(&ip6t_tables, table->name)) {
1487                 ret = -EEXIST;
1488                 goto free_unlock;
1489         }
1490
1491         /* Simplifies replace_table code. */
1492         table->private = &bootstrap;
1493         if (!replace_table(table, 0, newinfo, &ret))
1494                 goto free_unlock;
1495
1496         duprintf("table->private->number = %u\n",
1497                  table->private->number);
1498
1499         /* save number of initial entries */
1500         table->private->initial_entries = table->private->number;
1501
1502         table->lock = RW_LOCK_UNLOCKED;
1503         list_prepend(&ip6t_tables, table);
1504
1505  unlock:
1506         up(&ip6t_mutex);
1507         return ret;
1508
1509  free_unlock:
1510         vfree(newinfo);
1511         goto unlock;
1512 }
1513
1514 void ip6t_unregister_table(struct ip6t_table *table)
1515 {
1516         down(&ip6t_mutex);
1517         LIST_DELETE(&ip6t_tables, table);
1518         up(&ip6t_mutex);
1519
1520         /* Decrease module usage counts and free resources */
1521         IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
1522                           cleanup_entry, NULL);
1523         vfree(table->private);
1524 }
1525
/* Range test for a port number: the result is whether port lies in
   [min, max] (both ends inclusive), XORed with invert.  Callers pass
   invert as 0 or 1, so the result is 0 or 1. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1535
1536 static int
1537 tcp_find_option(u_int8_t option,
1538                 const struct tcphdr *tcp,
1539                 u_int16_t datalen,
1540                 int invert,
1541                 int *hotdrop)
1542 {
1543         unsigned int i = sizeof(struct tcphdr);
1544         const u_int8_t *opt = (u_int8_t *)tcp;
1545
1546         duprintf("tcp_match: finding option\n");
1547         /* If we don't have the whole header, drop packet. */
1548         if (tcp->doff * 4 < sizeof(struct tcphdr) ||
1549             tcp->doff * 4 > datalen) {
1550                 *hotdrop = 1;
1551                 return 0;
1552         }
1553
1554         while (i < tcp->doff * 4) {
1555                 if (opt[i] == option) return !invert;
1556                 if (opt[i] < 2) i++;
1557                 else i += opt[i+1]?:1;
1558         }
1559
1560         return invert;
1561 }
1562
1563 static int
1564 tcp_match(const struct sk_buff *skb,
1565           const struct net_device *in,
1566           const struct net_device *out,
1567           const void *matchinfo,
1568           int offset,
1569           const void *hdr,
1570           u_int16_t datalen,
1571           int *hotdrop)
1572 {
1573         const struct tcphdr *tcp;
1574         const struct ip6t_tcp *tcpinfo = matchinfo;
1575         int tcpoff;
1576         u8 nexthdr = skb->nh.ipv6h->nexthdr;
1577
1578         /* To quote Alan:
1579
1580            Don't allow a fragment of TCP 8 bytes in. Nobody normal
1581            causes this. Its a cracker trying to break in by doing a
1582            flag overwrite to pass the direction checks.
1583         */
1584
1585         if (offset == 1) {
1586                 duprintf("Dropping evil TCP offset=1 frag.\n");
1587                 *hotdrop = 1;
1588                 return 0;
1589         } else if (offset == 0 && datalen < sizeof(struct tcphdr)) {
1590                 /* We've been asked to examine this packet, and we
1591                    can't.  Hence, no choice but to drop. */
1592                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1593                 *hotdrop = 1;
1594                 return 0;
1595         }
1596
1597         tcpoff = (u8*)(skb->nh.ipv6h + 1) - skb->data;
1598         tcpoff = ipv6_skip_exthdr(skb, tcpoff, &nexthdr, skb->len - tcpoff);
1599         if (tcpoff < 0 || tcpoff > skb->len) {
1600                 duprintf("tcp_match: cannot skip exthdr. Dropping.\n");
1601                 *hotdrop = 1;
1602                 return 0;
1603         } else if (nexthdr == IPPROTO_FRAGMENT)
1604                 return 0;
1605         else if (nexthdr != IPPROTO_TCP ||
1606                  skb->len - tcpoff < sizeof(struct tcphdr)) {
1607                 /* cannot be occured */
1608                 duprintf("tcp_match: cannot get TCP header. Dropping.\n");
1609                 *hotdrop = 1;
1610                 return 0;
1611         }
1612
1613         tcp = (struct tcphdr *)(skb->data + tcpoff);
1614
1615         /* FIXME: Try tcp doff >> packet len against various stacks --RR */
1616
1617 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1618
1619         /* Must not be a fragment. */
1620         return !offset
1621                 && port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1622                               ntohs(tcp->source),
1623                               !!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT))
1624                 && port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1625                               ntohs(tcp->dest),
1626                               !!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT))
1627                 && FWINVTCP((((unsigned char *)tcp)[13]
1628                              & tcpinfo->flg_mask)
1629                             == tcpinfo->flg_cmp,
1630                             IP6T_TCP_INV_FLAGS)
1631                 && (!tcpinfo->option
1632                     || tcp_find_option(tcpinfo->option, tcp, datalen,
1633                                        tcpinfo->invflags
1634                                        & IP6T_TCP_INV_OPTION,
1635                                        hotdrop));
1636 }
1637
1638 /* Called when user tries to insert an entry of this type. */
1639 static int
1640 tcp_checkentry(const char *tablename,
1641                const struct ip6t_ip6 *ipv6,
1642                void *matchinfo,
1643                unsigned int matchsize,
1644                unsigned int hook_mask)
1645 {
1646         const struct ip6t_tcp *tcpinfo = matchinfo;
1647
1648         /* Must specify proto == TCP, and no unknown invflags */
1649         return ipv6->proto == IPPROTO_TCP
1650                 && !(ipv6->invflags & IP6T_INV_PROTO)
1651                 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
1652                 && !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
1653 }
1654
1655 static int
1656 udp_match(const struct sk_buff *skb,
1657           const struct net_device *in,
1658           const struct net_device *out,
1659           const void *matchinfo,
1660           int offset,
1661           const void *hdr,
1662           u_int16_t datalen,
1663           int *hotdrop)
1664 {
1665         const struct udphdr *udp;
1666         const struct ip6t_udp *udpinfo = matchinfo;
1667         int udpoff;
1668         u8 nexthdr = skb->nh.ipv6h->nexthdr;
1669
1670         if (offset == 0 && datalen < sizeof(struct udphdr)) {
1671                 /* We've been asked to examine this packet, and we
1672                    can't.  Hence, no choice but to drop. */
1673                 duprintf("Dropping evil UDP tinygram.\n");
1674                 *hotdrop = 1;
1675                 return 0;
1676         }
1677
1678         udpoff = (u8*)(skb->nh.ipv6h + 1) - skb->data;
1679         udpoff = ipv6_skip_exthdr(skb, udpoff, &nexthdr, skb->len - udpoff);
1680         if (udpoff < 0 || udpoff > skb->len) {
1681                 duprintf("udp_match: cannot skip exthdr. Dropping.\n");
1682                 *hotdrop = 1;
1683                 return 0;
1684         } else if (nexthdr == IPPROTO_FRAGMENT)
1685                 return 0;
1686         else if (nexthdr != IPPROTO_UDP ||
1687                  skb->len - udpoff < sizeof(struct udphdr)) {
1688                 duprintf("udp_match: cannot get UDP header. Dropping.\n");
1689                 *hotdrop = 1;
1690                 return 0;
1691         }
1692
1693         udp = (struct udphdr *)(skb->data + udpoff);
1694
1695         /* Must not be a fragment. */
1696         return !offset
1697                 && port_match(udpinfo->spts[0], udpinfo->spts[1],
1698                               ntohs(udp->source),
1699                               !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
1700                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1701                               ntohs(udp->dest),
1702                               !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
1703 }
1704
1705 /* Called when user tries to insert an entry of this type. */
1706 static int
1707 udp_checkentry(const char *tablename,
1708                const struct ip6t_ip6 *ipv6,
1709                void *matchinfo,
1710                unsigned int matchinfosize,
1711                unsigned int hook_mask)
1712 {
1713         const struct ip6t_udp *udpinfo = matchinfo;
1714
1715         /* Must specify proto == UDP, and no unknown invflags */
1716         if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
1717                 duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
1718                          IPPROTO_UDP);
1719                 return 0;
1720         }
1721         if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
1722                 duprintf("ip6t_udp: matchsize %u != %u\n",
1723                          matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
1724                 return 0;
1725         }
1726         if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
1727                 duprintf("ip6t_udp: unknown flags %X\n",
1728                          udpinfo->invflags);
1729                 return 0;
1730         }
1731
1732         return 1;
1733 }
1734
/*
 * Test an ICMPv6 type/code pair against a rule.
 *
 * The pair matches when @type equals @test_type and @code lies in the
 * inclusive range [@min_code, @max_code].  That 0/1 result is then
 * XORed with @invert and returned.
 */
static inline int
icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		      u_int8_t type, u_int8_t code,
		      int invert)
{
	int in_range = 0;

	if (type == test_type && min_code <= code && code <= max_code)
		in_range = 1;

	return in_range ^ invert;
}
1744
1745 static int
1746 icmp6_match(const struct sk_buff *skb,
1747            const struct net_device *in,
1748            const struct net_device *out,
1749            const void *matchinfo,
1750            int offset,
1751            const void *hdr,
1752            u_int16_t datalen,
1753            int *hotdrop)
1754 {
1755         const struct icmp6hdr *icmp = hdr;
1756         const struct ip6t_icmp *icmpinfo = matchinfo;
1757
1758         if (offset == 0 && datalen < 2) {
1759                 /* We've been asked to examine this packet, and we
1760                    can't.  Hence, no choice but to drop. */
1761                 duprintf("Dropping evil ICMP tinygram.\n");
1762                 *hotdrop = 1;
1763                 return 0;
1764         }
1765
1766         /* Must not be a fragment. */
1767         return !offset
1768                 && icmp6_type_code_match(icmpinfo->type,
1769                                         icmpinfo->code[0],
1770                                         icmpinfo->code[1],
1771                                         icmp->icmp6_type, icmp->icmp6_code,
1772                                         !!(icmpinfo->invflags&IP6T_ICMP_INV));
1773 }
1774
1775 /* Called when user tries to insert an entry of this type. */
1776 static int
1777 icmp6_checkentry(const char *tablename,
1778            const struct ip6t_ip6 *ipv6,
1779            void *matchinfo,
1780            unsigned int matchsize,
1781            unsigned int hook_mask)
1782 {
1783         const struct ip6t_icmp *icmpinfo = matchinfo;
1784
1785         /* Must specify proto == ICMP, and no unknown invflags */
1786         return ipv6->proto == IPPROTO_ICMPV6
1787                 && !(ipv6->invflags & IP6T_INV_PROTO)
1788                 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_icmp))
1789                 && !(icmpinfo->invflags & ~IP6T_ICMP_INV);
1790 }
1791
1792 /* The built-in targets: standard (NULL) and error. */
1793 static struct ip6t_target ip6t_standard_target = {
1794         .name           = IP6T_STANDARD_TARGET,
1795 };
1796
1797 static struct ip6t_target ip6t_error_target = {
1798         .name           = IP6T_ERROR_TARGET,
1799         .target         = ip6t_error,
1800 };
1801
1802 static struct nf_sockopt_ops ip6t_sockopts = {
1803         .pf             = PF_INET6,
1804         .set_optmin     = IP6T_BASE_CTL,
1805         .set_optmax     = IP6T_SO_SET_MAX+1,
1806         .set            = do_ip6t_set_ctl,
1807         .get_optmin     = IP6T_BASE_CTL,
1808         .get_optmax     = IP6T_SO_GET_MAX+1,
1809         .get            = do_ip6t_get_ctl,
1810 };
1811
1812 static struct ip6t_match tcp_matchstruct = {
1813         .name           = "tcp",
1814         .match          = &tcp_match,
1815         .checkentry     = &tcp_checkentry,
1816 };
1817
1818 static struct ip6t_match udp_matchstruct = {
1819         .name           = "udp",
1820         .match          = &udp_match,
1821         .checkentry     = &udp_checkentry,
1822 };
1823
1824 static struct ip6t_match icmp6_matchstruct = {
1825         .name           = "icmp6",
1826         .match          = &icmp6_match,
1827         .checkentry     = &icmp6_checkentry,
1828 };
1829
1830 #ifdef CONFIG_PROC_FS
1831 static inline int print_name(const char *i,
1832                              off_t start_offset, char *buffer, int length,
1833                              off_t *pos, unsigned int *count)
1834 {
1835         if ((*count)++ >= start_offset) {
1836                 unsigned int namelen;
1837
1838                 namelen = sprintf(buffer + *pos, "%s\n",
1839                                   i + sizeof(struct list_head));
1840                 if (*pos + namelen > length) {
1841                         /* Stop iterating */
1842                         return 1;
1843                 }
1844                 *pos += namelen;
1845         }
1846         return 0;
1847 }
1848
1849 static inline int print_target(const struct ip6t_target *t,
1850                                off_t start_offset, char *buffer, int length,
1851                                off_t *pos, unsigned int *count)
1852 {
1853         if (t == &ip6t_standard_target || t == &ip6t_error_target)
1854                 return 0;
1855         return print_name((char *)t, start_offset, buffer, length, pos, count);
1856 }
1857
1858 static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
1859 {
1860         off_t pos = 0;
1861         unsigned int count = 0;
1862
1863         if (down_interruptible(&ip6t_mutex) != 0)
1864                 return 0;
1865
1866         LIST_FIND(&ip6t_tables, print_name, char *,
1867                   offset, buffer, length, &pos, &count);
1868
1869         up(&ip6t_mutex);
1870
1871         /* `start' hack - see fs/proc/generic.c line ~105 */
1872         *start=(char *)((unsigned long)count-offset);
1873         return pos;
1874 }
1875
1876 static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
1877 {
1878         off_t pos = 0;
1879         unsigned int count = 0;
1880
1881         if (down_interruptible(&ip6t_mutex) != 0)
1882                 return 0;
1883
1884         LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
1885                   offset, buffer, length, &pos, &count);
1886
1887         up(&ip6t_mutex);
1888
1889         *start = (char *)((unsigned long)count - offset);
1890         return pos;
1891 }
1892
1893 static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
1894 {
1895         off_t pos = 0;
1896         unsigned int count = 0;
1897
1898         if (down_interruptible(&ip6t_mutex) != 0)
1899                 return 0;
1900
1901         LIST_FIND(&ip6t_match, print_name, char *,
1902                   offset, buffer, length, &pos, &count);
1903
1904         up(&ip6t_mutex);
1905
1906         *start = (char *)((unsigned long)count - offset);
1907         return pos;
1908 }
1909
1910 static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
1911 { { "ip6_tables_names", ip6t_get_tables },
1912   { "ip6_tables_targets", ip6t_get_targets },
1913   { "ip6_tables_matches", ip6t_get_matches },
1914   { NULL, NULL} };
1915 #endif /*CONFIG_PROC_FS*/
1916
1917 static int __init init(void)
1918 {
1919         int ret;
1920
1921         /* Noone else will be downing sem now, so we won't sleep */
1922         down(&ip6t_mutex);
1923         list_append(&ip6t_target, &ip6t_standard_target);
1924         list_append(&ip6t_target, &ip6t_error_target);
1925         list_append(&ip6t_match, &tcp_matchstruct);
1926         list_append(&ip6t_match, &udp_matchstruct);
1927         list_append(&ip6t_match, &icmp6_matchstruct);
1928         up(&ip6t_mutex);
1929
1930         /* Register setsockopt */
1931         ret = nf_register_sockopt(&ip6t_sockopts);
1932         if (ret < 0) {
1933                 duprintf("Unable to register sockopts.\n");
1934                 return ret;
1935         }
1936
1937 #ifdef CONFIG_PROC_FS
1938         {
1939                 struct proc_dir_entry *proc;
1940                 int i;
1941
1942                 for (i = 0; ip6t_proc_entry[i].name; i++) {
1943                         proc = proc_net_create(ip6t_proc_entry[i].name, 0,
1944                                                ip6t_proc_entry[i].get_info);
1945                         if (!proc) {
1946                                 while (--i >= 0)
1947                                        proc_net_remove(ip6t_proc_entry[i].name);
1948                                 nf_unregister_sockopt(&ip6t_sockopts);
1949                                 return -ENOMEM;
1950                         }
1951                         proc->owner = THIS_MODULE;
1952                 }
1953         }
1954 #endif
1955
1956         printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
1957         return 0;
1958 }
1959
1960 static void __exit fini(void)
1961 {
1962         nf_unregister_sockopt(&ip6t_sockopts);
1963 #ifdef CONFIG_PROC_FS
1964         {
1965                 int i;
1966                 for (i = 0; ip6t_proc_entry[i].name; i++)
1967                         proc_net_remove(ip6t_proc_entry[i].name);
1968         }
1969 #endif
1970 }
1971
/* Table registration and traversal. */
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);

/* Match extensions. */
EXPORT_SYMBOL(ip6t_register_match);
EXPORT_SYMBOL(ip6t_unregister_match);

/* Target extensions. */
EXPORT_SYMBOL(ip6t_register_target);
EXPORT_SYMBOL(ip6t_unregister_target);
EXPORT_SYMBOL(ip6t_find_target_lock);

/* Helpers. */
EXPORT_SYMBOL(ip6t_ext_hdr);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);