kernel.org linux-2.6.10
[linux-2.6.git] / net / ipv6 / netfilter / ip6_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
15  *      - new extension header parser code
16  */
17 #include <linux/config.h>
18 #include <linux/skbuff.h>
19 #include <linux/kmod.h>
20 #include <linux/vmalloc.h>
21 #include <linux/netdevice.h>
22 #include <linux/module.h>
23 #include <linux/tcp.h>
24 #include <linux/udp.h>
25 #include <linux/icmpv6.h>
26 #include <net/ip.h>
27 #include <net/ipv6.h>
28 #include <asm/uaccess.h>
29 #include <asm/semaphore.h>
30 #include <linux/proc_fs.h>
31
32 #include <linux/netfilter_ipv6/ip6_tables.h>
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36 MODULE_DESCRIPTION("IPv6 packet filter");
37
/* Size in bytes of struct ipv6hdr; used below as the offset at which the
 * extension-header walk starts and when accounting packet bytes. */
38 #define IPV6_HDR_LEN    (sizeof(struct ipv6hdr))
/* Size in bytes of struct ipv6_opt_hdr; the minimum number of bytes that
 * must remain in the packet before another extension header is parsed. */
39 #define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
40
41 /*#define DEBUG_IP_FIREWALL*/
42 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
43 /*#define DEBUG_IP_FIREWALL_USER*/
44
/* dprintf(): debug tracing used by the packet-matching code in this file.
 * Expands to printk() when DEBUG_IP_FIREWALL is defined, to nothing
 * otherwise (so its arguments are not evaluated in normal builds). */
45 #ifdef DEBUG_IP_FIREWALL
46 #define dprintf(format, args...)  printk(format , ## args)
47 #else
48 #define dprintf(format, args...)
49 #endif
50
/* duprintf(): debug tracing used by the rule-checking and list-lookup code
 * in this file.  Expands to printk() when DEBUG_IP_FIREWALL_USER is
 * defined, to nothing otherwise. */
51 #ifdef DEBUG_IP_FIREWALL_USER
52 #define duprintf(format, args...) printk(format , ## args)
53 #else
54 #define duprintf(format, args...)
55 #endif
56
/* IP_NF_ASSERT(x): soft assertion.  With CONFIG_NETFILTER_DEBUG it prints
 * function, file and line via printk() when x is false and then carries
 * on; without it the macro expands to nothing and x is not evaluated. */
57 #ifdef CONFIG_NETFILTER_DEBUG
58 #define IP_NF_ASSERT(x)                                         \
59 do {                                                            \
60         if (!(x))                                               \
61                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
62                        __FUNCTION__, __FILE__, __LINE__);       \
63 } while(0)
64 #else
65 #define IP_NF_ASSERT(x)
66 #endif
/* SMP_ALIGN(x): round x up to the next multiple of SMP_CACHE_BYTES.
 * The mask arithmetic assumes SMP_CACHE_BYTES is a power of two. */
67 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
68
/* Semaphore passed to the find_*_lock() helpers in this file; it is held
 * while the registered match/target/table lists are being searched. */
69 static DECLARE_MUTEX(ip6t_mutex);
70
71 /* Must have mutex */
/* Both assertions check that ip6t_mutex is already held by expecting
 * down_trylock() to fail (non-zero).  They are defined just before the
 * lockhelp.h/listhelp.h includes, presumably for use by those headers.
 * NOTE(review): if the mutex is NOT held, down_trylock() presumably
 * acquires it and nothing here releases it again -- confirm.  Only
 * relevant with CONFIG_NETFILTER_DEBUG; otherwise IP_NF_ASSERT is empty. */
72 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
73 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
74 #include <linux/netfilter_ipv4/lockhelp.h>
75 #include <linux/netfilter_ipv4/listhelp.h>
76
77 #if 0
78 /* All the better to debug you with... */
79 #define static
80 #define inline
81 #endif
82
83 /* Locking is simple: we assume at worst case there will be one packet
84    in user context and one from bottom halves (or soft irq if Alexey's
85    softnet patch was applied).
86
87    We keep a set of rules for each CPU, so we can avoid write-locking
88    them; doing a readlock_bh() stops packets coming through if we're
89    in user context.
90
91    To be cache friendly on SMP, we arrange them like so:
92    [ n-entries ]
93    ... cache-align padding ...
94    [ n-entries ]
95
96    Hence the start of any table is given by get_table() below.  */
97
98 /* The table itself */
/* Header describing one rule set.  The rule blob itself follows the header
 * in `entries'; on SMP, TABLE_OFFSET() locates the copy used by a given
 * CPU.  Field order and sizes define the memory layout and must not be
 * changed casually. */
99 struct ip6t_table_info
100 {
101         /* Size in bytes of one copy of the rule blob (see TABLE_OFFSET) */
102         unsigned int size;
103         /* Number of entries: FIXME. --RR */
104         unsigned int number;
105         /* Initial number of entries. Needed for module usage count */
106         unsigned int initial_entries;
107
108         /* Per-hook byte offsets into the rule blob: where traversal
             * starts (hook_entry) and the initial "return" position used
             * by ip6t_do_table (underflow). */
109         unsigned int hook_entry[NF_IP6_NUMHOOKS];
110         unsigned int underflow[NF_IP6_NUMHOOKS];
111
112         /* ip6t_entry tables: one per CPU */
113         char entries[0] ____cacheline_aligned;
114 };
115
/* Lists searched by name through find_inlist_lock(): targets, matches and
 * tables respectively. */
116 static LIST_HEAD(ip6t_target);
117 static LIST_HEAD(ip6t_match);
118 static LIST_HEAD(ip6t_tables);
/* ADD_COUNTER(c,b,p): add b to the byte counter and p to the packet
 * counter of counter pair c. */
119 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
120
/* TABLE_OFFSET(t,p): byte offset, within t->entries, of the rule-blob copy
 * belonging to CPU number p.  Copies are spaced SMP_ALIGN(t->size) bytes
 * apart on SMP; on UP there is a single copy at offset 0. */
121 #ifdef CONFIG_SMP
122 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
123 #else
124 #define TABLE_OFFSET(t,p) 0
125 #endif
126
127 #if 0
128 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
129 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
130 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
131 #endif
132
/*
 * Compare two IPv6 addresses under a mask.
 *
 * Returns 0 when addr1 and addr2 agree on every bit that is set in mask,
 * and 1 as soon as a byte is found where they differ under the mask.
 */
static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
                              struct in6_addr addr2)
{
        unsigned int byte = 0;

        while (byte < 16) {
                /* (a & m) != (b & m) is the same as ((a ^ b) & m) != 0 */
                if ((addr1.s6_addr[byte] ^ addr2.s6_addr[byte])
                    & mask.s6_addr[byte])
                        return 1;
                byte++;
        }
        return 0;
}
144
145 /* Check for an extension */
146 int 
147 ip6t_ext_hdr(u8 nexthdr)
148 {
149         return ( (nexthdr == IPPROTO_HOPOPTS)   ||
150                  (nexthdr == IPPROTO_ROUTING)   ||
151                  (nexthdr == IPPROTO_FRAGMENT)  ||
152                  (nexthdr == IPPROTO_ESP)       ||
153                  (nexthdr == IPPROTO_AH)        ||
154                  (nexthdr == IPPROTO_NONE)      ||
155                  (nexthdr == IPPROTO_DSTOPTS) );
156 }
157
/*
 * Match the IPv6-level part of a rule (ip6info) against a packet.
 *
 * skb      packet; its IPv6 header is read through skb->nh.ipv6h
 * indev    input interface name  (read as IFNAMSIZ bytes, word by word)
 * outdev   output interface name (read as IFNAMSIZ bytes, word by word)
 * ip6info  addresses, masks, interface names/masks, protocol and flags
 * protoff  out: offset of the header that follows the extension headers
 * fragoff  out: fragment offset taken from a fragment header, else 0
 *
 * *protoff and *fragoff are written ONLY when IP6T_F_PROTO is set in
 * ip6info->flags; otherwise the caller's previous values are left as is.
 *
 * Returns 1 if the packet matches, 0 if it does not.
 */
158 /* Returns whether matches rule or not. */
159 static inline int
160 ip6_packet_match(const struct sk_buff *skb,
161                  const char *indev,
162                  const char *outdev,
163                  const struct ip6t_ip6 *ip6info,
164                  unsigned int *protoff,
165                  int *fragoff)
166 {
167         size_t i;
168         unsigned long ret;
169         const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
170
            /* FWINV: result of a mismatch test, flipped when the rule has
             * the corresponding inversion flag set. */
171 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
172
173         if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
174                   IP6T_INV_SRCIP)
175             || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
176                      IP6T_INV_DSTIP)) {
177                 dprintf("Source or dest mismatch.\n");
178 /*
179                 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
180                         ipinfo->smsk.s_addr, ipinfo->src.s_addr,
181                         ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
182                 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
183                         ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
184                         ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
185                 return 0;
186         }
187
188         /* Look for ifname matches; this should unroll nicely. */
            /* The names are compared one unsigned long at a time, so both
             * buffers must be at least IFNAMSIZ bytes long; ret collects
             * every differing bit that the rule's mask cares about. */
189         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
190                 ret |= (((const unsigned long *)indev)[i]
191                         ^ ((const unsigned long *)ip6info->iniface)[i])
192                         & ((const unsigned long *)ip6info->iniface_mask)[i];
193         }
194
195         if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
196                 dprintf("VIA in mismatch (%s vs %s).%s\n",
197                         indev, ip6info->iniface,
198                         ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
199                 return 0;
200         }
201
202         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
203                 ret |= (((const unsigned long *)outdev)[i]
204                         ^ ((const unsigned long *)ip6info->outiface)[i])
205                         & ((const unsigned long *)ip6info->outiface_mask)[i];
206         }
207
208         if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
209                 dprintf("VIA out mismatch (%s vs %s).%s\n",
210                         outdev, ip6info->outiface,
211                         ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
212                 return 0;
213         }
214
215 /* ... might want to do something with class and flowlabel here ... */
216
217         /* look for the desired protocol header */
218         if((ip6info->flags & IP6T_F_PROTO)) {
219                 u_int8_t currenthdr = ipv6->nexthdr;
220                 struct ipv6_opt_hdr _hdr, *hp;
                    /* NOTE(review): ptr and hdrlen are 16-bit; confirm that
                     * ptr += hdrlen cannot wrap for very large packets. */
221                 u_int16_t ptr;          /* Header offset in skb */
222                 u_int16_t hdrlen;       /* Header */
223                 u_int16_t _fragoff = 0, *fp = NULL;
224
225                 ptr = IPV6_HDR_LEN;
226
                    /* Step over extension headers until currenthdr is no
                     * longer one of them (see ip6t_ext_hdr). */
227                 while (ip6t_ext_hdr(currenthdr)) {
228                         /* Is there enough space for the next ext header? */
229                         if (skb->len - ptr < IPV6_OPTHDR_LEN)
230                                 return 0;
231
232                         /* NONE or ESP: there isn't protocol part */
233                         /* If we want to count these packets in '-p all',
234                          * we will change the return 0 to 1*/
235                         if ((currenthdr == IPPROTO_NONE) || 
236                                 (currenthdr == IPPROTO_ESP))
237                                 return 0;
238
239                         hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
                            /* The length check above is relied on to make
                             * this lookup succeed. */
240                         BUG_ON(hp == NULL);
241
242                         /* Size calculation */
                            /* Fragment header: fixed 8 bytes.  AH: its
                             * hdrlen field counts 4-byte units, plus 2.
                             * Everything else: ipv6_optlen(). */
243                         if (currenthdr == IPPROTO_FRAGMENT) {
244                                 fp = skb_header_pointer(skb,
245                                                    ptr+offsetof(struct frag_hdr,
246                                                                 frag_off),
247                                                    sizeof(_fragoff),
248                                                    &_fragoff);
249                                 if (fp == NULL)
250                                         return 0;
251
                                    /* Keep only the offset part; the low
                                     * three bits are masked off. */
252                                 _fragoff = ntohs(*fp) & ~0x7;
253                                 hdrlen = 8;
254                         } else if (currenthdr == IPPROTO_AH)
255                                 hdrlen = (hp->hdrlen+2)<<2;
256                         else
257                                 hdrlen = ipv6_optlen(hp);
258
259                         currenthdr = hp->nexthdr;
260                         ptr += hdrlen;
261                         /* ptr is too large */
262                         if ( ptr > skb->len ) 
263                                 return 0;
                            /* Non-zero fragment offset: stop walking here.
                             * If yet another extension header would follow,
                             * the rule does not match. */
264                         if (_fragoff) {
265                                 if (ip6t_ext_hdr(currenthdr))
266                                         return 0;
267                                 break;
268                         }
269                 }
270
271                 *protoff = ptr;
272                 *fragoff = _fragoff;
273
274                 /* currenthdr contains the protocol header */
275
276                 dprintf("Packet protocol %hi ?= %s%hi.\n",
277                                 currenthdr, 
278                                 ip6info->invflags & IP6T_INV_PROTO ? "!":"",
279                                 ip6info->proto);
280
                    /* Same protocol: match unless the rule is inverted. */
281                 if (ip6info->proto == currenthdr) {
282                         if(ip6info->invflags & IP6T_INV_PROTO) {
283                                 return 0;
284                         }
285                         return 1;
286                 }
287
288                 /* We need match for the '-p all', too! */
                    /* Different protocol: fail only for a specific
                     * (non-zero), non-inverted protocol rule. */
289                 if ((ip6info->proto != 0) &&
290                         !(ip6info->invflags & IP6T_INV_PROTO))
291                         return 0;
292         }
293         return 1;
294 }
295
296 /* should be ip6 safe */
297 static inline int 
298 ip6_checkentry(const struct ip6t_ip6 *ipv6)
299 {
300         if (ipv6->flags & ~IP6T_F_MASK) {
301                 duprintf("Unknown flag bits set: %08X\n",
302                          ipv6->flags & ~IP6T_F_MASK);
303                 return 0;
304         }
305         if (ipv6->invflags & ~IP6T_INV_MASK) {
306                 duprintf("Unknown invflag bits set: %08X\n",
307                          ipv6->invflags & ~IP6T_INV_MASK);
308                 return 0;
309         }
310         return 1;
311 }
312
313 static unsigned int
314 ip6t_error(struct sk_buff **pskb,
315           const struct net_device *in,
316           const struct net_device *out,
317           unsigned int hooknum,
318           const void *targinfo,
319           void *userinfo)
320 {
321         if (net_ratelimit())
322                 printk("ip6_tables: error: `%s'\n", (char *)targinfo);
323
324         return NF_DROP;
325 }
326
327 static inline
328 int do_match(struct ip6t_entry_match *m,
329              const struct sk_buff *skb,
330              const struct net_device *in,
331              const struct net_device *out,
332              int offset,
333              unsigned int protoff,
334              int *hotdrop)
335 {
336         /* Stop iteration if it doesn't match */
337         if (!m->u.kernel.match->match(skb, in, out, m->data,
338                                       offset, protoff, hotdrop))
339                 return 1;
340         else
341                 return 0;
342 }
343
/* Return the rule located `offset' bytes past `base'. */
static inline struct ip6t_entry *
get_entry(void *base, unsigned int offset)
{
        char *bytes = base;

        return (struct ip6t_entry *)(bytes + offset);
}
349
/*
 * Run a packet through the rules of `table' for netfilter hook `hook'.
 *
 * pskb      packet (may be linearized here)
 * hook      hook number; indexes hook_entry[] / underflow[]
 * in, out   input/output devices, either may be NULL
 * table     table whose rules are traversed under table->lock (read, bh)
 * userdata  passed through unchanged to target handlers
 *
 * Traversal starts at hook_entry[hook] in this CPU's copy of the rules and
 * continues until a target yields a verdict or a match sets hotdrop.
 * Returns NF_DROP if linearizing failed or hotdrop was set, otherwise the
 * verdict (always NF_ACCEPT when DEBUG_ALLOW_ALL is defined).
 */
350 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
351 unsigned int
352 ip6t_do_table(struct sk_buff **pskb,
353               unsigned int hook,
354               const struct net_device *in,
355               const struct net_device *out,
356               struct ip6t_table *table,
357               void *userdata)
358 {
            /* All-zero name used when a device pointer is NULL. */
359         static const char nulldevname[IFNAMSIZ];
360         int offset = 0;
361         unsigned int protoff = 0;
362         int hotdrop = 0;
363         /* Initializing verdict to NF_DROP keeps gcc happy. */
364         unsigned int verdict = NF_DROP;
365         const char *indev, *outdev;
366         void *table_base;
367         struct ip6t_entry *e, *back;
368
369         /* FIXME: Push down to extensions --RR */
370         if (skb_is_nonlinear(*pskb) && skb_linearize(*pskb, GFP_ATOMIC) != 0)
371                 return NF_DROP;
372
373         /* Initialization */
374         indev = in ? in->name : nulldevname;
375         outdev = out ? out->name : nulldevname;
376
377         /* We handle fragments by dealing with the first fragment as
378          * if it was a normal packet.  All other fragments are treated
379          * normally, except that they will NEVER match rules that ask
380          * things we don't know, ie. tcp syn flag or ports).  If the
381          * rule is also a fragment-specific rule, non-fragments won't
382          * match it. */
383
384         read_lock_bh(&table->lock);
385         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
            /* Select the copy of the rules that belongs to this CPU. */
386         table_base = (void *)table->private->entries
387                 + TABLE_OFFSET(table->private, smp_processor_id());
388         e = get_entry(table_base, table->private->hook_entry[hook]);
389
390 #ifdef CONFIG_NETFILTER_DEBUG
391         /* Check noone else using our table */
            /* The first entry's comefrom doubles as a debug marker:
             * 0x57acc001 while traversing, 0xeeeeeeec while inside a
             * target handler, 0xdead57ac once traversal has finished. */
392         if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
393             && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
394                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
395                        smp_processor_id(),
396                        table->name,
397                        &((struct ip6t_entry *)table_base)->comefrom,
398                        ((struct ip6t_entry *)table_base)->comefrom);
399         }
400         ((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
401 #endif
402
403         /* For return from builtin chain */
404         back = get_entry(table_base, table->private->underflow[hook]);
405
406         do {
407                 IP_NF_ASSERT(e);
408                 IP_NF_ASSERT(back);
409                 (*pskb)->nfcache |= e->nfcache;
410                 if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
411                         &protoff, &offset)) {
412                         struct ip6t_entry_target *t;
413
                            /* do_match returns non-zero on the first match
                             * extension that rejects the packet. */
414                         if (IP6T_MATCH_ITERATE(e, do_match,
415                                                *pskb, in, out,
416                                                offset, protoff, &hotdrop) != 0)
417                                 goto no_match;
418
                            /* Rule matched: account payload length plus the
                             * fixed header as bytes, and one packet. */
419                         ADD_COUNTER(e->counters,
420                                     ntohs((*pskb)->nh.ipv6h->payload_len)
421                                     + IPV6_HDR_LEN,
422                                     1);
423
424                         t = ip6t_get_target(e);
425                         IP_NF_ASSERT(t->u.kernel.target);
426                         /* Standard target? */
                            /* A target without a handler function is
                             * treated as the standard target; its verdict
                             * field encodes what to do. */
427                         if (!t->u.kernel.target->target) {
428                                 int v;
429
430                                 v = ((struct ip6t_standard_target *)t)->verdict;
431                                 if (v < 0) {
432                                         /* Pop from stack? */
                                            /* Any negative value other than
                                             * IP6T_RETURN is a final verdict
                                             * stored as -(verdict) - 1. */
433                                         if (v != IP6T_RETURN) {
434                                                 verdict = (unsigned)(-v) - 1;
435                                                 break;
436                                         }
                                            /* RETURN: resume at the saved
                                             * back entry and restore the
                                             * previous back pointer from its
                                             * comefrom field. */
437                                         e = back;
438                                         back = get_entry(table_base,
439                                                          back->comefrom);
440                                         continue;
441                                 }
                                    /* v >= 0 is a byte offset to jump to.
                                     * Unless it simply addresses the next
                                     * entry, remember where to come back. */
442                                 if (table_base + v
443                                     != (void *)e + e->next_offset) {
444                                         /* Save old back ptr in next entry */
445                                         struct ip6t_entry *next
446                                                 = (void *)e + e->next_offset;
447                                         next->comefrom
448                                                 = (void *)back - table_base;
449                                         /* set back pointer to next entry */
450                                         back = next;
451                                 }
452
453                                 e = get_entry(table_base, v);
454                         } else {
455                                 /* Targets which reenter must return
456                                    abs. verdicts */
457 #ifdef CONFIG_NETFILTER_DEBUG
458                                 ((struct ip6t_entry *)table_base)->comefrom
459                                         = 0xeeeeeeec;
460 #endif
461                                 verdict = t->u.kernel.target->target(pskb,
462                                                                      in, out,
463                                                                      hook,
464                                                                      t->data,
465                                                                      userdata);
466
467 #ifdef CONFIG_NETFILTER_DEBUG
                                    /* A changed marker means the table was
                                     * traversed again from within the
                                     * handler; IP6T_CONTINUE is refused
                                     * in that case. */
468                                 if (((struct ip6t_entry *)table_base)->comefrom
469                                     != 0xeeeeeeec
470                                     && verdict == IP6T_CONTINUE) {
471                                         printk("Target %s reentered!\n",
472                                                t->u.kernel.target->name);
473                                         verdict = NF_DROP;
474                                 }
475                                 ((struct ip6t_entry *)table_base)->comefrom
476                                         = 0x57acc001;
477 #endif
478                                 if (verdict == IP6T_CONTINUE)
479                                         e = (void *)e + e->next_offset;
480                                 else
481                                         /* Verdict */
482                                         break;
483                         }
484                 } else {
485
486                 no_match:
                            /* Fall through to the next rule in sequence. */
487                         e = (void *)e + e->next_offset;
488                 }
489         } while (!hotdrop);
490
491 #ifdef CONFIG_NETFILTER_DEBUG
492         ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
493 #endif
494         read_unlock_bh(&table->lock);
495
496 #ifdef DEBUG_ALLOW_ALL
497         return NF_ACCEPT;
498 #else
499         if (hotdrop)
500                 return NF_DROP;
501         else return verdict;
502 #endif
503 }
504
505 /* If it succeeds, returns element and locks mutex */
506 static inline void *
507 find_inlist_lock_noload(struct list_head *head,
508                         const char *name,
509                         int *error,
510                         struct semaphore *mutex)
511 {
512         void *ret;
513
514 #if 1
515         duprintf("find_inlist: searching for `%s' in %s.\n",
516                  name, head == &ip6t_target ? "ip6t_target"
517                  : head == &ip6t_match ? "ip6t_match"
518                  : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
519 #endif
520
521         *error = down_interruptible(mutex);
522         if (*error != 0)
523                 return NULL;
524
525         ret = list_named_find(head, name);
526         if (!ret) {
527                 *error = -ENOENT;
528                 up(mutex);
529         }
530         return ret;
531 }
532
#ifndef CONFIG_KMOD
/* No module autoloading: plain lookup, the prefix argument is dropped. */
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
/*
 * Like find_inlist_lock_noload(), but when the first lookup returns
 * nothing, ask for module "<prefix><name>" to be loaded and look once
 * more.  The second lookup's result and *error are what the caller sees.
 */
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *found = find_inlist_lock_noload(head, name, error, mutex);

        if (found)
                return found;

        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);

        return find_inlist_lock_noload(head, name, error, mutex);
}
#endif
555
556 static inline struct ip6t_table *
557 ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
558 {
559         return find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex);
560 }
561
562 static inline struct ip6t_match *
563 find_match_lock(const char *name, int *error, struct semaphore *mutex)
564 {
565         return find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex);
566 }
567
568 static struct ip6t_target *
569 ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex)
570 {
571         return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex);
572 }
573
574 /* All zeroes == unconditional rule. */
575 static inline int
576 unconditional(const struct ip6t_ip6 *ipv6)
577 {
578         unsigned int i;
579
580         for (i = 0; i < sizeof(*ipv6); i++)
581                 if (((char *)ipv6)[i])
582                         break;
583
584         return (i == sizeof(*ipv6));
585 }
586
/*
 * Walk every rule reachable from each valid hook's entry point and OR the
 * hook's bit into that rule's comefrom field.
 *
 * newinfo      rule set to walk (the copy at the start of entries[])
 * valid_hooks  bitmask of hooks to process; others are skipped
 *
 * While a rule is on the path currently being walked, bit NF_IP6_NUMHOOKS
 * of its comefrom is set; meeting a rule that already has this bit means
 * a loop.  counters.pcnt temporarily stores the position the walk came
 * from and is reset to 0 when backtracking over a rule.
 *
 * Returns 1 on success, 0 if a loop was found.
 *
 * NOTE(review): jump offsets taken from t->verdict are used here without
 * a bounds check; presumably they are validated elsewhere -- confirm.
 */
587 /* Figures out from what hook each rule can be called: returns 0 if
588    there are loops.  Puts hook bitmask in comefrom. */
589 static int
590 mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
591 {
592         unsigned int hook;
593
594         /* No recursion; use packet counter to save back ptrs (reset
595            to 0 as we leave), and comefrom to save source hook bitmask */
596         for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
597                 unsigned int pos = newinfo->hook_entry[hook];
598                 struct ip6t_entry *e
599                         = (struct ip6t_entry *)(newinfo->entries + pos);
600
601                 if (!(valid_hooks & (1 << hook)))
602                         continue;
603
604                 /* Set initial back pointer. */
                    /* It points at itself, which is how the backtracking
                     * loop below recognises the start of the chain. */
605                 e->counters.pcnt = pos;
606
607                 for (;;) {
608                         struct ip6t_standard_target *t
609                                 = (void *)ip6t_get_target(e);
610
                            /* Already on the current path => loop. */
611                         if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
612                                 printk("iptables: loop hook %u pos %u %08X.\n",
613                                        hook, pos, e->comefrom);
614                                 return 0;
615                         }
616                         e->comefrom
617                                 |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
618
619                         /* Unconditional return/END. */
                            /* That is: no match extensions (the target
                             * directly follows the entry), a standard
                             * target with a negative verdict, and an
                             * all-zero IPv6 part. */
620                         if (e->target_offset == sizeof(struct ip6t_entry)
621                             && (strcmp(t->target.u.user.name,
622                                        IP6T_STANDARD_TARGET) == 0)
623                             && t->verdict < 0
624                             && unconditional(&e->ipv6)) {
625                                 unsigned int oldpos, size;
626
627                                 /* Return: backtrack through the last
628                                    big jump. */
                                    /* Keep stepping back while the rule we
                                     * came from is the one immediately in
                                     * front of us (plain fallthrough). */
629                                 do {
                                            /* Clear the on-path marker. */
630                                         e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
631 #ifdef DEBUG_IP_FIREWALL_USER
632                                         if (e->comefrom
633                                             & (1 << NF_IP6_NUMHOOKS)) {
634                                                 duprintf("Back unset "
635                                                          "on hook %u "
636                                                          "rule %u\n",
637                                                          hook, pos);
638                                         }
639 #endif
640                                         oldpos = pos;
641                                         pos = e->counters.pcnt;
642                                         e->counters.pcnt = 0;
643
644                                         /* We're at the start. */
645                                         if (pos == oldpos)
646                                                 goto next;
647
648                                         e = (struct ip6t_entry *)
649                                                 (newinfo->entries + pos);
650                                 } while (oldpos == pos + e->next_offset);
651
652                                 /* Move along one */
                                    /* Continue with the rule that follows
                                     * the jump we just returned to. */
653                                 size = e->next_offset;
654                                 e = (struct ip6t_entry *)
655                                         (newinfo->entries + pos + size);
656                                 e->counters.pcnt = pos;
657                                 pos += size;
658                         } else {
659                                 int newpos = t->verdict;
660
661                                 if (strcmp(t->target.u.user.name,
662                                            IP6T_STANDARD_TARGET) == 0
663                                     && newpos >= 0) {
664                                         /* This a jump; chase it. */
665                                         duprintf("Jump rule %u -> %u\n",
666                                                  pos, newpos);
667                                 } else {
668                                         /* ... this is a fallthru */
669                                         newpos = pos + e->next_offset;
670                                 }
                                    /* Move on and record where we came
                                     * from in the new rule's pcnt. */
671                                 e = (struct ip6t_entry *)
672                                         (newinfo->entries + newpos);
673                                 e->counters.pcnt = pos;
674                                 pos = newpos;
675                         }
676                 }
677                 next:
678                 duprintf("Finished chain %u\n", hook);
679         }
680         return 1;
681 }
682
683 static inline int
684 cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
685 {
686         if (i && (*i)-- == 0)
687                 return 1;
688
689         if (m->u.kernel.match->destroy)
690                 m->u.kernel.match->destroy(m->data,
691                                            m->u.match_size - sizeof(*m));
692         module_put(m->u.kernel.match->me);
693         return 0;
694 }
695
696 static inline int
697 standard_check(const struct ip6t_entry_target *t,
698                unsigned int max_offset)
699 {
700         struct ip6t_standard_target *targ = (void *)t;
701
702         /* Check standard info. */
703         if (t->u.target_size
704             != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
705                 duprintf("standard_check: target size %u != %u\n",
706                          t->u.target_size,
707                          IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
708                 return 0;
709         }
710
711         if (targ->verdict >= 0
712             && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
713                 duprintf("ip6t_standard_check: bad verdict (%i)\n",
714                          targ->verdict);
715                 return 0;
716         }
717
718         if (targ->verdict < -NF_MAX_VERDICT - 1) {
719                 duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
720                          targ->verdict);
721                 return 0;
722         }
723         return 1;
724 }
725
726 static inline int
727 check_match(struct ip6t_entry_match *m,
728             const char *name,
729             const struct ip6t_ip6 *ipv6,
730             unsigned int hookmask,
731             unsigned int *i)
732 {
733         int ret;
734         struct ip6t_match *match;
735
736         match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex);
737         if (!match) {
738           //            duprintf("check_match: `%s' not found\n", m->u.name);
739                 return ret;
740         }
741         if (!try_module_get(match->me)) {
742                 up(&ip6t_mutex);
743                 return -ENOENT;
744         }
745         m->u.kernel.match = match;
746         up(&ip6t_mutex);
747
748         if (m->u.kernel.match->checkentry
749             && !m->u.kernel.match->checkentry(name, ipv6, m->data,
750                                               m->u.match_size - sizeof(*m),
751                                               hookmask)) {
752                 module_put(m->u.kernel.match->me);
753                 duprintf("ip_tables: check failed for `%s'.\n",
754                          m->u.kernel.match->name);
755                 return -EINVAL;
756         }
757
758         (*i)++;
759         return 0;
760 }
761
762 static struct ip6t_target ip6t_standard_target;
763
764 static inline int
765 check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
766             unsigned int *i)
767 {
768         struct ip6t_entry_target *t;
769         struct ip6t_target *target;
770         int ret;
771         unsigned int j;
772
773         if (!ip6_checkentry(&e->ipv6)) {
774                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
775                 return -EINVAL;
776         }
777
778         j = 0;
779         ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
780         if (ret != 0)
781                 goto cleanup_matches;
782
783         t = ip6t_get_target(e);
784         target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
785         if (!target) {
786                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
787                 goto cleanup_matches;
788         }
789         if (!try_module_get(target->me)) {
790                 up(&ip6t_mutex);
791                 ret = -ENOENT;
792                 goto cleanup_matches;
793         }
794         t->u.kernel.target = target;
795         up(&ip6t_mutex);
796         if (!t->u.kernel.target) {
797                 ret = -EBUSY;
798                 goto cleanup_matches;
799         }
800         if (t->u.kernel.target == &ip6t_standard_target) {
801                 if (!standard_check(t, size)) {
802                         ret = -EINVAL;
803                         goto cleanup_matches;
804                 }
805         } else if (t->u.kernel.target->checkentry
806                    && !t->u.kernel.target->checkentry(name, e, t->data,
807                                                       t->u.target_size
808                                                       - sizeof(*t),
809                                                       e->comefrom)) {
810                 module_put(t->u.kernel.target->me);
811                 duprintf("ip_tables: check failed for `%s'.\n",
812                          t->u.kernel.target->name);
813                 ret = -EINVAL;
814                 goto cleanup_matches;
815         }
816
817         (*i)++;
818         return 0;
819
820  cleanup_matches:
821         IP6T_MATCH_ITERATE(e, cleanup_match, &j);
822         return ret;
823 }
824
825 static inline int
826 check_entry_size_and_hooks(struct ip6t_entry *e,
827                            struct ip6t_table_info *newinfo,
828                            unsigned char *base,
829                            unsigned char *limit,
830                            const unsigned int *hook_entries,
831                            const unsigned int *underflows,
832                            unsigned int *i)
833 {
834         unsigned int h;
835
836         if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
837             || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
838                 duprintf("Bad offset %p\n", e);
839                 return -EINVAL;
840         }
841
842         if (e->next_offset
843             < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
844                 duprintf("checking: element %p size %u\n",
845                          e, e->next_offset);
846                 return -EINVAL;
847         }
848
849         /* Check hooks & underflows */
850         for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
851                 if ((unsigned char *)e - base == hook_entries[h])
852                         newinfo->hook_entry[h] = hook_entries[h];
853                 if ((unsigned char *)e - base == underflows[h])
854                         newinfo->underflow[h] = underflows[h];
855         }
856
857         /* FIXME: underflows must be unconditional, standard verdicts
858            < 0 (not IP6T_RETURN). --RR */
859
860         /* Clear counters and comefrom */
861         e->counters = ((struct ip6t_counters) { 0, 0 });
862         e->comefrom = 0;
863
864         (*i)++;
865         return 0;
866 }
867
868 static inline int
869 cleanup_entry(struct ip6t_entry *e, unsigned int *i)
870 {
871         struct ip6t_entry_target *t;
872
873         if (i && (*i)-- == 0)
874                 return 1;
875
876         /* Cleanup all matches */
877         IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
878         t = ip6t_get_target(e);
879         if (t->u.kernel.target->destroy)
880                 t->u.kernel.target->destroy(t->data,
881                                             t->u.target_size - sizeof(*t));
882         module_put(t->u.kernel.target->me);
883         return 0;
884 }
885
886 /* Checks and translates the user-supplied table segment (held in
887    newinfo) */
888 static int
889 translate_table(const char *name,
890                 unsigned int valid_hooks,
891                 struct ip6t_table_info *newinfo,
892                 unsigned int size,
893                 unsigned int number,
894                 const unsigned int *hook_entries,
895                 const unsigned int *underflows)
896 {
897         unsigned int i;
898         int ret;
899
900         newinfo->size = size;
901         newinfo->number = number;
902
903         /* Init all hooks to impossible value. */
904         for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
905                 newinfo->hook_entry[i] = 0xFFFFFFFF;
906                 newinfo->underflow[i] = 0xFFFFFFFF;
907         }
908
909         duprintf("translate_table: size %u\n", newinfo->size);
910         i = 0;
911         /* Walk through entries, checking offsets. */
912         ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
913                                 check_entry_size_and_hooks,
914                                 newinfo,
915                                 newinfo->entries,
916                                 newinfo->entries + size,
917                                 hook_entries, underflows, &i);
918         if (ret != 0)
919                 return ret;
920
921         if (i != number) {
922                 duprintf("translate_table: %u not %u entries\n",
923                          i, number);
924                 return -EINVAL;
925         }
926
927         /* Check hooks all assigned */
928         for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
929                 /* Only hooks which are valid */
930                 if (!(valid_hooks & (1 << i)))
931                         continue;
932                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
933                         duprintf("Invalid hook entry %u %u\n",
934                                  i, hook_entries[i]);
935                         return -EINVAL;
936                 }
937                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
938                         duprintf("Invalid underflow %u %u\n",
939                                  i, underflows[i]);
940                         return -EINVAL;
941                 }
942         }
943
944         if (!mark_source_chains(newinfo, valid_hooks))
945                 return -ELOOP;
946
947         /* Finally, each sanity check must pass */
948         i = 0;
949         ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
950                                 check_entry, name, size, &i);
951
952         if (ret != 0) {
953                 IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
954                                   cleanup_entry, &i);
955                 return ret;
956         }
957
958         /* And one copy for every other CPU */
959         for (i = 1; i < NR_CPUS; i++) {
960                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
961                        newinfo->entries,
962                        SMP_ALIGN(newinfo->size));
963         }
964
965         return ret;
966 }
967
968 static struct ip6t_table_info *
969 replace_table(struct ip6t_table *table,
970               unsigned int num_counters,
971               struct ip6t_table_info *newinfo,
972               int *error)
973 {
974         struct ip6t_table_info *oldinfo;
975
976 #ifdef CONFIG_NETFILTER_DEBUG
977         {
978                 struct ip6t_entry *table_base;
979                 unsigned int i;
980
981                 for (i = 0; i < NR_CPUS; i++) {
982                         table_base =
983                                 (void *)newinfo->entries
984                                 + TABLE_OFFSET(newinfo, i);
985
986                         table_base->comefrom = 0xdead57ac;
987                 }
988         }
989 #endif
990
991         /* Do the substitution. */
992         write_lock_bh(&table->lock);
993         /* Check inside lock: is the old number correct? */
994         if (num_counters != table->private->number) {
995                 duprintf("num_counters != table->private->number (%u/%u)\n",
996                          num_counters, table->private->number);
997                 write_unlock_bh(&table->lock);
998                 *error = -EAGAIN;
999                 return NULL;
1000         }
1001         oldinfo = table->private;
1002         table->private = newinfo;
1003         newinfo->initial_entries = oldinfo->initial_entries;
1004         write_unlock_bh(&table->lock);
1005
1006         return oldinfo;
1007 }
1008
1009 /* Gets counters. */
1010 static inline int
1011 add_entry_to_counter(const struct ip6t_entry *e,
1012                      struct ip6t_counters total[],
1013                      unsigned int *i)
1014 {
1015         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
1016
1017         (*i)++;
1018         return 0;
1019 }
1020
1021 static void
1022 get_counters(const struct ip6t_table_info *t,
1023              struct ip6t_counters counters[])
1024 {
1025         unsigned int cpu;
1026         unsigned int i;
1027
1028         for (cpu = 0; cpu < NR_CPUS; cpu++) {
1029                 i = 0;
1030                 IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
1031                                   t->size,
1032                                   add_entry_to_counter,
1033                                   counters,
1034                                   &i);
1035         }
1036 }
1037
1038 static int
1039 copy_entries_to_user(unsigned int total_size,
1040                      struct ip6t_table *table,
1041                      void __user *userptr)
1042 {
1043         unsigned int off, num, countersize;
1044         struct ip6t_entry *e;
1045         struct ip6t_counters *counters;
1046         int ret = 0;
1047
1048         /* We need atomic snapshot of counters: rest doesn't change
1049            (other than comefrom, which userspace doesn't care
1050            about). */
1051         countersize = sizeof(struct ip6t_counters) * table->private->number;
1052         counters = vmalloc(countersize);
1053
1054         if (counters == NULL)
1055                 return -ENOMEM;
1056
1057         /* First, sum counters... */
1058         memset(counters, 0, countersize);
1059         write_lock_bh(&table->lock);
1060         get_counters(table->private, counters);
1061         write_unlock_bh(&table->lock);
1062
1063         /* ... then copy entire thing from CPU 0... */
1064         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1065                 ret = -EFAULT;
1066                 goto free_counters;
1067         }
1068
1069         /* FIXME: use iterator macros --RR */
1070         /* ... then go back and fix counters and names */
1071         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1072                 unsigned int i;
1073                 struct ip6t_entry_match *m;
1074                 struct ip6t_entry_target *t;
1075
1076                 e = (struct ip6t_entry *)(table->private->entries + off);
1077                 if (copy_to_user(userptr + off
1078                                  + offsetof(struct ip6t_entry, counters),
1079                                  &counters[num],
1080                                  sizeof(counters[num])) != 0) {
1081                         ret = -EFAULT;
1082                         goto free_counters;
1083                 }
1084
1085                 for (i = sizeof(struct ip6t_entry);
1086                      i < e->target_offset;
1087                      i += m->u.match_size) {
1088                         m = (void *)e + i;
1089
1090                         if (copy_to_user(userptr + off + i
1091                                          + offsetof(struct ip6t_entry_match,
1092                                                     u.user.name),
1093                                          m->u.kernel.match->name,
1094                                          strlen(m->u.kernel.match->name)+1)
1095                             != 0) {
1096                                 ret = -EFAULT;
1097                                 goto free_counters;
1098                         }
1099                 }
1100
1101                 t = ip6t_get_target(e);
1102                 if (copy_to_user(userptr + off + e->target_offset
1103                                  + offsetof(struct ip6t_entry_target,
1104                                             u.user.name),
1105                                  t->u.kernel.target->name,
1106                                  strlen(t->u.kernel.target->name)+1) != 0) {
1107                         ret = -EFAULT;
1108                         goto free_counters;
1109                 }
1110         }
1111
1112  free_counters:
1113         vfree(counters);
1114         return ret;
1115 }
1116
1117 static int
1118 get_entries(const struct ip6t_get_entries *entries,
1119             struct ip6t_get_entries __user *uptr)
1120 {
1121         int ret;
1122         struct ip6t_table *t;
1123
1124         t = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex);
1125         if (t) {
1126                 duprintf("t->private->number = %u\n",
1127                          t->private->number);
1128                 if (entries->size == t->private->size)
1129                         ret = copy_entries_to_user(t->private->size,
1130                                                    t, uptr->entrytable);
1131                 else {
1132                         duprintf("get_entries: I've got %u not %u!\n",
1133                                  t->private->size,
1134                                  entries->size);
1135                         ret = -EINVAL;
1136                 }
1137                 up(&ip6t_mutex);
1138         } else
1139                 duprintf("get_entries: Can't find %s!\n",
1140                          entries->name);
1141
1142         return ret;
1143 }
1144
1145 static int
1146 do_replace(void __user *user, unsigned int len)
1147 {
1148         int ret;
1149         struct ip6t_replace tmp;
1150         struct ip6t_table *t;
1151         struct ip6t_table_info *newinfo, *oldinfo;
1152         struct ip6t_counters *counters;
1153
1154         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1155                 return -EFAULT;
1156
1157         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1158         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1159                 return -ENOMEM;
1160
1161         newinfo = vmalloc(sizeof(struct ip6t_table_info)
1162                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1163         if (!newinfo)
1164                 return -ENOMEM;
1165
1166         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1167                            tmp.size) != 0) {
1168                 ret = -EFAULT;
1169                 goto free_newinfo;
1170         }
1171
1172         counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
1173         if (!counters) {
1174                 ret = -ENOMEM;
1175                 goto free_newinfo;
1176         }
1177         memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
1178
1179         ret = translate_table(tmp.name, tmp.valid_hooks,
1180                               newinfo, tmp.size, tmp.num_entries,
1181                               tmp.hook_entry, tmp.underflow);
1182         if (ret != 0)
1183                 goto free_newinfo_counters;
1184
1185         duprintf("ip_tables: Translated table\n");
1186
1187         t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1188         if (!t)
1189                 goto free_newinfo_counters_untrans;
1190
1191         /* You lied! */
1192         if (tmp.valid_hooks != t->valid_hooks) {
1193                 duprintf("Valid hook crap: %08X vs %08X\n",
1194                          tmp.valid_hooks, t->valid_hooks);
1195                 ret = -EINVAL;
1196                 goto free_newinfo_counters_untrans_unlock;
1197         }
1198
1199         /* Get a reference in advance, we're not allowed fail later */
1200         if (!try_module_get(t->me)) {
1201                 ret = -EBUSY;
1202                 goto free_newinfo_counters_untrans_unlock;
1203         }
1204
1205         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1206         if (!oldinfo)
1207                 goto put_module;
1208
1209         /* Update module usage count based on number of rules */
1210         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1211                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1212         if ((oldinfo->number > oldinfo->initial_entries) || 
1213             (newinfo->number <= oldinfo->initial_entries)) 
1214                 module_put(t->me);
1215         if ((oldinfo->number > oldinfo->initial_entries) &&
1216             (newinfo->number <= oldinfo->initial_entries))
1217                 module_put(t->me);
1218
1219         /* Get the old counters. */
1220         get_counters(oldinfo, counters);
1221         /* Decrease module usage counts and free resource */
1222         IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1223         vfree(oldinfo);
1224         /* Silent error: too late now. */
1225         copy_to_user(tmp.counters, counters,
1226                      sizeof(struct ip6t_counters) * tmp.num_counters);
1227         vfree(counters);
1228         up(&ip6t_mutex);
1229         return 0;
1230
1231  put_module:
1232         module_put(t->me);
1233  free_newinfo_counters_untrans_unlock:
1234         up(&ip6t_mutex);
1235  free_newinfo_counters_untrans:
1236         IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1237  free_newinfo_counters:
1238         vfree(counters);
1239  free_newinfo:
1240         vfree(newinfo);
1241         return ret;
1242 }
1243
1244 /* We're lazy, and add to the first CPU; overflow works its fey magic
1245  * and everything is OK. */
1246 static inline int
1247 add_counter_to_entry(struct ip6t_entry *e,
1248                      const struct ip6t_counters addme[],
1249                      unsigned int *i)
1250 {
1251 #if 0
1252         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1253                  *i,
1254                  (long unsigned int)e->counters.pcnt,
1255                  (long unsigned int)e->counters.bcnt,
1256                  (long unsigned int)addme[*i].pcnt,
1257                  (long unsigned int)addme[*i].bcnt);
1258 #endif
1259
1260         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1261
1262         (*i)++;
1263         return 0;
1264 }
1265
1266 static int
1267 do_add_counters(void __user *user, unsigned int len)
1268 {
1269         unsigned int i;
1270         struct ip6t_counters_info tmp, *paddc;
1271         struct ip6t_table *t;
1272         int ret;
1273
1274         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1275                 return -EFAULT;
1276
1277         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ip6t_counters))
1278                 return -EINVAL;
1279
1280         paddc = vmalloc(len);
1281         if (!paddc)
1282                 return -ENOMEM;
1283
1284         if (copy_from_user(paddc, user, len) != 0) {
1285                 ret = -EFAULT;
1286                 goto free;
1287         }
1288
1289         t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
1290         if (!t)
1291                 goto free;
1292
1293         write_lock_bh(&t->lock);
1294         if (t->private->number != paddc->num_counters) {
1295                 ret = -EINVAL;
1296                 goto unlock_up_free;
1297         }
1298
1299         i = 0;
1300         IP6T_ENTRY_ITERATE(t->private->entries,
1301                           t->private->size,
1302                           add_counter_to_entry,
1303                           paddc->counters,
1304                           &i);
1305  unlock_up_free:
1306         write_unlock_bh(&t->lock);
1307         up(&ip6t_mutex);
1308  free:
1309         vfree(paddc);
1310
1311         return ret;
1312 }
1313
1314 static int
1315 do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1316 {
1317         int ret;
1318
1319         if (!capable(CAP_NET_ADMIN))
1320                 return -EPERM;
1321
1322         switch (cmd) {
1323         case IP6T_SO_SET_REPLACE:
1324                 ret = do_replace(user, len);
1325                 break;
1326
1327         case IP6T_SO_SET_ADD_COUNTERS:
1328                 ret = do_add_counters(user, len);
1329                 break;
1330
1331         default:
1332                 duprintf("do_ip6t_set_ctl:  unknown request %i\n", cmd);
1333                 ret = -EINVAL;
1334         }
1335
1336         return ret;
1337 }
1338
1339 static int
1340 do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1341 {
1342         int ret;
1343
1344         if (!capable(CAP_NET_ADMIN))
1345                 return -EPERM;
1346
1347         switch (cmd) {
1348         case IP6T_SO_GET_INFO: {
1349                 char name[IP6T_TABLE_MAXNAMELEN];
1350                 struct ip6t_table *t;
1351
1352                 if (*len != sizeof(struct ip6t_getinfo)) {
1353                         duprintf("length %u != %u\n", *len,
1354                                  sizeof(struct ip6t_getinfo));
1355                         ret = -EINVAL;
1356                         break;
1357                 }
1358
1359                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1360                         ret = -EFAULT;
1361                         break;
1362                 }
1363                 name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
1364                 t = ip6t_find_table_lock(name, &ret, &ip6t_mutex);
1365                 if (t) {
1366                         struct ip6t_getinfo info;
1367
1368                         info.valid_hooks = t->valid_hooks;
1369                         memcpy(info.hook_entry, t->private->hook_entry,
1370                                sizeof(info.hook_entry));
1371                         memcpy(info.underflow, t->private->underflow,
1372                                sizeof(info.underflow));
1373                         info.num_entries = t->private->number;
1374                         info.size = t->private->size;
1375                         strcpy(info.name, name);
1376
1377                         if (copy_to_user(user, &info, *len) != 0)
1378                                 ret = -EFAULT;
1379                         else
1380                                 ret = 0;
1381
1382                         up(&ip6t_mutex);
1383                 }
1384         }
1385         break;
1386
1387         case IP6T_SO_GET_ENTRIES: {
1388                 struct ip6t_get_entries get;
1389
1390                 if (*len < sizeof(get)) {
1391                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1392                         ret = -EINVAL;
1393                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1394                         ret = -EFAULT;
1395                 } else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
1396                         duprintf("get_entries: %u != %u\n", *len,
1397                                  sizeof(struct ip6t_get_entries) + get.size);
1398                         ret = -EINVAL;
1399                 } else
1400                         ret = get_entries(&get, user);
1401                 break;
1402         }
1403
1404         default:
1405                 duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
1406                 ret = -EINVAL;
1407         }
1408
1409         return ret;
1410 }
1411
1412 /* Registration hooks for targets. */
1413 int
1414 ip6t_register_target(struct ip6t_target *target)
1415 {
1416         int ret;
1417
1418         ret = down_interruptible(&ip6t_mutex);
1419         if (ret != 0)
1420                 return ret;
1421
1422         if (!list_named_insert(&ip6t_target, target)) {
1423                 duprintf("ip6t_register_target: `%s' already in list!\n",
1424                          target->name);
1425                 ret = -EINVAL;
1426         }
1427         up(&ip6t_mutex);
1428         return ret;
1429 }
1430
1431 void
1432 ip6t_unregister_target(struct ip6t_target *target)
1433 {
1434         down(&ip6t_mutex);
1435         LIST_DELETE(&ip6t_target, target);
1436         up(&ip6t_mutex);
1437 }
1438
1439 int
1440 ip6t_register_match(struct ip6t_match *match)
1441 {
1442         int ret;
1443
1444         ret = down_interruptible(&ip6t_mutex);
1445         if (ret != 0)
1446                 return ret;
1447
1448         if (!list_named_insert(&ip6t_match, match)) {
1449                 duprintf("ip6t_register_match: `%s' already in list!\n",
1450                          match->name);
1451                 ret = -EINVAL;
1452         }
1453         up(&ip6t_mutex);
1454
1455         return ret;
1456 }
1457
1458 void
1459 ip6t_unregister_match(struct ip6t_match *match)
1460 {
1461         down(&ip6t_mutex);
1462         LIST_DELETE(&ip6t_match, match);
1463         up(&ip6t_mutex);
1464 }
1465
1466 int ip6t_register_table(struct ip6t_table *table)
1467 {
1468         int ret;
1469         struct ip6t_table_info *newinfo;
1470         static struct ip6t_table_info bootstrap
1471                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1472
1473         newinfo = vmalloc(sizeof(struct ip6t_table_info)
1474                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1475         if (!newinfo)
1476                 return -ENOMEM;
1477
1478         memcpy(newinfo->entries, table->table->entries, table->table->size);
1479
1480         ret = translate_table(table->name, table->valid_hooks,
1481                               newinfo, table->table->size,
1482                               table->table->num_entries,
1483                               table->table->hook_entry,
1484                               table->table->underflow);
1485         if (ret != 0) {
1486                 vfree(newinfo);
1487                 return ret;
1488         }
1489
1490         ret = down_interruptible(&ip6t_mutex);
1491         if (ret != 0) {
1492                 vfree(newinfo);
1493                 return ret;
1494         }
1495
1496         /* Don't autoload: we'd eat our tail... */
1497         if (list_named_find(&ip6t_tables, table->name)) {
1498                 ret = -EEXIST;
1499                 goto free_unlock;
1500         }
1501
1502         /* Simplifies replace_table code. */
1503         table->private = &bootstrap;
1504         if (!replace_table(table, 0, newinfo, &ret))
1505                 goto free_unlock;
1506
1507         duprintf("table->private->number = %u\n",
1508                  table->private->number);
1509
1510         /* save number of initial entries */
1511         table->private->initial_entries = table->private->number;
1512
1513         rwlock_init(&table->lock);
1514         list_prepend(&ip6t_tables, table);
1515
1516  unlock:
1517         up(&ip6t_mutex);
1518         return ret;
1519
1520  free_unlock:
1521         vfree(newinfo);
1522         goto unlock;
1523 }
1524
1525 void ip6t_unregister_table(struct ip6t_table *table)
1526 {
1527         down(&ip6t_mutex);
1528         LIST_DELETE(&ip6t_tables, table);
1529         up(&ip6t_mutex);
1530
1531         /* Decrease module usage counts and free resources */
1532         IP6T_ENTRY_ITERATE(table->private->entries, table->private->size,
1533                           cleanup_entry, NULL);
1534         vfree(table->private);
1535 }
1536
/* Tests whether port lies in the inclusive range [min, max] and XORs the
   outcome with invert; with invert 0 the result is 1 for a hit and 0 for a
   miss, with invert 1 the result is flipped. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = !(port < min || port > max);

	return in_range ^ invert;
}
1546
1547 static int
1548 tcp_find_option(u_int8_t option,
1549                 const struct sk_buff *skb,
1550                 unsigned int tcpoff,
1551                 unsigned int optlen,
1552                 int invert,
1553                 int *hotdrop)
1554 {
1555         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1556         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1557         unsigned int i;
1558
1559         duprintf("tcp_match: finding option\n");
1560         if (!optlen)
1561                 return invert;
1562         /* If we don't have the whole header, drop packet. */
1563         op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
1564                                 _opt);
1565         if (op == NULL) {
1566                 *hotdrop = 1;
1567                 return 0;
1568         }
1569
1570         for (i = 0; i < optlen; ) {
1571                 if (op[i] == option) return !invert;
1572                 if (op[i] < 2) i++;
1573                 else i += op[i+1]?:1;
1574         }
1575
1576         return invert;
1577 }
1578
1579 static int
1580 tcp_match(const struct sk_buff *skb,
1581           const struct net_device *in,
1582           const struct net_device *out,
1583           const void *matchinfo,
1584           int offset,
1585           unsigned int protoff,
1586           int *hotdrop)
1587 {
1588         struct tcphdr _tcph, *th;
1589         const struct ip6t_tcp *tcpinfo = matchinfo;
1590
1591         if (offset) {
1592                 /* To quote Alan:
1593
1594                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1595                    causes this. Its a cracker trying to break in by doing a
1596                    flag overwrite to pass the direction checks.
1597                 */
1598                 if (offset == 1) {
1599                         duprintf("Dropping evil TCP offset=1 frag.\n");
1600                         *hotdrop = 1;
1601                 }
1602                 /* Must not be a fragment. */
1603                 return 0;
1604         }
1605
1606 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1607
1608         th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
1609         if (th == NULL) {
1610                 /* We've been asked to examine this packet, and we
1611                    can't.  Hence, no choice but to drop. */
1612                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1613                 *hotdrop = 1;
1614                 return 0;
1615         }
1616
1617         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1618                         ntohs(th->source),
1619                         !!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
1620                 return 0;
1621         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1622                         ntohs(th->dest),
1623                         !!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
1624                 return 0;
1625         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1626                       == tcpinfo->flg_cmp,
1627                       IP6T_TCP_INV_FLAGS))
1628                 return 0;
1629         if (tcpinfo->option) {
1630                 if (th->doff * 4 < sizeof(_tcph)) {
1631                         *hotdrop = 1;
1632                         return 0;
1633                 }
1634                 if (!tcp_find_option(tcpinfo->option, skb, protoff,
1635                                      th->doff*4 - sizeof(*th),
1636                                      tcpinfo->invflags & IP6T_TCP_INV_OPTION,
1637                                      hotdrop))
1638                         return 0;
1639         }
1640         return 1;
1641 }
1642
1643 /* Called when user tries to insert an entry of this type. */
1644 static int
1645 tcp_checkentry(const char *tablename,
1646                const struct ip6t_ip6 *ipv6,
1647                void *matchinfo,
1648                unsigned int matchsize,
1649                unsigned int hook_mask)
1650 {
1651         const struct ip6t_tcp *tcpinfo = matchinfo;
1652
1653         /* Must specify proto == TCP, and no unknown invflags */
1654         return ipv6->proto == IPPROTO_TCP
1655                 && !(ipv6->invflags & IP6T_INV_PROTO)
1656                 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_tcp))
1657                 && !(tcpinfo->invflags & ~IP6T_TCP_INV_MASK);
1658 }
1659
1660 static int
1661 udp_match(const struct sk_buff *skb,
1662           const struct net_device *in,
1663           const struct net_device *out,
1664           const void *matchinfo,
1665           int offset,
1666           unsigned int protoff,
1667           int *hotdrop)
1668 {
1669         struct udphdr _udph, *uh;
1670         const struct ip6t_udp *udpinfo = matchinfo;
1671
1672         /* Must not be a fragment. */
1673         if (offset)
1674                 return 0;
1675
1676         uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
1677         if (uh == NULL) {
1678                 /* We've been asked to examine this packet, and we
1679                    can't.  Hence, no choice but to drop. */
1680                 duprintf("Dropping evil UDP tinygram.\n");
1681                 *hotdrop = 1;
1682                 return 0;
1683         }
1684
1685         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1686                           ntohs(uh->source),
1687                           !!(udpinfo->invflags & IP6T_UDP_INV_SRCPT))
1688                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1689                               ntohs(uh->dest),
1690                               !!(udpinfo->invflags & IP6T_UDP_INV_DSTPT));
1691 }
1692
1693 /* Called when user tries to insert an entry of this type. */
1694 static int
1695 udp_checkentry(const char *tablename,
1696                const struct ip6t_ip6 *ipv6,
1697                void *matchinfo,
1698                unsigned int matchinfosize,
1699                unsigned int hook_mask)
1700 {
1701         const struct ip6t_udp *udpinfo = matchinfo;
1702
1703         /* Must specify proto == UDP, and no unknown invflags */
1704         if (ipv6->proto != IPPROTO_UDP || (ipv6->invflags & IP6T_INV_PROTO)) {
1705                 duprintf("ip6t_udp: Protocol %u != %u\n", ipv6->proto,
1706                          IPPROTO_UDP);
1707                 return 0;
1708         }
1709         if (matchinfosize != IP6T_ALIGN(sizeof(struct ip6t_udp))) {
1710                 duprintf("ip6t_udp: matchsize %u != %u\n",
1711                          matchinfosize, IP6T_ALIGN(sizeof(struct ip6t_udp)));
1712                 return 0;
1713         }
1714         if (udpinfo->invflags & ~IP6T_UDP_INV_MASK) {
1715                 duprintf("ip6t_udp: unknown flags %X\n",
1716                          udpinfo->invflags);
1717                 return 0;
1718         }
1719
1720         return 1;
1721 }
1722
/*
 * Test an ICMPv6 type/code pair against a rule.
 *
 * The pair is "in range" when type equals test_type and code lies in
 * [min_code, max_code] (both ends inclusive).  The result is that truth
 * value XORed with invert.
 */
static inline int
icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		      u_int8_t type, u_int8_t code,
		      int invert)
{
	int in_range = 0;

	if (type == test_type && min_code <= code && code <= max_code)
		in_range = 1;

	return in_range ^ invert;
}
1732
1733 static int
1734 icmp6_match(const struct sk_buff *skb,
1735            const struct net_device *in,
1736            const struct net_device *out,
1737            const void *matchinfo,
1738            int offset,
1739            unsigned int protoff,
1740            int *hotdrop)
1741 {
1742         struct icmp6hdr _icmp, *ic;
1743         const struct ip6t_icmp *icmpinfo = matchinfo;
1744
1745         /* Must not be a fragment. */
1746         if (offset)
1747                 return 0;
1748
1749         ic = skb_header_pointer(skb, protoff, sizeof(_icmp), &_icmp);
1750         if (ic == NULL) {
1751                 /* We've been asked to examine this packet, and we
1752                    can't.  Hence, no choice but to drop. */
1753                 duprintf("Dropping evil ICMP tinygram.\n");
1754                 *hotdrop = 1;
1755                 return 0;
1756         }
1757
1758         return icmp6_type_code_match(icmpinfo->type,
1759                                      icmpinfo->code[0],
1760                                      icmpinfo->code[1],
1761                                      ic->icmp6_type, ic->icmp6_code,
1762                                      !!(icmpinfo->invflags&IP6T_ICMP_INV));
1763 }
1764
1765 /* Called when user tries to insert an entry of this type. */
1766 static int
1767 icmp6_checkentry(const char *tablename,
1768            const struct ip6t_ip6 *ipv6,
1769            void *matchinfo,
1770            unsigned int matchsize,
1771            unsigned int hook_mask)
1772 {
1773         const struct ip6t_icmp *icmpinfo = matchinfo;
1774
1775         /* Must specify proto == ICMP, and no unknown invflags */
1776         return ipv6->proto == IPPROTO_ICMPV6
1777                 && !(ipv6->invflags & IP6T_INV_PROTO)
1778                 && matchsize == IP6T_ALIGN(sizeof(struct ip6t_icmp))
1779                 && !(icmpinfo->invflags & ~IP6T_ICMP_INV);
1780 }
1781
1782 /* The built-in targets: standard (NULL) and error. */
1783 static struct ip6t_target ip6t_standard_target = {
1784         .name           = IP6T_STANDARD_TARGET,
1785 };
1786
1787 static struct ip6t_target ip6t_error_target = {
1788         .name           = IP6T_ERROR_TARGET,
1789         .target         = ip6t_error,
1790 };
1791
1792 static struct nf_sockopt_ops ip6t_sockopts = {
1793         .pf             = PF_INET6,
1794         .set_optmin     = IP6T_BASE_CTL,
1795         .set_optmax     = IP6T_SO_SET_MAX+1,
1796         .set            = do_ip6t_set_ctl,
1797         .get_optmin     = IP6T_BASE_CTL,
1798         .get_optmax     = IP6T_SO_GET_MAX+1,
1799         .get            = do_ip6t_get_ctl,
1800 };
1801
1802 static struct ip6t_match tcp_matchstruct = {
1803         .name           = "tcp",
1804         .match          = &tcp_match,
1805         .checkentry     = &tcp_checkentry,
1806 };
1807
1808 static struct ip6t_match udp_matchstruct = {
1809         .name           = "udp",
1810         .match          = &udp_match,
1811         .checkentry     = &udp_checkentry,
1812 };
1813
1814 static struct ip6t_match icmp6_matchstruct = {
1815         .name           = "icmp6",
1816         .match          = &icmp6_match,
1817         .checkentry     = &icmp6_checkentry,
1818 };
1819
1820 #ifdef CONFIG_PROC_FS
1821 static inline int print_name(const char *i,
1822                              off_t start_offset, char *buffer, int length,
1823                              off_t *pos, unsigned int *count)
1824 {
1825         if ((*count)++ >= start_offset) {
1826                 unsigned int namelen;
1827
1828                 namelen = sprintf(buffer + *pos, "%s\n",
1829                                   i + sizeof(struct list_head));
1830                 if (*pos + namelen > length) {
1831                         /* Stop iterating */
1832                         return 1;
1833                 }
1834                 *pos += namelen;
1835         }
1836         return 0;
1837 }
1838
1839 static inline int print_target(const struct ip6t_target *t,
1840                                off_t start_offset, char *buffer, int length,
1841                                off_t *pos, unsigned int *count)
1842 {
1843         if (t == &ip6t_standard_target || t == &ip6t_error_target)
1844                 return 0;
1845         return print_name((char *)t, start_offset, buffer, length, pos, count);
1846 }
1847
1848 static int ip6t_get_tables(char *buffer, char **start, off_t offset, int length)
1849 {
1850         off_t pos = 0;
1851         unsigned int count = 0;
1852
1853         if (down_interruptible(&ip6t_mutex) != 0)
1854                 return 0;
1855
1856         LIST_FIND(&ip6t_tables, print_name, char *,
1857                   offset, buffer, length, &pos, &count);
1858
1859         up(&ip6t_mutex);
1860
1861         /* `start' hack - see fs/proc/generic.c line ~105 */
1862         *start=(char *)((unsigned long)count-offset);
1863         return pos;
1864 }
1865
1866 static int ip6t_get_targets(char *buffer, char **start, off_t offset, int length)
1867 {
1868         off_t pos = 0;
1869         unsigned int count = 0;
1870
1871         if (down_interruptible(&ip6t_mutex) != 0)
1872                 return 0;
1873
1874         LIST_FIND(&ip6t_target, print_target, struct ip6t_target *,
1875                   offset, buffer, length, &pos, &count);
1876
1877         up(&ip6t_mutex);
1878
1879         *start = (char *)((unsigned long)count - offset);
1880         return pos;
1881 }
1882
1883 static int ip6t_get_matches(char *buffer, char **start, off_t offset, int length)
1884 {
1885         off_t pos = 0;
1886         unsigned int count = 0;
1887
1888         if (down_interruptible(&ip6t_mutex) != 0)
1889                 return 0;
1890
1891         LIST_FIND(&ip6t_match, print_name, char *,
1892                   offset, buffer, length, &pos, &count);
1893
1894         up(&ip6t_mutex);
1895
1896         *start = (char *)((unsigned long)count - offset);
1897         return pos;
1898 }
1899
1900 static struct { char *name; get_info_t *get_info; } ip6t_proc_entry[] =
1901 { { "ip6_tables_names", ip6t_get_tables },
1902   { "ip6_tables_targets", ip6t_get_targets },
1903   { "ip6_tables_matches", ip6t_get_matches },
1904   { NULL, NULL} };
1905 #endif /*CONFIG_PROC_FS*/
1906
1907 static int __init init(void)
1908 {
1909         int ret;
1910
1911         /* Noone else will be downing sem now, so we won't sleep */
1912         down(&ip6t_mutex);
1913         list_append(&ip6t_target, &ip6t_standard_target);
1914         list_append(&ip6t_target, &ip6t_error_target);
1915         list_append(&ip6t_match, &tcp_matchstruct);
1916         list_append(&ip6t_match, &udp_matchstruct);
1917         list_append(&ip6t_match, &icmp6_matchstruct);
1918         up(&ip6t_mutex);
1919
1920         /* Register setsockopt */
1921         ret = nf_register_sockopt(&ip6t_sockopts);
1922         if (ret < 0) {
1923                 duprintf("Unable to register sockopts.\n");
1924                 return ret;
1925         }
1926
1927 #ifdef CONFIG_PROC_FS
1928         {
1929                 struct proc_dir_entry *proc;
1930                 int i;
1931
1932                 for (i = 0; ip6t_proc_entry[i].name; i++) {
1933                         proc = proc_net_create(ip6t_proc_entry[i].name, 0,
1934                                                ip6t_proc_entry[i].get_info);
1935                         if (!proc) {
1936                                 while (--i >= 0)
1937                                        proc_net_remove(ip6t_proc_entry[i].name);
1938                                 nf_unregister_sockopt(&ip6t_sockopts);
1939                                 return -ENOMEM;
1940                         }
1941                         proc->owner = THIS_MODULE;
1942                 }
1943         }
1944 #endif
1945
1946         printk("ip6_tables: (C) 2000-2002 Netfilter core team\n");
1947         return 0;
1948 }
1949
1950 static void __exit fini(void)
1951 {
1952         nf_unregister_sockopt(&ip6t_sockopts);
1953 #ifdef CONFIG_PROC_FS
1954         {
1955                 int i;
1956                 for (i = 0; ip6t_proc_entry[i].name; i++)
1957                         proc_net_remove(ip6t_proc_entry[i].name);
1958         }
1959 #endif
1960 }
1961
/* Table registration and traversal. */
EXPORT_SYMBOL(ip6t_register_table);
EXPORT_SYMBOL(ip6t_unregister_table);
EXPORT_SYMBOL(ip6t_do_table);

/* Match and target extension registration. */
EXPORT_SYMBOL(ip6t_register_match);
EXPORT_SYMBOL(ip6t_unregister_match);
EXPORT_SYMBOL(ip6t_register_target);
EXPORT_SYMBOL(ip6t_unregister_target);

EXPORT_SYMBOL(ip6t_ext_hdr);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);