ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29
30 #include <linux/netfilter_ipv4/ip_tables.h>
31
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
35
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
39
/* dprintf(): printk() wrapper that expands to nothing unless
 * DEBUG_IP_FIREWALL is defined. */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(format, args...)
#else
#define dprintf(format, args...)  printk(format , ## args)
#endif

/* duprintf(): same idea, switched by DEBUG_IP_FIREWALL_USER. */
#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...)
#else
#define duprintf(format, args...) printk(format , ## args)
#endif

/* IP_NF_ASSERT(x): under CONFIG_NETFILTER_DEBUG, print the function,
 * file and line when x is false; otherwise expands to nothing (x is
 * then not evaluated at all). */
#ifndef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)
#else
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
	if (!(x))                                               \
		printk("IP_NF_ASSERT: %s:%s:%u\n",              \
		       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#endif

/* Round x up to the next multiple of SMP_CACHE_BYTES. */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))

/* Mutex protects lists (only traversed in user context). */
static DECLARE_MUTEX(ipt_mutex);

/* Lock assertions: both expect ipt_mutex to be held already, i.e. a
 * down_trylock() on it must fail (return non-zero). */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #include <linux/netfilter_ipv4/lockhelp.h>
71 #include <linux/netfilter_ipv4/listhelp.h>
72
73 #if 0
74 /* All the better to debug you with... */
75 #define static
76 #define inline
77 #endif
78
79 /*
80    We keep a set of rules for each CPU, so we can avoid write-locking
81    them in the softirq when updating the counters and therefore
82    only need to read-lock in the softirq; doing a write_lock_bh() in user
83    context stops packets coming through and allows user context to read
84    the counters or update the rules.
85
86    To be cache friendly on SMP, we arrange them like so:
87    [ n-entries ]
88    ... cache-align padding ...
89    [ n-entries ]
90
91    Hence the start of any table is given by get_table() below.  */
92
93 /* The table itself */
94 struct ipt_table_info
95 {
96         /* Size per table */
97         unsigned int size;
98         /* Number of entries: FIXME. --RR */
99         unsigned int number;
100         /* Initial number of entries. Needed for module usage count */
101         unsigned int initial_entries;
102
103         /* Entry points and underflows */
104         unsigned int hook_entry[NF_IP_NUMHOOKS];
105         unsigned int underflow[NF_IP_NUMHOOKS];
106
107         /* ipt_entry tables: one per CPU */
108         char entries[0] ____cacheline_aligned;
109 };
110
/* Lists of targets, matches and tables. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);

/* Add b bytes and p packets to counter pair c. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset of copy number p of the rules inside t->entries.  Without
 * CONFIG_SMP the offset is always 0. */
#ifndef CONFIG_SMP
#define TABLE_OFFSET(t,p) 0
#else
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#endif
121
122 #if 0
123 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
125 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
126 #endif
127
128 /* Returns whether matches rule or not. */
129 static inline int
130 ip_packet_match(const struct iphdr *ip,
131                 const char *indev,
132                 const char *outdev,
133                 const struct ipt_ip *ipinfo,
134                 int isfrag)
135 {
136         size_t i;
137         unsigned long ret;
138
139 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
140
141         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
142                   IPT_INV_SRCIP)
143             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
144                      IPT_INV_DSTIP)) {
145                 dprintf("Source or dest mismatch.\n");
146
147                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
148                         NIPQUAD(ip->saddr),
149                         NIPQUAD(ipinfo->smsk.s_addr),
150                         NIPQUAD(ipinfo->src.s_addr),
151                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
152                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
153                         NIPQUAD(ip->daddr),
154                         NIPQUAD(ipinfo->dmsk.s_addr),
155                         NIPQUAD(ipinfo->dst.s_addr),
156                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
157                 return 0;
158         }
159
160         /* Look for ifname matches; this should unroll nicely. */
161         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
162                 ret |= (((const unsigned long *)indev)[i]
163                         ^ ((const unsigned long *)ipinfo->iniface)[i])
164                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
165         }
166
167         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
168                 dprintf("VIA in mismatch (%s vs %s).%s\n",
169                         indev, ipinfo->iniface,
170                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
171                 return 0;
172         }
173
174         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
175                 ret |= (((const unsigned long *)outdev)[i]
176                         ^ ((const unsigned long *)ipinfo->outiface)[i])
177                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
178         }
179
180         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
181                 dprintf("VIA out mismatch (%s vs %s).%s\n",
182                         outdev, ipinfo->outiface,
183                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
184                 return 0;
185         }
186
187         /* Check specific protocol */
188         if (ipinfo->proto
189             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
190                 dprintf("Packet protocol %hi does not match %hi.%s\n",
191                         ip->protocol, ipinfo->proto,
192                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
193                 return 0;
194         }
195
196         /* If we have a fragment rule but the packet is not a fragment
197          * then we return zero */
198         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
199                 dprintf("Fragment rule but not fragment.%s\n",
200                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
201                 return 0;
202         }
203
204         return 1;
205 }
206
207 static inline int
208 ip_checkentry(const struct ipt_ip *ip)
209 {
210         if (ip->flags & ~IPT_F_MASK) {
211                 duprintf("Unknown flag bits set: %08X\n",
212                          ip->flags & ~IPT_F_MASK);
213                 return 0;
214         }
215         if (ip->invflags & ~IPT_INV_MASK) {
216                 duprintf("Unknown invflag bits set: %08X\n",
217                          ip->invflags & ~IPT_INV_MASK);
218                 return 0;
219         }
220         return 1;
221 }
222
223 static unsigned int
224 ipt_error(struct sk_buff **pskb,
225           const struct net_device *in,
226           const struct net_device *out,
227           unsigned int hooknum,
228           const void *targinfo,
229           void *userinfo)
230 {
231         if (net_ratelimit())
232                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
233
234         return NF_DROP;
235 }
236
237 static inline
238 int do_match(struct ipt_entry_match *m,
239              const struct sk_buff *skb,
240              const struct net_device *in,
241              const struct net_device *out,
242              int offset,
243              int *hotdrop)
244 {
245         /* Stop iteration if it doesn't match */
246         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
247                 return 1;
248         else
249                 return 0;
250 }
251
/* Return the entry located `offset' bytes past `base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *start = base;

	return (struct ipt_entry *)(start + offset);
}
257
258 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
259 unsigned int
260 ipt_do_table(struct sk_buff **pskb,
261              unsigned int hook,
262              const struct net_device *in,
263              const struct net_device *out,
264              struct ipt_table *table,
265              void *userdata)
266 {
267         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
268         u_int16_t offset;
269         struct iphdr *ip;
270         u_int16_t datalen;
271         int hotdrop = 0;
272         /* Initializing verdict to NF_DROP keeps gcc happy. */
273         unsigned int verdict = NF_DROP;
274         const char *indev, *outdev;
275         void *table_base;
276         struct ipt_entry *e, *back;
277
278         /* Initialization */
279         ip = (*pskb)->nh.iph;
280         datalen = (*pskb)->len - ip->ihl * 4;
281         indev = in ? in->name : nulldevname;
282         outdev = out ? out->name : nulldevname;
283         /* We handle fragments by dealing with the first fragment as
284          * if it was a normal packet.  All other fragments are treated
285          * normally, except that they will NEVER match rules that ask
286          * things we don't know, ie. tcp syn flag or ports).  If the
287          * rule is also a fragment-specific rule, non-fragments won't
288          * match it. */
289         offset = ntohs(ip->frag_off) & IP_OFFSET;
290
291         read_lock_bh(&table->lock);
292         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
293         table_base = (void *)table->private->entries
294                 + TABLE_OFFSET(table->private, smp_processor_id());
295         e = get_entry(table_base, table->private->hook_entry[hook]);
296
297 #ifdef CONFIG_NETFILTER_DEBUG
298         /* Check noone else using our table */
299         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
300             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
301                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
302                        smp_processor_id(),
303                        table->name,
304                        &((struct ipt_entry *)table_base)->comefrom,
305                        ((struct ipt_entry *)table_base)->comefrom);
306         }
307         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
308 #endif
309
310         /* For return from builtin chain */
311         back = get_entry(table_base, table->private->underflow[hook]);
312
313         do {
314                 IP_NF_ASSERT(e);
315                 IP_NF_ASSERT(back);
316                 (*pskb)->nfcache |= e->nfcache;
317                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
318                         struct ipt_entry_target *t;
319
320                         if (IPT_MATCH_ITERATE(e, do_match,
321                                               *pskb, in, out,
322                                               offset, &hotdrop) != 0)
323                                 goto no_match;
324
325                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
326
327                         t = ipt_get_target(e);
328                         IP_NF_ASSERT(t->u.kernel.target);
329                         /* Standard target? */
330                         if (!t->u.kernel.target->target) {
331                                 int v;
332
333                                 v = ((struct ipt_standard_target *)t)->verdict;
334                                 if (v < 0) {
335                                         /* Pop from stack? */
336                                         if (v != IPT_RETURN) {
337                                                 verdict = (unsigned)(-v) - 1;
338                                                 break;
339                                         }
340                                         e = back;
341                                         back = get_entry(table_base,
342                                                          back->comefrom);
343                                         continue;
344                                 }
345                                 if (table_base + v
346                                     != (void *)e + e->next_offset) {
347                                         /* Save old back ptr in next entry */
348                                         struct ipt_entry *next
349                                                 = (void *)e + e->next_offset;
350                                         next->comefrom
351                                                 = (void *)back - table_base;
352                                         /* set back pointer to next entry */
353                                         back = next;
354                                 }
355
356                                 e = get_entry(table_base, v);
357                         } else {
358                                 /* Targets which reenter must return
359                                    abs. verdicts */
360 #ifdef CONFIG_NETFILTER_DEBUG
361                                 ((struct ipt_entry *)table_base)->comefrom
362                                         = 0xeeeeeeec;
363 #endif
364                                 verdict = t->u.kernel.target->target(pskb,
365                                                                      in, out,
366                                                                      hook,
367                                                                      t->data,
368                                                                      userdata);
369
370 #ifdef CONFIG_NETFILTER_DEBUG
371                                 if (((struct ipt_entry *)table_base)->comefrom
372                                     != 0xeeeeeeec
373                                     && verdict == IPT_CONTINUE) {
374                                         printk("Target %s reentered!\n",
375                                                t->u.kernel.target->name);
376                                         verdict = NF_DROP;
377                                 }
378                                 ((struct ipt_entry *)table_base)->comefrom
379                                         = 0x57acc001;
380 #endif
381                                 /* Target might have changed stuff. */
382                                 ip = (*pskb)->nh.iph;
383                                 datalen = (*pskb)->len - ip->ihl * 4;
384
385                                 if (verdict == IPT_CONTINUE)
386                                         e = (void *)e + e->next_offset;
387                                 else
388                                         /* Verdict */
389                                         break;
390                         }
391                 } else {
392
393                 no_match:
394                         e = (void *)e + e->next_offset;
395                 }
396         } while (!hotdrop);
397
398 #ifdef CONFIG_NETFILTER_DEBUG
399         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
400 #endif
401         read_unlock_bh(&table->lock);
402
403 #ifdef DEBUG_ALLOW_ALL
404         return NF_ACCEPT;
405 #else
406         if (hotdrop)
407                 return NF_DROP;
408         else return verdict;
409 #endif
410 }
411
412 /* If it succeeds, returns element and locks mutex */
413 static inline void *
414 find_inlist_lock_noload(struct list_head *head,
415                         const char *name,
416                         int *error,
417                         struct semaphore *mutex)
418 {
419         void *ret;
420
421 #if 0
422         duprintf("find_inlist: searching for `%s' in %s.\n",
423                  name, head == &ipt_target ? "ipt_target"
424                  : head == &ipt_match ? "ipt_match"
425                  : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
426 #endif
427
428         *error = down_interruptible(mutex);
429         if (*error != 0)
430                 return NULL;
431
432         ret = list_named_find(head, name);
433         if (!ret) {
434                 *error = -ENOENT;
435                 up(mutex);
436         }
437         return ret;
438 }
439
#ifdef CONFIG_KMOD
/*
 * Like find_inlist_lock_noload(), but when the first lookup fails a
 * module named "<prefix><name>" is requested and the lookup is tried
 * once more.  The result and *error are those of the last lookup.
 */
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *found = find_inlist_lock_noload(head, name, error, mutex);

	if (found)
		return found;

	duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
	request_module("%s%s", prefix, name);

	return find_inlist_lock_noload(head, name, error, mutex);
}
#else
/* Without CONFIG_KMOD the prefix is ignored and nothing is loaded. */
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
462
463 static inline struct ipt_table *
464 find_table_lock(const char *name, int *error, struct semaphore *mutex)
465 {
466         return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
467 }
468
469 static inline struct ipt_match *
470 find_match_lock(const char *name, int *error, struct semaphore *mutex)
471 {
472         return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
473 }
474
475 static inline struct ipt_target *
476 find_target_lock(const char *name, int *error, struct semaphore *mutex)
477 {
478         return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
479 }
480
481 /* All zeroes == unconditional rule. */
482 static inline int
483 unconditional(const struct ipt_ip *ip)
484 {
485         unsigned int i;
486
487         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
488                 if (((__u32 *)ip)[i])
489                         return 0;
490
491         return 1;
492 }
493
494 /* Figures out from what hook each rule can be called: returns 0 if
495    there are loops.  Puts hook bitmask in comefrom. */
496 static int
497 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
498 {
499         unsigned int hook;
500
501         /* No recursion; use packet counter to save back ptrs (reset
502            to 0 as we leave), and comefrom to save source hook bitmask */
503         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
504                 unsigned int pos = newinfo->hook_entry[hook];
505                 struct ipt_entry *e
506                         = (struct ipt_entry *)(newinfo->entries + pos);
507
508                 if (!(valid_hooks & (1 << hook)))
509                         continue;
510
511                 /* Set initial back pointer. */
512                 e->counters.pcnt = pos;
513
514                 for (;;) {
515                         struct ipt_standard_target *t
516                                 = (void *)ipt_get_target(e);
517
518                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
519                                 printk("iptables: loop hook %u pos %u %08X.\n",
520                                        hook, pos, e->comefrom);
521                                 return 0;
522                         }
523                         e->comefrom
524                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
525
526                         /* Unconditional return/END. */
527                         if (e->target_offset == sizeof(struct ipt_entry)
528                             && (strcmp(t->target.u.user.name,
529                                        IPT_STANDARD_TARGET) == 0)
530                             && t->verdict < 0
531                             && unconditional(&e->ip)) {
532                                 unsigned int oldpos, size;
533
534                                 /* Return: backtrack through the last
535                                    big jump. */
536                                 do {
537                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
538 #ifdef DEBUG_IP_FIREWALL_USER
539                                         if (e->comefrom
540                                             & (1 << NF_IP_NUMHOOKS)) {
541                                                 duprintf("Back unset "
542                                                          "on hook %u "
543                                                          "rule %u\n",
544                                                          hook, pos);
545                                         }
546 #endif
547                                         oldpos = pos;
548                                         pos = e->counters.pcnt;
549                                         e->counters.pcnt = 0;
550
551                                         /* We're at the start. */
552                                         if (pos == oldpos)
553                                                 goto next;
554
555                                         e = (struct ipt_entry *)
556                                                 (newinfo->entries + pos);
557                                 } while (oldpos == pos + e->next_offset);
558
559                                 /* Move along one */
560                                 size = e->next_offset;
561                                 e = (struct ipt_entry *)
562                                         (newinfo->entries + pos + size);
563                                 e->counters.pcnt = pos;
564                                 pos += size;
565                         } else {
566                                 int newpos = t->verdict;
567
568                                 if (strcmp(t->target.u.user.name,
569                                            IPT_STANDARD_TARGET) == 0
570                                     && newpos >= 0) {
571                                         /* This a jump; chase it. */
572                                         duprintf("Jump rule %u -> %u\n",
573                                                  pos, newpos);
574                                 } else {
575                                         /* ... this is a fallthru */
576                                         newpos = pos + e->next_offset;
577                                 }
578                                 e = (struct ipt_entry *)
579                                         (newinfo->entries + newpos);
580                                 e->counters.pcnt = pos;
581                                 pos = newpos;
582                         }
583                 }
584                 next:
585                 duprintf("Finished chain %u\n", hook);
586         }
587         return 1;
588 }
589
590 static inline int
591 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
592 {
593         if (i && (*i)-- == 0)
594                 return 1;
595
596         if (m->u.kernel.match->destroy)
597                 m->u.kernel.match->destroy(m->data,
598                                            m->u.match_size - sizeof(*m));
599         module_put(m->u.kernel.match->me);
600         return 0;
601 }
602
603 static inline int
604 standard_check(const struct ipt_entry_target *t,
605                unsigned int max_offset)
606 {
607         struct ipt_standard_target *targ = (void *)t;
608
609         /* Check standard info. */
610         if (t->u.target_size
611             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
612                 duprintf("standard_check: target size %u != %u\n",
613                          t->u.target_size,
614                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
615                 return 0;
616         }
617
618         if (targ->verdict >= 0
619             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
620                 duprintf("ipt_standard_check: bad verdict (%i)\n",
621                          targ->verdict);
622                 return 0;
623         }
624
625         if (targ->verdict < -NF_MAX_VERDICT - 1) {
626                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
627                          targ->verdict);
628                 return 0;
629         }
630         return 1;
631 }
632
633 static inline int
634 check_match(struct ipt_entry_match *m,
635             const char *name,
636             const struct ipt_ip *ip,
637             unsigned int hookmask,
638             unsigned int *i)
639 {
640         int ret;
641         struct ipt_match *match;
642
643         match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
644         if (!match) {
645                 duprintf("check_match: `%s' not found\n", m->u.user.name);
646                 return ret;
647         }
648         if (!try_module_get(match->me)) {
649                 up(&ipt_mutex);
650                 return -ENOENT;
651         }
652         m->u.kernel.match = match;
653         up(&ipt_mutex);
654
655         if (m->u.kernel.match->checkentry
656             && !m->u.kernel.match->checkentry(name, ip, m->data,
657                                               m->u.match_size - sizeof(*m),
658                                               hookmask)) {
659                 module_put(m->u.kernel.match->me);
660                 duprintf("ip_tables: check failed for `%s'.\n",
661                          m->u.kernel.match->name);
662                 return -EINVAL;
663         }
664
665         (*i)++;
666         return 0;
667 }
668
669 static struct ipt_target ipt_standard_target;
670
671 static inline int
672 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
673             unsigned int *i)
674 {
675         struct ipt_entry_target *t;
676         struct ipt_target *target;
677         int ret;
678         unsigned int j;
679
680         if (!ip_checkentry(&e->ip)) {
681                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
682                 return -EINVAL;
683         }
684
685         j = 0;
686         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
687         if (ret != 0)
688                 goto cleanup_matches;
689
690         t = ipt_get_target(e);
691         target = find_target_lock(t->u.user.name, &ret, &ipt_mutex);
692         if (!target) {
693                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
694                 goto cleanup_matches;
695         }
696         if (!try_module_get(target->me)) {
697                 up(&ipt_mutex);
698                 ret = -ENOENT;
699                 goto cleanup_matches;
700         }
701         t->u.kernel.target = target;
702         up(&ipt_mutex);
703
704         if (t->u.kernel.target == &ipt_standard_target) {
705                 if (!standard_check(t, size)) {
706                         ret = -EINVAL;
707                         goto cleanup_matches;
708                 }
709         } else if (t->u.kernel.target->checkentry
710                    && !t->u.kernel.target->checkentry(name, e, t->data,
711                                                       t->u.target_size
712                                                       - sizeof(*t),
713                                                       e->comefrom)) {
714                 module_put(t->u.kernel.target->me);
715                 duprintf("ip_tables: check failed for `%s'.\n",
716                          t->u.kernel.target->name);
717                 ret = -EINVAL;
718                 goto cleanup_matches;
719         }
720
721         (*i)++;
722         return 0;
723
724  cleanup_matches:
725         IPT_MATCH_ITERATE(e, cleanup_match, &j);
726         return ret;
727 }
728
729 static inline int
730 check_entry_size_and_hooks(struct ipt_entry *e,
731                            struct ipt_table_info *newinfo,
732                            unsigned char *base,
733                            unsigned char *limit,
734                            const unsigned int *hook_entries,
735                            const unsigned int *underflows,
736                            unsigned int *i)
737 {
738         unsigned int h;
739
740         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
741             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
742                 duprintf("Bad offset %p\n", e);
743                 return -EINVAL;
744         }
745
746         if (e->next_offset
747             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
748                 duprintf("checking: element %p size %u\n",
749                          e, e->next_offset);
750                 return -EINVAL;
751         }
752
753         /* Check hooks & underflows */
754         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
755                 if ((unsigned char *)e - base == hook_entries[h])
756                         newinfo->hook_entry[h] = hook_entries[h];
757                 if ((unsigned char *)e - base == underflows[h])
758                         newinfo->underflow[h] = underflows[h];
759         }
760
761         /* FIXME: underflows must be unconditional, standard verdicts
762            < 0 (not IPT_RETURN). --RR */
763
764         /* Clear counters and comefrom */
765         e->counters = ((struct ipt_counters) { 0, 0 });
766         e->comefrom = 0;
767
768         (*i)++;
769         return 0;
770 }
771
772 static inline int
773 cleanup_entry(struct ipt_entry *e, unsigned int *i)
774 {
775         struct ipt_entry_target *t;
776
777         if (i && (*i)-- == 0)
778                 return 1;
779
780         /* Cleanup all matches */
781         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
782         t = ipt_get_target(e);
783         if (t->u.kernel.target->destroy)
784                 t->u.kernel.target->destroy(t->data,
785                                             t->u.target_size - sizeof(*t));
786         module_put(t->u.kernel.target->me);
787         return 0;
788 }
789
790 /* Checks and translates the user-supplied table segment (held in
791    newinfo) */
792 static int
793 translate_table(const char *name,
794                 unsigned int valid_hooks,
795                 struct ipt_table_info *newinfo,
796                 unsigned int size,
797                 unsigned int number,
798                 const unsigned int *hook_entries,
799                 const unsigned int *underflows)
800 {
801         unsigned int i;
802         int ret;
803
804         newinfo->size = size;
805         newinfo->number = number;
806
807         /* Init all hooks to impossible value. */
808         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
809                 newinfo->hook_entry[i] = 0xFFFFFFFF;
810                 newinfo->underflow[i] = 0xFFFFFFFF;
811         }
812
813         duprintf("translate_table: size %u\n", newinfo->size);
814         i = 0;
815         /* Walk through entries, checking offsets. */
816         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
817                                 check_entry_size_and_hooks,
818                                 newinfo,
819                                 newinfo->entries,
820                                 newinfo->entries + size,
821                                 hook_entries, underflows, &i);
822         if (ret != 0)
823                 return ret;
824
825         if (i != number) {
826                 duprintf("translate_table: %u not %u entries\n",
827                          i, number);
828                 return -EINVAL;
829         }
830
831         /* Check hooks all assigned */
832         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
833                 /* Only hooks which are valid */
834                 if (!(valid_hooks & (1 << i)))
835                         continue;
836                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
837                         duprintf("Invalid hook entry %u %u\n",
838                                  i, hook_entries[i]);
839                         return -EINVAL;
840                 }
841                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
842                         duprintf("Invalid underflow %u %u\n",
843                                  i, underflows[i]);
844                         return -EINVAL;
845                 }
846         }
847
848         if (!mark_source_chains(newinfo, valid_hooks))
849                 return -ELOOP;
850
851         /* Finally, each sanity check must pass */
852         i = 0;
853         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
854                                 check_entry, name, size, &i);
855
856         if (ret != 0) {
857                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
858                                   cleanup_entry, &i);
859                 return ret;
860         }
861
862         /* And one copy for every other CPU */
863         for (i = 1; i < NR_CPUS; i++) {
864                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
865                        newinfo->entries,
866                        SMP_ALIGN(newinfo->size));
867         }
868
869         return ret;
870 }
871
872 static struct ipt_table_info *
873 replace_table(struct ipt_table *table,
874               unsigned int num_counters,
875               struct ipt_table_info *newinfo,
876               int *error)
877 {
878         struct ipt_table_info *oldinfo;
879
880 #ifdef CONFIG_NETFILTER_DEBUG
881         {
882                 struct ipt_entry *table_base;
883                 unsigned int i;
884
885                 for (i = 0; i < NR_CPUS; i++) {
886                         table_base =
887                                 (void *)newinfo->entries
888                                 + TABLE_OFFSET(newinfo, i);
889
890                         table_base->comefrom = 0xdead57ac;
891                 }
892         }
893 #endif
894
895         /* Do the substitution. */
896         write_lock_bh(&table->lock);
897         /* Check inside lock: is the old number correct? */
898         if (num_counters != table->private->number) {
899                 duprintf("num_counters != table->private->number (%u/%u)\n",
900                          num_counters, table->private->number);
901                 write_unlock_bh(&table->lock);
902                 *error = -EAGAIN;
903                 return NULL;
904         }
905         oldinfo = table->private;
906         table->private = newinfo;
907         newinfo->initial_entries = oldinfo->initial_entries;
908         write_unlock_bh(&table->lock);
909
910         return oldinfo;
911 }
912
913 /* Gets counters. */
914 static inline int
915 add_entry_to_counter(const struct ipt_entry *e,
916                      struct ipt_counters total[],
917                      unsigned int *i)
918 {
919         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
920
921         (*i)++;
922         return 0;
923 }
924
925 static void
926 get_counters(const struct ipt_table_info *t,
927              struct ipt_counters counters[])
928 {
929         unsigned int cpu;
930         unsigned int i;
931
932         for (cpu = 0; cpu < NR_CPUS; cpu++) {
933                 i = 0;
934                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
935                                   t->size,
936                                   add_entry_to_counter,
937                                   counters,
938                                   &i);
939         }
940 }
941
942 static int
943 copy_entries_to_user(unsigned int total_size,
944                      struct ipt_table *table,
945                      void *userptr)
946 {
947         unsigned int off, num, countersize;
948         struct ipt_entry *e;
949         struct ipt_counters *counters;
950         int ret = 0;
951
952         /* We need atomic snapshot of counters: rest doesn't change
953            (other than comefrom, which userspace doesn't care
954            about). */
955         countersize = sizeof(struct ipt_counters) * table->private->number;
956         counters = vmalloc(countersize);
957
958         if (counters == NULL)
959                 return -ENOMEM;
960
961         /* First, sum counters... */
962         memset(counters, 0, countersize);
963         write_lock_bh(&table->lock);
964         get_counters(table->private, counters);
965         write_unlock_bh(&table->lock);
966
967         /* ... then copy entire thing from CPU 0... */
968         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
969                 ret = -EFAULT;
970                 goto free_counters;
971         }
972
973         /* FIXME: use iterator macros --RR */
974         /* ... then go back and fix counters and names */
975         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
976                 unsigned int i;
977                 struct ipt_entry_match *m;
978                 struct ipt_entry_target *t;
979
980                 e = (struct ipt_entry *)(table->private->entries + off);
981                 if (copy_to_user(userptr + off
982                                  + offsetof(struct ipt_entry, counters),
983                                  &counters[num],
984                                  sizeof(counters[num])) != 0) {
985                         ret = -EFAULT;
986                         goto free_counters;
987                 }
988
989                 for (i = sizeof(struct ipt_entry);
990                      i < e->target_offset;
991                      i += m->u.match_size) {
992                         m = (void *)e + i;
993
994                         if (copy_to_user(userptr + off + i
995                                          + offsetof(struct ipt_entry_match,
996                                                     u.user.name),
997                                          m->u.kernel.match->name,
998                                          strlen(m->u.kernel.match->name)+1)
999                             != 0) {
1000                                 ret = -EFAULT;
1001                                 goto free_counters;
1002                         }
1003                 }
1004
1005                 t = ipt_get_target(e);
1006                 if (copy_to_user(userptr + off + e->target_offset
1007                                  + offsetof(struct ipt_entry_target,
1008                                             u.user.name),
1009                                  t->u.kernel.target->name,
1010                                  strlen(t->u.kernel.target->name)+1) != 0) {
1011                         ret = -EFAULT;
1012                         goto free_counters;
1013                 }
1014         }
1015
1016  free_counters:
1017         vfree(counters);
1018         return ret;
1019 }
1020
1021 static int
1022 get_entries(const struct ipt_get_entries *entries,
1023             struct ipt_get_entries *uptr)
1024 {
1025         int ret;
1026         struct ipt_table *t;
1027
1028         t = find_table_lock(entries->name, &ret, &ipt_mutex);
1029         if (t) {
1030                 duprintf("t->private->number = %u\n",
1031                          t->private->number);
1032                 if (entries->size == t->private->size)
1033                         ret = copy_entries_to_user(t->private->size,
1034                                                    t, uptr->entrytable);
1035                 else {
1036                         duprintf("get_entries: I've got %u not %u!\n",
1037                                  t->private->size,
1038                                  entries->size);
1039                         ret = -EINVAL;
1040                 }
1041                 up(&ipt_mutex);
1042         } else
1043                 duprintf("get_entries: Can't find %s!\n",
1044                          entries->name);
1045
1046         return ret;
1047 }
1048
1049 static int
1050 do_replace(void *user, unsigned int len)
1051 {
1052         int ret;
1053         struct ipt_replace tmp;
1054         struct ipt_table *t;
1055         struct ipt_table_info *newinfo, *oldinfo;
1056         struct ipt_counters *counters;
1057
1058         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1059                 return -EFAULT;
1060
1061         /* Hack: Causes ipchains to give correct error msg --RR */
1062         if (len != sizeof(tmp) + tmp.size)
1063                 return -ENOPROTOOPT;
1064
1065         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1066         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1067                 return -ENOMEM;
1068
1069         newinfo = vmalloc(sizeof(struct ipt_table_info)
1070                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1071         if (!newinfo)
1072                 return -ENOMEM;
1073
1074         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1075                            tmp.size) != 0) {
1076                 ret = -EFAULT;
1077                 goto free_newinfo;
1078         }
1079
1080         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1081         if (!counters) {
1082                 ret = -ENOMEM;
1083                 goto free_newinfo;
1084         }
1085         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1086
1087         ret = translate_table(tmp.name, tmp.valid_hooks,
1088                               newinfo, tmp.size, tmp.num_entries,
1089                               tmp.hook_entry, tmp.underflow);
1090         if (ret != 0)
1091                 goto free_newinfo_counters;
1092
1093         duprintf("ip_tables: Translated table\n");
1094
1095         t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1096         if (!t)
1097                 goto free_newinfo_counters_untrans;
1098
1099         /* You lied! */
1100         if (tmp.valid_hooks != t->valid_hooks) {
1101                 duprintf("Valid hook crap: %08X vs %08X\n",
1102                          tmp.valid_hooks, t->valid_hooks);
1103                 ret = -EINVAL;
1104                 goto free_newinfo_counters_untrans_unlock;
1105         }
1106
1107         /* Get a reference in advance, we're not allowed fail later */
1108         if (!try_module_get(t->me)) {
1109                 ret = -EBUSY;
1110                 goto free_newinfo_counters_untrans_unlock;
1111         }
1112
1113
1114         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1115         if (!oldinfo)
1116                 goto put_module;
1117
1118         /* Update module usage count based on number of rules */
1119         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1120                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1121         if ((oldinfo->number > oldinfo->initial_entries) || 
1122             (newinfo->number <= oldinfo->initial_entries)) 
1123                 module_put(t->me);
1124         if ((oldinfo->number > oldinfo->initial_entries) &&
1125             (newinfo->number <= oldinfo->initial_entries))
1126                 module_put(t->me);
1127
1128         /* Get the old counters. */
1129         get_counters(oldinfo, counters);
1130         /* Decrease module usage counts and free resource */
1131         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1132         vfree(oldinfo);
1133         /* Silent error: too late now. */
1134         copy_to_user(tmp.counters, counters,
1135                      sizeof(struct ipt_counters) * tmp.num_counters);
1136         vfree(counters);
1137         up(&ipt_mutex);
1138         return 0;
1139
1140  put_module:
1141         module_put(t->me);
1142  free_newinfo_counters_untrans_unlock:
1143         up(&ipt_mutex);
1144  free_newinfo_counters_untrans:
1145         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1146  free_newinfo_counters:
1147         vfree(counters);
1148  free_newinfo:
1149         vfree(newinfo);
1150         return ret;
1151 }
1152
1153 /* We're lazy, and add to the first CPU; overflow works its fey magic
1154  * and everything is OK. */
1155 static inline int
1156 add_counter_to_entry(struct ipt_entry *e,
1157                      const struct ipt_counters addme[],
1158                      unsigned int *i)
1159 {
1160 #if 0
1161         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1162                  *i,
1163                  (long unsigned int)e->counters.pcnt,
1164                  (long unsigned int)e->counters.bcnt,
1165                  (long unsigned int)addme[*i].pcnt,
1166                  (long unsigned int)addme[*i].bcnt);
1167 #endif
1168
1169         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1170
1171         (*i)++;
1172         return 0;
1173 }
1174
1175 static int
1176 do_add_counters(void *user, unsigned int len)
1177 {
1178         unsigned int i;
1179         struct ipt_counters_info tmp, *paddc;
1180         struct ipt_table *t;
1181         int ret;
1182
1183         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1184                 return -EFAULT;
1185
1186         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1187                 return -EINVAL;
1188
1189         paddc = vmalloc(len);
1190         if (!paddc)
1191                 return -ENOMEM;
1192
1193         if (copy_from_user(paddc, user, len) != 0) {
1194                 ret = -EFAULT;
1195                 goto free;
1196         }
1197
1198         t = find_table_lock(tmp.name, &ret, &ipt_mutex);
1199         if (!t)
1200                 goto free;
1201
1202         write_lock_bh(&t->lock);
1203         if (t->private->number != paddc->num_counters) {
1204                 ret = -EINVAL;
1205                 goto unlock_up_free;
1206         }
1207
1208         i = 0;
1209         IPT_ENTRY_ITERATE(t->private->entries,
1210                           t->private->size,
1211                           add_counter_to_entry,
1212                           paddc->counters,
1213                           &i);
1214  unlock_up_free:
1215         write_unlock_bh(&t->lock);
1216         up(&ipt_mutex);
1217  free:
1218         vfree(paddc);
1219
1220         return ret;
1221 }
1222
1223 static int
1224 do_ipt_set_ctl(struct sock *sk, int cmd, void *user, unsigned int len)
1225 {
1226         int ret;
1227
1228         if (!capable(CAP_NET_ADMIN))
1229                 return -EPERM;
1230
1231         switch (cmd) {
1232         case IPT_SO_SET_REPLACE:
1233                 ret = do_replace(user, len);
1234                 break;
1235
1236         case IPT_SO_SET_ADD_COUNTERS:
1237                 ret = do_add_counters(user, len);
1238                 break;
1239
1240         default:
1241                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1242                 ret = -EINVAL;
1243         }
1244
1245         return ret;
1246 }
1247
1248 static int
1249 do_ipt_get_ctl(struct sock *sk, int cmd, void *user, int *len)
1250 {
1251         int ret;
1252
1253         if (!capable(CAP_NET_ADMIN))
1254                 return -EPERM;
1255
1256         switch (cmd) {
1257         case IPT_SO_GET_INFO: {
1258                 char name[IPT_TABLE_MAXNAMELEN];
1259                 struct ipt_table *t;
1260
1261                 if (*len != sizeof(struct ipt_getinfo)) {
1262                         duprintf("length %u != %u\n", *len,
1263                                  sizeof(struct ipt_getinfo));
1264                         ret = -EINVAL;
1265                         break;
1266                 }
1267
1268                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1269                         ret = -EFAULT;
1270                         break;
1271                 }
1272                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1273                 t = find_table_lock(name, &ret, &ipt_mutex);
1274                 if (t) {
1275                         struct ipt_getinfo info;
1276
1277                         info.valid_hooks = t->valid_hooks;
1278                         memcpy(info.hook_entry, t->private->hook_entry,
1279                                sizeof(info.hook_entry));
1280                         memcpy(info.underflow, t->private->underflow,
1281                                sizeof(info.underflow));
1282                         info.num_entries = t->private->number;
1283                         info.size = t->private->size;
1284                         strcpy(info.name, name);
1285
1286                         if (copy_to_user(user, &info, *len) != 0)
1287                                 ret = -EFAULT;
1288                         else
1289                                 ret = 0;
1290
1291                         up(&ipt_mutex);
1292                 }
1293         }
1294         break;
1295
1296         case IPT_SO_GET_ENTRIES: {
1297                 struct ipt_get_entries get;
1298
1299                 if (*len < sizeof(get)) {
1300                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1301                         ret = -EINVAL;
1302                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1303                         ret = -EFAULT;
1304                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1305                         duprintf("get_entries: %u != %u\n", *len,
1306                                  sizeof(struct ipt_get_entries) + get.size);
1307                         ret = -EINVAL;
1308                 } else
1309                         ret = get_entries(&get, user);
1310                 break;
1311         }
1312
1313         default:
1314                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1315                 ret = -EINVAL;
1316         }
1317
1318         return ret;
1319 }
1320
1321 /* Registration hooks for targets. */
1322 int
1323 ipt_register_target(struct ipt_target *target)
1324 {
1325         int ret;
1326
1327         ret = down_interruptible(&ipt_mutex);
1328         if (ret != 0)
1329                 return ret;
1330
1331         if (!list_named_insert(&ipt_target, target)) {
1332                 duprintf("ipt_register_target: `%s' already in list!\n",
1333                          target->name);
1334                 ret = -EINVAL;
1335         }
1336         up(&ipt_mutex);
1337         return ret;
1338 }
1339
1340 void
1341 ipt_unregister_target(struct ipt_target *target)
1342 {
1343         down(&ipt_mutex);
1344         LIST_DELETE(&ipt_target, target);
1345         up(&ipt_mutex);
1346 }
1347
1348 int
1349 ipt_register_match(struct ipt_match *match)
1350 {
1351         int ret;
1352
1353         ret = down_interruptible(&ipt_mutex);
1354         if (ret != 0)
1355                 return ret;
1356
1357         if (!list_named_insert(&ipt_match, match)) {
1358                 duprintf("ipt_register_match: `%s' already in list!\n",
1359                          match->name);
1360                 ret = -EINVAL;
1361         }
1362         up(&ipt_mutex);
1363
1364         return ret;
1365 }
1366
1367 void
1368 ipt_unregister_match(struct ipt_match *match)
1369 {
1370         down(&ipt_mutex);
1371         LIST_DELETE(&ipt_match, match);
1372         up(&ipt_mutex);
1373 }
1374
1375 int ipt_register_table(struct ipt_table *table)
1376 {
1377         int ret;
1378         struct ipt_table_info *newinfo;
1379         static struct ipt_table_info bootstrap
1380                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1381
1382         newinfo = vmalloc(sizeof(struct ipt_table_info)
1383                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1384         if (!newinfo)
1385                 return -ENOMEM;
1386
1387         memcpy(newinfo->entries, table->table->entries, table->table->size);
1388
1389         ret = translate_table(table->name, table->valid_hooks,
1390                               newinfo, table->table->size,
1391                               table->table->num_entries,
1392                               table->table->hook_entry,
1393                               table->table->underflow);
1394         if (ret != 0) {
1395                 vfree(newinfo);
1396                 return ret;
1397         }
1398
1399         ret = down_interruptible(&ipt_mutex);
1400         if (ret != 0) {
1401                 vfree(newinfo);
1402                 return ret;
1403         }
1404
1405         /* Don't autoload: we'd eat our tail... */
1406         if (list_named_find(&ipt_tables, table->name)) {
1407                 ret = -EEXIST;
1408                 goto free_unlock;
1409         }
1410
1411         /* Simplifies replace_table code. */
1412         table->private = &bootstrap;
1413         if (!replace_table(table, 0, newinfo, &ret))
1414                 goto free_unlock;
1415
1416         duprintf("table->private->number = %u\n",
1417                  table->private->number);
1418         
1419         /* save number of initial entries */
1420         table->private->initial_entries = table->private->number;
1421
1422         table->lock = RW_LOCK_UNLOCKED;
1423         list_prepend(&ipt_tables, table);
1424
1425  unlock:
1426         up(&ipt_mutex);
1427         return ret;
1428
1429  free_unlock:
1430         vfree(newinfo);
1431         goto unlock;
1432 }
1433
1434 void ipt_unregister_table(struct ipt_table *table)
1435 {
1436         down(&ipt_mutex);
1437         LIST_DELETE(&ipt_tables, table);
1438         up(&ipt_mutex);
1439
1440         /* Decrease module usage counts and free resources */
1441         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1442                           cleanup_entry, NULL);
1443         vfree(table->private);
1444 }
1445
/* Range test for a port: yields (min <= port <= max) XOR invert.
 * Callers pass invert as 0 or 1, giving a 0/1 result. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1455
1456 static int
1457 tcp_find_option(u_int8_t option,
1458                 const struct sk_buff *skb,
1459                 unsigned int optlen,
1460                 int invert,
1461                 int *hotdrop)
1462 {
1463         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1464         char opt[60 - sizeof(struct tcphdr)];
1465         unsigned int i;
1466
1467         duprintf("tcp_match: finding option\n");
1468         /* If we don't have the whole header, drop packet. */
1469         if (skb_copy_bits(skb, skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1470                           opt, optlen) < 0) {
1471                 *hotdrop = 1;
1472                 return 0;
1473         }
1474
1475         for (i = 0; i < optlen; ) {
1476                 if (opt[i] == option) return !invert;
1477                 if (opt[i] < 2) i++;
1478                 else i += opt[i+1]?:1;
1479         }
1480
1481         return invert;
1482 }
1483
1484 static int
1485 tcp_match(const struct sk_buff *skb,
1486           const struct net_device *in,
1487           const struct net_device *out,
1488           const void *matchinfo,
1489           int offset,
1490           int *hotdrop)
1491 {
1492         struct tcphdr tcph;
1493         const struct ipt_tcp *tcpinfo = matchinfo;
1494
1495         if (offset) {
1496                 /* To quote Alan:
1497
1498                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1499                    causes this. Its a cracker trying to break in by doing a
1500                    flag overwrite to pass the direction checks.
1501                 */
1502                 if (offset == 1) {
1503                         duprintf("Dropping evil TCP offset=1 frag.\n");
1504                         *hotdrop = 1;
1505                 }
1506                 /* Must not be a fragment. */
1507                 return 0;
1508         }
1509
1510 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1511
1512         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
1513                 /* We've been asked to examine this packet, and we
1514                    can't.  Hence, no choice but to drop. */
1515                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1516                 *hotdrop = 1;
1517                 return 0;
1518         }
1519
1520         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1521                         ntohs(tcph.source),
1522                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1523                 return 0;
1524         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1525                         ntohs(tcph.dest),
1526                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1527                 return 0;
1528         if (!FWINVTCP((((unsigned char *)&tcph)[13] & tcpinfo->flg_mask)
1529                       == tcpinfo->flg_cmp,
1530                       IPT_TCP_INV_FLAGS))
1531                 return 0;
1532         if (tcpinfo->option) {
1533                 if (tcph.doff * 4 < sizeof(tcph)) {
1534                         *hotdrop = 1;
1535                         return 0;
1536                 }
1537                 if (!tcp_find_option(tcpinfo->option, skb, tcph.doff*4 - sizeof(tcph),
1538                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1539                                      hotdrop))
1540                         return 0;
1541         }
1542         return 1;
1543 }
1544
1545 /* Called when user tries to insert an entry of this type. */
1546 static int
1547 tcp_checkentry(const char *tablename,
1548                const struct ipt_ip *ip,
1549                void *matchinfo,
1550                unsigned int matchsize,
1551                unsigned int hook_mask)
1552 {
1553         const struct ipt_tcp *tcpinfo = matchinfo;
1554
1555         /* Must specify proto == TCP, and no unknown invflags */
1556         return ip->proto == IPPROTO_TCP
1557                 && !(ip->invflags & IPT_INV_PROTO)
1558                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1559                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1560 }
1561
1562 static int
1563 udp_match(const struct sk_buff *skb,
1564           const struct net_device *in,
1565           const struct net_device *out,
1566           const void *matchinfo,
1567           int offset,
1568           int *hotdrop)
1569 {
1570         struct udphdr udph;
1571         const struct ipt_udp *udpinfo = matchinfo;
1572
1573         /* Must not be a fragment. */
1574         if (offset)
1575                 return 0;
1576
1577         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
1578                 /* We've been asked to examine this packet, and we
1579                    can't.  Hence, no choice but to drop. */
1580                 duprintf("Dropping evil UDP tinygram.\n");
1581                 *hotdrop = 1;
1582                 return 0;
1583         }
1584
1585         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1586                           ntohs(udph.source),
1587                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1588                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1589                               ntohs(udph.dest),
1590                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1591 }
1592
1593 /* Called when user tries to insert an entry of this type. */
1594 static int
1595 udp_checkentry(const char *tablename,
1596                const struct ipt_ip *ip,
1597                void *matchinfo,
1598                unsigned int matchinfosize,
1599                unsigned int hook_mask)
1600 {
1601         const struct ipt_udp *udpinfo = matchinfo;
1602
1603         /* Must specify proto == UDP, and no unknown invflags */
1604         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1605                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1606                          IPPROTO_UDP);
1607                 return 0;
1608         }
1609         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1610                 duprintf("ipt_udp: matchsize %u != %u\n",
1611                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1612                 return 0;
1613         }
1614         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1615                 duprintf("ipt_udp: unknown flags %X\n",
1616                          udpinfo->invflags);
1617                 return 0;
1618         }
1619
1620         return 1;
1621 }
1622
/* ICMP type/code test.  The base result is true when test_type is the
 * wildcard 0xFF, or when type equals test_type and code lies within
 * [min_code, max_code]; the returned value is that result XOR invert. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type)
			&& (min_code <= code) && (code <= max_code);

	return hit ^ invert;
}
1632
1633 static int
1634 icmp_match(const struct sk_buff *skb,
1635            const struct net_device *in,
1636            const struct net_device *out,
1637            const void *matchinfo,
1638            int offset,
1639            int *hotdrop)
1640 {
1641         struct icmphdr icmph;
1642         const struct ipt_icmp *icmpinfo = matchinfo;
1643
1644         /* Must not be a fragment. */
1645         if (offset)
1646                 return 0;
1647
1648         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph)) < 0){
1649                 /* We've been asked to examine this packet, and we
1650                    can't.  Hence, no choice but to drop. */
1651                 duprintf("Dropping evil ICMP tinygram.\n");
1652                 *hotdrop = 1;
1653                 return 0;
1654         }
1655
1656         return icmp_type_code_match(icmpinfo->type,
1657                                     icmpinfo->code[0],
1658                                     icmpinfo->code[1],
1659                                     icmph.type, icmph.code,
1660                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1661 }
1662
1663 /* Called when user tries to insert an entry of this type. */
1664 static int
1665 icmp_checkentry(const char *tablename,
1666            const struct ipt_ip *ip,
1667            void *matchinfo,
1668            unsigned int matchsize,
1669            unsigned int hook_mask)
1670 {
1671         const struct ipt_icmp *icmpinfo = matchinfo;
1672
1673         /* Must specify proto == ICMP, and no unknown invflags */
1674         return ip->proto == IPPROTO_ICMP
1675                 && !(ip->invflags & IPT_INV_PROTO)
1676                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1677                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1678 }
1679
1680 /* The built-in targets: standard (NULL) and error. */
1681 static struct ipt_target ipt_standard_target = {
1682         .name           = IPT_STANDARD_TARGET,
1683 };
1684
1685 static struct ipt_target ipt_error_target = {
1686         .name           = IPT_ERROR_TARGET,
1687         .target         = ipt_error,
1688 };
1689
1690 static struct nf_sockopt_ops ipt_sockopts = {
1691         .pf             = PF_INET,
1692         .set_optmin     = IPT_BASE_CTL,
1693         .set_optmax     = IPT_SO_SET_MAX+1,
1694         .set            = do_ipt_set_ctl,
1695         .get_optmin     = IPT_BASE_CTL,
1696         .get_optmax     = IPT_SO_GET_MAX+1,
1697         .get            = do_ipt_get_ctl,
1698 };
1699
1700 static struct ipt_match tcp_matchstruct = {
1701         .name           = "tcp",
1702         .match          = &tcp_match,
1703         .checkentry     = &tcp_checkentry,
1704 };
1705
1706 static struct ipt_match udp_matchstruct = {
1707         .name           = "udp",
1708         .match          = &udp_match,
1709         .checkentry     = &udp_checkentry,
1710 };
1711
1712 static struct ipt_match icmp_matchstruct = {
1713         .name           = "icmp",
1714         .match          = &icmp_match,
1715         .checkentry     = &icmp_checkentry,
1716 };
1717
1718 #ifdef CONFIG_PROC_FS
1719 static inline int print_name(const char *i,
1720                              off_t start_offset, char *buffer, int length,
1721                              off_t *pos, unsigned int *count)
1722 {
1723         if ((*count)++ >= start_offset) {
1724                 unsigned int namelen;
1725
1726                 namelen = sprintf(buffer + *pos, "%s\n",
1727                                   i + sizeof(struct list_head));
1728                 if (*pos + namelen > length) {
1729                         /* Stop iterating */
1730                         return 1;
1731                 }
1732                 *pos += namelen;
1733         }
1734         return 0;
1735 }
1736
1737 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1738 {
1739         off_t pos = 0;
1740         unsigned int count = 0;
1741
1742         if (down_interruptible(&ipt_mutex) != 0)
1743                 return 0;
1744
1745         LIST_FIND(&ipt_tables, print_name, void *,
1746                   offset, buffer, length, &pos, &count);
1747
1748         up(&ipt_mutex);
1749
1750         /* `start' hack - see fs/proc/generic.c line ~105 */
1751         *start=(char *)((unsigned long)count-offset);
1752         return pos;
1753 }
1754
1755 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1756 {
1757         off_t pos = 0;
1758         unsigned int count = 0;
1759
1760         if (down_interruptible(&ipt_mutex) != 0)
1761                 return 0;
1762
1763         LIST_FIND(&ipt_target, print_name, void *,
1764                   offset, buffer, length, &pos, &count);
1765         
1766         up(&ipt_mutex);
1767
1768         *start = (char *)((unsigned long)count - offset);
1769         return pos;
1770 }
1771
1772 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1773 {
1774         off_t pos = 0;
1775         unsigned int count = 0;
1776
1777         if (down_interruptible(&ipt_mutex) != 0)
1778                 return 0;
1779         
1780         LIST_FIND(&ipt_match, print_name, void *,
1781                   offset, buffer, length, &pos, &count);
1782
1783         up(&ipt_mutex);
1784
1785         *start = (char *)((unsigned long)count - offset);
1786         return pos;
1787 }
1788
1789 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1790 { { "ip_tables_names", ipt_get_tables },
1791   { "ip_tables_targets", ipt_get_targets },
1792   { "ip_tables_matches", ipt_get_matches },
1793   { NULL, NULL} };
1794 #endif /*CONFIG_PROC_FS*/
1795
1796 static int __init init(void)
1797 {
1798         int ret;
1799
1800         /* Noone else will be downing sem now, so we won't sleep */
1801         down(&ipt_mutex);
1802         list_append(&ipt_target, &ipt_standard_target);
1803         list_append(&ipt_target, &ipt_error_target);
1804         list_append(&ipt_match, &tcp_matchstruct);
1805         list_append(&ipt_match, &udp_matchstruct);
1806         list_append(&ipt_match, &icmp_matchstruct);
1807         up(&ipt_mutex);
1808
1809         /* Register setsockopt */
1810         ret = nf_register_sockopt(&ipt_sockopts);
1811         if (ret < 0) {
1812                 duprintf("Unable to register sockopts.\n");
1813                 return ret;
1814         }
1815
1816 #ifdef CONFIG_PROC_FS
1817         {
1818         struct proc_dir_entry *proc;
1819         int i;
1820
1821         for (i = 0; ipt_proc_entry[i].name; i++) {
1822                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1823                                        ipt_proc_entry[i].get_info);
1824                 if (!proc) {
1825                         while (--i >= 0)
1826                                 proc_net_remove(ipt_proc_entry[i].name);
1827                         nf_unregister_sockopt(&ipt_sockopts);
1828                         return -ENOMEM;
1829                 }
1830                 proc->owner = THIS_MODULE;
1831         }
1832         }
1833 #endif
1834
1835         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1836         return 0;
1837 }
1838
1839 static void __exit fini(void)
1840 {
1841         nf_unregister_sockopt(&ipt_sockopts);
1842 #ifdef CONFIG_PROC_FS
1843         {
1844         int i;
1845         for (i = 0; ipt_proc_entry[i].name; i++)
1846                 proc_net_remove(ipt_proc_entry[i].name);
1847         }
1848 #endif
1849 }
1850
/* Symbols exported to other modules: table API. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Match extension API. */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* Target extension API. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

/* Module load / unload entry points. */
module_init(init);
module_exit(fini);