upgrade to linux 2.6.10-1.12_FC2
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29
30 #include <linux/netfilter_ipv4/ip_tables.h>
31
/* Module metadata. */
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
35
/* Compile-time debug switches; all are disabled (commented out) here.
   DEBUG_IP_FIREWALL enables dprintf(), DEBUG_IP_FIREWALL_USER enables
   duprintf(), and DEBUG_ALLOW_ALL makes ipt_do_table() return
   NF_ACCEPT unconditionally. */
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
39
/* dprintf(): forwards to printk() when DEBUG_IP_FIREWALL is defined,
   otherwise expands to nothing (arguments are not evaluated). */
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...)  printk(format , ## args)
42 #else
43 #define dprintf(format, args...)
44 #endif
45
/* duprintf(): same as dprintf() but controlled by
   DEBUG_IP_FIREWALL_USER. */
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
48 #else
49 #define duprintf(format, args...)
50 #endif
51
/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, prints the function,
   file and line via printk() when x is false; it only reports and
   execution continues.  Without CONFIG_NETFILTER_DEBUG it expands to
   nothing, so x is not evaluated at all. */
52 #ifdef CONFIG_NETFILTER_DEBUG
53 #define IP_NF_ASSERT(x)                                         \
54 do {                                                            \
55         if (!(x))                                               \
56                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
57                        __FUNCTION__, __FILE__, __LINE__);       \
58 } while(0)
59 #else
60 #define IP_NF_ASSERT(x)
61 #endif
/* SMP_ALIGN(x): round x up to a multiple of SMP_CACHE_BYTES.  The mask
   arithmetic assumes SMP_CACHE_BYTES is a power of two -- TODO confirm
   against <linux/cache.h>. */
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
63
/* Semaphore passed to the find_*_lock() helpers in this file; it is
   released with up(&ipt_mutex). */
64 static DECLARE_MUTEX(ipt_mutex);
65
/* Both assertions are the same check: down_trylock() on ipt_mutex must
   report failure, which presumably means the mutex is already held by
   the caller.  The argument x is ignored.  They are defined before the
   helper headers below are included; presumably those headers use
   them -- verify against lockhelp.h/listhelp.h.
   NOTE(review): if the mutex were NOT held, down_trylock() would
   presumably acquire it and nothing here releases it -- confirm. */
66 /* Must have mutex */
67 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
68 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #include <linux/netfilter_ipv4/lockhelp.h>
70 #include <linux/netfilter_ipv4/listhelp.h>
71
/* Disabled debugging aid: when enabled, strips `static' and `inline'
   from every following definition. */
72 #if 0
73 /* All the better to debug you with... */
74 #define static
75 #define inline
76 #endif
77
78 /*
79    We keep a set of rules for each CPU, so we can avoid write-locking
80    them in the softirq when updating the counters and therefore
81    only need to read-lock in the softirq; doing a write_lock_bh() in user
82    context stops packets coming through and allows user context to read
83    the counters or update the rules.
84
85    To be cache friendly on SMP, we arrange them like so:
86    [ n-entries ]
87    ... cache-align padding ...
88    [ n-entries ]
89
90    Hence the start of any table is given by get_table() below.  */
91
92 /* The table itself: a header followed by the rule blob(s). */
93 struct ipt_table_info
94 {
95         /* Size in bytes of one copy of the rules; TABLE_OFFSET() strides
           by SMP_ALIGN(size) per CPU on SMP builds. */
96         unsigned int size;
97         /* Number of entries: FIXME. --RR */
98         unsigned int number;
99         /* Initial number of entries. Needed for module usage count */
100         unsigned int initial_entries;
101
102         /* Per-hook byte offsets into one copy of the rules:
           hook_entry[h] is where ipt_do_table() starts walking for
           hook h, underflow[h] is the entry it uses as the initial
           `back' pointer for that hook. */
103         unsigned int hook_entry[NF_IP_NUMHOOKS];
104         unsigned int underflow[NF_IP_NUMHOOKS];
105
106         /* ipt_entry tables: one per CPU.  Zero-length trailing array;
           the rule bytes are addressed starting here. */
107         char entries[0] ____cacheline_aligned;
108 };
109
/* List heads for targets, matches and tables; the find_*_lock()
   helpers in this file search them by name. */
110 static LIST_HEAD(ipt_target);
111 static LIST_HEAD(ipt_match);
112 static LIST_HEAD(ipt_tables);
/* ADD_COUNTER(c,b,p): add b to c.bcnt and p to c.pcnt. */
113 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
114
/* TABLE_OFFSET(t,p): byte offset from t->entries to copy number p of
   the rules.  On SMP each copy occupies SMP_ALIGN(t->size) bytes; on
   non-SMP builds the offset is always 0. */
115 #ifdef CONFIG_SMP
116 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
117 #else
118 #define TABLE_OFFSET(t,p) 0
119 #endif
120
/* Disabled debugging aid: wrappers that printk() the line number
   around every down()/down_interruptible()/up() call. */
121 #if 0
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 #endif
126
127 /* Returns whether matches rule or not. */
128 static inline int
129 ip_packet_match(const struct iphdr *ip,
130                 const char *indev,
131                 const char *outdev,
132                 const struct ipt_ip *ipinfo,
133                 int isfrag)
134 {
135         size_t i;
136         unsigned long ret;
137
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
139
140         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
141                   IPT_INV_SRCIP)
142             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
143                      IPT_INV_DSTIP)) {
144                 dprintf("Source or dest mismatch.\n");
145
146                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
147                         NIPQUAD(ip->saddr),
148                         NIPQUAD(ipinfo->smsk.s_addr),
149                         NIPQUAD(ipinfo->src.s_addr),
150                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
152                         NIPQUAD(ip->daddr),
153                         NIPQUAD(ipinfo->dmsk.s_addr),
154                         NIPQUAD(ipinfo->dst.s_addr),
155                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
156                 return 0;
157         }
158
159         /* Look for ifname matches; this should unroll nicely. */
160         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161                 ret |= (((const unsigned long *)indev)[i]
162                         ^ ((const unsigned long *)ipinfo->iniface)[i])
163                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
164         }
165
166         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167                 dprintf("VIA in mismatch (%s vs %s).%s\n",
168                         indev, ipinfo->iniface,
169                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
170                 return 0;
171         }
172
173         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174                 ret |= (((const unsigned long *)outdev)[i]
175                         ^ ((const unsigned long *)ipinfo->outiface)[i])
176                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
177         }
178
179         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180                 dprintf("VIA out mismatch (%s vs %s).%s\n",
181                         outdev, ipinfo->outiface,
182                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
183                 return 0;
184         }
185
186         /* Check specific protocol */
187         if (ipinfo->proto
188             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189                 dprintf("Packet protocol %hi does not match %hi.%s\n",
190                         ip->protocol, ipinfo->proto,
191                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
192                 return 0;
193         }
194
195         /* If we have a fragment rule but the packet is not a fragment
196          * then we return zero */
197         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198                 dprintf("Fragment rule but not fragment.%s\n",
199                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
200                 return 0;
201         }
202
203         return 1;
204 }
205
206 static inline int
207 ip_checkentry(const struct ipt_ip *ip)
208 {
209         if (ip->flags & ~IPT_F_MASK) {
210                 duprintf("Unknown flag bits set: %08X\n",
211                          ip->flags & ~IPT_F_MASK);
212                 return 0;
213         }
214         if (ip->invflags & ~IPT_INV_MASK) {
215                 duprintf("Unknown invflag bits set: %08X\n",
216                          ip->invflags & ~IPT_INV_MASK);
217                 return 0;
218         }
219         return 1;
220 }
221
222 static unsigned int
223 ipt_error(struct sk_buff **pskb,
224           const struct net_device *in,
225           const struct net_device *out,
226           unsigned int hooknum,
227           const void *targinfo,
228           void *userinfo)
229 {
230         if (net_ratelimit())
231                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233         return NF_DROP;
234 }
235
236 static inline
237 int do_match(struct ipt_entry_match *m,
238              const struct sk_buff *skb,
239              const struct net_device *in,
240              const struct net_device *out,
241              int offset,
242              int *hotdrop)
243 {
244         /* Stop iteration if it doesn't match */
245         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246                 return 1;
247         else
248                 return 0;
249 }
250
/* Returns the entry located `offset' bytes past `base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *addr = (char *)base + offset;

        return (struct ipt_entry *)addr;
}
256
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
258 unsigned int
259 ipt_do_table(struct sk_buff **pskb,
260              unsigned int hook,
261              const struct net_device *in,
262              const struct net_device *out,
263              struct ipt_table *table,
264              void *userdata)
265 {
266         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
267         u_int16_t offset;
268         struct iphdr *ip;
269         u_int16_t datalen;
270         int hotdrop = 0;
271         /* Initializing verdict to NF_DROP keeps gcc happy. */
272         unsigned int verdict = NF_DROP;
273         const char *indev, *outdev;
274         void *table_base;
275         struct ipt_entry *e, *back;
276
277         /* Initialization */
278         ip = (*pskb)->nh.iph;
279         datalen = (*pskb)->len - ip->ihl * 4;
280         indev = in ? in->name : nulldevname;
281         outdev = out ? out->name : nulldevname;
282         /* We handle fragments by dealing with the first fragment as
283          * if it was a normal packet.  All other fragments are treated
284          * normally, except that they will NEVER match rules that ask
285          * things we don't know, ie. tcp syn flag or ports).  If the
286          * rule is also a fragment-specific rule, non-fragments won't
287          * match it. */
288         offset = ntohs(ip->frag_off) & IP_OFFSET;
289
290         read_lock_bh(&table->lock);
291         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292         table_base = (void *)table->private->entries
293                 + TABLE_OFFSET(table->private, smp_processor_id());
294         e = get_entry(table_base, table->private->hook_entry[hook]);
295
296 #ifdef CONFIG_NETFILTER_DEBUG
297         /* Check noone else using our table */
298         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301                        smp_processor_id(),
302                        table->name,
303                        &((struct ipt_entry *)table_base)->comefrom,
304                        ((struct ipt_entry *)table_base)->comefrom);
305         }
306         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 #endif
308
309         /* For return from builtin chain */
310         back = get_entry(table_base, table->private->underflow[hook]);
311
312         do {
313                 IP_NF_ASSERT(e);
314                 IP_NF_ASSERT(back);
315                 (*pskb)->nfcache |= e->nfcache;
316                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317                         struct ipt_entry_target *t;
318
319                         if (IPT_MATCH_ITERATE(e, do_match,
320                                               *pskb, in, out,
321                                               offset, &hotdrop) != 0)
322                                 goto no_match;
323
324                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
325
326                         t = ipt_get_target(e);
327                         IP_NF_ASSERT(t->u.kernel.target);
328                         /* Standard target? */
329                         if (!t->u.kernel.target->target) {
330                                 int v;
331
332                                 v = ((struct ipt_standard_target *)t)->verdict;
333                                 if (v < 0) {
334                                         /* Pop from stack? */
335                                         if (v != IPT_RETURN) {
336                                                 verdict = (unsigned)(-v) - 1;
337                                                 break;
338                                         }
339                                         e = back;
340                                         back = get_entry(table_base,
341                                                          back->comefrom);
342                                         continue;
343                                 }
344                                 if (table_base + v
345                                     != (void *)e + e->next_offset) {
346                                         /* Save old back ptr in next entry */
347                                         struct ipt_entry *next
348                                                 = (void *)e + e->next_offset;
349                                         next->comefrom
350                                                 = (void *)back - table_base;
351                                         /* set back pointer to next entry */
352                                         back = next;
353                                 }
354
355                                 e = get_entry(table_base, v);
356                         } else {
357                                 /* Targets which reenter must return
358                                    abs. verdicts */
359 #ifdef CONFIG_NETFILTER_DEBUG
360                                 ((struct ipt_entry *)table_base)->comefrom
361                                         = 0xeeeeeeec;
362 #endif
363                                 verdict = t->u.kernel.target->target(pskb,
364                                                                      in, out,
365                                                                      hook,
366                                                                      t->data,
367                                                                      userdata);
368
369 #ifdef CONFIG_NETFILTER_DEBUG
370                                 if (((struct ipt_entry *)table_base)->comefrom
371                                     != 0xeeeeeeec
372                                     && verdict == IPT_CONTINUE) {
373                                         printk("Target %s reentered!\n",
374                                                t->u.kernel.target->name);
375                                         verdict = NF_DROP;
376                                 }
377                                 ((struct ipt_entry *)table_base)->comefrom
378                                         = 0x57acc001;
379 #endif
380                                 /* Target might have changed stuff. */
381                                 ip = (*pskb)->nh.iph;
382                                 datalen = (*pskb)->len - ip->ihl * 4;
383
384                                 if (verdict == IPT_CONTINUE)
385                                         e = (void *)e + e->next_offset;
386                                 else
387                                         /* Verdict */
388                                         break;
389                         }
390                 } else {
391
392                 no_match:
393                         e = (void *)e + e->next_offset;
394                 }
395         } while (!hotdrop);
396
397 #ifdef CONFIG_NETFILTER_DEBUG
398         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
399 #endif
400         read_unlock_bh(&table->lock);
401
402 #ifdef DEBUG_ALLOW_ALL
403         return NF_ACCEPT;
404 #else
405         if (hotdrop)
406                 return NF_DROP;
407         else return verdict;
408 #endif
409 }
410
411 /* If it succeeds, returns element and locks mutex */
412 static inline void *
413 find_inlist_lock_noload(struct list_head *head,
414                         const char *name,
415                         int *error,
416                         struct semaphore *mutex)
417 {
418         void *ret;
419
420 #if 0 
421         duprintf("find_inlist: searching for `%s' in %s.\n",
422                  name, head == &ipt_target ? "ipt_target"
423                  : head == &ipt_match ? "ipt_match"
424                  : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
425 #endif
426
427         *error = down_interruptible(mutex);
428         if (*error != 0)
429                 return NULL;
430
431         ret = list_named_find(head, name);
432         if (!ret) {
433                 *error = -ENOENT;
434                 up(mutex);
435         }
436         return ret;
437 }
438
#ifdef CONFIG_KMOD
/* Like find_inlist_lock_noload(), but when the first lookup fails it
   requests the module named "<prefix><name>" and looks once more.
   Locking and *error follow the second lookup in that case. */
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *found;

        found = find_inlist_lock_noload(head, name, error, mutex);
        if (found)
                return found;

        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);

        return find_inlist_lock_noload(head, name, error, mutex);
}
#else
/* Without CONFIG_KMOD there is nothing to load: the prefix is ignored. */
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
461
462 static inline struct ipt_table *
463 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
464 {
465         return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
466 }
467
468 static inline struct ipt_match *
469 find_match_lock(const char *name, int *error, struct semaphore *mutex)
470 {
471         return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
472 }
473
474 static struct ipt_target *
475 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
476 {
477         return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
478 }
479
480 struct ipt_target *
481 __ipt_find_target_lock(const char *name, int *error)
482 {
483         return ipt_find_target_lock(name,error,&ipt_mutex);
484 }
485
486 void
487 __ipt_mutex_up(void)
488 {
489         up(&ipt_mutex);
490 }
491
492 /* All zeroes == unconditional rule. */
493 static inline int
494 unconditional(const struct ipt_ip *ip)
495 {
496         unsigned int i;
497
498         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
499                 if (((__u32 *)ip)[i])
500                         return 0;
501
502         return 1;
503 }
504
505 /* Figures out from what hook each rule can be called: returns 0 if
506    there are loops.  Puts hook bitmask in comefrom. */
507 static int
508 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
509 {
510         unsigned int hook;
511
512         /* No recursion; use packet counter to save back ptrs (reset
513            to 0 as we leave), and comefrom to save source hook bitmask */
514         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
515                 unsigned int pos = newinfo->hook_entry[hook];
516                 struct ipt_entry *e
517                         = (struct ipt_entry *)(newinfo->entries + pos);
518
519                 if (!(valid_hooks & (1 << hook)))
520                         continue;
521
522                 /* Set initial back pointer. */
523                 e->counters.pcnt = pos;
524
525                 for (;;) {
526                         struct ipt_standard_target *t
527                                 = (void *)ipt_get_target(e);
528
529                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
530                                 printk("iptables: loop hook %u pos %u %08X.\n",
531                                        hook, pos, e->comefrom);
532                                 return 0;
533                         }
534                         e->comefrom
535                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
536
537                         /* Unconditional return/END. */
538                         if (e->target_offset == sizeof(struct ipt_entry)
539                             && (strcmp(t->target.u.user.name,
540                                        IPT_STANDARD_TARGET) == 0)
541                             && t->verdict < 0
542                             && unconditional(&e->ip)) {
543                                 unsigned int oldpos, size;
544
545                                 /* Return: backtrack through the last
546                                    big jump. */
547                                 do {
548                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
549 #ifdef DEBUG_IP_FIREWALL_USER
550                                         if (e->comefrom
551                                             & (1 << NF_IP_NUMHOOKS)) {
552                                                 duprintf("Back unset "
553                                                          "on hook %u "
554                                                          "rule %u\n",
555                                                          hook, pos);
556                                         }
557 #endif
558                                         oldpos = pos;
559                                         pos = e->counters.pcnt;
560                                         e->counters.pcnt = 0;
561
562                                         /* We're at the start. */
563                                         if (pos == oldpos)
564                                                 goto next;
565
566                                         e = (struct ipt_entry *)
567                                                 (newinfo->entries + pos);
568                                 } while (oldpos == pos + e->next_offset);
569
570                                 /* Move along one */
571                                 size = e->next_offset;
572                                 e = (struct ipt_entry *)
573                                         (newinfo->entries + pos + size);
574                                 e->counters.pcnt = pos;
575                                 pos += size;
576                         } else {
577                                 int newpos = t->verdict;
578
579                                 if (strcmp(t->target.u.user.name,
580                                            IPT_STANDARD_TARGET) == 0
581                                     && newpos >= 0) {
582                                         /* This a jump; chase it. */
583                                         duprintf("Jump rule %u -> %u\n",
584                                                  pos, newpos);
585                                 } else {
586                                         /* ... this is a fallthru */
587                                         newpos = pos + e->next_offset;
588                                 }
589                                 e = (struct ipt_entry *)
590                                         (newinfo->entries + newpos);
591                                 e->counters.pcnt = pos;
592                                 pos = newpos;
593                         }
594                 }
595                 next:
596                 duprintf("Finished chain %u\n", hook);
597         }
598         return 1;
599 }
600
601 static inline int
602 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
603 {
604         if (i && (*i)-- == 0)
605                 return 1;
606
607         if (m->u.kernel.match->destroy)
608                 m->u.kernel.match->destroy(m->data,
609                                            m->u.match_size - sizeof(*m));
610         module_put(m->u.kernel.match->me);
611         return 0;
612 }
613
614 static inline int
615 standard_check(const struct ipt_entry_target *t,
616                unsigned int max_offset)
617 {
618         struct ipt_standard_target *targ = (void *)t;
619
620         /* Check standard info. */
621         if (t->u.target_size
622             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
623                 duprintf("standard_check: target size %u != %u\n",
624                          t->u.target_size,
625                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
626                 return 0;
627         }
628
629         if (targ->verdict >= 0
630             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
631                 duprintf("ipt_standard_check: bad verdict (%i)\n",
632                          targ->verdict);
633                 return 0;
634         }
635
636         if (targ->verdict < -NF_MAX_VERDICT - 1) {
637                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
638                          targ->verdict);
639                 return 0;
640         }
641         return 1;
642 }
643
644 static inline int
645 check_match(struct ipt_entry_match *m,
646             const char *name,
647             const struct ipt_ip *ip,
648             unsigned int hookmask,
649             unsigned int *i)
650 {
651         int ret;
652         struct ipt_match *match;
653
654         match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
655         if (!match) {
656                 duprintf("check_match: `%s' not found\n", m->u.user.name);
657                 return ret;
658         }
659         if (!try_module_get(match->me)) {
660                 up(&ipt_mutex);
661                 return -ENOENT;
662         }
663         m->u.kernel.match = match;
664         up(&ipt_mutex);
665
666         if (m->u.kernel.match->checkentry
667             && !m->u.kernel.match->checkentry(name, ip, m->data,
668                                               m->u.match_size - sizeof(*m),
669                                               hookmask)) {
670                 module_put(m->u.kernel.match->me);
671                 duprintf("ip_tables: check failed for `%s'.\n",
672                          m->u.kernel.match->name);
673                 return -EINVAL;
674         }
675
676         (*i)++;
677         return 0;
678 }
679
680 static struct ipt_target ipt_standard_target;
681
682 static inline int
683 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
684             unsigned int *i)
685 {
686         struct ipt_entry_target *t;
687         struct ipt_target *target;
688         int ret;
689         unsigned int j;
690
691         if (!ip_checkentry(&e->ip)) {
692                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
693                 return -EINVAL;
694         }
695
696         j = 0;
697         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
698         if (ret != 0)
699                 goto cleanup_matches;
700
701         t = ipt_get_target(e);
702         target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
703         if (!target) {
704                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
705                 goto cleanup_matches;
706         }
707         if (!try_module_get(target->me)) {
708                 up(&ipt_mutex);
709                 ret = -ENOENT;
710                 goto cleanup_matches;
711         }
712         t->u.kernel.target = target;
713         up(&ipt_mutex);
714
715         if (t->u.kernel.target == &ipt_standard_target) {
716                 if (!standard_check(t, size)) {
717                         ret = -EINVAL;
718                         goto cleanup_matches;
719                 }
720         } else if (t->u.kernel.target->checkentry
721                    && !t->u.kernel.target->checkentry(name, e, t->data,
722                                                       t->u.target_size
723                                                       - sizeof(*t),
724                                                       e->comefrom)) {
725                 module_put(t->u.kernel.target->me);
726                 duprintf("ip_tables: check failed for `%s'.\n",
727                          t->u.kernel.target->name);
728                 ret = -EINVAL;
729                 goto cleanup_matches;
730         }
731
732         (*i)++;
733         return 0;
734
735  cleanup_matches:
736         IPT_MATCH_ITERATE(e, cleanup_match, &j);
737         return ret;
738 }
739
740 static inline int
741 check_entry_size_and_hooks(struct ipt_entry *e,
742                            struct ipt_table_info *newinfo,
743                            unsigned char *base,
744                            unsigned char *limit,
745                            const unsigned int *hook_entries,
746                            const unsigned int *underflows,
747                            unsigned int *i)
748 {
749         unsigned int h;
750
751         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
752             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
753                 duprintf("Bad offset %p\n", e);
754                 return -EINVAL;
755         }
756
757         if (e->next_offset
758             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
759                 duprintf("checking: element %p size %u\n",
760                          e, e->next_offset);
761                 return -EINVAL;
762         }
763
764         /* Check hooks & underflows */
765         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
766                 if ((unsigned char *)e - base == hook_entries[h])
767                         newinfo->hook_entry[h] = hook_entries[h];
768                 if ((unsigned char *)e - base == underflows[h])
769                         newinfo->underflow[h] = underflows[h];
770         }
771
772         /* FIXME: underflows must be unconditional, standard verdicts
773            < 0 (not IPT_RETURN). --RR */
774
775         /* Clear counters and comefrom */
776         e->counters = ((struct ipt_counters) { 0, 0 });
777         e->comefrom = 0;
778
779         (*i)++;
780         return 0;
781 }
782
783 static inline int
784 cleanup_entry(struct ipt_entry *e, unsigned int *i)
785 {
786         struct ipt_entry_target *t;
787
788         if (i && (*i)-- == 0)
789                 return 1;
790
791         /* Cleanup all matches */
792         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
793         t = ipt_get_target(e);
794         if (t->u.kernel.target->destroy)
795                 t->u.kernel.target->destroy(t->data,
796                                             t->u.target_size - sizeof(*t));
797         module_put(t->u.kernel.target->me);
798         return 0;
799 }
800
801 /* Checks and translates the user-supplied table segment (held in
802    newinfo) */
803 static int
804 translate_table(const char *name,
805                 unsigned int valid_hooks,
806                 struct ipt_table_info *newinfo,
807                 unsigned int size,
808                 unsigned int number,
809                 const unsigned int *hook_entries,
810                 const unsigned int *underflows)
811 {
812         unsigned int i;
813         int ret;
814
815         newinfo->size = size;
816         newinfo->number = number;
817
818         /* Init all hooks to impossible value. */
819         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
820                 newinfo->hook_entry[i] = 0xFFFFFFFF;
821                 newinfo->underflow[i] = 0xFFFFFFFF;
822         }
823
824         duprintf("translate_table: size %u\n", newinfo->size);
825         i = 0;
826         /* Walk through entries, checking offsets. */
827         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
828                                 check_entry_size_and_hooks,
829                                 newinfo,
830                                 newinfo->entries,
831                                 newinfo->entries + size,
832                                 hook_entries, underflows, &i);
833         if (ret != 0)
834                 return ret;
835
836         if (i != number) {
837                 duprintf("translate_table: %u not %u entries\n",
838                          i, number);
839                 return -EINVAL;
840         }
841
842         /* Check hooks all assigned */
843         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
844                 /* Only hooks which are valid */
845                 if (!(valid_hooks & (1 << i)))
846                         continue;
847                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
848                         duprintf("Invalid hook entry %u %u\n",
849                                  i, hook_entries[i]);
850                         return -EINVAL;
851                 }
852                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
853                         duprintf("Invalid underflow %u %u\n",
854                                  i, underflows[i]);
855                         return -EINVAL;
856                 }
857         }
858
859         if (!mark_source_chains(newinfo, valid_hooks))
860                 return -ELOOP;
861
862         /* Finally, each sanity check must pass */
863         i = 0;
864         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
865                                 check_entry, name, size, &i);
866
867         if (ret != 0) {
868                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
869                                   cleanup_entry, &i);
870                 return ret;
871         }
872
873         /* And one copy for every other CPU */
874         for (i = 1; i < NR_CPUS; i++) {
875                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
876                        newinfo->entries,
877                        SMP_ALIGN(newinfo->size));
878         }
879
880         return ret;
881 }
882
883 static struct ipt_table_info *
884 replace_table(struct ipt_table *table,
885               unsigned int num_counters,
886               struct ipt_table_info *newinfo,
887               int *error)
888 {
889         struct ipt_table_info *oldinfo;
890
891 #ifdef CONFIG_NETFILTER_DEBUG
892         {
893                 struct ipt_entry *table_base;
894                 unsigned int i;
895
896                 for (i = 0; i < NR_CPUS; i++) {
897                         table_base =
898                                 (void *)newinfo->entries
899                                 + TABLE_OFFSET(newinfo, i);
900
901                         table_base->comefrom = 0xdead57ac;
902                 }
903         }
904 #endif
905
906         /* Do the substitution. */
907         write_lock_bh(&table->lock);
908         /* Check inside lock: is the old number correct? */
909         if (num_counters != table->private->number) {
910                 duprintf("num_counters != table->private->number (%u/%u)\n",
911                          num_counters, table->private->number);
912                 write_unlock_bh(&table->lock);
913                 *error = -EAGAIN;
914                 return NULL;
915         }
916         oldinfo = table->private;
917         table->private = newinfo;
918         newinfo->initial_entries = oldinfo->initial_entries;
919         write_unlock_bh(&table->lock);
920
921         return oldinfo;
922 }
923
924 /* Gets counters. */
925 static inline int
926 add_entry_to_counter(const struct ipt_entry *e,
927                      struct ipt_counters total[],
928                      unsigned int *i)
929 {
930         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
931
932         (*i)++;
933         return 0;
934 }
935
936 static void
937 get_counters(const struct ipt_table_info *t,
938              struct ipt_counters counters[])
939 {
940         unsigned int cpu;
941         unsigned int i;
942
943         for (cpu = 0; cpu < NR_CPUS; cpu++) {
944                 i = 0;
945                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
946                                   t->size,
947                                   add_entry_to_counter,
948                                   counters,
949                                   &i);
950         }
951 }
952
953 static int
954 copy_entries_to_user(unsigned int total_size,
955                      struct ipt_table *table,
956                      void __user *userptr)
957 {
958         unsigned int off, num, countersize;
959         struct ipt_entry *e;
960         struct ipt_counters *counters;
961         int ret = 0;
962
963         /* We need atomic snapshot of counters: rest doesn't change
964            (other than comefrom, which userspace doesn't care
965            about). */
966         countersize = sizeof(struct ipt_counters) * table->private->number;
967         counters = vmalloc(countersize);
968
969         if (counters == NULL)
970                 return -ENOMEM;
971
972         /* First, sum counters... */
973         memset(counters, 0, countersize);
974         write_lock_bh(&table->lock);
975         get_counters(table->private, counters);
976         write_unlock_bh(&table->lock);
977
978         /* ... then copy entire thing from CPU 0... */
979         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
980                 ret = -EFAULT;
981                 goto free_counters;
982         }
983
984         /* FIXME: use iterator macros --RR */
985         /* ... then go back and fix counters and names */
986         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
987                 unsigned int i;
988                 struct ipt_entry_match *m;
989                 struct ipt_entry_target *t;
990
991                 e = (struct ipt_entry *)(table->private->entries + off);
992                 if (copy_to_user(userptr + off
993                                  + offsetof(struct ipt_entry, counters),
994                                  &counters[num],
995                                  sizeof(counters[num])) != 0) {
996                         ret = -EFAULT;
997                         goto free_counters;
998                 }
999
1000                 for (i = sizeof(struct ipt_entry);
1001                      i < e->target_offset;
1002                      i += m->u.match_size) {
1003                         m = (void *)e + i;
1004
1005                         if (copy_to_user(userptr + off + i
1006                                          + offsetof(struct ipt_entry_match,
1007                                                     u.user.name),
1008                                          m->u.kernel.match->name,
1009                                          strlen(m->u.kernel.match->name)+1)
1010                             != 0) {
1011                                 ret = -EFAULT;
1012                                 goto free_counters;
1013                         }
1014                 }
1015
1016                 t = ipt_get_target(e);
1017                 if (copy_to_user(userptr + off + e->target_offset
1018                                  + offsetof(struct ipt_entry_target,
1019                                             u.user.name),
1020                                  t->u.kernel.target->name,
1021                                  strlen(t->u.kernel.target->name)+1) != 0) {
1022                         ret = -EFAULT;
1023                         goto free_counters;
1024                 }
1025         }
1026
1027  free_counters:
1028         vfree(counters);
1029         return ret;
1030 }
1031
1032 static int
1033 get_entries(const struct ipt_get_entries *entries,
1034             struct ipt_get_entries __user *uptr)
1035 {
1036         int ret;
1037         struct ipt_table *t;
1038
1039         t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1040         if (t) {
1041                 duprintf("t->private->number = %u\n",
1042                          t->private->number);
1043                 if (entries->size == t->private->size)
1044                         ret = copy_entries_to_user(t->private->size,
1045                                                    t, uptr->entrytable);
1046                 else {
1047                         duprintf("get_entries: I've got %u not %u!\n",
1048                                  t->private->size,
1049                                  entries->size);
1050                         ret = -EINVAL;
1051                 }
1052                 up(&ipt_mutex);
1053         } else
1054                 duprintf("get_entries: Can't find %s!\n",
1055                          entries->name);
1056
1057         return ret;
1058 }
1059
1060 static int
1061 do_replace(void __user *user, unsigned int len)
1062 {
1063         int ret;
1064         struct ipt_replace tmp;
1065         struct ipt_table *t;
1066         struct ipt_table_info *newinfo, *oldinfo;
1067         struct ipt_counters *counters;
1068
1069         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1070                 return -EFAULT;
1071
1072         /* Hack: Causes ipchains to give correct error msg --RR */
1073         if (len != sizeof(tmp) + tmp.size)
1074                 return -ENOPROTOOPT;
1075
1076         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1077         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1078                 return -ENOMEM;
1079
1080         newinfo = vmalloc(sizeof(struct ipt_table_info)
1081                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1082         if (!newinfo)
1083                 return -ENOMEM;
1084
1085         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1086                            tmp.size) != 0) {
1087                 ret = -EFAULT;
1088                 goto free_newinfo;
1089         }
1090
1091         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1092         if (!counters) {
1093                 ret = -ENOMEM;
1094                 goto free_newinfo;
1095         }
1096         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1097
1098         ret = translate_table(tmp.name, tmp.valid_hooks,
1099                               newinfo, tmp.size, tmp.num_entries,
1100                               tmp.hook_entry, tmp.underflow);
1101         if (ret != 0)
1102                 goto free_newinfo_counters;
1103
1104         duprintf("ip_tables: Translated table\n");
1105
1106         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1107         if (!t)
1108                 goto free_newinfo_counters_untrans;
1109
1110         /* You lied! */
1111         if (tmp.valid_hooks != t->valid_hooks) {
1112                 duprintf("Valid hook crap: %08X vs %08X\n",
1113                          tmp.valid_hooks, t->valid_hooks);
1114                 ret = -EINVAL;
1115                 goto free_newinfo_counters_untrans_unlock;
1116         }
1117
1118         /* Get a reference in advance, we're not allowed fail later */
1119         if (!try_module_get(t->me)) {
1120                 ret = -EBUSY;
1121                 goto free_newinfo_counters_untrans_unlock;
1122         }
1123
1124
1125         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1126         if (!oldinfo)
1127                 goto put_module;
1128
1129         /* Update module usage count based on number of rules */
1130         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1131                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1132         if ((oldinfo->number > oldinfo->initial_entries) || 
1133             (newinfo->number <= oldinfo->initial_entries)) 
1134                 module_put(t->me);
1135         if ((oldinfo->number > oldinfo->initial_entries) &&
1136             (newinfo->number <= oldinfo->initial_entries))
1137                 module_put(t->me);
1138
1139         /* Get the old counters. */
1140         get_counters(oldinfo, counters);
1141         /* Decrease module usage counts and free resource */
1142         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1143         vfree(oldinfo);
1144         /* Silent error: too late now. */
1145         copy_to_user(tmp.counters, counters,
1146                      sizeof(struct ipt_counters) * tmp.num_counters);
1147         vfree(counters);
1148         up(&ipt_mutex);
1149         return 0;
1150
1151  put_module:
1152         module_put(t->me);
1153  free_newinfo_counters_untrans_unlock:
1154         up(&ipt_mutex);
1155  free_newinfo_counters_untrans:
1156         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1157  free_newinfo_counters:
1158         vfree(counters);
1159  free_newinfo:
1160         vfree(newinfo);
1161         return ret;
1162 }
1163
1164 /* We're lazy, and add to the first CPU; overflow works its fey magic
1165  * and everything is OK. */
1166 static inline int
1167 add_counter_to_entry(struct ipt_entry *e,
1168                      const struct ipt_counters addme[],
1169                      unsigned int *i)
1170 {
1171 #if 0
1172         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1173                  *i,
1174                  (long unsigned int)e->counters.pcnt,
1175                  (long unsigned int)e->counters.bcnt,
1176                  (long unsigned int)addme[*i].pcnt,
1177                  (long unsigned int)addme[*i].bcnt);
1178 #endif
1179
1180         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1181
1182         (*i)++;
1183         return 0;
1184 }
1185
1186 static int
1187 do_add_counters(void __user *user, unsigned int len)
1188 {
1189         unsigned int i;
1190         struct ipt_counters_info tmp, *paddc;
1191         struct ipt_table *t;
1192         int ret;
1193
1194         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1195                 return -EFAULT;
1196
1197         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1198                 return -EINVAL;
1199
1200         paddc = vmalloc(len);
1201         if (!paddc)
1202                 return -ENOMEM;
1203
1204         if (copy_from_user(paddc, user, len) != 0) {
1205                 ret = -EFAULT;
1206                 goto free;
1207         }
1208
1209         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1210         if (!t)
1211                 goto free;
1212
1213         write_lock_bh(&t->lock);
1214         if (t->private->number != paddc->num_counters) {
1215                 ret = -EINVAL;
1216                 goto unlock_up_free;
1217         }
1218
1219         i = 0;
1220         IPT_ENTRY_ITERATE(t->private->entries,
1221                           t->private->size,
1222                           add_counter_to_entry,
1223                           paddc->counters,
1224                           &i);
1225  unlock_up_free:
1226         write_unlock_bh(&t->lock);
1227         up(&ipt_mutex);
1228  free:
1229         vfree(paddc);
1230
1231         return ret;
1232 }
1233
1234 static int
1235 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1236 {
1237         int ret;
1238
1239         if (!capable(CAP_NET_ADMIN))
1240                 return -EPERM;
1241
1242         switch (cmd) {
1243         case IPT_SO_SET_REPLACE:
1244                 ret = do_replace(user, len);
1245                 break;
1246
1247         case IPT_SO_SET_ADD_COUNTERS:
1248                 ret = do_add_counters(user, len);
1249                 break;
1250
1251         default:
1252                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1253                 ret = -EINVAL;
1254         }
1255
1256         return ret;
1257 }
1258
1259 static int
1260 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1261 {
1262         int ret;
1263
1264         if (!capable(CAP_NET_ADMIN))
1265                 return -EPERM;
1266
1267         switch (cmd) {
1268         case IPT_SO_GET_INFO: {
1269                 char name[IPT_TABLE_MAXNAMELEN];
1270                 struct ipt_table *t;
1271
1272                 if (*len != sizeof(struct ipt_getinfo)) {
1273                         duprintf("length %u != %u\n", *len,
1274                                  sizeof(struct ipt_getinfo));
1275                         ret = -EINVAL;
1276                         break;
1277                 }
1278
1279                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1280                         ret = -EFAULT;
1281                         break;
1282                 }
1283                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1284                 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1285                 if (t) {
1286                         struct ipt_getinfo info;
1287
1288                         info.valid_hooks = t->valid_hooks;
1289                         memcpy(info.hook_entry, t->private->hook_entry,
1290                                sizeof(info.hook_entry));
1291                         memcpy(info.underflow, t->private->underflow,
1292                                sizeof(info.underflow));
1293                         info.num_entries = t->private->number;
1294                         info.size = t->private->size;
1295                         memcpy(info.name, name, sizeof(info.name));
1296
1297                         if (copy_to_user(user, &info, *len) != 0)
1298                                 ret = -EFAULT;
1299                         else
1300                                 ret = 0;
1301
1302                         up(&ipt_mutex);
1303                 }
1304         }
1305         break;
1306
1307         case IPT_SO_GET_ENTRIES: {
1308                 struct ipt_get_entries get;
1309
1310                 if (*len < sizeof(get)) {
1311                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1312                         ret = -EINVAL;
1313                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1314                         ret = -EFAULT;
1315                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1316                         duprintf("get_entries: %u != %u\n", *len,
1317                                  sizeof(struct ipt_get_entries) + get.size);
1318                         ret = -EINVAL;
1319                 } else
1320                         ret = get_entries(&get, user);
1321                 break;
1322         }
1323
1324         default:
1325                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1326                 ret = -EINVAL;
1327         }
1328
1329         return ret;
1330 }
1331
1332 /* Registration hooks for targets. */
1333 int
1334 ipt_register_target(struct ipt_target *target)
1335 {
1336         int ret;
1337
1338         ret = down_interruptible(&ipt_mutex);
1339         if (ret != 0)
1340                 return ret;
1341
1342         if (!list_named_insert(&ipt_target, target)) {
1343                 duprintf("ipt_register_target: `%s' already in list!\n",
1344                          target->name);
1345                 ret = -EINVAL;
1346         }
1347         up(&ipt_mutex);
1348         return ret;
1349 }
1350
1351 void
1352 ipt_unregister_target(struct ipt_target *target)
1353 {
1354         down(&ipt_mutex);
1355         LIST_DELETE(&ipt_target, target);
1356         up(&ipt_mutex);
1357 }
1358
1359 int
1360 ipt_register_match(struct ipt_match *match)
1361 {
1362         int ret;
1363
1364         ret = down_interruptible(&ipt_mutex);
1365         if (ret != 0)
1366                 return ret;
1367
1368         if (!list_named_insert(&ipt_match, match)) {
1369                 duprintf("ipt_register_match: `%s' already in list!\n",
1370                          match->name);
1371                 ret = -EINVAL;
1372         }
1373         up(&ipt_mutex);
1374
1375         return ret;
1376 }
1377
1378 void
1379 ipt_unregister_match(struct ipt_match *match)
1380 {
1381         down(&ipt_mutex);
1382         LIST_DELETE(&ipt_match, match);
1383         up(&ipt_mutex);
1384 }
1385
1386 int ipt_register_table(struct ipt_table *table)
1387 {
1388         int ret;
1389         struct ipt_table_info *newinfo;
1390         static struct ipt_table_info bootstrap
1391                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1392
1393         newinfo = vmalloc(sizeof(struct ipt_table_info)
1394                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1395         if (!newinfo)
1396                 return -ENOMEM;
1397
1398         memcpy(newinfo->entries, table->table->entries, table->table->size);
1399
1400         ret = translate_table(table->name, table->valid_hooks,
1401                               newinfo, table->table->size,
1402                               table->table->num_entries,
1403                               table->table->hook_entry,
1404                               table->table->underflow);
1405         if (ret != 0) {
1406                 vfree(newinfo);
1407                 return ret;
1408         }
1409
1410         ret = down_interruptible(&ipt_mutex);
1411         if (ret != 0) {
1412                 vfree(newinfo);
1413                 return ret;
1414         }
1415
1416         /* Don't autoload: we'd eat our tail... */
1417         if (list_named_find(&ipt_tables, table->name)) {
1418                 ret = -EEXIST;
1419                 goto free_unlock;
1420         }
1421
1422         /* Simplifies replace_table code. */
1423         table->private = &bootstrap;
1424         if (!replace_table(table, 0, newinfo, &ret))
1425                 goto free_unlock;
1426
1427         duprintf("table->private->number = %u\n",
1428                  table->private->number);
1429         
1430         /* save number of initial entries */
1431         table->private->initial_entries = table->private->number;
1432
1433         rwlock_init(&table->lock);
1434         list_prepend(&ipt_tables, table);
1435
1436  unlock:
1437         up(&ipt_mutex);
1438         return ret;
1439
1440  free_unlock:
1441         vfree(newinfo);
1442         goto unlock;
1443 }
1444
1445 void ipt_unregister_table(struct ipt_table *table)
1446 {
1447         down(&ipt_mutex);
1448         LIST_DELETE(&ipt_tables, table);
1449         up(&ipt_mutex);
1450
1451         /* Decrease module usage counts and free resources */
1452         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1453                           cleanup_entry, NULL);
1454         vfree(table->private);
1455 }
1456
/*
 * Test whether @port lies in the inclusive range [@min, @max].
 * The range result (0 or 1) is XORed with @invert, so with @invert
 * being 0 or 1 the function returns 1 on a match and 0 otherwise.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = 0;

	if (min <= port && port <= max)
		in_range = 1;

	return in_range ^ invert;
}
1466
1467 static int
1468 tcp_find_option(u_int8_t option,
1469                 const struct sk_buff *skb,
1470                 unsigned int optlen,
1471                 int invert,
1472                 int *hotdrop)
1473 {
1474         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1475         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1476         unsigned int i;
1477
1478         duprintf("tcp_match: finding option\n");
1479
1480         if (!optlen)
1481                 return invert;
1482
1483         /* If we don't have the whole header, drop packet. */
1484         op = skb_header_pointer(skb,
1485                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1486                                 optlen, _opt);
1487         if (op == NULL) {
1488                 *hotdrop = 1;
1489                 return 0;
1490         }
1491
1492         for (i = 0; i < optlen; ) {
1493                 if (op[i] == option) return !invert;
1494                 if (op[i] < 2) i++;
1495                 else i += op[i+1]?:1;
1496         }
1497
1498         return invert;
1499 }
1500
1501 static int
1502 tcp_match(const struct sk_buff *skb,
1503           const struct net_device *in,
1504           const struct net_device *out,
1505           const void *matchinfo,
1506           int offset,
1507           int *hotdrop)
1508 {
1509         struct tcphdr _tcph, *th;
1510         const struct ipt_tcp *tcpinfo = matchinfo;
1511
1512         if (offset) {
1513                 /* To quote Alan:
1514
1515                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1516                    causes this. Its a cracker trying to break in by doing a
1517                    flag overwrite to pass the direction checks.
1518                 */
1519                 if (offset == 1) {
1520                         duprintf("Dropping evil TCP offset=1 frag.\n");
1521                         *hotdrop = 1;
1522                 }
1523                 /* Must not be a fragment. */
1524                 return 0;
1525         }
1526
1527 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1528
1529         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1530                                 sizeof(_tcph), &_tcph);
1531         if (th == NULL) {
1532                 /* We've been asked to examine this packet, and we
1533                    can't.  Hence, no choice but to drop. */
1534                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1535                 *hotdrop = 1;
1536                 return 0;
1537         }
1538
1539         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1540                         ntohs(th->source),
1541                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1542                 return 0;
1543         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1544                         ntohs(th->dest),
1545                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1546                 return 0;
1547         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1548                       == tcpinfo->flg_cmp,
1549                       IPT_TCP_INV_FLAGS))
1550                 return 0;
1551         if (tcpinfo->option) {
1552                 if (th->doff * 4 < sizeof(_tcph)) {
1553                         *hotdrop = 1;
1554                         return 0;
1555                 }
1556                 if (!tcp_find_option(tcpinfo->option, skb,
1557                                      th->doff*4 - sizeof(_tcph),
1558                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1559                                      hotdrop))
1560                         return 0;
1561         }
1562         return 1;
1563 }
1564
1565 /* Called when user tries to insert an entry of this type. */
1566 static int
1567 tcp_checkentry(const char *tablename,
1568                const struct ipt_ip *ip,
1569                void *matchinfo,
1570                unsigned int matchsize,
1571                unsigned int hook_mask)
1572 {
1573         const struct ipt_tcp *tcpinfo = matchinfo;
1574
1575         /* Must specify proto == TCP, and no unknown invflags */
1576         return ip->proto == IPPROTO_TCP
1577                 && !(ip->invflags & IPT_INV_PROTO)
1578                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1579                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1580 }
1581
1582 static int
1583 udp_match(const struct sk_buff *skb,
1584           const struct net_device *in,
1585           const struct net_device *out,
1586           const void *matchinfo,
1587           int offset,
1588           int *hotdrop)
1589 {
1590         struct udphdr _udph, *uh;
1591         const struct ipt_udp *udpinfo = matchinfo;
1592
1593         /* Must not be a fragment. */
1594         if (offset)
1595                 return 0;
1596
1597         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1598                                 sizeof(_udph), &_udph);
1599         if (uh == NULL) {
1600                 /* We've been asked to examine this packet, and we
1601                    can't.  Hence, no choice but to drop. */
1602                 duprintf("Dropping evil UDP tinygram.\n");
1603                 *hotdrop = 1;
1604                 return 0;
1605         }
1606
1607         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1608                           ntohs(uh->source),
1609                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1610                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1611                               ntohs(uh->dest),
1612                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1613 }
1614
1615 /* Called when user tries to insert an entry of this type. */
1616 static int
1617 udp_checkentry(const char *tablename,
1618                const struct ipt_ip *ip,
1619                void *matchinfo,
1620                unsigned int matchinfosize,
1621                unsigned int hook_mask)
1622 {
1623         const struct ipt_udp *udpinfo = matchinfo;
1624
1625         /* Must specify proto == UDP, and no unknown invflags */
1626         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1627                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1628                          IPPROTO_UDP);
1629                 return 0;
1630         }
1631         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1632                 duprintf("ipt_udp: matchsize %u != %u\n",
1633                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1634                 return 0;
1635         }
1636         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1637                 duprintf("ipt_udp: unknown flags %X\n",
1638                          udpinfo->invflags);
1639                 return 0;
1640         }
1641
1642         return 1;
1643 }
1644
/*
 * Test an ICMP @type / @code pair against a rule.
 *
 * A @test_type of 0xFF accepts every packet; otherwise @type must equal
 * @test_type and @code must lie in [@min_code, @max_code].  The 0/1
 * result is XORed with @invert before being returned.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else if (type != test_type)
		hit = 0;
	else
		hit = (min_code <= code && code <= max_code);

	return hit ^ invert;
}
1654
1655 static int
1656 icmp_match(const struct sk_buff *skb,
1657            const struct net_device *in,
1658            const struct net_device *out,
1659            const void *matchinfo,
1660            int offset,
1661            int *hotdrop)
1662 {
1663         struct icmphdr _icmph, *ic;
1664         const struct ipt_icmp *icmpinfo = matchinfo;
1665
1666         /* Must not be a fragment. */
1667         if (offset)
1668                 return 0;
1669
1670         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1671                                 sizeof(_icmph), &_icmph);
1672         if (ic == NULL) {
1673                 /* We've been asked to examine this packet, and we
1674                  * can't.  Hence, no choice but to drop.
1675                  */
1676                 duprintf("Dropping evil ICMP tinygram.\n");
1677                 *hotdrop = 1;
1678                 return 0;
1679         }
1680
1681         return icmp_type_code_match(icmpinfo->type,
1682                                     icmpinfo->code[0],
1683                                     icmpinfo->code[1],
1684                                     ic->type, ic->code,
1685                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1686 }
1687
1688 /* Called when user tries to insert an entry of this type. */
1689 static int
1690 icmp_checkentry(const char *tablename,
1691            const struct ipt_ip *ip,
1692            void *matchinfo,
1693            unsigned int matchsize,
1694            unsigned int hook_mask)
1695 {
1696         const struct ipt_icmp *icmpinfo = matchinfo;
1697
1698         /* Must specify proto == ICMP, and no unknown invflags */
1699         return ip->proto == IPPROTO_ICMP
1700                 && !(ip->invflags & IPT_INV_PROTO)
1701                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1702                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1703 }
1704
1705 /* The built-in targets: standard (NULL) and error. */
1706 static struct ipt_target ipt_standard_target = {
1707         .name           = IPT_STANDARD_TARGET,
1708 };
1709
1710 static struct ipt_target ipt_error_target = {
1711         .name           = IPT_ERROR_TARGET,
1712         .target         = ipt_error,
1713 };
1714
1715 static struct nf_sockopt_ops ipt_sockopts = {
1716         .pf             = PF_INET,
1717         .set_optmin     = IPT_BASE_CTL,
1718         .set_optmax     = IPT_SO_SET_MAX+1,
1719         .set            = do_ipt_set_ctl,
1720         .get_optmin     = IPT_BASE_CTL,
1721         .get_optmax     = IPT_SO_GET_MAX+1,
1722         .get            = do_ipt_get_ctl,
1723 };
1724
1725 static struct ipt_match tcp_matchstruct = {
1726         .name           = "tcp",
1727         .match          = &tcp_match,
1728         .checkentry     = &tcp_checkentry,
1729 };
1730
1731 static struct ipt_match udp_matchstruct = {
1732         .name           = "udp",
1733         .match          = &udp_match,
1734         .checkentry     = &udp_checkentry,
1735 };
1736
1737 static struct ipt_match icmp_matchstruct = {
1738         .name           = "icmp",
1739         .match          = &icmp_match,
1740         .checkentry     = &icmp_checkentry,
1741 };
1742
1743 #ifdef CONFIG_PROC_FS
1744 static inline int print_name(const char *i,
1745                              off_t start_offset, char *buffer, int length,
1746                              off_t *pos, unsigned int *count)
1747 {
1748         if ((*count)++ >= start_offset) {
1749                 unsigned int namelen;
1750
1751                 namelen = sprintf(buffer + *pos, "%s\n",
1752                                   i + sizeof(struct list_head));
1753                 if (*pos + namelen > length) {
1754                         /* Stop iterating */
1755                         return 1;
1756                 }
1757                 *pos += namelen;
1758         }
1759         return 0;
1760 }
1761
1762 static inline int print_target(const struct ipt_target *t,
1763                                off_t start_offset, char *buffer, int length,
1764                                off_t *pos, unsigned int *count)
1765 {
1766         if (t == &ipt_standard_target || t == &ipt_error_target)
1767                 return 0;
1768         return print_name((char *)t, start_offset, buffer, length, pos, count);
1769 }
1770
1771 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1772 {
1773         off_t pos = 0;
1774         unsigned int count = 0;
1775
1776         if (down_interruptible(&ipt_mutex) != 0)
1777                 return 0;
1778
1779         LIST_FIND(&ipt_tables, print_name, void *,
1780                   offset, buffer, length, &pos, &count);
1781
1782         up(&ipt_mutex);
1783
1784         /* `start' hack - see fs/proc/generic.c line ~105 */
1785         *start=(char *)((unsigned long)count-offset);
1786         return pos;
1787 }
1788
1789 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1790 {
1791         off_t pos = 0;
1792         unsigned int count = 0;
1793
1794         if (down_interruptible(&ipt_mutex) != 0)
1795                 return 0;
1796
1797         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1798                   offset, buffer, length, &pos, &count);
1799         
1800         up(&ipt_mutex);
1801
1802         *start = (char *)((unsigned long)count - offset);
1803         return pos;
1804 }
1805
1806 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1807 {
1808         off_t pos = 0;
1809         unsigned int count = 0;
1810
1811         if (down_interruptible(&ipt_mutex) != 0)
1812                 return 0;
1813         
1814         LIST_FIND(&ipt_match, print_name, void *,
1815                   offset, buffer, length, &pos, &count);
1816
1817         up(&ipt_mutex);
1818
1819         *start = (char *)((unsigned long)count - offset);
1820         return pos;
1821 }
1822
1823 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1824 { { "ip_tables_names", ipt_get_tables },
1825   { "ip_tables_targets", ipt_get_targets },
1826   { "ip_tables_matches", ipt_get_matches },
1827   { NULL, NULL} };
1828 #endif /*CONFIG_PROC_FS*/
1829
1830 static int __init init(void)
1831 {
1832         int ret;
1833
1834         /* Noone else will be downing sem now, so we won't sleep */
1835         down(&ipt_mutex);
1836         list_append(&ipt_target, &ipt_standard_target);
1837         list_append(&ipt_target, &ipt_error_target);
1838         list_append(&ipt_match, &tcp_matchstruct);
1839         list_append(&ipt_match, &udp_matchstruct);
1840         list_append(&ipt_match, &icmp_matchstruct);
1841         up(&ipt_mutex);
1842
1843         /* Register setsockopt */
1844         ret = nf_register_sockopt(&ipt_sockopts);
1845         if (ret < 0) {
1846                 duprintf("Unable to register sockopts.\n");
1847                 return ret;
1848         }
1849
1850 #ifdef CONFIG_PROC_FS
1851         {
1852         struct proc_dir_entry *proc;
1853         int i;
1854
1855         for (i = 0; ipt_proc_entry[i].name; i++) {
1856                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1857                                        ipt_proc_entry[i].get_info);
1858                 if (!proc) {
1859                         while (--i >= 0)
1860                                 proc_net_remove(ipt_proc_entry[i].name);
1861                         nf_unregister_sockopt(&ipt_sockopts);
1862                         return -ENOMEM;
1863                 }
1864                 proc->owner = THIS_MODULE;
1865         }
1866         }
1867 #endif
1868
1869         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1870         return 0;
1871 }
1872
1873 static void __exit fini(void)
1874 {
1875         nf_unregister_sockopt(&ipt_sockopts);
1876 #ifdef CONFIG_PROC_FS
1877         {
1878         int i;
1879         for (i = 0; ipt_proc_entry[i].name; i++)
1880                 proc_net_remove(ipt_proc_entry[i].name);
1881         }
1882 #endif
1883 }
1884
/* Table registration and traversal */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Match and target registration */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);

/* Exported to GPL-licensed modules only */
EXPORT_SYMBOL_GPL(__ipt_find_target_lock);
EXPORT_SYMBOL_GPL(__ipt_mutex_up);

/* Module entry and exit points */
module_init(init);
module_exit(fini);