patch-2_6_7-vs1_9_1_12
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29
30 #include <linux/netfilter_ipv4/ip_tables.h>
31
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
35
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
39
/* dprintf(): debug output that forwards to printk() only when
   DEBUG_IP_FIREWALL is defined; otherwise the call expands to nothing,
   so its arguments are not evaluated. */
40 #ifdef DEBUG_IP_FIREWALL
41 #define dprintf(format, args...)  printk(format , ## args)
42 #else
43 #define dprintf(format, args...)
44 #endif
45
/* duprintf(): same scheme, switched by DEBUG_IP_FIREWALL_USER.  In this
   file it is used by the rule checking / table translation code. */
46 #ifdef DEBUG_IP_FIREWALL_USER
47 #define duprintf(format, args...) printk(format , ## args)
48 #else
49 #define duprintf(format, args...)
50 #endif
51
/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, print function, file and
   line when x is false; execution continues either way.  Without that
   option the macro is empty and x is not evaluated. */
52 #ifdef CONFIG_NETFILTER_DEBUG
53 #define IP_NF_ASSERT(x)                                         \
54 do {                                                            \
55         if (!(x))                                               \
56                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
57                        __FUNCTION__, __FILE__, __LINE__);       \
58 } while(0)
59 #else
60 #define IP_NF_ASSERT(x)
61 #endif
/* Round x up to the next multiple of SMP_CACHE_BYTES (the mask trick
   assumes SMP_CACHE_BYTES is a power of two). */
62 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
63
64 /* Must have mutex: both macros assert (via IP_NF_ASSERT) that
      down_trylock(&ipt_mutex) returns non-zero.  The macro argument x is
      ignored.  NOTE(review): presumably these are the hooks expected by
      the lockhelp.h/listhelp.h helpers included right below -- confirm.
      If the assertion ever fails, the trylock has succeeded, which
      appears to leave ipt_mutex held -- confirm. */
65 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
66 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
67 #include <linux/netfilter_ipv4/lockhelp.h>
68 #include <linux/netfilter_ipv4/listhelp.h>
69
70 #if 0
71 /* All the better to debug you with... */
72 #define static
73 #define inline
74 #endif
75
76 /*
77    We keep a set of rules for each CPU, so we can avoid write-locking
78    them in the softirq when updating the counters and therefore
79    only need to read-lock in the softirq; doing a write_lock_bh() in user
80    context stops packets coming through and allows user context to read
81    the counters or update the rules.
82
83    To be cache friendly on SMP, we arrange them like so:
84    [ n-entries ]
85    ... cache-align padding ...
86    [ n-entries ]
87
88    Hence the start of any table is given by get_table() below.  */
89
90 /* The table itself: a fixed header followed by the rule blob(s). */
91 struct ipt_table_info
92 {
93         /* Size per table: bytes occupied by one copy of the entries;
              TABLE_OFFSET() spaces the per-CPU copies SMP_ALIGN(size)
              apart. */
94         unsigned int size;
95         /* Number of entries: FIXME. --RR */
96         unsigned int number;
97         /* Initial number of entries. Needed for module usage count */
98         unsigned int initial_entries;
99
100         /* Entry points and underflows: per-hook byte offsets into one
               copy of entries[] (they are added to the table base to
               obtain a struct ipt_entry pointer). */
101         unsigned int hook_entry[NF_IP_NUMHOOKS];
102         unsigned int underflow[NF_IP_NUMHOOKS];
103
104         /* ipt_entry tables: one per CPU.  Zero-length trailing array,
               cache-line aligned; the storage is allocated together
               with the header. */
105         char entries[0] ____cacheline_aligned;
106 };
107
/* Registries of the known targets, matches and tables.  The
   find_*_lock() helpers in this file search them by name. */
108 static LIST_HEAD(ipt_target);
109 static LIST_HEAD(ipt_match);
110 static LIST_HEAD(ipt_tables);
/* Add b to the byte count (bcnt) and p to the packet count (pcnt) of
   counter c. */
111 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
112
/* Byte offset, from the start of t->entries, of the rule copy that
   belongs to CPU p.  On SMP the copies are SMP_ALIGN(size) bytes apart;
   otherwise there is a single copy at offset 0 (t and p are unused). */
113 #ifdef CONFIG_SMP
114 #define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
115 #else
116 #define TABLE_OFFSET(t,p) 0
117 #endif
118
119 #if 0
120 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
121 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
122 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
123 #endif
124
125 /* Returns whether matches rule or not. */
126 static inline int
127 ip_packet_match(const struct iphdr *ip,
128                 const char *indev,
129                 const char *outdev,
130                 const struct ipt_ip *ipinfo,
131                 int isfrag)
132 {
133         size_t i;
134         unsigned long ret;
135
136 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
137
138         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
139                   IPT_INV_SRCIP)
140             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
141                      IPT_INV_DSTIP)) {
142                 dprintf("Source or dest mismatch.\n");
143
144                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
145                         NIPQUAD(ip->saddr),
146                         NIPQUAD(ipinfo->smsk.s_addr),
147                         NIPQUAD(ipinfo->src.s_addr),
148                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
149                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
150                         NIPQUAD(ip->daddr),
151                         NIPQUAD(ipinfo->dmsk.s_addr),
152                         NIPQUAD(ipinfo->dst.s_addr),
153                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
154                 return 0;
155         }
156
157         /* Look for ifname matches; this should unroll nicely. */
158         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
159                 ret |= (((const unsigned long *)indev)[i]
160                         ^ ((const unsigned long *)ipinfo->iniface)[i])
161                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
162         }
163
164         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
165                 dprintf("VIA in mismatch (%s vs %s).%s\n",
166                         indev, ipinfo->iniface,
167                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
168                 return 0;
169         }
170
171         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
172                 ret |= (((const unsigned long *)outdev)[i]
173                         ^ ((const unsigned long *)ipinfo->outiface)[i])
174                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
175         }
176
177         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
178                 dprintf("VIA out mismatch (%s vs %s).%s\n",
179                         outdev, ipinfo->outiface,
180                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
181                 return 0;
182         }
183
184         /* Check specific protocol */
185         if (ipinfo->proto
186             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
187                 dprintf("Packet protocol %hi does not match %hi.%s\n",
188                         ip->protocol, ipinfo->proto,
189                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
190                 return 0;
191         }
192
193         /* If we have a fragment rule but the packet is not a fragment
194          * then we return zero */
195         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
196                 dprintf("Fragment rule but not fragment.%s\n",
197                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
198                 return 0;
199         }
200
201         return 1;
202 }
203
204 static inline int
205 ip_checkentry(const struct ipt_ip *ip)
206 {
207         if (ip->flags & ~IPT_F_MASK) {
208                 duprintf("Unknown flag bits set: %08X\n",
209                          ip->flags & ~IPT_F_MASK);
210                 return 0;
211         }
212         if (ip->invflags & ~IPT_INV_MASK) {
213                 duprintf("Unknown invflag bits set: %08X\n",
214                          ip->invflags & ~IPT_INV_MASK);
215                 return 0;
216         }
217         return 1;
218 }
219
220 static unsigned int
221 ipt_error(struct sk_buff **pskb,
222           const struct net_device *in,
223           const struct net_device *out,
224           unsigned int hooknum,
225           const void *targinfo,
226           void *userinfo)
227 {
228         if (net_ratelimit())
229                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
230
231         return NF_DROP;
232 }
233
234 static inline
235 int do_match(struct ipt_entry_match *m,
236              const struct sk_buff *skb,
237              const struct net_device *in,
238              const struct net_device *out,
239              int offset,
240              int *hotdrop)
241 {
242         /* Stop iteration if it doesn't match */
243         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
244                 return 1;
245         else
246                 return 0;
247 }
248
/* Return the entry located `offset` bytes after `base`. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	char *start = base;

	return (struct ipt_entry *)(start + offset);
}
254
255 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
256 unsigned int
257 ipt_do_table(struct sk_buff **pskb,
258              unsigned int hook,
259              const struct net_device *in,
260              const struct net_device *out,
261              struct ipt_table *table,
262              void *userdata)
263 {
264         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
265         u_int16_t offset;
266         struct iphdr *ip;
267         u_int16_t datalen;
268         int hotdrop = 0;
269         /* Initializing verdict to NF_DROP keeps gcc happy. */
270         unsigned int verdict = NF_DROP;
271         const char *indev, *outdev;
272         void *table_base;
273         struct ipt_entry *e, *back;
274
275         /* Initialization */
276         ip = (*pskb)->nh.iph;
277         datalen = (*pskb)->len - ip->ihl * 4;
278         indev = in ? in->name : nulldevname;
279         outdev = out ? out->name : nulldevname;
280         /* We handle fragments by dealing with the first fragment as
281          * if it was a normal packet.  All other fragments are treated
282          * normally, except that they will NEVER match rules that ask
283          * things we don't know, ie. tcp syn flag or ports).  If the
284          * rule is also a fragment-specific rule, non-fragments won't
285          * match it. */
286         offset = ntohs(ip->frag_off) & IP_OFFSET;
287
288         read_lock_bh(&table->lock);
289         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
290         table_base = (void *)table->private->entries
291                 + TABLE_OFFSET(table->private, smp_processor_id());
292         e = get_entry(table_base, table->private->hook_entry[hook]);
293
294 #ifdef CONFIG_NETFILTER_DEBUG
295         /* Check noone else using our table */
296         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
297             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
298                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
299                        smp_processor_id(),
300                        table->name,
301                        &((struct ipt_entry *)table_base)->comefrom,
302                        ((struct ipt_entry *)table_base)->comefrom);
303         }
304         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
305 #endif
306
307         /* For return from builtin chain */
308         back = get_entry(table_base, table->private->underflow[hook]);
309
310         do {
311                 IP_NF_ASSERT(e);
312                 IP_NF_ASSERT(back);
313                 (*pskb)->nfcache |= e->nfcache;
314                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
315                         struct ipt_entry_target *t;
316
317                         if (IPT_MATCH_ITERATE(e, do_match,
318                                               *pskb, in, out,
319                                               offset, &hotdrop) != 0)
320                                 goto no_match;
321
322                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
323
324                         t = ipt_get_target(e);
325                         IP_NF_ASSERT(t->u.kernel.target);
326                         /* Standard target? */
327                         if (!t->u.kernel.target->target) {
328                                 int v;
329
330                                 v = ((struct ipt_standard_target *)t)->verdict;
331                                 if (v < 0) {
332                                         /* Pop from stack? */
333                                         if (v != IPT_RETURN) {
334                                                 verdict = (unsigned)(-v) - 1;
335                                                 break;
336                                         }
337                                         e = back;
338                                         back = get_entry(table_base,
339                                                          back->comefrom);
340                                         continue;
341                                 }
342                                 if (table_base + v
343                                     != (void *)e + e->next_offset) {
344                                         /* Save old back ptr in next entry */
345                                         struct ipt_entry *next
346                                                 = (void *)e + e->next_offset;
347                                         next->comefrom
348                                                 = (void *)back - table_base;
349                                         /* set back pointer to next entry */
350                                         back = next;
351                                 }
352
353                                 e = get_entry(table_base, v);
354                         } else {
355                                 /* Targets which reenter must return
356                                    abs. verdicts */
357 #ifdef CONFIG_NETFILTER_DEBUG
358                                 ((struct ipt_entry *)table_base)->comefrom
359                                         = 0xeeeeeeec;
360 #endif
361                                 verdict = t->u.kernel.target->target(pskb,
362                                                                      in, out,
363                                                                      hook,
364                                                                      t->data,
365                                                                      userdata);
366
367 #ifdef CONFIG_NETFILTER_DEBUG
368                                 if (((struct ipt_entry *)table_base)->comefrom
369                                     != 0xeeeeeeec
370                                     && verdict == IPT_CONTINUE) {
371                                         printk("Target %s reentered!\n",
372                                                t->u.kernel.target->name);
373                                         verdict = NF_DROP;
374                                 }
375                                 ((struct ipt_entry *)table_base)->comefrom
376                                         = 0x57acc001;
377 #endif
378                                 /* Target might have changed stuff. */
379                                 ip = (*pskb)->nh.iph;
380                                 datalen = (*pskb)->len - ip->ihl * 4;
381
382                                 if (verdict == IPT_CONTINUE)
383                                         e = (void *)e + e->next_offset;
384                                 else
385                                         /* Verdict */
386                                         break;
387                         }
388                 } else {
389
390                 no_match:
391                         e = (void *)e + e->next_offset;
392                 }
393         } while (!hotdrop);
394
395 #ifdef CONFIG_NETFILTER_DEBUG
396         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
397 #endif
398         read_unlock_bh(&table->lock);
399
400 #ifdef DEBUG_ALLOW_ALL
401         return NF_ACCEPT;
402 #else
403         if (hotdrop)
404                 return NF_DROP;
405         else return verdict;
406 #endif
407 }
408
409 /* If it succeeds, returns element and locks mutex */
410 static inline void *
411 find_inlist_lock_noload(struct list_head *head,
412                         const char *name,
413                         int *error,
414                         struct semaphore *mutex)
415 {
416         void *ret;
417
418 #if 0 
419         duprintf("find_inlist: searching for `%s' in %s.\n",
420                  name, head == &ipt_target ? "ipt_target"
421                  : head == &ipt_match ? "ipt_match"
422                  : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
423 #endif
424
425         *error = down_interruptible(mutex);
426         if (*error != 0)
427                 return NULL;
428
429         ret = list_named_find(head, name);
430         if (!ret) {
431                 *error = -ENOENT;
432                 up(mutex);
433         }
434         return ret;
435 }
436
/* find_inlist_lock(): like find_inlist_lock_noload(), but when built
 * with CONFIG_KMOD a failed lookup triggers request_module() for
 * "<prefix><name>" followed by one more lookup.  Without CONFIG_KMOD
 * it is a plain alias and the prefix argument is dropped. */
#ifndef CONFIG_KMOD
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#else
static void *
find_inlist_lock(struct list_head *head,
		 const char *name,
		 const char *prefix,
		 int *error,
		 struct semaphore *mutex)
{
	void *found = find_inlist_lock_noload(head, name, error, mutex);

	if (found)
		return found;

	/* First attempt failed: try to load the module, then retry.
	 * The result of request_module() itself is not inspected; the
	 * second lookup decides. */
	duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
	request_module("%s%s", prefix, name);
	return find_inlist_lock_noload(head, name, error, mutex);
}
#endif
459
460 static inline struct ipt_table *
461 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
462 {
463         return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
464 }
465
466 static inline struct ipt_match *
467 find_match_lock(const char *name, int *error, struct semaphore *mutex)
468 {
469         return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
470 }
471
472 struct ipt_target *
473 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
474 {
475         return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
476 }
477
478 /* All zeroes == unconditional rule. */
479 static inline int
480 unconditional(const struct ipt_ip *ip)
481 {
482         unsigned int i;
483
484         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
485                 if (((__u32 *)ip)[i])
486                         return 0;
487
488         return 1;
489 }
490
491 /* Figures out from what hook each rule can be called: returns 0 if
492    there are loops.  Puts hook bitmask in comefrom. */
493 static int
494 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
495 {
496         unsigned int hook;
497
498         /* No recursion; use packet counter to save back ptrs (reset
499            to 0 as we leave), and comefrom to save source hook bitmask */
500         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
501                 unsigned int pos = newinfo->hook_entry[hook];
502                 struct ipt_entry *e
503                         = (struct ipt_entry *)(newinfo->entries + pos);
504
505                 if (!(valid_hooks & (1 << hook)))
506                         continue;
507
508                 /* Set initial back pointer. */
509                 e->counters.pcnt = pos;
510
511                 for (;;) {
512                         struct ipt_standard_target *t
513                                 = (void *)ipt_get_target(e);
514
515                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
516                                 printk("iptables: loop hook %u pos %u %08X.\n",
517                                        hook, pos, e->comefrom);
518                                 return 0;
519                         }
520                         e->comefrom
521                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
522
523                         /* Unconditional return/END. */
524                         if (e->target_offset == sizeof(struct ipt_entry)
525                             && (strcmp(t->target.u.user.name,
526                                        IPT_STANDARD_TARGET) == 0)
527                             && t->verdict < 0
528                             && unconditional(&e->ip)) {
529                                 unsigned int oldpos, size;
530
531                                 /* Return: backtrack through the last
532                                    big jump. */
533                                 do {
534                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
535 #ifdef DEBUG_IP_FIREWALL_USER
536                                         if (e->comefrom
537                                             & (1 << NF_IP_NUMHOOKS)) {
538                                                 duprintf("Back unset "
539                                                          "on hook %u "
540                                                          "rule %u\n",
541                                                          hook, pos);
542                                         }
543 #endif
544                                         oldpos = pos;
545                                         pos = e->counters.pcnt;
546                                         e->counters.pcnt = 0;
547
548                                         /* We're at the start. */
549                                         if (pos == oldpos)
550                                                 goto next;
551
552                                         e = (struct ipt_entry *)
553                                                 (newinfo->entries + pos);
554                                 } while (oldpos == pos + e->next_offset);
555
556                                 /* Move along one */
557                                 size = e->next_offset;
558                                 e = (struct ipt_entry *)
559                                         (newinfo->entries + pos + size);
560                                 e->counters.pcnt = pos;
561                                 pos += size;
562                         } else {
563                                 int newpos = t->verdict;
564
565                                 if (strcmp(t->target.u.user.name,
566                                            IPT_STANDARD_TARGET) == 0
567                                     && newpos >= 0) {
568                                         /* This a jump; chase it. */
569                                         duprintf("Jump rule %u -> %u\n",
570                                                  pos, newpos);
571                                 } else {
572                                         /* ... this is a fallthru */
573                                         newpos = pos + e->next_offset;
574                                 }
575                                 e = (struct ipt_entry *)
576                                         (newinfo->entries + newpos);
577                                 e->counters.pcnt = pos;
578                                 pos = newpos;
579                         }
580                 }
581                 next:
582                 duprintf("Finished chain %u\n", hook);
583         }
584         return 1;
585 }
586
587 static inline int
588 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
589 {
590         if (i && (*i)-- == 0)
591                 return 1;
592
593         if (m->u.kernel.match->destroy)
594                 m->u.kernel.match->destroy(m->data,
595                                            m->u.match_size - sizeof(*m));
596         module_put(m->u.kernel.match->me);
597         return 0;
598 }
599
600 static inline int
601 standard_check(const struct ipt_entry_target *t,
602                unsigned int max_offset)
603 {
604         struct ipt_standard_target *targ = (void *)t;
605
606         /* Check standard info. */
607         if (t->u.target_size
608             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
609                 duprintf("standard_check: target size %u != %u\n",
610                          t->u.target_size,
611                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
612                 return 0;
613         }
614
615         if (targ->verdict >= 0
616             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
617                 duprintf("ipt_standard_check: bad verdict (%i)\n",
618                          targ->verdict);
619                 return 0;
620         }
621
622         if (targ->verdict < -NF_MAX_VERDICT - 1) {
623                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
624                          targ->verdict);
625                 return 0;
626         }
627         return 1;
628 }
629
630 static inline int
631 check_match(struct ipt_entry_match *m,
632             const char *name,
633             const struct ipt_ip *ip,
634             unsigned int hookmask,
635             unsigned int *i)
636 {
637         int ret;
638         struct ipt_match *match;
639
640         match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
641         if (!match) {
642                 duprintf("check_match: `%s' not found\n", m->u.user.name);
643                 return ret;
644         }
645         if (!try_module_get(match->me)) {
646                 up(&ipt_mutex);
647                 return -ENOENT;
648         }
649         m->u.kernel.match = match;
650         up(&ipt_mutex);
651
652         if (m->u.kernel.match->checkentry
653             && !m->u.kernel.match->checkentry(name, ip, m->data,
654                                               m->u.match_size - sizeof(*m),
655                                               hookmask)) {
656                 module_put(m->u.kernel.match->me);
657                 duprintf("ip_tables: check failed for `%s'.\n",
658                          m->u.kernel.match->name);
659                 return -EINVAL;
660         }
661
662         (*i)++;
663         return 0;
664 }
665
666 static struct ipt_target ipt_standard_target;
667
668 static inline int
669 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
670             unsigned int *i)
671 {
672         struct ipt_entry_target *t;
673         struct ipt_target *target;
674         int ret;
675         unsigned int j;
676
677         if (!ip_checkentry(&e->ip)) {
678                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
679                 return -EINVAL;
680         }
681
682         j = 0;
683         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
684         if (ret != 0)
685                 goto cleanup_matches;
686
687         t = ipt_get_target(e);
688         target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
689         if (!target) {
690                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
691                 goto cleanup_matches;
692         }
693         if (!try_module_get(target->me)) {
694                 up(&ipt_mutex);
695                 ret = -ENOENT;
696                 goto cleanup_matches;
697         }
698         t->u.kernel.target = target;
699         up(&ipt_mutex);
700
701         if (t->u.kernel.target == &ipt_standard_target) {
702                 if (!standard_check(t, size)) {
703                         ret = -EINVAL;
704                         goto cleanup_matches;
705                 }
706         } else if (t->u.kernel.target->checkentry
707                    && !t->u.kernel.target->checkentry(name, e, t->data,
708                                                       t->u.target_size
709                                                       - sizeof(*t),
710                                                       e->comefrom)) {
711                 module_put(t->u.kernel.target->me);
712                 duprintf("ip_tables: check failed for `%s'.\n",
713                          t->u.kernel.target->name);
714                 ret = -EINVAL;
715                 goto cleanup_matches;
716         }
717
718         (*i)++;
719         return 0;
720
721  cleanup_matches:
722         IPT_MATCH_ITERATE(e, cleanup_match, &j);
723         return ret;
724 }
725
726 static inline int
727 check_entry_size_and_hooks(struct ipt_entry *e,
728                            struct ipt_table_info *newinfo,
729                            unsigned char *base,
730                            unsigned char *limit,
731                            const unsigned int *hook_entries,
732                            const unsigned int *underflows,
733                            unsigned int *i)
734 {
735         unsigned int h;
736
737         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
738             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
739                 duprintf("Bad offset %p\n", e);
740                 return -EINVAL;
741         }
742
743         if (e->next_offset
744             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
745                 duprintf("checking: element %p size %u\n",
746                          e, e->next_offset);
747                 return -EINVAL;
748         }
749
750         /* Check hooks & underflows */
751         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
752                 if ((unsigned char *)e - base == hook_entries[h])
753                         newinfo->hook_entry[h] = hook_entries[h];
754                 if ((unsigned char *)e - base == underflows[h])
755                         newinfo->underflow[h] = underflows[h];
756         }
757
758         /* FIXME: underflows must be unconditional, standard verdicts
759            < 0 (not IPT_RETURN). --RR */
760
761         /* Clear counters and comefrom */
762         e->counters = ((struct ipt_counters) { 0, 0 });
763         e->comefrom = 0;
764
765         (*i)++;
766         return 0;
767 }
768
769 static inline int
770 cleanup_entry(struct ipt_entry *e, unsigned int *i)
771 {
772         struct ipt_entry_target *t;
773
774         if (i && (*i)-- == 0)
775                 return 1;
776
777         /* Cleanup all matches */
778         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
779         t = ipt_get_target(e);
780         if (t->u.kernel.target->destroy)
781                 t->u.kernel.target->destroy(t->data,
782                                             t->u.target_size - sizeof(*t));
783         module_put(t->u.kernel.target->me);
784         return 0;
785 }
786
787 /* Checks and translates the user-supplied table segment (held in
788    newinfo) */
789 static int
790 translate_table(const char *name,
791                 unsigned int valid_hooks,
792                 struct ipt_table_info *newinfo,
793                 unsigned int size,
794                 unsigned int number,
795                 const unsigned int *hook_entries,
796                 const unsigned int *underflows)
797 {
798         unsigned int i;
799         int ret;
800
801         newinfo->size = size;
802         newinfo->number = number;
803
804         /* Init all hooks to impossible value. */
805         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
806                 newinfo->hook_entry[i] = 0xFFFFFFFF;
807                 newinfo->underflow[i] = 0xFFFFFFFF;
808         }
809
810         duprintf("translate_table: size %u\n", newinfo->size);
811         i = 0;
812         /* Walk through entries, checking offsets. */
813         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
814                                 check_entry_size_and_hooks,
815                                 newinfo,
816                                 newinfo->entries,
817                                 newinfo->entries + size,
818                                 hook_entries, underflows, &i);
819         if (ret != 0)
820                 return ret;
821
822         if (i != number) {
823                 duprintf("translate_table: %u not %u entries\n",
824                          i, number);
825                 return -EINVAL;
826         }
827
828         /* Check hooks all assigned */
829         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
830                 /* Only hooks which are valid */
831                 if (!(valid_hooks & (1 << i)))
832                         continue;
833                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
834                         duprintf("Invalid hook entry %u %u\n",
835                                  i, hook_entries[i]);
836                         return -EINVAL;
837                 }
838                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
839                         duprintf("Invalid underflow %u %u\n",
840                                  i, underflows[i]);
841                         return -EINVAL;
842                 }
843         }
844
845         if (!mark_source_chains(newinfo, valid_hooks))
846                 return -ELOOP;
847
848         /* Finally, each sanity check must pass */
849         i = 0;
850         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
851                                 check_entry, name, size, &i);
852
853         if (ret != 0) {
854                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
855                                   cleanup_entry, &i);
856                 return ret;
857         }
858
859         /* And one copy for every other CPU */
860         for (i = 1; i < NR_CPUS; i++) {
861                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
862                        newinfo->entries,
863                        SMP_ALIGN(newinfo->size));
864         }
865
866         return ret;
867 }
868
869 static struct ipt_table_info *
870 replace_table(struct ipt_table *table,
871               unsigned int num_counters,
872               struct ipt_table_info *newinfo,
873               int *error)
874 {
875         struct ipt_table_info *oldinfo;
876
877 #ifdef CONFIG_NETFILTER_DEBUG
878         {
879                 struct ipt_entry *table_base;
880                 unsigned int i;
881
882                 for (i = 0; i < NR_CPUS; i++) {
883                         table_base =
884                                 (void *)newinfo->entries
885                                 + TABLE_OFFSET(newinfo, i);
886
887                         table_base->comefrom = 0xdead57ac;
888                 }
889         }
890 #endif
891
892         /* Do the substitution. */
893         write_lock_bh(&table->lock);
894         /* Check inside lock: is the old number correct? */
895         if (num_counters != table->private->number) {
896                 duprintf("num_counters != table->private->number (%u/%u)\n",
897                          num_counters, table->private->number);
898                 write_unlock_bh(&table->lock);
899                 *error = -EAGAIN;
900                 return NULL;
901         }
902         oldinfo = table->private;
903         table->private = newinfo;
904         newinfo->initial_entries = oldinfo->initial_entries;
905         write_unlock_bh(&table->lock);
906
907         return oldinfo;
908 }
909
910 /* Gets counters. */
911 static inline int
912 add_entry_to_counter(const struct ipt_entry *e,
913                      struct ipt_counters total[],
914                      unsigned int *i)
915 {
916         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
917
918         (*i)++;
919         return 0;
920 }
921
922 static void
923 get_counters(const struct ipt_table_info *t,
924              struct ipt_counters counters[])
925 {
926         unsigned int cpu;
927         unsigned int i;
928
929         for (cpu = 0; cpu < NR_CPUS; cpu++) {
930                 i = 0;
931                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
932                                   t->size,
933                                   add_entry_to_counter,
934                                   counters,
935                                   &i);
936         }
937 }
938
939 static int
940 copy_entries_to_user(unsigned int total_size,
941                      struct ipt_table *table,
942                      void __user *userptr)
943 {
944         unsigned int off, num, countersize;
945         struct ipt_entry *e;
946         struct ipt_counters *counters;
947         int ret = 0;
948
949         /* We need atomic snapshot of counters: rest doesn't change
950            (other than comefrom, which userspace doesn't care
951            about). */
952         countersize = sizeof(struct ipt_counters) * table->private->number;
953         counters = vmalloc(countersize);
954
955         if (counters == NULL)
956                 return -ENOMEM;
957
958         /* First, sum counters... */
959         memset(counters, 0, countersize);
960         write_lock_bh(&table->lock);
961         get_counters(table->private, counters);
962         write_unlock_bh(&table->lock);
963
964         /* ... then copy entire thing from CPU 0... */
965         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
966                 ret = -EFAULT;
967                 goto free_counters;
968         }
969
970         /* FIXME: use iterator macros --RR */
971         /* ... then go back and fix counters and names */
972         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
973                 unsigned int i;
974                 struct ipt_entry_match *m;
975                 struct ipt_entry_target *t;
976
977                 e = (struct ipt_entry *)(table->private->entries + off);
978                 if (copy_to_user(userptr + off
979                                  + offsetof(struct ipt_entry, counters),
980                                  &counters[num],
981                                  sizeof(counters[num])) != 0) {
982                         ret = -EFAULT;
983                         goto free_counters;
984                 }
985
986                 for (i = sizeof(struct ipt_entry);
987                      i < e->target_offset;
988                      i += m->u.match_size) {
989                         m = (void *)e + i;
990
991                         if (copy_to_user(userptr + off + i
992                                          + offsetof(struct ipt_entry_match,
993                                                     u.user.name),
994                                          m->u.kernel.match->name,
995                                          strlen(m->u.kernel.match->name)+1)
996                             != 0) {
997                                 ret = -EFAULT;
998                                 goto free_counters;
999                         }
1000                 }
1001
1002                 t = ipt_get_target(e);
1003                 if (copy_to_user(userptr + off + e->target_offset
1004                                  + offsetof(struct ipt_entry_target,
1005                                             u.user.name),
1006                                  t->u.kernel.target->name,
1007                                  strlen(t->u.kernel.target->name)+1) != 0) {
1008                         ret = -EFAULT;
1009                         goto free_counters;
1010                 }
1011         }
1012
1013  free_counters:
1014         vfree(counters);
1015         return ret;
1016 }
1017
1018 static int
1019 get_entries(const struct ipt_get_entries *entries,
1020             struct ipt_get_entries __user *uptr)
1021 {
1022         int ret;
1023         struct ipt_table *t;
1024
1025         t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1026         if (t) {
1027                 duprintf("t->private->number = %u\n",
1028                          t->private->number);
1029                 if (entries->size == t->private->size)
1030                         ret = copy_entries_to_user(t->private->size,
1031                                                    t, uptr->entrytable);
1032                 else {
1033                         duprintf("get_entries: I've got %u not %u!\n",
1034                                  t->private->size,
1035                                  entries->size);
1036                         ret = -EINVAL;
1037                 }
1038                 up(&ipt_mutex);
1039         } else
1040                 duprintf("get_entries: Can't find %s!\n",
1041                          entries->name);
1042
1043         return ret;
1044 }
1045
1046 static int
1047 do_replace(void __user *user, unsigned int len)
1048 {
1049         int ret;
1050         struct ipt_replace tmp;
1051         struct ipt_table *t;
1052         struct ipt_table_info *newinfo, *oldinfo;
1053         struct ipt_counters *counters;
1054
1055         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1056                 return -EFAULT;
1057
1058         /* Hack: Causes ipchains to give correct error msg --RR */
1059         if (len != sizeof(tmp) + tmp.size)
1060                 return -ENOPROTOOPT;
1061
1062         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1063         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1064                 return -ENOMEM;
1065
1066         newinfo = vmalloc(sizeof(struct ipt_table_info)
1067                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1068         if (!newinfo)
1069                 return -ENOMEM;
1070
1071         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1072                            tmp.size) != 0) {
1073                 ret = -EFAULT;
1074                 goto free_newinfo;
1075         }
1076
1077         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1078         if (!counters) {
1079                 ret = -ENOMEM;
1080                 goto free_newinfo;
1081         }
1082         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1083
1084         ret = translate_table(tmp.name, tmp.valid_hooks,
1085                               newinfo, tmp.size, tmp.num_entries,
1086                               tmp.hook_entry, tmp.underflow);
1087         if (ret != 0)
1088                 goto free_newinfo_counters;
1089
1090         duprintf("ip_tables: Translated table\n");
1091
1092         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1093         if (!t)
1094                 goto free_newinfo_counters_untrans;
1095
1096         /* You lied! */
1097         if (tmp.valid_hooks != t->valid_hooks) {
1098                 duprintf("Valid hook crap: %08X vs %08X\n",
1099                          tmp.valid_hooks, t->valid_hooks);
1100                 ret = -EINVAL;
1101                 goto free_newinfo_counters_untrans_unlock;
1102         }
1103
1104         /* Get a reference in advance, we're not allowed fail later */
1105         if (!try_module_get(t->me)) {
1106                 ret = -EBUSY;
1107                 goto free_newinfo_counters_untrans_unlock;
1108         }
1109
1110
1111         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1112         if (!oldinfo)
1113                 goto put_module;
1114
1115         /* Update module usage count based on number of rules */
1116         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1117                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1118         if ((oldinfo->number > oldinfo->initial_entries) || 
1119             (newinfo->number <= oldinfo->initial_entries)) 
1120                 module_put(t->me);
1121         if ((oldinfo->number > oldinfo->initial_entries) &&
1122             (newinfo->number <= oldinfo->initial_entries))
1123                 module_put(t->me);
1124
1125         /* Get the old counters. */
1126         get_counters(oldinfo, counters);
1127         /* Decrease module usage counts and free resource */
1128         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1129         vfree(oldinfo);
1130         /* Silent error: too late now. */
1131         copy_to_user(tmp.counters, counters,
1132                      sizeof(struct ipt_counters) * tmp.num_counters);
1133         vfree(counters);
1134         up(&ipt_mutex);
1135         return 0;
1136
1137  put_module:
1138         module_put(t->me);
1139  free_newinfo_counters_untrans_unlock:
1140         up(&ipt_mutex);
1141  free_newinfo_counters_untrans:
1142         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1143  free_newinfo_counters:
1144         vfree(counters);
1145  free_newinfo:
1146         vfree(newinfo);
1147         return ret;
1148 }
1149
1150 /* We're lazy, and add to the first CPU; overflow works its fey magic
1151  * and everything is OK. */
1152 static inline int
1153 add_counter_to_entry(struct ipt_entry *e,
1154                      const struct ipt_counters addme[],
1155                      unsigned int *i)
1156 {
1157 #if 0
1158         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1159                  *i,
1160                  (long unsigned int)e->counters.pcnt,
1161                  (long unsigned int)e->counters.bcnt,
1162                  (long unsigned int)addme[*i].pcnt,
1163                  (long unsigned int)addme[*i].bcnt);
1164 #endif
1165
1166         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1167
1168         (*i)++;
1169         return 0;
1170 }
1171
1172 static int
1173 do_add_counters(void __user *user, unsigned int len)
1174 {
1175         unsigned int i;
1176         struct ipt_counters_info tmp, *paddc;
1177         struct ipt_table *t;
1178         int ret;
1179
1180         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1181                 return -EFAULT;
1182
1183         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1184                 return -EINVAL;
1185
1186         paddc = vmalloc(len);
1187         if (!paddc)
1188                 return -ENOMEM;
1189
1190         if (copy_from_user(paddc, user, len) != 0) {
1191                 ret = -EFAULT;
1192                 goto free;
1193         }
1194
1195         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1196         if (!t)
1197                 goto free;
1198
1199         write_lock_bh(&t->lock);
1200         if (t->private->number != paddc->num_counters) {
1201                 ret = -EINVAL;
1202                 goto unlock_up_free;
1203         }
1204
1205         i = 0;
1206         IPT_ENTRY_ITERATE(t->private->entries,
1207                           t->private->size,
1208                           add_counter_to_entry,
1209                           paddc->counters,
1210                           &i);
1211  unlock_up_free:
1212         write_unlock_bh(&t->lock);
1213         up(&ipt_mutex);
1214  free:
1215         vfree(paddc);
1216
1217         return ret;
1218 }
1219
1220 static int
1221 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1222 {
1223         int ret;
1224
1225         if (!capable(CAP_NET_ADMIN))
1226                 return -EPERM;
1227
1228         switch (cmd) {
1229         case IPT_SO_SET_REPLACE:
1230                 ret = do_replace(user, len);
1231                 break;
1232
1233         case IPT_SO_SET_ADD_COUNTERS:
1234                 ret = do_add_counters(user, len);
1235                 break;
1236
1237         default:
1238                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1239                 ret = -EINVAL;
1240         }
1241
1242         return ret;
1243 }
1244
1245 static int
1246 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1247 {
1248         int ret;
1249
1250         if (!capable(CAP_NET_ADMIN))
1251                 return -EPERM;
1252
1253         switch (cmd) {
1254         case IPT_SO_GET_INFO: {
1255                 char name[IPT_TABLE_MAXNAMELEN];
1256                 struct ipt_table *t;
1257
1258                 if (*len != sizeof(struct ipt_getinfo)) {
1259                         duprintf("length %u != %u\n", *len,
1260                                  sizeof(struct ipt_getinfo));
1261                         ret = -EINVAL;
1262                         break;
1263                 }
1264
1265                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1266                         ret = -EFAULT;
1267                         break;
1268                 }
1269                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1270                 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1271                 if (t) {
1272                         struct ipt_getinfo info;
1273
1274                         info.valid_hooks = t->valid_hooks;
1275                         memcpy(info.hook_entry, t->private->hook_entry,
1276                                sizeof(info.hook_entry));
1277                         memcpy(info.underflow, t->private->underflow,
1278                                sizeof(info.underflow));
1279                         info.num_entries = t->private->number;
1280                         info.size = t->private->size;
1281                         strcpy(info.name, name);
1282
1283                         if (copy_to_user(user, &info, *len) != 0)
1284                                 ret = -EFAULT;
1285                         else
1286                                 ret = 0;
1287
1288                         up(&ipt_mutex);
1289                 }
1290         }
1291         break;
1292
1293         case IPT_SO_GET_ENTRIES: {
1294                 struct ipt_get_entries get;
1295
1296                 if (*len < sizeof(get)) {
1297                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1298                         ret = -EINVAL;
1299                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1300                         ret = -EFAULT;
1301                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1302                         duprintf("get_entries: %u != %u\n", *len,
1303                                  sizeof(struct ipt_get_entries) + get.size);
1304                         ret = -EINVAL;
1305                 } else
1306                         ret = get_entries(&get, user);
1307                 break;
1308         }
1309
1310         default:
1311                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1312                 ret = -EINVAL;
1313         }
1314
1315         return ret;
1316 }
1317
1318 /* Registration hooks for targets. */
1319 int
1320 ipt_register_target(struct ipt_target *target)
1321 {
1322         int ret;
1323
1324         ret = down_interruptible(&ipt_mutex);
1325         if (ret != 0)
1326                 return ret;
1327
1328         if (!list_named_insert(&ipt_target, target)) {
1329                 duprintf("ipt_register_target: `%s' already in list!\n",
1330                          target->name);
1331                 ret = -EINVAL;
1332         }
1333         up(&ipt_mutex);
1334         return ret;
1335 }
1336
1337 void
1338 ipt_unregister_target(struct ipt_target *target)
1339 {
1340         down(&ipt_mutex);
1341         LIST_DELETE(&ipt_target, target);
1342         up(&ipt_mutex);
1343 }
1344
1345 int
1346 ipt_register_match(struct ipt_match *match)
1347 {
1348         int ret;
1349
1350         ret = down_interruptible(&ipt_mutex);
1351         if (ret != 0)
1352                 return ret;
1353
1354         if (!list_named_insert(&ipt_match, match)) {
1355                 duprintf("ipt_register_match: `%s' already in list!\n",
1356                          match->name);
1357                 ret = -EINVAL;
1358         }
1359         up(&ipt_mutex);
1360
1361         return ret;
1362 }
1363
1364 void
1365 ipt_unregister_match(struct ipt_match *match)
1366 {
1367         down(&ipt_mutex);
1368         LIST_DELETE(&ipt_match, match);
1369         up(&ipt_mutex);
1370 }
1371
1372 int ipt_register_table(struct ipt_table *table)
1373 {
1374         int ret;
1375         struct ipt_table_info *newinfo;
1376         static struct ipt_table_info bootstrap
1377                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1378
1379         newinfo = vmalloc(sizeof(struct ipt_table_info)
1380                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1381         if (!newinfo)
1382                 return -ENOMEM;
1383
1384         memcpy(newinfo->entries, table->table->entries, table->table->size);
1385
1386         ret = translate_table(table->name, table->valid_hooks,
1387                               newinfo, table->table->size,
1388                               table->table->num_entries,
1389                               table->table->hook_entry,
1390                               table->table->underflow);
1391         if (ret != 0) {
1392                 vfree(newinfo);
1393                 return ret;
1394         }
1395
1396         ret = down_interruptible(&ipt_mutex);
1397         if (ret != 0) {
1398                 vfree(newinfo);
1399                 return ret;
1400         }
1401
1402         /* Don't autoload: we'd eat our tail... */
1403         if (list_named_find(&ipt_tables, table->name)) {
1404                 ret = -EEXIST;
1405                 goto free_unlock;
1406         }
1407
1408         /* Simplifies replace_table code. */
1409         table->private = &bootstrap;
1410         if (!replace_table(table, 0, newinfo, &ret))
1411                 goto free_unlock;
1412
1413         duprintf("table->private->number = %u\n",
1414                  table->private->number);
1415         
1416         /* save number of initial entries */
1417         table->private->initial_entries = table->private->number;
1418
1419         table->lock = RW_LOCK_UNLOCKED;
1420         list_prepend(&ipt_tables, table);
1421
1422  unlock:
1423         up(&ipt_mutex);
1424         return ret;
1425
1426  free_unlock:
1427         vfree(newinfo);
1428         goto unlock;
1429 }
1430
1431 void ipt_unregister_table(struct ipt_table *table)
1432 {
1433         down(&ipt_mutex);
1434         LIST_DELETE(&ipt_tables, table);
1435         up(&ipt_mutex);
1436
1437         /* Decrease module usage counts and free resources */
1438         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1439                           cleanup_entry, NULL);
1440         vfree(table->private);
1441 }
1442
/*
 * Test @port against the inclusive range [@min, @max].
 * The in-range result (0 or 1) is XORed with @invert and returned.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1452
1453 static int
1454 tcp_find_option(u_int8_t option,
1455                 const struct sk_buff *skb,
1456                 unsigned int optlen,
1457                 int invert,
1458                 int *hotdrop)
1459 {
1460         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1461         char opt[60 - sizeof(struct tcphdr)];
1462         unsigned int i;
1463
1464         duprintf("tcp_match: finding option\n");
1465         /* If we don't have the whole header, drop packet. */
1466         if (skb_copy_bits(skb, skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1467                           opt, optlen) < 0) {
1468                 *hotdrop = 1;
1469                 return 0;
1470         }
1471
1472         for (i = 0; i < optlen; ) {
1473                 if (opt[i] == option) return !invert;
1474                 if (opt[i] < 2) i++;
1475                 else i += opt[i+1]?:1;
1476         }
1477
1478         return invert;
1479 }
1480
1481 static int
1482 tcp_match(const struct sk_buff *skb,
1483           const struct net_device *in,
1484           const struct net_device *out,
1485           const void *matchinfo,
1486           int offset,
1487           int *hotdrop)
1488 {
1489         struct tcphdr tcph;
1490         const struct ipt_tcp *tcpinfo = matchinfo;
1491
1492         if (offset) {
1493                 /* To quote Alan:
1494
1495                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1496                    causes this. Its a cracker trying to break in by doing a
1497                    flag overwrite to pass the direction checks.
1498                 */
1499                 if (offset == 1) {
1500                         duprintf("Dropping evil TCP offset=1 frag.\n");
1501                         *hotdrop = 1;
1502                 }
1503                 /* Must not be a fragment. */
1504                 return 0;
1505         }
1506
1507 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1508
1509         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) {
1510                 /* We've been asked to examine this packet, and we
1511                    can't.  Hence, no choice but to drop. */
1512                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1513                 *hotdrop = 1;
1514                 return 0;
1515         }
1516
1517         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1518                         ntohs(tcph.source),
1519                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1520                 return 0;
1521         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1522                         ntohs(tcph.dest),
1523                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1524                 return 0;
1525         if (!FWINVTCP((((unsigned char *)&tcph)[13] & tcpinfo->flg_mask)
1526                       == tcpinfo->flg_cmp,
1527                       IPT_TCP_INV_FLAGS))
1528                 return 0;
1529         if (tcpinfo->option) {
1530                 if (tcph.doff * 4 < sizeof(tcph)) {
1531                         *hotdrop = 1;
1532                         return 0;
1533                 }
1534                 if (!tcp_find_option(tcpinfo->option, skb, tcph.doff*4 - sizeof(tcph),
1535                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1536                                      hotdrop))
1537                         return 0;
1538         }
1539         return 1;
1540 }
1541
1542 /* Called when user tries to insert an entry of this type. */
1543 static int
1544 tcp_checkentry(const char *tablename,
1545                const struct ipt_ip *ip,
1546                void *matchinfo,
1547                unsigned int matchsize,
1548                unsigned int hook_mask)
1549 {
1550         const struct ipt_tcp *tcpinfo = matchinfo;
1551
1552         /* Must specify proto == TCP, and no unknown invflags */
1553         return ip->proto == IPPROTO_TCP
1554                 && !(ip->invflags & IPT_INV_PROTO)
1555                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1556                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1557 }
1558
1559 static int
1560 udp_match(const struct sk_buff *skb,
1561           const struct net_device *in,
1562           const struct net_device *out,
1563           const void *matchinfo,
1564           int offset,
1565           int *hotdrop)
1566 {
1567         struct udphdr udph;
1568         const struct ipt_udp *udpinfo = matchinfo;
1569
1570         /* Must not be a fragment. */
1571         if (offset)
1572                 return 0;
1573
1574         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) {
1575                 /* We've been asked to examine this packet, and we
1576                    can't.  Hence, no choice but to drop. */
1577                 duprintf("Dropping evil UDP tinygram.\n");
1578                 *hotdrop = 1;
1579                 return 0;
1580         }
1581
1582         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1583                           ntohs(udph.source),
1584                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1585                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1586                               ntohs(udph.dest),
1587                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1588 }
1589
1590 /* Called when user tries to insert an entry of this type. */
1591 static int
1592 udp_checkentry(const char *tablename,
1593                const struct ipt_ip *ip,
1594                void *matchinfo,
1595                unsigned int matchinfosize,
1596                unsigned int hook_mask)
1597 {
1598         const struct ipt_udp *udpinfo = matchinfo;
1599
1600         /* Must specify proto == UDP, and no unknown invflags */
1601         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1602                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1603                          IPPROTO_UDP);
1604                 return 0;
1605         }
1606         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1607                 duprintf("ipt_udp: matchsize %u != %u\n",
1608                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1609                 return 0;
1610         }
1611         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1612                 duprintf("ipt_udp: unknown flags %X\n",
1613                          udpinfo->invflags);
1614                 return 0;
1615         }
1616
1617         return 1;
1618 }
1619
/*
 * Test an ICMP @type/@code pair against a rule.  A @test_type of 0xFF
 * accepts every packet; otherwise @type must equal @test_type and @code
 * must lie in [@min_code, @max_code].  The 0/1 result is XORed with
 * @invert and returned.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type)
		      && (min_code <= code) && (code <= max_code);

	return hit ^ invert;
}
1629
1630 static int
1631 icmp_match(const struct sk_buff *skb,
1632            const struct net_device *in,
1633            const struct net_device *out,
1634            const void *matchinfo,
1635            int offset,
1636            int *hotdrop)
1637 {
1638         struct icmphdr icmph;
1639         const struct ipt_icmp *icmpinfo = matchinfo;
1640
1641         /* Must not be a fragment. */
1642         if (offset)
1643                 return 0;
1644
1645         if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &icmph, sizeof(icmph)) < 0){
1646                 /* We've been asked to examine this packet, and we
1647                    can't.  Hence, no choice but to drop. */
1648                 duprintf("Dropping evil ICMP tinygram.\n");
1649                 *hotdrop = 1;
1650                 return 0;
1651         }
1652
1653         return icmp_type_code_match(icmpinfo->type,
1654                                     icmpinfo->code[0],
1655                                     icmpinfo->code[1],
1656                                     icmph.type, icmph.code,
1657                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1658 }
1659
1660 /* Called when user tries to insert an entry of this type. */
1661 static int
1662 icmp_checkentry(const char *tablename,
1663            const struct ipt_ip *ip,
1664            void *matchinfo,
1665            unsigned int matchsize,
1666            unsigned int hook_mask)
1667 {
1668         const struct ipt_icmp *icmpinfo = matchinfo;
1669
1670         /* Must specify proto == ICMP, and no unknown invflags */
1671         return ip->proto == IPPROTO_ICMP
1672                 && !(ip->invflags & IPT_INV_PROTO)
1673                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1674                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1675 }
1676
1677 /* The built-in targets: standard (NULL) and error. */
1678 static struct ipt_target ipt_standard_target = {
1679         .name           = IPT_STANDARD_TARGET,
1680 };
1681
1682 static struct ipt_target ipt_error_target = {
1683         .name           = IPT_ERROR_TARGET,
1684         .target         = ipt_error,
1685 };
1686
1687 static struct nf_sockopt_ops ipt_sockopts = {
1688         .pf             = PF_INET,
1689         .set_optmin     = IPT_BASE_CTL,
1690         .set_optmax     = IPT_SO_SET_MAX+1,
1691         .set            = do_ipt_set_ctl,
1692         .get_optmin     = IPT_BASE_CTL,
1693         .get_optmax     = IPT_SO_GET_MAX+1,
1694         .get            = do_ipt_get_ctl,
1695 };
1696
1697 static struct ipt_match tcp_matchstruct = {
1698         .name           = "tcp",
1699         .match          = &tcp_match,
1700         .checkentry     = &tcp_checkentry,
1701 };
1702
1703 static struct ipt_match udp_matchstruct = {
1704         .name           = "udp",
1705         .match          = &udp_match,
1706         .checkentry     = &udp_checkentry,
1707 };
1708
1709 static struct ipt_match icmp_matchstruct = {
1710         .name           = "icmp",
1711         .match          = &icmp_match,
1712         .checkentry     = &icmp_checkentry,
1713 };
1714
1715 #ifdef CONFIG_PROC_FS
1716 static inline int print_name(const char *i,
1717                              off_t start_offset, char *buffer, int length,
1718                              off_t *pos, unsigned int *count)
1719 {
1720         if ((*count)++ >= start_offset) {
1721                 unsigned int namelen;
1722
1723                 namelen = sprintf(buffer + *pos, "%s\n",
1724                                   i + sizeof(struct list_head));
1725                 if (*pos + namelen > length) {
1726                         /* Stop iterating */
1727                         return 1;
1728                 }
1729                 *pos += namelen;
1730         }
1731         return 0;
1732 }
1733
1734 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1735 {
1736         off_t pos = 0;
1737         unsigned int count = 0;
1738
1739         if (down_interruptible(&ipt_mutex) != 0)
1740                 return 0;
1741
1742         LIST_FIND(&ipt_tables, print_name, void *,
1743                   offset, buffer, length, &pos, &count);
1744
1745         up(&ipt_mutex);
1746
1747         /* `start' hack - see fs/proc/generic.c line ~105 */
1748         *start=(char *)((unsigned long)count-offset);
1749         return pos;
1750 }
1751
1752 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1753 {
1754         off_t pos = 0;
1755         unsigned int count = 0;
1756
1757         if (down_interruptible(&ipt_mutex) != 0)
1758                 return 0;
1759
1760         LIST_FIND(&ipt_target, print_name, void *,
1761                   offset, buffer, length, &pos, &count);
1762         
1763         up(&ipt_mutex);
1764
1765         *start = (char *)((unsigned long)count - offset);
1766         return pos;
1767 }
1768
1769 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1770 {
1771         off_t pos = 0;
1772         unsigned int count = 0;
1773
1774         if (down_interruptible(&ipt_mutex) != 0)
1775                 return 0;
1776         
1777         LIST_FIND(&ipt_match, print_name, void *,
1778                   offset, buffer, length, &pos, &count);
1779
1780         up(&ipt_mutex);
1781
1782         *start = (char *)((unsigned long)count - offset);
1783         return pos;
1784 }
1785
1786 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1787 { { "ip_tables_names", ipt_get_tables },
1788   { "ip_tables_targets", ipt_get_targets },
1789   { "ip_tables_matches", ipt_get_matches },
1790   { NULL, NULL} };
1791 #endif /*CONFIG_PROC_FS*/
1792
1793 static int __init init(void)
1794 {
1795         int ret;
1796
1797         /* Noone else will be downing sem now, so we won't sleep */
1798         down(&ipt_mutex);
1799         list_append(&ipt_target, &ipt_standard_target);
1800         list_append(&ipt_target, &ipt_error_target);
1801         list_append(&ipt_match, &tcp_matchstruct);
1802         list_append(&ipt_match, &udp_matchstruct);
1803         list_append(&ipt_match, &icmp_matchstruct);
1804         up(&ipt_mutex);
1805
1806         /* Register setsockopt */
1807         ret = nf_register_sockopt(&ipt_sockopts);
1808         if (ret < 0) {
1809                 duprintf("Unable to register sockopts.\n");
1810                 return ret;
1811         }
1812
1813 #ifdef CONFIG_PROC_FS
1814         {
1815         struct proc_dir_entry *proc;
1816         int i;
1817
1818         for (i = 0; ipt_proc_entry[i].name; i++) {
1819                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1820                                        ipt_proc_entry[i].get_info);
1821                 if (!proc) {
1822                         while (--i >= 0)
1823                                 proc_net_remove(ipt_proc_entry[i].name);
1824                         nf_unregister_sockopt(&ipt_sockopts);
1825                         return -ENOMEM;
1826                 }
1827                 proc->owner = THIS_MODULE;
1828         }
1829         }
1830 #endif
1831
1832         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1833         return 0;
1834 }
1835
1836 static void __exit fini(void)
1837 {
1838         nf_unregister_sockopt(&ipt_sockopts);
1839 #ifdef CONFIG_PROC_FS
1840         {
1841         int i;
1842         for (i = 0; ipt_proc_entry[i].name; i++)
1843                 proc_net_remove(ipt_proc_entry[i].name);
1844         }
1845 #endif
1846 }
1847
/* Symbols exported to other modules: table registration and traversal. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Match extension registration. */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* Target extension registration and lookup. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target_lock);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);