vserver 1.9.3
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29
30 #include <linux/netfilter_ipv4/ip_tables.h>
31
32 MODULE_LICENSE("GPL");
33 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
34 MODULE_DESCRIPTION("IPv4 packet filter");
35
36 /*#define DEBUG_IP_FIREWALL*/
37 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
38 /*#define DEBUG_IP_FIREWALL_USER*/
39
/* Kernel-path debug tracing: forwards to printk() only when
 * DEBUG_IP_FIREWALL is defined; otherwise expands to nothing, so the
 * arguments are not evaluated at all. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* Same idea as dprintf(), but gated by DEBUG_IP_FIREWALL_USER. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/* Non-fatal assertion: with CONFIG_NETFILTER_DEBUG a failed condition
 * only logs function, file and line; without it the macro (and the
 * condition expression) disappears entirely. */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (!(x))                                               \
                printk("IP_NF_ASSERT: %s:%s:%u\n",              \
                       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
/* Round x up to a multiple of SMP_CACHE_BYTES (the mask trick assumes
 * SMP_CACHE_BYTES is a power of two -- TODO confirm). */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
63
/* Semaphore handed to the find_*_lock() helpers in this file. */
static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
/* Both assertions check that ipt_mutex is already held by expecting
 * down_trylock() to fail; the argument x is ignored.
 * NOTE(review): if the trylock unexpectedly succeeds the semaphore
 * presumably stays held afterwards -- confirm before relying on these
 * with CONFIG_NETFILTER_DEBUG enabled. */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
69 #include <linux/netfilter_ipv4/lockhelp.h>
70 #include <linux/netfilter_ipv4/listhelp.h>
71
72 #if 0
73 /* All the better to debug you with... */
74 #define static
75 #define inline
76 #endif
77
78 /*
79    We keep a set of rules for each CPU, so we can avoid write-locking
80    them in the softirq when updating the counters and therefore
81    only need to read-lock in the softirq; doing a write_lock_bh() in user
82    context stops packets coming through and allows user context to read
83    the counters or update the rules.
84
85    To be cache friendly on SMP, we arrange them like so:
86    [ n-entries ]
87    ... cache-align padding ...
88    [ n-entries ]
89
90    Hence the start of any table is given by get_table() below.  */
91
/* The table itself: a fixed header followed by the rule blob(s). */
struct ipt_table_info
{
        /* Size per table */
        /* (bytes of one copy of the rules; TABLE_OFFSET() cache-aligns
         * this value to locate each CPU's copy) */
        unsigned int size;
        /* Number of entries: FIXME. --RR */
        unsigned int number;
        /* Initial number of entries. Needed for module usage count */
        unsigned int initial_entries;

        /* Entry points and underflows */
        /* (byte offsets into one copy of entries[], indexed by hook) */
        unsigned int hook_entry[NF_IP_NUMHOOKS];
        unsigned int underflow[NF_IP_NUMHOOKS];

        /* ipt_entry tables: one per CPU */
        char entries[0] ____cacheline_aligned;
};
109
/* Lists searched by name: registered targets, matches and tables. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
/* Add b bytes and p packets to counter c. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/* Byte offset, inside t->entries, of the rule copy used by CPU p.
 * On SMP each copy starts at a multiple of the cache-aligned table
 * size; on UP there is a single copy at offset 0. */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif
120
121 #if 0
122 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
123 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
124 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
125 #endif
126
127 /* Returns whether matches rule or not. */
128 static inline int
129 ip_packet_match(const struct iphdr *ip,
130                 const char *indev,
131                 const char *outdev,
132                 const struct ipt_ip *ipinfo,
133                 int isfrag)
134 {
135         size_t i;
136         unsigned long ret;
137
138 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
139
140         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
141                   IPT_INV_SRCIP)
142             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
143                      IPT_INV_DSTIP)) {
144                 dprintf("Source or dest mismatch.\n");
145
146                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
147                         NIPQUAD(ip->saddr),
148                         NIPQUAD(ipinfo->smsk.s_addr),
149                         NIPQUAD(ipinfo->src.s_addr),
150                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
151                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
152                         NIPQUAD(ip->daddr),
153                         NIPQUAD(ipinfo->dmsk.s_addr),
154                         NIPQUAD(ipinfo->dst.s_addr),
155                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
156                 return 0;
157         }
158
159         /* Look for ifname matches; this should unroll nicely. */
160         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
161                 ret |= (((const unsigned long *)indev)[i]
162                         ^ ((const unsigned long *)ipinfo->iniface)[i])
163                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
164         }
165
166         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
167                 dprintf("VIA in mismatch (%s vs %s).%s\n",
168                         indev, ipinfo->iniface,
169                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
170                 return 0;
171         }
172
173         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
174                 ret |= (((const unsigned long *)outdev)[i]
175                         ^ ((const unsigned long *)ipinfo->outiface)[i])
176                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
177         }
178
179         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
180                 dprintf("VIA out mismatch (%s vs %s).%s\n",
181                         outdev, ipinfo->outiface,
182                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
183                 return 0;
184         }
185
186         /* Check specific protocol */
187         if (ipinfo->proto
188             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
189                 dprintf("Packet protocol %hi does not match %hi.%s\n",
190                         ip->protocol, ipinfo->proto,
191                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
192                 return 0;
193         }
194
195         /* If we have a fragment rule but the packet is not a fragment
196          * then we return zero */
197         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
198                 dprintf("Fragment rule but not fragment.%s\n",
199                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
200                 return 0;
201         }
202
203         return 1;
204 }
205
206 static inline int
207 ip_checkentry(const struct ipt_ip *ip)
208 {
209         if (ip->flags & ~IPT_F_MASK) {
210                 duprintf("Unknown flag bits set: %08X\n",
211                          ip->flags & ~IPT_F_MASK);
212                 return 0;
213         }
214         if (ip->invflags & ~IPT_INV_MASK) {
215                 duprintf("Unknown invflag bits set: %08X\n",
216                          ip->invflags & ~IPT_INV_MASK);
217                 return 0;
218         }
219         return 1;
220 }
221
222 static unsigned int
223 ipt_error(struct sk_buff **pskb,
224           const struct net_device *in,
225           const struct net_device *out,
226           unsigned int hooknum,
227           const void *targinfo,
228           void *userinfo)
229 {
230         if (net_ratelimit())
231                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
232
233         return NF_DROP;
234 }
235
236 static inline
237 int do_match(struct ipt_entry_match *m,
238              const struct sk_buff *skb,
239              const struct net_device *in,
240              const struct net_device *out,
241              int offset,
242              int *hotdrop)
243 {
244         /* Stop iteration if it doesn't match */
245         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
246                 return 1;
247         else
248                 return 0;
249 }
250
/* Return the entry located `offset' bytes past `base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *addr = (char *)base + offset;

        return (struct ipt_entry *)addr;
}
256
257 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
258 unsigned int
259 ipt_do_table(struct sk_buff **pskb,
260              unsigned int hook,
261              const struct net_device *in,
262              const struct net_device *out,
263              struct ipt_table *table,
264              void *userdata)
265 {
266         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
267         u_int16_t offset;
268         struct iphdr *ip;
269         u_int16_t datalen;
270         int hotdrop = 0;
271         /* Initializing verdict to NF_DROP keeps gcc happy. */
272         unsigned int verdict = NF_DROP;
273         const char *indev, *outdev;
274         void *table_base;
275         struct ipt_entry *e, *back;
276
277         /* Initialization */
278         ip = (*pskb)->nh.iph;
279         datalen = (*pskb)->len - ip->ihl * 4;
280         indev = in ? in->name : nulldevname;
281         outdev = out ? out->name : nulldevname;
282         /* We handle fragments by dealing with the first fragment as
283          * if it was a normal packet.  All other fragments are treated
284          * normally, except that they will NEVER match rules that ask
285          * things we don't know, ie. tcp syn flag or ports).  If the
286          * rule is also a fragment-specific rule, non-fragments won't
287          * match it. */
288         offset = ntohs(ip->frag_off) & IP_OFFSET;
289
290         read_lock_bh(&table->lock);
291         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
292         table_base = (void *)table->private->entries
293                 + TABLE_OFFSET(table->private, smp_processor_id());
294         e = get_entry(table_base, table->private->hook_entry[hook]);
295
296 #ifdef CONFIG_NETFILTER_DEBUG
297         /* Check noone else using our table */
298         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
299             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
300                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
301                        smp_processor_id(),
302                        table->name,
303                        &((struct ipt_entry *)table_base)->comefrom,
304                        ((struct ipt_entry *)table_base)->comefrom);
305         }
306         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
307 #endif
308
309         /* For return from builtin chain */
310         back = get_entry(table_base, table->private->underflow[hook]);
311
312         do {
313                 IP_NF_ASSERT(e);
314                 IP_NF_ASSERT(back);
315                 (*pskb)->nfcache |= e->nfcache;
316                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
317                         struct ipt_entry_target *t;
318
319                         if (IPT_MATCH_ITERATE(e, do_match,
320                                               *pskb, in, out,
321                                               offset, &hotdrop) != 0)
322                                 goto no_match;
323
324                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
325
326                         t = ipt_get_target(e);
327                         IP_NF_ASSERT(t->u.kernel.target);
328                         /* Standard target? */
329                         if (!t->u.kernel.target->target) {
330                                 int v;
331
332                                 v = ((struct ipt_standard_target *)t)->verdict;
333                                 if (v < 0) {
334                                         /* Pop from stack? */
335                                         if (v != IPT_RETURN) {
336                                                 verdict = (unsigned)(-v) - 1;
337                                                 break;
338                                         }
339                                         e = back;
340                                         back = get_entry(table_base,
341                                                          back->comefrom);
342                                         continue;
343                                 }
344                                 if (table_base + v
345                                     != (void *)e + e->next_offset) {
346                                         /* Save old back ptr in next entry */
347                                         struct ipt_entry *next
348                                                 = (void *)e + e->next_offset;
349                                         next->comefrom
350                                                 = (void *)back - table_base;
351                                         /* set back pointer to next entry */
352                                         back = next;
353                                 }
354
355                                 e = get_entry(table_base, v);
356                         } else {
357                                 /* Targets which reenter must return
358                                    abs. verdicts */
359 #ifdef CONFIG_NETFILTER_DEBUG
360                                 ((struct ipt_entry *)table_base)->comefrom
361                                         = 0xeeeeeeec;
362 #endif
363                                 verdict = t->u.kernel.target->target(pskb,
364                                                                      in, out,
365                                                                      hook,
366                                                                      t->data,
367                                                                      userdata);
368
369 #ifdef CONFIG_NETFILTER_DEBUG
370                                 if (((struct ipt_entry *)table_base)->comefrom
371                                     != 0xeeeeeeec
372                                     && verdict == IPT_CONTINUE) {
373                                         printk("Target %s reentered!\n",
374                                                t->u.kernel.target->name);
375                                         verdict = NF_DROP;
376                                 }
377                                 ((struct ipt_entry *)table_base)->comefrom
378                                         = 0x57acc001;
379 #endif
380                                 /* Target might have changed stuff. */
381                                 ip = (*pskb)->nh.iph;
382                                 datalen = (*pskb)->len - ip->ihl * 4;
383
384                                 if (verdict == IPT_CONTINUE)
385                                         e = (void *)e + e->next_offset;
386                                 else
387                                         /* Verdict */
388                                         break;
389                         }
390                 } else {
391
392                 no_match:
393                         e = (void *)e + e->next_offset;
394                 }
395         } while (!hotdrop);
396
397 #ifdef CONFIG_NETFILTER_DEBUG
398         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
399 #endif
400         read_unlock_bh(&table->lock);
401
402 #ifdef DEBUG_ALLOW_ALL
403         return NF_ACCEPT;
404 #else
405         if (hotdrop)
406                 return NF_DROP;
407         else return verdict;
408 #endif
409 }
410
411 /* If it succeeds, returns element and locks mutex */
412 static inline void *
413 find_inlist_lock_noload(struct list_head *head,
414                         const char *name,
415                         int *error,
416                         struct semaphore *mutex)
417 {
418         void *ret;
419
420 #if 0 
421         duprintf("find_inlist: searching for `%s' in %s.\n",
422                  name, head == &ipt_target ? "ipt_target"
423                  : head == &ipt_match ? "ipt_match"
424                  : head == &ipt_tables ? "ipt_tables" : "UNKNOWN");
425 #endif
426
427         *error = down_interruptible(mutex);
428         if (*error != 0)
429                 return NULL;
430
431         ret = list_named_find(head, name);
432         if (!ret) {
433                 *error = -ENOENT;
434                 up(mutex);
435         }
436         return ret;
437 }
438
/*
 * find_inlist_lock(): like find_inlist_lock_noload(), but when
 * CONFIG_KMOD is set a failed lookup triggers
 * request_module("<prefix><name>") followed by one more lookup.
 * Without CONFIG_KMOD it is a plain alias and `prefix' is ignored.
 */
#ifdef CONFIG_KMOD
static void *
find_inlist_lock(struct list_head *head,
                 const char *name,
                 const char *prefix,
                 int *error,
                 struct semaphore *mutex)
{
        void *found = find_inlist_lock_noload(head, name, error, mutex);

        if (found)
                return found;

        duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
        request_module("%s%s", prefix, name);
        return find_inlist_lock_noload(head, name, error, mutex);
}
#else
#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
#endif
461
462 static inline struct ipt_table *
463 ipt_find_table_lock(const char *name, int *error, struct semaphore *mutex)
464 {
465         return find_inlist_lock(&ipt_tables, name, "iptable_", error, mutex);
466 }
467
468 static inline struct ipt_match *
469 find_match_lock(const char *name, int *error, struct semaphore *mutex)
470 {
471         return find_inlist_lock(&ipt_match, name, "ipt_", error, mutex);
472 }
473
474 struct ipt_target *
475 ipt_find_target_lock(const char *name, int *error, struct semaphore *mutex)
476 {
477         return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex);
478 }
479
480 /* All zeroes == unconditional rule. */
481 static inline int
482 unconditional(const struct ipt_ip *ip)
483 {
484         unsigned int i;
485
486         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
487                 if (((__u32 *)ip)[i])
488                         return 0;
489
490         return 1;
491 }
492
493 /* Figures out from what hook each rule can be called: returns 0 if
494    there are loops.  Puts hook bitmask in comefrom. */
495 static int
496 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
497 {
498         unsigned int hook;
499
500         /* No recursion; use packet counter to save back ptrs (reset
501            to 0 as we leave), and comefrom to save source hook bitmask */
502         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
503                 unsigned int pos = newinfo->hook_entry[hook];
504                 struct ipt_entry *e
505                         = (struct ipt_entry *)(newinfo->entries + pos);
506
507                 if (!(valid_hooks & (1 << hook)))
508                         continue;
509
510                 /* Set initial back pointer. */
511                 e->counters.pcnt = pos;
512
513                 for (;;) {
514                         struct ipt_standard_target *t
515                                 = (void *)ipt_get_target(e);
516
517                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
518                                 printk("iptables: loop hook %u pos %u %08X.\n",
519                                        hook, pos, e->comefrom);
520                                 return 0;
521                         }
522                         e->comefrom
523                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
524
525                         /* Unconditional return/END. */
526                         if (e->target_offset == sizeof(struct ipt_entry)
527                             && (strcmp(t->target.u.user.name,
528                                        IPT_STANDARD_TARGET) == 0)
529                             && t->verdict < 0
530                             && unconditional(&e->ip)) {
531                                 unsigned int oldpos, size;
532
533                                 /* Return: backtrack through the last
534                                    big jump. */
535                                 do {
536                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
537 #ifdef DEBUG_IP_FIREWALL_USER
538                                         if (e->comefrom
539                                             & (1 << NF_IP_NUMHOOKS)) {
540                                                 duprintf("Back unset "
541                                                          "on hook %u "
542                                                          "rule %u\n",
543                                                          hook, pos);
544                                         }
545 #endif
546                                         oldpos = pos;
547                                         pos = e->counters.pcnt;
548                                         e->counters.pcnt = 0;
549
550                                         /* We're at the start. */
551                                         if (pos == oldpos)
552                                                 goto next;
553
554                                         e = (struct ipt_entry *)
555                                                 (newinfo->entries + pos);
556                                 } while (oldpos == pos + e->next_offset);
557
558                                 /* Move along one */
559                                 size = e->next_offset;
560                                 e = (struct ipt_entry *)
561                                         (newinfo->entries + pos + size);
562                                 e->counters.pcnt = pos;
563                                 pos += size;
564                         } else {
565                                 int newpos = t->verdict;
566
567                                 if (strcmp(t->target.u.user.name,
568                                            IPT_STANDARD_TARGET) == 0
569                                     && newpos >= 0) {
570                                         /* This a jump; chase it. */
571                                         duprintf("Jump rule %u -> %u\n",
572                                                  pos, newpos);
573                                 } else {
574                                         /* ... this is a fallthru */
575                                         newpos = pos + e->next_offset;
576                                 }
577                                 e = (struct ipt_entry *)
578                                         (newinfo->entries + newpos);
579                                 e->counters.pcnt = pos;
580                                 pos = newpos;
581                         }
582                 }
583                 next:
584                 duprintf("Finished chain %u\n", hook);
585         }
586         return 1;
587 }
588
589 static inline int
590 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
591 {
592         if (i && (*i)-- == 0)
593                 return 1;
594
595         if (m->u.kernel.match->destroy)
596                 m->u.kernel.match->destroy(m->data,
597                                            m->u.match_size - sizeof(*m));
598         module_put(m->u.kernel.match->me);
599         return 0;
600 }
601
602 static inline int
603 standard_check(const struct ipt_entry_target *t,
604                unsigned int max_offset)
605 {
606         struct ipt_standard_target *targ = (void *)t;
607
608         /* Check standard info. */
609         if (t->u.target_size
610             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
611                 duprintf("standard_check: target size %u != %u\n",
612                          t->u.target_size,
613                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
614                 return 0;
615         }
616
617         if (targ->verdict >= 0
618             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
619                 duprintf("ipt_standard_check: bad verdict (%i)\n",
620                          targ->verdict);
621                 return 0;
622         }
623
624         if (targ->verdict < -NF_MAX_VERDICT - 1) {
625                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
626                          targ->verdict);
627                 return 0;
628         }
629         return 1;
630 }
631
632 static inline int
633 check_match(struct ipt_entry_match *m,
634             const char *name,
635             const struct ipt_ip *ip,
636             unsigned int hookmask,
637             unsigned int *i)
638 {
639         int ret;
640         struct ipt_match *match;
641
642         match = find_match_lock(m->u.user.name, &ret, &ipt_mutex);
643         if (!match) {
644                 duprintf("check_match: `%s' not found\n", m->u.user.name);
645                 return ret;
646         }
647         if (!try_module_get(match->me)) {
648                 up(&ipt_mutex);
649                 return -ENOENT;
650         }
651         m->u.kernel.match = match;
652         up(&ipt_mutex);
653
654         if (m->u.kernel.match->checkentry
655             && !m->u.kernel.match->checkentry(name, ip, m->data,
656                                               m->u.match_size - sizeof(*m),
657                                               hookmask)) {
658                 module_put(m->u.kernel.match->me);
659                 duprintf("ip_tables: check failed for `%s'.\n",
660                          m->u.kernel.match->name);
661                 return -EINVAL;
662         }
663
664         (*i)++;
665         return 0;
666 }
667
668 static struct ipt_target ipt_standard_target;
669
670 static inline int
671 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
672             unsigned int *i)
673 {
674         struct ipt_entry_target *t;
675         struct ipt_target *target;
676         int ret;
677         unsigned int j;
678
679         if (!ip_checkentry(&e->ip)) {
680                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
681                 return -EINVAL;
682         }
683
684         j = 0;
685         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
686         if (ret != 0)
687                 goto cleanup_matches;
688
689         t = ipt_get_target(e);
690         target = ipt_find_target_lock(t->u.user.name, &ret, &ipt_mutex);
691         if (!target) {
692                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
693                 goto cleanup_matches;
694         }
695         if (!try_module_get(target->me)) {
696                 up(&ipt_mutex);
697                 ret = -ENOENT;
698                 goto cleanup_matches;
699         }
700         t->u.kernel.target = target;
701         up(&ipt_mutex);
702
703         if (t->u.kernel.target == &ipt_standard_target) {
704                 if (!standard_check(t, size)) {
705                         ret = -EINVAL;
706                         goto cleanup_matches;
707                 }
708         } else if (t->u.kernel.target->checkentry
709                    && !t->u.kernel.target->checkentry(name, e, t->data,
710                                                       t->u.target_size
711                                                       - sizeof(*t),
712                                                       e->comefrom)) {
713                 module_put(t->u.kernel.target->me);
714                 duprintf("ip_tables: check failed for `%s'.\n",
715                          t->u.kernel.target->name);
716                 ret = -EINVAL;
717                 goto cleanup_matches;
718         }
719
720         (*i)++;
721         return 0;
722
723  cleanup_matches:
724         IPT_MATCH_ITERATE(e, cleanup_match, &j);
725         return ret;
726 }
727
728 static inline int
729 check_entry_size_and_hooks(struct ipt_entry *e,
730                            struct ipt_table_info *newinfo,
731                            unsigned char *base,
732                            unsigned char *limit,
733                            const unsigned int *hook_entries,
734                            const unsigned int *underflows,
735                            unsigned int *i)
736 {
737         unsigned int h;
738
739         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
740             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
741                 duprintf("Bad offset %p\n", e);
742                 return -EINVAL;
743         }
744
745         if (e->next_offset
746             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
747                 duprintf("checking: element %p size %u\n",
748                          e, e->next_offset);
749                 return -EINVAL;
750         }
751
752         /* Check hooks & underflows */
753         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
754                 if ((unsigned char *)e - base == hook_entries[h])
755                         newinfo->hook_entry[h] = hook_entries[h];
756                 if ((unsigned char *)e - base == underflows[h])
757                         newinfo->underflow[h] = underflows[h];
758         }
759
760         /* FIXME: underflows must be unconditional, standard verdicts
761            < 0 (not IPT_RETURN). --RR */
762
763         /* Clear counters and comefrom */
764         e->counters = ((struct ipt_counters) { 0, 0 });
765         e->comefrom = 0;
766
767         (*i)++;
768         return 0;
769 }
770
771 static inline int
772 cleanup_entry(struct ipt_entry *e, unsigned int *i)
773 {
774         struct ipt_entry_target *t;
775
776         if (i && (*i)-- == 0)
777                 return 1;
778
779         /* Cleanup all matches */
780         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
781         t = ipt_get_target(e);
782         if (t->u.kernel.target->destroy)
783                 t->u.kernel.target->destroy(t->data,
784                                             t->u.target_size - sizeof(*t));
785         module_put(t->u.kernel.target->me);
786         return 0;
787 }
788
789 /* Checks and translates the user-supplied table segment (held in
790    newinfo) */
791 static int
792 translate_table(const char *name,
793                 unsigned int valid_hooks,
794                 struct ipt_table_info *newinfo,
795                 unsigned int size,
796                 unsigned int number,
797                 const unsigned int *hook_entries,
798                 const unsigned int *underflows)
799 {
800         unsigned int i;
801         int ret;
802
803         newinfo->size = size;
804         newinfo->number = number;
805
806         /* Init all hooks to impossible value. */
807         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
808                 newinfo->hook_entry[i] = 0xFFFFFFFF;
809                 newinfo->underflow[i] = 0xFFFFFFFF;
810         }
811
812         duprintf("translate_table: size %u\n", newinfo->size);
813         i = 0;
814         /* Walk through entries, checking offsets. */
815         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
816                                 check_entry_size_and_hooks,
817                                 newinfo,
818                                 newinfo->entries,
819                                 newinfo->entries + size,
820                                 hook_entries, underflows, &i);
821         if (ret != 0)
822                 return ret;
823
824         if (i != number) {
825                 duprintf("translate_table: %u not %u entries\n",
826                          i, number);
827                 return -EINVAL;
828         }
829
830         /* Check hooks all assigned */
831         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
832                 /* Only hooks which are valid */
833                 if (!(valid_hooks & (1 << i)))
834                         continue;
835                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
836                         duprintf("Invalid hook entry %u %u\n",
837                                  i, hook_entries[i]);
838                         return -EINVAL;
839                 }
840                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
841                         duprintf("Invalid underflow %u %u\n",
842                                  i, underflows[i]);
843                         return -EINVAL;
844                 }
845         }
846
847         if (!mark_source_chains(newinfo, valid_hooks))
848                 return -ELOOP;
849
850         /* Finally, each sanity check must pass */
851         i = 0;
852         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
853                                 check_entry, name, size, &i);
854
855         if (ret != 0) {
856                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
857                                   cleanup_entry, &i);
858                 return ret;
859         }
860
861         /* And one copy for every other CPU */
862         for (i = 1; i < NR_CPUS; i++) {
863                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
864                        newinfo->entries,
865                        SMP_ALIGN(newinfo->size));
866         }
867
868         return ret;
869 }
870
871 static struct ipt_table_info *
872 replace_table(struct ipt_table *table,
873               unsigned int num_counters,
874               struct ipt_table_info *newinfo,
875               int *error)
876 {
877         struct ipt_table_info *oldinfo;
878
879 #ifdef CONFIG_NETFILTER_DEBUG
880         {
881                 struct ipt_entry *table_base;
882                 unsigned int i;
883
884                 for (i = 0; i < NR_CPUS; i++) {
885                         table_base =
886                                 (void *)newinfo->entries
887                                 + TABLE_OFFSET(newinfo, i);
888
889                         table_base->comefrom = 0xdead57ac;
890                 }
891         }
892 #endif
893
894         /* Do the substitution. */
895         write_lock_bh(&table->lock);
896         /* Check inside lock: is the old number correct? */
897         if (num_counters != table->private->number) {
898                 duprintf("num_counters != table->private->number (%u/%u)\n",
899                          num_counters, table->private->number);
900                 write_unlock_bh(&table->lock);
901                 *error = -EAGAIN;
902                 return NULL;
903         }
904         oldinfo = table->private;
905         table->private = newinfo;
906         newinfo->initial_entries = oldinfo->initial_entries;
907         write_unlock_bh(&table->lock);
908
909         return oldinfo;
910 }
911
912 /* Gets counters. */
913 static inline int
914 add_entry_to_counter(const struct ipt_entry *e,
915                      struct ipt_counters total[],
916                      unsigned int *i)
917 {
918         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
919
920         (*i)++;
921         return 0;
922 }
923
924 static void
925 get_counters(const struct ipt_table_info *t,
926              struct ipt_counters counters[])
927 {
928         unsigned int cpu;
929         unsigned int i;
930
931         for (cpu = 0; cpu < NR_CPUS; cpu++) {
932                 i = 0;
933                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
934                                   t->size,
935                                   add_entry_to_counter,
936                                   counters,
937                                   &i);
938         }
939 }
940
941 static int
942 copy_entries_to_user(unsigned int total_size,
943                      struct ipt_table *table,
944                      void __user *userptr)
945 {
946         unsigned int off, num, countersize;
947         struct ipt_entry *e;
948         struct ipt_counters *counters;
949         int ret = 0;
950
951         /* We need atomic snapshot of counters: rest doesn't change
952            (other than comefrom, which userspace doesn't care
953            about). */
954         countersize = sizeof(struct ipt_counters) * table->private->number;
955         counters = vmalloc(countersize);
956
957         if (counters == NULL)
958                 return -ENOMEM;
959
960         /* First, sum counters... */
961         memset(counters, 0, countersize);
962         write_lock_bh(&table->lock);
963         get_counters(table->private, counters);
964         write_unlock_bh(&table->lock);
965
966         /* ... then copy entire thing from CPU 0... */
967         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
968                 ret = -EFAULT;
969                 goto free_counters;
970         }
971
972         /* FIXME: use iterator macros --RR */
973         /* ... then go back and fix counters and names */
974         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
975                 unsigned int i;
976                 struct ipt_entry_match *m;
977                 struct ipt_entry_target *t;
978
979                 e = (struct ipt_entry *)(table->private->entries + off);
980                 if (copy_to_user(userptr + off
981                                  + offsetof(struct ipt_entry, counters),
982                                  &counters[num],
983                                  sizeof(counters[num])) != 0) {
984                         ret = -EFAULT;
985                         goto free_counters;
986                 }
987
988                 for (i = sizeof(struct ipt_entry);
989                      i < e->target_offset;
990                      i += m->u.match_size) {
991                         m = (void *)e + i;
992
993                         if (copy_to_user(userptr + off + i
994                                          + offsetof(struct ipt_entry_match,
995                                                     u.user.name),
996                                          m->u.kernel.match->name,
997                                          strlen(m->u.kernel.match->name)+1)
998                             != 0) {
999                                 ret = -EFAULT;
1000                                 goto free_counters;
1001                         }
1002                 }
1003
1004                 t = ipt_get_target(e);
1005                 if (copy_to_user(userptr + off + e->target_offset
1006                                  + offsetof(struct ipt_entry_target,
1007                                             u.user.name),
1008                                  t->u.kernel.target->name,
1009                                  strlen(t->u.kernel.target->name)+1) != 0) {
1010                         ret = -EFAULT;
1011                         goto free_counters;
1012                 }
1013         }
1014
1015  free_counters:
1016         vfree(counters);
1017         return ret;
1018 }
1019
1020 static int
1021 get_entries(const struct ipt_get_entries *entries,
1022             struct ipt_get_entries __user *uptr)
1023 {
1024         int ret;
1025         struct ipt_table *t;
1026
1027         t = ipt_find_table_lock(entries->name, &ret, &ipt_mutex);
1028         if (t) {
1029                 duprintf("t->private->number = %u\n",
1030                          t->private->number);
1031                 if (entries->size == t->private->size)
1032                         ret = copy_entries_to_user(t->private->size,
1033                                                    t, uptr->entrytable);
1034                 else {
1035                         duprintf("get_entries: I've got %u not %u!\n",
1036                                  t->private->size,
1037                                  entries->size);
1038                         ret = -EINVAL;
1039                 }
1040                 up(&ipt_mutex);
1041         } else
1042                 duprintf("get_entries: Can't find %s!\n",
1043                          entries->name);
1044
1045         return ret;
1046 }
1047
1048 static int
1049 do_replace(void __user *user, unsigned int len)
1050 {
1051         int ret;
1052         struct ipt_replace tmp;
1053         struct ipt_table *t;
1054         struct ipt_table_info *newinfo, *oldinfo;
1055         struct ipt_counters *counters;
1056
1057         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1058                 return -EFAULT;
1059
1060         /* Hack: Causes ipchains to give correct error msg --RR */
1061         if (len != sizeof(tmp) + tmp.size)
1062                 return -ENOPROTOOPT;
1063
1064         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1065         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1066                 return -ENOMEM;
1067
1068         newinfo = vmalloc(sizeof(struct ipt_table_info)
1069                           + SMP_ALIGN(tmp.size) * NR_CPUS);
1070         if (!newinfo)
1071                 return -ENOMEM;
1072
1073         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1074                            tmp.size) != 0) {
1075                 ret = -EFAULT;
1076                 goto free_newinfo;
1077         }
1078
1079         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1080         if (!counters) {
1081                 ret = -ENOMEM;
1082                 goto free_newinfo;
1083         }
1084         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1085
1086         ret = translate_table(tmp.name, tmp.valid_hooks,
1087                               newinfo, tmp.size, tmp.num_entries,
1088                               tmp.hook_entry, tmp.underflow);
1089         if (ret != 0)
1090                 goto free_newinfo_counters;
1091
1092         duprintf("ip_tables: Translated table\n");
1093
1094         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1095         if (!t)
1096                 goto free_newinfo_counters_untrans;
1097
1098         /* You lied! */
1099         if (tmp.valid_hooks != t->valid_hooks) {
1100                 duprintf("Valid hook crap: %08X vs %08X\n",
1101                          tmp.valid_hooks, t->valid_hooks);
1102                 ret = -EINVAL;
1103                 goto free_newinfo_counters_untrans_unlock;
1104         }
1105
1106         /* Get a reference in advance, we're not allowed fail later */
1107         if (!try_module_get(t->me)) {
1108                 ret = -EBUSY;
1109                 goto free_newinfo_counters_untrans_unlock;
1110         }
1111
1112
1113         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1114         if (!oldinfo)
1115                 goto put_module;
1116
1117         /* Update module usage count based on number of rules */
1118         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1119                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1120         if ((oldinfo->number > oldinfo->initial_entries) || 
1121             (newinfo->number <= oldinfo->initial_entries)) 
1122                 module_put(t->me);
1123         if ((oldinfo->number > oldinfo->initial_entries) &&
1124             (newinfo->number <= oldinfo->initial_entries))
1125                 module_put(t->me);
1126
1127         /* Get the old counters. */
1128         get_counters(oldinfo, counters);
1129         /* Decrease module usage counts and free resource */
1130         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1131         vfree(oldinfo);
1132         /* Silent error: too late now. */
1133         copy_to_user(tmp.counters, counters,
1134                      sizeof(struct ipt_counters) * tmp.num_counters);
1135         vfree(counters);
1136         up(&ipt_mutex);
1137         return 0;
1138
1139  put_module:
1140         module_put(t->me);
1141  free_newinfo_counters_untrans_unlock:
1142         up(&ipt_mutex);
1143  free_newinfo_counters_untrans:
1144         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1145  free_newinfo_counters:
1146         vfree(counters);
1147  free_newinfo:
1148         vfree(newinfo);
1149         return ret;
1150 }
1151
1152 /* We're lazy, and add to the first CPU; overflow works its fey magic
1153  * and everything is OK. */
1154 static inline int
1155 add_counter_to_entry(struct ipt_entry *e,
1156                      const struct ipt_counters addme[],
1157                      unsigned int *i)
1158 {
1159 #if 0
1160         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1161                  *i,
1162                  (long unsigned int)e->counters.pcnt,
1163                  (long unsigned int)e->counters.bcnt,
1164                  (long unsigned int)addme[*i].pcnt,
1165                  (long unsigned int)addme[*i].bcnt);
1166 #endif
1167
1168         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1169
1170         (*i)++;
1171         return 0;
1172 }
1173
1174 static int
1175 do_add_counters(void __user *user, unsigned int len)
1176 {
1177         unsigned int i;
1178         struct ipt_counters_info tmp, *paddc;
1179         struct ipt_table *t;
1180         int ret;
1181
1182         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1183                 return -EFAULT;
1184
1185         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1186                 return -EINVAL;
1187
1188         paddc = vmalloc(len);
1189         if (!paddc)
1190                 return -ENOMEM;
1191
1192         if (copy_from_user(paddc, user, len) != 0) {
1193                 ret = -EFAULT;
1194                 goto free;
1195         }
1196
1197         t = ipt_find_table_lock(tmp.name, &ret, &ipt_mutex);
1198         if (!t)
1199                 goto free;
1200
1201         write_lock_bh(&t->lock);
1202         if (t->private->number != paddc->num_counters) {
1203                 ret = -EINVAL;
1204                 goto unlock_up_free;
1205         }
1206
1207         i = 0;
1208         IPT_ENTRY_ITERATE(t->private->entries,
1209                           t->private->size,
1210                           add_counter_to_entry,
1211                           paddc->counters,
1212                           &i);
1213  unlock_up_free:
1214         write_unlock_bh(&t->lock);
1215         up(&ipt_mutex);
1216  free:
1217         vfree(paddc);
1218
1219         return ret;
1220 }
1221
1222 static int
1223 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1224 {
1225         int ret;
1226
1227         if (!capable(CAP_NET_ADMIN))
1228                 return -EPERM;
1229
1230         switch (cmd) {
1231         case IPT_SO_SET_REPLACE:
1232                 ret = do_replace(user, len);
1233                 break;
1234
1235         case IPT_SO_SET_ADD_COUNTERS:
1236                 ret = do_add_counters(user, len);
1237                 break;
1238
1239         default:
1240                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1241                 ret = -EINVAL;
1242         }
1243
1244         return ret;
1245 }
1246
1247 static int
1248 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1249 {
1250         int ret;
1251
1252         if (!capable(CAP_NET_ADMIN))
1253                 return -EPERM;
1254
1255         switch (cmd) {
1256         case IPT_SO_GET_INFO: {
1257                 char name[IPT_TABLE_MAXNAMELEN];
1258                 struct ipt_table *t;
1259
1260                 if (*len != sizeof(struct ipt_getinfo)) {
1261                         duprintf("length %u != %u\n", *len,
1262                                  sizeof(struct ipt_getinfo));
1263                         ret = -EINVAL;
1264                         break;
1265                 }
1266
1267                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1268                         ret = -EFAULT;
1269                         break;
1270                 }
1271                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1272                 t = ipt_find_table_lock(name, &ret, &ipt_mutex);
1273                 if (t) {
1274                         struct ipt_getinfo info;
1275
1276                         info.valid_hooks = t->valid_hooks;
1277                         memcpy(info.hook_entry, t->private->hook_entry,
1278                                sizeof(info.hook_entry));
1279                         memcpy(info.underflow, t->private->underflow,
1280                                sizeof(info.underflow));
1281                         info.num_entries = t->private->number;
1282                         info.size = t->private->size;
1283                         strcpy(info.name, name);
1284
1285                         if (copy_to_user(user, &info, *len) != 0)
1286                                 ret = -EFAULT;
1287                         else
1288                                 ret = 0;
1289
1290                         up(&ipt_mutex);
1291                 }
1292         }
1293         break;
1294
1295         case IPT_SO_GET_ENTRIES: {
1296                 struct ipt_get_entries get;
1297
1298                 if (*len < sizeof(get)) {
1299                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1300                         ret = -EINVAL;
1301                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1302                         ret = -EFAULT;
1303                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1304                         duprintf("get_entries: %u != %u\n", *len,
1305                                  sizeof(struct ipt_get_entries) + get.size);
1306                         ret = -EINVAL;
1307                 } else
1308                         ret = get_entries(&get, user);
1309                 break;
1310         }
1311
1312         default:
1313                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1314                 ret = -EINVAL;
1315         }
1316
1317         return ret;
1318 }
1319
1320 /* Registration hooks for targets. */
1321 int
1322 ipt_register_target(struct ipt_target *target)
1323 {
1324         int ret;
1325
1326         ret = down_interruptible(&ipt_mutex);
1327         if (ret != 0)
1328                 return ret;
1329
1330         if (!list_named_insert(&ipt_target, target)) {
1331                 duprintf("ipt_register_target: `%s' already in list!\n",
1332                          target->name);
1333                 ret = -EINVAL;
1334         }
1335         up(&ipt_mutex);
1336         return ret;
1337 }
1338
1339 void
1340 ipt_unregister_target(struct ipt_target *target)
1341 {
1342         down(&ipt_mutex);
1343         LIST_DELETE(&ipt_target, target);
1344         up(&ipt_mutex);
1345 }
1346
1347 int
1348 ipt_register_match(struct ipt_match *match)
1349 {
1350         int ret;
1351
1352         ret = down_interruptible(&ipt_mutex);
1353         if (ret != 0)
1354                 return ret;
1355
1356         if (!list_named_insert(&ipt_match, match)) {
1357                 duprintf("ipt_register_match: `%s' already in list!\n",
1358                          match->name);
1359                 ret = -EINVAL;
1360         }
1361         up(&ipt_mutex);
1362
1363         return ret;
1364 }
1365
1366 void
1367 ipt_unregister_match(struct ipt_match *match)
1368 {
1369         down(&ipt_mutex);
1370         LIST_DELETE(&ipt_match, match);
1371         up(&ipt_mutex);
1372 }
1373
1374 int ipt_register_table(struct ipt_table *table)
1375 {
1376         int ret;
1377         struct ipt_table_info *newinfo;
1378         static struct ipt_table_info bootstrap
1379                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1380
1381         newinfo = vmalloc(sizeof(struct ipt_table_info)
1382                           + SMP_ALIGN(table->table->size) * NR_CPUS);
1383         if (!newinfo)
1384                 return -ENOMEM;
1385
1386         memcpy(newinfo->entries, table->table->entries, table->table->size);
1387
1388         ret = translate_table(table->name, table->valid_hooks,
1389                               newinfo, table->table->size,
1390                               table->table->num_entries,
1391                               table->table->hook_entry,
1392                               table->table->underflow);
1393         if (ret != 0) {
1394                 vfree(newinfo);
1395                 return ret;
1396         }
1397
1398         ret = down_interruptible(&ipt_mutex);
1399         if (ret != 0) {
1400                 vfree(newinfo);
1401                 return ret;
1402         }
1403
1404         /* Don't autoload: we'd eat our tail... */
1405         if (list_named_find(&ipt_tables, table->name)) {
1406                 ret = -EEXIST;
1407                 goto free_unlock;
1408         }
1409
1410         /* Simplifies replace_table code. */
1411         table->private = &bootstrap;
1412         if (!replace_table(table, 0, newinfo, &ret))
1413                 goto free_unlock;
1414
1415         duprintf("table->private->number = %u\n",
1416                  table->private->number);
1417         
1418         /* save number of initial entries */
1419         table->private->initial_entries = table->private->number;
1420
1421         table->lock = RW_LOCK_UNLOCKED;
1422         list_prepend(&ipt_tables, table);
1423
1424  unlock:
1425         up(&ipt_mutex);
1426         return ret;
1427
1428  free_unlock:
1429         vfree(newinfo);
1430         goto unlock;
1431 }
1432
1433 void ipt_unregister_table(struct ipt_table *table)
1434 {
1435         down(&ipt_mutex);
1436         LIST_DELETE(&ipt_tables, table);
1437         up(&ipt_mutex);
1438
1439         /* Decrease module usage counts and free resources */
1440         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1441                           cleanup_entry, NULL);
1442         vfree(table->private);
1443 }
1444
/*
 * Test @port against the inclusive range [@min, @max] and XOR the outcome
 * with @invert.  With @invert 0 the result is 1 for a port inside the
 * range and 0 otherwise; with @invert 1 the result is flipped.
 */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	const int in_range = (min <= port) && (port <= max);

	return in_range ^ invert;
}
1454
1455 static int
1456 tcp_find_option(u_int8_t option,
1457                 const struct sk_buff *skb,
1458                 unsigned int optlen,
1459                 int invert,
1460                 int *hotdrop)
1461 {
1462         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1463         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1464         unsigned int i;
1465
1466         duprintf("tcp_match: finding option\n");
1467
1468         if (!optlen)
1469                 return invert;
1470
1471         /* If we don't have the whole header, drop packet. */
1472         op = skb_header_pointer(skb,
1473                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1474                                 optlen, _opt);
1475         if (op == NULL) {
1476                 *hotdrop = 1;
1477                 return 0;
1478         }
1479
1480         for (i = 0; i < optlen; ) {
1481                 if (op[i] == option) return !invert;
1482                 if (op[i] < 2) i++;
1483                 else i += op[i+1]?:1;
1484         }
1485
1486         return invert;
1487 }
1488
1489 static int
1490 tcp_match(const struct sk_buff *skb,
1491           const struct net_device *in,
1492           const struct net_device *out,
1493           const void *matchinfo,
1494           int offset,
1495           int *hotdrop)
1496 {
1497         struct tcphdr _tcph, *th;
1498         const struct ipt_tcp *tcpinfo = matchinfo;
1499
1500         if (offset) {
1501                 /* To quote Alan:
1502
1503                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1504                    causes this. Its a cracker trying to break in by doing a
1505                    flag overwrite to pass the direction checks.
1506                 */
1507                 if (offset == 1) {
1508                         duprintf("Dropping evil TCP offset=1 frag.\n");
1509                         *hotdrop = 1;
1510                 }
1511                 /* Must not be a fragment. */
1512                 return 0;
1513         }
1514
1515 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1516
1517         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1518                                 sizeof(_tcph), &_tcph);
1519         if (th == NULL) {
1520                 /* We've been asked to examine this packet, and we
1521                    can't.  Hence, no choice but to drop. */
1522                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1523                 *hotdrop = 1;
1524                 return 0;
1525         }
1526
1527         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1528                         ntohs(th->source),
1529                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1530                 return 0;
1531         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1532                         ntohs(th->dest),
1533                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1534                 return 0;
1535         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1536                       == tcpinfo->flg_cmp,
1537                       IPT_TCP_INV_FLAGS))
1538                 return 0;
1539         if (tcpinfo->option) {
1540                 if (th->doff * 4 < sizeof(_tcph)) {
1541                         *hotdrop = 1;
1542                         return 0;
1543                 }
1544                 if (!tcp_find_option(tcpinfo->option, skb,
1545                                      th->doff*4 - sizeof(_tcph),
1546                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1547                                      hotdrop))
1548                         return 0;
1549         }
1550         return 1;
1551 }
1552
1553 /* Called when user tries to insert an entry of this type. */
1554 static int
1555 tcp_checkentry(const char *tablename,
1556                const struct ipt_ip *ip,
1557                void *matchinfo,
1558                unsigned int matchsize,
1559                unsigned int hook_mask)
1560 {
1561         const struct ipt_tcp *tcpinfo = matchinfo;
1562
1563         /* Must specify proto == TCP, and no unknown invflags */
1564         return ip->proto == IPPROTO_TCP
1565                 && !(ip->invflags & IPT_INV_PROTO)
1566                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1567                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1568 }
1569
1570 static int
1571 udp_match(const struct sk_buff *skb,
1572           const struct net_device *in,
1573           const struct net_device *out,
1574           const void *matchinfo,
1575           int offset,
1576           int *hotdrop)
1577 {
1578         struct udphdr _udph, *uh;
1579         const struct ipt_udp *udpinfo = matchinfo;
1580
1581         /* Must not be a fragment. */
1582         if (offset)
1583                 return 0;
1584
1585         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1586                                 sizeof(_udph), &_udph);
1587         if (uh == NULL) {
1588                 /* We've been asked to examine this packet, and we
1589                    can't.  Hence, no choice but to drop. */
1590                 duprintf("Dropping evil UDP tinygram.\n");
1591                 *hotdrop = 1;
1592                 return 0;
1593         }
1594
1595         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1596                           ntohs(uh->source),
1597                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1598                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1599                               ntohs(uh->dest),
1600                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1601 }
1602
1603 /* Called when user tries to insert an entry of this type. */
1604 static int
1605 udp_checkentry(const char *tablename,
1606                const struct ipt_ip *ip,
1607                void *matchinfo,
1608                unsigned int matchinfosize,
1609                unsigned int hook_mask)
1610 {
1611         const struct ipt_udp *udpinfo = matchinfo;
1612
1613         /* Must specify proto == UDP, and no unknown invflags */
1614         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1615                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1616                          IPPROTO_UDP);
1617                 return 0;
1618         }
1619         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1620                 duprintf("ipt_udp: matchsize %u != %u\n",
1621                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1622                 return 0;
1623         }
1624         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1625                 duprintf("ipt_udp: unknown flags %X\n",
1626                          udpinfo->invflags);
1627                 return 0;
1628         }
1629
1630         return 1;
1631 }
1632
/*
 * Test an ICMP @type/@code pair against a rule and XOR the outcome with
 * @invert.  A @test_type of 0xFF accepts every packet; otherwise the
 * type must equal @test_type and the code must lie in the inclusive
 * range [@min_code, @max_code].
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	int hit;

	if (test_type == 0xFF)
		hit = 1;
	else
		hit = (type == test_type)
		      && (min_code <= code) && (code <= max_code);

	return hit ^ invert;
}
1642
1643 static int
1644 icmp_match(const struct sk_buff *skb,
1645            const struct net_device *in,
1646            const struct net_device *out,
1647            const void *matchinfo,
1648            int offset,
1649            int *hotdrop)
1650 {
1651         struct icmphdr _icmph, *ic;
1652         const struct ipt_icmp *icmpinfo = matchinfo;
1653
1654         /* Must not be a fragment. */
1655         if (offset)
1656                 return 0;
1657
1658         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1659                                 sizeof(_icmph), &_icmph);
1660         if (ic == NULL) {
1661                 /* We've been asked to examine this packet, and we
1662                  * can't.  Hence, no choice but to drop.
1663                  */
1664                 duprintf("Dropping evil ICMP tinygram.\n");
1665                 *hotdrop = 1;
1666                 return 0;
1667         }
1668
1669         return icmp_type_code_match(icmpinfo->type,
1670                                     icmpinfo->code[0],
1671                                     icmpinfo->code[1],
1672                                     ic->type, ic->code,
1673                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1674 }
1675
1676 /* Called when user tries to insert an entry of this type. */
1677 static int
1678 icmp_checkentry(const char *tablename,
1679            const struct ipt_ip *ip,
1680            void *matchinfo,
1681            unsigned int matchsize,
1682            unsigned int hook_mask)
1683 {
1684         const struct ipt_icmp *icmpinfo = matchinfo;
1685
1686         /* Must specify proto == ICMP, and no unknown invflags */
1687         return ip->proto == IPPROTO_ICMP
1688                 && !(ip->invflags & IPT_INV_PROTO)
1689                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1690                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1691 }
1692
1693 /* The built-in targets: standard (NULL) and error. */
1694 static struct ipt_target ipt_standard_target = {
1695         .name           = IPT_STANDARD_TARGET,
1696 };
1697
1698 static struct ipt_target ipt_error_target = {
1699         .name           = IPT_ERROR_TARGET,
1700         .target         = ipt_error,
1701 };
1702
1703 static struct nf_sockopt_ops ipt_sockopts = {
1704         .pf             = PF_INET,
1705         .set_optmin     = IPT_BASE_CTL,
1706         .set_optmax     = IPT_SO_SET_MAX+1,
1707         .set            = do_ipt_set_ctl,
1708         .get_optmin     = IPT_BASE_CTL,
1709         .get_optmax     = IPT_SO_GET_MAX+1,
1710         .get            = do_ipt_get_ctl,
1711 };
1712
1713 static struct ipt_match tcp_matchstruct = {
1714         .name           = "tcp",
1715         .match          = &tcp_match,
1716         .checkentry     = &tcp_checkentry,
1717 };
1718
1719 static struct ipt_match udp_matchstruct = {
1720         .name           = "udp",
1721         .match          = &udp_match,
1722         .checkentry     = &udp_checkentry,
1723 };
1724
1725 static struct ipt_match icmp_matchstruct = {
1726         .name           = "icmp",
1727         .match          = &icmp_match,
1728         .checkentry     = &icmp_checkentry,
1729 };
1730
1731 #ifdef CONFIG_PROC_FS
1732 static inline int print_name(const char *i,
1733                              off_t start_offset, char *buffer, int length,
1734                              off_t *pos, unsigned int *count)
1735 {
1736         if ((*count)++ >= start_offset) {
1737                 unsigned int namelen;
1738
1739                 namelen = sprintf(buffer + *pos, "%s\n",
1740                                   i + sizeof(struct list_head));
1741                 if (*pos + namelen > length) {
1742                         /* Stop iterating */
1743                         return 1;
1744                 }
1745                 *pos += namelen;
1746         }
1747         return 0;
1748 }
1749
1750 static inline int print_target(const struct ipt_target *t,
1751                                off_t start_offset, char *buffer, int length,
1752                                off_t *pos, unsigned int *count)
1753 {
1754         if (t == &ipt_standard_target || t == &ipt_error_target)
1755                 return 0;
1756         return print_name((char *)t, start_offset, buffer, length, pos, count);
1757 }
1758
1759 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1760 {
1761         off_t pos = 0;
1762         unsigned int count = 0;
1763
1764         if (down_interruptible(&ipt_mutex) != 0)
1765                 return 0;
1766
1767         LIST_FIND(&ipt_tables, print_name, void *,
1768                   offset, buffer, length, &pos, &count);
1769
1770         up(&ipt_mutex);
1771
1772         /* `start' hack - see fs/proc/generic.c line ~105 */
1773         *start=(char *)((unsigned long)count-offset);
1774         return pos;
1775 }
1776
1777 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1778 {
1779         off_t pos = 0;
1780         unsigned int count = 0;
1781
1782         if (down_interruptible(&ipt_mutex) != 0)
1783                 return 0;
1784
1785         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1786                   offset, buffer, length, &pos, &count);
1787         
1788         up(&ipt_mutex);
1789
1790         *start = (char *)((unsigned long)count - offset);
1791         return pos;
1792 }
1793
1794 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1795 {
1796         off_t pos = 0;
1797         unsigned int count = 0;
1798
1799         if (down_interruptible(&ipt_mutex) != 0)
1800                 return 0;
1801         
1802         LIST_FIND(&ipt_match, print_name, void *,
1803                   offset, buffer, length, &pos, &count);
1804
1805         up(&ipt_mutex);
1806
1807         *start = (char *)((unsigned long)count - offset);
1808         return pos;
1809 }
1810
1811 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1812 { { "ip_tables_names", ipt_get_tables },
1813   { "ip_tables_targets", ipt_get_targets },
1814   { "ip_tables_matches", ipt_get_matches },
1815   { NULL, NULL} };
1816 #endif /*CONFIG_PROC_FS*/
1817
1818 static int __init init(void)
1819 {
1820         int ret;
1821
1822         /* Noone else will be downing sem now, so we won't sleep */
1823         down(&ipt_mutex);
1824         list_append(&ipt_target, &ipt_standard_target);
1825         list_append(&ipt_target, &ipt_error_target);
1826         list_append(&ipt_match, &tcp_matchstruct);
1827         list_append(&ipt_match, &udp_matchstruct);
1828         list_append(&ipt_match, &icmp_matchstruct);
1829         up(&ipt_mutex);
1830
1831         /* Register setsockopt */
1832         ret = nf_register_sockopt(&ipt_sockopts);
1833         if (ret < 0) {
1834                 duprintf("Unable to register sockopts.\n");
1835                 return ret;
1836         }
1837
1838 #ifdef CONFIG_PROC_FS
1839         {
1840         struct proc_dir_entry *proc;
1841         int i;
1842
1843         for (i = 0; ipt_proc_entry[i].name; i++) {
1844                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1845                                        ipt_proc_entry[i].get_info);
1846                 if (!proc) {
1847                         while (--i >= 0)
1848                                 proc_net_remove(ipt_proc_entry[i].name);
1849                         nf_unregister_sockopt(&ipt_sockopts);
1850                         return -ENOMEM;
1851                 }
1852                 proc->owner = THIS_MODULE;
1853         }
1854         }
1855 #endif
1856
1857         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1858         return 0;
1859 }
1860
1861 static void __exit fini(void)
1862 {
1863         nf_unregister_sockopt(&ipt_sockopts);
1864 #ifdef CONFIG_PROC_FS
1865         {
1866         int i;
1867         for (i = 0; ipt_proc_entry[i].name; i++)
1868                 proc_net_remove(ipt_proc_entry[i].name);
1869         }
1870 #endif
1871 }
1872
/* Table registration and traversal. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Match extensions. */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* Target extensions. */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target_lock);

module_init(init);
module_exit(fini);