vserver 2.0 rc7
[linux-2.6.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30
31 #include <linux/netfilter_ipv4/ip_tables.h>
32
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
35 MODULE_DESCRIPTION("IPv4 packet filter");
36
/*
 * Compile-time debugging switches.  Remove the comment markers around a
 * #define below to enable it:
 *   DEBUG_IP_FIREWALL      - turns dprintf() into printk()
 *   DEBUG_ALLOW_ALL        - makes ipt_do_table() return NF_ACCEPT
 *                            regardless of the computed verdict
 *   DEBUG_IP_FIREWALL_USER - turns duprintf() into printk()
 */
/*#define DEBUG_IP_FIREWALL*/
/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
/*#define DEBUG_IP_FIREWALL_USER*/

/* dprintf: debug output; expands to nothing unless DEBUG_IP_FIREWALL. */
#ifdef DEBUG_IP_FIREWALL
#define dprintf(format, args...)  printk(format , ## args)
#else
#define dprintf(format, args...)
#endif

/* duprintf: debug output; expands to nothing unless DEBUG_IP_FIREWALL_USER. */
#ifdef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...) printk(format , ## args)
#else
#define duprintf(format, args...)
#endif

/*
 * IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, logs function, file and line
 * when x is false; it only prints, execution continues.  Without that
 * option the macro expands to nothing, so x is not evaluated at all.
 */
#ifdef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (!(x))                                               \
                printk("IP_NF_ASSERT: %s:%s:%u\n",              \
                       __FUNCTION__, __FILE__, __LINE__);       \
} while(0)
#else
#define IP_NF_ASSERT(x)
#endif
/* Round x up to a multiple of SMP_CACHE_BYTES (assumes SMP_CACHE_BYTES is a
 * power of two -- TODO confirm). */
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
64
/* Taken around every walk of the ipt_tables, ipt_match and ipt_target
 * lists in this file. */
static DECLARE_MUTEX(ipt_mutex);

/* Must have mutex */
/*
 * Both assertions check that ipt_mutex cannot be taken right now, i.e. that
 * somebody holds it.
 * NOTE(review): if down_trylock() returns 0 it has presumably acquired the
 * mutex, and nothing here releases it again -- confirm.
 */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
/* Included after the ASSERT_*_LOCK definitions above; presumably these
 * headers use them -- TODO confirm. */
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>
72
73 #if 0
74 /* All the better to debug you with... */
75 #define static
76 #define inline
77 #endif
78
79 /*
80    We keep a set of rules for each CPU, so we can avoid write-locking
81    them in the softirq when updating the counters and therefore
82    only need to read-lock in the softirq; doing a write_lock_bh() in user
83    context stops packets coming through and allows user context to read
84    the counters or update the rules.
85
86    To be cache friendly on SMP, we arrange them like so:
87    [ n-entries ]
88    ... cache-align padding ...
89    [ n-entries ]
90
91    Hence the start of any table is given by get_table() below.  */
92
/* The table itself */
/*
 * Header that precedes the rule blob(s) of one table.  Offsets stored in
 * hook_entry[] and underflow[] are byte offsets that ipt_do_table() adds to
 * the start of the current CPU's copy of the entries.
 */
struct ipt_table_info
{
        /* Size per table */
        /* (in bytes; TABLE_OFFSET() cache-aligns it to step between copies) */
        unsigned int size;
        /* Number of entries: FIXME. --RR */
        unsigned int number;
        /* Initial number of entries. Needed for module usage count */
        unsigned int initial_entries;

        /* Entry points and underflows */
        /* Indexed by hook number. */
        unsigned int hook_entry[NF_IP_NUMHOOKS];
        unsigned int underflow[NF_IP_NUMHOOKS];

        /* ipt_entry tables: one per CPU */
        /* Zero-length trailing array: the entries follow the header. */
        char entries[0] ____cacheline_aligned;
};
110
/* Lists of target extensions, match extensions and tables.  They are walked
 * under ipt_mutex by the find_*() and *_revfn() helpers in this file. */
static LIST_HEAD(ipt_target);
static LIST_HEAD(ipt_match);
static LIST_HEAD(ipt_tables);
/* Add b bytes and p packets to counter structure c. */
#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)

/*
 * Byte offset of copy number p of the entries inside table info t.  On SMP
 * the copies are spaced by the cache-aligned table size; otherwise there is
 * a single copy at offset 0.
 */
#ifdef CONFIG_SMP
#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
#else
#define TABLE_OFFSET(t,p) 0
#endif
121
122 #if 0
123 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
124 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
125 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
126 #endif
127
128 /* Returns whether matches rule or not. */
129 static inline int
130 ip_packet_match(const struct iphdr *ip,
131                 const char *indev,
132                 const char *outdev,
133                 const struct ipt_ip *ipinfo,
134                 int isfrag)
135 {
136         size_t i;
137         unsigned long ret;
138
139 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
140
141         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
142                   IPT_INV_SRCIP)
143             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
144                      IPT_INV_DSTIP)) {
145                 dprintf("Source or dest mismatch.\n");
146
147                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
148                         NIPQUAD(ip->saddr),
149                         NIPQUAD(ipinfo->smsk.s_addr),
150                         NIPQUAD(ipinfo->src.s_addr),
151                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
152                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
153                         NIPQUAD(ip->daddr),
154                         NIPQUAD(ipinfo->dmsk.s_addr),
155                         NIPQUAD(ipinfo->dst.s_addr),
156                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
157                 return 0;
158         }
159
160         /* Look for ifname matches; this should unroll nicely. */
161         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
162                 ret |= (((const unsigned long *)indev)[i]
163                         ^ ((const unsigned long *)ipinfo->iniface)[i])
164                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
165         }
166
167         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
168                 dprintf("VIA in mismatch (%s vs %s).%s\n",
169                         indev, ipinfo->iniface,
170                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
171                 return 0;
172         }
173
174         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
175                 ret |= (((const unsigned long *)outdev)[i]
176                         ^ ((const unsigned long *)ipinfo->outiface)[i])
177                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
178         }
179
180         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
181                 dprintf("VIA out mismatch (%s vs %s).%s\n",
182                         outdev, ipinfo->outiface,
183                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
184                 return 0;
185         }
186
187         /* Check specific protocol */
188         if (ipinfo->proto
189             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
190                 dprintf("Packet protocol %hi does not match %hi.%s\n",
191                         ip->protocol, ipinfo->proto,
192                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
193                 return 0;
194         }
195
196         /* If we have a fragment rule but the packet is not a fragment
197          * then we return zero */
198         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
199                 dprintf("Fragment rule but not fragment.%s\n",
200                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
201                 return 0;
202         }
203
204         return 1;
205 }
206
207 static inline int
208 ip_checkentry(const struct ipt_ip *ip)
209 {
210         if (ip->flags & ~IPT_F_MASK) {
211                 duprintf("Unknown flag bits set: %08X\n",
212                          ip->flags & ~IPT_F_MASK);
213                 return 0;
214         }
215         if (ip->invflags & ~IPT_INV_MASK) {
216                 duprintf("Unknown invflag bits set: %08X\n",
217                          ip->invflags & ~IPT_INV_MASK);
218                 return 0;
219         }
220         return 1;
221 }
222
223 static unsigned int
224 ipt_error(struct sk_buff **pskb,
225           const struct net_device *in,
226           const struct net_device *out,
227           unsigned int hooknum,
228           const void *targinfo,
229           void *userinfo)
230 {
231         if (net_ratelimit())
232                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
233
234         return NF_DROP;
235 }
236
237 static inline
238 int do_match(struct ipt_entry_match *m,
239              const struct sk_buff *skb,
240              const struct net_device *in,
241              const struct net_device *out,
242              int offset,
243              int *hotdrop)
244 {
245         /* Stop iteration if it doesn't match */
246         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
247                 return 1;
248         else
249                 return 0;
250 }
251
/* Return the entry located 'offset' bytes past 'base'. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *start = base;

        return (struct ipt_entry *)(start + offset);
}
257
258 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
259 unsigned int
260 ipt_do_table(struct sk_buff **pskb,
261              unsigned int hook,
262              const struct net_device *in,
263              const struct net_device *out,
264              struct ipt_table *table,
265              void *userdata)
266 {
267         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
268         u_int16_t offset;
269         struct iphdr *ip;
270         u_int16_t datalen;
271         int hotdrop = 0;
272         /* Initializing verdict to NF_DROP keeps gcc happy. */
273         unsigned int verdict = NF_DROP;
274         const char *indev, *outdev;
275         void *table_base;
276         struct ipt_entry *e, *back;
277
278         /* Initialization */
279         ip = (*pskb)->nh.iph;
280         datalen = (*pskb)->len - ip->ihl * 4;
281         indev = in ? in->name : nulldevname;
282         outdev = out ? out->name : nulldevname;
283         /* We handle fragments by dealing with the first fragment as
284          * if it was a normal packet.  All other fragments are treated
285          * normally, except that they will NEVER match rules that ask
286          * things we don't know, ie. tcp syn flag or ports).  If the
287          * rule is also a fragment-specific rule, non-fragments won't
288          * match it. */
289         offset = ntohs(ip->frag_off) & IP_OFFSET;
290
291         read_lock_bh(&table->lock);
292         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
293         table_base = (void *)table->private->entries
294                 + TABLE_OFFSET(table->private, smp_processor_id());
295         e = get_entry(table_base, table->private->hook_entry[hook]);
296
297 #ifdef CONFIG_NETFILTER_DEBUG
298         /* Check noone else using our table */
299         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
300             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
301                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
302                        smp_processor_id(),
303                        table->name,
304                        &((struct ipt_entry *)table_base)->comefrom,
305                        ((struct ipt_entry *)table_base)->comefrom);
306         }
307         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
308 #endif
309
310         /* For return from builtin chain */
311         back = get_entry(table_base, table->private->underflow[hook]);
312
313         do {
314                 IP_NF_ASSERT(e);
315                 IP_NF_ASSERT(back);
316                 (*pskb)->nfcache |= e->nfcache;
317                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
318                         struct ipt_entry_target *t;
319
320                         if (IPT_MATCH_ITERATE(e, do_match,
321                                               *pskb, in, out,
322                                               offset, &hotdrop) != 0)
323                                 goto no_match;
324
325                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
326
327                         t = ipt_get_target(e);
328                         IP_NF_ASSERT(t->u.kernel.target);
329                         /* Standard target? */
330                         if (!t->u.kernel.target->target) {
331                                 int v;
332
333                                 v = ((struct ipt_standard_target *)t)->verdict;
334                                 if (v < 0) {
335                                         /* Pop from stack? */
336                                         if (v != IPT_RETURN) {
337                                                 verdict = (unsigned)(-v) - 1;
338                                                 break;
339                                         }
340                                         e = back;
341                                         back = get_entry(table_base,
342                                                          back->comefrom);
343                                         continue;
344                                 }
345                                 if (table_base + v
346                                     != (void *)e + e->next_offset) {
347                                         /* Save old back ptr in next entry */
348                                         struct ipt_entry *next
349                                                 = (void *)e + e->next_offset;
350                                         next->comefrom
351                                                 = (void *)back - table_base;
352                                         /* set back pointer to next entry */
353                                         back = next;
354                                 }
355
356                                 e = get_entry(table_base, v);
357                         } else {
358                                 /* Targets which reenter must return
359                                    abs. verdicts */
360 #ifdef CONFIG_NETFILTER_DEBUG
361                                 ((struct ipt_entry *)table_base)->comefrom
362                                         = 0xeeeeeeec;
363 #endif
364                                 verdict = t->u.kernel.target->target(pskb,
365                                                                      in, out,
366                                                                      hook,
367                                                                      t->data,
368                                                                      userdata);
369
370 #ifdef CONFIG_NETFILTER_DEBUG
371                                 if (((struct ipt_entry *)table_base)->comefrom
372                                     != 0xeeeeeeec
373                                     && verdict == IPT_CONTINUE) {
374                                         printk("Target %s reentered!\n",
375                                                t->u.kernel.target->name);
376                                         verdict = NF_DROP;
377                                 }
378                                 ((struct ipt_entry *)table_base)->comefrom
379                                         = 0x57acc001;
380 #endif
381                                 /* Target might have changed stuff. */
382                                 ip = (*pskb)->nh.iph;
383                                 datalen = (*pskb)->len - ip->ihl * 4;
384
385                                 if (verdict == IPT_CONTINUE)
386                                         e = (void *)e + e->next_offset;
387                                 else
388                                         /* Verdict */
389                                         break;
390                         }
391                 } else {
392
393                 no_match:
394                         e = (void *)e + e->next_offset;
395                 }
396         } while (!hotdrop);
397
398 #ifdef CONFIG_NETFILTER_DEBUG
399         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
400 #endif
401         read_unlock_bh(&table->lock);
402
403 #ifdef DEBUG_ALLOW_ALL
404         return NF_ACCEPT;
405 #else
406         if (hotdrop)
407                 return NF_DROP;
408         else return verdict;
409 #endif
410 }
411
412 /*
413  * These are weird, but module loading must not be done with mutex
414  * held (since they will register), and we have to have a single
415  * function to use try_then_request_module().
416  */
417
418 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
419 static inline struct ipt_table *find_table_lock(const char *name)
420 {
421         struct ipt_table *t;
422
423         if (down_interruptible(&ipt_mutex) != 0)
424                 return ERR_PTR(-EINTR);
425
426         list_for_each_entry(t, &ipt_tables, list)
427                 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
428                         return t;
429         up(&ipt_mutex);
430         return NULL;
431 }
432
433 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
434 static inline struct ipt_match *find_match(const char *name, u8 revision)
435 {
436         struct ipt_match *m;
437         int err = 0;
438
439         if (down_interruptible(&ipt_mutex) != 0)
440                 return ERR_PTR(-EINTR);
441
442         list_for_each_entry(m, &ipt_match, list) {
443                 if (strcmp(m->name, name) == 0) {
444                         if (m->revision == revision) {
445                                 if (try_module_get(m->me)) {
446                                         up(&ipt_mutex);
447                                         return m;
448                                 }
449                         } else
450                                 err = -EPROTOTYPE; /* Found something. */
451                 }
452         }
453         up(&ipt_mutex);
454         return ERR_PTR(err);
455 }
456
457 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
458 static inline struct ipt_target *find_target(const char *name, u8 revision)
459 {
460         struct ipt_target *t;
461         int err = 0;
462
463         if (down_interruptible(&ipt_mutex) != 0)
464                 return ERR_PTR(-EINTR);
465
466         list_for_each_entry(t, &ipt_target, list) {
467                 if (strcmp(t->name, name) == 0) {
468                         if (t->revision == revision) {
469                                 if (try_module_get(t->me)) {
470                                         up(&ipt_mutex);
471                                         return t;
472                                 }
473                         } else
474                                 err = -EPROTOTYPE; /* Found something. */
475                 }
476         }
477         up(&ipt_mutex);
478         return ERR_PTR(err);
479 }
480
481 struct ipt_target *ipt_find_target(const char *name, u8 revision)
482 {
483         struct ipt_target *target;
484
485         target = try_then_request_module(find_target(name, revision),
486                                          "ipt_%s", name);
487         if (IS_ERR(target) || !target)
488                 return NULL;
489         return target;
490 }
491
492 static int match_revfn(const char *name, u8 revision, int *bestp)
493 {
494         struct ipt_match *m;
495         int have_rev = 0;
496
497         list_for_each_entry(m, &ipt_match, list) {
498                 if (strcmp(m->name, name) == 0) {
499                         if (m->revision > *bestp)
500                                 *bestp = m->revision;
501                         if (m->revision == revision)
502                                 have_rev = 1;
503                 }
504         }
505         return have_rev;
506 }
507
508 static int target_revfn(const char *name, u8 revision, int *bestp)
509 {
510         struct ipt_target *t;
511         int have_rev = 0;
512
513         list_for_each_entry(t, &ipt_target, list) {
514                 if (strcmp(t->name, name) == 0) {
515                         if (t->revision > *bestp)
516                                 *bestp = t->revision;
517                         if (t->revision == revision)
518                                 have_rev = 1;
519                 }
520         }
521         return have_rev;
522 }
523
524 /* Returns true or false (if no such extension at all) */
525 static inline int find_revision(const char *name, u8 revision,
526                                 int (*revfn)(const char *, u8, int *),
527                                 int *err)
528 {
529         int have_rev, best = -1;
530
531         if (down_interruptible(&ipt_mutex) != 0) {
532                 *err = -EINTR;
533                 return 1;
534         }
535         have_rev = revfn(name, revision, &best);
536         up(&ipt_mutex);
537
538         /* Nothing at all?  Return 0 to try loading module. */
539         if (best == -1) {
540                 *err = -ENOENT;
541                 return 0;
542         }
543
544         *err = best;
545         if (!have_rev)
546                 *err = -EPROTONOSUPPORT;
547         return 1;
548 }
549
550
551 /* All zeroes == unconditional rule. */
552 static inline int
553 unconditional(const struct ipt_ip *ip)
554 {
555         unsigned int i;
556
557         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
558                 if (((__u32 *)ip)[i])
559                         return 0;
560
561         return 1;
562 }
563
564 /* Figures out from what hook each rule can be called: returns 0 if
565    there are loops.  Puts hook bitmask in comefrom. */
566 static int
567 mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks)
568 {
569         unsigned int hook;
570
571         /* No recursion; use packet counter to save back ptrs (reset
572            to 0 as we leave), and comefrom to save source hook bitmask */
573         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
574                 unsigned int pos = newinfo->hook_entry[hook];
575                 struct ipt_entry *e
576                         = (struct ipt_entry *)(newinfo->entries + pos);
577
578                 if (!(valid_hooks & (1 << hook)))
579                         continue;
580
581                 /* Set initial back pointer. */
582                 e->counters.pcnt = pos;
583
584                 for (;;) {
585                         struct ipt_standard_target *t
586                                 = (void *)ipt_get_target(e);
587
588                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
589                                 printk("iptables: loop hook %u pos %u %08X.\n",
590                                        hook, pos, e->comefrom);
591                                 return 0;
592                         }
593                         e->comefrom
594                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
595
596                         /* Unconditional return/END. */
597                         if (e->target_offset == sizeof(struct ipt_entry)
598                             && (strcmp(t->target.u.user.name,
599                                        IPT_STANDARD_TARGET) == 0)
600                             && t->verdict < 0
601                             && unconditional(&e->ip)) {
602                                 unsigned int oldpos, size;
603
604                                 /* Return: backtrack through the last
605                                    big jump. */
606                                 do {
607                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
608 #ifdef DEBUG_IP_FIREWALL_USER
609                                         if (e->comefrom
610                                             & (1 << NF_IP_NUMHOOKS)) {
611                                                 duprintf("Back unset "
612                                                          "on hook %u "
613                                                          "rule %u\n",
614                                                          hook, pos);
615                                         }
616 #endif
617                                         oldpos = pos;
618                                         pos = e->counters.pcnt;
619                                         e->counters.pcnt = 0;
620
621                                         /* We're at the start. */
622                                         if (pos == oldpos)
623                                                 goto next;
624
625                                         e = (struct ipt_entry *)
626                                                 (newinfo->entries + pos);
627                                 } while (oldpos == pos + e->next_offset);
628
629                                 /* Move along one */
630                                 size = e->next_offset;
631                                 e = (struct ipt_entry *)
632                                         (newinfo->entries + pos + size);
633                                 e->counters.pcnt = pos;
634                                 pos += size;
635                         } else {
636                                 int newpos = t->verdict;
637
638                                 if (strcmp(t->target.u.user.name,
639                                            IPT_STANDARD_TARGET) == 0
640                                     && newpos >= 0) {
641                                         /* This a jump; chase it. */
642                                         duprintf("Jump rule %u -> %u\n",
643                                                  pos, newpos);
644                                 } else {
645                                         /* ... this is a fallthru */
646                                         newpos = pos + e->next_offset;
647                                 }
648                                 e = (struct ipt_entry *)
649                                         (newinfo->entries + newpos);
650                                 e->counters.pcnt = pos;
651                                 pos = newpos;
652                         }
653                 }
654                 next:
655                 duprintf("Finished chain %u\n", hook);
656         }
657         return 1;
658 }
659
660 static inline int
661 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
662 {
663         if (i && (*i)-- == 0)
664                 return 1;
665
666         if (m->u.kernel.match->destroy)
667                 m->u.kernel.match->destroy(m->data,
668                                            m->u.match_size - sizeof(*m));
669         module_put(m->u.kernel.match->me);
670         return 0;
671 }
672
673 static inline int
674 standard_check(const struct ipt_entry_target *t,
675                unsigned int max_offset)
676 {
677         struct ipt_standard_target *targ = (void *)t;
678
679         /* Check standard info. */
680         if (t->u.target_size
681             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
682                 duprintf("standard_check: target size %u != %u\n",
683                          t->u.target_size,
684                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
685                 return 0;
686         }
687
688         if (targ->verdict >= 0
689             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
690                 duprintf("ipt_standard_check: bad verdict (%i)\n",
691                          targ->verdict);
692                 return 0;
693         }
694
695         if (targ->verdict < -NF_MAX_VERDICT - 1) {
696                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
697                          targ->verdict);
698                 return 0;
699         }
700         return 1;
701 }
702
703 static inline int
704 check_match(struct ipt_entry_match *m,
705             const char *name,
706             const struct ipt_ip *ip,
707             unsigned int hookmask,
708             unsigned int *i)
709 {
710         struct ipt_match *match;
711
712         match = try_then_request_module(find_match(m->u.user.name,
713                                                    m->u.user.revision),
714                                         "ipt_%s", m->u.user.name);
715         if (IS_ERR(match) || !match) {
716                 duprintf("check_match: `%s' not found\n", m->u.user.name);
717                 return match ? PTR_ERR(match) : -ENOENT;
718         }
719         m->u.kernel.match = match;
720
721         if (m->u.kernel.match->checkentry
722             && !m->u.kernel.match->checkentry(name, ip, m->data,
723                                               m->u.match_size - sizeof(*m),
724                                               hookmask)) {
725                 module_put(m->u.kernel.match->me);
726                 duprintf("ip_tables: check failed for `%s'.\n",
727                          m->u.kernel.match->name);
728                 return -EINVAL;
729         }
730
731         (*i)++;
732         return 0;
733 }
734
735 static struct ipt_target ipt_standard_target;
736
737 static inline int
738 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
739             unsigned int *i)
740 {
741         struct ipt_entry_target *t;
742         struct ipt_target *target;
743         int ret;
744         unsigned int j;
745
746         if (!ip_checkentry(&e->ip)) {
747                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
748                 return -EINVAL;
749         }
750
751         j = 0;
752         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
753         if (ret != 0)
754                 goto cleanup_matches;
755
756         t = ipt_get_target(e);
757         target = try_then_request_module(find_target(t->u.user.name,
758                                                      t->u.user.revision),
759                                          "ipt_%s", t->u.user.name);
760         if (IS_ERR(target) || !target) {
761                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
762                 ret = target ? PTR_ERR(target) : -ENOENT;
763                 goto cleanup_matches;
764         }
765         t->u.kernel.target = target;
766
767         if (t->u.kernel.target == &ipt_standard_target) {
768                 if (!standard_check(t, size)) {
769                         ret = -EINVAL;
770                         goto cleanup_matches;
771                 }
772         } else if (t->u.kernel.target->checkentry
773                    && !t->u.kernel.target->checkentry(name, e, t->data,
774                                                       t->u.target_size
775                                                       - sizeof(*t),
776                                                       e->comefrom)) {
777                 module_put(t->u.kernel.target->me);
778                 duprintf("ip_tables: check failed for `%s'.\n",
779                          t->u.kernel.target->name);
780                 ret = -EINVAL;
781                 goto cleanup_matches;
782         }
783
784         (*i)++;
785         return 0;
786
787  cleanup_matches:
788         IPT_MATCH_ITERATE(e, cleanup_match, &j);
789         return ret;
790 }
791
792 static inline int
793 check_entry_size_and_hooks(struct ipt_entry *e,
794                            struct ipt_table_info *newinfo,
795                            unsigned char *base,
796                            unsigned char *limit,
797                            const unsigned int *hook_entries,
798                            const unsigned int *underflows,
799                            unsigned int *i)
800 {
801         unsigned int h;
802
803         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
804             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
805                 duprintf("Bad offset %p\n", e);
806                 return -EINVAL;
807         }
808
809         if (e->next_offset
810             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
811                 duprintf("checking: element %p size %u\n",
812                          e, e->next_offset);
813                 return -EINVAL;
814         }
815
816         /* Check hooks & underflows */
817         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
818                 if ((unsigned char *)e - base == hook_entries[h])
819                         newinfo->hook_entry[h] = hook_entries[h];
820                 if ((unsigned char *)e - base == underflows[h])
821                         newinfo->underflow[h] = underflows[h];
822         }
823
824         /* FIXME: underflows must be unconditional, standard verdicts
825            < 0 (not IPT_RETURN). --RR */
826
827         /* Clear counters and comefrom */
828         e->counters = ((struct ipt_counters) { 0, 0 });
829         e->comefrom = 0;
830
831         (*i)++;
832         return 0;
833 }
834
835 static inline int
836 cleanup_entry(struct ipt_entry *e, unsigned int *i)
837 {
838         struct ipt_entry_target *t;
839
840         if (i && (*i)-- == 0)
841                 return 1;
842
843         /* Cleanup all matches */
844         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
845         t = ipt_get_target(e);
846         if (t->u.kernel.target->destroy)
847                 t->u.kernel.target->destroy(t->data,
848                                             t->u.target_size - sizeof(*t));
849         module_put(t->u.kernel.target->me);
850         return 0;
851 }
852
853 /* Checks and translates the user-supplied table segment (held in
854    newinfo) */
855 static int
856 translate_table(const char *name,
857                 unsigned int valid_hooks,
858                 struct ipt_table_info *newinfo,
859                 unsigned int size,
860                 unsigned int number,
861                 const unsigned int *hook_entries,
862                 const unsigned int *underflows)
863 {
864         unsigned int i;
865         int ret;
866
867         newinfo->size = size;
868         newinfo->number = number;
869
870         /* Init all hooks to impossible value. */
871         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
872                 newinfo->hook_entry[i] = 0xFFFFFFFF;
873                 newinfo->underflow[i] = 0xFFFFFFFF;
874         }
875
876         duprintf("translate_table: size %u\n", newinfo->size);
877         i = 0;
878         /* Walk through entries, checking offsets. */
879         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
880                                 check_entry_size_and_hooks,
881                                 newinfo,
882                                 newinfo->entries,
883                                 newinfo->entries + size,
884                                 hook_entries, underflows, &i);
885         if (ret != 0)
886                 return ret;
887
888         if (i != number) {
889                 duprintf("translate_table: %u not %u entries\n",
890                          i, number);
891                 return -EINVAL;
892         }
893
894         /* Check hooks all assigned */
895         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
896                 /* Only hooks which are valid */
897                 if (!(valid_hooks & (1 << i)))
898                         continue;
899                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
900                         duprintf("Invalid hook entry %u %u\n",
901                                  i, hook_entries[i]);
902                         return -EINVAL;
903                 }
904                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
905                         duprintf("Invalid underflow %u %u\n",
906                                  i, underflows[i]);
907                         return -EINVAL;
908                 }
909         }
910
911         if (!mark_source_chains(newinfo, valid_hooks))
912                 return -ELOOP;
913
914         /* Finally, each sanity check must pass */
915         i = 0;
916         ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
917                                 check_entry, name, size, &i);
918
919         if (ret != 0) {
920                 IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
921                                   cleanup_entry, &i);
922                 return ret;
923         }
924
925         /* And one copy for every other CPU */
926         for (i = 1; i < num_possible_cpus(); i++) {
927                 memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
928                        newinfo->entries,
929                        SMP_ALIGN(newinfo->size));
930         }
931
932         return ret;
933 }
934
935 static struct ipt_table_info *
936 replace_table(struct ipt_table *table,
937               unsigned int num_counters,
938               struct ipt_table_info *newinfo,
939               int *error)
940 {
941         struct ipt_table_info *oldinfo;
942
943 #ifdef CONFIG_NETFILTER_DEBUG
944         {
945                 struct ipt_entry *table_base;
946                 unsigned int i;
947
948                 for (i = 0; i < num_possible_cpus(); i++) {
949                         table_base =
950                                 (void *)newinfo->entries
951                                 + TABLE_OFFSET(newinfo, i);
952
953                         table_base->comefrom = 0xdead57ac;
954                 }
955         }
956 #endif
957
958         /* Do the substitution. */
959         write_lock_bh(&table->lock);
960         /* Check inside lock: is the old number correct? */
961         if (num_counters != table->private->number) {
962                 duprintf("num_counters != table->private->number (%u/%u)\n",
963                          num_counters, table->private->number);
964                 write_unlock_bh(&table->lock);
965                 *error = -EAGAIN;
966                 return NULL;
967         }
968         oldinfo = table->private;
969         table->private = newinfo;
970         newinfo->initial_entries = oldinfo->initial_entries;
971         write_unlock_bh(&table->lock);
972
973         return oldinfo;
974 }
975
976 /* Gets counters. */
977 static inline int
978 add_entry_to_counter(const struct ipt_entry *e,
979                      struct ipt_counters total[],
980                      unsigned int *i)
981 {
982         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
983
984         (*i)++;
985         return 0;
986 }
987
988 static void
989 get_counters(const struct ipt_table_info *t,
990              struct ipt_counters counters[])
991 {
992         unsigned int cpu;
993         unsigned int i;
994
995         for (cpu = 0; cpu < num_possible_cpus(); cpu++) {
996                 i = 0;
997                 IPT_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
998                                   t->size,
999                                   add_entry_to_counter,
1000                                   counters,
1001                                   &i);
1002         }
1003 }
1004
1005 static int
1006 copy_entries_to_user(unsigned int total_size,
1007                      struct ipt_table *table,
1008                      void __user *userptr)
1009 {
1010         unsigned int off, num, countersize;
1011         struct ipt_entry *e;
1012         struct ipt_counters *counters;
1013         int ret = 0;
1014
1015         /* We need atomic snapshot of counters: rest doesn't change
1016            (other than comefrom, which userspace doesn't care
1017            about). */
1018         countersize = sizeof(struct ipt_counters) * table->private->number;
1019         counters = vmalloc(countersize);
1020
1021         if (counters == NULL)
1022                 return -ENOMEM;
1023
1024         /* First, sum counters... */
1025         memset(counters, 0, countersize);
1026         write_lock_bh(&table->lock);
1027         get_counters(table->private, counters);
1028         write_unlock_bh(&table->lock);
1029
1030         /* ... then copy entire thing from CPU 0... */
1031         if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
1032                 ret = -EFAULT;
1033                 goto free_counters;
1034         }
1035
1036         /* FIXME: use iterator macros --RR */
1037         /* ... then go back and fix counters and names */
1038         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1039                 unsigned int i;
1040                 struct ipt_entry_match *m;
1041                 struct ipt_entry_target *t;
1042
1043                 e = (struct ipt_entry *)(table->private->entries + off);
1044                 if (copy_to_user(userptr + off
1045                                  + offsetof(struct ipt_entry, counters),
1046                                  &counters[num],
1047                                  sizeof(counters[num])) != 0) {
1048                         ret = -EFAULT;
1049                         goto free_counters;
1050                 }
1051
1052                 for (i = sizeof(struct ipt_entry);
1053                      i < e->target_offset;
1054                      i += m->u.match_size) {
1055                         m = (void *)e + i;
1056
1057                         if (copy_to_user(userptr + off + i
1058                                          + offsetof(struct ipt_entry_match,
1059                                                     u.user.name),
1060                                          m->u.kernel.match->name,
1061                                          strlen(m->u.kernel.match->name)+1)
1062                             != 0) {
1063                                 ret = -EFAULT;
1064                                 goto free_counters;
1065                         }
1066                 }
1067
1068                 t = ipt_get_target(e);
1069                 if (copy_to_user(userptr + off + e->target_offset
1070                                  + offsetof(struct ipt_entry_target,
1071                                             u.user.name),
1072                                  t->u.kernel.target->name,
1073                                  strlen(t->u.kernel.target->name)+1) != 0) {
1074                         ret = -EFAULT;
1075                         goto free_counters;
1076                 }
1077         }
1078
1079  free_counters:
1080         vfree(counters);
1081         return ret;
1082 }
1083
1084 static int
1085 get_entries(const struct ipt_get_entries *entries,
1086             struct ipt_get_entries __user *uptr)
1087 {
1088         int ret;
1089         struct ipt_table *t;
1090
1091         t = find_table_lock(entries->name);
1092         if (t && !IS_ERR(t)) {
1093                 duprintf("t->private->number = %u\n",
1094                          t->private->number);
1095                 if (entries->size == t->private->size)
1096                         ret = copy_entries_to_user(t->private->size,
1097                                                    t, uptr->entrytable);
1098                 else {
1099                         duprintf("get_entries: I've got %u not %u!\n",
1100                                  t->private->size,
1101                                  entries->size);
1102                         ret = -EINVAL;
1103                 }
1104                 module_put(t->me);
1105                 up(&ipt_mutex);
1106         } else
1107                 ret = t ? PTR_ERR(t) : -ENOENT;
1108
1109         return ret;
1110 }
1111
1112 static int
1113 do_replace(void __user *user, unsigned int len)
1114 {
1115         int ret;
1116         struct ipt_replace tmp;
1117         struct ipt_table *t;
1118         struct ipt_table_info *newinfo, *oldinfo;
1119         struct ipt_counters *counters;
1120
1121         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1122                 return -EFAULT;
1123
1124         /* Hack: Causes ipchains to give correct error msg --RR */
1125         if (len != sizeof(tmp) + tmp.size)
1126                 return -ENOPROTOOPT;
1127
1128         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1129         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1130                 return -ENOMEM;
1131
1132         newinfo = vmalloc(sizeof(struct ipt_table_info)
1133                           + SMP_ALIGN(tmp.size) * num_possible_cpus());
1134         if (!newinfo)
1135                 return -ENOMEM;
1136
1137         if (copy_from_user(newinfo->entries, user + sizeof(tmp),
1138                            tmp.size) != 0) {
1139                 ret = -EFAULT;
1140                 goto free_newinfo;
1141         }
1142
1143         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1144         if (!counters) {
1145                 ret = -ENOMEM;
1146                 goto free_newinfo;
1147         }
1148         memset(counters, 0, tmp.num_counters * sizeof(struct ipt_counters));
1149
1150         ret = translate_table(tmp.name, tmp.valid_hooks,
1151                               newinfo, tmp.size, tmp.num_entries,
1152                               tmp.hook_entry, tmp.underflow);
1153         if (ret != 0)
1154                 goto free_newinfo_counters;
1155
1156         duprintf("ip_tables: Translated table\n");
1157
1158         t = try_then_request_module(find_table_lock(tmp.name),
1159                                     "iptable_%s", tmp.name);
1160         if (!t || IS_ERR(t)) {
1161                 ret = t ? PTR_ERR(t) : -ENOENT;
1162                 goto free_newinfo_counters_untrans;
1163         }
1164
1165         /* You lied! */
1166         if (tmp.valid_hooks != t->valid_hooks) {
1167                 duprintf("Valid hook crap: %08X vs %08X\n",
1168                          tmp.valid_hooks, t->valid_hooks);
1169                 ret = -EINVAL;
1170                 goto put_module;
1171         }
1172
1173         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1174         if (!oldinfo)
1175                 goto put_module;
1176
1177         /* Update module usage count based on number of rules */
1178         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1179                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1180         if ((oldinfo->number > oldinfo->initial_entries) || 
1181             (newinfo->number <= oldinfo->initial_entries)) 
1182                 module_put(t->me);
1183         if ((oldinfo->number > oldinfo->initial_entries) &&
1184             (newinfo->number <= oldinfo->initial_entries))
1185                 module_put(t->me);
1186
1187         /* Get the old counters. */
1188         get_counters(oldinfo, counters);
1189         /* Decrease module usage counts and free resource */
1190         IPT_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
1191         vfree(oldinfo);
1192         if (copy_to_user(tmp.counters, counters,
1193                          sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1194                 ret = -EFAULT;
1195         vfree(counters);
1196         up(&ipt_mutex);
1197         return ret;
1198
1199  put_module:
1200         module_put(t->me);
1201         up(&ipt_mutex);
1202  free_newinfo_counters_untrans:
1203         IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
1204  free_newinfo_counters:
1205         vfree(counters);
1206  free_newinfo:
1207         vfree(newinfo);
1208         return ret;
1209 }
1210
1211 /* We're lazy, and add to the first CPU; overflow works its fey magic
1212  * and everything is OK. */
1213 static inline int
1214 add_counter_to_entry(struct ipt_entry *e,
1215                      const struct ipt_counters addme[],
1216                      unsigned int *i)
1217 {
1218 #if 0
1219         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1220                  *i,
1221                  (long unsigned int)e->counters.pcnt,
1222                  (long unsigned int)e->counters.bcnt,
1223                  (long unsigned int)addme[*i].pcnt,
1224                  (long unsigned int)addme[*i].bcnt);
1225 #endif
1226
1227         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1228
1229         (*i)++;
1230         return 0;
1231 }
1232
1233 static int
1234 do_add_counters(void __user *user, unsigned int len)
1235 {
1236         unsigned int i;
1237         struct ipt_counters_info tmp, *paddc;
1238         struct ipt_table *t;
1239         int ret = 0;
1240
1241         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1242                 return -EFAULT;
1243
1244         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1245                 return -EINVAL;
1246
1247         paddc = vmalloc(len);
1248         if (!paddc)
1249                 return -ENOMEM;
1250
1251         if (copy_from_user(paddc, user, len) != 0) {
1252                 ret = -EFAULT;
1253                 goto free;
1254         }
1255
1256         t = find_table_lock(tmp.name);
1257         if (!t || IS_ERR(t)) {
1258                 ret = t ? PTR_ERR(t) : -ENOENT;
1259                 goto free;
1260         }
1261
1262         write_lock_bh(&t->lock);
1263         if (t->private->number != paddc->num_counters) {
1264                 ret = -EINVAL;
1265                 goto unlock_up_free;
1266         }
1267
1268         i = 0;
1269         IPT_ENTRY_ITERATE(t->private->entries,
1270                           t->private->size,
1271                           add_counter_to_entry,
1272                           paddc->counters,
1273                           &i);
1274  unlock_up_free:
1275         write_unlock_bh(&t->lock);
1276         up(&ipt_mutex);
1277         module_put(t->me);
1278  free:
1279         vfree(paddc);
1280
1281         return ret;
1282 }
1283
1284 static int
1285 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1286 {
1287         int ret;
1288
1289         if (!capable(CAP_NET_ADMIN))
1290                 return -EPERM;
1291
1292         switch (cmd) {
1293         case IPT_SO_SET_REPLACE:
1294                 ret = do_replace(user, len);
1295                 break;
1296
1297         case IPT_SO_SET_ADD_COUNTERS:
1298                 ret = do_add_counters(user, len);
1299                 break;
1300
1301         default:
1302                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1303                 ret = -EINVAL;
1304         }
1305
1306         return ret;
1307 }
1308
1309 static int
1310 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1311 {
1312         int ret;
1313
1314         if (!capable(CAP_NET_ADMIN))
1315                 return -EPERM;
1316
1317         switch (cmd) {
1318         case IPT_SO_GET_INFO: {
1319                 char name[IPT_TABLE_MAXNAMELEN];
1320                 struct ipt_table *t;
1321
1322                 if (*len != sizeof(struct ipt_getinfo)) {
1323                         duprintf("length %u != %u\n", *len,
1324                                  sizeof(struct ipt_getinfo));
1325                         ret = -EINVAL;
1326                         break;
1327                 }
1328
1329                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1330                         ret = -EFAULT;
1331                         break;
1332                 }
1333                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1334
1335                 t = try_then_request_module(find_table_lock(name),
1336                                             "iptable_%s", name);
1337                 if (t && !IS_ERR(t)) {
1338                         struct ipt_getinfo info;
1339
1340                         info.valid_hooks = t->valid_hooks;
1341                         memcpy(info.hook_entry, t->private->hook_entry,
1342                                sizeof(info.hook_entry));
1343                         memcpy(info.underflow, t->private->underflow,
1344                                sizeof(info.underflow));
1345                         info.num_entries = t->private->number;
1346                         info.size = t->private->size;
1347                         memcpy(info.name, name, sizeof(info.name));
1348
1349                         if (copy_to_user(user, &info, *len) != 0)
1350                                 ret = -EFAULT;
1351                         else
1352                                 ret = 0;
1353                         up(&ipt_mutex);
1354                         module_put(t->me);
1355                 } else
1356                         ret = t ? PTR_ERR(t) : -ENOENT;
1357         }
1358         break;
1359
1360         case IPT_SO_GET_ENTRIES: {
1361                 struct ipt_get_entries get;
1362
1363                 if (*len < sizeof(get)) {
1364                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1365                         ret = -EINVAL;
1366                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1367                         ret = -EFAULT;
1368                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1369                         duprintf("get_entries: %u != %u\n", *len,
1370                                  sizeof(struct ipt_get_entries) + get.size);
1371                         ret = -EINVAL;
1372                 } else
1373                         ret = get_entries(&get, user);
1374                 break;
1375         }
1376
1377         case IPT_SO_GET_REVISION_MATCH:
1378         case IPT_SO_GET_REVISION_TARGET: {
1379                 struct ipt_get_revision rev;
1380                 int (*revfn)(const char *, u8, int *);
1381
1382                 if (*len != sizeof(rev)) {
1383                         ret = -EINVAL;
1384                         break;
1385                 }
1386                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1387                         ret = -EFAULT;
1388                         break;
1389                 }
1390
1391                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1392                         revfn = target_revfn;
1393                 else
1394                         revfn = match_revfn;
1395
1396                 try_then_request_module(find_revision(rev.name, rev.revision,
1397                                                       revfn, &ret),
1398                                         "ipt_%s", rev.name);
1399                 break;
1400         }
1401
1402         default:
1403                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1404                 ret = -EINVAL;
1405         }
1406
1407         return ret;
1408 }
1409
1410 /* Registration hooks for targets. */
1411 int
1412 ipt_register_target(struct ipt_target *target)
1413 {
1414         int ret;
1415
1416         ret = down_interruptible(&ipt_mutex);
1417         if (ret != 0)
1418                 return ret;
1419         list_add(&target->list, &ipt_target);
1420         up(&ipt_mutex);
1421         return ret;
1422 }
1423
1424 void
1425 ipt_unregister_target(struct ipt_target *target)
1426 {
1427         down(&ipt_mutex);
1428         LIST_DELETE(&ipt_target, target);
1429         up(&ipt_mutex);
1430 }
1431
1432 int
1433 ipt_register_match(struct ipt_match *match)
1434 {
1435         int ret;
1436
1437         ret = down_interruptible(&ipt_mutex);
1438         if (ret != 0)
1439                 return ret;
1440
1441         list_add(&match->list, &ipt_match);
1442         up(&ipt_mutex);
1443
1444         return ret;
1445 }
1446
1447 void
1448 ipt_unregister_match(struct ipt_match *match)
1449 {
1450         down(&ipt_mutex);
1451         LIST_DELETE(&ipt_match, match);
1452         up(&ipt_mutex);
1453 }
1454
1455 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1456 {
1457         int ret;
1458         struct ipt_table_info *newinfo;
1459         static struct ipt_table_info bootstrap
1460                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1461
1462         newinfo = vmalloc(sizeof(struct ipt_table_info)
1463                           + SMP_ALIGN(repl->size) * num_possible_cpus());
1464         if (!newinfo)
1465                 return -ENOMEM;
1466
1467         memcpy(newinfo->entries, repl->entries, repl->size);
1468
1469         ret = translate_table(table->name, table->valid_hooks,
1470                               newinfo, repl->size,
1471                               repl->num_entries,
1472                               repl->hook_entry,
1473                               repl->underflow);
1474         if (ret != 0) {
1475                 vfree(newinfo);
1476                 return ret;
1477         }
1478
1479         ret = down_interruptible(&ipt_mutex);
1480         if (ret != 0) {
1481                 vfree(newinfo);
1482                 return ret;
1483         }
1484
1485         /* Don't autoload: we'd eat our tail... */
1486         if (list_named_find(&ipt_tables, table->name)) {
1487                 ret = -EEXIST;
1488                 goto free_unlock;
1489         }
1490
1491         /* Simplifies replace_table code. */
1492         table->private = &bootstrap;
1493         if (!replace_table(table, 0, newinfo, &ret))
1494                 goto free_unlock;
1495
1496         duprintf("table->private->number = %u\n",
1497                  table->private->number);
1498         
1499         /* save number of initial entries */
1500         table->private->initial_entries = table->private->number;
1501
1502         rwlock_init(&table->lock);
1503         list_prepend(&ipt_tables, table);
1504
1505  unlock:
1506         up(&ipt_mutex);
1507         return ret;
1508
1509  free_unlock:
1510         vfree(newinfo);
1511         goto unlock;
1512 }
1513
1514 void ipt_unregister_table(struct ipt_table *table)
1515 {
1516         down(&ipt_mutex);
1517         LIST_DELETE(&ipt_tables, table);
1518         up(&ipt_mutex);
1519
1520         /* Decrease module usage counts and free resources */
1521         IPT_ENTRY_ITERATE(table->private->entries, table->private->size,
1522                           cleanup_entry, NULL);
1523         vfree(table->private);
1524 }
1525
/* Range test for a port: the result is whether @port lies in the
   inclusive range [@min, @max], XORed with @invert. */
static inline int
port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
{
	int in_range = (min <= port && port <= max);

	return in_range ^ invert;
}
1535
1536 static int
1537 tcp_find_option(u_int8_t option,
1538                 const struct sk_buff *skb,
1539                 unsigned int optlen,
1540                 int invert,
1541                 int *hotdrop)
1542 {
1543         /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
1544         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1545         unsigned int i;
1546
1547         duprintf("tcp_match: finding option\n");
1548
1549         if (!optlen)
1550                 return invert;
1551
1552         /* If we don't have the whole header, drop packet. */
1553         op = skb_header_pointer(skb,
1554                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1555                                 optlen, _opt);
1556         if (op == NULL) {
1557                 *hotdrop = 1;
1558                 return 0;
1559         }
1560
1561         for (i = 0; i < optlen; ) {
1562                 if (op[i] == option) return !invert;
1563                 if (op[i] < 2) i++;
1564                 else i += op[i+1]?:1;
1565         }
1566
1567         return invert;
1568 }
1569
1570 static int
1571 tcp_match(const struct sk_buff *skb,
1572           const struct net_device *in,
1573           const struct net_device *out,
1574           const void *matchinfo,
1575           int offset,
1576           int *hotdrop)
1577 {
1578         struct tcphdr _tcph, *th;
1579         const struct ipt_tcp *tcpinfo = matchinfo;
1580
1581         if (offset) {
1582                 /* To quote Alan:
1583
1584                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1585                    causes this. Its a cracker trying to break in by doing a
1586                    flag overwrite to pass the direction checks.
1587                 */
1588                 if (offset == 1) {
1589                         duprintf("Dropping evil TCP offset=1 frag.\n");
1590                         *hotdrop = 1;
1591                 }
1592                 /* Must not be a fragment. */
1593                 return 0;
1594         }
1595
1596 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1597
1598         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1599                                 sizeof(_tcph), &_tcph);
1600         if (th == NULL) {
1601                 /* We've been asked to examine this packet, and we
1602                    can't.  Hence, no choice but to drop. */
1603                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1604                 *hotdrop = 1;
1605                 return 0;
1606         }
1607
1608         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1609                         ntohs(th->source),
1610                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1611                 return 0;
1612         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1613                         ntohs(th->dest),
1614                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1615                 return 0;
1616         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1617                       == tcpinfo->flg_cmp,
1618                       IPT_TCP_INV_FLAGS))
1619                 return 0;
1620         if (tcpinfo->option) {
1621                 if (th->doff * 4 < sizeof(_tcph)) {
1622                         *hotdrop = 1;
1623                         return 0;
1624                 }
1625                 if (!tcp_find_option(tcpinfo->option, skb,
1626                                      th->doff*4 - sizeof(_tcph),
1627                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1628                                      hotdrop))
1629                         return 0;
1630         }
1631         return 1;
1632 }
1633
1634 /* Called when user tries to insert an entry of this type. */
1635 static int
1636 tcp_checkentry(const char *tablename,
1637                const struct ipt_ip *ip,
1638                void *matchinfo,
1639                unsigned int matchsize,
1640                unsigned int hook_mask)
1641 {
1642         const struct ipt_tcp *tcpinfo = matchinfo;
1643
1644         /* Must specify proto == TCP, and no unknown invflags */
1645         return ip->proto == IPPROTO_TCP
1646                 && !(ip->invflags & IPT_INV_PROTO)
1647                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1648                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1649 }
1650
1651 static int
1652 udp_match(const struct sk_buff *skb,
1653           const struct net_device *in,
1654           const struct net_device *out,
1655           const void *matchinfo,
1656           int offset,
1657           int *hotdrop)
1658 {
1659         struct udphdr _udph, *uh;
1660         const struct ipt_udp *udpinfo = matchinfo;
1661
1662         /* Must not be a fragment. */
1663         if (offset)
1664                 return 0;
1665
1666         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1667                                 sizeof(_udph), &_udph);
1668         if (uh == NULL) {
1669                 /* We've been asked to examine this packet, and we
1670                    can't.  Hence, no choice but to drop. */
1671                 duprintf("Dropping evil UDP tinygram.\n");
1672                 *hotdrop = 1;
1673                 return 0;
1674         }
1675
1676         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1677                           ntohs(uh->source),
1678                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1679                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1680                               ntohs(uh->dest),
1681                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1682 }
1683
1684 /* Called when user tries to insert an entry of this type. */
1685 static int
1686 udp_checkentry(const char *tablename,
1687                const struct ipt_ip *ip,
1688                void *matchinfo,
1689                unsigned int matchinfosize,
1690                unsigned int hook_mask)
1691 {
1692         const struct ipt_udp *udpinfo = matchinfo;
1693
1694         /* Must specify proto == UDP, and no unknown invflags */
1695         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1696                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1697                          IPPROTO_UDP);
1698                 return 0;
1699         }
1700         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1701                 duprintf("ipt_udp: matchsize %u != %u\n",
1702                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1703                 return 0;
1704         }
1705         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1706                 duprintf("ipt_udp: unknown flags %X\n",
1707                          udpinfo->invflags);
1708                 return 0;
1709         }
1710
1711         return 1;
1712 }
1713
/*
 * Test an ICMP type/code pair against a rule.
 *
 * The pair hits when test_type is the wildcard 0xFF, or when type equals
 * test_type and code lies in [min_code, max_code] (both ends inclusive).
 * The hit (1 or 0) is XORed with invert, so invert == 1 flips the result.
 */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     int invert)
{
        int hit;

        if (test_type == 0xFF)
                hit = 1;        /* wildcard: any type and code */
        else
                hit = (type == test_type
                       && min_code <= code
                       && code <= max_code);

        return hit ^ invert;
}
1723
1724 static int
1725 icmp_match(const struct sk_buff *skb,
1726            const struct net_device *in,
1727            const struct net_device *out,
1728            const void *matchinfo,
1729            int offset,
1730            int *hotdrop)
1731 {
1732         struct icmphdr _icmph, *ic;
1733         const struct ipt_icmp *icmpinfo = matchinfo;
1734
1735         /* Must not be a fragment. */
1736         if (offset)
1737                 return 0;
1738
1739         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1740                                 sizeof(_icmph), &_icmph);
1741         if (ic == NULL) {
1742                 /* We've been asked to examine this packet, and we
1743                  * can't.  Hence, no choice but to drop.
1744                  */
1745                 duprintf("Dropping evil ICMP tinygram.\n");
1746                 *hotdrop = 1;
1747                 return 0;
1748         }
1749
1750         return icmp_type_code_match(icmpinfo->type,
1751                                     icmpinfo->code[0],
1752                                     icmpinfo->code[1],
1753                                     ic->type, ic->code,
1754                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1755 }
1756
1757 /* Called when user tries to insert an entry of this type. */
1758 static int
1759 icmp_checkentry(const char *tablename,
1760            const struct ipt_ip *ip,
1761            void *matchinfo,
1762            unsigned int matchsize,
1763            unsigned int hook_mask)
1764 {
1765         const struct ipt_icmp *icmpinfo = matchinfo;
1766
1767         /* Must specify proto == ICMP, and no unknown invflags */
1768         return ip->proto == IPPROTO_ICMP
1769                 && !(ip->invflags & IPT_INV_PROTO)
1770                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1771                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1772 }
1773
1774 /* The built-in targets: standard (NULL) and error. */
1775 static struct ipt_target ipt_standard_target = {
1776         .name           = IPT_STANDARD_TARGET,
1777 };
1778
1779 static struct ipt_target ipt_error_target = {
1780         .name           = IPT_ERROR_TARGET,
1781         .target         = ipt_error,
1782 };
1783
1784 static struct nf_sockopt_ops ipt_sockopts = {
1785         .pf             = PF_INET,
1786         .set_optmin     = IPT_BASE_CTL,
1787         .set_optmax     = IPT_SO_SET_MAX+1,
1788         .set            = do_ipt_set_ctl,
1789         .get_optmin     = IPT_BASE_CTL,
1790         .get_optmax     = IPT_SO_GET_MAX+1,
1791         .get            = do_ipt_get_ctl,
1792 };
1793
1794 static struct ipt_match tcp_matchstruct = {
1795         .name           = "tcp",
1796         .match          = &tcp_match,
1797         .checkentry     = &tcp_checkentry,
1798 };
1799
1800 static struct ipt_match udp_matchstruct = {
1801         .name           = "udp",
1802         .match          = &udp_match,
1803         .checkentry     = &udp_checkentry,
1804 };
1805
1806 static struct ipt_match icmp_matchstruct = {
1807         .name           = "icmp",
1808         .match          = &icmp_match,
1809         .checkentry     = &icmp_checkentry,
1810 };
1811
1812 #ifdef CONFIG_PROC_FS
1813 static inline int print_name(const char *i,
1814                              off_t start_offset, char *buffer, int length,
1815                              off_t *pos, unsigned int *count)
1816 {
1817         if ((*count)++ >= start_offset) {
1818                 unsigned int namelen;
1819
1820                 namelen = sprintf(buffer + *pos, "%s\n",
1821                                   i + sizeof(struct list_head));
1822                 if (*pos + namelen > length) {
1823                         /* Stop iterating */
1824                         return 1;
1825                 }
1826                 *pos += namelen;
1827         }
1828         return 0;
1829 }
1830
1831 static inline int print_target(const struct ipt_target *t,
1832                                off_t start_offset, char *buffer, int length,
1833                                off_t *pos, unsigned int *count)
1834 {
1835         if (t == &ipt_standard_target || t == &ipt_error_target)
1836                 return 0;
1837         return print_name((char *)t, start_offset, buffer, length, pos, count);
1838 }
1839
1840 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1841 {
1842         off_t pos = 0;
1843         unsigned int count = 0;
1844
1845         if (down_interruptible(&ipt_mutex) != 0)
1846                 return 0;
1847
1848         LIST_FIND(&ipt_tables, print_name, void *,
1849                   offset, buffer, length, &pos, &count);
1850
1851         up(&ipt_mutex);
1852
1853         /* `start' hack - see fs/proc/generic.c line ~105 */
1854         *start=(char *)((unsigned long)count-offset);
1855         return pos;
1856 }
1857
1858 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1859 {
1860         off_t pos = 0;
1861         unsigned int count = 0;
1862
1863         if (down_interruptible(&ipt_mutex) != 0)
1864                 return 0;
1865
1866         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1867                   offset, buffer, length, &pos, &count);
1868         
1869         up(&ipt_mutex);
1870
1871         *start = (char *)((unsigned long)count - offset);
1872         return pos;
1873 }
1874
1875 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1876 {
1877         off_t pos = 0;
1878         unsigned int count = 0;
1879
1880         if (down_interruptible(&ipt_mutex) != 0)
1881                 return 0;
1882         
1883         LIST_FIND(&ipt_match, print_name, void *,
1884                   offset, buffer, length, &pos, &count);
1885
1886         up(&ipt_mutex);
1887
1888         *start = (char *)((unsigned long)count - offset);
1889         return pos;
1890 }
1891
1892 static struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1893 { { "ip_tables_names", ipt_get_tables },
1894   { "ip_tables_targets", ipt_get_targets },
1895   { "ip_tables_matches", ipt_get_matches },
1896   { NULL, NULL} };
1897 #endif /*CONFIG_PROC_FS*/
1898
1899 static int __init init(void)
1900 {
1901         int ret;
1902
1903         /* Noone else will be downing sem now, so we won't sleep */
1904         down(&ipt_mutex);
1905         list_append(&ipt_target, &ipt_standard_target);
1906         list_append(&ipt_target, &ipt_error_target);
1907         list_append(&ipt_match, &tcp_matchstruct);
1908         list_append(&ipt_match, &udp_matchstruct);
1909         list_append(&ipt_match, &icmp_matchstruct);
1910         up(&ipt_mutex);
1911
1912         /* Register setsockopt */
1913         ret = nf_register_sockopt(&ipt_sockopts);
1914         if (ret < 0) {
1915                 duprintf("Unable to register sockopts.\n");
1916                 return ret;
1917         }
1918
1919 #ifdef CONFIG_PROC_FS
1920         {
1921         struct proc_dir_entry *proc;
1922         int i;
1923
1924         for (i = 0; ipt_proc_entry[i].name; i++) {
1925                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1926                                        ipt_proc_entry[i].get_info);
1927                 if (!proc) {
1928                         while (--i >= 0)
1929                                 proc_net_remove(ipt_proc_entry[i].name);
1930                         nf_unregister_sockopt(&ipt_sockopts);
1931                         return -ENOMEM;
1932                 }
1933                 proc->owner = THIS_MODULE;
1934         }
1935         }
1936 #endif
1937
1938         printk("ip_tables: (C) 2000-2002 Netfilter core team\n");
1939         return 0;
1940 }
1941
1942 static void __exit fini(void)
1943 {
1944         nf_unregister_sockopt(&ipt_sockopts);
1945 #ifdef CONFIG_PROC_FS
1946         {
1947         int i;
1948         for (i = 0; ipt_proc_entry[i].name; i++)
1949                 proc_net_remove(ipt_proc_entry[i].name);
1950         }
1951 #endif
1952 }
1953
/* Symbols offered to other modules, grouped by the object they handle. */

/* tables */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* matches */
EXPORT_SYMBOL(ipt_register_match);
EXPORT_SYMBOL(ipt_unregister_match);

/* targets */
EXPORT_SYMBOL(ipt_register_target);
EXPORT_SYMBOL(ipt_unregister_target);
EXPORT_SYMBOL(ipt_find_target);

/* Module entry and exit points. */
module_init(init);
module_exit(fini);