ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / net / ipv4 / netfilter / ipchains_core.c
1 /* Minor modifications to fit on compatibility framework:
2    Rusty.Russell@rustcorp.com.au
3 */
4
5 /*
6  * This code is heavily based on the code on the old ip_fw.c code; see below for
7  * copyrights and attributions of the old code.  This code is basically GPL.
8  *
9  * 15-Aug-1997: Major changes to allow graphs for firewall rules.
10  *              Paul Russell <Paul.Russell@rustcorp.com.au> and
11  *              Michael Neuling <Michael.Neuling@rustcorp.com.au>
12  * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
13  *              Added explicit RETURN from chains.
14  *              Removed TOS mangling (done in ipchains 1.0.1).
15  *              Fixed read & reset bug by reworking proc handling.
16  *              Paul Russell <Paul.Russell@rustcorp.com.au>
17  * 28-Sep-1997: Added packet marking for net sched code.
18  *              Removed fw_via comparisons: all done on device name now,
19  *              similar to changes in ip_fw.c in DaveM's CVS970924 tree.
20  *              Paul Russell <Paul.Russell@rustcorp.com.au>
21  * 2-Nov-1997:  Moved types across to __u16, etc.
22  *              Added inverse flags.
23  *              Fixed fragment bug (in args to port_match).
24  *              Changed mark to only one flag (MARKABS).
25  * 21-Nov-1997: Added ability to test ICMP code.
26  * 19-Jan-1998: Added wildcard interfaces.
27  * 6-Feb-1998:  Merged 2.0 and 2.1 versions.
28  *              Initialised ip_masq for 2.0.x version.
29  *              Added explicit NETLINK option for 2.1.x version.
30  *              Added packet and byte counters for policy matches.
31  * 26-Feb-1998: Fixed race conditions, added SMP support.
32  * 18-Mar-1998: Fix SMP, fix race condition fix.
33  * 1-May-1998:  Remove caching of device pointer.
34  * 12-May-1998: Allow tiny fragment case for TCP/UDP.
35  * 15-May-1998: Treat short packets as fragments, don't just block.
36  * 3-Jan-1999:  Fixed serious procfs security hole -- users should never
37  *              be allowed to view the chains!
38  *              Marc Santoro <ultima@snicker.emoti.com>
39  * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash
40  *              during dump_packet. --RR.
41  * 19-May-1999: Star Wars: The Phantom Menace opened.  Rule num
42  *              printed in log (modified from Michael Hasenstein's patch).
43  *              Added SYN in log message. --RR
44  * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998.
45  *              John McDonald <jm@dataprotect.com>
46  *              Thomas Lopatic <tl@dataprotect.com>
47  */
48
49 /*
50  *
51  * The original Linux port was done by Alan Cox, with changes/fixes from
52  * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
53  * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
54  *
55  * Copyright from the original FreeBSD version follows:
56  *
57  * Copyright (c) 1993 Daniel Boulet
58  * Copyright (c) 1994 Ugen J.S.Antsilevich
59  *
60  * Redistribution and use in source forms, with and without modification,
61  * are permitted provided that this entire comment appears intact.
62  *
63  * Redistribution in binary form may occur without any restrictions.
64  * Obviously, it would be nice if you gave credit where credit is due
65  * but requiring it would be too onerous.
66  *
67  * This software is provided ``AS IS'' without any warranties of any kind.  */
68
69 #include <linux/config.h>
70
71 #include <asm/uaccess.h>
72 #include <asm/system.h>
73 #include <linux/types.h>
74 #include <linux/sched.h>
75 #include <linux/string.h>
76 #include <linux/errno.h>
77 #include <linux/module.h>
78
79 #include <linux/socket.h>
80 #include <linux/sockios.h>
81 #include <linux/in.h>
82 #include <linux/inet.h>
83 #include <linux/netdevice.h>
84 #include <linux/icmp.h>
85 #include <linux/udp.h>
86 #include <net/ip.h>
87 #include <net/protocol.h>
88 #include <net/route.h>
89 #include <net/tcp.h>
90 #include <net/udp.h>
91 #include <net/sock.h>
92 #include <net/icmp.h>
93 #include <linux/netlink.h>
94 #include <linux/netfilter.h>
95 #include <linux/netfilter_ipv4/compat_firewall.h>
96 #include <linux/netfilter_ipv4/ipchains_core.h>
97 #include <linux/netfilter_ipv4/ip_nat_core.h>
98
99 #include <net/checksum.h>
100 #include <linux/proc_fs.h>
101 #include <linux/stat.h>
102
103 MODULE_LICENSE("Dual BSD/GPL");
104 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
105 MODULE_DESCRIPTION("ipchains backwards compatibility layer");
106
107 /* Understanding locking in this code: (thanks to Alan Cox for using
108  * little words to explain this to me). -- PR
109  *
110  * In UP, there can be two packets traversing the chains:
111  * 1) A packet from the current userspace context
112  * 2) A packet off the bh handlers (timer or net).
113  *
114  * For SMP (kernel v2.1+), multiply this by # CPUs.
115  *
116  * [Note that this is not correct for 2.2 - because the socket code always
117  *  uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
118  *  only run on one CPU at a time.  This will probably change for 2.3.
119  *  It is still good to use spinlocks because that avoids the global cli()
120  *  for updating the tables, which is rather costly in SMP kernels -AK]
121  *
122  * This means counters and backchains can get corrupted if no precautions
123  * are taken.
124  *
125  * To actually alter a chain on UP, we need only do a cli(), as this will
126  * stop a bh handler firing, as we are in the current userspace context
127  * (coming from a setsockopt()).
128  *
129  * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
130  * UP.
131  *
132  * For backchains and counters, we use an array, indexed by
133  * [smp_processor_id()*2 + !in_interrupt()]; the array is of
134  * size [NR_CPUS*2].  For v2.0, NR_CPUS is effectively 1.  So,
135  * confident of uniqueness, we modify counters even though we only
136  * have a read lock (to read the counters, you need a write lock,
137  * though).  */
138
139 /* Why I didn't use straight locking... -- PR
140  *
141  * The backchains can be separated out of the ip_chains structure, and
142  * allocated as needed inside ip_fw_check().
143  *
144  * The counters, however, can't.  Trying to lock these means blocking
145  * interrupts every time we want to access them.  This would suck HARD
146  * performance-wise.  Not locking them leads to possible corruption,
147  * made worse on 32-bit machines (counters are 64-bit).  */
148
149 /*#define DEBUG_IP_FIREWALL*/
150 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
151 /*#define DEBUG_IP_FIREWALL_USER*/
152 /*#define DEBUG_IP_FIREWALL_LOCKING*/
153
/* Socket passed to netlink_broadcast() when a matching rule carries
 * IP_FW_F_NETLINK; it only exists when the netlink device is configured. */
#if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
static struct sock *ipfwsk;
#endif

/* Index of the per-context slot used for counters and backchains:
 * two slots per CPU, selected by whether we are in interrupt context. */
#if defined(CONFIG_SMP)
#define SLOT_NUMBER()   (smp_processor_id() * 2 + !in_interrupt())
#else
#define SLOT_NUMBER()   (!in_interrupt())
#endif
#define NUM_SLOTS       (NR_CPUS * 2)

/* Allocation sizes including the trailing per-slot arrays. */
#define SIZEOF_STRUCT_IP_CHAIN                                  \
        (sizeof(struct ip_chain)                                \
         + NUM_SLOTS * sizeof(struct ip_reent))
#define SIZEOF_STRUCT_IP_FW_KERNEL                              \
        (sizeof(struct ip_fwkernel)                             \
         + NUM_SLOTS * sizeof(struct ip_counters))
169
/*
 * Lock bookkeeping used when DEBUG_IP_FIREWALL_LOCKING is defined.
 *
 * fwc_rlocks / fwc_wlocks hold one bit per slot (see SLOT_NUMBER()); a set
 * bit means that slot believes it holds the read / write lock.  The checks
 * only printk() a complaint, they never change control flow.
 *
 * The bit is built from an unsigned constant so that the top bit of the
 * mask can be set without shifting into the sign bit of an int.
 * NOTE(review): the masks are plain unsigned ints, so slot numbers beyond
 * the width of unsigned int cannot be tracked -- confirm against NR_CPUS.
 */
#ifdef DEBUG_IP_FIREWALL_LOCKING
static unsigned int fwc_rlocks, fwc_wlocks;

/* Complain if slot already has its bit set in mask d. */
#define FWC_DONT_HAVE_LOCK(d)                                   \
do {                                                            \
        if ((d) & (1U << SLOT_NUMBER()))                        \
                printk("%s:%i: Got lock on %i already!\n",      \
                       __FILE__, __LINE__, SLOT_NUMBER());      \
} while(0)

/* Complain if slot does not have its bit set in mask d. */
#define FWC_HAVE_LOCK(d)                                        \
do {                                                            \
        if (!((d) & (1U << SLOT_NUMBER())))                     \
                printk("%s:%i:No lock on %i!\n",                \
                       __FILE__, __LINE__, SLOT_NUMBER());      \
} while (0)

/* Record that the current slot is taking the lock tracked by d. */
#define FWC_DEBUG_LOCK(d)                       \
do {                                            \
        FWC_DONT_HAVE_LOCK(d);                  \
        (d) |= (1U << SLOT_NUMBER());           \
} while (0)

/* Record that the current slot is dropping the lock tracked by d. */
#define FWC_DEBUG_UNLOCK(d)                     \
do {                                            \
        FWC_HAVE_LOCK(d);                       \
        (d) &= ~(1U << SLOT_NUMBER());          \
} while (0)

#else
#define FWC_DEBUG_LOCK(d) do { } while(0)
#define FWC_DEBUG_UNLOCK(d) do { } while(0)
#define FWC_DONT_HAVE_LOCK(d) do { } while(0)
#define FWC_HAVE_LOCK(d) do { } while(0)
#endif /* DEBUG_IP_FIREWALL_LOCKING */

/* rwlock wrappers that update the debug bookkeeping before locking and
 * before unlocking; l is the lock, f the saved-flags variable. */
#define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
#define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
#define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
#define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
#define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
#define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
#define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
#define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
213
/* Forward declaration: rules and backchain records hold chain pointers. */
struct ip_chain;

/* One packet/byte tally. */
struct ip_counters {
        __u64 pcnt;     /* Packet counter */
        __u64 bcnt;     /* Byte counter */
};
220
221 struct ip_fwkernel
222 {
223         struct ip_fw ipfw;
224         struct ip_fwkernel *next;       /* where to go next if current
225                                          * rule doesn't match */
226         struct ip_chain *branch;        /* which branch to jump to if
227                                          * current rule matches */
228         int simplebranch;               /* Use this if branch == NULL */
229         struct ip_counters counters[0]; /* Actually several of these */
230 };
231
232 struct ip_reent
233 {
234         struct ip_chain *prevchain;     /* Pointer to referencing chain */
235         struct ip_fwkernel *prevrule;   /* Pointer to referencing rule */
236         struct ip_counters counters;
237 };
238
239 struct ip_chain
240 {
241         ip_chainlabel label;        /* Defines the label for each block */
242         struct ip_chain *next;      /* Pointer to next block */
243         struct ip_fwkernel *chain;  /* Pointer to first rule in block */
244         __u32 refcount;             /* Number of refernces to block */
245         int policy;                 /* Default rule for chain.  Only *
246                                      * used in built in chains */
247         struct ip_reent reent[0];   /* Actually several of these */
248 };
249
250 /*
251  *      Implement IP packet firewall
252  */
253
/* dprintf(): goes to printk() when DEBUG_IP_FIREWALL is defined;
 * otherwise it expands to nothing, so its arguments are never evaluated. */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(format, args...)
#else
#define dprintf(format, args...)  printk(format , ## args)
#endif

/* duprintf(): same arrangement, switched by DEBUG_IP_FIREWALL_USER. */
#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...)
#else
#define duprintf(format, args...) printk(format , ## args)
#endif
265
266 /* Lock around ip_fw_chains linked list structure */
267 rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;
268
269 /* Head of linked list of fw rules */
270 static struct ip_chain *ip_fw_chains;
271
272 #define IP_FW_INPUT_CHAIN ip_fw_chains
273 #define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
274 #define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
275
/*
 * Returns 1 if the port is matched by the range, 0 otherwise.
 *
 * min, max: inclusive port range of the rule.
 * port:     port taken from the packet.
 * frag:     non-zero when the packet is a fragment.
 * invert:   0 or 1; 1 flips the result of the range test.
 *
 * Declared static inline rather than extern inline: with gnu89 semantics
 * an extern inline definition produces no out-of-line copy, so any call
 * the compiler chooses not to inline would be left as an unresolved
 * symbol.
 */
static inline int port_match(__u16 min, __u16 max, __u16 port,
                             int frag, int invert)
{
        /* Fragments fail ANY port test: they only pass a rule whose range
         * covers every port, and the invert flag is not consulted. */
        if (frag)
                return (min == 0 && max == 0xFFFF);

        return (port >= min && port <= max) ^ invert;
}
284
285 /* Returns whether matches rule or not. */
286 static int ip_rule_match(struct ip_fwkernel *f,
287                          const char *ifname,
288                          struct sk_buff **pskb,
289                          char tcpsyn,
290                          __u16 src_port, __u16 dst_port,
291                          char isfrag)
292 {
293         struct iphdr *ip = (*pskb)->nh.iph;
294
295 #define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
296         /*
297          *      This is a bit simpler as we don't have to walk
298          *      an interface chain as you do in BSD - same logic
299          *      however.
300          */
301
302         if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
303                   IP_FW_INV_SRCIP)
304             || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
305                      IP_FW_INV_DSTIP)) {
306                 dprintf("Source or dest mismatch.\n");
307
308                 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
309                         f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
310                         f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
311                 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
312                         f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
313                         f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
314                 return 0;
315         }
316
317         /*
318          *      Look for a VIA device match
319          */
320         if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
321             if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
322                               strlen(f->ipfw.fw_vianame)) != 0,
323                       IP_FW_INV_VIA)) {
324                 dprintf("Wildcard interface mismatch.%s\n",
325                         f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
326                 return 0;       /* Mismatch */
327             }
328         }
329         else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
330                        IP_FW_INV_VIA)) {
331             dprintf("Interface name does not match.%s\n",
332                     f->ipfw.fw_invflg & IP_FW_INV_VIA
333                     ? " (INV)" : "");
334             return 0;   /* Mismatch */
335         }
336
337         /*
338          *      Ok the chain addresses match.
339          */
340
341         /* If we have a fragment rule but the packet is not a fragment
342          * the we return zero */
343         if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
344                 dprintf("Fragment rule but not fragment.%s\n",
345                         f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
346                 return 0;
347         }
348
349         /* Fragment NEVER passes a SYN test, even an inverted one. */
350         if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
351             || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
352                 dprintf("Rule requires SYN and packet has no SYN.%s\n",
353                         f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
354                 return 0;
355         }
356
357         if (f->ipfw.fw_proto) {
358                 /*
359                  *      Specific firewall - packet's protocol
360                  *      must match firewall's.
361                  */
362
363                 if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
364                         dprintf("Packet protocol %hi does not match %hi.%s\n",
365                                 ip->protocol, f->ipfw.fw_proto,
366                                 f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
367                         return 0;
368                 }
369
370                 /* For non TCP/UDP/ICMP, port range is max anyway. */
371                 if (!port_match(f->ipfw.fw_spts[0],
372                                 f->ipfw.fw_spts[1],
373                                 src_port, isfrag,
374                                 !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
375                     || !port_match(f->ipfw.fw_dpts[0],
376                                    f->ipfw.fw_dpts[1],
377                                    dst_port, isfrag,
378                                    !!(f->ipfw.fw_invflg
379                                       &IP_FW_INV_DSTPT))) {
380                     dprintf("Port match failed.\n");
381                     return 0;
382                 }
383         }
384
385         dprintf("Match succeeded.\n");
386         return 1;
387 }
388
389 static const char *branchname(struct ip_chain *branch,int simplebranch)
390 {
391         if (branch)
392                 return branch->label;
393         switch (simplebranch)
394         {
395         case FW_BLOCK: return IP_FW_LABEL_BLOCK;
396         case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
397         case FW_REJECT: return IP_FW_LABEL_REJECT;
398         case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
399         case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
400         case FW_SKIP: return "-";
401         case FW_SKIP+1: return IP_FW_LABEL_RETURN;
402         default:
403                 return "UNKNOWN";
404         }
405 }
406
407 /*
408  * VERY ugly piece of code which actually
409  * makes kernel printf for matching packets...
410  */
411 static void dump_packet(struct sk_buff **pskb,
412                         const char *ifname,
413                         struct ip_fwkernel *f,
414                         const ip_chainlabel chainlabel,
415                         __u16 src_port,
416                         __u16 dst_port,
417                         unsigned int count,
418                         int syn)
419 {
420         __u32 *opt = (__u32 *) ((*pskb)->nh.iph + 1);
421         int opti;
422
423         if (f) {
424                 printk(KERN_INFO "Packet log: %s ",chainlabel);
425                 printk("%s ",branchname(f->branch,f->simplebranch));
426                 if (f->simplebranch==FW_REDIRECT)
427                         printk("%d ",f->ipfw.fw_redirpt);
428         }
429
430         printk("%s PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
431                " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
432                ifname, (*pskb)->nh.iph->protocol,
433                NIPQUAD((*pskb)->nh.iph->saddr),
434                src_port,
435                NIPQUAD((*pskb)->nh.iph->daddr),
436                dst_port,
437                ntohs((*pskb)->nh.iph->tot_len),
438                (*pskb)->nh.iph->tos,
439                ntohs((*pskb)->nh.iph->id),
440                ntohs((*pskb)->nh.iph->frag_off),
441                (*pskb)->nh.iph->ttl);
442
443         for (opti = 0; opti < ((*pskb)->nh.iph->ihl - sizeof(struct iphdr) / 4); opti++)
444                 printk(" O=0x%8.8X", *opt++);
445         printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count);
446 }
447
448 /* function for checking chain labels for user space. */
449 static int check_label(ip_chainlabel label)
450 {
451         unsigned int i;
452         /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
453         for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
454                 if (label[i] == '\0') return 1;
455
456         return 0;
457 }
458
459 /*      This function returns a pointer to the first chain with a label
460  *      that matches the one given. */
461 static struct ip_chain *find_label(ip_chainlabel label)
462 {
463         struct ip_chain *tmp;
464         FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
465         for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
466                 if (strcmp(tmp->label,label) == 0)
467                         break;
468         return tmp;
469 }
470
471 /* This function returns a boolean which when true sets answer to one
472    of the FW_*. */
473 static int find_special(ip_chainlabel label, int *answer)
474 {
475         if (label[0] == '\0') {
476                 *answer = FW_SKIP; /* => pass-through rule */
477                 return 1;
478         } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
479                 *answer = FW_ACCEPT;
480                 return 1;
481         } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
482                 *answer = FW_BLOCK;
483                 return 1;
484         } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
485                 *answer = FW_REJECT;
486                 return 1;
487         } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
488                 *answer = FW_REDIRECT;
489                 return 1;
490         } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
491                 *answer = FW_MASQUERADE;
492                 return 1;
493         } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
494                 *answer = FW_SKIP+1;
495                 return 1;
496         } else {
497                 return 0;
498         }
499 }
500
501 /* This function cleans up the prevchain and prevrule.  If the verbose
502  * flag is set then he names of the chains will be printed as it
503  * cleans up.  */
504 static void cleanup(struct ip_chain *chain,
505                     const int verbose,
506                     unsigned int slot)
507 {
508         struct ip_chain *tmpchain = chain->reent[slot].prevchain;
509         if (verbose)
510                 printk(KERN_ERR "Chain backtrace: ");
511         while (tmpchain) {
512                 if (verbose)
513                         printk("%s<-",chain->label);
514                 chain->reent[slot].prevchain = NULL;
515                 chain = tmpchain;
516                 tmpchain = chain->reent[slot].prevchain;
517         }
518         if (verbose)
519                 printk("%s\n",chain->label);
520 }
521
522 static inline int
523 ip_fw_domatch(struct ip_fwkernel *f,
524               const char *rif,
525               const ip_chainlabel label,
526               struct sk_buff **pskb,
527               unsigned int slot,
528               __u16 src_port, __u16 dst_port,
529               unsigned int count,
530               int tcpsyn,
531               unsigned char *tos)
532 {
533         f->counters[slot].bcnt+=ntohs((*pskb)->nh.iph->tot_len);
534         f->counters[slot].pcnt++;
535         if (f->ipfw.fw_flg & IP_FW_F_PRN) {
536                 dump_packet(pskb,rif,f,label,src_port,dst_port,count,tcpsyn);
537         }
538
539         *tos = (*tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
540
541 /* This functionality is useless in stock 2.0.x series, but we don't
542  * discard the mark thing altogether, to avoid breaking ipchains (and,
543  * more importantly, the ipfwadm wrapper) --PR */
544         if (f->ipfw.fw_flg & IP_FW_F_MARKABS) {
545                 (*pskb)->nfmark = f->ipfw.fw_mark;
546         } else {
547                 (*pskb)->nfmark += f->ipfw.fw_mark;
548         }
549         if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
550 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
551                 size_t len = min_t(unsigned int, f->ipfw.fw_outputsize, ntohs((*pskb)->nh.iph->tot_len))
552                         + sizeof(__u32) + sizeof((*pskb)->nfmark) + IFNAMSIZ;
553                 struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
554
555                 duprintf("Sending packet out NETLINK (length = %u).\n",
556                          (unsigned int)len);
557                 if (outskb) {
558                         /* Prepend length, mark & interface */
559                         skb_put(outskb, len);
560                         *((__u32 *)outskb->data) = (__u32)len;
561                         *((__u32 *)(outskb->data+sizeof(__u32))) =
562                                 (*pskb)->nfmark;
563                         strcpy(outskb->data+sizeof(__u32)*2, rif);
564                         skb_copy_bits(*pskb,
565                                 ((char *)(*pskb)->nh.iph - (char *)(*pskb)->data),
566                                 outskb->data+sizeof(__u32)*2+IFNAMSIZ,
567                                 len-(sizeof(__u32)*2+IFNAMSIZ));
568                         netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_ATOMIC);
569                 }
570                 else {
571 #endif
572                         if (net_ratelimit())
573                                 printk(KERN_WARNING "ip_fw: packet drop due to "
574                                        "netlink failure\n");
575                         return 0;
576 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
577                 }
578 #endif
579         }
580         return 1;
581 }
582
583 /*
584  *      Returns one of the generic firewall policies, like FW_ACCEPT.
585  *
586  *      The testing is either false for normal firewall mode or true for
587  *      user checking mode (counters are not updated, TOS & mark not done).
588  */
589 static int
590 ip_fw_check(const char *rif,
591             __u16 *redirport,
592             struct ip_chain *chain,
593             struct sk_buff **pskb,
594             unsigned int slot,
595             int testing)
596 {
597         __u32                   src, dst;
598         __u16                   src_port = 0xFFFF, dst_port = 0xFFFF;
599         char                    tcpsyn=0;
600         __u16                   offset;
601         unsigned char           tos;
602         struct ip_fwkernel      *f;
603         int                     ret = FW_SKIP+2;
604         unsigned int            count;
605
606         /* We handle fragments by dealing with the first fragment as
607          * if it was a normal packet.  All other fragments are treated
608          * normally, except that they will NEVER match rules that ask
609          * things we don't know, ie. tcp syn flag or ports).  If the
610          * rule is also a fragment-specific rule, non-fragments won't
611          * match it. */
612
613         offset = ntohs((*pskb)->nh.iph->frag_off) & IP_OFFSET;
614
615         /*
616          *      Don't allow a fragment of TCP 8 bytes in. Nobody
617          *      normal causes this. Its a cracker trying to break
618          *      in by doing a flag overwrite to pass the direction
619          *      checks.
620          */
621         if (offset == 1 && (*pskb)->nh.iph->protocol == IPPROTO_TCP) {
622                 if (!testing && net_ratelimit()) {
623                         printk("Suspect TCP fragment.\n");
624                         dump_packet(pskb,rif,NULL,NULL,0,0,0,0);
625                 }
626                 return FW_BLOCK;
627         }
628
629         /* If we can't investigate ports, treat as fragment.  It's
630          * either a trucated whole packet, or a truncated first
631          * fragment, or a TCP first fragment of length 8-15, in which
632          * case the above rule stops reassembly.
633          */
634         if (offset == 0) {
635                 unsigned int size_req;
636                 switch ((*pskb)->nh.iph->protocol) {
637                 case IPPROTO_TCP:
638                         /* Don't care about things past flags word */
639                         size_req = 16;
640                         break;
641
642                 case IPPROTO_UDP:
643                 case IPPROTO_ICMP:
644                         size_req = 8;
645                         break;
646
647                 default:
648                         size_req = 0;
649                 }
650
651                 /* If it is a truncated first fragment then it can be
652                  * used to rewrite port information, and thus should
653                  * be blocked.
654                  */
655                 if (ntohs((*pskb)->nh.iph->tot_len) <
656                     ((*pskb)->nh.iph->ihl<<2)+size_req) {
657                         if (!testing && net_ratelimit()) {
658                                 printk("Suspect short first fragment.\n");
659                                 dump_packet(pskb,rif,NULL,NULL,0,0,0,0);
660                         }
661                         return FW_BLOCK;
662                 }
663         }
664
665         src = (*pskb)->nh.iph->saddr;
666         dst = (*pskb)->nh.iph->daddr;
667         tos = (*pskb)->nh.iph->tos;
668
669         /*
670          *      If we got interface from which packet came
671          *      we can use the address directly. Linux 2.1 now uses address
672          *      chains per device too, but unlike BSD we first check if the
673          *      incoming packet matches a device address and the routing
674          *      table before calling the firewall.
675          */
676
677         dprintf("Packet ");
678         switch ((*pskb)->nh.iph->protocol) {
679                 case IPPROTO_TCP:
680                         dprintf("TCP ");
681                         if (!offset) {
682                                 struct tcphdr tcph;
683
684                                 if (skb_copy_bits(*pskb,
685                                                   (*pskb)->nh.iph->ihl * 4,
686                                                   &tcph, sizeof(tcph)))
687                                         return FW_BLOCK;
688
689                                 src_port = ntohs(tcph.source);
690                                 dst_port = ntohs(tcph.dest);
691
692                                 /* Connection initilisation can only
693                                  * be made when the syn bit is set and
694                                  * neither of the ack or reset is
695                                  * set. */
696                                 if (tcph.syn && !(tcph.ack || tcph.rst))
697                                         tcpsyn = 1;
698                         }
699                         break;
700                 case IPPROTO_UDP:
701                         dprintf("UDP ");
702                         if (!offset) {
703                                 struct udphdr udph;
704
705                                 if (skb_copy_bits(*pskb,
706                                                   (*pskb)->nh.iph->ihl * 4,
707                                                   &udph, sizeof(udph)))
708                                         return FW_BLOCK;
709
710                                 src_port = ntohs(udph.source);
711                                 dst_port = ntohs(udph.dest);
712                         }
713                         break;
714                 case IPPROTO_ICMP:
715                         if (!offset) {
716                                 struct icmphdr icmph;
717
718                                 if (skb_copy_bits(*pskb,
719                                                   (*pskb)->nh.iph->ihl * 4,
720                                                   &icmph, sizeof(icmph)))
721                                         return FW_BLOCK;
722
723                                 src_port = (__u16) icmph.type;
724                                 dst_port = (__u16) icmph.code;
725                         }
726                         dprintf("ICMP ");
727                         break;
728                 default:
729                         dprintf("p=%d ", (*pskb)->nh.iph->protocol);
730                         break;
731         }
732 #ifdef DEBUG_IP_FIREWALL
733         print_ip((*pskb)->nh.iph->saddr);
734
735         if (offset)
736                 dprintf(":fragment (%i) ", ((int)offset)<<2);
737         else if ((*pskb)->nh.iph->protocol == IPPROTO_TCP ||
738                  (*pskb)->nh.iph->protocol == IPPROTO_UDP ||
739                  (*pskb)->nh.iph->protocol == IPPROTO_ICMP)
740                 dprintf(":%hu:%hu", src_port, dst_port);
741         dprintf("\n");
742 #endif
743
744         if (!testing) FWC_READ_LOCK(&ip_fw_lock);
745         else FWC_HAVE_LOCK(fwc_rlocks);
746
747         f = chain->chain;
748         do {
749                 count = 0;
750                 for (; f; f = f->next) {
751                         count++;
752                         if (ip_rule_match(f, rif, pskb,
753                                           tcpsyn, src_port, dst_port,
754                                           offset)) {
755                                 if (!testing
756                                     && !ip_fw_domatch(f, rif, chain->label,
757                                                       pskb, slot,
758                                                       src_port, dst_port,
759                                                       count, tcpsyn, &tos)) {
760                                         ret = FW_BLOCK;
761                                         cleanup(chain, 0, slot);
762                                         goto out;
763                                 }
764                                 break;
765                         }
766                 }
767                 if (f) {
768                         if (f->branch) {
769                                 /* Do sanity check to see if we have
770                                  * already set prevchain and if so we
771                                  * must be in a loop */
772                                 if (f->branch->reent[slot].prevchain) {
773                                         if (!testing) {
774                                                 printk(KERN_ERR
775                                                        "IP firewall: "
776                                                        "Loop detected "
777                                                        "at `%s'.\n",
778                                                        f->branch->label);
779                                                 cleanup(chain, 1, slot);
780                                                 ret = FW_BLOCK;
781                                         } else {
782                                                 cleanup(chain, 0, slot);
783                                                 ret = FW_SKIP+1;
784                                         }
785                                 }
786                                 else {
787                                         f->branch->reent[slot].prevchain
788                                                 = chain;
789                                         f->branch->reent[slot].prevrule
790                                                 = f->next;
791                                         chain = f->branch;
792                                         f = chain->chain;
793                                 }
794                         }
795                         else if (f->simplebranch == FW_SKIP)
796                                 f = f->next;
797                         else if (f->simplebranch == FW_SKIP+1) {
798                                 /* Just like falling off the chain */
799                                 goto fall_off_chain;
800                         } else {
801                                 cleanup(chain, 0, slot);
802                                 ret = f->simplebranch;
803                         }
804                 } /* f == NULL */
805                 else {
806                 fall_off_chain:
807                         if (chain->reent[slot].prevchain) {
808                                 struct ip_chain *tmp = chain;
809                                 f = chain->reent[slot].prevrule;
810                                 chain = chain->reent[slot].prevchain;
811                                 tmp->reent[slot].prevchain = NULL;
812                         }
813                         else {
814                                 ret = chain->policy;
815                                 if (!testing) {
816                                         chain->reent[slot].counters.pcnt++;
817                                         chain->reent[slot].counters.bcnt
818                                                 += ntohs((*pskb)->nh.iph->tot_len);
819                                 }
820                         }
821                 }
822         } while (ret == FW_SKIP+2);
823
824  out:
825         if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
826
827         /* Recalculate checksum if not going to reject, and TOS changed. */
828         if ((*pskb)->nh.iph->tos != tos
829             && ret != FW_REJECT && ret != FW_BLOCK
830             && !testing) {
831                 if (!skb_ip_make_writable(pskb, offsetof(struct iphdr, tos)+1))
832                         ret = FW_BLOCK;
833                 else {
834                         (*pskb)->nh.iph->tos = tos;
835                         ip_send_check((*pskb)->nh.iph);
836                 }
837         }
838
839         if (ret == FW_REDIRECT && redirport) {
840                 if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
841                         /* Wildcard redirection.
842                          * Note that redirport will become
843                          * 0xFFFF for non-TCP/UDP packets.
844                          */
845                         *redirport = htons(dst_port);
846                 }
847         }
848
849 #ifdef DEBUG_ALLOW_ALL
850         return (testing ? ret : FW_ACCEPT);
851 #else
852         return ret;
853 #endif
854 }
855
856 /* Must have write lock & interrupts off for any of these */
857
858 /* This function sets all the byte counters in a chain to zero.  The
859  * input is a pointer to the chain required for zeroing */
860 static int zero_fw_chain(struct ip_chain *chainptr)
861 {
862         struct ip_fwkernel *i;
863
864         FWC_HAVE_LOCK(fwc_wlocks);
865         for (i = chainptr->chain; i; i = i->next)
866                 memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
867         return 0;
868 }
869
870 static int clear_fw_chain(struct ip_chain *chainptr)
871 {
872         struct ip_fwkernel *i= chainptr->chain;
873
874         FWC_HAVE_LOCK(fwc_wlocks);
875         chainptr->chain=NULL;
876
877         while (i) {
878                 struct ip_fwkernel *tmp = i->next;
879                 if (i->branch)
880                         i->branch->refcount--;
881                 kfree(i);
882                 i = tmp;
883                 /* We will block in cleanup's unregister sockopt if unloaded,
884                    so this is safe. */
885                 module_put(THIS_MODULE);
886         }
887         return 0;
888 }
889
890 static int replace_in_chain(struct ip_chain *chainptr,
891                             struct ip_fwkernel *frwl,
892                             __u32 position)
893 {
894         struct ip_fwkernel *f = chainptr->chain;
895
896         FWC_HAVE_LOCK(fwc_wlocks);
897
898         while (--position && f != NULL) f = f->next;
899         if (f == NULL)
900                 return EINVAL;
901
902         if (f->branch) f->branch->refcount--;
903         if (frwl->branch) frwl->branch->refcount++;
904
905         frwl->next = f->next;
906         memcpy(f,frwl,sizeof(struct ip_fwkernel));
907         kfree(frwl);
908         return 0;
909 }
910
911 static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
912 {
913         struct ip_fwkernel *i;
914
915         FWC_HAVE_LOCK(fwc_wlocks);
916
917         /* Are we unloading now?  We will block on nf_unregister_sockopt */
918         if (!try_module_get(THIS_MODULE))
919                 return ENOPROTOOPT;
920
921         /* Special case if no rules already present */
922         if (chainptr->chain == NULL) {
923
924                 /* If pointer writes are atomic then turning off
925                  * interrupts is not necessary. */
926                 chainptr->chain = rule;
927                 if (rule->branch) rule->branch->refcount++;
928                 goto append_successful;
929         }
930
931         /* Find the rule before the end of the chain */
932         for (i = chainptr->chain; i->next; i = i->next);
933         i->next = rule;
934         if (rule->branch) rule->branch->refcount++;
935
936 append_successful:
937         return 0;
938 }
939
940 /* This function inserts a rule at the position of position in the
941  * chain refenced by chainptr.  If position is 1 then this rule will
942  * become the new rule one. */
943 static int insert_in_chain(struct ip_chain *chainptr,
944                            struct ip_fwkernel *frwl,
945                            __u32 position)
946 {
947         struct ip_fwkernel *f = chainptr->chain;
948
949         FWC_HAVE_LOCK(fwc_wlocks);
950
951         /* Are we unloading now?  We will block on nf_unregister_sockopt */
952         if (!try_module_get(THIS_MODULE))
953                 return ENOPROTOOPT;
954
955         /* special case if the position is number 1 */
956         if (position == 1) {
957                 frwl->next = chainptr->chain;
958                 if (frwl->branch) frwl->branch->refcount++;
959                 chainptr->chain = frwl;
960                 goto insert_successful;
961         }
962         position--;
963         while (--position && f != NULL) f = f->next;
964         if (f == NULL)
965                 return EINVAL;
966         if (frwl->branch) frwl->branch->refcount++;
967         frwl->next = f->next;
968
969         f->next = frwl;
970
971 insert_successful:
972         return 0;
973 }
974
975 /* This function deletes the a rule from a given rulenum and chain.
976  * With rulenum = 1 is the first rule is deleted. */
977
978 static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
979 {
980         struct ip_fwkernel *i=chainptr->chain,*tmp;
981
982         FWC_HAVE_LOCK(fwc_wlocks);
983
984         if (!chainptr->chain)
985                 return ENOENT;
986
987         /* Need a special case for the first rule */
988         if (rulenum == 1) {
989                 /* store temp to allow for freeing up of memory */
990                 tmp = chainptr->chain;
991                 if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
992                 chainptr->chain = chainptr->chain->next;
993                 kfree(tmp); /* free memory that is now unused */
994         } else {
995                 rulenum--;
996                 while (--rulenum && i->next ) i = i->next;
997                 if (!i->next)
998                         return ENOENT;
999                 tmp = i->next;
1000                 if (i->next->branch)
1001                         i->next->branch->refcount--;
1002                 i->next = i->next->next;
1003                 kfree(tmp);
1004         }
1005
1006         /* We will block in cleanup's unregister sockopt if unloaded,
1007            so this is safe. */
1008         module_put(THIS_MODULE);
1009         return 0;
1010 }
1011
1012
1013 /* This function deletes the a rule from a given rule and chain.
1014  * The rule that is deleted is the first occursance of that rule. */
1015 static int del_rule_from_chain(struct ip_chain *chainptr,
1016                                struct ip_fwkernel *frwl)
1017 {
1018         struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
1019         int was_found;
1020
1021         FWC_HAVE_LOCK(fwc_wlocks);
1022
1023         /* Sure, we should compare marks, but since the `ipfwadm'
1024          * script uses it for an unholy hack... well, life is easier
1025          * this way.  We also mask it out of the flags word. --PR */
1026         for (ltmp=NULL, was_found=0;
1027              !was_found && ftmp != NULL;
1028              ltmp = ftmp,ftmp = ftmp->next) {
1029                 if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr
1030                     || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
1031                     || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
1032                     || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
1033 #if 0
1034                     || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
1035 #else
1036                     || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS)
1037                         != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
1038 #endif
1039                     || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
1040                     || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
1041 #if 0
1042                     || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
1043 #endif
1044                     || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
1045                     || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
1046                     || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
1047                     || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
1048                     || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
1049                     || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
1050                         duprintf("del_rule_from_chain: mismatch:"
1051                                  "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
1052                                  "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
1053                                  "mark:%u/%u "
1054                                  "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
1055                                  "outputsize:%hu-%hu\n",
1056                                  ftmp->ipfw.fw_src.s_addr,
1057                                  frwl->ipfw.fw_src.s_addr,
1058                                  ftmp->ipfw.fw_dst.s_addr,
1059                                  frwl->ipfw.fw_dst.s_addr,
1060                                  ftmp->ipfw.fw_smsk.s_addr,
1061                                  frwl->ipfw.fw_smsk.s_addr,
1062                                  ftmp->ipfw.fw_dmsk.s_addr,
1063                                  frwl->ipfw.fw_dmsk.s_addr,
1064                                  ftmp->ipfw.fw_flg,
1065                                  frwl->ipfw.fw_flg,
1066                                  ftmp->ipfw.fw_invflg,
1067                                  frwl->ipfw.fw_invflg,
1068                                  ftmp->ipfw.fw_proto,
1069                                  frwl->ipfw.fw_proto,
1070                                  ftmp->ipfw.fw_mark,
1071                                  frwl->ipfw.fw_mark,
1072                                  ftmp->ipfw.fw_spts[0],
1073                                  frwl->ipfw.fw_spts[0],
1074                                  ftmp->ipfw.fw_spts[1],
1075                                  frwl->ipfw.fw_spts[1],
1076                                  ftmp->ipfw.fw_dpts[0],
1077                                  frwl->ipfw.fw_dpts[0],
1078                                  ftmp->ipfw.fw_dpts[1],
1079                                  frwl->ipfw.fw_dpts[1],
1080                                  ftmp->ipfw.fw_outputsize,
1081                                  frwl->ipfw.fw_outputsize);
1082                         continue;
1083                 }
1084
1085                 if (strncmp(ftmp->ipfw.fw_vianame,
1086                             frwl->ipfw.fw_vianame,
1087                             IFNAMSIZ)) {
1088                         duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
1089                                  ftmp->ipfw.fw_vianame,
1090                                  frwl->ipfw.fw_vianame);
1091                         continue;
1092                 }
1093                 if (ftmp->branch != frwl->branch) {
1094                         duprintf("del_rule_from_chain: branch mismatch: "
1095                                  "%s/%s\n",
1096                                  ftmp->branch?ftmp->branch->label:"(null)",
1097                                  frwl->branch?frwl->branch->label:"(null)");
1098                         continue;
1099                 }
1100                 if (ftmp->branch == NULL
1101                     && ftmp->simplebranch != frwl->simplebranch) {
1102                         duprintf("del_rule_from_chain: simplebranch mismatch: "
1103                                  "%i/%i\n",
1104                                  ftmp->simplebranch, frwl->simplebranch);
1105                         continue;
1106                 }
1107                 was_found = 1;
1108                 if (ftmp->branch)
1109                         ftmp->branch->refcount--;
1110                 if (ltmp)
1111                         ltmp->next = ftmp->next;
1112                 else
1113                         chainptr->chain = ftmp->next;
1114                 kfree(ftmp);
1115                 /* We will block in cleanup's unregister sockopt if unloaded,
1116                    so this is safe. */
1117                 module_put(THIS_MODULE);
1118                 break;
1119         }
1120
1121         if (was_found)
1122                 return 0;
1123         else {
1124                 duprintf("del_rule_from_chain: no matching rule found\n");
1125                 return EINVAL;
1126         }
1127 }
1128
1129 /* This function takes the label of a chain and deletes the first
1130  * chain with that name.  No special cases required for the built in
1131  * chains as they have their refcount initilised to 1 so that they are
1132  * never deleted.  */
1133 static int del_chain(ip_chainlabel label)
1134 {
1135         struct ip_chain *tmp,*tmp2;
1136
1137         FWC_HAVE_LOCK(fwc_wlocks);
1138         /* Corner case: return EBUSY not ENOENT for first elem ("input") */
1139         if (strcmp(label, ip_fw_chains->label) == 0)
1140                 return EBUSY;
1141
1142         for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1143                 if(strcmp(tmp->next->label,label) == 0)
1144                         break;
1145
1146         tmp2 = tmp->next;
1147         if (!tmp2)
1148                 return ENOENT;
1149
1150         if (tmp2->refcount)
1151                 return EBUSY;
1152
1153         if (tmp2->chain)
1154                 return ENOTEMPTY;
1155
1156         tmp->next = tmp2->next;
1157         kfree(tmp2);
1158
1159         /* We will block in cleanup's unregister sockopt if unloaded,
1160            so this is safe. */
1161         module_put(THIS_MODULE);
1162         return 0;
1163 }
1164
1165 /* This is a function to initilise a chain.  Built in rules start with
1166  * refcount = 1 so that they cannot be deleted.  User defined rules
1167  * start with refcount = 0 so they can be deleted. */
1168 static struct ip_chain *ip_init_chain(ip_chainlabel name,
1169                                       __u32 ref,
1170                                       int policy)
1171 {
1172         unsigned int i;
1173         struct ip_chain *label
1174                 = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
1175         if (label == NULL)
1176                 panic("Can't kmalloc for firewall chains.\n");
1177         strcpy(label->label,name);
1178         label->next = NULL;
1179         label->chain = NULL;
1180         label->refcount = ref;
1181         label->policy = policy;
1182         for (i = 0; i < NUM_SLOTS; i++) {
1183                 label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
1184                         = 0;
1185                 label->reent[i].prevchain = NULL;
1186                 label->reent[i].prevrule = NULL;
1187         }
1188
1189         return label;
1190 }
1191
1192 /* This is a function for reating a new chain.  The chains is not
1193  * created if a chain of the same name already exists */
1194 static int create_chain(ip_chainlabel label)
1195 {
1196         struct ip_chain *tmp;
1197
1198         if (!check_label(label))
1199                 return EINVAL;
1200
1201         FWC_HAVE_LOCK(fwc_wlocks);
1202         for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1203                 if (strcmp(tmp->label,label) == 0)
1204                         return EEXIST;
1205
1206         if (strcmp(tmp->label,label) == 0)
1207                 return EEXIST;
1208
1209         /* Are we unloading now?  We will block on nf_unregister_sockopt */
1210         if (!try_module_get(THIS_MODULE))
1211                 return ENOPROTOOPT;
1212
1213         tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
1214                                               * zero since this is a
1215                                               * user defined chain *
1216                                               * and therefore can be
1217                                               * deleted */
1218         return 0;
1219 }
1220
1221 /* This function simply changes the policy on one of the built in
1222  * chains.  checking must be done before this is call to ensure that
1223  * chainptr is pointing to one of the three possible chains */
1224 static int change_policy(struct ip_chain *chainptr, int policy)
1225 {
1226         FWC_HAVE_LOCK(fwc_wlocks);
1227         chainptr->policy = policy;
1228         return 0;
1229 }
1230
1231 /* This function takes an ip_fwuser and converts it to a ip_fwkernel.  It also
1232  * performs some checks in the structure. */
1233 static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
1234 {
1235         struct ip_fwkernel *fwkern;
1236
1237         if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
1238                 duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
1239                          fwuser->ipfw.fw_flg);
1240                 *errno = EINVAL;
1241                 return NULL;
1242         }
1243
1244 #ifdef DEBUG_IP_FIREWALL_USER
1245         /* These are sanity checks that don't really matter.
1246          * We can get rid of these once testing is complete.
1247          */
1248         if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1249             && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1250                 || fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
1251                 duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
1252                 *errno = EINVAL;
1253                 return NULL;
1254         }
1255
1256         if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
1257             && fwuser->ipfw.fw_redirpt != 0) {
1258                 duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
1259                 *errno = EINVAL;
1260                 return NULL;
1261         }
1262
1263         if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1264              && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
1265             || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1266                 && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
1267                 duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
1268                 *errno = EINVAL;
1269                 return NULL;
1270         }
1271
1272         if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
1273              && fwuser->ipfw.fw_spts[0] == 0
1274              && fwuser->ipfw.fw_spts[1] == 0xFFFF)
1275             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
1276                 && fwuser->ipfw.fw_dpts[0] == 0
1277                 && fwuser->ipfw.fw_dpts[1] == 0xFFFF)
1278             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
1279                 && (fwuser->ipfw.fw_vianame)[0] == '\0')
1280             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
1281                 && fwuser->ipfw.fw_smsk.s_addr == 0)
1282             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
1283                 && fwuser->ipfw.fw_dmsk.s_addr == 0)) {
1284                 duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
1285                 *errno = EINVAL;
1286                 return NULL;
1287         }
1288
1289         if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1290             && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
1291             && (fwuser->ipfw.fw_spts[0] != 0
1292                 || fwuser->ipfw.fw_spts[1] != 0xFFFF
1293                 || fwuser->ipfw.fw_dpts[0] != 0
1294                 || fwuser->ipfw.fw_dpts[1] != 0xFFFF
1295                 || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
1296                 duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
1297                 *errno = EINVAL;
1298                 return NULL;
1299         }
1300 #endif
1301
1302         if ((fwuser->ipfw.fw_spts[0] != 0
1303              || fwuser->ipfw.fw_spts[1] != 0xFFFF
1304              || fwuser->ipfw.fw_dpts[0] != 0
1305              || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
1306             && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1307                 || (fwuser->ipfw.fw_proto != IPPROTO_TCP
1308                     && fwuser->ipfw.fw_proto != IPPROTO_UDP
1309                     && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
1310                 duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
1311                 *errno = EINVAL;
1312                 return NULL;
1313         }
1314
1315         fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_ATOMIC);
1316         if (!fwkern) {
1317                 duprintf("convert_ipfw: kmalloc failed!\n");
1318                 *errno = ENOMEM;
1319                 return NULL;
1320         }
1321         memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
1322
1323         if (!find_special(fwuser->label, &fwkern->simplebranch)) {
1324                 fwkern->branch = find_label(fwuser->label);
1325                 if (!fwkern->branch) {
1326                         duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
1327                                  fwuser->label);
1328                         kfree(fwkern);
1329                         *errno = ENOENT;
1330                         return NULL;
1331                 } else if (fwkern->branch == IP_FW_INPUT_CHAIN
1332                            || fwkern->branch == IP_FW_FORWARD_CHAIN
1333                            || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
1334                         duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
1335                                  fwuser->label);
1336                         kfree(fwkern);
1337                         *errno = ENOENT;
1338                         return NULL;
1339                 }
1340         } else
1341                 fwkern->branch = NULL;
1342         memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
1343
1344         /* Handle empty vianame by making it a wildcard */
1345         if ((fwkern->ipfw.fw_vianame)[0] == '\0')
1346             fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
1347
1348         fwkern->next = NULL;
1349         return fwkern;
1350 }
1351
1352 int ip_fw_ctl(int cmd, void *m, int len)
1353 {
1354         int ret;
1355         struct ip_chain *chain;
1356         unsigned long flags;
1357
1358         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1359
1360         switch (cmd) {
1361         case IP_FW_FLUSH:
1362                 if (len != sizeof(ip_chainlabel) || !check_label(m))
1363                         ret = EINVAL;
1364                 else if ((chain = find_label(m)) == NULL)
1365                         ret = ENOENT;
1366                 else ret = clear_fw_chain(chain);
1367                 break;
1368
1369         case IP_FW_ZERO:
1370                 if (len != sizeof(ip_chainlabel) || !check_label(m))
1371                         ret = EINVAL;
1372                 else if ((chain = find_label(m)) == NULL)
1373                         ret = ENOENT;
1374                 else ret = zero_fw_chain(chain);
1375                 break;
1376
1377         case IP_FW_CHECK: {
1378                 struct ip_fwtest *new = m;
1379                 struct iphdr *ip;
1380
1381                 /* Don't need write lock. */
1382                 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1383
1384                 if (len != sizeof(struct ip_fwtest) || !check_label(m))
1385                         return EINVAL;
1386
1387                 /* Need readlock to do find_label */
1388                 FWC_READ_LOCK(&ip_fw_lock);
1389
1390                 if ((chain = find_label(new->fwt_label)) == NULL)
1391                         ret = ENOENT;
1392                 else {
1393                         struct sk_buff *tmp_skb;
1394                         int hdrlen;
1395
1396                         hdrlen = sizeof(struct ip_fwpkt) -
1397                                 sizeof(struct in_addr) -
1398                                 IFNAMSIZ;
1399
1400                         ip = &(new->fwt_packet.fwp_iph);
1401
1402                         /* Fix this one up by hand, who knows how many
1403                          * tools will break if we start to barf on this.
1404                          */
1405                         if (ntohs(ip->tot_len) > hdrlen)
1406                                 ip->tot_len = htons(hdrlen);
1407
1408                         if (ip->ihl != sizeof(struct iphdr) / sizeof(u32)) {
1409                                 duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
1410                                          ip->ihl,
1411                                          sizeof(struct iphdr) / sizeof(u32));
1412                                 ret = EINVAL;
1413                         } else if ((tmp_skb = alloc_skb(hdrlen,
1414                                                         GFP_ATOMIC)) == NULL) {
1415                                 duprintf("ip_fw_ctl: tmp_skb alloc failure\n");
1416                                 ret = EFAULT;
1417                         } else {
1418                                 skb_reserve(tmp_skb, hdrlen);
1419                                 skb_push(tmp_skb, hdrlen);
1420                                 memcpy(tmp_skb->data, ip, hdrlen);
1421                                 tmp_skb->nh.raw =
1422                                         (unsigned char *) tmp_skb->data;
1423                                 ret = ip_fw_check(new->fwt_packet.fwp_vianame,
1424                                                   NULL, chain,
1425                                                   &tmp_skb, SLOT_NUMBER(), 1);
1426                                 kfree_skb(tmp_skb);
1427                                 switch (ret) {
1428                                 case FW_ACCEPT:
1429                                         ret = 0; break;
1430                                 case FW_REDIRECT:
1431                                         ret = ECONNABORTED; break;
1432                                 case FW_MASQUERADE:
1433                                         ret = ECONNRESET; break;
1434                                 case FW_REJECT:
1435                                         ret = ECONNREFUSED; break;
1436                                         /* Hack to help diag; these only get
1437                                            returned when testing. */
1438                                 case FW_SKIP+1:
1439                                         ret = ELOOP; break;
1440                                 case FW_SKIP:
1441                                         ret = ENFILE; break;
1442                                 default: /* FW_BLOCK */
1443                                         ret = ETIMEDOUT; break;
1444                                 }
1445                         }
1446                 }
1447                 FWC_READ_UNLOCK(&ip_fw_lock);
1448                 return ret;
1449         }
1450
1451         case IP_FW_MASQ_TIMEOUTS: {
1452                 ret = ip_fw_masq_timeouts(m, len);
1453         }
1454         break;
1455
1456         case IP_FW_REPLACE: {
1457                 struct ip_fwkernel *ip_fwkern;
1458                 struct ip_fwnew *new = m;
1459
1460                 if (len != sizeof(struct ip_fwnew)
1461                     || !check_label(new->fwn_label))
1462                         ret = EINVAL;
1463                 else if ((chain = find_label(new->fwn_label)) == NULL)
1464                         ret = ENOENT;
1465                 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1466                          != NULL)
1467                         ret = replace_in_chain(chain, ip_fwkern,
1468                                                new->fwn_rulenum);
1469         }
1470         break;
1471
1472         case IP_FW_APPEND: {
1473                 struct ip_fwchange *new = m;
1474                 struct ip_fwkernel *ip_fwkern;
1475
1476                 if (len != sizeof(struct ip_fwchange)
1477                     || !check_label(new->fwc_label))
1478                         ret = EINVAL;
1479                 else if ((chain = find_label(new->fwc_label)) == NULL)
1480                         ret = ENOENT;
1481                 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1482                          != NULL)
1483                         ret = append_to_chain(chain, ip_fwkern);
1484         }
1485         break;
1486
1487         case IP_FW_INSERT: {
1488                 struct ip_fwkernel *ip_fwkern;
1489                 struct ip_fwnew *new = m;
1490
1491                 if (len != sizeof(struct ip_fwnew)
1492                     || !check_label(new->fwn_label))
1493                         ret = EINVAL;
1494                 else if ((chain = find_label(new->fwn_label)) == NULL)
1495                         ret = ENOENT;
1496                 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1497                          != NULL)
1498                         ret = insert_in_chain(chain, ip_fwkern,
1499                                               new->fwn_rulenum);
1500         }
1501         break;
1502
1503         case IP_FW_DELETE: {
1504                 struct ip_fwchange *new = m;
1505                 struct ip_fwkernel *ip_fwkern;
1506
1507                 if (len != sizeof(struct ip_fwchange)
1508                     || !check_label(new->fwc_label))
1509                         ret = EINVAL;
1510                 else if ((chain = find_label(new->fwc_label)) == NULL)
1511                         ret = ENOENT;
1512                 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1513                          != NULL) {
1514                         ret = del_rule_from_chain(chain, ip_fwkern);
1515                         kfree(ip_fwkern);
1516                 }
1517         }
1518         break;
1519
1520         case IP_FW_DELETE_NUM: {
1521                 struct ip_fwdelnum *new = m;
1522
1523                 if (len != sizeof(struct ip_fwdelnum)
1524                     || !check_label(new->fwd_label))
1525                         ret = EINVAL;
1526                 else if ((chain = find_label(new->fwd_label)) == NULL)
1527                         ret = ENOENT;
1528                 else ret = del_num_from_chain(chain, new->fwd_rulenum);
1529         }
1530         break;
1531
1532         case IP_FW_CREATECHAIN: {
1533                 if (len != sizeof(ip_chainlabel)) {
1534                         duprintf("create_chain: bad size %i\n", len);
1535                         ret = EINVAL;
1536                 }
1537                 else ret = create_chain(m);
1538         }
1539         break;
1540
1541         case IP_FW_DELETECHAIN: {
1542                 if (len != sizeof(ip_chainlabel)) {
1543                         duprintf("delete_chain: bad size %i\n", len);
1544                         ret = EINVAL;
1545                 }
1546                 else ret = del_chain(m);
1547         }
1548         break;
1549
1550         case IP_FW_POLICY: {
1551                 struct ip_fwpolicy *new = m;
1552
1553                 if (len != sizeof(struct ip_fwpolicy)
1554                     || !check_label(new->fwp_label))
1555                         ret = EINVAL;
1556                 else if ((chain = find_label(new->fwp_label)) == NULL)
1557                         ret = ENOENT;
1558                 else if (chain != IP_FW_INPUT_CHAIN
1559                          && chain != IP_FW_FORWARD_CHAIN
1560                          && chain != IP_FW_OUTPUT_CHAIN) {
1561                         duprintf("change_policy: can't change policy on user"
1562                                  " defined chain.\n");
1563                         ret = EINVAL;
1564                 }
1565                 else {
1566                         int pol = FW_SKIP;
1567                         find_special(new->fwp_policy, &pol);
1568
1569                         switch(pol) {
1570                         case FW_MASQUERADE:
1571                                 if (chain != IP_FW_FORWARD_CHAIN) {
1572                                         ret = EINVAL;
1573                                         break;
1574                                 }
1575                                 /* Fall thru... */
1576                         case FW_BLOCK:
1577                         case FW_ACCEPT:
1578                         case FW_REJECT:
1579                                 ret = change_policy(chain, pol);
1580                                 break;
1581                         default:
1582                                 duprintf("change_policy: bad policy `%s'\n",
1583                                          new->fwp_policy);
1584                                 ret = EINVAL;
1585                         }
1586                 }
1587                 break;
1588         }
1589         default:
1590                 duprintf("ip_fw_ctl:  unknown request %d\n",cmd);
1591                 ret = ENOPROTOOPT;
1592         }
1593
1594         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1595         return ret;
1596 }
1597
1598 /* Returns bytes used - doesn't NUL terminate */
1599 static int dump_rule(char *buffer,
1600                      const char *chainlabel,
1601                      const struct ip_fwkernel *rule)
1602 {
1603         int len;
1604         unsigned int i;
1605         __u64 packets = 0, bytes = 0;
1606
1607         FWC_HAVE_LOCK(fwc_wlocks);
1608         for (i = 0; i < NUM_SLOTS; i++) {
1609                 packets += rule->counters[i].pcnt;
1610                 bytes += rule->counters[i].bcnt;
1611         }
1612
1613         len=sprintf(buffer,
1614                     "%9s "                      /* Chain name */
1615                     "%08X/%08X->%08X/%08X "     /* Source & Destination IPs */
1616                     "%.16s "                    /* Interface */
1617                     "%X %X "                    /* fw_flg and fw_invflg fields */
1618                     "%u "                       /* Protocol */
1619                     "%-9u %-9u %-9u %-9u "      /* Packet & byte counters */
1620                     "%u-%u %u-%u "              /* Source & Dest port ranges */
1621                     "A%02X X%02X "              /* TOS and and xor masks */
1622                     "%08X "                     /* Redirection port */
1623                     "%u "                       /* fw_mark field */
1624                     "%u "                       /* output size */
1625                     "%9s\n",                    /* Target */
1626                     chainlabel,
1627                     ntohl(rule->ipfw.fw_src.s_addr),
1628                     ntohl(rule->ipfw.fw_smsk.s_addr),
1629                     ntohl(rule->ipfw.fw_dst.s_addr),
1630                     ntohl(rule->ipfw.fw_dmsk.s_addr),
1631                     (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
1632                     rule->ipfw.fw_flg,
1633                     rule->ipfw.fw_invflg,
1634                     rule->ipfw.fw_proto,
1635                     (__u32)(packets >> 32), (__u32)packets,
1636                     (__u32)(bytes >> 32), (__u32)bytes,
1637                     rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
1638                     rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
1639                     rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
1640                     rule->ipfw.fw_redirpt,
1641                     rule->ipfw.fw_mark,
1642                     rule->ipfw.fw_outputsize,
1643                     branchname(rule->branch,rule->simplebranch));
1644
1645         duprintf("dump_rule: %i bytes done.\n", len);
1646         return len;
1647 }
1648
1649 /* File offset is actually in records, not bytes. */
1650 static int ip_chain_procinfo(char *buffer, char **start,
1651                              off_t offset, int length)
1652 {
1653         struct ip_chain *i;
1654         struct ip_fwkernel *j = ip_fw_chains->chain;
1655         unsigned long flags;
1656         int len = 0;
1657         int last_len = 0;
1658         off_t upto = 0;
1659
1660         duprintf("Offset starts at %lu\n", offset);
1661         duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
1662
1663         /* Need a write lock to lock out ``readers'' which update counters. */
1664         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1665
1666         for (i = ip_fw_chains; i; i = i->next) {
1667             for (j = i->chain; j; j = j->next) {
1668                 if (upto == offset) break;
1669                 duprintf("Skipping rule in chain `%s'\n",
1670                          i->label);
1671                 upto++;
1672             }
1673             if (upto == offset) break;
1674         }
1675
1676         /* Don't init j first time, or once i = NULL */
1677         for (; i; (void)((i = i->next) && (j = i->chain))) {
1678                 duprintf("Dumping chain `%s'\n", i->label);
1679                 for (; j; j = j->next, upto++, last_len = len)
1680                 {
1681                         len += dump_rule(buffer+len, i->label, j);
1682                         if (len > length) {
1683                                 duprintf("Dumped to %i (past %i).  "
1684                                          "Moving back to %i.\n",
1685                                          len, length, last_len);
1686                                 len = last_len;
1687                                 goto outside;
1688                         }
1689                 }
1690         }
1691 outside:
1692         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1693         buffer[len] = '\0';
1694
1695         duprintf("ip_chain_procinfo: Length = %i (of %i).  Offset = %li.\n",
1696                  len, length, upto);
1697         /* `start' hack - see fs/proc/generic.c line ~165 */
1698         *start=(char *)((unsigned int)upto-offset);
1699         return len;
1700 }
1701
1702 static int ip_chain_name_procinfo(char *buffer, char **start,
1703                                   off_t offset, int length)
1704 {
1705         struct ip_chain *i;
1706         int len = 0,last_len = 0;
1707         off_t pos = 0,begin = 0;
1708         unsigned long flags;
1709
1710         /* Need a write lock to lock out ``readers'' which update counters. */
1711         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1712
1713         for (i = ip_fw_chains; i; i = i->next)
1714         {
1715                 unsigned int j;
1716                 __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
1717
1718                 for (j = 0; j < NUM_SLOTS; j++) {
1719                         packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
1720                         packetsHi += ((i->reent[j].counters.pcnt >> 32)
1721                                       & 0xFFFFFFFF);
1722                         bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
1723                         bytesHi += ((i->reent[j].counters.bcnt >> 32)
1724                                     & 0xFFFFFFFF);
1725                 }
1726
1727                 /* print the label and the policy */
1728                 len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
1729                              i->label,branchname(NULL, i->policy),i->refcount,
1730                              packetsHi, packetsLo, bytesHi, bytesLo);
1731                 pos=begin+len;
1732                 if(pos<offset) {
1733                         len=0;
1734                         begin=pos;
1735                 }
1736                 else if(pos>offset+length) {
1737                         len = last_len;
1738                         break;
1739                 }
1740
1741                 last_len = len;
1742         }
1743         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1744
1745         *start = buffer+(offset-begin);
1746         len-=(offset-begin);
1747         if(len>length)
1748                 len=length;
1749         return len;
1750 }
1751
1752 /*
1753  *      Interface to the generic firewall chains.
1754  */
1755 int ipfw_input_check(struct firewall_ops *this, int pf,
1756                      struct net_device *dev, void *arg,
1757                      struct sk_buff **pskb)
1758 {
1759         return ip_fw_check(dev->name,
1760                            arg, IP_FW_INPUT_CHAIN, pskb, SLOT_NUMBER(), 0);
1761 }
1762
1763 int ipfw_output_check(struct firewall_ops *this, int pf,
1764                       struct net_device *dev, void *arg,
1765                       struct sk_buff **pskb)
1766 {
1767         /* Locally generated bogus packets by root. <SIGH>. */
1768         if ((*pskb)->len < sizeof(struct iphdr) ||
1769             (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
1770                 return FW_ACCEPT;
1771         return ip_fw_check(dev->name,
1772                            arg, IP_FW_OUTPUT_CHAIN, pskb, SLOT_NUMBER(), 0);
1773 }
1774
1775 int ipfw_forward_check(struct firewall_ops *this, int pf,
1776                        struct net_device *dev, void *arg,
1777                        struct sk_buff **pskb)
1778 {
1779         return ip_fw_check(dev->name,
1780                            arg, IP_FW_FORWARD_CHAIN, pskb, SLOT_NUMBER(), 0);
1781 }
1782
1783 struct firewall_ops ipfw_ops = {
1784         .fw_forward     =       ipfw_forward_check,
1785         .fw_input       =       ipfw_input_check,
1786         .fw_output      =       ipfw_output_check,
1787 };
1788
1789 int ipfw_init_or_cleanup(int init)
1790 {
1791         struct proc_dir_entry *proc;
1792         int ret = 0;
1793         unsigned long flags;
1794
1795         if (!init) goto cleanup;
1796
1797 #ifdef DEBUG_IP_FIREWALL_LOCKING
1798         fwc_wlocks = fwc_rlocks = 0;
1799 #endif
1800
1801 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1802         ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
1803         if (ipfwsk == NULL)
1804                 goto cleanup_nothing;
1805 #endif
1806
1807         ret = register_firewall(PF_INET, &ipfw_ops);
1808         if (ret < 0)
1809                 goto cleanup_netlink;
1810
1811         proc = proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR,
1812                                ip_chain_procinfo);
1813         if (proc) proc->owner = THIS_MODULE;
1814         proc = proc_net_create(IP_FW_PROC_CHAIN_NAMES,
1815                                S_IFREG | S_IRUSR | S_IWUSR,
1816                                ip_chain_name_procinfo);
1817         if (proc) proc->owner = THIS_MODULE;
1818
1819         IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
1820         IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
1821         IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
1822
1823         return ret;
1824
1825  cleanup:
1826         unregister_firewall(PF_INET, &ipfw_ops);
1827
1828         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1829         while (ip_fw_chains) {
1830                 struct ip_chain *next = ip_fw_chains->next;
1831
1832                 clear_fw_chain(ip_fw_chains);
1833                 kfree(ip_fw_chains);
1834                 ip_fw_chains = next;
1835         }
1836         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1837
1838         proc_net_remove(IP_FW_PROC_CHAINS);
1839         proc_net_remove(IP_FW_PROC_CHAIN_NAMES);
1840
1841  cleanup_netlink:
1842 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1843         sock_release(ipfwsk->sk_socket);
1844
1845  cleanup_nothing:
1846 #endif
1847         return ret;
1848 }