vserver 1.9.3
[linux-2.6.git] / net / ipv4 / netfilter / ipchains_core.c
1 #warning ipchains is obsolete, and will be removed soon.
2
3 /* Minor modifications to fit on compatibility framework:
4    Rusty.Russell@rustcorp.com.au
5 */
6
7 /*
8  * This code is heavily based on the code on the old ip_fw.c code; see below for
9  * copyrights and attributions of the old code.  This code is basically GPL.
10  *
11  * 15-Aug-1997: Major changes to allow graphs for firewall rules.
12  *              Paul Russell <Paul.Russell@rustcorp.com.au> and
13  *              Michael Neuling <Michael.Neuling@rustcorp.com.au>
14  * 24-Aug-1997: Generalised protocol handling (not just TCP/UDP/ICMP).
15  *              Added explicit RETURN from chains.
16  *              Removed TOS mangling (done in ipchains 1.0.1).
17  *              Fixed read & reset bug by reworking proc handling.
18  *              Paul Russell <Paul.Russell@rustcorp.com.au>
19  * 28-Sep-1997: Added packet marking for net sched code.
20  *              Removed fw_via comparisons: all done on device name now,
21  *              similar to changes in ip_fw.c in DaveM's CVS970924 tree.
22  *              Paul Russell <Paul.Russell@rustcorp.com.au>
23  * 2-Nov-1997:  Moved types across to __u16, etc.
24  *              Added inverse flags.
25  *              Fixed fragment bug (in args to port_match).
26  *              Changed mark to only one flag (MARKABS).
27  * 21-Nov-1997: Added ability to test ICMP code.
28  * 19-Jan-1998: Added wildcard interfaces.
29  * 6-Feb-1998:  Merged 2.0 and 2.1 versions.
30  *              Initialised ip_masq for 2.0.x version.
31  *              Added explicit NETLINK option for 2.1.x version.
32  *              Added packet and byte counters for policy matches.
33  * 26-Feb-1998: Fixed race conditions, added SMP support.
34  * 18-Mar-1998: Fix SMP, fix race condition fix.
35  * 1-May-1998:  Remove caching of device pointer.
36  * 12-May-1998: Allow tiny fragment case for TCP/UDP.
37  * 15-May-1998: Treat short packets as fragments, don't just block.
38  * 3-Jan-1999:  Fixed serious procfs security hole -- users should never
39  *              be allowed to view the chains!
40  *              Marc Santoro <ultima@snicker.emoti.com>
41  * 29-Jan-1999: Locally generated bogus IPs dealt with, rather than crash
42  *              during dump_packet. --RR.
43  * 19-May-1999: Star Wars: The Phantom Menace opened.  Rule num
44  *              printed in log (modified from Michael Hasenstein's patch).
45  *              Added SYN in log message. --RR
46  * 23-Jul-1999: Fixed small fragment security exposure opened on 15-May-1998.
47  *              John McDonald <jm@dataprotect.com>
48  *              Thomas Lopatic <tl@dataprotect.com>
49  */
50
51 /*
52  *
53  * The origina Linux port was done Alan Cox, with changes/fixes from
54  * Pauline Middlelink, Jos Vos, Thomas Quinot, Wouter Gadeyne, Juan
55  * Jose Ciarlante, Bernd Eckenfels, Keith Owens and others.
56  *
57  * Copyright from the original FreeBSD version follows:
58  *
59  * Copyright (c) 1993 Daniel Boulet
60  * Copyright (c) 1994 Ugen J.S.Antsilevich
61  *
62  * Redistribution and use in source forms, with and without modification,
63  * are permitted provided that this entire comment appears intact.
64  *
65  * Redistribution in binary form may occur without any restrictions.
66  * Obviously, it would be nice if you gave credit where credit is due
67  * but requiring it would be too onerous.
68  *
69  * This software is provided ``AS IS'' without any warranties of any kind.  */
70
71 #include <linux/config.h>
72
73 #include <asm/uaccess.h>
74 #include <asm/system.h>
75 #include <linux/types.h>
76 #include <linux/sched.h>
77 #include <linux/string.h>
78 #include <linux/errno.h>
79 #include <linux/module.h>
80
81 #include <linux/socket.h>
82 #include <linux/sockios.h>
83 #include <linux/in.h>
84 #include <linux/inet.h>
85 #include <linux/netdevice.h>
86 #include <linux/icmp.h>
87 #include <linux/udp.h>
88 #include <net/ip.h>
89 #include <net/protocol.h>
90 #include <net/route.h>
91 #include <net/tcp.h>
92 #include <net/udp.h>
93 #include <net/sock.h>
94 #include <net/icmp.h>
95 #include <linux/netlink.h>
96 #include <linux/netfilter.h>
97 #include <linux/netfilter_ipv4/compat_firewall.h>
98 #include <linux/netfilter_ipv4/ipchains_core.h>
99 #include <linux/netfilter_ipv4/ip_nat_core.h>
100
101 #include <net/checksum.h>
102 #include <linux/proc_fs.h>
103 #include <linux/stat.h>
104
105 MODULE_LICENSE("Dual BSD/GPL");
106 MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
107 MODULE_DESCRIPTION("ipchains backwards compatibility layer");
108
109 /* Understanding locking in this code: (thanks to Alan Cox for using
110  * little words to explain this to me). -- PR
111  *
112  * In UP, there can be two packets traversing the chains:
113  * 1) A packet from the current userspace context
114  * 2) A packet off the bh handlers (timer or net).
115  *
116  * For SMP (kernel v2.1+), multiply this by # CPUs.
117  *
118  * [Note that this is not correct for 2.2 - because the socket code always
119  *  uses lock_kernel() to serialize, and bottom halves (timers and net_bhs)
120  *  only run on one CPU at a time.  This will probably change for 2.3.
121  *  It is still good to use spinlocks because that avoids the global cli()
122  *  for updating the tables, which is rather costly in SMP kernels -AK]
123  *
124  * This means counters and backchains can get corrupted if no precautions
125  * are taken.
126  *
127  * To actually alter a chain on UP, we need only do a cli(), as this will
128  * stop a bh handler firing, as we are in the current userspace context
129  * (coming from a setsockopt()).
130  *
131  * On SMP, we need a write_lock_irqsave(), which is a simple cli() in
132  * UP.
133  *
134  * For backchains and counters, we use an array, indexed by
135  * [smp_processor_id()*2 + !in_interrupt()]; the array is of
136  * size [NR_CPUS*2].  For v2.0, NR_CPUS is effectively 1.  So,
137  * confident of uniqueness, we modify counters even though we only
138  * have a read lock (to read the counters, you need a write lock,
139  * though).  */
140
141 /* Why I didn't use straight locking... -- PR
142  *
143  * The backchains can be separated out of the ip_chains structure, and
144  * allocated as needed inside ip_fw_check().
145  *
146  * The counters, however, can't.  Trying to lock these means blocking
147  * interrupts every time we want to access them.  This would suck HARD
148  * performance-wise.  Not locking them leads to possible corruption,
149  * made worse on 32-bit machines (counters are 64-bit).  */
150
151 /*#define DEBUG_IP_FIREWALL*/
152 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
153 /*#define DEBUG_IP_FIREWALL_USER*/
154 /*#define DEBUG_IP_FIREWALL_LOCKING*/
155
156 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
157 static struct sock *ipfwsk;
158 #endif
159
160 #ifdef CONFIG_SMP
161 #define SLOT_NUMBER() (smp_processor_id()*2 + !in_interrupt())
162 #else /* !SMP */
163 #define SLOT_NUMBER() (!in_interrupt())
164 #endif /* CONFIG_SMP */
165 #define NUM_SLOTS (NR_CPUS*2)
166
167 #define SIZEOF_STRUCT_IP_CHAIN (sizeof(struct ip_chain) \
168                                 + NUM_SLOTS*sizeof(struct ip_reent))
169 #define SIZEOF_STRUCT_IP_FW_KERNEL (sizeof(struct ip_fwkernel) \
170                                     + NUM_SLOTS*sizeof(struct ip_counters))
171
172 #ifdef DEBUG_IP_FIREWALL_LOCKING
173 static unsigned int fwc_rlocks, fwc_wlocks;
174 #define FWC_DEBUG_LOCK(d)                       \
175 do {                                            \
176         FWC_DONT_HAVE_LOCK(d);                  \
177         d |= (1 << SLOT_NUMBER());              \
178 } while (0)
179
180 #define FWC_DEBUG_UNLOCK(d)                     \
181 do {                                            \
182         FWC_HAVE_LOCK(d);                       \
183         d &= ~(1 << SLOT_NUMBER());             \
184 } while (0)
185
186 #define FWC_DONT_HAVE_LOCK(d)                                   \
187 do {                                                            \
188         if ((d) & (1 << SLOT_NUMBER()))                         \
189                 printk("%s:%i: Got lock on %i already!\n",      \
190                        __FILE__, __LINE__, SLOT_NUMBER());      \
191 } while(0)
192
193 #define FWC_HAVE_LOCK(d)                                \
194 do {                                                    \
195         if (!((d) & (1 << SLOT_NUMBER())))              \
196         printk("%s:%i:No lock on %i!\n",                \
197                __FILE__, __LINE__, SLOT_NUMBER());      \
198 } while (0)
199
200 #else
201 #define FWC_DEBUG_LOCK(d) do { } while(0)
202 #define FWC_DEBUG_UNLOCK(d) do { } while(0)
203 #define FWC_DONT_HAVE_LOCK(d) do { } while(0)
204 #define FWC_HAVE_LOCK(d) do { } while(0)
205 #endif /*DEBUG_IP_FIRWALL_LOCKING*/
206
207 #define FWC_READ_LOCK(l) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock(l); } while (0)
208 #define FWC_WRITE_LOCK(l) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock(l); } while (0)
209 #define FWC_READ_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_rlocks); read_lock_irqsave(l,f); } while (0)
210 #define FWC_WRITE_LOCK_IRQ(l,f) do { FWC_DEBUG_LOCK(fwc_wlocks); write_lock_irqsave(l,f); } while (0)
211 #define FWC_READ_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock(l); } while (0)
212 #define FWC_WRITE_UNLOCK(l) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock(l); } while (0)
213 #define FWC_READ_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_rlocks); read_unlock_irqrestore(l,f); } while (0)
214 #define FWC_WRITE_UNLOCK_IRQ(l,f) do { FWC_DEBUG_UNLOCK(fwc_wlocks); write_unlock_irqrestore(l,f); } while (0)
215
216 struct ip_chain;
217
218 struct ip_counters
219 {
220         __u64 pcnt, bcnt;                       /* Packet and byte counters */
221 };
222
223 struct ip_fwkernel
224 {
225         struct ip_fw ipfw;
226         struct ip_fwkernel *next;       /* where to go next if current
227                                          * rule doesn't match */
228         struct ip_chain *branch;        /* which branch to jump to if
229                                          * current rule matches */
230         int simplebranch;               /* Use this if branch == NULL */
231         struct ip_counters counters[0]; /* Actually several of these */
232 };
233
234 struct ip_reent
235 {
236         struct ip_chain *prevchain;     /* Pointer to referencing chain */
237         struct ip_fwkernel *prevrule;   /* Pointer to referencing rule */
238         struct ip_counters counters;
239 };
240
241 struct ip_chain
242 {
243         ip_chainlabel label;        /* Defines the label for each block */
244         struct ip_chain *next;      /* Pointer to next block */
245         struct ip_fwkernel *chain;  /* Pointer to first rule in block */
246         __u32 refcount;             /* Number of refernces to block */
247         int policy;                 /* Default rule for chain.  Only *
248                                      * used in built in chains */
249         struct ip_reent reent[0];   /* Actually several of these */
250 };
251
252 /*
253  *      Implement IP packet firewall
254  */
255
256 #ifdef DEBUG_IP_FIREWALL
257 #define dprintf(format, args...)  printk(format , ## args)
258 #else
259 #define dprintf(format, args...)
260 #endif
261
262 #ifdef DEBUG_IP_FIREWALL_USER
263 #define duprintf(format, args...) printk(format , ## args)
264 #else
265 #define duprintf(format, args...)
266 #endif
267
268 /* Lock around ip_fw_chains linked list structure */
269 rwlock_t ip_fw_lock = RW_LOCK_UNLOCKED;
270
271 /* Head of linked list of fw rules */
272 static struct ip_chain *ip_fw_chains;
273
274 #define IP_FW_INPUT_CHAIN ip_fw_chains
275 #define IP_FW_FORWARD_CHAIN (ip_fw_chains->next)
276 #define IP_FW_OUTPUT_CHAIN (ip_fw_chains->next->next)
277
278 /* Returns 1 if the port is matched by the range, 0 otherwise */
279 extern inline int port_match(__u16 min, __u16 max, __u16 port,
280                              int frag, int invert)
281 {
282         if (frag) /* Fragments fail ANY port test. */
283                 return (min == 0 && max == 0xFFFF);
284         else return (port >= min && port <= max) ^ invert;
285 }
286
287 /* Returns whether matches rule or not. */
288 static int ip_rule_match(struct ip_fwkernel *f,
289                          const char *ifname,
290                          struct sk_buff **pskb,
291                          char tcpsyn,
292                          __u16 src_port, __u16 dst_port,
293                          char isfrag)
294 {
295         struct iphdr *ip = (*pskb)->nh.iph;
296
297 #define FWINV(bool,invflg) ((bool) ^ !!(f->ipfw.fw_invflg & invflg))
298         /*
299          *      This is a bit simpler as we don't have to walk
300          *      an interface chain as you do in BSD - same logic
301          *      however.
302          */
303
304         if (FWINV((ip->saddr&f->ipfw.fw_smsk.s_addr) != f->ipfw.fw_src.s_addr,
305                   IP_FW_INV_SRCIP)
306             || FWINV((ip->daddr&f->ipfw.fw_dmsk.s_addr)!=f->ipfw.fw_dst.s_addr,
307                      IP_FW_INV_DSTIP)) {
308                 dprintf("Source or dest mismatch.\n");
309
310                 dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
311                         f->ipfw.fw_smsk.s_addr, f->ipfw.fw_src.s_addr,
312                         f->ipfw.fw_invflg & IP_FW_INV_SRCIP ? " (INV)" : "");
313                 dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
314                         f->ipfw.fw_dmsk.s_addr, f->ipfw.fw_dst.s_addr,
315                         f->ipfw.fw_invflg & IP_FW_INV_DSTIP ? " (INV)" : "");
316                 return 0;
317         }
318
319         /*
320          *      Look for a VIA device match
321          */
322         if (f->ipfw.fw_flg & IP_FW_F_WILDIF) {
323             if (FWINV(strncmp(ifname, f->ipfw.fw_vianame,
324                               strlen(f->ipfw.fw_vianame)) != 0,
325                       IP_FW_INV_VIA)) {
326                 dprintf("Wildcard interface mismatch.%s\n",
327                         f->ipfw.fw_invflg & IP_FW_INV_VIA ? " (INV)" : "");
328                 return 0;       /* Mismatch */
329             }
330         }
331         else if (FWINV(strcmp(ifname, f->ipfw.fw_vianame) != 0,
332                        IP_FW_INV_VIA)) {
333             dprintf("Interface name does not match.%s\n",
334                     f->ipfw.fw_invflg & IP_FW_INV_VIA
335                     ? " (INV)" : "");
336             return 0;   /* Mismatch */
337         }
338
339         /*
340          *      Ok the chain addresses match.
341          */
342
343         /* If we have a fragment rule but the packet is not a fragment
344          * the we return zero */
345         if (FWINV((f->ipfw.fw_flg&IP_FW_F_FRAG) && !isfrag, IP_FW_INV_FRAG)) {
346                 dprintf("Fragment rule but not fragment.%s\n",
347                         f->ipfw.fw_invflg & IP_FW_INV_FRAG ? " (INV)" : "");
348                 return 0;
349         }
350
351         /* Fragment NEVER passes a SYN test, even an inverted one. */
352         if (FWINV((f->ipfw.fw_flg&IP_FW_F_TCPSYN) && !tcpsyn, IP_FW_INV_SYN)
353             || (isfrag && (f->ipfw.fw_flg&IP_FW_F_TCPSYN))) {
354                 dprintf("Rule requires SYN and packet has no SYN.%s\n",
355                         f->ipfw.fw_invflg & IP_FW_INV_SYN ? " (INV)" : "");
356                 return 0;
357         }
358
359         if (f->ipfw.fw_proto) {
360                 /*
361                  *      Specific firewall - packet's protocol
362                  *      must match firewall's.
363                  */
364
365                 if (FWINV(ip->protocol!=f->ipfw.fw_proto, IP_FW_INV_PROTO)) {
366                         dprintf("Packet protocol %hi does not match %hi.%s\n",
367                                 ip->protocol, f->ipfw.fw_proto,
368                                 f->ipfw.fw_invflg&IP_FW_INV_PROTO ? " (INV)":"");
369                         return 0;
370                 }
371
372                 /* For non TCP/UDP/ICMP, port range is max anyway. */
373                 if (!port_match(f->ipfw.fw_spts[0],
374                                 f->ipfw.fw_spts[1],
375                                 src_port, isfrag,
376                                 !!(f->ipfw.fw_invflg&IP_FW_INV_SRCPT))
377                     || !port_match(f->ipfw.fw_dpts[0],
378                                    f->ipfw.fw_dpts[1],
379                                    dst_port, isfrag,
380                                    !!(f->ipfw.fw_invflg
381                                       &IP_FW_INV_DSTPT))) {
382                     dprintf("Port match failed.\n");
383                     return 0;
384                 }
385         }
386
387         dprintf("Match succeeded.\n");
388         return 1;
389 }
390
391 static const char *branchname(struct ip_chain *branch,int simplebranch)
392 {
393         if (branch)
394                 return branch->label;
395         switch (simplebranch)
396         {
397         case FW_BLOCK: return IP_FW_LABEL_BLOCK;
398         case FW_ACCEPT: return IP_FW_LABEL_ACCEPT;
399         case FW_REJECT: return IP_FW_LABEL_REJECT;
400         case FW_REDIRECT: return IP_FW_LABEL_REDIRECT;
401         case FW_MASQUERADE: return IP_FW_LABEL_MASQUERADE;
402         case FW_SKIP: return "-";
403         case FW_SKIP+1: return IP_FW_LABEL_RETURN;
404         default:
405                 return "UNKNOWN";
406         }
407 }
408
409 /*
410  * VERY ugly piece of code which actually
411  * makes kernel printf for matching packets...
412  */
413 static void dump_packet(struct sk_buff **pskb,
414                         const char *ifname,
415                         struct ip_fwkernel *f,
416                         const ip_chainlabel chainlabel,
417                         __u16 src_port,
418                         __u16 dst_port,
419                         unsigned int count,
420                         int syn)
421 {
422         __u32 *opt = (__u32 *) ((*pskb)->nh.iph + 1);
423         int opti;
424
425         if (f) {
426                 printk(KERN_INFO "Packet log: %s ",chainlabel);
427                 printk("%s ",branchname(f->branch,f->simplebranch));
428                 if (f->simplebranch==FW_REDIRECT)
429                         printk("%d ",f->ipfw.fw_redirpt);
430         }
431
432         printk("%s PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
433                " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
434                ifname, (*pskb)->nh.iph->protocol,
435                NIPQUAD((*pskb)->nh.iph->saddr),
436                src_port,
437                NIPQUAD((*pskb)->nh.iph->daddr),
438                dst_port,
439                ntohs((*pskb)->nh.iph->tot_len),
440                (*pskb)->nh.iph->tos,
441                ntohs((*pskb)->nh.iph->id),
442                ntohs((*pskb)->nh.iph->frag_off),
443                (*pskb)->nh.iph->ttl);
444
445         for (opti = 0; opti < ((*pskb)->nh.iph->ihl - sizeof(struct iphdr) / 4); opti++)
446                 printk(" O=0x%8.8X", *opt++);
447         printk(" %s(#%d)\n", syn ? "SYN " : /* "PENANCE" */ "", count);
448 }
449
450 /* function for checking chain labels for user space. */
451 static int check_label(ip_chainlabel label)
452 {
453         unsigned int i;
454         /* strlen must be < IP_FW_MAX_LABEL_LENGTH. */
455         for (i = 0; i < IP_FW_MAX_LABEL_LENGTH + 1; i++)
456                 if (label[i] == '\0') return 1;
457
458         return 0;
459 }
460
461 /*      This function returns a pointer to the first chain with a label
462  *      that matches the one given. */
463 static struct ip_chain *find_label(ip_chainlabel label)
464 {
465         struct ip_chain *tmp;
466         FWC_HAVE_LOCK(fwc_rlocks | fwc_wlocks);
467         for (tmp = ip_fw_chains; tmp; tmp = tmp->next)
468                 if (strcmp(tmp->label,label) == 0)
469                         break;
470         return tmp;
471 }
472
473 /* This function returns a boolean which when true sets answer to one
474    of the FW_*. */
475 static int find_special(ip_chainlabel label, int *answer)
476 {
477         if (label[0] == '\0') {
478                 *answer = FW_SKIP; /* => pass-through rule */
479                 return 1;
480         } else if (strcmp(label,IP_FW_LABEL_ACCEPT) == 0) {
481                 *answer = FW_ACCEPT;
482                 return 1;
483         } else if (strcmp(label,IP_FW_LABEL_BLOCK) == 0) {
484                 *answer = FW_BLOCK;
485                 return 1;
486         } else if (strcmp(label,IP_FW_LABEL_REJECT) == 0) {
487                 *answer = FW_REJECT;
488                 return 1;
489         } else if (strcmp(label,IP_FW_LABEL_REDIRECT) == 0) {
490                 *answer = FW_REDIRECT;
491                 return 1;
492         } else if (strcmp(label,IP_FW_LABEL_MASQUERADE) == 0) {
493                 *answer = FW_MASQUERADE;
494                 return 1;
495         } else if (strcmp(label, IP_FW_LABEL_RETURN) == 0) {
496                 *answer = FW_SKIP+1;
497                 return 1;
498         } else {
499                 return 0;
500         }
501 }
502
503 /* This function cleans up the prevchain and prevrule.  If the verbose
504  * flag is set then he names of the chains will be printed as it
505  * cleans up.  */
506 static void cleanup(struct ip_chain *chain,
507                     const int verbose,
508                     unsigned int slot)
509 {
510         struct ip_chain *tmpchain = chain->reent[slot].prevchain;
511         if (verbose)
512                 printk(KERN_ERR "Chain backtrace: ");
513         while (tmpchain) {
514                 if (verbose)
515                         printk("%s<-",chain->label);
516                 chain->reent[slot].prevchain = NULL;
517                 chain = tmpchain;
518                 tmpchain = chain->reent[slot].prevchain;
519         }
520         if (verbose)
521                 printk("%s\n",chain->label);
522 }
523
524 static inline int
525 ip_fw_domatch(struct ip_fwkernel *f,
526               const char *rif,
527               const ip_chainlabel label,
528               struct sk_buff **pskb,
529               unsigned int slot,
530               __u16 src_port, __u16 dst_port,
531               unsigned int count,
532               int tcpsyn,
533               unsigned char *tos)
534 {
535         f->counters[slot].bcnt+=ntohs((*pskb)->nh.iph->tot_len);
536         f->counters[slot].pcnt++;
537         if (f->ipfw.fw_flg & IP_FW_F_PRN) {
538                 dump_packet(pskb,rif,f,label,src_port,dst_port,count,tcpsyn);
539         }
540
541         *tos = (*tos & f->ipfw.fw_tosand) ^ f->ipfw.fw_tosxor;
542
543 /* This functionality is useless in stock 2.0.x series, but we don't
544  * discard the mark thing altogether, to avoid breaking ipchains (and,
545  * more importantly, the ipfwadm wrapper) --PR */
546         if (f->ipfw.fw_flg & IP_FW_F_MARKABS) {
547                 (*pskb)->nfmark = f->ipfw.fw_mark;
548         } else {
549                 (*pskb)->nfmark += f->ipfw.fw_mark;
550         }
551         if (f->ipfw.fw_flg & IP_FW_F_NETLINK) {
552 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
553                 size_t len = min_t(unsigned int, f->ipfw.fw_outputsize, ntohs((*pskb)->nh.iph->tot_len))
554                         + sizeof(__u32) + sizeof((*pskb)->nfmark) + IFNAMSIZ;
555                 struct sk_buff *outskb=alloc_skb(len, GFP_ATOMIC);
556
557                 duprintf("Sending packet out NETLINK (length = %u).\n",
558                          (unsigned int)len);
559                 if (outskb) {
560                         /* Prepend length, mark & interface */
561                         skb_put(outskb, len);
562                         *((__u32 *)outskb->data) = (__u32)len;
563                         *((__u32 *)(outskb->data+sizeof(__u32))) =
564                                 (*pskb)->nfmark;
565                         strcpy(outskb->data+sizeof(__u32)*2, rif);
566                         skb_copy_bits(*pskb,
567                                 ((char *)(*pskb)->nh.iph - (char *)(*pskb)->data),
568                                 outskb->data+sizeof(__u32)*2+IFNAMSIZ,
569                                 len-(sizeof(__u32)*2+IFNAMSIZ));
570                         netlink_broadcast(ipfwsk, outskb, 0, ~0, GFP_ATOMIC);
571                 }
572                 else {
573 #endif
574                         if (net_ratelimit())
575                                 printk(KERN_WARNING "ip_fw: packet drop due to "
576                                        "netlink failure\n");
577                         return 0;
578 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
579                 }
580 #endif
581         }
582         return 1;
583 }
584
585 /*
586  *      Returns one of the generic firewall policies, like FW_ACCEPT.
587  *
588  *      The testing is either false for normal firewall mode or true for
589  *      user checking mode (counters are not updated, TOS & mark not done).
590  */
591 static int
592 ip_fw_check(const char *rif,
593             __u16 *redirport,
594             struct ip_chain *chain,
595             struct sk_buff **pskb,
596             unsigned int slot,
597             int testing)
598 {
599         __u32                   src, dst;
600         __u16                   src_port = 0xFFFF, dst_port = 0xFFFF;
601         char                    tcpsyn=0;
602         __u16                   offset;
603         unsigned char           tos;
604         struct ip_fwkernel      *f;
605         int                     ret = FW_SKIP+2;
606         unsigned int            count;
607
608         /* We handle fragments by dealing with the first fragment as
609          * if it was a normal packet.  All other fragments are treated
610          * normally, except that they will NEVER match rules that ask
611          * things we don't know, ie. tcp syn flag or ports).  If the
612          * rule is also a fragment-specific rule, non-fragments won't
613          * match it. */
614
615         offset = ntohs((*pskb)->nh.iph->frag_off) & IP_OFFSET;
616
617         /*
618          *      Don't allow a fragment of TCP 8 bytes in. Nobody
619          *      normal causes this. Its a cracker trying to break
620          *      in by doing a flag overwrite to pass the direction
621          *      checks.
622          */
623         if (offset == 1 && (*pskb)->nh.iph->protocol == IPPROTO_TCP) {
624                 if (!testing && net_ratelimit()) {
625                         printk("Suspect TCP fragment.\n");
626                         dump_packet(pskb,rif,NULL,NULL,0,0,0,0);
627                 }
628                 return FW_BLOCK;
629         }
630
631         /* If we can't investigate ports, treat as fragment.  It's
632          * either a trucated whole packet, or a truncated first
633          * fragment, or a TCP first fragment of length 8-15, in which
634          * case the above rule stops reassembly.
635          */
636         if (offset == 0) {
637                 unsigned int size_req;
638                 switch ((*pskb)->nh.iph->protocol) {
639                 case IPPROTO_TCP:
640                         /* Don't care about things past flags word */
641                         size_req = 16;
642                         break;
643
644                 case IPPROTO_UDP:
645                 case IPPROTO_ICMP:
646                         size_req = 8;
647                         break;
648
649                 default:
650                         size_req = 0;
651                 }
652
653                 /* If it is a truncated first fragment then it can be
654                  * used to rewrite port information, and thus should
655                  * be blocked.
656                  */
657                 if (ntohs((*pskb)->nh.iph->tot_len) <
658                     ((*pskb)->nh.iph->ihl<<2)+size_req) {
659                         if (!testing && net_ratelimit()) {
660                                 printk("Suspect short first fragment.\n");
661                                 dump_packet(pskb,rif,NULL,NULL,0,0,0,0);
662                         }
663                         return FW_BLOCK;
664                 }
665         }
666
667         src = (*pskb)->nh.iph->saddr;
668         dst = (*pskb)->nh.iph->daddr;
669         tos = (*pskb)->nh.iph->tos;
670
671         /*
672          *      If we got interface from which packet came
673          *      we can use the address directly. Linux 2.1 now uses address
674          *      chains per device too, but unlike BSD we first check if the
675          *      incoming packet matches a device address and the routing
676          *      table before calling the firewall.
677          */
678
679         dprintf("Packet ");
680         switch ((*pskb)->nh.iph->protocol) {
681                 case IPPROTO_TCP:
682                         dprintf("TCP ");
683                         if (!offset) {
684                                 struct tcphdr _tcph, *th;
685
686                                 th = skb_header_pointer(*pskb,
687                                                         (*pskb)->nh.iph->ihl*4,
688                                                         sizeof(_tcph), &_tcph);
689                                 if (th == NULL)
690                                         return FW_BLOCK;
691
692                                 src_port = ntohs(th->source);
693                                 dst_port = ntohs(th->dest);
694
695                                 /* Connection initilisation can only
696                                  * be made when the syn bit is set and
697                                  * neither of the ack or reset is
698                                  * set. */
699                                 if (th->syn && !(th->ack || th->rst))
700                                         tcpsyn = 1;
701                         }
702                         break;
703                 case IPPROTO_UDP:
704                         dprintf("UDP ");
705                         if (!offset) {
706                                 struct udphdr _udph, *uh;
707
708                                 uh = skb_header_pointer(*pskb,
709                                                         (*pskb)->nh.iph->ihl*4,
710                                                         sizeof(_udph), &_udph);
711                                 if (uh == NULL)
712                                         return FW_BLOCK;
713
714                                 src_port = ntohs(uh->source);
715                                 dst_port = ntohs(uh->dest);
716                         }
717                         break;
718                 case IPPROTO_ICMP:
719                         if (!offset) {
720                                 struct icmphdr _icmph, *ic;
721
722                                 ic = skb_header_pointer(*pskb,
723                                                         (*pskb)->nh.iph->ihl*4,
724                                                         sizeof(_icmph),
725                                                         &_icmph);
726                                 if (ic == NULL)
727                                         return FW_BLOCK;
728
729                                 src_port = (__u16) ic->type;
730                                 dst_port = (__u16) ic->code;
731                         }
732                         dprintf("ICMP ");
733                         break;
734                 default:
735                         dprintf("p=%d ", (*pskb)->nh.iph->protocol);
736                         break;
737         }
738 #ifdef DEBUG_IP_FIREWALL
739         print_ip((*pskb)->nh.iph->saddr);
740
741         if (offset)
742                 dprintf(":fragment (%i) ", ((int)offset)<<2);
743         else if ((*pskb)->nh.iph->protocol == IPPROTO_TCP ||
744                  (*pskb)->nh.iph->protocol == IPPROTO_UDP ||
745                  (*pskb)->nh.iph->protocol == IPPROTO_ICMP)
746                 dprintf(":%hu:%hu", src_port, dst_port);
747         dprintf("\n");
748 #endif
749
750         if (!testing) FWC_READ_LOCK(&ip_fw_lock);
751         else FWC_HAVE_LOCK(fwc_rlocks);
752
753         f = chain->chain;
754         do {
755                 count = 0;
756                 for (; f; f = f->next) {
757                         count++;
758                         if (ip_rule_match(f, rif, pskb,
759                                           tcpsyn, src_port, dst_port,
760                                           offset)) {
761                                 if (!testing
762                                     && !ip_fw_domatch(f, rif, chain->label,
763                                                       pskb, slot,
764                                                       src_port, dst_port,
765                                                       count, tcpsyn, &tos)) {
766                                         ret = FW_BLOCK;
767                                         cleanup(chain, 0, slot);
768                                         goto out;
769                                 }
770                                 break;
771                         }
772                 }
773                 if (f) {
774                         if (f->branch) {
775                                 /* Do sanity check to see if we have
776                                  * already set prevchain and if so we
777                                  * must be in a loop */
778                                 if (f->branch->reent[slot].prevchain) {
779                                         if (!testing) {
780                                                 printk(KERN_ERR
781                                                        "IP firewall: "
782                                                        "Loop detected "
783                                                        "at `%s'.\n",
784                                                        f->branch->label);
785                                                 cleanup(chain, 1, slot);
786                                                 ret = FW_BLOCK;
787                                         } else {
788                                                 cleanup(chain, 0, slot);
789                                                 ret = FW_SKIP+1;
790                                         }
791                                 }
792                                 else {
793                                         f->branch->reent[slot].prevchain
794                                                 = chain;
795                                         f->branch->reent[slot].prevrule
796                                                 = f->next;
797                                         chain = f->branch;
798                                         f = chain->chain;
799                                 }
800                         }
801                         else if (f->simplebranch == FW_SKIP)
802                                 f = f->next;
803                         else if (f->simplebranch == FW_SKIP+1) {
804                                 /* Just like falling off the chain */
805                                 goto fall_off_chain;
806                         } else {
807                                 cleanup(chain, 0, slot);
808                                 ret = f->simplebranch;
809                         }
810                 } /* f == NULL */
811                 else {
812                 fall_off_chain:
813                         if (chain->reent[slot].prevchain) {
814                                 struct ip_chain *tmp = chain;
815                                 f = chain->reent[slot].prevrule;
816                                 chain = chain->reent[slot].prevchain;
817                                 tmp->reent[slot].prevchain = NULL;
818                         }
819                         else {
820                                 ret = chain->policy;
821                                 if (!testing) {
822                                         chain->reent[slot].counters.pcnt++;
823                                         chain->reent[slot].counters.bcnt
824                                                 += ntohs((*pskb)->nh.iph->tot_len);
825                                 }
826                         }
827                 }
828         } while (ret == FW_SKIP+2);
829
830  out:
831         if (!testing) FWC_READ_UNLOCK(&ip_fw_lock);
832
833         /* Recalculate checksum if not going to reject, and TOS changed. */
834         if ((*pskb)->nh.iph->tos != tos
835             && ret != FW_REJECT && ret != FW_BLOCK
836             && !testing) {
837                 if (!skb_ip_make_writable(pskb, offsetof(struct iphdr, tos)+1))
838                         ret = FW_BLOCK;
839                 else {
840                         (*pskb)->nh.iph->tos = tos;
841                         ip_send_check((*pskb)->nh.iph);
842                 }
843         }
844
845         if (ret == FW_REDIRECT && redirport) {
846                 if ((*redirport = htons(f->ipfw.fw_redirpt)) == 0) {
847                         /* Wildcard redirection.
848                          * Note that redirport will become
849                          * 0xFFFF for non-TCP/UDP packets.
850                          */
851                         *redirport = htons(dst_port);
852                 }
853         }
854
855 #ifdef DEBUG_ALLOW_ALL
856         return (testing ? ret : FW_ACCEPT);
857 #else
858         return ret;
859 #endif
860 }
861
862 /* Must have write lock & interrupts off for any of these */
863
864 /* This function sets all the byte counters in a chain to zero.  The
865  * input is a pointer to the chain required for zeroing */
866 static int zero_fw_chain(struct ip_chain *chainptr)
867 {
868         struct ip_fwkernel *i;
869
870         FWC_HAVE_LOCK(fwc_wlocks);
871         for (i = chainptr->chain; i; i = i->next)
872                 memset(i->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
873         return 0;
874 }
875
876 static int clear_fw_chain(struct ip_chain *chainptr)
877 {
878         struct ip_fwkernel *i= chainptr->chain;
879
880         FWC_HAVE_LOCK(fwc_wlocks);
881         chainptr->chain=NULL;
882
883         while (i) {
884                 struct ip_fwkernel *tmp = i->next;
885                 if (i->branch)
886                         i->branch->refcount--;
887                 kfree(i);
888                 i = tmp;
889                 /* We will block in cleanup's unregister sockopt if unloaded,
890                    so this is safe. */
891                 module_put(THIS_MODULE);
892         }
893         return 0;
894 }
895
896 static int replace_in_chain(struct ip_chain *chainptr,
897                             struct ip_fwkernel *frwl,
898                             __u32 position)
899 {
900         struct ip_fwkernel *f = chainptr->chain;
901
902         FWC_HAVE_LOCK(fwc_wlocks);
903
904         while (--position && f != NULL) f = f->next;
905         if (f == NULL)
906                 return EINVAL;
907
908         if (f->branch) f->branch->refcount--;
909         if (frwl->branch) frwl->branch->refcount++;
910
911         frwl->next = f->next;
912         memcpy(f,frwl,sizeof(struct ip_fwkernel));
913         kfree(frwl);
914         return 0;
915 }
916
917 static int append_to_chain(struct ip_chain *chainptr, struct ip_fwkernel *rule)
918 {
919         struct ip_fwkernel *i;
920
921         FWC_HAVE_LOCK(fwc_wlocks);
922
923         /* Are we unloading now?  We will block on nf_unregister_sockopt */
924         if (!try_module_get(THIS_MODULE))
925                 return ENOPROTOOPT;
926
927         /* Special case if no rules already present */
928         if (chainptr->chain == NULL) {
929
930                 /* If pointer writes are atomic then turning off
931                  * interrupts is not necessary. */
932                 chainptr->chain = rule;
933                 if (rule->branch) rule->branch->refcount++;
934                 goto append_successful;
935         }
936
937         /* Find the rule before the end of the chain */
938         for (i = chainptr->chain; i->next; i = i->next);
939         i->next = rule;
940         if (rule->branch) rule->branch->refcount++;
941
942 append_successful:
943         return 0;
944 }
945
946 /* This function inserts a rule at the position of position in the
947  * chain refenced by chainptr.  If position is 1 then this rule will
948  * become the new rule one. */
949 static int insert_in_chain(struct ip_chain *chainptr,
950                            struct ip_fwkernel *frwl,
951                            __u32 position)
952 {
953         struct ip_fwkernel *f = chainptr->chain;
954
955         FWC_HAVE_LOCK(fwc_wlocks);
956
957         /* Are we unloading now?  We will block on nf_unregister_sockopt */
958         if (!try_module_get(THIS_MODULE))
959                 return ENOPROTOOPT;
960
961         /* special case if the position is number 1 */
962         if (position == 1) {
963                 frwl->next = chainptr->chain;
964                 if (frwl->branch) frwl->branch->refcount++;
965                 chainptr->chain = frwl;
966                 goto insert_successful;
967         }
968         position--;
969         while (--position && f != NULL) f = f->next;
970         if (f == NULL)
971                 return EINVAL;
972         if (frwl->branch) frwl->branch->refcount++;
973         frwl->next = f->next;
974
975         f->next = frwl;
976
977 insert_successful:
978         return 0;
979 }
980
981 /* This function deletes the a rule from a given rulenum and chain.
982  * With rulenum = 1 is the first rule is deleted. */
983
984 static int del_num_from_chain(struct ip_chain *chainptr, __u32 rulenum)
985 {
986         struct ip_fwkernel *i=chainptr->chain,*tmp;
987
988         FWC_HAVE_LOCK(fwc_wlocks);
989
990         if (!chainptr->chain)
991                 return ENOENT;
992
993         /* Need a special case for the first rule */
994         if (rulenum == 1) {
995                 /* store temp to allow for freeing up of memory */
996                 tmp = chainptr->chain;
997                 if (chainptr->chain->branch) chainptr->chain->branch->refcount--;
998                 chainptr->chain = chainptr->chain->next;
999                 kfree(tmp); /* free memory that is now unused */
1000         } else {
1001                 rulenum--;
1002                 while (--rulenum && i->next ) i = i->next;
1003                 if (!i->next)
1004                         return ENOENT;
1005                 tmp = i->next;
1006                 if (i->next->branch)
1007                         i->next->branch->refcount--;
1008                 i->next = i->next->next;
1009                 kfree(tmp);
1010         }
1011
1012         /* We will block in cleanup's unregister sockopt if unloaded,
1013            so this is safe. */
1014         module_put(THIS_MODULE);
1015         return 0;
1016 }
1017
1018
1019 /* This function deletes the a rule from a given rule and chain.
1020  * The rule that is deleted is the first occursance of that rule. */
1021 static int del_rule_from_chain(struct ip_chain *chainptr,
1022                                struct ip_fwkernel *frwl)
1023 {
1024         struct ip_fwkernel *ltmp,*ftmp = chainptr->chain ;
1025         int was_found;
1026
1027         FWC_HAVE_LOCK(fwc_wlocks);
1028
1029         /* Sure, we should compare marks, but since the `ipfwadm'
1030          * script uses it for an unholy hack... well, life is easier
1031          * this way.  We also mask it out of the flags word. --PR */
1032         for (ltmp=NULL, was_found=0;
1033              !was_found && ftmp != NULL;
1034              ltmp = ftmp,ftmp = ftmp->next) {
1035                 if (ftmp->ipfw.fw_src.s_addr!=frwl->ipfw.fw_src.s_addr
1036                     || ftmp->ipfw.fw_dst.s_addr!=frwl->ipfw.fw_dst.s_addr
1037                     || ftmp->ipfw.fw_smsk.s_addr!=frwl->ipfw.fw_smsk.s_addr
1038                     || ftmp->ipfw.fw_dmsk.s_addr!=frwl->ipfw.fw_dmsk.s_addr
1039 #if 0
1040                     || ftmp->ipfw.fw_flg!=frwl->ipfw.fw_flg
1041 #else
1042                     || ((ftmp->ipfw.fw_flg & ~IP_FW_F_MARKABS)
1043                         != (frwl->ipfw.fw_flg & ~IP_FW_F_MARKABS))
1044 #endif
1045                     || ftmp->ipfw.fw_invflg!=frwl->ipfw.fw_invflg
1046                     || ftmp->ipfw.fw_proto!=frwl->ipfw.fw_proto
1047 #if 0
1048                     || ftmp->ipfw.fw_mark!=frwl->ipfw.fw_mark
1049 #endif
1050                     || ftmp->ipfw.fw_redirpt!=frwl->ipfw.fw_redirpt
1051                     || ftmp->ipfw.fw_spts[0]!=frwl->ipfw.fw_spts[0]
1052                     || ftmp->ipfw.fw_spts[1]!=frwl->ipfw.fw_spts[1]
1053                     || ftmp->ipfw.fw_dpts[0]!=frwl->ipfw.fw_dpts[0]
1054                     || ftmp->ipfw.fw_dpts[1]!=frwl->ipfw.fw_dpts[1]
1055                     || ftmp->ipfw.fw_outputsize!=frwl->ipfw.fw_outputsize) {
1056                         duprintf("del_rule_from_chain: mismatch:"
1057                                  "src:%u/%u dst:%u/%u smsk:%u/%u dmsk:%u/%u "
1058                                  "flg:%hX/%hX invflg:%hX/%hX proto:%u/%u "
1059                                  "mark:%u/%u "
1060                                  "ports:%hu-%hu/%hu-%hu %hu-%hu/%hu-%hu "
1061                                  "outputsize:%hu-%hu\n",
1062                                  ftmp->ipfw.fw_src.s_addr,
1063                                  frwl->ipfw.fw_src.s_addr,
1064                                  ftmp->ipfw.fw_dst.s_addr,
1065                                  frwl->ipfw.fw_dst.s_addr,
1066                                  ftmp->ipfw.fw_smsk.s_addr,
1067                                  frwl->ipfw.fw_smsk.s_addr,
1068                                  ftmp->ipfw.fw_dmsk.s_addr,
1069                                  frwl->ipfw.fw_dmsk.s_addr,
1070                                  ftmp->ipfw.fw_flg,
1071                                  frwl->ipfw.fw_flg,
1072                                  ftmp->ipfw.fw_invflg,
1073                                  frwl->ipfw.fw_invflg,
1074                                  ftmp->ipfw.fw_proto,
1075                                  frwl->ipfw.fw_proto,
1076                                  ftmp->ipfw.fw_mark,
1077                                  frwl->ipfw.fw_mark,
1078                                  ftmp->ipfw.fw_spts[0],
1079                                  frwl->ipfw.fw_spts[0],
1080                                  ftmp->ipfw.fw_spts[1],
1081                                  frwl->ipfw.fw_spts[1],
1082                                  ftmp->ipfw.fw_dpts[0],
1083                                  frwl->ipfw.fw_dpts[0],
1084                                  ftmp->ipfw.fw_dpts[1],
1085                                  frwl->ipfw.fw_dpts[1],
1086                                  ftmp->ipfw.fw_outputsize,
1087                                  frwl->ipfw.fw_outputsize);
1088                         continue;
1089                 }
1090
1091                 if (strncmp(ftmp->ipfw.fw_vianame,
1092                             frwl->ipfw.fw_vianame,
1093                             IFNAMSIZ)) {
1094                         duprintf("del_rule_from_chain: if mismatch: %s/%s\n",
1095                                  ftmp->ipfw.fw_vianame,
1096                                  frwl->ipfw.fw_vianame);
1097                         continue;
1098                 }
1099                 if (ftmp->branch != frwl->branch) {
1100                         duprintf("del_rule_from_chain: branch mismatch: "
1101                                  "%s/%s\n",
1102                                  ftmp->branch?ftmp->branch->label:"(null)",
1103                                  frwl->branch?frwl->branch->label:"(null)");
1104                         continue;
1105                 }
1106                 if (ftmp->branch == NULL
1107                     && ftmp->simplebranch != frwl->simplebranch) {
1108                         duprintf("del_rule_from_chain: simplebranch mismatch: "
1109                                  "%i/%i\n",
1110                                  ftmp->simplebranch, frwl->simplebranch);
1111                         continue;
1112                 }
1113                 was_found = 1;
1114                 if (ftmp->branch)
1115                         ftmp->branch->refcount--;
1116                 if (ltmp)
1117                         ltmp->next = ftmp->next;
1118                 else
1119                         chainptr->chain = ftmp->next;
1120                 kfree(ftmp);
1121                 /* We will block in cleanup's unregister sockopt if unloaded,
1122                    so this is safe. */
1123                 module_put(THIS_MODULE);
1124                 break;
1125         }
1126
1127         if (was_found)
1128                 return 0;
1129         else {
1130                 duprintf("del_rule_from_chain: no matching rule found\n");
1131                 return EINVAL;
1132         }
1133 }
1134
1135 /* This function takes the label of a chain and deletes the first
1136  * chain with that name.  No special cases required for the built in
1137  * chains as they have their refcount initilised to 1 so that they are
1138  * never deleted.  */
1139 static int del_chain(ip_chainlabel label)
1140 {
1141         struct ip_chain *tmp,*tmp2;
1142
1143         FWC_HAVE_LOCK(fwc_wlocks);
1144         /* Corner case: return EBUSY not ENOENT for first elem ("input") */
1145         if (strcmp(label, ip_fw_chains->label) == 0)
1146                 return EBUSY;
1147
1148         for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1149                 if(strcmp(tmp->next->label,label) == 0)
1150                         break;
1151
1152         tmp2 = tmp->next;
1153         if (!tmp2)
1154                 return ENOENT;
1155
1156         if (tmp2->refcount)
1157                 return EBUSY;
1158
1159         if (tmp2->chain)
1160                 return ENOTEMPTY;
1161
1162         tmp->next = tmp2->next;
1163         kfree(tmp2);
1164
1165         /* We will block in cleanup's unregister sockopt if unloaded,
1166            so this is safe. */
1167         module_put(THIS_MODULE);
1168         return 0;
1169 }
1170
1171 /* This is a function to initilise a chain.  Built in rules start with
1172  * refcount = 1 so that they cannot be deleted.  User defined rules
1173  * start with refcount = 0 so they can be deleted. */
1174 static struct ip_chain *ip_init_chain(ip_chainlabel name,
1175                                       __u32 ref,
1176                                       int policy)
1177 {
1178         unsigned int i;
1179         struct ip_chain *label
1180                 = kmalloc(SIZEOF_STRUCT_IP_CHAIN, GFP_KERNEL);
1181         if (label == NULL)
1182                 panic("Can't kmalloc for firewall chains.\n");
1183         strcpy(label->label,name);
1184         label->next = NULL;
1185         label->chain = NULL;
1186         label->refcount = ref;
1187         label->policy = policy;
1188         for (i = 0; i < NUM_SLOTS; i++) {
1189                 label->reent[i].counters.pcnt = label->reent[i].counters.bcnt
1190                         = 0;
1191                 label->reent[i].prevchain = NULL;
1192                 label->reent[i].prevrule = NULL;
1193         }
1194
1195         return label;
1196 }
1197
1198 /* This is a function for reating a new chain.  The chains is not
1199  * created if a chain of the same name already exists */
1200 static int create_chain(ip_chainlabel label)
1201 {
1202         struct ip_chain *tmp;
1203
1204         if (!check_label(label))
1205                 return EINVAL;
1206
1207         FWC_HAVE_LOCK(fwc_wlocks);
1208         for (tmp = ip_fw_chains; tmp->next; tmp = tmp->next)
1209                 if (strcmp(tmp->label,label) == 0)
1210                         return EEXIST;
1211
1212         if (strcmp(tmp->label,label) == 0)
1213                 return EEXIST;
1214
1215         /* Are we unloading now?  We will block on nf_unregister_sockopt */
1216         if (!try_module_get(THIS_MODULE))
1217                 return ENOPROTOOPT;
1218
1219         tmp->next = ip_init_chain(label, 0, FW_SKIP); /* refcount is
1220                                               * zero since this is a
1221                                               * user defined chain *
1222                                               * and therefore can be
1223                                               * deleted */
1224         return 0;
1225 }
1226
1227 /* This function simply changes the policy on one of the built in
1228  * chains.  checking must be done before this is call to ensure that
1229  * chainptr is pointing to one of the three possible chains */
1230 static int change_policy(struct ip_chain *chainptr, int policy)
1231 {
1232         FWC_HAVE_LOCK(fwc_wlocks);
1233         chainptr->policy = policy;
1234         return 0;
1235 }
1236
1237 /* This function takes an ip_fwuser and converts it to a ip_fwkernel.  It also
1238  * performs some checks in the structure. */
1239 static struct ip_fwkernel *convert_ipfw(struct ip_fwuser *fwuser, int *errno)
1240 {
1241         struct ip_fwkernel *fwkern;
1242
1243         if ( (fwuser->ipfw.fw_flg & ~IP_FW_F_MASK) != 0 ) {
1244                 duprintf("convert_ipfw: undefined flag bits set (flags=%x)\n",
1245                          fwuser->ipfw.fw_flg);
1246                 *errno = EINVAL;
1247                 return NULL;
1248         }
1249
1250 #ifdef DEBUG_IP_FIREWALL_USER
1251         /* These are sanity checks that don't really matter.
1252          * We can get rid of these once testing is complete.
1253          */
1254         if ((fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1255             && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1256                 || fwuser->ipfw.fw_proto != IPPROTO_TCP)) {
1257                 duprintf("convert_ipfw: TCP SYN flag set but proto != TCP!\n");
1258                 *errno = EINVAL;
1259                 return NULL;
1260         }
1261
1262         if (strcmp(fwuser->label, IP_FW_LABEL_REDIRECT) != 0
1263             && fwuser->ipfw.fw_redirpt != 0) {
1264                 duprintf("convert_ipfw: Target not REDIR but redirpt != 0!\n");
1265                 *errno = EINVAL;
1266                 return NULL;
1267         }
1268
1269         if ((!(fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1270              && (fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG))
1271             || (!(fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN)
1272                 && (fwuser->ipfw.fw_invflg & IP_FW_INV_SYN))) {
1273                 duprintf("convert_ipfw: Can't have INV flag if flag unset!\n");
1274                 *errno = EINVAL;
1275                 return NULL;
1276         }
1277
1278         if (((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCPT)
1279              && fwuser->ipfw.fw_spts[0] == 0
1280              && fwuser->ipfw.fw_spts[1] == 0xFFFF)
1281             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTPT)
1282                 && fwuser->ipfw.fw_dpts[0] == 0
1283                 && fwuser->ipfw.fw_dpts[1] == 0xFFFF)
1284             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_VIA)
1285                 && (fwuser->ipfw.fw_vianame)[0] == '\0')
1286             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_SRCIP)
1287                 && fwuser->ipfw.fw_smsk.s_addr == 0)
1288             || ((fwuser->ipfw.fw_invflg & IP_FW_INV_DSTIP)
1289                 && fwuser->ipfw.fw_dmsk.s_addr == 0)) {
1290                 duprintf("convert_ipfw: INV flag makes rule unmatchable!\n");
1291                 *errno = EINVAL;
1292                 return NULL;
1293         }
1294
1295         if ((fwuser->ipfw.fw_flg & IP_FW_F_FRAG)
1296             && !(fwuser->ipfw.fw_invflg & IP_FW_INV_FRAG)
1297             && (fwuser->ipfw.fw_spts[0] != 0
1298                 || fwuser->ipfw.fw_spts[1] != 0xFFFF
1299                 || fwuser->ipfw.fw_dpts[0] != 0
1300                 || fwuser->ipfw.fw_dpts[1] != 0xFFFF
1301                 || (fwuser->ipfw.fw_flg & IP_FW_F_TCPSYN))) {
1302                 duprintf("convert_ipfw: Can't test ports or SYN with frag!\n");
1303                 *errno = EINVAL;
1304                 return NULL;
1305         }
1306 #endif
1307
1308         if ((fwuser->ipfw.fw_spts[0] != 0
1309              || fwuser->ipfw.fw_spts[1] != 0xFFFF
1310              || fwuser->ipfw.fw_dpts[0] != 0
1311              || fwuser->ipfw.fw_dpts[1] != 0xFFFF)
1312             && ((fwuser->ipfw.fw_invflg & IP_FW_INV_PROTO)
1313                 || (fwuser->ipfw.fw_proto != IPPROTO_TCP
1314                     && fwuser->ipfw.fw_proto != IPPROTO_UDP
1315                     && fwuser->ipfw.fw_proto != IPPROTO_ICMP))) {
1316                 duprintf("convert_ipfw: Can only test ports for TCP/UDP/ICMP!\n");
1317                 *errno = EINVAL;
1318                 return NULL;
1319         }
1320
1321         fwkern = kmalloc(SIZEOF_STRUCT_IP_FW_KERNEL, GFP_ATOMIC);
1322         if (!fwkern) {
1323                 duprintf("convert_ipfw: kmalloc failed!\n");
1324                 *errno = ENOMEM;
1325                 return NULL;
1326         }
1327         memcpy(&fwkern->ipfw,&fwuser->ipfw,sizeof(struct ip_fw));
1328
1329         if (!find_special(fwuser->label, &fwkern->simplebranch)) {
1330                 fwkern->branch = find_label(fwuser->label);
1331                 if (!fwkern->branch) {
1332                         duprintf("convert_ipfw: chain doesn't exist `%s'.\n",
1333                                  fwuser->label);
1334                         kfree(fwkern);
1335                         *errno = ENOENT;
1336                         return NULL;
1337                 } else if (fwkern->branch == IP_FW_INPUT_CHAIN
1338                            || fwkern->branch == IP_FW_FORWARD_CHAIN
1339                            || fwkern->branch == IP_FW_OUTPUT_CHAIN) {
1340                         duprintf("convert_ipfw: Can't branch to builtin chain `%s'.\n",
1341                                  fwuser->label);
1342                         kfree(fwkern);
1343                         *errno = ENOENT;
1344                         return NULL;
1345                 }
1346         } else
1347                 fwkern->branch = NULL;
1348         memset(fwkern->counters, 0, sizeof(struct ip_counters)*NUM_SLOTS);
1349
1350         /* Handle empty vianame by making it a wildcard */
1351         if ((fwkern->ipfw.fw_vianame)[0] == '\0')
1352             fwkern->ipfw.fw_flg |= IP_FW_F_WILDIF;
1353
1354         fwkern->next = NULL;
1355         return fwkern;
1356 }
1357
1358 int ip_fw_ctl(int cmd, void *m, int len)
1359 {
1360         int ret;
1361         struct ip_chain *chain;
1362         unsigned long flags;
1363
1364         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1365
1366         switch (cmd) {
1367         case IP_FW_FLUSH:
1368                 if (len != sizeof(ip_chainlabel) || !check_label(m))
1369                         ret = EINVAL;
1370                 else if ((chain = find_label(m)) == NULL)
1371                         ret = ENOENT;
1372                 else ret = clear_fw_chain(chain);
1373                 break;
1374
1375         case IP_FW_ZERO:
1376                 if (len != sizeof(ip_chainlabel) || !check_label(m))
1377                         ret = EINVAL;
1378                 else if ((chain = find_label(m)) == NULL)
1379                         ret = ENOENT;
1380                 else ret = zero_fw_chain(chain);
1381                 break;
1382
1383         case IP_FW_CHECK: {
1384                 struct ip_fwtest *new = m;
1385                 struct iphdr *ip;
1386
1387                 /* Don't need write lock. */
1388                 FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1389
1390                 if (len != sizeof(struct ip_fwtest) || !check_label(m))
1391                         return EINVAL;
1392
1393                 /* Need readlock to do find_label */
1394                 FWC_READ_LOCK(&ip_fw_lock);
1395
1396                 if ((chain = find_label(new->fwt_label)) == NULL)
1397                         ret = ENOENT;
1398                 else {
1399                         struct sk_buff *tmp_skb;
1400                         int hdrlen;
1401
1402                         hdrlen = sizeof(struct ip_fwpkt) -
1403                                 sizeof(struct in_addr) -
1404                                 IFNAMSIZ;
1405
1406                         ip = &(new->fwt_packet.fwp_iph);
1407
1408                         /* Fix this one up by hand, who knows how many
1409                          * tools will break if we start to barf on this.
1410                          */
1411                         if (ntohs(ip->tot_len) > hdrlen)
1412                                 ip->tot_len = htons(hdrlen);
1413
1414                         if (ip->ihl != sizeof(struct iphdr) / sizeof(u32)) {
1415                                 duprintf("ip_fw_ctl: ip->ihl=%d, want %d\n",
1416                                          ip->ihl,
1417                                          sizeof(struct iphdr) / sizeof(u32));
1418                                 ret = EINVAL;
1419                         } else if ((tmp_skb = alloc_skb(hdrlen,
1420                                                         GFP_ATOMIC)) == NULL) {
1421                                 duprintf("ip_fw_ctl: tmp_skb alloc failure\n");
1422                                 ret = EFAULT;
1423                         } else {
1424                                 skb_reserve(tmp_skb, hdrlen);
1425                                 skb_push(tmp_skb, hdrlen);
1426                                 memcpy(tmp_skb->data, ip, hdrlen);
1427                                 tmp_skb->nh.raw =
1428                                         (unsigned char *) tmp_skb->data;
1429                                 ret = ip_fw_check(new->fwt_packet.fwp_vianame,
1430                                                   NULL, chain,
1431                                                   &tmp_skb, SLOT_NUMBER(), 1);
1432                                 kfree_skb(tmp_skb);
1433                                 switch (ret) {
1434                                 case FW_ACCEPT:
1435                                         ret = 0; break;
1436                                 case FW_REDIRECT:
1437                                         ret = ECONNABORTED; break;
1438                                 case FW_MASQUERADE:
1439                                         ret = ECONNRESET; break;
1440                                 case FW_REJECT:
1441                                         ret = ECONNREFUSED; break;
1442                                         /* Hack to help diag; these only get
1443                                            returned when testing. */
1444                                 case FW_SKIP+1:
1445                                         ret = ELOOP; break;
1446                                 case FW_SKIP:
1447                                         ret = ENFILE; break;
1448                                 default: /* FW_BLOCK */
1449                                         ret = ETIMEDOUT; break;
1450                                 }
1451                         }
1452                 }
1453                 FWC_READ_UNLOCK(&ip_fw_lock);
1454                 return ret;
1455         }
1456
1457         case IP_FW_MASQ_TIMEOUTS: {
1458                 ret = ip_fw_masq_timeouts(m, len);
1459         }
1460         break;
1461
1462         case IP_FW_REPLACE: {
1463                 struct ip_fwkernel *ip_fwkern;
1464                 struct ip_fwnew *new = m;
1465
1466                 if (len != sizeof(struct ip_fwnew)
1467                     || !check_label(new->fwn_label))
1468                         ret = EINVAL;
1469                 else if ((chain = find_label(new->fwn_label)) == NULL)
1470                         ret = ENOENT;
1471                 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1472                          != NULL)
1473                         ret = replace_in_chain(chain, ip_fwkern,
1474                                                new->fwn_rulenum);
1475         }
1476         break;
1477
1478         case IP_FW_APPEND: {
1479                 struct ip_fwchange *new = m;
1480                 struct ip_fwkernel *ip_fwkern;
1481
1482                 if (len != sizeof(struct ip_fwchange)
1483                     || !check_label(new->fwc_label))
1484                         ret = EINVAL;
1485                 else if ((chain = find_label(new->fwc_label)) == NULL)
1486                         ret = ENOENT;
1487                 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1488                          != NULL)
1489                         ret = append_to_chain(chain, ip_fwkern);
1490         }
1491         break;
1492
1493         case IP_FW_INSERT: {
1494                 struct ip_fwkernel *ip_fwkern;
1495                 struct ip_fwnew *new = m;
1496
1497                 if (len != sizeof(struct ip_fwnew)
1498                     || !check_label(new->fwn_label))
1499                         ret = EINVAL;
1500                 else if ((chain = find_label(new->fwn_label)) == NULL)
1501                         ret = ENOENT;
1502                 else if ((ip_fwkern = convert_ipfw(&new->fwn_rule, &ret))
1503                          != NULL)
1504                         ret = insert_in_chain(chain, ip_fwkern,
1505                                               new->fwn_rulenum);
1506         }
1507         break;
1508
1509         case IP_FW_DELETE: {
1510                 struct ip_fwchange *new = m;
1511                 struct ip_fwkernel *ip_fwkern;
1512
1513                 if (len != sizeof(struct ip_fwchange)
1514                     || !check_label(new->fwc_label))
1515                         ret = EINVAL;
1516                 else if ((chain = find_label(new->fwc_label)) == NULL)
1517                         ret = ENOENT;
1518                 else if ((ip_fwkern = convert_ipfw(&new->fwc_rule, &ret))
1519                          != NULL) {
1520                         ret = del_rule_from_chain(chain, ip_fwkern);
1521                         kfree(ip_fwkern);
1522                 }
1523         }
1524         break;
1525
1526         case IP_FW_DELETE_NUM: {
1527                 struct ip_fwdelnum *new = m;
1528
1529                 if (len != sizeof(struct ip_fwdelnum)
1530                     || !check_label(new->fwd_label))
1531                         ret = EINVAL;
1532                 else if ((chain = find_label(new->fwd_label)) == NULL)
1533                         ret = ENOENT;
1534                 else ret = del_num_from_chain(chain, new->fwd_rulenum);
1535         }
1536         break;
1537
1538         case IP_FW_CREATECHAIN: {
1539                 if (len != sizeof(ip_chainlabel)) {
1540                         duprintf("create_chain: bad size %i\n", len);
1541                         ret = EINVAL;
1542                 }
1543                 else ret = create_chain(m);
1544         }
1545         break;
1546
1547         case IP_FW_DELETECHAIN: {
1548                 if (len != sizeof(ip_chainlabel)) {
1549                         duprintf("delete_chain: bad size %i\n", len);
1550                         ret = EINVAL;
1551                 }
1552                 else ret = del_chain(m);
1553         }
1554         break;
1555
1556         case IP_FW_POLICY: {
1557                 struct ip_fwpolicy *new = m;
1558
1559                 if (len != sizeof(struct ip_fwpolicy)
1560                     || !check_label(new->fwp_label))
1561                         ret = EINVAL;
1562                 else if ((chain = find_label(new->fwp_label)) == NULL)
1563                         ret = ENOENT;
1564                 else if (chain != IP_FW_INPUT_CHAIN
1565                          && chain != IP_FW_FORWARD_CHAIN
1566                          && chain != IP_FW_OUTPUT_CHAIN) {
1567                         duprintf("change_policy: can't change policy on user"
1568                                  " defined chain.\n");
1569                         ret = EINVAL;
1570                 }
1571                 else {
1572                         int pol = FW_SKIP;
1573                         find_special(new->fwp_policy, &pol);
1574
1575                         switch(pol) {
1576                         case FW_MASQUERADE:
1577                                 if (chain != IP_FW_FORWARD_CHAIN) {
1578                                         ret = EINVAL;
1579                                         break;
1580                                 }
1581                                 /* Fall thru... */
1582                         case FW_BLOCK:
1583                         case FW_ACCEPT:
1584                         case FW_REJECT:
1585                                 ret = change_policy(chain, pol);
1586                                 break;
1587                         default:
1588                                 duprintf("change_policy: bad policy `%s'\n",
1589                                          new->fwp_policy);
1590                                 ret = EINVAL;
1591                         }
1592                 }
1593                 break;
1594         }
1595         default:
1596                 duprintf("ip_fw_ctl:  unknown request %d\n",cmd);
1597                 ret = ENOPROTOOPT;
1598         }
1599
1600         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1601         return ret;
1602 }
1603
1604 /* Returns bytes used - doesn't NUL terminate */
1605 static int dump_rule(char *buffer,
1606                      const char *chainlabel,
1607                      const struct ip_fwkernel *rule)
1608 {
1609         int len;
1610         unsigned int i;
1611         __u64 packets = 0, bytes = 0;
1612
1613         FWC_HAVE_LOCK(fwc_wlocks);
1614         for (i = 0; i < NUM_SLOTS; i++) {
1615                 packets += rule->counters[i].pcnt;
1616                 bytes += rule->counters[i].bcnt;
1617         }
1618
1619         len=sprintf(buffer,
1620                     "%9s "                      /* Chain name */
1621                     "%08X/%08X->%08X/%08X "     /* Source & Destination IPs */
1622                     "%.16s "                    /* Interface */
1623                     "%X %X "                    /* fw_flg and fw_invflg fields */
1624                     "%u "                       /* Protocol */
1625                     "%-9u %-9u %-9u %-9u "      /* Packet & byte counters */
1626                     "%u-%u %u-%u "              /* Source & Dest port ranges */
1627                     "A%02X X%02X "              /* TOS and and xor masks */
1628                     "%08X "                     /* Redirection port */
1629                     "%u "                       /* fw_mark field */
1630                     "%u "                       /* output size */
1631                     "%9s\n",                    /* Target */
1632                     chainlabel,
1633                     ntohl(rule->ipfw.fw_src.s_addr),
1634                     ntohl(rule->ipfw.fw_smsk.s_addr),
1635                     ntohl(rule->ipfw.fw_dst.s_addr),
1636                     ntohl(rule->ipfw.fw_dmsk.s_addr),
1637                     (rule->ipfw.fw_vianame)[0] ? rule->ipfw.fw_vianame : "-",
1638                     rule->ipfw.fw_flg,
1639                     rule->ipfw.fw_invflg,
1640                     rule->ipfw.fw_proto,
1641                     (__u32)(packets >> 32), (__u32)packets,
1642                     (__u32)(bytes >> 32), (__u32)bytes,
1643                     rule->ipfw.fw_spts[0], rule->ipfw.fw_spts[1],
1644                     rule->ipfw.fw_dpts[0], rule->ipfw.fw_dpts[1],
1645                     rule->ipfw.fw_tosand, rule->ipfw.fw_tosxor,
1646                     rule->ipfw.fw_redirpt,
1647                     rule->ipfw.fw_mark,
1648                     rule->ipfw.fw_outputsize,
1649                     branchname(rule->branch,rule->simplebranch));
1650
1651         duprintf("dump_rule: %i bytes done.\n", len);
1652         return len;
1653 }
1654
1655 /* File offset is actually in records, not bytes. */
1656 static int ip_chain_procinfo(char *buffer, char **start,
1657                              off_t offset, int length)
1658 {
1659         struct ip_chain *i;
1660         struct ip_fwkernel *j = ip_fw_chains->chain;
1661         unsigned long flags;
1662         int len = 0;
1663         int last_len = 0;
1664         off_t upto = 0;
1665
1666         duprintf("Offset starts at %lu\n", offset);
1667         duprintf("ip_fw_chains is 0x%0lX\n", (unsigned long int)ip_fw_chains);
1668
1669         /* Need a write lock to lock out ``readers'' which update counters. */
1670         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1671
1672         for (i = ip_fw_chains; i; i = i->next) {
1673             for (j = i->chain; j; j = j->next) {
1674                 if (upto == offset) break;
1675                 duprintf("Skipping rule in chain `%s'\n",
1676                          i->label);
1677                 upto++;
1678             }
1679             if (upto == offset) break;
1680         }
1681
1682         /* Don't init j first time, or once i = NULL */
1683         for (; i; (void)((i = i->next) && (j = i->chain))) {
1684                 duprintf("Dumping chain `%s'\n", i->label);
1685                 for (; j; j = j->next, upto++, last_len = len)
1686                 {
1687                         len += dump_rule(buffer+len, i->label, j);
1688                         if (len > length) {
1689                                 duprintf("Dumped to %i (past %i).  "
1690                                          "Moving back to %i.\n",
1691                                          len, length, last_len);
1692                                 len = last_len;
1693                                 goto outside;
1694                         }
1695                 }
1696         }
1697 outside:
1698         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1699         buffer[len] = '\0';
1700
1701         duprintf("ip_chain_procinfo: Length = %i (of %i).  Offset = %li.\n",
1702                  len, length, upto);
1703         /* `start' hack - see fs/proc/generic.c line ~165 */
1704         *start=(char *)((unsigned int)upto-offset);
1705         return len;
1706 }
1707
1708 static int ip_chain_name_procinfo(char *buffer, char **start,
1709                                   off_t offset, int length)
1710 {
1711         struct ip_chain *i;
1712         int len = 0,last_len = 0;
1713         off_t pos = 0,begin = 0;
1714         unsigned long flags;
1715
1716         /* Need a write lock to lock out ``readers'' which update counters. */
1717         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1718
1719         for (i = ip_fw_chains; i; i = i->next)
1720         {
1721                 unsigned int j;
1722                 __u32 packetsHi = 0, packetsLo = 0, bytesHi = 0, bytesLo = 0;
1723
1724                 for (j = 0; j < NUM_SLOTS; j++) {
1725                         packetsLo += i->reent[j].counters.pcnt & 0xFFFFFFFF;
1726                         packetsHi += ((i->reent[j].counters.pcnt >> 32)
1727                                       & 0xFFFFFFFF);
1728                         bytesLo += i->reent[j].counters.bcnt & 0xFFFFFFFF;
1729                         bytesHi += ((i->reent[j].counters.bcnt >> 32)
1730                                     & 0xFFFFFFFF);
1731                 }
1732
1733                 /* print the label and the policy */
1734                 len+=sprintf(buffer+len,"%s %s %i %u %u %u %u\n",
1735                              i->label,branchname(NULL, i->policy),i->refcount,
1736                              packetsHi, packetsLo, bytesHi, bytesLo);
1737                 pos=begin+len;
1738                 if(pos<offset) {
1739                         len=0;
1740                         begin=pos;
1741                 }
1742                 else if(pos>offset+length) {
1743                         len = last_len;
1744                         break;
1745                 }
1746
1747                 last_len = len;
1748         }
1749         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1750
1751         *start = buffer+(offset-begin);
1752         len-=(offset-begin);
1753         if(len>length)
1754                 len=length;
1755         return len;
1756 }
1757
1758 /*
1759  *      Interface to the generic firewall chains.
1760  */
1761 int ipfw_input_check(struct firewall_ops *this, int pf,
1762                      struct net_device *dev, void *arg,
1763                      struct sk_buff **pskb)
1764 {
1765         return ip_fw_check(dev->name,
1766                            arg, IP_FW_INPUT_CHAIN, pskb, SLOT_NUMBER(), 0);
1767 }
1768
1769 int ipfw_output_check(struct firewall_ops *this, int pf,
1770                       struct net_device *dev, void *arg,
1771                       struct sk_buff **pskb)
1772 {
1773         /* Locally generated bogus packets by root. <SIGH>. */
1774         if ((*pskb)->len < sizeof(struct iphdr) ||
1775             (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
1776                 return FW_ACCEPT;
1777         return ip_fw_check(dev->name,
1778                            arg, IP_FW_OUTPUT_CHAIN, pskb, SLOT_NUMBER(), 0);
1779 }
1780
1781 int ipfw_forward_check(struct firewall_ops *this, int pf,
1782                        struct net_device *dev, void *arg,
1783                        struct sk_buff **pskb)
1784 {
1785         return ip_fw_check(dev->name,
1786                            arg, IP_FW_FORWARD_CHAIN, pskb, SLOT_NUMBER(), 0);
1787 }
1788
1789 struct firewall_ops ipfw_ops = {
1790         .fw_forward     =       ipfw_forward_check,
1791         .fw_input       =       ipfw_input_check,
1792         .fw_output      =       ipfw_output_check,
1793 };
1794
1795 int ipfw_init_or_cleanup(int init)
1796 {
1797         struct proc_dir_entry *proc;
1798         int ret = 0;
1799         unsigned long flags;
1800
1801         if (!init) goto cleanup;
1802
1803 #ifdef DEBUG_IP_FIREWALL_LOCKING
1804         fwc_wlocks = fwc_rlocks = 0;
1805 #endif
1806
1807 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1808         ipfwsk = netlink_kernel_create(NETLINK_FIREWALL, NULL);
1809         if (ipfwsk == NULL)
1810                 goto cleanup_nothing;
1811 #endif
1812
1813         ret = register_firewall(PF_INET, &ipfw_ops);
1814         if (ret < 0)
1815                 goto cleanup_netlink;
1816
1817         proc = proc_net_create(IP_FW_PROC_CHAINS, S_IFREG | S_IRUSR | S_IWUSR,
1818                                ip_chain_procinfo);
1819         if (proc) proc->owner = THIS_MODULE;
1820         proc = proc_net_create(IP_FW_PROC_CHAIN_NAMES,
1821                                S_IFREG | S_IRUSR | S_IWUSR,
1822                                ip_chain_name_procinfo);
1823         if (proc) proc->owner = THIS_MODULE;
1824
1825         IP_FW_INPUT_CHAIN = ip_init_chain(IP_FW_LABEL_INPUT, 1, FW_ACCEPT);
1826         IP_FW_FORWARD_CHAIN = ip_init_chain(IP_FW_LABEL_FORWARD, 1, FW_ACCEPT);
1827         IP_FW_OUTPUT_CHAIN = ip_init_chain(IP_FW_LABEL_OUTPUT, 1, FW_ACCEPT);
1828
1829         return ret;
1830
1831  cleanup:
1832         unregister_firewall(PF_INET, &ipfw_ops);
1833
1834         FWC_WRITE_LOCK_IRQ(&ip_fw_lock, flags);
1835         while (ip_fw_chains) {
1836                 struct ip_chain *next = ip_fw_chains->next;
1837
1838                 clear_fw_chain(ip_fw_chains);
1839                 kfree(ip_fw_chains);
1840                 ip_fw_chains = next;
1841         }
1842         FWC_WRITE_UNLOCK_IRQ(&ip_fw_lock, flags);
1843
1844         proc_net_remove(IP_FW_PROC_CHAINS);
1845         proc_net_remove(IP_FW_PROC_CHAIN_NAMES);
1846
1847  cleanup_netlink:
1848 #if defined(CONFIG_NETLINK_DEV) || defined(CONFIG_NETLINK_DEV_MODULE)
1849         sock_release(ipfwsk->sk_socket);
1850
1851  cleanup_nothing:
1852 #endif
1853         return ret;
1854 }