Added the new version for dummynet.
[ipfw.git] / dummynet2 / ip_fw_sockopt.c
diff --git a/dummynet2/ip_fw_sockopt.c b/dummynet2/ip_fw_sockopt.c
new file mode 100644 (file)
index 0000000..086d7f0
--- /dev/null
@@ -0,0 +1,1086 @@
+/*-
+ * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa
+ *
+ * Supported by: Valeria Paoli
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_sockopt.c 200601 2009-12-16 10:48:40Z luigi $");
+
+/*
+ * Sockopt support for ipfw. The routines here implement
+ * the upper half of the ipfw code.
+ */
+
+#if !defined(KLD_MODULE)
+#include "opt_ipfw.h"
+#include "opt_ipdivert.h"
+#include "opt_ipdn.h"
+#include "opt_inet.h"
+#ifndef INET
+#error IPFIREWALL requires INET.
+#endif /* INET */
+#endif
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>  /* struct m_tag used by nested headers */
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <net/if.h>
+#include <net/route.h>
+#include <net/vnet.h>
+
+#include <netinet/in.h>
+#include <netinet/ip_fw.h>
+#include <netinet/ipfw/ip_fw_private.h>
+
+#ifdef MAC
+#include <security/mac/mac_framework.h>
+#endif
+
+MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's");
+
+/*
+ * static variables followed by global ones (none in this file)
+ */
+
+/*
+ * Binary search in chain->map, which is assumed to be sorted by
+ * (rulenum, id). Returns the index of the first rule that is not
+ * smaller than (key, id); if every rule is smaller, the index of the
+ * last entry is returned instead.
+ */
+int
+ipfw_find_rule(struct ip_fw_chain *chain, uint32_t key, uint32_t id)
+{
+       int low = 0;
+       int high = chain->n_rules - 1;
+
+       while (low < high) {
+               int mid = (low + high) / 2;
+               struct ip_fw *cur = chain->map[mid];
+
+               if (cur->rulenum < key ||
+                   (cur->rulenum == key && cur->id < id))
+                       low = mid + 1;  /* answer lies strictly after mid */
+               else
+                       high = mid;     /* mid itself is still a candidate */
+       }
+       return high;
+}
+
+/*
+ * Allocate a map able to hold chain->n_rules + extra rule pointers.
+ * 'extra' is the number of entries being added (positive) or deleted
+ * (negative). When 'locked' is zero the IPFW_UH write lock is taken here
+ * and is still held on successful return; if the rule count grew while
+ * we were allocating, the lock is dropped and the allocation retried.
+ * Returns NULL (after printing a message) if the allocation fails.
+ */
+static struct ip_fw **
+get_map(struct ip_fw_chain *chain, int extra, int locked)
+{
+       struct ip_fw **new_map;
+       int wanted;
+
+       for (;;) {
+               wanted = chain->n_rules + extra;
+               new_map = malloc(wanted * sizeof(*new_map), M_IPFW, M_WAITOK);
+               if (new_map == NULL) {
+                       printf("%s: cannot allocate map\n", __FUNCTION__);
+                       return NULL;
+               }
+               if (locked == 0)
+                       IPFW_UH_WLOCK(chain);
+               /* the size is re-checked now that the lock is held */
+               if (chain->n_rules + extra <= wanted)
+                       break;
+               /* the chain grew in the meantime: start over */
+               if (locked == 0)
+                       IPFW_UH_WUNLOCK(chain);
+               free(new_map, M_IPFW);
+       }
+       return new_map;
+}
+
+/*
+ * Install new_map (holding new_len rules) as the active map of the chain
+ * and bump the chain id. The replacement happens under IPFW_WLOCK; the
+ * caller is expected to hold IPFW_UH_WLOCK already.
+ * Returns the previous map, which the caller is responsible for.
+ */
+static struct ip_fw **
+swap_map(struct ip_fw_chain *chain, struct ip_fw **new_map, int new_len)
+{
+       struct ip_fw **previous;
+
+       IPFW_WLOCK(chain);
+       previous = chain->map;
+       chain->map = new_map;
+       chain->n_rules = new_len;
+       chain->id++;
+       IPFW_WUNLOCK(chain);
+
+       return previous;
+}
+
+/*
+ * Insert a copy of input_rule into the chain.
+ * The rule is duplicated into freshly allocated storage and placed after
+ * the existing rules carrying the same number. When the rule number is 0
+ * the rule goes right before the default rule and receives a number
+ * derived from its predecessor and V_autoinc_step; that number is written
+ * back into input_rule so the caller learns it.
+ * Returns 0 on success, EINVAL for an unusable chain or rule number and
+ * ENOSPC when an allocation fails.
+ * XXX DO NOT USE FOR THE DEFAULT RULE.
+ * Must be called without IPFW_UH held
+ */
+int
+ipfw_add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule)
+{
+       struct ip_fw *new_rule;
+       struct ip_fw **new_map, **old_map;
+       int rule_len, pos, next_num;
+
+       if (chain->rules == NULL)
+               return (EINVAL);
+       if (input_rule->rulenum >= IPFW_DEFAULT_RULE)
+               return (EINVAL);
+
+       rule_len = RULESIZE(input_rule);
+       new_rule = malloc(rule_len, M_IPFW, M_WAITOK | M_ZERO);
+       if (new_rule == NULL)
+               return (ENOSPC);
+       /* on success get_map leaves us holding IPFW_UH_WLOCK */
+       new_map = get_map(chain, 1, 0 /* not locked */);
+       if (new_map == NULL) {
+               free(new_rule, M_IPFW);
+               return ENOSPC;
+       }
+
+       bcopy(input_rule, new_rule, rule_len);
+       /* userland has no say on the following fields */
+       new_rule->x_next = NULL;
+       new_rule->next_rule = NULL;
+       new_rule->pcnt = 0;
+       new_rule->bcnt = 0;
+       new_rule->timestamp = 0;
+
+       /* keep the auto-increment step within 1..1000 */
+       if (V_autoinc_step < 1)
+               V_autoinc_step = 1;
+       else if (V_autoinc_step > 1000)
+               V_autoinc_step = 1000;
+
+       /* the new rule goes in front of the first rule with a larger number */
+       if (new_rule->rulenum != 0)
+               next_num = new_rule->rulenum + 1;
+       else
+               next_num = IPFW_DEFAULT_RULE;
+       pos = ipfw_find_rule(chain, next_num, 0);
+
+       /* head of the old map, the new rule, then the tail */
+       if (pos > 0)
+               bcopy(chain->map, new_map, pos * sizeof(struct ip_fw *));
+       new_map[pos] = new_rule;
+       /* the tail is never empty, the default rule is always there */
+       bcopy(chain->map + pos, new_map + pos + 1,
+               sizeof(struct ip_fw *) * (chain->n_rules - pos));
+
+       if (new_rule->rulenum == 0) {
+               /* pick a number and report it to the caller */
+               new_rule->rulenum = (pos > 0) ? new_map[pos - 1]->rulenum : 0;
+               if (new_rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step)
+                       new_rule->rulenum += V_autoinc_step;
+               input_rule->rulenum = new_rule->rulenum;
+       }
+
+       new_rule->id = chain->id + 1;
+       old_map = swap_map(chain, new_map, chain->n_rules + 1);
+       chain->static_len += rule_len;
+       IPFW_UH_WUNLOCK(chain);
+
+       if (old_map)
+               free(old_map, M_IPFW);
+       return (0);
+}
+
+/*
+ * Free every rule of a list linked through x_next, such as the list
+ * of rules collected while deleting entries.
+ * An empty list (NULL head) is fine and results in no work.
+ */
+void
+ipfw_reap_rules(struct ip_fw *head)
+{
+       struct ip_fw *following;
+
+       for (; head != NULL; head = following) {
+               /* fetch the link before the storage goes away */
+               following = head->x_next;
+               free(head, M_IPFW);
+       }
+}
+
+/*
+ * Deletion predicate shared by every pass of del_entry(), so that the
+ * number of rules counted always equals the number of rules removed.
+ * Rules in RESVD_SET are never deleted; cmd 0 ignores the set, any other
+ * delete command requires rule->set == set.
+ */
+static int
+del_entry_match(const struct ip_fw *rule, uint8_t cmd, uint8_t set)
+{
+       if (rule->set == RESVD_SET)
+               return 0;
+       return (cmd == 0 || rule->set == set);
+}
+
+/**
+ * Remove all rules with given number, and also do set manipulation.
+ * Assumes chain != NULL && *chain != NULL.
+ *
+ * The argument is an u_int32_t. The low 16 bit are the rule or set number,
+ * the next 8 bits are the new set, the top 8 bits are the command:
+ *
+ *     0       delete rules with given number
+ *     1       delete rules with given set number
+ *     2       move rules with given number to new set
+ *     3       move rules with given set number to new set
+ *     4       swap sets with given numbers
+ *     5       delete rules with given number and with given set number
+ *
+ * Returns 0 on success, EINVAL if the argument is malformed, if no rule
+ * matches a delete request, or if the new map cannot be allocated.
+ * IPFW_UH_WLOCK is taken once on entry and held for the whole operation,
+ * so none of the cases below may acquire it again.
+ */
+static int
+del_entry(struct ip_fw_chain *chain, u_int32_t arg)
+{
+       struct ip_fw *rule;
+       uint32_t rulenum;       /* rule or old_set */
+       uint8_t cmd, new_set;
+       uint8_t match_set;      /* set compared by the delete commands */
+       int start, end = 0, i, ofs, n;
+       struct ip_fw **map = NULL;
+       int error = 0;
+
+       rulenum = arg & 0xffff;
+       cmd = (arg >> 24) & 0xff;
+       new_set = (arg >> 16) & 0xff;
+
+       if (cmd > 5 || new_set > RESVD_SET)
+               return EINVAL;
+       if (cmd == 0 || cmd == 2 || cmd == 5) {
+               if (rulenum >= IPFW_DEFAULT_RULE)
+                       return EINVAL;
+       } else {
+               if (rulenum > RESVD_SET)        /* old_set */
+                       return EINVAL;
+       }
+       /* for cmd 1 the set to delete travels in the low 16 bits */
+       match_set = (cmd == 1) ? (uint8_t)rulenum : new_set;
+
+       IPFW_UH_WLOCK(chain); /* prevent conflicts among the writers */
+       chain->reap = NULL;     /* prepare for deletions */
+
+       switch (cmd) {
+       case 0: /* delete rules with given number (0 is special means all) */
+       case 1: /* delete all rules with given set number, rule->set == rulenum */
+       case 5: /* delete rules with given number and with given set number.
+                * rulenum - given rule number;
+                * new_set - given set number.
+                */
+               /* locate first rule to delete (start), the one after the
+                * last one (end), and count how many rules to delete (n)
+                */
+               n = 0;
+               if (cmd == 1) { /* look for a specific set, must scan all */
+                       for (start = -1, i = 0; i < chain->n_rules; i++) {
+                               if (!del_entry_match(chain->map[i], cmd,
+                                   match_set))
+                                       continue;
+                               if (start < 0)
+                                       start = i;
+                               end = i;
+                               n++;
+                       }
+                       end++;  /* first non-matching */
+               } else {
+                       start = ipfw_find_rule(chain, rulenum, 0);
+                       for (end = start; end < chain->n_rules; end++) {
+                               rule = chain->map[end];
+                               if (rulenum > 0 && rule->rulenum != rulenum)
+                                       break;
+                               if (del_entry_match(rule, cmd, match_set))
+                                       n++;
+                       }
+               }
+               if (n == 0 && arg == 0)
+                       break; /* special case, flush on empty ruleset */
+               /* allocate the map, if needed */
+               if (n > 0)
+                       map = get_map(chain, -n, 1 /* locked */);
+               if (n == 0 || map == NULL) {
+                       error = EINVAL;
+                       break;
+               }
+               /* copy the initial part of the map */
+               if (start > 0)
+                       bcopy(chain->map, map, start * sizeof(struct ip_fw *));
+               /* copy the rules that survive between start and end */
+               for (i = ofs = start; i < end; i++) {
+                       rule = chain->map[i];
+                       if (!del_entry_match(rule, cmd, match_set))
+                               map[ofs++] = rule;
+               }
+               /* finally the tail */
+               bcopy(chain->map + end, map + ofs,
+                       (chain->n_rules - end) * sizeof(struct ip_fw *));
+               /* from here on 'map' is the old map, freed before returning */
+               map = swap_map(chain, map, chain->n_rules - n);
+               /* now queue the deleted rules on the reap list */
+               for (i = start; i < end; i++) {
+                       rule = map[i];
+                       if (del_entry_match(rule, cmd, match_set)) {
+                               int l = RULESIZE(rule);
+
+                               chain->static_len -= l;
+                               ipfw_remove_dyn_children(rule);
+                               rule->x_next = chain->reap;
+                               chain->reap = rule;
+                       }
+               }
+               break;
+
+       /* cases 2-4 only rewrite rule->set, under the lock taken above */
+       case 2: /* move rules with given number to new set */
+               for (i = 0; i < chain->n_rules; i++) {
+                       rule = chain->map[i];
+                       if (rule->rulenum == rulenum)
+                               rule->set = new_set;
+               }
+               break;
+
+       case 3: /* move rules with given set number to new set */
+               for (i = 0; i < chain->n_rules; i++) {
+                       rule = chain->map[i];
+                       if (rule->set == rulenum)
+                               rule->set = new_set;
+               }
+               break;
+
+       case 4: /* swap two sets */
+               for (i = 0; i < chain->n_rules; i++) {
+                       rule = chain->map[i];
+                       if (rule->set == rulenum)
+                               rule->set = new_set;
+                       else if (rule->set == new_set)
+                               rule->set = rulenum;
+               }
+               break;
+       }
+       /* detach the reap list so the rules can be freed without the lock */
+       rule = chain->reap;
+       chain->reap = NULL;
+       IPFW_UH_WUNLOCK(chain);
+       ipfw_reap_rules(rule);
+       if (map)
+               free(map, M_IPFW);
+       return error;
+}
+
+/*
+ * Reset the accounting of one rule.
+ * Unless log_only is set, packet/byte counters and the timestamp are
+ * zeroed. In both cases, if the instruction at ACTION_PTR(rule) is an
+ * O_LOG, its remaining log budget is restored to max_log.
+ * Normally run under IPFW_UH_RLOCK, but these are idempotent ops
+ * so we only care that rules do not disappear.
+ */
+static void
+clear_counters(struct ip_fw *rule, int log_only)
+{
+       ipfw_insn_log *logcmd;
+
+       if (!log_only) {
+               rule->pcnt = 0;
+               rule->bcnt = 0;
+               rule->timestamp = 0;
+       }
+
+       logcmd = (ipfw_insn_log *)ACTION_PTR(rule);
+       if (logcmd->o.opcode != O_LOG)
+               return;
+       logcmd->log_left = logcmd->max_log;
+}
+
+/**
+ * Reset some or all counters on firewall rules.
+ * The argument `arg' is an u_int32_t. The low 16 bit are the rule number,
+ * the next 8 bits are the set number, the top 8 bits are the command:
+ *     0       work with rules from all set's;
+ *     1       work with rules only from specified set.
+ * Specified rule number is zero if we want to clear all entries.
+ * log_only is 1 if we only want to reset logs, zero otherwise.
+ *
+ * Returns 0 on success, EINVAL for a bad command or set number, or when
+ * a non-zero rule number does not exist in the chain.
+ * The chain is walked under IPFW_UH_RLOCK, which is released on every
+ * return path.
+ */
+static int
+zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only)
+{
+       struct ip_fw *rule;
+       char *msg;
+       int i;
+
+       uint16_t rulenum = arg & 0xffff;
+       uint8_t set = (arg >> 16) & 0xff;
+       uint8_t cmd = (arg >> 24) & 0xff;
+
+       if (cmd > 1)
+               return (EINVAL);
+       if (cmd == 1 && set > RESVD_SET)
+               return (EINVAL);
+
+       IPFW_UH_RLOCK(chain);
+       if (rulenum == 0) {
+               V_norule_counter = 0;
+               for (i = 0; i < chain->n_rules; i++) {
+                       rule = chain->map[i];
+                       /* Skip rules not in our set. */
+                       if (cmd == 1 && rule->set != set)
+                               continue;
+                       clear_counters(rule, log_only);
+               }
+               msg = log_only ? "All logging counts reset" :
+                   "Accounting cleared";
+       } else {
+               int cleared = 0;
+               for (i = 0; i < chain->n_rules; i++) {
+                       rule = chain->map[i];
+                       if (rule->rulenum == rulenum) {
+                               if (cmd == 0 || rule->set == set)
+                                       clear_counters(rule, log_only);
+                               /* the rule number exists, whatever its set */
+                               cleared = 1;
+                       }
+                       /* past the requested number, nothing more to find */
+                       if (rule->rulenum > rulenum)
+                               break;
+               }
+               if (!cleared) { /* we did not find any matching rules */
+                       /* release the lock we actually hold (UH read lock) */
+                       IPFW_UH_RUNLOCK(chain);
+                       return (EINVAL);
+               }
+               msg = log_only ? "logging count reset" : "cleared";
+       }
+       IPFW_UH_RUNLOCK(chain);
+
+       if (V_fw_verbose) {
+               int lev = LOG_SECURITY | LOG_NOTICE;
+
+               if (rulenum)
+                       log(lev, "ipfw: Entry %d %s.\n", rulenum, msg);
+               else
+                       log(lev, "ipfw: %s.\n", msg);
+       }
+       return (0);
+}
+
+/*
+ * Check validity of the structure before insert.
+ * Rules are simple, so this mostly need to check rule sizes.
+ *
+ * 'rule' is the candidate rule and 'size' the number of bytes supplied
+ * for it. The function verifies that size matches RULESIZE(rule), that
+ * the action offset lies inside the instruction list, and that every
+ * instruction has a length acceptable for its opcode. Exactly one action
+ * opcode is accepted and it must be the last instruction.
+ * Side effect: for O_LOG instructions log_left is reset to max_log.
+ *
+ * Returns 0 if the rule is acceptable, EINVAL otherwise; when INET6 is
+ * not defined, IPv6 opcodes yield EPROTONOSUPPORT.
+ */
+static int
+check_ipfw_struct(struct ip_fw *rule, int size)
+{
+       int l, cmdlen = 0;
+       int have_action=0;
+       ipfw_insn *cmd;
+
+       if (size < sizeof(*rule)) {
+               printf("ipfw: rule too short\n");
+               return (EINVAL);
+       }
+       /* first, check for valid size */
+       l = RULESIZE(rule);
+       if (l != size) {
+               printf("ipfw: size mismatch (have %d want %d)\n", size, l);
+               return (EINVAL);
+       }
+       if (rule->act_ofs >= rule->cmd_len) {
+               printf("ipfw: bogus action offset (%u > %u)\n",
+                   rule->act_ofs, rule->cmd_len - 1);
+               return (EINVAL);
+       }
+       /*
+        * Now go for the individual checks. Very simple ones, basically only
+        * instruction sizes.
+        * 'l' counts the instruction words still to be examined and 'cmd'
+        * advances by the length of each instruction.
+        */
+       for (l = rule->cmd_len, cmd = rule->cmd ;
+                       l > 0 ; l -= cmdlen, cmd += cmdlen) {
+               cmdlen = F_LEN(cmd);
+               if (cmdlen > l) {
+                       printf("ipfw: opcode %d size truncated\n",
+                           cmd->opcode);
+                       return EINVAL;
+               }
+               switch (cmd->opcode) {
+               /* opcodes that must be exactly one plain ipfw_insn */
+               case O_PROBE_STATE:
+               case O_KEEP_STATE:
+               case O_PROTO:
+               case O_IP_SRC_ME:
+               case O_IP_DST_ME:
+               case O_LAYER2:
+               case O_IN:
+               case O_FRAG:
+               case O_DIVERTED:
+               case O_IPOPT:
+               case O_IPTOS:
+               case O_IPPRECEDENCE:
+               case O_IPVER:
+               case O_TCPWIN:
+               case O_TCPFLAGS:
+               case O_TCPOPTS:
+               case O_ESTAB:
+               case O_VERREVPATH:
+               case O_VERSRCREACH:
+               case O_ANTISPOOF:
+               case O_IPSEC:
+#ifdef INET6
+               case O_IP6_SRC_ME:
+               case O_IP6_DST_ME:
+               case O_EXT_HDR:
+               case O_IP6:
+#endif
+               case O_IP4:
+               case O_TAG:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn))
+                               goto bad_size;
+                       break;
+
+               /* fib match: arg1 must be below rt_numfibs */
+               case O_FIB:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn))
+                               goto bad_size;
+                       if (cmd->arg1 >= rt_numfibs) {
+                               printf("ipfw: invalid fib number %d\n",
+                                       cmd->arg1);
+                               return EINVAL;
+                       }
+                       break;
+
+               /* same fib check, but setfib is also treated as an action */
+               case O_SETFIB:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn))
+                               goto bad_size;
+                       if (cmd->arg1 >= rt_numfibs) {
+                               printf("ipfw: invalid fib number %d\n",
+                                       cmd->arg1);
+                               return EINVAL;
+                       }
+                       goto check_action;
+
+               case O_UID:
+               case O_GID:
+               case O_JAIL:
+               case O_IP_SRC:
+               case O_IP_DST:
+               case O_TCPSEQ:
+               case O_TCPACK:
+               case O_PROB:
+               case O_ICMPTYPE:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+                               goto bad_size;
+                       break;
+
+               case O_LIMIT:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_limit))
+                               goto bad_size;
+                       break;
+
+               case O_LOG:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_log))
+                               goto bad_size;
+
+                       /* start with the full logging budget */
+                       ((ipfw_insn_log *)cmd)->log_left =
+                           ((ipfw_insn_log *)cmd)->max_log;
+
+                       break;
+
+               case O_IP_SRC_MASK:
+               case O_IP_DST_MASK:
+                       /* only odd command lengths */
+                       if ( !(cmdlen & 1) || cmdlen > 31)
+                               goto bad_size;
+                       break;
+
+               case O_IP_SRC_SET:
+               case O_IP_DST_SET:
+                       if (cmd->arg1 == 0 || cmd->arg1 > 256) {
+                               printf("ipfw: invalid set size %d\n",
+                                       cmd->arg1);
+                               return EINVAL;
+                       }
+                       /* one 32-bit word for every 32 entries, rounded up */
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
+                           (cmd->arg1+31)/32 )
+                               goto bad_size;
+                       break;
+
+               case O_IP_SRC_LOOKUP:
+               case O_IP_DST_LOOKUP:
+                       if (cmd->arg1 >= IPFW_TABLES_MAX) {
+                               printf("ipfw: invalid table number %d\n",
+                                   cmd->arg1);
+                               return (EINVAL);
+                       }
+                       /* three different instruction lengths are accepted */
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn) &&
+                           cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 &&
+                           cmdlen != F_INSN_SIZE(ipfw_insn_u32))
+                               goto bad_size;
+                       break;
+
+               case O_MACADDR2:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_mac))
+                               goto bad_size;
+                       break;
+
+               /* variable length opcodes, 1 to 31 words */
+               case O_NOP:
+               case O_IPID:
+               case O_IPTTL:
+               case O_IPLEN:
+               case O_TCPDATALEN:
+               case O_TAGGED:
+                       if (cmdlen < 1 || cmdlen > 31)
+                               goto bad_size;
+                       break;
+
+               case O_MAC_TYPE:
+               case O_IP_SRCPORT:
+               case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */
+                       if (cmdlen < 2 || cmdlen > 31)
+                               goto bad_size;
+                       break;
+
+               case O_RECV:
+               case O_XMIT:
+               case O_VIA:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_if))
+                               goto bad_size;
+                       break;
+
+               case O_ALTQ:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_altq))
+                               goto bad_size;
+                       break;
+
+               case O_PIPE:
+               case O_QUEUE:
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn))
+                               goto bad_size;
+                       goto check_action;
+
+               /* accepted only when IPFIREWALL_FORWARD is compiled in */
+               case O_FORWARD_IP:
+#ifdef IPFIREWALL_FORWARD
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_sa))
+                               goto bad_size;
+                       goto check_action;
+#else
+                       return EINVAL;
+#endif
+
+               /* the next three groups are refused when their hook is absent */
+               case O_DIVERT:
+               case O_TEE:
+                       if (ip_divert_ptr == NULL)
+                               return EINVAL;
+                       else
+                               goto check_size;
+               case O_NETGRAPH:
+               case O_NGTEE:
+                       if (!NG_IPFW_LOADED)
+                               return EINVAL;
+                       else
+                               goto check_size;
+               case O_NAT:
+                       if (!IPFW_NAT_LOADED)
+                               return EINVAL;
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_nat))
+                               goto bad_size;          
+                       goto check_action;
+               case O_FORWARD_MAC: /* XXX not implemented yet */
+               case O_CHECK_STATE:
+               case O_COUNT:
+               case O_ACCEPT:
+               case O_DENY:
+               case O_REJECT:
+#ifdef INET6
+               case O_UNREACH6:
+#endif
+               case O_SKIPTO:
+               case O_REASS:
+check_size:
+                       /* actions made of a single plain ipfw_insn */
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn))
+                               goto bad_size;
+check_action:
+                       /* common tail for every action: one only, and last */
+                       if (have_action) {
+                               printf("ipfw: opcode %d, multiple actions"
+                                       " not allowed\n",
+                                       cmd->opcode);
+                               return EINVAL;
+                       }
+                       have_action = 1;
+                       /* l == cmdlen means nothing follows this opcode */
+                       if (l != cmdlen) {
+                               printf("ipfw: opcode %d, action must be"
+                                       " last opcode\n",
+                                       cmd->opcode);
+                               return EINVAL;
+                       }
+                       break;
+#ifdef INET6
+               case O_IP6_SRC:
+               case O_IP6_DST:
+                       if (cmdlen != F_INSN_SIZE(struct in6_addr) +
+                           F_INSN_SIZE(ipfw_insn))
+                               goto bad_size;
+                       break;
+
+               case O_FLOW6ID:
+                       /* arg1 extra words follow the ipfw_insn_u32 */
+                       if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) +
+                           ((ipfw_insn_u32 *)cmd)->o.arg1)
+                               goto bad_size;
+                       break;
+
+               case O_IP6_SRC_MASK:
+               case O_IP6_DST_MASK:
+                       /* odd lengths only, as for the IPv4 masks */
+                       if ( !(cmdlen & 1) || cmdlen > 127)
+                               goto bad_size;
+                       break;
+               case O_ICMP6TYPE:
+                       if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) )
+                               goto bad_size;
+                       break;
+#endif
+
+               default:
+                       /*
+                        * Second switch only to tell IPv6 opcodes in a
+                        * kernel built without INET6 apart from opcodes
+                        * that are unknown altogether.
+                        */
+                       switch (cmd->opcode) {
+#ifndef INET6
+                       case O_IP6_SRC_ME:
+                       case O_IP6_DST_ME:
+                       case O_EXT_HDR:
+                       case O_IP6:
+                       case O_UNREACH6:
+                       case O_IP6_SRC:
+                       case O_IP6_DST:
+                       case O_FLOW6ID:
+                       case O_IP6_SRC_MASK:
+                       case O_IP6_DST_MASK:
+                       case O_ICMP6TYPE:
+                               printf("ipfw: no IPv6 support in kernel\n");
+                               return EPROTONOSUPPORT;
+#endif
+                       default:
+                               printf("ipfw: opcode %d, unknown opcode\n",
+                                       cmd->opcode);
+                               return EINVAL;
+                       }
+               }
+       }
+       if (have_action == 0) {
+               printf("ipfw: missing action\n");
+               return EINVAL;
+       }
+       return 0;
+
+bad_size:
+       /* shared exit for every instruction length mismatch */
+       printf("ipfw: opcode %d size %d wrong\n",
+               cmd->opcode, cmdlen);
+       return EINVAL;
+}
+
+/*
+ * Dump the static rules, followed by whatever ipfw_get_dynamic() adds,
+ * into buf (at most 'space' bytes) and return the number of bytes used.
+ * In each copied rule a non-zero timestamp is offset by the boot time
+ * and the next_rule field is overwritten with V_set_disable.
+ * Must be run under IPFW_UH_RLOCK
+ */
+static size_t
+ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space)
+{
+       char *out = buf;
+       char *limit = out + space;
+       time_t boot_seconds = boottime.tv_sec;
+       int idx;
+
+       for (idx = 0; idx < chain->n_rules; idx++) {
+               struct ip_fw *src = chain->map[idx];
+               struct ip_fw *copy = (struct ip_fw *)out;
+               int len = RULESIZE(src);
+
+               if (out + len > limit) { /* should not happen */
+                       printf("overflow dumping static rules\n");
+                       break;
+               }
+               bcopy(src, copy, len);
+               /*
+                * XXX HACK. The disable mask travels in the "next"
+                * pointer of every rule, in an attempt to keep the
+                * ABI unchanged.
+                */
+               bcopy(&V_set_disable, &copy->next_rule, sizeof(V_set_disable));
+               if (copy->timestamp != 0)
+                       copy->timestamp += boot_seconds;
+               out += len;
+       }
+       ipfw_get_dynamic(&out, limit); /* protected by the dynamic lock */
+       return (out - (char *)buf);
+}
+
+
+/**
+ * {set|get}sockopt parser.
+ */
+int
+ipfw_ctl(struct sockopt *sopt)
+{
+#define        RULE_MAXSIZE    (256*sizeof(u_int32_t))
+       int error;
+       size_t size;
+       struct ip_fw *buf, *rule;
+       struct ip_fw_chain *chain;
+       u_int32_t rulenum[2];
+
+       error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW);
+       if (error)
+               return (error);
+
+       /*
+        * Disallow modifications in really-really secure mode, but still allow
+        * the logging counters to be reset.
+        */
+       if (sopt->sopt_name == IP_FW_ADD ||
+           (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) {
+               error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+               if (error)
+                       return (error);
+       }
+
+       chain = &V_layer3_chain;
+       error = 0;
+
+       switch (sopt->sopt_name) {
+       case IP_FW_GET:
+               /*
+                * pass up a copy of the current rules. Static rules
+                * come first (the last of which has number IPFW_DEFAULT_RULE),
+                * followed by a possibly empty list of dynamic rule.
+                * The last dynamic rule has NULL in the "next" field.
+                *
+                * Note that the calculated size is used to bound the
+                * amount of data returned to the user.  The rule set may
+                * change between calculating the size and returning the
+                * data in which case we'll just return what fits.
+                */
+               for (;;) {
+                       int len = 0, want;
+
+                       size = chain->static_len;
+                       size += ipfw_dyn_len();
+               if (size >= sopt->sopt_valsize)
+                       break;
+               buf = malloc(size, M_TEMP, M_WAITOK);
+                       if (buf == NULL)
+                               break;
+                       IPFW_UH_RLOCK(chain);
+                       /* check again how much space we need */
+                       want = chain->static_len + ipfw_dyn_len();
+                       if (size >= want)
+                               len = ipfw_getrules(chain, buf, size);
+                       IPFW_UH_RUNLOCK(chain);
+                       if (size >= want)
+                               error = sooptcopyout(sopt, buf, len);
+               free(buf, M_TEMP);
+                       if (size >= want)
+                               break;
+               }
+               break;
+
+       case IP_FW_FLUSH:
+               /* locking is done within del_entry() */
+               error = del_entry(chain, 0); /* special case, rule=0, cmd=0 means all */
+               break;
+
+       case IP_FW_ADD:
+               rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
+               error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
+                       sizeof(struct ip_fw) );
+               if (error == 0)
+                       error = check_ipfw_struct(rule, sopt->sopt_valsize);
+               if (error == 0) {
+                       /* locking is done within ipfw_add_rule() */
+                       error = ipfw_add_rule(chain, rule);
+                       size = RULESIZE(rule);
+                       if (!error && sopt->sopt_dir == SOPT_GET)
+                               error = sooptcopyout(sopt, rule, size);
+               }
+               free(rule, M_TEMP);
+               break;
+
+       case IP_FW_DEL:
+               /*
+                * IP_FW_DEL is used for deleting single rules or sets,
+                * and (ab)used to atomically manipulate sets. Argument size
+                * is used to distinguish between the two:
+                *    sizeof(u_int32_t)
+                *      delete single rule or set of rules,
+                *      or reassign rules (or sets) to a different set.
+                *    2*sizeof(u_int32_t)
+                *      atomic disable/enable sets.
+                *      first u_int32_t contains sets to be disabled,
+                *      second u_int32_t contains sets to be enabled.
+                */
+               error = sooptcopyin(sopt, rulenum,
+                       2*sizeof(u_int32_t), sizeof(u_int32_t));
+               if (error)
+                       break;
+               size = sopt->sopt_valsize;
+               if (size == sizeof(u_int32_t) && rulenum[0] != 0) {
+                       /* delete or reassign, locking done in del_entry() */
+                       error = del_entry(chain, rulenum[0]);
+               } else if (size == 2*sizeof(u_int32_t)) { /* set enable/disable */
+                       IPFW_UH_WLOCK(chain);
+                       V_set_disable =
+                           (V_set_disable | rulenum[0]) & ~rulenum[1] &
+                           ~(1<<RESVD_SET); /* set RESVD_SET always enabled */
+                       IPFW_UH_WUNLOCK(chain);
+               } else
+                       error = EINVAL;
+               break;
+
+       case IP_FW_ZERO:
+       case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */
+               rulenum[0] = 0;
+               if (sopt->sopt_val != 0) {
+                   error = sooptcopyin(sopt, rulenum,
+                           sizeof(u_int32_t), sizeof(u_int32_t));
+                   if (error)
+                       break;
+               }
+               error = zero_entry(chain, rulenum[0],
+                       sopt->sopt_name == IP_FW_RESETLOG);
+               break;
+
+       /*--- TABLE manipulations are protected by the IPFW_LOCK ---*/
+       case IP_FW_TABLE_ADD:
+               {
+                       ipfw_table_entry ent;
+
+                       error = sooptcopyin(sopt, &ent,
+                           sizeof(ent), sizeof(ent));
+                       if (error)
+                               break;
+                       error = ipfw_add_table_entry(chain, ent.tbl,
+                           ent.addr, ent.masklen, ent.value);
+               }
+               break;
+
+       case IP_FW_TABLE_DEL:
+               {
+                       ipfw_table_entry ent;
+
+                       error = sooptcopyin(sopt, &ent,
+                           sizeof(ent), sizeof(ent));
+                       if (error)
+                               break;
+                       error = ipfw_del_table_entry(chain, ent.tbl,
+                           ent.addr, ent.masklen);
+               }
+               break;
+
+       case IP_FW_TABLE_FLUSH:
+               {
+                       u_int16_t tbl;
+
+                       error = sooptcopyin(sopt, &tbl,
+                           sizeof(tbl), sizeof(tbl));
+                       if (error)
+                               break;
+                       IPFW_WLOCK(chain);
+                       error = ipfw_flush_table(chain, tbl);
+                       IPFW_WUNLOCK(chain);
+               }
+               break;
+
+       case IP_FW_TABLE_GETSIZE:
+               {
+                       u_int32_t tbl, cnt;
+
+                       if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl),
+                           sizeof(tbl))))
+                               break;
+                       IPFW_RLOCK(chain);
+                       error = ipfw_count_table(chain, tbl, &cnt);
+                       IPFW_RUNLOCK(chain);
+                       if (error)
+                               break;
+                       error = sooptcopyout(sopt, &cnt, sizeof(cnt));
+               }
+               break;
+
+       case IP_FW_TABLE_LIST:
+               {
+                       ipfw_table *tbl;
+
+                       if (sopt->sopt_valsize < sizeof(*tbl)) {
+                               error = EINVAL;
+                               break;
+                       }
+                       size = sopt->sopt_valsize;
+                       tbl = malloc(size, M_TEMP, M_WAITOK);
+                       error = sooptcopyin(sopt, tbl, size, sizeof(*tbl));
+                       if (error) {
+                               free(tbl, M_TEMP);
+                               break;
+                       }
+                       tbl->size = (size - sizeof(*tbl)) /
+                           sizeof(ipfw_table_entry);
+                       IPFW_RLOCK(chain);
+                       error = ipfw_dump_table(chain, tbl);
+                       IPFW_RUNLOCK(chain);
+                       if (error) {
+                               free(tbl, M_TEMP);
+                               break;
+                       }
+                       error = sooptcopyout(sopt, tbl, size);
+                       free(tbl, M_TEMP);
+               }
+               break;
+
+       /*--- NAT operations are protected by the IPFW_LOCK ---*/
+       case IP_FW_NAT_CFG:
+               if (IPFW_NAT_LOADED)
+                       error = ipfw_nat_cfg_ptr(sopt);
+               else {
+                       printf("IP_FW_NAT_CFG: %s\n",
+                           "ipfw_nat not present, please load it");
+                       error = EINVAL;
+               }
+               break;
+
+       case IP_FW_NAT_DEL:
+               if (IPFW_NAT_LOADED)
+                       error = ipfw_nat_del_ptr(sopt);
+               else {
+                       printf("IP_FW_NAT_DEL: %s\n",
+                           "ipfw_nat not present, please load it");
+                       error = EINVAL;
+               }
+               break;
+
+       case IP_FW_NAT_GET_CONFIG:
+               if (IPFW_NAT_LOADED)
+                       error = ipfw_nat_get_cfg_ptr(sopt);
+               else {
+                       printf("IP_FW_NAT_GET_CFG: %s\n",
+                           "ipfw_nat not present, please load it");
+                       error = EINVAL;
+               }
+               break;
+
+       case IP_FW_NAT_GET_LOG:
+               if (IPFW_NAT_LOADED)
+                       error = ipfw_nat_get_log_ptr(sopt);
+               else {
+                       printf("IP_FW_NAT_GET_LOG: %s\n",
+                           "ipfw_nat not present, please load it");
+                       error = EINVAL;
+               }
+               break;
+
+       default:
+               printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name);
+               error = EINVAL;
+       }
+
+       return (error);
+#undef RULE_MAXSIZE
+}
+/* end of file */