X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=dummynet%2Fip_fw2.c;h=21d1b416c0a07517f43011d5abc562fd95a9c822;hb=4e189c94aef3d3e9a4e8edfd2bb989feeb5d5e26;hp=d05fa7473d3548b00e7e38e4b0f2bbe902d253f7;hpb=1f3693d89c1c88e895721e21751c354e74b99627;p=ipfw.git diff --git a/dummynet/ip_fw2.c b/dummynet/ip_fw2.c index d05fa74..21d1b41 100644 --- a/dummynet/ip_fw2.c +++ b/dummynet/ip_fw2.c @@ -44,10 +44,11 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz #endif #include "opt_inet6.h" #include "opt_ipsec.h" -#include "opt_mac.h" #include #include +#include +#include #include #include #include @@ -56,6 +57,7 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz #include #include #include +#include #include #include #include @@ -66,6 +68,7 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz #include #include #include +#include #define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */ @@ -84,12 +87,14 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz #include #include #include + #include #include #include #ifdef INET6 #include +#include #endif #include /* XXX for in_cksum */ @@ -98,8 +103,8 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz #include #endif -#include "missing.h" - +static VNET_DEFINE(int, ipfw_vnet_ready) = 0; +#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) /* * set_disable contains one bit per set value (0..31). * If the bit is set, all rules with the corresponding set @@ -108,30 +113,27 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz * and CANNOT be disabled. * Rules in set RESVD_SET can only be deleted explicitly. */ -static u_int32_t set_disable; -static int fw_verbose; -static struct callout ipfw_timeout; -static int verbose_limit; +static VNET_DEFINE(u_int32_t, set_disable); +static VNET_DEFINE(int, fw_verbose); +static VNET_DEFINE(struct callout, ipfw_timeout); +static VNET_DEFINE(int, verbose_limit); -static uma_zone_t ipfw_dyn_rule_zone; +#define V_set_disable VNET(set_disable) +#define V_fw_verbose VNET(fw_verbose) +#define V_ipfw_timeout VNET(ipfw_timeout) +#define V_verbose_limit VNET(verbose_limit) -/* - * Data structure to cache our ucred related - * information. This structure only gets used if - * the user specified UID/GID based constraints in - * a firewall rule. - */ -struct ip_fw_ugid { - gid_t fw_groups[NGROUPS]; - int fw_ngroups; - uid_t fw_uid; - int fw_prid; -}; +#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT +static int default_to_accept = 1; +#else +static int default_to_accept; +#endif +static uma_zone_t ipfw_dyn_rule_zone; /* * list of rules for layer 3 */ -struct ip_fw_chain layer3_chain; +VNET_DEFINE(struct ip_fw_chain, layer3_chain); MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); @@ -148,30 +150,54 @@ struct table_entry { u_int32_t value; }; -static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ +static VNET_DEFINE(int, autoinc_step); +#define V_autoinc_step VNET(autoinc_step) +static VNET_DEFINE(int, fw_deny_unknown_exthdrs); +#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); #ifdef SYSCTL_NODE SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &fw_enable, 0, +SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0, ipfw_chg_hook, "I", "Enable ipfw"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, - &autoinc_step, 0, "Rule number autincrement step"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, - CTLFLAG_RW | CTLFLAG_SECURE3, - &fw_one_pass, 0, +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, + CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, + "Rule number auto-increment step"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass, + CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, "Only do a single pass through ipfw when using dummynet(4)"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, - CTLFLAG_RW | CTLFLAG_SECURE3, - &fw_verbose, 0, "Log matches to ipfw rules"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, - &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose, + CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, + "Log matches to ipfw rules"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, + CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, + "Set upper limit of matches of ipfw rules logged"); +unsigned int dummy_default_rule = IPFW_DEFAULT_RULE; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, - NULL, IPFW_DEFAULT_RULE, "The default/max possible rule number."); + &dummy_default_rule, IPFW_DEFAULT_RULE, + "The default/max possible rule number."); +unsigned int dummy_tables_max = IPFW_TABLES_MAX; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, - NULL, IPFW_TABLES_MAX, "The maximum number of tables."); + &dummy_tables_max, IPFW_TABLES_MAX, + "The maximum number of tables."); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, + &default_to_accept, 0, + "Make the default rule accept all packets."); +TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); + +#ifdef INET6 +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); +SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0, + ipfw_chg_hook, "I", "Enable ipfw+6"); +SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, + CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, + "Deny packets with unknown IPv6 Extension Headers"); +#endif /* INET6 */ + #endif /* SYSCTL_NODE */ /* @@ -210,9 +236,13 @@ SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, * obey the 'randomized match', and we do not do multiple * passes through the firewall. XXX check the latter!!! */ -static ipfw_dyn_rule **ipfw_dyn_v = NULL; -static u_int32_t dyn_buckets = 256; /* must be power of 2 */ -static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */ +static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); +static VNET_DEFINE(u_int32_t, dyn_buckets); +static VNET_DEFINE(u_int32_t, curr_dyn_buckets); + +#define V_ipfw_dyn_v VNET(ipfw_dyn_v) +#define V_dyn_buckets VNET(dyn_buckets) +#define V_curr_dyn_buckets VNET(curr_dyn_buckets) #if defined( __linux__ ) || defined( _WIN32 ) DEFINE_SPINLOCK(ipfw_dyn_mtx); @@ -226,15 +256,26 @@ static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ #define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) #define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) +static struct mbuf *send_pkt(struct mbuf *, struct ipfw_flow_id *, + u_int32_t, u_int32_t, int); + + /* * Timeouts for various events in handing dynamic rules. */ -static u_int32_t dyn_ack_lifetime = 300; -static u_int32_t dyn_syn_lifetime = 20; -static u_int32_t dyn_fin_lifetime = 1; -static u_int32_t dyn_rst_lifetime = 1; -static u_int32_t dyn_udp_lifetime = 10; -static u_int32_t dyn_short_lifetime = 5; +static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); +static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); +static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); +static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); +static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); +static VNET_DEFINE(u_int32_t, dyn_short_lifetime); + +#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) +#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) +#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) +#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) +#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) +#define V_dyn_short_lifetime VNET(dyn_short_lifetime) /* * Keepalives are sent if dyn_keepalive is set. They are sent every @@ -244,57 +285,63 @@ static u_int32_t dyn_short_lifetime = 5; * than dyn_keepalive_period. */ -static u_int32_t dyn_keepalive_interval = 20; -static u_int32_t dyn_keepalive_period = 5; -static u_int32_t dyn_keepalive = 1; /* do send keepalives */ +static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); +static VNET_DEFINE(u_int32_t, dyn_keepalive_period); +static VNET_DEFINE(u_int32_t, dyn_keepalive); -static u_int32_t static_count; /* # of static rules */ -static u_int32_t static_len; /* size in bytes of static rules */ -static u_int32_t dyn_count; /* # of dynamic rules */ -static u_int32_t dyn_max = 4096; /* max # of dynamic rules */ +#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) +#define V_dyn_keepalive_period VNET(dyn_keepalive_period) +#define V_dyn_keepalive VNET(dyn_keepalive) -#ifdef SYSCTL_NODE -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, - &dyn_buckets, 0, "Number of dyn. buckets"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, - &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, - &dyn_count, 0, "Number of dyn. rules"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, - &dyn_max, 0, "Max number of dyn. rules"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, - &static_count, 0, "Number of static rules"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, - &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, - &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, - &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, - &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, - &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, - &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, - &dyn_keepalive, 0, "Enable keepalives for dyn. rules"); -#endif /* SYSCTL_NODE */ +static VNET_DEFINE(u_int32_t, static_count); /* # of static rules */ +static VNET_DEFINE(u_int32_t, static_len); /* bytes of static rules */ +static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ +static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ + +#define V_static_count VNET(static_count) +#define V_static_len VNET(static_len) +#define V_dyn_count VNET(dyn_count) +#define V_dyn_max VNET(dyn_max) -#ifdef INET6 -/* - * IPv6 specific variables - */ #ifdef SYSCTL_NODE -SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, + CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, + "Number of dyn. buckets"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, + CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, + "Current Number of dyn. buckets"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, + CTLFLAG_RD, &VNET_NAME(dyn_count), 0, + "Number of dyn. rules"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, + CTLFLAG_RW, &VNET_NAME(dyn_max), 0, + "Max number of dyn. rules"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, + CTLFLAG_RD, &VNET_NAME(static_count), 0, + "Number of static rules"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, + CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, + "Lifetime of dyn. rules for acks"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, + CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, + "Lifetime of dyn. rules for syn"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, + CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, + "Lifetime of dyn. rules for fin"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, + CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, + "Lifetime of dyn. rules for rst"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, + CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, + "Lifetime of dyn. rules for UDP"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, + CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, + "Lifetime of dyn. rules for other situations"); +SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, + CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, + "Enable keepalives for dyn. rules"); #endif /* SYSCTL_NODE */ -static struct sysctl_ctx_list ip6_fw_sysctl_ctx; -static struct sysctl_oid *ip6_fw_sysctl_tree; -#endif /* INET6 */ - -static int fw_deny_unknown_exthdrs = 1; - - /* * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T * Other macros just cast void * into the appropriate type @@ -461,14 +508,17 @@ iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) #if !defined( __linux__ ) && !defined( _WIN32 ) struct ifaddr *ia; - /* XXX lock? */ + if_addr_rlock(ifp); TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { if (ia->ifa_addr->sa_family != AF_INET) continue; if (cmd->p.ip.s_addr == ((struct sockaddr_in *) - (ia->ifa_addr))->sin_addr.s_addr) + (ia->ifa_addr))->sin_addr.s_addr) { + if_addr_runlock(ifp); return(1); /* match */ + } } + if_addr_runlock(ifp); #endif } return(0); /* no match, fail ... */ @@ -507,7 +557,7 @@ verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = src; - in_rtalloc_ign(&ro, RTF_CLONING, fib); + in_rtalloc_ign(&ro, 0, fib); if (ro.ro_rt == NULL) return 0; @@ -572,17 +622,22 @@ search_ip6_addr_net (struct in6_addr * ip6_addr) struct in6_ifaddr *fdm; struct in6_addr copia; - TAILQ_FOREACH(mdc, &ifnet, if_link) - TAILQ_FOREACH(mdc2, &mdc->if_addrlist, ifa_list) { + TAILQ_FOREACH(mdc, &V_ifnet, if_link) { + if_addr_rlock(mdc); + TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) { if (mdc2->ifa_addr->sa_family == AF_INET6) { fdm = (struct in6_ifaddr *)mdc2; copia = fdm->ia_addr.sin6_addr; /* need for leaving scope_id in the sock_addr */ in6_clearscope(&copia); - if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) + if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) { + if_addr_runlock(mdc); return 1; + } } } + if_addr_runlock(mdc); + } return 0; } @@ -599,7 +654,7 @@ verify_path6(struct in6_addr *src, struct ifnet *ifp) dst->sin6_len = sizeof(*dst); dst->sin6_addr = *src; /* XXX MRT 0 for ipv6 at this time */ - rtalloc_ign((struct route *)&ro, RTF_CLONING); + rtalloc_ign((struct route *)&ro, 0); if (ro.ro_rt == NULL) return 0; @@ -668,60 +723,18 @@ send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) m = args->m; if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { struct tcphdr *tcp; - tcp_seq ack, seq; - int flags; - struct { - struct ip6_hdr ip6; - struct tcphdr th; - } ti; tcp = (struct tcphdr *)((char *)ip6 + hlen); - if ((tcp->th_flags & TH_RST) != 0) { - m_freem(m); - args->m = NULL; - return; - } - - ti.ip6 = *ip6; - ti.th = *tcp; - ti.th.th_seq = ntohl(ti.th.th_seq); - ti.th.th_ack = ntohl(ti.th.th_ack); - ti.ip6.ip6_nxt = IPPROTO_TCP; - - if (ti.th.th_flags & TH_ACK) { - ack = 0; - seq = ti.th.th_ack; - flags = TH_RST; - } else { - ack = ti.th.th_seq; - if ((m->m_flags & M_PKTHDR) != 0) { - /* - * total new data to ACK is: - * total packet length, - * minus the header length, - * minus the tcp header length. - */ - ack += m->m_pkthdr.len - hlen - - (ti.th.th_off << 2); - } else if (ip6->ip6_plen) { - ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) - - hlen - (ti.th.th_off << 2); - } else { - m_freem(m); - return; - } - if (tcp->th_flags & TH_SYN) - ack++; - seq = 0; - flags = TH_RST|TH_ACK; + if ((tcp->th_flags & TH_RST) == 0) { + struct mbuf *m0; + m0 = send_pkt(args->m, &(args->f_id), + ntohl(tcp->th_seq), ntohl(tcp->th_ack), + tcp->th_flags | TH_RST); + if (m0 != NULL) + ip6_output(m0, NULL, NULL, 0, NULL, NULL, + NULL); } - bcopy(&ti, ip6, sizeof(ti)); - /* - * m is only used to recycle the mbuf - * The data in it is never read so we don't need - * to correct the offsets or anything - */ - tcp_respond(NULL, ip6, tcp, m, ack, seq, flags); + m_freem(m); } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ #if 0 /* @@ -744,7 +757,9 @@ send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) #endif /* INET6 */ -static u_int64_t norule_counter; /* counter for ipfw_log(NULL...) */ +/* counter for ipfw_log(NULL...) */ +static VNET_DEFINE(u_int64_t, norule_counter); +#define V_norule_counter VNET(norule_counter) #define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 #define SNP(buf) buf, sizeof(buf) @@ -767,11 +782,11 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, proto[0] = '\0'; if (f == NULL) { /* bogus pkt */ - if (verbose_limit != 0 && norule_counter >= verbose_limit) + if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit) return; - norule_counter++; - if (norule_counter == verbose_limit) - limit_reached = verbose_limit; + V_norule_counter++; + if (V_norule_counter == V_verbose_limit) + limit_reached = V_verbose_limit; action = "Refuse"; } else { /* O_LOG is the first action, find the real one */ ipfw_insn *cmd = ACTION_PTR(f); @@ -878,6 +893,9 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, case O_NAT: action = "Nat"; break; + case O_REASS: + action = "Reass"; + break; default: action = "UNKNOWN"; break; @@ -889,7 +907,11 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, } else { int len; - char src[48], dst[48]; +#ifdef INET6 + char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; +#else + char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; +#endif struct icmphdr *icmp; struct tcphdr *tcp; struct udphdr *udp; @@ -1036,10 +1058,36 @@ hash_packet(struct ipfw_flow_id *id) else #endif /* INET6 */ i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); - i &= (curr_dyn_buckets - 1); + i &= (V_curr_dyn_buckets - 1); return i; } +static __inline void +unlink_dyn_rule_print(struct ipfw_flow_id *id) +{ + struct in_addr da; +#ifdef INET6 + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; +#else + char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; +#endif + +#ifdef INET6 + if (IS_IP6_FLOW_ID(id)) { + ip6_sprintf(src, &id->src_ip6); + ip6_sprintf(dst, &id->dst_ip6); + } else +#endif + { + da.s_addr = htonl(id->src_ip); + inet_ntoa_r(da, src); + da.s_addr = htonl(id->dst_ip); + inet_ntoa_r(da, dst); + } + printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", + src, id->src_port, dst, id->dst_port, V_dyn_count - 1); +} + /** * unlink a dynamic rule from a chain. prev is a pointer to * the previous one, q is a pointer to the rule to delete, @@ -1052,14 +1100,12 @@ hash_packet(struct ipfw_flow_id *id) /* remove a refcount to the parent */ \ if (q->dyn_type == O_LIMIT) \ q->parent->count--; \ - DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\ - (q->id.src_ip), (q->id.src_port), \ - (q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); ) \ + DEB(unlink_dyn_rule_print(&q->id);) \ if (prev != NULL) \ prev->next = q = q->next; \ else \ head = q = q->next; \ - dyn_count--; \ + V_dyn_count--; \ uma_zfree(ipfw_dyn_rule_zone, old_q); } #define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) @@ -1088,7 +1134,7 @@ remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) IPFW_DYN_LOCK_ASSERT(); - if (ipfw_dyn_v == NULL || dyn_count == 0) + if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) return; /* do not expire more than once per second, it is useless */ if (!FORCE && last_remove == time_uptime) @@ -1101,8 +1147,8 @@ remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) * them in a second pass. */ next_pass: - for (i = 0 ; i < curr_dyn_buckets ; i++) { - for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) { + for (i = 0 ; i < V_curr_dyn_buckets ; i++) { + for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { /* * Logic can become complex here, so we split tests. */ @@ -1129,7 +1175,7 @@ next_pass: goto next; } if (q->dyn_type != O_LIMIT_PARENT || !q->count) { - UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); continue; } next: @@ -1162,14 +1208,14 @@ lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, IPFW_DYN_LOCK_ASSERT(); - if (ipfw_dyn_v == NULL) + if (V_ipfw_dyn_v == NULL) goto done; /* not found */ i = hash_packet( pkt ); - for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) { + for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { if (q->dyn_type == O_LIMIT_PARENT && q->count) goto next; if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ - UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); continue; } if (pkt->proto == q->id.proto && @@ -1219,8 +1265,8 @@ next: if ( prev != NULL) { /* found and not in front */ prev->next = q->next; - q->next = ipfw_dyn_v[i]; - ipfw_dyn_v[i] = q; + q->next = V_ipfw_dyn_v[i]; + V_ipfw_dyn_v[i] = q; } if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); @@ -1230,7 +1276,7 @@ next: q->state |= (dir == MATCH_FORWARD ) ? flags : (flags << 8); switch (q->state) { case TH_SYN: /* opening */ - q->expire = time_uptime + dyn_syn_lifetime; + q->expire = time_uptime + V_dyn_syn_lifetime; break; case BOTH_SYN: /* move to established */ @@ -1253,13 +1299,13 @@ next: } } } - q->expire = time_uptime + dyn_ack_lifetime; + q->expire = time_uptime + V_dyn_ack_lifetime; break; case BOTH_SYN | BOTH_FIN: /* both sides closed */ - if (dyn_fin_lifetime >= dyn_keepalive_period) - dyn_fin_lifetime = dyn_keepalive_period - 1; - q->expire = time_uptime + dyn_fin_lifetime; + if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) + V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; + q->expire = time_uptime + V_dyn_fin_lifetime; break; default: @@ -1271,16 +1317,16 @@ next: if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) printf("invalid state: 0x%x\n", q->state); #endif - if (dyn_rst_lifetime >= dyn_keepalive_period) - dyn_rst_lifetime = dyn_keepalive_period - 1; - q->expire = time_uptime + dyn_rst_lifetime; + if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) + V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; + q->expire = time_uptime + V_dyn_rst_lifetime; break; } } else if (pkt->proto == IPPROTO_UDP) { - q->expire = time_uptime + dyn_udp_lifetime; + q->expire = time_uptime + V_dyn_udp_lifetime; } else { /* other protocols */ - q->expire = time_uptime + dyn_short_lifetime; + q->expire = time_uptime + V_dyn_short_lifetime; } done: if (match_direction) @@ -1313,21 +1359,21 @@ realloc_dynamic_table(void) * default to 1024. */ - if (dyn_buckets > 65536) - dyn_buckets = 1024; - if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */ - dyn_buckets = curr_dyn_buckets; /* reset */ + if (V_dyn_buckets > 65536) + V_dyn_buckets = 1024; + if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ + V_dyn_buckets = V_curr_dyn_buckets; /* reset */ return; } - curr_dyn_buckets = dyn_buckets; - if (ipfw_dyn_v != NULL) - free(ipfw_dyn_v, M_IPFW); + V_curr_dyn_buckets = V_dyn_buckets; + if (V_ipfw_dyn_v != NULL) + free(V_ipfw_dyn_v, M_IPFW); for (;;) { - ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), + V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), M_IPFW, M_NOWAIT | M_ZERO); - if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) + if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) break; - curr_dyn_buckets /= 2; + V_curr_dyn_buckets /= 2; } } @@ -1349,10 +1395,10 @@ add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) IPFW_DYN_LOCK_ASSERT(); - if (ipfw_dyn_v == NULL || - (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) { + if (V_ipfw_dyn_v == NULL || + (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { realloc_dynamic_table(); - if (ipfw_dyn_v == NULL) + if (V_ipfw_dyn_v == NULL) return NULL; /* failed ! */ } i = hash_packet(id); @@ -1374,21 +1420,42 @@ add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) } r->id = *id; - r->expire = time_uptime + dyn_syn_lifetime; + r->expire = time_uptime + V_dyn_syn_lifetime; r->rule = rule; r->dyn_type = dyn_type; r->pcnt = r->bcnt = 0; r->count = 0; r->bucket = i; - r->next = ipfw_dyn_v[i]; - ipfw_dyn_v[i] = r; - dyn_count++; - DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n", - dyn_type, - (r->id.src_ip), (r->id.src_port), - (r->id.dst_ip), (r->id.dst_port), - dyn_count ); ) + r->next = V_ipfw_dyn_v[i]; + V_ipfw_dyn_v[i] = r; + V_dyn_count++; + DEB({ + struct in_addr da; +#ifdef INET6 + char src[INET6_ADDRSTRLEN]; + char dst[INET6_ADDRSTRLEN]; +#else + char src[INET_ADDRSTRLEN]; + char dst[INET_ADDRSTRLEN]; +#endif + +#ifdef INET6 + if (IS_IP6_FLOW_ID(&(r->id))) { + ip6_sprintf(src, &r->id.src_ip6); + ip6_sprintf(dst, &r->id.dst_ip6); + } else +#endif + { + da.s_addr = htonl(r->id.src_ip); + inet_ntoa_r(da, src); + da.s_addr = htonl(r->id.dst_ip); + inet_ntoa_r(da, dst); + } + printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", + dyn_type, src, r->id.src_port, dst, r->id.dst_port, + V_dyn_count); + }) return r; } @@ -1404,10 +1471,10 @@ lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) IPFW_DYN_LOCK_ASSERT(); - if (ipfw_dyn_v) { + if (V_ipfw_dyn_v) { int is_v6 = IS_IP6_FLOW_ID(pkt); i = hash_packet( pkt ); - for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) + for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) if (q->dyn_type == O_LIMIT_PARENT && rule== q->rule && pkt->proto == q->id.proto && @@ -1424,7 +1491,7 @@ lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) pkt->dst_ip == q->id.dst_ip) ) ) { - q->expire = time_uptime + dyn_short_lifetime; + q->expire = time_uptime + V_dyn_short_lifetime; DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) return q; } @@ -1445,20 +1512,37 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, static int last_log; ipfw_dyn_rule *q; struct in_addr da; - char src[48], dst[48]; +#ifdef INET6 + char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; +#else + char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; +#endif src[0] = '\0'; dst[0] = '\0'; + IPFW_DYN_LOCK(); + DEB( - printf("ipfw: %s: type %d 0x%08x %u -> 0x%08x %u\n", - __func__, cmd->o.opcode, - (args->f_id.src_ip), (args->f_id.src_port), - (args->f_id.dst_ip), (args->f_id.dst_port)); +#ifdef INET6 + if (IS_IP6_FLOW_ID(&(args->f_id))) { + ip6_sprintf(src, &args->f_id.src_ip6); + ip6_sprintf(dst, &args->f_id.dst_ip6); + } else +#endif + { + da.s_addr = htonl(args->f_id.src_ip); + inet_ntoa_r(da, src); + da.s_addr = htonl(args->f_id.dst_ip); + inet_ntoa_r(da, dst); + } + printf("ipfw: %s: type %d %s %u -> %s %u\n", + __func__, cmd->o.opcode, src, args->f_id.src_port, + dst, args->f_id.dst_port); + src[0] = '\0'; + dst[0] = '\0'; ) - IPFW_DYN_LOCK(); - q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); if (q != NULL) { /* should never occur */ @@ -1471,11 +1555,11 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, return (0); } - if (dyn_count >= dyn_max) + if (V_dyn_count >= V_dyn_max) /* Run out of slots, try to remove any expired rule. */ remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); - if (dyn_count >= dyn_max) { + if (V_dyn_count >= V_dyn_max) { if (last_log != time_uptime) { last_log = time_uptime; printf("ipfw: %s: Too many dynamic rules\n", __func__); @@ -1537,7 +1621,7 @@ install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, /* See if we can remove some expired rule. */ remove_dyn_rule(rule, parent); if (parent->count >= conn_limit) { - if (fw_verbose && last_log != time_uptime) { + if (V_fw_verbose && last_log != time_uptime) { last_log = time_uptime; #ifdef INET6 /* @@ -1607,85 +1691,141 @@ send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, return NULL; #else struct mbuf *m; - struct ip *ip; - struct tcphdr *tcp; + int len, dir; + struct ip *h = NULL; /* stupid compiler */ +#ifdef INET6 + struct ip6_hdr *h6 = NULL; +#endif + struct tcphdr *th = NULL; MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == 0) + if (m == NULL) return (NULL); - m->m_pkthdr.rcvif = (struct ifnet *)0; M_SETFIB(m, id->fib); #ifdef MAC if (replyto != NULL) - mac_create_mbuf_netlayer(replyto, m); + mac_netinet_firewall_reply(replyto, m); else - mac_create_mbuf_from_firewall(m); + mac_netinet_firewall_send(m); #else (void)replyto; /* don't warn about unused arg */ #endif - m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr); + switch (id->addr_type) { + case 4: + len = sizeof(struct ip) + sizeof(struct tcphdr); + break; +#ifdef INET6 + case 6: + len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); + break; +#endif + default: + /* XXX: log me?!? */ + m_freem(m); + return (NULL); + } + dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); + m->m_data += max_linkhdr; + m->m_flags |= M_SKIP_FIREWALL; + m->m_pkthdr.len = m->m_len = len; + m->m_pkthdr.rcvif = NULL; + bzero(m->m_data, len); + + switch (id->addr_type) { + case 4: + h = mtod(m, struct ip *); + + /* prepare for checksum */ + h->ip_p = IPPROTO_TCP; + h->ip_len = htons(sizeof(struct tcphdr)); + if (dir) { + h->ip_src.s_addr = htonl(id->src_ip); + h->ip_dst.s_addr = htonl(id->dst_ip); + } else { + h->ip_src.s_addr = htonl(id->dst_ip); + h->ip_dst.s_addr = htonl(id->src_ip); + } - ip = mtod(m, struct ip *); - bzero(ip, m->m_len); - tcp = (struct tcphdr *)(ip + 1); /* no IP options */ - ip->ip_p = IPPROTO_TCP; - tcp->th_off = 5; - /* - * Assume we are sending a RST (or a keepalive in the reverse - * direction), swap src and destination addresses and ports. - */ - ip->ip_src.s_addr = htonl(id->dst_ip); - ip->ip_dst.s_addr = htonl(id->src_ip); - tcp->th_sport = htons(id->dst_port); - tcp->th_dport = htons(id->src_port); - if (flags & TH_RST) { /* we are sending a RST */ + th = (struct tcphdr *)(h + 1); + break; +#ifdef INET6 + case 6: + h6 = mtod(m, struct ip6_hdr *); + + /* prepare for checksum */ + h6->ip6_nxt = IPPROTO_TCP; + h6->ip6_plen = htons(sizeof(struct tcphdr)); + if (dir) { + h6->ip6_src = id->src_ip6; + h6->ip6_dst = id->dst_ip6; + } else { + h6->ip6_src = id->dst_ip6; + h6->ip6_dst = id->src_ip6; + } + + th = (struct tcphdr *)(h6 + 1); + break; +#endif + } + + if (dir) { + th->th_sport = htons(id->src_port); + th->th_dport = htons(id->dst_port); + } else { + th->th_sport = htons(id->dst_port); + th->th_dport = htons(id->src_port); + } + th->th_off = sizeof(struct tcphdr) >> 2; + + if (flags & TH_RST) { if (flags & TH_ACK) { - tcp->th_seq = htonl(ack); - tcp->th_ack = htonl(0); - tcp->th_flags = TH_RST; + th->th_seq = htonl(ack); + // XXX th->th_ack = htonl(0); + th->th_flags = TH_RST; } else { if (flags & TH_SYN) seq++; - tcp->th_seq = htonl(0); - tcp->th_ack = htonl(seq); - tcp->th_flags = TH_RST | TH_ACK; + // XXX th->th_seq = htonl(0); + th->th_ack = htonl(seq); + th->th_flags = TH_RST | TH_ACK; } } else { /* - * We are sending a keepalive. flags & TH_SYN determines - * the direction, forward if set, reverse if clear. - * NOTE: seq and ack are always assumed to be correct - * as set by the caller. This may be confusing... + * Keepalive - use caller provided sequence numbers */ - if (flags & TH_SYN) { - /* - * we have to rewrite the correct addresses! - */ - ip->ip_dst.s_addr = htonl(id->dst_ip); - ip->ip_src.s_addr = htonl(id->src_ip); - tcp->th_dport = htons(id->dst_port); - tcp->th_sport = htons(id->src_port); - } - tcp->th_seq = htonl(seq); - tcp->th_ack = htonl(ack); - tcp->th_flags = TH_ACK; + th->th_seq = htonl(seq); + th->th_ack = htonl(ack); + th->th_flags = TH_ACK; } - /* - * set ip_len to the payload size so we can compute - * the tcp checksum on the pseudoheader - * XXX check this, could save a couple of words ? - */ - ip->ip_len = htons(sizeof(struct tcphdr)); - tcp->th_sum = in_cksum(m, m->m_pkthdr.len); - /* - * now fill fields left out earlier - */ - ip->ip_ttl = ip_defttl; - ip->ip_len = m->m_pkthdr.len; - m->m_flags |= M_SKIP_FIREWALL; + + switch (id->addr_type) { + case 4: + th->th_sum = in_cksum(m, len); + + /* finish the ip header */ + h->ip_v = 4; + h->ip_hl = sizeof(*h) >> 2; + h->ip_tos = IPTOS_LOWDELAY; + h->ip_off = 0; + h->ip_len = len; + h->ip_ttl = V_ip_defttl; + h->ip_sum = 0; + break; +#ifdef INET6 + case 6: + th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), + sizeof(struct tcphdr)); + + /* finish the ip6 header */ + h6->ip6_vfc |= IPV6_VERSION; + h6->ip6_hlim = IPV6_DEFHLIM; + break; +#endif + } + return (m); #endif /* !__linux__ */ } @@ -1775,13 +1915,12 @@ lookup_next_rule(struct ip_fw *me, u_int32_t tablearg) } } } - if (rule == NULL) /* failure or not a skipto */ + if (rule == NULL) /* failure or not a skipto */ rule = me->next; me->next_rule = rule; return rule; } -#ifdef radix static int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen, uint32_t value) @@ -1797,7 +1936,16 @@ add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, if (ent == NULL) return (ENOMEM); ent->value = value; +#ifdef linux + /* there is no sin_len on linux, and the code assumes the first + * byte in the sockaddr to contain the length in bits. + * So we just dump the number right there + */ + *((uint8_t *)&(ent->addr)) = 8; + *((uint8_t *)&(ent->mask)) = 8; +#else ent->addr.sin_len = ent->mask.sin_len = 8; +#endif ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; IPFW_WLOCK(ch); @@ -1822,7 +1970,13 @@ del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; +#ifdef linux + /* there is no sin_len on linux, see above */ + *((uint8_t *)&sa) = 8; + *((uint8_t *)&mask) = 8; +#else sa.sin_len = mask.sin_len = 8; +#endif mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; IPFW_WLOCK(ch); @@ -1863,25 +2017,21 @@ flush_table(struct ip_fw_chain *ch, uint16_t tbl) rnh->rnh_walktree(rnh, flush_table_entry, rnh); return (0); } -#endif static void flush_tables(struct ip_fw_chain *ch) { -#ifdef radix uint16_t tbl; IPFW_WLOCK_ASSERT(ch); for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) flush_table(ch, tbl); -#endif } static int init_tables(struct ip_fw_chain *ch) { -#ifdef radix int i; uint16_t j; @@ -1893,7 +2043,6 @@ init_tables(struct ip_fw_chain *ch) return (ENOMEM); } } -#endif return (0); } @@ -1901,7 +2050,6 @@ static int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { -#ifdef radix struct radix_node_head *rnh; struct table_entry *ent; struct sockaddr_in sa; @@ -1909,18 +2057,21 @@ lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, if (tbl >= IPFW_TABLES_MAX) return (0); rnh = ch->tables[tbl]; +#ifdef linux + /* there is no sin_len on linux, see above */ + *((uint8_t *)&sa) = 8; +#else sa.sin_len = 8; +#endif sa.sin_addr.s_addr = addr; ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); if (ent != NULL) { *val = ent->value; return (1); } -#endif return (0); } -#ifdef radix static int count_table_entry(struct radix_node *rn, void *arg) { @@ -1976,62 +2127,46 @@ dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) rnh->rnh_walktree(rnh, dump_table_entry, tbl); return (0); } -#endif - -#ifndef linux /* FreeBSD */ -static void -fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp) -{ - struct ucred *cr; - - cr = inp->inp_cred; - ugp->fw_prid = jailed(cr) ? cr->cr_prison->pr_id : -1; - ugp->fw_uid = cr->cr_uid; - ugp->fw_ngroups = cr->cr_ngroups; - bcopy(cr->cr_groups, ugp->fw_groups, sizeof(ugp->fw_groups)); -} -#endif static int check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, - u_int16_t src_port, struct ip_fw_ugid *ugp, int *ugid_lookupp, + u_int16_t src_port, struct ucred **uc, int *ugid_lookup, struct inpcb *inp) { #ifdef linux int match = 0; struct sk_buff *skb = ((struct mbuf *)inp)->m_skb; + struct bsd_ucred *u = (struct bsd_ucred *)uc; - if (*ugid_lookupp == 0) { /* actively lookup and copy in cache */ - + if (*ugid_lookup == 0) { /* actively lookup and copy in cache */ /* returns null if any element of the chain up to file is null. - * if sk != NULL then we also have a reference + * if sk != NULL then we also have a reference */ - *ugid_lookupp = linux_lookup(proto, + *ugid_lookup = linux_lookup(proto, src_ip.s_addr, htons(src_port), dst_ip.s_addr, htons(dst_port), - skb, oif ? 1 : 0, ugp); + skb, oif ? 1 : 0, u); } - if (*ugid_lookupp < 0) + if (*ugid_lookup < 0) return 0; if (insn->o.opcode == O_UID) - match = (ugp->fw_uid == (uid_t)insn->d[0]); + match = (u->uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_JAIL) - match = (ugp->fw_groups[1] == (uid_t)insn->d[0]); + match = (u->xid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) - match = (ugp->fw_groups[0] == (uid_t)insn->d[0]); + match = (u->gid == (uid_t)insn->d[0]); return match; -#else /* FreeBSD */ +#else /* FreeBSD */ struct inpcbinfo *pi; int wildcard; struct inpcb *pcb; int match; - gid_t *gp; /* * Check to see if the UDP or TCP stack supplied us with @@ -2041,7 +2176,7 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, if (inp && *ugid_lookupp == 0) { INP_LOCK_ASSERT(inp); if (inp->inp_socket != NULL) { - fill_ugid_cache(inp, ugp); + *uc = crhold(inp->inp_cred); *ugid_lookupp = 1; } else *ugid_lookupp = -1; @@ -2055,10 +2190,10 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, return (0); if (proto == IPPROTO_TCP) { wildcard = 0; - pi = &tcbinfo; + pi = &V_tcbinfo; } else if (proto == IPPROTO_UDP) { wildcard = INPLOOKUP_WILDCARD; - pi = &udbinfo; + pi = &V_udbinfo; } else return 0; match = 0; @@ -2074,7 +2209,7 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, dst_ip, htons(dst_port), wildcard, NULL); if (pcb != NULL) { - fill_ugid_cache(pcb, ugp); + *uc = crhold(pcb->inp_cred); *ugid_lookupp = 1; } INP_INFO_RUNLOCK(pi); @@ -2090,16 +2225,11 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, } } if (insn->o.opcode == O_UID) - match = (ugp->fw_uid == (uid_t)insn->d[0]); - else if (insn->o.opcode == O_GID) { - for (gp = ugp->fw_groups; - gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++) - if (*gp == (gid_t)insn->d[0]) { - match = 1; - break; - } - } else if (insn->o.opcode == O_JAIL) - match = (ugp->fw_prid == (int)insn->d[0]); + match = ((*uc)->cr_uid == (uid_t)insn->d[0]); + else if (insn->o.opcode == O_GID) + match = groupmember((gid_t)insn->d[0], *uc); + else if (insn->o.opcode == O_JAIL) + match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); return match; #endif } @@ -2141,6 +2271,7 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, int ipfw_chk(struct ip_fw_args *args) { + /* * Local variables holding state during the processing of a packet: * @@ -2175,8 +2306,8 @@ ipfw_chk(struct ip_fw_args *args) * these types of constraints, as well as decrease contention * on pcb related locks. */ - struct ip_fw_ugid fw_ugid_cache; - int ugid_lookup = 0; + struct bsd_ucred ucred_cache; + int ucred_lookup = 0; /* * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG @@ -2243,7 +2374,7 @@ ipfw_chk(struct ip_fw_args *args) */ int dyn_dir = MATCH_UNKNOWN; ipfw_dyn_rule *q = NULL; - struct ip_fw_chain *chain = &layer3_chain; + struct ip_fw_chain *chain = &V_layer3_chain; struct m_tag *mtag; /* @@ -2259,9 +2390,9 @@ ipfw_chk(struct ip_fw_args *args) /* end of ipv6 variables */ int is_ipv4 = 0; - int done = 0; /* flag for actions match */ + int done = 0; /* flag to exit the outer loop */ - if (m->m_flags & M_SKIP_FIREWALL) + if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) return (IP_FW_PASS); /* accept */ dst_ip.s_addr = 0; /* make sure it is initialized */ @@ -2283,7 +2414,7 @@ do { \ if ((m)->m_len < x) { \ goto pullup_failed; \ } \ - p = (mtod(m, char *) + (_len)); \ + p = (mtod(m, char *) + (_len)); \ } while (0) /* @@ -2349,7 +2480,7 @@ do { \ printf("IPFW2: IPV6 - Unknown Routing " "Header type(%d)\n", ((struct ip6_rthdr *)ulp)->ip6r_type); - if (fw_deny_unknown_exthdrs) + if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } @@ -2373,7 +2504,7 @@ do { \ if (offset == 0) { printf("IPFW2: IPV6 - Invalid Fragment " "Header\n"); - if (fw_deny_unknown_exthdrs) + if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); break; } @@ -2445,7 +2576,7 @@ do { \ default: printf("IPFW2: IPV6 - Unknown Extension " "Header(%d), ext_hd=%x\n", proto, ext_hd); - if (fw_deny_unknown_exthdrs) + if (V_fw_deny_unknown_exthdrs) return (IP_FW_DENY); PULLUP_TO(hlen, ulp, struct ip6_ext); break; @@ -2517,22 +2648,36 @@ do { \ } IPFW_RLOCK(chain); + if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ + IPFW_RUNLOCK(chain); + return (IP_FW_PASS); /* accept */ + } mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); if (args->rule) { /* * Packet has already been tagged. Look for the next rule - * to restart processing. - * + * to restart processing. Make sure that args->rule still + * exists and not changed. * If fw_one_pass != 0 then just accept it. * XXX should not happen here, but optimized out in * the caller. */ - if (fw_one_pass) { + if (V_fw_one_pass) { IPFW_RUNLOCK(chain); return (IP_FW_PASS); } + if (chain->id != args->chain_id) { + for (f = chain->rules; f != NULL; f = f->next) + if (f == args->rule && f->id == args->rule_id) + break; + + if (f != NULL) + f = f->next_rule; + else + f = chain->default_rule; + } else + f = args->rule->next_rule; - f = args->rule->next_rule; if (f == NULL) f = lookup_next_rule(args->rule, 0); } else { @@ -2548,12 +2693,9 @@ do { \ IPFW_RUNLOCK(chain); return (IP_FW_DENY); /* invalid */ } +// f = rule2ptr(chain, skipto+1); while (f && f->rulenum <= skipto) f = f->next; - if (f == NULL) { /* drop packet */ - IPFW_RUNLOCK(chain); - return (IP_FW_DENY); - } } } /* reset divert rule to avoid confusion later */ @@ -2565,6 +2707,21 @@ do { \ /* * Now scan the rules, and parse microinstructions for each rule. + * We have two nested loops and an inner switch. Sometimes we + * need to break out of one or both loops, or re-enter one of + * the loops with updated variables. Loop variables are: + * + * f (outer loop) points to the current rule. + * On output it points to the matching rule. + * done (outer loop) is used as a flag to break the loop. + * l (inner loop) residual length of current rule. + * cmd points to the current microinstruction. + * + * We break the inner loop by setting l=0 and possibly + * cmdlen=0 if we don't want to advance cmd. + * We break the outer loop by setting done=1 + * We can restart the inner loop by setting l>0 and f, cmd + * as needed. */ for (; f; f = f->next) { ipfw_insn *cmd; @@ -2572,7 +2729,7 @@ do { \ int l, cmdlen, skip_or; /* skip rest of OR block */ /* again: */ - if (set_disable & (1 << f->set) ) + if (V_set_disable & (1 << f->set) ) continue; skip_or = 0; @@ -2637,8 +2794,8 @@ do { \ (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, - src_ip, src_port, &fw_ugid_cache, - &ugid_lookup, (struct inpcb *)args->m); + src_ip, src_port, (struct ucred **)&ucred_cache, + &ucred_lookup, (struct inpcb *)args->m); break; case O_RECV: @@ -2724,6 +2881,42 @@ do { \ dst_ip.s_addr : src_ip.s_addr; uint32_t v = 0; + if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { + v = ((ipfw_insn_u32 *)cmd)->d[1]; + if (v == 0) + a = dst_ip.s_addr; + else if (v == 1) + a = src_ip.s_addr; + else if (offset != 0) + break; + else if (proto != IPPROTO_TCP && + proto != IPPROTO_UDP) + break; + else if (v == 2) + a = dst_port; + else if (v == 3) + a = src_port; + else if (v == 4 || v == 5) { + check_uidgid( + (ipfw_insn_u32 *)cmd, + proto, oif, + dst_ip, dst_port, + src_ip, src_port, (struct ucred **)&ucred_cache, + &ucred_lookup, (struct inpcb *)args->m); +#ifdef linux + if (v ==4 /* O_UID */) + a = ucred_cache.uid; + else if (v == 5 /* O_JAIL */) + a = ucred_cache.xid; +#else + if (v ==4 /* O_UID */) + a = (*uc)->cr_uid; + else if (v == 5 /* O_JAIL */) + a = (*uc)->cr_prison->pr_id; +#endif + } else + break; + } match = lookup_table(chain, cmd->arg1, a, &v); if (!match) @@ -2958,7 +3151,7 @@ do { \ } case O_LOG: - if (fw_verbose) + if (V_fw_verbose) ipfw_log(f, hlen, args, m, oif, offset, tablearg, ip); match = 1; @@ -3161,14 +3354,13 @@ do { \ * * In general, here we set retval and terminate the * outer loop (would be a 'break 3' in some language, - * but we need to do a 'goto done'). + * but we need to set l=0, done=1) * * Exceptions: * O_COUNT and O_SKIPTO actions: * instead of terminating, we jump to the next rule - * ('break' after setting match and l) - * or to the SKIPTO target ('break' after - * having set f, cmd and l), respectively. + * (setting l=0), or to the SKIPTO target (by + * setting f, cmd and l as needed), respectively. * * O_TAG, O_LOG and O_ALTQ action parameters: * perform some action and set match = 1; @@ -3179,30 +3371,28 @@ do { \ * These opcodes try to install an entry in the * state tables; if successful, we continue with * the next opcode (match=1; break;), otherwise - * the packet * must be dropped - * ('goto done' after setting retval); + * the packet must be dropped (set retval, + * break loops with l=0, done=1) * * O_PROBE_STATE and O_CHECK_STATE: these opcodes * cause a lookup of the state table, and a jump * to the 'action' part of the parent rule * if an entry is found, or * (CHECK_STATE only) a jump to the next rule if - * the entry is not found ('goto next_rule'). - * The result of the lookup is cached to make - * further instances of these opcodes are - * effectively NOPs. - * The jump to the next rule is done by a break - * after zeroing the cmdlen value and setting - * match. + * the entry is not found. + * The result of the lookup is cached so that + * further instances of these opcodes become NOPs. + * The jump to the next rule is done by setting + * l=0, cmdlen=0. */ case O_LIMIT: case O_KEEP_STATE: if (install_state(f, (ipfw_insn_limit *)cmd, args, tablearg)) { + /* error or limit violation */ retval = IP_FW_DENY; - /* was goto done; */ /* error/limit violation */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ } match = 1; break; @@ -3226,7 +3416,8 @@ do { \ /* * Found dynamic entry, update stats * and jump to the 'action' part of - * the parent rule. + * the parent rule by setting + * f, cmd, l and clearing cmdlen. */ q->pcnt++; q->bcnt += pktlen; @@ -3234,9 +3425,8 @@ do { \ cmd = ACTION_PTR(f); l = f->cmd_len - f->act_ofs; IPFW_DYN_UNLOCK(); - /* previously was goto check_body; */ - cmdlen = 0; /* make null for() changes */ - match = 1; /* do not break to the external loop */ + cmdlen = 0; + match = 1; break; } /* @@ -3245,60 +3435,56 @@ do { \ * ignore and continue with next opcode. */ if (cmd->opcode == O_CHECK_STATE) - l = 0; /* was goto next_rule; */ + l = 0; /* exit inner loop */ match = 1; break; case O_ACCEPT: retval = 0; /* accept */ - /* was goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ break; case O_PIPE: case O_QUEUE: args->rule = f; /* report matching rule */ + args->rule_id = f->id; + args->chain_id = chain->id; if (cmd->arg1 == IP_FW_TABLEARG) args->cookie = tablearg; else args->cookie = cmd->arg1; retval = IP_FW_DUMMYNET; - /* was goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ break; #if 0 case O_DIVERT: - case O_TEE: { - struct divert_tag *dt; - + case O_TEE: if (args->eh) /* not on layer 2 */ break; + /* otherwise this is terminal */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ mtag = m_tag_get(PACKET_TAG_DIVERT, - sizeof(struct divert_tag), - M_NOWAIT); + sizeof(struct divert_tag), + M_NOWAIT); if (mtag == NULL) { - /* XXX statistic */ - /* drop packet */ - IPFW_RUNLOCK(chain); - return (IP_FW_DENY); - } - dt = (struct divert_tag *)(mtag+1); - dt->cookie = f->rulenum; - if (cmd->arg1 == IP_FW_TABLEARG) + retval = IP_FW_DENY; + } else { + struct divert_tag *dt; + dt = (struct divert_tag *)(mtag+1); + dt->cookie = f->rulenum; + if (cmd->arg1 == IP_FW_TABLEARG) dt->info = tablearg; - else + else dt->info = cmd->arg1; - m_tag_prepend(m, mtag); - retval = (cmd->opcode == O_DIVERT) ? - IP_FW_DIVERT : IP_FW_TEE; - /* was goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + m_tag_prepend(m, mtag); + retval = (cmd->opcode == O_DIVERT) ? + IP_FW_DIVERT : IP_FW_TEE; + } break; - } #endif case O_COUNT: @@ -3307,38 +3493,34 @@ do { \ f->bcnt += pktlen; f->timestamp = time_uptime; if (cmd->opcode == O_COUNT) { - /* was goto next_rule; */ - l = 0; /* exit the inner loop */ - match = 1; /* do not break the loop */ + l = 0; /* exit inner loop */ break; } /* handle skipto */ if (cmd->arg1 == IP_FW_TABLEARG) { f = lookup_next_rule(f, tablearg); - } else { + } else { // XXX ? if (f->next_rule == NULL) lookup_next_rule(f, 0); f = f->next_rule; } - /* previously was "goto again;" - * We emulate by re-entering the inner loop + /* + * Skip disabled rules, and + * re-enter the inner loop * with the correct f, l and cmd. - * First, skip over disabled rules. - * Should at least match the default rule, - * but try to be robust. + * Also clear cmdlen and skip_or */ - while (f && (set_disable & (1 << f->set))) + while (f && (V_set_disable & (1 << f->set))) f = f->next; - /* prepare to re-enter the inner loop. */ - if (f) { /* better safe than sorry */ + if (f) { /* found a valid rule */ l = f->cmd_len; cmd = f->cmd; } else { - l = 0; /* this will break the inner loop */ + l = 0; /* exit inner loop */ } - cmdlen = 0; /* reset loop condition */ + match = 1; + cmdlen = 0; skip_or = 0; - match = 1; /* do not break the loop */ break; case O_REJECT: @@ -3373,47 +3555,44 @@ do { \ #endif case O_DENY: retval = IP_FW_DENY; - /* goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ break; - case O_FORWARD_IP: { - struct sockaddr_in *sa; - sa = &(((ipfw_insn_sa *)cmd)->sa); + case O_FORWARD_IP: if (args->eh) /* not valid on layer2 pkts */ break; if (!q || dyn_dir == MATCH_FORWARD) { - if (sa->sin_addr.s_addr == INADDR_ANY) { - bcopy(sa, &args->hopstore, - sizeof(*sa)); - args->hopstore.sin_addr.s_addr = - htonl(tablearg); - args->next_hop = - &args->hopstore; - } else { - args->next_hop = sa; - } + struct sockaddr_in *sa; + sa = &(((ipfw_insn_sa *)cmd)->sa); + if (sa->sin_addr.s_addr == INADDR_ANY) { + bcopy(sa, &args->hopstore, + sizeof(*sa)); + args->hopstore.sin_addr.s_addr = + htonl(tablearg); + args->next_hop = &args->hopstore; + } else { + args->next_hop = sa; + } } retval = IP_FW_PASS; - } - /* goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ break; case O_NETGRAPH: case O_NGTEE: args->rule = f; /* report matching rule */ + args->rule_id = f->id; + args->chain_id = chain->id; if (cmd->arg1 == IP_FW_TABLEARG) args->cookie = tablearg; else args->cookie = cmd->arg1; retval = (cmd->opcode == O_NETGRAPH) ? IP_FW_NETGRAPH : IP_FW_NGTEE; - /* goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ break; #if 0 @@ -3423,38 +3602,89 @@ do { \ f->timestamp = time_uptime; M_SETFIB(m, cmd->arg1); args->f_id.fib = cmd->arg1; - /* was goto next_rule; */ - l = 0; - match = 1; + l = 0; /* exit inner loop */ break; - case O_NAT: { - struct cfg_nat *t; - int nat_id; - - if (IPFW_NAT_LOADED) { - args->rule = f; /* Report matching rule. */ - t = ((ipfw_insn_nat *)cmd)->nat; + case O_NAT: + if (!IPFW_NAT_LOADED) { + retval = IP_FW_DENY; + } else { + struct cfg_nat *t; + int nat_id; + + args->rule = f; /* Report matching rule. */ + args->rule_id = f->id; + args->chain_id = chain->id; + t = ((ipfw_insn_nat *)cmd)->nat; + if (t == NULL) { + nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? + tablearg : cmd->arg1; + LOOKUP_NAT(V_layer3_chain, nat_id, t); if (t == NULL) { - nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? - tablearg : cmd->arg1; - LOOKUP_NAT(layer3_chain, nat_id, t); - if (t == NULL) { - retval = IP_FW_DENY; - /* goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ - break; - } - if (cmd->arg1 != IP_FW_TABLEARG) - ((ipfw_insn_nat *)cmd)->nat = t; + retval = IP_FW_DENY; + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ + break; } - retval = ipfw_nat_ptr(args, t, m); - } else - retval = IP_FW_DENY; - /* goto done; */ - l = 0; /* break the inner loop */ - done = 1; /* break the external loop */ + if (cmd->arg1 != IP_FW_TABLEARG) + ((ipfw_insn_nat *)cmd)->nat = t; + } + retval = ipfw_nat_ptr(args, t, m); + } + l = 0; /* exit inner loop */ + done = 1; /* exit outer loop */ + break; + + case O_REASS: { + int ip_off; + + f->pcnt++; + f->bcnt += pktlen; + l = 0; /* in any case exit inner loop */ + + ip_off = (args->eh != NULL) ? + ntohs(ip->ip_off) : ip->ip_off; + /* if not fragmented, go to next rule */ + if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) + break; + /* + * ip_reass() expects len & off in host + * byte order: fix them in case we come + * from layer2. + */ + if (args->eh != NULL) { + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + } + + args->m = m = ip_reass(m); + + /* + * IP header checksum fixup after + * reassembly and leave header + * in network byte order. + */ + if (m == NULL) { /* fragment got swallowed */ + retval = IP_FW_DENY; + } else { /* good, packet complete */ + int hlen; + + ip = mtod(m, struct ip *); + hlen = ip->ip_hl << 2; + /* revert len & off for layer2 pkts */ + if (args->eh != NULL) + ip->ip_len = htons(ip->ip_len); + ip->ip_sum = 0; + if (hlen == sizeof(struct ip)) + ip->ip_sum = in_cksum_hdr(ip); + else + ip->ip_sum = in_cksum(m, hlen); + retval = IP_FW_REASS; + args->rule = f; + args->rule_id = f->id; + args->chain_id = chain->id; + } + done = 1; /* exit outer loop */ break; } #endif @@ -3463,6 +3693,9 @@ do { \ break; // XXX we disabled some panic("-- unknown opcode %d\n", cmd->opcode); } /* end of switch() on opcodes */ + /* + * if we get here with l=0, then match is irrelevant. + */ if (cmd->len & F_NOT) match = !match; @@ -3475,12 +3708,13 @@ do { \ break; /* try next rule */ } - } /* end of inner for, scan opcodes */ + } /* end of inner loop, scan opcodes */ if (done) break; -/* next_rule:; */ /* try next rule */ +/* next_rule:;*/ /* try next rule */ + } /* end of outer for, scan rules */ if (done) { @@ -3488,16 +3722,19 @@ do { \ f->pcnt++; f->bcnt += pktlen; f->timestamp = time_uptime; - IPFW_RUNLOCK(chain); - return (retval); + } else { + retval = IP_FW_DENY; + printf("ipfw: ouch!, skip past end of rules, denying packet\n"); } - - printf("ipfw: ouch!, skip past end of rules, denying packet\n"); IPFW_RUNLOCK(chain); - return (IP_FW_DENY); +#ifdef __FreeBSD__ + if (ucred_cache != NULL) + crfree(ucred_cache); +#endif + return (retval); pullup_failed: - if (fw_verbose) + if (V_fw_verbose) printf("ipfw: pullup failed\n"); return (IP_FW_DENY); } @@ -3513,6 +3750,8 @@ flush_rule_ptrs(struct ip_fw_chain *chain) IPFW_WLOCK_ASSERT(chain); + chain->id++; + for (rule = chain->rules; rule; rule = rule->next) rule->next_rule = NULL; } @@ -3548,6 +3787,7 @@ add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) if (chain->rules == NULL) { /* default rule */ chain->rules = rule; + rule->id = ++chain->id; goto done; } @@ -3555,10 +3795,10 @@ add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) * If rulenum is 0, find highest numbered rule before the * default rule, and add autoinc_step */ - if (autoinc_step < 1) - autoinc_step = 1; - else if (autoinc_step > 1000) - autoinc_step = 1000; + if (V_autoinc_step < 1) + V_autoinc_step = 1; + else if (V_autoinc_step > 1000) + V_autoinc_step = 1000; if (rule->rulenum == 0) { /* * locate the highest numbered rule before default @@ -3568,8 +3808,8 @@ add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) break; rule->rulenum = f->rulenum; } - if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step) - rule->rulenum += autoinc_step; + if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) + rule->rulenum += V_autoinc_step; input_rule->rulenum = rule->rulenum; } @@ -3589,12 +3829,14 @@ add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) } } flush_rule_ptrs(chain); + /* chain->id incremented inside flush_rule_ptrs() */ + rule->id = chain->id; done: - static_count++; - static_len += l; + V_static_count++; + V_static_len += l; IPFW_WUNLOCK(chain); DEB(printf("ipfw: installed rule %d, static count now %d\n", - rule->rulenum, static_count);) + rule->rulenum, V_static_count);) return (0); } @@ -3623,8 +3865,8 @@ remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, chain->rules = n; else prev->next = n; - static_count--; - static_len -= l; + V_static_count--; + V_static_len -= l; rule->next = chain->reap; chain->reap = rule; @@ -3633,12 +3875,6 @@ remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, } /* - * Hook for cleaning up dummynet when an ipfw rule is deleted. - * Set/cleared when dummynet module is loaded/unloaded. - */ -void (*ip_dn_ruledel_ptr)(void *) = NULL; - -/** * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. @@ -3650,8 +3886,6 @@ reap_rules(struct ip_fw *head) while ((rule = head) != NULL) { head = head->next; - if (ip_dn_ruledel_ptr) - ip_dn_ruledel_ptr(rule); free(rule, M_IPFW); } } @@ -3668,6 +3902,7 @@ free_chain(struct ip_fw_chain *chain, int kill_default) IPFW_WLOCK_ASSERT(chain); + chain->reap = NULL; flush_rule_ptrs(chain); /* more efficient to do outside the loop */ for (prev = NULL, rule = chain->rules; rule ; ) if (kill_default || rule->set != RESVD_SET) @@ -3714,8 +3949,8 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg) } IPFW_WLOCK(chain); - rule = chain->rules; - chain->reap = NULL; + rule = chain->rules; /* common starting point */ + chain->reap = NULL; /* prepare for deletions */ switch (cmd) { case 0: /* delete rules with given number */ /* @@ -3739,18 +3974,17 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg) case 1: /* delete all rules with given set number */ flush_rule_ptrs(chain); - rule = chain->rules; - while (rule->rulenum < IPFW_DEFAULT_RULE) + while (rule->rulenum < IPFW_DEFAULT_RULE) { if (rule->set == rulenum) rule = remove_rule(chain, rule, prev); else { prev = rule; rule = rule->next; } + } break; case 2: /* move rules with given number to new set */ - rule = chain->rules; for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) if (rule->rulenum == rulenum) rule->set = new_set; @@ -3769,6 +4003,7 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg) else if (rule->set == new_set) rule->set = rulenum; break; + case 5: /* delete rules with given number and with given set number. * rulenum - given rule number; * new_set - given set number. @@ -3795,10 +4030,8 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg) * avoid a LOR with dummynet. */ rule = chain->reap; - chain->reap = NULL; IPFW_WUNLOCK(chain); - if (rule) - reap_rules(rule); + reap_rules(rule); return 0; } @@ -3845,7 +4078,7 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) IPFW_WLOCK(chain); if (rulenum == 0) { - norule_counter = 0; + V_norule_counter = 0; for (rule = chain->rules; rule; rule = rule->next) { /* Skip rules from another set. */ if (cmd == 1 && rule->set != set) @@ -3878,7 +4111,7 @@ zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) } IPFW_WUNLOCK(chain); - if (fw_verbose) { + if (V_fw_verbose) { #define lev LOG_SECURITY | LOG_NOTICE if (rulenum) @@ -4032,10 +4265,11 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_IP_DST_LOOKUP: if (cmd->arg1 >= IPFW_TABLES_MAX) { printf("ipfw: invalid table number %d\n", - cmd->arg1); + cmd->arg1); return (EINVAL); } if (cmdlen != F_INSN_SIZE(ipfw_insn) && + cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && cmdlen != F_INSN_SIZE(ipfw_insn_u32)) goto bad_size; break; @@ -4117,6 +4351,7 @@ check_ipfw_struct(struct ip_fw *rule, int size) case O_UNREACH6: #endif case O_SKIPTO: + case O_REASS: check_size: if (cmdlen != F_INSN_SIZE(ipfw_insn)) goto bad_size; @@ -4210,7 +4445,6 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) time_t boot_seconds; boot_seconds = boottime.tv_sec; - /* XXX this can take a long time and locking will block packet flow */ IPFW_RLOCK(chain); for (rule = chain->rules; rule ; rule = rule->next) { @@ -4224,19 +4458,19 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) if (bp + i <= ep) { bcopy(rule, bp, i); /* - * XXX HACK. Store the disable mask in the "next" pointer - * in a wild attempt to keep the ABI the same. + * XXX HACK. Store the disable mask in the "next" + * pointer in a wild attempt to keep the ABI the same. * Why do we do this on EVERY rule? */ - bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule), - sizeof(set_disable)); + bcopy(&V_set_disable, + &(((struct ip_fw *)bp)->next_rule), + sizeof(V_set_disable)); if (((struct ip_fw *)bp)->timestamp) ((struct ip_fw *)bp)->timestamp += boot_seconds; bp += i; } } IPFW_RUNLOCK(chain); - return (bp - (char *)buf); } @@ -4255,14 +4489,14 @@ ipfw_getdynrules(struct ip_fw_chain *chain, void *buf, size_t space) time_t boot_seconds; printf("dynrules requested\n"); - boot_seconds = boottime.tv_sec; + boot_seconds = boottime.tv_sec; - if (ipfw_dyn_v) { + if (V_ipfw_dyn_v) { ipfw_dyn_rule *p, *last = NULL; IPFW_DYN_LOCK(); - for (i = 0 ; i < curr_dyn_buckets; i++) - for (p = ipfw_dyn_v[i] ; p != NULL; p = p->next) { + for (i = 0 ; i < V_curr_dyn_buckets; i++) + for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { if (bp + sizeof *p <= ep) { ipfw_dyn_rule *dst = (ipfw_dyn_rule *)bp; @@ -4291,14 +4525,13 @@ ipfw_getdynrules(struct ip_fw_chain *chain, void *buf, size_t space) bp += sizeof(ipfw_dyn_rule); } else { p = NULL; /* break the loop */ - i = curr_dyn_buckets; + i = V_curr_dyn_buckets; } } IPFW_DYN_UNLOCK(); if (last != NULL) /* mark last dynamic rule */ bzero(&last->next, sizeof(last)); } - return (bp - (char *)buf); } @@ -4343,7 +4576,7 @@ ipfw_ctl(struct sockopt *sopt) * change between calculating the size and returning the * data in which case we'll just return what fits. */ - size = static_len; /* size of static rules */ + size = V_static_len; /* size of static rules */ /* * XXX todo: if the user passes a short length just to know @@ -4352,7 +4585,7 @@ ipfw_ctl(struct sockopt *sopt) */ buf = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyout(sopt, buf, - ipfw_getrules(&layer3_chain, buf, size)); + ipfw_getrules(&V_layer3_chain, buf, size)); free(buf, M_TEMP); break; @@ -4361,12 +4594,12 @@ ipfw_ctl(struct sockopt *sopt) * pass up a copy of the current dynamic rules. * The last dynamic rule has NULL in the "next" field. */ - /* if (!ipfw_dyn_v) XXX check for empty set ? */ - size = (dyn_count * sizeof(ipfw_dyn_rule)); /* size of dyn. rules */ + /* if (!V_ipfw_dyn_v) XXX check for empty set ? */ + size = (V_dyn_count * sizeof(ipfw_dyn_rule)); /* size of dyn. rules */ buf = malloc(size, M_TEMP, M_WAITOK); error = sooptcopyout(sopt, buf, - ipfw_getdynrules(&layer3_chain, buf, size)); + ipfw_getdynrules(&V_layer3_chain, buf, size)); free(buf, M_TEMP); break; @@ -4384,14 +4617,11 @@ ipfw_ctl(struct sockopt *sopt) * the old list without the need for a lock. */ - IPFW_WLOCK(&layer3_chain); - layer3_chain.reap = NULL; - free_chain(&layer3_chain, 0 /* keep default rule */); - rule = layer3_chain.reap; - layer3_chain.reap = NULL; - IPFW_WUNLOCK(&layer3_chain); - if (rule != NULL) - reap_rules(rule); + IPFW_WLOCK(&V_layer3_chain); + free_chain(&V_layer3_chain, 0 /* keep default rule */); + rule = V_layer3_chain.reap; + IPFW_WUNLOCK(&V_layer3_chain); + reap_rules(rule); break; case IP_FW_ADD: @@ -4401,7 +4631,7 @@ ipfw_ctl(struct sockopt *sopt) if (error == 0) error = check_ipfw_struct(rule, sopt->sopt_valsize); if (error == 0) { - error = add_rule(&layer3_chain, rule); + error = add_rule(&V_layer3_chain, rule); size = RULESIZE(rule); if (!error && sopt->sopt_dir == SOPT_GET) error = sooptcopyout(sopt, rule, size); @@ -4428,10 +4658,10 @@ ipfw_ctl(struct sockopt *sopt) break; size = sopt->sopt_valsize; if (size == sizeof(u_int32_t)) /* delete or reassign */ - error = del_entry(&layer3_chain, rulenum[0]); + error = del_entry(&V_layer3_chain, rulenum[0]); else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */ - set_disable = - (set_disable | rulenum[0]) & ~rulenum[1] & + V_set_disable = + (V_set_disable | rulenum[0]) & ~rulenum[1] & ~(1<sopt_name == IP_FW_RESETLOG); break; -#ifdef radix case IP_FW_TABLE_ADD: { ipfw_table_entry ent; @@ -4459,7 +4688,7 @@ ipfw_ctl(struct sockopt *sopt) sizeof(ent), sizeof(ent)); if (error) break; - error = add_table_entry(&layer3_chain, ent.tbl, + error = add_table_entry(&V_layer3_chain, ent.tbl, ent.addr, ent.masklen, ent.value); } break; @@ -4472,7 +4701,7 @@ ipfw_ctl(struct sockopt *sopt) sizeof(ent), sizeof(ent)); if (error) break; - error = del_table_entry(&layer3_chain, ent.tbl, + error = del_table_entry(&V_layer3_chain, ent.tbl, ent.addr, ent.masklen); } break; @@ -4485,9 +4714,9 @@ ipfw_ctl(struct sockopt *sopt) sizeof(tbl), sizeof(tbl)); if (error) break; - IPFW_WLOCK(&layer3_chain); - error = flush_table(&layer3_chain, tbl); - IPFW_WUNLOCK(&layer3_chain); + IPFW_WLOCK(&V_layer3_chain); + error = flush_table(&V_layer3_chain, tbl); + IPFW_WUNLOCK(&V_layer3_chain); } break; @@ -4498,9 +4727,9 @@ ipfw_ctl(struct sockopt *sopt) if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), sizeof(tbl)))) break; - IPFW_RLOCK(&layer3_chain); - error = count_table(&layer3_chain, tbl, &cnt); - IPFW_RUNLOCK(&layer3_chain); + IPFW_RLOCK(&V_layer3_chain); + error = count_table(&V_layer3_chain, tbl, &cnt); + IPFW_RUNLOCK(&V_layer3_chain); if (error) break; error = sooptcopyout(sopt, &cnt, sizeof(cnt)); @@ -4524,9 +4753,9 @@ ipfw_ctl(struct sockopt *sopt) } tbl->size = (size - sizeof(*tbl)) / sizeof(ipfw_table_entry); - IPFW_RLOCK(&layer3_chain); - error = dump_table(&layer3_chain, tbl); - IPFW_RUNLOCK(&layer3_chain); + IPFW_RLOCK(&V_layer3_chain); + error = dump_table(&V_layer3_chain, tbl); + IPFW_RUNLOCK(&V_layer3_chain); if (error) { free(tbl, M_TEMP); break; @@ -4536,8 +4765,6 @@ ipfw_ctl(struct sockopt *sopt) } break; -#endif /* radix */ - case IP_FW_NAT_CFG: if (IPFW_NAT_LOADED) error = ipfw_nat_cfg_ptr(sopt); @@ -4587,27 +4814,26 @@ ipfw_ctl(struct sockopt *sopt) #undef RULE_MAXSIZE } -/** - * dummynet needs a reference to the default rule, because rules can be - * deleted while packets hold a reference to them. When this happens, - * dummynet changes the reference to the default rule (it could well be a - * NULL pointer, but this way we do not need to check for the special - * case, plus here he have info on the default behaviour). - */ -struct ip_fw *ip_fw_default_rule; /* * This procedure is only used to handle keepalives. It is invoked * every dyn_keepalive_period */ static void -ipfw_tick(void * __unused unused) +ipfw_tick(void * vnetx) { struct mbuf *m0, *m, *mnext, **mtailp; +#ifdef INET6 + struct mbuf *m6, **m6_tailp; +#endif int i; ipfw_dyn_rule *q; +#ifdef VIMAGE + struct vnet *vp = vnetx; +#endif - if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) + CURVNET_SET(vp); + if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) goto done; /* @@ -4618,29 +4844,56 @@ ipfw_tick(void * __unused unused) */ m0 = NULL; mtailp = &m0; +#ifdef INET6 + m6 = NULL; + m6_tailp = &m6; +#endif IPFW_DYN_LOCK(); - for (i = 0 ; i < curr_dyn_buckets ; i++) { - for (q = ipfw_dyn_v[i] ; q ; q = q->next ) { + for (i = 0 ; i < V_curr_dyn_buckets ; i++) { + for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { if (q->dyn_type == O_LIMIT_PARENT) continue; if (q->id.proto != IPPROTO_TCP) continue; if ( (q->state & BOTH_SYN) != BOTH_SYN) continue; - if (TIME_LEQ( time_uptime+dyn_keepalive_interval, + if (TIME_LEQ( time_uptime+V_dyn_keepalive_interval, q->expire)) continue; /* too early */ if (TIME_LEQ(q->expire, time_uptime)) continue; /* too late, rule expired */ - *mtailp = send_pkt(NULL, &(q->id), q->ack_rev - 1, + m = send_pkt(NULL, &(q->id), q->ack_rev - 1, q->ack_fwd, TH_SYN); - if (*mtailp != NULL) - mtailp = &(*mtailp)->m_nextpkt; - *mtailp = send_pkt(NULL, &(q->id), q->ack_fwd - 1, + mnext = send_pkt(NULL, &(q->id), q->ack_fwd - 1, q->ack_rev, 0); - if (*mtailp != NULL) - mtailp = &(*mtailp)->m_nextpkt; + + switch (q->id.addr_type) { + case 4: + if (m != NULL) { + *mtailp = m; + mtailp = &(*mtailp)->m_nextpkt; + } + if (mnext != NULL) { + *mtailp = mnext; + mtailp = &(*mtailp)->m_nextpkt; + } + break; +#ifdef INET6 + case 6: + if (m != NULL) { + *m6_tailp = m; + m6_tailp = &(*m6_tailp)->m_nextpkt; + } + if (mnext != NULL) { + *m6_tailp = mnext; + m6_tailp = &(*m6_tailp)->m_nextpkt; + } + break; +#endif + } + + m = mnext = NULL; } } IPFW_DYN_UNLOCK(); @@ -4649,64 +4902,43 @@ ipfw_tick(void * __unused unused) m->m_nextpkt = NULL; ip_output(m, NULL, NULL, 0, NULL, NULL); } +#ifdef INET6 + for (m = mnext = m6; m != NULL; m = mnext) { + mnext = m->m_nextpkt; + m->m_nextpkt = NULL; + ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); + } +#endif done: - callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL); + callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period*hz, + ipfw_tick, vnetx); + CURVNET_RESTORE(); } +static int vnet_ipfw_init(const void *); + int ipfw_init(void) { - struct ip_fw default_rule; - int error; + int error = 0; -#ifdef INET6 - /* Setup IPv6 fw sysctl tree. */ - sysctl_ctx_init(&ip6_fw_sysctl_ctx); - ip6_fw_sysctl_tree = SYSCTL_ADD_NODE(&ip6_fw_sysctl_ctx, - SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw", - CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall"); - SYSCTL_ADD_PROC(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree), - OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, - &fw6_enable, 0, ipfw_chg_hook, "I", "Enable ipfw+6"); - SYSCTL_ADD_INT(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree), - OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE, - &fw_deny_unknown_exthdrs, 0, - "Deny packets with unknown IPv6 Extension Headers"); -#endif - - layer3_chain.rules = NULL; - IPFW_LOCK_INIT(&layer3_chain); ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); - IPFW_DYN_LOCK_INIT(); - callout_init(&ipfw_timeout, CALLOUT_MPSAFE); - - bzero(&default_rule, sizeof default_rule); - - default_rule.act_ofs = 0; - default_rule.rulenum = IPFW_DEFAULT_RULE; - default_rule.cmd_len = 1; - default_rule.set = RESVD_SET; - default_rule.cmd[0].len = 1; - default_rule.cmd[0].opcode = -#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT - 1 ? O_ACCEPT : -#endif - O_DENY; - - error = add_rule(&layer3_chain, &default_rule); - if (error != 0) { - printf("ipfw2: error %u initializing default rule " - "(support disabled)\n", error); + IPFW_DYN_LOCK_INIT(); + error = vnet_ipfw_init(NULL); + if (error) { IPFW_DYN_LOCK_DESTROY(); - IPFW_LOCK_DESTROY(&layer3_chain); + IPFW_LOCK_DESTROY(&V_layer3_chain); uma_zdestroy(ipfw_dyn_rule_zone); return (error); } - ip_fw_default_rule = layer3_chain.rules; + /* + * Only print out this stuff the first time around, + * when called from the sysinit code. + */ printf("ipfw2 " #ifdef INET6 "(+ipv6) " @@ -4729,35 +4961,24 @@ ipfw_init(void) #else "loadable", #endif + default_to_accept ? "accept" : "deny"); - default_rule.cmd[0].opcode == O_ACCEPT ? "accept" : "deny"); - -#ifdef IPFIREWALL_VERBOSE - fw_verbose = 1; -#endif -#ifdef IPFIREWALL_VERBOSE_LIMIT - verbose_limit = IPFIREWALL_VERBOSE_LIMIT; -#endif - if (fw_verbose == 0) + /* + * Note: V_xxx variables can be accessed here but the vnet specific + * initializer may not have been called yet for the VIMAGE case. + * Tuneables will have been processed. We will print out values for + * the default vnet. + * XXX This should all be rationalized AFTER 8.0 + */ + if (V_fw_verbose == 0) printf("disabled\n"); - else if (verbose_limit == 0) + else if (V_verbose_limit == 0) printf("unlimited\n"); else printf("limited to %d packets/entry by default\n", - verbose_limit); + V_verbose_limit); - error = init_tables(&layer3_chain); - if (error) { - IPFW_DYN_LOCK_DESTROY(); - IPFW_LOCK_DESTROY(&layer3_chain); - uma_zdestroy(ipfw_dyn_rule_zone); - return (error); - } - ip_fw_ctl_ptr = ipfw_ctl; - ip_fw_chk_ptr = ipfw_chk; - callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL); - LIST_INIT(&layer3_chain.nat); - return (0); + return (error); } void @@ -4768,24 +4989,126 @@ ipfw_destroy(void) ip_fw_chk_ptr = NULL; ip_fw_ctl_ptr = NULL; callout_drain(&ipfw_timeout); - IPFW_WLOCK(&layer3_chain); - flush_tables(&layer3_chain); - layer3_chain.reap = NULL; - free_chain(&layer3_chain, 1 /* kill default rule */); - reap = layer3_chain.reap, layer3_chain.reap = NULL; - IPFW_WUNLOCK(&layer3_chain); + IPFW_WLOCK(&V_layer3_chain); + flush_tables(&V_layer3_chain); + V_layer3_chain.reap = NULL; + free_chain(&V_layer3_chain, 1 /* kill default rule */); + reap = V_layer3_chain.reap, V_layer3_chain.reap = NULL; + IPFW_WUNLOCK(&V_layer3_chain); if (reap != NULL) reap_rules(reap); - IPFW_DYN_LOCK_DESTROY(); uma_zdestroy(ipfw_dyn_rule_zone); - if (ipfw_dyn_v != NULL) - free(ipfw_dyn_v, M_IPFW); - IPFW_LOCK_DESTROY(&layer3_chain); + IPFW_DYN_LOCK_DESTROY(); + if (V_ipfw_dyn_v != NULL) + free(V_ipfw_dyn_v, M_IPFW); + IPFW_LOCK_DESTROY(&V_layer3_chain); -#ifdef INET6 - /* Free IPv6 fw sysctl tree. */ - sysctl_ctx_free(&ip6_fw_sysctl_ctx); + printf("IP firewall unloaded\n"); +} + +/**************** + * Stuff that must be initialized for every instance + * (including the first of course). + */ +static int +vnet_ipfw_init(const void *unused) +{ + int error; + struct ip_fw default_rule; + + /* First set up some values that are compile time options */ +#ifdef IPFIREWALL_VERBOSE + V_fw_verbose = 1; +#endif +#ifdef IPFIREWALL_VERBOSE_LIMIT + V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; #endif - printf("IP firewall unloaded\n"); + error = init_tables(&V_layer3_chain); + if (error) { + panic("init_tables"); /* XXX Marko fix this ! */ + } +#ifdef IPFIREWALL_NAT + LIST_INIT(&V_layer3_chain.nat); +#endif + + V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ + + V_ipfw_dyn_v = NULL; + V_dyn_buckets = 256; /* must be power of 2 */ + V_curr_dyn_buckets = 256; /* must be power of 2 */ + + V_dyn_ack_lifetime = 300; + V_dyn_syn_lifetime = 20; + V_dyn_fin_lifetime = 1; + V_dyn_rst_lifetime = 1; + V_dyn_udp_lifetime = 10; + V_dyn_short_lifetime = 5; + + V_dyn_keepalive_interval = 20; + V_dyn_keepalive_period = 5; + V_dyn_keepalive = 1; /* do send keepalives */ + + V_dyn_max = 4096; /* max # of dynamic rules */ + + V_fw_deny_unknown_exthdrs = 1; + + V_layer3_chain.rules = NULL; + IPFW_LOCK_INIT(&V_layer3_chain); + callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); + + bzero(&default_rule, sizeof default_rule); + default_rule.act_ofs = 0; + default_rule.rulenum = IPFW_DEFAULT_RULE; + default_rule.cmd_len = 1; + default_rule.set = RESVD_SET; + default_rule.cmd[0].len = 1; + default_rule.cmd[0].opcode = default_to_accept ? O_ACCEPT : O_DENY; + error = add_rule(&V_layer3_chain, &default_rule); + + if (error != 0) { + printf("ipfw2: error %u initializing default rule " + "(support disabled)\n", error); + IPFW_LOCK_DESTROY(&V_layer3_chain); + printf("leaving ipfw_iattach (1) with error %d\n", error); + return (error); + } + + V_layer3_chain.default_rule = V_layer3_chain.rules; + + /* curvnet is NULL in the !VIMAGE case */ + callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); + + /* First set up some values that are compile time options */ + V_ipfw_vnet_ready = 1; /* Open for business */ + + /* + * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr) + * and pfil hooks for ipv4 and ipv6. Even if the latter two fail + * we still keep the module alive because the sockopt and + * layer2 paths are still useful. + * ipfw[6]_hook return 0 on success, ENOENT on failure, + * so we can ignore the exact return value and just set a flag. + * + * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so + * changes in the underlying (per-vnet) variables trigger + * immediate hook()/unhook() calls. + * In layer2 we have the same behaviour, except that V_ether_ipfw + * is checked on each packet because there are no pfil hooks. + */ + V_ip_fw_ctl_ptr = ipfw_ctl; + V_ip_fw_chk_ptr = ipfw_chk; +#ifndef linux + if (V_fw_enable && ipfw_hook() != 0) { + error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */ + printf("ipfw_hook() error\n"); + } +#ifdef INET6 + if (V_fw6_enable && ipfw6_hook() != 0) { + error = ENOENT; + printf("ipfw6_hook() error\n"); + } +#endif +#endif /* !linux */ + return (error); }