From 1c3dc9f45532c25adc21f297422f0f5a7420b8ca Mon Sep 17 00:00:00 2001 From: marta Date: Wed, 8 Jul 2009 20:40:24 +0000 Subject: [PATCH] Import source code for dummynet innode emulation. --- Makefile | 13 + README | 107 + dummynet/Makefile | 159 + dummynet/bsd_compat.c | 336 ++ dummynet/in_cksum.c | 146 + dummynet/include/net/if.h | 1 + dummynet/include/net/pfil.h | 118 + dummynet/include/netgraph/ng_ipfw.h | 54 + dummynet/include/netinet/ip.h | 46 + dummynet/include/netinet/ip6.h | 58 + dummynet/include/netinet/ip_divert.h | 14 + dummynet/include/netinet/ip_dummynet.h | 399 ++ dummynet/include/netinet/ip_fw.h | 680 ++++ dummynet/include/netinet/ip_icmp.h | 17 + dummynet/include/netinet/tcp.h | 228 ++ dummynet/include/netinet/tcp_var.h | 4 + dummynet/include/netinet/udp.h | 48 + dummynet/include/sys/cdefs.h | 33 + dummynet/include/sys/kernel.h | 20 + dummynet/include/sys/malloc.h | 48 + dummynet/include/sys/mbuf.h | 204 ++ dummynet/include/sys/module.h | 42 + dummynet/include/sys/param.h | 11 + dummynet/include/sys/queue.h | 620 ++++ dummynet/include/sys/syslog.h | 7 + dummynet/include/sys/systm.h | 73 + dummynet/include/sys/taskqueue.h | 24 + dummynet/ip_dummynet.c | 2406 ++++++++++++ dummynet/ip_fw2.c | 4665 ++++++++++++++++++++++++ dummynet/ip_fw_pfil.c | 571 +++ dummynet/ipfw2_mod.c | 545 +++ dummynet/ipfw_mod.c | 0 dummynet/missing.h | 418 +++ glue.h | 278 ++ include_e/altq/if_altq.h | 0 include_e/arpa/inet.h | 0 include_e/machine/in_cksum.h | 0 include_e/net/ethernet.h | 0 include_e/net/netisr.h | 0 include_e/net/pf_mtag.h | 0 include_e/net/radix.h | 0 include_e/netinet/ether.h | 0 include_e/netinet/icmp6.h | 0 include_e/netinet/if_ether.h | 0 include_e/netinet/in.h | 0 include_e/netinet/in_pcb.h | 0 include_e/netinet/in_var.h | 0 include_e/netinet/ip_carp.h | 0 include_e/netinet/ip_var.h | 0 include_e/netinet/pim.h | 0 include_e/netinet/sctp.h | 0 include_e/netinet/tcp_timer.h | 0 include_e/netinet/tcpip.h | 0 include_e/netinet/udp_var.h | 0 
include_e/netinet6/ip6_var.h | 0 include_e/opt_inet6.h | 0 include_e/opt_ipfw.h | 0 include_e/opt_ipsec.h | 0 include_e/opt_mac.h | 0 include_e/opt_mbuf_stress_test.h | 0 include_e/opt_param.h | 0 include_e/sys/_lock.h | 0 include_e/sys/_mutex.h | 0 include_e/sys/jail.h | 0 include_e/sys/limits.h | 0 include_e/sys/lock.h | 0 include_e/sys/mutex.h | 0 include_e/sys/priv.h | 0 include_e/sys/proc.h | 0 include_e/sys/rwlock.h | 0 include_e/sys/socket.h | 0 include_e/sys/socketvar.h | 0 include_e/sys/sysctl.h | 0 include_e/sys/time.h | 0 include_e/sys/ucred.h | 0 ipfw-cleanup | 55 + ipfw-slice.spec | 60 + ipfw.cron | 3 + ipfw.spec | 81 + ipfw/Makefile | 45 + ipfw/add_rules | 25 + ipfw/altq.c | 151 + ipfw/dummynet.c | 1061 ++++++ ipfw/glue.c | 100 + ipfw/include/alias.h | 71 + ipfw/include/net/if_dl.h | 82 + ipfw/include/net/pfvar.h | 32 + ipfw/include/netinet/ip_dummynet.h | 399 ++ ipfw/include/netinet/ip_fw.h | 676 ++++ ipfw/include/netinet/tcp.h | 14 + ipfw/include/sys/sockio.h | 0 ipfw/include/timeconv.h | 29 + ipfw/ipfw2.c | 3823 +++++++++++++++++++ ipfw/ipfw2.h | 272 ++ ipfw/ipv6.c | 501 +++ ipfw/main.c | 539 +++ ipfw/nat.c | 940 +++++ ipfw/svn-commit. 
| 4 + slice/ipfw.8.gz | Bin 0 -> 26396 bytes slice/netconfig | 124 + 100 files changed, 21480 insertions(+) create mode 100644 Makefile create mode 100644 README create mode 100644 dummynet/Makefile create mode 100644 dummynet/bsd_compat.c create mode 100644 dummynet/in_cksum.c create mode 100644 dummynet/include/net/if.h create mode 100644 dummynet/include/net/pfil.h create mode 100644 dummynet/include/netgraph/ng_ipfw.h create mode 100644 dummynet/include/netinet/ip.h create mode 100644 dummynet/include/netinet/ip6.h create mode 100644 dummynet/include/netinet/ip_divert.h create mode 100644 dummynet/include/netinet/ip_dummynet.h create mode 100644 dummynet/include/netinet/ip_fw.h create mode 100644 dummynet/include/netinet/ip_icmp.h create mode 100644 dummynet/include/netinet/tcp.h create mode 100644 dummynet/include/netinet/tcp_var.h create mode 100644 dummynet/include/netinet/udp.h create mode 100644 dummynet/include/sys/cdefs.h create mode 100644 dummynet/include/sys/kernel.h create mode 100644 dummynet/include/sys/malloc.h create mode 100644 dummynet/include/sys/mbuf.h create mode 100644 dummynet/include/sys/module.h create mode 100644 dummynet/include/sys/param.h create mode 100644 dummynet/include/sys/queue.h create mode 100644 dummynet/include/sys/syslog.h create mode 100644 dummynet/include/sys/systm.h create mode 100644 dummynet/include/sys/taskqueue.h create mode 100644 dummynet/ip_dummynet.c create mode 100644 dummynet/ip_fw2.c create mode 100644 dummynet/ip_fw_pfil.c create mode 100644 dummynet/ipfw2_mod.c create mode 100644 dummynet/ipfw_mod.c create mode 100644 dummynet/missing.h create mode 100644 glue.h create mode 100644 include_e/altq/if_altq.h create mode 100644 include_e/arpa/inet.h create mode 100644 include_e/machine/in_cksum.h create mode 100644 include_e/net/ethernet.h create mode 100644 include_e/net/netisr.h create mode 100644 include_e/net/pf_mtag.h create mode 100644 include_e/net/radix.h create mode 100644 include_e/netinet/ether.h 
create mode 100644 include_e/netinet/icmp6.h create mode 100644 include_e/netinet/if_ether.h create mode 100644 include_e/netinet/in.h create mode 100644 include_e/netinet/in_pcb.h create mode 100644 include_e/netinet/in_var.h create mode 100644 include_e/netinet/ip_carp.h create mode 100644 include_e/netinet/ip_var.h create mode 100644 include_e/netinet/pim.h create mode 100644 include_e/netinet/sctp.h create mode 100644 include_e/netinet/tcp_timer.h create mode 100644 include_e/netinet/tcpip.h create mode 100644 include_e/netinet/udp_var.h create mode 100644 include_e/netinet6/ip6_var.h create mode 100644 include_e/opt_inet6.h create mode 100644 include_e/opt_ipfw.h create mode 100644 include_e/opt_ipsec.h create mode 100644 include_e/opt_mac.h create mode 100644 include_e/opt_mbuf_stress_test.h create mode 100644 include_e/opt_param.h create mode 100644 include_e/sys/_lock.h create mode 100644 include_e/sys/_mutex.h create mode 100644 include_e/sys/jail.h create mode 100644 include_e/sys/limits.h create mode 100644 include_e/sys/lock.h create mode 100644 include_e/sys/mutex.h create mode 100644 include_e/sys/priv.h create mode 100644 include_e/sys/proc.h create mode 100644 include_e/sys/rwlock.h create mode 100644 include_e/sys/socket.h create mode 100644 include_e/sys/socketvar.h create mode 100644 include_e/sys/sysctl.h create mode 100644 include_e/sys/time.h create mode 100644 include_e/sys/ucred.h create mode 100755 ipfw-cleanup create mode 100644 ipfw-slice.spec create mode 100644 ipfw.cron create mode 100644 ipfw.spec create mode 100644 ipfw/Makefile create mode 100755 ipfw/add_rules create mode 100644 ipfw/altq.c create mode 100644 ipfw/dummynet.c create mode 100644 ipfw/glue.c create mode 100644 ipfw/include/alias.h create mode 100644 ipfw/include/net/if_dl.h create mode 100644 ipfw/include/net/pfvar.h create mode 100644 ipfw/include/netinet/ip_dummynet.h create mode 100644 ipfw/include/netinet/ip_fw.h create mode 100644 ipfw/include/netinet/tcp.h create 
mode 100644 ipfw/include/sys/sockio.h create mode 100644 ipfw/include/timeconv.h create mode 100644 ipfw/ipfw2.c create mode 100644 ipfw/ipfw2.h create mode 100644 ipfw/ipv6.c create mode 100644 ipfw/main.c create mode 100644 ipfw/nat.c create mode 100644 ipfw/svn-commit. create mode 100644 slice/ipfw.8.gz create mode 100755 slice/netconfig diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..69b6481 --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +# $Id$ +# Top level makefile for building ipfw kernel and userspace. +# You can run it manually or also under the Planetlab build. +# Planetlab wants also the 'install' target. + +_all: all + +all clean distclean: + echo target is $(@) + (cd ipfw && $(MAKE) $(@) ) + (cd dummynet && $(MAKE) $(@) ) + +install: diff --git a/README b/README new file mode 100644 index 0000000..fd40bac --- /dev/null +++ b/README @@ -0,0 +1,107 @@ +# +# $Id$ +# + +This directory contains a port of ipfw and dummynet to Linux and OpenWrt +(a Windows version is in the works but not ready yet). +Building the code produces: + + a kernel module, ipfw_mod.ko + a userland program, /sbin/ipfw + +The source code here comes straight from FreeBSD (roughly the +version in RELENG_7 and HEAD as of June 2009), plus some glue code +and headers written from scratch. +Unless specified otherwise, all the code here is under a BSD license. + +=== To compile for a 2.6 kernel, simply run + + make + + Make sure that kernel headers (or sources) are installed on your + system, and that the link "/lib/modules/`uname -r`/build" points + to the header/source tree matching your kernel. + + You can override the default kernel tree with + + make KERNELPATH=your_kernel_source_tree + + NOTE: make sure CONFIG_NETFILTER is enabled in the kernel + configuration file. 
You can enable it by doing + + "(cd ${KERNELPATH}; make menuconfig)" + + and enabling the option listed below: + + Networking ---> + Networking options ---> + [*] Network packet filtering framework (Netfilter) + + +=== To compile for a 2.4 kernel: + + make VER=2.4 KERNELPATH=... + + Follow the same instructions as for the 2.6 kernel, enabling + the kernel options: + + Networking options ---> + [*] Network packet filtering (replaces ipchains) + +=== To build an Openwrt package + + (Tested with kamikaze_8.09.1 and Linux 2.4) + + + Download and extract the OpenWrt package, e.g. + + wget http://downloads.openwrt.org/kamikaze/8.09.1/kamikaze_8.09.1_source.tar.bz2 + tar xvjf kamikaze_8.09.1_source.tar.bz2 + + + "cd" to the directory with the OpenWrt sources (the one that + contains Config.in, rules.mk ...) + + cd kamikaze_8.09.1 + + + Optional: to be sure that the tools are working, make a first + compilation as follows: + + - run "make menuconfig" and set the correct target device, + drivers, and so on; + - run "make" to do the build + + + Add ipfw2 to the openwrt package, as follows: + + - fetch and extract the code e.g. + + (cd ..; \ + wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_linux-20090622.tgz;\ + tar xvzf ipfw_linux-20090622.tgz) + + (but you should have done it already) + + - run the following commands: + (mkdir package/ipfw2; + cp ../ipfw_mod/Makefile.openwrt package/ipfw2/Makefile) + + to create the package/ipfw2 directory in the OpenWrt source + directory, and copy Makefile.openwrt to package/ipfw2/Makefile. + + - if necessary, edit package/ipfw2/Makefile and set IPFW_DIR to point to + the directory with the ipfw sources (the directory + which contains this README, dummynet/ ipfw/ and so on); + + - run "make menuconfig" and select ipfw2 as a module in + Kernel Modules -> Other modules -> ipfw2 + + - run "make" to build the package, "make V=99" for verbose build. 
+ + The resulting package is located in bin/packages/mipsel/kmod-ipfw2*; + upload the file and install it on the target system, as follows: + + opkg install kmod-ipfw2_2.4.35.4-brcm-2.4-1_mipsel.ipk # install + ls -l /lib/modules/2.4.35.4/ipfw* # check + insmod /lib/modules/2.4.35.4/ipfw_mod.o # load the module + /lib/modules/2.4.35.4/ipfw show # launch the userspace tool + rmmod ipfw_mod.o # remove the module + +----------------------------------------------------------------------------- diff --git a/dummynet/Makefile b/dummynet/Makefile new file mode 100644 index 0000000..b361cba --- /dev/null +++ b/dummynet/Makefile @@ -0,0 +1,159 @@ +# +# $Id$ +# +# gnu Makefile to build linux module for ipfw+dummynet + +# Unless specified otherwise, variable names are arbitrary. +# Exceptions are the following: +# +# ccflags-y additional $(CC) flags +# M used by Kbuild, we must set it to `pwd` +# obj-m list of .o modules to build +# $(MOD)-y for each $MOD in obj-m, the list of objects +# obj-y same as above, for openwrt +# O_TARGET the link target, for openwrt +# EXTRA_CFLAGS as the name says... 
in openwrt +# EXTRA_CFLAGS are used in 2.6.22 module kernel compilation too +#--- + +$(warning including dummynet/Makefile) + +# lets default for 2.6 for planetlab builds +VER ?= 2.6 + +ifeq ($(VER),openwrt) + +$(warning dummynet/Makefile doing openwrt) +obj-m := ipfw_mod.o +obj-y := ipfw2_mod.o bsd_compat.o \ + in_cksum.o ip_dummynet.o ip_fw2.o ip_fw_pfil.o +O_TARGET := ipfw_mod.o + +xcflags-y += -O1 -DLINUX_24 +xcflags-y += -DIPFIREWALL_DEFAULT_TO_ACCEPT +xcflags-y += -g +xcflags-y += -D_BSD_SOURCE # enable __FAVOR_BSD (udp/tcp bsd struct over posix) +xcflags-y += -DKERNEL_MODULE # linux kernel module +xcflags-y += -I include_e -I include +xcflags-y += -include ../glue.h # headers + +EXTRA_CFLAGS := $(xcflags-y) + +# we should not export anything +#export-objs := ipfw2_mod.o +-include $(TOPDIR)/Rules.make + +else # !openwrt + +obj-m := ipfw_mod.o +ifneq ($(shell echo $(VER)|grep '2.4'),) + $(warning "---- Building for Version $(VER)") + KERNELDIR := -isystem /usr/src/linux-2.4.35.4/include + # replace the system include directory + WARN += -nostdinc -isystem /usr/lib/gcc/i486-linux-gnu/4.2.4/include + #WARN = -Wp,-MD,/home/luigi/ports-luigi/dummynet-branches/ipfw_mod/dummynet/.ipfw2_mod.o.d + #WARN += -Iinclude -include include/linux/autoconf.h + WARN += -Wall -Wundef + WARN += -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing + WARN += -fno-common -Werror-implicit-function-declaration + # WARN += -O2 -fno-stack-protector -m32 -msoft-float -mregparm=3 + # -mregparm=3 gives a printk error + WARN += -m32 -msoft-float # -mregparm=3 + #WARN += -freg-struct-return -mpreferred-stack-boundary=2 + WARN += -Wno-sign-compare + WARN += -Wdeclaration-after-statement -Wno-pointer-sign + + ccflags-y += -O1 -DLINUX_24 + CFLAGS = -DMODULE -D__KERNEL__ ${KERNELDIR} ${ccflags-y} + # The Main target +all: mod24 + +else + # KERNELDIR is where the kernel headers reside + # XXX on Planetlab, KERNELDIR must be same as KERNELPATH + KERNELDIR ?= $(KERNELPATH) + # KERNELDIR := 
/home/luigi/linux-2.6.25.17/ + # test on rock + #KERNELDIR := /usr/src/linux-2.6.24 # not with v.2237 + #KERNELDIR := /usr/src/linux-2.6.26 + #KERNELDIR := /usr/src/linux-2.6.22 + #KERNELDIR := /usr/src/linux-source-2.6.26 + #KERNELDIR := /lib/modules/`uname -r`/build + $(warning "---- Building Version 2.6 $(VER) in $(KERNELDIR)") + WARN := -O1 -Wall -Werror -DDEBUG_SPINLOCK -DDEBUG_MUTEXES + # The main target + + # Required by kernel <= 2.6.22, ccflags-y is used on newer version +LINUX_VERSION_CODE := $(shell grep LINUX_VERSION_CODE $(KERNELDIR)/include/linux/version.h|cut -d " " -f3) +ifeq ($(LINUX_VERSION_CODE),132630) + EXTRA_CFLAGS += $(ccflags-y) +endif + +all: include_e + $(MAKE) -C $(KERNELDIR) V=1 M=`pwd` modules +endif + +# the list of object use to build the module +ipfw_mod-y = $(IPFW_SRCS:%.c=%.o) + +# Original ipfw + dummynet + FreeBSD stuff, +IPFW_SRCS = ip_fw2.c ip_dummynet.c ip_fw_pfil.c in_cksum.c + +# module glue and functions missing in linux +IPFW_SRCS += ipfw2_mod.c bsd_compat.c + + +# additional $(CC) flags +ccflags-y += $(WARN) +ccflags-y += -DIPFIREWALL_DEFAULT_TO_ACCEPT +ccflags-y += -g +ccflags-y += -D_BSD_SOURCE # enable __FAVOR_BSD (udp/tcp bsd structure over posix) +ccflags-y += -DKERNEL_MODULE # linux kernel module +ccflags-y += -I $(M)/include_e -I $(M)/include +ccflags-y += -include $(M)/../glue.h # headers + +mod24: include_e $(obj-m) + +$(obj-m): $(ipfw_mod-y) + $(LD) $(LDFLAGS) -m elf_i386 -r -o $@ $^ +clean: + -rm -f *.o *.ko Module.symvers *.mod.c + +distclean: clean + -rm -f .*cmd modules.order opt_* + -rm -rf .tmp_versions include_e + +# support to create empty dirs and files in include_e/ +# EDIRS is the list of directories, EFILES is the list of files. 
+ +EDIRS= altq arpa machine net netinet netinet6 sys + +EFILES += opt_inet6.h opt_ipfw.h opt_ipsec.h opt_mac.h +EFILES += opt_mbuf_stress_test.h opt_param.h + +EFILES += altq/if_altq.h +EFILES += arpa/inet.h +EFILES += machine/in_cksum.h +EFILES += net/ethernet.h net/netisr.h net/pf_mtag.h net/radix.h + +EFILES += netinet/ether.h netinet/icmp6.h netinet/if_ether.h +EFILES += netinet/in.h netinet/in_pcb.h netinet/in_var.h +EFILES += netinet/ip_carp.h netinet/ip_var.h netinet/pim.h +EFILES += netinet/sctp.h netinet/tcp_timer.h netinet/tcpip.h +EFILES += netinet/udp_var.h + +EFILES += netinet6/ip6_var.h + +EFILES += sys/_lock.h sys/_mutex.h sys/jail.h +EFILES += sys/limits.h sys/lock.h sys/mutex.h sys/priv.h +EFILES += sys/proc.h sys/rwlock.h sys/socket.h sys/socketvar.h +EFILES += sys/sysctl.h sys/time.h sys/ucred.h + +M ?= $(shell pwd) +include_e: + echo "running in $M" + -@rm -rf $(M)/include_e opt_* + -@mkdir -p $(M)/include_e + -@(cd $(M)/include_e; mkdir -p $(EDIRS); touch $(EFILES) ) + +endif # !openwrt diff --git a/dummynet/bsd_compat.c b/dummynet/bsd_compat.c new file mode 100644 index 0000000..995d60c --- /dev/null +++ b/dummynet/bsd_compat.c @@ -0,0 +1,336 @@ +/* + * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Id$ + * + * kernel variables and functions that are not available in linux. + */ + +#include +#include /* do_div on 2.4 */ +#include /* get_random_bytes on 2.4 */ +#include "missing.h" + +/* + * gettimeofday would be in sys/time.h but it is not + * visible if _KERNEL is defined + */ +int gettimeofday(struct timeval *, struct timezone *); + +int ticks; /* kernel ticks counter */ +int hz = 1000; /* default clock time */ +long tick = 1000; /* XXX is this 100000/hz ? */ +int bootverbose = 0; +time_t time_uptime = 0; +struct timeval boottime; + +int ip_defttl; +int fw_one_pass = 1; +u_long in_ifaddrhmask; /* mask for hash table */ +struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ + +u_int rt_numfibs = RT_NUMFIBS; + +/* + * pfil hook support. + * We make pfil_head_get return a non-null pointer, which is then ignored + * in our 'add-hook' routines. 
+ */ +struct pfil_head; +typedef int (pfil_hook_t) + (void *, struct mbuf **, struct ifnet *, int, struct inpcb *); + +struct pfil_head * +pfil_head_get(int proto, u_long flags) +{ + static int dummy; + return (struct pfil_head *)&dummy; +} + +int +pfil_add_hook(pfil_hook_t *func, void *arg, int dir, struct pfil_head *h) +{ + return 0; +} + +int +pfil_remove_hook(pfil_hook_t *func, void *arg, int dir, struct pfil_head *h) +{ + return 0; +} + +/* define empty body for kernel function */ +int +priv_check(struct thread *td, int priv) +{ + return 0; +} + +int +securelevel_ge(struct ucred *cr, int level) +{ + return 0; +} + +int +sysctl_handle_int(SYSCTL_HANDLER_ARGS) +{ + return 0; +} + +int +sysctl_handle_long(SYSCTL_HANDLER_ARGS) +{ + return 0; +} + +void +ether_demux(struct ifnet *ifp, struct mbuf *m) +{ + return; +} + +int +ether_output_frame(struct ifnet *ifp, struct mbuf *m) +{ + return 0; +} + +void +in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum) +{ + return; +} + +void +icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) +{ + return; +} + +u_short +in_cksum_skip(struct mbuf *m, int len, int skip) +{ + return 0; +} + +u_short +in_cksum_hdr(struct ip *ip) +{ + return 0; +} + +struct mbuf * +ip_reass(struct mbuf *clone) +{ + return clone; +} +#ifdef INP_LOCK_ASSERT +#undef INP_LOCK_ASSERT +#define INP_LOCK_ASSERT(a) +#endif + +int +jailed(struct ucred *cred) +{ + return 0; +} + +/* +* Return 1 if an internet address is for a ``local'' host +* (one to which we have a connection). If subnetsarelocal +* is true, this includes other subnets of the local net. +* Otherwise, it includes only the directly-connected (sub)nets. 
+*/ +int +in_localaddr(struct in_addr in) +{ + return 1; +} + +int +sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) +{ + size_t valsize = sopt->sopt_valsize; + + if (len < valsize) + sopt->sopt_valsize = valsize = len; + bcopy(buf, sopt->sopt_val, valsize); + return 0; +} + +/* + * copy data from userland to kernel + */ +int +sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) +{ + size_t valsize = sopt->sopt_valsize; + + if (valsize < minlen) + return EINVAL; + if (valsize > len) + sopt->sopt_valsize = valsize = len; + bcopy(sopt->sopt_val, buf, valsize); + return 0; +} + +void +getmicrouptime(struct timeval *tv) +{ +#ifdef _WIN32 +#else + do_gettimeofday(tv); +#endif +} + + +#include + +char * +inet_ntoa_r(struct in_addr ina, char *buf) +{ +#ifdef _WIN32 +#else + unsigned char *ucp = (unsigned char *)&ina; + + sprintf(buf, "%d.%d.%d.%d", + ucp[0] & 0xff, + ucp[1] & 0xff, + ucp[2] & 0xff, + ucp[3] & 0xff); +#endif + return buf; +} + +char * +inet_ntoa(struct in_addr ina) +{ + static char buf[16]; + return inet_ntoa_r(ina, buf); +} + +int +random(void) +{ +#ifdef _WIN32 + return 0x123456; +#else + int r; + get_random_bytes(&r, sizeof(r)); + return r & 0x7fffffff; +#endif +} + + +/* + * do_div really does an unsigned 64-bit by 32-bit division. + * We save the sign and convert to unsigned before calling. + * We are safe just because we always call it with small operands. + */ +int64_t +div64(int64_t a, int64_t b) +{ +#ifdef _WIN32 + int a1 = a, b1 = b; + return a1/b1; +#else + uint64_t ua, ub; + int sign = ((a>0)?1:-1) * ((b>0)?1:-1); + + ua = ((a>0)?a:-a); + ub = ((b>0)?b:-b); + do_div(ua, ub); + return sign*ua; +#endif +} + +/* + * compact version of fnmatch. 
+ */ +int +fnmatch(const char *pattern, const char *string, int flags) +{ + char s; + + if (!string || !pattern) + return 1; /* no match */ + while ( (s = *string++) ) { + char p = *pattern++; + if (p == '\0') /* pattern is over, no match */ + return 1; + if (p == '*') /* wildcard, match */ + return 0; + if (p == '.' || p == s) /* char match, continue */ + continue; + return 1; /* no match */ + } + /* end of string, make sure the pattern is over too */ + if (*pattern == '\0' || *pattern == '*') + return 0; + return 1; /* no match */ +} + +#ifdef _WIN32 +/* + * as good as anywhere, place here the missing calls + */ + +void * +my_alloc(int size) +{ + void *_ret = ExAllocatePoolWithTag(0, size, 'wfpi'); + if (_ret) + memset(_ret, 0, size); + return _ret; +} + +void +panic(const char *fmt, ...) +{ + printf("%s", fmt); + for (;;); +} + +#include + +extern int _vsnprintf(char *buf, int buf_size, char * fmt, va_list ap); + +/* + * Windows' _snprintf doesn't terminate buffer with zero if size > buf_size + */ +int +snprintf(char *buf, int buf_size, char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + if (_vsnprintf(buf, buf_size, fmt, ap) < 0) + buf[buf_size - 1] = '\0'; + va_end(ap); + + return 0; +} +#endif diff --git a/dummynet/in_cksum.c b/dummynet/in_cksum.c new file mode 100644 index 0000000..ca56508 --- /dev/null +++ b/dummynet/in_cksum.c @@ -0,0 +1,146 @@ +/*- + * Copyright (c) 1988, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 + */ + +#include +__FBSDID("$FreeBSD: src/sys/netinet/in_cksum.c,v 1.10 2007/10/07 20:44:22 silby Exp $"); + +#include +#include + +/* + * Checksum routine for Internet Protocol family headers (Portable Version). + * + * This routine is very heavily used in the network + * code and should be modified for each CPU to be as fast as possible. + */ + +#define ADDCARRY(x) (x > 65535 ? 
x -= 65535 : x) +#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} + +int +in_cksum(struct mbuf *m, int len) +{ + register u_short *w; + register int sum = 0; + register int mlen = 0; + int byte_swapped = 0; + + union { + char c[2]; + u_short s; + } s_util; + union { + u_short s[2]; + long l; + } l_util; + + for (;m && len; m = m->m_next) { + if (m->m_len == 0) + continue; + w = mtod(m, u_short *); + if (mlen == -1) { + /* + * The first byte of this mbuf is the continuation + * of a word spanning between this mbuf and the + * last mbuf. + * + * s_util.c[0] is already saved when scanning previous + * mbuf. + */ + s_util.c[1] = *(char *)w; + sum += s_util.s; + w = (u_short *)((char *)w + 1); + mlen = m->m_len - 1; + len--; + } else + mlen = m->m_len; + if (len < mlen) + mlen = len; + len -= mlen; + /* + * Force to even boundary. + */ + if ((1 & (int) w) && (mlen > 0)) { + REDUCE; + sum <<= 8; + s_util.c[0] = *(u_char *)w; + w = (u_short *)((char *)w + 1); + mlen--; + byte_swapped = 1; + } + /* + * Unroll the loop to make overhead from + * branches &c small. + */ + while ((mlen -= 32) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; + sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; + sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; + w += 16; + } + mlen += 32; + while ((mlen -= 8) >= 0) { + sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; + w += 4; + } + mlen += 8; + if (mlen == 0 && byte_swapped == 0) + continue; + REDUCE; + while ((mlen -= 2) >= 0) { + sum += *w++; + } + if (byte_swapped) { + REDUCE; + sum <<= 8; + byte_swapped = 0; + if (mlen == -1) { + s_util.c[1] = *(char *)w; + sum += s_util.s; + mlen = 0; + } else + mlen = -1; + } else if (mlen == -1) + s_util.c[0] = *(char *)w; + } + if (len) + printf("cksum: out of data\n"); + if (mlen == -1) { + /* The last mbuf has odd # of bytes. 
Follow the + standard (the odd byte may be shifted left by 8 bits + or not as determined by endian-ness of the machine) */ + s_util.c[1] = 0; + sum += s_util.s; + } + REDUCE; + return (~sum & 0xffff); +} diff --git a/dummynet/include/net/if.h b/dummynet/include/net/if.h new file mode 100644 index 0000000..1aa8e7b --- /dev/null +++ b/dummynet/include/net/if.h @@ -0,0 +1 @@ +#include diff --git a/dummynet/include/net/pfil.h b/dummynet/include/net/pfil.h new file mode 100644 index 0000000..19a3d9c --- /dev/null +++ b/dummynet/include/net/pfil.h @@ -0,0 +1,118 @@ +/* $FreeBSD: src/sys/net/pfil.h,v 1.16 2007/06/08 12:43:25 gallatin Exp $ */ +/* $NetBSD: pfil.h,v 1.22 2003/06/23 12:57:08 martin Exp $ */ + +/*- + * Copyright (c) 1996 Matthew R. Green + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _NET_PFIL_H_ +#define _NET_PFIL_H_ + +#include +#include +#include +#include +#include +#include + +struct mbuf; +struct ifnet; +struct inpcb; + +/* + * The packet filter hooks are designed for anything to call them to + * possibly intercept the packet. + */ +struct packet_filter_hook { + TAILQ_ENTRY(packet_filter_hook) pfil_link; + int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *); + void *pfil_arg; + int pfil_flags; +}; + +#define PFIL_IN 0x00000001 +#define PFIL_OUT 0x00000002 +#define PFIL_WAITOK 0x00000004 +#define PFIL_ALL (PFIL_IN|PFIL_OUT) + +typedef TAILQ_HEAD(pfil_list, packet_filter_hook) pfil_list_t; + +#define PFIL_TYPE_AF 1 /* key is AF_* type */ +#define PFIL_TYPE_IFNET 2 /* key is ifnet pointer */ + +struct pfil_head { + pfil_list_t ph_in; + pfil_list_t ph_out; + int ph_type; + int ph_nhooks; +#if defined( __linux__ ) || defined( _WIN32 ) + rwlock_t ph_mtx; +#else + struct rwlock ph_mtx; +#endif + union { + u_long phu_val; + void *phu_ptr; + } ph_un; +#define ph_af ph_un.phu_val +#define ph_ifnet ph_un.phu_ptr + LIST_ENTRY(pfil_head) ph_list; +}; + +int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, + int, struct inpcb *inp); + +int pfil_add_hook(int (*func)(void *, struct mbuf **, + struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *); +int pfil_remove_hook(int (*func)(void *, struct mbuf **, + struct ifnet *, int, struct inpcb *), void *, int, struct 
pfil_head *); + +int pfil_head_register(struct pfil_head *); +int pfil_head_unregister(struct pfil_head *); + +struct pfil_head *pfil_head_get(int, u_long); + +#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) +#define PFIL_RLOCK(p) rw_rlock(&(p)->ph_mtx) +#define PFIL_WLOCK(p) rw_wlock(&(p)->ph_mtx) +#define PFIL_RUNLOCK(p) rw_runlock(&(p)->ph_mtx) +#define PFIL_WUNLOCK(p) rw_wunlock(&(p)->ph_mtx) +#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock) +#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock) + +static __inline struct packet_filter_hook * +pfil_hook_get(int dir, struct pfil_head *ph) +{ + if (dir == PFIL_IN) + return (TAILQ_FIRST(&ph->ph_in)); + else if (dir == PFIL_OUT) + return (TAILQ_FIRST(&ph->ph_out)); + else + return (NULL); +} + +#endif /* _NET_PFIL_H_ */ diff --git a/dummynet/include/netgraph/ng_ipfw.h b/dummynet/include/netgraph/ng_ipfw.h new file mode 100644 index 0000000..a6b6ea9 --- /dev/null +++ b/dummynet/include/netgraph/ng_ipfw.h @@ -0,0 +1,54 @@ +/*- + * Copyright 2005, Gleb Smirnoff + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netgraph/ng_ipfw.h,v 1.2 2006/02/17 09:42:49 glebius Exp $ + */ +#ifndef __NG_IPFW_H +#define __NG_IPFW_H + +#define NG_IPFW_NODE_TYPE "ipfw" +#define NGM_IPFW_COOKIE 1105988990 + +#ifdef _KERNEL + +struct mbuf; +struct ip_fw_args; +typedef int ng_ipfw_input_t(struct mbuf **, int, struct ip_fw_args *, int); +extern ng_ipfw_input_t *ng_ipfw_input_p; +#define NG_IPFW_LOADED (ng_ipfw_input_p != NULL) + +struct ng_ipfw_tag { + struct m_tag mt; /* tag header */ + struct ip_fw *rule; /* matching rule */ + struct ifnet *ifp; /* interface, for ip_output */ + int dir; +#define NG_IPFW_OUT 0 +#define NG_IPFW_IN 1 +}; + +#define TAGSIZ (sizeof(struct ng_ipfw_tag) - sizeof(struct m_tag)) + +#endif /* _KERNEL */ +#endif /* __NG_IPFW_H */ diff --git a/dummynet/include/netinet/ip.h b/dummynet/include/netinet/ip.h new file mode 100644 index 0000000..bdd8cf0 --- /dev/null +++ b/dummynet/include/netinet/ip.h @@ -0,0 +1,46 @@ +#ifndef _NETINET_IP_H_ +#define _NETINET_IP_H_ + +#define LITTLE_ENDIAN 1234 +#define BIG_ENDIAN 4321 +#if defined(__BIG_ENDIAN) +#error we are in bigendian +#elif defined(__LITTLE_ENDIAN) +//#warning we are in littleendian +#define BYTE_ORDER LITTLE_ENDIAN +#else +#error no platform +#endif + +/* XXX endianness doesn't belong here */ +// #define LITTLE_ENDIAN 1234 +// #define BIG_ENDIAN 4321 +// #define BYTE_ORDER LITTLE_ENDIAN + +/* + * Structure of an internet header, naked of options. 
+ */ +struct ip { +#if BYTE_ORDER == LITTLE_ENDIAN + u_int ip_hl:4, /* header length */ + ip_v:4; /* version */ +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_int ip_v:4, /* version */ + ip_hl:4; /* header length */ +#endif + u_char ip_tos; /* type of service */ + u_short ip_len; /* total length */ + u_short ip_id; /* identification */ + u_short ip_off; /* fragment offset field */ +#define IP_RF 0x8000 /* reserved fragment flag */ +#define IP_DF 0x4000 /* dont fragment flag */ +#define IP_MF 0x2000 /* more fragments flag */ +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */ + u_char ip_ttl; /* time to live */ + u_char ip_p; /* protocol */ + u_short ip_sum; /* checksum */ + struct in_addr ip_src,ip_dst; /* source and dest address */ +} __packed __aligned(4); + +#endif /* _NETINET_IP_H_ */ diff --git a/dummynet/include/netinet/ip6.h b/dummynet/include/netinet/ip6.h new file mode 100644 index 0000000..88b42a4 --- /dev/null +++ b/dummynet/include/netinet/ip6.h @@ -0,0 +1,58 @@ +#ifndef _NETINET_IP6_H_ +#define _NETINET_IP6_H_ +#define IN6_ARE_ADDR_EQUAL(a, b) \ +(memcmp(&(a)->s6_addr[0], &(b)->s6_addr[0], sizeof(struct in6_addr)) == 0) + +struct ip6_hdr { + union { + struct ip6_hdrctl { + u_int32_t ip6_un1_flow; /* 20 bits of flow-ID */ + u_int16_t ip6_un1_plen; /* payload length */ + u_int8_t ip6_un1_nxt; /* next header */ + u_int8_t ip6_un1_hlim; /* hop limit */ + } ip6_un1; + u_int8_t ip6_un2_vfc; /* 4 bits version, top 4 bits class */ + } ip6_ctlun; + struct in6_addr ip6_src; /* source address */ + struct in6_addr ip6_dst; /* destination address */ +}; +#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt +#define ip6_flow ip6_ctlun.ip6_un1.ip6_un1_flow + + +struct icmp6_hdr { + u_int8_t icmp6_type; /* type field */ + u_int8_t icmp6_code; /* code field */ + u_int16_t icmp6_cksum; /* checksum field */ + union { + u_int32_t icmp6_un_data32[1]; /* type-specific field */ + u_int16_t icmp6_un_data16[2]; /* type-specific field */ + u_int8_t icmp6_un_data8[4]; /* 
type-specific field */ + } icmp6_dataun; +}; + +struct ip6_hbh { + u_int8_t ip6h_nxt; /* next header */ + u_int8_t ip6h_len; /* length in units of 8 octets */ + /* followed by options */ +}; +struct ip6_rthdr { + u_int8_t ip6r_nxt; /* next header */ + u_int8_t ip6r_len; /* length in units of 8 octets */ + u_int8_t ip6r_type; /* routing type */ + u_int8_t ip6r_segleft; /* segments left */ + /* followed by routing type specific data */ +}; +struct ip6_frag { + u_int8_t ip6f_nxt; /* next header */ + u_int8_t ip6f_reserved; /* reserved field */ + u_int16_t ip6f_offlg; /* offset, reserved, and flag */ + u_int32_t ip6f_ident; /* identification */ +}; +#define IP6F_OFF_MASK 0xfff8 /* mask out offset from _offlg */ +#define IP6F_MORE_FRAG 0x0001 /* more-fragments flag */ +struct ip6_ext { + u_int8_t ip6e_nxt; + u_int8_t ip6e_len; +}; +#endif /* _NETINET_IP6_H_ */ diff --git a/dummynet/include/netinet/ip_divert.h b/dummynet/include/netinet/ip_divert.h new file mode 100644 index 0000000..4bb6e42 --- /dev/null +++ b/dummynet/include/netinet/ip_divert.h @@ -0,0 +1,14 @@ +#ifndef _IP_DIVERT_H +#define _IP_DIVERT_H + +struct mbuf; +typedef void ip_divert_packet_t(struct mbuf *, int); + +extern ip_divert_packet_t *ip_divert_ptr; + +struct divert_tag { + u_int32_t info; /* port & flags */ + u_int16_t cookie; /* ipfw rule number */ +}; + +#endif /* !_IP_DIVERT_H */ diff --git a/dummynet/include/netinet/ip_dummynet.h b/dummynet/include/netinet/ip_dummynet.h new file mode 100644 index 0000000..c6a6575 --- /dev/null +++ b/dummynet/include/netinet/ip_dummynet.h @@ -0,0 +1,399 @@ +/*- + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * Portions Copyright (c) 2000 Akamba Corp. + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40.2.1 2008/04/25 10:26:30 oleg Exp $ + */ + +#ifndef _IP_DUMMYNET_H +#define _IP_DUMMYNET_H + +/* + * Definition of dummynet data structures. In the structures, I decided + * not to use the macros in <sys/queue.h> in the hope of making the code + * easier to port to other architectures. The type of lists and queue we + * use here is pretty simple anyways. + */ + +/* + * We start with a heap, which is used in the scheduler to decide when + * to transmit packets etc. + * + * The key for the heap is used for two different values: + * + * 1. timer ticks- max 10K/second, so 32 bits are enough; + * + * 2. virtual times. These increase in steps of len/x, where len is the + * packet length, and x is either the weight of the flow, or the + * sum of all weights. 
+ * If we limit to max 1000 flows and a max weight of 100, then + * x needs 17 bits. The packet size is 16 bits, so we can easily + * overflow if we do not allow errors. + * So we use a key "dn_key" which is 64 bits. Some macros are used to + * compare key values and handle wraparounds. + * MAX64 returns the larger of two key values (arguments evaluated twice). + * MY_M is used as a shift count when doing fixed point arithmetic + * (a better name would be useful...). + */ +typedef u_int64_t dn_key ; /* sorting key */ +#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) +#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) +#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) +#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) +#define MAX64(x,y) ((( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)) +#define MY_M 16 /* number of left shifts to obtain a larger precision */ + +/* + * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the + * virtual time wraps every 15 days. + */ + + +/* + * The maximum hash table size for queues. This value must be a power + * of 2. + */ +#define DN_MAX_HASH_SIZE 65536 + +/* + * A heap entry is made of a key and a pointer to the actual + * object stored in the heap. + * The heap is an array of dn_heap_entry entries, dynamically allocated. + * Current size is "size", with "elements" actually in use. + * The heap normally supports only ordered insert and extract from the top. + * If we want to extract an object from the middle of the heap, we + * have to know where the object itself is located in the heap (or we + * need to scan the whole array). To this purpose, an object has a + * field (int) which contains the index of the object itself into the + * heap. When the object is moved, the field must also be updated. + * The offset of the index in the object is stored in the 'offset' + * field in the heap descriptor. The assumption is that this offset + * is non-zero if we want to support extract from the middle. + */ +struct dn_heap_entry { + dn_key key ; /* sorting key. 
Topmost element is smallest one */ + void *object ; /* object pointer */ +} ; + +struct dn_heap { + int size ; + int elements ; + int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ + struct dn_heap_entry *p ; /* really an array of "size" entries */ +} ; + +#ifdef _KERNEL +/* + * Packets processed by dummynet have an mbuf tag associated with + * them that carries their dummynet state. This is used within + * the dummynet code as well as outside when checking for special + * processing requirements. + */ +struct dn_pkt_tag { + struct ip_fw *rule; /* matching rule */ + int dn_dir; /* action when packet comes out. */ +#define DN_TO_IP_OUT 1 +#define DN_TO_IP_IN 2 +/* Obsolete: #define DN_TO_BDG_FWD 3 */ +#define DN_TO_ETH_DEMUX 4 +#define DN_TO_ETH_OUT 5 +#define DN_TO_IP6_IN 6 +#define DN_TO_IP6_OUT 7 +#define DN_TO_IFB_FWD 8 + + dn_key output_time; /* when the pkt is due for delivery */ + struct ifnet *ifp; /* interface, for ip_output */ + struct _ip6dn_args ip6opt; /* XXX ipv6 options */ +}; +#endif /* _KERNEL */ + +/* + * Overall structure of dummynet (with WF2Q+): + +In dummynet, packets are selected with the firewall rules, and passed +to two different objects: PIPE or QUEUE. + +A QUEUE is just a queue with configurable size and queue management +policy. It is also associated with a mask (to discriminate among +different flows), a weight (used to give different shares of the +bandwidth to different flows) and a "pipe", which essentially +supplies the transmit clock for all queues associated with that +pipe. + +A PIPE emulates a fixed-bandwidth link, whose bandwidth is +configurable. The "clock" for a pipe can come from either an +internal timer, or from the transmit interrupt of an interface. +A pipe is also associated with one (or more, if masks are used) +queue, where all packets for that pipe are stored. 
+ +The bandwidth available on the pipe is shared by the queues +associated with that pipe (only one in case the packet is sent +to a PIPE) according to the WF2Q+ scheduling algorithm and the +configured weights. + +In general, incoming packets are stored in the appropriate queue, +which is then placed into one of a few heaps managed by a scheduler +to decide when the packet should be extracted. +The scheduler (a function called dummynet()) is run at every timer +tick, and grabs queues from the head of the heaps when they are +ready for processing. + +There are three data structures defining a pipe and associated queues: + + + dn_pipe, which contains the main configuration parameters related + to delay and bandwidth; + + dn_flow_set, which contains WF2Q+ configuration, flow + masks, plr and RED configuration; + + dn_flow_queue, which is the per-flow queue (containing the packets) + +Multiple dn_flow_set can be linked to the same pipe, and multiple +dn_flow_queue can be linked to the same dn_flow_set. +All data structures are linked in a linear list which is used for +housekeeping purposes. + +During configuration, we create and initialize the dn_flow_set +and dn_pipe structures (a dn_pipe also contains a dn_flow_set). + +At runtime: packets are sent to the appropriate dn_flow_set (either +WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows), +which in turn dispatches them to the appropriate dn_flow_queue +(created dynamically according to the masks). + +The transmit clock for fixed rate flows (ready_event()) selects the +dn_flow_queue to be used to transmit the next packet. For WF2Q, +wfq_ready_event() extracts a pipe which in turn selects the right +flow using a number of heaps defined in the pipe itself. + + * + */ + +/* + * per flow queue. This contains the flow identifier, the queue + * of packets, counters, and parameters used to support both RED and + * WF2Q+. 
+ * + * A dn_flow_queue is created and initialized whenever a packet for + * a new flow arrives. + */ +struct dn_flow_queue { + struct dn_flow_queue *next ; + struct ipfw_flow_id id ; + + struct mbuf *head, *tail ; /* queue of packets */ + u_int len ; + u_int len_bytes ; + + /* + * When we emulate MAC overheads, or channel unavailability due + * to other traffic on a shared medium, we augment the packet at + * the head of the queue with an 'extra_bits' field representing + * the additional delay the packet will be subject to: + * extra_bits = bw*unavailable_time. + * With large bandwidth and large delays, extra_bits (and also numbytes) + * can become very large, so better play safe and use 64 bit + */ + uint64_t numbytes ; /* credit for transmission (dynamic queues) */ + int64_t extra_bits; /* extra bits simulating unavailable channel */ + + u_int64_t tot_pkts ; /* statistics counters */ + u_int64_t tot_bytes ; + u_int32_t drops ; + + int hash_slot ; /* debugging/diagnostic */ + + /* RED parameters */ + int avg ; /* average queue length est. (scaled) */ + int count ; /* arrivals since last RED drop */ + int random ; /* random value (scaled) */ + dn_key q_time; /* start of queue idle time */ + + /* WF2Q+ support */ + struct dn_flow_set *fs ; /* parent flow set */ + int heap_pos ; /* position (index) of struct in heap */ + dn_key sched_time ; /* current time when queue enters ready_heap */ + + dn_key S,F ; /* start time, finish time */ + /* + * Setting F < S means the timestamp is invalid. We only need + * to test this when the queue is empty. + */ +} ; + +/* + * flow_set descriptor. Contains the "template" parameters for the + * queue configuration, and pointers to the hash table of dn_flow_queue's. + * + * The hash table is an array of lists -- we identify the slot by + * hashing the flow-id, then scan the list looking for a match. + * The size of the hash table (buckets) is configurable on a per-queue + * basis. 
+ * + * A dn_flow_set is created whenever a new queue or pipe is created (in the + * latter case, the structure is located inside the struct dn_pipe). + */ +struct dn_flow_set { + SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ + + u_short fs_nr ; /* flow_set number */ + u_short flags_fs; +#define DN_HAVE_FLOW_MASK 0x0001 +#define DN_IS_RED 0x0002 +#define DN_IS_GENTLE_RED 0x0004 +#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ +#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ +#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ +#define DN_IS_PIPE 0x4000 +#define DN_IS_QUEUE 0x8000 + + struct dn_pipe *pipe ; /* pointer to parent pipe */ + u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ + + int weight ; /* WFQ queue weight */ + int qsize ; /* queue size in slots or bytes */ + int plr ; /* pkt loss rate (2^31-1 means 100%) */ + + struct ipfw_flow_id flow_mask ; + + /* hash table of queues onto this flow_set */ + int rq_size ; /* number of slots */ + int rq_elements ; /* active elements */ + struct dn_flow_queue **rq; /* array of rq_size entries */ + + u_int32_t last_expired ; /* do not expire too frequently */ + int backlogged ; /* #active queues for this flowset */ + + /* RED parameters */ +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ + u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ + u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ + u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ + u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ + u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ + u_int lookup_depth ; /* depth of lookup table */ + int 
lookup_step ; /* granularity inside the lookup table */ + int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ + int avg_pkt_size ; /* medium packet size */ + int max_pkt_size ; /* max packet size */ +}; +SLIST_HEAD(dn_flow_set_head, dn_flow_set); + +/* + * Pipe descriptor. Contains global parameters, delay-line queue, + * and the flow_set used for fixed-rate queues. + * + * For WF2Q+ support it also has 3 heaps holding dn_flow_queue: + * not_eligible_heap, for queues whose start time is higher + * than the virtual time. Sorted by start time. + * scheduler_heap, for queues eligible for scheduling. Sorted by + * finish time. + * idle_heap, all flows that are idle and can be removed. We + * do that on each tick so we do not slow down too much + * operations during forwarding. + * + */ +struct dn_pipe { /* a pipe */ + SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */ + + int pipe_nr ; /* number */ + int bandwidth; /* really, bytes/tick. */ + int delay ; /* really, ticks */ + + struct mbuf *head, *tail ; /* packets in delay line */ + + /* WF2Q+ */ + struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ + struct dn_heap not_eligible_heap; /* top extract- key Start time */ + struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ + + dn_key V ; /* virtual time */ + int sum; /* sum of weights of all active sessions */ + + /* Same as in dn_flow_queue, numbytes can become large */ + int64_t numbytes; /* bits I can transmit (more or less). */ + + dn_key sched_time ; /* time pipe was scheduled in ready_heap */ + + /* + * When the tx clock comes from an interface (if_name[0] != '\0'), its name + * is stored below, whereas the ifp is filled when the rule is configured. 
+ */ + char if_name[IFNAMSIZ]; + struct ifnet *ifp ; + int ready ; /* set if ifp != NULL and we got a signal from it */ + + struct dn_flow_set fs ; /* used with fixed-rate flows */ + + /* fields to simulate a delay profile */ + +#define ED_MAX_NAME_LEN 32 + char name[ED_MAX_NAME_LEN]; + int loss_level; + int samples_no; + int *samples; +}; + +/* dn_pipe_max is used to pass pipe configuration from userland onto + * kernel space and back + */ +#define ED_MAX_SAMPLES_NO 1024 +struct dn_pipe_max { + struct dn_pipe pipe; + int samples[ED_MAX_SAMPLES_NO]; +}; + +SLIST_HEAD(dn_pipe_head, dn_pipe); + +#ifdef _KERNEL +typedef int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */ +typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ +typedef int ip_dn_io_t(struct mbuf **m, int dir, struct ip_fw_args *fwa); +extern ip_dn_ctl_t *ip_dn_ctl_ptr; +extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; +extern ip_dn_io_t *ip_dn_io_ptr; +#define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) + +/* + * Return the IPFW rule associated with the dummynet tag; if any. + * Make sure that the dummynet tag is not reused by lower layers. + */ +static __inline struct ip_fw * +ip_dn_claim_rule(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + if (mtag != NULL) { + mtag->m_tag_id = PACKET_TAG_NONE; + return (((struct dn_pkt_tag *)(mtag+1))->rule); + } else + return (NULL); +} +#endif +#endif /* _IP_DUMMYNET_H */ diff --git a/dummynet/include/netinet/ip_fw.h b/dummynet/include/netinet/ip_fw.h new file mode 100644 index 0000000..bd4d3f9 --- /dev/null +++ b/dummynet/include/netinet/ip_fw.h @@ -0,0 +1,680 @@ +/*- + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.110.2.6 2008/10/14 08:03:58 rwatson Exp $ + */ + +#ifndef _IPFW2_H +#define _IPFW2_H + +/* + * The default rule number. By the design of ip_fw, the default rule + * is the last one, so its number can also serve as the highest number + * allowed for a rule. The ip_fw code relies on both meanings of this + * constant. + */ +#define IPFW_DEFAULT_RULE 65535 + +/* + * The kernel representation of ipfw rules is made of a list of + * 'instructions' (for all practical purposes equivalent to BPF + * instructions), which specify which fields of the packet + * (or its metadata) should be analysed. + * + * Each instruction is stored in a structure which begins with + * "ipfw_insn", and can contain extra fields depending on the + * instruction type (listed below). + * Note that the code is written so that individual instructions + * have a size which is a multiple of 32 bits. 
This means that, if + * such structures contain pointers or other 64-bit entities, + * (there is just one instance now) they may end up unaligned on + * 64-bit architectures, so they must be handled with care. + * + * "enum ipfw_opcodes" are the opcodes supported. We can have up + * to 256 different opcodes. When adding new opcodes, they should + * be appended to the end of the opcode list before O_LAST_OPCODE, + * this will prevent the ABI from being broken, otherwise users + * will have to recompile ipfw(8) when they update the kernel. + */ + +enum ipfw_opcodes { /* arguments (4 byte each) */ + O_NOP, + + O_IP_SRC, /* u32 = IP */ + O_IP_SRC_MASK, /* ip = IP/mask */ + O_IP_SRC_ME, /* none */ + O_IP_SRC_SET, /* u32=base, arg1=len, bitmap */ + + O_IP_DST, /* u32 = IP */ + O_IP_DST_MASK, /* ip = IP/mask */ + O_IP_DST_ME, /* none */ + O_IP_DST_SET, /* u32=base, arg1=len, bitmap */ + + O_IP_SRCPORT, /* (n)port list:mask 4 byte ea */ + O_IP_DSTPORT, /* (n)port list:mask 4 byte ea */ + O_PROTO, /* arg1=protocol */ + + O_MACADDR2, /* 2 mac addr:mask */ + O_MAC_TYPE, /* same as srcport */ + + O_LAYER2, /* none */ + O_IN, /* none */ + O_FRAG, /* none */ + + O_RECV, /* none */ + O_XMIT, /* none */ + O_VIA, /* none */ + + O_IPOPT, /* arg1 = 2*u8 bitmap */ + O_IPLEN, /* arg1 = len */ + O_IPID, /* arg1 = id */ + + O_IPTOS, /* arg1 = id */ + O_IPPRECEDENCE, /* arg1 = precedence << 5 */ + O_IPTTL, /* arg1 = TTL */ + + O_IPVER, /* arg1 = version */ + O_UID, /* u32 = id */ + O_GID, /* u32 = id */ + O_ESTAB, /* none (tcp established) */ + O_TCPFLAGS, /* arg1 = 2*u8 bitmap */ + O_TCPWIN, /* arg1 = desired win */ + O_TCPSEQ, /* u32 = desired seq. */ + O_TCPACK, /* u32 = desired seq. */ + O_ICMPTYPE, /* u32 = icmp bitmap */ + O_TCPOPTS, /* arg1 = 2*u8 bitmap */ + + O_VERREVPATH, /* none */ + O_VERSRCREACH, /* none */ + + O_PROBE_STATE, /* none */ + O_KEEP_STATE, /* none */ + O_LIMIT, /* ipfw_insn_limit */ + O_LIMIT_PARENT, /* dyn_type, not an opcode. 
*/ + + /* + * These are really 'actions'. + */ + + O_LOG, /* ipfw_insn_log */ + O_PROB, /* u32 = match probability */ + + O_CHECK_STATE, /* none */ + O_ACCEPT, /* none */ + O_DENY, /* none */ + O_REJECT, /* arg1=icmp arg (same as deny) */ + O_COUNT, /* none */ + O_SKIPTO, /* arg1=next rule number */ + O_PIPE, /* arg1=pipe number */ + O_QUEUE, /* arg1=queue number */ + O_DIVERT, /* arg1=port number */ + O_TEE, /* arg1=port number */ + O_FORWARD_IP, /* fwd sockaddr */ + O_FORWARD_MAC, /* fwd mac */ + O_NAT, /* nope */ + O_REASS, /* none */ + + /* + * More opcodes. + */ + O_IPSEC, /* has ipsec history */ + O_IP_SRC_LOOKUP, /* arg1=table number, u32=value */ + O_IP_DST_LOOKUP, /* arg1=table number, u32=value */ + O_ANTISPOOF, /* none */ + O_JAIL, /* u32 = id */ + O_ALTQ, /* u32 = altq classif. qid */ + O_DIVERTED, /* arg1=bitmap (1:loop, 2:out) */ + O_TCPDATALEN, /* arg1 = tcp data len */ + O_IP6_SRC, /* address without mask */ + O_IP6_SRC_ME, /* my addresses */ + O_IP6_SRC_MASK, /* address with the mask */ + O_IP6_DST, + O_IP6_DST_ME, + O_IP6_DST_MASK, + O_FLOW6ID, /* for flow id tag in the ipv6 pkt */ + O_ICMP6TYPE, /* icmp6 packet type filtering */ + O_EXT_HDR, /* filtering for ipv6 extension header */ + O_IP6, + + /* + * actions for ng_ipfw + */ + O_NETGRAPH, /* send to ng_ipfw */ + O_NGTEE, /* copy to ng_ipfw */ + + O_IP4, + + O_UNREACH6, /* arg1=icmpv6 code arg (deny) */ + + O_TAG, /* arg1=tag number */ + O_TAGGED, /* arg1=tag number */ + + O_SETFIB, /* arg1=FIB number */ + O_FIB, /* arg1=FIB desired fib number */ + + O_LAST_OPCODE /* not an opcode! */ +}; + +/* + * The extension header are filtered only for presence using a bit + * vector with a flag for each header. + */ +#define EXT_FRAGMENT 0x1 +#define EXT_HOPOPTS 0x2 +#define EXT_ROUTING 0x4 +#define EXT_AH 0x8 +#define EXT_ESP 0x10 +#define EXT_DSTOPTS 0x20 +#define EXT_RTHDR0 0x40 +#define EXT_RTHDR2 0x80 + +/* + * Template for instructions. 
+ * + * ipfw_insn is used for all instructions which require no operands, + * a single 16-bit value (arg1), or a couple of 8-bit values. + * + * For other instructions which require different/larger arguments + * we have derived structures, ipfw_insn_*. + * + * The size of the instruction (in 32-bit words) is in the low + * 6 bits of "len". The 2 remaining bits are used to implement + * NOT and OR on individual instructions. Given a type, you can + * compute the length to be put in "len" using F_INSN_SIZE(t) + * + * F_NOT negates the match result of the instruction. + * + * F_OR is used to build or blocks. By default, instructions + * are evaluated as part of a logical AND. An "or" block + * { X or Y or Z } contains F_OR set in all but the last + * instruction of the block. A match will cause the code + * to skip past the last instruction of the block. + * + * NOTA BENE: in a couple of places we assume that + * sizeof(ipfw_insn) == sizeof(u_int32_t) + * this needs to be fixed. + * + */ +typedef struct _ipfw_insn { /* template for instructions */ + enum ipfw_opcodes opcode:8; + u_int8_t len; /* number of 32-bit words */ +#define F_NOT 0x80 +#define F_OR 0x40 +#define F_LEN_MASK 0x3f +#define F_LEN(cmd) ((cmd)->len & F_LEN_MASK) + + u_int16_t arg1; +} ipfw_insn; + +/* + * The F_INSN_SIZE(type) computes the size, in 4-byte words, of + * a given type. + */ +#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t)) + +#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ + +/* + * This is used to store an array of 16-bit entries (ports etc.) + */ +typedef struct _ipfw_insn_u16 { + ipfw_insn o; + u_int16_t ports[2]; /* there may be more */ +} ipfw_insn_u16; + +/* + * This is used to store an array of 32-bit entries + * (uid, single IPv4 addresses etc.) + */ +typedef struct _ipfw_insn_u32 { + ipfw_insn o; + u_int32_t d[1]; /* one or more */ +} ipfw_insn_u32; + +/* + * This is used to store IP addr-mask pairs. 
+ */ +typedef struct _ipfw_insn_ip { + ipfw_insn o; + struct in_addr addr; + struct in_addr mask; +} ipfw_insn_ip; + +/* + * This is used to forward to a given address (ip). + */ +typedef struct _ipfw_insn_sa { + ipfw_insn o; + struct sockaddr_in sa; +} ipfw_insn_sa; + +/* + * This is used for MAC addr-mask pairs. + */ +typedef struct _ipfw_insn_mac { + ipfw_insn o; + u_char addr[12]; /* dst[6] + src[6] */ + u_char mask[12]; /* dst[6] + src[6] */ +} ipfw_insn_mac; + +/* + * This is used for interface match rules (recv xx, xmit xx). + */ +typedef struct _ipfw_insn_if { + ipfw_insn o; + union { + struct in_addr ip; + int glob; + } p; + char name[IFNAMSIZ]; +} ipfw_insn_if; + +/* + * This is used for storing an altq queue id number. + */ +typedef struct _ipfw_insn_altq { + ipfw_insn o; + u_int32_t qid; +} ipfw_insn_altq; + +/* + * This is used for limit rules. + */ +typedef struct _ipfw_insn_limit { + ipfw_insn o; + u_int8_t _pad; + u_int8_t limit_mask; /* combination of DYN_* below */ +#define DYN_SRC_ADDR 0x1 +#define DYN_SRC_PORT 0x2 +#define DYN_DST_ADDR 0x4 +#define DYN_DST_PORT 0x8 + + u_int16_t conn_limit; +} ipfw_insn_limit; + +/* + * This is used for log instructions. + */ +typedef struct _ipfw_insn_log { + ipfw_insn o; + u_int32_t max_log; /* how many do we log -- 0 = all */ + u_int32_t log_left; /* how many left to log */ +} ipfw_insn_log; + +/* + * Data structures required by both ipfw(8) and ipfw(4) but not part of the + * management API are protected by IPFW_INTERNAL. + */ +#ifdef IPFW_INTERNAL +/* Server pool support (LSNAT). */ +struct cfg_spool { + LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */ + struct in_addr addr; + u_short port; +}; +#endif + +/* Redirect modes id. */ +#define REDIR_ADDR 0x01 +#define REDIR_PORT 0x02 +#define REDIR_PROTO 0x04 + +#ifdef IPFW_INTERNAL +/* Nat redirect configuration. 
*/ +struct cfg_redir { + LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */ + u_int16_t mode; /* type of redirect mode */ + struct in_addr laddr; /* local ip address */ + struct in_addr paddr; /* public ip address */ + struct in_addr raddr; /* remote ip address */ + u_short lport; /* local port */ + u_short pport; /* public port */ + u_short rport; /* remote port */ + u_short pport_cnt; /* number of public ports */ + u_short rport_cnt; /* number of remote ports */ + int proto; /* protocol: tcp/udp */ + struct alias_link **alink; + /* num of entry in spool chain */ + u_int16_t spool_cnt; + /* chain of spool instances */ + LIST_HEAD(spool_chain, cfg_spool) spool_chain; +}; +#endif + +#define NAT_BUF_LEN 1024 + +#ifdef IPFW_INTERNAL +/* Nat configuration data struct. */ +struct cfg_nat { + /* chain of nat instances */ + LIST_ENTRY(cfg_nat) _next; + int id; /* nat id */ + struct in_addr ip; /* nat ip address */ + char if_name[IF_NAMESIZE]; /* interface name */ + int mode; /* aliasing mode */ + struct libalias *lib; /* libalias instance */ + /* number of entry in spool chain */ + int redir_cnt; + /* chain of redir instances */ + LIST_HEAD(redir_chain, cfg_redir) redir_chain; +}; +#endif + +#define SOF_NAT sizeof(struct cfg_nat) +#define SOF_REDIR sizeof(struct cfg_redir) +#define SOF_SPOOL sizeof(struct cfg_spool) + +/* Nat command. 
*/ +typedef struct _ipfw_insn_nat { + ipfw_insn o; + struct cfg_nat *nat; +} ipfw_insn_nat; + +/* Apply ipv6 mask on ipv6 addr; expands to one statement, each argument is evaluated four times */ +#define APPLY_MASK(addr,mask) do { \ + (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \ + (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \ + (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \ + (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; } while (0) + +/* Structure for ipv6 */ +typedef struct _ipfw_insn_ip6 { + ipfw_insn o; + struct in6_addr addr6; + struct in6_addr mask6; +} ipfw_insn_ip6; + +/* Used to support icmp6 types */ +typedef struct _ipfw_insn_icmp6 { + ipfw_insn o; + uint32_t d[7]; /* XXX This number is related to the netinet/icmp6.h + * define ICMP6_MAXTYPE + * as follows: n = ICMP6_MAXTYPE/32 + 1 + * Actually is 203 + */ +} ipfw_insn_icmp6; + +/* + * Here we have the structure representing an ipfw rule. + * + * It starts with a general area (with link fields and counters) + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. + * + * Given a rule pointer r: + * + * r->cmd is the start of the first instruction. + * ACTION_PTR(r) is the start of the first action (things to do + * once a rule matched). + * + * When assembling instruction, remember the following: + * + * + if a rule has a "keep-state" (or "limit") option, then the + * first instruction (at r->cmd) MUST BE an O_PROBE_STATE + * + if a rule has a "log" option, then the first action + * (at ACTION_PTR(r)) MUST be O_LOG + * + if a rule has an "altq" option, it comes after "log" + * + if a rule has an O_TAG option, it comes after "log" and "altq" + * + * NOTE: we use a simple linked list of rules because we never need + * to delete a rule without scanning the list. We do not use + * queue(3) macros for portability and readability.
+ */ + +struct ip_fw { + struct ip_fw *next; /* linked list of rules */ + struct ip_fw *next_rule; /* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + u_int16_t act_ofs; /* offset of action in 32-bit units */ + u_int16_t cmd_len; /* # of 32-bit words in cmd */ + u_int16_t rulenum; /* rule number */ + u_int8_t set; /* rule set (0..31) */ +#define RESVD_SET 31 /* set for default and persistent rules */ + u_int8_t _pad; /* padding */ + + /* These fields are present in all rules. */ + u_int64_t pcnt; /* Packet counter */ + u_int64_t bcnt; /* Byte counter */ + u_int32_t timestamp; /* tv_sec of last match */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#define ACTION_PTR(rule) /* fully parenthesized so postfix operators apply to the result */ \ + ((ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) )) + +#define RULESIZE(rule) (sizeof(struct ip_fw) + \ + ((struct ip_fw *)(rule))->cmd_len * 4 - 4) + +/* + * This structure is used as a flow mask and a flow id for various + * parts of the code. + */ +struct ipfw_flow_id { + u_int32_t dst_ip; + u_int32_t src_ip; + u_int16_t dst_port; + u_int16_t src_port; + u_int8_t fib; + u_int8_t proto; + u_int8_t flags; /* protocol-specific flags */ + uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */ + struct in6_addr dst_ip6; /* could also store MAC addr! */ + struct in6_addr src_ip6; + u_int32_t flow_id6; + u_int32_t frag_id6; +}; + +#define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6) + +/* + * Dynamic ipfw rule. + */ +typedef struct _ipfw_dyn_rule ipfw_dyn_rule; + +struct _ipfw_dyn_rule { + ipfw_dyn_rule *next; /* linked list of rules.
*/ + struct ip_fw *rule; /* pointer to rule */ + /* 'rule' is used to pass up the rule number (from the parent) */ + + ipfw_dyn_rule *parent; /* pointer to parent rule */ + u_int64_t pcnt; /* packet match counter */ + u_int64_t bcnt; /* byte match counter */ + struct ipfw_flow_id id; /* (masked) flow id */ + u_int32_t expire; /* expire time */ + u_int32_t bucket; /* which bucket in hash table */ + u_int32_t state; /* state of this rule (typically a + * combination of TCP flags) + */ + u_int32_t ack_fwd; /* most recent ACKs in forward */ + u_int32_t ack_rev; /* and reverse directions (used */ + /* to generate keepalives) */ + u_int16_t dyn_type; /* rule type */ + u_int16_t count; /* refcount */ +}; + +/* + * Definitions for IP option names. + */ +#define IP_FW_IPOPT_LSRR 0x01 +#define IP_FW_IPOPT_SSRR 0x02 +#define IP_FW_IPOPT_RR 0x04 +#define IP_FW_IPOPT_TS 0x08 + +/* + * Definitions for TCP option names. + */ +#define IP_FW_TCPOPT_MSS 0x01 +#define IP_FW_TCPOPT_WINDOW 0x02 +#define IP_FW_TCPOPT_SACK 0x04 +#define IP_FW_TCPOPT_TS 0x08 +#define IP_FW_TCPOPT_CC 0x10 + +#define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */ +#define ICMP6_UNREACH_RST 0x100 /* fake ICMPv6 code (send a TCP RST) */ + +/* + * These are used for lookup tables. + */ +typedef struct _ipfw_table_entry { + in_addr_t addr; /* network address */ + u_int32_t value; /* value */ + u_int16_t tbl; /* table number */ + u_int8_t masklen; /* mask length */ +} ipfw_table_entry; + +typedef struct _ipfw_table { + u_int32_t size; /* size of entries in bytes */ + u_int32_t cnt; /* # of entries */ + u_int16_t tbl; /* table number */ + ipfw_table_entry ent[0]; /* entries */ +} ipfw_table; + +#define IP_FW_TABLEARG 65535 + +/* + * Main firewall chains definitions and global var's definitions. 
+ */ +#ifdef _KERNEL + +/* Return values from ipfw_chk() */ +enum { + IP_FW_PASS = 0, + IP_FW_DENY, + IP_FW_DIVERT, + IP_FW_TEE, + IP_FW_DUMMYNET, + IP_FW_NETGRAPH, + IP_FW_NGTEE, + IP_FW_NAT, + IP_FW_REASS, +}; + +/* flags for divert mtag */ +#define IP_FW_DIVERT_LOOPBACK_FLAG 0x00080000 +#define IP_FW_DIVERT_OUTPUT_FLAG 0x00100000 + +/* + * Structure for collecting parameters to dummynet for ip6_output forwarding + */ +struct _ip6dn_args { + struct ip6_pktopts *opt_or; + struct route_in6 ro_or; + int flags_or; + struct ip6_moptions *im6o_or; + struct ifnet *origifp_or; + struct ifnet *ifp_or; + struct sockaddr_in6 dst_or; + u_long mtu_or; + struct route_in6 ro_pmtu_or; +}; + +/* + * Arguments for calling ipfw_chk() and dummynet_io(). We put them + * all into a structure because this way it is easier and more + * efficient to pass variables around and extend the interface. + */ +struct ip_fw_args { + struct mbuf *m; /* the mbuf chain */ + struct ifnet *oif; /* output interface */ + struct sockaddr_in *next_hop; /* forward address */ + struct ip_fw *rule; /* matching rule */ + struct ether_header *eh; /* for bridged packets */ + + struct ipfw_flow_id f_id; /* grabbed from IP header */ + u_int32_t cookie; /* a cookie depending on rule action */ + struct inpcb *inp; + + struct _ip6dn_args dummypar; /* dummynet->ip6_output */ + struct sockaddr_in hopstore; /* store here if cannot use a pointer */ +}; + +/* + * Function definitions. + */ + +/* Firewall hooks */ +struct sockopt; +struct dn_flow_set; + +int ipfw_check_in(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); +int ipfw_check_out(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); + +int ipfw_chk(struct ip_fw_args *); + +int ipfw_init(void); +void ipfw_destroy(void); + +typedef int ip_fw_ctl_t(struct sockopt *); +extern ip_fw_ctl_t *ip_fw_ctl_ptr; +extern int fw_one_pass; +extern int fw_enable; +#ifdef INET6 +extern int fw6_enable; +#endif + +/* For kernel ipfw_ether and ipfw_bridge. 
*/ +typedef int ip_fw_chk_t(struct ip_fw_args *args); +extern ip_fw_chk_t *ip_fw_chk_ptr; +#define IPFW_LOADED (ip_fw_chk_ptr != NULL) + +#ifdef IPFW_INTERNAL + +#define IPFW_TABLES_MAX 128 +struct ip_fw_chain { + struct ip_fw *rules; /* list of rules */ + struct ip_fw *reap; /* list of rules to reap */ + LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ + struct radix_node_head *tables[IPFW_TABLES_MAX]; +#if defined( __linux__ ) || defined( _WIN32 ) + spinlock_t rwmtx; +#else + struct rwlock rwmtx; +#endif /* !__linux__ */ +}; +#define IPFW_LOCK_INIT(_chain) \ + rw_init(&(_chain)->rwmtx, "IPFW static rules") +#define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx) +#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) + +#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) +#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) +#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) +#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) + +#define LOOKUP_NAT(l, i, p) do { /* (p) is left at the entry whose id == (i) if the loop breaks */ \ + LIST_FOREACH((p), &((l).nat), _next) { \ + if ((p)->id == (i)) { \ + break; \ + } \ + } \ + } while (0) + +typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); +typedef int ipfw_nat_cfg_t(struct sockopt *); +#endif + +#endif /* _KERNEL */ +#endif /* _IPFW2_H */ diff --git a/dummynet/include/netinet/ip_icmp.h b/dummynet/include/netinet/ip_icmp.h new file mode 100644 index 0000000..5c7b851 --- /dev/null +++ b/dummynet/include/netinet/ip_icmp.h @@ -0,0 +1,17 @@ +/* + * additional define not present in linux + * should go in glue.h + */ +#ifndef _NETINET_IP_ICMP_H_ +#define _NETINET_IP_ICMP_H_ + +#define ICMP_MAXTYPE 40 /* defined as 18 in compat.h */ +#define ICMP_ROUTERSOLICIT 10 /* router solicitation */ +#define ICMP_TSTAMP 13 /* timestamp request */ +#define ICMP_IREQ 15 /* information request */ +#define ICMP_MASKREQ 17 /* address mask request */ +#define ICMP_UNREACH_HOST 1 /* bad host */ + +#define ICMP_UNREACH 3 /* dest unreachable, codes: */ + +#endif /*
_NETINET_IP_ICMP_H_ */ diff --git a/dummynet/include/netinet/tcp.h b/dummynet/include/netinet/tcp.h new file mode 100644 index 0000000..168d971 --- /dev/null +++ b/dummynet/include/netinet/tcp.h @@ -0,0 +1,228 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)tcp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/netinet/tcp.h,v 1.40.2.2 2008/07/31 06:10:25 kmacy Exp $ + */ + +#ifndef _NETINET_TCP_H_ +#define _NETINET_TCP_H_ + +#include <sys/cdefs.h> /* NOTE(review): operand was lost in extraction; restored from the FreeBSD original - confirm */ + +#define __BSD_VISIBLE 1 + +#if __BSD_VISIBLE + +typedef u_int32_t tcp_seq; + +#define tcp6_seq tcp_seq /* for KAME src sync over BSD*'s */ +#define tcp6hdr tcphdr /* for KAME src sync over BSD*'s */ + +/* + * TCP header. + * Per RFC 793, September, 1981. + */ +struct tcphdr { + u_short th_sport; /* source port */ + u_short th_dport; /* destination port */ + tcp_seq th_seq; /* sequence number */ + tcp_seq th_ack; /* acknowledgement number */ +#if BYTE_ORDER == LITTLE_ENDIAN + u_int th_x2:4, /* (unused) */ + th_off:4; /* data offset */ +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_int th_off:4, /* data offset */ + th_x2:4; /* (unused) */ +#endif + u_char th_flags; +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 +#define TH_ECE 0x40 +#define TH_CWR 0x80 +#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR) +#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" + + u_short th_win; /* window */ + u_short th_sum; /* checksum */ + u_short th_urp; /* urgent pointer */ +}; + +#define TCPOPT_EOL 0 +#define TCPOLEN_EOL 1 +#define TCPOPT_PAD 0 /* padding after EOL */ +#define TCPOLEN_PAD 1 +#define TCPOPT_NOP 1 +#define TCPOLEN_NOP 1 +#define TCPOPT_MAXSEG 2 +#define TCPOLEN_MAXSEG 4 +#define TCPOPT_WINDOW 3 +#define TCPOLEN_WINDOW 3 +#define TCPOPT_SACK_PERMITTED 4 +#define TCPOLEN_SACK_PERMITTED 2 +#define TCPOPT_SACK 5 +#define TCPOLEN_SACKHDR 2 +#define TCPOLEN_SACK 8 /* 2*sizeof(tcp_seq) */ +#define TCPOPT_TIMESTAMP 8 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ +#define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */ +#define TCPOLEN_SIGNATURE 18 + +/* Miscellaneous constants */ +#define MAX_SACK_BLKS 6 /* Max # SACK
blocks stored at receiver side */ +#define TCP_MAX_SACK 4 /* MAX # SACKs sent in any segment */ + + +/* + * Default maximum segment size for TCP. + * With an IP MTU of 576, this is 536, + * but 512 is probably more convenient. + * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). + */ +#define TCP_MSS 512 +/* + * TCP_MINMSS is defined to be 216 which is fine for the smallest + * link MTU (256 bytes, AX.25 packet radio) in the Internet. + * However it is very unlikely to come across such low MTU interfaces + * these days (anno dato 2003). + * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments. + * Setting this to "0" disables the minmss check. + */ +#define TCP_MINMSS 216 + +/* + * Default maximum segment size for TCP6. + * With an IP6 MSS of 1280, this is 1220, + * but 1024 is probably more convenient. (xxx kazu in doubt) + * This should be defined as MIN(1024, IP6_MSS - sizeof (struct tcpip6hdr)) + */ +#define TCP6_MSS 1024 + +#define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ +#define TTCP_CLIENT_SND_WND 4096 /* dflt send window for T/TCP client */ + +#define TCP_MAX_WINSHIFT 14 /* maximum window shift */ + +#define TCP_MAXBURST 4 /* maximum segments in a burst */ + +#define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */ +#define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr)) + /* max space left for options */ +#endif /* __BSD_VISIBLE */ + +/* + * User-settable options (used with setsockopt). 
+ */ +#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */ +#if __BSD_VISIBLE +#define TCP_MAXSEG 0x02 /* set maximum segment size */ +#define TCP_NOPUSH 0x04 /* don't push last block of write */ +#define TCP_NOOPT 0x08 /* don't use TCP options */ +#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */ +#define TCP_INFO 0x20 /* retrieve tcp_info structure */ +#define TCP_CONGESTION 0x40 /* get/set congestion control algorithm */ + +#define TCP_CA_NAME_MAX 16 /* max congestion control name length */ + +#define TCPI_OPT_TIMESTAMPS 0x01 +#define TCPI_OPT_SACK 0x02 +#define TCPI_OPT_WSCALE 0x04 +#define TCPI_OPT_ECN 0x08 +#define TCPI_OPT_TOE 0x10 + +/* + * The TCP_INFO socket option comes from the Linux 2.6 TCP API, and permits + * the caller to query certain information about the state of a TCP + * connection. We provide an overlapping set of fields with the Linux + * implementation, but since this is a fixed size structure, room has been + * left for growth. In order to maximize potential future compatibility with + * the Linux API, the same variable names and order have been adopted, and + * padding left to make room for omitted fields in case they are added later. + * + * XXX: This is currently an unstable ABI/API, in that it is expected to + * change. + */ +struct tcp_info { + u_int8_t tcpi_state; /* TCP FSM state. */ + u_int8_t __tcpi_ca_state; + u_int8_t __tcpi_retransmits; + u_int8_t __tcpi_probes; + u_int8_t __tcpi_backoff; + u_int8_t tcpi_options; /* Options enabled on conn. */ + u_int8_t tcpi_snd_wscale:4, /* RFC1323 send shift value. */ + tcpi_rcv_wscale:4; /* RFC1323 recv shift value. */ + + u_int32_t __tcpi_rto; + u_int32_t __tcpi_ato; + u_int32_t __tcpi_snd_mss; + u_int32_t __tcpi_rcv_mss; + + u_int32_t __tcpi_unacked; + u_int32_t __tcpi_sacked; + u_int32_t __tcpi_lost; + u_int32_t __tcpi_retrans; + u_int32_t __tcpi_fackets; + + /* Times; measurements in usecs. 
*/ + u_int32_t __tcpi_last_data_sent; + u_int32_t __tcpi_last_ack_sent; /* Also unimpl. on Linux? */ + u_int32_t __tcpi_last_data_recv; + u_int32_t __tcpi_last_ack_recv; + + /* Metrics; variable units. */ + u_int32_t __tcpi_pmtu; + u_int32_t __tcpi_rcv_ssthresh; + u_int32_t tcpi_rtt; /* Smoothed RTT in usecs. */ + u_int32_t tcpi_rttvar; /* RTT variance in usecs. */ + u_int32_t tcpi_snd_ssthresh; /* Slow start threshold. */ + u_int32_t tcpi_snd_cwnd; /* Send congestion window. */ + u_int32_t __tcpi_advmss; + u_int32_t __tcpi_reordering; + + u_int32_t __tcpi_rcv_rtt; + u_int32_t tcpi_rcv_space; /* Advertised recv window. */ + + /* FreeBSD extensions to tcp_info. */ + u_int32_t tcpi_snd_wnd; /* Advertised send window. */ + u_int32_t tcpi_snd_bwnd; /* Bandwidth send window. */ + u_int32_t tcpi_snd_nxt; /* Next egress seqno */ + u_int32_t tcpi_rcv_nxt; /* Next ingress seqno */ + u_int32_t tcpi_toe_tid; /* HWTID for TOE endpoints */ + + /* Padding to grow without breaking ABI. */ + u_int32_t __tcpi_pad[29]; /* Padding. */ +}; +#endif + +#endif /* !_NETINET_TCP_H_ */ diff --git a/dummynet/include/netinet/tcp_var.h b/dummynet/include/netinet/tcp_var.h new file mode 100644 index 0000000..35196a2 --- /dev/null +++ b/dummynet/include/netinet/tcp_var.h @@ -0,0 +1,4 @@ +#ifndef _NETINET_TCP_VAR_H_ +#define _NETINET_TCP_VAR_H_ +#include +#endif /* !_NETINET_TCP_VAR_H_ */ diff --git a/dummynet/include/netinet/udp.h b/dummynet/include/netinet/udp.h new file mode 100644 index 0000000..aed3099 --- /dev/null +++ b/dummynet/include/netinet/udp.h @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)udp.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/netinet/udp.h,v 1.10 2007/02/20 10:13:11 rwatson Exp $ + */ + +#ifndef _NETINET_UDP_H_ +#define _NETINET_UDP_H_ + +/* + * UDP protocol header. + * Per RFC 768, September, 1981. 
+ */ +struct udphdr { + u_short uh_sport; /* source port */ + u_short uh_dport; /* destination port */ + u_short uh_ulen; /* udp length */ + u_short uh_sum; /* udp checksum */ +}; + +#endif diff --git a/dummynet/include/sys/cdefs.h b/dummynet/include/sys/cdefs.h new file mode 100644 index 0000000..b95b4b7 --- /dev/null +++ b/dummynet/include/sys/cdefs.h @@ -0,0 +1,33 @@ +#ifndef _CDEFS_H_ +#define _CDEFS_H_ + +/* + * various compiler macros and common functions + */ + +#ifndef __unused +#define __unused __attribute__ ((__unused__)) +#endif + +#ifndef __packed +#define __packed __attribute__ ((__packed__)) +#endif + +#ifndef __aligned +#define __aligned(x) __attribute__((__aligned__(x))) +#endif + +/* defined as assert */ +void panic(const char *fmt, ...); + +#define KASSERT(exp,msg) do { \ + if (__predict_false(!(exp))) \ + panic msg; \ +} while (0) + +/* don't bother to optimize */ +#ifndef __predict_false +#define __predict_false(x) (x) /* __builtin_expect((exp), 0) */ +#endif + +#endif /* !_CDEFS_H_ */ diff --git a/dummynet/include/sys/kernel.h b/dummynet/include/sys/kernel.h new file mode 100644 index 0000000..61b3bec --- /dev/null +++ b/dummynet/include/sys/kernel.h @@ -0,0 +1,20 @@ +/* + * from freebsd's kernel.h + */ +#ifndef _SYS_KERNEL_H_ +#define _SYS_KERNEL_H_ + +#define SYSINIT(a, b, c, d, e) \ + void *dummy_ ## d = d + +/* + * Some enumerated orders; "ANY" sorts last. + */ +enum sysinit_elem_order { + SI_ORDER_FIRST = 0x0000000, /* first*/ + SI_ORDER_SECOND = 0x0000001, /* second*/ + SI_ORDER_THIRD = 0x0000002, /* third*/ + SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ + SI_ORDER_ANY = 0xfffffff /* last*/ +}; +#endif diff --git a/dummynet/include/sys/malloc.h b/dummynet/include/sys/malloc.h new file mode 100644 index 0000000..d103801 --- /dev/null +++ b/dummynet/include/sys/malloc.h @@ -0,0 +1,48 @@ +#ifndef _SYS_MALLOC_H_ +#define _SYS_MALLOC_H_ + +/* + * No matter what, try to get clear memory and be non-blocking. 
+ * XXX check if 2.4 has a native way to zero memory, + * XXX obey the flags (M_NOWAIT <-> GFP_ATOMIC, M_WAIT <-> GFP_KERNEL) + */ +#ifndef _WIN32 /* this is the linux version */ + +#ifndef LINUX_24 +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) +#define malloc(_size, type, flags) \ + kmalloc(_size, GFP_ATOMIC | __GFP_ZERO) +#else /* LINUX <= 2.6.22; NOTE(review): never reached when LINUX_24 is defined, so no malloc() is provided here for 2.4 */ +/* linux 2.6.22 does not zero allocated memory */ +#define malloc(_size, type, flags) \ + ({ int _s = _size; \ + void *_ret = kmalloc(_s, GFP_ATOMIC); \ + if (_ret) memset(_ret, 0, _s); \ + (_ret); \ + }) +#endif /* LINUX_VERSION_CODE > 2.6.22 */ +#endif /* !LINUX_24 */ + +#define calloc(_n, _s) malloc(((_n) * (_s)), NULL, GFP_ATOMIC | __GFP_ZERO) /* product is not checked for overflow */ +#define free(_var, type) kfree(_var) + +#else /* _WIN32, the windows version */ + +/* + * ntddk.h uses win_malloc() and MmFreeContiguousMemory(). + * wipfw uses + * ExAllocatePoolWithTag(, pool, len, tag) + * ExFreePoolWithTag(ptr, tag) + */ +#define malloc(_size, _type, _flags) my_alloc(_size) + +void *my_alloc(int _size); +/* the 'tag' version does not work without -Gz in the linker */ +#define free(_var, type) ExFreePool(_var) +//#define free(_var, type) ExFreePoolWithTag(_var, 'wfpi') + +#endif /* _WIN32 */ + +#define M_NOWAIT 0x0001 /* do not block */ +#define M_ZERO 0x0100 /* bzero the allocation */ +#endif /* _SYS_MALLOC_H_ */ diff --git a/dummynet/include/sys/mbuf.h b/dummynet/include/sys/mbuf.h new file mode 100644 index 0000000..e4e7591 --- /dev/null +++ b/dummynet/include/sys/mbuf.h @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2009 Luigi Rizzo, Universita` di Pisa + * + * BSD copyright.
+ * + * A simple compatibility interface to map mbufs onto sk_buff + */ + +#ifndef _SYS_MBUF_H_ +#define _SYS_MBUF_H_ + +#include <sys/malloc.h> /* we use free() */ +/* hopefully queue.h is already included by someone else */ +#include <sys/queue.h> /* NOTE(review): operand was lost in extraction; SLIST_* is used below - confirm */ +#ifdef _KERNEL + +/* bzero not present on linux, but this should go in glue.h */ +#define bzero(s, n) memset((s), 0, (n)) + +/* + * We implement a very simplified UMA allocator where the backend + * is simply malloc, and uma_zone only stores the length of the components. + */ +typedef int uma_zone_t; /* the zone size */ + +#define uma_zcreate(name, len, _3, _4, _5, _6, _7, _8) (len) + + +#define uma_zfree(zone, item) free(item, M_IPFW) +#define uma_zalloc(zone, flags) malloc(zone, M_IPFW, flags) +#define uma_zdestroy(zone) do {} while (0) + +/*- + * Macros for type conversion: + * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. + */ +#define mtod(m, t) ((t)((m)->m_data)) + +#endif /* _KERNEL */ + +/* + * Packet tag structure (see below for details). + */ +struct m_tag { + SLIST_ENTRY(m_tag) m_tag_link; /* List of packet tags */ + u_int16_t m_tag_id; /* Tag ID */ + u_int16_t m_tag_len; /* Length of data */ + u_int32_t m_tag_cookie; /* ABI/Module ID */ + void (*m_tag_free)(struct m_tag *); +}; + +#if defined(__linux__) || defined( _WIN32 ) + +/* + * Auxiliary structure to store values from the sk_buff. + * Note that we should not alter the sk_buff, and if we do + * so make sure to keep the values in sync between the mbuf + * and the sk_buff (especially m_len and m_pkthdr.len).
+ */ + +struct mbuf { + struct mbuf *m_next; + struct mbuf *m_nextpkt; + void *m_data; + int m_len; /* length in this mbuf */ + int m_flags; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + struct nf_info *queue_entry; +#else + struct nf_queue_entry *queue_entry; +#endif + struct sk_buff *m_skb; + struct { + struct net_device *rcvif; + int len; /* total packet len */ + SLIST_HEAD (packet_tags, m_tag) tags; + } m_pkthdr; +}; + +#define M_SKIP_FIREWALL 0x01 /* skip firewall processing */ +#define M_BCAST 0x02 /* send/received as link-level broadcast */ +#define M_MCAST 0x04 /* send/received as link-level multicast */ + +#define M_DONTWAIT M_NOWAIT /* should not be here... */ + + +/* + * m_dup() is used in the TEE case, currently unsupported so we + * just return. + */ +static __inline struct mbuf *m_dup(struct mbuf __unused *m, int __unused n) +{ + return NULL; +}; + +#define MTAG_ABI_COMPAT 0 /* compatibility ABI */ +static __inline struct m_tag * +m_tag_find(struct mbuf __unused *m, int __unused type, struct m_tag __unused *start) +{ + return NULL; +}; + + +static __inline void +m_tag_prepend(struct mbuf *m, struct m_tag *t) +{ + SLIST_INSERT_HEAD(&m->m_pkthdr.tags, t, m_tag_link); +} + +/* + * Create an mtag of the given type + */ +static __inline struct m_tag * +m_tag_get(int type, int length, int wait) +{ + int l = length + sizeof(struct m_tag); + struct m_tag *m = malloc(l, 0, M_NOWAIT); + if (m) { + memset(m, 0, l); + m->m_tag_id = type; + m->m_tag_len = length; + } + return m; +}; + +static __inline struct m_tag * +m_tag_first(struct mbuf *m) +{ + return SLIST_FIRST(&m->m_pkthdr.tags); +}; + +static __inline void +m_tag_delete(struct mbuf *m, struct m_tag *t) +{ +}; + +static __inline struct m_tag * +m_tag_locate(struct mbuf *m, u_int32_t n, int x, struct m_tag *t) +{ + return NULL; +}; + +static __inline void +m_freem(struct mbuf *m) +{ +#if 0 + struct m_tag *t; + + while ( (t = SLIST_FIRST(&m->m_pkthdr.tags) ) ) { + SLIST_REMOVE_HEAD(&m->m_pkthdr.tags, 
m_tag_link); + free(t, 0); + } +#endif + free(m, 0); +} + +/* we cannot pullup: the mbuf argument is handed back unchanged, the length is ignored */ +#define m_pullup(__m, __i) (__m) + +#define M_GETFIB(_m) 0 + +#endif /* __linux__ || _WIN32 */ + +/* + * Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise + * tags are expected to ``vanish'' when they pass through a network + * interface. For most interfaces this happens normally as the tags are + * reclaimed when the mbuf is free'd. However in some special cases + * reclaiming must be done manually. An example is packets that pass through + * the loopback interface. Also, one must be careful to do this when + * ``turning around'' packets (e.g., icmp_reflect). + * + * To mark a tag persistent bit-or this flag in when defining the tag id. + * The tag will then be treated as described above. + */ +#define MTAG_PERSISTENT 0x800 + +#define PACKET_TAG_NONE 0 /* Nadda */ + +/* Packet tags for use with PACKET_ABI_COMPAT. */ +#define PACKET_TAG_IPSEC_IN_DONE 1 /* IPsec applied, in */ +#define PACKET_TAG_IPSEC_OUT_DONE 2 /* IPsec applied, out */ +#define PACKET_TAG_IPSEC_IN_CRYPTO_DONE 3 /* NIC IPsec crypto done */ +#define PACKET_TAG_IPSEC_OUT_CRYPTO_NEEDED 4 /* NIC IPsec crypto req'ed */ +#define PACKET_TAG_IPSEC_IN_COULD_DO_CRYPTO 5 /* NIC notifies IPsec */ +#define PACKET_TAG_IPSEC_PENDING_TDB 6 /* Reminder to do IPsec */ +#define PACKET_TAG_BRIDGE 7 /* Bridge processing done */ +#define PACKET_TAG_GIF 8 /* GIF processing done */ +#define PACKET_TAG_GRE 9 /* GRE processing done */ +#define PACKET_TAG_IN_PACKET_CHECKSUM 10 /* NIC checksumming done */ +#define PACKET_TAG_ENCAP 11 /* Encap.
processing */ +#define PACKET_TAG_IPSEC_SOCKET 12 /* IPSEC socket ref */ +#define PACKET_TAG_IPSEC_HISTORY 13 /* IPSEC history */ +#define PACKET_TAG_IPV6_INPUT 14 /* IPV6 input processing */ +#define PACKET_TAG_DUMMYNET 15 /* dummynet info */ +#define PACKET_TAG_DIVERT 17 /* divert info */ +#define PACKET_TAG_IPFORWARD 18 /* ipforward info */ +#define PACKET_TAG_MACLABEL (19 | MTAG_PERSISTENT) /* MAC label */ +#define PACKET_TAG_PF 21 /* PF + ALTQ information */ +#define PACKET_TAG_RTSOCKFAM 25 /* rtsock sa family */ +#define PACKET_TAG_IPOPTIONS 27 /* Saved IP options */ +#define PACKET_TAG_CARP 28 /* CARP info */ + +#endif /* !_SYS_MBUF_H_ */ diff --git a/dummynet/include/sys/module.h b/dummynet/include/sys/module.h new file mode 100644 index 0000000..5296517 --- /dev/null +++ b/dummynet/include/sys/module.h @@ -0,0 +1,42 @@ +/* + * trivial module support + */ +#ifndef _SYS_MODULE_H_ +#define _SYS_MODULE_H_ +typedef struct module *module_t; +typedef int (*modeventhand_t)(module_t, int /* modeventtype_t */, void *); + +typedef enum modeventtype { + MOD_LOAD, + MOD_UNLOAD, + MOD_SHUTDOWN, + MOD_QUIESCE +} modeventtype_t; + +typedef struct moduledata { + const char *name; /* module name */ + modeventhand_t evhand; /* event handler */ + void *priv; /* extra data */ +} moduledata_t; + +int my_mod_register(struct moduledata *mod, const char *name, int order); +/* + * Hook the module descriptor, md, into our list of things to do. + * We should in principle respect the order of loading. 
+ * + * XXX use the gcc .init functions + */ +#define DECLARE_MODULE(a, md, c,d) \ + moduledata_t *moddesc_##a = &md; + +/* + * XXX MODULE_VERSION is define in linux too + */ +#define MODULE_DEPEND(a,b,c,d,e) +#if defined( __linux__ ) || defined( _WIN32 ) +#undef MODULE_VERSION +#define MODULE_VERSION(a,b) +#endif + +#endif /* _SYS_MODULE_H_ */ + diff --git a/dummynet/include/sys/param.h b/dummynet/include/sys/param.h new file mode 100644 index 0000000..f068998 --- /dev/null +++ b/dummynet/include/sys/param.h @@ -0,0 +1,11 @@ +#ifndef _SYS_PARAM_H_ +#define _SYS_PARAM_H_ + +/* + * number of additional groups + */ +#ifndef LINUX_24 +#define NGROUPS 16 +#endif + +#endif /* _SYS_PARAM_H_ */ diff --git a/dummynet/include/sys/queue.h b/dummynet/include/sys/queue.h new file mode 100644 index 0000000..8f06f17 --- /dev/null +++ b/dummynet/include/sys/queue.h @@ -0,0 +1,620 @@ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)queue.h 8.5 (Berkeley) 8/20/94 + * $FreeBSD: src/sys/sys/queue.h,v 1.68 2006/10/24 11:20:29 ru Exp $ + */ + +#ifndef _SYS_QUEUE_H_ +#define _SYS_QUEUE_H_ + +//#include + +/* + * This file defines four types of data structures: singly-linked lists, + * singly-linked tail queues, lists and tail queues. + * + * A singly-linked list is headed by a single forward pointer. The elements + * are singly linked for minimum space and pointer manipulation overhead at + * the expense of O(n) removal for arbitrary elements. New elements can be + * added to the list after an existing element or at the head of the list. + * Elements being removed from the head of the list should use the explicit + * macro for this purpose for optimum efficiency. A singly-linked list may + * only be traversed in the forward direction. Singly-linked lists are ideal + * for applications with large datasets and few or no removals or for + * implementing a LIFO queue. + * + * A singly-linked tail queue is headed by a pair of pointers, one to the + * head of the list and the other to the tail of the list. The elements are + * singly linked for minimum space and pointer manipulation overhead at the + * expense of O(n) removal for arbitrary elements. New elements can be added + * to the list after an existing element, at the head of the list, or at the + * end of the list. 
Elements being removed from the head of the tail queue + * should use the explicit macro for this purpose for optimum efficiency. + * A singly-linked tail queue may only be traversed in the forward direction. + * Singly-linked tail queues are ideal for applications with large datasets + * and few or no removals or for implementing a FIFO queue. + * + * A list is headed by a single forward pointer (or an array of forward + * pointers for a hash table header). The elements are doubly linked + * so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before + * or after an existing element or at the head of the list. A list + * may only be traversed in the forward direction. + * + * A tail queue is headed by a pair of pointers, one to the head of the + * list and the other to the tail of the list. The elements are doubly + * linked so that an arbitrary element can be removed without a need to + * traverse the list. New elements can be added to the list before or + * after an existing element, at the head of the list, or at the end of + * the list. A tail queue may be traversed in either direction. + * + * For details on the use of these macros, see the queue(3) manual page. 
+ * + * + * SLIST LIST STAILQ TAILQ + * _HEAD + + + + + * _HEAD_INITIALIZER + + + + + * _ENTRY + + + + + * _INIT + + + + + * _EMPTY + + + + + * _FIRST + + + + + * _NEXT + + + + + * _PREV - - - + + * _LAST - - + + + * _FOREACH + + + + + * _FOREACH_SAFE + + + + + * _FOREACH_REVERSE - - - + + * _FOREACH_REVERSE_SAFE - - - + + * _INSERT_HEAD + + + + + * _INSERT_BEFORE - + - + + * _INSERT_AFTER + + + + + * _INSERT_TAIL - - + + + * _CONCAT - - + + + * _REMOVE_HEAD + - + - + * _REMOVE + + + + + * + */ +#ifdef QUEUE_MACRO_DEBUG +/* Store the last 2 places the queue element or head was altered */ +struct qm_trace { + char * lastfile; + int lastline; + char * prevfile; + int prevline; +}; + +#define TRACEBUF struct qm_trace trace; +#define TRASHIT(x) do {(x) = (void *)-1;} while (0) + +#define QMD_TRACE_HEAD(head) do { \ + (head)->trace.prevline = (head)->trace.lastline; \ + (head)->trace.prevfile = (head)->trace.lastfile; \ + (head)->trace.lastline = __LINE__; \ + (head)->trace.lastfile = __FILE__; \ +} while (0) + +#define QMD_TRACE_ELEM(elem) do { \ + (elem)->trace.prevline = (elem)->trace.lastline; \ + (elem)->trace.prevfile = (elem)->trace.lastfile; \ + (elem)->trace.lastline = __LINE__; \ + (elem)->trace.lastfile = __FILE__; \ +} while (0) + +#else +#define QMD_TRACE_ELEM(elem) +#define QMD_TRACE_HEAD(head) +#define TRACEBUF +#define TRASHIT(x) +#endif /* QUEUE_MACRO_DEBUG */ + +/* + * Singly-linked List declarations. + */ +#define SLIST_HEAD(name, type) \ +struct name { \ + struct type *slh_first; /* first element */ \ +} + +#define SLIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define SLIST_ENTRY(type) \ +struct { \ + struct type *sle_next; /* next element */ \ +} + +/* + * Singly-linked List functions. 
+ */ +#define SLIST_EMPTY(head) ((head)->slh_first == NULL) + +#define SLIST_FIRST(head) ((head)->slh_first) + +#define SLIST_FOREACH(var, head, field) \ + for ((var) = SLIST_FIRST((head)); \ + (var); \ + (var) = SLIST_NEXT((var), field)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_PREVPTR(var, varp, head, field) \ + for ((varp) = &SLIST_FIRST((head)); \ + ((var) = *(varp)) != NULL; \ + (varp) = &SLIST_NEXT((var), field)) + +#define SLIST_INIT(head) do { \ + SLIST_FIRST((head)) = NULL; \ +} while (0) + +#define SLIST_INSERT_AFTER(slistelm, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_NEXT((slistelm), field); \ + SLIST_NEXT((slistelm), field) = (elm); \ +} while (0) + +#define SLIST_INSERT_HEAD(head, elm, field) do { \ + SLIST_NEXT((elm), field) = SLIST_FIRST((head)); \ + SLIST_FIRST((head)) = (elm); \ +} while (0) + +#define SLIST_NEXT(elm, field) ((elm)->field.sle_next) + +#define SLIST_REMOVE(head, elm, type, field) do { \ + if (SLIST_FIRST((head)) == (elm)) { \ + SLIST_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = SLIST_FIRST((head)); \ + while (SLIST_NEXT(curelm, field) != (elm)) \ + curelm = SLIST_NEXT(curelm, field); \ + SLIST_NEXT(curelm, field) = \ + SLIST_NEXT(SLIST_NEXT(curelm, field), field); \ + } \ + TRASHIT((elm)->field.sle_next); \ +} while (0) + +#define SLIST_REMOVE_HEAD(head, field) do { \ + SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ +} while (0) + +/* + * Singly-linked Tail queue declarations. 
+ */ +#define STAILQ_HEAD(name, type) \ +struct name { \ + struct type *stqh_first;/* first element */ \ + struct type **stqh_last;/* addr of last next element */ \ +} + +#define STAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).stqh_first } + +#define STAILQ_ENTRY(type) \ +struct { \ + struct type *stqe_next; /* next element */ \ +} + +/* + * Singly-linked Tail queue functions. + */ +#define STAILQ_CONCAT(head1, head2) do { \ + if (!STAILQ_EMPTY((head2))) { \ + *(head1)->stqh_last = (head2)->stqh_first; \ + (head1)->stqh_last = (head2)->stqh_last; \ + STAILQ_INIT((head2)); \ + } \ +} while (0) + +#define STAILQ_EMPTY(head) ((head)->stqh_first == NULL) + +#define STAILQ_FIRST(head) ((head)->stqh_first) + +#define STAILQ_FOREACH(var, head, field) \ + for((var) = STAILQ_FIRST((head)); \ + (var); \ + (var) = STAILQ_NEXT((var), field)) + + +#define STAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = STAILQ_FIRST((head)); \ + (var) && ((tvar) = STAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define STAILQ_INIT(head) do { \ + STAILQ_FIRST((head)) = NULL; \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#define STAILQ_INSERT_AFTER(head, tqelm, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_NEXT((tqelm), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_NEXT((tqelm), field) = (elm); \ +} while (0) + +#define STAILQ_INSERT_HEAD(head, elm, field) do { \ + if ((STAILQ_NEXT((elm), field) = STAILQ_FIRST((head))) == NULL) \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ + STAILQ_FIRST((head)) = (elm); \ +} while (0) + +#define STAILQ_INSERT_TAIL(head, elm, field) do { \ + STAILQ_NEXT((elm), field) = NULL; \ + *(head)->stqh_last = (elm); \ + (head)->stqh_last = &STAILQ_NEXT((elm), field); \ +} while (0) + +#define STAILQ_LAST(head, type, field) \ + (STAILQ_EMPTY((head)) ? 
\ + NULL : \ + ((struct type *)(void *) \ + ((char *)((head)->stqh_last) - __offsetof(struct type, field)))) + +#define STAILQ_NEXT(elm, field) ((elm)->field.stqe_next) + +#define STAILQ_REMOVE(head, elm, type, field) do { \ + if (STAILQ_FIRST((head)) == (elm)) { \ + STAILQ_REMOVE_HEAD((head), field); \ + } \ + else { \ + struct type *curelm = STAILQ_FIRST((head)); \ + while (STAILQ_NEXT(curelm, field) != (elm)) \ + curelm = STAILQ_NEXT(curelm, field); \ + if ((STAILQ_NEXT(curelm, field) = \ + STAILQ_NEXT(STAILQ_NEXT(curelm, field), field)) == NULL)\ + (head)->stqh_last = &STAILQ_NEXT((curelm), field);\ + } \ + TRASHIT((elm)->field.stqe_next); \ +} while (0) + +#define STAILQ_REMOVE_HEAD(head, field) do { \ + if ((STAILQ_FIRST((head)) = \ + STAILQ_NEXT(STAILQ_FIRST((head)), field)) == NULL) \ + (head)->stqh_last = &STAILQ_FIRST((head)); \ +} while (0) + +#ifndef LIST_HEAD +/* + * List declarations. + */ +#define LIST_HEAD(name, type) \ +struct name { \ + struct type *lh_first; /* first element */ \ +} + +#define LIST_HEAD_INITIALIZER(head) \ + { NULL } + +#define LIST_ENTRY(type) \ +struct { \ + struct type *le_next; /* next element */ \ + struct type **le_prev; /* address of previous next element */ \ +} + +/* + * List functions. 
+ */ + +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_LIST_CHECK_HEAD(head, field) do { \ + if (LIST_FIRST((head)) != NULL && \ + LIST_FIRST((head))->field.le_prev != \ + &LIST_FIRST((head))) \ + panic("Bad list head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_LIST_CHECK_NEXT(elm, field) do { \ + if (LIST_NEXT((elm), field) != NULL && \ + LIST_NEXT((elm), field)->field.le_prev != \ + &((elm)->field.le_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_LIST_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.le_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_LIST_CHECK_HEAD(head, field) +#define QMD_LIST_CHECK_NEXT(elm, field) +#define QMD_LIST_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define LIST_EMPTY(head) ((head)->lh_first == NULL) + +#define LIST_FIRST(head) ((head)->lh_first) + +#define LIST_FOREACH(var, head, field) \ + for ((var) = LIST_FIRST((head)); \ + (var); \ + (var) = LIST_NEXT((var), field)) + +#define LIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = LIST_FIRST((head)); \ + (var) && ((tvar) = LIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define LIST_INIT(head) do { \ + LIST_FIRST((head)) = NULL; \ +} while (0) + +#define LIST_INSERT_AFTER(listelm, elm, field) do { \ + QMD_LIST_CHECK_NEXT(listelm, field); \ + if ((LIST_NEXT((elm), field) = LIST_NEXT((listelm), field)) != NULL)\ + LIST_NEXT((listelm), field)->field.le_prev = \ + &LIST_NEXT((elm), field); \ + LIST_NEXT((listelm), field) = (elm); \ + (elm)->field.le_prev = &LIST_NEXT((listelm), field); \ +} while (0) + +#define LIST_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_LIST_CHECK_PREV(listelm, field); \ + (elm)->field.le_prev = (listelm)->field.le_prev; \ + LIST_NEXT((elm), field) = (listelm); \ + *(listelm)->field.le_prev = (elm); \ + (listelm)->field.le_prev = &LIST_NEXT((elm), field); \ +} while (0) + +#define 
LIST_INSERT_HEAD(head, elm, field) do { \ + QMD_LIST_CHECK_HEAD((head), field); \ + if ((LIST_NEXT((elm), field) = LIST_FIRST((head))) != NULL) \ + LIST_FIRST((head))->field.le_prev = &LIST_NEXT((elm), field);\ + LIST_FIRST((head)) = (elm); \ + (elm)->field.le_prev = &LIST_FIRST((head)); \ +} while (0) + +#define LIST_NEXT(elm, field) ((elm)->field.le_next) + +#define LIST_REMOVE(elm, field) do { \ + QMD_LIST_CHECK_NEXT(elm, field); \ + QMD_LIST_CHECK_PREV(elm, field); \ + if (LIST_NEXT((elm), field) != NULL) \ + LIST_NEXT((elm), field)->field.le_prev = \ + (elm)->field.le_prev; \ + *(elm)->field.le_prev = LIST_NEXT((elm), field); \ + TRASHIT((elm)->field.le_next); \ + TRASHIT((elm)->field.le_prev); \ +} while (0) +#endif /* LIST_HEAD */ + +/* + * Tail queue declarations. + */ +#define TAILQ_HEAD(name, type) \ +struct name { \ + struct type *tqh_first; /* first element */ \ + struct type **tqh_last; /* addr of last next element */ \ + TRACEBUF \ +} + +#define TAILQ_HEAD_INITIALIZER(head) \ + { NULL, &(head).tqh_first } + +#define TAILQ_ENTRY(type) \ +struct { \ + struct type *tqe_next; /* next element */ \ + struct type **tqe_prev; /* address of previous next element */ \ + TRACEBUF \ +} + +/* + * Tail queue functions. 
+ */ +#if (defined(_KERNEL) && defined(INVARIANTS)) +#define QMD_TAILQ_CHECK_HEAD(head, field) do { \ + if (!TAILQ_EMPTY(head) && \ + TAILQ_FIRST((head))->field.tqe_prev != \ + &TAILQ_FIRST((head))) \ + panic("Bad tailq head %p first->prev != head", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_TAIL(head, field) do { \ + if (*(head)->tqh_last != NULL) \ + panic("Bad tailq NEXT(%p->tqh_last) != NULL", (head)); \ +} while (0) + +#define QMD_TAILQ_CHECK_NEXT(elm, field) do { \ + if (TAILQ_NEXT((elm), field) != NULL && \ + TAILQ_NEXT((elm), field)->field.tqe_prev != \ + &((elm)->field.tqe_next)) \ + panic("Bad link elm %p next->prev != elm", (elm)); \ +} while (0) + +#define QMD_TAILQ_CHECK_PREV(elm, field) do { \ + if (*(elm)->field.tqe_prev != (elm)) \ + panic("Bad link elm %p prev->next != elm", (elm)); \ +} while (0) +#else +#define QMD_TAILQ_CHECK_HEAD(head, field) +#define QMD_TAILQ_CHECK_TAIL(head, headname) +#define QMD_TAILQ_CHECK_NEXT(elm, field) +#define QMD_TAILQ_CHECK_PREV(elm, field) +#endif /* (_KERNEL && INVARIANTS) */ + +#define TAILQ_CONCAT(head1, head2, field) do { \ + if (!TAILQ_EMPTY(head2)) { \ + *(head1)->tqh_last = (head2)->tqh_first; \ + (head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \ + (head1)->tqh_last = (head2)->tqh_last; \ + TAILQ_INIT((head2)); \ + QMD_TRACE_HEAD(head1); \ + QMD_TRACE_HEAD(head2); \ + } \ +} while (0) + +#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL) + +#define TAILQ_FIRST(head) ((head)->tqh_first) + +#define TAILQ_FOREACH(var, head, field) \ + for ((var) = TAILQ_FIRST((head)); \ + (var); \ + (var) = TAILQ_NEXT((var), field)) + +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var); \ + (var) = TAILQ_PREV((var), headname, field)) + +#define TAILQ_FOREACH_REVERSE_SAFE(var, head, 
headname, field, tvar) \ + for ((var) = TAILQ_LAST((head), headname); \ + (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \ + (var) = (tvar)) + +#define TAILQ_INIT(head) do { \ + TAILQ_FIRST((head)) = NULL; \ + (head)->tqh_last = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ +} while (0) + +#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \ + QMD_TAILQ_CHECK_NEXT(listelm, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else { \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + } \ + TAILQ_NEXT((listelm), field) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \ + QMD_TAILQ_CHECK_PREV(listelm, field); \ + (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \ + TAILQ_NEXT((elm), field) = (listelm); \ + *(listelm)->field.tqe_prev = (elm); \ + (listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_ELEM(&(elm)->field); \ + QMD_TRACE_ELEM(&listelm->field); \ +} while (0) + +#define TAILQ_INSERT_HEAD(head, elm, field) do { \ + QMD_TAILQ_CHECK_HEAD(head, field); \ + if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \ + TAILQ_FIRST((head))->field.tqe_prev = \ + &TAILQ_NEXT((elm), field); \ + else \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + TAILQ_FIRST((head)) = (elm); \ + (elm)->field.tqe_prev = &TAILQ_FIRST((head)); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + +#define TAILQ_INSERT_TAIL(head, elm, field) do { \ + QMD_TAILQ_CHECK_TAIL(head, field); \ + TAILQ_NEXT((elm), field) = NULL; \ + (elm)->field.tqe_prev = (head)->tqh_last; \ + *(head)->tqh_last = (elm); \ + (head)->tqh_last = &TAILQ_NEXT((elm), field); \ + QMD_TRACE_HEAD(head); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while 
(0) + +#define TAILQ_LAST(head, headname) \ + (*(((struct headname *)((head)->tqh_last))->tqh_last)) + +#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next) + +#define TAILQ_PREV(elm, headname, field) \ + (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) + +#define TAILQ_REMOVE(head, elm, field) do { \ + QMD_TAILQ_CHECK_NEXT(elm, field); \ + QMD_TAILQ_CHECK_PREV(elm, field); \ + if ((TAILQ_NEXT((elm), field)) != NULL) \ + TAILQ_NEXT((elm), field)->field.tqe_prev = \ + (elm)->field.tqe_prev; \ + else { \ + (head)->tqh_last = (elm)->field.tqe_prev; \ + QMD_TRACE_HEAD(head); \ + } \ + *(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \ + TRASHIT((elm)->field.tqe_next); \ + TRASHIT((elm)->field.tqe_prev); \ + QMD_TRACE_ELEM(&(elm)->field); \ +} while (0) + + +#ifdef _KERNEL + +/* + * XXX insque() and remque() are an old way of handling certain queues. + * They bogusly assumes that all queue heads look alike. + */ + +struct quehead { + struct quehead *qh_link; + struct quehead *qh_rlink; +}; + +#ifdef __CC_SUPPORTS___INLINE + +static __inline void +insque(void *a, void *b) +{ + struct quehead *element = (struct quehead *)a, + *head = (struct quehead *)b; + + element->qh_link = head->qh_link; + element->qh_rlink = head; + head->qh_link = element; + element->qh_link->qh_rlink = element; +} + +static __inline void +remque(void *a) +{ + struct quehead *element = (struct quehead *)a; + + element->qh_link->qh_rlink = element->qh_rlink; + element->qh_rlink->qh_link = element->qh_link; + element->qh_rlink = 0; +} + +#else /* !__CC_SUPPORTS___INLINE */ + +void insque(void *a, void *b); +void remque(void *a); + +#endif /* __CC_SUPPORTS___INLINE */ + +#endif /* _KERNEL */ + +#endif /* !_SYS_QUEUE_H_ */ diff --git a/dummynet/include/sys/syslog.h b/dummynet/include/sys/syslog.h new file mode 100644 index 0000000..143df1f --- /dev/null +++ b/dummynet/include/sys/syslog.h @@ -0,0 +1,7 @@ +#ifndef _SYS_SYSLOG_H_ +#define _SYS_SYSLOG_H_ +/* XXX find linux equivalent */ 
+#define LOG_SECURITY 0
+#define LOG_NOTICE 0
+#define LOG_DEBUG 0
+#endif /* _SYS_SYSLOG_H_ */
diff --git a/dummynet/include/sys/systm.h b/dummynet/include/sys/systm.h
new file mode 100644
index 0000000..238a7d3
--- /dev/null
+++ b/dummynet/include/sys/systm.h
@@ -0,0 +1,89 @@
+#ifndef _SYS_SYSTM_H_
+#define _SYS_SYSTM_H_
+
+#ifndef _WIN32 /* this is the linux version */
+/* callout support, in <sys/callout.h> on FreeBSD */
+/*
+ * callout support on linux module is done using timers
+ */
+/*
+ * NOTE(review): the header names of the two #include directives below
+ * are missing in this copy of the patch; restore them from upstream.
+ */
+#include
+#ifdef LINUX_24
+#include /* jiffies definition is here in 2.4 */
+#endif
+#define callout timer_list
+
+/*
+ * Arm the timer behind co so that fn(arg) is called ticks jiffies from
+ * now.  mod_timer() is used rather than add_timer() so that the call is
+ * also valid while the timer is still pending (add_timer() must not be
+ * used on a pending timer); it is then rescheduled.  Always returns 0.
+ */
+static __inline int
+callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg)
+{
+        /* handler and argument must be set before the timer is armed */
+        co->function = (void (*)(unsigned long))fn;
+        co->data = (unsigned long)arg;
+        mod_timer(co, jiffies + ticks);
+        return 0;
+}
+
+#define callout_init(co, safe) init_timer(co)
+/*
+ * NOTE(review): del_timer() does not wait for a handler that is already
+ * running; if callers rely on callout_drain() for that, del_timer_sync()
+ * is presumably needed here -- confirm the calling contexts first.
+ */
+#define callout_drain(co) del_timer(co)
+#define callout_stop(co) del_timer(co)
+
+#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
+#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
+
+#else /* _WIN32 */
+
+/* This is the windows part for callout support */
+struct callout {
+        int dummy;
+};
+static __inline int
+callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg)
+{
+        return 0;
+}
+
+#define callout_init(co, safe)
+#define callout_drain(co)
+#define callout_stop(co)
+#endif /* !_WIN32 */
+
+
+#if 0
+/* add our timer to the kernel global timer list */
+NTSTATUS
+        IoInitializeTimer(
+        IN PDEVICE_OBJECT DeviceObject,
+        IN PIO_TIMER_ROUTINE TimerRoutine,
+        IN PVOID Context
+        );
+
+/* see differences :
+IoInitializeDpcRequest
+        http://dsrg.mff.cuni.cz/~ceres/sch/osy/text/ch04s01s01.php
+        example http://www.beyondlogic.org/interrupts/winnt_isr_dpc.htm
+KeInitializeDpc IRQL: Any level
+IoInitializeTimer IRQL: Passive level
+KeInitializeTimer */
+VOID
+        KeInitializeDpc(
+        IN PRKDPC Dpc,
+        IN PKDEFERRED_ROUTINE DeferredRoutine,
+        IN PVOID
DeferredContext
+        );
+#endif /* commented out */
+
+#endif /* _SYS_SYSTM_H_ */
diff --git a/dummynet/include/sys/taskqueue.h b/dummynet/include/sys/taskqueue.h
new file mode 100644
index 0000000..f11d286
--- /dev/null
+++ b/dummynet/include/sys/taskqueue.h
@@ -0,0 +1,40 @@
+#ifndef _SYS_TASKQUEUE_H_
+#define _SYS_TASKQUEUE_H_
+
+/*
+ * Remap taskqueue to direct calls.
+ *
+ * There is no queue and no worker thread: taskqueue_enqueue() runs the
+ * handler synchronously in the caller's context.  Handlers are stored and
+ * called with their real type, void (*)(void *context, int pending);
+ * calling them through a void (*)(void) pointer would be undefined
+ * behaviour and would leave both arguments uninitialized.
+ */
+struct task {
+        void (*func)(void *, int);      /* handler set by TASK_INIT */
+        void *ctx;                      /* context passed back to func */
+};
+
+/*
+ * Run the task immediately.  'tq' is ignored and 'pending' is always 1,
+ * since a direct call never coalesces requests.  'ta' is evaluated twice.
+ */
+#define taskqueue_enqueue(tq, ta) (ta)->func((ta)->ctx, 1)
+
+/* TASK_INIT(task, priority, handler, context); priority (b) is unused. */
+#define TASK_INIT(a,b,c,d) do { \
+        (a)->func = (c); \
+        (a)->ctx = (d); \
+} while (0)
+
+#define taskqueue_create_fast(_a, _b, _c, _d) NULL
+#define taskqueue_start_threads(_a, _b, _c, _d)
+
+#define taskqueue_drain(_a, _b) /* XXX to be completed */
+#define taskqueue_free(_a) /* XXX to be completed */
+
+#define PRI_MIN (0) /* Highest priority. */
+#define PRI_MIN_ITHD (PRI_MIN)
+#define PI_NET (PRI_MIN_ITHD + 16)
+
+#endif /* !_SYS_TASKQUEUE_H_ */
diff --git a/dummynet/ip_dummynet.c b/dummynet/ip_dummynet.c
new file mode 100644
index 0000000..bdf0a8e
--- /dev/null
+++ b/dummynet/ip_dummynet.c
@@ -0,0 +1,2406 @@
+/*-
+ * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
+ * Portions Copyright (c) 2000 Akamba Corp.
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:12 oleg Exp $"); + +#define DUMMYNET_DEBUG + +#include "opt_inet6.h" + +/* + * This module implements IP dummynet, a bandwidth limiter/delay emulator + * used in conjunction with the ipfw package. + * Description of the data structures used is in ip_dummynet.h + * Here you mainly find the following blocks of code: + * + variable declarations; + * + heap management functions; + * + scheduler and dummynet functions; + * + configuration and initialization. + * + * NOTA BENE: critical sections are protected by the "dummynet lock". + * + * Most important Changes: + * + * 011004: KLDable + * 010124: Fixed WF2Q behaviour + * 010122: Fixed spl protection. + * 000601: WF2Q support + * 000106: large rewrite, use heaps to handle very many pipes. 
+ * 980513: initial release + * + * include files marked with XXX are probably not needed + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* IFNAMSIZ, struct ifaddr, ifq head */ +#include +#include +#include /* ip_len, ip_off */ +#include +#include +#include /* ip_output(), IP_FORWARDING */ + +#include /* various ether_* routines */ + +#include /* for ip6_input, ip6_output prototypes */ +#include + +#include "missing.h" +/* + * We keep a private variable for the simulation time, but we could + * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) + */ +static dn_key curr_time = 0 ; /* current simulation time */ + +static int dn_hash_size = 64 ; /* default hash size */ + +/* statistics on number of queue searches and search steps */ +static long searches, search_steps ; +static int pipe_expire = 1 ; /* expire queue if empty */ +static int dn_max_ratio = 16 ; /* max queues/buckets ratio */ + +static long pipe_slot_limit = 100; /* Foot shooting limit for pipe queues. */ +static long pipe_byte_limit = 1024 * 1024; + +static int red_lookup_depth = 256; /* RED - default lookup table depth */ +static int red_avg_pkt_size = 512; /* RED - default medium packet size */ +static int red_max_pkt_size = 1500; /* RED - default max packet size */ + +static struct timeval prev_t, t; +static long tick_last; /* Last tick duration (usec). */ +static long tick_delta; /* Last vs standard tick diff (usec). */ +static long tick_delta_sum; /* Accumulated tick difference (usec).*/ +static long tick_adjustment; /* Tick adjustments done. */ +static long tick_lost; /* Lost(coalesced) ticks number. */ +/* Adjusted vs non-adjusted curr_time difference (ticks). 
*/ +static long tick_diff; + +static int io_fast; +static unsigned long io_pkt; +static unsigned long io_pkt_fast; +static unsigned long io_pkt_drop; + +/* + * Three heaps contain queues and pipes that the scheduler handles: + * + * ready_heap contains all dn_flow_queue related to fixed-rate pipes. + * + * wfq_ready_heap contains the pipes associated with WF2Q flows + * + * extract_heap contains pipes associated with delay lines. + * + */ + +MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); + +static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; + +static int heap_init(struct dn_heap *h, int size); +static int heap_insert (struct dn_heap *h, dn_key key1, void *p); +static void heap_extract(struct dn_heap *h, void *obj); +static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, + struct mbuf **tail); +static void ready_event(struct dn_flow_queue *q, struct mbuf **head, + struct mbuf **tail); +static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, + struct mbuf **tail); + +#define HASHSIZE 16 +#define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f) +static struct dn_pipe_head pipehash[HASHSIZE]; /* all pipes */ +static struct dn_flow_set_head flowsethash[HASHSIZE]; /* all flowsets */ + +static struct callout dn_timeout; + +extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); + +#ifdef SYSCTL_NODE +SYSCTL_DECL(_net_inet); +SYSCTL_DECL(_net_inet_ip); + +SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, + CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); +#if 0 /* curr_time is 64 bit */ +SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time, + CTLFLAG_RD, &curr_time, 0, "Current tick"); +#endif +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, + CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, + CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); 
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
+    CTLFLAG_RD, &searches, 0, "Number of queue searches");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
+    CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
+    CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
+    CTLFLAG_RW, &dn_max_ratio, 0,
+    "Max ratio between dynamic queues and buckets");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
+    CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
+    CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
+    CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
+    CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
+    CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
+    CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
+    CTLFLAG_RD, &tick_diff, 0,
+    "Adjusted vs non-adjusted curr_time difference (ticks).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
+    CTLFLAG_RD, &tick_lost, 0,
+    "Number of ticks coalesced by dummynet taskqueue.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
+    CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
+    CTLFLAG_RD, &io_pkt, 0,
+    "Number of packets passed to dummynet.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
+    CTLFLAG_RD, &io_pkt_fast, 0,
+    "Number of packets bypassed dummynet scheduler.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
+    CTLFLAG_RD, &io_pkt_drop, 0,
+    "Number of packets dropped by dummynet.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
+    CTLFLAG_RW, &pipe_slot_limit, 0, "Upper limit in slots for pipe queue.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
+    CTLFLAG_RW, &pipe_byte_limit, 0, "Upper limit in bytes for pipe queue.");
+#endif
+
+/*
+ * Debug output.  DPRINTF takes ONE argument, a complete parenthesized
+ * printf argument list: DPRINTF(("fmt %d\n", value));
+ * Both variants expand to a single statement, so the macro can be used
+ * as the unbraced body of an if/else without capturing the else.
+ */
+#ifdef DUMMYNET_DEBUG
+int dummynet_debug = 0;
+#ifdef SYSCTL_NODE
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug,
+    0, "control debugging printfs");
+#endif
+#define DPRINTF(X) do { if (dummynet_debug) printf X; } while (0)
+#else
+#define DPRINTF(X) do { } while (0)
+#endif
+
+/* Task (and the queue it is posted to) whose handler is dummynet_task(). */
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
+static void dummynet_task(void *, int);
+
+/*
+ * The "dummynet lock": a spinlock object on linux/windows, a struct mtx
+ * otherwise.  It is always driven through the mtx_*() calls wrapped by
+ * the DUMMYNET_LOCK*() macros below.
+ */
+#if defined( __linux__ ) || defined( _WIN32 )
+static DEFINE_SPINLOCK(dummynet_mtx);
+#else
+static struct mtx dummynet_mtx;
+#endif
+#define DUMMYNET_LOCK_INIT() \
+    mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
+#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx)
+#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx)
+#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx)
+#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED)
+
+static int config_pipe(struct dn_pipe *p);
+static int ip_dn_ctl(struct sockopt *sopt);
+
+static void dummynet(void *);
+static void dummynet_flush(void);
+static void dummynet_send(struct mbuf *);
+void dummynet_drain(void);
+static ip_dn_io_t dummynet_io;
+static void dn_rule_delete(void *);
+
+/*
+ * Heap management functions.
+ *
+ * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
+ * Some macros help finding parent/children so we can optimize them.
+ *
+ * heap_init() is called to expand the heap when needed.
+ * Increment size in blocks of 16 entries.
+ * XXX failure to allocate a new element is a pretty bad failure
+ * as we basically stall a whole queue forever!!
+ * Returns 1 on error, 0 on success + */ +#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) +#define HEAP_LEFT(x) ( 2*(x) + 1 ) +#define HEAP_IS_LEFT(x) ( (x) & 1 ) +#define HEAP_RIGHT(x) ( 2*(x) + 2 ) +#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } +#define HEAP_INCREMENT 15 + +static int +heap_init(struct dn_heap *h, int new_size) +{ + struct dn_heap_entry *p; + + if (h->size >= new_size ) { + printf("dummynet: %s, Bogus call, have %d want %d\n", __func__, + h->size, new_size); + return 0 ; + } + new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; + p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); + if (p == NULL) { + printf("dummynet: %s, resize %d failed\n", __func__, new_size ); + return 1 ; /* error */ + } + if (h->size > 0) { + bcopy(h->p, p, h->size * sizeof(*p) ); + free(h->p, M_DUMMYNET); + } + h->p = p ; + h->size = new_size ; + return 0 ; +} + +/* + * Insert element in heap. Normally, p != NULL, we insert p in + * a new position and bubble up. If p == NULL, then the element is + * already in place, and key is the position where to start the + * bubble-up. + * Returns 1 on failure (cannot allocate new heap entry) + * + * If offset > 0 the position (index, int) of the element in the heap is + * also stored in the element itself at the given offset in bytes. + */ +#define SET_OFFSET(heap, node) \ + if (heap->offset > 0) \ + *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; +/* + * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. + */ +#define RESET_OFFSET(heap, node) \ + if (heap->offset > 0) \ + *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; +static int +heap_insert(struct dn_heap *h, dn_key key1, void *p) +{ + int son = h->elements ; + + if (p == NULL) /* data already there, set starting point */ + son = key1 ; + else { /* insert new element at the end, possibly resize */ + son = h->elements ; + if (son == h->size) /* need resize... 
*/ + if (heap_init(h, h->elements+1) ) + return 1 ; /* failure... */ + h->p[son].object = p ; + h->p[son].key = key1 ; + h->elements++ ; + } + while (son > 0) { /* bubble up */ + int father = HEAP_FATHER(son) ; + struct dn_heap_entry tmp ; + + if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) + break ; /* found right position */ + /* son smaller than father, swap and repeat */ + HEAP_SWAP(h->p[son], h->p[father], tmp) ; + SET_OFFSET(h, son); + son = father ; + } + SET_OFFSET(h, son); + return 0 ; +} + +/* + * remove top element from heap, or obj if obj != NULL + */ +static void +heap_extract(struct dn_heap *h, void *obj) +{ + int child, father, max = h->elements - 1 ; + + if (max < 0) { + printf("dummynet: warning, extract from empty heap 0x%p\n", h); + return ; + } + father = 0 ; /* default: move up smallest child */ + if (obj != NULL) { /* extract specific element, index is at offset */ + if (h->offset <= 0) + panic("dummynet: heap_extract from middle not supported on this heap!!!\n"); + father = *((int *)((char *)obj + h->offset)) ; + if (father < 0 || father >= h->elements) { + printf("dummynet: heap_extract, father %d out of bound 0..%d\n", + father, h->elements); + panic("dummynet: heap_extract"); + } + } + RESET_OFFSET(h, father); + child = HEAP_LEFT(father) ; /* left child */ + while (child <= max) { /* valid entry */ + if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) + child = child+1 ; /* take right child, otherwise left */ + h->p[father] = h->p[child] ; + SET_OFFSET(h, father); + father = child ; + child = HEAP_LEFT(child) ; /* left child for next loop */ + } + h->elements-- ; + if (father != max) { + /* + * Fill hole with last entry and bubble up, reusing the insert code + */ + h->p[father] = h->p[max] ; + heap_insert(h, father, NULL); /* this one cannot fail */ + } +} + +#if 0 +/* + * change object position and update references + * XXX this one is never used! 
+ * (the whole function is compiled out by the enclosing #if 0)
+ */
+static void
+heap_move(struct dn_heap *h, dn_key new_key, void *object)
+{
+	int temp;
+	int i ;
+	int max = h->elements-1 ;
+	struct dn_heap_entry buf ;
+
+	if (h->offset <= 0)
+		panic("cannot move items on this heap");
+
+	/* Current heap index of the object, stored inside the object itself. */
+	i = *((int *)((char *)object + h->offset));
+	if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
+		h->p[i].key = new_key ;
+		for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
+				i = temp ) { /* bubble up */
+			HEAP_SWAP(h->p[i], h->p[temp], buf) ;
+			SET_OFFSET(h, i);
+		}
+	} else {		/* must move down */
+		h->p[i].key = new_key ;
+		while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
+			if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
+				temp++ ; /* select child with min key */
+			if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
+				HEAP_SWAP(h->p[i], h->p[temp], buf) ;
+				SET_OFFSET(h, i);
+			} else
+				break ;
+			i = temp ;
+		}
+	}
+	SET_OFFSET(h, i);
+}
+#endif /* heap_move, unused */
+
+/*
+ * heapify() will reorganize data inside an array to maintain the
+ * heap property. It is needed when we delete a bunch of entries.
+ *
+ * It runs the bubble-up part of heap_insert() (p == NULL form) on every
+ * index from 0 to h->elements - 1, in order.
+ */
+static void
+heapify(struct dn_heap *h)
+{
+	int i ;
+
+	for (i = 0 ; i < h->elements ; i++ )
+		heap_insert(h, i , NULL) ;
+}
+
+/*
+ * cleanup the heap and free data structure
+ *
+ * Frees the entry array (only if one was allocated) and zeroes the whole
+ * descriptor, including its offset field. The descriptor itself is not
+ * freed.
+ */
+static void
+heap_free(struct dn_heap *h)
+{
+	if (h->size >0 )
+		free(h->p, M_DUMMYNET);
+	bzero(h, sizeof(*h) );
+}
+
+/*
+ * --- end of heap management functions ---
+ */
+
+/*
+ * Return the mbuf tag holding the dummynet state. As an optimization
+ * this is assumed to be the first tag on the list.  If this turns out
+ * wrong we'll need to search the list.
+ *
+ * The dn_pkt_tag payload is taken to start right after the m_tag header
+ * (mtag + 1). The KASSERT checks that the first tag really is a
+ * dummynet tag.
+ */
+static struct dn_pkt_tag *
+dn_tag_get(struct mbuf *m)
+{
+	struct m_tag *mtag = m_tag_first(m);
+	KASSERT(mtag != NULL &&
+	    mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
+	    mtag->m_tag_id == PACKET_TAG_DUMMYNET,
+	    ("packet on dummynet queue w/o dummynet tag!"));
+	return (struct dn_pkt_tag *)(mtag+1);
+}
+
+/*
+ * Scheduler functions:
+ *
+ * transmit_event() is called when the delay-line needs to enter
+ * the scheduler, either because of existing pkts getting ready,
+ * or new packets entering the queue. The event handled is the delivery
+ * time of the packet.
+ *
+ * ready_event() does something similar with fixed-rate queues, and the
+ * event handled is the finish time of the head pkt.
+ *
+ * wfq_ready_event() does something similar with WF2Q queues, and the
+ * event handled is the start time of the head pkt.
+ *
+ * In all cases, we make sure that the data structures are consistent
+ * before passing pkts out, because this might trigger recursive
+ * invocations of the procedures.
+ *
+ * transmit_event() appends every packet of the pipe's delay line whose
+ * output_time has been reached to the caller's list (*head / *tail,
+ * linked through m_nextpkt); the caller is expected to send that list
+ * later. Must be called with the dummynet lock held.
+ */
+static void
+transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
+{
+	struct mbuf *m;
+	struct dn_pkt_tag *pkt;
+
+	DUMMYNET_LOCK_ASSERT();
+
+	/* Move all due packets from the delay line to the output list. */
+	while ((m = pipe->head) != NULL) {
+		pkt = dn_tag_get(m);
+		if (!DN_KEY_LEQ(pkt->output_time, curr_time))
+			break;
+
+		pipe->head = m->m_nextpkt;
+		if (*tail != NULL)
+			(*tail)->m_nextpkt = m;
+		else
+			*head = m;
+		*tail = m;
+	}
+	/* Terminate the output list. */
+	if (*tail != NULL)
+		(*tail)->m_nextpkt = NULL;
+
+	/* If there are leftover packets, put into the heap for next event. */
+	if ((m = pipe->head) != NULL) {
+		pkt = dn_tag_get(m);
+		/*
+		 * XXX Should check errors on heap_insert, by draining the
+		 * whole pipe p and hoping in the future we are more successful.
+		 */
+		heap_insert(&extract_heap, pkt->output_time, pipe);
+	}
+}
+
+/* Signed 64-bit division; Linux builds get div64() from elsewhere. */
+#ifndef __linux__
+#define div64(a, b)	((int64_t)(a) / (int64_t)(b))
+#endif
+/* Direction value marking a packet that dummynet_send() must drop. */
+#define DN_TO_DROP	0xffff
+/*
+ * Compute how many ticks we have to wait before being able to send
+ * a packet.
This is computed as the "wire time" for the packet
+ * (length + extra bits), minus the credit available, scaled to ticks.
+ * Check that the result is not negative (it could be if we have
+ * too much leftover credit in q->numbytes).
+ *
+ * The division rounds up (p->bandwidth - 1 is added to the numerator).
+ * The caller must ensure p->bandwidth != 0.
+ */
+static inline dn_key
+set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p)
+{
+	int64_t ret;
+
+	ret = div64( (m->m_pkthdr.len * 8 + q->extra_bits) * hz
+		- q->numbytes + p->bandwidth - 1 , p->bandwidth);
+#if 0
+	printf("%s %d extra_bits %d numb %d ret %d\n",
+		__FUNCTION__, __LINE__,
+		(int)(q->extra_bits & 0xffffffff),
+		(int)(q->numbytes & 0xffffffff),
+		(int)(ret & 0xffffffff));
+#endif
+	if (ret < 0)
+		ret = 0;
+	return ret;
+}
+
+/*
+ * Convert the additional MAC overheads/delays into an equivalent
+ * number of bits for the given data rate. The samples are in milliseconds
+ * so we need to divide by 1000.
+ *
+ * A sample is picked at random from p->samples[]. Returns 0 when the
+ * pipe has no samples. As a side effect, when the chosen index is
+ * >= p->loss_level the packet's tag is marked DN_TO_DROP, so the packet
+ * is discarded when it is eventually sent.
+ */
+static dn_key
+compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p)
+{
+	int index;
+	dn_key extra_bits;
+
+	if (!p->samples || p->samples_no == 0)
+		return 0;
+	index  = random() % p->samples_no;
+	extra_bits = div64((dn_key)p->samples[index] * p->bandwidth, 1000);
+	if (index >= p->loss_level) {
+		struct dn_pkt_tag *dt = dn_tag_get(pkt);
+		if (dt)
+			dt->dn_dir = DN_TO_DROP;
+	}
+	return extra_bits;
+}
+
+/*
+ * Release a pipe descriptor together with its samples array, if any.
+ * Queued packets and heaps are not touched here.
+ */
+static void
+free_pipe(struct dn_pipe *p)
+{
+	if (p->samples)
+		free(p->samples, M_DUMMYNET);
+	free(p, M_DUMMYNET);
+}
+
+/*
+ * extract pkt from queue, compute output time (could be now)
+ * and put into delay line (p_queue)
+ *
+ * pkt must be the head of q. 'len' is the packet length in bytes and is
+ * subtracted from the queue's byte count.
+ */
+static void
+move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p,
+    int len)
+{
+    struct dn_pkt_tag *dt = dn_tag_get(pkt);
+
+    /* Unlink from the flow queue and update its counters. */
+    q->head = pkt->m_nextpkt ;
+    q->len-- ;
+    q->len_bytes -= len ;
+
+    dt->output_time = curr_time + p->delay ;
+
+    /* Append to the tail of the pipe's delay line. */
+    if (p->head == NULL)
+	p->head = pkt;
+    else
+	p->tail->m_nextpkt = pkt;
+    p->tail = pkt;
+    p->tail->m_nextpkt = NULL;
+}
+
+/*
+ * ready_event() is invoked every time the queue must enter the
+ * scheduler,
either because the first packet arrives, or because
+ * a previously scheduled event fired.
+ * On invocation, drain as many pkts as possible (could be 0) and then
+ * if there are leftover packets reinsert the pkt in the scheduler.
+ *
+ * Packets that become due immediately are appended to *head / *tail via
+ * transmit_event(). Must be called with the dummynet lock held.
+ */
+static void
+ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
+{
+	struct mbuf *pkt;
+	struct dn_pipe *p = q->fs->pipe;
+	int p_was_empty;
+
+	DUMMYNET_LOCK_ASSERT();
+
+	if (p == NULL) {
+		printf("dummynet: ready_event- pipe is gone\n");
+		return;
+	}
+	p_was_empty = (p->head == NULL);
+
+	/*
+	 * Schedule fixed-rate queues linked to this pipe:
+	 * account for the bw accumulated since last scheduling, then
+	 * drain as many pkts as allowed by q->numbytes and move to
+	 * the delay line (in p) computing output time.
+	 * bandwidth==0 (no limit) means we can drain the whole queue,
+	 * setting len_scaled = 0 does the job.
+	 */
+	q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
+	while ((pkt = q->head) != NULL) {
+		int len = pkt->m_pkthdr.len;
+		dn_key len_scaled = p->bandwidth ? len*8*hz
+			+ q->extra_bits*hz
+			: 0;
+
+		if (DN_KEY_GT(len_scaled, q->numbytes))
+			break;
+		q->numbytes -= len_scaled;
+		move_pkt(pkt, q, p, len);
+		/* Pick the extra overhead for the packet that is now at the head. */
+		if (q->head)
+			q->extra_bits = compute_extra_bits(q->head, p);
+	}
+	/*
+	 * If we have more packets queued, schedule next ready event
+	 * (can only occur when bandwidth != 0, otherwise we would have
+	 * flushed the whole queue in the previous loop).
+	 * To this purpose we record the current time and compute how many
+	 * ticks to go for the finish time of the packet.
+	 */
+	if ((pkt = q->head) != NULL) {	/* this implies bandwidth != 0 */
+		dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */
+
+		q->sched_time = curr_time;
+		heap_insert(&ready_heap, curr_time + t, (void *)q);
+		/*
+		 * XXX Should check errors on heap_insert, and drain the whole
+		 * queue on error hoping next time we are luckier.
+		 */
+	} else		/* RED needs to know when the queue becomes empty. */
+		q->q_time = curr_time;
+
+	/*
+	 * If the delay line was empty call transmit_event() now.
+	 * Otherwise, the scheduler will take care of it.
+	 */
+	if (p_was_empty)
+		transmit_event(p, head, tail);
+}
+
+/*
+ * Called when we can transmit packets on WF2Q queues. Take pkts out of
+ * the queues at their start time, and enqueue into the delay line.
+ * Packets are drained until p->numbytes < 0. As long as
+ * len_scaled >= p->numbytes, the packet goes into the delay line
+ * with a deadline p->delay. For the last packet, if p->numbytes < 0,
+ * there is an additional delay.
+ *
+ * Must be called with the dummynet lock held. Packets that become due
+ * immediately are appended to *head / *tail via transmit_event().
+ */
+static void
+ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
+{
+	int p_was_empty = (p->head == NULL);
+	struct dn_heap *sch = &(p->scheduler_heap);
+	struct dn_heap *neh = &(p->not_eligible_heap);
+	int64_t p_numbytes = p->numbytes;
+
+	DUMMYNET_LOCK_ASSERT();
+
+	if (p->if_name[0] == 0)		/* tx clock is simulated */
+		/*
+		 * Since result may not fit into p->numbytes (32bit) we
+		 * are using 64bit var here.
+		 */
+		p_numbytes += (curr_time - p->sched_time) * p->bandwidth;
+	else {	/*
+		 * tx clock is for real,
+		 * the ifq must be empty or this is a NOP.
+		 * XXX not supported in Linux
+		 */
+		/* The real check is commented out, so this branch always returns. */
+		if (1) // p->ifp && p->ifp->if_snd.ifq_head != NULL)
+			return;
+		else {
+			DPRINTF(("dummynet: pipe %d ready from %s --\n",
+			    p->pipe_nr, p->if_name));
+		}
+	}
+
+	/*
+	 * While we have backlogged traffic AND credit, we need to do
+	 * something on the queue.
+	 */
+	while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
+		if (sch->elements > 0) {
+			/* Have some eligible pkts to send out. */
+			struct dn_flow_queue *q = sch->p[0].object;
+			struct mbuf *pkt = q->head;
+			struct dn_flow_set *fs = q->fs;
+			uint64_t len = pkt->m_pkthdr.len;
+			int len_scaled = p->bandwidth ? len * 8 * hz : 0;
+
+			heap_extract(sch, NULL); /* Remove queue from heap. */
+			p_numbytes -= len_scaled;
+			move_pkt(pkt, q, p, len);
+
+			p->V += div64((len << MY_M), p->sum);	/* Update V. */
+			q->S = q->F;			/* Update start time. */
+			if (q->len == 0) {
+				/* Flow not backlogged any more. */
+				fs->backlogged--;
+				heap_insert(&(p->idle_heap), q->F, q);
+			} else {
+				/* Still backlogged. */
+
+				/*
+				 * Update F and position in backlogged queue,
+				 * then put flow in not_eligible_heap
+				 * (we will fix this later).
+				 */
+				len = (q->head)->m_pkthdr.len;
+				q->F += div64((len << MY_M), fs->weight);
+				if (DN_KEY_LEQ(q->S, p->V))
+					heap_insert(neh, q->S, q);
+				else
+					heap_insert(sch, q->F, q);
+			}
+		}
+		/*
+		 * Now compute V = max(V, min(S_i)). Remember that all elements
+		 * in sch have by definition S_i <= V so if sch is not empty,
+		 * V is surely the max and we must not update it. Conversely,
+		 * if sch is empty we only need to look at neh.
+		 */
+		if (sch->elements == 0 && neh->elements > 0)
+			p->V = MAX64(p->V, neh->p[0].key);
+		/* Move from neh to sch any packets that have become eligible */
+		while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
+			struct dn_flow_queue *q = neh->p[0].object;
+			heap_extract(neh, NULL);
+			heap_insert(sch, q->F, q);
+		}
+
+		if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
+			p_numbytes = -1;	/* Mark not ready for I/O. */
+			break;
+		}
+	}
+	if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 &&
+	    p->idle_heap.elements > 0) {
+		/*
+		 * No traffic and no events scheduled.
+		 * We can get rid of idle-heap.
+		 */
+		int i;
+
+		/* S = F + 1 is the "timestamps invalid" marker used elsewhere. */
+		for (i = 0; i < p->idle_heap.elements; i++) {
+			struct dn_flow_queue *q = p->idle_heap.p[i].object;
+
+			q->F = 0;
+			q->S = q->F + 1;
+		}
+		p->sum = 0;
+		p->V = 0;
+		p->idle_heap.elements = 0;
+	}
+	/*
+	 * If we are getting clocks from dummynet (not a real interface) and
+	 * If we are under credit, schedule the next ready event.
+	 * Also fix the delivery time of the last packet.
+	 */
+	if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */
+		dn_key t = 0;		/* Number of ticks i have to wait. */
+
+		if (p->bandwidth > 0)
+			t = div64(p->bandwidth - 1 - p_numbytes, p->bandwidth);
+		dn_tag_get(p->tail)->output_time += t;
+		p->sched_time = curr_time;
+		heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
+		/*
+		 * XXX Should check errors on heap_insert, and drain the whole
+		 * queue on error hoping next time we are luckier.
+		 */
+	}
+
+	/* Fit (adjust if necessary) 64bit result into 32bit variable. */
+	if (p_numbytes > INT_MAX)
+		p->numbytes = INT_MAX;
+	else if (p_numbytes < INT_MIN)
+		p->numbytes = INT_MIN;
+	else
+		p->numbytes = p_numbytes;
+
+	/*
+	 * If the delay line was empty call transmit_event() now.
+	 * Otherwise, the scheduler will take care of it.
+	 */
+	if (p_was_empty)
+		transmit_event(p, head, tail);
+}
+
+/*
+ * This is called one tick, after previous run. It is used to
+ * schedule next run.
+ *
+ * It only enqueues dn_task on the dn_tq taskqueue; dummynet_task()
+ * re-arms the callout that invokes this function.
+ */
+static void
+dummynet(void * __unused unused)
+{
+
+	taskqueue_enqueue(dn_tq, &dn_task);
+}
+
+/*
+ * The main dummynet processing function.
+ *
+ * Advances curr_time (compensating for measured tick drift), fires all
+ * due events on the three global heaps, expires idle WF2Q flows, sends
+ * the packets collected along the way after dropping the lock, and
+ * re-arms the one-tick callout. 'pending' is used as the number of ticks
+ * coalesced into this invocation.
+ */
+static void
+dummynet_task(void *context, int pending)
+{
+	struct mbuf *head = NULL, *tail = NULL;
+	struct dn_pipe *pipe;
+	struct dn_heap *heaps[3];
+	struct dn_heap *h;
+	void *p;	/* generic parameter to handler */
+	int i;
+
+	DUMMYNET_LOCK();
+
+	heaps[0] = &ready_heap;			/* fixed-rate queues */
+	heaps[1] = &wfq_ready_heap;		/* wfq queues */
+	heaps[2] = &extract_heap;		/* delay line */
+
+	/* Update number of lost(coalesced) ticks. */
+	tick_lost += pending - 1;
+
+	getmicrouptime(&t);
+	/* Last tick duration (usec). */
+	tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 +
+	    (t.tv_usec - prev_t.tv_usec);
+	/* Last tick vs standard tick difference (usec). */
+	tick_delta = (tick_last * hz - 1000000) / hz;
+	/* Accumulated tick difference (usec). */
+	tick_delta_sum += tick_delta;
+
+	prev_t = t;
+
+	/*
+	 * Adjust curr_time if accumulated tick difference greater than
+	 * 'standard' tick. Since curr_time should be monotonically increasing,
+	 * we do positive adjustment as required and throttle curr_time in
+	 * case of negative adjustment.
+	 */
+	curr_time++;
+	if (tick_delta_sum - tick >= 0) {
+		int diff = tick_delta_sum / tick;
+
+		curr_time += diff;
+		tick_diff += diff;
+		tick_delta_sum %= tick;
+		tick_adjustment++;
+	} else if (tick_delta_sum + tick <= 0) {
+		/* Running fast: undo the increment above, so curr_time stands still. */
+		curr_time--;
+		tick_diff--;
+		tick_delta_sum += tick;
+		tick_adjustment++;
+	}
+
+	/* Fire every event whose key (time) has been reached, heap by heap. */
+	for (i = 0; i < 3; i++) {
+		h = heaps[i];
+		while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) {
+			/*
+			 * NOTE(review): this raw comparison contradicts the
+			 * DN_KEY_LEQ loop condition unless the two disagree
+			 * (e.g. on key wrap-around) -- confirm intent.
+			 */
+			if (h->p[0].key > curr_time)
+				printf("dummynet: warning, "
+				    "heap %d is %d ticks late\n",
+				    i, (int)(curr_time - h->p[0].key));
+			/* store a copy before heap_extract */
+			p = h->p[0].object;
+			/* need to extract before processing */
+			heap_extract(h, NULL);
+			if (i == 0)
+				ready_event(p, &head, &tail);
+			else if (i == 1) {
+				/* This local shadows the function-level 'pipe'. */
+				struct dn_pipe *pipe = p;
+				if (pipe->if_name[0] != '\0')
+					printf("dummynet: bad ready_event_wfq "
+					    "for pipe %s\n", pipe->if_name);
+				else
+					ready_event_wfq(p, &head, &tail);
+			} else
+				transmit_event(p, &head, &tail);
+		}
+	}
+
+	/* Sweep pipes trying to expire idle flow_queues. */
+	for (i = 0; i < HASHSIZE; i++)
+		SLIST_FOREACH(pipe, &pipehash[i], next)
+			if (pipe->idle_heap.elements > 0 &&
+			    DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) {
+				struct dn_flow_queue *q =
+				    pipe->idle_heap.p[0].object;
+
+				heap_extract(&(pipe->idle_heap), NULL);
+				/* Mark timestamp as invalid. */
+				q->S = q->F + 1;
+				pipe->sum -= q->fs->weight;
+			}
+
+	DUMMYNET_UNLOCK();
+
+	/* Packets are handed to the stack only after the lock is released. */
+	if (head != NULL)
+		dummynet_send(head);
+
+	callout_reset(&dn_timeout, 1, dummynet, NULL);
+}
+
+/*
+ * Deliver a list of packets (linked through m_nextpkt) that left dummynet.
+ * Each packet is detached from the list and dispatched according to the
+ * direction stored in its dummynet tag; packets without any tag, or
+ * tagged DN_TO_DROP, are discarded.
+ */
+static void
+dummynet_send(struct mbuf *m)
+{
+	struct dn_pkt_tag *pkt;
+	struct mbuf *n;
+	struct ip *ip;
+	int dst;
+
+	for (; m != NULL; m = n) {
+		n = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		if (m_tag_first(m) == NULL) {
+			pkt = NULL; /* probably unnecessary */
+			dst = DN_TO_DROP;
+		} else {
+			pkt = dn_tag_get(m);
+			dst = pkt->dn_dir;
+		}
+		switch (dst) {
+		case DN_TO_IP_OUT:
+			ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+			break ;
+		case DN_TO_IP_IN :
+			ip = mtod(m, struct ip *);
+#ifndef __linux__	/* restore net format for FreeBSD */
+			ip->ip_len = htons(ip->ip_len);
+			ip->ip_off = htons(ip->ip_off);
+#endif
+			netisr_dispatch(NETISR_IP, m);
+			break;
+#ifdef INET6
+		case DN_TO_IP6_IN:
+			netisr_dispatch(NETISR_IPV6, m);
+			break;
+
+		case DN_TO_IP6_OUT:
+			ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
+			break;
+#endif
+		case DN_TO_IFB_FWD:
+			/*
+			 * NOTE(review): when if_bridge is not loaded the
+			 * mbuf is neither forwarded nor freed here --
+			 * looks like a leak, confirm.
+			 */
+			if (bridge_dn_p != NULL)
+				((*bridge_dn_p)(m, pkt->ifp));
+			else
+				printf("dummynet: if_bridge not loaded\n");
+
+			break;
+		case DN_TO_ETH_DEMUX:
+			/*
+			 * The Ethernet code assumes the Ethernet header is
+			 * contiguous in the first mbuf header.
+			 * Ensure this is true.
+			 */
+			if (m->m_len < ETHER_HDR_LEN &&
+			    (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+				printf("dummynet/ether: pullup failed, "
+				    "dropping packet\n");
+				break;
+			}
+			ether_demux(m->m_pkthdr.rcvif, m);
+			break;
+		case DN_TO_ETH_OUT:
+			ether_output_frame(pkt->ifp, m);
+			break;
+
+		case DN_TO_DROP:
+			/* drop the packet after some time */
+#ifdef __linux__
+			netisr_dispatch(-1, m);	/* -1 drop the packet */
+#else
+			m_freem(m);
+#endif
+			/* NOTE(review): this message is printed for every dropped packet. */
+			printf("need to drop the skbuf\n");
+			break;
+
+		default:
+			printf("dummynet: bad switch %d!\n", pkt->dn_dir);
+			m_freem(m);
+			break;
+		}
+	}
+}
+
+/*
+ * Unconditionally expire empty queues in case of shortage.
+ * Returns the number of queues freed.
+ *
+ * A queue is considered idle when it holds no packets and its timestamps
+ * are marked invalid (S == F + 1). The sweep runs at most once per
+ * time_uptime value for a given flow set; further calls return 0.
+ */
+static int
+expire_queues(struct dn_flow_set *fs)
+{
+    struct dn_flow_queue *q, *prev ;
+    int i, initial_elements = fs->rq_elements ;
+
+    if (fs->last_expired == time_uptime)
+	return 0 ;
+    fs->last_expired = time_uptime ;
+    for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
+	for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
+	    if (q->head != NULL || q->S != q->F+1) {
+		/* Queue is in use: keep it and advance. */
+  		prev = q ;
+  	        q = q->next ;
+  	    } else { /* entry is idle, expire it */
+		struct dn_flow_queue *old_q = q ;
+
+		/* Unlink from the bucket list before freeing. */
+		if (prev != NULL)
+		    prev->next = q = q->next ;
+		else
+		    fs->rq[i] = q = q->next ;
+		fs->rq_elements-- ;
+		free(old_q, M_DUMMYNET);
+	    }
+    return initial_elements - fs->rq_elements ;
+}
+
+/*
+ * If room, create a new queue and put at head of slot i;
+ * otherwise, create or use the default queue.
+ *
+ * "No room" means the flow set holds more than rq_size * dn_max_ratio
+ * queues and expire_queues() could not free any; the overflow slot
+ * (index rq_size) is used in that case.
+ * Returns NULL if a new queue cannot be allocated.
+ */
+static struct dn_flow_queue *
+create_queue(struct dn_flow_set *fs, int i)
+{
+	struct dn_flow_queue *q;
+
+	if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
+	    expire_queues(fs) == 0) {
+		/* No way to get room, use or create overflow queue. */
+		i = fs->rq_size;
+		if (fs->rq[i] != NULL)
+		    return fs->rq[i];
+	}
+	q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
+	if (q == NULL) {
+		printf("dummynet: sorry, cannot allocate queue for new flow\n");
+		return (NULL);
+	}
+	q->fs = fs;
+	q->hash_slot = i;
+	q->next = fs->rq[i];
+	q->S = q->F + 1;	/* hack - mark timestamp as invalid. */
+	/* fs->pipe is dereferenced here, so it must already be set. */
+	q->numbytes = io_fast ? fs->pipe->bandwidth : 0;
+	fs->rq[i] = q;
+	fs->rq_elements++;
+	return (q);
+}
+
+/*
+ * Given a flow_set and a pkt in last_pkt, find a matching queue
+ * after appropriate masking. The queue is moved to front
+ * so that further searches take less time.
+ *
+ * The flow id pointed to by 'id' is masked in place. Idle queues met
+ * during the scan are freed when pipe_expire is set. If no queue
+ * matches, a new one is created with create_queue(); NULL is returned
+ * if that fails.
+ */
+static struct dn_flow_queue *
+find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id)
+{
+    int i = 0 ; /* we need i and q for new allocations */
+    struct dn_flow_queue *q, *prev;
+    int is_v6 = IS_IP6_FLOW_ID(id);
+
+    /* Without a flow mask all traffic shares the queue in slot 0. */
+    if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
+	q = fs->rq[0] ;
+    else {
+	/* first, do the masking, then hash */
+	id->dst_port &= fs->flow_mask.dst_port ;
+	id->src_port &= fs->flow_mask.src_port ;
+	id->proto &= fs->flow_mask.proto ;
+	id->flags = 0 ; /* we don't care about this one */
+	if (is_v6) {
+	    APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6);
+	    APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6);
+	    id->flow_id6 &= fs->flow_mask.flow_id6;
+
+	    /*
+	     * NOTE(review): the last group of source-address terms,
+	     * (x << 16) & 0xffff, is always zero, so it adds nothing to
+	     * the hash. The IPv4 branch uses ">> 16" -- a right shift
+	     * was probably intended here; confirm.
+	     */
+	    i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^
+		((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^
+		((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^
+		((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^
+
+		((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^
+		((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^
+		((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^
+		((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^
+
+		((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^
+		((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^
+		((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
+		((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
+
+		((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^
+		((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^
+		((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^
+		((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^
+
+		(id->dst_port << 1) ^ (id->src_port) ^
+		(id->proto ) ^
+		(id->flow_id6);
+	} else {
+	    id->dst_ip &= fs->flow_mask.dst_ip ;
+	    id->src_ip &= fs->flow_mask.src_ip ;
+
+	    i = ( (id->dst_ip) & 0xffff ) ^
+		( (id->dst_ip >> 15) & 0xffff ) ^
+		( (id->src_ip << 1) & 0xffff ) ^
+		( (id->src_ip >> 16 ) & 0xffff ) ^
+		(id->dst_port << 1) ^ (id->src_port) ^
+		(id->proto );
+	}
+	i = i % fs->rq_size ;
+	/* finally, scan the current list for a match */
+	searches++ ;
+	for (prev=NULL, q = fs->rq[i] ; q ; ) {
+	    search_steps++;
+	    if (is_v6 &&
+		    IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) &&
+		    IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) &&
+		    id->dst_port == q->id.dst_port &&
+		    id->src_port == q->id.src_port &&
+		    id->proto == q->id.proto &&
+		    id->flags == q->id.flags &&
+		    id->flow_id6 == q->id.flow_id6)
+		break ; /* found */
+
+	    if (!is_v6 && id->dst_ip == q->id.dst_ip &&
+		    id->src_ip == q->id.src_ip &&
+		    id->dst_port == q->id.dst_port &&
+		    id->src_port == q->id.src_port &&
+		    id->proto == q->id.proto &&
+		    id->flags == q->id.flags)
+		break ; /* found */
+
+	    /* No match. Check if we can expire the entry */
+	    if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
+		/* entry is idle and not in any heap, expire it */
+		struct dn_flow_queue *old_q = q ;
+
+		if (prev != NULL)
+		    prev->next = q = q->next ;
+		else
+		    fs->rq[i] = q = q->next ;
+		fs->rq_elements-- ;
+		free(old_q, M_DUMMYNET);
+		continue ;
+	    }
+	    prev = q ;
+	    q = q->next ;
+	}
+	if (q && prev != NULL) { /* found and not in front */
+	    prev->next = q->next ;
+	    q->next = fs->rq[i] ;
+	    fs->rq[i] = q ;
+	}
+    }
+    if (q == NULL) { /* no match, need to allocate a new entry */
+	q = create_queue(fs, i);
+	if (q != NULL)
+	q->id = *id ;
+    }
+    return q ;
+}
+
+/*
+ * Decide whether RED (or Gentle RED) wants to drop a packet of length
+ * 'len' arriving on queue q of flow set fs. Updates the queue's average
+ * size estimate and drop counters.
+ * Returns 1 if the packet must be dropped, 0 if it is accepted.
+ */
+static int
+red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
+{
+	/*
+	 * RED algorithm
+	 *
+	 * RED calculates the average queue size (avg) using a low-pass filter
+	 * with an exponential weighted (w_q) moving average:
+	 * 	avg  <-  (1-w_q) * avg + w_q * q_size
+	 * where q_size is the queue length (measured in bytes or packets).
+	 *
+	 * If q_size == 0, we compute the idle time for the link, and set
+	 *	avg = (1 - w_q)^(idle/s)
+	 * where s is the time needed for transmitting a medium-sized packet.
+	 *
+	 * Now, if avg < min_th the packet is enqueued.
+	 * If avg > max_th the packet is dropped. Otherwise, the packet is
+	 * dropped with probability P function of avg.
+	 */
+
+	int64_t p_b = 0;
+
+	/* Queue in bytes or packets? */
+	u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ?
+	    q->len_bytes : q->len;
+
+	DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size));
+
+	/* Average queue size estimation. */
+	if (q_size != 0) {
+		/* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
+		int diff = SCALE(q_size) - q->avg;
+		int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
+
+		q->avg += (int)v;
+	} else {
+		/*
+		 * Queue is empty, find for how long the queue has been
+		 * empty and use a lookup table for computing
+		 * (1 - w_q)^(idle_time/s) where s is the time to send a
+		 * (small) packet.
+		 * XXX check wraps...
+		 */
+		if (q->avg) {
+			u_int t = div64(curr_time - q->q_time,
+			    fs->lookup_step);
+
+			/* NOTE(review): t is unsigned, so "t >= 0" is always true. */
+			q->avg = (t >= 0 && t < fs->lookup_depth) ?
+			    SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+		}
+	}
+	DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
+
+	/* Should i drop? */
+	if (q->avg < fs->min_th) {
+		q->count = -1;
+		return (0);	/* accept packet */
+	}
+	if (q->avg >= fs->max_th) {	/* average queue >=  max threshold */
+		if (fs->flags_fs & DN_IS_GENTLE_RED) {
+			/*
+			 * According to Gentle-RED, if avg is greater than
+			 * max_th the packet is dropped with a probability
+			 *	 p_b = c_3 * avg - c_4
+			 * where c_3 = (1 - max_p) / max_th
+			 *       c_4 = 1 - 2 * max_p
+			 */
+			p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
+			    fs->c_4;
+		} else {
+			q->count = -1;
+			DPRINTF(("dummynet: - drop"));
+			return (1);
+		}
+	} else if (q->avg > fs->min_th) {
+		/*
+		 * We compute p_b using the linear dropping function
+		 *	 p_b = c_1 * avg - c_2
+		 * where c_1 = max_p / (max_th - min_th)
+		 * 	 c_2 = max_p * min_th / (max_th - min_th)
+		 */
+		p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
+	}
+
+	/* In byte mode the probability is scaled by the packet size. */
+	if (fs->flags_fs & DN_QSIZE_IS_BYTES)
+		p_b = div64(p_b * len, fs->max_pkt_size);
+	if (++q->count == 0)
+		q->random = random() & 0xffff;
+	else {
+		/*
+		 * q->count counts packets arrived since last drop, so a greater
+		 * value of q->count means a greater packet drop probability.
+		 */
+		if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
+			q->count = 0;
+			DPRINTF(("dummynet: - red drop"));
+			/* After a drop we calculate a new random value. */
+			q->random = random() & 0xffff;
+			return (1);	/* drop */
+		}
+	}
+	/* End of RED algorithm. */
+
+	return (0);	/* accept */
+}
+
+/*
+ * Look up a flow set by number in flowsethash.
+ * Returns NULL if no flow set has that number.
+ */
+static __inline struct dn_flow_set *
+locate_flowset(int fs_nr)
+{
+	struct dn_flow_set *fs;
+
+	SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next)
+		if (fs->fs_nr == fs_nr)
+			return (fs);
+
+	return (NULL);
+}
+
+/*
+ * Look up a pipe by number in pipehash.
+ * Returns NULL if no pipe has that number.
+ */
+static __inline struct dn_pipe *
+locate_pipe(int pipe_nr)
+{
+	struct dn_pipe *pipe;
+
+	SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next)
+		if (pipe->pipe_nr == pipe_nr)
+			return (pipe);
+
+	return (NULL);
+}
+
+/*
+ * dummynet hook for packets. Below 'pipe' is a pipe or a queue
+ * depending on whether WF2Q or fixed bw is used.
+ * + * pipe_nr pipe or queue the packet is destined for. + * dir where shall we send the packet after dummynet. + * m the mbuf with the packet + * ifp the 'ifp' parameter from the caller. + * NULL in ip_input, destination interface in ip_output, + * rule matching rule, in case of multiple passes + */ +static int +dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) +{ + struct mbuf *m = *m0, *head = NULL, *tail = NULL; + struct dn_pkt_tag *pkt; + struct m_tag *mtag; + struct dn_flow_set *fs = NULL; + struct dn_pipe *pipe; + uint64_t len = m->m_pkthdr.len; + struct dn_flow_queue *q = NULL; + int is_pipe; + ipfw_insn *cmd = ACTION_PTR(fwa->rule); + + KASSERT(m->m_nextpkt == NULL, + ("dummynet_io: mbuf queue passed to dummynet")); + + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + if (cmd->opcode == O_ALTQ) + cmd += F_LEN(cmd); + if (cmd->opcode == O_TAG) + cmd += F_LEN(cmd); + is_pipe = (cmd->opcode == O_PIPE); + + DUMMYNET_LOCK(); + io_pkt++; + /* + * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. + * + * XXXGL: probably the pipe->fs and fs->pipe logic here + * below can be simplified. + */ + if (is_pipe) { + pipe = locate_pipe(fwa->cookie); + if (pipe != NULL) + fs = &(pipe->fs); + } else + fs = locate_flowset(fwa->cookie); + + if (fs == NULL) + goto dropit; /* This queue/pipe does not exist! */ + pipe = fs->pipe; + if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */ + pipe = locate_pipe(fs->parent_nr); + if (pipe != NULL) + fs->pipe = pipe; + else { + printf("dummynet: no pipe %d for queue %d, drop pkt\n", + fs->parent_nr, fs->fs_nr); + goto dropit; + } + } + q = find_queue(fs, &(fwa->f_id)); + if (q == NULL) + goto dropit; /* Cannot allocate queue. */ + + /* Update statistics, then check reasons to drop pkt. */ + q->tot_bytes += len; + q->tot_pkts++; + if (fs->plr && random() < fs->plr) + goto dropit; /* Random pkt drop. 
*/ + if (fs->flags_fs & DN_QSIZE_IS_BYTES) { + if (q->len_bytes > fs->qsize) + goto dropit; /* Queue size overflow. */ + } else { + if (q->len >= fs->qsize) + goto dropit; /* Queue count overflow. */ + } + if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len)) + goto dropit; + + /* XXX expensive to zero, see if we can remove it. */ + mtag = m_tag_get(PACKET_TAG_DUMMYNET, + sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO); + if (mtag == NULL) + goto dropit; /* Cannot allocate packet header. */ + m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ + + pkt = (struct dn_pkt_tag *)(mtag + 1); + /* + * Ok, i can handle the pkt now... + * Build and enqueue packet + parameters. + */ + pkt->rule = fwa->rule; + pkt->dn_dir = dir; + + pkt->ifp = fwa->oif; + + if (q->head == NULL) + q->head = m; + else + q->tail->m_nextpkt = m; + q->tail = m; + q->len++; + q->len_bytes += len; + + if (q->head != m) /* Flow was not idle, we are done. */ + goto done; + + if (q->q_time < (uint32_t)curr_time) + q->numbytes = io_fast ? fs->pipe->bandwidth : 0; + q->q_time = curr_time; + + /* + * If we reach this point the flow was previously idle, so we need + * to schedule it. This involves different actions for fixed-rate or + * WF2Q queues. + */ + if (is_pipe) { + /* Fixed-rate queue: just insert into the ready_heap. */ + dn_key t = 0; + + if (pipe->bandwidth) { + q->extra_bits = compute_extra_bits(m, pipe); + t = set_ticks(m, q, pipe); + } + q->sched_time = curr_time; + if (t == 0) /* Must process it now. */ + ready_event(q, &head, &tail); + else + heap_insert(&ready_heap, curr_time + t , q); + } else { + /* + * WF2Q. First, compute start time S: if the flow was + * idle (S = F + 1) set S to the virtual time V for the + * controlling pipe, and update the sum of weights for the pipe; + * otherwise, remove flow from idle_heap and set S to max(F,V). + * Second, compute finish time F = S + len / weight. + * Third, if pipe was idle, update V = max(S, V). + * Fourth, count one more backlogged flow. 
+ */ + if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */ + q->S = pipe->V; + pipe->sum += fs->weight; /* Add weight of new queue. */ + } else { + heap_extract(&(pipe->idle_heap), q); + q->S = MAX64(q->F, pipe->V); + } + q->F = div64(q->S + (len << MY_M), fs->weight); + + if (pipe->not_eligible_heap.elements == 0 && + pipe->scheduler_heap.elements == 0) + pipe->V = MAX64(q->S, pipe->V); + fs->backlogged++; + /* + * Look at eligibility. A flow is not eligibile if S>V (when + * this happens, it means that there is some other flow already + * scheduled for the same pipe, so the scheduler_heap cannot be + * empty). If the flow is not eligible we just store it in the + * not_eligible_heap. Otherwise, we store in the scheduler_heap + * and possibly invoke ready_event_wfq() right now if there is + * leftover credit. + * Note that for all flows in scheduler_heap (SCH), S_i <= V, + * and for all flows in not_eligible_heap (NEH), S_i > V. + * So when we need to compute max(V, min(S_i)) forall i in + * SCH+NEH, we only need to look into NEH. + */ + if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */ + if (pipe->scheduler_heap.elements == 0) + printf("dummynet: ++ ouch! not eligible but empty scheduler!\n"); + heap_insert(&(pipe->not_eligible_heap), q->S, q); + } else { + heap_insert(&(pipe->scheduler_heap), q->F, q); + if (pipe->numbytes >= 0) { /* Pipe is idle. */ + if (pipe->scheduler_heap.elements != 1) + printf("dummynet: OUCH! pipe should have been idle!\n"); + DPRINTF(("dummynet: waking up pipe %d at %d\n", + pipe->pipe_nr, (int)(q->F >> MY_M))); + pipe->sched_time = curr_time; + ready_event_wfq(pipe, &head, &tail); + } + } + } +done: + if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX && + dir != DN_TO_ETH_OUT) { /* Fast io. */ + io_pkt_fast++; + if (m->m_nextpkt != NULL) + printf("dummynet: fast io: pkt chain detected!\n"); + head = m->m_nextpkt = NULL; + } else + *m0 = NULL; /* Normal io. 
*/ + + DUMMYNET_UNLOCK(); + if (head != NULL) + dummynet_send(head); + return (0); + +dropit: + io_pkt_drop++; + if (q) + q->drops++; + DUMMYNET_UNLOCK(); + /* + * set the tag, if present. dn_tag_get cannot fail + * so we need to check first + */ + if (m_tag_first(m)) { + pkt = dn_tag_get(m); + pkt->dn_dir = DN_TO_DROP; + } + dummynet_send(m); /* drop the packet */ + *m0 = NULL; + return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); +} + +/* + * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) + * Doing this would probably save us the initial bzero of dn_pkt + */ +#define DN_FREE_PKT(_m) do { \ + m_freem(_m); \ +} while (0) + +/* + * Dispose all packets and flow_queues on a flow_set. + * If all=1, also remove red lookup table and other storage, + * including the descriptor itself. + * For the one in dn_pipe MUST also cleanup ready_heap... + */ +static void +purge_flow_set(struct dn_flow_set *fs, int all) +{ + struct dn_flow_queue *q, *qn; + int i; + + DUMMYNET_LOCK_ASSERT(); + + for (i = 0; i <= fs->rq_size; i++) { + for (q = fs->rq[i]; q != NULL; q = qn) { + struct mbuf *m, *mnext; + + mnext = q->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } + qn = q->next; + free(q, M_DUMMYNET); + } + fs->rq[i] = NULL; + } + + fs->rq_elements = 0; + if (all) { + /* RED - free lookup table. */ + if (fs->w_q_lookup != NULL) + free(fs->w_q_lookup, M_DUMMYNET); + if (fs->rq != NULL) + free(fs->rq, M_DUMMYNET); + /* If this fs is not part of a pipe, free it. */ + if (fs->pipe == NULL || fs != &(fs->pipe->fs)) + free(fs, M_DUMMYNET); + } +} + +/* + * Dispose all packets queued on a pipe (not a flow_set). + * Also free all resources associated to a pipe, which is about + * to be deleted. 
+ */ +static void +purge_pipe(struct dn_pipe *pipe) +{ + struct mbuf *m, *mnext; + + purge_flow_set( &(pipe->fs), 1 ); + + mnext = pipe->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } + + heap_free( &(pipe->scheduler_heap) ); + heap_free( &(pipe->not_eligible_heap) ); + heap_free( &(pipe->idle_heap) ); +} + +/* + * Delete all pipes and heaps returning memory. Must also + * remove references from all ipfw rules to all pipes. + */ +static void +dummynet_flush(void) +{ + struct dn_pipe *pipe, *pipe1; + struct dn_flow_set *fs, *fs1; + int i; + + DUMMYNET_LOCK(); + /* Free heaps so we don't have unwanted events. */ + heap_free(&ready_heap); + heap_free(&wfq_ready_heap); + heap_free(&extract_heap); + + /* + * Now purge all queued pkts and delete all pipes. + * + * XXXGL: can we merge the for(;;) cycles into one or not? + */ + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) { + SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next); + purge_flow_set(fs, 1); + } + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) { + SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next); + purge_pipe(pipe); + free_pipe(pipe); + } + DUMMYNET_UNLOCK(); +} + +extern struct ip_fw *ip_fw_default_rule; +static void +dn_rule_delete_fs(struct dn_flow_set *fs, void *r) +{ + int i ; + struct dn_flow_queue *q ; + struct mbuf *m ; + + for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ + for (q = fs->rq[i] ; q ; q = q->next ) + for (m = q->head ; m ; m = m->m_nextpkt ) { + struct dn_pkt_tag *pkt = dn_tag_get(m) ; + if (pkt->rule == r) + pkt->rule = ip_fw_default_rule ; + } +} + +/* + * When a firewall rule is deleted, scan all queues and remove the pointer + * to the rule from matching packets, making them point to the default rule. + * The pointer is used to reinject packets in case one_pass = 0. 
+ */ +void +dn_rule_delete(void *r) +{ + struct dn_pipe *pipe; + struct dn_flow_set *fs; + struct dn_pkt_tag *pkt; + struct mbuf *m; + int i; + + DUMMYNET_LOCK(); + /* + * If the rule references a queue (dn_flow_set), then scan + * the flow set, otherwise scan pipes. Should do either, but doing + * both does not harm. + */ + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) + dn_rule_delete_fs(fs, r); + + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(pipe, &pipehash[i], next) { + fs = &(pipe->fs); + dn_rule_delete_fs(fs, r); + for (m = pipe->head ; m ; m = m->m_nextpkt ) { + pkt = dn_tag_get(m); + if (pkt->rule == r) + pkt->rule = ip_fw_default_rule; + } + } + DUMMYNET_UNLOCK(); +} + +/* + * setup RED parameters + */ +static int +config_red(struct dn_flow_set *p, struct dn_flow_set *x) +{ + int i; + + x->w_q = p->w_q; + x->min_th = SCALE(p->min_th); + x->max_th = SCALE(p->max_th); + x->max_p = p->max_p; + + x->c_1 = p->max_p / (p->max_th - p->min_th); + x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th)); + + if (x->flags_fs & DN_IS_GENTLE_RED) { + x->c_3 = (SCALE(1) - p->max_p) / p->max_th; + x->c_4 = SCALE(1) - 2 * p->max_p; + } + + /* If the lookup table already exist, free and create it again. 
*/ + if (x->w_q_lookup) { + free(x->w_q_lookup, M_DUMMYNET); + x->w_q_lookup = NULL; + } + if (red_lookup_depth == 0) { + printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" + "must be > 0\n"); + free(x, M_DUMMYNET); + return (EINVAL); + } + x->lookup_depth = red_lookup_depth; + x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int), + M_DUMMYNET, M_NOWAIT); + if (x->w_q_lookup == NULL) { + printf("dummynet: sorry, cannot allocate red lookup table\n"); + free(x, M_DUMMYNET); + return(ENOSPC); + } + + /* Fill the lookup table with (1 - w_q)^x */ + x->lookup_step = p->lookup_step; + x->lookup_weight = p->lookup_weight; + x->w_q_lookup[0] = SCALE(1) - x->w_q; + + for (i = 1; i < x->lookup_depth; i++) + x->w_q_lookup[i] = + SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight); + + if (red_avg_pkt_size < 1) + red_avg_pkt_size = 512; + x->avg_pkt_size = red_avg_pkt_size; + if (red_max_pkt_size < 1) + red_max_pkt_size = 1500; + x->max_pkt_size = red_max_pkt_size; + return (0); +} + +static int +alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) +{ + if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ + int l = pfs->rq_size; + + if (l == 0) + l = dn_hash_size; + if (l < 4) + l = 4; + else if (l > DN_MAX_HASH_SIZE) + l = DN_MAX_HASH_SIZE; + x->rq_size = l; + } else /* one is enough for null mask */ + x->rq_size = 1; + x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *), + M_DUMMYNET, M_NOWAIT | M_ZERO); + if (x->rq == NULL) { + printf("dummynet: sorry, cannot allocate queue\n"); + return (ENOMEM); + } + x->rq_elements = 0; + return 0 ; +} + +static void +set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) +{ + x->flags_fs = src->flags_fs; + x->qsize = src->qsize; + x->plr = src->plr; + x->flow_mask = src->flow_mask; + if (x->flags_fs & DN_QSIZE_IS_BYTES) { + if (x->qsize > pipe_byte_limit) + x->qsize = 1024 * 1024; + } else { + if (x->qsize == 0) + x->qsize = 50; + if (x->qsize > pipe_slot_limit) + x->qsize = 50; + } + 
/* Configuring RED. */ + if (x->flags_fs & DN_IS_RED) + config_red(src, x); /* XXX should check errors */ +} + +/* + * Setup pipe or queue parameters. + */ +static int +config_pipe(struct dn_pipe *p) +{ + struct dn_flow_set *pfs = &(p->fs); + struct dn_flow_queue *q; + int i, error; + + /* + * The config program passes parameters as follows: + * bw = bits/second (0 means no limits), + * delay = ms, must be translated into ticks. + * qsize = slots/bytes + */ + p->delay = (p->delay * hz) / 1000; + /* We need either a pipe number or a flow_set number. */ + if (p->pipe_nr == 0 && pfs->fs_nr == 0) + return (EINVAL); + if (p->pipe_nr != 0 && pfs->fs_nr != 0) + return (EINVAL); + if (p->pipe_nr != 0) { /* this is a pipe */ + struct dn_pipe *pipe; + + DUMMYNET_LOCK(); + pipe = locate_pipe(p->pipe_nr); /* locate pipe */ + + if (pipe == NULL) { /* new pipe */ + pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET, + M_NOWAIT | M_ZERO); + if (pipe == NULL) { + DUMMYNET_UNLOCK(); + printf("dummynet: no memory for new pipe\n"); + return (ENOMEM); + } + pipe->pipe_nr = p->pipe_nr; + pipe->fs.pipe = pipe; + /* + * idle_heap is the only one from which + * we extract from the middle. + */ + pipe->idle_heap.size = pipe->idle_heap.elements = 0; + pipe->idle_heap.offset = + offsetof(struct dn_flow_queue, heap_pos); + } else + /* Flush accumulated credit for all queues. */ + for (i = 0; i <= pipe->fs.rq_size; i++) + for (q = pipe->fs.rq[i]; q; q = q->next) + q->numbytes = io_fast ? p->bandwidth : 0; + + pipe->bandwidth = p->bandwidth; + pipe->numbytes = 0; /* just in case... */ + bcopy(p->if_name, pipe->if_name, sizeof(p->if_name)); + pipe->ifp = NULL; /* reset interface ptr */ + pipe->delay = p->delay; + set_fs_parms(&(pipe->fs), pfs); + + /* Handle changes in the delay profile. 
*/ + if (p->samples_no > 0) { + if (pipe->samples_no != p->samples_no) { + if (pipe->samples != NULL) + free(pipe->samples, M_DUMMYNET); + pipe->samples = + malloc(p->samples_no*sizeof(dn_key), + M_DUMMYNET, M_NOWAIT | M_ZERO); + if (pipe->samples == NULL) { + DUMMYNET_UNLOCK(); + printf("dummynet: no memory " + "for new samples\n"); + return (ENOMEM); + } + pipe->samples_no = p->samples_no; + } + + strncpy(pipe->name,p->name,sizeof(pipe->name)); + pipe->loss_level = p->loss_level; + for (i = 0; isamples_no; ++i) + pipe->samples[i] = p->samples[i]; + } else if (pipe->samples != NULL) { + free(pipe->samples, M_DUMMYNET); + pipe->samples = NULL; + pipe->samples_no = 0; + } + + if (pipe->fs.rq == NULL) { /* a new pipe */ + error = alloc_hash(&(pipe->fs), pfs); + if (error) { + DUMMYNET_UNLOCK(); + free_pipe(pipe); + return (error); + } + SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)], + pipe, next); + } + DUMMYNET_UNLOCK(); + } else { /* config queue */ + struct dn_flow_set *fs; + + DUMMYNET_LOCK(); + fs = locate_flowset(pfs->fs_nr); /* locate flow_set */ + + if (fs == NULL) { /* new */ + if (pfs->parent_nr == 0) { /* need link to a pipe */ + DUMMYNET_UNLOCK(); + return (EINVAL); + } + fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET, + M_NOWAIT | M_ZERO); + if (fs == NULL) { + DUMMYNET_UNLOCK(); + printf( + "dummynet: no memory for new flow_set\n"); + return (ENOMEM); + } + fs->fs_nr = pfs->fs_nr; + fs->parent_nr = pfs->parent_nr; + fs->weight = pfs->weight; + if (fs->weight == 0) + fs->weight = 1; + else if (fs->weight > 100) + fs->weight = 100; + } else { + /* + * Change parent pipe not allowed; + * must delete and recreate. 
+ */ + if (pfs->parent_nr != 0 && + fs->parent_nr != pfs->parent_nr) { + DUMMYNET_UNLOCK(); + return (EINVAL); + } + } + + set_fs_parms(fs, pfs); + + if (fs->rq == NULL) { /* a new flow_set */ + error = alloc_hash(fs, pfs); + if (error) { + DUMMYNET_UNLOCK(); + free(fs, M_DUMMYNET); + return (error); + } + SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)], + fs, next); + } + DUMMYNET_UNLOCK(); + } + return (0); +} + +/* + * Helper function to remove from a heap queues which are linked to + * a flow_set about to be deleted. + */ +static void +fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs) +{ + int i = 0, found = 0 ; + for (; i < h->elements ;) + if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) { + h->elements-- ; + h->p[i] = h->p[h->elements] ; + found++ ; + } else + i++ ; + if (found) + heapify(h); +} + +/* + * helper function to remove a pipe from a heap (can be there at most once) + */ +static void +pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p) +{ + if (h->elements > 0) { + int i = 0 ; + for (i=0; i < h->elements ; i++ ) { + if (h->p[i].object == p) { /* found it */ + h->elements-- ; + h->p[i] = h->p[h->elements] ; + heapify(h); + break ; + } + } + } +} + +/* + * drain all queues. Called in case of severe mbuf shortage. 
+ */ +void +dummynet_drain(void) +{ + struct dn_flow_set *fs; + struct dn_pipe *pipe; + struct mbuf *m, *mnext; + int i; + + DUMMYNET_LOCK_ASSERT(); + + heap_free(&ready_heap); + heap_free(&wfq_ready_heap); + heap_free(&extract_heap); + /* remove all references to this pipe from flow_sets */ + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) + purge_flow_set(fs, 0); + + for (i = 0; i < HASHSIZE; i++) { + SLIST_FOREACH(pipe, &pipehash[i], next) { + purge_flow_set(&(pipe->fs), 0); + + mnext = pipe->head; + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + DN_FREE_PKT(m); + } + pipe->head = pipe->tail = NULL; + } + } +} + +/* + * Fully delete a pipe or a queue, cleaning up associated info. + */ +static int +delete_pipe(struct dn_pipe *p) +{ + + if (p->pipe_nr == 0 && p->fs.fs_nr == 0) + return EINVAL ; + if (p->pipe_nr != 0 && p->fs.fs_nr != 0) + return EINVAL ; + if (p->pipe_nr != 0) { /* this is an old-style pipe */ + struct dn_pipe *pipe; + struct dn_flow_set *fs; + int i; + + DUMMYNET_LOCK(); + pipe = locate_pipe(p->pipe_nr); /* locate pipe */ + + if (pipe == NULL) { + DUMMYNET_UNLOCK(); + return (ENOENT); /* not found */ + } + + /* Unlink from list of pipes. */ + SLIST_REMOVE(&pipehash[HASH(pipe->pipe_nr)], pipe, dn_pipe, next); + + /* Remove all references to this pipe from flow_sets. 
*/ + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) + if (fs->pipe == pipe) { + printf("dummynet: ++ ref to pipe %d from fs %d\n", + p->pipe_nr, fs->fs_nr); + fs->pipe = NULL ; + purge_flow_set(fs, 0); + } + fs_remove_from_heap(&ready_heap, &(pipe->fs)); + purge_pipe(pipe); /* remove all data associated to this pipe */ + /* remove reference to here from extract_heap and wfq_ready_heap */ + pipe_remove_from_heap(&extract_heap, pipe); + pipe_remove_from_heap(&wfq_ready_heap, pipe); + DUMMYNET_UNLOCK(); + + free_pipe(pipe); + } else { /* this is a WF2Q queue (dn_flow_set) */ + struct dn_flow_set *fs; + + DUMMYNET_LOCK(); + fs = locate_flowset(p->fs.fs_nr); /* locate set */ + + if (fs == NULL) { + DUMMYNET_UNLOCK(); + return (ENOENT); /* not found */ + } + + /* Unlink from list of flowsets. */ + SLIST_REMOVE( &flowsethash[HASH(fs->fs_nr)], fs, dn_flow_set, next); + + if (fs->pipe != NULL) { + /* Update total weight on parent pipe and cleanup parent heaps. */ + fs->pipe->sum -= fs->weight * fs->backlogged ; + fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs); + fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs); +#if 1 /* XXX should i remove from idle_heap as well ? 
*/ + fs_remove_from_heap(&(fs->pipe->idle_heap), fs); +#endif + } + purge_flow_set(fs, 1); + DUMMYNET_UNLOCK(); + } + return 0 ; +} + +/* + * helper function used to copy data from kernel in DUMMYNET_GET + */ +static char * +dn_copy_set(struct dn_flow_set *set, char *bp) +{ + int i, copied = 0 ; + struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp; + + DUMMYNET_LOCK_ASSERT(); + + for (i = 0 ; i <= set->rq_size ; i++) + for (q = set->rq[i] ; q ; q = q->next, qp++ ) { + if (q->hash_slot != i) + printf("dummynet: ++ at %d: wrong slot (have %d, " + "should be %d)\n", copied, q->hash_slot, i); + if (q->fs != set) + printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", + i, q->fs, set); + copied++ ; + bcopy(q, qp, sizeof( *q ) ); + /* cleanup pointers */ + qp->next = NULL ; + qp->head = qp->tail = NULL ; + qp->fs = NULL ; + } + if (copied != set->rq_elements) + printf("dummynet: ++ wrong count, have %d should be %d\n", + copied, set->rq_elements); + return (char *)qp ; +} + +static size_t +dn_calc_size(void) +{ + struct dn_flow_set *fs; + struct dn_pipe *pipe; + size_t size = 0; + int i; + + DUMMYNET_LOCK_ASSERT(); + /* + * Compute size of data structures: list of pipes and flow_sets. + */ + for (i = 0; i < HASHSIZE; i++) { + SLIST_FOREACH(pipe, &pipehash[i], next) + size += sizeof(*pipe) + + pipe->fs.rq_elements * sizeof(struct dn_flow_queue); + SLIST_FOREACH(fs, &flowsethash[i], next) + size += sizeof (*fs) + + fs->rq_elements * sizeof(struct dn_flow_queue); + } + return size; +} + +static int +dummynet_get(struct sockopt *sopt) +{ + char *buf, *bp ; /* bp is the "copy-pointer" */ + size_t size ; + struct dn_flow_set *fs; + struct dn_pipe *pipe; + int error=0, i ; + + /* XXX lock held too long */ + DUMMYNET_LOCK(); + /* + * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we + * cannot use this flag while holding a mutex. 
+ */ + for (i = 0; i < 10; i++) { + size = dn_calc_size(); + DUMMYNET_UNLOCK(); + buf = malloc(size, M_TEMP, M_WAITOK); + DUMMYNET_LOCK(); + if (size == dn_calc_size()) + break; + free(buf, M_TEMP); + buf = NULL; + } + if (buf == NULL) { + DUMMYNET_UNLOCK(); + return ENOBUFS ; + } + bp = buf; + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(pipe, &pipehash[i], next) { + struct dn_pipe *pipe_bp = (struct dn_pipe *)bp; + + /* + * Copy pipe descriptor into *bp, convert delay back to ms, + * then copy the flow_set descriptor(s) one at a time. + * After each flow_set, copy the queue descriptor it owns. + */ + bcopy(pipe, bp, sizeof(*pipe)); + pipe_bp->delay = (pipe_bp->delay * 1000) / hz; + /* + * XXX the following is a hack based on ->next being the + * first field in dn_pipe and dn_flow_set. The correct + * solution would be to move the dn_flow_set to the beginning + * of struct dn_pipe. + */ + pipe_bp->next.sle_next = (struct dn_pipe *)DN_IS_PIPE; + /* Clean pointers. */ + pipe_bp->head = pipe_bp->tail = NULL; + pipe_bp->fs.next.sle_next = NULL; + pipe_bp->fs.pipe = NULL; + pipe_bp->fs.rq = NULL; + pipe_bp->samples = NULL; + + bp += sizeof(*pipe) ; + bp = dn_copy_set(&(pipe->fs), bp); + } + + for (i = 0; i < HASHSIZE; i++) + SLIST_FOREACH(fs, &flowsethash[i], next) { + struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp; + + bcopy(fs, bp, sizeof(*fs)); + /* XXX same hack as above */ + fs_bp->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE; + fs_bp->pipe = NULL; + fs_bp->rq = NULL; + bp += sizeof(*fs); + bp = dn_copy_set(fs, bp); + } + + DUMMYNET_UNLOCK(); + + error = sooptcopyout(sopt, buf, size); + free(buf, M_TEMP); + return error ; +} + +/* + * Handler for the various dummynet socket options (get, flush, config, del) + */ +static int +ip_dn_ctl(struct sockopt *sopt) +{ + int error; + struct dn_pipe *p = NULL; + + error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); + if (error) + return (error); + + /* Disallow sets in really-really secure mode. 
*/ + if (sopt->sopt_dir == SOPT_SET) { +#if __FreeBSD_version >= 500034 + error = securelevel_ge(sopt->sopt_td->td_ucred, 3); + if (error) + return (error); +#else + if (securelevel >= 3) + return (EPERM); +#endif + } + + switch (sopt->sopt_name) { + default : + printf("dummynet: -- unknown option %d", sopt->sopt_name); + error = EINVAL ; + break ; + + case IP_DUMMYNET_GET : + error = dummynet_get(sopt); + break ; + + case IP_DUMMYNET_FLUSH : + dummynet_flush() ; + break ; + + case IP_DUMMYNET_CONFIGURE : + p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK); + error = sooptcopyin(sopt, p, sizeof(struct dn_pipe_max), sizeof *p); + if (error) + break ; + if (p->samples_no > 0) + p->samples = &( ((struct dn_pipe_max*) p)->samples[0] ); + + error = config_pipe(p); + break ; + + case IP_DUMMYNET_DEL : /* remove a pipe or queue */ + p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK); + error = sooptcopyin(sopt, p, sizeof *p, sizeof *p); + if (error) + break ; + + error = delete_pipe(p); + break ; + } + + if (p != NULL) + free(p, M_TEMP); + + return error ; +} + +static void +ip_dn_init(void) +{ + int i; + + if (bootverbose) + printf("DUMMYNET with IPv6 initialized (040826)\n"); + + DUMMYNET_LOCK_INIT(); + + for (i = 0; i < HASHSIZE; i++) { + SLIST_INIT(&pipehash[i]); + SLIST_INIT(&flowsethash[i]); + } + ready_heap.size = ready_heap.elements = 0; + ready_heap.offset = 0; + + wfq_ready_heap.size = wfq_ready_heap.elements = 0; + wfq_ready_heap.offset = 0; + + extract_heap.size = extract_heap.elements = 0; + extract_heap.offset = 0; + + ip_dn_ctl_ptr = ip_dn_ctl; + ip_dn_io_ptr = dummynet_io; + ip_dn_ruledel_ptr = dn_rule_delete; + + TASK_INIT(&dn_task, 0, dummynet_task, NULL); + dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, + taskqueue_thread_enqueue, &dn_tq); + taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); + + callout_init(&dn_timeout, CALLOUT_MPSAFE); + callout_reset(&dn_timeout, 1, dummynet, NULL); + + /* Initialize curr_time adjustment 
mechanics. */ + getmicrouptime(&prev_t); +} + +#ifdef KLD_MODULE +static void +ip_dn_destroy(void) +{ + ip_dn_ctl_ptr = NULL; + ip_dn_io_ptr = NULL; + ip_dn_ruledel_ptr = NULL; + + DUMMYNET_LOCK(); + callout_stop(&dn_timeout); + DUMMYNET_UNLOCK(); + taskqueue_drain(dn_tq, &dn_task); + taskqueue_free(dn_tq); + + dummynet_flush(); + + DUMMYNET_LOCK_DESTROY(); +} +#endif /* KLD_MODULE */ + +static int +dummynet_modevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + if (ip_dn_io_ptr) { + printf("DUMMYNET already loaded\n"); + return EEXIST ; + } + ip_dn_init(); + break; + + case MOD_UNLOAD: +#if !defined(KLD_MODULE) + printf("dummynet statically compiled, cannot unload\n"); + return EINVAL ; +#else + ip_dn_destroy(); +#endif + break ; + default: + return EOPNOTSUPP; + break ; + } + return 0 ; +} + +static moduledata_t dummynet_mod = { + "dummynet", + dummynet_modevent, + NULL +}; +DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); +MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); +MODULE_VERSION(dummynet, 1); diff --git a/dummynet/ip_fw2.c b/dummynet/ip_fw2.c new file mode 100644 index 0000000..bdcfe11 --- /dev/null +++ b/dummynet/ip_fw2.c @@ -0,0 +1,4665 @@ +/*- + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz Exp $"); + +#define DEB(x) +#define DDB(x) x + +/* + * Implement IP packet firewall (new version) + */ + +#if !defined(KLD_MODULE) +#include "opt_ipfw.h" +#include "opt_ipdivert.h" +#include "opt_ipdn.h" +#include "opt_inet.h" +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ +#endif +#include "opt_inet6.h" +#include "opt_ipsec.h" +#include "opt_mac.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* for ETHERTYPE_IP */ +#include +#include +#include +#include + +#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#ifdef INET6 +#include +#endif + +#include /* XXX for in_cksum */ + +#ifdef MAC +#include +#endif + +#include "missing.h" + +/* + * set_disable contains one bit per set value (0..31). 
+ * If the bit is set, all rules with the corresponding set + * are disabled. Set RESVD_SET(31) is reserved for the default rule + * and rules that are not deleted by the flush command, + * and CANNOT be disabled. + * Rules in set RESVD_SET can only be deleted explicitly. + */ +static u_int32_t set_disable; +static int fw_verbose; +static struct callout ipfw_timeout; +static int verbose_limit; + +static uma_zone_t ipfw_dyn_rule_zone; + +/* + * Data structure to cache our ucred related + * information. This structure only gets used if + * the user specified UID/GID based constraints in + * a firewall rule. + */ +struct ip_fw_ugid { + gid_t fw_groups[NGROUPS]; + int fw_ngroups; + uid_t fw_uid; + int fw_prid; +}; + +/* + * list of rules for layer 3 + */ +struct ip_fw_chain layer3_chain; + +MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); +MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); +#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL) +ipfw_nat_t *ipfw_nat_ptr = NULL; +ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; +ipfw_nat_cfg_t *ipfw_nat_del_ptr; +ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; +ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; + +struct table_entry { + struct radix_node rn[2]; + struct sockaddr_in addr, mask; + u_int32_t value; +}; + +static int autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ + +extern int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); + +#ifdef SYSCTL_NODE +SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); +SYSCTL_PROC(_net_inet_ip_fw, OID_AUTO, enable, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &fw_enable, 0, + ipfw_chg_hook, "I", "Enable ipfw"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, CTLFLAG_RW, + &autoinc_step, 0, "Rule number autincrement step"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, one_pass, + CTLFLAG_RW | CTLFLAG_SECURE3, + &fw_one_pass, 0, + "Only do a single pass through ipfw when using dummynet(4)"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose, + CTLFLAG_RW | CTLFLAG_SECURE3, + &fw_verbose, 0, "Log 
matches to ipfw rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, + &verbose_limit, 0, "Set upper limit of matches of ipfw rules logged"); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, + NULL, IPFW_DEFAULT_RULE, "The default/max possible rule number."); +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, + NULL, IPFW_TABLES_MAX, "The maximum number of tables."); +#endif /* SYSCTL_NODE */ + +/* + * Description of dynamic rules. + * + * Dynamic rules are stored in lists accessed through a hash table + * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can + * be modified through the sysctl variable dyn_buckets which is + * updated when the table becomes empty. + * + * XXX currently there is only one list, ipfw_dyn. + * + * When a packet is received, its address fields are first masked + * with the mask defined for the rule, then hashed, then matched + * against the entries in the corresponding list. + * Dynamic rules can be used for different purposes: + * + stateful rules; + * + enforcing limits on the number of sessions; + * + in-kernel NAT (not implemented yet) + * + * The lifetime of dynamic rules is regulated by dyn_*_lifetime, + * measured in seconds and depending on the flags. + * + * The total number of dynamic rules is stored in dyn_count. + * The max number of dynamic rules is dyn_max. When we reach + * the maximum number of rules we do not create anymore. This is + * done to avoid consuming too much memory, but also too much + * time when searching on each packet (ideally, we should try instead + * to put a limit on the length of the list on each bucket...). + * + * Each dynamic rule holds a pointer to the parent ipfw rule so + * we know what action to perform. Dynamic rules are removed when + * the parent rule is deleted. XXX we should make them survive. 
+ * + * There are some limitations with dynamic rules -- we do not + * obey the 'randomized match', and we do not do multiple + * passes through the firewall. XXX check the latter!!! + */ +static ipfw_dyn_rule **ipfw_dyn_v = NULL; +static u_int32_t dyn_buckets = 256; /* must be power of 2 */ +static u_int32_t curr_dyn_buckets = 256; /* must be power of 2 */ + +#if defined( __linux__ ) || defined( _WIN32 ) +DEFINE_SPINLOCK(ipfw_dyn_mtx); +#else +static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ +#endif /* !__linux__ */ +#define IPFW_DYN_LOCK_INIT() \ + mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) +#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) +#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) +#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) +#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) + +/* + * Timeouts for various events in handing dynamic rules. + */ +static u_int32_t dyn_ack_lifetime = 300; +static u_int32_t dyn_syn_lifetime = 20; +static u_int32_t dyn_fin_lifetime = 1; +static u_int32_t dyn_rst_lifetime = 1; +static u_int32_t dyn_udp_lifetime = 10; +static u_int32_t dyn_short_lifetime = 5; + +/* + * Keepalives are sent if dyn_keepalive is set. They are sent every + * dyn_keepalive_period seconds, in the last dyn_keepalive_interval + * seconds of lifetime of a rule. + * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower + * than dyn_keepalive_period. + */ + +static u_int32_t dyn_keepalive_interval = 20; +static u_int32_t dyn_keepalive_period = 5; +static u_int32_t dyn_keepalive = 1; /* do send keepalives */ + +static u_int32_t static_count; /* # of static rules */ +static u_int32_t static_len; /* size in bytes of static rules */ +static u_int32_t dyn_count; /* # of dynamic rules */ +static u_int32_t dyn_max = 4096; /* max # of dynamic rules */ + +#ifdef SYSCTL_NODE +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, CTLFLAG_RW, + &dyn_buckets, 0, "Number of dyn. 
buckets"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, CTLFLAG_RD, + &curr_dyn_buckets, 0, "Current Number of dyn. buckets"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, CTLFLAG_RD, + &dyn_count, 0, "Number of dyn. rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, CTLFLAG_RW, + &dyn_max, 0, "Max number of dyn. rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, static_count, CTLFLAG_RD, + &static_count, 0, "Number of static rules"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, CTLFLAG_RW, + &dyn_ack_lifetime, 0, "Lifetime of dyn. rules for acks"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, CTLFLAG_RW, + &dyn_syn_lifetime, 0, "Lifetime of dyn. rules for syn"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, CTLFLAG_RW, + &dyn_fin_lifetime, 0, "Lifetime of dyn. rules for fin"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, CTLFLAG_RW, + &dyn_rst_lifetime, 0, "Lifetime of dyn. rules for rst"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, CTLFLAG_RW, + &dyn_udp_lifetime, 0, "Lifetime of dyn. rules for UDP"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, CTLFLAG_RW, + &dyn_short_lifetime, 0, "Lifetime of dyn. rules for other situations"); +SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, CTLFLAG_RW, + &dyn_keepalive, 0, "Enable keepalives for dyn. 
rules"); +#endif /* SYSCTL_NODE */ + +#ifdef INET6 +/* + * IPv6 specific variables + */ +#ifdef SYSCTL_NODE +SYSCTL_DECL(_net_inet6_ip6); +#endif /* SYSCTL_NODE */ + +static struct sysctl_ctx_list ip6_fw_sysctl_ctx; +static struct sysctl_oid *ip6_fw_sysctl_tree; +#endif /* INET6 */ + +static int fw_deny_unknown_exthdrs = 1; + + +/* + * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T + * Other macros just cast void * into the appropriate type + */ +#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) +#define TCP(p) ((struct tcphdr *)(p)) +#define SCTP(p) ((struct sctphdr *)(p)) +#define UDP(p) ((struct udphdr *)(p)) +#define ICMP(p) ((struct icmphdr *)(p)) +#define ICMP6(p) ((struct icmp6_hdr *)(p)) + +static __inline int +icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) +{ + int type = icmp->icmp_type; + + return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<icmp_type; + + return (type <= ICMP_MAXTYPE && (TT & (1<arg1 or cmd->d[0]. + * + * We scan options and store the bits we find set. We succeed if + * + * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear + * + * The code is sometimes optimized not to store additional variables. 
+ */ + +static int +flags_match(ipfw_insn *cmd, u_int8_t bits) +{ + u_char want_clear; + bits = ~bits; + + if ( ((cmd->arg1 & 0xff) & bits) != 0) + return 0; /* some bits we want set were clear */ + want_clear = (cmd->arg1 >> 8) & 0xff; + if ( (want_clear & bits) != want_clear) + return 0; /* some bits we want clear were set */ + return 1; +} + +static int +ipopts_match(struct ip *ip, ipfw_insn *cmd) +{ + int optlen, bits = 0; + u_char *cp = (u_char *)(ip + 1); + int x = (ip->ip_hl << 2) - sizeof (struct ip); + + for (; x > 0; x -= optlen, cp += optlen) { + int opt = cp[IPOPT_OPTVAL]; + + if (opt == IPOPT_EOL) + break; + if (opt == IPOPT_NOP) + optlen = 1; + else { + optlen = cp[IPOPT_OLEN]; + if (optlen <= 0 || optlen > x) + return 0; /* invalid or truncated */ + } + switch (opt) { + + default: + break; + + case IPOPT_LSRR: + bits |= IP_FW_IPOPT_LSRR; + break; + + case IPOPT_SSRR: + bits |= IP_FW_IPOPT_SSRR; + break; + + case IPOPT_RR: + bits |= IP_FW_IPOPT_RR; + break; + + case IPOPT_TS: + bits |= IP_FW_IPOPT_TS; + break; + } + } + return (flags_match(cmd, bits)); +} + +static int +tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) +{ + int optlen, bits = 0; + u_char *cp = (u_char *)(tcp + 1); + int x = (tcp->th_off << 2) - sizeof(struct tcphdr); + + for (; x > 0; x -= optlen, cp += optlen) { + int opt = cp[0]; + if (opt == TCPOPT_EOL) + break; + if (opt == TCPOPT_NOP) + optlen = 1; + else { + optlen = cp[1]; + if (optlen <= 0) + break; + } + + switch (opt) { + + default: + break; + + case TCPOPT_MAXSEG: + bits |= IP_FW_TCPOPT_MSS; + break; + + case TCPOPT_WINDOW: + bits |= IP_FW_TCPOPT_WINDOW; + break; + + case TCPOPT_SACK_PERMITTED: + case TCPOPT_SACK: + bits |= IP_FW_TCPOPT_SACK; + break; + + case TCPOPT_TIMESTAMP: + bits |= IP_FW_TCPOPT_TS; + break; + + } + } + return (flags_match(cmd, bits)); +} + +static int +iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) +{ + if (ifp == NULL) /* no iface with this packet, match fails */ + return 0; + /* Check by name or 
by IP address */ + if (cmd->name[0] != '\0') { /* match by name */ + /* Check name */ + if (cmd->p.glob) { + if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) + return(1); + } else { + if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) + return(1); + } + } else { +#if !defined( __linux__ ) && !defined( _WIN32 ) + struct ifaddr *ia; + + /* XXX lock? */ + TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { + if (ia->ifa_addr->sa_family != AF_INET) + continue; + if (cmd->p.ip.s_addr == ((struct sockaddr_in *) + (ia->ifa_addr))->sin_addr.s_addr) + return(1); /* match */ + } +#endif + } + return(0); /* no match, fail ... */ +} + +#if !defined( __linux__ ) && !defined( _WIN32 ) +/* + * The verify_path function checks if a route to the src exists and + * if it is reachable via ifp (when provided). + * + * The 'verrevpath' option checks that the interface that an IP packet + * arrives on is the same interface that traffic destined for the + * packet's source address would be routed out of. The 'versrcreach' + * option just checks that the source address is reachable via any route + * (except default) in the routing table. These two are a measure to block + * forged packets. This is also commonly known as "anti-spoofing" or Unicast + * Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs + * is purposely reminiscent of the Cisco IOS command, + * + * ip verify unicast reverse-path + * ip verify unicast source reachable-via any + * + * which implements the same functionality. But note that syntax is + * misleading. The check may be performed on all IP packets whether unicast, + * multicast, or broadcast. 
+ */ +static int +verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) +{ + struct route ro; + struct sockaddr_in *dst; + + bzero(&ro, sizeof(ro)); + + dst = (struct sockaddr_in *)&(ro.ro_dst); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = src; + in_rtalloc_ign(&ro, RTF_CLONING, fib); + + if (ro.ro_rt == NULL) + return 0; + + /* + * If ifp is provided, check for equality with rtentry. + * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, + * in order to pass packets injected back by if_simloop(): + * if useloopback == 1 routing entry (via lo0) for our own address + * may exist, so we need to handle routing assymetry. + */ + if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { + RTFREE(ro.ro_rt); + return 0; + } + + /* if no ifp provided, check if rtentry is not default route */ + if (ifp == NULL && + satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { + RTFREE(ro.ro_rt); + return 0; + } + + /* or if this is a blackhole/reject route */ + if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + RTFREE(ro.ro_rt); + return 0; + } + + /* found valid route */ + RTFREE(ro.ro_rt); + return 1; +} +#endif + +#ifdef INET6 +/* + * ipv6 specific rules here... 
+ */ +static __inline int +icmp6type_match (int type, ipfw_insn_u32 *cmd) +{ + return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); +} + +static int +flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) +{ + int i; + for (i=0; i <= cmd->o.arg1; ++i ) + if (curr_flow == cmd->d[i] ) + return 1; + return 0; +} + +/* support for IP6_*_ME opcodes */ +static int +search_ip6_addr_net (struct in6_addr * ip6_addr) +{ + struct ifnet *mdc; + struct ifaddr *mdc2; + struct in6_ifaddr *fdm; + struct in6_addr copia; + + TAILQ_FOREACH(mdc, &ifnet, if_link) + TAILQ_FOREACH(mdc2, &mdc->if_addrlist, ifa_list) { + if (mdc2->ifa_addr->sa_family == AF_INET6) { + fdm = (struct in6_ifaddr *)mdc2; + copia = fdm->ia_addr.sin6_addr; + /* need for leaving scope_id in the sock_addr */ + in6_clearscope(&copia); + if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) + return 1; + } + } + return 0; +} + +static int +verify_path6(struct in6_addr *src, struct ifnet *ifp) +{ + struct route_in6 ro; + struct sockaddr_in6 *dst; + + bzero(&ro, sizeof(ro)); + + dst = (struct sockaddr_in6 * )&(ro.ro_dst); + dst->sin6_family = AF_INET6; + dst->sin6_len = sizeof(*dst); + dst->sin6_addr = *src; + /* XXX MRT 0 for ipv6 at this time */ + rtalloc_ign((struct route *)&ro, RTF_CLONING); + + if (ro.ro_rt == NULL) + return 0; + + /* + * if ifp is provided, check for equality with rtentry + * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, + * to support the case of sending packets to an address of our own. 
+ * (where the former interface is the first argument of if_simloop() + * (=ifp), the latter is lo0) + */ + if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { + RTFREE(ro.ro_rt); + return 0; + } + + /* if no ifp provided, check if rtentry is not default route */ + if (ifp == NULL && + IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { + RTFREE(ro.ro_rt); + return 0; + } + + /* or if this is a blackhole/reject route */ + if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { + RTFREE(ro.ro_rt); + return 0; + } + + /* found valid route */ + RTFREE(ro.ro_rt); + return 1; + +} +static __inline int +hash_packet6(struct ipfw_flow_id *id) +{ + u_int32_t i; + i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ + (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ + (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ + (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ + (id->dst_port) ^ (id->src_port); + return i; +} + +static int +is_icmp6_query(int icmp6_type) +{ + if ((icmp6_type <= ICMP6_MAXTYPE) && + (icmp6_type == ICMP6_ECHO_REQUEST || + icmp6_type == ICMP6_MEMBERSHIP_QUERY || + icmp6_type == ICMP6_WRUREQUEST || + icmp6_type == ICMP6_FQDN_QUERY || + icmp6_type == ICMP6_NI_QUERY)) + return (1); + + return (0); +} + +static void +send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) +{ + struct mbuf *m; + + m = args->m; + if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { + struct tcphdr *tcp; + tcp_seq ack, seq; + int flags; + struct { + struct ip6_hdr ip6; + struct tcphdr th; + } ti; + tcp = (struct tcphdr *)((char *)ip6 + hlen); + + if ((tcp->th_flags & TH_RST) != 0) { + m_freem(m); + args->m = NULL; + return; + } + + ti.ip6 = *ip6; + ti.th = *tcp; + ti.th.th_seq = ntohl(ti.th.th_seq); + ti.th.th_ack = ntohl(ti.th.th_ack); + ti.ip6.ip6_nxt = IPPROTO_TCP; + + if (ti.th.th_flags & TH_ACK) { + ack = 0; + seq = ti.th.th_ack; + flags = TH_RST; + } else { + ack = ti.th.th_seq; + if ((m->m_flags & M_PKTHDR) != 0) { + /* + * total new 
data to ACK is: + * total packet length, + * minus the header length, + * minus the tcp header length. + */ + ack += m->m_pkthdr.len - hlen + - (ti.th.th_off << 2); + } else if (ip6->ip6_plen) { + ack += ntohs(ip6->ip6_plen) + sizeof(*ip6) - + hlen - (ti.th.th_off << 2); + } else { + m_freem(m); + return; + } + if (tcp->th_flags & TH_SYN) + ack++; + seq = 0; + flags = TH_RST|TH_ACK; + } + bcopy(&ti, ip6, sizeof(ti)); + /* + * m is only used to recycle the mbuf + * The data in it is never read so we don't need + * to correct the offsets or anything + */ + tcp_respond(NULL, ip6, tcp, m, ack, seq, flags); + } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ +#if 0 + /* + * Unlike above, the mbufs need to line up with the ip6 hdr, + * as the contents are read. We need to m_adj() the + * needed amount. + * The mbuf will however be thrown away so we can adjust it. + * Remember we did an m_pullup on it already so we + * can make some assumptions about contiguousness. + */ + if (args->L3offset) + m_adj(m, args->L3offset); +#endif + icmp6_error(m, ICMP6_DST_UNREACH, code, 0); + } else + m_freem(m); + + args->m = NULL; +} + +#endif /* INET6 */ + +static u_int64_t norule_counter; /* counter for ipfw_log(NULL...) */ + +#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 +#define SNP(buf) buf, sizeof(buf) + +/* + * We enter here when we have a rule with O_LOG. + * XXX this function alone takes about 2Kbytes of code! 
+ */
+static void
+ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args,
+    struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg,
+    struct ip *ip)
+{
+	struct ether_header *eh = args->eh;
+	char *action;			/* action name printed in the log line */
+	int limit_reached = 0;		/* non-zero: also log "limit reached" */
+	char action2[40], proto[128], fragment[32];
+
+	fragment[0] = '\0';
+	proto[0] = '\0';
+
+	if (f == NULL) {	/* bogus pkt */
+		if (verbose_limit != 0 && norule_counter >= verbose_limit)
+			return;
+		norule_counter++;
+		if (norule_counter == verbose_limit)
+			limit_reached = verbose_limit;
+		action = "Refuse";
+	} else {	/* O_LOG is the first action, find the real one */
+		ipfw_insn *cmd = ACTION_PTR(f);
+		ipfw_insn_log *l = (ipfw_insn_log *)cmd;
+
+		if (l->max_log != 0 && l->log_left == 0)
+			return;		/* per-rule log budget exhausted */
+		l->log_left--;
+		if (l->log_left == 0)
+			limit_reached = l->max_log;
+		cmd += F_LEN(cmd);	/* point to first action */
+		if (cmd->opcode == O_ALTQ) {
+			ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd;
+
+			snprintf(SNPARGS(action2, 0), "Altq %d",
+				altq->qid);
+			cmd += F_LEN(cmd);
+		}
+		if (cmd->opcode == O_PROB)
+			cmd += F_LEN(cmd);
+
+		if (cmd->opcode == O_TAG)
+			cmd += F_LEN(cmd);
+
+		action = action2;	/* default: text formatted into action2 */
+		switch (cmd->opcode) {
+		case O_DENY:
+			action = "Deny";
+			break;
+
+		case O_REJECT:
+			if (cmd->arg1==ICMP_REJECT_RST)
+				action = "Reset";
+			else if (cmd->arg1==ICMP_UNREACH_HOST)
+				action = "Reject";
+			else
+				snprintf(SNPARGS(action2, 0), "Unreach %d",
+					cmd->arg1);
+			break;
+
+		case O_UNREACH6:
+			if (cmd->arg1==ICMP6_UNREACH_RST)
+				action = "Reset";
+			else
+				snprintf(SNPARGS(action2, 0), "Unreach %d",
+					cmd->arg1);
+			break;
+
+		case O_ACCEPT:
+			action = "Accept";
+			break;
+		case O_COUNT:
+			action = "Count";
+			break;
+		case O_DIVERT:
+			snprintf(SNPARGS(action2, 0), "Divert %d",
+				cmd->arg1);
+			break;
+		case O_TEE:
+			snprintf(SNPARGS(action2, 0), "Tee %d",
+				cmd->arg1);
+			break;
+		case O_SETFIB:
+			snprintf(SNPARGS(action2, 0), "SetFib %d",
+				cmd->arg1);
+			break;
+		case O_SKIPTO:
+			snprintf(SNPARGS(action2, 0), "SkipTo %d",
+				cmd->arg1);
+			break;
+		case O_PIPE:
+			snprintf(SNPARGS(action2, 0), "Pipe %d",
+				cmd->arg1);
+			break;
+		case O_QUEUE:
+			snprintf(SNPARGS(action2, 0), "Queue %d",
+				cmd->arg1);
+			break;
+		case O_FORWARD_IP: {
+			ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd;
+			int len;
+			struct in_addr dummyaddr;
+			if (sa->sa.sin_addr.s_addr == INADDR_ANY)	/* address comes from tablearg */
+				dummyaddr.s_addr = htonl(tablearg);
+			else
+				dummyaddr.s_addr = sa->sa.sin_addr.s_addr;
+
+			len = snprintf(SNPARGS(action2, 0), "Forward to %s",
+				inet_ntoa(dummyaddr));
+
+			if (sa->sa.sin_port)
+				snprintf(SNPARGS(action2, len), ":%d",
+				    sa->sa.sin_port);
+			}
+			break;
+		case O_NETGRAPH:
+			snprintf(SNPARGS(action2, 0), "Netgraph %d",
+				cmd->arg1);
+			break;
+		case O_NGTEE:
+			snprintf(SNPARGS(action2, 0), "Ngtee %d",
+				cmd->arg1);
+			break;
+		case O_NAT:
+			action = "Nat";
+			break;
+		default:
+			action = "UNKNOWN";
+			break;
+		}
+	}
+
+	if (hlen == 0) {	/* non-ip */
+		snprintf(SNPARGS(proto, 0), "MAC");
+
+	} else {
+		int len;
+		char src[48], dst[48];
+		struct icmphdr *icmp;
+		struct tcphdr *tcp;
+		struct udphdr *udp;
+#ifdef INET6
+		struct ip6_hdr *ip6 = NULL;
+		struct icmp6_hdr *icmp6;
+#endif
+		src[0] = '\0';
+		dst[0] = '\0';
+#ifdef INET6
+		if (IS_IP6_FLOW_ID(&(args->f_id))) {
+			char ip6buf[INET6_ADDRSTRLEN];
+			snprintf(src, sizeof(src), "[%s]",
+			    ip6_sprintf(ip6buf, &args->f_id.src_ip6));
+			snprintf(dst, sizeof(dst), "[%s]",
+			    ip6_sprintf(ip6buf, &args->f_id.dst_ip6));
+
+			ip6 = (struct ip6_hdr *)ip;
+			tcp = (struct tcphdr *)(((char *)ip) + hlen);
+			udp = (struct udphdr *)(((char *)ip) + hlen);
+		} else
+#endif
+		{
+			tcp = L3HDR(struct tcphdr, ip);
+			udp = L3HDR(struct udphdr, ip);
+
+			inet_ntoa_r(ip->ip_src, src);
+			inet_ntoa_r(ip->ip_dst, dst);
+		}
+
+		switch (args->f_id.proto) {
+		case IPPROTO_TCP:
+			len = snprintf(SNPARGS(proto, 0), "TCP %s", src);
+			if (offset == 0)	/* ports are printed only when offset is 0 */
+				snprintf(SNPARGS(proto, len), ":%d %s:%d",
+				    ntohs(tcp->th_sport),
+				    dst,
+				    ntohs(tcp->th_dport));
+			else
+				snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+
+		case IPPROTO_UDP:
+			len = snprintf(SNPARGS(proto, 0), "UDP %s", src);
+			if (offset == 0)
+				snprintf(SNPARGS(proto, len), ":%d %s:%d",
+				    ntohs(udp->uh_sport),
+				    dst,
+				    ntohs(udp->uh_dport));
+			else
+				snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+
+		case IPPROTO_ICMP:
+			icmp = L3HDR(struct icmphdr, ip);
+			if (offset == 0)
+				len = snprintf(SNPARGS(proto, 0),
+				    "ICMP:%u.%u ",
+				    icmp->icmp_type, icmp->icmp_code);
+			else
+				len = snprintf(SNPARGS(proto, 0), "ICMP ");
+			len += snprintf(SNPARGS(proto, len), "%s", src);
+			snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+#ifdef INET6
+		case IPPROTO_ICMPV6:
+			icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen);
+			if (offset == 0)
+				len = snprintf(SNPARGS(proto, 0),
+				    "ICMPv6:%u.%u ",
+				    icmp6->icmp6_type, icmp6->icmp6_code);
+			else
+				len = snprintf(SNPARGS(proto, 0), "ICMPv6 ");
+			len += snprintf(SNPARGS(proto, len), "%s", src);
+			snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+#endif
+		default:
+			len = snprintf(SNPARGS(proto, 0), "P:%d %s",
+			    args->f_id.proto, src);
+			snprintf(SNPARGS(proto, len), " %s", dst);
+			break;
+		}
+
+#ifdef INET6
+		if (IS_IP6_FLOW_ID(&(args->f_id))) {
+			if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
+				snprintf(SNPARGS(fragment, 0),
+				    " (frag %08x:%d@%d%s)",
+				    args->f_id.frag_id6,
+				    ntohs(ip6->ip6_plen) - hlen,
+				    ntohs(offset & IP6F_OFF_MASK) << 3,
+				    (offset & IP6F_MORE_FRAG) ? "+" : "");
+		} else
+#endif
+		{
+			int ip_off, ip_len;
+			if (1 || eh != NULL) { /* layer 2 packets are as on the wire */
+				ip_off = ntohs(ip->ip_off);
+				ip_len = ntohs(ip->ip_len);
+			} else {	/* not reached: the test above is forced true by "1 ||" */
+				ip_off = ip->ip_off;
+				ip_len = ip->ip_len;
+			}
+			if (ip_off & (IP_MF | IP_OFFMASK))
+				snprintf(SNPARGS(fragment, 0),
+				    " (frag %d:%d@%d%s)",
+				    ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2),
+				    offset << 3,
+				    (ip_off & IP_MF) ? "+" : "");
+		}
+	}
+	if (oif || m->m_pkthdr.rcvif)
+		log(LOG_SECURITY | LOG_INFO,
+		    "ipfw: %d %s %s %s via %s%s\n",
+		    f ? f->rulenum : -1,
+		    action, proto, oif ? "out" : "in",
+		    oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname,
+		    fragment);
+	else
+		log(LOG_SECURITY | LOG_INFO,
+		    "ipfw: %d %s %s [no if info]%s\n",
+		    f ? f->rulenum : -1,
+		    action, proto, fragment);
+	if (limit_reached)
+		log(LOG_SECURITY | LOG_NOTICE,
+		    "ipfw: limit %d reached on entry %d\n",
+		    limit_reached, f ? f->rulenum : -1);
+}
+
+/*
+ * IMPORTANT: the hash function for dynamic rules must be commutative
+ * in source and destination (ip,port), because rules are bidirectional
+ * and we want to find both in the same bucket.
+ */
+static __inline int
+hash_packet(struct ipfw_flow_id *id)
+{
+	u_int32_t i;
+
+#ifdef INET6
+	if (IS_IP6_FLOW_ID(id))
+		i = hash_packet6(id);
+	else
+#endif /* INET6 */
+	i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port);
+	i &= (curr_dyn_buckets - 1);	/* mask down to a bucket index */
+	return i;
+}
+
+/**
+ * unlink a dynamic rule from a chain. prev is a pointer to
+ * the previous one, q is a pointer to the rule to delete,
+ * head is a pointer to the head of the queue.
+ * Modifies q and potentially also head.
+ * On exit q points to the entry that followed the deleted one and
+ * dyn_count has been decremented.
+ */
+#define UNLINK_DYN_RULE(prev, head, q) {				\
+	ipfw_dyn_rule *old_q = q;					\
+									\
+	/* remove a refcount to the parent */				\
+	if (q->dyn_type == O_LIMIT)					\
+		q->parent->count--;					\
+	DEB(printf("ipfw: unlink entry 0x%08x %d -> 0x%08x %d, %d left\n",\
+		(q->id.src_ip), (q->id.src_port),			\
+		(q->id.dst_ip), (q->id.dst_port), dyn_count-1 ); )	\
+	if (prev != NULL)						\
+		prev->next = q = q->next;				\
+	else								\
+		head = q = q->next;					\
+	dyn_count--;							\
+	uma_zfree(ipfw_dyn_rule_zone, old_q); }
+
+#define TIME_LEQ(a,b)       ((int)((a)-(b)) <= 0)
+
+/**
+ * Remove dynamic rules pointing to "rule", or all of them if rule == NULL.
+ *
+ * If keep_me == NULL, rules are deleted even if not expired,
+ * otherwise only expired rules are removed.
+ *
+ * The value of the second parameter is also used to point to identify
+ * a rule we absolutely do not want to remove (e.g. because we are
+ * holding a reference to it -- this is the case with O_LIMIT_PARENT
+ * rules).
The pointer is only used for comparison, so any non-null + * value will do. + */ +static void +remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) +{ + static u_int32_t last_remove = 0; + +#define FORCE (keep_me == NULL) + + ipfw_dyn_rule *prev, *q; + int i, pass = 0, max_pass = 0; + + IPFW_DYN_LOCK_ASSERT(); + + if (ipfw_dyn_v == NULL || dyn_count == 0) + return; + /* do not expire more than once per second, it is useless */ + if (!FORCE && last_remove == time_uptime) + return; + last_remove = time_uptime; + + /* + * because O_LIMIT refer to parent rules, during the first pass only + * remove child and mark any pending LIMIT_PARENT, and remove + * them in a second pass. + */ +next_pass: + for (i = 0 ; i < curr_dyn_buckets ; i++) { + for (prev=NULL, q = ipfw_dyn_v[i] ; q ; ) { + /* + * Logic can become complex here, so we split tests. + */ + if (q == keep_me) + goto next; + if (rule != NULL && rule != q->rule) + goto next; /* not the one we are looking for */ + if (q->dyn_type == O_LIMIT_PARENT) { + /* + * handle parent in the second pass, + * record we need one. + */ + max_pass = 1; + if (pass == 0) + goto next; + if (FORCE && q->count != 0 ) { + /* XXX should not happen! */ + printf("ipfw: OUCH! cannot remove rule," + " count %d\n", q->count); + } + } else { + if (!FORCE && + !TIME_LEQ( q->expire, time_uptime )) + goto next; + } + if (q->dyn_type != O_LIMIT_PARENT || !q->count) { + UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + continue; + } +next: + prev=q; + q=q->next; + } + } + if (pass++ < max_pass) + goto next_pass; +} + + +/** + * lookup a dynamic rule. + */ +static ipfw_dyn_rule * +lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, + struct tcphdr *tcp) +{ + /* + * stateful ipfw extensions. 
+ * Lookup into dynamic session queue + */ +#define MATCH_REVERSE 0 +#define MATCH_FORWARD 1 +#define MATCH_NONE 2 +#define MATCH_UNKNOWN 3 + int i, dir = MATCH_NONE; + ipfw_dyn_rule *prev, *q=NULL; + + IPFW_DYN_LOCK_ASSERT(); + + if (ipfw_dyn_v == NULL) + goto done; /* not found */ + i = hash_packet( pkt ); + for (prev=NULL, q = ipfw_dyn_v[i] ; q != NULL ; ) { + if (q->dyn_type == O_LIMIT_PARENT && q->count) + goto next; + if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ + UNLINK_DYN_RULE(prev, ipfw_dyn_v[i], q); + continue; + } + if (pkt->proto == q->id.proto && + q->dyn_type != O_LIMIT_PARENT) { + if (IS_IP6_FLOW_ID(pkt)) { + if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), + &(q->id.src_ip6)) && + IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), + &(q->id.dst_ip6)) && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port ) { + dir = MATCH_FORWARD; + break; + } + if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), + &(q->id.dst_ip6)) && + IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), + &(q->id.src_ip6)) && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port ) { + dir = MATCH_REVERSE; + break; + } + } else { + if (pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port ) { + dir = MATCH_FORWARD; + break; + } + if (pkt->src_ip == q->id.dst_ip && + pkt->dst_ip == q->id.src_ip && + pkt->src_port == q->id.dst_port && + pkt->dst_port == q->id.src_port ) { + dir = MATCH_REVERSE; + break; + } + } + } +next: + prev = q; + q = q->next; + } + if (q == NULL) + goto done; /* q = NULL, not found */ + + if ( prev != NULL) { /* found and not in front */ + prev->next = q->next; + q->next = ipfw_dyn_v[i]; + ipfw_dyn_v[i] = q; + } + if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ + u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); + +#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) +#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) + q->state |= (dir == MATCH_FORWARD ) ? 
flags : (flags << 8); + switch (q->state) { + case TH_SYN: /* opening */ + q->expire = time_uptime + dyn_syn_lifetime; + break; + + case BOTH_SYN: /* move to established */ + case BOTH_SYN | TH_FIN : /* one side tries to close */ + case BOTH_SYN | (TH_FIN << 8) : + if (tcp) { +#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) + u_int32_t ack = ntohl(tcp->th_ack); + if (dir == MATCH_FORWARD) { + if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) + q->ack_fwd = ack; + else { /* ignore out-of-sequence */ + break; + } + } else { + if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) + q->ack_rev = ack; + else { /* ignore out-of-sequence */ + break; + } + } + } + q->expire = time_uptime + dyn_ack_lifetime; + break; + + case BOTH_SYN | BOTH_FIN: /* both sides closed */ + if (dyn_fin_lifetime >= dyn_keepalive_period) + dyn_fin_lifetime = dyn_keepalive_period - 1; + q->expire = time_uptime + dyn_fin_lifetime; + break; + + default: +#if 0 + /* + * reset or some invalid combination, but can also + * occur if we use keep-state the wrong way. 
+ */ + if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) + printf("invalid state: 0x%x\n", q->state); +#endif + if (dyn_rst_lifetime >= dyn_keepalive_period) + dyn_rst_lifetime = dyn_keepalive_period - 1; + q->expire = time_uptime + dyn_rst_lifetime; + break; + } + } else if (pkt->proto == IPPROTO_UDP) { + q->expire = time_uptime + dyn_udp_lifetime; + } else { + /* other protocols */ + q->expire = time_uptime + dyn_short_lifetime; + } +done: + if (match_direction) + *match_direction = dir; + return q; +} + +static ipfw_dyn_rule * +lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, + struct tcphdr *tcp) +{ + ipfw_dyn_rule *q; + + IPFW_DYN_LOCK(); + q = lookup_dyn_rule_locked(pkt, match_direction, tcp); + if (q == NULL) + IPFW_DYN_UNLOCK(); + /* NB: return table locked when q is not NULL */ + return q; +} + +static void +realloc_dynamic_table(void) +{ + IPFW_DYN_LOCK_ASSERT(); + + /* + * Try reallocation, make sure we have a power of 2 and do + * not allow more than 64k entries. In case of overflow, + * default to 1024. + */ + + if (dyn_buckets > 65536) + dyn_buckets = 1024; + if ((dyn_buckets & (dyn_buckets-1)) != 0) { /* not a power of 2 */ + dyn_buckets = curr_dyn_buckets; /* reset */ + return; + } + curr_dyn_buckets = dyn_buckets; + if (ipfw_dyn_v != NULL) + free(ipfw_dyn_v, M_IPFW); + for (;;) { + ipfw_dyn_v = malloc(curr_dyn_buckets * sizeof(ipfw_dyn_rule *), + M_IPFW, M_NOWAIT | M_ZERO); + if (ipfw_dyn_v != NULL || curr_dyn_buckets <= 2) + break; + curr_dyn_buckets /= 2; + } +} + +/** + * Install state of type 'type' for a dynamic session. + * The hash table contains two type of rules: + * - regular rules (O_KEEP_STATE) + * - rules for sessions with limited number of sess per user + * (O_LIMIT). When they are created, the parent is + * increased by 1, and decreased on delete. In this case, + * the third parameter is the parent rule and not the chain. + * - "parent" rules for the above (O_LIMIT_PARENT). 
+ *
+ * Returns the new entry, already linked at the head of its hash bucket,
+ * or NULL if the table or the entry could not be allocated.
+ */
+static ipfw_dyn_rule *
+add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule)
+{
+	ipfw_dyn_rule *entry;
+	int slot;
+
+	IPFW_DYN_LOCK_ASSERT();
+
+	/*
+	 * (Re)build the table when there is none yet, or when it is empty
+	 * and a different size has been requested.
+	 */
+	if (ipfw_dyn_v == NULL ||
+	    (dyn_count == 0 && dyn_buckets != curr_dyn_buckets)) {
+		realloc_dynamic_table();
+		if (ipfw_dyn_v == NULL) {
+			/* failed ! */
+			return NULL;
+		}
+	}
+	/* the bucket must be computed after a possible table resize */
+	slot = hash_packet(id);
+
+	entry = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO);
+	if (entry == NULL) {
+		printf ("ipfw: sorry cannot allocate state\n");
+		return NULL;
+	}
+
+	if (dyn_type == O_LIMIT) {
+		/*
+		 * For O_LIMIT the caller passes the parent entry in 'rule':
+		 * take a reference on it and use the parent's static rule.
+		 */
+		ipfw_dyn_rule *limit_parent = (ipfw_dyn_rule *)rule;
+
+		if (limit_parent->dyn_type != O_LIMIT_PARENT)
+			panic("invalid parent");
+		limit_parent->count++;
+		entry->parent = limit_parent;
+		rule = limit_parent->rule;
+	}
+
+	entry->id = *id;
+	entry->expire = time_uptime + dyn_syn_lifetime;
+	entry->rule = rule;
+	entry->dyn_type = dyn_type;
+	entry->bcnt = 0;
+	entry->pcnt = 0;
+	entry->count = 0;
+
+	/* insert at the head of the bucket */
+	entry->bucket = slot;
+	entry->next = ipfw_dyn_v[slot];
+	ipfw_dyn_v[slot] = entry;
+	dyn_count++;
+	DEB(printf("ipfw: add dyn entry ty %d 0x%08x %d -> 0x%08x %d, total %d\n",
+	   dyn_type,
+	   (entry->id.src_ip), (entry->id.src_port),
+	   (entry->id.dst_ip), (entry->id.dst_port),
+	   dyn_count ); )
+	return entry;
+}
+
+/**
+ * lookup dynamic parent rule using pkt and rule as search keys.
+ * If the lookup fails, then install one.
+ */ +static ipfw_dyn_rule * +lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) +{ + ipfw_dyn_rule *q; + int i; + + IPFW_DYN_LOCK_ASSERT(); + + if (ipfw_dyn_v) { + int is_v6 = IS_IP6_FLOW_ID(pkt); + i = hash_packet( pkt ); + for (q = ipfw_dyn_v[i] ; q != NULL ; q=q->next) + if (q->dyn_type == O_LIMIT_PARENT && + rule== q->rule && + pkt->proto == q->id.proto && + pkt->src_port == q->id.src_port && + pkt->dst_port == q->id.dst_port && + ( + (is_v6 && + IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), + &(q->id.src_ip6)) && + IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), + &(q->id.dst_ip6))) || + (!is_v6 && + pkt->src_ip == q->id.src_ip && + pkt->dst_ip == q->id.dst_ip) + ) + ) { + q->expire = time_uptime + dyn_short_lifetime; + DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) + return q; + } + } + return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); +} + +/** + * Install dynamic state for rule type cmd->o.opcode + * + * Returns 1 (failure) if state is not installed because of errors or because + * session limitations are enforced. + */ +static int +install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, + struct ip_fw_args *args, uint32_t tablearg) +{ + static int last_log; + ipfw_dyn_rule *q; + struct in_addr da; + char src[48], dst[48]; + + src[0] = '\0'; + dst[0] = '\0'; + + DEB( + printf("ipfw: %s: type %d 0x%08x %u -> 0x%08x %u\n", + __func__, cmd->o.opcode, + (args->f_id.src_ip), (args->f_id.src_port), + (args->f_id.dst_ip), (args->f_id.dst_port)); + ) + + IPFW_DYN_LOCK(); + + q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); + + if (q != NULL) { /* should never occur */ + if (last_log != time_uptime) { + last_log = time_uptime; + printf("ipfw: %s: entry already present, done\n", + __func__); + } + IPFW_DYN_UNLOCK(); + return (0); + } + + if (dyn_count >= dyn_max) + /* Run out of slots, try to remove any expired rule. 
*/ + remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); + + if (dyn_count >= dyn_max) { + if (last_log != time_uptime) { + last_log = time_uptime; + printf("ipfw: %s: Too many dynamic rules\n", __func__); + } + IPFW_DYN_UNLOCK(); + return (1); /* cannot install, notify caller */ + } + + switch (cmd->o.opcode) { + case O_KEEP_STATE: /* bidir rule */ + add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); + break; + + case O_LIMIT: { /* limit number of sessions */ + struct ipfw_flow_id id; + ipfw_dyn_rule *parent; + uint32_t conn_limit; + uint16_t limit_mask = cmd->limit_mask; + + conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? + tablearg : cmd->conn_limit; + + DEB( + if (cmd->conn_limit == IP_FW_TABLEARG) + printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " + "(tablearg)\n", __func__, conn_limit); + else + printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", + __func__, conn_limit); + ) + + id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; + id.proto = args->f_id.proto; + id.addr_type = args->f_id.addr_type; + id.fib = M_GETFIB(args->m); + + if (IS_IP6_FLOW_ID (&(args->f_id))) { + if (limit_mask & DYN_SRC_ADDR) + id.src_ip6 = args->f_id.src_ip6; + if (limit_mask & DYN_DST_ADDR) + id.dst_ip6 = args->f_id.dst_ip6; + } else { + if (limit_mask & DYN_SRC_ADDR) + id.src_ip = args->f_id.src_ip; + if (limit_mask & DYN_DST_ADDR) + id.dst_ip = args->f_id.dst_ip; + } + if (limit_mask & DYN_SRC_PORT) + id.src_port = args->f_id.src_port; + if (limit_mask & DYN_DST_PORT) + id.dst_port = args->f_id.dst_port; + if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { + printf("ipfw: %s: add parent failed\n", __func__); + IPFW_DYN_UNLOCK(); + return (1); + } + + if (parent->count >= conn_limit) { + /* See if we can remove some expired rule. */ + remove_dyn_rule(rule, parent); + if (parent->count >= conn_limit) { + if (fw_verbose && last_log != time_uptime) { + last_log = time_uptime; +#ifdef INET6 + /* + * XXX IPv6 flows are not + * supported yet. 
+ */ + if (IS_IP6_FLOW_ID(&(args->f_id))) { + char ip6buf[INET6_ADDRSTRLEN]; + snprintf(src, sizeof(src), + "[%s]", ip6_sprintf(ip6buf, + &args->f_id.src_ip6)); + snprintf(dst, sizeof(dst), + "[%s]", ip6_sprintf(ip6buf, + &args->f_id.dst_ip6)); + } else +#endif + { + da.s_addr = + htonl(args->f_id.src_ip); + inet_ntoa_r(da, src); + da.s_addr = + htonl(args->f_id.dst_ip); + inet_ntoa_r(da, dst); + } + log(LOG_SECURITY | LOG_DEBUG, + "ipfw: %d %s %s:%u -> %s:%u, %s\n", + parent->rule->rulenum, + "drop session", + src, (args->f_id.src_port), + dst, (args->f_id.dst_port), + "too many entries"); + } + IPFW_DYN_UNLOCK(); + return (1); + } + } + add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); + break; + } + default: + printf("ipfw: %s: unknown dynamic rule type %u\n", + __func__, cmd->o.opcode); + IPFW_DYN_UNLOCK(); + return (1); + } + + /* XXX just set lifetime */ + lookup_dyn_rule_locked(&args->f_id, NULL, NULL); + + IPFW_DYN_UNLOCK(); + return (0); +} + +/* + * Generate a TCP packet, containing either a RST or a keepalive. + * When flags & TH_RST, we are sending a RST packet, because of a + * "reset" action matched the packet. + * Otherwise we are sending a keepalive, and flags & TH_ + * The 'replyto' mbuf is the mbuf being replied to, if any, and is required + * so that MAC can label the reply appropriately. 
+ */
+static struct mbuf *
+send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
+    u_int32_t ack, int flags)
+{
+#if defined( __linux__ ) || defined( _WIN32 )
+	return NULL;	/* on these platforms no packet is ever built */
+#else
+	struct mbuf *m;
+	struct ip *ip;
+	struct tcphdr *tcp;
+
+	MGETHDR(m, M_DONTWAIT, MT_DATA);
+	if (m == 0)
+		return (NULL);
+	m->m_pkthdr.rcvif = (struct ifnet *)0;
+
+	M_SETFIB(m, id->fib);
+#ifdef MAC
+	if (replyto != NULL)
+		mac_create_mbuf_netlayer(replyto, m);
+	else
+		mac_create_mbuf_from_firewall(m);
+#else
+	(void)replyto;		/* don't warn about unused arg */
+#endif
+
+	m->m_pkthdr.len = m->m_len = sizeof(struct ip) + sizeof(struct tcphdr);
+	m->m_data += max_linkhdr;	/* leave max_linkhdr bytes free in front of the IP header */
+
+	ip = mtod(m, struct ip *);
+	bzero(ip, m->m_len);
+	tcp = (struct tcphdr *)(ip + 1); /* no IP options */
+	ip->ip_p = IPPROTO_TCP;
+	tcp->th_off = 5;
+	/*
+	 * Assume we are sending a RST (or a keepalive in the reverse
+	 * direction), swap src and destination addresses and ports.
+	 */
+	ip->ip_src.s_addr = htonl(id->dst_ip);
+	ip->ip_dst.s_addr = htonl(id->src_ip);
+	tcp->th_sport = htons(id->dst_port);
+	tcp->th_dport = htons(id->src_port);
+	if (flags & TH_RST) {	/* we are sending a RST */
+		if (flags & TH_ACK) {
+			tcp->th_seq = htonl(ack);
+			tcp->th_ack = htonl(0);
+			tcp->th_flags = TH_RST;
+		} else {
+			if (flags & TH_SYN)
+				seq++;
+			tcp->th_seq = htonl(0);
+			tcp->th_ack = htonl(seq);
+			tcp->th_flags = TH_RST | TH_ACK;
+		}
+	} else {
+		/*
+		 * We are sending a keepalive. flags & TH_SYN determines
+		 * the direction, forward if set, reverse if clear.
+		 * NOTE: seq and ack are always assumed to be correct
+		 * as set by the caller. This may be confusing...
+		 */
+		if (flags & TH_SYN) {
+			/*
+			 * we have to rewrite the correct addresses!
+			 */
+			ip->ip_dst.s_addr = htonl(id->dst_ip);
+			ip->ip_src.s_addr = htonl(id->src_ip);
+			tcp->th_dport = htons(id->dst_port);
+			tcp->th_sport = htons(id->src_port);
+		}
+		tcp->th_seq = htonl(seq);
+		tcp->th_ack = htonl(ack);
+		tcp->th_flags = TH_ACK;
+	}
+	/*
+	 * set ip_len to the payload size so we can compute
+	 * the tcp checksum on the pseudoheader
+	 * XXX check this, could save a couple of words ?
+	 */
+	ip->ip_len = htons(sizeof(struct tcphdr));
+	tcp->th_sum = in_cksum(m, m->m_pkthdr.len);
+	/*
+	 * now fill fields left out earlier
+	 */
+	ip->ip_ttl = ip_defttl;
+	ip->ip_len = m->m_pkthdr.len;	/* stored without htons(), unlike the temporary value above */
+	m->m_flags |= M_SKIP_FIREWALL;
+	return (m);
+#endif /* !__linux__ */
+}
+
+/*
+ * sends a reject message, consuming the mbuf passed as an argument.
+ * args->m is set to NULL before returning.
+ */
+static void
+send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip)
+{
+
+#if 0
+	/* XXX When ip is not guaranteed to be at mtod() we will
+	 * need to account for this */
+	 * The mbuf will however be thrown away so we can adjust it.
+	 * Remember we did an m_pullup on it already so we
+	 * can make some assumptions about contiguousness.
+	 */
+	if (args->L3offset)
+		m_adj(m, args->L3offset);
+#endif
+	if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */
+		/* We need the IP header in host order for icmp_error(). */
+#if !defined( __linux__ ) && !defined( _WIN32 )
+		if (args->eh != NULL) {
+			ip->ip_len = ntohs(ip->ip_len);
+			ip->ip_off = ntohs(ip->ip_off);
+		}
+#endif
+		icmp_error(args->m, ICMP_UNREACH, code, 0L, 0);
+	} else if (args->f_id.proto == IPPROTO_TCP) {
+		struct tcphdr *const tcp =
+		    L3HDR(struct tcphdr, mtod(args->m, struct ip *));
+		if ( (tcp->th_flags & TH_RST) == 0) {	/* never answer a RST with a RST */
+			struct mbuf *m;
+			m = send_pkt(args->m, &(args->f_id),
+				ntohl(tcp->th_seq), ntohl(tcp->th_ack),
+				tcp->th_flags | TH_RST);
+			if (m != NULL)
+				ip_output(m, NULL, NULL, 0, NULL, NULL);
+		}
+		m_freem(args->m);
+	} else
+		m_freem(args->m);
+	args->m = NULL;
+}
+
+/**
+ *
+ * Given an ip_fw *, lookup_next_rule will return a pointer
+ * to the next rule, which can be either the jump
+ * target (for skipto instructions) or the next one in the list (in
+ * all other cases including a missing jump target).
+ * The result is also written in the "next_rule" field of the rule.
+ * Backward jumps are not allowed, so start looking from the next
+ * rule...
+ *
+ * This never returns NULL -- in case we do not have an exact match,
+ * the next rule is returned. When the ruleset is changed,
+ * pointers are flushed so we are always correct.
+ */ + +static struct ip_fw * +lookup_next_rule(struct ip_fw *me, u_int32_t tablearg) +{ + struct ip_fw *rule = NULL; + ipfw_insn *cmd; + u_int16_t rulenum; + + /* look for action, in case it is a skipto */ + cmd = ACTION_PTR(me); + if (cmd->opcode == O_LOG) + cmd += F_LEN(cmd); + if (cmd->opcode == O_ALTQ) + cmd += F_LEN(cmd); + if (cmd->opcode == O_TAG) + cmd += F_LEN(cmd); + if (cmd->opcode == O_SKIPTO ) { + if (tablearg != 0) { + rulenum = (u_int16_t)tablearg; + } else { + rulenum = cmd->arg1; + } + for (rule = me->next; rule ; rule = rule->next) { + if (rule->rulenum >= rulenum) { + break; + } + } + } + if (rule == NULL) /* failure or not a skipto */ + rule = me->next; + me->next_rule = rule; + return rule; +} + +#ifdef radix +static int +add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, + uint8_t mlen, uint32_t value) +{ + struct radix_node_head *rnh; + struct table_entry *ent; + struct radix_node *rn; + + if (tbl >= IPFW_TABLES_MAX) + return (EINVAL); + rnh = ch->tables[tbl]; + ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); + if (ent == NULL) + return (ENOMEM); + ent->value = value; + ent->addr.sin_len = ent->mask.sin_len = 8; + ent->mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); + ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; + IPFW_WLOCK(ch); + rn = rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent); + if (rn == NULL) { + IPFW_WUNLOCK(ch); + free(ent, M_IPFW_TBL); + return (EEXIST); + } + IPFW_WUNLOCK(ch); + return (0); +} + +static int +del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, + uint8_t mlen) +{ + struct radix_node_head *rnh; + struct table_entry *ent; + struct sockaddr_in sa, mask; + + if (tbl >= IPFW_TABLES_MAX) + return (EINVAL); + rnh = ch->tables[tbl]; + sa.sin_len = mask.sin_len = 8; + mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); + sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; + IPFW_WLOCK(ch); + ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); + if (ent == NULL) { + IPFW_WUNLOCK(ch); + return (ESRCH); + } + IPFW_WUNLOCK(ch); + free(ent, M_IPFW_TBL); + return (0); +} + +static int +flush_table_entry(struct radix_node *rn, void *arg) +{ + struct radix_node_head * const rnh = arg; + struct table_entry *ent; + + ent = (struct table_entry *) + rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); + if (ent != NULL) + free(ent, M_IPFW_TBL); + return (0); +} + +static int +flush_table(struct ip_fw_chain *ch, uint16_t tbl) +{ + struct radix_node_head *rnh; + + IPFW_WLOCK_ASSERT(ch); + + if (tbl >= IPFW_TABLES_MAX) + return (EINVAL); + rnh = ch->tables[tbl]; + KASSERT(rnh != NULL, ("NULL IPFW table")); + rnh->rnh_walktree(rnh, flush_table_entry, rnh); + return (0); +} +#endif + +static void +flush_tables(struct ip_fw_chain *ch) +{ +#ifdef radix + uint16_t tbl; + + IPFW_WLOCK_ASSERT(ch); + + for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) + flush_table(ch, tbl); +#endif +} + +static int +init_tables(struct ip_fw_chain *ch) +{ +#ifdef radix + int i; + uint16_t j; + + for (i = 0; i < IPFW_TABLES_MAX; i++) { + if (!rn_inithead((void **)&ch->tables[i], 32)) { + for (j = 0; j < i; j++) { + (void) flush_table(ch, j); + } + return (ENOMEM); + } + } +#endif + return (0); +} + +static int +lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, + uint32_t *val) +{ +#ifdef radix + struct radix_node_head *rnh; + struct table_entry *ent; + struct sockaddr_in sa; + + if (tbl >= IPFW_TABLES_MAX) + return (0); + rnh = ch->tables[tbl]; + sa.sin_len = 8; + sa.sin_addr.s_addr = addr; + ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); + if (ent != NULL) { + *val = ent->value; + return (1); + } +#endif + return (0); +} + +#ifdef radix +static int +count_table_entry(struct radix_node *rn, void *arg) +{ + u_int32_t * const cnt = arg; + + (*cnt)++; + return 
(0); +} + +static int +count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) +{ + struct radix_node_head *rnh; + + if (tbl >= IPFW_TABLES_MAX) + return (EINVAL); + rnh = ch->tables[tbl]; + *cnt = 0; + rnh->rnh_walktree(rnh, count_table_entry, cnt); + return (0); +} + +static int +dump_table_entry(struct radix_node *rn, void *arg) +{ + struct table_entry * const n = (struct table_entry *)rn; + ipfw_table * const tbl = arg; + ipfw_table_entry *ent; + + if (tbl->cnt == tbl->size) + return (1); + ent = &tbl->ent[tbl->cnt]; + ent->tbl = tbl->tbl; + if (in_nullhost(n->mask.sin_addr)) + ent->masklen = 0; + else + ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); + ent->addr = n->addr.sin_addr.s_addr; + ent->value = n->value; + tbl->cnt++; + return (0); +} + +static int +dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) +{ + struct radix_node_head *rnh; + + if (tbl->tbl >= IPFW_TABLES_MAX) + return (EINVAL); + rnh = ch->tables[tbl->tbl]; + tbl->cnt = 0; + rnh->rnh_walktree(rnh, dump_table_entry, tbl); + return (0); +} +#endif + +#if 0 +static void +fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp) +{ + struct ucred *cr; + + cr = inp->inp_cred; + ugp->fw_prid = jailed(cr) ? cr->cr_prison->pr_id : -1; + ugp->fw_uid = cr->cr_uid; + ugp->fw_ngroups = cr->cr_ngroups; + bcopy(cr->cr_groups, ugp->fw_groups, sizeof(ugp->fw_groups)); +} +#endif /* no uigid support */ + +static int +check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, + struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, + u_int16_t src_port, struct ip_fw_ugid *ugp, int *ugid_lookupp, + struct inpcb *inp) +{ +#if 1 + return 0; +#else + struct inpcbinfo *pi; + int wildcard; + struct inpcb *pcb; + int match; + gid_t *gp; + + /* + * Check to see if the UDP or TCP stack supplied us with + * the PCB. If so, rather then holding a lock and looking + * up the PCB, we can use the one that was supplied. 
+ */ + if (inp && *ugid_lookupp == 0) { + INP_LOCK_ASSERT(inp); + if (inp->inp_socket != NULL) { + fill_ugid_cache(inp, ugp); + *ugid_lookupp = 1; + } else + *ugid_lookupp = -1; + } + /* + * If we have already been here and the packet has no + * PCB entry associated with it, then we can safely + * assume that this is a no match. + */ + if (*ugid_lookupp == -1) + return (0); + if (proto == IPPROTO_TCP) { + wildcard = 0; + pi = &tcbinfo; + } else if (proto == IPPROTO_UDP) { + wildcard = INPLOOKUP_WILDCARD; + pi = &udbinfo; + } else + return 0; + match = 0; + if (*ugid_lookupp == 0) { + INP_INFO_RLOCK(pi); + pcb = (oif) ? + in_pcblookup_hash(pi, + dst_ip, htons(dst_port), + src_ip, htons(src_port), + wildcard, oif) : + in_pcblookup_hash(pi, + src_ip, htons(src_port), + dst_ip, htons(dst_port), + wildcard, NULL); + if (pcb != NULL) { + fill_ugid_cache(pcb, ugp); + *ugid_lookupp = 1; + } + INP_INFO_RUNLOCK(pi); + if (*ugid_lookupp == 0) { + /* + * If the lookup did not yield any results, there + * is no sense in coming back and trying again. So + * we can set lookup to -1 and ensure that we wont + * bother the pcb system again. + */ + *ugid_lookupp = -1; + return (0); + } + } + if (insn->o.opcode == O_UID) + match = (ugp->fw_uid == (uid_t)insn->d[0]); + else if (insn->o.opcode == O_GID) { + for (gp = ugp->fw_groups; + gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++) + if (*gp == (gid_t)insn->d[0]) { + match = 1; + break; + } + } else if (insn->o.opcode == O_JAIL) + match = (ugp->fw_prid == (int)insn->d[0]); + return match; +#endif +} + +/* + * The main check routine for the firewall. + * + * All arguments are in args so we can modify them and return them + * back to the caller. + * + * Parameters: + * + * args->m (in/out) The packet; we set to NULL when/if we nuke it. + * Starts with the IP header. + * args->eh (in) Mac header if present, or NULL for layer3 packet. + * args->L3offset Number of bytes bypassed if we came from L2. + * e.g. 
often sizeof(eh) ** NOTYET ** + * args->oif Outgoing interface, or NULL if packet is incoming. + * The incoming interface is in the mbuf. (in) + * args->divert_rule (in/out) + * Skip up to the first rule past this rule number; + * upon return, non-zero port number for divert or tee. + * + * args->rule Pointer to the last matching rule (in/out) + * args->next_hop Socket we are forwarding to (out). + * args->f_id Addresses grabbed from the packet (out) + * args->cookie a cookie depending on rule action + * + * Return value: + * + * IP_FW_PASS the packet must be accepted + * IP_FW_DENY the packet must be dropped + * IP_FW_DIVERT divert packet, port in m_tag + * IP_FW_TEE tee packet, port in m_tag + * IP_FW_DUMMYNET to dummynet, pipe in args->cookie + * IP_FW_NETGRAPH into netgraph, cookie args->cookie + * + */ +int +ipfw_chk(struct ip_fw_args *args) +{ + /* + * Local variables holding state during the processing of a packet: + * + * IMPORTANT NOTE: to speed up the processing of rules, there + * are some assumption on the values of the variables, which + * are documented here. Should you change them, please check + * the implementation of the various instructions to make sure + * that they still work. + * + * args->eh The MAC header. It is non-null for a layer2 + * packet, it is NULL for a layer-3 packet. + * **notyet** + * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. + * + * m | args->m Pointer to the mbuf, as received from the caller. + * It may change if ipfw_chk() does an m_pullup, or if it + * consumes the packet because it calls send_reject(). + * XXX This has to change, so that ipfw_chk() never modifies + * or consumes the buffer. + * ip is the beginning of the ip(4 or 6) header. + * Calculated by adding the L3offset to the start of data. + * (Until we start using L3offset, the packet is + * supposed to start with the ip header). 
+ */ + struct mbuf *m = args->m; + struct ip *ip = mtod(m, struct ip *); + + /* + * For rules which contain uid/gid or jail constraints, cache + * a copy of the users credentials after the pcb lookup has been + * executed. This will speed up the processing of rules with + * these types of constraints, as well as decrease contention + * on pcb related locks. + */ + struct ip_fw_ugid fw_ugid_cache; + int ugid_lookup = 0; + + /* + * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG + * associated with a packet input on a divert socket. This + * will allow to distinguish traffic and its direction when + * it originates from a divert socket. + */ + u_int divinput_flags = 0; + + /* + * oif | args->oif If NULL, ipfw_chk has been called on the + * inbound path (ether_input, ip_input). + * If non-NULL, ipfw_chk has been called on the outbound path + * (ether_output, ip_output). + */ + struct ifnet *oif = args->oif; + + struct ip_fw *f = NULL; /* matching rule */ + int retval = 0; + + /* + * hlen The length of the IP header. + */ + u_int hlen = 0; /* hlen >0 means we have an IP pkt */ + + /* + * offset The offset of a fragment. offset != 0 means that + * we have a fragment at this offset of an IPv4 packet. + * offset == 0 means that (if this is an IPv4 packet) + * this is the first or only fragment. + * For IPv6 offset == 0 means there is no Fragment Header. + * If offset != 0 for IPv6 always use correct mask to + * get the correct offset because we add IP6F_MORE_FRAG + * to be able to dectect the first fragment which would + * otherwise have offset = 0. + */ + u_short offset = 0; + + /* + * Local copies of addresses. They are only valid if we have + * an IP packet. + * + * proto The protocol. Set to 0 for non-ip packets, + * or to the protocol read from the packet otherwise. + * proto != 0 means that we have an IPv4 packet. + * + * src_port, dst_port port numbers, in HOST format. Only + * valid for TCP and UDP packets. 
+ * + * src_ip, dst_ip ip addresses, in NETWORK format. + * Only valid for IPv4 packets. + */ + u_int8_t proto; + u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ + struct in_addr src_ip, dst_ip; /* NOTE: network format */ + u_int16_t ip_len=0; + int pktlen; + u_int16_t etype = 0; /* Host order stored ether type */ + + /* + * dyn_dir = MATCH_UNKNOWN when rules unchecked, + * MATCH_NONE when checked and not matched (q = NULL), + * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) + */ + int dyn_dir = MATCH_UNKNOWN; + ipfw_dyn_rule *q = NULL; + struct ip_fw_chain *chain = &layer3_chain; + struct m_tag *mtag; + + /* + * We store in ulp a pointer to the upper layer protocol header. + * In the ipv4 case this is easy to determine from the header, + * but for ipv6 we might have some additional headers in the middle. + * ulp is NULL if not found. + */ + void *ulp = NULL; /* upper layer protocol pointer. */ + /* XXX ipv6 variables */ + int is_ipv6 = 0; + u_int16_t ext_hd = 0; /* bits vector for extension header filtering */ + /* end of ipv6 variables */ + int is_ipv4 = 0; + + if (m->m_flags & M_SKIP_FIREWALL) + return (IP_FW_PASS); /* accept */ + + dst_ip.s_addr = 0; /* make sure it is initialized */ + src_ip.s_addr = 0; /* make sure it is initialized */ + pktlen = m->m_pkthdr.len; + args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ + proto = args->f_id.proto = 0; /* mark f_id invalid */ + /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ + +/* + * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, + * then it sets p to point at the offset "len" in the mbuf. WARNING: the + * pointer might become stale after other pullups (but we never use it + * this way). 
+ */ +#define PULLUP_TO(_len, p, T) \ +do { \ + int x = (_len) + sizeof(T); \ + if ((m)->m_len < x) { \ + goto pullup_failed; \ + } \ + p = (mtod(m, char *) + (_len)); \ +} while (0) + + /* + * if we have an ether header, + */ + if (args->eh) + etype = ntohs(args->eh->ether_type); + + /* Identify IP packets and fill up variables. */ + if (pktlen >= sizeof(struct ip6_hdr) && + (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { + struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; + is_ipv6 = 1; + args->f_id.addr_type = 6; + hlen = sizeof(struct ip6_hdr); + proto = ip6->ip6_nxt; + + /* Search extension headers to find upper layer protocols */ + while (ulp == NULL) { + switch (proto) { + case IPPROTO_ICMPV6: + PULLUP_TO(hlen, ulp, struct icmp6_hdr); + args->f_id.flags = ICMP6(ulp)->icmp6_type; + break; + + case IPPROTO_TCP: + PULLUP_TO(hlen, ulp, struct tcphdr); + dst_port = TCP(ulp)->th_dport; + src_port = TCP(ulp)->th_sport; + args->f_id.flags = TCP(ulp)->th_flags; + break; + + case IPPROTO_SCTP: + PULLUP_TO(hlen, ulp, struct sctphdr); + src_port = SCTP(ulp)->src_port; + dst_port = SCTP(ulp)->dest_port; + break; + + case IPPROTO_UDP: + PULLUP_TO(hlen, ulp, struct udphdr); + dst_port = UDP(ulp)->uh_dport; + src_port = UDP(ulp)->uh_sport; + break; + + case IPPROTO_HOPOPTS: /* RFC 2460 */ + PULLUP_TO(hlen, ulp, struct ip6_hbh); + ext_hd |= EXT_HOPOPTS; + hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; + proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; + ulp = NULL; + break; + + case IPPROTO_ROUTING: /* RFC 2460 */ + PULLUP_TO(hlen, ulp, struct ip6_rthdr); + switch (((struct ip6_rthdr *)ulp)->ip6r_type) { + case 0: + ext_hd |= EXT_RTHDR0; + break; + case 2: + ext_hd |= EXT_RTHDR2; + break; + default: + printf("IPFW2: IPV6 - Unknown Routing " + "Header type(%d)\n", + ((struct ip6_rthdr *)ulp)->ip6r_type); + if (fw_deny_unknown_exthdrs) + return (IP_FW_DENY); + break; + } + ext_hd |= EXT_ROUTING; + hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; + proto = 
((struct ip6_rthdr *)ulp)->ip6r_nxt; + ulp = NULL; + break; + + case IPPROTO_FRAGMENT: /* RFC 2460 */ + PULLUP_TO(hlen, ulp, struct ip6_frag); + ext_hd |= EXT_FRAGMENT; + hlen += sizeof (struct ip6_frag); + proto = ((struct ip6_frag *)ulp)->ip6f_nxt; + offset = ((struct ip6_frag *)ulp)->ip6f_offlg & + IP6F_OFF_MASK; + /* Add IP6F_MORE_FRAG for offset of first + * fragment to be != 0. */ + offset |= ((struct ip6_frag *)ulp)->ip6f_offlg & + IP6F_MORE_FRAG; + if (offset == 0) { + printf("IPFW2: IPV6 - Invalid Fragment " + "Header\n"); + if (fw_deny_unknown_exthdrs) + return (IP_FW_DENY); + break; + } + args->f_id.frag_id6 = + ntohl(((struct ip6_frag *)ulp)->ip6f_ident); + ulp = NULL; + break; + + case IPPROTO_DSTOPTS: /* RFC 2460 */ + PULLUP_TO(hlen, ulp, struct ip6_hbh); + ext_hd |= EXT_DSTOPTS; + hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; + proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; + ulp = NULL; + break; + + case IPPROTO_AH: /* RFC 2402 */ + PULLUP_TO(hlen, ulp, struct ip6_ext); + ext_hd |= EXT_AH; + hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; + proto = ((struct ip6_ext *)ulp)->ip6e_nxt; + ulp = NULL; + break; + + case IPPROTO_ESP: /* RFC 2406 */ + PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ + /* Anything past Seq# is variable length and + * data past this ext. header is encrypted. */ + ext_hd |= EXT_ESP; + break; + + case IPPROTO_NONE: /* RFC 2460 */ + /* + * Packet ends here, and IPv6 header has + * already been pulled up. If ip6e_len!=0 + * then octets must be ignored. + */ + ulp = ip; /* non-NULL to get out of loop. */ + break; + + case IPPROTO_OSPFIGP: + /* XXX OSPF header check? */ + PULLUP_TO(hlen, ulp, struct ip6_ext); + break; + + case IPPROTO_PIM: + /* XXX PIM header check? 
*/ + PULLUP_TO(hlen, ulp, struct pim); + break; + + case IPPROTO_CARP: + PULLUP_TO(hlen, ulp, struct carp_header); + if (((struct carp_header *)ulp)->carp_version != + CARP_VERSION) + return (IP_FW_DENY); + if (((struct carp_header *)ulp)->carp_type != + CARP_ADVERTISEMENT) + return (IP_FW_DENY); + break; + + case IPPROTO_IPV6: /* RFC 2893 */ + PULLUP_TO(hlen, ulp, struct ip6_hdr); + break; + + case IPPROTO_IPV4: /* RFC 2893 */ + PULLUP_TO(hlen, ulp, struct ip); + break; + + default: + printf("IPFW2: IPV6 - Unknown Extension " + "Header(%d), ext_hd=%x\n", proto, ext_hd); + if (fw_deny_unknown_exthdrs) + return (IP_FW_DENY); + PULLUP_TO(hlen, ulp, struct ip6_ext); + break; + } /*switch */ + } + ip = mtod(m, struct ip *); + ip6 = (struct ip6_hdr *)ip; + args->f_id.src_ip6 = ip6->ip6_src; + args->f_id.dst_ip6 = ip6->ip6_dst; + args->f_id.src_ip = 0; + args->f_id.dst_ip = 0; + args->f_id.flow_id6 = ntohl(ip6->ip6_flow); + } else if (pktlen >= sizeof(struct ip) && + (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { + is_ipv4 = 1; + hlen = ip->ip_hl << 2; + args->f_id.addr_type = 4; + + /* + * Collect parameters into local variables for faster matching. + */ + proto = ip->ip_p; + src_ip = ip->ip_src; + dst_ip = ip->ip_dst; + + if (1 || args->eh != NULL) { /* layer 2 packets are as on the wire */ + offset = ntohs(ip->ip_off) & IP_OFFMASK; + ip_len = ntohs(ip->ip_len); + } else { + offset = ip->ip_off & IP_OFFMASK; + ip_len = ip->ip_len; + } + pktlen = ip_len < pktlen ? 
ip_len : pktlen; + + if (offset == 0) { + switch (proto) { + case IPPROTO_TCP: + PULLUP_TO(hlen, ulp, struct tcphdr); + dst_port = TCP(ulp)->th_dport; + src_port = TCP(ulp)->th_sport; + args->f_id.flags = TCP(ulp)->th_flags; + break; + + case IPPROTO_UDP: + PULLUP_TO(hlen, ulp, struct udphdr); + dst_port = UDP(ulp)->uh_dport; + src_port = UDP(ulp)->uh_sport; + break; + + case IPPROTO_ICMP: + PULLUP_TO(hlen, ulp, struct icmphdr); + args->f_id.flags = ICMP(ulp)->icmp_type; + break; + + default: + break; + } + } + + ip = mtod(m, struct ip *); + args->f_id.src_ip = ntohl(src_ip.s_addr); + args->f_id.dst_ip = ntohl(dst_ip.s_addr); + } +#undef PULLUP_TO + if (proto) { /* we may have port numbers, store them */ + args->f_id.proto = proto; + args->f_id.src_port = src_port = ntohs(src_port); + args->f_id.dst_port = dst_port = ntohs(dst_port); + } + + IPFW_RLOCK(chain); + mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); + if (args->rule) { + /* + * Packet has already been tagged. Look for the next rule + * to restart processing. + * + * If fw_one_pass != 0 then just accept it. + * XXX should not happen here, but optimized out in + * the caller. + */ + if (fw_one_pass) { + IPFW_RUNLOCK(chain); + return (IP_FW_PASS); + } + + f = args->rule->next_rule; + if (f == NULL) + f = lookup_next_rule(args->rule, 0); + } else { + /* + * Find the starting rule. It can be either the first + * one, or the one after divert_rule if asked so. + */ + int skipto = mtag ? 
divert_cookie(mtag) : 0; + + f = chain->rules; + if (args->eh == NULL && skipto != 0) { + if (skipto >= IPFW_DEFAULT_RULE) { + IPFW_RUNLOCK(chain); + return (IP_FW_DENY); /* invalid */ + } + while (f && f->rulenum <= skipto) + f = f->next; + if (f == NULL) { /* drop packet */ + IPFW_RUNLOCK(chain); + return (IP_FW_DENY); + } + } + } + /* reset divert rule to avoid confusion later */ + if (mtag) { + divinput_flags = divert_info(mtag) & + (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG); + m_tag_delete(m, mtag); + } + + /* + * Now scan the rules, and parse microinstructions for each rule. + */ + for (; f; f = f->next) { + ipfw_insn *cmd; + uint32_t tablearg = 0; + int l, cmdlen, skip_or; /* skip rest of OR block */ + +again: + if (set_disable & (1 << f->set) ) + continue; + + skip_or = 0; + for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; + l -= cmdlen, cmd += cmdlen) { + int match; + + /* + * check_body is a jump target used when we find a + * CHECK_STATE, and need to jump to the body of + * the target rule. + */ + +check_body: + cmdlen = F_LEN(cmd); + /* + * An OR block (insn_1 || .. || insn_n) has the + * F_OR bit set in all but the last instruction. + * The first match will set "skip_or", and cause + * the following instructions to be skipped until + * past the one with the F_OR bit clear. + */ + if (skip_or) { /* skip this instruction */ + if ((cmd->len & F_OR) == 0) + skip_or = 0; /* next one is good */ + continue; + } + match = 0; /* set to 1 if we succeed */ + + switch (cmd->opcode) { + /* + * The first set of opcodes compares the packet's + * fields with some pattern, setting 'match' if a + * match is found. At the end of the loop there is + * logic to deal with F_NOT and F_OR flags associated + * with the opcode. 
+ */ + case O_NOP: + match = 1; + break; + + case O_FORWARD_MAC: + printf("ipfw: opcode %d unimplemented\n", + cmd->opcode); + break; + + case O_GID: + case O_UID: + case O_JAIL: + /* + * We only check offset == 0 && proto != 0, + * as this ensures that we have a + * packet with the ports info. + */ + if (offset!=0) + break; + if (is_ipv6) /* XXX to be fixed later */ + break; + if (proto == IPPROTO_TCP || + proto == IPPROTO_UDP) + match = check_uidgid( + (ipfw_insn_u32 *)cmd, + proto, oif, + dst_ip, dst_port, + src_ip, src_port, &fw_ugid_cache, + &ugid_lookup, args->inp); + break; + + case O_RECV: + match = iface_match(m->m_pkthdr.rcvif, + (ipfw_insn_if *)cmd); + break; + + case O_XMIT: + match = iface_match(oif, (ipfw_insn_if *)cmd); + break; + + case O_VIA: + match = iface_match(oif ? oif : + m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); + break; + + case O_MACADDR2: + if (args->eh != NULL) { /* have MAC header */ + u_int32_t *want = (u_int32_t *) + ((ipfw_insn_mac *)cmd)->addr; + u_int32_t *mask = (u_int32_t *) + ((ipfw_insn_mac *)cmd)->mask; + u_int32_t *hdr = (u_int32_t *)args->eh; + + match = + ( want[0] == (hdr[0] & mask[0]) && + want[1] == (hdr[1] & mask[1]) && + want[2] == (hdr[2] & mask[2]) ); + } + break; + + case O_MAC_TYPE: + if (args->eh != NULL) { + u_int16_t *p = + ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = cmdlen - 1; !match && i>0; + i--, p += 2) + match = (etype >= p[0] && + etype <= p[1]); + } + break; + + case O_FRAG: + match = (offset != 0); + break; + + case O_IN: /* "out" is "not in" */ + match = (oif == NULL); + break; + + case O_LAYER2: + match = (args->eh != NULL); + break; + + case O_DIVERTED: + match = (cmd->arg1 & 1 && divinput_flags & + IP_FW_DIVERT_LOOPBACK_FLAG) || + (cmd->arg1 & 2 && divinput_flags & + IP_FW_DIVERT_OUTPUT_FLAG); + break; + + case O_PROTO: + /* + * We do not allow an arg of 0 so the + * check of "proto" only suffices. 
+ */ + match = (proto == cmd->arg1); + break; + + case O_IP_SRC: + match = is_ipv4 && + (((ipfw_insn_ip *)cmd)->addr.s_addr == + src_ip.s_addr); + break; + + case O_IP_SRC_LOOKUP: + case O_IP_DST_LOOKUP: + if (is_ipv4) { + uint32_t a = + (cmd->opcode == O_IP_DST_LOOKUP) ? + dst_ip.s_addr : src_ip.s_addr; + uint32_t v = 0; + + match = lookup_table(chain, cmd->arg1, a, + &v); + if (!match) + break; + if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) + match = + ((ipfw_insn_u32 *)cmd)->d[0] == v; + else + tablearg = v; + } + break; + + case O_IP_SRC_MASK: + case O_IP_DST_MASK: + if (is_ipv4) { + uint32_t a = + (cmd->opcode == O_IP_DST_MASK) ? + dst_ip.s_addr : src_ip.s_addr; + uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; + int i = cmdlen-1; + + for (; !match && i>0; i-= 2, p+= 2) + match = (p[0] == (a & p[1])); + } + break; + + case O_IP_SRC_ME: + if (is_ipv4) { + struct ifnet *tif; + + INADDR_TO_IFP(src_ip, tif); + match = (tif != NULL); + } + break; + + case O_IP_DST_SET: + case O_IP_SRC_SET: + if (is_ipv4) { + u_int32_t *d = (u_int32_t *)(cmd+1); + u_int32_t addr = + cmd->opcode == O_IP_DST_SET ? + args->f_id.dst_ip : + args->f_id.src_ip; + + if (addr < d[0]) + break; + addr -= d[0]; /* subtract base */ + match = (addr < cmd->arg1) && + ( d[ 1 + (addr>>5)] & + (1<<(addr & 0x1f)) ); + } + break; + + case O_IP_DST: + match = is_ipv4 && + (((ipfw_insn_ip *)cmd)->addr.s_addr == + dst_ip.s_addr); + break; + + case O_IP_DST_ME: + if (is_ipv4) { + struct ifnet *tif; + + INADDR_TO_IFP(dst_ip, tif); + match = (tif != NULL); + } + break; + + case O_IP_SRCPORT: + case O_IP_DSTPORT: + /* + * offset == 0 && proto != 0 is enough + * to guarantee that we have a + * packet with port info. + */ + if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) + && offset == 0) { + u_int16_t x = + (cmd->opcode == O_IP_SRCPORT) ? 
+ src_port : dst_port ; + u_int16_t *p = + ((ipfw_insn_u16 *)cmd)->ports; + int i; + + for (i = cmdlen - 1; !match && i>0; + i--, p += 2) + match = (x>=p[0] && x<=p[1]); + } + break; + + case O_ICMPTYPE: + match = (offset == 0 && proto==IPPROTO_ICMP && + icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); + break; + +#ifdef INET6 + case O_ICMP6TYPE: + match = is_ipv6 && offset == 0 && + proto==IPPROTO_ICMPV6 && + icmp6type_match( + ICMP6(ulp)->icmp6_type, + (ipfw_insn_u32 *)cmd); + break; +#endif /* INET6 */ + + case O_IPOPT: + match = (is_ipv4 && + ipopts_match(ip, cmd) ); + break; + + case O_IPVER: + match = (is_ipv4 && + cmd->arg1 == ip->ip_v); + break; + + case O_IPID: + case O_IPLEN: + case O_IPTTL: + if (is_ipv4) { /* only for IP packets */ + uint16_t x; + uint16_t *p; + int i; + + if (cmd->opcode == O_IPLEN) + x = ip_len; + else if (cmd->opcode == O_IPTTL) + x = ip->ip_ttl; + else /* must be IPID */ + x = ntohs(ip->ip_id); + if (cmdlen == 1) { + match = (cmd->arg1 == x); + break; + } + /* otherwise we have ranges */ + p = ((ipfw_insn_u16 *)cmd)->ports; + i = cmdlen - 1; + for (; !match && i>0; i--, p += 2) + match = (x >= p[0] && x <= p[1]); + } + break; + + case O_IPPRECEDENCE: + match = (is_ipv4 && + (cmd->arg1 == (ip->ip_tos & 0xe0)) ); + break; + + case O_IPTOS: + match = (is_ipv4 && + flags_match(cmd, ip->ip_tos)); + break; + + case O_TCPDATALEN: + if (proto == IPPROTO_TCP && offset == 0) { + struct tcphdr *tcp; + uint16_t x; + uint16_t *p; + int i; + + tcp = TCP(ulp); + x = ip_len - + ((ip->ip_hl + tcp->th_off) << 2); + if (cmdlen == 1) { + match = (cmd->arg1 == x); + break; + } + /* otherwise we have ranges */ + p = ((ipfw_insn_u16 *)cmd)->ports; + i = cmdlen - 1; + for (; !match && i>0; i--, p += 2) + match = (x >= p[0] && x <= p[1]); + } + break; + + case O_TCPFLAGS: + match = (proto == IPPROTO_TCP && offset == 0 && + flags_match(cmd, TCP(ulp)->th_flags)); + break; + + case O_TCPOPTS: + match = (proto == IPPROTO_TCP && offset == 0 && + 
tcpopts_match(TCP(ulp), cmd)); + break; + + case O_TCPSEQ: + match = (proto == IPPROTO_TCP && offset == 0 && + ((ipfw_insn_u32 *)cmd)->d[0] == + TCP(ulp)->th_seq); + break; + + case O_TCPACK: + match = (proto == IPPROTO_TCP && offset == 0 && + ((ipfw_insn_u32 *)cmd)->d[0] == + TCP(ulp)->th_ack); + break; + + case O_TCPWIN: + match = (proto == IPPROTO_TCP && offset == 0 && + cmd->arg1 == TCP(ulp)->th_win); + break; + + case O_ESTAB: + /* reject packets which have SYN only */ + /* XXX should i also check for TH_ACK ? */ + match = (proto == IPPROTO_TCP && offset == 0 && + (TCP(ulp)->th_flags & + (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); + break; + + case O_ALTQ: { + struct pf_mtag *at; + ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; + + match = 1; + at = pf_find_mtag(m); + if (at != NULL && at->qid != 0) + break; + at = pf_get_mtag(m); + if (at == NULL) { + /* + * Let the packet fall back to the + * default ALTQ. + */ + break; + } + at->qid = altq->qid; + if (is_ipv4) + at->af = AF_INET; + else + at->af = AF_LINK; + at->hdr = ip; + break; + } + + case O_LOG: + if (fw_verbose) + ipfw_log(f, hlen, args, m, + oif, offset, tablearg, ip); + match = 1; + break; + + case O_PROB: + match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); + break; + +#if 0 + case O_VERREVPATH: + /* Outgoing packets automatically pass/match */ + match = ((oif != NULL) || + (m->m_pkthdr.rcvif == NULL) || + ( +#ifdef INET6 + is_ipv6 ? + verify_path6(&(args->f_id.src_ip6), + m->m_pkthdr.rcvif) : +#endif + verify_path(src_ip, m->m_pkthdr.rcvif, + args->f_id.fib))); + break; + + case O_VERSRCREACH: + /* Outgoing packets automatically pass/match */ + match = (hlen > 0 && ((oif != NULL) || +#ifdef INET6 + is_ipv6 ? 
+ verify_path6(&(args->f_id.src_ip6), + NULL) : +#endif + verify_path(src_ip, NULL, args->f_id.fib))); + break; + + case O_ANTISPOOF: + /* Outgoing packets automatically pass/match */ + if (oif == NULL && hlen > 0 && + ( (is_ipv4 && in_localaddr(src_ip)) +#ifdef INET6 + || (is_ipv6 && + in6_localaddr(&(args->f_id.src_ip6))) +#endif + )) + match = +#ifdef INET6 + is_ipv6 ? verify_path6( + &(args->f_id.src_ip6), + m->m_pkthdr.rcvif) : +#endif + verify_path(src_ip, + m->m_pkthdr.rcvif, + args->f_id.fib); + else + match = 1; + break; +#endif + + case O_IPSEC: +#ifdef IPSEC + match = (m_tag_find(m, + PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); +#endif + /* otherwise no match */ + break; + +#ifdef INET6 + case O_IP6_SRC: + match = is_ipv6 && + IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, + &((ipfw_insn_ip6 *)cmd)->addr6); + break; + + case O_IP6_DST: + match = is_ipv6 && + IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, + &((ipfw_insn_ip6 *)cmd)->addr6); + break; + case O_IP6_SRC_MASK: + case O_IP6_DST_MASK: + if (is_ipv6) { + int i = cmdlen - 1; + struct in6_addr p; + struct in6_addr *d = + &((ipfw_insn_ip6 *)cmd)->addr6; + + for (; !match && i > 0; d += 2, + i -= F_INSN_SIZE(struct in6_addr) + * 2) { + p = (cmd->opcode == + O_IP6_SRC_MASK) ? + args->f_id.src_ip6: + args->f_id.dst_ip6; + APPLY_MASK(&p, &d[1]); + match = + IN6_ARE_ADDR_EQUAL(&d[0], + &p); + } + } + break; + + case O_IP6_SRC_ME: + match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6); + break; + + case O_IP6_DST_ME: + match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6); + break; + + case O_FLOW6ID: + match = is_ipv6 && + flow6id_match(args->f_id.flow_id6, + (ipfw_insn_u32 *) cmd); + break; + + case O_EXT_HDR: + match = is_ipv6 && + (ext_hd & ((ipfw_insn *) cmd)->arg1); + break; + + case O_IP6: + match = is_ipv6; + break; +#endif + + case O_IP4: + match = is_ipv4; + break; + +#if 0 + case O_TAG: { + uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? 
+ tablearg : cmd->arg1; + + /* Packet is already tagged with this tag? */ + mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); + + /* We have `untag' action when F_NOT flag is + * present. And we must remove this mtag from + * mbuf and reset `match' to zero (`match' will + * be inversed later). + * Otherwise we should allocate new mtag and + * push it into mbuf. + */ + if (cmd->len & F_NOT) { /* `untag' action */ + if (mtag != NULL) + m_tag_delete(m, mtag); + } else if (mtag == NULL) { + if ((mtag = m_tag_alloc(MTAG_IPFW, + tag, 0, M_NOWAIT)) != NULL) + m_tag_prepend(m, mtag); + } + match = (cmd->len & F_NOT) ? 0: 1; + break; + } + + case O_FIB: /* try match the specified fib */ + if (args->f_id.fib == cmd->arg1) + match = 1; + break; + + case O_TAGGED: { + uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? + tablearg : cmd->arg1; + + if (cmdlen == 1) { + match = m_tag_locate(m, MTAG_IPFW, + tag, NULL) != NULL; + break; + } + + /* we have ranges */ + for (mtag = m_tag_first(m); + mtag != NULL && !match; + mtag = m_tag_next(m, mtag)) { + uint16_t *p; + int i; + + if (mtag->m_tag_cookie != MTAG_IPFW) + continue; + + p = ((ipfw_insn_u16 *)cmd)->ports; + i = cmdlen - 1; + for(; !match && i > 0; i--, p += 2) + match = + mtag->m_tag_id >= p[0] && + mtag->m_tag_id <= p[1]; + } + break; + } +#endif + + /* + * The second set of opcodes represents 'actions', + * i.e. the terminal part of a rule once the packet + * matches all previous patterns. + * Typically there is only one action for each rule, + * and the opcode is stored at the end of the rule + * (but there are exceptions -- see below). + * + * In general, here we set retval and terminate the + * outer loop (would be a 'break 3' in some language, + * but we need to do a 'goto done'). 
+ * + * Exceptions: + * O_COUNT and O_SKIPTO actions: + * instead of terminating, we jump to the next rule + * ('goto next_rule', equivalent to a 'break 2'), + * or to the SKIPTO target ('goto again' after + * having set f, cmd and l), respectively. + * + * O_TAG, O_LOG and O_ALTQ action parameters: + * perform some action and set match = 1; + * + * O_LIMIT and O_KEEP_STATE: these opcodes are + * not real 'actions', and are stored right + * before the 'action' part of the rule. + * These opcodes try to install an entry in the + * state tables; if successful, we continue with + * the next opcode (match=1; break;), otherwise + * the packet * must be dropped + * ('goto done' after setting retval); + * + * O_PROBE_STATE and O_CHECK_STATE: these opcodes + * cause a lookup of the state table, and a jump + * to the 'action' part of the parent rule + * ('goto check_body') if an entry is found, or + * (CHECK_STATE only) a jump to the next rule if + * the entry is not found ('goto next_rule'). + * The result of the lookup is cached to make + * further instances of these opcodes are + * effectively NOPs. + */ + case O_LIMIT: + case O_KEEP_STATE: + if (install_state(f, + (ipfw_insn_limit *)cmd, args, tablearg)) { + retval = IP_FW_DENY; + goto done; /* error/limit violation */ + } + match = 1; + break; + + case O_PROBE_STATE: + case O_CHECK_STATE: + /* + * dynamic rules are checked at the first + * keep-state or check-state occurrence, + * with the result being stored in dyn_dir. + * The compiler introduces a PROBE_STATE + * instruction for us when we have a + * KEEP_STATE (because PROBE_STATE needs + * to be run first). + */ + if (dyn_dir == MATCH_UNKNOWN && + (q = lookup_dyn_rule(&args->f_id, + &dyn_dir, proto == IPPROTO_TCP ? + TCP(ulp) : NULL)) + != NULL) { + /* + * Found dynamic entry, update stats + * and jump to the 'action' part of + * the parent rule. 
+ */ + q->pcnt++; + q->bcnt += pktlen; + f = q->rule; + cmd = ACTION_PTR(f); + l = f->cmd_len - f->act_ofs; + IPFW_DYN_UNLOCK(); + goto check_body; + } + /* + * Dynamic entry not found. If CHECK_STATE, + * skip to next rule, if PROBE_STATE just + * ignore and continue with next opcode. + */ + if (cmd->opcode == O_CHECK_STATE) + goto next_rule; + match = 1; + break; + + case O_ACCEPT: + retval = 0; /* accept */ + goto done; + + case O_PIPE: + case O_QUEUE: + args->rule = f; /* report matching rule */ + if (cmd->arg1 == IP_FW_TABLEARG) + args->cookie = tablearg; + else + args->cookie = cmd->arg1; + retval = IP_FW_DUMMYNET; + goto done; + +#if 0 + case O_DIVERT: + case O_TEE: { + struct divert_tag *dt; + + if (args->eh) /* not on layer 2 */ + break; + mtag = m_tag_get(PACKET_TAG_DIVERT, + sizeof(struct divert_tag), + M_NOWAIT); + if (mtag == NULL) { + /* XXX statistic */ + /* drop packet */ + IPFW_RUNLOCK(chain); + return (IP_FW_DENY); + } + dt = (struct divert_tag *)(mtag+1); + dt->cookie = f->rulenum; + if (cmd->arg1 == IP_FW_TABLEARG) + dt->info = tablearg; + else + dt->info = cmd->arg1; + m_tag_prepend(m, mtag); + retval = (cmd->opcode == O_DIVERT) ? + IP_FW_DIVERT : IP_FW_TEE; + goto done; + } +#endif + + case O_COUNT: + case O_SKIPTO: + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; + if (cmd->opcode == O_COUNT) + goto next_rule; + /* handle skipto */ + if (cmd->arg1 == IP_FW_TABLEARG) { + f = lookup_next_rule(f, tablearg); + } else { + if (f->next_rule == NULL) + lookup_next_rule(f, 0); + f = f->next_rule; + } + goto again; + + case O_REJECT: + /* + * Drop the packet and send a reject notice + * if the packet is not ICMP (or is an ICMP + * query), and it is not multicast/broadcast. 
+ */ + if (hlen > 0 && is_ipv4 && offset == 0 && + (proto != IPPROTO_ICMP || + is_icmp_query(ICMP(ulp))) && + !(m->m_flags & (M_BCAST|M_MCAST)) && + !IN_MULTICAST(ntohl(dst_ip.s_addr))) { + send_reject(args, cmd->arg1, ip_len, ip); + m = args->m; + } + /* FALLTHROUGH */ +#ifdef INET6 + case O_UNREACH6: + if (hlen > 0 && is_ipv6 && + ((offset & IP6F_OFF_MASK) == 0) && + (proto != IPPROTO_ICMPV6 || + (is_icmp6_query(args->f_id.flags) == 1)) && + !(m->m_flags & (M_BCAST|M_MCAST)) && + !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { + send_reject6( + args, cmd->arg1, hlen, + (struct ip6_hdr *)ip); + m = args->m; + } + /* FALLTHROUGH */ +#endif + case O_DENY: + retval = IP_FW_DENY; + goto done; + + case O_FORWARD_IP: { + struct sockaddr_in *sa; + sa = &(((ipfw_insn_sa *)cmd)->sa); + if (args->eh) /* not valid on layer2 pkts */ + break; + if (!q || dyn_dir == MATCH_FORWARD) { + if (sa->sin_addr.s_addr == INADDR_ANY) { + bcopy(sa, &args->hopstore, + sizeof(*sa)); + args->hopstore.sin_addr.s_addr = + htonl(tablearg); + args->next_hop = + &args->hopstore; + } else { + args->next_hop = sa; + } + } + retval = IP_FW_PASS; + } + goto done; + + case O_NETGRAPH: + case O_NGTEE: + args->rule = f; /* report matching rule */ + if (cmd->arg1 == IP_FW_TABLEARG) + args->cookie = tablearg; + else + args->cookie = cmd->arg1; + retval = (cmd->opcode == O_NETGRAPH) ? + IP_FW_NETGRAPH : IP_FW_NGTEE; + goto done; + +#if 0 + case O_SETFIB: + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; + M_SETFIB(m, cmd->arg1); + args->f_id.fib = cmd->arg1; + goto next_rule; + + case O_NAT: { + struct cfg_nat *t; + int nat_id; + + if (IPFW_NAT_LOADED) { + args->rule = f; /* Report matching rule. */ + t = ((ipfw_insn_nat *)cmd)->nat; + if (t == NULL) { + nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? 
+ tablearg : cmd->arg1; + LOOKUP_NAT(layer3_chain, nat_id, t); + if (t == NULL) { + retval = IP_FW_DENY; + goto done; + } + if (cmd->arg1 != IP_FW_TABLEARG) + ((ipfw_insn_nat *)cmd)->nat = t; + } + retval = ipfw_nat_ptr(args, t, m); + } else + retval = IP_FW_DENY; + goto done; + } +#endif + + default: + break; // XXX we disabled some + panic("-- unknown opcode %d\n", cmd->opcode); + } /* end of switch() on opcodes */ + + if (cmd->len & F_NOT) + match = !match; + + if (match) { + if (cmd->len & F_OR) + skip_or = 1; + } else { + if (!(cmd->len & F_OR)) /* not an OR block, */ + break; /* try next rule */ + } + + } /* end of inner for, scan opcodes */ + +next_rule:; /* try next rule */ + + } /* end of outer for, scan rules */ + printf("ipfw: ouch!, skip past end of rules, denying packet\n"); + IPFW_RUNLOCK(chain); + return (IP_FW_DENY); + +done: + /* Update statistics */ + f->pcnt++; + f->bcnt += pktlen; + f->timestamp = time_uptime; + IPFW_RUNLOCK(chain); + return (retval); + +pullup_failed: + if (fw_verbose) + printf("ipfw: pullup failed\n"); + return (IP_FW_DENY); +} + +/* + * When a rule is added/deleted, clear the next_rule pointers in all rules. + * These will be reconstructed on the fly as packets are matched. + */ +static void +flush_rule_ptrs(struct ip_fw_chain *chain) +{ + struct ip_fw *rule; + + IPFW_WLOCK_ASSERT(chain); + + for (rule = chain->rules; rule; rule = rule->next) + rule->next_rule = NULL; +} + +/* + * Add a new rule to the list. Copy the rule into a malloc'ed area, then + * possibly create a rule number and add the rule to the list. + * Update the rule_number in the input struct so the caller knows it as well. 
+ */ +static int +add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) +{ + struct ip_fw *rule, *f, *prev; + int l = RULESIZE(input_rule); + + if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) + return (EINVAL); + + rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO); + if (rule == NULL) + return (ENOSPC); + + bcopy(input_rule, rule, l); + + rule->next = NULL; + rule->next_rule = NULL; + + rule->pcnt = 0; + rule->bcnt = 0; + rule->timestamp = 0; + + IPFW_WLOCK(chain); + + if (chain->rules == NULL) { /* default rule */ + chain->rules = rule; + goto done; + } + + /* + * If rulenum is 0, find highest numbered rule before the + * default rule, and add autoinc_step + */ + if (autoinc_step < 1) + autoinc_step = 1; + else if (autoinc_step > 1000) + autoinc_step = 1000; + if (rule->rulenum == 0) { + /* + * locate the highest numbered rule before default + */ + for (f = chain->rules; f; f = f->next) { + if (f->rulenum == IPFW_DEFAULT_RULE) + break; + rule->rulenum = f->rulenum; + } + if (rule->rulenum < IPFW_DEFAULT_RULE - autoinc_step) + rule->rulenum += autoinc_step; + input_rule->rulenum = rule->rulenum; + } + + /* + * Now insert the new rule in the right place in the sorted list. + */ + for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) { + if (f->rulenum > rule->rulenum) { /* found the location */ + if (prev) { + rule->next = f; + prev->next = rule; + } else { /* head insert */ + rule->next = chain->rules; + chain->rules = rule; + } + break; + } + } + flush_rule_ptrs(chain); +done: + static_count++; + static_len += l; + IPFW_WUNLOCK(chain); + DEB(printf("ipfw: installed rule %d, static count now %d\n", + rule->rulenum, static_count);) + return (0); +} + +/** + * Remove a static rule (including derived * dynamic rules) + * and place it on the ``reap list'' for later reclamation. + * The caller is in charge of clearing rule pointers to avoid + * dangling pointers. + * @return a pointer to the next entry. 
+ * Arguments are not checked, so they better be correct. + */ +static struct ip_fw * +remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, + struct ip_fw *prev) +{ + struct ip_fw *n; + int l = RULESIZE(rule); + + IPFW_WLOCK_ASSERT(chain); + + n = rule->next; + IPFW_DYN_LOCK(); + remove_dyn_rule(rule, NULL /* force removal */); + IPFW_DYN_UNLOCK(); + if (prev == NULL) + chain->rules = n; + else + prev->next = n; + static_count--; + static_len -= l; + + rule->next = chain->reap; + chain->reap = rule; + + return n; +} + +/* + * Hook for cleaning up dummynet when an ipfw rule is deleted. + * Set/cleared when dummynet module is loaded/unloaded. + */ +void (*ip_dn_ruledel_ptr)(void *) = NULL; + +/** + * Reclaim storage associated with a list of rules. This is + * typically the list created using remove_rule. + * A NULL pointer on input is handled correctly. + */ +static void +reap_rules(struct ip_fw *head) +{ + struct ip_fw *rule; + + while ((rule = head) != NULL) { + head = head->next; + if (ip_dn_ruledel_ptr) + ip_dn_ruledel_ptr(rule); + free(rule, M_IPFW); + } +} + +/* + * Remove all rules from a chain (except rules in set RESVD_SET + * unless kill_default = 1). The caller is responsible for + * reclaiming storage for the rules left in chain->reap. + */ +static void +free_chain(struct ip_fw_chain *chain, int kill_default) +{ + struct ip_fw *prev, *rule; + + IPFW_WLOCK_ASSERT(chain); + + flush_rule_ptrs(chain); /* more efficient to do outside the loop */ + for (prev = NULL, rule = chain->rules; rule ; ) + if (kill_default || rule->set != RESVD_SET) + rule = remove_rule(chain, rule, prev); + else { + prev = rule; + rule = rule->next; + } +} + +/** + * Remove all rules with given number, and also do set manipulation. + * Assumes chain != NULL && *chain != NULL. + * + * The argument is an u_int32_t. 
The low 16 bit are the rule or set number, + * the next 8 bits are the new set, the top 8 bits are the command: + * + * 0 delete rules with given number + * 1 delete rules with given set number + * 2 move rules with given number to new set + * 3 move rules with given set number to new set + * 4 swap sets with given numbers + * 5 delete rules with given number and with given set number + */ +static int +del_entry(struct ip_fw_chain *chain, u_int32_t arg) +{ + struct ip_fw *prev = NULL, *rule; + u_int16_t rulenum; /* rule or old_set */ + u_int8_t cmd, new_set; + + rulenum = arg & 0xffff; + cmd = (arg >> 24) & 0xff; + new_set = (arg >> 16) & 0xff; + + if (cmd > 5 || new_set > RESVD_SET) + return EINVAL; + if (cmd == 0 || cmd == 2 || cmd == 5) { + if (rulenum >= IPFW_DEFAULT_RULE) + return EINVAL; + } else { + if (rulenum > RESVD_SET) /* old_set */ + return EINVAL; + } + + IPFW_WLOCK(chain); + rule = chain->rules; + chain->reap = NULL; + switch (cmd) { + case 0: /* delete rules with given number */ + /* + * locate first rule to delete + */ + for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) + ; + if (rule->rulenum != rulenum) { + IPFW_WUNLOCK(chain); + return EINVAL; + } + + /* + * flush pointers outside the loop, then delete all matching + * rules. prev remains the same throughout the cycle. 
+ */ + flush_rule_ptrs(chain); + while (rule->rulenum == rulenum) + rule = remove_rule(chain, rule, prev); + break; + + case 1: /* delete all rules with given set number */ + flush_rule_ptrs(chain); + rule = chain->rules; + while (rule->rulenum < IPFW_DEFAULT_RULE) + if (rule->set == rulenum) + rule = remove_rule(chain, rule, prev); + else { + prev = rule; + rule = rule->next; + } + break; + + case 2: /* move rules with given number to new set */ + rule = chain->rules; + for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + if (rule->rulenum == rulenum) + rule->set = new_set; + break; + + case 3: /* move rules with given set number to new set */ + for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + if (rule->set == rulenum) + rule->set = new_set; + break; + + case 4: /* swap two sets */ + for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) + if (rule->set == rulenum) + rule->set = new_set; + else if (rule->set == new_set) + rule->set = rulenum; + break; + case 5: /* delete rules with given number and with given set number. + * rulenum - given rule number; + * new_set - given set number. + */ + for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) + ; + if (rule->rulenum != rulenum) { + IPFW_WUNLOCK(chain); + return (EINVAL); + } + flush_rule_ptrs(chain); + while (rule->rulenum == rulenum) { + if (rule->set == new_set) + rule = remove_rule(chain, rule, prev); + else { + prev = rule; + rule = rule->next; + } + } + } + /* + * Look for rules to reclaim. We grab the list before + * releasing the lock then reclaim them w/o the lock to + * avoid a LOR with dummynet. + */ + rule = chain->reap; + chain->reap = NULL; + IPFW_WUNLOCK(chain); + if (rule) + reap_rules(rule); + return 0; +} + +/* + * Clear counters for a specific rule. + * The enclosing "table" is assumed locked. 
+ */ +static void +clear_counters(struct ip_fw *rule, int log_only) +{ + ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); + + if (log_only == 0) { + rule->bcnt = rule->pcnt = 0; + rule->timestamp = 0; + } + if (l->o.opcode == O_LOG) + l->log_left = l->max_log; +} + +/** + * Reset some or all counters on firewall rules. + * The argument `arg' is an u_int32_t. The low 16 bit are the rule number, + * the next 8 bits are the set number, the top 8 bits are the command: + * 0 work with rules from all set's; + * 1 work with rules only from specified set. + * Specified rule number is zero if we want to clear all entries. + * log_only is 1 if we only want to reset logs, zero otherwise. + */ +static int +zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) +{ + struct ip_fw *rule; + char *msg; + + uint16_t rulenum = arg & 0xffff; + uint8_t set = (arg >> 16) & 0xff; + uint8_t cmd = (arg >> 24) & 0xff; + + if (cmd > 1) + return (EINVAL); + if (cmd == 1 && set > RESVD_SET) + return (EINVAL); + + IPFW_WLOCK(chain); + if (rulenum == 0) { + norule_counter = 0; + for (rule = chain->rules; rule; rule = rule->next) { + /* Skip rules from another set. */ + if (cmd == 1 && rule->set != set) + continue; + clear_counters(rule, log_only); + } + msg = log_only ? "All logging counts reset" : + "Accounting cleared"; + } else { + int cleared = 0; + /* + * We can have multiple rules with the same number, so we + * need to clear them all. + */ + for (rule = chain->rules; rule; rule = rule->next) + if (rule->rulenum == rulenum) { + while (rule && rule->rulenum == rulenum) { + if (cmd == 0 || rule->set == set) + clear_counters(rule, log_only); + rule = rule->next; + } + cleared = 1; + break; + } + if (!cleared) { /* we did not find any matching rules */ + IPFW_WUNLOCK(chain); + return (EINVAL); + } + msg = log_only ? 
"logging count reset" : "cleared"; + } + IPFW_WUNLOCK(chain); + + if (fw_verbose) { +#define lev LOG_SECURITY | LOG_NOTICE + + if (rulenum) + log(lev, "ipfw: Entry %d %s.\n", rulenum, msg); + else + log(lev, "ipfw: %s.\n", msg); + } + return (0); +} + +/* + * Check validity of the structure before insert. + * Fortunately rules are simple, so this mostly need to check rule sizes. + */ +static int +check_ipfw_struct(struct ip_fw *rule, int size) +{ + int l, cmdlen = 0; + int have_action=0; + ipfw_insn *cmd; + + if (size < sizeof(*rule)) { + printf("ipfw: rule too short\n"); + return (EINVAL); + } + /* first, check for valid size */ + l = RULESIZE(rule); + if (l != size) { + printf("ipfw: size mismatch (have %d want %d)\n", size, l); + return (EINVAL); + } + if (rule->act_ofs >= rule->cmd_len) { + printf("ipfw: bogus action offset (%u > %u)\n", + rule->act_ofs, rule->cmd_len - 1); + return (EINVAL); + } + /* + * Now go for the individual checks. Very simple ones, basically only + * instruction sizes. 
+ */ + for (l = rule->cmd_len, cmd = rule->cmd ; + l > 0 ; l -= cmdlen, cmd += cmdlen) { + cmdlen = F_LEN(cmd); + if (cmdlen > l) { + printf("ipfw: opcode %d size truncated\n", + cmd->opcode); + return EINVAL; + } + DEB(printf("ipfw: opcode %d\n", cmd->opcode);) + switch (cmd->opcode) { + case O_PROBE_STATE: + case O_KEEP_STATE: + case O_PROTO: + case O_IP_SRC_ME: + case O_IP_DST_ME: + case O_LAYER2: + case O_IN: + case O_FRAG: + case O_DIVERTED: + case O_IPOPT: + case O_IPTOS: + case O_IPPRECEDENCE: + case O_IPVER: + case O_TCPWIN: + case O_TCPFLAGS: + case O_TCPOPTS: + case O_ESTAB: + case O_VERREVPATH: + case O_VERSRCREACH: + case O_ANTISPOOF: + case O_IPSEC: +#ifdef INET6 + case O_IP6_SRC_ME: + case O_IP6_DST_ME: + case O_EXT_HDR: + case O_IP6: +#endif + case O_IP4: + case O_TAG: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + break; + + case O_FIB: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + if (cmd->arg1 >= rt_numfibs) { + printf("ipfw: invalid fib number %d\n", + cmd->arg1); + return EINVAL; + } + break; + + case O_SETFIB: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + if (cmd->arg1 >= rt_numfibs) { + printf("ipfw: invalid fib number %d\n", + cmd->arg1); + return EINVAL; + } + goto check_action; + + case O_UID: + case O_GID: + case O_JAIL: + case O_IP_SRC: + case O_IP_DST: + case O_TCPSEQ: + case O_TCPACK: + case O_PROB: + case O_ICMPTYPE: + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + break; + + case O_LIMIT: + if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) + goto bad_size; + break; + + case O_LOG: + if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) + goto bad_size; + + ((ipfw_insn_log *)cmd)->log_left = + ((ipfw_insn_log *)cmd)->max_log; + + break; + + case O_IP_SRC_MASK: + case O_IP_DST_MASK: + /* only odd command lengths */ + if ( !(cmdlen & 1) || cmdlen > 31) + goto bad_size; + break; + + case O_IP_SRC_SET: + case O_IP_DST_SET: + if (cmd->arg1 == 0 || cmd->arg1 > 256) { + printf("ipfw: invalid set size %d\n", + 
cmd->arg1); + return EINVAL; + } + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + + (cmd->arg1+31)/32 ) + goto bad_size; + break; + + case O_IP_SRC_LOOKUP: + case O_IP_DST_LOOKUP: + if (cmd->arg1 >= IPFW_TABLES_MAX) { + printf("ipfw: invalid table number %d\n", + cmd->arg1); + return (EINVAL); + } + if (cmdlen != F_INSN_SIZE(ipfw_insn) && + cmdlen != F_INSN_SIZE(ipfw_insn_u32)) + goto bad_size; + break; + + case O_MACADDR2: + if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) + goto bad_size; + break; + + case O_NOP: + case O_IPID: + case O_IPTTL: + case O_IPLEN: + case O_TCPDATALEN: + case O_TAGGED: + if (cmdlen < 1 || cmdlen > 31) + goto bad_size; + break; + + case O_MAC_TYPE: + case O_IP_SRCPORT: + case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ + if (cmdlen < 2 || cmdlen > 31) + goto bad_size; + break; + + case O_RECV: + case O_XMIT: + case O_VIA: + if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) + goto bad_size; + break; + + case O_ALTQ: + if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) + goto bad_size; + break; + + case O_PIPE: + case O_QUEUE: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; + goto check_action; + + case O_FORWARD_IP: +#ifdef IPFIREWALL_FORWARD + if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) + goto bad_size; + goto check_action; +#else + return EINVAL; +#endif + + case O_DIVERT: + case O_TEE: + if (ip_divert_ptr == NULL) + return EINVAL; + else + goto check_size; + case O_NETGRAPH: + case O_NGTEE: + if (!NG_IPFW_LOADED) + return EINVAL; + else + goto check_size; + case O_NAT: + if (!IPFW_NAT_LOADED) + return EINVAL; + if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) + goto bad_size; + goto check_action; + case O_FORWARD_MAC: /* XXX not implemented yet */ + case O_CHECK_STATE: + case O_COUNT: + case O_ACCEPT: + case O_DENY: + case O_REJECT: +#ifdef INET6 + case O_UNREACH6: +#endif + case O_SKIPTO: +check_size: + if (cmdlen != F_INSN_SIZE(ipfw_insn)) + goto bad_size; +check_action: + if (have_action) { + printf("ipfw: opcode %d, multiple actions" + " not 
allowed\n", + cmd->opcode); + return EINVAL; + } + have_action = 1; + if (l != cmdlen) { + printf("ipfw: opcode %d, action must be" + " last opcode\n", + cmd->opcode); + return EINVAL; + } + break; +#ifdef INET6 + case O_IP6_SRC: + case O_IP6_DST: + if (cmdlen != F_INSN_SIZE(struct in6_addr) + + F_INSN_SIZE(ipfw_insn)) + goto bad_size; + break; + + case O_FLOW6ID: + if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + + ((ipfw_insn_u32 *)cmd)->o.arg1) + goto bad_size; + break; + + case O_IP6_SRC_MASK: + case O_IP6_DST_MASK: + if ( !(cmdlen & 1) || cmdlen > 127) + goto bad_size; + break; + case O_ICMP6TYPE: + if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) ) + goto bad_size; + break; +#endif + + default: + switch (cmd->opcode) { +#ifndef INET6 + case O_IP6_SRC_ME: + case O_IP6_DST_ME: + case O_EXT_HDR: + case O_IP6: + case O_UNREACH6: + case O_IP6_SRC: + case O_IP6_DST: + case O_FLOW6ID: + case O_IP6_SRC_MASK: + case O_IP6_DST_MASK: + case O_ICMP6TYPE: + printf("ipfw: no IPv6 support in kernel\n"); + return EPROTONOSUPPORT; +#endif + default: + printf("ipfw: opcode %d, unknown opcode\n", + cmd->opcode); + return EINVAL; + } + } + } + if (have_action == 0) { + printf("ipfw: missing action\n"); + return EINVAL; + } + return 0; + +bad_size: + printf("ipfw: opcode %d size %d wrong\n", + cmd->opcode, cmdlen); + return EINVAL; +} + +/* + * Copy the static and dynamic rules to the supplied buffer + * and return the amount of space actually used. + */ +static size_t +ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) +{ + char *bp = buf; + char *ep = bp + space; + struct ip_fw *rule; + int i; + time_t boot_seconds; + + boot_seconds = boottime.tv_sec; + /* XXX this can take a long time and locking will block packet flow */ + IPFW_RLOCK(chain); + for (rule = chain->rules; rule ; rule = rule->next) { + /* + * Verify the entry fits in the buffer in case the + * rules changed between calculating buffer space and + * now. 
This would be better done using a generation + * number but should suffice for now. + */ + i = RULESIZE(rule); + if (bp + i <= ep) { + bcopy(rule, bp, i); + /* + * XXX HACK. Store the disable mask in the "next" pointer + * in a wild attempt to keep the ABI the same. + * Why do we do this on EVERY rule? + */ + bcopy(&set_disable, &(((struct ip_fw *)bp)->next_rule), + sizeof(set_disable)); + if (((struct ip_fw *)bp)->timestamp) + ((struct ip_fw *)bp)->timestamp += boot_seconds; + bp += i; + } + } + IPFW_RUNLOCK(chain); + if (ipfw_dyn_v) { + ipfw_dyn_rule *p, *last = NULL; + + IPFW_DYN_LOCK(); + for (i = 0 ; i < curr_dyn_buckets; i++) + for (p = ipfw_dyn_v[i] ; p != NULL; p = p->next) { + if (bp + sizeof *p <= ep) { + ipfw_dyn_rule *dst = + (ipfw_dyn_rule *)bp; + bcopy(p, dst, sizeof *p); + bcopy(&(p->rule->rulenum), &(dst->rule), + sizeof(p->rule->rulenum)); + /* + * store set number into high word of + * dst->rule pointer. + */ + bcopy(&(p->rule->set), + (char *)&dst->rule + + sizeof(p->rule->rulenum), + sizeof(p->rule->set)); + /* + * store a non-null value in "next". + * The userland code will interpret a + * NULL here as a marker + * for the last dynamic rule. + */ + bcopy(&dst, &dst->next, sizeof(dst)); + last = dst; + dst->expire = + TIME_LEQ(dst->expire, time_uptime) ? + 0 : dst->expire - time_uptime ; + bp += sizeof(ipfw_dyn_rule); + } + } + IPFW_DYN_UNLOCK(); + if (last != NULL) /* mark last dynamic rule */ + bzero(&last->next, sizeof(last)); + } + return (bp - (char *)buf); +} + + +/** + * {set|get}sockopt parser. + */ +static int +ipfw_ctl(struct sockopt *sopt) +{ +#define RULE_MAXSIZE (256*sizeof(u_int32_t)) + int error; + size_t size; + struct ip_fw *buf, *rule; + u_int32_t rulenum[2]; + + error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); + if (error) + return (error); + + /* + * Disallow modifications in really-really secure mode, but still allow + * the logging counters to be reset. 
+ */ + if (sopt->sopt_name == IP_FW_ADD || + (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { + error = securelevel_ge(sopt->sopt_td->td_ucred, 3); + if (error) + return (error); + } + + error = 0; + + switch (sopt->sopt_name) { + case IP_FW_GET: + /* + * pass up a copy of the current rules. Static rules + * come first (the last of which has number IPFW_DEFAULT_RULE), + * followed by a possibly empty list of dynamic rule. + * The last dynamic rule has NULL in the "next" field. + * + * Note that the calculated size is used to bound the + * amount of data returned to the user. The rule set may + * change between calculating the size and returning the + * data in which case we'll just return what fits. + */ + size = static_len; /* size of static rules */ + if (ipfw_dyn_v) /* add size of dyn.rules */ + size += (dyn_count * sizeof(ipfw_dyn_rule)); + + /* + * XXX todo: if the user passes a short length just to know + * how much room is needed, do not bother filling up the + * buffer, just jump to the sooptcopyout. + */ + buf = malloc(size, M_TEMP, M_WAITOK); + error = sooptcopyout(sopt, buf, + ipfw_getrules(&layer3_chain, buf, size)); + free(buf, M_TEMP); + break; + + case IP_FW_FLUSH: + /* + * Normally we cannot release the lock on each iteration. + * We could do it here only because we start from the head all + * the times so there is no risk of missing some entries. + * On the other hand, the risk is that we end up with + * a very inconsistent ruleset, so better keep the lock + * around the whole cycle. + * + * XXX this code can be improved by resetting the head of + * the list to point to the default rule, and then freeing + * the old list without the need for a lock. 
+ */ + + IPFW_WLOCK(&layer3_chain); + layer3_chain.reap = NULL; + free_chain(&layer3_chain, 0 /* keep default rule */); + rule = layer3_chain.reap; + layer3_chain.reap = NULL; + IPFW_WUNLOCK(&layer3_chain); + if (rule != NULL) + reap_rules(rule); + break; + + case IP_FW_ADD: + rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); + error = sooptcopyin(sopt, rule, RULE_MAXSIZE, + sizeof(struct ip_fw) ); + if (error == 0) + error = check_ipfw_struct(rule, sopt->sopt_valsize); + if (error == 0) { + error = add_rule(&layer3_chain, rule); + size = RULESIZE(rule); + if (!error && sopt->sopt_dir == SOPT_GET) + error = sooptcopyout(sopt, rule, size); + } + free(rule, M_TEMP); + break; + + case IP_FW_DEL: + /* + * IP_FW_DEL is used for deleting single rules or sets, + * and (ab)used to atomically manipulate sets. Argument size + * is used to distinguish between the two: + * sizeof(u_int32_t) + * delete single rule or set of rules, + * or reassign rules (or sets) to a different set. + * 2*sizeof(u_int32_t) + * atomic disable/enable sets. + * first u_int32_t contains sets to be disabled, + * second u_int32_t contains sets to be enabled. 
+ */ + error = sooptcopyin(sopt, rulenum, + 2*sizeof(u_int32_t), sizeof(u_int32_t)); + if (error) + break; + size = sopt->sopt_valsize; + if (size == sizeof(u_int32_t)) /* delete or reassign */ + error = del_entry(&layer3_chain, rulenum[0]); + else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */ + set_disable = + (set_disable | rulenum[0]) & ~rulenum[1] & + ~(1<sopt_val != 0) { + error = sooptcopyin(sopt, rulenum, + sizeof(u_int32_t), sizeof(u_int32_t)); + if (error) + break; + } + error = zero_entry(&layer3_chain, rulenum[0], + sopt->sopt_name == IP_FW_RESETLOG); + break; + +#ifdef radix + case IP_FW_TABLE_ADD: + { + ipfw_table_entry ent; + + error = sooptcopyin(sopt, &ent, + sizeof(ent), sizeof(ent)); + if (error) + break; + error = add_table_entry(&layer3_chain, ent.tbl, + ent.addr, ent.masklen, ent.value); + } + break; + + case IP_FW_TABLE_DEL: + { + ipfw_table_entry ent; + + error = sooptcopyin(sopt, &ent, + sizeof(ent), sizeof(ent)); + if (error) + break; + error = del_table_entry(&layer3_chain, ent.tbl, + ent.addr, ent.masklen); + } + break; + + case IP_FW_TABLE_FLUSH: + { + u_int16_t tbl; + + error = sooptcopyin(sopt, &tbl, + sizeof(tbl), sizeof(tbl)); + if (error) + break; + IPFW_WLOCK(&layer3_chain); + error = flush_table(&layer3_chain, tbl); + IPFW_WUNLOCK(&layer3_chain); + } + break; + + case IP_FW_TABLE_GETSIZE: + { + u_int32_t tbl, cnt; + + if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), + sizeof(tbl)))) + break; + IPFW_RLOCK(&layer3_chain); + error = count_table(&layer3_chain, tbl, &cnt); + IPFW_RUNLOCK(&layer3_chain); + if (error) + break; + error = sooptcopyout(sopt, &cnt, sizeof(cnt)); + } + break; + + case IP_FW_TABLE_LIST: + { + ipfw_table *tbl; + + if (sopt->sopt_valsize < sizeof(*tbl)) { + error = EINVAL; + break; + } + size = sopt->sopt_valsize; + tbl = malloc(size, M_TEMP, M_WAITOK); + error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); + if (error) { + free(tbl, M_TEMP); + break; + } + tbl->size = (size - sizeof(*tbl)) / + 
sizeof(ipfw_table_entry); + IPFW_RLOCK(&layer3_chain); + error = dump_table(&layer3_chain, tbl); + IPFW_RUNLOCK(&layer3_chain); + if (error) { + free(tbl, M_TEMP); + break; + } + error = sooptcopyout(sopt, tbl, size); + free(tbl, M_TEMP); + } + break; + +#endif /* radix */ + + case IP_FW_NAT_CFG: + if (IPFW_NAT_LOADED) + error = ipfw_nat_cfg_ptr(sopt); + else { + printf("IP_FW_NAT_CFG: %s\n", + "ipfw_nat not present, please load it"); + error = EINVAL; + } + break; + + case IP_FW_NAT_DEL: + if (IPFW_NAT_LOADED) + error = ipfw_nat_del_ptr(sopt); + else { + printf("IP_FW_NAT_DEL: %s\n", + "ipfw_nat not present, please load it"); + error = EINVAL; + } + break; + + case IP_FW_NAT_GET_CONFIG: + if (IPFW_NAT_LOADED) + error = ipfw_nat_get_cfg_ptr(sopt); + else { + printf("IP_FW_NAT_GET_CFG: %s\n", + "ipfw_nat not present, please load it"); + error = EINVAL; + } + break; + + case IP_FW_NAT_GET_LOG: + if (IPFW_NAT_LOADED) + error = ipfw_nat_get_log_ptr(sopt); + else { + printf("IP_FW_NAT_GET_LOG: %s\n", + "ipfw_nat not present, please load it"); + error = EINVAL; + } + break; + + default: + printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); + error = EINVAL; + } + + return (error); +#undef RULE_MAXSIZE +} + +/** + * dummynet needs a reference to the default rule, because rules can be + * deleted while packets hold a reference to them. When this happens, + * dummynet changes the reference to the default rule (it could well be a + * NULL pointer, but this way we do not need to check for the special + * case, plus here he have info on the default behaviour). + */ +struct ip_fw *ip_fw_default_rule; + +/* + * This procedure is only used to handle keepalives. 
It is invoked + * every dyn_keepalive_period + */ +static void +ipfw_tick(void * __unused unused) +{ + struct mbuf *m0, *m, *mnext, **mtailp; + int i; + ipfw_dyn_rule *q; + + if (dyn_keepalive == 0 || ipfw_dyn_v == NULL || dyn_count == 0) + goto done; + + /* + * We make a chain of packets to go out here -- not deferring + * until after we drop the IPFW dynamic rule lock would result + * in a lock order reversal with the normal packet input -> ipfw + * call stack. + */ + m0 = NULL; + mtailp = &m0; + IPFW_DYN_LOCK(); + for (i = 0 ; i < curr_dyn_buckets ; i++) { + for (q = ipfw_dyn_v[i] ; q ; q = q->next ) { + if (q->dyn_type == O_LIMIT_PARENT) + continue; + if (q->id.proto != IPPROTO_TCP) + continue; + if ( (q->state & BOTH_SYN) != BOTH_SYN) + continue; + if (TIME_LEQ( time_uptime+dyn_keepalive_interval, + q->expire)) + continue; /* too early */ + if (TIME_LEQ(q->expire, time_uptime)) + continue; /* too late, rule expired */ + + *mtailp = send_pkt(NULL, &(q->id), q->ack_rev - 1, + q->ack_fwd, TH_SYN); + if (*mtailp != NULL) + mtailp = &(*mtailp)->m_nextpkt; + *mtailp = send_pkt(NULL, &(q->id), q->ack_fwd - 1, + q->ack_rev, 0); + if (*mtailp != NULL) + mtailp = &(*mtailp)->m_nextpkt; + } + } + IPFW_DYN_UNLOCK(); + for (m = mnext = m0; m != NULL; m = mnext) { + mnext = m->m_nextpkt; + m->m_nextpkt = NULL; + ip_output(m, NULL, NULL, 0, NULL, NULL); + } +done: + callout_reset(&ipfw_timeout, dyn_keepalive_period*hz, ipfw_tick, NULL); +} + +int +ipfw_init(void) +{ + struct ip_fw default_rule; + int error; + +#ifdef INET6 + /* Setup IPv6 fw sysctl tree. 
*/ + sysctl_ctx_init(&ip6_fw_sysctl_ctx); + ip6_fw_sysctl_tree = SYSCTL_ADD_NODE(&ip6_fw_sysctl_ctx, + SYSCTL_STATIC_CHILDREN(_net_inet6_ip6), OID_AUTO, "fw", + CTLFLAG_RW | CTLFLAG_SECURE, 0, "Firewall"); + SYSCTL_ADD_PROC(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree), + OID_AUTO, "enable", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, + &fw6_enable, 0, ipfw_chg_hook, "I", "Enable ipfw+6"); + SYSCTL_ADD_INT(&ip6_fw_sysctl_ctx, SYSCTL_CHILDREN(ip6_fw_sysctl_tree), + OID_AUTO, "deny_unknown_exthdrs", CTLFLAG_RW | CTLFLAG_SECURE, + &fw_deny_unknown_exthdrs, 0, + "Deny packets with unknown IPv6 Extension Headers"); +#endif + + layer3_chain.rules = NULL; + IPFW_LOCK_INIT(&layer3_chain); + ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", + sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + IPFW_DYN_LOCK_INIT(); + callout_init(&ipfw_timeout, CALLOUT_MPSAFE); + + bzero(&default_rule, sizeof default_rule); + + default_rule.act_ofs = 0; + default_rule.rulenum = IPFW_DEFAULT_RULE; + default_rule.cmd_len = 1; + default_rule.set = RESVD_SET; + + default_rule.cmd[0].len = 1; + default_rule.cmd[0].opcode = +#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT + 1 ? O_ACCEPT : +#endif + O_DENY; + + error = add_rule(&layer3_chain, &default_rule); + if (error != 0) { + printf("ipfw2: error %u initializing default rule " + "(support disabled)\n", error); + IPFW_DYN_LOCK_DESTROY(); + IPFW_LOCK_DESTROY(&layer3_chain); + uma_zdestroy(ipfw_dyn_rule_zone); + return (error); + } + + ip_fw_default_rule = layer3_chain.rules; + printf("ipfw2 " +#ifdef INET6 + "(+ipv6) " +#endif + "initialized, divert %s, nat %s, " + "rule-based forwarding " +#ifdef IPFIREWALL_FORWARD + "enabled, " +#else + "disabled, " +#endif + "default to %s, logging ", +#ifdef IPDIVERT + "enabled", +#else + "loadable", +#endif +#ifdef IPFIREWALL_NAT + "enabled", +#else + "loadable", +#endif + + default_rule.cmd[0].opcode == O_ACCEPT ? 
"accept" : "deny"); + +#ifdef IPFIREWALL_VERBOSE + fw_verbose = 1; +#endif +#ifdef IPFIREWALL_VERBOSE_LIMIT + verbose_limit = IPFIREWALL_VERBOSE_LIMIT; +#endif + if (fw_verbose == 0) + printf("disabled\n"); + else if (verbose_limit == 0) + printf("unlimited\n"); + else + printf("limited to %d packets/entry by default\n", + verbose_limit); + + error = init_tables(&layer3_chain); + if (error) { + IPFW_DYN_LOCK_DESTROY(); + IPFW_LOCK_DESTROY(&layer3_chain); + uma_zdestroy(ipfw_dyn_rule_zone); + return (error); + } + ip_fw_ctl_ptr = ipfw_ctl; + ip_fw_chk_ptr = ipfw_chk; + callout_reset(&ipfw_timeout, hz, ipfw_tick, NULL); + LIST_INIT(&layer3_chain.nat); + return (0); +} + +void +ipfw_destroy(void) +{ + struct ip_fw *reap; + + ip_fw_chk_ptr = NULL; + ip_fw_ctl_ptr = NULL; + callout_drain(&ipfw_timeout); + IPFW_WLOCK(&layer3_chain); + flush_tables(&layer3_chain); + layer3_chain.reap = NULL; + free_chain(&layer3_chain, 1 /* kill default rule */); + reap = layer3_chain.reap, layer3_chain.reap = NULL; + IPFW_WUNLOCK(&layer3_chain); + if (reap != NULL) + reap_rules(reap); + IPFW_DYN_LOCK_DESTROY(); + uma_zdestroy(ipfw_dyn_rule_zone); + if (ipfw_dyn_v != NULL) + free(ipfw_dyn_v, M_IPFW); + IPFW_LOCK_DESTROY(&layer3_chain); + +#ifdef INET6 + /* Free IPv6 fw sysctl tree. */ + sysctl_ctx_free(&ip6_fw_sysctl_ctx); +#endif + + printf("IP firewall unloaded\n"); +} diff --git a/dummynet/ip_fw_pfil.c b/dummynet/ip_fw_pfil.c new file mode 100644 index 0000000..9d65e7f --- /dev/null +++ b/dummynet/ip_fw_pfil.c @@ -0,0 +1,571 @@ +/*- + * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.25.2.2 2008/04/25 10:26:30 oleg Exp $"); + +#if !defined(KLD_MODULE) +#include "opt_ipfw.h" +#include "opt_ipdn.h" +#include "opt_inet.h" +#ifndef INET +#error IPFIREWALL requires INET. +#endif /* INET */ +#endif /* KLD_MODULE */ +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "missing.h" + +int fw_enable = 1; +#ifdef INET6 +int fw6_enable = 1; +#endif + +int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); + +/* Divert hooks. */ +ip_divert_packet_t *ip_divert_ptr = NULL; + +/* ng_ipfw hooks. */ +ng_ipfw_input_t *ng_ipfw_input_p = NULL; + +/* Forward declarations. 
*/ +static int ipfw_divert(struct mbuf **, int, int); +#define DIV_DIR_IN 1 +#define DIV_DIR_OUT 0 + +int +ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, + struct inpcb *inp) +{ + struct ip_fw_args args; + struct ng_ipfw_tag *ng_tag; + struct m_tag *dn_tag; + int ipfw = 0; + int divert; + int tee; +#ifdef IPFIREWALL_FORWARD + struct m_tag *fwd_tag; +#endif + + KASSERT(dir == PFIL_IN, ("ipfw_check_in wrong direction!")); + + bzero(&args, sizeof(args)); + + ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, + NULL); + if (ng_tag != NULL) { + KASSERT(ng_tag->dir == NG_IPFW_IN, + ("ng_ipfw tag with wrong direction")); + args.rule = ng_tag->rule; + m_tag_delete(*m0, (struct m_tag *)ng_tag); + } + +again: + dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); + if (dn_tag != NULL){ + struct dn_pkt_tag *dt; + + dt = (struct dn_pkt_tag *)(dn_tag+1); + args.rule = dt->rule; + + m_tag_delete(*m0, dn_tag); + } + + args.m = *m0; + args.inp = inp; + ipfw = ipfw_chk(&args); + *m0 = args.m; + tee = 0; + + KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", + __func__)); + + switch (ipfw) { + case IP_FW_PASS: + if (args.next_hop == NULL) + goto pass; + +#ifdef IPFIREWALL_FORWARD + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) + goto drop; + bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in)); + m_tag_prepend(*m0, fwd_tag); + + if (in_localip(args.next_hop->sin_addr)) + (*m0)->m_flags |= M_FASTFWD_OURS; + goto pass; +#endif + break; /* not reached */ + + case IP_FW_DENY: + goto drop; + break; /* not reached */ + + case IP_FW_DUMMYNET: + if (ip_dn_io_ptr == NULL) + goto drop; + if (mtod(*m0, struct ip *)->ip_v == 4) + ip_dn_io_ptr(m0, DN_TO_IP_IN, &args); + else if (mtod(*m0, struct ip *)->ip_v == 6) + ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args); + if (*m0 != NULL) + goto again; + return 0; /* packet consumed */ + + case IP_FW_TEE: + tee = 1; + /* fall through */ + + 
case IP_FW_DIVERT: + divert = ipfw_divert(m0, DIV_DIR_IN, tee); + if (divert) { + *m0 = NULL; + return 0; /* packet consumed */ + } else { + args.rule = NULL; + goto again; /* continue with packet */ + } + + case IP_FW_NGTEE: + if (!NG_IPFW_LOADED) + goto drop; + (void)ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 1); + goto again; /* continue with packet */ + + case IP_FW_NETGRAPH: + if (!NG_IPFW_LOADED) + goto drop; + return ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 0); + + case IP_FW_NAT: + goto again; /* continue with packet */ + + case IP_FW_REASS: + goto again; + + default: + KASSERT(0, ("%s: unknown retval", __func__)); + } + +drop: + if (*m0) + m_freem(*m0); + *m0 = NULL; + return (EACCES); +pass: + return 0; /* not filtered */ +} + +int +ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, + struct inpcb *inp) +{ + struct ip_fw_args args; + struct ng_ipfw_tag *ng_tag; + struct m_tag *dn_tag; + int ipfw = 0; + int divert; + int tee; +#ifdef IPFIREWALL_FORWARD + struct m_tag *fwd_tag; +#endif + + KASSERT(dir == PFIL_OUT, ("ipfw_check_out wrong direction!")); + + bzero(&args, sizeof(args)); + + ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, + NULL); + if (ng_tag != NULL) { + KASSERT(ng_tag->dir == NG_IPFW_OUT, + ("ng_ipfw tag with wrong direction")); + args.rule = ng_tag->rule; + m_tag_delete(*m0, (struct m_tag *)ng_tag); + } + +again: + dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); + if (dn_tag != NULL) { + struct dn_pkt_tag *dt; + + dt = (struct dn_pkt_tag *)(dn_tag+1); + args.rule = dt->rule; + + m_tag_delete(*m0, dn_tag); + } + + args.m = *m0; + args.oif = ifp; + args.inp = inp; + ipfw = ipfw_chk(&args); + *m0 = args.m; + tee = 0; + + KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", + __func__)); + + switch (ipfw) { + case IP_FW_PASS: + if (args.next_hop == NULL) + goto pass; +#ifdef IPFIREWALL_FORWARD + /* Overwrite existing tag. 
*/ + fwd_tag = m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); + if (fwd_tag == NULL) { + fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, + sizeof(struct sockaddr_in), M_NOWAIT); + if (fwd_tag == NULL) + goto drop; + } else + m_tag_unlink(*m0, fwd_tag); + bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in)); + m_tag_prepend(*m0, fwd_tag); + + if (in_localip(args.next_hop->sin_addr)) + (*m0)->m_flags |= M_FASTFWD_OURS; + goto pass; +#endif + break; /* not reached */ + + case IP_FW_DENY: + goto drop; + break; /* not reached */ + + case IP_FW_DUMMYNET: + if (ip_dn_io_ptr == NULL) + break; + if (mtod(*m0, struct ip *)->ip_v == 4) + ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args); + else if (mtod(*m0, struct ip *)->ip_v == 6) + ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args); + if (*m0 != NULL) + goto again; + return 0; /* packet consumed */ + + break; + + case IP_FW_TEE: + tee = 1; + /* fall through */ + + case IP_FW_DIVERT: + divert = ipfw_divert(m0, DIV_DIR_OUT, tee); + if (divert) { + *m0 = NULL; + return 0; /* packet consumed */ + } else { + args.rule = NULL; + goto again; /* continue with packet */ + } + + case IP_FW_NGTEE: + if (!NG_IPFW_LOADED) + goto drop; + (void)ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 1); + goto again; /* continue with packet */ + + case IP_FW_NETGRAPH: + if (!NG_IPFW_LOADED) + goto drop; + return ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 0); + + case IP_FW_NAT: + goto again; /* continue with packet */ + + case IP_FW_REASS: + goto again; + + default: + KASSERT(0, ("%s: unknown retval", __func__)); + } + +drop: + if (*m0) + m_freem(*m0); + *m0 = NULL; + return (EACCES); +pass: + return 0; /* not filtered */ +} + +static int +ipfw_divert(struct mbuf **m, int incoming, int tee) +{ + /* + * ipfw_chk() has already tagged the packet with the divert tag. + * If tee is set, copy packet and return original. + * If not tee, consume packet and send it to divert socket. 
+ */ + struct mbuf *clone, *reass; + struct ip *ip; + int hlen; + + reass = NULL; + + /* Is divert module loaded? */ + if (ip_divert_ptr == NULL) + goto nodivert; + + /* Cloning needed for tee? */ + if (tee) + clone = m_dup(*m, M_DONTWAIT); + else + clone = *m; + + /* In case m_dup was unable to allocate mbufs. */ + if (clone == NULL) + goto teeout; + + /* + * Divert listeners can only handle non-fragmented packets. + * However when tee is set we will *not* de-fragment the packets; + * Doing do would put the reassembly into double-jeopardy. On top + * of that someone doing a tee will probably want to get the packet + * in its original form. + */ + ip = mtod(clone, struct ip *); + if (!tee && ip->ip_off & (IP_MF | IP_OFFMASK)) { + + /* Reassemble packet. */ + reass = ip_reass(clone); + + /* + * IP header checksum fixup after reassembly and leave header + * in network byte order. + */ + if (reass != NULL) { + ip = mtod(reass, struct ip *); + hlen = ip->ip_hl << 2; + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + ip->ip_sum = 0; + if (hlen == sizeof(struct ip)) + ip->ip_sum = in_cksum_hdr(ip); + else + ip->ip_sum = in_cksum(reass, hlen); + clone = reass; + } else + clone = NULL; + } else { + /* Convert header to network byte order. */ + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + } + + /* Do the dirty job... */ + if (clone && ip_divert_ptr != NULL) + ip_divert_ptr(clone, incoming); + +teeout: + /* + * For tee we leave the divert tag attached to original packet. + * It will then continue rule evaluation after the tee rule. 
+ */ + if (tee) + return 0; + + /* Packet diverted and consumed */ + return 1; + +nodivert: + m_freem(*m); + return 1; +} + +static int +ipfw_hook(void) +{ + struct pfil_head *pfh_inet; + + pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); + if (pfh_inet == NULL) + return ENOENT; + + pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); + pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + + return 0; +} + +static int +ipfw_unhook(void) +{ + struct pfil_head *pfh_inet; + + pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); + if (pfh_inet == NULL) + return ENOENT; + + pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet); + pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet); + + return 0; +} + +#ifdef INET6 +static int +ipfw6_hook(void) +{ + struct pfil_head *pfh_inet6; + + pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); + if (pfh_inet6 == NULL) + return ENOENT; + + pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); + pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + + return 0; +} + +static int +ipfw6_unhook(void) +{ + struct pfil_head *pfh_inet6; + + pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); + if (pfh_inet6 == NULL) + return ENOENT; + + pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, pfh_inet6); + pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, pfh_inet6); + + return 0; +} +#endif /* INET6 */ + +int +ipfw_chg_hook(SYSCTL_HANDLER_ARGS) +{ + int enable = *(int *)arg1; + int error; + + error = sysctl_handle_int(oidp, &enable, 0, req); + if (error) + return (error); + + enable = (enable) ? 
1 : 0; + + if (enable == *(int *)arg1) + return (0); + + if (arg1 == &fw_enable) { + if (enable) + error = ipfw_hook(); + else + error = ipfw_unhook(); + } +#ifdef INET6 + if (arg1 == &fw6_enable) { + if (enable) + error = ipfw6_hook(); + else + error = ipfw6_unhook(); + } +#endif + + if (error) + return (error); + + *(int *)arg1 = enable; + + return (0); +} + +static int +ipfw_modevent(module_t mod, int type, void *unused) +{ + int err = 0; + + switch (type) { + case MOD_LOAD: + if ((err = ipfw_init()) != 0) { + printf("ipfw_init() error\n"); + break; + } + if ((err = ipfw_hook()) != 0) { + printf("ipfw_hook() error\n"); + break; + } +#ifdef INET6 + if ((err = ipfw6_hook()) != 0) { + printf("ipfw_hook() error\n"); + break; + } +#endif + break; + + case MOD_UNLOAD: + if ((err = ipfw_unhook()) > 0) + break; +#ifdef INET6 + if ((err = ipfw6_unhook()) > 0) + break; +#endif + ipfw_destroy(); + break; + + default: + return EOPNOTSUPP; + break; + } + return err; +} + +static moduledata_t ipfwmod = { + "ipfw", + ipfw_modevent, + 0 +}; +DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY - 256); +MODULE_VERSION(ipfw, 2); diff --git a/dummynet/ipfw2_mod.c b/dummynet/ipfw2_mod.c new file mode 100644 index 0000000..ff5a92f --- /dev/null +++ b/dummynet/ipfw2_mod.c @@ -0,0 +1,545 @@ +/* + * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Id$ + * + * The main interface to build ipfw+dummynet as a linux module. + * (and possibly as a windows module as well, though that part + * is not complete yet). + * + * The control interface uses the sockopt mechanism + * on a socket(AF_INET, SOCK_RAW, IPPROTO_RAW). + * + * The data interface uses the netfilter interface, at the moment + * hooked to the PRE_ROUTING and POST_ROUTING hooks. + * Unfortunately the netfilter interface is a moving target, + * so we need a set of macros to adapt to the various cases. + * + * In the netfilter hook we just mark packet as 'QUEUE' and then + * let the queue handler to do the whole work (filtering and + * possibly emulation). + * As we receive packets, we wrap them with an mbuf descriptor + * so the existing ipfw+dummynet code runs unmodified. 
+ */ + +#include +#include /* sizeof struct mbuf */ + +#ifdef __linux__ +#include +#include +#include +#include /* NF_IP_PRI_FILTER */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) +#include /* nf_queue */ +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +#define __read_mostly +#endif + +#endif /* !__linux__ */ + +#include /* in_addr */ +#include /* ip_fw_ctl_t, ip_fw_chk_t */ +#include /* ip_dn_ctl_t, ip_dn_io_t */ +#include /* PFIL_IN, PFIL_OUT */ + +/* + * Here we allocate some global variables used in the firewall. + */ +ip_dn_ctl_t *ip_dn_ctl_ptr; +ip_fw_ctl_t *ip_fw_ctl_ptr; + +ip_dn_io_t *ip_dn_io_ptr; +ip_fw_chk_t *ip_fw_chk_ptr; + +void (*bridge_dn_p)(struct mbuf *, struct ifnet *); + +/* + * Glue code to implement the registration of children with the parent. + * Each child should call my_mod_register() when linking, so that + * module_init() and module_exit() can call init_children() and + * fini_children() to provide the necessary initialization. + */ +#include +struct mod_args { + struct moduledata *mod; + const char *name; + int order; +}; + +static unsigned int mod_idx; +static struct mod_args mods[10]; /* hard limit to 10 modules */ + +/* + * my_mod_register should be called automatically as the init + * functions in the submodules. Unfortunately this compiler/linker + * trick is not supported yet so we call it manually. + */ +int +my_mod_register(struct moduledata *mod, const char *name, int order) +{ + struct mod_args m = { mod, name, order }; + + printf("%s %s called\n", __FUNCTION__, name); + if (mod_idx < sizeof(mods) / sizeof(mods[0])) + mods[mod_idx++] = m; + return 0; +} + +static void +init_children(void) +{ + unsigned int i; + + /* Call the functions registered at init time. 
*/ + printf("%s mod_idx value %d\n", __FUNCTION__, mod_idx); + for (i = 0; i < mod_idx; i++) { + printf("+++ start module %d %s %s at %p order 0x%x\n", + i, mods[i].name, mods[i].mod->name, + mods[i].mod, mods[i].order); + mods[i].mod->evhand(NULL, MOD_LOAD, mods[i].mod->priv); + } +} + +static void +fini_children(void) +{ + int i; + + /* Call the functions registered at init time. */ + for (i = mod_idx - 1; i >= 0; i--) { + printf("+++ end module %d %s %s at %p order 0x%x\n", + i, mods[i].name, mods[i].mod->name, + mods[i].mod, mods[i].order); + mods[i].mod->evhand(NULL, MOD_UNLOAD, mods[i].mod->priv); + } +} +/* end of module bindinghelper functions */ + +/* + * Control hooks: + * ipfw_ctl_h() is a wrapper for linux to FreeBSD sockopt call convention. + * then call the ipfw handler in order to manage requests. + * In turn this is called by the linux set/get handlers. + */ +static int +ipfw_ctl_h(struct sockopt *s, int cmd, int dir, int len, void __user *user) +{ + struct thread t; + int ret = EINVAL; + + memset(s, 0, sizeof(s)); + s->sopt_name = cmd; + s->sopt_dir = dir; + s->sopt_valsize = len; + s->sopt_val = user; + + /* sopt_td is not used but it is referenced */ + memset(&t, 0, sizeof(t)); + s->sopt_td = &t; + + printf("%s called with cmd %d len %d\n", __FUNCTION__, cmd, len); + + if (cmd < IP_DUMMYNET_CONFIGURE && ip_fw_ctl_ptr) + ret = ip_fw_ctl_ptr(s); + else if (cmd >= IP_DUMMYNET_CONFIGURE && ip_dn_ctl_ptr) + ret = ip_dn_ctl_ptr(s); + + return -ret; /* errors are < 0 on linux */ +} + +#ifdef _WIN32 + +void +netisr_dispatch(int __unused num, struct mbuf *m) +{ +} + +int +ip_output(struct mbuf *m, struct mbuf __unused *opt, + struct route __unused *ro, int __unused flags, + struct ip_moptions __unused *imo, struct inpcb __unused *inp) +{ + netisr_dispatch(0, m); + return 0; +} + +#else /* this is the linux glue */ +/* + * setsockopt hook has no return value other than the error code. 
+ */ +static int +do_ipfw_set_ctl(struct sock __unused *sk, int cmd, + void __user *user, unsigned int len) +{ + struct sockopt s; /* pass arguments */ + + return ipfw_ctl_h(&s, cmd, SOPT_SET, len, user); +} + +/* + * getsockopt can return a block of data in response. + * The sopt_valsize left in the sockopt by the handler is copied + * back into *len. + */ +static int +do_ipfw_get_ctl(struct sock __unused *sk, + int cmd, void __user *user, int *len) +{ + struct sockopt s; /* pass arguments */ + int ret = ipfw_ctl_h(&s, cmd, SOPT_GET, *len, user); + + *len = s.sopt_valsize; /* return length back to the caller */ + return ret; +} + +/* + * declare our [get|set]sockopt hooks + */ +static struct nf_sockopt_ops ipfw_sockopts = { + .pf = PF_INET, + .set_optmin = _IPFW_SOCKOPT_BASE, + .set_optmax = _IPFW_SOCKOPT_END, + .set = do_ipfw_set_ctl, + .get_optmin = _IPFW_SOCKOPT_BASE, + .get_optmax = _IPFW_SOCKOPT_END, + .get = do_ipfw_get_ctl, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) + .owner = THIS_MODULE, +#endif +}; + +/* + * declare hook to grab packets from the netfilter interface. + * The NF_* names change in different versions of linux, in some + * cases they are #defines, in others they are enum, so we + * need to adapt. + */ +#ifndef NF_IP_PRE_ROUTING +#define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING +#endif +#ifndef NF_IP_POST_ROUTING +#define NF_IP_POST_ROUTING NF_INET_POST_ROUTING +#endif + +/* + * The main netfilter hook. + * To make life simple, we queue everything and then do all the + * decision in the queue handler.
+ * + * XXX note that in 2.4 the skbuf is passed as sk_buff** + */ +static unsigned int +call_ipfw(unsigned int __unused hooknum, +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // in 2.6.22 we have ** + struct sk_buff __unused **skb, +#else + struct sk_buff __unused *skb, +#endif + const struct net_device __unused *in, + const struct net_device __unused *out, + int __unused (*okfn)(struct sk_buff *)) +{ + return NF_QUEUE; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#define NF_STOP NF_ACCEPT +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +#define nf_queue_entry nf_info /* for simplicity */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* unsure on the exact boundary */ +/* on 2.4 we use nf_info */ +#define QH_ARGS struct sk_buff *skb, struct nf_info *info, void *data +#else /* 2.6.1.. 2.6.24 */ +#define QH_ARGS struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data +#endif + +#define DEFINE_SKB /* nothing, already an argument */ +#define REINJECT(_inf, _verd) nf_reinject(skb, _inf, _verd) + +#else /* 2.6.25 and above */ + +#define QH_ARGS struct nf_queue_entry *info, unsigned int queuenum +#define DEFINE_SKB struct sk_buff *skb = info->skb; +#define REINJECT(_inf, _verd) nf_reinject(_inf, _verd) +#endif + +/* + * used by dummynet when dropping packets: reinject the queued packet + * of mbuf m (m->queue_entry) with an NF_DROP verdict. + * Before 2.6.25 REINJECT() also needs a local 'skb'; that is the + * sk_buff stored in the mbuf wrapper (m->m_skb), not the mbuf itself. + * XXX use dummynet_send() + */ +void +reinject_drop(struct mbuf* m) +{ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* unsure on the exact boundary */ + struct sk_buff *skb = m->m_skb; +#endif + REINJECT(m->queue_entry, NF_DROP); +} + +/* + * The real call to the firewall. nf_queue_entry points to the skbuf, + * and eventually we need to return both through nf_reinject().
+ */ +static int +ipfw2_queue_handler(QH_ARGS) +{ + DEFINE_SKB /* no semicolon here, goes in the macro */ + int ret = 0; /* return value */ + struct mbuf *m; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + if (skb->nh.iph == NULL) { + printf("null dp, len %d reinject now\n", skb->len); + REINJECT(info, NF_ACCEPT); + return 0; + } +#endif + m = malloc(sizeof(*m), 0, 0); + if (m == NULL) { + printf("malloc fail, len %d reinject now\n", skb->len); + REINJECT(info, NF_ACCEPT); + return 0; + } + + m->m_skb = skb; + m->m_len = skb->len; /* len in this skbuf */ + m->m_pkthdr.len = skb->len; /* total packet len */ + m->m_pkthdr.rcvif = info->indev; + m->queue_entry = info; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + m->m_data = skb->nh.iph; +#else + m->m_data = skb_network_header(skb); +#endif + /* XXX add the interface */ + if (info->hook == NF_IP_PRE_ROUTING) { + ret = ipfw_check_in(NULL, &m, info->indev, PFIL_IN, NULL); + } else { + ret = ipfw_check_out(NULL, &m, info->outdev, PFIL_OUT, NULL); + } + + if (m != NULL) { /* Accept. reinject and free the mbuf */ + REINJECT(info, NF_STOP); + free(m, M_IPFW); + } else if (ret == 0) { + /* dummynet has kept the packet, will reinject later. */ + } else { + /* + * Packet dropped by ipfw or dummynet, reinject as NF_DROP + * mbuf already released by ipfw itself + */ + REINJECT(info, NF_DROP); + } + return 0; +} + +struct route; +struct ip_moptions; +struct inpcb; + + +/* XXX should include prototypes for netisr_dispatch and ip_output */ +/* + * The reinjection routine after a packet comes out from dummynet. + * We must update the skb timestamp so ping reports the right time. + */ +void +netisr_dispatch(int num, struct mbuf *m) +{ + struct nf_queue_entry *info = m->queue_entry; + struct sk_buff *skb = m->m_skb; /* always used */ + + free(m, M_IPFW); + KASSERT((info != NULL), ("%s info null!\n", __FUNCTION__)); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) // XXX above 2.6.x ? 
+ __net_timestamp(skb); /* update timestamp */ +#endif + + /* XXX to obey one-pass, possibly call the queue handler here */ + REINJECT(info, ((num == -1)?NF_DROP:NF_STOP)); /* accept but no more firewall */ +} + +int +ip_output(struct mbuf *m, struct mbuf __unused *opt, + struct route __unused *ro, int __unused flags, + struct ip_moptions __unused *imo, struct inpcb __unused *inp) +{ + netisr_dispatch(0, m); + return 0; +} + + +/* + * Now prepare to hook the various functions. + * Linux 2.4 has a different API so we need some adaptation + * for register and unregister hooks + * + * the unregister function changed arguments between 2.6.22 and 2.6.24 + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +static int +nf_register_hooks(struct nf_hook_ops *ops, int n) +{ + int i, ret = 0; + for (i = 0; i < n; i++) { + ret = nf_register_hook(ops + i); + if (ret < 0) + break; + } + return ret; +} + +static void +nf_unregister_hooks(struct nf_hook_ops *ops, int n) +{ + int i; + for (i = 0; i < n; i++) { + nf_unregister_hook(ops + i); + } +} +#define REG_QH_ARG(fn) fn, NULL /* argument for nf_[un]register_queue_handler */ +#define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ +#define SET_MOD_OWNER + +#else /* linux >= 2.6.0 */ + +struct nf_queue_handler ipfw2_queue_handler_desc = { + .outfn = ipfw2_queue_handler, + .name = "ipfw2 dummynet queue", +}; +#define REG_QH_ARG(fn) &(fn ## _desc) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) +#define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ +#else +#define UNREG_QH_ARG(fn) , &(fn ## _desc) +#endif /* 2.6.0 < LINUX > 2.6.24 */ + +#define SET_MOD_OWNER .owner = THIS_MODULE, + +#endif /* !LINUX < 2.6.0 */ + +static struct nf_hook_ops ipfw_ops[] __read_mostly = { + { + .hook = call_ipfw, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_FILTER, + SET_MOD_OWNER + }, + { + .hook = call_ipfw, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = 
NF_IP_PRI_FILTER, + SET_MOD_OWNER + }, +}; +#endif /* !__linux__ */ + +/* descriptors for the children */ +extern moduledata_t *moddesc_ipfw; +extern moduledata_t *moddesc_dummynet; + +/* + * Module glue - init and exit function. + */ +static int __init +ipfw_module_init(void) +{ + int ret = 0; + + printf("%s called\n", __FUNCTION__); + + my_mod_register(moddesc_ipfw, "ipfw", 1); + my_mod_register(moddesc_dummynet, "dummynet", 2); + init_children(); + +#ifdef _WIN32 + return ret; + +#else /* linux hook */ + /* sockopt register, in order to talk with user space */ + ret = nf_register_sockopt(&ipfw_sockopts); + if (ret < 0) { + printf("error %d in nf_register_sockopt\n", ret); + goto clean_modules; + } + + /* queue handler registration, in order to get network + * packet under a private queue */ + ret = nf_register_queue_handler(PF_INET, REG_QH_ARG(ipfw2_queue_handler) ); + if (ret < 0) /* queue busy */ + goto unregister_sockopt; + + ret = nf_register_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); + if (ret < 0) + goto unregister_sockopt; + + printf("%s loaded\n", __FUNCTION__); + return 0; + + +/* handle errors on load */ +unregister_sockopt: + nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); + nf_unregister_sockopt(&ipfw_sockopts); + +clean_modules: + fini_children(); + printf("%s error\n", __FUNCTION__); + + return ret; +#endif /* linux */ +} + +/* module shutdown */ +static void __exit +ipfw_module_exit(void) +{ +#ifdef _WIN32 +#else /* linux hook */ + nf_unregister_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); + /* maybe drain the queue before unregistering ? 
*/ + nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); + nf_unregister_sockopt(&ipfw_sockopts); +#endif /* linux */ + + fini_children(); + + printf("%s unloaded\n", __FUNCTION__); +} + +#ifdef __linux__ +module_init(ipfw_module_init) +module_exit(ipfw_module_exit) +MODULE_LICENSE("GPL"); /* mandatory */ +#endif diff --git a/dummynet/ipfw_mod.c b/dummynet/ipfw_mod.c new file mode 100644 index 0000000..e69de29 diff --git a/dummynet/missing.h b/dummynet/missing.h new file mode 100644 index 0000000..76b8153 --- /dev/null +++ b/dummynet/missing.h @@ -0,0 +1,418 @@ +/* + * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * $Id$ + * Header for kernel variables and functions that are not available in + * userland. + */ + +#ifndef _MISSING_H_ +#define _MISSING_H_ + +#ifdef _WIN32 + +#ifndef DEFINE_SPINLOCK +#define DEFINE_SPINLOCK(x) FAST_MUTEX x +#endif +/* spinlock --> Guarded Mutex KGUARDED_MUTEX */ +/* http://www.reactos.org/wiki/index.php/Guarded_Mutex */ +#define spin_lock_init(_l) +#define spin_lock_bh(_l) +#define spin_unlock_bh(_l) + +#include /* bsd-compat.c */ +#include /* bsd-compat.c */ +#include /* local version */ + +#else /* __linux__ */ + +#include /* do_gettimeofday */ +#include /* local version */ +struct inpcb; + +/* + * Kernel locking support. + * FreeBSD uses mtx in dummynet.c, and rwlocks in ipfw.c + * + * In linux we use spinlock_bh to implement both. + */ + +#ifndef DEFINE_SPINLOCK /* this is for linux 2.4 */ +#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED +#endif + +#endif /* __linux__ */ + +#define rw_assert(a, b) +#define rw_destroy(_l) +#define rw_init(_l, msg) spin_lock_init(_l) +#define rw_rlock(_l) spin_lock_bh(_l) +#define rw_runlock(_l) spin_unlock_bh(_l) +#define rw_wlock(_l) spin_lock_bh(_l) +#define rw_wunlock(_l) spin_unlock_bh(_l) + +#define mtx_assert(a, b) +#define mtx_destroy(m) +#define mtx_init(m, a,b,c) spin_lock_init(m) +#define mtx_lock(_l) spin_lock_bh(_l) +#define mtx_unlock(_l) spin_unlock_bh(_l) + +/* end of locking support */ + +/* ethernet stuff */ +#define ETHERTYPE_IP 0x0800 /* IP protocol */ +#define ETHER_ADDR_LEN 6 /* length of an Ethernet address */ +struct ether_header { + u_char ether_dhost[ETHER_ADDR_LEN]; + u_char ether_shost[ETHER_ADDR_LEN]; + u_short ether_type; +}; + +#define ETHER_ADDR_LEN 6 /* length of an Ethernet address */ +#define ETHER_TYPE_LEN 2 /* length of the Ethernet type field */ +#define ETHER_HDR_LEN (ETHER_ADDR_LEN*2+ETHER_TYPE_LEN) + +/* ip_dummynet.c */ +#define __FreeBSD_version 500035 + +#ifdef __linux__ +struct moduledata; +int my_mod_register(struct moduledata *mod, const 
char *name, int order); + +/* define some macro for ip_dummynet */ + +struct malloc_type { +}; + +#define MALLOC_DEFINE(type, shortdesc, longdesc) \ + struct malloc_type type[1]; void *md_dummy_ ## type = type + +#define CTASSERT(x) + +#define log(_level, fmt, arg...) printk(KERN_ERR fmt, ##arg) + +/* + * gettimeofday would be in sys/time.h but it is not + * visible if _KERNEL is defined + */ +int gettimeofday(struct timeval *, struct timezone *); + +#else /* _WIN32 */ +#define MALLOC_DEFINE(a,b,c) +#endif /* _WIN32 */ + +extern int hz; +extern long tick; /* exists in 2.4 but not in 2.6 */ +extern int bootverbose; +extern time_t time_uptime; +extern struct timeval boottime; + +extern int max_linkhdr; +extern int ip_defttl; +extern u_long in_ifaddrhmask; /* mask for hash table */ +extern struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ + +/*-------------------------------------------------*/ + +/* define, includes and functions missing in linux */ +/* include and define */ +#include /* inet_ntoa */ + +struct mbuf; + +/* used by ip_dummynet.c */ +void reinject_drop(struct mbuf* m); + +#include /* error define */ +#include /* IFNAMESIZ */ + +/* + * some network structure can be defined in the bsd way + * by using the _FAVOR_BSD definition. This is not true + * for icmp structure. + * XXX struct icmp contains bsd names in + * /usr/include/netinet/ip_icmp.h + */ +#ifdef __linux__ +#define icmp_code code +#define icmp_type type + +/* linux in6_addr has no member __u6_addr + * replace the whole structure ? + */ +#define __u6_addr in6_u +#define __u6_addr32 u6_addr32 +#endif /* __linux__ */ + +/* defined in linux/sctp.h with no bsd definition */ +struct sctphdr { + uint16_t src_port; /* source port */ + uint16_t dest_port; /* destination port */ + uint32_t v_tag; /* verification tag of packet */ + uint32_t checksum; /* Adler32 C-Sum */ + /* chunks follow... 
*/ +}; + +/* missing definition */ +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_ACK 0x10 + +#define RTF_CLONING 0x100 /* generate new routes on use */ + +#define IPPROTO_OSPFIGP 89 /* OSPFIGP */ +#define IPPROTO_CARP 112 /* CARP */ +#ifndef _WIN32 +#define IPPROTO_IPV4 IPPROTO_IPIP /* for compatibility */ +#endif + +#define CARP_VERSION 2 +#define CARP_ADVERTISEMENT 0x01 + +#define PRIV_NETINET_IPFW 491 /* Administer IPFW firewall. */ + +#define IP_FORWARDING 0x1 /* most of ip header exists */ + +#define NETISR_IP 2 /* same as AF_INET */ + +#define PRIV_NETINET_DUMMYNET 494 /* Administer DUMMYNET. */ + +extern int securelevel; + +struct carp_header { +#if BYTE_ORDER == LITTLE_ENDIAN + u_int8_t carp_type:4, + carp_version:4; +#endif +#if BYTE_ORDER == BIG_ENDIAN + u_int8_t carp_version:4, + carp_type:4; +#endif +}; + +struct pim { + int dummy; /* windows compiler does not like empty definition */ +}; + +struct route { + struct rtentry *ro_rt; + struct sockaddr ro_dst; +}; + +struct ifaltq { + void *ifq_head; +}; + +/* + * ifnet->if_snd is used in ip_dummynet.c to take the transmission + * clock. + */ +#if defined( __linux__) +#define if_xname name +#define if_snd XXX +#elif defined( _WIN32 ) +/* used in ip_dummynet.c */ +struct ifnet { + char if_xname[IFNAMSIZ]; /* external name (name + unit) */ +// struct ifaltq if_snd; /* output queue (includes altq) */ +}; + +struct net_device { + char if_xname[IFNAMSIZ]; /* external name (name + unit) */ +}; +#endif + +/* involves mbufs */ +int in_cksum(struct mbuf *m, int len); +#define divert_cookie(mtag) 0 +#define divert_info(mtag) 0 +#define INADDR_TO_IFP(a, b) b = NULL +#define pf_find_mtag(a) NULL +#define pf_get_mtag(a) NULL +#ifndef _WIN32 +#define AF_LINK AF_ASH /* ? 
our sys/socket.h */ +#endif + +struct pf_mtag { + void *hdr; /* saved hdr pos in mbuf, for ECN */ + sa_family_t af; /* for ECN */ + u_int32_t qid; /* queue id */ +}; + +/* radix related */ + +struct radix_node { + caddr_t rn_key; /* object of search */ + caddr_t rn_mask; /* netmask, if present */ +}; + +/* missing kernel functions */ +char *inet_ntoa(struct in_addr ina); +int random(void); + +/* + * Return the result of a/b + * + * this is used in linux kernel space, + * since the 64bit division needs to + * be done using a macro + */ +int64_t +div64(int64_t a, int64_t b); + +char * +inet_ntoa_r(struct in_addr ina, char *buf); + +/* from bsd sys/queue.h */ +#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = TAILQ_FIRST((head)); \ + (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ + (var) = (tvar)) + +#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ + for ((var) = SLIST_FIRST((head)); \ + (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ + (var) = (tvar)) + +/* depending on linux version */ +#ifndef ETHERTYPE_IPV6 +#define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */ +#endif + +/*-------------------------------------------------*/ +#define RT_NUMFIBS 1 +extern u_int rt_numfibs; + +/* involves kernel locking function */ +#ifdef RTFREE +#undef RTFREE +#define RTFREE(a) fprintf(stderr, "RTFREE: commented out locks\n"); +#endif + +void getmicrouptime(struct timeval *tv); + +/* from sys/netinet/ip_output.c */ +struct ip_moptions; +struct route; +struct ip; + +struct mbuf *ip_reass(struct mbuf *); +u_short in_cksum_hdr(struct ip *); +int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, + struct ip_moptions *imo, struct inpcb *inp); + +/* from net/netisr.c */ +void netisr_dispatch(int num, struct mbuf *m); + +/* definition moved in missing.c */ +int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len); + +int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); + +/* defined in session.c */ 
+int priv_check(struct thread *td, int priv); + +int securelevel_ge(struct ucred *cr, int level); + +struct sysctl_oid; +struct sysctl_req; + +/* + * sysctl are mapped into /sys/module/ipfw_mod parameters + */ +#define CTLFLAG_RD 1 +#define CTLFLAG_RW 2 +#define CTLFLAG_SECURE3 0 // unsupported + +#ifdef _WIN32 +#define module_param_named(_name, _var, _ty, _perm) +#else +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +#define module_param_named(_name, _var, _ty, _perm) \ + //module_param(_name, _ty, 0644) +#endif +#endif /* __linux__ */ + +#define SYSCTL_DECL(_1) +#define SYSCTL_NODE(_1, _2, _3, _4, _5, _6) +#define _SYSCTL_BASE(_name, _var, _ty, _perm) \ + module_param_named(_name, *(_var), _ty, \ + ( (_perm) == CTLFLAG_RD) ? 0444: 0644 ) +#define SYSCTL_PROC(_base, _oid, _name, _mode, _var, _val, _desc, _a, _b) + +#define SYSCTL_INT(_base, _oid, _name, _mode, _var, _val, _desc) \ + _SYSCTL_BASE(_name, _var, int, _mode) + +#define SYSCTL_LONG(_base, _oid, _name, _mode, _var, _val, _desc) \ + _SYSCTL_BASE(_name, _var, long, _mode) + +#define SYSCTL_ULONG(_base, _oid, _name, _mode, _var, _val, _desc) \ + _SYSCTL_BASE(_name, _var, ulong, _mode) + +#define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \ + // _SYSCTL_BASE(_name, _var, uint, _mode) + +#define SYSCTL_HANDLER_ARGS \ + struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req +int sysctl_handle_int(SYSCTL_HANDLER_ARGS); +int sysctl_handle_long(SYSCTL_HANDLER_ARGS); + +void ether_demux(struct ifnet *ifp, struct mbuf *m); + +int ether_output_frame(struct ifnet *ifp, struct mbuf *m); + +void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); + +void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu); + +void rtfree(struct rtentry *rt); + +u_short in_cksum_skip(struct mbuf *m, int len, int skip); + +#ifdef INP_LOCK_ASSERT +#undef INP_LOCK_ASSERT +#define INP_LOCK_ASSERT(a) +#endif + +int rn_inithead(void **head, int off); + +int jailed(struct ucred 
*cred); + +/* +* Return 1 if an internet address is for a ``local'' host +* (one to which we have a connection). If subnetsarelocal +* is true, this includes other subnets of the local net. +* Otherwise, it includes only the directly-connected (sub)nets. +*/ +int in_localaddr(struct in_addr in); + +/* the prototype is already in the headers */ +//int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); + +int fnmatch(const char *pattern, const char *string, int flags); + +#endif /* !_MISSING_H_ */ diff --git a/glue.h b/glue.h new file mode 100644 index 0000000..8a6a014 --- /dev/null +++ b/glue.h @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +/* + * + * $Id$ + * + * glue code to adapt the FreeBSD version to linux and windows, + * userland and kernel. + * This is included before any other headers, so we do not have + * a chance to override any #define that should appear in other + * headers. + */ + +#ifndef _GLUE_H +#define _GLUE_H + +/* + * common definitions to allow portability + */ +#ifndef __FBSDID +#define __FBSDID(x) +#endif /* FBSDID */ + +/* + * emulation of FreeBSD's sockopt and thread + * This was in sockopt.h + */ +enum sopt_dir { SOPT_GET, SOPT_SET }; + +#ifndef KERNEL_MODULE /* Userland part */ + +#include /* linux needs this in addition to sys/types.h */ + +#include /* for size_t */ +#include +#include + +#include + +#else /* KERNEL_MODULE, kernel part */ + +#ifndef _WIN32 +#include + +#define ifnet net_device /* remap */ +#define _KERNEL # make kernel structure visible +#define KLD_MODULE # add the module glue +#define INET # want inet support + +#include /* linux kernel */ +#include /* linux kernel */ + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) // or 2.4.x +#include /* linux/msg.h require this */ +#include /* just MAX_ADDR_LEN 8 on 2.4 32 on 2.6, also brings in byteorder */ +#endif + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // under 2.6.22 compilation is required by msg.h +#include +#endif +#include /* XXX m_type define conflict with include/sys/mbuf.h, + * so early include this file (to be solved) */ +#include +#include /* struct in_addr */ +#include +/* + * LIST_HEAD in queue.h conflict with linux/list.h + * some previous linux include need list.h definition + */ +#undef LIST_HEAD + +#define IF_NAMESIZE 16 +typedef uint32_t in_addr_t; + +#define printf(fmt, arg...) printk(KERN_ERR fmt, ##arg) + +#endif /* !_WIN32 */ +#endif /* KERNEL_MODULE */ + +/* + * In windows, we need to emulate the sockopt interface + * so also the userland needs to have the struct sockopt defined. 
+ * No need to declare struct thread on linux, but we need on windows. + */ + +struct thread { + void *sopt_td; + void *td_ucred; +}; + +struct sockopt { + enum sopt_dir sopt_dir; /* is this a get or a set? */ + int sopt_level; /* second arg of [gs]etsockopt */ + int sopt_name; /* third arg of [gs]etsockopt */ + void *sopt_val; /* fourth arg of [gs]etsockopt */ + size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */ + struct thread *sopt_td; /* calling thread or null if kernel */ +}; + + +/* This must be included here after list.h */ +#include /* both the kernel side and nat.c needs this */ + +#ifndef KERNEL_MODULE + +/* define internals for struct in6_addr netinet/in6.h on FreeBSD */ +#define __u6_addr in6_u +#define __u6_addr32 u6_addr32 +/* define missing type for ipv6 (linux 2.6.28) */ +#define in6_u __in6_u + +/* missing in linux netinet/ip.h */ +#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */ +#define IPTOS_ECN_CE 0x03 /* congestion experienced */ + +/* defined in freebsd netinet/icmp6.h */ +#define ICMP6_MAXTYPE 201 + +/* on freebsd sys/socket.h pf specific */ +#define NET_RT_IFLIST 3 /* survey interface list */ + +/* on freebsd net/if.h XXX used */ +struct if_data { + + /* ... */ + u_long ifi_mtu; /* maximum transmission unit */ +}; + +/* + * Message format for use in obtaining information about interfaces + * from getkerninfo and the routing socket. 
+ * This is used in nat.c + */ +struct if_msghdr { + u_short ifm_msglen; /* to skip over non-understood messages */ + u_char ifm_version; /* future binary compatibility */ + u_char ifm_type; /* message type */ + int ifm_addrs; /* like rtm_addrs */ + int ifm_flags; /* value of if_flags */ + u_short ifm_index; /* index for associated ifp */ + struct if_data ifm_data;/* statistics and other data about if */ +}; + +/* + * Message format for use in obtaining information about interface addresses + * from getkerninfo and the routing socket + */ +struct ifa_msghdr { + u_short ifam_msglen; /* to skip over non-understood messages */ + u_char ifam_version; /* future binary compatibility */ + u_char ifam_type; /* message type */ + int ifam_addrs; /* like rtm_addrs */ + int ifam_flags; /* value of ifa_flags */ + u_short ifam_index; /* index for associated ifp */ + int ifam_metric; /* value of ifa_metric */ +}; + +#ifndef NO_RTM /* conflicting with netlink */ +/* missing in net/route.h */ +#define RTM_VERSION 5 /* Up the ante and ignore older versions */ +#define RTM_IFINFO 0xe /* iface going up/down etc. */ +#define RTM_NEWADDR 0xc /* address being added to iface */ +#define RTA_IFA 0x20 /* interface addr sockaddr present */ +#endif /* NO_RTM */ + +/* SA_SIZE is used in the userland nat.c modified */ +#define SA_SIZE(sa) \ + ( (!(sa) ) ? 
\ + sizeof(long) : \ + 1 + ( (sizeof(struct sockaddr) - 1) | (sizeof(long) - 1) ) ) + +/* sys/time.h */ +/* + * Getkerninfo clock information structure + */ +struct clockinfo { + int hz; /* clock frequency */ + int tick; /* micro-seconds per hz tick */ + int spare; + int stathz; /* statistics clock frequency */ + int profhz; /* profiling clock frequency */ +}; + + +/* + * linux does not have heapsort + */ +#define heapsort(_a, _b, _c, _d) qsort(_a, _b, _c, _d) + +#define setprogname(x) /* not present in linux */ + +extern int optreset; /* not present in linux */ + +size_t strlcpy(char * dst, const char * src, size_t siz); +long long int +strtonum(const char *nptr, long long minval, long long maxval, + const char **errstr); + +int +sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, + size_t newlen); + +#else /* KERNEL_MODULE */ + +/* linux and windows kernel do not have bcopy ? */ +#define bcopy(_s, _d, _l) memcpy(_d, _s, _l) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) // or 2.4.x +#include +#endif + +/* definitions useful for the kernel side */ + +struct route_in6 { }; + +#endif /* KERNEL_MODULE */ + +/* + * List of values used for set/getsockopt options. + * The base value on FreeBSD is defined as a macro, + * if not available we will use our own enum. + * The TABLE_BASE value is used in the kernel. 
+ */ +#ifndef IP_FW_TABLE_ADD +#define _IPFW_SOCKOPT_BASE 100 /* 40 on freebsd */ +enum ipfw_msg_type { + IP_FW_TABLE_ADD = _IPFW_SOCKOPT_BASE, + IP_FW_TABLE_DEL, + IP_FW_TABLE_FLUSH, + IP_FW_TABLE_GETSIZE, + IP_FW_TABLE_LIST, + + IP_FW_ADD = _IPFW_SOCKOPT_BASE + 10, + IP_FW_DEL, + IP_FW_FLUSH, + IP_FW_ZERO, + IP_FW_GET, + IP_FW_RESETLOG, + + IP_FW_NAT_CFG, + IP_FW_NAT_DEL, + IP_FW_NAT_GET_CONFIG, + IP_FW_NAT_GET_LOG, + + IP_DUMMYNET_CONFIGURE, + IP_DUMMYNET_DEL , + IP_DUMMYNET_FLUSH, + /* 63 is missing */ + IP_DUMMYNET_GET = _IPFW_SOCKOPT_BASE + 24, + _IPFW_SOCKOPT_END +}; +#endif /* IP_FW_TABLE_ADD */ + +#endif /* !_GLUE_H */ diff --git a/include_e/altq/if_altq.h b/include_e/altq/if_altq.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/arpa/inet.h b/include_e/arpa/inet.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/machine/in_cksum.h b/include_e/machine/in_cksum.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/net/ethernet.h b/include_e/net/ethernet.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/net/netisr.h b/include_e/net/netisr.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/net/pf_mtag.h b/include_e/net/pf_mtag.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/net/radix.h b/include_e/net/radix.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/ether.h b/include_e/netinet/ether.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/icmp6.h b/include_e/netinet/icmp6.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/if_ether.h b/include_e/netinet/if_ether.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/in.h b/include_e/netinet/in.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/in_pcb.h b/include_e/netinet/in_pcb.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/in_var.h 
b/include_e/netinet/in_var.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/ip_carp.h b/include_e/netinet/ip_carp.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/ip_var.h b/include_e/netinet/ip_var.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/pim.h b/include_e/netinet/pim.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/sctp.h b/include_e/netinet/sctp.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/tcp_timer.h b/include_e/netinet/tcp_timer.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/tcpip.h b/include_e/netinet/tcpip.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet/udp_var.h b/include_e/netinet/udp_var.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/netinet6/ip6_var.h b/include_e/netinet6/ip6_var.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/opt_inet6.h b/include_e/opt_inet6.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/opt_ipfw.h b/include_e/opt_ipfw.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/opt_ipsec.h b/include_e/opt_ipsec.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/opt_mac.h b/include_e/opt_mac.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/opt_mbuf_stress_test.h b/include_e/opt_mbuf_stress_test.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/opt_param.h b/include_e/opt_param.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/_lock.h b/include_e/sys/_lock.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/_mutex.h b/include_e/sys/_mutex.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/jail.h b/include_e/sys/jail.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/limits.h b/include_e/sys/limits.h new file mode 
100644 index 0000000..e69de29 diff --git a/include_e/sys/lock.h b/include_e/sys/lock.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/mutex.h b/include_e/sys/mutex.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/priv.h b/include_e/sys/priv.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/proc.h b/include_e/sys/proc.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/rwlock.h b/include_e/sys/rwlock.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/socket.h b/include_e/sys/socket.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/socketvar.h b/include_e/sys/socketvar.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/sysctl.h b/include_e/sys/sysctl.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/time.h b/include_e/sys/time.h new file mode 100644 index 0000000..e69de29 diff --git a/include_e/sys/ucred.h b/include_e/sys/ucred.h new file mode 100644 index 0000000..e69de29 diff --git a/ipfw-cleanup b/ipfw-cleanup new file mode 100755 index 0000000..429328a --- /dev/null +++ b/ipfw-cleanup @@ -0,0 +1,55 @@ +#!/bin/sh +# +# Marta Carbone +# Copyright (C) 2009 Universita` di Pisa +# $Id$ +# +# This script parses the ipfw rules +# and removes the old ones. +# +# The ipfw output is parsed and each time +# value stored as comment is compared against +# the current time. +# If the time value is older than current, +# the rules and related pipes will be deleted. +# +# $Id$ + +RULE_LIST="ipfw show" +# Get $NOW referred to UTC +NOW=`date -u +%s` + +# check for module existence +/sbin/lsmod | grep ipfw +if [ x"$?" 
== x"1" ]; then + echo "ipfw module does not exist"; + exit 0; +fi + +${RULE_LIST} | +awk ' + BEGIN { + print now a "Start to clean rules "; + cleaned=0; + } + + # delete rules and pipes + function delete_rule(rule_id) { + command="/sbin/ipfw delete " rule_id "; ipfw pipe delete " rule_id; + system(command); + } + + # awk main body + /\/\/\ [0-9]*/ { # select timeout string + + timeout=$13; + + if (now > timeout) { + delete_rule($1); + cleaned++; + } + } + + END { print " " cleaned " rules cleaned"; + } +' now=${NOW} diff --git a/ipfw-slice.spec b/ipfw-slice.spec new file mode 100644 index 0000000..bff2031 --- /dev/null +++ b/ipfw-slice.spec @@ -0,0 +1,60 @@ +# +# $Id$ +# +# TODO: +# restart crond +# modprobe ipfw_mod.ko (depmod ?) +# +%define url $URL: http://onelab1.iet.unipi.it/svn/trunk/ipfw-slice.spec $ + +# Marta Carbone +# 2009 - Universita` di Pisa +# License is BSD. + +%define name ipfw-slice +%define version 0.9 +%define taglevel 1 + +%define release %{kernel_version}.%{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} +%define kernel_id_arch %{kernel_version}-%{kernel_release}-%{kernel_arch} +%define kernel_id %{kernel_version}-%{kernel_release} + +Summary: ipfw and dummynet for Linux +Name: %{name} +Version: %{version} +Release: %{release} +License: BSD +Group: System Environment/Kernel +Source0: %{name}-%{version}.tar.bz2 +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot + +Vendor: unipi +Packager: PlanetLab +Distribution: PlanetLab %{plrelease} +URL: %(echo %{url} | cut -d ' ' -f 2) + +%description +the frontend part of the ipfw planetlab package + +%prep +%setup + +%build +rm -rf $RPM_BUILD_ROOT + +%install +install -D -m 755 slice/netconfig $RPM_BUILD_ROOT/sbin/netconfig +install -D -m 755 slice/ipfw.8.gz $RPM_BUILD_ROOT/%{_mandir}/man8/ipfw.8.gz + +%clean +rm -rf $RPM_BUILD_ROOT + +# here there is a list of the final installation directories +%files +%defattr(-,root,root) +/sbin/netconfig +%{_mandir}/man8/ipfw.8* + +%changelog +* 
Thu Jun 25 2009 Marta Carbone +- Initial release diff --git a/ipfw.cron b/ipfw.cron new file mode 100644 index 0000000..f6a6486 --- /dev/null +++ b/ipfw.cron @@ -0,0 +1,3 @@ +# Runs every 5 minutes and clean ipfw expired rules +# $Id$ +*/5 * * * * root /usr/bin/ipfw-cleanup > /dev/null 2>&1 diff --git a/ipfw.spec b/ipfw.spec new file mode 100644 index 0000000..455bb3f --- /dev/null +++ b/ipfw.spec @@ -0,0 +1,81 @@ +# +# $Id$ +# +# TODO: +# restart crond +# modprobe ipfw_mod.ko (depmod ?) +# +%define url $URL: http://onelab1.iet.unipi.it/svn/trunk/ipfw.spec $ + +# Marta Carbone +# 2009 - Universita` di Pisa +# License is BSD. + +# kernel_release, kernel_version and kernel_arch are expected to be set by the build to e.g. +# kernel_release : vs2.3.0.29.1.planetlab +# kernel_version : 2.6.22.14 + +%define name ipfw +%define version 0.9 +%define taglevel 1 + +%define release %{kernel_version}.%{taglevel}%{?pldistro:.%{pldistro}}%{?date:.%{date}} +%define kernel_id_arch %{kernel_version}-%{kernel_release}-%{kernel_arch} +%define kernel_id %{kernel_version}-%{kernel_release} + +Summary: ipfw and dummynet for Linux +Name: %{name} +Version: %{version} +Release: %{release} +License: BSD +Group: System Environment/Kernel +Source0: %{name}-%{version}.tar.bz2 +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot +Requires: vixie-cron + +Vendor: unipi +Packager: PlanetLab +# XXX ask +Distribution: PlanetLab %{plrelease} +URL: %(echo %{url} | cut -d ' ' -f 2) + +%description +ipfw is the Linux port of the FreeBSD ipfw and dummynet packages + +%prep +%setup + +%build +# clean the rpm build directory + +rm -rf $RPM_BUILD_ROOT + +# with the new build, we use the kernel-devel rpm for building +%define kernelpath /usr/src/kernels/%{kernel_id_arch} + +%__make KERNELPATH=%kernelpath clean +%__make KERNELPATH=%kernelpath + +%install +install -D -m 755 dummynet/ipfw_mod.ko $RPM_BUILD_ROOT/lib/modules/%{kernel_id}/net/netfilter/ipfw_mod.ko +install -D -m 755 ipfw/ipfw 
$RPM_BUILD_ROOT/sbin/ipfw +install -D -m 755 ipfw-cleanup $RPM_BUILD_ROOT/usr/bin/ipfw-cleanup +install -D -m 755 ipfw.cron $RPM_BUILD_ROOT/%{_sysconfdir}/cron.d/ipfw.cron + +%clean +rm -rf $RPM_BUILD_ROOT + +# here there is a list of the final installation directories +%files +%defattr(-,root,root) +%dir /lib/modules/%{kernel_id} +/lib/modules/%{kernel_id}/net/netfilter/ipfw_mod.ko +/sbin/ipfw +/usr/bin/ipfw-cleanup +%{_sysconfdir}/cron.d/ipfw.cron + +%changelog +* Thu Jun 25 2009 Marta Carbone +- post installation removed for deployment, moved manpages to the slice package +* Fri Apr 17 2009 Marta Carbone +- Initial release diff --git a/ipfw/Makefile b/ipfw/Makefile new file mode 100644 index 0000000..5c3ba6a --- /dev/null +++ b/ipfw/Makefile @@ -0,0 +1,45 @@ +# +# $Id$ +# +# GNUMakefile to build the userland part of ipfw on Linux +# +# enable extra debugging information +# Do not set with = or := so we can inherit from the caller +$(warning Building userland ipfw for $(VER)) +EXTRA_CFLAGS += +EXTRA_CFLAGS += -O0 +EXTRA_CFLAGS += -include ../glue.h + +LDFLAGS= + +EXTRA_CFLAGS += -I ./include + +ifneq ($(VER),openwrt) +OSARCH := $(shell uname) +ifeq ($(OSARCH),Linux) + EXTRA_CFLAGS += -D__BSD_VISIBLE +else + HAVE_NAT := $(shell grep O_NAT /usr/include/netinet/ip_fw.h) + # EXTRA_CFLAGS += ... 
+endif +endif # !openwrt + +CFLAGS += $(EXTRA_CFLAGS) + +OBJS = ipfw2.o dummynet.o main.o ipv6.o altq.o +ifneq ($(HAVE_NAT),) + OBJS += nat.o + EXTRA_CFLAGS += -DHAVE_NAT +endif +OBJS += glue.o + +all: ipfw + echo "VER is $(VER)" + +ipfw: $(OBJS) + $(CC) $(LDFLAGS) -o $@ $^ + +$(OBJS) : ipfw2.h ../glue.h + +clean distclean: + -rm -f $(OBJS) ipfw diff --git a/ipfw/add_rules b/ipfw/add_rules new file mode 100755 index 0000000..1f90c75 --- /dev/null +++ b/ipfw/add_rules @@ -0,0 +1,25 @@ +#!/bin/bash + +PRG=./ipfw + +myfun() { + $PRG add 10 count icmp from any to 131.114.9.128 + $PRG add 20 count icmp from 131.114.9.128 to any + $PRG add 20 count icmp from any to 131.114.9.130 + $PRG add 30 count icmp from 131.114.9.130 to any + $PRG add 40 count icmp from any to 131.114.9.129 + $PRG add 50 count icmp from 131.114.9.129 to any + $PRG add 60 count icmp from 131.114.9.236 to any + sleep 1 + $PRG del 10 + $PRG del 20 + $PRG del 20 + $PRG del 30 + $PRG del 40 + $PRG del 50 + $PRG del 60 +} + +for ((i=0;i<100;i++)) ; do + myfun +done diff --git a/ipfw/altq.c b/ipfw/altq.c new file mode 100644 index 0000000..702487f --- /dev/null +++ b/ipfw/altq.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ * + * NEW command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/altq.c 187983 2009-02-01 16:00:49Z luigi $ + * + * altq interface + */ + +#include +#include +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include /* IFNAMSIZ */ +#include +#include +#include + +/* + * Map between current altq queue id numbers and names. + */ +static TAILQ_HEAD(, pf_altq) altq_entries = + TAILQ_HEAD_INITIALIZER(altq_entries); + +void +altq_set_enabled(int enabled) +{ + int pffd; + + pffd = open("/dev/pf", O_RDWR); + if (pffd == -1) + err(EX_UNAVAILABLE, + "altq support opening pf(4) control device"); + if (enabled) { + if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST) + err(EX_UNAVAILABLE, "enabling altq"); + } else { + if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT) + err(EX_UNAVAILABLE, "disabling altq"); + } + close(pffd); +} + +static void +altq_fetch(void) +{ + struct pfioc_altq pfioc; + struct pf_altq *altq; + int pffd; + unsigned int mnr; + static int altq_fetched = 0; + + if (altq_fetched) + return; + altq_fetched = 1; + pffd = open("/dev/pf", O_RDONLY); + if (pffd == -1) { + warn("altq support opening pf(4) control device"); + return; + } + bzero(&pfioc, sizeof(pfioc)); + if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) { + warn("altq support getting queue list"); + close(pffd); + return; + } + mnr = pfioc.nr; + for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) { + if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) { + if (errno == EBUSY) + break; + warn("altq support getting queue list"); + close(pffd); + return; + } + if (pfioc.altq.qid == 0) + continue; + altq = safe_calloc(1, sizeof(*altq)); + *altq = pfioc.altq; + TAILQ_INSERT_TAIL(&altq_entries, altq, entries); + } + close(pffd); +} + +u_int32_t +altq_name_to_qid(const char *name) +{ + struct pf_altq *altq; + + altq_fetch(); + TAILQ_FOREACH(altq, &altq_entries, entries) + if (strcmp(name, altq->qname) == 0) + break; + if 
(altq == NULL) + errx(EX_DATAERR, "altq has no queue named `%s'", name); + return altq->qid; +} + +static const char * +altq_qid_to_name(u_int32_t qid) +{ + struct pf_altq *altq; + + altq_fetch(); + TAILQ_FOREACH(altq, &altq_entries, entries) + if (qid == altq->qid) + break; + if (altq == NULL) + return NULL; + return altq->qname; +} + +void +print_altq_cmd(ipfw_insn_altq *altqptr) +{ + if (altqptr) { + const char *qname; + + qname = altq_qid_to_name(altqptr->qid); + if (qname == NULL) + printf(" altq ?<%u>", altqptr->qid); + else + printf(" altq %s", qname); + } +} diff --git a/ipfw/dummynet.c b/ipfw/dummynet.c new file mode 100644 index 0000000..fd8fc4d --- /dev/null +++ b/ipfw/dummynet.c @@ -0,0 +1,1061 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ * + * NEW command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/dummynet.c 187769 2009-01-27 11:06:59Z luigi $ + * + * dummynet support + */ + +#include +#include +#include +/* XXX there are several sysctl leftover here */ +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include /* inet_ntoa */ + +static struct _s_x dummynet_params[] = { + { "plr", TOK_PLR }, + { "noerror", TOK_NOERROR }, + { "buckets", TOK_BUCKETS }, + { "dst-ip", TOK_DSTIP }, + { "src-ip", TOK_SRCIP }, + { "dst-port", TOK_DSTPORT }, + { "src-port", TOK_SRCPORT }, + { "proto", TOK_PROTO }, + { "weight", TOK_WEIGHT }, + { "all", TOK_ALL }, + { "mask", TOK_MASK }, + { "droptail", TOK_DROPTAIL }, + { "red", TOK_RED }, + { "gred", TOK_GRED }, + { "bw", TOK_BW }, + { "bandwidth", TOK_BW }, + { "delay", TOK_DELAY }, + { "pipe", TOK_PIPE }, + { "queue", TOK_QUEUE }, + { "flow-id", TOK_FLOWID}, + { "dst-ipv6", TOK_DSTIP6}, + { "dst-ip6", TOK_DSTIP6}, + { "src-ipv6", TOK_SRCIP6}, + { "src-ip6", TOK_SRCIP6}, + { "profile", TOK_PIPE_PROFILE}, + { "dummynet-params", TOK_NULL }, + { NULL, 0 } /* terminator */ +}; + +static int +sort_q(const void *pa, const void *pb) +{ + int rev = (co.do_sort < 0); + int field = rev ? -co.do_sort : co.do_sort; + long long res = 0; + const struct dn_flow_queue *a = pa; + const struct dn_flow_queue *b = pb; + + switch (field) { + case 1: /* pkts */ + res = a->len - b->len; + break; + case 2: /* bytes */ + res = a->len_bytes - b->len_bytes; + break; + + case 3: /* tot pkts */ + res = a->tot_pkts - b->tot_pkts; + break; + + case 4: /* tot bytes */ + res = a->tot_bytes - b->tot_bytes; + break; + } + if (res < 0) + res = -1; + if (res > 0) + res = 1; + return (int)(rev ? 
res : -res); +} + +static void +list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q) +{ + int l; + int index_printed, indexes = 0; + char buff[255]; + struct protoent *pe; + + if (fs->rq_elements == 0) + return; + + if (co.do_sort != 0) + heapsort(q, fs->rq_elements, sizeof *q, sort_q); + + /* Print IPv4 flows */ + index_printed = 0; + for (l = 0; l < fs->rq_elements; l++) { + struct in_addr ina; + + /* XXX: Should check for IPv4 flows */ + if (IS_IP6_FLOW_ID(&(q[l].id))) + continue; + + if (!index_printed) { + index_printed = 1; + if (indexes > 0) /* currently a no-op */ + printf("\n"); + indexes++; + printf(" " + "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n", + fs->flow_mask.proto, + fs->flow_mask.src_ip, fs->flow_mask.src_port, + fs->flow_mask.dst_ip, fs->flow_mask.dst_port); + + printf("BKT Prot ___Source IP/port____ " + "____Dest. IP/port____ " + "Tot_pkt/bytes Pkt/Byte Drp\n"); + } + + printf("%3d ", q[l].hash_slot); + pe = getprotobynumber(q[l].id.proto); + if (pe) + printf("%-4s ", pe->p_name); + else + printf("%4u ", q[l].id.proto); + ina.s_addr = htonl(q[l].id.src_ip); + printf("%15s/%-5d ", + inet_ntoa(ina), q[l].id.src_port); + ina.s_addr = htonl(q[l].id.dst_ip); + printf("%15s/%-5d ", + inet_ntoa(ina), q[l].id.dst_port); + printf("%4llu %8llu %2u %4u %3u\n", + align_uint64(&q[l].tot_pkts), + align_uint64(&q[l].tot_bytes), + q[l].len, q[l].len_bytes, q[l].drops); + if (co.verbose) + printf(" S %20llu F %20llu\n", + align_uint64(&q[l].S), align_uint64(&q[l].F)); + } + + /* Print IPv6 flows */ + index_printed = 0; + for (l = 0; l < fs->rq_elements; l++) { + if (!IS_IP6_FLOW_ID(&(q[l].id))) + continue; + + if (!index_printed) { + index_printed = 1; + if (indexes > 0) + printf("\n"); + indexes++; + printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ", + fs->flow_mask.proto, fs->flow_mask.flow_id6); + inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6), + buff, sizeof(buff)); + printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port); + inet_ntop( AF_INET6, 
&(fs->flow_mask.dst_ip6), + buff, sizeof(buff) ); + printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port); + + printf("BKT ___Prot___ _flow-id_ " + "______________Source IPv6/port_______________ " + "_______________Dest. IPv6/port_______________ " + "Tot_pkt/bytes Pkt/Byte Drp\n"); + } + printf("%3d ", q[l].hash_slot); + pe = getprotobynumber(q[l].id.proto); + if (pe != NULL) + printf("%9s ", pe->p_name); + else + printf("%9u ", q[l].id.proto); + printf("%7d %39s/%-5d ", q[l].id.flow_id6, + inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)), + q[l].id.src_port); + printf(" %39s/%-5d ", + inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)), + q[l].id.dst_port); + printf(" %4llu %8llu %2u %4u %3u\n", + align_uint64(&q[l].tot_pkts), + align_uint64(&q[l].tot_bytes), + q[l].len, q[l].len_bytes, q[l].drops); + if (co.verbose) + printf(" S %20llu F %20llu\n", + align_uint64(&q[l].S), + align_uint64(&q[l].F)); + } +} + +static void +print_flowset_parms(struct dn_flow_set *fs, char *prefix) +{ + int l; + char qs[30]; + char plr[30]; + char red[90]; /* Display RED parameters */ + + l = fs->qsize; + if (fs->flags_fs & DN_QSIZE_IS_BYTES) { + if (l >= 8192) + sprintf(qs, "%d KB", l / 1024); + else + sprintf(qs, "%d B", l); + } else + sprintf(qs, "%3d sl.", l); + if (fs->plr) + sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff)); + else + plr[0] = '\0'; + if (fs->flags_fs & DN_IS_RED) /* RED parameters */ + sprintf(red, + "\n\t %cRED w_q %f min_th %d max_th %d max_p %f", + (fs->flags_fs & DN_IS_GENTLE_RED) ? 
'G' : ' ', + 1.0 * fs->w_q / (double)(1 << SCALE_RED), + SCALE_VAL(fs->min_th), + SCALE_VAL(fs->max_th), + 1.0 * fs->max_p / (double)(1 << SCALE_RED)); + else + sprintf(red, "droptail"); + + printf("%s %s%s %d queues (%d buckets) %s\n", + prefix, qs, plr, fs->rq_elements, fs->rq_size, red); +} + +static void +print_extra_delay_parms(struct dn_pipe *p, char *prefix) +{ + double loss; + if (p->samples_no <= 0) + return; + + loss = p->loss_level; + loss /= p->samples_no; + printf("%s profile: name \"%s\" loss %f samples %d\n", + prefix, p->name, loss, p->samples_no); +} + +void +ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]) +{ + int rulenum; + void *next = data; + struct dn_pipe *p = (struct dn_pipe *) data; + struct dn_flow_set *fs; + struct dn_flow_queue *q; + int l; + + if (ac > 0) + rulenum = strtoul(*av++, NULL, 10); + else + rulenum = 0; + for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) { + double b = p->bandwidth; + char buf[30]; + char prefix[80]; + + if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE) + break; /* done with pipes, now queues */ + + /* + * compute length, as pipe have variable size + */ + l = sizeof(*p) + p->fs.rq_elements * sizeof(*q); + next = (char *)p + l; + nbytes -= l; + + if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2) + continue; + + /* + * Print rate (or clocking interface) + */ + if (p->if_name[0] != '\0') + sprintf(buf, "%s", p->if_name); + else if (b == 0) + sprintf(buf, "unlimited"); + else if (b >= 1000000) + sprintf(buf, "%7.3f Mbit/s", b/1000000); + else if (b >= 1000) + sprintf(buf, "%7.3f Kbit/s", b/1000); + else + sprintf(buf, "%7.3f bit/s ", b); + + sprintf(prefix, "%05d: %s %4d ms ", + p->pipe_nr, buf, p->delay); + + print_extra_delay_parms(p, prefix); + + print_flowset_parms(&(p->fs), prefix); + + q = (struct dn_flow_queue *)(p+1); + list_queues(&(p->fs), q); + } + for (fs = next; nbytes >= sizeof *fs; fs = next) { + char prefix[80]; + + if (SLIST_NEXT(fs, next) != (struct 
dn_flow_set *)DN_IS_QUEUE) + break; + l = sizeof(*fs) + fs->rq_elements * sizeof(*q); + next = (char *)fs + l; + nbytes -= l; + + if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) || + (rulenum != fs->parent_nr && co.do_pipe == 1))) { + continue; + } + + q = (struct dn_flow_queue *)(fs+1); + sprintf(prefix, "q%05d: weight %d pipe %d ", + fs->fs_nr, fs->weight, fs->parent_nr); + print_flowset_parms(fs, prefix); + list_queues(fs, q); + } +} + +/* + * Delete pipe or queue i + */ +int +ipfw_delete_pipe(int pipe_or_queue, int i) +{ + struct dn_pipe p; + + memset(&p, 0, sizeof p); + if (pipe_or_queue == 1) + p.pipe_nr = i; /* pipe */ + else + p.fs.fs_nr = i; /* queue */ + i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p); + if (i) { + i = 1; + warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i); + } + return i; +} + +/* + * Code to parse delay profiles. + * + * Some link types introduce extra delays in the transmission + * of a packet, e.g. because of MAC level framing, contention on + * the use of the channel, MAC level retransmissions and so on. + * From our point of view, the channel is effectively unavailable + * for this extra time, which is constant or variable depending + * on the link type. Additionally, packets may be dropped after this + * time (e.g. on a wireless link after too many retransmissions). + * We can model the additional delay with an empirical curve + * that represents its distribution. + * + * cumulative probability + * 1.0 ^ + * | + * L +-- loss-level x + * | ****** + * | * + * | ***** + * | * + * | ** + * | * + * +-------*-------------------> + * delay + * + * The empirical curve may have both vertical and horizontal lines. + * Vertical lines represent constant delay for a range of + * probabilities; horizontal lines correspond to a discontinuty + * in the delay distribution: the pipe will use the largest delay + * for a given probability. 
+ * + * To pass the curve to dummynet, we must store the parameters + * in a file as described below, and issue the command + * + * ipfw pipe config ... bw XXX profile ... + * + * The file format is the following, with whitespace acting as + * a separator and '#' indicating the beginning a comment: + * + * samples N + * the number of samples used in the internal + * representation (2..1024; default 100); + * + * loss-level L + * The probability above which packets are lost. + * (0.0 <= L <= 1.0, default 1.0 i.e. no loss); + * + * name identifier + * Optional a name (listed by "ipfw pipe show") + * to identify the distribution; + * + * "delay prob" | "prob delay" + * One of these two lines is mandatory and defines + * the format of the following lines with data points. + * + * XXX YYY + * 2 or more lines representing points in the curve, + * with either delay or probability first, according + * to the chosen format. + * The unit for delay is milliseconds. + * + * Data points does not need to be ordered or equal to the number + * specified in the "samples" line. ipfw will sort and interpolate + * the curve as needed. + * + * Example of a profile file: + + name bla_bla_bla + samples 100 + loss-level 0.86 + prob delay + 0 200 # minimum overhead is 200ms + 0.5 200 + 0.5 300 + 0.8 1000 + 0.9 1300 + 1 1300 + + * Internally, we will convert the curve to a fixed number of + * samples, and when it is time to transmit a packet we will + * model the extra delay as extra bits in the packet. + * + */ + +/* XXX move to an array definition ? */ +#define ED_MAX_LINE_LEN 256+ED_MAX_NAME_LEN +#define ED_TOK_SAMPLES "samples" +#define ED_TOK_LOSS "loss-level" +#define ED_TOK_NAME "name" +#define ED_TOK_DELAY "delay" +#define ED_TOK_PROB "prob" +#define ED_TOK_BW "bw" +#define ED_SEPARATORS " \t\n" +#define ED_MIN_SAMPLES_NO 2 + +/* + * returns 1 if s is a non-negative number, with at least one '.' 
/*
 * Parse the argument of a "bw" option: either the name of a clocking
 * interface or a numeric bandwidth.
 *
 * arg        text to parse.  A first character in 'a'..'z' selects the
 *            interface-name form, anything else the numeric form.
 * bandwidth  in: -1 when no bandwidth has been set yet (any other value
 *            triggers a "duplicate token" warning);
 *            out: 0 for an interface name, otherwise the parsed value.
 * if_name    receives the interface name, or "" for a numeric value.
 * namelen    size of the if_name buffer in bytes.
 *
 * Numeric form: the number is read with strtoul() (base auto-detected).
 * A 'K'/'k' suffix multiplies by 1000 and 'M' by 1000000.  The result
 * is multiplied by 8 when the unit test below selects bytes; the exact
 * abbreviation matching is done by _substrcmp2().
 * A value that does not fit in an int terminates the program with
 * EX_DATAERR.
 */
void
read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen)
{
	/* warnx, not warn: errno is meaningless for these messages. */
	if (*bandwidth != -1)
		warnx("duplicate token, override bandwidth value!");

	if (arg[0] >= 'a' && arg[0] <= 'z') {
		/* interface name */
		if (namelen > 0) {
			/* Warn only when the name really does not fit. */
			if (strlen(arg) >= (size_t)namelen)
				warnx("interface name truncated");
			strncpy(if_name, arg, namelen - 1);
			if_name[namelen - 1] = '\0';
		}
		*bandwidth = 0;
	} else {	/* read bandwidth value */
		/* Largest int, computed without needing <limits.h>. */
		const unsigned long long bw_max = ~0U >> 1;
		unsigned long long bw;
		unsigned long long scale = 1;
		char *end = NULL;

		bw = strtoul(arg, &end, 0);
		if (*end == 'K' || *end == 'k') {
			end++;
			scale = 1000;
		} else if (*end == 'M') {
			end++;
			scale = 1000000;
		}
		if ((*end == 'B' &&
		    _substrcmp2(end, "Bi", "Bit/s") != 0) ||
		    _substrcmp2(end, "by", "bytes") == 0)
			scale *= 8;

		/*
		 * Range-check before multiplying: scaling in a signed int
		 * overflows (undefined behavior), and a wrapped result is
		 * not reliably negative.
		 */
		if (bw > bw_max / scale)
			errx(EX_DATAERR, "bandwidth too large");

		*bandwidth = (int)(bw * scale);
		if_name[0] = '\0';
	}
}
%s", filename); + + while (fgets(line, ED_MAX_LINE_LEN, f)) { /* read commands */ + char *s, *cur = line, *name = NULL, *arg = NULL; + + ++lineno; + + /* parse the line */ + while (cur) { + s = strsep(&cur, ED_SEPARATORS); + if (s == NULL || *s == '#') + break; + if (*s == '\0') + continue; + if (arg) + errx(ED_EFMT("too many arguments")); + if (name == NULL) + name = s; + else + arg = s; + } + if (name == NULL) /* empty line */ + continue; + if (arg == NULL) + errx(ED_EFMT("missing arg for %s"), name); + + if (!strcasecmp(name, ED_TOK_SAMPLES)) { + if (samples > 0) + errx(ED_EFMT("duplicate ``samples'' line")); + if (atoi(arg) <=0) + errx(ED_EFMT("invalid number of samples")); + samples = atoi(arg); + if (samples>ED_MAX_SAMPLES_NO) + errx(ED_EFMT("too many samples, maximum is %d"), + ED_MAX_SAMPLES_NO); + do_points = 0; + } else if (!strcasecmp(name, ED_TOK_BW)) { + read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name)); + } else if (!strcasecmp(name, ED_TOK_LOSS)) { + if (loss != -1.0) + errx(ED_EFMT("duplicated token: %s"), name); + if (!is_valid_number(arg)) + errx(ED_EFMT("invalid %s"), arg); + loss = atof(arg); + if (loss > 1) + errx(ED_EFMT("%s greater than 1.0"), name); + do_points = 0; + } else if (!strcasecmp(name, ED_TOK_NAME)) { + if (profile_name[0] != '\0') + errx(ED_EFMT("duplicated token: %s"), name); + strncpy(profile_name, arg, sizeof(profile_name) - 1); + profile_name[sizeof(profile_name)-1] = '\0'; + do_points = 0; + } else if (!strcasecmp(name, ED_TOK_DELAY)) { + if (do_points) + errx(ED_EFMT("duplicated token: %s"), name); + delay_first = 1; + do_points = 1; + } else if (!strcasecmp(name, ED_TOK_PROB)) { + if (do_points) + errx(ED_EFMT("duplicated token: %s"), name); + delay_first = 0; + do_points = 1; + } else if (do_points) { + if (!is_valid_number(name) || !is_valid_number(arg)) + errx(ED_EFMT("invalid point found")); + if (delay_first) { + points[points_no].delay = atof(name); + points[points_no].prob = atof(arg); + } else { + 
points[points_no].delay = atof(arg); + points[points_no].prob = atof(name); + } + if (points[points_no].prob > 1.0) + errx(ED_EFMT("probability greater than 1.0")); + ++points_no; + } else { + errx(ED_EFMT("unrecognised command '%s'"), name); + } + } + + if (samples == -1) { + warnx("'%s' not found, assuming 100", ED_TOK_SAMPLES); + samples = 100; + } + + if (loss == -1.0) { + warnx("'%s' not found, assuming no loss", ED_TOK_LOSS); + loss = 1; + } + + /* make sure that there are enough points. */ + if (points_no < ED_MIN_SAMPLES_NO) + errx(ED_EFMT("too few samples, need at least %d"), + ED_MIN_SAMPLES_NO); + + qsort(points, points_no, sizeof(struct point), compare_points); + + /* interpolation */ + for (i = 0; isamples[index] = x1; + } else { + double m = (y2-y1)/(x2-x1); + double c = y1 - m*x1; + for (; indexsamples[index] = (index - c)/m; + } + } + p->samples_no = samples; + p->loss_level = loss * samples; + strncpy(p->name, profile_name, sizeof(p->name)); +} + +void +ipfw_config_pipe(int ac, char **av) +{ + int samples[ED_MAX_SAMPLES_NO]; + struct dn_pipe p; + int i; + char *end; + void *par = NULL; + + memset(&p, 0, sizeof p); + p.bandwidth = -1; + + av++; ac--; + /* Pipe number */ + if (ac && isdigit(**av)) { + i = atoi(*av); av++; ac--; + if (co.do_pipe == 1) + p.pipe_nr = i; + else + p.fs.fs_nr = i; + } + while (ac > 0) { + double d; + int tok = match_token(dummynet_params, *av); + ac--; av++; + + switch(tok) { + case TOK_NOERROR: + p.fs.flags_fs |= DN_NOERROR; + break; + + case TOK_PLR: + NEED1("plr needs argument 0..1\n"); + d = strtod(av[0], NULL); + if (d > 1) + d = 1; + else if (d < 0) + d = 0; + p.fs.plr = (int)(d*0x7fffffff); + ac--; av++; + break; + + case TOK_QUEUE: + NEED1("queue needs queue size\n"); + end = NULL; + p.fs.qsize = strtoul(av[0], &end, 0); + if (*end == 'K' || *end == 'k') { + p.fs.flags_fs |= DN_QSIZE_IS_BYTES; + p.fs.qsize *= 1024; + } else if (*end == 'B' || + _substrcmp2(end, "by", "bytes") == 0) { + p.fs.flags_fs |= 
DN_QSIZE_IS_BYTES; + } + ac--; av++; + break; + + case TOK_BUCKETS: + NEED1("buckets needs argument\n"); + p.fs.rq_size = strtoul(av[0], NULL, 0); + ac--; av++; + break; + + case TOK_MASK: + NEED1("mask needs mask specifier\n"); + /* + * per-flow queue, mask is dst_ip, dst_port, + * src_ip, src_port, proto measured in bits + */ + par = NULL; + + bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask)); + end = NULL; + + while (ac >= 1) { + uint32_t *p32 = NULL; + uint16_t *p16 = NULL; + uint32_t *p20 = NULL; + struct in6_addr *pa6 = NULL; + uint32_t a; + + tok = match_token(dummynet_params, *av); + ac--; av++; + switch(tok) { + case TOK_ALL: + /* + * special case, all bits significant + */ + p.fs.flow_mask.dst_ip = ~0; + p.fs.flow_mask.src_ip = ~0; + p.fs.flow_mask.dst_port = ~0; + p.fs.flow_mask.src_port = ~0; + p.fs.flow_mask.proto = ~0; + n2mask(&(p.fs.flow_mask.dst_ip6), 128); + n2mask(&(p.fs.flow_mask.src_ip6), 128); + p.fs.flow_mask.flow_id6 = ~0; + p.fs.flags_fs |= DN_HAVE_FLOW_MASK; + goto end_mask; + + case TOK_DSTIP: + p32 = &p.fs.flow_mask.dst_ip; + break; + + case TOK_SRCIP: + p32 = &p.fs.flow_mask.src_ip; + break; + + case TOK_DSTIP6: + pa6 = &(p.fs.flow_mask.dst_ip6); + break; + + case TOK_SRCIP6: + pa6 = &(p.fs.flow_mask.src_ip6); + break; + + case TOK_FLOWID: + p20 = &p.fs.flow_mask.flow_id6; + break; + + case TOK_DSTPORT: + p16 = &p.fs.flow_mask.dst_port; + break; + + case TOK_SRCPORT: + p16 = &p.fs.flow_mask.src_port; + break; + + case TOK_PROTO: + break; + + default: + ac++; av--; /* backtrack */ + goto end_mask; + } + if (ac < 1) + errx(EX_USAGE, "mask: value missing"); + if (*av[0] == '/') { + a = strtoul(av[0]+1, &end, 0); + if (pa6 == NULL) + a = (a == 32) ? 
~0 : (1 << a) - 1; + } else + a = strtoul(av[0], &end, 0); + if (p32 != NULL) + *p32 = a; + else if (p16 != NULL) { + if (a > 0xFFFF) + errx(EX_DATAERR, + "port mask must be 16 bit"); + *p16 = (uint16_t)a; + } else if (p20 != NULL) { + if (a > 0xfffff) + errx(EX_DATAERR, + "flow_id mask must be 20 bit"); + *p20 = (uint32_t)a; + } else if (pa6 != NULL) { + if (a > 128) + errx(EX_DATAERR, + "in6addr invalid mask len"); + else + n2mask(pa6, a); + } else { + if (a > 0xFF) + errx(EX_DATAERR, + "proto mask must be 8 bit"); + p.fs.flow_mask.proto = (uint8_t)a; + } + if (a != 0) + p.fs.flags_fs |= DN_HAVE_FLOW_MASK; + ac--; av++; + } /* end while, config masks */ +end_mask: + break; + + case TOK_RED: + case TOK_GRED: + NEED1("red/gred needs w_q/min_th/max_th/max_p\n"); + p.fs.flags_fs |= DN_IS_RED; + if (tok == TOK_GRED) + p.fs.flags_fs |= DN_IS_GENTLE_RED; + /* + * the format for parameters is w_q/min_th/max_th/max_p + */ + if ((end = strsep(&av[0], "/"))) { + double w_q = strtod(end, NULL); + if (w_q > 1 || w_q <= 0) + errx(EX_DATAERR, "0 < w_q <= 1"); + p.fs.w_q = (int) (w_q * (1 << SCALE_RED)); + } + if ((end = strsep(&av[0], "/"))) { + p.fs.min_th = strtoul(end, &end, 0); + if (*end == 'K' || *end == 'k') + p.fs.min_th *= 1024; + } + if ((end = strsep(&av[0], "/"))) { + p.fs.max_th = strtoul(end, &end, 0); + if (*end == 'K' || *end == 'k') + p.fs.max_th *= 1024; + } + if ((end = strsep(&av[0], "/"))) { + double max_p = strtod(end, NULL); + if (max_p > 1 || max_p <= 0) + errx(EX_DATAERR, "0 < max_p <= 1"); + p.fs.max_p = (int)(max_p * (1 << SCALE_RED)); + } + ac--; av++; + break; + + case TOK_DROPTAIL: + p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED); + break; + + case TOK_BW: + NEED1("bw needs bandwidth or interface\n"); + if (co.do_pipe != 1) + errx(EX_DATAERR, "bandwidth only valid for pipes"); + read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name)); + ac--; av++; + break; + + case TOK_DELAY: + if (co.do_pipe != 1) + errx(EX_DATAERR, "delay only valid 
for pipes"); + NEED1("delay needs argument 0..10000ms\n"); + p.delay = strtoul(av[0], NULL, 0); + ac--; av++; + break; + + case TOK_WEIGHT: + if (co.do_pipe == 1) + errx(EX_DATAERR,"weight only valid for queues"); + NEED1("weight needs argument 0..100\n"); + p.fs.weight = strtoul(av[0], &end, 0); + ac--; av++; + break; + + case TOK_PIPE: + if (co.do_pipe == 1) + errx(EX_DATAERR,"pipe only valid for queues"); + NEED1("pipe needs pipe_number\n"); + p.fs.parent_nr = strtoul(av[0], &end, 0); + ac--; av++; + break; + + case TOK_PIPE_PROFILE: + if (co.do_pipe != 1) + errx(EX_DATAERR, "extra delay only valid for pipes"); + NEED1("extra delay needs the file name\n"); + p.samples = &samples[0]; + load_extra_delays(av[0], &p); + --ac; ++av; + break; + + default: + errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); + } + } + if (co.do_pipe == 1) { + if (p.pipe_nr == 0) + errx(EX_DATAERR, "pipe_nr must be > 0"); + if (p.delay > 10000) + errx(EX_DATAERR, "delay must be < 10000"); + } else { /* co.do_pipe == 2, queue */ + if (p.fs.parent_nr == 0) + errx(EX_DATAERR, "pipe must be > 0"); + if (p.fs.weight >100) + errx(EX_DATAERR, "weight must be <= 100"); + } + + /* check for bandwidth value */ + if (p.bandwidth == -1) { + p.bandwidth = 0; + if (p.samples_no > 0) + errx(EX_DATAERR, "profile requires a bandwidth limit"); + } + + if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) { + size_t len; + long limit; + + len = sizeof(limit); + if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit", + &limit, &len, NULL, 0) == -1) + limit = 1024*1024; + if (p.fs.qsize > limit) + errx(EX_DATAERR, "queue size must be < %ldB", limit); + } else { + size_t len; + long limit; + + len = sizeof(limit); + if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit", + &limit, &len, NULL, 0) == -1) + limit = 100; + if (p.fs.qsize > limit) + errx(EX_DATAERR, "2 <= queue size <= %ld", limit); + } + if (p.fs.flags_fs & DN_IS_RED) { + size_t len; + int lookup_depth, avg_pkt_size; + double s, idle, weight, w_q; + 
struct clockinfo ck; + int t; + + if (p.fs.min_th >= p.fs.max_th) + errx(EX_DATAERR, "min_th %d must be < than max_th %d", + p.fs.min_th, p.fs.max_th); + if (p.fs.max_th == 0) + errx(EX_DATAERR, "max_th must be > 0"); + + len = sizeof(int); + if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth", + &lookup_depth, &len, NULL, 0) == -1) + errx(1, "sysctlbyname(\"%s\")", + "net.inet.ip.dummynet.red_lookup_depth"); + if (lookup_depth == 0) + errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth" + " must be greater than zero"); + + len = sizeof(int); + if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size", + &avg_pkt_size, &len, NULL, 0) == -1) + + errx(1, "sysctlbyname(\"%s\")", + "net.inet.ip.dummynet.red_avg_pkt_size"); + if (avg_pkt_size == 0) + errx(EX_DATAERR, + "net.inet.ip.dummynet.red_avg_pkt_size must" + " be greater than zero"); + + len = sizeof(struct clockinfo); + if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1) + errx(1, "sysctlbyname(\"%s\")", "kern.clockrate"); + + /* + * Ticks needed for sending a medium-sized packet. + * Unfortunately, when we are configuring a WF2Q+ queue, we + * do not have bandwidth information, because that is stored + * in the parent pipe, and also we have multiple queues + * competing for it. So we set s=0, which is not very + * correct. But on the other hand, why do we want RED with + * WF2Q+ ? + */ + if (p.bandwidth==0) /* this is a WF2Q+ queue */ + s = 0; + else + s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth; + + /* + * max idle time (in ticks) before avg queue size becomes 0. + * NOTA: (3/w_q) is approx the value x so that + * (1-w_q)^x < 10^-3. + */ + w_q = ((double)p.fs.w_q) / (1 << SCALE_RED); + idle = s * 3. 
/ w_q; + p.fs.lookup_step = (int)idle / lookup_depth; + if (!p.fs.lookup_step) + p.fs.lookup_step = 1; + weight = 1 - w_q; + for (t = p.fs.lookup_step; t > 1; --t) + weight *= 1 - w_q; + p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED)); + } + if (p.samples_no <= 0) { + i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p); + } else { + struct dn_pipe_max pm; + int len = sizeof(pm); + + memcpy(&pm.pipe, &p, sizeof(pm.pipe)); + memcpy(&pm.samples, samples, sizeof(pm.samples)); + + i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len); + } + + if (i) + err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE"); +} diff --git a/ipfw/glue.c b/ipfw/glue.c new file mode 100644 index 0000000..c70cd18 --- /dev/null +++ b/ipfw/glue.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * $Id$ + * + * Userland functions missing in linux + */ + +#include +#include + +#ifndef HAVE_NAT +/* dummy nat functions */ +void +ipfw_show_nat(int ac, char **av) +{ + fprintf(stderr, "%s unsupported\n", __FUNCTION__); +} + +void +ipfw_config_nat(int ac, char **av) +{ + fprintf(stderr, "%s unsupported\n", __FUNCTION__); +} +#endif + +#ifdef __linux__ +int optreset; /* missing in linux */ +#endif + +#if defined( __linux__ ) || defined(_WIN32) +/* + * not implemented in linux. 
/*
 * Headers needed by strtonum() below.  They are included here because
 * this replacement must stand on its own; both are idempotent.
 */
#include <errno.h>
#include <limits.h>

/*
 * strtonum() replacement for systems that lack it (linux, windows).
 *
 * Converts nptr to a long long and checks it against [minval, maxval].
 *
 * On success the value is returned, *errstr is set to NULL and errno is
 * left untouched.  On failure 0 is returned and *errstr points to a
 * static string:
 *	"invalid"    nptr is NULL, empty, has trailing garbage, or
 *	             minval > maxval (errno = EINVAL);
 *	"too small"  value is below minval (errno = ERANGE);
 *	"too large"  value is above maxval (errno = ERANGE).
 * errstr may be NULL when the caller does not want the reason.
 *
 * Callers test "errstr != NULL" to detect errors, so *errstr must be
 * NULL on success; handing errstr to strtoll() as its end pointer would
 * always leave it non-NULL.
 *
 * The base is auto-detected (0x.. hex, 0.. octal), as this port has
 * always done; the BSD original accepts decimal only.
 */
long long int
strtonum(const char *nptr, long long minval, long long maxval,
	const char **errstr)
{
	const char *problem = NULL;	/* NULL means success */
	long long value = 0;
	int saved_errno = errno;
	char *end = NULL;

	if (nptr == NULL || minval > maxval) {
		problem = "invalid";
		errno = EINVAL;
	} else {
		errno = 0;
		value = strtoll(nptr, &end, 0);
		if (end == nptr || *end != '\0') {
			problem = "invalid";
			errno = EINVAL;
		} else if ((value == LLONG_MIN && errno == ERANGE) ||
		    value < minval) {
			problem = "too small";
			errno = ERANGE;
		} else if ((value == LLONG_MAX && errno == ERANGE) ||
		    value > maxval) {
			problem = "too large";
			errno = ERANGE;
		} else {
			errno = saved_errno;
		}
	}

	if (errstr != NULL)
		*errstr = problem;
	return problem != NULL ? 0 : value;
}
+ * destination port and/or address is zero), the packet aliasing engine will + * attempt to allocate a socket for the aliasing port it chooses. This will + * avoid interference with the host machine. Fully specified links do not + * require this. This bit is set after a call to PacketAliasInit(), so it is + * a default mode of operation. + */ +#ifndef NO_USE_SOCKETS +#define PKT_ALIAS_USE_SOCKETS 0x08 +#endif +/*- + * If PKT_ALIAS_UNREGISTERED_ONLY is set, then only packets with + * unregistered source addresses will be aliased. Private + * addresses are those in the following ranges: + * + * 10.0.0.0 -> 10.255.255.255 + * 172.16.0.0 -> 172.31.255.255 + * 192.168.0.0 -> 192.168.255.255 + */ +#define PKT_ALIAS_UNREGISTERED_ONLY 0x10 + +/* + * If PKT_ALIAS_RESET_ON_ADDR_CHANGE is set, then the table of dynamic + * aliasing links will be reset whenever PacketAliasSetAddress() changes the + * default aliasing address. If the default aliasing address is left + * unchanged by this function call, then the table of dynamic aliasing links + * will be left intact. This bit is set after a call to PacketAliasInit(). + */ +#define PKT_ALIAS_RESET_ON_ADDR_CHANGE 0x20 + + +/* + * If PKT_ALIAS_PROXY_ONLY is set, then NAT will be disabled and only + * transparent proxying is performed. + */ +#define PKT_ALIAS_PROXY_ONLY 0x40 + +/* + * If PKT_ALIAS_REVERSE is set, the actions of PacketAliasIn() and + * PacketAliasOut() are reversed. + */ +#define PKT_ALIAS_REVERSE 0x80 + +#endif /* !_ALIAS_H_ */ diff --git a/ipfw/include/net/if_dl.h b/ipfw/include/net/if_dl.h new file mode 100644 index 0000000..4d2b4f7 --- /dev/null +++ b/ipfw/include/net/if_dl.h @@ -0,0 +1,82 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)if_dl.h 8.1 (Berkeley) 6/10/93 + * $FreeBSD: src/sys/net/if_dl.h,v 1.14 2005/01/07 01:45:34 imp Exp $ + */ + +#ifndef _NET_IF_DL_H_ +#define _NET_IF_DL_H_ + +/* + * A Link-Level Sockaddr may specify the interface in one of two + * ways: either by means of a system-provided index number (computed + * anew and possibly differently on every reboot), or by a human-readable + * string such as "il0" (for managerial convenience). + * + * Census taking actions, such as something akin to SIOCGCONF would return + * both the index and the human name. 
+ *
+ * High volume transactions (such as giving a link-level ``from'' address
+ * in a recvfrom or recvmsg call) may be likely only to provide the indexed
+ * form, (which requires fewer copy operations and less space).
+ *
+ * The form and interpretation of the link-level address is purely a matter
+ * of convention between the device driver and its consumers; however, it is
+ * expected that all drivers for an interface of a given if_type will agree.
+ */
+
+/*
+ * Structure of a Link-Level sockaddr:
+ */
+struct sockaddr_dl {
+	u_char	sdl_len;	/* Total length of sockaddr */
+	u_char	sdl_family;	/* AF_LINK */
+	u_short	sdl_index;	/* if != 0, system given index for interface */
+	u_char	sdl_type;	/* interface type */
+	u_char	sdl_nlen;	/* interface name length, no trailing 0 reqd. */
+	u_char	sdl_alen;	/* link level address length */
+	u_char	sdl_slen;	/* link layer selector length */
+	char	sdl_data[46];	/* minimum work area, can be larger;
+				   contains both if name and ll address */
+};
+
+#define LLADDR(s) ((caddr_t)((s)->sdl_data + (s)->sdl_nlen))
+
+#ifndef _KERNEL
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+void	link_addr(const char *, struct sockaddr_dl *);
+char	*link_ntoa(const struct sockaddr_dl *);
+__END_DECLS
+
+#endif /* !_KERNEL */
+
+#endif
diff --git a/ipfw/include/net/pfvar.h b/ipfw/include/net/pfvar.h
new file mode 100644
index 0000000..304cb16
--- /dev/null
+++ b/ipfw/include/net/pfvar.h
@@ -0,0 +1,32 @@
+#ifndef _PF_VAR_H_
+#define _PF_VAR_H_
+
+/*
+ * replacement for FreeBSD's pfqueue.h
+ */
+#include <sys/queue.h>
+
+#define DIOCSTARTALTQ _IO ('D', 42)
+#define DIOCSTOPALTQ _IO ('D', 43)
+
+struct pf_altq {
+	TAILQ_ENTRY(pf_altq) entries;
+	/* ...
*/ + u_int32_t qid; /* return value */ + +#define PF_QNAME_SIZE 64 + char qname[PF_QNAME_SIZE]; /* queue name */ + +}; + +struct pfioc_altq { + u_int32_t action; + u_int32_t ticket; + u_int32_t nr; + struct pf_altq altq; +}; + +#define DIOCGETALTQS _IOWR('D', 47, struct pfioc_altq) +#define DIOCGETALTQ _IOWR('D', 48, struct pfioc_altq) + +#endif /* !_PF_VAR_H */ diff --git a/ipfw/include/netinet/ip_dummynet.h b/ipfw/include/netinet/ip_dummynet.h new file mode 100644 index 0000000..c6a6575 --- /dev/null +++ b/ipfw/include/netinet/ip_dummynet.h @@ -0,0 +1,399 @@ +/*- + * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa + * Portions Copyright (c) 2000 Akamba Corp. + * All rights reserved + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40.2.1 2008/04/25 10:26:30 oleg Exp $
+ */
+
+#ifndef _IP_DUMMYNET_H
+#define _IP_DUMMYNET_H
+
+/*
+ * Definition of dummynet data structures. In the structures, I decided
+ * not to use the macros in <sys/queue.h> in the hope of making the code
+ * easier to port to other architectures. The type of lists and queue we
+ * use here is pretty simple anyways.
+ */
+
+/*
+ * We start with a heap, which is used in the scheduler to decide when
+ * to transmit packets etc.
+ *
+ * The key for the heap is used for two different values:
+ *
+ * 1. timer ticks- max 10K/second, so 32 bits are enough;
+ *
+ * 2. virtual times. These increase in steps of len/x, where len is the
+ *    packet length, and x is either the weight of the flow, or the
+ *    sum of all weights.
+ *    If we limit to max 1000 flows and a max weight of 100, then
+ *    x needs 17 bits. The packet size is 16 bits, so we can easily
+ *    overflow if we do not allow errors.
+ * So we use a key "dn_key" which is 64 bits. Some macros are used to
+ * compare key values and handle wraparounds.
+ * MAX64 returns the largest of two key values (fully parenthesized).
+ * MY_M is used as a shift count when doing fixed point arithmetic
+ * (a better name would be useful...).
+ */
+typedef u_int64_t dn_key ;	/* sorting key */
+#define DN_KEY_LT(a,b)	((int64_t)((a)-(b)) < 0)
+#define DN_KEY_LEQ(a,b)	((int64_t)((a)-(b)) <= 0)
+#define DN_KEY_GT(a,b)	((int64_t)((a)-(b)) > 0)
+#define DN_KEY_GEQ(a,b)	((int64_t)((a)-(b)) >= 0)
+#define MAX64(x,y)	((( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x))
+#define MY_M	16 /* number of left shift to obtain a larger precision */
+
+/*
+ * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
+ * virtual time wraps every 15 days.
+ */
+
+
+/*
+ * The maximum hash table size for queues. This value must be a power
+ * of 2.
+ */
+#define DN_MAX_HASH_SIZE 65536
+
+/*
+ * A heap entry is made of a key and a pointer to the actual
+ * object stored in the heap.
+ * The heap is an array of dn_heap_entry entries, dynamically allocated. + * Current size is "size", with "elements" actually in use. + * The heap normally supports only ordered insert and extract from the top. + * If we want to extract an object from the middle of the heap, we + * have to know where the object itself is located in the heap (or we + * need to scan the whole array). To this purpose, an object has a + * field (int) which contains the index of the object itself into the + * heap. When the object is moved, the field must also be updated. + * The offset of the index in the object is stored in the 'offset' + * field in the heap descriptor. The assumption is that this offset + * is non-zero if we want to support extract from the middle. + */ +struct dn_heap_entry { + dn_key key ; /* sorting key. Topmost element is smallest one */ + void *object ; /* object pointer */ +} ; + +struct dn_heap { + int size ; + int elements ; + int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ + struct dn_heap_entry *p ; /* really an array of "size" entries */ +} ; + +#ifdef _KERNEL +/* + * Packets processed by dummynet have an mbuf tag associated with + * them that carries their dummynet state. This is used within + * the dummynet code as well as outside when checking for special + * processing requirements. + */ +struct dn_pkt_tag { + struct ip_fw *rule; /* matching rule */ + int dn_dir; /* action when packet comes out. 
*/ +#define DN_TO_IP_OUT 1 +#define DN_TO_IP_IN 2 +/* Obsolete: #define DN_TO_BDG_FWD 3 */ +#define DN_TO_ETH_DEMUX 4 +#define DN_TO_ETH_OUT 5 +#define DN_TO_IP6_IN 6 +#define DN_TO_IP6_OUT 7 +#define DN_TO_IFB_FWD 8 + + dn_key output_time; /* when the pkt is due for delivery */ + struct ifnet *ifp; /* interface, for ip_output */ + struct _ip6dn_args ip6opt; /* XXX ipv6 options */ +}; +#endif /* _KERNEL */ + +/* + * Overall structure of dummynet (with WF2Q+): + +In dummynet, packets are selected with the firewall rules, and passed +to two different objects: PIPE or QUEUE. + +A QUEUE is just a queue with configurable size and queue management +policy. It is also associated with a mask (to discriminate among +different flows), a weight (used to give different shares of the +bandwidth to different flows) and a "pipe", which essentially +supplies the transmit clock for all queues associated with that +pipe. + +A PIPE emulates a fixed-bandwidth link, whose bandwidth is +configurable. The "clock" for a pipe can come from either an +internal timer, or from the transmit interrupt of an interface. +A pipe is also associated with one (or more, if masks are used) +queue, where all packets for that pipe are stored. + +The bandwidth available on the pipe is shared by the queues +associated with that pipe (only one in case the packet is sent +to a PIPE) according to the WF2Q+ scheduling algorithm and the +configured weights. + +In general, incoming packets are stored in the appropriate queue, +which is then placed into one of a few heaps managed by a scheduler +to decide when the packet should be extracted. +The scheduler (a function called dummynet()) is run at every timer +tick, and grabs queues from the head of the heaps when they are +ready for processing. 
+ +There are three data structures defining a pipe and associated queues: + + + dn_pipe, which contains the main configuration parameters related + to delay and bandwidth; + + dn_flow_set, which contains WF2Q+ configuration, flow + masks, plr and RED configuration; + + dn_flow_queue, which is the per-flow queue (containing the packets) + +Multiple dn_flow_set can be linked to the same pipe, and multiple +dn_flow_queue can be linked to the same dn_flow_set. +All data structures are linked in a linear list which is used for +housekeeping purposes. + +During configuration, we create and initialize the dn_flow_set +and dn_pipe structures (a dn_pipe also contains a dn_flow_set). + +At runtime: packets are sent to the appropriate dn_flow_set (either +WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows), +which in turn dispatches them to the appropriate dn_flow_queue +(created dynamically according to the masks). + +The transmit clock for fixed rate flows (ready_event()) selects the +dn_flow_queue to be used to transmit the next packet. For WF2Q, +wfq_ready_event() extracts a pipe which in turn selects the right +flow using a number of heaps defined in the pipe itself. + + * + */ + +/* + * per flow queue. This contains the flow identifier, the queue + * of packets, counters, and parameters used to support both RED and + * WF2Q+. + * + * A dn_flow_queue is created and initialized whenever a packet for + * a new flow arrives. + */ +struct dn_flow_queue { + struct dn_flow_queue *next ; + struct ipfw_flow_id id ; + + struct mbuf *head, *tail ; /* queue of packets */ + u_int len ; + u_int len_bytes ; + + /* + * When we emulate MAC overheads, or channel unavailability due + * to other traffic on a shared medium, we augment the packet at + * the head of the queue with an 'extra_bits' field representing + * the additional delay the packet will be subject to: + * extra_bits = bw*unavailable_time.
+ * With large bandwidth and large delays, extra_bits (and also numbytes) + * can become very large, so better play safe and use 64 bit + */ + uint64_t numbytes ; /* credit for transmission (dynamic queues) */ + int64_t extra_bits; /* extra bits simulating unavailable channel */ + + u_int64_t tot_pkts ; /* statistics counters */ + u_int64_t tot_bytes ; + u_int32_t drops ; + + int hash_slot ; /* debugging/diagnostic */ + + /* RED parameters */ + int avg ; /* average queue length est. (scaled) */ + int count ; /* arrivals since last RED drop */ + int random ; /* random value (scaled) */ + dn_key q_time; /* start of queue idle time */ + + /* WF2Q+ support */ + struct dn_flow_set *fs ; /* parent flow set */ + int heap_pos ; /* position (index) of struct in heap */ + dn_key sched_time ; /* current time when queue enters ready_heap */ + + dn_key S,F ; /* start time, finish time */ + /* + * Setting F < S means the timestamp is invalid. We only need + * to test this when the queue is empty. + */ +} ; + +/* + * flow_set descriptor. Contains the "template" parameters for the + * queue configuration, and pointers to the hash table of dn_flow_queue's. + * + * The hash table is an array of lists -- we identify the slot by + * hashing the flow-id, then scan the list looking for a match. + * The size of the hash table (buckets) is configurable on a per-queue + * basis. + * + * A dn_flow_set is created whenever a new queue or pipe is created (in the + * latter case, the structure is located inside the struct dn_pipe). + */ +struct dn_flow_set { + SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ + + u_short fs_nr ; /* flow_set number */ + u_short flags_fs; +#define DN_HAVE_FLOW_MASK 0x0001 +#define DN_IS_RED 0x0002 +#define DN_IS_GENTLE_RED 0x0004 +#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ +#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ +#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. 
*/ +#define DN_IS_PIPE 0x4000 +#define DN_IS_QUEUE 0x8000 + + struct dn_pipe *pipe ; /* pointer to parent pipe */ + u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ + + int weight ; /* WFQ queue weight */ + int qsize ; /* queue size in slots or bytes */ + int plr ; /* pkt loss rate (2^31-1 means 100%) */ + + struct ipfw_flow_id flow_mask ; + + /* hash table of queues onto this flow_set */ + int rq_size ; /* number of slots */ + int rq_elements ; /* active elements */ + struct dn_flow_queue **rq; /* array of rq_size entries */ + + u_int32_t last_expired ; /* do not expire too frequently */ + int backlogged ; /* #active queues for this flowset */ + + /* RED parameters */ +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) + int w_q ; /* queue weight (scaled) */ + int max_th ; /* maximum threshold for queue (scaled) */ + int min_th ; /* minimum threshold for queue (scaled) */ + int max_p ; /* maximum value for p_b (scaled) */ + u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ + u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ + u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ + u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ + u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ + u_int lookup_depth ; /* depth of lookup table */ + int lookup_step ; /* granularity inside the lookup table */ + int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ + int avg_pkt_size ; /* medium packet size */ + int max_pkt_size ; /* max packet size */ +}; +SLIST_HEAD(dn_flow_set_head, dn_flow_set); + +/* + * Pipe descriptor. Contains global parameters, delay-line queue, + * and the flow_set used for fixed-rate queues. + * + * For WF2Q+ support it also has 3 heaps holding dn_flow_queue: + * not_eligible_heap, for queues whose start time is higher + * than the virtual time. Sorted by start time. 
+ * scheduler_heap, for queues eligible for scheduling. Sorted by + * finish time. + * idle_heap, all flows that are idle and can be removed. We + * do that on each tick so we do not slow down too much + * operations during forwarding. + * + */ +struct dn_pipe { /* a pipe */ + SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */ + + int pipe_nr ; /* number */ + int bandwidth; /* really, bytes/tick. */ + int delay ; /* really, ticks */ + + struct mbuf *head, *tail ; /* packets in delay line */ + + /* WF2Q+ */ + struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ + struct dn_heap not_eligible_heap; /* top extract- key Start time */ + struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ + + dn_key V ; /* virtual time */ + int sum; /* sum of weights of all active sessions */ + + /* Same as in dn_flow_queue, numbytes can become large */ + int64_t numbytes; /* bits I can transmit (more or less). */ + + dn_key sched_time ; /* time pipe was scheduled in ready_heap */ + + /* + * When the tx clock come from an interface (if_name[0] != '\0'), its name + * is stored below, whereas the ifp is filled when the rule is configured. 
+ */ + char if_name[IFNAMSIZ]; + struct ifnet *ifp ; + int ready ; /* set if ifp != NULL and we got a signal from it */ + + struct dn_flow_set fs ; /* used with fixed-rate flows */ + + /* fields to simulate a delay profile */ + +#define ED_MAX_NAME_LEN 32 + char name[ED_MAX_NAME_LEN]; + int loss_level; + int samples_no; + int *samples; +}; + +/* dn_pipe_max is used to pass pipe configuration from userland onto + * kernel space and back + */ +#define ED_MAX_SAMPLES_NO 1024 +struct dn_pipe_max { + struct dn_pipe pipe; + int samples[ED_MAX_SAMPLES_NO]; +}; + +SLIST_HEAD(dn_pipe_head, dn_pipe); + +#ifdef _KERNEL +typedef int ip_dn_ctl_t(struct sockopt *); /* raw_ip.c */ +typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ +typedef int ip_dn_io_t(struct mbuf **m, int dir, struct ip_fw_args *fwa); +extern ip_dn_ctl_t *ip_dn_ctl_ptr; +extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; +extern ip_dn_io_t *ip_dn_io_ptr; +#define DUMMYNET_LOADED (ip_dn_io_ptr != NULL) + +/* + * Return the IPFW rule associated with the dummynet tag; if any. + * Make sure that the dummynet tag is not reused by lower layers. + */ +static __inline struct ip_fw * +ip_dn_claim_rule(struct mbuf *m) +{ + struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); + if (mtag != NULL) { + mtag->m_tag_id = PACKET_TAG_NONE; + return (((struct dn_pkt_tag *)(mtag+1))->rule); + } else + return (NULL); +} +#endif +#endif /* _IP_DUMMYNET_H */ diff --git a/ipfw/include/netinet/ip_fw.h b/ipfw/include/netinet/ip_fw.h new file mode 100644 index 0000000..62617f5 --- /dev/null +++ b/ipfw/include/netinet/ip_fw.h @@ -0,0 +1,676 @@ +/*- + * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.110.2.6 2008/10/14 08:03:58 rwatson Exp $ + */ + +#ifndef _IPFW2_H +#define _IPFW2_H + +/* + * The default rule number. By the design of ip_fw, the default rule + * is the last one, so its number can also serve as the highest number + * allowed for a rule. The ip_fw code relies on both meanings of this + * constant. + */ +#define IPFW_DEFAULT_RULE 65535 + +/* + * The kernel representation of ipfw rules is made of a list of + * 'instructions' (for all practical purposes equivalent to BPF + * instructions), which specify which fields of the packet + * (or its metadata) should be analysed. + * + * Each instruction is stored in a structure which begins with + * "ipfw_insn", and can contain extra fields depending on the + * instruction type (listed below). + * Note that the code is written so that individual instructions + * have a size which is a multiple of 32 bits. 
This means that, if + * such structures contain pointers or other 64-bit entities, + * (there is just one instance now) they may end up unaligned on + * 64-bit architectures, so they must be handled with care. + * + * "enum ipfw_opcodes" are the opcodes supported. We can have up + * to 256 different opcodes. When adding new opcodes, they should + * be appended to the end of the opcode list before O_LAST_OPCODE, + * this will prevent the ABI from being broken, otherwise users + * will have to recompile ipfw(8) when they update the kernel. + */ + +enum ipfw_opcodes { /* arguments (4 byte each) */ + O_NOP, + + O_IP_SRC, /* u32 = IP */ + O_IP_SRC_MASK, /* ip = IP/mask */ + O_IP_SRC_ME, /* none */ + O_IP_SRC_SET, /* u32=base, arg1=len, bitmap */ + + O_IP_DST, /* u32 = IP */ + O_IP_DST_MASK, /* ip = IP/mask */ + O_IP_DST_ME, /* none */ + O_IP_DST_SET, /* u32=base, arg1=len, bitmap */ + + O_IP_SRCPORT, /* (n)port list:mask 4 byte ea */ + O_IP_DSTPORT, /* (n)port list:mask 4 byte ea */ + O_PROTO, /* arg1=protocol */ + + O_MACADDR2, /* 2 mac addr:mask */ + O_MAC_TYPE, /* same as srcport */ + + O_LAYER2, /* none */ + O_IN, /* none */ + O_FRAG, /* none */ + + O_RECV, /* none */ + O_XMIT, /* none */ + O_VIA, /* none */ + + O_IPOPT, /* arg1 = 2*u8 bitmap */ + O_IPLEN, /* arg1 = len */ + O_IPID, /* arg1 = id */ + + O_IPTOS, /* arg1 = id */ + O_IPPRECEDENCE, /* arg1 = precedence << 5 */ + O_IPTTL, /* arg1 = TTL */ + + O_IPVER, /* arg1 = version */ + O_UID, /* u32 = id */ + O_GID, /* u32 = id */ + O_ESTAB, /* none (tcp established) */ + O_TCPFLAGS, /* arg1 = 2*u8 bitmap */ + O_TCPWIN, /* arg1 = desired win */ + O_TCPSEQ, /* u32 = desired seq. */ + O_TCPACK, /* u32 = desired seq. */ + O_ICMPTYPE, /* u32 = icmp bitmap */ + O_TCPOPTS, /* arg1 = 2*u8 bitmap */ + + O_VERREVPATH, /* none */ + O_VERSRCREACH, /* none */ + + O_PROBE_STATE, /* none */ + O_KEEP_STATE, /* none */ + O_LIMIT, /* ipfw_insn_limit */ + O_LIMIT_PARENT, /* dyn_type, not an opcode.
*/ + + /* + * These are really 'actions'. + */ + + O_LOG, /* ipfw_insn_log */ + O_PROB, /* u32 = match probability */ + + O_CHECK_STATE, /* none */ + O_ACCEPT, /* none */ + O_DENY, /* none */ + O_REJECT, /* arg1=icmp arg (same as deny) */ + O_COUNT, /* none */ + O_SKIPTO, /* arg1=next rule number */ + O_PIPE, /* arg1=pipe number */ + O_QUEUE, /* arg1=queue number */ + O_DIVERT, /* arg1=port number */ + O_TEE, /* arg1=port number */ + O_FORWARD_IP, /* fwd sockaddr */ + O_FORWARD_MAC, /* fwd mac */ + O_NAT, /* nope */ + O_REASS, /* none */ + + /* + * More opcodes. + */ + O_IPSEC, /* has ipsec history */ + O_IP_SRC_LOOKUP, /* arg1=table number, u32=value */ + O_IP_DST_LOOKUP, /* arg1=table number, u32=value */ + O_ANTISPOOF, /* none */ + O_JAIL, /* u32 = id */ + O_ALTQ, /* u32 = altq classif. qid */ + O_DIVERTED, /* arg1=bitmap (1:loop, 2:out) */ + O_TCPDATALEN, /* arg1 = tcp data len */ + O_IP6_SRC, /* address without mask */ + O_IP6_SRC_ME, /* my addresses */ + O_IP6_SRC_MASK, /* address with the mask */ + O_IP6_DST, + O_IP6_DST_ME, + O_IP6_DST_MASK, + O_FLOW6ID, /* for flow id tag in the ipv6 pkt */ + O_ICMP6TYPE, /* icmp6 packet type filtering */ + O_EXT_HDR, /* filtering for ipv6 extension header */ + O_IP6, + + /* + * actions for ng_ipfw + */ + O_NETGRAPH, /* send to ng_ipfw */ + O_NGTEE, /* copy to ng_ipfw */ + + O_IP4, + + O_UNREACH6, /* arg1=icmpv6 code arg (deny) */ + + O_TAG, /* arg1=tag number */ + O_TAGGED, /* arg1=tag number */ + + O_SETFIB, /* arg1=FIB number */ + O_FIB, /* arg1=FIB desired fib number */ + + O_LAST_OPCODE /* not an opcode! */ +}; + +/* + * The extension header are filtered only for presence using a bit + * vector with a flag for each header. + */ +#define EXT_FRAGMENT 0x1 +#define EXT_HOPOPTS 0x2 +#define EXT_ROUTING 0x4 +#define EXT_AH 0x8 +#define EXT_ESP 0x10 +#define EXT_DSTOPTS 0x20 +#define EXT_RTHDR0 0x40 +#define EXT_RTHDR2 0x80 + +/* + * Template for instructions. 
+ * + * ipfw_insn is used for all instructions which require no operands, + * a single 16-bit value (arg1), or a couple of 8-bit values. + * + * For other instructions which require different/larger arguments + * we have derived structures, ipfw_insn_*. + * + * The size of the instruction (in 32-bit words) is in the low + * 6 bits of "len". The 2 remaining bits are used to implement + * NOT and OR on individual instructions. Given a type, you can + * compute the length to be put in "len" using F_INSN_SIZE(t) + * + * F_NOT negates the match result of the instruction. + * + * F_OR is used to build or blocks. By default, instructions + * are evaluated as part of a logical AND. An "or" block + * { X or Y or Z } contains F_OR set in all but the last + * instruction of the block. A match will cause the code + * to skip past the last instruction of the block. + * + * NOTA BENE: in a couple of places we assume that + * sizeof(ipfw_insn) == sizeof(u_int32_t) + * this needs to be fixed. + * + */ +typedef struct _ipfw_insn { /* template for instructions */ + enum ipfw_opcodes opcode:8; + u_int8_t len; /* number of 32-bit words */ +#define F_NOT 0x80 +#define F_OR 0x40 +#define F_LEN_MASK 0x3f +#define F_LEN(cmd) ((cmd)->len & F_LEN_MASK) + + u_int16_t arg1; +} ipfw_insn; + +/* + * The F_INSN_SIZE(type) computes the size, in 4-byte words, of + * a given type. + */ +#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t)) + +#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ + +/* + * This is used to store an array of 16-bit entries (ports etc.) + */ +typedef struct _ipfw_insn_u16 { + ipfw_insn o; + u_int16_t ports[2]; /* there may be more */ +} ipfw_insn_u16; + +/* + * This is used to store an array of 32-bit entries + * (uid, single IPv4 addresses etc.) + */ +typedef struct _ipfw_insn_u32 { + ipfw_insn o; + u_int32_t d[1]; /* one or more */ +} ipfw_insn_u32; + +/* + * This is used to store IP addr-mask pairs. 
+ */ +typedef struct _ipfw_insn_ip { + ipfw_insn o; + struct in_addr addr; + struct in_addr mask; +} ipfw_insn_ip; + +/* + * This is used to forward to a given address (ip). + */ +typedef struct _ipfw_insn_sa { + ipfw_insn o; + struct sockaddr_in sa; +} ipfw_insn_sa; + +/* + * This is used for MAC addr-mask pairs. + */ +typedef struct _ipfw_insn_mac { + ipfw_insn o; + u_char addr[12]; /* dst[6] + src[6] */ + u_char mask[12]; /* dst[6] + src[6] */ +} ipfw_insn_mac; + +/* + * This is used for interface match rules (recv xx, xmit xx). + */ +typedef struct _ipfw_insn_if { + ipfw_insn o; + union { + struct in_addr ip; + int glob; + } p; + char name[IFNAMSIZ]; +} ipfw_insn_if; + +/* + * This is used for storing an altq queue id number. + */ +typedef struct _ipfw_insn_altq { + ipfw_insn o; + u_int32_t qid; +} ipfw_insn_altq; + +/* + * This is used for limit rules. + */ +typedef struct _ipfw_insn_limit { + ipfw_insn o; + u_int8_t _pad; + u_int8_t limit_mask; /* combination of DYN_* below */ +#define DYN_SRC_ADDR 0x1 +#define DYN_SRC_PORT 0x2 +#define DYN_DST_ADDR 0x4 +#define DYN_DST_PORT 0x8 + + u_int16_t conn_limit; +} ipfw_insn_limit; + +/* + * This is used for log instructions. + */ +typedef struct _ipfw_insn_log { + ipfw_insn o; + u_int32_t max_log; /* how many do we log -- 0 = all */ + u_int32_t log_left; /* how many left to log */ +} ipfw_insn_log; + +/* + * Data structures required by both ipfw(8) and ipfw(4) but not part of the + * management API are protected by IPFW_INTERNAL. + */ +#ifdef IPFW_INTERNAL +/* Server pool support (LSNAT). */ +struct cfg_spool { + LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */ + struct in_addr addr; + u_short port; +}; +#endif + +/* Redirect modes id. */ +#define REDIR_ADDR 0x01 +#define REDIR_PORT 0x02 +#define REDIR_PROTO 0x04 + +#ifdef IPFW_INTERNAL +/* Nat redirect configuration. 
*/ +struct cfg_redir { + LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */ + u_int16_t mode; /* type of redirect mode */ + struct in_addr laddr; /* local ip address */ + struct in_addr paddr; /* public ip address */ + struct in_addr raddr; /* remote ip address */ + u_short lport; /* local port */ + u_short pport; /* public port */ + u_short rport; /* remote port */ + u_short pport_cnt; /* number of public ports */ + u_short rport_cnt; /* number of remote ports */ + int proto; /* protocol: tcp/udp */ + struct alias_link **alink; + /* num of entry in spool chain */ + u_int16_t spool_cnt; + /* chain of spool instances */ + LIST_HEAD(spool_chain, cfg_spool) spool_chain; +}; +#endif + +#define NAT_BUF_LEN 1024 + +#ifdef IPFW_INTERNAL +/* Nat configuration data struct. */ +struct cfg_nat { + /* chain of nat instances */ + LIST_ENTRY(cfg_nat) _next; + int id; /* nat id */ + struct in_addr ip; /* nat ip address */ + char if_name[IF_NAMESIZE]; /* interface name */ + int mode; /* aliasing mode */ + struct libalias *lib; /* libalias instance */ + /* number of entry in spool chain */ + int redir_cnt; + /* chain of redir instances */ + LIST_HEAD(redir_chain, cfg_redir) redir_chain; +}; +#endif + +#define SOF_NAT sizeof(struct cfg_nat) +#define SOF_REDIR sizeof(struct cfg_redir) +#define SOF_SPOOL sizeof(struct cfg_spool) + +/* Nat command. 
*/ +typedef struct _ipfw_insn_nat { + ipfw_insn o; + struct cfg_nat *nat; +} ipfw_insn_nat; + +/* Apply ipv6 mask on ipv6 addr */ +#define APPLY_MASK(addr,mask) \ + (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \ + (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \ + (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \ + (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; + +/* Structure for ipv6 */ +typedef struct _ipfw_insn_ip6 { + ipfw_insn o; + struct in6_addr addr6; + struct in6_addr mask6; +} ipfw_insn_ip6; + +/* Used to support icmp6 types */ +typedef struct _ipfw_insn_icmp6 { + ipfw_insn o; + uint32_t d[7]; /* XXX This number si related to the netinet/icmp6.h + * define ICMP6_MAXTYPE + * as follows: n = ICMP6_MAXTYPE/32 + 1 + * Actually is 203 + */ +} ipfw_insn_icmp6; + +/* + * Here we have the structure representing an ipfw rule. + * + * It starts with a general area (with link fields and counters) + * followed by an array of one or more instructions, which the code + * accesses as an array of 32-bit values. + * + * Given a rule pointer r: + * + * r->cmd is the start of the first instruction. + * ACTION_PTR(r) is the start of the first action (things to do + * once a rule matched). + * + * When assembling instruction, remember the following: + * + * + if a rule has a "keep-state" (or "limit") option, then the + * first instruction (at r->cmd) MUST BE an O_PROBE_STATE + * + if a rule has a "log" option, then the first action + * (at ACTION_PTR(r)) MUST be O_LOG + * + if a rule has an "altq" option, it comes after "log" + * + if a rule has an O_TAG option, it comes after "log" and "altq" + * + * NOTE: we use a simple linked list of rules because we never need + * to delete a rule without scanning the list. We do not use + * queue(3) macros for portability and readability. 
+ */ + +struct ip_fw { + struct ip_fw *next; /* linked list of rules */ + struct ip_fw *next_rule; /* ptr to next [skipto] rule */ + /* 'next_rule' is used to pass up 'set_disable' status */ + + u_int16_t act_ofs; /* offset of action in 32-bit units */ + u_int16_t cmd_len; /* # of 32-bit words in cmd */ + u_int16_t rulenum; /* rule number */ + u_int8_t set; /* rule set (0..31) */ +#define RESVD_SET 31 /* set for default and persistent rules */ + u_int8_t _pad; /* padding */ + + /* These fields are present in all rules. */ + u_int64_t pcnt; /* Packet counter */ + u_int64_t bcnt; /* Byte counter */ + u_int32_t timestamp; /* tv_sec of last match */ + + ipfw_insn cmd[1]; /* storage for commands */ +}; + +#define ACTION_PTR(rule) \ + (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ) + +#define RULESIZE(rule) (sizeof(struct ip_fw) + \ + ((struct ip_fw *)(rule))->cmd_len * 4 - 4) + +/* + * This structure is used as a flow mask and a flow id for various + * parts of the code. + */ +struct ipfw_flow_id { + u_int32_t dst_ip; + u_int32_t src_ip; + u_int16_t dst_port; + u_int16_t src_port; + u_int8_t fib; + u_int8_t proto; + u_int8_t flags; /* protocol-specific flags */ + uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */ + struct in6_addr dst_ip6; /* could also store MAC addr! */ + struct in6_addr src_ip6; + u_int32_t flow_id6; + u_int32_t frag_id6; +}; + +#define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6) + +/* + * Dynamic ipfw rule. + */ +typedef struct _ipfw_dyn_rule ipfw_dyn_rule; + +struct _ipfw_dyn_rule { + ipfw_dyn_rule *next; /* linked list of rules. 
*/ + struct ip_fw *rule; /* pointer to rule */ + /* 'rule' is used to pass up the rule number (from the parent) */ + + ipfw_dyn_rule *parent; /* pointer to parent rule */ + u_int64_t pcnt; /* packet match counter */ + u_int64_t bcnt; /* byte match counter */ + struct ipfw_flow_id id; /* (masked) flow id */ + u_int32_t expire; /* expire time */ + u_int32_t bucket; /* which bucket in hash table */ + u_int32_t state; /* state of this rule (typically a + * combination of TCP flags) + */ + u_int32_t ack_fwd; /* most recent ACKs in forward */ + u_int32_t ack_rev; /* and reverse directions (used */ + /* to generate keepalives) */ + u_int16_t dyn_type; /* rule type */ + u_int16_t count; /* refcount */ +}; + +/* + * Definitions for IP option names. + */ +#define IP_FW_IPOPT_LSRR 0x01 +#define IP_FW_IPOPT_SSRR 0x02 +#define IP_FW_IPOPT_RR 0x04 +#define IP_FW_IPOPT_TS 0x08 + +/* + * Definitions for TCP option names. + */ +#define IP_FW_TCPOPT_MSS 0x01 +#define IP_FW_TCPOPT_WINDOW 0x02 +#define IP_FW_TCPOPT_SACK 0x04 +#define IP_FW_TCPOPT_TS 0x08 +#define IP_FW_TCPOPT_CC 0x10 + +#define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */ +#define ICMP6_UNREACH_RST 0x100 /* fake ICMPv6 code (send a TCP RST) */ + +/* + * These are used for lookup tables. + */ +typedef struct _ipfw_table_entry { + in_addr_t addr; /* network address */ + u_int32_t value; /* value */ + u_int16_t tbl; /* table number */ + u_int8_t masklen; /* mask length */ +} ipfw_table_entry; + +typedef struct _ipfw_table { + u_int32_t size; /* size of entries in bytes */ + u_int32_t cnt; /* # of entries */ + u_int16_t tbl; /* table number */ + ipfw_table_entry ent[0]; /* entries */ +} ipfw_table; + +#define IP_FW_TABLEARG 65535 + +/* + * Main firewall chains definitions and global var's definitions. 
+ */ +#ifdef _KERNEL + +/* Return values from ipfw_chk() */ +enum { + IP_FW_PASS = 0, + IP_FW_DENY, + IP_FW_DIVERT, + IP_FW_TEE, + IP_FW_DUMMYNET, + IP_FW_NETGRAPH, + IP_FW_NGTEE, + IP_FW_NAT, + IP_FW_REASS, +}; + +/* flags for divert mtag */ +#define IP_FW_DIVERT_LOOPBACK_FLAG 0x00080000 +#define IP_FW_DIVERT_OUTPUT_FLAG 0x00100000 + +/* + * Structure for collecting parameters to dummynet for ip6_output forwarding + */ +struct _ip6dn_args { + struct ip6_pktopts *opt_or; + struct route_in6 ro_or; + int flags_or; + struct ip6_moptions *im6o_or; + struct ifnet *origifp_or; + struct ifnet *ifp_or; + struct sockaddr_in6 dst_or; + u_long mtu_or; + struct route_in6 ro_pmtu_or; +}; + +/* + * Arguments for calling ipfw_chk() and dummynet_io(). We put them + * all into a structure because this way it is easier and more + * efficient to pass variables around and extend the interface. + */ +struct ip_fw_args { + struct mbuf *m; /* the mbuf chain */ + struct ifnet *oif; /* output interface */ + struct sockaddr_in *next_hop; /* forward address */ + struct ip_fw *rule; /* matching rule */ + struct ether_header *eh; /* for bridged packets */ + + struct ipfw_flow_id f_id; /* grabbed from IP header */ + u_int32_t cookie; /* a cookie depending on rule action */ + struct inpcb *inp; + + struct _ip6dn_args dummypar; /* dummynet->ip6_output */ + struct sockaddr_in hopstore; /* store here if cannot use a pointer */ +}; + +/* + * Function definitions. + */ + +/* Firewall hooks */ +struct sockopt; +struct dn_flow_set; + +int ipfw_check_in(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); +int ipfw_check_out(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); + +int ipfw_chk(struct ip_fw_args *); + +int ipfw_init(void); +void ipfw_destroy(void); + +typedef int ip_fw_ctl_t(struct sockopt *); +extern ip_fw_ctl_t *ip_fw_ctl_ptr; +extern int fw_one_pass; +extern int fw_enable; +#ifdef INET6 +extern int fw6_enable; +#endif + +/* For kernel ipfw_ether and ipfw_bridge. 
*/ +typedef int ip_fw_chk_t(struct ip_fw_args *args); +extern ip_fw_chk_t *ip_fw_chk_ptr; +#define IPFW_LOADED (ip_fw_chk_ptr != NULL) + +#ifdef IPFW_INTERNAL + +#define IPFW_TABLES_MAX 128 +struct ip_fw_chain { + struct ip_fw *rules; /* list of rules */ + struct ip_fw *reap; /* list of rules to reap */ + LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ + struct radix_node_head *tables[IPFW_TABLES_MAX]; + struct rwlock rwmtx; +}; +#define IPFW_LOCK_INIT(_chain) \ + rw_init(&(_chain)->rwmtx, "IPFW static rules") +#define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx) +#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) + +#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) +#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) +#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) +#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) + +#define LOOKUP_NAT(l, i, p) do { \ + LIST_FOREACH((p), &(l.nat), _next) { \ + if ((p)->id == (i)) { \ + break; \ + } \ + } \ + } while (0) + +typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); +typedef int ipfw_nat_cfg_t(struct sockopt *); +#endif + +#endif /* _KERNEL */ +#endif /* _IPFW2_H */ diff --git a/ipfw/include/netinet/tcp.h b/ipfw/include/netinet/tcp.h new file mode 100644 index 0000000..37992e2 --- /dev/null +++ b/ipfw/include/netinet/tcp.h @@ -0,0 +1,14 @@ +/* + * a subset of FreeBSD's netinet/tcp.h + */ +#ifndef _NETINET_TCP_H_ +#define _NETINET_TCP_H_ + +#define TH_FIN 0x01 +#define TH_SYN 0x02 +#define TH_RST 0x04 +#define TH_PUSH 0x08 +#define TH_ACK 0x10 +#define TH_URG 0x20 + +#endif /* _NETINET_TCP_H_ */ diff --git a/ipfw/include/sys/sockio.h b/ipfw/include/sys/sockio.h new file mode 100644 index 0000000..e69de29 diff --git a/ipfw/include/timeconv.h b/ipfw/include/timeconv.h new file mode 100644 index 0000000..f3b8d22 --- /dev/null +++ b/ipfw/include/timeconv.h @@ -0,0 +1,29 @@ +/* + * simple override for _long_to_time() + */ +#ifndef _TIMECONV_H_ +#define _TIMECONV_H_ +static __inline 
time_t +_long_to_time(long tlong) +{ + if (sizeof(long) == sizeof(__int32_t)) + return((time_t)(__int32_t)(tlong)); + return((time_t)tlong); +} + +#ifdef __linux__ + +/* + * some linux headers have variables called __unused, whereas the name + * is an alias for the gcc attribute on FreeBSD. + * We have to define __unused appropriately, but this cannot be + * global because it would clash with the linux headers. + * + * __unused is defined here because there is not a better place + * and this file is included by ipfw2.c where the offending linux + * headers are not included. + */ +#define __unused __attribute__ ((__unused__)) +#endif + +#endif /* _TIMECONV_H_ */ diff --git a/ipfw/ipfw2.c b/ipfw/ipfw2.c new file mode 100644 index 0000000..ed12d9c --- /dev/null +++ b/ipfw/ipfw2.c @@ -0,0 +1,3823 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ * + * NEW command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/ipfw2.c 187983 2009-02-01 16:00:49Z luigi $ + */ + +#include +#include +#include +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* ctime */ +#include /* _long_to_time */ +#include +#include + +#include +#include /* only IFNAMSIZ */ +#include +#include /* only n_short, n_long */ +#include +#include +#include +#include +#include + +struct cmdline_opts co; /* global options */ + +int resvd_set_number = RESVD_SET; + +#define GET_UINT_ARG(arg, min, max, tok, s_x) do { \ + if (!ac) \ + errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \ + if (_substrcmp(*av, "tablearg") == 0) { \ + arg = IP_FW_TABLEARG; \ + break; \ + } \ + \ + { \ + long val; \ + char *end; \ + \ + val = strtol(*av, &end, 10); \ + \ + if (!isdigit(**av) || *end != '\0' || (val == 0 && errno == EINVAL)) \ + errx(EX_DATAERR, "%s: invalid argument: %s", \ + match_value(s_x, tok), *av); \ + \ + if (errno == ERANGE || val < min || val > max) \ + errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \ + match_value(s_x, tok), min, max, *av); \ + \ + if (val == IP_FW_TABLEARG) \ + errx(EX_DATAERR, "%s: illegal argument value: %s", \ + match_value(s_x, tok), *av); \ + arg = val; \ + } \ +} while (0) + +static void +PRINT_UINT_ARG(const char *str, uint32_t arg) +{ + if (str != NULL) + printf("%s",str); + if (arg == IP_FW_TABLEARG) + printf("tablearg"); + else + printf("%u", arg); +} + +static struct _s_x f_tcpflags[] = { + { "syn", TH_SYN }, + { "fin", TH_FIN }, + { "ack", TH_ACK }, + { "psh", TH_PUSH }, + { "rst", TH_RST }, + { "urg", TH_URG }, + { "tcp flag", 0 }, + { NULL, 0 } +}; + +static struct _s_x f_tcpopts[] = { + { "mss", IP_FW_TCPOPT_MSS }, + { "maxseg", IP_FW_TCPOPT_MSS }, + { "window", IP_FW_TCPOPT_WINDOW }, + { "sack", IP_FW_TCPOPT_SACK }, + { "ts", IP_FW_TCPOPT_TS }, + { "timestamp", 
IP_FW_TCPOPT_TS },
	{ "cc",		IP_FW_TCPOPT_CC },
	{ "tcp option",	0 },
	{ NULL,	0 }
};

/*
 * IP options span the range 0 to 255 so we need to remap them
 * (though in fact only the low 5 bits are significant).
 */
static struct _s_x f_ipopts[] = {
	{ "ssrr",	IP_FW_IPOPT_SSRR},
	{ "lsrr",	IP_FW_IPOPT_LSRR},
	{ "rr",		IP_FW_IPOPT_RR},
	{ "ts",		IP_FW_IPOPT_TS},
	{ "ip option",	0 },
	{ NULL,	0 }
};

/*
 * Names of the IP type-of-service bits.
 * NOTE(review): as in the tables above, the entry whose value is 0
 * ("ip tos option") looks like a descriptive label rather than a
 * keyword; print_flags() stops scanning a table at the first entry
 * whose value is 0.
 */
static struct _s_x f_iptos[] = {
	{ "lowdelay",	IPTOS_LOWDELAY},
	{ "throughput",	IPTOS_THROUGHPUT},
	{ "reliability", IPTOS_RELIABILITY},
	{ "mincost",	IPTOS_MINCOST},
	{ "congestion",	IPTOS_ECN_CE},
	{ "ecntransport", IPTOS_ECN_ECT0},
	{ "ip tos option", 0},
	{ NULL,	0 }
};

/* Keywords naming the DYN_* bits; "all" is the OR of the four bits. */
static struct _s_x limit_masks[] = {
	{"all",		DYN_SRC_ADDR|DYN_SRC_PORT|DYN_DST_ADDR|DYN_DST_PORT},
	{"src-addr",	DYN_SRC_ADDR},
	{"src-port",	DYN_SRC_PORT},
	{"dst-addr",	DYN_DST_ADDR},
	{"dst-port",	DYN_DST_PORT},
	{NULL,		0}
};

/*
 * we use IPPROTO_ETHERTYPE as a fake protocol id to call the print routines
 * This is only used in this code.
 */
#define IPPROTO_ETHERTYPE	0x1000
static struct _s_x ether_types[] = {
    /*
     * Note, we cannot use "-:&/" in the names because they are field
     * separators in the type specifications. Also, we use s = NULL as
     * end-delimiter, because a type of 0 can be legal.
     */
	{ "ip",		0x0800 },
	{ "ipv4",	0x0800 },
	{ "ipv6",	0x86dd },
	{ "arp",	0x0806 },
	{ "rarp",	0x8035 },
	{ "vlan",	0x8100 },
	{ "loop",	0x9000 },
	{ "trail",	0x1000 },
	{ "at",		0x809b },
	{ "atalk",	0x809b },
	{ "aarp",	0x80f3 },
	{ "pppoe_disc",	0x8863 },
	{ "pppoe_sess",	0x8864 },
	{ "ipx_8022",	0x00E0 },
	{ "ipx_8023",	0x0000 },
	{ "ipx_ii",	0x8137 },
	{ "ipx_snap",	0x8137 },
	{ "ipx",	0x8137 },
	{ "ns",		0x0600 },
	{ NULL,		0 }
};


/*
 * Rule action keywords and the TOK_* value each one maps to.  Several
 * spellings share one token (e.g. "accept", "pass", "allow" and
 * "permit" all map to TOK_ACCEPT).
 */
static struct _s_x rule_actions[] = {
	{ "accept",		TOK_ACCEPT },
	{ "pass",		TOK_ACCEPT },
	{ "allow",		TOK_ACCEPT },
	{ "permit",		TOK_ACCEPT },
	{ "count",		TOK_COUNT },
	{ "pipe",		TOK_PIPE },
	{ "queue",		TOK_QUEUE },
	{ "divert",		TOK_DIVERT },
	{ "tee",		TOK_TEE },
	{ "netgraph",		TOK_NETGRAPH },
	{ "ngtee",		TOK_NGTEE },
	{ "fwd",		TOK_FORWARD },
	{ "forward",		TOK_FORWARD },
	{ "skipto",		TOK_SKIPTO },
	{ "deny",		TOK_DENY },
	{ "drop",		TOK_DENY },
	{ "reject",		TOK_REJECT },
	{ "reset6",		TOK_RESET6 },
	{ "reset",		TOK_RESET },
	{ "unreach6",		TOK_UNREACH6 },
	{ "unreach",		TOK_UNREACH },
	{ "check-state",	TOK_CHECKSTATE },
	{ "//",			TOK_COMMENT },
	{ "nat",                TOK_NAT },
	{ "reass",		TOK_REASS },
	{ "setfib",		TOK_SETFIB },
	{ NULL, 0 }	/* terminator */
};

/* Keywords for the parameters that may accompany a rule action. */
static struct _s_x rule_action_params[] = {
	{ "altq",		TOK_ALTQ },
	{ "log",		TOK_LOG },
	{ "tag",		TOK_TAG },
	{ "untag",		TOK_UNTAG },
	{ NULL, 0 }	/* terminator */
};

/*
 * Rule option keywords and the TOK_* value each one maps to; aliases
 * share a token.  The entries after the blank line are the pseudo
 * options (negation, "or" and the grouping braces).
 */
static struct _s_x rule_options[] = {
	{ "tagged",		TOK_TAGGED },
	{ "uid",		TOK_UID },
	{ "gid",		TOK_GID },
	{ "jail",		TOK_JAIL },
	{ "in",			TOK_IN },
	{ "limit",		TOK_LIMIT },
	{ "keep-state",		TOK_KEEPSTATE },
	{ "bridged",		TOK_LAYER2 },
	{ "layer2",		TOK_LAYER2 },
	{ "out",		TOK_OUT },
	{ "diverted",		TOK_DIVERTED },
	{ "diverted-loopback",	TOK_DIVERTEDLOOPBACK },
	{ "diverted-output",	TOK_DIVERTEDOUTPUT },
	{ "xmit",		TOK_XMIT },
	{ "recv",		TOK_RECV },
	{ "via",		TOK_VIA },
	{ "fragment",		TOK_FRAG },
	{ "frag",		TOK_FRAG },
	{ "fib",		TOK_FIB },
	{ "ipoptions",		TOK_IPOPTS },
	{ "ipopts",		TOK_IPOPTS },
	{ "iplen",		TOK_IPLEN },
	{ "ipid",		TOK_IPID },
	{ "ipprecedence",	TOK_IPPRECEDENCE },
	{ "iptos",		TOK_IPTOS },
	{ "ipttl",		TOK_IPTTL },
	{ "ipversion",		TOK_IPVER },
	{ "ipver",		TOK_IPVER },
	{ "estab",		TOK_ESTAB },
	{ "established",	TOK_ESTAB },
	{ "setup",		TOK_SETUP },
	{ "tcpdatalen",		TOK_TCPDATALEN },
	{ "tcpflags",		TOK_TCPFLAGS },
	{ "tcpflgs",		TOK_TCPFLAGS },
	{ "tcpoptions",		TOK_TCPOPTS },
	{ "tcpopts",		TOK_TCPOPTS },
	{ "tcpseq",		TOK_TCPSEQ },
	{ "tcpack",		TOK_TCPACK },
	{ "tcpwin",		TOK_TCPWIN },
	{ "icmptype",		TOK_ICMPTYPES },
	{ "icmptypes",		TOK_ICMPTYPES },
	{ "dst-ip",		TOK_DSTIP },
	{ "src-ip",		TOK_SRCIP },
	{ "dst-port",		TOK_DSTPORT },
	{ "src-port",		TOK_SRCPORT },
	{ "proto",		TOK_PROTO },
	{ "MAC",		TOK_MAC },
	{ "mac",		TOK_MAC },
	{ "mac-type",		TOK_MACTYPE },
	{ "verrevpath",		TOK_VERREVPATH },
	{ "versrcreach",	TOK_VERSRCREACH },
	{ "antispoof",		TOK_ANTISPOOF },
	{ "ipsec",		TOK_IPSEC },
	{ "icmp6type",		TOK_ICMP6TYPES },
	{ "icmp6types",		TOK_ICMP6TYPES },
	{ "ext6hdr",		TOK_EXT6HDR},
	{ "flow-id",		TOK_FLOWID},
	{ "ipv6",		TOK_IPV6},
	{ "ip6",		TOK_IPV6},
	{ "ipv4",		TOK_IPV4},
	{ "ip4",		TOK_IPV4},
	{ "dst-ipv6",		TOK_DSTIP6},
	{ "dst-ip6",		TOK_DSTIP6},
	{ "src-ipv6",		TOK_SRCIP6},
	{ "src-ip6",		TOK_SRCIP6},
	{ "//",			TOK_COMMENT },

	{ "not",		TOK_NOT },		/* pseudo option */
	{ "!", /* escape ? */	TOK_NOT },		/* pseudo option */
	{ "or",			TOK_OR },		/* pseudo option */
	{ "|", /* escape */	TOK_OR },		/* pseudo option */
	{ "{",			TOK_STARTBRACE },	/* pseudo option */
	{ "(",			TOK_STARTBRACE },	/* pseudo option */
	{ "}",			TOK_ENDBRACE },		/* pseudo option */
	{ ")",			TOK_ENDBRACE },		/* pseudo option */
	{ NULL, 0 }	/* terminator */
};

/*
 * The following is used to generate a printable argument for
 * 64-bit numbers, irrespective of platform alignment and bit size.
+ * Because all the printf in this program use %llu as a format, + * we just return an unsigned long long, which is larger than + * we need in certain cases, but saves the hassle of using + * PRIu64 as a format specifier. + * We don't care about inlining, this is not performance critical code. + */ +unsigned long long +align_uint64(const uint64_t *pll) +{ + uint64_t ret; + + bcopy (pll, &ret, sizeof(ret)); + return ret; +} + +void * +safe_calloc(size_t number, size_t size) +{ + void *ret = calloc(number, size); + + if (ret == NULL) + err(EX_OSERR, "calloc"); + return ret; +} + +void * +safe_realloc(void *ptr, size_t size) +{ + void *ret = realloc(ptr, size); + + if (ret == NULL) + err(EX_OSERR, "realloc"); + return ret; +} + +/* + * conditionally runs the command. + */ +int +do_cmd(int optname, void *optval, uintptr_t optlen) +{ + static int s = -1; /* the socket */ + int i; + + if (co.test_only) + return 0; + + if (s == -1) + s = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); + if (s < 0) + err(EX_UNAVAILABLE, "socket"); + + if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET || + optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST || + optname == IP_FW_TABLE_GETSIZE || + optname == IP_FW_NAT_GET_CONFIG || + optname == IP_FW_NAT_GET_LOG) + i = getsockopt(s, IPPROTO_IP, optname, optval, + (socklen_t *)optlen); + else + i = setsockopt(s, IPPROTO_IP, optname, optval, optlen); + return i; +} + +/** + * match_token takes a table and a string, returns the value associated + * with the string (-1 in case of failure). + */ +int +match_token(struct _s_x *table, char *string) +{ + struct _s_x *pt; + uint i = strlen(string); + + for (pt = table ; i && pt->s != NULL ; pt++) + if (strlen(pt->s) == i && !bcmp(string, pt->s, i)) + return pt->x; + return -1; +} + +/** + * match_value takes a table and a value, returns the string associated + * with the value (NULL in case of failure). 
+ */ +char const * +match_value(struct _s_x *p, int value) +{ + for (; p->s != NULL; p++) + if (p->x == value) + return p->s; + return NULL; +} + +/* + * _substrcmp takes two strings and returns 1 if they do not match, + * and 0 if they match exactly or the first string is a sub-string + * of the second. A warning is printed to stderr in the case that the + * first string is a sub-string of the second. + * + * This function will be removed in the future through the usual + * deprecation process. + */ +int +_substrcmp(const char *str1, const char* str2) +{ + + if (strncmp(str1, str2, strlen(str1)) != 0) + return 1; + + if (strlen(str1) != strlen(str2)) + warnx("DEPRECATED: '%s' matched '%s' as a sub-string", + str1, str2); + return 0; +} + +/* + * _substrcmp2 takes three strings and returns 1 if the first two do not match, + * and 0 if they match exactly or the second string is a sub-string + * of the first. A warning is printed to stderr in the case that the + * first string does not match the third. + * + * This function exists to warn about the bizzare construction + * strncmp(str, "by", 2) which is used to allow people to use a shotcut + * for "bytes". The problem is that in addition to accepting "by", + * "byt", "byte", and "bytes", it also excepts "by_rabid_dogs" and any + * other string beginning with "by". + * + * This function will be removed in the future through the usual + * deprecation process. 
+ */ +int +_substrcmp2(const char *str1, const char* str2, const char* str3) +{ + + if (strncmp(str1, str2, strlen(str2)) != 0) + return 1; + + if (strcmp(str1, str3) != 0) + warnx("DEPRECATED: '%s' matched '%s'", + str1, str3); + return 0; +} + +/* + * prints one port, symbolic or numeric + */ +static void +print_port(int proto, uint16_t port) +{ + + if (proto == IPPROTO_ETHERTYPE) { + char const *s; + + if (co.do_resolv && (s = match_value(ether_types, port)) ) + printf("%s", s); + else + printf("0x%04x", port); + } else { + struct servent *se = NULL; + if (co.do_resolv) { + struct protoent *pe = getprotobynumber(proto); + + se = getservbyport(htons(port), pe ? pe->p_name : NULL); + } + if (se) + printf("%s", se->s_name); + else + printf("%d", port); + } +} + +static struct _s_x _port_name[] = { + {"dst-port", O_IP_DSTPORT}, + {"src-port", O_IP_SRCPORT}, + {"ipid", O_IPID}, + {"iplen", O_IPLEN}, + {"ipttl", O_IPTTL}, + {"mac-type", O_MAC_TYPE}, + {"tcpdatalen", O_TCPDATALEN}, + {"tagged", O_TAGGED}, + {NULL, 0} +}; + +/* + * Print the values in a list 16-bit items of the types above. + * XXX todo: add support for mask. + */ +static void +print_newports(ipfw_insn_u16 *cmd, int proto, int opcode) +{ + uint16_t *p = cmd->ports; + int i; + char const *sep; + + if (opcode != 0) { + sep = match_value(_port_name, opcode); + if (sep == NULL) + sep = "???"; + printf (" %s", sep); + } + sep = " "; + for (i = F_LEN((ipfw_insn *)cmd) - 1; i > 0; i--, p += 2) { + printf("%s", sep); + print_port(proto, p[0]); + if (p[0] != p[1]) { + printf("-"); + print_port(proto, p[1]); + } + sep = ","; + } +} + +/* + * Like strtol, but also translates service names into port numbers + * for some protocols. + * In particular: + * proto == -1 disables the protocol check; + * proto == IPPROTO_ETHERTYPE looks up an internal table + * proto == matches the values there. + * Returns *end == s in case the parameter is not found. 
+ */ +static int +strtoport(char *s, char **end, int base, int proto) +{ + char *p, *buf; + char *s1; + int i; + + *end = s; /* default - not found */ + if (*s == '\0') + return 0; /* not found */ + + if (isdigit(*s)) + return strtol(s, end, base); + + /* + * find separator. '\\' escapes the next char. + */ + for (s1 = s; *s1 && (isalnum(*s1) || *s1 == '\\') ; s1++) + if (*s1 == '\\' && s1[1] != '\0') + s1++; + + buf = safe_calloc(s1 - s + 1, 1); + + /* + * copy into a buffer skipping backslashes + */ + for (p = s, i = 0; p != s1 ; p++) + if (*p != '\\') + buf[i++] = *p; + buf[i++] = '\0'; + + if (proto == IPPROTO_ETHERTYPE) { + i = match_token(ether_types, buf); + free(buf); + if (i != -1) { /* found */ + *end = s1; + return i; + } + } else { + struct protoent *pe = NULL; + struct servent *se; + + if (proto != 0) + pe = getprotobynumber(proto); + setservent(1); + se = getservbyname(buf, pe ? pe->p_name : NULL); + free(buf); + if (se != NULL) { + *end = s1; + return ntohs(se->s_port); + } + } + return 0; /* not found */ +} + +/* + * Fill the body of the command with the list of port ranges. + */ +static int +fill_newports(ipfw_insn_u16 *cmd, char *av, int proto) +{ + uint16_t a, b, *p = cmd->ports; + int i = 0; + char *s = av; + + while (*s) { + a = strtoport(av, &s, 0, proto); + if (s == av) /* empty or invalid argument */ + return (0); + + switch (*s) { + case '-': /* a range */ + av = s + 1; + b = strtoport(av, &s, 0, proto); + /* Reject expressions like '1-abc' or '1-2-3'. 
*/ + if (s == av || (*s != ',' && *s != '\0')) + return (0); + p[0] = a; + p[1] = b; + break; + case ',': /* comma separated list */ + case '\0': + p[0] = p[1] = a; + break; + default: + warnx("port list: invalid separator <%c> in <%s>", + *s, av); + return (0); + } + + i++; + p += 2; + av = s + 1; + } + if (i > 0) { + if (i + 1 > F_LEN_MASK) + errx(EX_DATAERR, "too many ports/ranges\n"); + cmd->o.len |= i + 1; /* leave F_NOT and F_OR untouched */ + } + return (i); +} + +static struct _s_x icmpcodes[] = { + { "net", ICMP_UNREACH_NET }, + { "host", ICMP_UNREACH_HOST }, + { "protocol", ICMP_UNREACH_PROTOCOL }, + { "port", ICMP_UNREACH_PORT }, + { "needfrag", ICMP_UNREACH_NEEDFRAG }, + { "srcfail", ICMP_UNREACH_SRCFAIL }, + { "net-unknown", ICMP_UNREACH_NET_UNKNOWN }, + { "host-unknown", ICMP_UNREACH_HOST_UNKNOWN }, + { "isolated", ICMP_UNREACH_ISOLATED }, + { "net-prohib", ICMP_UNREACH_NET_PROHIB }, + { "host-prohib", ICMP_UNREACH_HOST_PROHIB }, + { "tosnet", ICMP_UNREACH_TOSNET }, + { "toshost", ICMP_UNREACH_TOSHOST }, + { "filter-prohib", ICMP_UNREACH_FILTER_PROHIB }, + { "host-precedence", ICMP_UNREACH_HOST_PRECEDENCE }, + { "precedence-cutoff", ICMP_UNREACH_PRECEDENCE_CUTOFF }, + { NULL, 0 } +}; + +static void +fill_reject_code(u_short *codep, char *str) +{ + int val; + char *s; + + val = strtoul(str, &s, 0); + if (s == str || *s != '\0' || val >= 0x100) + val = match_token(icmpcodes, str); + if (val < 0) + errx(EX_DATAERR, "unknown ICMP unreachable code ``%s''", str); + *codep = val; + return; +} + +static void +print_reject_code(uint16_t code) +{ + char const *s = match_value(icmpcodes, code); + + if (s != NULL) + printf("unreach %s", s); + else + printf("unreach %u", code); +} + +/* + * Returns the number of bits set (from left) in a contiguous bitmask, + * or -1 if the mask is not contiguous. + * XXX this needs a proper fix. + * This effectively works on masks in big-endian (network) format. + * when compiled on little endian architectures. 
+ * + * First bit is bit 7 of the first byte -- note, for MAC addresses, + * the first bit on the wire is bit 0 of the first byte. + * len is the max length in bits. + */ +int +contigmask(uint8_t *p, int len) +{ + int i, n; + + for (i=0; iarg1 & 0xff; + uint8_t clear = (cmd->arg1 >> 8) & 0xff; + + if (list == f_tcpflags && set == TH_SYN && clear == TH_ACK) { + printf(" setup"); + return; + } + + printf(" %s ", name); + for (i=0; list[i].x != 0; i++) { + if (set & list[i].x) { + set &= ~list[i].x; + printf("%s%s", comma, list[i].s); + comma = ","; + } + if (clear & list[i].x) { + clear &= ~list[i].x; + printf("%s!%s", comma, list[i].s); + comma = ","; + } + } +} + +/* + * Print the ip address contained in a command. + */ +static void +print_ip(ipfw_insn_ip *cmd, char const *s) +{ + struct hostent *he = NULL; + int len = F_LEN((ipfw_insn *)cmd); + uint32_t *a = ((ipfw_insn_u32 *)cmd)->d; + + printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s); + + if (cmd->o.opcode == O_IP_SRC_ME || cmd->o.opcode == O_IP_DST_ME) { + printf("me"); + return; + } + if (cmd->o.opcode == O_IP_SRC_LOOKUP || + cmd->o.opcode == O_IP_DST_LOOKUP) { + printf("table(%u", ((ipfw_insn *)cmd)->arg1); + if (len == F_INSN_SIZE(ipfw_insn_u32)) + printf(",%u", *a); + printf(")"); + return; + } + if (cmd->o.opcode == O_IP_SRC_SET || cmd->o.opcode == O_IP_DST_SET) { + uint32_t x, *map = (uint32_t *)&(cmd->mask); + int i, j; + char comma = '{'; + + x = cmd->o.arg1 - 1; + x = htonl( ~x ); + cmd->addr.s_addr = htonl(cmd->addr.s_addr); + printf("%s/%d", inet_ntoa(cmd->addr), + contigmask((uint8_t *)&x, 32)); + x = cmd->addr.s_addr = htonl(cmd->addr.s_addr); + x &= 0xff; /* base */ + /* + * Print bits and ranges. + * Locate first bit set (i), then locate first bit unset (j). + * If we have 3+ consecutive bits set, then print them as a + * range, otherwise only print the initial bit and rescan. 
+ */ + for (i=0; i < cmd->o.arg1; i++) + if (map[i/32] & (1<<(i & 31))) { + for (j=i+1; j < cmd->o.arg1; j++) + if (!(map[ j/32] & (1<<(j & 31)))) + break; + printf("%c%d", comma, i+x); + if (j>i+2) { /* range has at least 3 elements */ + printf("-%d", j-1+x); + i = j-1; + } + comma = ','; + } + printf("}"); + return; + } + /* + * len == 2 indicates a single IP, whereas lists of 1 or more + * addr/mask pairs have len = (2n+1). We convert len to n so we + * use that to count the number of entries. + */ + for (len = len / 2; len > 0; len--, a += 2) { + int mb = /* mask length */ + (cmd->o.opcode == O_IP_SRC || cmd->o.opcode == O_IP_DST) ? + 32 : contigmask((uint8_t *)&(a[1]), 32); + if (mb == 32 && co.do_resolv) + he = gethostbyaddr((char *)&(a[0]), sizeof(u_long), AF_INET); + if (he != NULL) /* resolved to name */ + printf("%s", he->h_name); + else if (mb == 0) /* any */ + printf("any"); + else { /* numeric IP followed by some kind of mask */ + printf("%s", inet_ntoa( *((struct in_addr *)&a[0]) ) ); + if (mb < 0) + printf(":%s", inet_ntoa( *((struct in_addr *)&a[1]) ) ); + else if (mb < 32) + printf("/%d", mb); + } + if (len > 1) + printf(","); + } +} + +/* + * prints a MAC address/mask pair + */ +static void +print_mac(uint8_t *addr, uint8_t *mask) +{ + int l = contigmask(mask, 48); + + if (l == 0) + printf(" any"); + else { + printf(" %02x:%02x:%02x:%02x:%02x:%02x", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + if (l == -1) + printf("&%02x:%02x:%02x:%02x:%02x:%02x", + mask[0], mask[1], mask[2], + mask[3], mask[4], mask[5]); + else if (l < 48) + printf("/%d", l); + } +} + +static void +fill_icmptypes(ipfw_insn_u32 *cmd, char *av) +{ + uint8_t type; + + cmd->d[0] = 0; + while (*av) { + if (*av == ',') + av++; + + type = strtoul(av, &av, 0); + + if (*av != ',' && *av != '\0') + errx(EX_DATAERR, "invalid ICMP type"); + + if (type > 31) + errx(EX_DATAERR, "ICMP type out of range"); + + cmd->d[0] |= 1 << type; + } + cmd->o.opcode = O_ICMPTYPE; + cmd->o.len 
|= F_INSN_SIZE(ipfw_insn_u32); +} + +static void +print_icmptypes(ipfw_insn_u32 *cmd) +{ + int i; + char sep= ' '; + + printf(" icmptypes"); + for (i = 0; i < 32; i++) { + if ( (cmd->d[0] & (1 << (i))) == 0) + continue; + printf("%c%d", sep, i); + sep = ','; + } +} + +/* + * show_ipfw() prints the body of an ipfw rule. + * Because the standard rule has at least proto src_ip dst_ip, we use + * a helper function to produce these entries if not provided explicitly. + * The first argument is the list of fields we have, the second is + * the list of fields we want to be printed. + * + * Special cases if we have provided a MAC header: + * + if the rule does not contain IP addresses/ports, do not print them; + * + if the rule does not contain an IP proto, print "all" instead of "ip"; + * + * Once we have 'have_options', IP header fields are printed as options. + */ +#define HAVE_PROTO 0x0001 +#define HAVE_SRCIP 0x0002 +#define HAVE_DSTIP 0x0004 +#define HAVE_PROTO4 0x0008 +#define HAVE_PROTO6 0x0010 +#define HAVE_OPTIONS 0x8000 + +#define HAVE_IP (HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP) +static void +show_prerequisites(int *flags, int want, int cmd __unused) +{ + if (co.comment_only) + return; + if ( (*flags & HAVE_IP) == HAVE_IP) + *flags |= HAVE_OPTIONS; + + if ( !(*flags & HAVE_OPTIONS)) { + if ( !(*flags & HAVE_PROTO) && (want & HAVE_PROTO)) { + if ( (*flags & HAVE_PROTO4)) + printf(" ip4"); + else if ( (*flags & HAVE_PROTO6)) + printf(" ip6"); + else + printf(" ip"); + } + if ( !(*flags & HAVE_SRCIP) && (want & HAVE_SRCIP)) + printf(" from any"); + if ( !(*flags & HAVE_DSTIP) && (want & HAVE_DSTIP)) + printf(" to any"); + } + *flags |= want; +} + +static void +show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) +{ + static int twidth = 0; + int l; + ipfw_insn *cmd, *tagptr = NULL; + const char *comment = NULL; /* ptr to comment if we have one */ + int proto = 0; /* default */ + int flags = 0; /* prerequisites */ + ipfw_insn_log *logptr = NULL; /* set if we find an 
O_LOG */ + ipfw_insn_altq *altqptr = NULL; /* set if we find an O_ALTQ */ + int or_block = 0; /* we are in an or block */ + uint32_t set_disable; + + bcopy(&rule->next_rule, &set_disable, sizeof(set_disable)); + + if (set_disable & (1 << rule->set)) { /* disabled */ + if (!co.show_sets) + return; + else + printf("# DISABLED "); + } + printf("%05u ", rule->rulenum); + + if (pcwidth>0 || bcwidth>0) + printf("%*llu %*llu ", pcwidth, align_uint64(&rule->pcnt), + bcwidth, align_uint64(&rule->bcnt)); + + if (co.do_time == 2) + printf("%10u ", rule->timestamp); + else if (co.do_time == 1) { + char timestr[30]; + time_t t = (time_t)0; + + if (twidth == 0) { + strcpy(timestr, ctime(&t)); + *strchr(timestr, '\n') = '\0'; + twidth = strlen(timestr); + } + if (rule->timestamp) { + t = _long_to_time(rule->timestamp); + + strcpy(timestr, ctime(&t)); + *strchr(timestr, '\n') = '\0'; + printf("%s ", timestr); + } else { + printf("%*s", twidth, " "); + } + } + + if (co.show_sets) + printf("set %d ", rule->set); + + /* + * print the optional "match probability" + */ + if (rule->cmd_len > 0) { + cmd = rule->cmd ; + if (cmd->opcode == O_PROB) { + ipfw_insn_u32 *p = (ipfw_insn_u32 *)cmd; + double d = 1.0 * p->d[0]; + + d = (d / 0x7fffffff); + printf("prob %f ", d); + } + } + + /* + * first print actions + */ + for (l = rule->cmd_len - rule->act_ofs, cmd = ACTION_PTR(rule); + l > 0 ; l -= F_LEN(cmd), cmd += F_LEN(cmd)) { + switch(cmd->opcode) { + case O_CHECK_STATE: + printf("check-state"); + flags = HAVE_IP; /* avoid printing anything else */ + break; + + case O_ACCEPT: + printf("allow"); + break; + + case O_COUNT: + printf("count"); + break; + + case O_DENY: + printf("deny"); + break; + + case O_REJECT: + if (cmd->arg1 == ICMP_REJECT_RST) + printf("reset"); + else if (cmd->arg1 == ICMP_UNREACH_HOST) + printf("reject"); + else + print_reject_code(cmd->arg1); + break; + + case O_UNREACH6: + if (cmd->arg1 == ICMP6_UNREACH_RST) + printf("reset6"); + else + print_unreach6_code(cmd->arg1); 
+ break; + + case O_SKIPTO: + PRINT_UINT_ARG("skipto ", cmd->arg1); + break; + + case O_PIPE: + PRINT_UINT_ARG("pipe ", cmd->arg1); + break; + + case O_QUEUE: + PRINT_UINT_ARG("queue ", cmd->arg1); + break; + + case O_DIVERT: + PRINT_UINT_ARG("divert ", cmd->arg1); + break; + + case O_TEE: + PRINT_UINT_ARG("tee ", cmd->arg1); + break; + + case O_NETGRAPH: + PRINT_UINT_ARG("netgraph ", cmd->arg1); + break; + + case O_NGTEE: + PRINT_UINT_ARG("ngtee ", cmd->arg1); + break; + + case O_FORWARD_IP: + { + ipfw_insn_sa *s = (ipfw_insn_sa *)cmd; + + if (s->sa.sin_addr.s_addr == INADDR_ANY) { + printf("fwd tablearg"); + } else { + printf("fwd %s", inet_ntoa(s->sa.sin_addr)); + } + if (s->sa.sin_port) + printf(",%d", s->sa.sin_port); + } + break; + + case O_LOG: /* O_LOG is printed last */ + logptr = (ipfw_insn_log *)cmd; + break; + + case O_ALTQ: /* O_ALTQ is printed after O_LOG */ + altqptr = (ipfw_insn_altq *)cmd; + break; + + case O_TAG: + tagptr = cmd; + break; + + case O_NAT: + PRINT_UINT_ARG("nat ", cmd->arg1); + break; + + case O_SETFIB: + PRINT_UINT_ARG("setfib ", cmd->arg1); + break; + + case O_REASS: + printf("reass"); + break; + + default: + printf("** unrecognized action %d len %d ", + cmd->opcode, cmd->len); + } + } + if (logptr) { + if (logptr->max_log > 0) + printf(" log logamount %d", logptr->max_log); + else + printf(" log"); + } + if (altqptr) { + print_altq_cmd(altqptr); + } + if (tagptr) { + if (tagptr->len & F_NOT) + PRINT_UINT_ARG(" untag ", tagptr->arg1); + else + PRINT_UINT_ARG(" tag ", tagptr->arg1); + } + + /* + * then print the body. 
+ */ + for (l = rule->act_ofs, cmd = rule->cmd ; + l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { + if ((cmd->len & F_OR) || (cmd->len & F_NOT)) + continue; + if (cmd->opcode == O_IP4) { + flags |= HAVE_PROTO4; + break; + } else if (cmd->opcode == O_IP6) { + flags |= HAVE_PROTO6; + break; + } + } + if (rule->_pad & 1) { /* empty rules before options */ + if (!co.do_compact) { + show_prerequisites(&flags, HAVE_PROTO, 0); + printf(" from any to any"); + } + flags |= HAVE_IP | HAVE_OPTIONS; + } + + if (co.comment_only) + comment = "..."; + + for (l = rule->act_ofs, cmd = rule->cmd ; + l > 0 ; l -= F_LEN(cmd) , cmd += F_LEN(cmd)) { + /* useful alias */ + ipfw_insn_u32 *cmd32 = (ipfw_insn_u32 *)cmd; + + if (co.comment_only) { + if (cmd->opcode != O_NOP) + continue; + printf(" // %s\n", (char *)(cmd + 1)); + return; + } + + show_prerequisites(&flags, 0, cmd->opcode); + + switch(cmd->opcode) { + case O_PROB: + break; /* done already */ + + case O_PROBE_STATE: + break; /* no need to print anything here */ + + case O_IP_SRC: + case O_IP_SRC_LOOKUP: + case O_IP_SRC_MASK: + case O_IP_SRC_ME: + case O_IP_SRC_SET: + show_prerequisites(&flags, HAVE_PROTO, 0); + if (!(flags & HAVE_SRCIP)) + printf(" from"); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + print_ip((ipfw_insn_ip *)cmd, + (flags & HAVE_OPTIONS) ? " src-ip" : ""); + flags |= HAVE_SRCIP; + break; + + case O_IP_DST: + case O_IP_DST_LOOKUP: + case O_IP_DST_MASK: + case O_IP_DST_ME: + case O_IP_DST_SET: + show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); + if (!(flags & HAVE_DSTIP)) + printf(" to"); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + print_ip((ipfw_insn_ip *)cmd, + (flags & HAVE_OPTIONS) ? 
" dst-ip" : ""); + flags |= HAVE_DSTIP; + break; + + case O_IP6_SRC: + case O_IP6_SRC_MASK: + case O_IP6_SRC_ME: + show_prerequisites(&flags, HAVE_PROTO, 0); + if (!(flags & HAVE_SRCIP)) + printf(" from"); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + print_ip6((ipfw_insn_ip6 *)cmd, + (flags & HAVE_OPTIONS) ? " src-ip6" : ""); + flags |= HAVE_SRCIP | HAVE_PROTO; + break; + + case O_IP6_DST: + case O_IP6_DST_MASK: + case O_IP6_DST_ME: + show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); + if (!(flags & HAVE_DSTIP)) + printf(" to"); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + print_ip6((ipfw_insn_ip6 *)cmd, + (flags & HAVE_OPTIONS) ? " dst-ip6" : ""); + flags |= HAVE_DSTIP; + break; + + case O_FLOW6ID: + print_flow6id( (ipfw_insn_u32 *) cmd ); + flags |= HAVE_OPTIONS; + break; + + case O_IP_DSTPORT: + show_prerequisites(&flags, HAVE_IP, 0); + case O_IP_SRCPORT: + show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + if (cmd->len & F_NOT) + printf(" not"); + print_newports((ipfw_insn_u16 *)cmd, proto, + (flags & HAVE_OPTIONS) ? cmd->opcode : 0); + break; + + case O_PROTO: { + struct protoent *pe = NULL; + + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + if (cmd->len & F_NOT) + printf(" not"); + proto = cmd->arg1; + pe = getprotobynumber(cmd->arg1); + if ((flags & (HAVE_PROTO4 | HAVE_PROTO6)) && + !(flags & HAVE_PROTO)) + show_prerequisites(&flags, + HAVE_IP | HAVE_OPTIONS, 0); + if (flags & HAVE_OPTIONS) + printf(" proto"); + if (pe) + printf(" %s", pe->p_name); + else + printf(" %u", cmd->arg1); + } + flags |= HAVE_PROTO; + break; + + default: /*options ... 
*/ + if (!(cmd->len & (F_OR|F_NOT))) + if (((cmd->opcode == O_IP6) && + (flags & HAVE_PROTO6)) || + ((cmd->opcode == O_IP4) && + (flags & HAVE_PROTO4))) + break; + show_prerequisites(&flags, HAVE_IP | HAVE_OPTIONS, 0); + if ((cmd->len & F_OR) && !or_block) + printf(" {"); + if (cmd->len & F_NOT && cmd->opcode != O_IN) + printf(" not"); + switch(cmd->opcode) { + case O_MACADDR2: { + ipfw_insn_mac *m = (ipfw_insn_mac *)cmd; + + printf(" MAC"); + print_mac(m->addr, m->mask); + print_mac(m->addr + 6, m->mask + 6); + } + break; + + case O_MAC_TYPE: + print_newports((ipfw_insn_u16 *)cmd, + IPPROTO_ETHERTYPE, cmd->opcode); + break; + + + case O_FRAG: + printf(" frag"); + break; + + case O_FIB: + printf(" fib %u", cmd->arg1 ); + break; + + case O_IN: + printf(cmd->len & F_NOT ? " out" : " in"); + break; + + case O_DIVERTED: + switch (cmd->arg1) { + case 3: + printf(" diverted"); + break; + case 1: + printf(" diverted-loopback"); + break; + case 2: + printf(" diverted-output"); + break; + default: + printf(" diverted-?<%u>", cmd->arg1); + break; + } + break; + + case O_LAYER2: + printf(" layer2"); + break; + case O_XMIT: + case O_RECV: + case O_VIA: + { + char const *s; + ipfw_insn_if *cmdif = (ipfw_insn_if *)cmd; + + if (cmd->opcode == O_XMIT) + s = "xmit"; + else if (cmd->opcode == O_RECV) + s = "recv"; + else /* if (cmd->opcode == O_VIA) */ + s = "via"; + if (cmdif->name[0] == '\0') + printf(" %s %s", s, + inet_ntoa(cmdif->p.ip)); + else + printf(" %s %s", s, cmdif->name); + + break; + } + case O_IPID: + if (F_LEN(cmd) == 1) + printf(" ipid %u", cmd->arg1 ); + else + print_newports((ipfw_insn_u16 *)cmd, 0, + O_IPID); + break; + + case O_IPTTL: + if (F_LEN(cmd) == 1) + printf(" ipttl %u", cmd->arg1 ); + else + print_newports((ipfw_insn_u16 *)cmd, 0, + O_IPTTL); + break; + + case O_IPVER: + printf(" ipver %u", cmd->arg1 ); + break; + + case O_IPPRECEDENCE: + printf(" ipprecedence %u", (cmd->arg1) >> 5 ); + break; + + case O_IPLEN: + if (F_LEN(cmd) == 1) + printf(" iplen 
%u", cmd->arg1 ); + else + print_newports((ipfw_insn_u16 *)cmd, 0, + O_IPLEN); + break; + + case O_IPOPT: + print_flags("ipoptions", cmd, f_ipopts); + break; + + case O_IPTOS: + print_flags("iptos", cmd, f_iptos); + break; + + case O_ICMPTYPE: + print_icmptypes((ipfw_insn_u32 *)cmd); + break; + + case O_ESTAB: + printf(" established"); + break; + + case O_TCPDATALEN: + if (F_LEN(cmd) == 1) + printf(" tcpdatalen %u", cmd->arg1 ); + else + print_newports((ipfw_insn_u16 *)cmd, 0, + O_TCPDATALEN); + break; + + case O_TCPFLAGS: + print_flags("tcpflags", cmd, f_tcpflags); + break; + + case O_TCPOPTS: + print_flags("tcpoptions", cmd, f_tcpopts); + break; + + case O_TCPWIN: + printf(" tcpwin %d", ntohs(cmd->arg1)); + break; + + case O_TCPACK: + printf(" tcpack %d", ntohl(cmd32->d[0])); + break; + + case O_TCPSEQ: + printf(" tcpseq %d", ntohl(cmd32->d[0])); + break; + + case O_UID: + { + struct passwd *pwd = getpwuid(cmd32->d[0]); + + if (pwd) + printf(" uid %s", pwd->pw_name); + else + printf(" uid %u", cmd32->d[0]); + } + break; + + case O_GID: + { + struct group *grp = getgrgid(cmd32->d[0]); + + if (grp) + printf(" gid %s", grp->gr_name); + else + printf(" gid %u", cmd32->d[0]); + } + break; + + case O_JAIL: + printf(" jail %d", cmd32->d[0]); + break; + + case O_VERREVPATH: + printf(" verrevpath"); + break; + + case O_VERSRCREACH: + printf(" versrcreach"); + break; + + case O_ANTISPOOF: + printf(" antispoof"); + break; + + case O_IPSEC: + printf(" ipsec"); + break; + + case O_NOP: + comment = (char *)(cmd + 1); + break; + + case O_KEEP_STATE: + printf(" keep-state"); + break; + + case O_LIMIT: { + struct _s_x *p = limit_masks; + ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; + uint8_t x = c->limit_mask; + char const *comma = " "; + + printf(" limit"); + for (; p->x != 0 ; p++) + if ((x & p->x) == p->x) { + x &= ~p->x; + printf("%s%s", comma, p->s); + comma = ","; + } + PRINT_UINT_ARG(" ", c->conn_limit); + break; + } + + case O_IP6: + printf(" ip6"); + break; + + case 
O_IP4: + printf(" ip4"); + break; + + case O_ICMP6TYPE: + print_icmp6types((ipfw_insn_u32 *)cmd); + break; + + case O_EXT_HDR: + print_ext6hdr( (ipfw_insn *) cmd ); + break; + + case O_TAGGED: + if (F_LEN(cmd) == 1) + PRINT_UINT_ARG(" tagged ", cmd->arg1); + else + print_newports((ipfw_insn_u16 *)cmd, 0, + O_TAGGED); + break; + + default: + printf(" [opcode %d len %d]", + cmd->opcode, cmd->len); + } + } + if (cmd->len & F_OR) { + printf(" or"); + or_block = 1; + } else if (or_block) { + printf(" }"); + or_block = 0; + } + } + show_prerequisites(&flags, HAVE_IP, 0); + if (comment) + printf(" // %s", comment); + printf("\n"); +} + +static void +show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth) +{ + struct protoent *pe; + struct in_addr a; + uint16_t rulenum; + char buf[INET6_ADDRSTRLEN]; + + if (!co.do_expired) { + if (!d->expire && !(d->dyn_type == O_LIMIT_PARENT)) + return; + } + bcopy(&d->rule, &rulenum, sizeof(rulenum)); + printf("%05d", rulenum); + if (pcwidth>0 || bcwidth>0) + printf(" %*llu %*llu (%ds)", pcwidth, + align_uint64(&d->pcnt), bcwidth, + align_uint64(&d->bcnt), d->expire); + switch (d->dyn_type) { + case O_LIMIT_PARENT: + printf(" PARENT %d", d->count); + break; + case O_LIMIT: + printf(" LIMIT"); + break; + case O_KEEP_STATE: /* bidir, no mask */ + printf(" STATE"); + break; + } + + if ((pe = getprotobynumber(d->id.proto)) != NULL) + printf(" %s", pe->p_name); + else + printf(" proto %u", d->id.proto); + + if (d->id.addr_type == 4) { + a.s_addr = htonl(d->id.src_ip); + printf(" %s %d", inet_ntoa(a), d->id.src_port); + + a.s_addr = htonl(d->id.dst_ip); + printf(" <-> %s %d", inet_ntoa(a), d->id.dst_port); + } else if (d->id.addr_type == 6) { + printf(" %s %d", inet_ntop(AF_INET6, &d->id.src_ip6, buf, + sizeof(buf)), d->id.src_port); + printf(" <-> %s %d", inet_ntop(AF_INET6, &d->id.dst_ip6, buf, + sizeof(buf)), d->id.dst_port); + } else + printf(" UNKNOWN <-> UNKNOWN\n"); + + printf("\n"); +} + +/* + * This one handles all set-related 
commands + * ipfw set { show | enable | disable } + * ipfw set swap X Y + * ipfw set move X to Y + * ipfw set move rule X to Y + */ +void +ipfw_sets_handler(int ac, char *av[]) +{ + uint32_t set_disable, masks[2]; + int i, nbytes; + uint16_t rulenum; + uint8_t cmd, new_set; + + ac--; + av++; + + if (!ac) + errx(EX_USAGE, "set needs command"); + if (_substrcmp(*av, "show") == 0) { + void *data; + char const *msg; + + nbytes = sizeof(struct ip_fw); + data = safe_calloc(1, nbytes); + if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0) + err(EX_OSERR, "getsockopt(IP_FW_GET)"); + bcopy(&((struct ip_fw *)data)->next_rule, + &set_disable, sizeof(set_disable)); + + for (i = 0, msg = "disable" ; i < RESVD_SET; i++) + if ((set_disable & (1< RESVD_SET) + errx(EX_DATAERR, "invalid set number %s\n", av[0]); + if (!isdigit(*(av[1])) || new_set > RESVD_SET) + errx(EX_DATAERR, "invalid set number %s\n", av[1]); + masks[0] = (4 << 24) | (new_set << 16) | (rulenum); + i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); + } else if (_substrcmp(*av, "move") == 0) { + ac--; av++; + if (ac && _substrcmp(*av, "rule") == 0) { + cmd = 2; + ac--; av++; + } else + cmd = 3; + if (ac != 3 || _substrcmp(av[1], "to") != 0) + errx(EX_USAGE, "syntax: set move [rule] X to Y\n"); + rulenum = atoi(av[0]); + new_set = atoi(av[2]); + if (!isdigit(*(av[0])) || (cmd == 3 && rulenum > RESVD_SET) || + (cmd == 2 && rulenum == IPFW_DEFAULT_RULE) ) + errx(EX_DATAERR, "invalid source number %s\n", av[0]); + if (!isdigit(*(av[2])) || new_set > RESVD_SET) + errx(EX_DATAERR, "invalid dest. set %s\n", av[1]); + masks[0] = (cmd << 24) | (new_set << 16) | (rulenum); + i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t)); + } else if (_substrcmp(*av, "disable") == 0 || + _substrcmp(*av, "enable") == 0 ) { + int which = _substrcmp(*av, "enable") == 0 ? 
1 : 0; + + ac--; av++; + masks[0] = masks[1] = 0; + + while (ac) { + if (isdigit(**av)) { + i = atoi(*av); + if (i < 0 || i > RESVD_SET) + errx(EX_DATAERR, + "invalid set number %d\n", i); + masks[which] |= (1<= nalloc) { + nalloc = nalloc * 2 + 200; + nbytes = nalloc; + data = safe_realloc(data, nbytes); + if (do_cmd(ocmd, data, (uintptr_t)&nbytes) < 0) + err(EX_OSERR, "getsockopt(IP_%s_GET)", + co.do_pipe ? "DUMMYNET" : "FW"); + } + + if (co.do_pipe) { + ipfw_list_pipes(data, nbytes, ac, av); + goto done; + } + + /* + * Count static rules. They have variable size so we + * need to scan the list to count them. + */ + for (nstat = 1, r = data, lim = (char *)data + nbytes; + r->rulenum < IPFW_DEFAULT_RULE && (char *)r < lim; + ++nstat, r = NEXT(r) ) + ; /* nothing */ + + /* + * Count dynamic rules. This is easier as they have + * fixed size. + */ + r = NEXT(r); + dynrules = (ipfw_dyn_rule *)r ; + n = (char *)r - (char *)data; + ndyn = (nbytes - n) / sizeof *dynrules; + + /* if showing stats, figure out column widths ahead of time */ + bcwidth = pcwidth = 0; + if (show_counters) { + for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) { + /* skip rules from another set */ + if (co.use_set && r->set != co.use_set - 1) + continue; + + /* packet counter */ + width = snprintf(NULL, 0, "%llu", + align_uint64(&r->pcnt)); + if (width > pcwidth) + pcwidth = width; + + /* byte counter */ + width = snprintf(NULL, 0, "%llu", + align_uint64(&r->bcnt)); + if (width > bcwidth) + bcwidth = width; + } + } + if (co.do_dynamic && ndyn) { + for (n = 0, d = dynrules; n < ndyn; n++, d++) { + if (co.use_set) { + /* skip rules from another set */ + bcopy((char *)&d->rule + sizeof(uint16_t), + &set, sizeof(uint8_t)); + if (set != co.use_set - 1) + continue; + } + width = snprintf(NULL, 0, "%llu", + align_uint64(&d->pcnt)); + if (width > pcwidth) + pcwidth = width; + + width = snprintf(NULL, 0, "%llu", + align_uint64(&d->bcnt)); + if (width > bcwidth) + bcwidth = width; + } + } + /* if no rule 
numbers were specified, list all rules */ + if (ac == 0) { + for (n = 0, r = data; n < nstat; n++, r = NEXT(r)) { + if (co.use_set && r->set != co.use_set - 1) + continue; + show_ipfw(r, pcwidth, bcwidth); + } + + if (co.do_dynamic && ndyn) { + printf("## Dynamic rules (%d):\n", ndyn); + for (n = 0, d = dynrules; n < ndyn; n++, d++) { + if (co.use_set) { + bcopy((char *)&d->rule + sizeof(uint16_t), + &set, sizeof(uint8_t)); + if (set != co.use_set - 1) + continue; + } + show_dyn_ipfw(d, pcwidth, bcwidth); + } + } + goto done; + } + + /* display specific rules requested on command line */ + + for (lac = ac, lav = av; lac != 0; lac--) { + /* convert command line rule # */ + last = rnum = strtoul(*lav++, &endptr, 10); + if (*endptr == '-') + last = strtoul(endptr+1, &endptr, 10); + if (*endptr) { + exitval = EX_USAGE; + warnx("invalid rule number: %s", *(lav - 1)); + continue; + } + for (n = seen = 0, r = data; n < nstat; n++, r = NEXT(r) ) { + if (r->rulenum > last) + break; + if (co.use_set && r->set != co.use_set - 1) + continue; + if (r->rulenum >= rnum && r->rulenum <= last) { + show_ipfw(r, pcwidth, bcwidth); + seen = 1; + } + } + if (!seen) { + /* give precedence to other error(s) */ + if (exitval == EX_OK) + exitval = EX_UNAVAILABLE; + warnx("rule %lu does not exist", rnum); + } + } + + if (co.do_dynamic && ndyn) { + printf("## Dynamic rules:\n"); + for (lac = ac, lav = av; lac != 0; lac--) { + last = rnum = strtoul(*lav++, &endptr, 10); + if (*endptr == '-') + last = strtoul(endptr+1, &endptr, 10); + if (*endptr) + /* already warned */ + continue; + for (n = 0, d = dynrules; n < ndyn; n++, d++) { + uint16_t rulenum; + + bcopy(&d->rule, &rulenum, sizeof(rulenum)); + if (rulenum > rnum) + break; + if (co.use_set) { + bcopy((char *)&d->rule + sizeof(uint16_t), + &set, sizeof(uint8_t)); + if (set != co.use_set - 1) + continue; + } + if (r->rulenum >= rnum && r->rulenum <= last) + show_dyn_ipfw(d, pcwidth, bcwidth); + } + } + } + + ac = 0; + +done: + free(data); + 
/*
 * Resolve 'host' into an IPv4 address stored in *ipaddr.
 * The string is first tried as a numeric address with inet_aton();
 * if that fails it is looked up with gethostbyname() and the first
 * address of the answer is used.
 * Returns 0 on success, -1 if no IPv4 address can be obtained.
 */
static int
lookup_host (char *host, struct in_addr *ipaddr)
{
	struct hostent *he;

	if (inet_aton(host, ipaddr))
		return(0);

	he = gethostbyname(host);
	if (he == NULL)
		return(-1);
	/*
	 * Only accept an answer that really carries an IPv4 address:
	 * an empty list or an entry of another family/size must not be
	 * copied into a struct in_addr.
	 */
	if (he->h_addrtype != AF_INET || he->h_addr_list[0] == NULL ||
	    he->h_length != (int)sizeof(*ipaddr))
		return(-1);
	/* the entries are plain char *, so copy the bytes instead of casting */
	memcpy(ipaddr, he->h_addr_list[0], sizeof(*ipaddr));
	return(0);
}
+ */ + char *t = NULL, *p = strpbrk(av, "/:,{"); + int masklen; + char md, nd = '\0'; + + if (p) { + md = *p; + *p++ = '\0'; + if ((t = strpbrk(p, ",{")) != NULL) { + nd = *t; + *t = '\0'; + } + } else + md = '\0'; + + if (lookup_host(av, (struct in_addr *)&d[0]) != 0) + errx(EX_NOHOST, "hostname ``%s'' unknown", av); + switch (md) { + case ':': + if (!inet_aton(p, (struct in_addr *)&d[1])) + errx(EX_DATAERR, "bad netmask ``%s''", p); + break; + case '/': + masklen = atoi(p); + if (masklen == 0) + d[1] = htonl(0); /* mask */ + else if (masklen > 32) + errx(EX_DATAERR, "bad width ``%s''", p); + else + d[1] = htonl(~0 << (32 - masklen)); + break; + case '{': /* no mask, assume /24 and put back the '{' */ + d[1] = htonl(~0 << (32 - 24)); + *(--p) = md; + break; + + case ',': /* single address plus continuation */ + *(--p) = md; + /* FALLTHROUGH */ + case 0: /* initialization value */ + default: + d[1] = htonl(~0); /* force /32 */ + break; + } + d[0] &= d[1]; /* mask base address with mask */ + if (t) + *t = nd; + /* find next separator */ + if (p) + p = strpbrk(p, ",{"); + if (p && *p == '{') { + /* + * We have a set of addresses. They are stored as follows: + * arg1 is the set size (powers of 2, 2..256) + * addr is the base address IN HOST FORMAT + * mask.. is an array of arg1 bits (rounded up to + * the next multiple of 32) with bits set + * for each host in the map. 
+ */ + uint32_t *map = (uint32_t *)&cmd->mask; + int low, high; + int i = contigmask((uint8_t *)&(d[1]), 32); + + if (len > 0) + errx(EX_DATAERR, "address set cannot be in a list"); + if (i < 24 || i > 31) + errx(EX_DATAERR, "invalid set with mask %d\n", i); + cmd->o.arg1 = 1<<(32-i); /* map length */ + d[0] = ntohl(d[0]); /* base addr in host format */ + cmd->o.opcode = O_IP_DST_SET; /* default */ + cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + (cmd->o.arg1+31)/32; + for (i = 0; i < (cmd->o.arg1+31)/32 ; i++) + map[i] = 0; /* clear map */ + + av = p + 1; + low = d[0] & 0xff; + high = low + cmd->o.arg1 - 1; + /* + * Here, i stores the previous value when we specify a range + * of addresses within a mask, e.g. 45-63. i = -1 means we + * have no previous value. + */ + i = -1; /* previous value in a range */ + while (isdigit(*av)) { + char *s; + int a = strtol(av, &s, 0); + + if (s == av) { /* no parameter */ + if (*av != '}') + errx(EX_DATAERR, "set not closed\n"); + if (i != -1) + errx(EX_DATAERR, "incomplete range %d-", i); + break; + } + if (a < low || a > high) + errx(EX_DATAERR, "addr %d out of range [%d-%d]\n", + a, low, high); + a -= low; + if (i == -1) /* no previous in range */ + i = a; + else { /* check that range is valid */ + if (i > a) + errx(EX_DATAERR, "invalid range %d-%d", + i+low, a+low); + if (*s == '-') + errx(EX_DATAERR, "double '-' in range"); + } + for (; i <= a; i++) + map[i/32] |= 1<<(i & 31); + i = -1; + if (*s == '-') + i = a; + else if (*s == '}') + break; + av = s+1; + } + return; + } + av = p; + if (av) /* then *av must be a ',' */ + av++; + + /* Check this entry */ + if (d[1] == 0) { /* "any", specified as x.x.x.x/0 */ + /* + * 'any' turns the entire list into a NOP. + * 'not any' never matches, so it is removed from the + * list unless it is the only item, in which case we + * report an error. 
/*
 * n2mask sets the first n bits of the mask and clears all the others.
 * A value of n that is zero or negative yields an all-zero mask; a
 * value larger than the number of bits in a struct in6_addr is
 * treated as a full mask.
 */
void
n2mask(struct in6_addr *mask, int n)
{
	/* byte values for a partial byte holding 0..8 leading one bits */
	static const unsigned char partial[9] =
	    { 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff };
	const int maxbits = (int)(8 * sizeof(struct in6_addr));
	unsigned char *p = (unsigned char *)mask;

	memset(mask, 0, sizeof(struct in6_addr));
	/* never walk past the end of the address */
	if (n > maxbits)
		n = maxbits;
	for (; n > 0; p++, n -= 8)
		*p = (n >= 8) ? 0xff : partial[n];
}
delete set */ + ac--; av++; + } + + /* Rule number */ + while (ac && isdigit(**av)) { + i = atoi(*av); av++; ac--; + if (co.do_nat) { + exitval = do_cmd(IP_FW_NAT_DEL, &i, sizeof i); + if (exitval) { + exitval = EX_UNAVAILABLE; + warn("rule %u not available", i); + } + } else if (co.do_pipe) { + exitval = ipfw_delete_pipe(co.do_pipe, i); + } else { + if (co.use_set) + rulenum = (i & 0xffff) | (5 << 24) | + ((co.use_set - 1) << 16); + else + rulenum = (i & 0xffff) | (do_set << 24); + i = do_cmd(IP_FW_DEL, &rulenum, sizeof rulenum); + if (i) { + exitval = EX_UNAVAILABLE; + warn("rule %u: setsockopt(IP_FW_DEL)", + rulenum); + } + } + } + if (exitval != EX_OK) + exit(exitval); +} + + +/* + * fill the interface structure. We do not check the name as we can + * create interfaces dynamically, so checking them at insert time + * makes relatively little sense. + * Interface names containing '*', '?', or '[' are assumed to be shell + * patterns which match interfaces. + */ +static void +fill_iface(ipfw_insn_if *cmd, char *arg) +{ + cmd->name[0] = '\0'; + cmd->o.len |= F_INSN_SIZE(ipfw_insn_if); + + /* Parse the interface or address */ + if (strcmp(arg, "any") == 0) + cmd->o.len = 0; /* effectively ignore this command */ + else if (!isdigit(*arg)) { + strlcpy(cmd->name, arg, sizeof(cmd->name)); + cmd->p.glob = strpbrk(arg, "*?[") != NULL ? 
/*
 * Parse a MAC address specification into 'addr' and 'mask', both
 * arrays of ETHER_ADDR_LEN bytes.  Accepted forms:
 *	any			address and mask are all zeroes
 *	xx:xx:xx:xx:xx:xx	single address, mask is all ones
 *	address/len		mask made of 'len' leading one bits
 *	address&mask		mask given in the same notation as an address
 * On return the address has been ANDed with the mask.
 * Malformed input terminates the program through errx(EX_DATAERR, ...).
 */
static void
get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask)
{
	static const char macset[] = "0123456789abcdefABCDEF:";
	char abuf[32];		/* NUL-terminated copy of the address part */
	const char *sep, *mstr;
	struct ether_addr *mac;
	size_t alen;
	int i;

	if (strcmp(p, "any") == 0) {
		for (i = 0; i < ETHER_ADDR_LEN; i++)
			addr[i] = mask[i] = 0;
		return;
	}

	/*
	 * Split at the first '&' or '/'.  The address part is copied into
	 * a local buffer so that no allocation is needed; a string that
	 * does not fit cannot be a MAC address anyway.
	 */
	sep = strpbrk(p, "&/");
	alen = (sep != NULL) ? (size_t)(sep - p) : strlen(p);
	if (alen == 0 || alen >= sizeof(abuf))
		errx(EX_DATAERR, "Incorrect MAC address");
	memcpy(abuf, p, alen);
	abuf[alen] = '\0';
	if (strspn(abuf, macset) != alen || (mac = ether_aton(abuf)) == NULL)
		errx(EX_DATAERR, "Incorrect MAC address");
	memcpy(addr, mac, ETHER_ADDR_LEN);

	if (sep == NULL) {		/* default mask: ff:ff:ff:ff:ff:ff */
		memset(mask, 0xff, ETHER_ADDR_LEN);
	} else if (*sep == '/') {	/* mask len */
		char *end;
		long bits;

		mstr = sep + 1;
		bits = strtol(mstr, &end, 10);
		if (*end != 0 || bits > ETHER_ADDR_LEN * 8 || bits < 0)
			errx(EX_DATAERR, "Incorrect mask length");
		/* bytes past the prefix must be zero, not left as found */
		memset(mask, 0, ETHER_ADDR_LEN);
		for (i = 0; bits > 0 && i < ETHER_ADDR_LEN; bits -= 8, i++)
			mask[i] = (bits >= 8) ? 0xff :
			    (uint8_t)(0xff << (8 - bits));
	} else {			/* mask */
		mstr = sep + 1;
		if (strspn(mstr, macset) != strlen(mstr) ||
		    (mac = ether_aton(mstr)) == NULL)
			errx(EX_DATAERR, "Incorrect mask");
		memcpy(mask, mac, ETHER_ADDR_LEN);
	}

	for (i = 0; i < ETHER_ADDR_LEN; i++)
		addr[i] &= mask[i];
}
*/ + for (i = 0, l = 0; i < ac; i++) + l += strlen(av[i]) + 1; + if (l == 0) + return; + if (l > 84) + errx(EX_DATAERR, + "comment too long (max 80 chars)"); + l = 1 + (l+3)/4; + cmd->len = (cmd->len & (F_NOT | F_OR)) | l; + for (i = 0; i < ac; i++) { + strcpy(p, av[i]); + p += strlen(av[i]); + *p++ = ' '; + } + *(--p) = '\0'; +} + +/* + * A function to fill simple commands of size 1. + * Existing flags are preserved. + */ +static void +fill_cmd(ipfw_insn *cmd, enum ipfw_opcodes opcode, int flags, uint16_t arg) +{ + cmd->opcode = opcode; + cmd->len = ((cmd->len | flags) & (F_NOT | F_OR)) | 1; + cmd->arg1 = arg; +} + +/* + * Fetch and add the MAC address and type, with masks. This generates one or + * two microinstructions, and returns the pointer to the last one. + */ +static ipfw_insn * +add_mac(ipfw_insn *cmd, int ac, char *av[]) +{ + ipfw_insn_mac *mac; + + if (ac < 2) + errx(EX_DATAERR, "MAC dst src"); + + cmd->opcode = O_MACADDR2; + cmd->len = (cmd->len & (F_NOT | F_OR)) | F_INSN_SIZE(ipfw_insn_mac); + + mac = (ipfw_insn_mac *)cmd; + get_mac_addr_mask(av[0], mac->addr, mac->mask); /* dst */ + get_mac_addr_mask(av[1], &(mac->addr[ETHER_ADDR_LEN]), + &(mac->mask[ETHER_ADDR_LEN])); /* src */ + return cmd; +} + +static ipfw_insn * +add_mactype(ipfw_insn *cmd, int ac, char *av) +{ + if (ac < 1) + errx(EX_DATAERR, "missing MAC type"); + if (strcmp(av, "any") != 0) { /* we have a non-null type */ + fill_newports((ipfw_insn_u16 *)cmd, av, IPPROTO_ETHERTYPE); + cmd->opcode = O_MAC_TYPE; + return cmd; + } else + return NULL; +} + +static ipfw_insn * +add_proto0(ipfw_insn *cmd, char *av, u_char *protop) +{ + struct protoent *pe; + char *ep; + int proto; + + proto = strtol(av, &ep, 10); + if (*ep != '\0' || proto <= 0) { + if ((pe = getprotobyname(av)) == NULL) + return NULL; + proto = pe->p_proto; + } + + fill_cmd(cmd, O_PROTO, 0, proto); + *protop = proto; + return cmd; +} + +static ipfw_insn * +add_proto(ipfw_insn *cmd, char *av, u_char *protop) +{ + u_char proto = 
IPPROTO_IP; + + if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) + ; /* do not set O_IP4 nor O_IP6 */ + else if (strcmp(av, "ip4") == 0) + /* explicit "just IPv4" rule */ + fill_cmd(cmd, O_IP4, 0, 0); + else if (strcmp(av, "ip6") == 0) { + /* explicit "just IPv6" rule */ + proto = IPPROTO_IPV6; + fill_cmd(cmd, O_IP6, 0, 0); + } else + return add_proto0(cmd, av, protop); + + *protop = proto; + return cmd; +} + +static ipfw_insn * +add_proto_compat(ipfw_insn *cmd, char *av, u_char *protop) +{ + u_char proto = IPPROTO_IP; + + if (_substrcmp(av, "all") == 0 || strcmp(av, "ip") == 0) + ; /* do not set O_IP4 nor O_IP6 */ + else if (strcmp(av, "ipv4") == 0 || strcmp(av, "ip4") == 0) + /* explicit "just IPv4" rule */ + fill_cmd(cmd, O_IP4, 0, 0); + else if (strcmp(av, "ipv6") == 0 || strcmp(av, "ip6") == 0) { + /* explicit "just IPv6" rule */ + proto = IPPROTO_IPV6; + fill_cmd(cmd, O_IP6, 0, 0); + } else + return add_proto0(cmd, av, protop); + + *protop = proto; + return cmd; +} + +static ipfw_insn * +add_srcip(ipfw_insn *cmd, char *av) +{ + fill_ip((ipfw_insn_ip *)cmd, av); + if (cmd->opcode == O_IP_DST_SET) /* set */ + cmd->opcode = O_IP_SRC_SET; + else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ + cmd->opcode = O_IP_SRC_LOOKUP; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ + cmd->opcode = O_IP_SRC_ME; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ + cmd->opcode = O_IP_SRC; + else /* addr/mask */ + cmd->opcode = O_IP_SRC_MASK; + return cmd; +} + +static ipfw_insn * +add_dstip(ipfw_insn *cmd, char *av) +{ + fill_ip((ipfw_insn_ip *)cmd, av); + if (cmd->opcode == O_IP_DST_SET) /* set */ + ; + else if (cmd->opcode == O_IP_DST_LOOKUP) /* table */ + ; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) /* me */ + cmd->opcode = O_IP_DST_ME; + else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn_u32)) /* one IP */ + cmd->opcode = O_IP_DST; + else /* addr/mask */ + cmd->opcode = O_IP_DST_MASK; + return cmd; +} + +static ipfw_insn * 
+add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode) +{ + if (_substrcmp(av, "any") == 0) { + return NULL; + } else if (fill_newports((ipfw_insn_u16 *)cmd, av, proto)) { + /* XXX todo: check that we have a protocol with ports */ + cmd->opcode = opcode; + return cmd; + } + return NULL; +} + +static ipfw_insn * +add_src(ipfw_insn *cmd, char *av, u_char proto) +{ + struct in6_addr a; + char *host, *ch; + ipfw_insn *ret = NULL; + + if ((host = strdup(av)) == NULL) + return NULL; + if ((ch = strrchr(host, '/')) != NULL) + *ch = '\0'; + + if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || + inet_pton(AF_INET6, host, &a)) + ret = add_srcip6(cmd, av); + /* XXX: should check for IPv4, not !IPv6 */ + if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || + !inet_pton(AF_INET6, host, &a))) + ret = add_srcip(cmd, av); + if (ret == NULL && strcmp(av, "any") != 0) + ret = cmd; + + free(host); + return ret; +} + +static ipfw_insn * +add_dst(ipfw_insn *cmd, char *av, u_char proto) +{ + struct in6_addr a; + char *host, *ch; + ipfw_insn *ret = NULL; + + if ((host = strdup(av)) == NULL) + return NULL; + if ((ch = strrchr(host, '/')) != NULL) + *ch = '\0'; + + if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 || + inet_pton(AF_INET6, host, &a)) + ret = add_dstip6(cmd, av); + /* XXX: should check for IPv4, not !IPv6 */ + if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 || + !inet_pton(AF_INET6, host, &a))) + ret = add_dstip(cmd, av); + if (ret == NULL && strcmp(av, "any") != 0) + ret = cmd; + + free(host); + return ret; +} + +/* + * Parse arguments and assemble the microinstructions which make up a rule. + * Rules are added into the 'rulebuf' and then copied in the correct order + * into the actual rule. + * + * The syntax for a rule starts with the action, followed by + * optional action parameters, and the various match patterns. 
+ * In the assembled microcode, the first opcode must be an O_PROBE_STATE + * (generated if the rule includes a keep-state option), then the + * various match patterns, log/altq actions, and the actual action. + * + */ +void +ipfw_add(int ac, char *av[]) +{ + /* + * rules are added into the 'rulebuf' and then copied in + * the correct order into the actual rule. + * Some things that need to go out of order (prob, action etc.) + * go into actbuf[]. + */ + static uint32_t rulebuf[255], actbuf[255], cmdbuf[255]; + + ipfw_insn *src, *dst, *cmd, *action, *prev=NULL; + ipfw_insn *first_cmd; /* first match pattern */ + + struct ip_fw *rule; + + /* + * various flags used to record that we entered some fields. + */ + ipfw_insn *have_state = NULL; /* check-state or keep-state */ + ipfw_insn *have_log = NULL, *have_altq = NULL, *have_tag = NULL; + size_t len; + + int i; + + int open_par = 0; /* open parenthesis ( */ + + /* proto is here because it is used to fetch ports */ + u_char proto = IPPROTO_IP; /* default protocol */ + + double match_prob = 1; /* match probability, default is always match */ + + bzero(actbuf, sizeof(actbuf)); /* actions go here */ + bzero(cmdbuf, sizeof(cmdbuf)); + bzero(rulebuf, sizeof(rulebuf)); + + rule = (struct ip_fw *)rulebuf; + cmd = (ipfw_insn *)cmdbuf; + action = (ipfw_insn *)actbuf; + + av++; ac--; + + /* [rule N] -- Rule number optional */ + if (ac && isdigit(**av)) { + rule->rulenum = atoi(*av); + av++; + ac--; + } + + /* [set N] -- set number (0..RESVD_SET), optional */ + if (ac > 1 && _substrcmp(*av, "set") == 0) { + int set = strtoul(av[1], NULL, 10); + if (set < 0 || set > RESVD_SET) + errx(EX_DATAERR, "illegal set %s", av[1]); + rule->set = set; + av += 2; ac -= 2; + } + + /* [prob D] -- match probability, optional */ + if (ac > 1 && _substrcmp(*av, "prob") == 0) { + match_prob = strtod(av[1], NULL); + + if (match_prob <= 0 || match_prob > 1) + errx(EX_DATAERR, "illegal match prob. 
%s", av[1]); + av += 2; ac -= 2; + } + + /* action -- mandatory */ + NEED1("missing action"); + i = match_token(rule_actions, *av); + ac--; av++; + action->len = 1; /* default */ + switch(i) { + case TOK_CHECKSTATE: + have_state = action; + action->opcode = O_CHECK_STATE; + break; + + case TOK_ACCEPT: + action->opcode = O_ACCEPT; + break; + + case TOK_DENY: + action->opcode = O_DENY; + action->arg1 = 0; + break; + + case TOK_REJECT: + action->opcode = O_REJECT; + action->arg1 = ICMP_UNREACH_HOST; + break; + + case TOK_RESET: + action->opcode = O_REJECT; + action->arg1 = ICMP_REJECT_RST; + break; + + case TOK_RESET6: + action->opcode = O_UNREACH6; + action->arg1 = ICMP6_UNREACH_RST; + break; + + case TOK_UNREACH: + action->opcode = O_REJECT; + NEED1("missing reject code"); + fill_reject_code(&action->arg1, *av); + ac--; av++; + break; + + case TOK_UNREACH6: + action->opcode = O_UNREACH6; + NEED1("missing unreach code"); + fill_unreach6_code(&action->arg1, *av); + ac--; av++; + break; + + case TOK_COUNT: + action->opcode = O_COUNT; + break; + + case TOK_NAT: + action->opcode = O_NAT; + action->len = F_INSN_SIZE(ipfw_insn_nat); + goto chkarg; + + case TOK_QUEUE: + action->opcode = O_QUEUE; + goto chkarg; + case TOK_PIPE: + action->opcode = O_PIPE; + goto chkarg; + case TOK_SKIPTO: + action->opcode = O_SKIPTO; + goto chkarg; + case TOK_NETGRAPH: + action->opcode = O_NETGRAPH; + goto chkarg; + case TOK_NGTEE: + action->opcode = O_NGTEE; + goto chkarg; + case TOK_DIVERT: + action->opcode = O_DIVERT; + goto chkarg; + case TOK_TEE: + action->opcode = O_TEE; +chkarg: + if (!ac) + errx(EX_USAGE, "missing argument for %s", *(av - 1)); + if (isdigit(**av)) { + action->arg1 = strtoul(*av, NULL, 10); + if (action->arg1 <= 0 || action->arg1 >= IP_FW_TABLEARG) + errx(EX_DATAERR, "illegal argument for %s", + *(av - 1)); + } else if (_substrcmp(*av, "tablearg") == 0) { + action->arg1 = IP_FW_TABLEARG; + } else if (i == TOK_DIVERT || i == TOK_TEE) { + struct servent *s; + 
setservent(1); + s = getservbyname(av[0], "divert"); + if (s != NULL) + action->arg1 = ntohs(s->s_port); + else + errx(EX_DATAERR, "illegal divert/tee port"); + } else + errx(EX_DATAERR, "illegal argument for %s", *(av - 1)); + ac--; av++; + break; + + case TOK_FORWARD: { + ipfw_insn_sa *p = (ipfw_insn_sa *)action; + char *s, *end; + + NEED1("missing forward address[:port]"); + + action->opcode = O_FORWARD_IP; + action->len = F_INSN_SIZE(ipfw_insn_sa); + + /* + * In the kernel we assume AF_INET and use only + * sin_port and sin_addr. + */ + p->sa.sin_family = AF_INET; + p->sa.sin_port = 0; + /* + * locate the address-port separator (':' or ',') + */ + s = strchr(*av, ':'); + if (s == NULL) + s = strchr(*av, ','); + if (s != NULL) { + *(s++) = '\0'; + i = strtoport(s, &end, 0 /* base */, 0 /* proto */); + if (s == end) + errx(EX_DATAERR, + "illegal forwarding port ``%s''", s); + p->sa.sin_port = (u_short)i; + } + if (_substrcmp(*av, "tablearg") == 0) + p->sa.sin_addr.s_addr = INADDR_ANY; + else + lookup_host(*av, &(p->sa.sin_addr)); + ac--; av++; + break; + } + case TOK_COMMENT: + /* pretend it is a 'count' rule followed by the comment */ + action->opcode = O_COUNT; + ac++; av--; /* go back... */ + break; + + case TOK_SETFIB: + { + int numfibs; + size_t intsize = sizeof(int); + + action->opcode = O_SETFIB; + NEED1("missing fib number"); + action->arg1 = strtoul(*av, NULL, 10); + if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1) + errx(EX_DATAERR, "fibs not suported.\n"); + if (action->arg1 >= numfibs) /* Temporary */ + errx(EX_DATAERR, "fib too large.\n"); + ac--; av++; + break; + } + + case TOK_REASS: + action->opcode = O_REASS; + break; + + default: + errx(EX_DATAERR, "invalid action %s\n", av[-1]); + } + action = next_cmd(action); + + /* + * [altq queuename] -- altq tag, optional + * [log [logamount N]] -- log, optional + * + * If they exist, it go first in the cmdbuf, but then it is + * skipped in the copy section to the end of the buffer. 
+ */ + while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) { + ac--; av++; + switch (i) { + case TOK_LOG: + { + ipfw_insn_log *c = (ipfw_insn_log *)cmd; + int l; + + if (have_log) + errx(EX_DATAERR, + "log cannot be specified more than once"); + have_log = (ipfw_insn *)c; + cmd->len = F_INSN_SIZE(ipfw_insn_log); + cmd->opcode = O_LOG; + if (ac && _substrcmp(*av, "logamount") == 0) { + ac--; av++; + NEED1("logamount requires argument"); + l = atoi(*av); + if (l < 0) + errx(EX_DATAERR, + "logamount must be positive"); + c->max_log = l; + ac--; av++; + } else { + len = sizeof(c->max_log); + if (sysctlbyname("net.inet.ip.fw.verbose_limit", + &c->max_log, &len, NULL, 0) == -1) + errx(1, "sysctlbyname(\"%s\")", + "net.inet.ip.fw.verbose_limit"); + } + } + break; + + case TOK_ALTQ: + { + ipfw_insn_altq *a = (ipfw_insn_altq *)cmd; + + NEED1("missing altq queue name"); + if (have_altq) + errx(EX_DATAERR, + "altq cannot be specified more than once"); + have_altq = (ipfw_insn *)a; + cmd->len = F_INSN_SIZE(ipfw_insn_altq); + cmd->opcode = O_ALTQ; + a->qid = altq_name_to_qid(*av); + ac--; av++; + } + break; + + case TOK_TAG: + case TOK_UNTAG: { + uint16_t tag; + + if (have_tag) + errx(EX_USAGE, "tag and untag cannot be " + "specified more than once"); + GET_UINT_ARG(tag, 1, IPFW_DEFAULT_RULE - 1, i, + rule_action_params); + have_tag = cmd; + fill_cmd(cmd, O_TAG, (i == TOK_TAG) ? 
0: F_NOT, tag); + ac--; av++; + break; + } + + default: + abort(); + } + cmd = next_cmd(cmd); + } + + if (have_state) /* must be a check-state, we are done */ + goto done; + +#define OR_START(target) \ + if (ac && (*av[0] == '(' || *av[0] == '{')) { \ + if (open_par) \ + errx(EX_USAGE, "nested \"(\" not allowed\n"); \ + prev = NULL; \ + open_par = 1; \ + if ( (av[0])[1] == '\0') { \ + ac--; av++; \ + } else \ + (*av)++; \ + } \ + target: \ + + +#define CLOSE_PAR \ + if (open_par) { \ + if (ac && ( \ + strcmp(*av, ")") == 0 || \ + strcmp(*av, "}") == 0)) { \ + prev = NULL; \ + open_par = 0; \ + ac--; av++; \ + } else \ + errx(EX_USAGE, "missing \")\"\n"); \ + } + +#define NOT_BLOCK \ + if (ac && _substrcmp(*av, "not") == 0) { \ + if (cmd->len & F_NOT) \ + errx(EX_USAGE, "double \"not\" not allowed\n"); \ + cmd->len |= F_NOT; \ + ac--; av++; \ + } + +#define OR_BLOCK(target) \ + if (ac && _substrcmp(*av, "or") == 0) { \ + if (prev == NULL || open_par == 0) \ + errx(EX_DATAERR, "invalid OR block"); \ + prev->len |= F_OR; \ + ac--; av++; \ + goto target; \ + } \ + CLOSE_PAR; + + first_cmd = cmd; + +#if 0 + /* + * MAC addresses, optional. + * If we have this, we skip the part "proto from src to dst" + * and jump straight to the option parsing. 
+ */ + NOT_BLOCK; + NEED1("missing protocol"); + if (_substrcmp(*av, "MAC") == 0 || + _substrcmp(*av, "mac") == 0) { + ac--; av++; /* the "MAC" keyword */ + add_mac(cmd, ac, av); /* exits in case of errors */ + cmd = next_cmd(cmd); + ac -= 2; av += 2; /* dst-mac and src-mac */ + NOT_BLOCK; + NEED1("missing mac type"); + if (add_mactype(cmd, ac, av[0])) + cmd = next_cmd(cmd); + ac--; av++; /* any or mac-type */ + goto read_options; + } +#endif + + /* + * protocol, mandatory + */ + OR_START(get_proto); + NOT_BLOCK; + NEED1("missing protocol"); + if (add_proto_compat(cmd, *av, &proto)) { + av++; ac--; + if (F_LEN(cmd) != 0) { + prev = cmd; + cmd = next_cmd(cmd); + } + } else if (first_cmd != cmd) { + errx(EX_DATAERR, "invalid protocol ``%s''", *av); + } else + goto read_options; + OR_BLOCK(get_proto); + + /* + * "from", mandatory + */ + if (!ac || _substrcmp(*av, "from") != 0) + errx(EX_USAGE, "missing ``from''"); + ac--; av++; + + /* + * source IP, mandatory + */ + OR_START(source_ip); + NOT_BLOCK; /* optional "not" */ + NEED1("missing source address"); + if (add_src(cmd, *av, proto)) { + ac--; av++; + if (F_LEN(cmd) != 0) { /* ! any */ + prev = cmd; + cmd = next_cmd(cmd); + } + } else + errx(EX_USAGE, "bad source address %s", *av); + OR_BLOCK(source_ip); + + /* + * source ports, optional + */ + NOT_BLOCK; /* optional "not" */ + if (ac) { + if (_substrcmp(*av, "any") == 0 || + add_ports(cmd, *av, proto, O_IP_SRCPORT)) { + ac--; av++; + if (F_LEN(cmd) != 0) + cmd = next_cmd(cmd); + } + } + + /* + * "to", mandatory + */ + if (!ac || _substrcmp(*av, "to") != 0) + errx(EX_USAGE, "missing ``to''"); + av++; ac--; + + /* + * destination, mandatory + */ + OR_START(dest_ip); + NOT_BLOCK; /* optional "not" */ + NEED1("missing dst address"); + if (add_dst(cmd, *av, proto)) { + ac--; av++; + if (F_LEN(cmd) != 0) { /* ! any */ + prev = cmd; + cmd = next_cmd(cmd); + } + } else + errx( EX_USAGE, "bad destination address %s", *av); + OR_BLOCK(dest_ip); + + /* + * dest. 
ports, optional + */ + NOT_BLOCK; /* optional "not" */ + if (ac) { + if (_substrcmp(*av, "any") == 0 || + add_ports(cmd, *av, proto, O_IP_DSTPORT)) { + ac--; av++; + if (F_LEN(cmd) != 0) + cmd = next_cmd(cmd); + } + } + +read_options: + if (ac && first_cmd == cmd) { + /* + * nothing specified so far, store in the rule to ease + * printout later. + */ + rule->_pad = 1; + } + prev = NULL; + while (ac) { + char *s; + ipfw_insn_u32 *cmd32; /* alias for cmd */ + + s = *av; + cmd32 = (ipfw_insn_u32 *)cmd; + + if (*s == '!') { /* alternate syntax for NOT */ + if (cmd->len & F_NOT) + errx(EX_USAGE, "double \"not\" not allowed\n"); + cmd->len = F_NOT; + s++; + } + i = match_token(rule_options, s); + ac--; av++; + switch(i) { + case TOK_NOT: + if (cmd->len & F_NOT) + errx(EX_USAGE, "double \"not\" not allowed\n"); + cmd->len = F_NOT; + break; + + case TOK_OR: + if (open_par == 0 || prev == NULL) + errx(EX_USAGE, "invalid \"or\" block\n"); + prev->len |= F_OR; + break; + + case TOK_STARTBRACE: + if (open_par) + errx(EX_USAGE, "+nested \"(\" not allowed\n"); + open_par = 1; + break; + + case TOK_ENDBRACE: + if (!open_par) + errx(EX_USAGE, "+missing \")\"\n"); + open_par = 0; + prev = NULL; + break; + + case TOK_IN: + fill_cmd(cmd, O_IN, 0, 0); + break; + + case TOK_OUT: + cmd->len ^= F_NOT; /* toggle F_NOT */ + fill_cmd(cmd, O_IN, 0, 0); + break; + + case TOK_DIVERTED: + fill_cmd(cmd, O_DIVERTED, 0, 3); + break; + + case TOK_DIVERTEDLOOPBACK: + fill_cmd(cmd, O_DIVERTED, 0, 1); + break; + + case TOK_DIVERTEDOUTPUT: + fill_cmd(cmd, O_DIVERTED, 0, 2); + break; + + case TOK_FRAG: + fill_cmd(cmd, O_FRAG, 0, 0); + break; + + case TOK_LAYER2: + fill_cmd(cmd, O_LAYER2, 0, 0); + break; + + case TOK_XMIT: + case TOK_RECV: + case TOK_VIA: + NEED1("recv, xmit, via require interface name" + " or address"); + fill_iface((ipfw_insn_if *)cmd, av[0]); + ac--; av++; + if (F_LEN(cmd) == 0) /* not a valid address */ + break; + if (i == TOK_XMIT) + cmd->opcode = O_XMIT; + else if (i == TOK_RECV) + 
cmd->opcode = O_RECV; + else if (i == TOK_VIA) + cmd->opcode = O_VIA; + break; + + case TOK_ICMPTYPES: + NEED1("icmptypes requires list of types"); + fill_icmptypes((ipfw_insn_u32 *)cmd, *av); + av++; ac--; + break; + + case TOK_ICMP6TYPES: + NEED1("icmptypes requires list of types"); + fill_icmp6types((ipfw_insn_icmp6 *)cmd, *av); + av++; ac--; + break; + + case TOK_IPTTL: + NEED1("ipttl requires TTL"); + if (strpbrk(*av, "-,")) { + if (!add_ports(cmd, *av, 0, O_IPTTL)) + errx(EX_DATAERR, "invalid ipttl %s", *av); + } else + fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPID: + NEED1("ipid requires id"); + if (strpbrk(*av, "-,")) { + if (!add_ports(cmd, *av, 0, O_IPID)) + errx(EX_DATAERR, "invalid ipid %s", *av); + } else + fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPLEN: + NEED1("iplen requires length"); + if (strpbrk(*av, "-,")) { + if (!add_ports(cmd, *av, 0, O_IPLEN)) + errx(EX_DATAERR, "invalid ip len %s", *av); + } else + fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPVER: + NEED1("ipver requires version"); + fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_IPPRECEDENCE: + NEED1("ipprecedence requires value"); + fill_cmd(cmd, O_IPPRECEDENCE, 0, + (strtoul(*av, NULL, 0) & 7) << 5); + ac--; av++; + break; + + case TOK_IPOPTS: + NEED1("missing argument for ipoptions"); + fill_flags(cmd, O_IPOPT, f_ipopts, *av); + ac--; av++; + break; + + case TOK_IPTOS: + NEED1("missing argument for iptos"); + fill_flags(cmd, O_IPTOS, f_iptos, *av); + ac--; av++; + break; + + case TOK_UID: + NEED1("uid requires argument"); + { + char *end; + uid_t uid; + struct passwd *pwd; + + cmd->opcode = O_UID; + uid = strtoul(*av, &end, 0); + pwd = (*end == '\0') ? 
getpwuid(uid) : getpwnam(*av); + if (pwd == NULL) + errx(EX_DATAERR, "uid \"%s\" nonexistent", *av); + cmd32->d[0] = pwd->pw_uid; + cmd->len |= F_INSN_SIZE(ipfw_insn_u32); + ac--; av++; + } + break; + + case TOK_GID: + NEED1("gid requires argument"); + { + char *end; + gid_t gid; + struct group *grp; + + cmd->opcode = O_GID; + gid = strtoul(*av, &end, 0); + grp = (*end == '\0') ? getgrgid(gid) : getgrnam(*av); + if (grp == NULL) + errx(EX_DATAERR, "gid \"%s\" nonexistent", *av); + cmd32->d[0] = grp->gr_gid; + cmd->len |= F_INSN_SIZE(ipfw_insn_u32); + ac--; av++; + } + break; + + case TOK_JAIL: + NEED1("jail requires argument"); + { + char *end; + int jid; + + cmd->opcode = O_JAIL; + jid = (int)strtol(*av, &end, 0); + if (jid < 0 || *end != '\0') + errx(EX_DATAERR, "jail requires prison ID"); + cmd32->d[0] = (uint32_t)jid; + cmd->len |= F_INSN_SIZE(ipfw_insn_u32); + ac--; av++; + } + break; + + case TOK_ESTAB: + fill_cmd(cmd, O_ESTAB, 0, 0); + break; + + case TOK_SETUP: + fill_cmd(cmd, O_TCPFLAGS, 0, + (TH_SYN) | ( (TH_ACK) & 0xff) <<8 ); + break; + + case TOK_TCPDATALEN: + NEED1("tcpdatalen requires length"); + if (strpbrk(*av, "-,")) { + if (!add_ports(cmd, *av, 0, O_TCPDATALEN)) + errx(EX_DATAERR, "invalid tcpdata len %s", *av); + } else + fill_cmd(cmd, O_TCPDATALEN, 0, + strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_TCPOPTS: + NEED1("missing argument for tcpoptions"); + fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av); + ac--; av++; + break; + + case TOK_TCPSEQ: + case TOK_TCPACK: + NEED1("tcpseq/tcpack requires argument"); + cmd->len = F_INSN_SIZE(ipfw_insn_u32); + cmd->opcode = (i == TOK_TCPSEQ) ? 
O_TCPSEQ : O_TCPACK; + cmd32->d[0] = htonl(strtoul(*av, NULL, 0)); + ac--; av++; + break; + + case TOK_TCPWIN: + NEED1("tcpwin requires length"); + fill_cmd(cmd, O_TCPWIN, 0, + htons(strtoul(*av, NULL, 0))); + ac--; av++; + break; + + case TOK_TCPFLAGS: + NEED1("missing argument for tcpflags"); + cmd->opcode = O_TCPFLAGS; + fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av); + ac--; av++; + break; + + case TOK_KEEPSTATE: + if (open_par) + errx(EX_USAGE, "keep-state cannot be part " + "of an or block"); + if (have_state) + errx(EX_USAGE, "only one of keep-state " + "and limit is allowed"); + have_state = cmd; + fill_cmd(cmd, O_KEEP_STATE, 0, 0); + break; + + case TOK_LIMIT: { + ipfw_insn_limit *c = (ipfw_insn_limit *)cmd; + int val; + + if (open_par) + errx(EX_USAGE, + "limit cannot be part of an or block"); + if (have_state) + errx(EX_USAGE, "only one of keep-state and " + "limit is allowed"); + have_state = cmd; + + cmd->len = F_INSN_SIZE(ipfw_insn_limit); + cmd->opcode = O_LIMIT; + c->limit_mask = c->conn_limit = 0; + + while (ac > 0) { + if ((val = match_token(limit_masks, *av)) <= 0) + break; + c->limit_mask |= val; + ac--; av++; + } + + if (c->limit_mask == 0) + errx(EX_USAGE, "limit: missing limit mask"); + + GET_UINT_ARG(c->conn_limit, 1, IPFW_DEFAULT_RULE - 1, + TOK_LIMIT, rule_options); + + ac--; av++; + break; + } + + case TOK_PROTO: + NEED1("missing protocol"); + if (add_proto(cmd, *av, &proto)) { + ac--; av++; + } else + errx(EX_DATAERR, "invalid protocol ``%s''", + *av); + break; + + case TOK_SRCIP: + NEED1("missing source IP"); + if (add_srcip(cmd, *av)) { + ac--; av++; + } + break; + + case TOK_DSTIP: + NEED1("missing destination IP"); + if (add_dstip(cmd, *av)) { + ac--; av++; + } + break; + + case TOK_SRCIP6: + NEED1("missing source IP6"); + if (add_srcip6(cmd, *av)) { + ac--; av++; + } + break; + + case TOK_DSTIP6: + NEED1("missing destination IP6"); + if (add_dstip6(cmd, *av)) { + ac--; av++; + } + break; + + case TOK_SRCPORT: + NEED1("missing source 
port"); + if (_substrcmp(*av, "any") == 0 || + add_ports(cmd, *av, proto, O_IP_SRCPORT)) { + ac--; av++; + } else + errx(EX_DATAERR, "invalid source port %s", *av); + break; + + case TOK_DSTPORT: + NEED1("missing destination port"); + if (_substrcmp(*av, "any") == 0 || + add_ports(cmd, *av, proto, O_IP_DSTPORT)) { + ac--; av++; + } else + errx(EX_DATAERR, "invalid destination port %s", + *av); + break; + + case TOK_MAC: + if (add_mac(cmd, ac, av)) { + ac -= 2; av += 2; + } + break; + + case TOK_MACTYPE: + NEED1("missing mac type"); + if (!add_mactype(cmd, ac, *av)) + errx(EX_DATAERR, "invalid mac type %s", *av); + ac--; av++; + break; + + case TOK_VERREVPATH: + fill_cmd(cmd, O_VERREVPATH, 0, 0); + break; + + case TOK_VERSRCREACH: + fill_cmd(cmd, O_VERSRCREACH, 0, 0); + break; + + case TOK_ANTISPOOF: + fill_cmd(cmd, O_ANTISPOOF, 0, 0); + break; + + case TOK_IPSEC: + fill_cmd(cmd, O_IPSEC, 0, 0); + break; + + case TOK_IPV6: + fill_cmd(cmd, O_IP6, 0, 0); + break; + + case TOK_IPV4: + fill_cmd(cmd, O_IP4, 0, 0); + break; + + case TOK_EXT6HDR: + fill_ext6hdr( cmd, *av ); + ac--; av++; + break; + + case TOK_FLOWID: + if (proto != IPPROTO_IPV6 ) + errx( EX_USAGE, "flow-id filter is active " + "only for ipv6 protocol\n"); + fill_flow6( (ipfw_insn_u32 *) cmd, *av ); + ac--; av++; + break; + + case TOK_COMMENT: + fill_comment(cmd, ac, av); + av += ac; + ac = 0; + break; + + case TOK_TAGGED: + if (ac > 0 && strpbrk(*av, "-,")) { + if (!add_ports(cmd, *av, 0, O_TAGGED)) + errx(EX_DATAERR, "tagged: invalid tag" + " list: %s", *av); + } + else { + uint16_t tag; + + GET_UINT_ARG(tag, 1, IPFW_DEFAULT_RULE - 1, + TOK_TAGGED, rule_options); + fill_cmd(cmd, O_TAGGED, 0, tag); + } + ac--; av++; + break; + + case TOK_FIB: + NEED1("fib requires fib number"); + fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0)); + ac--; av++; + break; + + default: + errx(EX_USAGE, "unrecognised option [%d] %s\n", i, s); + } + if (F_LEN(cmd) > 0) { /* prepare to advance */ + prev = cmd; + cmd = next_cmd(cmd); 
+ } + } + +done: + /* + * Now copy stuff into the rule. + * If we have a keep-state option, the first instruction + * must be a PROBE_STATE (which is generated here). + * If we have a LOG option, it was stored as the first command, + * and now must be moved to the top of the action part. + */ + dst = (ipfw_insn *)rule->cmd; + + /* + * First thing to write into the command stream is the match probability. + */ + if (match_prob != 1) { /* 1 means always match */ + dst->opcode = O_PROB; + dst->len = 2; + *((int32_t *)(dst+1)) = (int32_t)(match_prob * 0x7fffffff); + dst += dst->len; + } + + /* + * generate O_PROBE_STATE if necessary + */ + if (have_state && have_state->opcode != O_CHECK_STATE) { + fill_cmd(dst, O_PROBE_STATE, 0, 0); + dst = next_cmd(dst); + } + + /* copy all commands but O_LOG, O_KEEP_STATE, O_LIMIT, O_ALTQ, O_TAG */ + for (src = (ipfw_insn *)cmdbuf; src != cmd; src += i) { + i = F_LEN(src); + + switch (src->opcode) { + case O_LOG: + case O_KEEP_STATE: + case O_LIMIT: + case O_ALTQ: + case O_TAG: + break; + default: + bcopy(src, dst, i * sizeof(uint32_t)); + dst += i; + } + } + + /* + * put back the have_state command as last opcode + */ + if (have_state && have_state->opcode != O_CHECK_STATE) { + i = F_LEN(have_state); + bcopy(have_state, dst, i * sizeof(uint32_t)); + dst += i; + } + /* + * start action section + */ + rule->act_ofs = dst - rule->cmd; + + /* put back O_LOG, O_ALTQ, O_TAG if necessary */ + if (have_log) { + i = F_LEN(have_log); + bcopy(have_log, dst, i * sizeof(uint32_t)); + dst += i; + } + if (have_altq) { + i = F_LEN(have_altq); + bcopy(have_altq, dst, i * sizeof(uint32_t)); + dst += i; + } + if (have_tag) { + i = F_LEN(have_tag); + bcopy(have_tag, dst, i * sizeof(uint32_t)); + dst += i; + } + /* + * copy all other actions + */ + for (src = (ipfw_insn *)actbuf; src != action; src += i) { + i = F_LEN(src); + bcopy(src, dst, i * sizeof(uint32_t)); + dst += i; + } + + rule->cmd_len = (uint32_t *)dst - (uint32_t *)(rule->cmd); + i = (char 
*)dst - (char *)rule; + if (do_cmd(IP_FW_ADD, rule, (uintptr_t)&i) == -1) + err(EX_UNAVAILABLE, "getsockopt(%s)", "IP_FW_ADD"); + if (!co.do_quiet) + show_ipfw(rule, 0, 0); +} + +/* + * clear the counters or the log counters. + */ +void +ipfw_zero(int ac, char *av[], int optname /* 0 = IP_FW_ZERO, 1 = IP_FW_RESETLOG */) +{ + uint32_t arg, saved_arg; + int failed = EX_OK; + char const *errstr; + char const *name = optname ? "RESETLOG" : "ZERO"; + + optname = optname ? IP_FW_RESETLOG : IP_FW_ZERO; + + av++; ac--; + + if (!ac) { + /* clear all entries */ + if (do_cmd(optname, NULL, 0) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_%s)", name); + if (!co.do_quiet) + printf("%s.\n", optname == IP_FW_ZERO ? + "Accounting cleared":"Logging counts reset"); + + return; + } + + while (ac) { + /* Rule number */ + if (isdigit(**av)) { + arg = strtonum(*av, 0, 0xffff, &errstr); + if (errstr) + errx(EX_DATAERR, + "invalid rule number %s\n", *av); + saved_arg = arg; + if (co.use_set) + arg |= (1 << 24) | ((co.use_set - 1) << 16); + av++; + ac--; + if (do_cmd(optname, &arg, sizeof(arg))) { + warn("rule %u: setsockopt(IP_FW_%s)", + saved_arg, name); + failed = EX_UNAVAILABLE; + } else if (!co.do_quiet) + printf("Entry %d %s.\n", saved_arg, + optname == IP_FW_ZERO ? + "cleared" : "logging count reset"); + } else { + errx(EX_USAGE, "invalid rule number ``%s''", *av); + } + } + if (failed != EX_OK) + exit(failed); +} + +void +ipfw_flush(int force) +{ + int cmd = co.do_pipe ? IP_DUMMYNET_FLUSH : IP_FW_FLUSH; + + if (!force && !co.do_quiet) { /* need to ask user */ + int c; + + printf("Are you sure? 
[yn] "); + fflush(stdout); + do { + c = toupper(getc(stdin)); + while (c != '\n' && getc(stdin) != '\n') + if (feof(stdin)) + return; /* and do not flush */ + } while (c != 'Y' && c != 'N'); + printf("\n"); + if (c == 'N') /* user said no */ + return; + } + /* `ipfw set N flush` - is the same that `ipfw delete set N` */ + if (co.use_set) { + uint32_t arg = ((co.use_set - 1) & 0xffff) | (1 << 24); + if (do_cmd(IP_FW_DEL, &arg, sizeof(arg)) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_FW_DEL)"); + } else if (do_cmd(cmd, NULL, 0) < 0) + err(EX_UNAVAILABLE, "setsockopt(IP_%s_FLUSH)", + co.do_pipe ? "DUMMYNET" : "FW"); + if (!co.do_quiet) + printf("Flushed all %s.\n", co.do_pipe ? "pipes" : "rules"); +} + + +static void table_list(ipfw_table_entry ent, int need_header); + +/* + * This one handles all table-related commands + * ipfw table N add addr[/masklen] [value] + * ipfw table N delete addr[/masklen] + * ipfw table {N | all} flush + * ipfw table {N | all} list + */ +void +ipfw_table_handler(int ac, char *av[]) +{ + ipfw_table_entry ent; + int do_add; + int is_all; + size_t len; + char *p; + uint32_t a; + uint32_t tables_max; + + len = sizeof(tables_max); + if (sysctlbyname("net.inet.ip.fw.tables_max", &tables_max, &len, + NULL, 0) == -1) { +#ifdef IPFW_TABLES_MAX + warn("Warn: Failed to get the max tables number via sysctl. " + "Using the compiled in defaults. 
\nThe reason was"); + tables_max = IPFW_TABLES_MAX; +#else + errx(1, "Failed sysctlbyname(\"net.inet.ip.fw.tables_max\")"); +#endif + } + + ac--; av++; + if (ac && isdigit(**av)) { + ent.tbl = atoi(*av); + is_all = 0; + ac--; av++; + } else if (ac && _substrcmp(*av, "all") == 0) { + ent.tbl = 0; + is_all = 1; + ac--; av++; + } else + errx(EX_USAGE, "table number or 'all' keyword required"); + if (ent.tbl >= tables_max) + errx(EX_USAGE, "The table number exceeds the maximum allowed " + "value (%d)", tables_max - 1); + NEED1("table needs command"); + if (is_all && _substrcmp(*av, "list") != 0 + && _substrcmp(*av, "flush") != 0) + errx(EX_USAGE, "table number required"); + + if (_substrcmp(*av, "add") == 0 || + _substrcmp(*av, "delete") == 0) { + do_add = **av == 'a'; + ac--; av++; + if (!ac) + errx(EX_USAGE, "IP address required"); + p = strchr(*av, '/'); + if (p) { + *p++ = '\0'; + ent.masklen = atoi(p); + if (ent.masklen > 32) + errx(EX_DATAERR, "bad width ``%s''", p); + } else + ent.masklen = 32; + if (lookup_host(*av, (struct in_addr *)&ent.addr) != 0) + errx(EX_NOHOST, "hostname ``%s'' unknown", *av); + ac--; av++; + if (do_add && ac) { + unsigned int tval; + /* isdigit is a bit of a hack here.. */ + if (strchr(*av, (int)'.') == NULL && isdigit(**av)) { + ent.value = strtoul(*av, NULL, 0); + } else { + if (lookup_host(*av, (struct in_addr *)&tval) == 0) { + /* The value must be stored in host order * + * so that the values < 65k can be distinguished */ + ent.value = ntohl(tval); + } else { + errx(EX_NOHOST, "hostname ``%s'' unknown", *av); + } + } + } else + ent.value = 0; + if (do_cmd(do_add ? IP_FW_TABLE_ADD : IP_FW_TABLE_DEL, + &ent, sizeof(ent)) < 0) { + /* If running silent, don't bomb out on these errors. */ + if (!(co.do_quiet && (errno == (do_add ? EEXIST : ESRCH)))) + err(EX_OSERR, "setsockopt(IP_FW_TABLE_%s)", + do_add ? 
"ADD" : "DEL"); + /* In silent mode, react to a failed add by deleting */ + if (do_add) { + do_cmd(IP_FW_TABLE_DEL, &ent, sizeof(ent)); + if (do_cmd(IP_FW_TABLE_ADD, + &ent, sizeof(ent)) < 0) + err(EX_OSERR, + "setsockopt(IP_FW_TABLE_ADD)"); + } + } + } else if (_substrcmp(*av, "flush") == 0) { + a = is_all ? tables_max : (ent.tbl + 1); + do { + if (do_cmd(IP_FW_TABLE_FLUSH, &ent.tbl, + sizeof(ent.tbl)) < 0) + err(EX_OSERR, "setsockopt(IP_FW_TABLE_FLUSH)"); + } while (++ent.tbl < a); + } else if (_substrcmp(*av, "list") == 0) { + a = is_all ? tables_max : (ent.tbl + 1); + do { + table_list(ent, is_all); + } while (++ent.tbl < a); + } else + errx(EX_USAGE, "invalid table command %s", *av); +} + +static void +table_list(ipfw_table_entry ent, int need_header) +{ + ipfw_table *tbl; + socklen_t l; + uint32_t a; + + a = ent.tbl; + l = sizeof(a); + if (do_cmd(IP_FW_TABLE_GETSIZE, &a, (uintptr_t)&l) < 0) + err(EX_OSERR, "getsockopt(IP_FW_TABLE_GETSIZE)"); + + /* If a is zero we have nothing to do, the table is empty. 
*/ + if (a == 0) + return; + + l = sizeof(*tbl) + a * sizeof(ipfw_table_entry); + tbl = safe_calloc(1, l); + tbl->tbl = ent.tbl; + if (do_cmd(IP_FW_TABLE_LIST, tbl, (uintptr_t)&l) < 0) + err(EX_OSERR, "getsockopt(IP_FW_TABLE_LIST)"); + if (tbl->cnt && need_header) + printf("---table(%d)---\n", tbl->tbl); + for (a = 0; a < tbl->cnt; a++) { + unsigned int tval; + tval = tbl->ent[a].value; + if (co.do_value_as_ip) { + char tbuf[128]; + strncpy(tbuf, inet_ntoa(*(struct in_addr *) + &tbl->ent[a].addr), 127); + /* inet_ntoa expects network order */ + tval = htonl(tval); + printf("%s/%u %s\n", tbuf, tbl->ent[a].masklen, + inet_ntoa(*(struct in_addr *)&tval)); + } else { + printf("%s/%u %u\n", + inet_ntoa(*(struct in_addr *)&tbl->ent[a].addr), + tbl->ent[a].masklen, tval); + } + } + free(tbl); +} diff --git a/ipfw/ipfw2.h b/ipfw/ipfw2.h new file mode 100644 index 0000000..1de0f3c --- /dev/null +++ b/ipfw/ipfw2.h @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + * + * NEW command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/ipfw2.h 187983 2009-02-01 16:00:49Z luigi $ + */ + +/* + * Options that can be set on the command line. + * When reading commands from a file, a subset of the options can also + * be applied globally by specifying them before the file name. 
+ * After that, each line can contain its own option that changes + * the global value. + * XXX The context is not restored after each line. + */ + +struct cmdline_opts { + /* boolean options: */ + int do_value_as_ip; /* show table value as IP */ + int do_resolv; /* try to resolve all ip to names */ + int do_time; /* Show time stamps */ + int do_quiet; /* Be quiet in add and flush */ + int do_pipe; /* this cmd refers to a pipe */ + int do_nat; /* this cmd refers to a nat config */ + int do_dynamic; /* display dynamic rules */ + int do_expired; /* display expired dynamic rules */ + int do_compact; /* show rules in compact mode */ + int do_force; /* do not ask for confirmation */ + int show_sets; /* display the set each rule belongs to */ + int test_only; /* only check syntax */ + int comment_only; /* only print action and comment */ + int verbose; /* be verbose on some commands */ + + /* The options below can have multiple values. */ + + int do_sort; /* field to sort results (0 = no) */ + /* valid fields are 1 and above */ + + int use_set; /* work with specified set number */ + /* 0 means all sets, otherwise apply to set use_set - 1 */ + +}; + +extern struct cmdline_opts co; + +/* + * _s_x is a structure that stores string <-> token pairs, used in + * various places in the parser. Entries are stored in arrays, + * with an entry with s=NULL as terminator. + * The search routines are match_token() and match_value(). + * Often, an element with x=0 contains an error string.
+ *
+ */
+struct _s_x {
+	char const *s;	/* keyword text; NULL marks the end of a table */
+	int x;		/* token or value paired with the keyword */
+};
+
+/*
+ * Tokens produced by the keyword tables of the parser.
+ * TOK_NULL is 0, all the other values simply follow in sequence.
+ */
+enum tokens {
+	TOK_NULL=0,
+
+	TOK_OR,
+	TOK_NOT,
+	TOK_STARTBRACE,
+	TOK_ENDBRACE,
+
+	/* NOTE(review): this group looks like the rule actions -- confirm */
+	TOK_ACCEPT,
+	TOK_COUNT,
+	TOK_PIPE,
+	TOK_QUEUE,
+	TOK_DIVERT,
+	TOK_TEE,
+	TOK_NETGRAPH,
+	TOK_NGTEE,
+	TOK_FORWARD,
+	TOK_SKIPTO,
+	TOK_DENY,
+	TOK_REJECT,
+	TOK_RESET,
+	TOK_UNREACH,
+	TOK_CHECKSTATE,
+	TOK_NAT,
+	TOK_REASS,
+
+	TOK_ALTQ,
+	TOK_LOG,
+	TOK_TAG,
+	TOK_UNTAG,
+
+	/* NOTE(review): this group looks like the rule match options -- confirm */
+	TOK_TAGGED,
+	TOK_UID,
+	TOK_GID,
+	TOK_JAIL,
+	TOK_IN,
+	TOK_LIMIT,
+	TOK_KEEPSTATE,
+	TOK_LAYER2,
+	TOK_OUT,
+	TOK_DIVERTED,
+	TOK_DIVERTEDLOOPBACK,
+	TOK_DIVERTEDOUTPUT,
+	TOK_XMIT,
+	TOK_RECV,
+	TOK_VIA,
+	TOK_FRAG,
+	TOK_IPOPTS,
+	TOK_IPLEN,
+	TOK_IPID,
+	TOK_IPPRECEDENCE,
+	TOK_IPTOS,
+	TOK_IPTTL,
+	TOK_IPVER,
+	TOK_ESTAB,
+	TOK_SETUP,
+	TOK_TCPDATALEN,
+	TOK_TCPFLAGS,
+	TOK_TCPOPTS,
+	TOK_TCPSEQ,
+	TOK_TCPACK,
+	TOK_TCPWIN,
+	TOK_ICMPTYPES,
+	TOK_MAC,
+	TOK_MACTYPE,
+	TOK_VERREVPATH,
+	TOK_VERSRCREACH,
+	TOK_ANTISPOOF,
+	TOK_IPSEC,
+	TOK_COMMENT,
+
+	/* NOTE(review): presumably pipe/queue configuration keywords -- confirm */
+	TOK_PLR,
+	TOK_NOERROR,
+	TOK_BUCKETS,
+	TOK_DSTIP,
+	TOK_SRCIP,
+	TOK_DSTPORT,
+	TOK_SRCPORT,
+	TOK_ALL,
+	TOK_MASK,
+	TOK_BW,
+	TOK_DELAY,
+	TOK_PIPE_PROFILE,
+	TOK_RED,
+	TOK_GRED,
+	TOK_DROPTAIL,
+	TOK_PROTO,
+	TOK_WEIGHT,
+	/* TOK_IP ... TOK_REDIR_PROTO are the values used by nat_params[] in nat.c */
+	TOK_IP,
+	TOK_IF,
+	TOK_ALOG,
+	TOK_DENY_INC,
+	TOK_SAME_PORTS,
+	TOK_UNREG_ONLY,
+	TOK_RESET_ADDR,
+	TOK_ALIAS_REV,
+	TOK_PROXY_ONLY,
+	TOK_REDIR_ADDR,
+	TOK_REDIR_PORT,
+	TOK_REDIR_PROTO,
+
+	/* NOTE(review): IPv6-related keywords, going by the names -- confirm */
+	TOK_IPV6,
+	TOK_FLOWID,
+	TOK_ICMP6TYPES,
+	TOK_EXT6HDR,
+	TOK_DSTIP6,
+	TOK_SRCIP6,
+
+	TOK_IPV4,
+	TOK_UNREACH6,
+	TOK_RESET6,
+
+	TOK_FIB,
+	TOK_SETFIB,
+};
+/*
+ * the following macro terminates the program with a usage error
+ * (through errx()) carrying the given message if we run out of
+ * arguments.
+ */ +#define NEED1(msg) {if (!ac) errx(EX_USAGE, msg);} + +unsigned long long align_uint64(const uint64_t *pll); + +/* memory allocation support */ +void *safe_calloc(size_t number, size_t size); +void *safe_realloc(void *ptr, size_t size); + +/* string comparison functions used for historical compatibility */ +int _substrcmp(const char *str1, const char* str2); +int _substrcmp2(const char *str1, const char* str2, const char* str3); + +/* utility functions */ +int match_token(struct _s_x *table, char *string); +char const *match_value(struct _s_x *p, int value); + +int do_cmd(int optname, void *optval, uintptr_t optlen); + +struct in6_addr; +void n2mask(struct in6_addr *mask, int n); +int contigmask(uint8_t *p, int len); + +/* + * Forward declarations to avoid include way too many headers. + * C does not allow duplicated typedefs, so we use the base struct + * that the typedef points to. + * Should the typedefs use a different type, the compiler will + * still detect the change when compiling the body of the + * functions involved, so we do not lose error checking. + */ +struct _ipfw_insn; +struct _ipfw_insn_altq; +struct _ipfw_insn_u32; +struct _ipfw_insn_ip6; +struct _ipfw_insn_icmp6; + +/* + * The reserved set numer. This is a constant in ip_fw.h + * but we store it in a variable so other files do not depend + * in that header just for one constant. 
+ */
+extern int resvd_set_number;
+
+/*
+ * first-level command handlers
+ * ac/av are the remaining arguments, with the command keyword in av[0]
+ * (this is how ipfw_main() in main.c invokes them).
+ */
+void ipfw_add(int ac, char *av[]);
+void ipfw_show_nat(int ac, char **av);
+void ipfw_config_pipe(int ac, char **av);
+void ipfw_config_nat(int ac, char **av);
+void ipfw_sets_handler(int ac, char *av[]);
+void ipfw_table_handler(int ac, char *av[]);
+/* main.c passes which=1 for "enable" and which=0 for "disable" */
+void ipfw_sysctl_handler(int ac, char *av[], int which);
+void ipfw_delete(int ac, char *av[]);
+/* main.c passes co.do_force as the force argument */
+void ipfw_flush(int force);
+/* main.c passes optname=0 for "zero" and optname=1 for "resetlog" */
+void ipfw_zero(int ac, char *av[], int optname);
+/* main.c passes show_counters=1 for "show", the -a flag for "list"/"print" */
+void ipfw_list(int ac, char *av[], int show_counters);
+
+/* altq.c */
+void altq_set_enabled(int enabled);
+u_int32_t altq_name_to_qid(const char *name);
+
+void print_altq_cmd(struct _ipfw_insn_altq *altqptr);
+
+/* dummynet.c */
+void ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]);
+int ipfw_delete_pipe(int pipe_or_queue, int n);
+
+/* ipv6.c */
+/* printers for the IPv6 specific instructions */
+void print_unreach6_code(uint16_t code);
+void print_ip6(struct _ipfw_insn_ip6 *cmd, char const *s);
+void print_flow6id(struct _ipfw_insn_u32 *cmd);
+void print_icmp6types(struct _ipfw_insn_u32 *cmd);
+void print_ext6hdr(struct _ipfw_insn *cmd );
+
+/* both return cmd, after filling it from the address specification av */
+struct _ipfw_insn *add_srcip6(struct _ipfw_insn *cmd, char *av);
+struct _ipfw_insn *add_dstip6(struct _ipfw_insn *cmd, char *av);
+
+/* parsers: fill the instruction (or *codep) from the textual argument */
+void fill_flow6(struct _ipfw_insn_u32 *cmd, char *av );
+void fill_unreach6_code(u_short *codep, char *str);
+void fill_icmp6types(struct _ipfw_insn_icmp6 *cmd, char *av);
+int fill_ext6hdr(struct _ipfw_insn *cmd, char *av);
diff --git a/ipfw/ipv6.c b/ipfw/ipv6.c
new file mode 100644
index 0000000..e1c0dee
--- /dev/null
+++ b/ipfw/ipv6.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2002-2003 Luigi Rizzo
+ * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
+ * Copyright (c) 1994 Ugen J.S.Antsilevich
+ *
+ * Idea and grammar partially left from:
+ * Copyright (c) 1993 Daniel Boulet
+ *
+ * Redistribution and use in source forms, with and without modification,
+ * are permitted provided that this entire comment appears intact.
+ * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + * + * NEW command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/ipv6.c 187770 2009-01-27 12:01:30Z luigi $ + * + * ipv6 support + */ + +#include +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +static struct _s_x icmp6codes[] = { + { "no-route", ICMP6_DST_UNREACH_NOROUTE }, + { "admin-prohib", ICMP6_DST_UNREACH_ADMIN }, + { "address", ICMP6_DST_UNREACH_ADDR }, + { "port", ICMP6_DST_UNREACH_NOPORT }, + { NULL, 0 } +}; + +void +fill_unreach6_code(u_short *codep, char *str) +{ + int val; + char *s; + + val = strtoul(str, &s, 0); + if (s == str || *s != '\0' || val >= 0x100) + val = match_token(icmp6codes, str); + if (val < 0) + errx(EX_DATAERR, "unknown ICMPv6 unreachable code ``%s''", str); + *codep = val; + return; +} + +void +print_unreach6_code(uint16_t code) +{ + char const *s = match_value(icmp6codes, code); + + if (s != NULL) + printf("unreach6 %s", s); + else + printf("unreach6 %u", code); +} + +/* + * Print the ip address contained in a command. + */ +void +print_ip6(ipfw_insn_ip6 *cmd, char const *s) +{ + struct hostent *he = NULL; + int len = F_LEN((ipfw_insn *) cmd) - 1; + struct in6_addr *a = &(cmd->addr6); + char trad[255]; + + printf("%s%s ", cmd->o.len & F_NOT ? " not": "", s); + + if (cmd->o.opcode == O_IP6_SRC_ME || cmd->o.opcode == O_IP6_DST_ME) { + printf("me6"); + return; + } + if (cmd->o.opcode == O_IP6) { + printf(" ip6"); + return; + } + + /* + * len == 4 indicates a single IP, whereas lists of 1 or more + * addr/mask pairs have len = (2n+1). We convert len to n so we + * use that to count the number of entries. 
+ */ + + for (len = len / 4; len > 0; len -= 2, a += 2) { + int mb = /* mask length */ + (cmd->o.opcode == O_IP6_SRC || cmd->o.opcode == O_IP6_DST) ? + 128 : contigmask((uint8_t *)&(a[1]), 128); + + if (mb == 128 && co.do_resolv) + he = gethostbyaddr((char *)a, sizeof(*a), AF_INET6); + if (he != NULL) /* resolved to name */ + printf("%s", he->h_name); + else if (mb == 0) /* any */ + printf("any"); + else { /* numeric IP followed by some kind of mask */ + if (inet_ntop(AF_INET6, a, trad, sizeof( trad ) ) == NULL) + printf("Error ntop in print_ip6\n"); + printf("%s", trad ); + if (mb < 0) /* XXX not really legal... */ + printf(":%s", + inet_ntop(AF_INET6, &a[1], trad, sizeof(trad))); + else if (mb < 128) + printf("/%d", mb); + } + if (len > 2) + printf(","); + } +} + +void +fill_icmp6types(ipfw_insn_icmp6 *cmd, char *av) +{ + uint8_t type; + + bzero(cmd, sizeof(*cmd)); + while (*av) { + if (*av == ',') + av++; + type = strtoul(av, &av, 0); + if (*av != ',' && *av != '\0') + errx(EX_DATAERR, "invalid ICMP6 type"); + /* + * XXX: shouldn't this be 0xFF? I can't see any reason why + * we shouldn't be able to filter all possiable values + * regardless of the ability of the rest of the kernel to do + * anything useful with them. 
+ */ + if (type > ICMP6_MAXTYPE) + errx(EX_DATAERR, "ICMP6 type out of range"); + cmd->d[type / 32] |= ( 1 << (type % 32)); + } + cmd->o.opcode = O_ICMP6TYPE; + cmd->o.len |= F_INSN_SIZE(ipfw_insn_icmp6); +} + + +void +print_icmp6types(ipfw_insn_u32 *cmd) +{ + int i, j; + char sep= ' '; + + printf(" ip6 icmp6types"); + for (i = 0; i < 7; i++) + for (j=0; j < 32; ++j) { + if ( (cmd->d[i] & (1 << (j))) == 0) + continue; + printf("%c%d", sep, (i*32 + j)); + sep = ','; + } +} + +void +print_flow6id( ipfw_insn_u32 *cmd) +{ + uint16_t i, limit = cmd->o.arg1; + char sep = ','; + + printf(" flow-id "); + for( i=0; i < limit; ++i) { + if (i == limit - 1) + sep = ' '; + printf("%d%c", cmd->d[i], sep); + } +} + +/* structure and define for the extension header in ipv6 */ +static struct _s_x ext6hdrcodes[] = { + { "frag", EXT_FRAGMENT }, + { "hopopt", EXT_HOPOPTS }, + { "route", EXT_ROUTING }, + { "dstopt", EXT_DSTOPTS }, + { "ah", EXT_AH }, + { "esp", EXT_ESP }, + { "rthdr0", EXT_RTHDR0 }, + { "rthdr2", EXT_RTHDR2 }, + { NULL, 0 } +}; + +/* fills command for the extension header filtering */ +int +fill_ext6hdr( ipfw_insn *cmd, char *av) +{ + int tok; + char *s = av; + + cmd->arg1 = 0; + + while(s) { + av = strsep( &s, ",") ; + tok = match_token(ext6hdrcodes, av); + switch (tok) { + case EXT_FRAGMENT: + cmd->arg1 |= EXT_FRAGMENT; + break; + + case EXT_HOPOPTS: + cmd->arg1 |= EXT_HOPOPTS; + break; + + case EXT_ROUTING: + cmd->arg1 |= EXT_ROUTING; + break; + + case EXT_DSTOPTS: + cmd->arg1 |= EXT_DSTOPTS; + break; + + case EXT_AH: + cmd->arg1 |= EXT_AH; + break; + + case EXT_ESP: + cmd->arg1 |= EXT_ESP; + break; + + case EXT_RTHDR0: + cmd->arg1 |= EXT_RTHDR0; + break; + + case EXT_RTHDR2: + cmd->arg1 |= EXT_RTHDR2; + break; + + default: + errx( EX_DATAERR, "invalid option for ipv6 exten header" ); + break; + } + } + if (cmd->arg1 == 0 ) + return 0; + cmd->opcode = O_EXT_HDR; + cmd->len |= F_INSN_SIZE( ipfw_insn ); + return 1; +} + +void +print_ext6hdr( ipfw_insn *cmd ) +{ + char 
sep = ' '; + + printf(" extension header:"); + if (cmd->arg1 & EXT_FRAGMENT ) { + printf("%cfragmentation", sep); + sep = ','; + } + if (cmd->arg1 & EXT_HOPOPTS ) { + printf("%chop options", sep); + sep = ','; + } + if (cmd->arg1 & EXT_ROUTING ) { + printf("%crouting options", sep); + sep = ','; + } + if (cmd->arg1 & EXT_RTHDR0 ) { + printf("%crthdr0", sep); + sep = ','; + } + if (cmd->arg1 & EXT_RTHDR2 ) { + printf("%crthdr2", sep); + sep = ','; + } + if (cmd->arg1 & EXT_DSTOPTS ) { + printf("%cdestination options", sep); + sep = ','; + } + if (cmd->arg1 & EXT_AH ) { + printf("%cauthentication header", sep); + sep = ','; + } + if (cmd->arg1 & EXT_ESP ) { + printf("%cencapsulated security payload", sep); + } +} + +/* Try to find ipv6 address by hostname */ +static int +lookup_host6 (char *host, struct in6_addr *ip6addr) +{ + struct hostent *he; + + if (!inet_pton(AF_INET6, host, ip6addr)) { + if ((he = gethostbyname2(host, AF_INET6)) == NULL) + return(-1); + memcpy(ip6addr, he->h_addr_list[0], sizeof( struct in6_addr)); + } + return(0); +} + + +/* + * fill the addr and mask fields in the instruction as appropriate from av. + * Update length as appropriate. + * The following formats are allowed: + * any matches any IP6. Actually returns an empty instruction. + * me returns O_IP6_*_ME + * + * 03f1::234:123:0342 single IP6 addres + * 03f1::234:123:0342/24 address/mask + * 03f1::234:123:0342/24,03f1::234:123:0343/ List of address + * + * Set of address (as in ipv6) not supported because ipv6 address + * are typically random past the initial prefix. + * Return 1 on success, 0 on failure. + */ +static int +fill_ip6(ipfw_insn_ip6 *cmd, char *av) +{ + int len = 0; + struct in6_addr *d = &(cmd->addr6); + /* + * Needed for multiple address. 
+ * Note d[1] points to struct in6_add r mask6 of cmd + */ + + cmd->o.len &= ~F_LEN_MASK; /* zero len */ + + if (strcmp(av, "any") == 0) + return (1); + + + if (strcmp(av, "me") == 0) { /* Set the data for "me" opt*/ + cmd->o.len |= F_INSN_SIZE(ipfw_insn); + return (1); + } + + if (strcmp(av, "me6") == 0) { /* Set the data for "me" opt*/ + cmd->o.len |= F_INSN_SIZE(ipfw_insn); + return (1); + } + + av = strdup(av); + while (av) { + /* + * After the address we can have '/' indicating a mask, + * or ',' indicating another address follows. + */ + + char *p; + int masklen; + char md = '\0'; + + if ((p = strpbrk(av, "/,")) ) { + md = *p; /* save the separator */ + *p = '\0'; /* terminate address string */ + p++; /* and skip past it */ + } + /* now p points to NULL, mask or next entry */ + + /* lookup stores address in *d as a side effect */ + if (lookup_host6(av, d) != 0) { + /* XXX: failed. Free memory and go */ + errx(EX_DATAERR, "bad address \"%s\"", av); + } + /* next, look at the mask, if any */ + masklen = (md == '/') ? atoi(p) : 128; + if (masklen > 128 || masklen < 0) + errx(EX_DATAERR, "bad width \"%s\''", p); + else + n2mask(&d[1], masklen); + + APPLY_MASK(d, &d[1]) /* mask base address with mask */ + + /* find next separator */ + + if (md == '/') { /* find separator past the mask */ + p = strpbrk(p, ","); + if (p != NULL) + p++; + } + av = p; + + /* Check this entry */ + if (masklen == 0) { + /* + * 'any' turns the entire list into a NOP. + * 'not any' never matches, so it is removed from the + * list unless it is the only item, in which case we + * report an error. 
+ */ + if (cmd->o.len & F_NOT && av == NULL && len == 0) + errx(EX_DATAERR, "not any never matches"); + continue; + } + + /* + * A single IP can be stored alone + */ + if (masklen == 128 && av == NULL && len == 0) { + len = F_INSN_SIZE(struct in6_addr); + break; + } + + /* Update length and pointer to arguments */ + len += F_INSN_SIZE(struct in6_addr)*2; + d += 2; + } /* end while */ + + /* + * Total length of the command, remember that 1 is the size of + * the base command. + */ + if (len + 1 > F_LEN_MASK) + errx(EX_DATAERR, "address list too long"); + cmd->o.len |= len+1; + free(av); + return (1); +} + +/* + * fills command for ipv6 flow-id filtering + * note that the 20 bit flow number is stored in a array of u_int32_t + * it's supported lists of flow-id, so in the o.arg1 we store how many + * additional flow-id we want to filter, the basic is 1 + */ +void +fill_flow6( ipfw_insn_u32 *cmd, char *av ) +{ + u_int32_t type; /* Current flow number */ + u_int16_t nflow = 0; /* Current flow index */ + char *s = av; + cmd->d[0] = 0; /* Initializing the base number*/ + + while (s) { + av = strsep( &s, ",") ; + type = strtoul(av, &av, 0); + if (*av != ',' && *av != '\0') + errx(EX_DATAERR, "invalid ipv6 flow number %s", av); + if (type > 0xfffff) + errx(EX_DATAERR, "flow number out of range %s", av); + cmd->d[nflow] |= type; + nflow++; + } + if( nflow > 0 ) { + cmd->o.opcode = O_FLOW6ID; + cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32) + nflow; + cmd->o.arg1 = nflow; + } + else { + errx(EX_DATAERR, "invalid ipv6 flow number %s", av); + } +} + +ipfw_insn * +add_srcip6(ipfw_insn *cmd, char *av) +{ + + fill_ip6((ipfw_insn_ip6 *)cmd, av); + if (F_LEN(cmd) == 0) { /* any */ + } else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) { /* "me" */ + cmd->opcode = O_IP6_SRC_ME; + } else if (F_LEN(cmd) == + (F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn))) { + /* single IP, no mask*/ + cmd->opcode = O_IP6_SRC; + } else { /* addr/mask opt */ + cmd->opcode = O_IP6_SRC_MASK; + } + return 
cmd; +} + +ipfw_insn * +add_dstip6(ipfw_insn *cmd, char *av) +{ + + fill_ip6((ipfw_insn_ip6 *)cmd, av); + if (F_LEN(cmd) == 0) { /* any */ + } else if (F_LEN(cmd) == F_INSN_SIZE(ipfw_insn)) { /* "me" */ + cmd->opcode = O_IP6_DST_ME; + } else if (F_LEN(cmd) == + (F_INSN_SIZE(struct in6_addr) + F_INSN_SIZE(ipfw_insn))) { + /* single IP, no mask*/ + cmd->opcode = O_IP6_DST; + } else { /* addr/mask opt */ + cmd->opcode = O_IP6_DST_MASK; + } + return cmd; +} diff --git a/ipfw/main.c b/ipfw/main.c new file mode 100644 index 0000000..860539b --- /dev/null +++ b/ipfw/main.c @@ -0,0 +1,539 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. 
+ * + * Command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/main.c 187767 2009-01-27 10:18:55Z luigi $ + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ipfw2.h" + +static void +help(void) +{ + fprintf(stderr, +"ipfw syntax summary (but please do read the ipfw(8) manpage):\n\n" +"\tipfw [-abcdefhnNqStTv] \n\n" +"where is one of the following:\n\n" +"add [num] [set N] [prob x] RULE-BODY\n" +"{pipe|queue} N config PIPE-BODY\n" +"[pipe|queue] {zero|delete|show} [N{,N}]\n" +"nat N config {ip IPADDR|if IFNAME|log|deny_in|same_ports|unreg_only|reset|\n" +" reverse|proxy_only|redirect_addr linkspec|\n" +" redirect_port linkspec|redirect_proto linkspec}\n" +"set [disable N... enable N...] | move [rule] X to Y | swap X Y | show\n" +"set N {show|list|zero|resetlog|delete} [N{,N}] | flush\n" +"table N {add ip[/bits] [value] | delete ip[/bits] | flush | list}\n" +"table all {flush | list}\n" +"\n" +"RULE-BODY: check-state [PARAMS] | ACTION [PARAMS] ADDR [OPTION_LIST]\n" +"ACTION: check-state | allow | count | deny | unreach{,6} CODE |\n" +" skipto N | {divert|tee} PORT | forward ADDR |\n" +" pipe N | queue N | nat N | setfib FIB | reass\n" +"PARAMS: [log [logamount LOGLIMIT]] [altq QUEUE_NAME]\n" +"ADDR: [ MAC dst src ether_type ] \n" +" [ ip from IPADDR [ PORT ] to IPADDR [ PORTLIST ] ]\n" +" [ ipv6|ip6 from IP6ADDR [ PORT ] to IP6ADDR [ PORTLIST ] ]\n" +"IPADDR: [not] { any | me | ip/bits{x,y,z} | table(t[,v]) | IPLIST }\n" +"IP6ADDR: [not] { any | me | me6 | ip6/bits | IP6LIST }\n" +"IP6LIST: { ip6 | ip6/bits }[,IP6LIST]\n" +"IPLIST: { ip | ip/bits | ip:mask }[,IPLIST]\n" +"OPTION_LIST: OPTION [OPTION_LIST]\n" +"OPTION: bridged | diverted | diverted-loopback | diverted-output |\n" +" {dst-ip|src-ip} IPADDR | {dst-ip6|src-ip6|dst-ipv6|src-ipv6} IP6ADDR |\n" +" {dst-port|src-port} LIST |\n" +" estab | frag | {gid|uid} N | icmptypes LIST | in | out | ipid LIST |\n" +" iplen LIST 
| ipoptions SPEC | ipprecedence | ipsec | iptos SPEC |\n" +" ipttl LIST | ipversion VER | keep-state | layer2 | limit ... |\n" +" icmp6types LIST | ext6hdr LIST | flow-id N[,N] | fib FIB |\n" +" mac ... | mac-type LIST | proto LIST | {recv|xmit|via} {IF|IPADDR} |\n" +" setup | {tcpack|tcpseq|tcpwin} NN | tcpflags SPEC | tcpoptions SPEC |\n" +" tcpdatalen LIST | verrevpath | versrcreach | antispoof\n" +); + + exit(0); +} + +/* + * Free a the (locally allocated) copy of command line arguments. + */ +static void +free_args(int ac, char **av) +{ + int i; + + for (i=0; i < ac; i++) + free(av[i]); + free(av); +} + +/* + * Called with the arguments, including program name because getopt + * wants it to be present. + * Returns 0 if successful, 1 if empty command, errx() in case of errors. + */ +static int +ipfw_main(int oldac, char **oldav) +{ + int ch, ac, save_ac; + const char *errstr; + char **av, **save_av; + int do_acct = 0; /* Show packet/byte count */ + int try_next = 0; /* set if pipe cmd not found */ + +#define WHITESP " \t\f\v\n\r" + if (oldac < 2) + return 1; /* need at least one argument */ + + if (oldac == 2) { + /* + * If we are called with a single string, try to split it into + * arguments for subsequent parsing. + * But first, remove spaces after a ',', by copying the string + * in-place. + */ + char *arg = oldav[1]; /* The string is the first arg. */ + int l = strlen(arg); + int copy = 0; /* 1 if we need to copy, 0 otherwise */ + int i, j; + + for (i = j = 0; i < l; i++) { + if (arg[i] == '#') /* comment marker */ + break; + if (copy) { + arg[j++] = arg[i]; + copy = !index("," WHITESP, arg[i]); + } else { + copy = !index(WHITESP, arg[i]); + if (copy) + arg[j++] = arg[i]; + } + } + if (!copy && j > 0) /* last char was a 'blank', remove it */ + j--; + l = j; /* the new argument length */ + arg[j++] = '\0'; + if (l == 0) /* empty string! */ + return 1; + + /* + * First, count number of arguments. 
Because of the previous + * processing, this is just the number of blanks plus 1. + */ + for (i = 0, ac = 1; i < l; i++) + if (index(WHITESP, arg[i]) != NULL) + ac++; + + /* + * Allocate the argument list, including one entry for + * the program name because getopt expects it. + */ + av = safe_calloc(ac + 1, sizeof(char *)); + + /* + * Second, copy arguments from arg[] to av[]. For each one, + * j is the initial character, i is the one past the end. + */ + for (ac = 1, i = j = 0; i < l; i++) + if (index(WHITESP, arg[i]) != NULL || i == l-1) { + if (i == l-1) + i++; + av[ac] = safe_calloc(i-j+1, 1); + bcopy(arg+j, av[ac], i-j); + ac++; + j = i + 1; + } + } else { + /* + * If an argument ends with ',' join with the next one. + */ + int first, i, l; + + av = safe_calloc(oldac, sizeof(char *)); + for (first = i = ac = 1, l = 0; i < oldac; i++) { + char *arg = oldav[i]; + int k = strlen(arg); + + l += k; + if (arg[k-1] != ',' || i == oldac-1) { + /* Time to copy. */ + av[ac] = safe_calloc(l+1, 1); + for (l=0; first <= i; first++) { + strcat(av[ac]+l, oldav[first]); + l += strlen(oldav[first]); + } + ac++; + l = 0; + first = i+1; + } + } + } + + av[0] = strdup(oldav[0]); /* copy progname from the caller */ + /* Set the force flag for non-interactive processes */ + if (!co.do_force) + co.do_force = !isatty(STDIN_FILENO); + + /* Save arguments for final freeing of memory. 
*/ + save_ac = ac; + save_av = av; + + optind = optreset = 1; /* restart getopt() */ + while ((ch = getopt(ac, av, "abcdefhinNqs:STtv")) != -1) + switch (ch) { + case 'a': + do_acct = 1; + break; + + case 'b': + co.comment_only = 1; + co.do_compact = 1; + break; + + case 'c': + co.do_compact = 1; + break; + + case 'd': + co.do_dynamic = 1; + break; + + case 'e': + co.do_expired = 1; + break; + + case 'f': + co.do_force = 1; + break; + + case 'h': /* help */ + free_args(save_ac, save_av); + help(); + break; /* NOTREACHED */ + + case 'i': + co.do_value_as_ip = 1; + break; + + case 'n': + co.test_only = 1; + break; + + case 'N': + co.do_resolv = 1; + break; + + case 'q': + co.do_quiet = 1; + break; + + case 's': /* sort */ + co.do_sort = atoi(optarg); + break; + + case 'S': + co.show_sets = 1; + break; + + case 't': + co.do_time = 1; + break; + + case 'T': + co.do_time = 2; /* numeric timestamp */ + break; + + case 'v': /* verbose */ + co.verbose = 1; + break; + + default: + free_args(save_ac, save_av); + return 1; + } + + ac -= optind; + av += optind; + NEED1("bad arguments, for usage summary ``ipfw''"); + + /* + * An undocumented behaviour of ipfw1 was to allow rule numbers first, + * e.g. "100 add allow ..." instead of "add 100 allow ...". + * In case, swap first and second argument to get the normal form. + */ + if (ac > 1 && isdigit(*av[0])) { + char *p = av[0]; + + av[0] = av[1]; + av[1] = p; + } + + /* + * Optional: pipe, queue or nat. 
+ */ + co.do_nat = 0; + co.do_pipe = 0; + if (!strncmp(*av, "nat", strlen(*av))) + co.do_nat = 1; + else if (!strncmp(*av, "pipe", strlen(*av))) + co.do_pipe = 1; + else if (_substrcmp(*av, "queue") == 0) + co.do_pipe = 2; + else if (!strncmp(*av, "set", strlen(*av))) { + if (ac > 1 && isdigit(av[1][0])) { + co.use_set = strtonum(av[1], 0, resvd_set_number, + &errstr); + if (errstr) + errx(EX_DATAERR, + "invalid set number %s\n", av[1]); + ac -= 2; av += 2; co.use_set++; + } + } + + if (co.do_pipe || co.do_nat) { + ac--; + av++; + } + NEED1("missing command"); + + /* + * For pipes, queues and nats we normally say 'nat|pipe NN config' + * but the code is easier to parse as 'nat|pipe config NN' + * so we swap the two arguments. + */ + if ((co.do_pipe || co.do_nat) && ac > 1 && isdigit(*av[0])) { + char *p = av[0]; + + av[0] = av[1]; + av[1] = p; + } + + if (co.use_set == 0) { + if (_substrcmp(*av, "add") == 0) + ipfw_add(ac, av); + else if (co.do_nat && _substrcmp(*av, "show") == 0) + ipfw_show_nat(ac, av); + else if (co.do_pipe && _substrcmp(*av, "config") == 0) + ipfw_config_pipe(ac, av); + else if (co.do_nat && _substrcmp(*av, "config") == 0) + ipfw_config_nat(ac, av); + else if (_substrcmp(*av, "set") == 0) + ipfw_sets_handler(ac, av); + else if (_substrcmp(*av, "table") == 0) + ipfw_table_handler(ac, av); + else if (_substrcmp(*av, "enable") == 0) + ipfw_sysctl_handler(ac, av, 1); + else if (_substrcmp(*av, "disable") == 0) + ipfw_sysctl_handler(ac, av, 0); + else + try_next = 1; + } + + if (co.use_set || try_next) { + if (_substrcmp(*av, "delete") == 0) + ipfw_delete(ac, av); + else if (_substrcmp(*av, "flush") == 0) + ipfw_flush(co.do_force); + else if (_substrcmp(*av, "zero") == 0) + ipfw_zero(ac, av, 0 /* IP_FW_ZERO */); + else if (_substrcmp(*av, "resetlog") == 0) + ipfw_zero(ac, av, 1 /* IP_FW_RESETLOG */); + else if (_substrcmp(*av, "print") == 0 || + _substrcmp(*av, "list") == 0) + ipfw_list(ac, av, do_acct); + else if (_substrcmp(*av, "show") == 0) + 
ipfw_list(ac, av, 1 /* show counters */); + else + errx(EX_USAGE, "bad command `%s'", *av); + } + + /* Free memory allocated in the argument parsing. */ + free_args(save_ac, save_av); + return 0; +} + + +static void +ipfw_readfile(int ac, char *av[]) +{ +#define MAX_ARGS 32 + char buf[BUFSIZ]; + char *progname = av[0]; /* original program name */ + const char *cmd = NULL; /* preprocessor name, if any */ + const char *filename = av[ac-1]; /* file to read */ + int c, lineno=0; + FILE *f = NULL; + pid_t preproc = 0; + + while ((c = getopt(ac, av, "cfNnp:qS")) != -1) { + switch(c) { + case 'c': + co.do_compact = 1; + break; + + case 'f': + co.do_force = 1; + break; + + case 'N': + co.do_resolv = 1; + break; + + case 'n': + co.test_only = 1; + break; + + case 'p': + /* + * ipfw -p cmd [args] filename + * + * We are done with getopt(). All arguments + * except the filename go to the preprocessor, + * so we need to do the following: + * - check that a filename is actually present; + * - advance av by optind-1 to skip arguments + * already processed; + * - decrease ac by optind, to remove the args + * already processed and the final filename; + * - set the last entry in av[] to NULL so + * popen() can detect the end of the array; + * - set optind=ac to let getopt() terminate. 
+ */ + if (optind == ac) + errx(EX_USAGE, "no filename argument"); + cmd = optarg; + av[ac-1] = NULL; + av += optind - 1; + ac -= optind; + optind = ac; + break; + + case 'q': + co.do_quiet = 1; + break; + + case 'S': + co.show_sets = 1; + break; + + default: + errx(EX_USAGE, "bad arguments, for usage" + " summary ``ipfw''"); + } + + } + + if (cmd == NULL && ac != optind + 1) + errx(EX_USAGE, "extraneous filename arguments %s", av[ac-1]); + + if ((f = fopen(filename, "r")) == NULL) + err(EX_UNAVAILABLE, "fopen: %s", filename); + + if (cmd != NULL) { /* pipe through preprocessor */ + int pipedes[2]; + + if (pipe(pipedes) == -1) + err(EX_OSERR, "cannot create pipe"); + + preproc = fork(); + if (preproc == -1) + err(EX_OSERR, "cannot fork"); + + if (preproc == 0) { + /* + * Child, will run the preprocessor with the + * file on stdin and the pipe on stdout. + */ + if (dup2(fileno(f), 0) == -1 + || dup2(pipedes[1], 1) == -1) + err(EX_OSERR, "dup2()"); + fclose(f); + close(pipedes[1]); + close(pipedes[0]); + execvp(cmd, av); + err(EX_OSERR, "execvp(%s) failed", cmd); + } else { /* parent, will reopen f as the pipe */ + fclose(f); + close(pipedes[1]); + if ((f = fdopen(pipedes[0], "r")) == NULL) { + int savederrno = errno; + + (void)kill(preproc, SIGTERM); + errno = savederrno; + err(EX_OSERR, "fdopen()"); + } + } + } + + while (fgets(buf, BUFSIZ, f)) { /* read commands */ + char linename[10]; + char *args[2]; + + lineno++; + sprintf(linename, "Line %d", lineno); + setprogname(linename); /* XXX */ + args[0] = progname; + args[1] = buf; + ipfw_main(2, args); + } + fclose(f); + if (cmd != NULL) { + int status; + + if (waitpid(preproc, &status, 0) == -1) + errx(EX_OSERR, "waitpid()"); + if (WIFEXITED(status) && WEXITSTATUS(status) != EX_OK) + errx(EX_UNAVAILABLE, + "preprocessor exited with status %d", + WEXITSTATUS(status)); + else if (WIFSIGNALED(status)) + errx(EX_UNAVAILABLE, + "preprocessor exited with signal %d", + WTERMSIG(status)); + } +} + +int +main(int ac, char 
*av[]) +{ + /* + * If the last argument is an absolute pathname, interpret it + * as a file to be preprocessed. + */ + + if (ac > 1 && av[ac - 1][0] == '/' && access(av[ac - 1], R_OK) == 0) + ipfw_readfile(ac, av); + else { + if (ipfw_main(ac, av)) { + errx(EX_USAGE, + "usage: ipfw [options]\n" + "do \"ipfw -h\" or \"man ipfw\" for details"); + } + } + return EX_OK; +} diff --git a/ipfw/nat.c b/ipfw/nat.c new file mode 100644 index 0000000..08efb59 --- /dev/null +++ b/ipfw/nat.c @@ -0,0 +1,940 @@ +/* + * Copyright (c) 2002-2003 Luigi Rizzo + * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp + * Copyright (c) 1994 Ugen J.S.Antsilevich + * + * Idea and grammar partially left from: + * Copyright (c) 1993 Daniel Boulet + * + * Redistribution and use in source forms, with and without modification, + * are permitted provided that this entire comment appears intact. + * + * Redistribution in binary form may occur without any restrictions. + * Obviously, it would be nice if you gave credit where credit is due + * but requiring it would be too onerous. + * + * This software is provided ``AS IS'' without any warranties of any kind. + * + * NEW command line interface for IP firewall facility + * + * $FreeBSD: head/sbin/ipfw/nat.c 187770 2009-01-27 12:01:30Z luigi $ + * + * In-kernel nat support + */ + +#include +#include +#include + +#include "ipfw2.h" + +#include +#include +#include +#include +#include +#include +#include + +#define IPFW_INTERNAL /* Access to protected structures in ip_fw.h. */ + +#include +#include +#include /* def. 
of struct route */ +#include +#include +#include +#include + +static struct _s_x nat_params[] = { + { "ip", TOK_IP }, + { "if", TOK_IF }, + { "log", TOK_ALOG }, + { "deny_in", TOK_DENY_INC }, + { "same_ports", TOK_SAME_PORTS }, + { "unreg_only", TOK_UNREG_ONLY }, + { "reset", TOK_RESET_ADDR }, + { "reverse", TOK_ALIAS_REV }, + { "proxy_only", TOK_PROXY_ONLY }, + { "redirect_addr", TOK_REDIR_ADDR }, + { "redirect_port", TOK_REDIR_PORT }, + { "redirect_proto", TOK_REDIR_PROTO }, + { NULL, 0 } /* terminator */ +}; + + +/* + * Search for interface with name "ifn", and fill n accordingly: + * + * n->ip ip address of interface "ifn" + * n->if_name copy of interface name "ifn" + */ +static void +set_addr_dynamic(const char *ifn, struct cfg_nat *n) +{ + size_t needed; + int mib[6]; + char *buf, *lim, *next; + struct if_msghdr *ifm; + struct ifa_msghdr *ifam; + struct sockaddr_dl *sdl; + struct sockaddr_in *sin; + int ifIndex, ifMTU; + + mib[0] = CTL_NET; + mib[1] = PF_ROUTE; + mib[2] = 0; + mib[3] = AF_INET; + mib[4] = NET_RT_IFLIST; + mib[5] = 0; +/* + * Get interface data. + */ + if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1) + err(1, "iflist-sysctl-estimate"); + buf = safe_calloc(1, needed); + if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1) + err(1, "iflist-sysctl-get"); + lim = buf + needed; +/* + * Loop through interfaces until one with + * given name is found. This is done to + * find correct interface index for routing + * message processing. 
+ */ + ifIndex = 0; + next = buf; + while (next < lim) { + ifm = (struct if_msghdr *)next; + next += ifm->ifm_msglen; + if (ifm->ifm_version != RTM_VERSION) { + if (co.verbose) + warnx("routing message version %d " + "not understood", ifm->ifm_version); + continue; + } + if (ifm->ifm_type == RTM_IFINFO) { + sdl = (struct sockaddr_dl *)(ifm + 1); + if (strlen(ifn) == sdl->sdl_nlen && + strncmp(ifn, sdl->sdl_data, sdl->sdl_nlen) == 0) { + ifIndex = ifm->ifm_index; + ifMTU = ifm->ifm_data.ifi_mtu; + break; + } + } + } + if (!ifIndex) + errx(1, "unknown interface name %s", ifn); +/* + * Get interface address. + */ + sin = NULL; + while (next < lim) { + ifam = (struct ifa_msghdr *)next; + next += ifam->ifam_msglen; + if (ifam->ifam_version != RTM_VERSION) { + if (co.verbose) + warnx("routing message version %d " + "not understood", ifam->ifam_version); + continue; + } + if (ifam->ifam_type != RTM_NEWADDR) + break; + if (ifam->ifam_addrs & RTA_IFA) { + int i; + char *cp = (char *)(ifam + 1); + + for (i = 1; i < RTA_IFA; i <<= 1) { + if (ifam->ifam_addrs & i) + cp += SA_SIZE((struct sockaddr *)cp); + } + if (((struct sockaddr *)cp)->sa_family == AF_INET) { + sin = (struct sockaddr_in *)cp; + break; + } + } + } + if (sin == NULL) + errx(1, "%s: cannot get interface address", ifn); + + n->ip = sin->sin_addr; + strncpy(n->if_name, ifn, IF_NAMESIZE); + + free(buf); +} + +/* + * XXX - The following functions, macros and definitions come from natd.c: + * it would be better to move them outside natd.c, in a file + * (redirect_support.[ch]?) shared by ipfw and natd, but for now i can live + * with it. + */ + +/* + * Definition of a port range, and macros to deal with values. + * FORMAT: HI 16-bits == first port in range, 0 == all ports. + * LO 16-bits == number of ports in range + * NOTES: - Port values are not stored in network byte order. 
+ */ + +#define port_range u_long + +#define GETLOPORT(x) ((x) >> 0x10) +#define GETNUMPORTS(x) ((x) & 0x0000ffff) +#define GETHIPORT(x) (GETLOPORT((x)) + GETNUMPORTS((x))) + +/* Set y to be the low-port value in port_range variable x. */ +#define SETLOPORT(x,y) ((x) = ((x) & 0x0000ffff) | ((y) << 0x10)) + +/* Set y to be the number of ports in port_range variable x. */ +#define SETNUMPORTS(x,y) ((x) = ((x) & 0xffff0000) | (y)) + +static void +StrToAddr (const char* str, struct in_addr* addr) +{ + struct hostent* hp; + + if (inet_aton (str, addr)) + return; + + hp = gethostbyname (str); + if (!hp) + errx (1, "unknown host %s", str); + + memcpy (addr, hp->h_addr, sizeof (struct in_addr)); +} + +static int +StrToPortRange (const char* str, const char* proto, port_range *portRange) +{ + char* sep; + struct servent* sp; + char* end; + u_short loPort; + u_short hiPort; + + /* First see if this is a service, return corresponding port if so. */ + sp = getservbyname (str,proto); + if (sp) { + SETLOPORT(*portRange, ntohs(sp->s_port)); + SETNUMPORTS(*portRange, 1); + return 0; + } + + /* Not a service, see if it's a single port or port range. */ + sep = strchr (str, '-'); + if (sep == NULL) { + SETLOPORT(*portRange, strtol(str, &end, 10)); + if (end != str) { + /* Single port. */ + SETNUMPORTS(*portRange, 1); + return 0; + } + + /* Error in port range field. */ + errx (EX_DATAERR, "%s/%s: unknown service", str, proto); + } + + /* Port range, get the values and sanity check. 
*/ + sscanf (str, "%hu-%hu", &loPort, &hiPort); + SETLOPORT(*portRange, loPort); + SETNUMPORTS(*portRange, 0); /* Error by default */ + if (loPort <= hiPort) + SETNUMPORTS(*portRange, hiPort - loPort + 1); + + if (GETNUMPORTS(*portRange) == 0) + errx (EX_DATAERR, "invalid port range %s", str); + + return 0; +} + +static int +StrToProto (const char* str) +{ + if (!strcmp (str, "tcp")) + return IPPROTO_TCP; + + if (!strcmp (str, "udp")) + return IPPROTO_UDP; + + if (!strcmp (str, "sctp")) + return IPPROTO_SCTP; + errx (EX_DATAERR, "unknown protocol %s. Expected sctp, tcp or udp", str); +} + +static int +StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto, + port_range *portRange) +{ + char* ptr; + + ptr = strchr (str, ':'); + if (!ptr) + errx (EX_DATAERR, "%s is missing port number", str); + + *ptr = '\0'; + ++ptr; + + StrToAddr (str, addr); + return StrToPortRange (ptr, proto, portRange); +} + +/* End of stuff taken from natd.c. */ + +#define INC_ARGCV() do { \ + (*_av)++; \ + (*_ac)--; \ + av = *_av; \ + ac = *_ac; \ +} while(0) + +/* + * The next 3 functions add support for the addr, port and proto redirect and + * their logic is loosely based on SetupAddressRedirect(), SetupPortRedirect() + * and SetupProtoRedirect() from natd.c. + * + * Every setup_* function fills at least one redirect entry + * (struct cfg_redir) and zero or more server pool entry (struct cfg_spool) + * in buf. + * + * The format of data in buf is: + * + * + * cfg_nat cfg_redir cfg_spool ...... cfg_spool + * + * ------------------------------------- ------------ + * | | .....X ... | | | | ..... + * ------------------------------------- ...... ------------ + * ^ + * spool_cnt n=0 ...... 
n=(X-1) + * + * len points to the amount of available space in buf + * space counts the memory consumed by every function + * + * XXX - Every function get all the argv params so it + * has to check, in optional parameters, that the next + * args is a valid option for the redir entry and not + * another token. Only redir_port and redir_proto are + * affected by this. + */ + +static int +setup_redir_addr(char *spool_buf, unsigned int len, + int *_ac, char ***_av) +{ + char **av, *sep; /* Token separator. */ + /* Temporary buffer used to hold server pool ip's. */ + char tmp_spool_buf[NAT_BUF_LEN]; + int ac, space, lsnat; + struct cfg_redir *r; + struct cfg_spool *tmp; + + av = *_av; + ac = *_ac; + space = 0; + lsnat = 0; + if (len >= SOF_REDIR) { + r = (struct cfg_redir *)spool_buf; + /* Skip cfg_redir at beginning of buf. */ + spool_buf = &spool_buf[SOF_REDIR]; + space = SOF_REDIR; + len -= SOF_REDIR; + } else + goto nospace; + r->mode = REDIR_ADDR; + /* Extract local address. */ + if (ac == 0) + errx(EX_DATAERR, "redirect_addr: missing local address"); + sep = strchr(*av, ','); + if (sep) { /* LSNAT redirection syntax. */ + r->laddr.s_addr = INADDR_NONE; + /* Preserve av, copy spool servers to tmp_spool_buf. */ + strncpy(tmp_spool_buf, *av, strlen(*av)+1); + lsnat = 1; + } else + StrToAddr(*av, &r->laddr); + INC_ARGCV(); + + /* Extract public address. */ + if (ac == 0) + errx(EX_DATAERR, "redirect_addr: missing public address"); + StrToAddr(*av, &r->paddr); + INC_ARGCV(); + + /* Setup LSNAT server pool. */ + if (sep) { + sep = strtok(tmp_spool_buf, ","); + while (sep != NULL) { + tmp = (struct cfg_spool *)spool_buf; + if (len < SOF_SPOOL) + goto nospace; + len -= SOF_SPOOL; + space += SOF_SPOOL; + StrToAddr(sep, &tmp->addr); + tmp->port = ~0; + r->spool_cnt++; + /* Point to the next possible cfg_spool. 
*/ + spool_buf = &spool_buf[SOF_SPOOL]; + sep = strtok(NULL, ","); + } + } + return(space); +nospace: + errx(EX_DATAERR, "redirect_addr: buf is too small\n"); +} + +static int +setup_redir_port(char *spool_buf, unsigned int len, + int *_ac, char ***_av) +{ + char **av, *sep, *protoName; + char tmp_spool_buf[NAT_BUF_LEN]; + int ac, space, lsnat; + struct cfg_redir *r; + struct cfg_spool *tmp; + u_short numLocalPorts; + port_range portRange; + + av = *_av; + ac = *_ac; + space = 0; + lsnat = 0; + numLocalPorts = 0; + + if (len >= SOF_REDIR) { + r = (struct cfg_redir *)spool_buf; + /* Skip cfg_redir at beginning of buf. */ + spool_buf = &spool_buf[SOF_REDIR]; + space = SOF_REDIR; + len -= SOF_REDIR; + } else + goto nospace; + r->mode = REDIR_PORT; + /* + * Extract protocol. + */ + if (ac == 0) + errx (EX_DATAERR, "redirect_port: missing protocol"); + r->proto = StrToProto(*av); + protoName = *av; + INC_ARGCV(); + + /* + * Extract local address. + */ + if (ac == 0) + errx (EX_DATAERR, "redirect_port: missing local address"); + + sep = strchr(*av, ','); + /* LSNAT redirection syntax. */ + if (sep) { + r->laddr.s_addr = INADDR_NONE; + r->lport = ~0; + numLocalPorts = 1; + /* Preserve av, copy spool servers to tmp_spool_buf. */ + strncpy(tmp_spool_buf, *av, strlen(*av)+1); + lsnat = 1; + } else { + /* + * The sctp nat does not allow the port numbers to be mapped to + * new port numbers. Therefore, no ports are to be specified + * in the target port field. 
+ */ + if (r->proto == IPPROTO_SCTP) { + if (strchr (*av, ':')) + errx(EX_DATAERR, "redirect_port:" + "port numbers do not change in sctp, so do not " + "specify them as part of the target"); + else + StrToAddr(*av, &r->laddr); + } else { + if (StrToAddrAndPortRange (*av, &r->laddr, protoName, + &portRange) != 0) + errx(EX_DATAERR, "redirect_port:" + "invalid local port range"); + + r->lport = GETLOPORT(portRange); + numLocalPorts = GETNUMPORTS(portRange); + } + } + INC_ARGCV(); + + /* + * Extract public port and optionally address. + */ + if (ac == 0) + errx (EX_DATAERR, "redirect_port: missing public port"); + + sep = strchr (*av, ':'); + if (sep) { + if (StrToAddrAndPortRange (*av, &r->paddr, protoName, + &portRange) != 0) + errx(EX_DATAERR, "redirect_port:" + "invalid public port range"); + } else { + r->paddr.s_addr = INADDR_ANY; + if (StrToPortRange (*av, protoName, &portRange) != 0) + errx(EX_DATAERR, "redirect_port:" + "invalid public port range"); + } + + r->pport = GETLOPORT(portRange); + if (r->proto == IPPROTO_SCTP) { /* so the logic below still works */ + numLocalPorts = GETNUMPORTS(portRange); + r->lport = r->pport; + } + r->pport_cnt = GETNUMPORTS(portRange); + INC_ARGCV(); + + /* + * Extract remote address and optionally port. + */ + /* + * NB: isalpha(**av) => we've to check that next parameter is really an + * option for this redirect entry, else stop here processing arg[cv]. 
+ */ + if (ac != 0 && !isalpha(**av)) { + sep = strchr (*av, ':'); + if (sep) { + if (StrToAddrAndPortRange (*av, &r->raddr, protoName, + &portRange) != 0) + errx(EX_DATAERR, "redirect_port:" + "invalid remote port range"); + } else { + SETLOPORT(portRange, 0); + SETNUMPORTS(portRange, 1); + StrToAddr (*av, &r->raddr); + } + INC_ARGCV(); + } else { + SETLOPORT(portRange, 0); + SETNUMPORTS(portRange, 1); + r->raddr.s_addr = INADDR_ANY; + } + r->rport = GETLOPORT(portRange); + r->rport_cnt = GETNUMPORTS(portRange); + + /* + * Make sure port ranges match up, then add the redirect ports. + */ + if (numLocalPorts != r->pport_cnt) + errx(EX_DATAERR, "redirect_port:" + "port ranges must be equal in size"); + + /* Remote port range is allowed to be '0' which means all ports. */ + if (r->rport_cnt != numLocalPorts && + (r->rport_cnt != 1 || r->rport != 0)) + errx(EX_DATAERR, "redirect_port: remote port must" + "be 0 or equal to local port range in size"); + + /* + * Setup LSNAT server pool. + */ + if (lsnat) { + sep = strtok(tmp_spool_buf, ","); + while (sep != NULL) { + tmp = (struct cfg_spool *)spool_buf; + if (len < SOF_SPOOL) + goto nospace; + len -= SOF_SPOOL; + space += SOF_SPOOL; + /* + * The sctp nat does not allow the port numbers to be mapped to new port numbers + * Therefore, no ports are to be specified in the targetport field + */ + if (r->proto == IPPROTO_SCTP) { + if (strchr (sep, ':')) { + errx(EX_DATAERR, "redirect_port:" + "port numbers do not change in " + "sctp, so do not specify them as " + "part of the target"); + } else { + StrToAddr(sep, &tmp->addr); + tmp->port = r->pport; + } + } else { + if (StrToAddrAndPortRange(sep, &tmp->addr, + protoName, &portRange) != 0) + errx(EX_DATAERR, "redirect_port:" + "invalid local port range"); + if (GETNUMPORTS(portRange) != 1) + errx(EX_DATAERR, "redirect_port:" + " local port must be single in " + "this context"); + tmp->port = GETLOPORT(portRange); + } + r->spool_cnt++; + /* Point to the next possible cfg_spool. 
*/ + spool_buf = &spool_buf[SOF_SPOOL]; + sep = strtok(NULL, ","); + } + } + return (space); +nospace: + errx(EX_DATAERR, "redirect_port: buf is too small\n"); +} + +static int +setup_redir_proto(char *spool_buf, unsigned int len, + int *_ac, char ***_av) +{ + char **av; + int ac, space; + struct protoent *protoent; + struct cfg_redir *r; + + av = *_av; + ac = *_ac; + if (len >= SOF_REDIR) { + r = (struct cfg_redir *)spool_buf; + /* Skip cfg_redir at beginning of buf. */ + spool_buf = &spool_buf[SOF_REDIR]; + space = SOF_REDIR; + len -= SOF_REDIR; + } else + goto nospace; + r->mode = REDIR_PROTO; + /* + * Extract protocol. + */ + if (ac == 0) + errx(EX_DATAERR, "redirect_proto: missing protocol"); + + protoent = getprotobyname(*av); + if (protoent == NULL) + errx(EX_DATAERR, "redirect_proto: unknown protocol %s", *av); + else + r->proto = protoent->p_proto; + + INC_ARGCV(); + + /* + * Extract local address. + */ + if (ac == 0) + errx(EX_DATAERR, "redirect_proto: missing local address"); + else + StrToAddr(*av, &r->laddr); + + INC_ARGCV(); + + /* + * Extract optional public address. + */ + if (ac == 0) { + r->paddr.s_addr = INADDR_ANY; + r->raddr.s_addr = INADDR_ANY; + } else { + /* see above in setup_redir_port() */ + if (!isalpha(**av)) { + StrToAddr(*av, &r->paddr); + INC_ARGCV(); + + /* + * Extract optional remote address. 
+ */ + /* see above in setup_redir_port() */ + if (ac!=0 && !isalpha(**av)) { + StrToAddr(*av, &r->raddr); + INC_ARGCV(); + } + } + } + return (space); +nospace: + errx(EX_DATAERR, "redirect_proto: buf is too small\n"); +} + +static void +print_nat_config(unsigned char *buf) +{ + struct cfg_nat *n; + int i, cnt, flag, off; + struct cfg_redir *t; + struct cfg_spool *s; + struct protoent *p; + + n = (struct cfg_nat *)buf; + flag = 1; + off = sizeof(*n); + printf("ipfw nat %u config", n->id); + if (strlen(n->if_name) != 0) + printf(" if %s", n->if_name); + else if (n->ip.s_addr != 0) + printf(" ip %s", inet_ntoa(n->ip)); + while (n->mode != 0) { + if (n->mode & PKT_ALIAS_LOG) { + printf(" log"); + n->mode &= ~PKT_ALIAS_LOG; + } else if (n->mode & PKT_ALIAS_DENY_INCOMING) { + printf(" deny_in"); + n->mode &= ~PKT_ALIAS_DENY_INCOMING; + } else if (n->mode & PKT_ALIAS_SAME_PORTS) { + printf(" same_ports"); + n->mode &= ~PKT_ALIAS_SAME_PORTS; + } else if (n->mode & PKT_ALIAS_UNREGISTERED_ONLY) { + printf(" unreg_only"); + n->mode &= ~PKT_ALIAS_UNREGISTERED_ONLY; + } else if (n->mode & PKT_ALIAS_RESET_ON_ADDR_CHANGE) { + printf(" reset"); + n->mode &= ~PKT_ALIAS_RESET_ON_ADDR_CHANGE; + } else if (n->mode & PKT_ALIAS_REVERSE) { + printf(" reverse"); + n->mode &= ~PKT_ALIAS_REVERSE; + } else if (n->mode & PKT_ALIAS_PROXY_ONLY) { + printf(" proxy_only"); + n->mode &= ~PKT_ALIAS_PROXY_ONLY; + } + } + /* Print all the redirect's data configuration. 
*/ + for (cnt = 0; cnt < n->redir_cnt; cnt++) { + t = (struct cfg_redir *)&buf[off]; + off += SOF_REDIR; + switch (t->mode) { + case REDIR_ADDR: + printf(" redirect_addr"); + if (t->spool_cnt == 0) + printf(" %s", inet_ntoa(t->laddr)); + else + for (i = 0; i < t->spool_cnt; i++) { + s = (struct cfg_spool *)&buf[off]; + if (i) + printf(","); + else + printf(" "); + printf("%s", inet_ntoa(s->addr)); + off += SOF_SPOOL; + } + printf(" %s", inet_ntoa(t->paddr)); + break; + case REDIR_PORT: + p = getprotobynumber(t->proto); + printf(" redirect_port %s ", p->p_name); + if (!t->spool_cnt) { + printf("%s:%u", inet_ntoa(t->laddr), t->lport); + if (t->pport_cnt > 1) + printf("-%u", t->lport + + t->pport_cnt - 1); + } else + for (i=0; i < t->spool_cnt; i++) { + s = (struct cfg_spool *)&buf[off]; + if (i) + printf(","); + printf("%s:%u", inet_ntoa(s->addr), + s->port); + off += SOF_SPOOL; + } + + printf(" "); + if (t->paddr.s_addr) + printf("%s:", inet_ntoa(t->paddr)); + printf("%u", t->pport); + if (!t->spool_cnt && t->pport_cnt > 1) + printf("-%u", t->pport + t->pport_cnt - 1); + + if (t->raddr.s_addr) { + printf(" %s", inet_ntoa(t->raddr)); + if (t->rport) { + printf(":%u", t->rport); + if (!t->spool_cnt && t->rport_cnt > 1) + printf("-%u", t->rport + + t->rport_cnt - 1); + } + } + break; + case REDIR_PROTO: + p = getprotobynumber(t->proto); + printf(" redirect_proto %s %s", p->p_name, + inet_ntoa(t->laddr)); + if (t->paddr.s_addr != 0) { + printf(" %s", inet_ntoa(t->paddr)); + if (t->raddr.s_addr) + printf(" %s", inet_ntoa(t->raddr)); + } + break; + default: + errx(EX_DATAERR, "unknown redir mode"); + break; + } + } + printf("\n"); +} + +void +ipfw_config_nat(int ac, char **av) +{ + struct cfg_nat *n; /* Nat instance configuration. */ + int i, len, off, tok; + char *id, buf[NAT_BUF_LEN]; /* Buffer for serialized data. */ + + len = NAT_BUF_LEN; + /* Offset in buf: save space for n at the beginning. 
*/ + off = sizeof(*n); + memset(buf, 0, sizeof(buf)); + n = (struct cfg_nat *)buf; + + av++; ac--; + /* Nat id. */ + if (ac && isdigit(**av)) { + id = *av; + i = atoi(*av); + ac--; av++; + n->id = i; + } else + errx(EX_DATAERR, "missing nat id"); + if (ac == 0) + errx(EX_DATAERR, "missing option"); + + while (ac > 0) { + tok = match_token(nat_params, *av); + ac--; av++; + switch (tok) { + case TOK_IP: + if (ac == 0) + errx(EX_DATAERR, "missing option"); + if (!inet_aton(av[0], &(n->ip))) + errx(EX_DATAERR, "bad ip address ``%s''", + av[0]); + ac--; av++; + break; + case TOK_IF: + if (ac == 0) + errx(EX_DATAERR, "missing option"); + set_addr_dynamic(av[0], n); + ac--; av++; + break; + case TOK_ALOG: + n->mode |= PKT_ALIAS_LOG; + break; + case TOK_DENY_INC: + n->mode |= PKT_ALIAS_DENY_INCOMING; + break; + case TOK_SAME_PORTS: + n->mode |= PKT_ALIAS_SAME_PORTS; + break; + case TOK_UNREG_ONLY: + n->mode |= PKT_ALIAS_UNREGISTERED_ONLY; + break; + case TOK_RESET_ADDR: + n->mode |= PKT_ALIAS_RESET_ON_ADDR_CHANGE; + break; + case TOK_ALIAS_REV: + n->mode |= PKT_ALIAS_REVERSE; + break; + case TOK_PROXY_ONLY: + n->mode |= PKT_ALIAS_PROXY_ONLY; + break; + /* + * All the setup_redir_* functions work directly in the final + * buffer, see above for details. + */ + case TOK_REDIR_ADDR: + case TOK_REDIR_PORT: + case TOK_REDIR_PROTO: + switch (tok) { + case TOK_REDIR_ADDR: + i = setup_redir_addr(&buf[off], len, &ac, &av); + break; + case TOK_REDIR_PORT: + i = setup_redir_port(&buf[off], len, &ac, &av); + break; + case TOK_REDIR_PROTO: + i = setup_redir_proto(&buf[off], len, &ac, &av); + break; + } + n->redir_cnt++; + off += i; + len -= i; + break; + default: + errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]); + } + } + + i = do_cmd(IP_FW_NAT_CFG, buf, off); + if (i) + err(1, "setsockopt(%s)", "IP_FW_NAT_CFG"); + + if (!co.do_quiet) { + /* After every modification, we show the resultant rule. 
*/ + int _ac = 3; + const char *_av[] = {"show", "config", id}; + ipfw_show_nat(_ac, (char **)(void *)_av); + } +} + + +void +ipfw_show_nat(int ac, char **av) +{ + struct cfg_nat *n; + struct cfg_redir *e; + int cmd, i, nbytes, do_cfg, do_rule, frule, lrule, nalloc, size; + int nat_cnt, redir_cnt, r; + uint8_t *data, *p; + char *endptr; + + do_rule = 0; + nalloc = 1024; + size = 0; + data = NULL; + frule = 0; + lrule = IPFW_DEFAULT_RULE; /* max ipfw rule number */ + ac--; av++; + + if (co.test_only) + return; + + /* Parse parameters. */ + for (cmd = IP_FW_NAT_GET_LOG, do_cfg = 0; ac != 0; ac--, av++) { + if (!strncmp(av[0], "config", strlen(av[0]))) { + cmd = IP_FW_NAT_GET_CONFIG, do_cfg = 1; + continue; + } + /* Convert command line rule #. */ + frule = lrule = strtoul(av[0], &endptr, 10); + if (*endptr == '-') + lrule = strtoul(endptr+1, &endptr, 10); + if (lrule == 0) + err(EX_USAGE, "invalid rule number: %s", av[0]); + do_rule = 1; + } + + nbytes = nalloc; + while (nbytes >= nalloc) { + nalloc = nalloc * 2; + nbytes = nalloc; + data = safe_realloc(data, nbytes); + if (do_cmd(cmd, data, (uintptr_t)&nbytes) < 0) + err(EX_OSERR, "getsockopt(IP_FW_GET_%s)", + (cmd == IP_FW_NAT_GET_LOG) ? "LOG" : "CONFIG"); + } + if (nbytes == 0) + exit(0); + if (do_cfg) { + nat_cnt = *((int *)data); + for (i = sizeof(nat_cnt); nat_cnt; nat_cnt--) { + n = (struct cfg_nat *)&data[i]; + if (frule <= n->id && lrule >= n->id) + print_nat_config(&data[i]); + i += sizeof(struct cfg_nat); + for (redir_cnt = 0; redir_cnt < n->redir_cnt; redir_cnt++) { + e = (struct cfg_redir *)&data[i]; + i += sizeof(struct cfg_redir) + e->spool_cnt * + sizeof(struct cfg_spool); + } + } + } else { + for (i = 0; 1; i += LIBALIAS_BUF_SIZE + sizeof(int)) { + p = &data[i]; + if (p == data + nbytes) + break; + bcopy(p, &r, sizeof(int)); + if (do_rule) { + if (!(frule <= r && lrule >= r)) + continue; + } + printf("nat %u: %s\n", r, p+sizeof(int)); + } + } +} diff --git a/ipfw/svn-commit. b/ipfw/svn-commit. 
new file mode 100644 index 0000000..873d4bb --- /dev/null +++ b/ipfw/svn-commit. @@ -0,0 +1,4 @@ +Fixed void retur; +--This line, and those below, will be ignored-- + +M ipfw2.c diff --git a/slice/ipfw.8.gz b/slice/ipfw.8.gz new file mode 100644 index 0000000000000000000000000000000000000000..c2db9233519c3f1d9716c42d886c11b8d01ca60f GIT binary patch literal 26396 zcmV(#K;*w4iwFP!000001I@i_d)wHRCi*$}6^OehkrI-oWLt5$pPuK)lAO{hTXG~h zRhg+idO#AS5R(86fRdS1{`*<)y6g)`$#SJSGnt;UNCNx1_S)(MI?`SU>98c22)oj(Y>5Cp-`2&5`rl;@Ue4oBd2IoyOxV%Zv4&R+5 zgR@17Zs?B*J&;a|I={>2^E4|bX9{J}x|*lUx|-G5f*ZN~%h~z+ z%hO9VL@h7VH}iD-DH$9s(rhwG28VT8ujYC9b2gsj)AH=Hz2a}^$tG`8dhje6yskgt zi+Mpq|B3po?)VvPg}$QkHEn;M(eH!7AZ&I@zkb?mkv>f4tL7$rbjxGf@g$$;E&BOe zURU%_ozo-pYG$J-SBnv?gzK7K$;)hn32F;&0e?!fdHad}smlC%nKcdlImt(>8U1^k z*Q2UIKa+KN{V~s%*}S;TU7M4l`2`z&zjO^_!sk`$yND;1o{v*m8JX2RQ${O`3Qfzy zvcvva`l_w`yU}~iT^6q7wkLSkgF&~Qp$Ki^3;Kt4ufdd_;$Qk|ku@LZd8sp@FK)B> zDu1Xg-Se=yWw*J9-VexY`vah6KOx=HWwFFoeOl$KaP7DaI%v~k#^1`H{c713RoR3@ z-01wKMLwV0(`=Wjx16hRdxJGEv-T@SWiRUar_=q3IKp|A4Co_wv#fJs8)Ny>f9_A` z*^C-EAIty8FIjt2W{aHJ%kjzO(Z%WetJCwd*RnaQQklbFBhZ{0I0W@(6|=aBu}epn#qb+Q~yXJE1FNzX;h9|{zN{y6lx}4Lj8yYAt2CZpZjfqD$Ej5cUd(}0cvhet$KOcU~?E*D8t`V*r@iNg>o6%13Q|XSQJ*K$b^zNphHKW0n zRg3X1Dk7|cey47QX-s+vCd`9Eqcm=vqinA&O4C&1BEv-QiuOiMR=0ayl)S)noQkjk+Wx;bQ+Ket$~|7){f?|sA{7jKP>7Cmt&ufNSj;l z^I}o7i7+ a&tO*J|Mwli!%G=E*3($!-feO1-og?@-!?riUxbx4nezc$@P0?D{O}c|Ch&CBr(y}kBx@~stFgVJP9gr2EO=voO^mJco zj5)I)ni_4)yqM-VtUYEhw4Kxlox0Yj5{zm*UeQr0$HKjY?c+gNEMjgCtsXk>@wm}J zyIvM!+SD~TG@ty6wl+QjRoS293ueVtGS6oPjSu}87p}_rdXQY^x$NBK=k)UG@ap8v zhqvjQ(~Fbu58u88dGaqOKODY$|MujP9%q_th}MXr_y{o_6eW(g@;hZ$x}mAtQ0CPn z6YCM9G0L{>2x(#UZLNvMXm50pr}lqxCxP~`~YlTbmj6f<)-WnzVmS($_gd60dzDQCk8;kgy_R0V}hAqlQ#vo*>k&> zIyP>r=-PpP;M#FoLcG3*^QP*_b;kS;^hmb^ABB?l^4Y$>$%Vt<^5~gxLPSd^DcU)_ zoVC?Lj~0DbELU@!8NAv{n-%qzH7dXN5*71%B6rI(5Z#%RyW2J69YE7`5r8?<_&f*R>Dr5GiM5+PTfCnMbx(!Xgo#bP-xa)_jKP00ub*7hhlCq^bMb|eWQ-NhmY zH^427FGkg5E#f2AeUO|=+bm6wQ`hWRM4ucZD!N9cQ5XzU$JT6EuFB}vBP 
ziwi_ut)yE!R_T*sDbi_Hu7xVrPl#8dljQD(nK(^yQqgoVd8yeuFBMcul# zr^#`}lOny(bP2kyl=9>YJBcI}^AQ+lbOIor5t%KTghsO{8eDpGU7l4eAvisec28uM znq|(C&H|}2Tf`?5_jM_1eU|o<74T4~q@Kkae>eaZ@+&QjP-)79>KK zZT8tNHak*6Y>lT+qFcFgph&3rv@mr-G3JkNXBe5vM6%uPQR0)IRt4nrk=$|m43?qHJ$L1dhdSNxS}V(~p8yn0+0%Qo4`2eSc<_D`f2-u#$8&D-(Q zdOUDq*e;EadZAXmjsckl)I}RZ4X(YaK|`-eF$8nU7Zu$PGp6;kM zY!^BWoe6iWlvI^wVr8Cay=aALV&p^~O*SL``#P5+?r%a6IQO5crm4@;$)DY{sj5>n z+u|1bR-%L&Eme;hhlzLF785i~&5Y+Xkx63L1wEPJ?y4gtS?+J`Sho?(;&6<=?4;0i z=}6@1UA3BXYbxImY1Sg7*ZG*lgW8>j%MO>(o5k)jyNQ!+Vvh1U=;U*{cXcNNuB_6@ z#OY|Xz%qpd{RPY}wsyrsg3Sv!sx-a4)f^JG8vCc8-mY@zc((Z|mwS+QR)sdx5je$A!?|Ob1M3Sv75Np6PT8I{+g-nTy<1k$BqVX~(2hhl&BQMloI}LlE7vXc9+c zB4Vq)tbAWhHasCj>lazdQd@# zI8~$%%}6u*APhY0#@T1s?l@j9(>>85=+okPy2s){Xdq3TltpAws}e$tofei;+9*l=$&YR@3#8*FCk5(#1(Tew)5Oy!tl5h>5VWT8w3Z(d$TWZEyyfOq+cq z?x1{=4S&Rrx2`8sL<+$cSJb4g@>UXDe|TP8OdTA=4c884PVJvq>`YJU`-D&m#j6;?CYgiPPI zDzow#W200fX1F=sNw~_Q!@?v=3}s$?^kWm7Ykpq zxK=T`F>XS%fP0cSGkqkv$MI-CE$%at#Y`D;W_(bjQ&Yt$;3NbrvYBR_T}`$rGJ^Ng z&Z%a>^6KL7&70Gs^zz%o_a_(W&hdwL@BVUjar+~ybvhz0@DYklPOOf(|GHWUgA{^dX#?D9GGT&H z@t${+&Ulhdh@P69Rx~HYdu$3$B;U3mkHMv8%FMP@Cpei>zFH ztHQXA+bOt=%eb6`Ir9!&rHvpw)ry8VAY*#p1t`~UsV;P2l@ zU2ORub@9K8<<7zGby3nK`d`6M`n@O~@&ggWfbO)Gz8~nZHe|0QFg3-;rHe;vSe_7!9JJ+IUM9!+M~#5eDUgr! zS;b%SwH_aBaFkovxkhxBaUBhr)}N=5A48k*zPBXsy~*iNPf~QZ2)8_gTEMmjzQ#mM zoz7YO_9N2sdqXmkH(VxAqnArD1&e`~R;*~X%StX=uvJ(B=pR)doY71#M%=M7pc5#P zkYTEYcSNY*u96K8=iT9vj)iaauJaI^A;A-#CU>BPj?@$ttiHCcydrAaPj5PXxSwJv zhj3@t)bnC_U|*%)T+75FtIZ0HXS3>F+s18SS7S@5GNxWqu)i@EE^NM4a7vY$_$@Yn zu*XI1qTW3lxno7;72OJ)86At~P;>^8Lx}^Hth=aLL51{81+-g|CMLX>ejwBZR%t9! 
zjEP{Rf~)Q%ZD-L=k}L>^c(9%utC{Zh66dS3p?SrQzwV{jW>Mnl%cLUKoVDUhpdAw< zJ{z)aYOH0Vkwg@6SsZB_F7lIw?vHp+MdX5S!TBmrTZtD~HL63y9o!Tn2_?YEm)j75 zO$*38#gupyZfiQ-JuJvwUBDPl11rR&;nnWM8S!DmOS46#O1<--%QZO=+wn(I!m1=v zveecC;$U%$zRf>pAgfmo-g5uuIRf}waGY3^^(;8y_y-6@dqW)o{WTo2O`~J$m1}Yz z!4IAG-L`LOZU4U3u=!~kXbWuV4 zj%>I@dVCi}WT00<5^f6kZm=u60}GvCZl=`k1dtKost+Qh#bnGSTX7 zF=53wfl~%fimrk2#-eUJ8i>pxNW-dikC$l}o*JuyM6hkV2O)M=9-^83cxb zxnEto72C)O1+7{p1I3LvFg^M3S!NpTe_}F4U<86e`^Az-cEJDd>HhQH-u@r>!`{=E zk9t1cc;U%cyzxX108uEsap{}$i+6`t>TF||fH=#Qm1#VdTLPVJoy}%Mrk%+SUVlvc zOm1O-=e32KMd6wBPv-|HMiQMz-2NJ$GU4QGI?2?VD9^$)@BYfzst|@I%1<0sN#APR z9D>wd*BmDahv8487tz1?ZXCTU5o>VQ(Mo-E?80C2=E*ZNtut5H6&<=>#O5y-^|Y9) zS5H}6FSu61B}V97$Fce3G7j<@q+6m&p12+P;fEj6zWN5)Q27=Nu)lBDKAT({4YP8R zAs#$^fBT$Uq(2VQ_o_=KS4Z!A>4#(b&*{-S`fuKj#RB@6aq)x4MXfI3u6sDtg7M^4 zot4d^kmw0|DF^3q(SXV8HM@CK{zYH>*1Mt^4`4cpr}$?QohANfz05`XWmi!zon*AW zbdwMREoUuBFa@ZXHd5rLUP9N*jQKNLx_8Q|?6a$g`jBNNx#$WJ99LEPmZNfc{Zbb~ zDa(0~o_ub>K@t5VvV8JJn^tY%+iKY#t^4$kz1&MKDscsUE3foyrEC4ZZf_>_u$R88 zMzGUyNBrGB?a_N$Uu$aM2l9qUtbckOtQYNTXTk6d^Tu?C?yz3^=9Kt1H~xi7(C$v5?Zy@?JTG6!qzig zv3W(s4vu1?7aHAi^#*uNleG(FTcR%qe2Vr}u5VRVpdnz%-^a%p#PO84fE;TMs+mvV zZh)PPE$vk`h1s^L0&eFdccrjw4kwQHs?Am;=g{%a)tT3@O_X|F&DpVWe~d@xiaBxq zDkyi|eRqiq_PE;1sP9>x?hOVUK-u%&`Z;SO>nb z&`73Lo!_&lhpV<)aHL4Ecp3r3ebPv2Wl5r|^Mx9Yt@WhjFhp?+2d|scf@2!&G<58s z5+kpsyFsyp(JrI0P)FCa)DQ)*p4vKFeo5={eo=F7o1SQr@kl++cjl^{!NIhyG-!R1 zeJ&O&jhR*Vsg8QLW766_O_`Gl9I)`T#MgxxZI!zoaaw|>qr);B44&;F&Z~9WZWKf4 ze83=3F?b475W}#=#=@*avze~SNyfL~+}V;f>R-a*oLcTe<18w{csxXY^LsQLelcB3 zV1h=%YdjkNMno%d3lnuAe|l}MlMp(%cqW2NK(6m1ZTZR;Wd*e*Cq zLM5GP$HFp&R-LXC2T!IuFu0iZq+xyRoANP@_M-kO-B<&=gJ9Ht0q4!dE zA3?sjj^>#ytDH3Fi4EV|na(STiCtEOlN~zd*S%fu@CmFdOeDd{VArt-aE%2{CPcIo zy1ezyozxJs>STw*=gPmXO7Uh%L~WEhlI|u#P!f*G3Z;0M;N5fXm#n>MoEnZ8g;h&} zc%K&ql^)z3l84&PxVr#!Xl#6O)`k!<`{?|!dv*2+_!ws zG$iTjL>jlR59?H#kfD=tsRMF?q9|I#%&L*2=L2r+%p~;lOp{<}sfqQ{7bG_&#&wQw z)3@h;yuLg+`fzc2^_PT1es%U=NwCdKL<&SIvz^sqjW)PA5pai#Yy1${j>gvPB|8@T 
z5JeK1+d4{9SS*XVQZtY9^t}bFU;p{!;`RCE3DJxBK0QgA9ZP2p(I~6+oMkQ1M@@Ge z5t!yfy5Il=ROgd$U1e7i$3K~il~w2GA_|feYso@9b>jbbZj;9^*|jt=fIVwewIYx} zfAu%%ki(sB?TXip;`cOA157;{P7R9~25C>b*zCSu8pH#WF`ca1UGDaz2JxExJBY{6 z{o+ah8r?h=J-H>YJ3NrqI!D5mmcheW4SWhi@|RKQscgjWDbWin_6QSqA{wRjbCC@0 zSZjcW&1s4l^+W@Rci>N6wmmlHsJdn01};ZLPAp{NpL0TC_HZXuX_baK<_689Fe7eN zhUtRaq~+)CDkM>v_tJ%8=umNJ(JalbQ6o5O@(AWwWHlYHbVrDR<6$`3AE5%YZfxkg zBVAxObz!+9AKswaI^U7rD*&M#WKnM1784G7H4{9W|JB}_wzU{w<-T#2hDHu^rC~ag zO0FonU)KFm#+Rk(I0Ns#chdY~rO9;ArL({X28SdKkH02rc+$}M|G0kABVI}J4RbeG zRCwKRH)}K$+bOjZ5)HC$wp(ifmUIS+Ibxz|r1UKx%WNL>_z{U3J}6n5L{Pw5tX$$0 z3izLTcWSWjM!BQDMu*eRygeH&Ge}qMIk@8OI^_8DC}e^|-PO3P4`Ie~Ba71gJaJ09 z1iFd8jRWKA9JxLX9XYpN*^E;~@{sU}154tXh4WukCLV2XB5px!hI)8H&Oti+*tYr+ zD@>~^tHs7{cCW2nxLPijuT?33bzgA{)~rV2Vor86uniGsqt*19$MKE3J9i}wfiF#> z9FSSVc3z)DYka}tilT`kk0v@gJRp<3P2?^9mP^nZjc-(1caF5;al*7vVYIBizIYk1!jO3|$B^8WAn&eDi$TV${_NJ-HQ#?b?r@ zKi-R{fB5$5PtNM;uFa75OCQKS;GN~JAr*Jo{va?k@p5p}%D|ZJl((78ZtQx-wDE-4 z!4n7(fU^+gNHiwqZn>u0(rfZKyX68iToZO*1gRKAODhl^$ImWkFA$9`oS0MZg2NdG zzxG_Zr|NCOI9P*~%j&LZa&t&Q39EU+$rF_ypUA@LWx=MIl}7Mlby#lmA^x^EK{BcL zNEvbPBDOxOv*h^u#_uEPT214u*9i@or8PNxNLsT4$RT&R1Ol>>smz1n05HrA8k2!> zqD<6DN{3^K6)+A?FkMbHeDF<Yu!y~kXq6q!3SWmiksY)3s$}9{P9ZG6WQ~vr2H7+>xvWrQk{!db zxWl1gi^#nZ)tufLiT6T?TvZqDcv3WDqKBeXOCkx!H49sMSe>;%<3%h3QYg)zVwYMU z80k2ybS~yUIzuJL7R|+LQ|6M(z;ZV!;%pYY zndu7ClD4rC8jRlJg_36G%?#oBKEmHkIO;G2D@x{bAyZvQ2wec%t1Z9v zEvwd0^nz}BqUV9TC!A61MAXl%t=veWqmF`k$kCigs*Wa)W6eYTeB61ygKE*4Y!n0> zWb=P9SKqHQ+X6l;Cz)lYhu-T*mZ<7BQODdvExN>zO+6tHvC!B+TTQ<-i0Rz~uvFq$!&r(X!wCH(-nXot zDWXtM5rw$?To4$0EZB^tf*f zn-gTV4!tvuWQV$T{q3?yIx9&~m^eHD8>87r!#T?6B#F@V6QmbbV0@YK5r{T^0+GAe z+rv)Z%ajgzMnva{>Iq>ir$o#3_ucyMi2GJUF(z0l34r6k!Cn-c$6>D$0M4Os)Y+AR zKTvndqJ5sZKQxN=(8Q6lV1KOtoxfOE|@S}UPVSQ z5rtQQQ>(ga0Op@U z(!R@yc!?7BW%A7c`wsA)kaJmApVu5?BiZ4akXZUG8xTP{Cc(N|c!Nehv3$G8%r28# zN~F-yg!Dv(fe1|RZ_Y2iKfE}8k~;QH^JfD=)?Sx&@*c8FXi&n>3~GVQvmG1_GpN{# z@r33U$?phj!VyerlYNzhDfwq7SKpsse3u>`A77kYULup~?D8#VOQkz!hgZ8UXDUR0 
z+Q3sVus5(a`giN_JK$!6<4=+i#xofaR}qPhMFO3|B&QRt8#?=H#6ksK= zJK1Yhu4* ze~HKg!K=vBevIg)DO-Z85hOuyV3wH~T0vQWAy$78pq!nHOCkzd?_&H#8IIpL_`hpR z8X)e$5xp#zT+(kW6~mw^aXYGQJ8U&&_*v)?Nxvj5i=`%r!JHxn?7MFEfO zD@nisW~K0no=*qu$y)UEk$i&qR{FN7$AImizv#XGsze-Tsqg!bk`mNZbJm>YS!(*G z7}=xlQ(HAW0-@ci(Se9LZsR2qDkq2}HC@(!(ZNa#cxd4uMstK_5)w>XbcU!DJj2UdM7@>q|SoCo(5PtBYB<&FD0Ih_VK zt=M1dT+MhqYuB)t$`u+?Y)N9pf9M4Eq8yV41%OWYf}j6P`HD{du)|c?yS!H+4O5-Y$| zH3*qoX<{S$oCWdUzVJMu| z7AM^@1`8eAs_u_S5&!6*e}S~r)+<|1lC)gVv70)LbMT=que7 zI|-+{{XP8$Uq?x7k0YM%Rw^@}L-XoUYw`D#vqiBaoC*H7^dHfhFj^a~qdBerTzI1& zo}r|Gyk3*qRI6`wft$R!$>;M%BMCLyRlU27`gUDQiJN5LJk2Jz2B{&5dRYqp8{^-k zKmHh{48vlQaHtN0gGjVv4C~?{Qo4tnskm^*!)3E)_nhX$bbT|~)p84}*e!`@C$>^ZdKeI@{cZdDCFaH}iQqJkb=3W$>=LYKhro;#4|4#MXSwmiqRGUo0nP>hR-H25=Rp4^-w8}PIoh^RUf zT1N7o$o*30ex-qCCSEyf=(#icTdHKbrR71?|NVQedA2*52#0pdq`RznTMD@1lnVG8 zupKyf3uf4sGdx&`b>^ahQ4-GPS;Z$o&Nq>iX1%Hl%f`kZgK?!uge6|EL4cgWdFbuQ zJ4{6&9to^vJfPhq0B|<%IHGq4t_jCf1BNYVc*@UsIds_J?2XY02hG} zPT`$s?2!{6NfPE@vK-22Lc3axqxqQ1QbIn$iPb?6WzxPMh5#}>({rBgzd&R1X^2mr zC;>r-aaiJ{)TWUqIroMC+}r;{y0H$gf_3;;E`q3iv=EiJ-1+Zb61S_Qk%BbtTbEb+ zFJ26W1OBfp&=Av(z7AGuZ}2<%`F9}*tLZ7s68TPpHdXf7@-SPGyq{on4u*)dyf&{5 zp}KCJmHGklQc1wd8%eYLtE=|`U#;N43eG!c{K6oZ*LB6l)Y42I0b_t3@G7jNwb?oD zJNkm90r`QsiG1Y*Q{^y_Ammijai;%5_+)uugXJ){`2xjw*oI$@a_%@nfx}+77DwSP zfg;H5@CtVthb+UDZ!kP%Wf(M_*C1BdY1-KaQzK3hmVI^y$;mulgaHjak`2WKK%Bx2 zajO)fGjrgl)g}lj#xzFDf{|P*zM+f1`HbeAm1np15^d@-Q*e_?-j+>pUobzO?Q6(d zi0GUYtq*DC+BrPv{pS|lp+_cD->b!FNR=3=B0>F@6NXC3YEJ-C8bh;9+@&B^Jj3U9 zmLN8tNyeWdB!DwwVK`j7<%kBh&~R8MZUI*B>x`4gSuiC5XN9~TQOUtEJ_}Ts#oaX; z9uqT_tlV-v(h@i({QU2G`!9RXUi4qQ=)L^rA4$lPyT1ieLwv7>U<s5coJLHtM(S9x-Y z(;AXnh%sLcyKa?Ozw_4w5925KhnDTf-}HkpN&M+=jxd0-k-#Wtf{+W)rWHihdGkC6 z17Mv#9KYZ7HH4h240rT;gWZOO#AN7dyH~c`bI@y(WVANk4&B?`3|bPajJphN+seED zG{^iCj|1bdqB2dTgz1$B5m>v}jh*2#P{_2)sJ_LHU|^cnMiGsgbl0XEB0!)6OYr1L z26b-~`pAWd;b+t80yoXGpEnNe z9zN%NZ>m0c9F+YNPlVoa#i__lGMX6R&07z;wX86jhS019c|;w}(IMDg=<0!K?FiHl zk7~ZW$w+tupBDGr?w72wPoJ6xR&2lmb9h?PiD(rb*)ho$-qb93FbtI@!4`?Et!$+m 
zIJGZm7C@J%O;@w`r2r5jpW~(sw2^67AMUfl6_q{YbV!+f#`bv;0A0?S#T5sZNA@ViX7JbV=w=Y<%{|7F`K#y}>+xN!H%Hn_fQ8srl0D;UQS$@z z6@8QT*i&e5jxkK4Ju0aYy`+Pg7(bj|p^c$?hXEDor4UhZ+Z+4Q8~gOeaaeXa9`pn? zvoL7EWXx8ycx7t|w1x+%4`(;2sOs`?UXu9GtR&=)GEMyf&1xlj^b(;f>6 zLDTOB!H2|MTMUfM4A=^BJMlJX(2o%Ehz%+_99-{9iBNpU+eIWzf1+b-h@l3w)m;QfSZ)J1%PANvGDrtO5dfE`prqLGzy8i}7N~NTKLmAVVD?2DqZifF1H^$4tnLOEan-6&wb&sX~H^i5N$| z)_URr@U)2?CCgo}PNq(s9Iz;OA!?EVzqyzjc8xmL>48!`_sqvPurx2{YdinoT;a_U z1hLv9&tOXzj>zb(H}=DG`oZ#B?7J8Ao%TWxH}3nFcoysx#tldhMZ;!J`~4gLdsA_= znZ(xU&wCgrOfg{c5F-@l3WB&kvB%uf&dMnBzwGO~QckTiu)SxUUxUTj!wf^mTKC-- z8^4EdAEYkKL`*W>*Z;s=yYooc!poRe4;~=au7KikaJyD*^^*MGK(RKrk_53T5Iwg> zK05eoUr_@!!8zKZbkVG6hKq)+v`i=w%6!#;WB}baC6T}99@JW@xvG^}V_F%8bk-my z!l&C5OJSJBq#yY!HYHV87-OQQcU+nZv?;-46T7suF_+LAL=Kx}Q?b%4d;%AXM!!14 zO45y3638gvsb-B2rU+J8P33G`5!04c7ykwa#;4iq`nQ zes*>%BDRHI-nh#3j9}+<9xk-k6u>MXXFE66Y&al+Jf$zv4G#Q1EI_TcZiH4hHLSZW zof%gOhhg*0 zh6_MTVKQRjliYQZ7FNR|z}0qGvC1qkTMjzX!XX+o89ye0f~=+D`OJWk*@?_jBrEMK z1{!vpg`$V?e!LBv#ffCfnqC{uTqEv_iTb@n#mB=q`ZBjSn&4T+kWN93Z)$~4g5}200(xt!O@46? 
zZ>L95#$=lPz*Qak!udK}lZs(@c;Jeeaqw`B^jaFp_ zp;84n_teoz`Y=0UdF9c}sBKwqpntw}BdZ$^p6^=~h~j(L+W6OBRT|*Tk!z6^W#h84 zSZK#ZCE|=tCfZy8^73JcpghZmh@zO7Q;a#171q26%0`IX5N$^}tfoLJlhU?gXC4(3 zE6V{Ut{6yG<>;QAm(%x8nV)zh3MuX8;%8m>`VHHF&66}QraAB4juxYwn+;Xs6}IPq zqguUjF4qEs$4-go0fLWl71iVKoD4w%7sF$CoMSfj#cly;ZMfGp6vSZ$vrBy|1PW^LI!8|eX7#0%Cl_z;Y7<(!gXBnxa0AeSN6RO9%9VFH z9og8UTcy8qjQ>x7UP=FVrf=%V!q^6DvgKxj`7N zTC~+W-@I)GG_ibb2&r^ zK4rHWj3Vsbzp{9^4UJCDp7OPghz#24o5}^~=ZMzY@ChY{LB;8(N!N4Y@vAq76@h^U z!7;oi&z^0(p>;w!Dru0&gq7YKzSJR=lJ z0tsX{Dp^ez<6;DRn#O>@Oe(4Q`krO1!b)>H&yc~7)D`~k0m&7AmdDhgY!xpMX-(5v zup1etBeRb?0!QmS-NVnjJqyxZhwu-}x+u*t8@}&hl6Y16XBHNq;fvk!Hg19(xV4Is zb)Xk5mV%4o>rjXWWpc+R71%}SH(lfGyr|+JOUAc!_k6}h{Sz=K>GtKkgkt&weZhZd zZ0zs!^RleuFI*tquFaa=^WfKc2w*G1JdCd10M^qGE6coy4DPhBV?lu|gDMY(~*c}>9QwCj>1$ePZu5vn4DF>>XQ28^Mm z>ugvCSnQ-zuEwI=umg;QReIoCs{|)?E~P@8?{+6#N=~J6eZO9Z1zk|Spr@IrA>zs+ z5-$YbowOdVc3@o#WJI@bDW0`0FPG5~i{(Z+QlOtR$>o!w5M4gmdlIcPuauvj!KLOL z0|7K8x%wZpuR;Nu`_LR&@Tf`2Dhu1~HZn*qtI*2ol2>=kNJV;=lxK!nVV4zoNaP!P zY|&f}pmZJLWj?sOuUy$T#b=1IWdtpagI2k9(+Y~#V^+FBV#MHzn>jep}z1##%Pai5gjvxLH%#oAN0L{ znf-ywIv9MYuN4s(OFh6h5b#4NW2Y9Tz<4LF_**_1E+GJ_Kqexh4{J>}B4vM}KvuW8P5wzr?tS`-o$VA>-Fht0 zaM%&){OE}O`HGIDGV9j%(O%eo*9Z3t)9Kgw;({`=F|97>CfczoicK!e1zAz*ZPQY; zJBYN_$wvVU`d9;6CT=)aZwh_XU*L_ar{le6`_EGEm2=Lz@mPL7c>X+XpQ0CUm6Isd zU(|!Q5+626R%&Ygc@aN&-v#_|W7@wYi$~Euae?G+@DD9zo!_Du9$QhIWa^~Rtb!xzOPDJHyux8H0_fu8#EuCNVmxhE$5+g@H{kKtK zrXi*(XvI$`3_(vNel2HU)oAga>^oBYBQ6>0#^aNC5DaE?YC*=(nfhxR|L&A^Zk2>=`be#I5q#Gy7{2heMp zzw8)^I=ME7WMd9ZRK)=x$DP+L z_3EaBPVzRy9pQc7$mZr`zim0)y~O8`>MGx?%8AlxgG&YO=9_8-PX^CGliyiFs$Jy3 zic{bA_Fo3!G!Y<5&to(TsKx#tgmSpLwcxa{)k;&k)KyeQxD4mZjRarD6!4ka{|xrU zBhWsAj>W@v8h_cXT*3CsumCv{_xVg?ac~r9f#N}MDjTnv97ciiQ}rq70@?sqBaXw$ zqxH=0hs=p8ngsq&Zw(e_oi7g}fS!|Fr{Jq7Y4z4(8Uyk?yc9#VGa5>dSw<|`J2N~W zO}Us%IQTiJ)7N=I%R@qH5NW|&D4(-MQTn)LHbVd(A7#4z*Zfga{Z6&?DUAKRpwb}t zh0Bf0(u-_Td~ryKt}&d?z1$Y6VfE)4`j?O;O7OV^nX`C{<}qWLK=yO1cIPR|o45 zxU79;b#QG%x3Y9q+`@NNhqFz>WKVH7gkYo!R2I5DcfDbIW5wJoyJ@K%N4;Y8Dh71c 
z_P8!WPpSy?G(E2eob+A*Qp`BNJ(SYuDWrPRT{>-23YensWiKpys=snzN>6Q`viE!f z-Dq9Kj!kz7R~%X^4Jz}N8H!yjaXB_h25Q@mU@q9_XrK)-RMyE4hwt9w9tulOsAt~Q zd2@TH@jSa|mjz8P?mn(aZ8I`4bu|C_w{fi7dfgB70p5m+)3(w)~uQ7a{cFu^AHq?Fy<$TMc-J27P|hl zD#jnF-<^0IyJi9-hSfW`U&uve{$h_3|EFsP4cpm&1- zFrP@)ISMN)`d=ON>z$$@1@%d_fRB1^wQ+-NXf{BN2Fa05N6Pc5P%I0<5QFo%n)|*G4e_qm989>*e zf{U2a=@_dbzR1eeZ=0`8q!Fd8wt6SYW_?5kkPHxkF~jaPYv9;i$!iiuUGOeGmTn<{ z0UuK`Yga;@__yyWwp8tTX4V@Hd5 zm2qQ7$)F1%2&_V)k;1;7ugfWr$jqRyyJ*abIa1{EGGJyL!3j~&m;*R3L*yYHUE)V4 zsTy8y&b#$6(_R?uZ02;SX=#>zXh~-a9l0F)|T%A-S(Q;Uo4*4mI0T%) z@$y@s*1bhbgZ=K^y9tc%R>L{sW4=wY&1*=ItXcSQc#r06@y?k`kqkInH)p4;(##{t zI6>2@hLWvFce~Bxv*zmDxEQ^8iq^a4X$bnVG#$mnrPu98lQ!}EY*qu_gq8? zwrHLQE6eU4gqSF)yNH;AQCPhF52$z`&**ya>;if7EDdG8)~X1KT5er3osgPbZVTOze92)w?yU!G%CF!m#uYp_EXiqsgMR!TU4q3G2HA(>iC;r&}=`~tS?pvcT ztd4uJyE^ef7STdsE%};)tJ83Jq_m9-=0$HSF9BnGc5G?K*hs;71PhhxIk&@Sc8JEO z*COAQmw&lDx_X=b`S9XYtdI^BS>5jvp$)QEY9!9pk+n=4aHTp>K7v$-YNEFwS9`*0 zWt>Cg>+)JoiJO}X9#W1Eqq)FYj}XEqLKRD5cIk(caTmXvgzB%nem5X^v7L1J2HpFQ zEzAhX#4xIz2D{=r5vi=DFv7j#J1t39H(w;;F)r}6h_P*Kt64S};XvCC3V`{Nm2}SFx#M9mP@LR^l(c^k4Ax+7-h6L?_ zNB7&t_qRJpuWnWi4_JEDK+{HQFOHWa(yzz#ce2dQcg{Ef8Jr2Te3u0n{T_=~j-Bl{ zwie>js?~7@*3RcXS_?L~`lU#Z)wsd7XSZFwV6dA1WM+oZ+VQ`z_Lr^Gps23-U15kj zFLzxHFs!PDB_)a=4-$L7{Z>}vuy9@}FkJeKwJ<;Eu1dTMr5eFM=0TqjtBhjI0+#M% zh0?p!;J_^b=E)CFx|M>osB9xjGJEX6rrq7G+^WcHm&s5{kna?3I3G62&5V(lAFQ;J ztT(X^EHepfAa!34_1{7QSx0mSsr+hg%XqE14s5dXeFa^Tvmo9;(EmO!W;eLH_|u=z zBn@O&Sl-%WgC{UlVY3P}$TZ{^O5HB#Fxzlm5`im9wd3S zsCBqa2*adbIsf#M?)#bqd=w$p>p?RH<@oE2+4BNXZnmOrrE6102xBU&M^oE9T~a@^ zoZ+gV7=~?2Jy&gr}59{<>VF|2##{4+t?tgiuQrBXz;z#tw% zzfSvVz;_swcNl};yq`2{CwcAppF9l~?Pm)I`n2O=oO(z1h1#pG9Ip_K=V1*zL$6E1sIf=bMTv86B{ z@7A6zHO=(daQGdMQFINCoC=bZ(oQ~|k5q7=Zt7NiRB!F797;znhQ=k6jJe1X`dev` zyk@%k{$*f;++RJ1@lisM*4bP1L{}Tn6yw3`6HU7|PCMNr?G; zOm=+0#@=wD<{&NCm(Hqc;`kKnNh=5Wk;sIOKc!F1R`>1y`p>=LkT!xPE4XF_RFHN8 zajxn#-dnKy57Hg1Nh)Hw@eg}0k(KLI)7%GPk*}{iqXaJa!M~{eek6KBV~x3s z%ux@X93<~-jfLf=J5f5mIG~Kr>ZP?C4BN$MCDAQuSH6c2XU066k&woU0{&y>98LL` 
z0Yn6*7t^7;|1P8U^k<2Va;6cvzT-OF+_q4hzGTHS3{Zfv&G!ZaqJ*A82>2L{J-?N2 z3XO+oziv;FmzVHusSY#}gXd|Ce__031Nvg4c(79-FoN8VE}cD%-g^AsjA*ND+O@U_*cR#i@pm^-7EEy>sfy2ypAExKaG zPyG>QdlJFZ+y^}`^NXo-rUuPeHJ0@8Dd$n@ZOYs#QQ32|8x=(F?I+L%*nAI#3u_ch z-t)CAkeKU*K}E3KZm6=bJ08kPUeRH2BOCmfu2pcf4nvOBmR-@Q4xMNm0Z-Ig3aEp) z7@+`kK|6dRi$~Me`d9wTgU+`;3v}bl?xP_-$0@2b!2%X(YcDbtcuEYrBeP^-Z zC^5|-(!FH(|5~Hz+{GngO;0~2LF}El>Z$j#F%P54fzti7Kw0F~lA}^uyDBY(OnPsW zw0SxigEjyFZMvF^S03?RI2AsEa+D_}!=uVY1Di(v8Rl%rrrBIPBaON);P}4FPlchIBXp=eAI5$ae>^{%JBT*8k zYZH|fKHJm0pF?I`dm3ah3}ikYcaOw-`-&uq|G|AL(y5KYaup=7G79xYk)S;VtqJBQ z`=tmE&Ae)%LGf)W5D(v>gjkdO$yua&E+Y*O8{vNkr3*UO#ufi!C{OA}CYt1Jj=;dw zg-vxhB|=V-U4u8B)$wJ{DRbpViR#eUF^d*cwVT_NTylDG4Y)5e=Q*Y(0DN|af)gkv zI4re_+b4tBpohsUvd=6QDbN+hYDS)&y%)nF*Ia8vwM-90E(;>0#X$o&t4HuPbY1V!J}+(k)PZX%aR5Ly)w zGKAruNU@TP^}Fj&QLUl`35isA9M3gV;~nN$LY?6)P6eQLT<+#?17+H z-ONQ+W;3y4X7d?LZ8r;n%Arv>3@&M&f!37^Bq0}Qx@;ib87gM4-;{7>?Xv7q=dE{;@^XnVeTVT>i=3Y zEv_f|k`}t7dLmLI8?VK@fi*L!Tm3Q0knYS5!_EM3Y9x;Agv#Lli-Vq1A7?E8s>cdW zkliniGP|8!FF&@d4s}L}o03_b4N7lbW|Pq%UhM6=K_ow85KQS`7=)egU=TgAE=h1Q zmP%HwLzIpdqalOwKD2PEW2>Is34&&*4ncc%wCPg>K<1XE!+gqx&Ib_bBpH+FlGSX@ zk>mqxa$mi(-b)yZCdwY6$m3(WSUcpFO;1uJ)GFxl(7;*(9 z+0RMN7c9*Sm$7)W^*oM9=*tC)?}C^Si$y#@+<7ESVqF-0D5~7E1tpy%kax)D2|*ys z++z}?mhvpkImAx^D+oHT7zZe2IL$FllEqRhaI0oOnp&n7qB75F(}|nP0wgAtiN2g^ zp|O})-7T*m9)KfmvQ=C4frZVT3jm2hn#ne^h%QC%R!#ks6~(6(Q1KM@;HMITdMzOX zl5|7!JC;yiB3x5qH{}7J1o!6j>?D1EetLFwxv@>&&p$5e@oIrmqsRsq;X43>utFi4 zT~fk8TbLg!Q34)BI0_ME$3e;#(~z-ipEhV;1ujHIda#9Fl=c6Z_EKyxT`62K7cW~w z9P$>{&G>R_$#XX*dDYs=O9>9M8tS5)yP6g~POIQ96ApfARuT~(R8E%_H(15gu&B&b?X+CM`O1}MV;C)c%bDT%matlU?A^m*t=BNI z3oYKVl}ST$8kY??<5&Rz+DK>uE64HDCQdPC;fCdZuVu5fufHdYpy!#ZvB^fDNc=4W zq4{g1CFGPQ`iA@(tGUD$;nkeiFNoe)Ag?lXQ`C&-RC;%}Au(CT?30 zW?qHJ$LJGrXqA<^e4uykigN;8Ymgi@K9O!IcH9(5(XqI$3^h=g@eYg)2^O*35w7Hi z()*&Cs3lfY*8~Q`)sLFf#Xe6Q;!FCBv#{86ow#H|gb5R8m~wKwzw;5gzOA`t*K^SK zhezL?T&2gSe?GamM2y;@h`>^6?s;Ox<}lAKUKHrAYzlOsdvkYDCsyIk#iS~}{FFBxCqbzT+1jk9$ei+u!Q^`I=1c}(E 
z2oz^0SKpsse3u>`A77kYULxG@?D8##^rbszhgZ9b)X=6|9qEP|_eeMT>yIS(SM{W^ zG{clNDEi}C%`t}c4nL~>w&5WE=qPsZ6&|=R)ul8LyK6&}J7(1iLIM3`>B|}^$=*EF zZlqp4^7@n={lz%>0Dr5!1m1dfdKA6W3)3MaWd$--!L|l^__s~ismK!^$Ge&(C#3?0 z2xH|qSctR?1|aglz`JwH^)=m$#|Xa#7jb|K0Wc7=7!u+ns<^7>9uXV71&JY_Ggz5#%8I4eUi? zb^JaGb26B$E0WG2ITk0bbs_?=Riw&XaB`zK(#(ybc2V6dz#tR?$8yh(C_D|d#s7*E zlQ`kL7|8+&P(iavGj#cUUdz1CM0@!x67@8&ot&+5f%Sl@89%pK-d2@hg>F->HfyN2_ch~;AA&gjz{fDSQjIF z{ZRV_dX{VkVMlSxYH`-MWoa7J&MCOU%q~ujPcKf6uF}J^j_69_#w<2 zYEUz*x*_L>50@h_JBx5&UshpD8fX#}vym%Csx=0Y)@e^`(xxW#ss?*ka;zstKVze- zb4?bt%eECrgq;y@H)HOy2pxDa!ztM1afgf3*{X#{%-NDdD=zg6R1W@GmLts|cm)I{q)~mq?UA<+Y1tc%TnG@g z0~a=)v-?*PrqT9ObxuDt3yVwUISe6Isf_555@krDOf=~eJpDvMptlCR3#=${joW

7YOp~tj!4T)<@gMo7MRn1|pTbh#?cqsiFQSc8p3zH$;XGM7p zNK-j32g#MB^2<6~M2}exed=vQ%&f~ZZ3$h7cqUqHtC0*rU%zw2>fpDdV9@vkuzDv) z2@ve-sB4!CCWn3^ak1j{KYEZq{O2%zczL=8Xj!5C7YAQu5SF)I?CpP*1y^_D#c-Is z_oA(#goynLHt)=mQ$?8|!cwhP$}idW!5SZWNoXX0&K~7V(t!-0zkKnBgD=169ZUA& zP-}R*l6t0KFdwQ?4RGx)`md z-WW(qzDR1W&0Z2XG#S=QjkQj9l{g;@|FTTQX5~WPU8igzb^*&jJkXv1a5fWNMLinrYqpuV$oi0c_{c@up-1z zAFu;^Te}YKned8XkrpKfvk9)g?Hn2X<)hWiF7P80clg_xl$U85#wSz5*@WwUlT>qW zgV6-_7+>AwKW_#IvXqskn~kIS>HYnlf`w_^+o=Wa5+<7OrbVXf}l=!>Q5V)LyCC^+K@| z8jr#+_U**J*Ee0^uOJ`up{WzYlS#ka`v$eXy4Poio*aV6O`*jO6&k6mH2ZW*ifN+h zzeq`EXb!$DOP53Y(D-Pfuiczv*NFk$|2%0#o#*!-dVE@RAK!c6>AGpZ;_207*?IcK zLjy-F%EnanO4-n|tmAN=GnCglU((wfXIz7)oR*ryZZOUz z*yKKnRkWZE7YVi9IOZ+Z#b6K-Q$ErLI3{V4Ax<@Sa->o6z79H#6^iekKlRl zY`T4IBET134~5ZT@>bK=qKki#F2pSl2YIn@R1q>~!j&zak7( zeOzeZdmGUM12)JEcpG(UPD-M4&(<0)DcRqTTmVri$slf! zX<)M#F$_K2jXX2?s&3R&3FDVDo_W{ylu>IdsU6paC}O{p;ieg$k+-GH*k6-23kB<9 zU{|%qY2eyrlfFK@JUucukrZJ*J^Q1AJE~ZfO(umxTPz@tRa2^=6&FNJNfG4Lo<5*=5rdK$KNG#g5FxMcG(7T8%@?~S79q5;! zN)`$%jOGQvRKI{1Lrb#9`wL&_9viIkcHe!oFHzE8ZseJJ<(W1TeiJq_7!1A+gGO)N zMk`-C3>|?AR4@!&O^PrO#D)m)PoZv6Uw5D@`&i6CfJ{YFYEZ|a_4tabW#Szi5@pO4 zFIc^XCjB&Wn`Q-kMvT_YY{z%!nWcbk`-8)Bk)$5b$|N3#$)X4*3mQG+Q-paDg1FIm z6t|Xvz9+XCU`%rZn+f5j1$L|MkC4wOpYCc@m89FC&KEi3Vhf0-hkuiNIxF+xFe0r) zx8PfFgXnTR$9ZUa5+gx$6Ob|@OJ=Ke^PfNd?$h7Xz2RVQu+InW@B72vvlqRCgWlfD ze_(?2+y3AgetY@%gWj{=-rhgbf6!Y>N~6rR6SnfQ_v~fwU>H2Oos#Xe|NGxg9sl;edE2)ruSPszRmm5sG1>ZTtV#St|1?z<|U^{O5i3=9-%)%IV zwUk_7-L;g2S?{5V7z<+VXc$Pq>KJ9YCJMM6u>=K6et~6qE;!ZXj)exhJLq( zY#acjr^~7m$9Nus5-x%ANVN&T_shT+YbY*B5RnDFwbsgCjFcLh&vb2Y9HgatYCHO( z2oLO~g{o|WwU2f;iFP;r#dp_N{`h4!SNmWFmux8tpo@ul{}Vg!QQ=7vd0V7=-|P?e z4qgt1eAbg*Xy}B8lOCCVUJerp1#<-(g5UNM2kR5Wm^A6}U(OESk)S0Az#&wq(iUfb zv|0yYI!An5*0$N0YX;m_^IKjO;=W;^qLt2UGI1ww0$6<|tl53n=2u+*i*>(z5G&4T zgRiOpK~Gd7gK>+EJa#cA5`i3uW_oqBS?%^#uyVqC@jl=<1pKXPUlL>silqVyD(W#? 
z2J^t$umw>u*>14ovQC{R`FNBHbA3>>`2%4LVO6Q zN6nT?!J2D_-MskISz2O(=Pli zUu7_3M|_juZ1sgp$pNf56BCt1X8HZ5T`Yj5mH~^9eNxKhY(-FljSqr@L|HQ|yaae< zUx)|s6$;IQ>D}L*pb%LV&LQv-^&ZS#uVF4E5%?F_g+69Jz?xJJU@8T@w=#5L08m(p zi3qmOIS-8>A8UVpc4k)RJMhu^@VdhiW<}Pc3InfZGoxlqdY8LgXuV-Hz>y6`65gn49op*<54}C9?w|G- zXpTm)xd_`}k<=ysVkpVPvMgl)YZetofUR7^yKCnLx0}{X$s()YF_MwX?Ez9A^j`+` zcDS06yul8TDkuxaTJ=Uba7H>6`5{Fq|&7pZPH*E=R z{?Z=AmDRhKH9x$6@6ot_lMbFod$c!{yzq^8_8;|*N&w=6(6h%*P${eBNL!Ph-NNRK z&X%y>WSRlh^80*5+c8rk5@KYmm`>gZ#E5`R!wFR-QSCRyvJ$-JOft*a*32PxDi+#3 z?@AsBl&FI41o>lDz$4j)JHR1H^J#ywB5mk%3MjGFBHhth9uiZ{ud?~aUSjF*z4Y|r z_;AKD#T!D4=^mK5t>15o=8cVXR8&R`fEOIETl1P6aF>Cokle09; z7qQ*GY5-z-Bw2G?D)d(o?Sn-4qQfcn=)U8kEQ8^2|LL>g)0e}i!@-{Nc%P)iKm)<2#oB+$UBX$w5jTuKXM!rw((9k(zfLdtVusz;RkKg3I9J@;P> z7Y(*_@0}sy-NQ%jeI7utdEIBY90%6m)4(BI3nzednoCj7<*Hs*T<7ml_zh>$X+mX_ zI9%6t}$;o{t7sU^&|>sI^+}nbN5EwrbG`@%G2P{A7~*<)c^>K1NaWH&+*Zq=2~fBY#-erf z)GA*sZ+KE{#kD$I7ARv1Kb;s&y^<9bowYO@7%`kaTR1iD5AZF%-U9t-99Mq-rKGR> z4{qo6ht9CzI|n2a3l>Kh z6UNsI+$hX4=&bZZ>p}B)Mjsx#E}~ZM4JM1f-9LGVW;Bn@`75`vWst)h7wEgH#IP7& zX1;)xQxEX_-pejg;af|N*?ZX?*x8xB#zo-lfBy!ktUTk~Yk?Sh(rHmWqTHaEYBi1L z-eEe4QIk|9Yri{MZAHf9+Ggwt=MBe7n~Dp^{|6~lz~g#0L|*y!&Yk^lDlxF(==1qp+}=w`c9ePckE~W47WWBv@g$LC zjf|m(ur{n8+S-{*ex6TTEgu&JkU3Q|3E(+2yn`DWRiAsI3ExLVX^A?CK@*T#p0ghh zy|?0+XAg?XSp$rU+7#_dH3{8zwzsLvMHAMoDA^XLpzal3dUAgYZj@VE=wEJaWQ-#V z!nV!@=2V<7Uo$jLlR7)wa%7^GAK#!hz)ce0I6J)RZArOI-biqX%W~8Ti*_?1oFzz7 zMo7>$`3ld_PlD5UPaI3BFgJRMcbnzMIl0uv;EYH%7(riKCZ5aEdp!xa;0rXg69-~z z511F~LWZwP;2A}JtrXsdBhGuWpVF4*!D^~;H<>u)vIl5Qu5RFdU~^=sPuYIuMf&cV z*?Q?UO&mX(6St;$;F9+oWOd7ep0oc&$KsvC(USJMvvB3sFiVtQ0B(b{FRuIDM+|g` z+s#Y|SC#1qF}&@Uh{oFGLnv>@2a{HyvD)+M2UXY{EQ#dE?Ctfx`R1FgP0LT61K%B} z>%cX>t*wnAo{P~;PmIQ85eY4d&yumrk?D~7U}S69l>dcwwbriHb0|w}e;gd>HS()p z{mMVeE)FQy0qMMYIfUtF>*EZ<3KiR)-y1xC7JS&-_LhvtcQEV)AHp|&h5OG2J{#lXXshp+bcpQN4EX=1+$+Euqy>%&c&<-BMm_GRmOf8}bs7xy<_{PNnnmpAwL z(L3SVcz(~w@7tH~L%5E=a*~SQscv5{?wzdP%ZE2E_`FY_U+Ai6?C|BuNqYG9@|+!E z<0UNuV1ViG#dG&(Hg7-CqEbo)D;G(xpAFZ$p{dI^ 
zE|$-}5Lg@iU_e0jxz0yb<$pF;um4D_j6n!!^533bUY%e3#bNQRT6WY~5G{unDSE-1 z&namg$c>8~hId@jqP3`Z_vrorWg%Rd*W9AhAs`CaW#^UW()!tgMb7=rhSN*xH3uB) za9=4)_}kwqD|12L_RES-cXIgtG#zpN4Hxv3;_5Yy zeaY#B^z3m~7Wq7VO=HMgcCfy_JU-ph)@j*Sh@KExqSM;@teRKpdlC)XB0c;>x@uGX zKX2+hr$#wkJmca{tpug%lF{Es7#S_y3lJD$6}0L!3{_S6aA`doi$uj9e#{o5j2f($ zvJJ04{P9xiDhuKT5WZ$L5)(M<8rS8T2uW03)^*^e2dfMr%BL%VHu-=MfOb`K#g-n| z8)6_Wj(u$laV1*p$S{3u$FG0=>-77>i?h?S zKjLrx{$0_8-~&;l6-?CtYh)cmp;p;92`6y5(t77yBR0|0gEg(>N>Ym^SQs-Ax}zViY}larV`ZX@jYRhai{={#9iOI5Y?iQ&=a%y zzSL}Uij1^184N72a03YM%FldS}rHgu~1{g zGWNU%7DoHi`-wS4sA)vCQ?qG+bYm{kMd|<<9a!WVz;7J#oN&Bzw<4z_gkth^TeCq? zZ=Oanil$(AZNY0wNsRX2s8G;i;QNUL>7W{#VbbU@Tt%Ik!Ysw3vg0aZGluF3+_%CxcGb#OwsM vF;hP7!V%@>>!nMelsY3!N)5md#Lu3c9y!`%iBCmTNId^POERIDIy(UXY7dJc literal 0 HcmV?d00001 diff --git a/slice/netconfig b/slice/netconfig new file mode 100755 index 0000000..50fe781 --- /dev/null +++ b/slice/netconfig @@ -0,0 +1,124 @@ +#!/bin/sh +# +# Marta Carbone +# Copyright (C) 2009 Universita` di Pisa +# $Id$ +# +# This script is the frontend to be used with +# the vsys system. +# It allows to configure dummynet pipes and queues. 
+# In detail it: +# - get user input +# - send command to the vsys input pipe +# - wait a bit +# - get the reply from the output vsys pipe +# - show the output to the user +# +# A simple usage is: +# ./netconfig -p 9898 +# mandatory fields are the port to be configured +# and dummynet configuration parameters + +PIPE_IN=/vsys/ipfw-be.in +PIPE_OUT=/vsys/ipfw-be.out +LOGFILE=/tmp/netconfig.log + +SED=/bin/sed # used to extract the profile name +DEBUG=0 # 0 disable debug messages +prog=$0 # this program + +# Print the first argument and exit +abort() +{ + echo "$1"; exit 1 +} + +# if $DEBUG is enabled, print a debug message +# $1 the message to print +debug() +{ + [ "${DEBUG}" != "0" ] && echo "$1" >> ${LOGFILE} +} + +# print the usage and exit +help() +{ + cat << EOF +Usage: $prog [-n node_ip][-s slice] [-i key] -d dummynetbox_ip -p port [-U] [-t timeout[STRING]] ipfw_configuration_parameters \n + -h show help \n + -p specify the port to configure \n + -t specify the timeout. STRING can be "1minute" or "4hours" (default 1 hour) \n + + ipfw configuration parameters (mandatory) \n + extra-delay bw delay proto... plr... \n +EOF + exit 0 +} + +# parse input line and fill missing fields +parse_input() +{ + # counters used to check if at least + # two ipfw configuration parameters are present + local OPT_ARGS=2 # optional (timeout) + local MAND_ARGS=2 # mandatory (port) + + [ -z "$1" ] && help; + + while getopts ":hp:t:" opt; do + case $opt in + h | \?) 
help;; + p) PORT=$OPTARG;; + t) TIMEOUT=$OPTARG;; + esac + done + + # check for mandatory arguments + [ -z ${PORT} ] && abort "a port value is mandatory"; + + # the default value for the timeout is 1H + if [ -z ${TIMEOUT} ]; then + TIMEOUT="1hour" + OPT_ARGS=$((${OPT_ARGS}-2)) + fi + + # compute residue argument, we need at least 2 + # mandatory arguments (for ipfw), exit on error + #debug "Passed args $# Mandatory ${MAND_ARGS} Optional ${OPT_ARGS} Extra $(($#-${MAND_ARGS}-${OPT_ARGS}))" + if [ $(($#-${MAND_ARGS}-${OPT_ARGS})) -lt 2 ]; then + help + fi +} + +# main starts here + + # allow ipfw show and ipfw pipe show + if [ x$1 = x"ipfw" ]; then + echo "received" $1 + echo "ipfw" >> ${PIPE_IN} + cat ${PIPE_OUT} + exit 0; + else if [ x$1 = x"pipe" ]; then + echo "pipe" >> ${PIPE_IN} + cat ${PIPE_OUT} + exit 0; + fi + fi + + # parse the input + parse_input $* + shift $((${OPTIND}-1)); + + # print debug + debug "PORT ${PORT}" + debug "TIMEOUT ${TIMEOUT}" + debug "PIPE_CONFIGURATION $*" + + # format CMD as expected by the backend script + CMD="${PORT} ${TIMEOUT} $*"; + + # send the command + debug "Sending: ${CMD}" + echo ${CMD} >> ${PIPE_IN} + + cat ${PIPE_OUT} -- 2.43.0