--- /dev/null
+--- include/asm-mips/param_orig.h 2010-02-23 12:45:58.000000000 +0100
++++ include/asm-mips/param.h 2010-02-23 12:00:31.000000000 +0100
+@@ -41,7 +41,7 @@
+ counter is increasing. This value is independent from the external value
+ and can be changed in order to suit the hardware and application
+ requirements. */
+-# define HZ 100
++# define HZ 1000
+ # define hz_to_std(a) (a)
+
+ #endif /* Not a DECstation */
# We assume that $(USRDIR) contains include/ and lib/ used to build userland.
DATE ?= $(shell date +%Y%m%d)
-SNAPSHOT_NAME=ipfw_mod-$(DATE)
+SNAPSHOT_NAME=$(DATE)-ipfw3.tgz
+BINDIST=$(DATE)-dummynet-linux.tgz
+WINDIST=$(DATE)-dummynet-windows.zip
+
+###########################################
+# windows x86 and x64 specific variables #
+###########################################
+# DRIVE must be the hard drive letter where DDK is installed
+# DDKDIR must be the path to the DDK root directory, without drive letter
+# TARGETOS (x64 only) must be one of the following:
+# wnet -> windows server 2003
+# wlh -> windows vista and windows server 2008
+# win7 -> windows 7
+# future versions must be added here
+export DDK
+export DRIVE
+export DDKDIR
+DRIVE = C:
+DDKDIR = /WinDDK/7600.16385.1
+DDK = $(DRIVE)$(DDKDIR)
+
+TARGETOS=win7
_all: all
-all clean distclean:
+clean distclean:
echo target is $(@)
(cd ipfw && $(MAKE) $(@) )
(cd dummynet2 && $(MAKE) $(@) )
+ # -- windows x64 only
+ - rm -rf dummynet2-64
+ - rm -rf ipfw-64
+ - rm -rf binary64
+
+# all: run "make all" in ipfw/ and in dummynet2/, then copy the Windows
+# artifacts (ipfw.exe and ipfw.sys) into binary/ when they exist.
+# The two copy steps carry a '-' prefix, so a failing [ -f ... ] test
+# (file not built) does not stop the build.
+all:
+ echo target is $(@)
+ (cd ipfw && $(MAKE) $(@) )
+ (cd dummynet2 && $(MAKE) $(@) )
+ # -- windows only
+ - [ -f ipfw/ipfw.exe ] && cp ipfw/ipfw.exe binary/ipfw.exe
+ - [ -f dummynet2/objchk_wxp_x86/i386/ipfw.sys ] && \
+ cp dummynet2/objchk_wxp_x86/i386/ipfw.sys binary/ipfw.sys
snapshot:
- (cd ..; tar cvzhf /tmp/$(SNAPSHOT_NAME).tgz --exclude .svn \
+ $(MAKE) distclean
+ (cd ..; tar cvzhf /tmp/$(SNAPSHOT_NAME) --exclude .svn \
--exclude README.openwrt --exclude tags --exclude NOTES \
- ipfw_mod )
+ --exclude tcc-0.9.25-bsd \
+ --exclude original_passthru \
+ --exclude ipfw3.diff --exclude add_rules \
+ ipfw3 )
+
+# bindist: clean, rebuild everything, then pack ipfw/ipfw, ipfw/ipfw.8
+# and dummynet2/ipfw_mod.ko into /tmp/$(BINDIST).
+bindist:
+ $(MAKE) clean
+ $(MAKE) all
+ tar cvzf /tmp/$(BINDIST) ipfw/ipfw ipfw/ipfw.8 dummynet2/ipfw_mod.ko
+
+# windist: clean, rebuild, then zip the binary/ directory into
+# /tmp/$(WINDIST), excluding anything matching *.svn*.
+# Errors from "make all" and from removing a previous archive are
+# ignored ('-' prefix), so the archive is created even if they fail.
+windist:
+ $(MAKE) clean
+ -$(MAKE) all
+ -rm /tmp/$(WINDIST)
+ zip -r /tmp/$(WINDIST) binary -x \*.svn\*
+
+# win64: build the 64-bit Windows variant in separate directories.
+# Runs "clean" first (which removes ipfw-64, dummynet2-64 and binary64),
+# then:
+#  - runs the include_e target in dummynet2,
+#  - copies ipfw to ipfw-64, appends -D_X64EMU to EXTRA_CFLAGS in the
+#    copied Makefile and builds it,
+#  - copies dummynet2 to dummynet2-64, replacing its Makefile with
+#    win64/sources,
+#  - calls win64/mysetenv.sh with $(DRIVE), $(DDKDIR) and $(TARGETOS)
+#    (presumably this performs the DDK build of the driver; confirm in
+#    the script),
+#  - collects cygwin1.dll, ipfw.exe, the .inf files, testme.bat and
+#    wget.exe into binary64/.
+win64: clean
+ (cd dummynet2 && $(MAKE) include_e)
+ cp -r ipfw ipfw-64
+ echo "EXTRA_CFLAGS += -D_X64EMU" >> ipfw-64/Makefile
+ (cd ipfw-64 && $(MAKE) all)
+ cp -r dummynet2 dummynet2-64
+ rm -f dummynet2-64/Makefile
+ cp win64/sources dummynet2-64/sources
+ mkdir dummynet2-64/tmpbuild
+ mkdir binary64
+ win64/mysetenv.sh $(DRIVE) $(DDKDIR) $(TARGETOS)
+ cp binary/cygwin1.dll binary64/cygwin1.dll
+ cp ipfw-64/ipfw.exe binary64/ipfw.exe
+ cp win64/*.inf binary64
+ cp binary/testme.bat binary64/testme.bat
+ cp binary/wget.exe binary64/wget.exe
+
+
+# planetlab_update: stage an update of the PlanetLab copy of ipfw from the
+# main ipfw3 repository. Everything happens under /tmp/pl-tmp and nothing
+# is committed: the target checks out the PlanetLab trunk, exports ipfw3,
+# copies the export over the checkout, removes the paths still reported
+# by "diff -r" (lines mentioning svn excluded), and finally prints the
+# commands to review and commit the result by hand.
+# Every subshell uses "cd DIR && ..." so that a failed cd can never let
+# the following cp / rm -rf run in the wrong directory.
+planetlab_update:
+ # clean and create a local working directory
+ rm -rf /tmp/pl-tmp
+ mkdir -p /tmp/pl-tmp/pl
+ mkdir -p /tmp/pl-tmp/ol2
+ # get the trunk version of the PlanetLab repository
+ # to specify the sshkey use the .ssh/config file
+ (cd /tmp/pl-tmp/pl && \
+ svn co svn+ssh://svn.planet-lab.org/svn/ipfw/trunk)
+ # get an updated copy of the main ipfw repository
+ (cd /tmp/pl-tmp/ol2 && \
+ svn export svn+ssh://onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw3)
+ # copy the new version over the old one
+ (cd /tmp/pl-tmp && cp -rP ol2/ipfw3/* pl/trunk)
+ # files cleanup in the old version
+ (cd /tmp/pl-tmp && diff -r ol2/ipfw3 pl/trunk | \
+ grep -v "svn" | awk '{print $$3 $$4}' | \
+ sed 's/:/\//' | xargs rm -rf)
+ # local adjustments here
+ rm -rf /tmp/pl-tmp/pl/trunk/planetlab/check_planetlab_sync
+ # commit to the remote repo
+ @echo "Please, revise the update with the commands:"
+ @echo "(cd /tmp/pl-tmp/pl/trunk; svn diff)"
+ @echo "(cd /tmp/pl-tmp/pl/trunk; svn status)"
+ @echo "and commit with:"
+ @echo "(cd /tmp/pl-tmp/pl/trunk; svn ci -m 'Update from the main ipfw repo.')"
install:
# Makefile to build the package in openwrt.
-# goes into package/ipfw2/Makefile
+# goes into package/ipfw3/Makefile
#
# Edit IPFW_DIR to point to the directory with the sources for ipfw
-IPFW_DIR := $(TOPDIR)/../ipfw_mod
+IPFW_DIR := $(TOPDIR)/../ipfw3
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
-PKG_NAME:=kmod-ipfw2
+PKG_NAME:=kmod-ipfw3
PKG_RELEASE:=1
# MV is undefined
include $(INCLUDE_DIR)/package.mk
# Description for the package.
-# The names KernelPackage/ipfw2 must match the arguments to the
-# call $(eval $(call KernelPackage,ipfw2)) used to build it
+# The names KernelPackage/ipfw3 must match the arguments to the
+# call $(eval $(call KernelPackage,ipfw3)) used to build it
-define KernelPackage/ipfw2
+define KernelPackage/ipfw3
SUBMENU:=Other modules
TITLE:= IPFW and dummynet
# FILES is what makes up the module, both kernel and userland
# It must be in the KernelPackage section
- FILES := $(PKG_BUILD_DIR)/dummynet/ipfw_mod.o $(PKG_BUILD_DIR)/ipfw/ipfw
+ FILES := $(PKG_BUILD_DIR)/dummynet2/ipfw_mod.o $(PKG_BUILD_DIR)/ipfw/ipfw
# AUTOLOAD:=$(call AutoLoad,80,ipfw_mod)
endef
-define KernelPackage/ipfw2/description
+define KernelPackage/ipfw3/description
This package contains the ipfw and dummynet module
endef
mkdir -p $(PKG_BUILD_DIR)
$(CP) -Rp $(IPFW_DIR)/* $(PKG_BUILD_DIR)/
(cd $(PKG_BUILD_DIR)/ipfw && $(MAKE) include_e )
- (cd $(PKG_BUILD_DIR)/dummynet && $(MAKE) include_e )
(cd $(PKG_BUILD_DIR)/dummynet2 && $(MAKE) include_e )
endef
define Build/Compile
# compile the kernel part for openwrt
- $(MAKE) -C "$(LINUX_DIR)" \
- CROSS_COMPILE="$(TARGET_CROSS)" \
- ARCH="$(LINUX_KARCH)" \
- SUBDIRS="$(PKG_BUILD_DIR)/dummynet" \
- VER=openwrt modules
$(MAKE) -C "$(LINUX_DIR)" \
CROSS_COMPILE="$(TARGET_CROSS)" \
ARCH="$(LINUX_KARCH)" \
# compile the userland part for openwrt
$(MAKE) -C $(PKG_BUILD_DIR)/ipfw \
$(TARGET_CONFIGURE_OPTS) \
- CFLAGS="$(TARGET_CFLAGS) -I./include_e -I./include -include ../glue.h" \
+ CFLAGS="$(TARGET_CFLAGS) -DSYSCTL_NODE -DEMULATE_SYSCTL -I./include_e -I./include -include ../glue.h -DNO_ALTQ" \
VER=openwrt all
endef
-define Package/ipfw2-userland
+define Package/ipfw3-userland
SECTION:=utils
CATEGORY:=Utilities
TITLE := /sbin/ipfw
DESCRIPTION := This is the control program for ipfw and dummynet
endef
-define Package/ipfw2-userland/install
+define Package/ipfw3-userland/install
$(INSTALL_DIR) $(1) /sbin
endef
# XXX not entirely clear why the install entry for userland works,
-# given that /sbin/ipfw is in KernelPackage/ipfw2
+# given that /sbin/ipfw is in KernelPackage/ipfw3
-$(eval $(call Package,ipfw2-userland))
-$(eval $(call KernelPackage,ipfw2))
+$(eval $(call Package,ipfw3-userland))
+$(eval $(call KernelPackage,ipfw3))
#
-# $Id: NOTES 2844 2009-06-22 10:59:35Z luigi $
+# $Id: NOTES 5355 2010-02-18 18:58:43Z luigi $
#
---------------------------------------------------------------------
data.
-TODO (LINUX), 20090622:
+TODO 20100205:
+ use an appropriate identifier instead of LINUX24
+ find the discharging module hook, in order to force a queue flush
-+ use the output interface as a clock for the pipe
+ better matching on interface names (case insensitive etc ?)
+ match by interface address
+ verify path
+ send keepalives
-+ tables support
-+ uid/gid match (through the socket ?)
-+ pullup or data in external buffers
++ pullup of data in external buffers
+ O_TAG
+ O_DIVERT
+ O_TEE
+ O_SETFIB
+ kmem_cache_alloc
-TODO (WINDOWS) 20090622
-+ all of the above, once it works
-# svn
-https://svn.sourceforge.net/svnroot/wipfw
-
TODO (OpenWRT) 20090622
+ add a module compilation for 2.6
------ WINDOWS PORT ------
-A port to windows is still in progress.
-This directory contain a port of ipfw and dummynet to Windows.
-
---- BACKGROUND:
-
We started from the wipfw port available at [WIPFW] , but
most of the port is done from scratch using the most recent
version of ipfw+dummynet from HEAD/RELENG_7 as of March 2009
gcc attributes are also not present.
C99 types are not present, remapped in <sys/cdefs.h>
+Also, we don't have C99 initializers, which sometimes causes trouble.
--- USEFUL LINKS:
[CYGWIN]
http://www.cygwin.com/setup.exe
+Windows Driver Kit
+http://www.microsoft.com/whdc/DevTools/WDK/WDKpkg.mspx
+
+Debug Symbols for WinXP SP3
+http://www.microsoft.com/whdc/devtools/debugging/symbolpkg.mspx#d
+
+DbgView
+http://technet.microsoft.com/en-us/sysinternals/bb896647.aspx
+
+Cygwin
+http://www.cygwin.com/
+(install the default packages plus the "devel" category)
+
+Winrar (the WDK is distributed as an .iso file)
+http://www.rarlab.com/download.htm
+
+puttycyg (terminal for cygwin)
+http://code.google.com/p/puttycyg/
+
+Tortoise SVN
+http://tortoisesvn.net/downloads
+
+EditPlus
+http://www.editplus.com/
#
-# $Id: README 4502 2009-12-15 11:10:33Z marta $
+# $Id: README 6070 2010-04-15 11:58:21Z marta $
#
-This directory contains a port of ipfw and dummynet to Linux and OpenWrt
-(including PlanetLab). A Windows version is in the works but not ready yet.
-Building the code produces:
-
- a kernel module, ipfw_mod.ko
- a userland program, /sbin/ipfw
-
+This directory contains a port of ipfw and dummynet to Linux/OpenWrt
+(including PlanetLab) and Windows. This version of ipfw and dummynet
+is called "ipfw3" as it is the third major rewrite of the code.
The source code here comes straight from FreeBSD (roughly the
-version in RELENG_7 and HEAD as of December 2009), plus some glue code
+version in HEAD as of February 2010), plus some glue code
and headers written from scratch.
Unless specified otherwise, all the code here is under a BSD license.
-Note:
- - the linux version miss the "one_pass" feature
+Specific build instructions are below, and in general produce
+
+ a kernel module, ipfw_mod.ko (ipfw.sys on windows)
+ a userland program, /sbin/ipfw (ipfw.exe on windows)
+
+which you need to install on your system.
+
+CREDITS:
+ Luigi Rizzo (main design and development)
+ Marta Carbone (Linux and Planetlab ports)
+ Riccardo Panicucci (modular scheduler support)
+ Francesco Magno (Windows port)
+ Fabio Checconi (the QFQ scheduler)
+ Funding from Universita` di Pisa (NETOS project),
+ European Commission (ONELAB2 project)
+
+=========== INSTALL/REMOVE INSTRUCTIONS ========================
+
+FreeBSD, OSX:
+ INSTALL:
+ kldload ipfw.ko ; kldload dummynet.ko
+ REMOVE:
+ kldunload dummynet.ko; kldunload ipfw.ko
+
+Linux
+ INSTALL:
+ # Do the following as root
+ insmod ./dummynet2/ipfw_mod.ko
+ cp ipfw/ipfw /usr/local/sbin
+ REMOVE:
+ rmmod ipfw_mod.ko
+
+OpenWRT
+ INSTALL: # use the correct name for your system
+ opkg install kmod-ipfw3_2.4.35.4-brcm-2.4-1_mipsel.ipk #install
+ ls -l /lib/modules/2.4.35.4/ipfw* # check
+ insmod /lib/modules/2.4.35.4/ipfw_mod.o # load the module
+ /lib/modules/2.4.35.4/ipfw show # launch the userspace tool
+ REMOVE:
+ rmmod ipfw_mod.o # remove the module
+
+Windows:
+ INSTALL THE NDIS DRIVER
+
+ - open the configuration panel for the network card in use
+ (right click on the icon on the SYSTRAY, or go to
+ Control Panel -> Network and select one card)
+
+ - click on Properties->Install->Service->Add
+ - click on 'Driver Disk' and select 'netipfw.inf' in this folder
+ - select 'ipfw+dummynet' which is the only service you should see
+ - click accept on the warnings for the installation of an unknown
+ driver (roughly twice per existing network card)
+
+ Now you are ready to use the emulator. To configure it, open a 'cmd'
+ window and you can use the ipfw command from the command line.
+ Otherwise click on the 'TESTME.bat' which is a batch program that
+ runs various tests.
+
+ REMOVE:
+ - select a network card as above.
+ - click on Properties
+ - select 'ipfw+dummynet'
+ - click on 'Remove'
+
=================== BUILD INSTRUCTIONS ==========================
+***** Windows XP ******
+ You can find a pre-built version in the binary/ subdirectory.
+ To build your own version of the package you need:
+ - MSVC DDK available from ...
+ http://www.microsoft.com/whdc/DevTools/WDK/WDKpkg.mspx
+
+ - optionally, DbgView if you want to see diagnostic
+ http://technet.microsoft.com/en-us/sysinternals/bb896647.aspx
+
+ - cygwin, http://www.cygwin.com/
+ with base packages, make, c compiler, possibly an editor
+ and subversion.
+
+ Edit Makefile in the root directory, and set configuration
+ variables to match your current system (hard drive
+ and path where DDK is installed)
+ Open a shell from cygwin, move to this directory, and simply
+ run "make". The output of the build will be in this
+ directory, made of 4 files:
+ ipfw.exe (you also need cygwin1.dll)
+ ipfw.sys (an NDIS intermediate filter driver)
+ netipfw.inf and netipfw_m.inf (installer files)
+
+***** Windows crosscompilation for 64 bit using DDK ******
+ Edit root directory's Makefile and set target
+ operating system
+ From the root directory, run 'make win64', this will:
+ - create ipfw-64 and dummynet2-64 subdirs
+ - patch ipfw makefile to support communication
+ with 64bit module and build it
+ - replace dummynet makefile with proprietary
+ WinDDK one, named 'sources', and build the module
+ - create a binary64 directory containing
+ module and .inf install files, program
+ binary and relative cygwin dll
+ - install the driver from this directory in the
+ usual way.
+
***** Linux 2.6.x ******
make KERNELPATH=/path/to/linux USRDIR=/path/to/usr
KERNELPATH=/lib/modules/`uname -r`/build --- XXX check ?
NOTE: make sure CONFIG_NETFILTER is enabled in the kernel
- configuration file. You can enable it by doing
+ configuration file. You need the ncurses devel library,
+ which can be installed, depending on your distro, with:
+ apt-get install ncurses-dev # for debian based distro
+ yum -y install ncurses-devel # for fedora based distro
+ You can enable CONFIG_NETFILTER by doing:
"(cd ${KERNELPATH}; make menuconfig)"
Networking options --->
[*] Network packet filtering framework (Netfilter)
- If you have not yet compiled your kernel source, you need to
- prepare the build environment:
+ If you have not yet compiled your kernel source, you need to
+ prepare the build environment:
(cd $(KERNELPATH); make oldconfig; make prepare; make scripts)
wget http://downloads.openwrt.org/kamikaze/8.09.1/kamikaze_8.09.1_source.tar.bz2
tar xvjf kamikaze_8.09.1_source.tar.bz2
- + "cd" to the directory with the OpenWrt sources (the one that
+ + move to the directory with the OpenWrt sources (the one that
contains Config.in, rules.mk ...)
cd kamikaze_8.09.1
+ + Optional: Add support for 1ms resolution.
+
+ By default OpenWRT kernel is compiled with HZ=100; this implies
+ that all timeouts are rounded to 10ms, too coarse for dummynet.
+ The file 020-mips-hz1000.patch contains a kernel patch to build
+ a kernel with HZ=1000 (i.e. 1ms resolution) as in Linux/FreeBSD.
+ To apply this patch, go in the kernel source directory and
+ patch the kernel
+
+ cd build_dir/linux-brcm-2.4/linux-2.4.35.4
+ cat $IPFW3_SOURCES/020-mips-hz1000.patch | patch -p0
+
+ where IPFW3_SOURCES contains the ipfw3 source code.
+ Now, the next kernel recompilation will use the right HZ value
+
+ Optional: to be sure that the tools are working, make a first
- compilation as follows:
+ build as follows:
- run "make menuconfig" and set the correct target device,
drivers, and so on;
- run "make" to do the build
- + Add ipfw2 to the openwrt package, as follows:
+ + Add ipfw3 to the openwrt package, as follows:
- copy the code from this directory to the place used for the build:
- cp -Rp /path_to_ipfw_mod ../ipfw_mod;
+ cp -Rp /path_to_ipfw3 ../ipfw3;
If you want, you can fetch a newer version from the web
- (cd ..; rm -rf ipfw_mod;
- wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_mod-latest.tgz;\
- tar xvzf ipfw_mod-latest.tgz)
+ (cd ..; rm -rf ipfw3; \
+ wget http://info.iet.unipi.it/~luigi/dummynet/ipfw3-latest.tgz;\
+ tar xvzf ipfw3-latest.tgz)
- run the following commands:
- (mkdir package/ipfw2;
- cp ../ipfw_mod/Makefile.openwrt package/ipfw2/Makefile)
+ (mkdir package/ipfw3; \
+ cp ../ipfw3/Makefile.openwrt package/ipfw3/Makefile)
- to create the package/ipfw2 directory in the OpenWrt source
- directory, and copy Makefile.openwrt to package/ipfw2/Makefile:
+ to create the package/ipfw3 directory in the OpenWrt source
+ directory, and copy Makefile.openwrt to package/ipfw3/Makefile ;
- - if necessary, edit package/ipfw2/Makefile and set IPFW_DIR to point to
- the directory ipfw_mod, which contains the ipfw sources
+ - if necessary, edit package/ipfw3/Makefile and set IPFW_DIR to point to
+ the directory ipfw3, which contains the sources;
- - run "make menuconfig" and select ipfw2 as a module <M> in
- Kernel Modules -> Other modules -> kmod-ipfw2
+ - run "make menuconfig" and select kmod-ipfw3 as a module <M> in
+ Kernel Modules -> Other modules -> kmod-ipfw3
- run "make" to build the package, "make V=99" for verbose build.
- to modify the code, assuming you are in directory "kamikaze_8.09.1"
- (cd ../ipfw_mod && vi ...the files you are interested in )
- rm -rf build_dir/linux-brcm-2.4/kmod-ipfw2
- make package/ipfw2/compile V=99
+ (cd ../ipfw3 && vi ...the files you are interested in )
+ rm -rf build_dir/linux-brcm-2.4/kmod-ipfw3
+ make package/ipfw3/compile V=99
- The resulting package is located in bin/packages/mipsel/kmod-ipfw2*,
+ The resulting package is located in bin/packages/mipsel/kmod-ipfw3*,
upload the file and install on the target system, as follows:
- opkg install kmod-ipfw2_2.4.35.4-brcm-2.4-1_mipsel.ipk #install
+ opkg install kmod-ipfw3_2.4.35.4-brcm-2.4-1_mipsel.ipk #install
ls -l ls -l /lib/modules/2.4.35.4/ipfw* # check
insmod /lib/modules/2.4.35.4/ipfw_mod.o # load the module
/lib/modules/2.4.35.4/ipfw show # launch the userspace tool
rmmod ipfw_mod.o # remove the module
***** PLANETLAB BUILD (within a slice) *****
-These instruction can be used by PlanetLab developers to compile the dummynet module
-on a node. To install the module on the node users need root access in root context.
-PlanetLab users that want to use the dummynet package should ask to PlanetLab support
-for nodes with dummynet emulation capabilities.
+These instructions can be used by PlanetLab developers to compile
+the dummynet module on a node. To install the module on the node
+users need root access in root context. PlanetLab users who want
+to use the dummynet package should ask PlanetLab support for
+nodes with dummynet emulation capabilities.
Follow the instructions below. You can just cut&paste
then after you have updated the repository again
(cd test/XYZ; sudo make ipfwslice ipfwroot)
---- other, instructions (to be verified) ---
-
-To build a kernel module for the PlanetLab distribution you need a build system.
-For an up-to-date and detailed information on how to build a local myplc installation,
-a local mirror, a PlanetLab test system see[1]
-
-To create a build system you need to do the following steps:
-
- 1. install CentOS 5, detailed information[2]
-
- 1.A download the image from the main site[3] for example:
-
- wget http://mi.mirror.garr.it/mirrors/CentOS/5.4/isos/i386/CentOS-5.4-i386-netinstall.iso
-
- 1.B Add the repository
-
- cat >> /etc/yum.repos.d/dhozac-vserver.repo <<EOF
- [dhozac-vserver]
-name=Linux-VServer related packages for CentOS $releasever - $basearch
-baseurl=http://rpm.hozac.com/dhozac/centos/$releasever/vserver/$basearch
-gpgkey=http://rpm.hozac.com/conf/keys/RPM-DHOZAC-GPG-KEY
-EOF
-
- 1.C Update, install and config the system
-
- yum update yum
- yum install kernel
- yum install util-vserver{,-core,-lib,-sysv,-build}
- yum install vim
- yum install subversion
- /etc/init.d/vprocunhide start
- chkconfig vservers-default on
-
- 2. create a vserver
-
- 2.A Checkout the planetlab build
-
- cd
- svn co http://svn.planet-lab.org/svn/build/trunk svn-build
-
- 2.B Search for a working RPM distribution in:
-
- http://build.onelab.eu/onelab/
- # good distribution ends in .ok, bad in .ko
- # in this example we used the following:
- http://build.onelab.eu/onelab/2008.03.02--onelab-f8-linux32/RPMS/
-
- 2.C Creating a vserver
-
- cd ~/svn-build
- ./vtest-init-vserver.sh -f f8 -d onelab -p linux32 mybuild \
- http://build.onelab.eu/onelab/2008.03.02--onelab-f8-linux32/RPMS/ \
- -- --interface eth0:138.96.255.221 --hostname vnode01.inria.fr &> mybuild.log&
-
- 3. create the build
-
- 3.A Enter on the vserver, and create the build
-
- vserver mybuild enter
- cd \
- svn co http://svn.planet-lab.org/svn/build/trunk build
-
- 4. build
-
- 4.A build[4]
- cd /build
-
- # full cleanup
- make distclean
-
- # the compilation is composed by several steps,
- # make help for more information
- # the first for the onelab compilation will download
- # the SPEC file from the repository specified in
- # onelab-tags.mk
- make stage1=true PLDISTRO=onelab
-
- # to download and build a module, for example ipfw:
- make ipfw
-
- # to do local changes
- cd /build/CODEBASE
- rm -rf ipfw
- # download the ipfw sources and extract it into ./ipfw
- # by svn
- svn+ssh://onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw_mod ./ipfw
- # from web
- wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_mod-latest.tgz
- tar xvzf ipfw_mod-latest.tgz
-
- # start the compilation
- rm -rf SOURCES/ipfw*
- rm -rf BUILD/ipfw-0.1/
- rm -rf SRPMS/ipfw*
- rm -rf RPMS/i386/ipfw*
- make ipfw
-
- 5. download and install sources into a node
-
- 5.A Copy RPMS into the node and install it:
- # exit from the root context
- exit
- scp /vserver/mybuild/build/RPMS/i386/ipfw-* root@node.iet.unipi.it:
- ssh root@node.iet.unipi.it
- rpm -e ipfw
- rpm -ivh ./ipfw-0-9...TAB
- modprobe ipfw_mod
-
- # the ipfw package should be installed
- ipfw show
-
--- References
[1] https://svn.planet-lab.org/wiki/VserverCentos
[2] http://wiki.linux-vserver.org/Installation_on_CentOS
+++ /dev/null
-OpenWrt on ASUSWL-520GC (admin, admin)
-
-Notes:
- The firmware installed is the version: 2.0.1.1, the ip address
- is 192.168.1.1, the web gui can be used by admin, admin.
- After reflashing the board, the old firmware should be available
- in the cdrom shipped with the router.
-
-OpenWRT compatility:
-1. The 2.4 kernel version has some troubles accessing the flash
- and this will actually make the board hang after a while.
-
-2. The 2.6 kernel does not have the same issue, except for the
- open source b43 wireless driver, that make the board reboot
- as soon as it is loaded.
-
-For this reason, when configuring the kernel option from the toolchain
-menu, the wireless target profile to choose should be `No WiFi'
-
-Flash the board:
-1. The compiled binary images are under the main tree, ./bin the file
- to be used is openwrt-brcm47xx-squashfs.trx.
-2. Start the router in diag mode and goes with tftp, as follows:
- tftp 192.168.1.1
- tftp> bin
- tftp> put openwrt-brcm47xx-squashfs.trx
- Sent 1904640 bytes in 5.4 seconds
- tftp>
-3. Wait for 10 minutes, the reboot
--- /dev/null
+This directory contains the binaries to install and use IPFW and\r
+DUMMYNET on a Windows Machine. The kernel part is an NDIS module,\r
+whereas the user interface is a command line program.\r
+\r
+1. INSTALL THE NDIS DRIVER\r
+\r
+- open the configuration panel for the network card in use\r
+ (either right click on the icon on the SYSTRAY, or go to\r
+ Control Panel -> Network and select one card)\r
+\r
+- click on Properties->Install->Service->Add\r
+- click on 'Driver Disk' and select 'netipfw.inf' in this folder\r
+- select 'ipfw+dummynet' which is the only service you should see\r
+- click accept on the warnings for the installation of an unknown\r
+ driver (roughly twice per existing network card)\r
+\r
+Now you are ready to use the emulator. To configure it, open a 'cmd'\r
+window and you can use the ipfw command from the command line.\r
+Otherwise click on the 'TESTME.bat' which is a batch program that\r
+runs various tests.\r
+\r
+2. UNINSTALL THE DRIVER\r
+\r
+- select a network card as above.\r
+- click on Properties\r
+- select 'ipfw+dummynet'\r
+- click on 'Remove'\r
--- /dev/null
+; version section\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = NetService\r
+ClassGUID = {4D36E974-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Unipi%\r
+; DriverVer format is mm/dd/yyyy,w.x.y.z (this is 26 February 2010)\r
+DriverVer = 02/26/2010,3.0.0.1\r
+\r
+; manufacturer section\r
+[Manufacturer]\r
+%Unipi% = UNIPI,NTx86\r
+\r
+; control flags section\r
+; optional, unused in netipfw.inf, used in netipfw_m.inf\r
+[ControlFlags]\r
+\r
+; models section\r
+[UNIPI] ; Win2k\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+[UNIPI.NTx86] ;For WinXP and later\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+\r
+; ddinstall section\r
+[Ipfw.ndi]\r
+AddReg = Ipfw.ndi.AddReg, Ipfw.AddReg\r
+Characteristics = 0x4410 ; NCF_FILTER | NCF_NDIS_PROTOCOL !--Filter Specific--!!\r
+CopyFiles = Ipfw.Files.Sys\r
+CopyInf = netipfw_m.inf\r
+\r
+; remove section\r
+[Ipfw.ndi.Remove]\r
+DelFiles = Ipfw.Files.Sys\r
+\r
+;ddinstall.services section\r
+[Ipfw.ndi.Services]\r
+AddService = Ipfw,,Ipfw.AddService\r
+\r
+[Ipfw.AddService]\r
+DisplayName = %ServiceDesc%\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\ipfw.sys\r
+AddReg = Ipfw.AddService.AddReg\r
+\r
+[Ipfw.AddService.AddReg]\r
+\r
+;file copy related sections\r
+[SourceDisksNames]\r
+1=%DiskDescription%,"",,\r
+\r
+[SourceDisksFiles]\r
+ipfw.sys=1\r
+\r
+[DestinationDirs]\r
+DefaultDestDir = 12\r
+Ipfw.Files.Sys = 12 ; %windir%\System32\drivers\r
+\r
+; ddinstall->copyfiles points here\r
+[Ipfw.Files.Sys]\r
+ipfw.sys,,,2\r
+\r
+; ddinstall->addreg points here\r
+[Ipfw.ndi.AddReg]\r
+HKR, Ndi, HelpText, , %HELP% ; this is displayed at the bottom of the General page of the Connection Properties dialog box\r
+HKR, Ndi, FilterClass, , failover\r
+HKR, Ndi, FilterDeviceInfId, , unipi_ipfwmp\r
+HKR, Ndi, Service, , Ipfw\r
+HKR, Ndi\Interfaces, UpperRange, , noupper\r
+HKR, Ndi\Interfaces, LowerRange, , nolower\r
+HKR, Ndi\Interfaces, FilterMediaTypes, , "ethernet, tokenring, fddi, wan"\r
+\r
+;strings section\r
+[Strings]\r
+Unipi = "Unipi"\r
+DiskDescription = "Ipfw Driver Disk"\r
+Desc = "ipfw+dummynet"\r
+HELP = "This is ipfw and dummynet network emulator, developed by unipi.it"\r
+ServiceDesc = "ipfw service"\r
--- /dev/null
+; version section\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = Net\r
+ClassGUID = {4D36E972-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Unipi%\r
+; DriverVer format is mm/dd/yyyy,w.x.y.z (this is 26 February 2010)\r
+DriverVer = 02/26/2010,3.0.0.1\r
+\r
+; control flags section\r
+; optional, unused in netipfw.inf, used in netipfw_m.inf\r
+[ControlFlags]\r
+ExcludeFromSelect = unipi_ipfwmp\r
+\r
+; destinationdirs section, optional\r
+[DestinationDirs]\r
+DefaultDestDir=12\r
+; No files to copy \r
+\r
+; manufacturer section\r
+[Manufacturer]\r
+%Unipi% = UNIPI,NTx86\r
+\r
+; models section\r
+[UNIPI] ; Win2k\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+[UNIPI.NTx86] ;For WinXP and later\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+\r
+; ddinstall section\r
+[IpfwMP.ndi]\r
+AddReg = IpfwMP.ndi.AddReg\r
+Characteristics = 0x29 ;NCF_NOT_USER_REMOVABLE | NCF_VIRTUAL | NCF_HIDDEN\r
+\r
+; ddinstall->addreg points here\r
+[IpfwMP.ndi.AddReg]\r
+HKR, Ndi, Service, 0, IpfwMP\r
+\r
+;ddinstall.services section\r
+[IpfwMP.ndi.Services]\r
+AddService = IpfwMP,0x2, IpfwMP.AddService\r
+\r
+[IpfwMP.AddService]\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\ipfw.sys\r
+AddReg = IpfwMP.AddService.AddReg\r
+\r
+[IpfwMP.AddService.AddReg]\r
+; None\r
+\r
+[Strings]\r
+Unipi = "Unipi"\r
+Desc = "Ipfw Miniport"
\ No newline at end of file
--- /dev/null
+@echo on\r
+@set CYGWIN=nodosfilewarning\r
+\r
+@ipfw -q flush\r
+@ipfw -q pipe flush\r
+@echo ######################################################################\r
+@echo ## Setting delay to 100ms for both incoming and outgoing ip packets ##\r
+@echo ## and sending 4 echo request to Google ##\r
+@echo ######################################################################\r
+ipfw pipe 3 config delay 100ms mask all\r
+ipfw add pipe 3 ip from any to any\r
+ipfw pipe show\r
+ping -n 4 www.google.it\r
+\r
+@echo ##############################################\r
+@echo ## Raising delay to 300ms and pinging again ##\r
+@echo ##############################################\r
+ipfw pipe 3 config delay 300ms mask all\r
+ipfw pipe show\r
+ping -n 4 www.google.com\r
+\r
+@echo ##################################\r
+@echo ## Shaping bandwidth to 500kbps ##\r
+@echo ##################################\r
+ipfw pipe 3 config bw 500Kbit/s mask all\r
+ipfw pipe show\r
+wget http://info.iet.unipi.it/~luigi/1m\r
+@del 1m\r
+\r
+@echo ###################################\r
+@echo ## Lowering bandwidth to 250kbps ##\r
+@echo ###################################\r
+ipfw pipe 3 config bw 250Kbit/s mask all\r
+ipfw pipe show\r
+wget http://info.iet.unipi.it/~luigi/1m\r
+@del 1m\r
+\r
+@echo ###################################################################\r
+@echo ## Simulating 50 percent packet loss and sending 15 echo request ##\r
+@echo ###################################################################\r
+@ipfw -q flush\r
+@ipfw -q pipe flush\r
+ipfw add prob 0.5 deny proto icmp in\r
+ping -n 15 -w 300 www.google.it\r
+@ipfw -q flush\r
+\r
+@echo ##############################\r
+@echo ## Showing SYSCTL variables ##\r
+@echo ##############################\r
+ipfw sysctl -a\r
+\r
+@echo #############################################\r
+@echo ## Inserting rules to test command parsing ##\r
+@echo #############################################\r
+@echo -- dropping all packets of a specific protocol --\r
+ipfw add deny proto icmp\r
+@echo -- dropping packets of all protocols except a specific one --\r
+ipfw add deny not proto tcp\r
+@echo -- dropping all packets from IP x to IP y --\r
+ipfw add deny src-ip 1.2.3.4 dst-ip 5.6.7.8\r
+@echo -- dropping all ssh outgoing connections --\r
+ipfw add deny out dst-port 22\r
+@echo -- allowing already opened browser connections --\r
+@echo -- but preventing new ones from being opened --\r
+ipfw add deny out proto tcp dst-port 80 tcpflags syn\r
+@echo -- another way to do the same thing --\r
+ipfw add allow out proto tcp dst-port 80 established\r
+ipfw add deny out proto tcp dst-port 80 setup\r
+@echo -- checking what rules have been inserted --\r
+ipfw -c show\r
+@ipfw -q flush\r
+\r
+@echo #################\r
+@echo ## Cleaning up ##\r
+@echo #################\r
+ipfw -q flush\r
+ipfw -q pipe flush\r
+\r
+pause\r
-#
-# $Id: Makefile 4657 2010-01-04 11:20:53Z marta $
-#
-# gnu Makefile to build linux module for ipfw+dummynet.
+# $Id: Makefile 5858 2010-03-24 16:16:19Z svn_magno $
+# gnu Makefile to build linux/Windows module for ipfw+dummynet.
#
# The defaults are set to build without modifications on PlanetLab
# and possibly 2.6 versions.
+# On Windows, we use gnu-make and MSC
# Some variables need to have specific names, because they are used
# by the build infrastructure on Linux and OpenWrt. They are:
# VER linux version we are building for (2.4 2.6 or openwrt)
#---
+UNAME:=$(shell uname)
$(warning including dummynet/Makefile)
# lets default for 2.6 for planetlab builds
#-- the list of source files. IPFW_SRCS is our own name.
# Original ipfw and dummynet sources + FreeBSD stuff,
-IPFW_SRCS := ip_fw2.c ip_dummynet.c ip_fw_pfil.c ip_fw_sockopt.c
+IPFW_SRCS := ip_fw2.c ip_fw_pfil.c ip_fw_sockopt.c
IPFW_SRCS += ip_fw_dynamic.c ip_fw_table.c ip_fw_log.c
IPFW_SRCS += radix.c in_cksum.c
+IPFW_SRCS += ip_dummynet.c ip_dn_io.c ip_dn_glue.c
+IPFW_SRCS += dn_heap.c
+IPFW_SRCS += dn_sched_fifo.c dn_sched_wf2q.c
+IPFW_SRCS += dn_sched_rr.c dn_sched_qfq.c
+IPFW_SRCS += dn_sched_prio.c
# Module glue and functions missing in linux
IPFW_SRCS += ipfw2_mod.c bsd_compat.c
$(warning "---- Building dummynet kernel module for Version $(VER)")
+# Select the Windows build when running under Cygwin (uname output
+# contains CYGWIN) or when TCC is set to a non-empty value.
+# UNAME is computed once near the top of this Makefile; reuse it here
+# instead of running uname a second time.
+ifneq (,$(findstring CYGWIN,$(UNAME)))
+ ISWIN=1
+endif
+ifneq ($(TCC),)
+ ISWIN=1
+endif
+ifeq ($(ISWIN),1)
+ M ?= $(shell pwd)
+ WIN_SRCS += md_win.c
+ WIN_SRCS += miniport.c protocol.c passthru.c debug.c
+ #compiler, linker, target, sources and objects
+ #DDK is exported from the root makefile
+ #DDK = C:/WinDDK/7600.16385.1
+ OBJDIR=objchk_wxp_x86/i386/
+
+ TARGET = ipfw
+
+ CSOURCES = $(IPFW_SRCS) $(WIN_SRCS)
+
+ COBJS := $(CSOURCES:.c=.obj)
+ COBJS := $(addprefix $(OBJDIR),$(COBJS))
+
+ #include paths
+ INCLUDE_PATHS = -Ii386 -Iinclude -Iinclude_e -I.
+ # INCLUDE_PATHS += -I$(OBJDIR)
+ INCLUDE_PATHS += -I$(DDK)/inc/api
+ INCLUDE_PATHS += -I$(DDK)/inc/ddk
+ INCLUDE_PATHS += -I$(DDK)/inc/crt
+
+ # #preprocessor MS defines
+ PREPROC = -D_X86_=1 -Di386=1 -DSTD_CALL -DCONDITION_HANDLING=1
+ PREPROC += -DNT_UP=0 -DNT_INST=0 -DWIN32=100 -D_NT1X_=100 -DWINNT=1
+ PREPROC += -D_WIN32_WINNT=0x0501 -DWINVER=0x0501 -D_WIN32_IE=0x0603
+ PREPROC += -DWIN32_LEAN_AND_MEAN=1
+ PREPROC += -D__BUILDMACHINE__=WinDDK -DFPO=0 -D_DLL=1
+ PREPROC += -DNDIS_MINIPORT_DRIVER -DNDIS_WDM=1
+ PREPROC += -DNDIS51_MINIPORT=1 -DNDIS51=1
+ PREPROC += -DMSC_NOOPT -DNTDDI_VERSION=0x05010200
+ PREPROC += -DKMDF_MAJOR_VERSION_STRING=01 -DKMDF_MINOR_VERSION_STRING=009
+ #PREPROC += -DDBG=1 #debug
+ PREPROC += -DNDEBUG #always up, seems no effect, possibly no debug?
+ PREPROC += -DDEVL=1 #always up, seems no effect
+ #macroing module name, WARNING: must match the one in .inf files
+ PREPROC += -DMODULENAME=Ipfw
+
+ #our defines
+ OUR_PREPROC = -D_KERNEL -DKERNEL_MODULE -DKLD_MODULE
+ OUR_PREPROC += -D__BSD_VISIBLE -DIPFIREWALL_DEFAULT_TO_ACCEPT
+ OUR_PREPROC += -D__LITTLE_ENDIAN -DSYSCTL_NODE -DEMULATE_SYSCTL
+
+ifeq ($(TCC),)
+ CC = $(DDK)/bin/x86/x86/cl.exe
+ LD = $(DDK)/bin/x86/x86/link.exe
+ # #compiler options
+ CFLAGS = -Fo$(OBJDIR) -c -FC -Zc:wchar_t-
+ CFLAGS += -Zl -Zp8 -Gy -Gm- -GF -cbstring -Gz -hotpatch -EHs-c-
+ CFLAGS += -W2 # -W3 gives too many conversion errors
+ CFLAGS += -GR- -GF -GS -Zi # XXX do we need this ?
+ CFLAGS += -Fd$(OBJDIR)
+ CFLAGS += -wd4603 -wd4627 -typedil-
+ CFLAGS += -FI $(DDK)/inc/api/warning.h
+ CFLAGS += -FI winmissing.h
+ CFLAGS += -FI missing.h # headers
+ CFLAGS += -FI ../glue.h # headers
+
+ #optimization options
+ OPTIMIZE = -Od -Oi -Oy-
+
+ #linker options
+ LDFLAGS = /MERGE:_PAGE=PAGE /MERGE:_TEXT=.text
+ LDFLAGS += /SECTION:INIT,d /OPT:REF /OPT:ICF
+ LDFLAGS += /IGNORE:4198,4010,4037,4039,4065,4070,4078,4087,4089,4221
+ LDFLAGS += /INCREMENTAL:NO /release /NODEFAULTLIB /WX
+ LDFLAGS += /debug /debugtype:cv,fixup,pdata
+ LDFLAGS += /version:6.1 /osversion:6.1 /functionpadmin:5
+ LDFLAGS += /safeseh /pdbcompress
+ LDFLAGS += /STACK:0x40000,0x1000 /driver /base:0x10000 /align:0x80
+ LDFLAGS += /stub:$(DDK)\\lib\\wxp\\stub512.com
+ LDFLAGS += /subsystem:native,5.01 /entry:GsDriverEntry@8
+ LDFLAGS += /out:$(OBJDIR)/ipfw.sys
+
+ #libraries to build against
+ LIBS = $(DDK)/lib/wxp/i386/BufferOverflowK.lib
+ LIBS += $(DDK)/lib/wxp/i386/ntoskrnl.lib
+ LIBS += $(DDK)/lib/wxp/i386/hal.lib
+ LIBS += $(DDK)/lib/wxp/i386/wmilib.lib
+ LIBS += $(DDK)/lib/wxp/i386/ndis.lib
+ LIBS += $(DDK)/lib/wxp/i386/sehupd.lib
+else
+ # TCC points to the root of tcc tree
+ CC=$(TCC)/bin/wintcc
+ EXTRA_CFLAGS += -DTCC -I..
+ EXTRA_CFLAGS += -I$(TCC)/include/winapi -I$(TCC)/include
+ EXTRA_CFLAGS += -nostdinc
+
+ CFLAGS += -include winmissing.h -include missing.h -include ../glue.h
+ CFLAGS += -I../../inc/api -I../../inc/ddk -I../../inc/crt
+ CFLAGS += -DRC_INVOKED
+endif
+
+ #empty include directory to be built
+ # (M already gets its default at the top of this Windows section,
+ # so it is not assigned again here)
+ EDIRS += asm linux
+ EFILES += asm/div64.h
+ EFILES += linux/if.h linux/random.h linux/errno.h
+ EFILES += net/if_types.h net/inet_hashtables.h net/route.h
+
+ #targets
+# 'all' builds the Windows driver through $(TARGET) (set to ipfw above).
+all: $(TARGET)
+
+# The recipe wipes the object tree, compiles every file in $(CSOURCES)
+# with a single compiler invocation and then links $(COBJS) with $(LIBS).
+# With cl.exe, objects go to $(OBJDIR) (-Fo) and the linker writes
+# $(OBJDIR)/ipfw.sys (see /out: in LDFLAGS).
+# NOTE(review): no file named $(TARGET) is created here, so this rule
+# reruns on every invocation -- presumably intended, confirm.
+# NOTE(review): the TCC branch above does not set LD or LDFLAGS.
+$(TARGET): include_e
+ rm -rf objchk_wxp_x86
+ mkdir -p objchk_wxp_x86/i386
+ $(CC) $(INCLUDE_PATHS) $(PREPROC) $(OUR_PREPROC) $(CFLAGS) $(OPTIMIZE) $(CSOURCES)
+ $(LD) $(LDFLAGS) $(COBJS) $(LIBS)
+
+else # !windows
+
# We have three sections for OpenWrt, Linux 2.4 and Linux 2.6
ifeq ($(VER),openwrt)
xcflags-y += -O1 -DLINUX_24
xcflags-y += -g
- EXTRA_CFLAGS := $(xcflags-y) $(ipfw-cflags)
+ EXTRA_CFLAGS := $(xcflags-y) $(ipfw-cflags) -DSYSCTL_NODE -DEMULATE_SYSCTL
# we should not export anything
#export-objs := ipfw2_mod.o
# so we allow it to be overridden
M ?= $(shell pwd)
endif # !openwrt
+endif # !windows
#--- various common targets
clean:
-rm -f *.o *.ko Module.symvers *.mod.c
+ -rm -rf objchk_wxp_x86
-rm -rf include_e
distclean: clean
-rm -f .*cmd modules.order opt_*
-rm -rf .tmp_versions include_e
- -rm -rf .*.o.d
+ -rm -rf .*.o.d _CL_*
# support to create empty dirs and files in include_e/
# EDIRS is the list of directories, EFILES is the list of files.
-EDIRS= altq arpa machine net netinet netinet6 sys
+EDIRS += altq arpa machine net netinet netinet6 sys
EFILES += opt_inet6.h opt_ipfw.h opt_ipsec.h opt_mpath.h
EFILES += opt_mbuf_stress_test.h opt_param.h
*/
/*
- * $Id: bsd_compat.c 4665 2010-01-04 12:35:39Z luigi $
+ * $Id: bsd_compat.c 5813 2010-03-22 18:05:13Z svn_magno $
*
* kernel variables and functions that are not available in linux.
*/
#include <sys/cdefs.h>
#include <asm/div64.h> /* do_div on 2.4 */
#include <linux/random.h> /* get_random_bytes on 2.4 */
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#include <sys/malloc.h>
/*
* gettimeofday would be in sys/time.h but it is not
int hz = 1000; /* default clock time */
long tick = 1000; /* XXX is this 100000/hz ? */
int bootverbose = 0;
-time_t time_uptime = 0;
struct timeval boottime;
int ip_defttl;
/* credentials check */
#include <netinet/ip_fw.h>
+#ifdef __linux__
int
cred_check(void *_insn, int proto, struct ifnet *oif,
struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
match = (u->gid == (uid_t)insn->d[0]);
return match;
}
+#endif /* __linux__ */
int
jailed(struct ucred *cred)
if (len < valsize)
sopt->sopt_valsize = valsize = len;
+ //printf("copyout buf = %p, sopt = %p, soptval = %p, len = %d \n", buf, sopt, sopt->sopt_val, len);
bcopy(buf, sopt->sopt_val, valsize);
return 0;
}
return EINVAL;
if (valsize > len)
sopt->sopt_valsize = valsize = len;
+ //printf("copyin buf = %p, sopt = %p, soptval = %p, len = %d \n", buf, sopt, sopt->sopt_val, len);
bcopy(sopt->sopt_val, buf, valsize);
return 0;
}
void
getmicrouptime(struct timeval *tv)
{
-#ifdef _WIN32
-#else
do_gettimeofday(tv);
-#endif
}
random(void)
{
#ifdef _WIN32
- return 0x123456;
+ static unsigned long seed;
+ if (seed == 0) {
+ LARGE_INTEGER tm;
+ KeQuerySystemTime(&tm);
+ seed = tm.LowPart;
+ }
+ return RtlRandomEx(&seed) & 0x7fffffff;
#else
int r;
get_random_bytes(&r, sizeof(r));
#endif
}
+#ifdef __MIPSEL__
+/*
+ * Copy src into dst, writing at most siz bytes including the terminator.
+ * dst is always NUL-terminated when siz != 0.
+ * Returns strlen(src); a return value >= siz means the copy was truncated.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz)
+{
+	const char *from = src;
+	char *to = dst;
+	size_t room;
+
+	/* copy up to siz - 1 bytes; done as soon as the NUL has been copied */
+	for (room = siz; room > 1; room--) {
+		if ((*to++ = *from++) == 0)
+			return(from - src - 1);
+	}
+
+	/* out of room: terminate dst (if it has any space at all) ... */
+	if (siz != 0)
+		*to = '\0';
+	/* ... and walk the rest of src so its full length can be reported */
+	while (*from++)
+		;
+
+	return(from - src - 1);	/* count does not include NUL */
+}
+#endif // __MIPSEL__
+
/*
* compact version of fnmatch.
*/
return 1; /* no match */
}
-#ifdef _WIN32
-/*
- * as good as anywhere, place here the missing calls
+/* support for sysctl emulation.
+ * XXX this is actually MI code that should be enabled also on openwrt
*/
+#ifdef EMULATE_SYSCTL
+static struct sysctltable GST;
-void *
-my_alloc(int size)
+/*
+ * Dump the global sysctl table (GST) into the buffer of a sockopt request.
+ *
+ * If sopt->sopt_valsize is smaller than the space needed, the call is
+ * treated as a size probe: the required size is stored in oid->id,
+ * sopt_valsize is set to sizeof(struct dn_id) and no entry is written.
+ * Otherwise every entry is written after the leading struct dn_id as a
+ * struct sysctlhead, followed by its data and then its name; the cursor
+ * advances by head.blocklen for each entry.
+ * Always returns 0.
+ *
+ * NOTE(review): the probe path writes oid->id without checking that the
+ * buffer holds at least a struct dn_id -- confirm callers guarantee it.
+ */
+int
+kesysctl_emu_get(struct sockopt* sopt)
{
- void *_ret = ExAllocatePoolWithTag(0, size, 'wfpi');
- if (_ret)
- memset(_ret, 0, size);
- return _ret;
+ struct dn_id* oid = sopt->sopt_val;
+ struct sysctlhead* entry;
+ /* header + all entry blocks + one extra sysctlhead */
+ int sizeneeded = sizeof(struct dn_id) + GST.totalsize +
+ sizeof(struct sysctlhead);
+ unsigned char* pstring;
+ unsigned char* pdata;
+ int i;
+
+ if (sopt->sopt_valsize < sizeneeded) {
+ // this is a probe to retrieve the space needed for
+ // a dump of the sysctl table
+ oid->id = sizeneeded;
+ sopt->sopt_valsize = sizeof(struct dn_id);
+ return 0;
+ }
+
+ /* entries start right after the dn_id header */
+ entry = (struct sysctlhead*)(oid+1);
+ for( i=0; i<GST.count; i++) {
+ entry->blocklen = GST.entry[i].head.blocklen;
+ entry->namelen = GST.entry[i].head.namelen;
+ entry->flags = GST.entry[i].head.flags;
+ entry->datalen = GST.entry[i].head.datalen;
+ /* layout of one block: head, data, name */
+ pdata = (unsigned char*)(entry+1);
+ pstring = pdata+GST.entry[i].head.datalen;
+ bcopy(GST.entry[i].data, pdata, GST.entry[i].head.datalen);
+ bcopy(GST.entry[i].name, pstring, GST.entry[i].head.namelen);
+ /* blocklen is the stride to the next head */
+ entry = (struct sysctlhead*)
+ ((unsigned char*)(entry) + GST.entry[i].head.blocklen);
+ }
+ sopt->sopt_valsize = sizeneeded;
+ return 0;
}
-void
-panic(const char *fmt, ...)
+/*
+ * Update one entry of the global sysctl table from a request buffer.
+ *
+ * p points to a struct dn_id immediately followed by a struct sysctlhead,
+ * then entry->datalen bytes holding the new value, then the
+ * NUL-terminated name of the entry.
+ * l is taken to be the total length in bytes of the buffer at p
+ * (NOTE(review): confirm against the caller).
+ *
+ * Returns 0 on success and also when no entry matches the name.
+ * Returns -1 if the request is malformed (too short, data running past
+ * the buffer, name not terminated inside the buffer), if the data length
+ * differs from the kernel's, or if the entry is read-only.
+ */
+int
+kesysctl_emu_set(void* p, int l)
+{
+	struct sysctlhead* entry;
+	unsigned char* pdata;
+	unsigned char* pstring;
+	size_t avail, namemax, n;
+	int i = 0;
+
+	/* both fixed headers must fit before any of their fields is read */
+	if (p == NULL || l < 0 ||
+	    (size_t)l < sizeof(struct dn_id) + sizeof(struct sysctlhead)) {
+		printf("%s: request too short\n", __FUNCTION__);
+		return -1;
+	}
+	avail = (size_t)l - sizeof(struct dn_id) - sizeof(struct sysctlhead);
+
+	entry = (struct sysctlhead*)(((struct dn_id*)p)+1);
+	pdata = (unsigned char*)(entry+1);
+
+	/* the data must leave at least one byte for the name terminator */
+	if ((size_t)entry->datalen >= avail) {
+		printf("%s: data length exceeds request\n", __FUNCTION__);
+		return -1;
+	}
+	pstring = pdata + entry->datalen;
+	namemax = avail - (size_t)entry->datalen;
+
+	/* strcmp() below needs a terminator inside the buffer */
+	for (n = 0; n < namemax && pstring[n] != '\0'; n++)
+		;
+	if (n == namemax) {
+		printf("%s: unterminated name\n", __FUNCTION__);
+		return -1;
+	}
+
+	for (i=0; i<GST.count; i++) {
+		if (strcmp(GST.entry[i].name, pstring) != 0)
+			continue;
+		printf("%s: match found! %s\n",__FUNCTION__,pstring);
+		//sanity check on len, not really useful now since
+		//we only accept int32
+		if (entry->datalen != GST.entry[i].head.datalen) {
+			printf("%s: len mismatch, user %d vs kernel %d\n",
+				__FUNCTION__, entry->datalen,
+				GST.entry[i].head.datalen);
+			return -1;
+		}
+		// check access (at the moment flags handles only the R/W rights
+		//later on will be type + access
+		if( (GST.entry[i].head.flags & 3) == CTLFLAG_RD) {
+			printf("%s: the entry %s is read only\n",
+				__FUNCTION__,GST.entry[i].name);
+			return -1;
+		}
+		bcopy(pdata, GST.entry[i].data, GST.entry[i].head.datalen);
+		return 0;
+	}
+	printf("%s: match not found\n",__FUNCTION__);
+	return 0;
+}
+
+/*
+ * Replace, in place, every '_' in s with '.', stopping at the end of
+ * the string or at the first '.' that was already present.
+ */
+static void
+underscoretopoint(char* s)
{
- printf("%s", fmt);
- for (;;);
+ for (; *s && *s != '.'; s++)
+ if (*s == '_')
+ *s = '.';
}
-#include <stdarg.h>
+/*
+ * Give every table entry a private copy of its name.
+ * All names are packed back to back into one buffer (GST.namebuffer),
+ * each copy has its underscores turned into dots by underscoretopoint(),
+ * and the entry's name pointer is redirected to the copy.
+ * Returns 0 on success, -1 if the buffer cannot be allocated.
+ */
+static int
+formatnames()
+{
+	int idx;
+	int total = 0;
+	char* cursor;
+
+	/* first pass: space needed, terminators included in namelen */
+	idx = 0;
+	while (idx < GST.count) {
+		total += GST.entry[idx].head.namelen;
+		idx++;
+	}
+
+	GST.namebuffer = malloc(total, 0, 0);
+	if (GST.namebuffer == NULL)
+		return -1;
+
+	/* second pass: copy, rewrite and repoint each name */
+	cursor = GST.namebuffer;
+	idx = 0;
+	while (idx < GST.count) {
+		bcopy(GST.entry[idx].name, cursor, GST.entry[idx].head.namelen);
+		underscoretopoint(cursor);
+		GST.entry[idx].name = cursor;
+		cursor += GST.entry[idx].head.namelen;
+		idx++;
+	}
+	return 0;
+}
-extern int _vsnprintf(char *buf, int buf_size, char * fmt, va_list ap);
+/*
+ * Debug helper: print every entry of the global sysctl table.
+ * The low two bits of flags are shown as "access", the remaining bits
+ * as "type"; the data is printed as a single int.
+ */
+static void
+dumpGST()
+{
+	int n = 0;
+
+	while (n < GST.count) {
+		printf("SYSCTL: entry %i\n", n);
+		printf("name %s\n", GST.entry[n].name);
+		printf("namelen %i\n", GST.entry[n].head.namelen);
+		printf("type %i access %i\n",
+			GST.entry[n].head.flags >> 2,
+			GST.entry[n].head.flags & 0x00000003);
+		printf("data %i\n", *(int*)(GST.entry[n].data));
+		printf("datalen %i\n", GST.entry[n].head.datalen);
+		printf("blocklen %i\n", GST.entry[n].head.blocklen);
+		n++;
+	}
+}
-/*
- * Windows' _snprintf doesn't terminate buffer with zero if size > buf_size
- */
-int
-snprintf(char *buf, int buf_size, char *fmt, ...)
+/*
+ * Registration hooks called by keinit_GST(); they are defined outside
+ * this block (presumably each one pushes a group of variables with
+ * sysctl_pushback() -- confirm).
+ */
+void sysctl_addgroup_f1();
+void sysctl_addgroup_f2();
+void sysctl_addgroup_f3();
+void sysctl_addgroup_f4();
+
+/*
+ * Build the global sysctl table: run the four registration hooks, then
+ * convert the entry names with formatnames(). A failed conversion is
+ * only reported; the table is left with the names as registered.
+ */
+void
+keinit_GST()
{
- va_list ap;
- va_start(ap, fmt);
- if (_vsnprintf(buf, buf_size, fmt, ap) < 0)
- buf[buf_size - 1] = '\0';
- va_end(ap);
+ int ret;
+
+ sysctl_addgroup_f1();
+ sysctl_addgroup_f2();
+ sysctl_addgroup_f3();
+ sysctl_addgroup_f4();
+ ret = formatnames();
+ if (ret != 0)
+ printf("conversion of names failed for some reason\n");
+ //dumpGST();
+ printf("*** Global Sysctl Table entries = %i, total size = %i ***\n",
+ GST.count, GST.totalsize);
+}
- return 0;
+/*
+ * Tear down the global sysctl table: release the name buffer allocated
+ * by formatnames(), if any, and zero the whole table.
+ */
+void
+keexit_GST()
+{
+ if (GST.namebuffer != NULL)
+ free(GST.namebuffer,0);
+ /* also clears count, totalsize and the now-stale name pointers */
+ bzero(&GST, sizeof(GST));
}
-#endif
+
+/*
+ * Append one variable to the global sysctl table.
+ *
+ * name    NUL-terminated name; only the pointer is stored
+ * flags   stored as-is in the entry head
+ * datalen size in bytes of the variable
+ * data    address of the variable; only the pointer is stored
+ *
+ * The entry's blocklen is the size of head + name + data rounded up to
+ * a multiple of 4, and is added to GST.totalsize.
+ * When the table already holds GST_HARD_LIMIT entries a warning is
+ * printed and the variable is not added.
+ */
+void
+sysctl_pushback(char* name, int flags, int datalen, void* data)
+{
+	int slot = GST.count;
+
+	if (slot >= GST_HARD_LIMIT) {
+		printf("WARNING: global sysctl table full, this entry will not be added,"
+			"please recompile the module increasing the table size\n");
+		return;
+	}
+
+	/* what the entry points at */
+	GST.entry[slot].name = name;
+	GST.entry[slot].data = data;
+
+	/* its header; namelen includes the terminating '\0' */
+	GST.entry[slot].head.flags = flags;
+	GST.entry[slot].head.namelen = strlen(name)+1;
+	GST.entry[slot].head.datalen = datalen;
+	GST.entry[slot].head.blocklen =
+		((sizeof(struct sysctlhead) + GST.entry[slot].head.namelen +
+		GST.entry[slot].head.datalen)+3) & ~3;
+
+	GST.totalsize += GST.entry[slot].head.blocklen;
+	GST.count++;
+}
+#endif /* EMULATE_SYSCTL */
--- /dev/null
+#include <ntddk.h>
+
+/*
+ * Map a numeric sockopt command code to a printable name.
+ * Codes not listed below yield "BOH".
+ */
+const char* texify_cmd(int i)
+{
+	switch (i) {
+	case 108:
+		return("IP_FW3");
+	case 109:
+		return("IP_DUMMYNET3");
+	case 110:
+		return("IP_FW_ADD");
+	case 111:
+		return("IP_FW_DEL");
+	case 112:
+		return("IP_FW_FLUSH");
+	case 113:
+		return("IP_FW_ZERO");
+	case 114:
+		return("IP_FW_GET");
+	case 115:
+		return("IP_FW_RESETLOG");
+	case 116:
+		return("IP_FW_NAT_CFG");
+	case 117:
+		return("IP_FW_NAT_DEL");
+	case 118:
+		return("IP_FW_NAT_GET_CONFIG");
+	case 119:
+		return("IP_FW_NAT_GET_LOG");
+	case 120:
+		return("IP_DUMMYNET_CONFIGURE");
+	case 121:
+		return("IP_DUMMYNET_DEL");
+	case 122:
+		return("IP_DUMMYNET_FLUSH");
+	case 124:
+		return("IP_DUMMYNET_GET");
+	default:
+		return ("BOH");
+	}
+}
+
+/*
+ * Map an IP protocol number to a printable name: 1, 6 and 17 are
+ * reported as ICMP, TCP and UDP, anything else as "OTHER".
+ */
+const char* texify_proto(unsigned int p)
+{
+	switch (p) {
+	case 1:
+		return("ICMP");
+	case 6:
+		return("TCP");
+	case 17:
+		return("UDP");
+	default:
+		return("OTHER");
+	}
+}
+
+/*
+ * Print len bytes starting at addr through DbgPrint, eight per row,
+ * each row prefixed by its offset in hex. msg labels the dump.
+ * A final partial row is padded with 00.
+ * Nothing but the header line is printed when len <= 0, and no empty
+ * trailing row is printed when len is a multiple of 8.
+ */
+void hexdump(unsigned char* addr, int len, const char *msg)
+{
+	int ofs;
+	unsigned char d[8];
+
+	DbgPrint("%s at %p len %d\n", msg, addr, len);
+	for (ofs = 0; ofs < len; ofs += 8) {
+		/* bytes really available for this row */
+		const int chunk = (len - ofs < 8) ? len - ofs : 8;
+
+		bzero(d, 8);
+		bcopy(addr + ofs, d, chunk);
+		DbgPrint("%04X %02X %02X %02X %02X %02X %02X %02X %02X\n",
+			ofs, d[0], d[1], d[2], d[3], d[4],
+			d[5], d[6], d[7]);
+	}
+	DbgPrint("\n");
+}
--- /dev/null
+/*-
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Binary heap and hash tables, used in dummynet
+ *
+ * $Id: dn_heap.c 5646 2010-03-08 12:48:30Z luigi $
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#ifdef _KERNEL
+__FBSDID("$FreeBSD: user/luigi/ipfw3-head/sys/netinet/ipfw/dn_heap.c 203279 2010-01-31 12:20:29Z luigi $");
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <netinet/ipfw/dn_heap.h>
+#ifndef log
+#define log(x, arg...)
+#endif
+
+#else /* !_KERNEL */
+
+#include <stdio.h>
+#include <dn_test.h>
+#include <strings.h>
+#include <stdlib.h>
+
+#include "dn_heap.h"
+#define log(x, arg...) fprintf(stderr, ## arg)
+#define panic(x...) fprintf(stderr, ## x), exit(1)
+#define MALLOC_DEFINE(a, b, c)
+static void *my_malloc(int s) { return malloc(s); }
+static void my_free(void *p) { free(p); }
+#define malloc(s, t, w) my_malloc(s)
+#define free(p, t) my_free(p)
+#endif /* !_KERNEL */
+
+MALLOC_DEFINE(M_DN_HEAP, "dummynet", "dummynet heap");
+
+/*
+ * Heap management functions.
+ *
+ * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
+ * Some macros help finding parent/children so we can optimize them.
+ *
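+ * heap_resize() is called to expand the heap when needed.
+ * The requested size is rounded up to the next value of the form 2^k - 1
+ * (the disabled alternative rounds to a multiple of HEAP_INCREMENT + 1).
+ * Returns 1 on error, 0 on success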
+ */
+#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
+#define HEAP_LEFT(x) ( (x)+(x) + 1 )
+#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
+#define HEAP_INCREMENT 15
+
+/*
+ * Make sure heap h has room for at least new_size entries.
+ * Nothing happens if it is already large enough. Otherwise the size is
+ * rounded up (to a value of the form 2^k - 1 in the enabled variant),
+ * a new array is allocated, the old entries are copied over and the
+ * old array is released.
+ * Returns 0 on success, 1 if the allocation fails (heap unchanged).
+ */
+static int
+heap_resize(struct dn_heap *h, unsigned int new_size)
+{
+	struct dn_heap_entry *fresh;
+#if 1
+	unsigned int shift;
+#endif
+
+	if (new_size <= h->size)	/* have enough room */
+		return 0;
+#if 1	/* smear the top bit downwards: result is 2^k - 1 */
+	for (shift = 1; shift <= 16; shift <<= 1)
+		new_size |= new_size >> shift;
+#else
+	new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT;
+#endif
+	fresh = malloc(new_size * sizeof(*fresh), M_DN_HEAP, M_NOWAIT);
+	if (fresh == NULL) {
+		printf("--- %s, resize %d failed\n", __func__, new_size );
+		return 1;	/* error */
+	}
+	/* carry the existing entries over before dropping the old array */
+	if (h->size > 0) {
+		bcopy(h->p, fresh, h->size * sizeof(*fresh) );
+		free(h->p, M_DN_HEAP);
+	}
+	h->size = new_size;
+	h->p = fresh;
+	return 0;
+}
+
+/*
+ * Prepare heap h for use with room for at least 'size' entries.
+ * ofs is recorded in the heap; when > 0 it is the byte offset inside
+ * each stored object where the object's heap index is kept.
+ * Returns 0 on success, 1 if the entry array cannot be allocated.
+ */
+int
+heap_init(struct dn_heap *h, int size, int ofs)
+{
+	const int failed = heap_resize(h, size);
+
+	if (failed)
+		return 1;
+	h->ofs = ofs;
+	h->elements = 0;
+	return 0;
+}
+
+/*
+ * Insert element in heap. Normally, p != NULL, we insert p in
+ * a new position and bubble up. If p == NULL, then the element is
+ * already in place, and key is the position where to start the
+ * bubble-up.
+ * Returns 1 on failure (cannot allocate new heap entry)
+ *
+ * If ofs > 0 the position (index, int) of the element in the heap is
+ * also stored in the element itself at the given offset in bytes.
+ */
+#define SET_OFFSET(h, i) do { \
+ if (h->ofs > 0) \
+ *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = i; \
+ } while (0)
+/*
+ * RESET_OFFSET is used for sanity checks. It sets ofs
+ * to an invalid value.
+ */
+#define RESET_OFFSET(h, i) do { \
+ if (h->ofs > 0) \
+ *((int32_t *)((char *)(h->p[i].object) + h->ofs)) = -16; \
+ } while (0)
+
+/*
+ * Insert object p with key key1 into heap h, or, when p == NULL,
+ * re-establish heap order starting from the entry at index key1.
+ * Returns 0 on success, 1 if the heap had to grow and could not.
+ */
+int
+heap_insert(struct dn_heap *h, uint64_t key1, void *p)
+{
+ int son = h->elements;
+
+ //log("%s key %llu p %p\n", __FUNCTION__, key1, p);
+ if (p == NULL) { /* data already there, set starting point */
+ son = key1;
+ } else { /* insert new element at the end, possibly resize */
+ son = h->elements;
+ if (son == h->size) /* need resize... */
+ // XXX expand by 16 or so
+ if (heap_resize(h, h->elements+16) )
+ return 1; /* failure... */
+ h->p[son].object = p;
+ h->p[son].key = key1;
+ h->elements++;
+ }
+ /* make sure that son >= father along the path */
+ while (son > 0) {
+ int father = HEAP_FATHER(son);
+ struct dn_heap_entry tmp;
+
+ if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
+ break; /* found right position */
+ /* son smaller than father, swap and repeat */
+ HEAP_SWAP(h->p[son], h->p[father], tmp);
+ /* slot 'son' now holds the former father: refresh its index */
+ SET_OFFSET(h, son);
+ son = father;
+ }
+ /* record the final position of the entry that was bubbling up */
+ SET_OFFSET(h, son);
+ return 0;
+}
+
+/*
+ * remove top element from heap, or obj if obj != NULL
+ *
+ * Removing a specific object requires h->ofs > 0, because the object's
+ * current index is read from the object itself at that offset; the
+ * function panics if ofs is not set or the stored index is out of range.
+ * An empty heap is reported with a message and left untouched.
+ */
+void
+heap_extract(struct dn_heap *h, void *obj)
+{
+ int child, father, max = h->elements - 1;
+
+ if (max < 0) {
+ printf("--- %s: empty heap 0x%p\n", __FUNCTION__, h);
+ return;
+ }
+ if (obj == NULL)
+ father = 0; /* default: move up smallest child */
+ else { /* extract specific element, index is at offset */
+ if (h->ofs <= 0)
+ panic("%s: extract from middle not set on %p\n",
+ __FUNCTION__, h);
+ father = *((int *)((char *)obj + h->ofs));
+ if (father < 0 || father >= h->elements) {
+ panic("%s: father %d out of bound 0..%d\n",
+ __FUNCTION__, father, h->elements);
+ }
+ }
+ /*
+ * below, father is the index of the empty element, which
+ * we replace at each step with the smallest child until we
+ * reach the bottom level.
+ */
+ // XXX why removing RESET_OFFSET increases runtime by 10% ?
+ /* mark the departing object's stored index as invalid (-16) */
+ RESET_OFFSET(h, father);
+ while ( (child = HEAP_LEFT(father)) <= max ) {
+ if (child != max &&
+ DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
+ child++; /* take right child, otherwise left */
+ h->p[father] = h->p[child];
+ SET_OFFSET(h, father);
+ father = child;
+ }
+ h->elements--;
+ /* the hole is now at the bottom; if it is the last slot we are done */
+ if (father != max) {
+ /*
+ * Fill hole with last entry and bubble up,
+ * reusing the insert code
+ */
+ h->p[father] = h->p[max];
+ heap_insert(h, father, NULL);
+ }
+}
+
+#if 0
+/*
+ * change object position and update references
+ * XXX this one is never used!
+ *
+ * Give 'object' the key new_key and restore heap order by moving it up
+ * (smaller key) or down (larger or equal key). The heap must have been
+ * created with ofs > 0, since the object's current index is read from
+ * the object itself.
+ */
+static void
+heap_move(struct dn_heap *h, uint64_t new_key, void *object)
+{
+	int temp, i, max = h->elements-1;
+	struct dn_heap_entry *p, buf;
+
+	if (h->ofs <= 0)
+		panic("cannot move items on this heap");
+	p = h->p;	/* shortcut */
+
+	i = *((int *)((char *)object + h->ofs));
+	if (DN_KEY_LT(new_key, p[i].key) ) { /* must move up */
+		p[i].key = new_key;
+		for (; i>0 &&
+		    DN_KEY_LT(new_key, p[(temp = HEAP_FATHER(i))].key);
+		    i = temp ) { /* bubble up */
+			HEAP_SWAP(p[i], p[temp], buf);
+			SET_OFFSET(h, i);
+		}
+	} else {		/* must move down */
+		p[i].key = new_key;
+		while ( (temp = HEAP_LEFT(i)) <= max ) {
+			/* found left child */
+			if (temp != max &&
+			    DN_KEY_LT(p[temp+1].key, p[temp].key))
+				temp++; /* select child with min key */
+			/* sink only while the smaller child is below new_key */
+			if (DN_KEY_LT(p[temp].key, new_key)) {
+				/* go down */
+				HEAP_SWAP(p[i], p[temp], buf);
+				SET_OFFSET(h, i);
+			} else
+				break;
+			i = temp;
+		}
+	}
+	SET_OFFSET(h, i);
+}
+#endif /* heap_move, unused */
+
+/*
+ * Restore the heap property over the whole entry array, e.g. after a
+ * batch of entries has been deleted. Every index is run, in increasing
+ * order, through the bubble-up path of heap_insert().
+ */
+static void
+heapify(struct dn_heap *h)
+{
+	int pos = 0;
+
+	while (pos < h->elements) {
+		heap_insert(h, pos, NULL);
+		pos++;
+	}
+}
+
+/*
+ * Call fn(object, arg) on the entries of heap h.
+ * If the callback's result has HEAP_SCAN_DEL set, the entry is dropped
+ * by overwriting it with the last entry, and the same slot is examined
+ * again; HEAP_SCAN_END stops the scan after the current entry.
+ * The heap is re-ordered once at the end if anything was dropped.
+ * Returns the number of entries dropped.
+ */
+int
+heap_scan(struct dn_heap *h, int (*fn)(void *, uintptr_t),
+	uintptr_t arg)
+{
+	int pos = 0, removed = 0;
+	int verdict;
+
+	while (pos < h->elements) {
+		verdict = fn(h->p[pos].object, arg);
+		if (verdict & HEAP_SCAN_DEL) {
+			/* last entry takes this slot; do not advance */
+			h->elements--;
+			h->p[pos] = h->p[h->elements];
+			removed++;
+		} else {
+			pos++;
+		}
+		if (verdict & HEAP_SCAN_END)
+			break;
+	}
+	if (removed)
+		heapify(h);
+	return removed;
+}
+
+/*
+ * Release the entry array of heap h, if one was allocated, and zero
+ * the descriptor. The descriptor itself is not freed.
+ */
+void
+heap_free(struct dn_heap *h)
+{
+	/* size > 0 is what tells us h->p was allocated */
+	if (h->size > 0) {
+		free(h->p, M_DN_HEAP);
+	}
+	bzero(h, sizeof(*h));
+}
+
+/*
+ * hash table support.
+ */
+
+/*
+ * Chained hash table. Elements are linked through a 'void *' stored
+ * inside each element at byte offset 'ofs'.
+ */
+struct dn_ht {
+ int buckets; /* how many buckets, really buckets - 1*/
+ int entries; /* how many entries */
+ int ofs; /* offset of link field */
+ /* called as hash(key, flags, arg); result is ANDed with 'buckets' */
+ uint32_t (*hash)(uintptr_t, int, void *arg);
+ /* called as match(element, key, flags, arg); non-zero means found */
+ int (*match)(void *_el, uintptr_t key, int, void *);
+ /* builds the element on insert; if NULL the key itself is stored */
+ void *(*newh)(uintptr_t, int, void *);
+ void **ht; /* bucket heads */
+};
+/*
+ * Initialize, allocating bucket pointers inline.
+ * Recycle previous record if possible.
+ * If the 'newh' function is not supplied, we assume that the
+ * key passed to ht_find is the same object to be stored in.
+ *
+ * Returns the table, or NULL if h or match is missing, if buckets is
+ * outside 1..65536, or if the allocation fails.
+ * NOTE(review): when an existing table is recycled its bucket heads and
+ * entry count are left as they were -- confirm callers pass it empty.
+ */
+struct dn_ht *
+dn_ht_init(struct dn_ht *ht, int buckets, int ofs,
+ uint32_t (*h)(uintptr_t, int, void *),
+ int (*match)(void *, uintptr_t, int, void *),
+ void *(*newh)(uintptr_t, int, void *))
+{
+ int l;
+
+ /*
+ * Notes about rounding the bucket count to a power of two.
+ * Given the requested bucket count, we compute the nearest lower and
+ * higher power of two, minus 1 (respectively b_min and b_max), because
+ * the value is used as an AND mask on the index returned by the
+ * hash function.
+ * To choose between the two, the requested count is compared with
+ * b_min: if it is greater than 4/3 of b_min we round to b_max,
+ * otherwise to b_min.
+ * This ratio tries to round to the nearest power of two, favouring
+ * the larger size when the difference between the two powers is
+ * relatively big.
+ * Rounding to a power of two avoids a modulo operation when
+ * computing the bucket: ht->buckets stores the bucket count - 1,
+ * so the bucket is simply the hash value ANDed with ht->buckets.
+ */
+ int b_min; /* min buckets */
+ int b_max; /* max buckets */
+ int b_ori; /* original buckets */
+
+ if (h == NULL || match == NULL) {
+ printf("--- missing hash or match function");
+ return NULL;
+ }
+ if (buckets < 1 || buckets > 65536)
+ return NULL;
+
+ b_ori = buckets;
+ /* calculate next power of 2, - 1*/
+ buckets |= buckets >> 1;
+ buckets |= buckets >> 2;
+ buckets |= buckets >> 4;
+ buckets |= buckets >> 8;
+ buckets |= buckets >> 16;
+
+ b_max = buckets; /* Next power */
+ b_min = buckets >> 1; /* Previous power */
+
+ /* Calculate the 'nearest' bucket size */
+ if (b_min * 4000 / 3000 < b_ori)
+ buckets = b_max;
+ else
+ buckets = b_min;
+
+ if (ht) { /* see if we can reuse */
+ if (buckets <= ht->buckets) {
+ ht->buckets = buckets;
+ } else {
+ /* free pointers if not allocated inline */
+ if (ht->ht != (void *)(ht + 1))
+ free(ht->ht, M_DN_HEAP);
+ free(ht, M_DN_HEAP);
+ ht = NULL;
+ }
+ }
+ if (ht == NULL) {
+ /* Allocate buckets + 1 entries because 'buckets' is used as
+ * the AND mask for the index returned by the hash function
+ */
+ l = sizeof(*ht) + (buckets + 1) * sizeof(void **);
+ ht = malloc(l, M_DN_HEAP, M_NOWAIT | M_ZERO);
+ }
+ if (ht) {
+ /* bucket heads live right after the descriptor */
+ ht->ht = (void **)(ht + 1);
+ ht->buckets = buckets;
+ ht->ofs = ofs;
+ ht->hash = h;
+ ht->match = match;
+ ht->newh = newh;
+ }
+ return ht;
+}
+
+/*
+ * dummy callback for dn_ht_free to unlink all: it ignores both
+ * arguments and asks the scan to drop every element it visits.
+ */
+static int
+do_del(void *obj, void *arg)
+{
+ return DNHT_SCAN_DEL;
+}
+
+/*
+ * Dispose of a hash table. A NULL table is ignored.
+ * With DNHT_REMOVE in flags every element is only unlinked from the
+ * table, which itself stays allocated; without it the bucket array
+ * (when not stored inline) and the descriptor are freed.
+ */
+void
+dn_ht_free(struct dn_ht *ht, int flags)
+{
+	if (ht == NULL)
+		return;
+
+	if (flags & DNHT_REMOVE) {
+		/* empty the chains, keep the table */
+		(void)dn_ht_scan(ht, do_del, NULL);
+		return;
+	}
+
+	/* inline bucket arrays go away together with the descriptor */
+	if (ht->ht && ht->ht != (void *)(ht + 1))
+		free(ht->ht, M_DN_HEAP);
+	free(ht, M_DN_HEAP);
+}
+
+/*
+ * Number of elements currently linked in the table; 0 for a NULL table.
+ */
+int
+dn_ht_entries(struct dn_ht *ht)
+{
+	if (ht == NULL)
+		return 0;
+	return ht->entries;
+}
+
+/*
+ * lookup and optionally create or delete element
+ *
+ * flags: DNHT_MATCH_PTR compares key with the element address instead
+ * of calling the match function; DNHT_REMOVE unlinks a found element;
+ * DNHT_INSERT adds a new element when none is found.
+ * Returns the element found, removed or inserted, or NULL.
+ */
+void *
+dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg)
+{
+ int i;
+ void **pp, *p;
+
+ if (ht == NULL) /* easy on an empty hash */
+ return NULL;
+ /* ht->buckets is the AND mask (bucket count - 1) */
+ i = (ht->buckets == 1) ? 0 :
+ (ht->hash(key, flags, arg) & ht->buckets);
+
+ /* pp always points at the link that leads to p */
+ for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) {
+ if (flags & DNHT_MATCH_PTR) {
+ if (key == (uintptr_t)p)
+ break;
+ } else if (ht->match(p, key, flags, arg)) /* found match */
+ break;
+ }
+ if (p) {
+ if (flags & DNHT_REMOVE) {
+ /* link in the next element */
+ *pp = *(void **)((char *)p + ht->ofs);
+ *(void **)((char *)p + ht->ofs) = NULL;
+ ht->entries--;
+ }
+ } else if (flags & DNHT_INSERT) {
+ // printf("%s before calling new, bucket %d ofs %d\n",
+ // __FUNCTION__, i, ht->ofs);
+ p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
+ // printf("%s newh returns %p\n", __FUNCTION__, p);
+ if (p) {
+ /* push the new element at the head of its bucket */
+ ht->entries++;
+ *(void **)((char *)p + ht->ofs) = ht->ht[i];
+ ht->ht[i] = p;
+ }
+ }
+ return p;
+}
+
+/*
+ * do a scan with the option to delete the object. Extract next before
+ * running the callback because the element may be destroyed there.
+ *
+ * fn(element, arg) is called on every element of every bucket.
+ * DNHT_SCAN_DEL in its result unlinks the element; DNHT_SCAN_END stops
+ * the whole scan after the current element.
+ * Returns the number of elements unlinked; 0 if ht or fn is NULL.
+ */
+int
+dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg)
+{
+ int i, ret, found = 0;
+ void **curp, *cur, *next;
+
+ if (ht == NULL || fn == NULL)
+ return 0;
+ /* ht->buckets is the bucket count - 1, hence <= */
+ for (i = 0; i <= ht->buckets; i++) {
+ curp = &ht->ht[i];
+ while ( (cur = *curp) != NULL) {
+ next = *(void **)((char *)cur + ht->ofs);
+ ret = fn(cur, arg);
+ if (ret & DNHT_SCAN_DEL) {
+ found++;
+ ht->entries--;
+ /* bypass cur; curp stays on the same link */
+ *curp = next;
+ } else {
+ curp = (void **)((char *)cur + ht->ofs);
+ }
+ if (ret & DNHT_SCAN_END)
+ return found;
+ }
+ }
+ return found;
+}
+
+/*
+ * Similar to dn_ht_scan(), except that the scan is performed only
+ * in the bucket '*bucket'. If *bucket is not a valid bucket number
+ * (negative, or greater than ht->buckets) it is reset to 0 and bucket 0
+ * is scanned, so on return *bucket is the bucket actually visited.
+ *
+ * fn(element, arg) is called on every element of the bucket.
+ * DNHT_SCAN_DEL in its result unlinks the element; DNHT_SCAN_END stops
+ * the scan after the current element.
+ * Returns the number of elements unlinked; 0 if ht or fn is NULL.
+ */
+int
+dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *),
+	void *arg)
+{
+	int i, ret, found = 0;
+	void **curp, *cur, *next;
+
+	if (ht == NULL || fn == NULL)
+		return 0;
+	/* a negative index would read before the bucket array */
+	if (*bucket < 0 || *bucket > ht->buckets)
+		*bucket = 0;
+	i = *bucket;
+
+	curp = &ht->ht[i];
+	while ( (cur = *curp) != NULL) {
+		/* fetch the successor first: fn may destroy cur */
+		next = *(void **)((char *)cur + ht->ofs);
+		ret = fn(cur, arg);
+		if (ret & DNHT_SCAN_DEL) {
+			found++;
+			ht->entries--;
+			*curp = next;
+		} else {
+			curp = (void **)((char *)cur + ht->ofs);
+		}
+		if (ret & DNHT_SCAN_END)
+			return found;
+	}
+	return found;
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: dn_sched_fifo.c 5621 2010-03-04 16:51:27Z luigi $
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+/*
+ * This file implements a FIFO scheduler for a single queue.
+ * The queue is allocated as part of the scheduler instance (it sits
+ * right after the struct dn_sch_inst), and a single flowset in the
+ * template stores queue size and policy.
+ * Enqueue and dequeue use the default library functions.
+ */
+
+/*
+ * Append m to the instance's built-in queue; the q argument is unused.
+ * Returns whatever dn_enqueue() returns.
+ */
+static int
+fifo_enqueue(struct dn_sch_inst *si, struct dn_queue *q, struct mbuf *m)
+{
+	/* XXX if called with q != NULL and m=NULL, this is a
+	 * re-enqueue from an existing scheduler, which we should
+	 * handle.
+	 */
+	struct dn_queue *own_q = (struct dn_queue *)(si+1);
+
+	return dn_enqueue(own_q, m, 0);
+}
+
+/*
+ * Take the next packet from the instance's built-in queue.
+ * Returns whatever dn_dequeue() returns.
+ */
+static struct mbuf *
+fifo_dequeue(struct dn_sch_inst *si)
+{
+	struct dn_queue *own_q = (struct dn_queue *)(si + 1);
+
+	return dn_dequeue(own_q);
+}
+
+/*
+ * Set up the queue embedded in a new scheduler instance: tag it as a
+ * DN_QUEUE object and link it to its instance and to the scheduler's
+ * flowset. Always returns 0.
+ */
+static int
+fifo_new_sched(struct dn_sch_inst *si)
+{
+	/* the queue lives right after the instance */
+	struct dn_queue *own_q = (struct dn_queue *)(si + 1);
+
+	set_oid(&own_q->ni.oid, DN_QUEUE, sizeof(*own_q));
+	own_q->fs = si->sched->fs;
+	own_q->_si = si;
+	return 0;
+}
+
+/*
+ * Tear down the queue embedded in a scheduler instance: release the
+ * packets still queued, then zero the queue. Always returns 0.
+ */
+static int
+fifo_free_sched(struct dn_sch_inst *si)
+{
+	struct dn_queue *own_q = (struct dn_queue *)(si + 1);
+
+	dn_free_pkts(own_q->mq.head);
+	bzero(own_q, sizeof(*own_q));
+	return 0;
+}
+
+/*
+ * FIFO scheduler descriptor
+ * contains the type of the scheduler, the name, the size of extra
+ * data structures, and function pointers.
+ *
+ * si_datalen reserves room for one struct dn_queue after each scheduler
+ * instance; the fifo_* functions above reach it as (si + 1).
+ * Hooks left NULL are not provided by this scheduler.
+ * NOTE(review): _SI() presumably emits the designated-initializer field
+ * name only where the compiler supports it -- confirm in its definition.
+ */
+static struct dn_alg fifo_desc = {
+ _SI( .type = ) DN_SCHED_FIFO,
+ _SI( .name = ) "FIFO",
+ _SI( .flags = ) 0,
+
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct dn_queue),
+ _SI( .q_datalen = ) 0,
+
+ _SI( .enqueue = ) fifo_enqueue,
+ _SI( .dequeue = ) fifo_dequeue,
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) fifo_new_sched,
+ _SI( .free_sched = ) fifo_free_sched,
+ _SI( .new_fsk = ) NULL,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) NULL,
+ _SI( .free_queue = ) NULL,
+};
+
+/* register the descriptor under the module name dn_fifo */
+DECLARE_DNSCHED_MODULE(dn_fifo, &fifo_desc);
--- /dev/null
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: dn_sched_prio.c 5797 2010-03-21 16:31:08Z luigi $
+ */
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#define DN_SCHED_PRIO 5 //XXX
+
+/*
+ * Single-word bitmap helpers, used when the Linux kernel ones are not
+ * available. The bitmap is a BITMAP_T (unsigned long), so the mask must
+ * be built from 1UL: with a plain int 1, bit 31 would sign-extend into
+ * the upper half of a 64-bit word and bits >= 32 would be undefined.
+ * Arguments are parenthesized so any pointer expression can be passed.
+ */
+#if !defined(_KERNEL) || !defined(__linux__)
+#define test_bit(ix, pData) ((*(pData)) & (1UL << (ix)))
+#define __set_bit(ix, pData) ((*(pData)) |= (1UL << (ix)))
+#define __clear_bit(ix, pData) ((*(pData)) &= ~(1UL << (ix)))
+#endif
+
+/* Kept textually identical to the definition above so that both can
+ * be active at the same time without a redefinition error.
+ */
+#ifdef __MIPSEL__
+#define __clear_bit(ix, pData) ((*(pData)) &= ~(1UL << (ix)))
+#endif
+
+/* Size of the array of queues pointers: one entry per bitmap bit. */
+#define BITMAP_T unsigned long
+#define MAXPRIO (sizeof(BITMAP_T) * 8)
+
+/*
+ * The scheduler instance contains an array of pointers to queues,
+ * one for each priority, and a bitmap listing backlogged queues.
+ * Bit N of the bitmap is set while q_array[N] points to the queue
+ * currently holding the packets for priority N.
+ */
+struct prio_si {
+ BITMAP_T bitmap; /* array bitmap */
+ struct dn_queue *q_array[MAXPRIO]; /* Array of queues pointers */
+};
+
+/*
+ * Enqueue m on the queue that currently represents q's priority.
+ * If another queue with the same priority is already backlogged the
+ * packet goes there; otherwise q itself becomes the queue for that
+ * priority. Returns 1 if dn_enqueue() reported a failure, else 0.
+ */
+static int
+prio_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+	struct prio_si *psi = (struct prio_si *)(_si + 1);
+	int prio = q->fs->fs.par[0];
+	struct dn_queue *target = q;
+
+	if (test_bit(prio, &psi->bitmap) != 0) {
+		/* priority already backlogged, use the existing queue */
+		target = psi->q_array[prio];
+	} else {
+		/* first queue with this priority, record it */
+		__set_bit(prio, &psi->bitmap);
+		psi->q_array[prio] = q;
+	}
+
+	return dn_enqueue(target, m, 0) ? 1 : 0;
+}
+
+/*
+ * Packets are dequeued only from the highest priority queue, i.e. the
+ * one matching the lowest bit set in the bitmap.
+ * After the dequeue, if this queue becomes empty, its index is removed
+ * from the bitmap.
+ * The scheduler is idle (NULL is returned) if the bitmap is empty.
+ *
+ * The lowest set bit is found with an explicit scan instead of ffs():
+ * ffs() takes an int, so on platforms where BITMAP_T is wider than int
+ * it would return 0 when only priorities >= 32 are backlogged, and the
+ * resulting index -1 would read outside q_array[].
+ *
+ * NOTE: highest priority is 0, lowest is MAXPRIO - 1.
+ */
+static struct mbuf *
+prio_dequeue(struct dn_sch_inst *_si)
+{
+	struct prio_si *si = (struct prio_si *)(_si + 1);
+	struct mbuf *m;
+	struct dn_queue *q;
+	int prio;
+
+	if (si->bitmap == 0) /* scheduler idle */
+		return NULL;
+
+	/* bitmap != 0, so the scan stops before reaching MAXPRIO */
+	for (prio = 0; ((si->bitmap >> prio) & 1) == 0; prio++)
+		;
+
+	/* Take the highest priority queue in the scheduler */
+	q = si->q_array[prio];
+	// assert(q)
+
+	m = dn_dequeue(q);
+	if (q->mq.head == NULL) {
+		/* Queue is now empty, remove from scheduler
+		 * and mark it
+		 */
+		si->q_array[prio] = NULL;
+		__clear_bit(prio, &si->bitmap);
+	}
+	return m;
+}
+
+/*
+ * Start a new scheduler instance with no backlogged priority:
+ * empty bitmap and all queue pointers cleared.
+ */
+static int
+prio_new_sched(struct dn_sch_inst *_si)
+{
+	struct prio_si *psi = (struct prio_si *)(_si + 1);
+
+	psi->bitmap = 0;
+	bzero(psi->q_array, sizeof(psi->q_array));
+
+	return 0;
+}
+
+/*
+ * Flowset creation hook: par[0] is the priority of the flowset and
+ * is passed to ipdn_bound_var() with default 0 and range 0..MAXPRIO-1.
+ * NOTE(review): ipdn_bound_var() presumably clamps out-of-range values
+ * and logs the given label - confirm against its definition.
+ */
+static int
+prio_new_fsk(struct dn_fsk *fs)
+{
+	ipdn_bound_var(fs->fs.par, 0, 0, MAXPRIO - 1, "PRIO priority");
+
+	return 0;
+}
+
+/*
+ * Attach a queue to the scheduler instance. An empty queue only gets
+ * its subtype set. A queue that already holds packets is either
+ * recorded as the queue for its priority, or, when another queue
+ * already owns that priority, its packets and counters are moved to
+ * the owner. This partly duplicates prio_enqueue().
+ */
+static int
+prio_new_queue(struct dn_queue *q)
+{
+	struct prio_si *psi = (struct prio_si *)(q->_si + 1);
+	int prio = q->fs->fs.par[0];
+	struct dn_queue *owner;
+
+	q->ni.oid.subtype = DN_SCHED_PRIO;
+
+	if (q->mq.head == NULL)
+		return 0;	/* nothing queued, nothing to schedule */
+
+	if (test_bit(prio, &psi->bitmap) == 0) {
+		/* No queue with this priority yet, insert this one */
+		__set_bit(prio, &psi->bitmap);
+		psi->q_array[prio] = q;
+		return 0;
+	}
+
+	owner = psi->q_array[prio];
+	if (owner == q)
+		return 0;	/* already the queue for this priority */
+
+	/* Splice q's packet list after the owner's tail ... */
+	owner->mq.tail->m_nextpkt = q->mq.head;
+	owner->mq.tail = q->mq.tail;
+	/* ... transfer the counters ... */
+	owner->ni.length += q->ni.length;
+	owner->ni.len_bytes += q->ni.len_bytes;
+	/* ... and leave q empty. */
+	q->ni.length = 0;
+	q->ni.len_bytes = 0;
+	q->mq.tail = q->mq.head = NULL;
+
+	return 0;
+}
+
+/*
+ * Detach a queue from the scheduler instance: if it is the queue
+ * recorded for its priority, forget it and clear the bitmap bit.
+ */
+static int
+prio_free_queue(struct dn_queue *q)
+{
+	struct prio_si *psi = (struct prio_si *)(q->_si + 1);
+	int prio = q->fs->fs.par[0];
+
+	if (psi->q_array[prio] != q)
+		return 0;	/* some other queue owns this priority */
+
+	psi->q_array[prio] = NULL;
+	__clear_bit(prio, &psi->bitmap);
+	return 0;
+}
+
+
+/*
+ * PRIO scheduler descriptor: type, name, flags, size of the extra
+ * data areas and the callbacks implemented in this file.
+ * Entries set to NULL are callbacks this scheduler does not implement.
+ */
+static struct dn_alg prio_desc = {
+ _SI( .type = ) DN_SCHED_PRIO,
+ _SI( .name = ) "PRIO",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ /* extra space is needed only in the si (struct prio_si);
+ * nothing is added to the scheduler template or to the queues
+ */
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct prio_si),
+ _SI( .q_datalen = ) 0,
+
+ _SI( .enqueue = ) prio_enqueue,
+ _SI( .dequeue = ) prio_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) prio_new_sched,
+ _SI( .free_sched = ) NULL,
+
+ _SI( .new_fsk = ) prio_new_fsk,
+ _SI( .free_fsk = ) NULL,
+
+ _SI( .new_queue = ) prio_new_queue,
+ _SI( .free_queue = ) prio_free_queue,
+};
+
+
+/* NOTE(review): presumably registers prio_desc as module "dn_prio" - confirm in dn_sched.h */
+DECLARE_DNSCHED_MODULE(dn_prio, &prio_desc);
--- /dev/null
+/*
+ * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: dn_sched_qfq.c 5621 2010-03-04 16:51:27Z luigi $
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+/*
+ * NO(x) wraps debug-only code: it expands to x when QFQ_DEBUG is
+ * defined and to nothing otherwise.
+ */
+#ifdef QFQ_DEBUG
+struct qfq_sched;
+static void dump_sched(struct qfq_sched *q, const char *msg);
+#define NO(x) x
+#else
+#define NO(x)
+#endif
+#define DN_SCHED_QFQ 4 // XXX Where?
+/* one-word bitmap used for group states and for the slots of a group */
+typedef unsigned long bitmap;
+
+/*
+ * bitmaps ops are critical. Some linux versions have __fls
+ * and the bitmap ops. Some machines have ffs
+ */
+#if defined(_WIN32)
+/*
+ * Find last set: returns the 1-based position of the most significant
+ * bit set in n, or 0 when n is 0.
+ */
+int fls(unsigned int n)
+{
+	int bits = 0;
+
+	while (n > 0) {
+		n >>= 1;
+		bits++;
+	}
+	return bits;
+}
+#endif
+
+#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32)
+/*
+ * Zero-based index of the most significant bit set in word,
+ * built on top of the 1-based fls().
+ * NOTE(review): word is handed to fls() as is; if fls() takes an
+ * int-sized argument, bits above 31 are not seen - confirm callers
+ * never need them.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	int top = fls(word);
+
+	return top - 1;
+}
+#endif
+
+/*
+ * Single-word bitmap helpers, used when the Linux kernel ones are not
+ * available. A bitmap is an unsigned long, so masks are built from
+ * 1UL: with a plain int 1, (1 << 31) is negative and sign-extends on
+ * 64-bit platforms, so setting slot 31 would also set bits 32..63.
+ */
+#if !defined(_KERNEL) || !defined(__linux__)
+#ifdef QFQ_DEBUG
+/* Debug variants: same operations, plus a complaint on indexes
+ * outside 0..31. test_bit() returns 1 if the bit is set, else 0.
+ */
+int test_bit(int ix, bitmap *p)
+{
+	if (ix < 0 || ix > 31)
+		D("bad index %d", ix);
+	return (*p & (1UL << ix)) != 0;
+}
+void __set_bit(int ix, bitmap *p)
+{
+	if (ix < 0 || ix > 31)
+		D("bad index %d", ix);
+	*p |= (1UL << ix);
+}
+void __clear_bit(int ix, bitmap *p)
+{
+	if (ix < 0 || ix > 31)
+		D("bad index %d", ix);
+	*p &= ~(1UL << ix);
+}
+#else /* !QFQ_DEBUG */
+/* XXX do we have fast version, or leave it to the compiler ? */
+#define test_bit(ix, pData) ((*(pData)) & (1UL << (ix)))
+#define __set_bit(ix, pData) ((*(pData)) |= (1UL << (ix)))
+#define __clear_bit(ix, pData) ((*(pData)) &= ~(1UL << (ix)))
+#endif /* !QFQ_DEBUG */
+#endif /* !__linux__ */
+
+/* Kept textually identical to the macro above so that both can be
+ * active at the same time without a redefinition error.
+ */
+#ifdef __MIPSEL__
+#define __clear_bit(ix, pData) ((*(pData)) &= ~(1UL << (ix)))
+#endif
+
+/*-------------------------------------------*/
+/*
+
+Virtual time computations.
+
+S, F and V are all computed in fixed point arithmetic with
+FRAC_BITS fractional bits.
+
+ QFQ_MAX_INDEX is the maximum index allowed for a group. We need
+ one bit per index.
+ QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
+ The layout of the bits is as below:
+
+ [ MTU_SHIFT ][ FRAC_BITS ]
+ [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
+ ^.__grp->index = 0
+ *.__grp->slot_shift
+
+ where MIN_SLOT_SHIFT is derived by difference from the others.
+
+The max group index corresponds to Lmax/w_min, where
+Lmax=1<<MTU_SHIFT, w_min = 1 .
+From this, and knowing how many groups (MAX_INDEX) we want,
+we can derive the shift corresponding to each group.
+
+Because we often need to compute
+ F = S + len/w_i and V = V + len/wsum
+instead of storing w_i store the value
+ inv_w = (1<<FRAC_BITS)/w_i
+so we can do F = S + len * inv_w * wsum.
+We use W_TOT in the formulas so we can easily move between
+static and adaptive weight sum.
+
+The per-scheduler-instance data contain all the data structures
+for the scheduler: bitmaps and bucket lists.
+
+ */
+/*
+ * Maximum number of consecutive slots occupied by backlogged classes
+ * inside a group. This is approx lmax/lmin + 5.
+ * XXX check because it poses constraints on MAX_INDEX
+ */
+#define QFQ_MAX_SLOTS 32
+/*
+ * Shifts used for class<->group mapping. Class weights are
+ * in the range [1, QFQ_MAX_WEIGHT], we want to map each class i to the
+ * group with the smallest index that can support the L_i / r_i
+ * configured for the class.
+ *
+ * grp->index is the index of the group; and grp->slot_shift
+ * is the shift for the corresponding (scaled) sigma_i.
+ *
+ * When computing the group index, we do (len<<FP_SHIFT)/weight,
+ * then compute an FLS (which is like a log2()), and if the result
+ * is below the MAX_INDEX region we use 0 (which is the same as
+ * using a larger len).
+ */
+#define QFQ_MAX_INDEX 19
+#define QFQ_MAX_WSHIFT 16 /* log2(max_weight) */
+
+#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
+#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
+/* IWSUM is ONE_FP divided by the (fixed) maximum weight sum; the
+ * commented-out variant would use a per-scheduler value instead.
+ */
+//#define IWSUM (q->i_wsum)
+#define IWSUM ((1<<FRAC_BITS)/QFQ_MAX_WSUM)
+
+#define FRAC_BITS 30 /* fixed point arithmetic */
+#define ONE_FP (1UL << FRAC_BITS)
+
+#define QFQ_MTU_SHIFT 11 /* log2(max_len) */
+/* slot shift of group 0; group i uses QFQ_MIN_SLOT_SHIFT + i */
+#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
+
+/*
+ * Possible group states, also indexes for the bitmaps array in
+ * struct qfq_sched. We rely on ER, IR, EB, IB being numbered 0..3:
+ * qfq_calc_state() builds the state by setting bit 0 (IR) when the
+ * group is not eligible and or-ing in EB (bit 1) when it is blocked.
+ * QFQ_MAX_STATE is the number of states, used to size the array.
+ */
+enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
+
+struct qfq_group;
+/*
+ * additional queue info. Some of this info should come from
+ * the flowset, we copy them here for faster processing.
+ * This is an overlay of the struct dn_queue: _q must stay the first
+ * member because the code casts struct dn_queue * to struct qfq_class *.
+ */
+struct qfq_class {
+ struct dn_queue _q;
+ uint64_t S, F; /* flow timestamps (exact) */
+ struct qfq_class *next; /* Link for the slot list. */
+
+ /* group we belong to. In principle we would need the index,
+ * which is log_2(lmax/weight), but we never reference it
+ * directly, only the group.
+ */
+ struct qfq_group *grp;
+
+ /* these are copied from the flowset. */
+ uint32_t inv_w; /* ONE_FP/weight */
+ uint32_t lmax; /* Max packet size for this flow. */
+};
+
+/* Group descriptor, see the paper for details.
+ * Basically this contains the bucket lists.
+ * slots[] is used as a circular array: slots[front] is the first
+ * bucket and bit k of full_slots refers to slots[(front + k) %
+ * QFQ_MAX_SLOTS].
+ */
+struct qfq_group {
+ uint64_t S, F; /* group timestamps (approx). */
+ unsigned int slot_shift; /* Slot shift. */
+ unsigned int index; /* Group index. */
+ unsigned int front; /* Index of the front slot. */
+ bitmap full_slots; /* non-empty slots */
+
+ /* Array of lists of active classes. */
+ struct qfq_class *slots[QFQ_MAX_SLOTS];
+};
+
+/*
+ * Scheduler instance descriptor, stored right after the dn_sch_inst.
+ * The members inside NO() exist only when QFQ_DEBUG is defined; the
+ * packet counter is named 'queued' because that is the name used by
+ * the debug statements in qfq_enqueue(), qfq_dequeue() and
+ * dump_sched().
+ */
+struct qfq_sched {
+	uint64_t V;		/* Precise virtual time. */
+	uint32_t wsum;		/* weight sum */
+	NO(uint32_t i_wsum;	/* ONE_FP/w_sum */
+	uint32_t queued;	/* debugging */
+	uint32_t loops;		/* debugging */)
+	bitmap bitmaps[QFQ_MAX_STATE];	/* Group bitmaps. */
+	struct qfq_group groups[QFQ_MAX_INDEX + 1];	/* The groups. */
+};
+
+/*---- support functions ----------------------------*/
+
+/*
+ * Wraparound-safe "a is after b" test: the difference is taken modulo
+ * 2^64 and its sign decides. Returns 1 if a > b in that sense, else 0.
+ */
+static inline int qfq_gt(uint64_t a, uint64_t b)
+{
+	int64_t delta = (int64_t)(a - b);
+
+	return delta > 0;
+}
+
+/*
+ * Round a precise timestamp down to its slotted value by clearing
+ * the low 'shift' bits.
+ */
+static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
+{
+	uint64_t low_bits = (1ULL << shift) - 1;
+
+	return ts & ~low_bits;
+}
+
+/*
+ * Return the group whose index is the lowest bit set in the bitmap.
+ * Callers in this file only pass a non-zero bitmap.
+ */
+static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
+	unsigned long bitmap)
+{
+	int lowest = ffs(bitmap) - 1;	/* ffs() is 1-based */
+
+	return q->groups + lowest;
+}
+
+/*
+ * Calculate a flow index, given its weight and maximum packet length.
+ * index = log_2(maxlen/weight) but we need to apply the scaling.
+ * This is used only once at flow creation.
+ *
+ * inv_w is the inverse weight (ONE_FP/weight), so maxlen * inv_w is
+ * maxlen/weight in fixed point. The result is 0 when that value is
+ * below 2^QFQ_MIN_SLOT_SHIFT; an exact power of two gets the index
+ * just below the one given by the log2.
+ */
+static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
+{
+ uint64_t slot_size = (uint64_t)maxlen *inv_w;
+ unsigned long size_map;
+ int index = 0;
+
+ /* drop the bits below the slot size of group 0 */
+ size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
+ if (!size_map)
+ goto out;
+
+ index = __fls(size_map) + 1; // basically a log_2()
+ /* subtract 1 when slot_size is exactly 2^(index + MIN_SLOT_SHIFT - 1) */
+ index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
+
+ if (index < 0)
+ index = 0;
+
+out:
+ ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
+ return index;
+}
+/*---- end support functions ----*/
+
+/*-------- API calls --------------------------------*/
+/*
+ * Set up the QFQ part of a new queue from its flowset parameters:
+ * par[0] is the weight, par[1] the maximum packet length.
+ * Returns EINVAL if adding the (approximated) weight would push the
+ * weight sum of the instance above QFQ_MAX_WSUM, 0 otherwise.
+ */
+static int
+qfq_new_queue(struct dn_queue *_q)
+{
+	struct qfq_class *cl = (struct qfq_class *)_q;
+	struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
+	uint32_t w = _q->fs->fs.par[0];	/* approximated weight */
+
+	/* The flowset values should be correct already, but an
+	 * out-of-range weight is still replaced by 1.
+	 */
+	cl->lmax = _q->fs->fs.par[1];
+	if (w == 0 || w > QFQ_MAX_WEIGHT) {
+		D("rounding weight to 1");
+		w = 1;
+	}
+
+	/* Store the inverse weight, then derive the weight actually
+	 * representable from it.
+	 */
+	cl->inv_w = ONE_FP/w;
+	w = ONE_FP/cl->inv_w;
+	if (q->wsum + w > QFQ_MAX_WSUM)
+		return EINVAL;
+
+	cl->grp = q->groups + qfq_calc_index(cl->inv_w, cl->lmax);
+	q->wsum += w;
+	// XXX cl->S = q->V; ?
+	// XXX compute q->i_wsum
+	return 0;
+}
+
+/*
+ * Remove an empty queue: give its weight back to the instance.
+ * inv_w is zeroed afterwards so that a second call is a no-op.
+ */
+static int
+qfq_free_queue(struct dn_queue *_q)
+{
+	struct qfq_class *cl = (struct qfq_class *)_q;
+	struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
+
+	if (cl->inv_w == 0)
+		return 0;	/* weight already released */
+
+	q->wsum -= ONE_FP/cl->inv_w;
+	cl->inv_w = 0;
+	return 0;
+}
+
+/*
+ * Keep only the bits of bitmap at position 'from' and above, to mimic
+ * what an ffs_from() would look at.
+ */
+static inline unsigned long
+mask_from(unsigned long bitmap, int from)
+{
+	unsigned long below = (1UL << from) - 1;
+
+	return bitmap & ~below;
+}
+
+/*
+ * Compute the state of a group. This relies on ER=0, IR=1, EB=2, IB=3:
+ * the group is ineligible when its S is after V, and it is blocked
+ * when the first group in ER with index >= grp->index has a smaller F.
+ */
+static inline unsigned int
+qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
+{
+	unsigned long er_from = mask_from(q->bitmaps[ER], grp->index);
+	/* if S > V we are not eligible */
+	unsigned int st = qfq_gt(grp->S, q->V) ? IR : ER;
+
+	if (er_from != 0 && qfq_gt(grp->F, qfq_ffs(q, er_from)->F))
+		st |= EB;
+
+	return st;
+}
+
+/*
+ * Move the groups selected by mask from set src to set dst: their
+ * bits are added to bitmaps[dst] and cleared in bitmaps[src].
+ * Callers should make sure that src != dst.
+ */
+static inline void
+qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
+{
+	unsigned long moving = q->bitmaps[src] & mask;
+
+	q->bitmaps[dst] |= moving;
+	q->bitmaps[src] &= ~mask;
+}
+
+/*
+ * Unblock the groups with index below 'index' (EB -> ER, IB -> IR),
+ * unless the first group in ER above 'index' has a finish time that
+ * is not after old_finish.
+ */
+static inline void
+qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
+{
+	unsigned long er_above = mask_from(q->bitmaps[ER], index + 1);
+	unsigned long lower;
+
+	if (er_above != 0 && !qfq_gt(qfq_ffs(q, er_above)->F, old_finish))
+		return;
+
+	lower = (1UL << index) - 1;
+	qfq_move_groups(q, lower, EB, ER);
+	qfq_move_groups(q, lower, IB, IR);
+}
+
+/*
+ * perhaps
+ *
+ old_V ^= q->V;
+ old_V >>= QFQ_MIN_SLOT_SHIFT;
+ if (old_V) {
+ ...
+ }
+ *
+ */
+/*
+ * After V moved from old_V to q->V, make eligible (IR -> ER, IB -> EB)
+ * the groups up to the highest bit that changed between the two
+ * virtual times, both expressed in units of the smallest slot.
+ */
+static inline void
+qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
+{
+	unsigned long now_slot = q->V >> QFQ_MIN_SLOT_SHIFT;
+	unsigned long prev_slot = old_V >> QFQ_MIN_SLOT_SHIFT;
+	unsigned long changed = now_slot ^ prev_slot;
+	unsigned long mask;
+
+	if (changed == 0)
+		return;		/* still in the same slot */
+
+	mask = (2UL << (__fls(changed))) - 1;
+	qfq_move_groups(q, mask, IR, ER);
+	qfq_move_groups(q, mask, IB, EB);
+}
+
+/*
+ * Link cl at the head of the bucket for roundedS and mark that bucket
+ * as non-empty. roundedS is always cl->S rounded on grp->slot_shift
+ * bits.
+ * XXX we should make sure that the slot offset is less than 32; this
+ * is guaranteed by the input values.
+ */
+static inline void
+qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
+{
+	/* distance, in slots, from the start of the group */
+	uint64_t offset = (roundedS - grp->S) >> grp->slot_shift;
+	unsigned int bucket = (grp->front + offset) % QFQ_MAX_SLOTS;
+	struct qfq_class **head = &grp->slots[bucket];
+
+	cl->next = *head;
+	*head = cl;
+	__set_bit(offset, &grp->full_slots);
+}
+
+/*
+ * Unlink the first class of the front bucket; when the bucket becomes
+ * empty, clear bit 0 of full_slots. The front bucket must not be
+ * empty on entry.
+ */
+static inline void
+qfq_front_slot_remove(struct qfq_group *grp)
+{
+	struct qfq_class *first = grp->slots[grp->front];
+	struct qfq_class *rest = first->next;
+
+	grp->slots[grp->front] = rest;
+	if (rest == NULL)
+		__clear_bit(0, &grp->full_slots);
+}
+
+/*
+ * Return the first class of the first non-empty bucket of a group,
+ * or NULL if the group has no full slot. As a side effect the bucket
+ * list is advanced so that this bucket becomes the front one, i.e.
+ * position 0 in full_slots.
+ */
+static inline struct qfq_class *
+qfq_slot_scan(struct qfq_group *grp)
+{
+	int skip;
+
+	ND("grp %d full %x", grp->index, grp->full_slots);
+	if (grp->full_slots == 0)
+		return NULL;
+
+	skip = ffs(grp->full_slots) - 1;	/* empty buckets in front */
+	if (skip > 0) {
+		grp->full_slots >>= skip;
+		grp->front = (grp->front + skip) % QFQ_MAX_SLOTS;
+	}
+
+	return grp->slots[grp->front];
+}
+
+/*
+ * Adjust the bucket list when the start time of a group decreases to
+ * roundedS: the front index moves back (modulo QFQ_MAX_SLOTS) so the
+ * objects stay where they are, and the mask of occupied slots is
+ * shifted by the same amount because ffs() is used to find the first
+ * non-empty slot.
+ * This covers decreases in the group's start time, but what about
+ * increases of the start time ?
+ * Here too we should make sure that the shift is less than 32.
+ */
+static inline void
+qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
+{
+	unsigned int back = (grp->S - roundedS) >> grp->slot_shift;
+
+	grp->front = (grp->front - back) % QFQ_MAX_SLOTS;
+	grp->full_slots <<= back;
+}
+
+
+/*
+ * Called after V has changed from old_V. If there are ineligible
+ * groups: when nothing is in ER, V is first moved forward to the start
+ * time of the lowest-index ineligible group (if that is after V);
+ * then groups are made eligible according to the change of V.
+ */
+static inline void
+qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
+{
+	bitmap waiting = q->bitmaps[IR] | q->bitmaps[IB];
+
+	if (waiting == 0)
+		return;
+
+	if (q->bitmaps[ER] == 0) {
+		struct qfq_group *first = qfq_ffs(q, waiting);
+
+		if (qfq_gt(first->S, q->V))
+			q->V = first->S;
+	}
+	qfq_make_eligible(q, old_V);
+}
+
+/*
+ * Updates the class, returns true if also the group needs to be updated.
+ * Called by qfq_dequeue() after a packet has been extracted from cl,
+ * which is at the head of the front bucket of grp.
+ * Returns 0 only when the class still has packets and its new start
+ * time falls in the same slot as the group's start time.
+ */
+static inline int
+qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
+ struct qfq_class *cl)
+{
+
+ /* the next packet, if any, starts where the previous one finished */
+ cl->S = cl->F;
+ if (cl->_q.mq.head == NULL) {
+ /* class is now idle, take it out of the bucket list */
+ qfq_front_slot_remove(grp);
+ } else {
+ unsigned int len;
+ uint64_t roundedS;
+
+ /* finish time for the packet now at the head of the queue */
+ len = cl->_q.mq.head->m_pkthdr.len;
+ cl->F = cl->S + (uint64_t)len * cl->inv_w;
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ if (roundedS == grp->S)
+ return 0;
+
+ /* different slot: move the class to the bucket for roundedS */
+ qfq_front_slot_remove(grp);
+ qfq_slot_insert(grp, cl, roundedS);
+ }
+ return 1;
+}
+
+/*
+ * Dequeue callback: extract one packet from the first class of the
+ * lowest-index group in ER, advance the virtual time V and update
+ * class, group and bitmaps accordingly.
+ * Returns NULL when no group is in ER or when the selected class
+ * unexpectedly has no packet.
+ */
+static struct mbuf *
+qfq_dequeue(struct dn_sch_inst *si)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+ struct qfq_group *grp;
+ struct qfq_class *cl;
+ struct mbuf *m;
+ uint64_t old_V;
+
+ NO(q->loops++;)
+ if (!q->bitmaps[ER]) {
+ NO(if (q->queued)
+ dump_sched(q, "start dequeue");)
+ return NULL;
+ }
+
+ grp = qfq_ffs(q, q->bitmaps[ER]);
+
+ cl = grp->slots[grp->front];
+ /* extract from the first bucket in the bucket list */
+ m = dn_dequeue(&cl->_q);
+
+ if (!m) {
+ D("BUG/* non-workconserving leaf */");
+ return NULL;
+ }
+ NO(q->queued--;)
+ /* advance V by the packet length scaled by the inverse weight sum */
+ old_V = q->V;
+ q->V += (uint64_t)m->m_pkthdr.len * IWSUM;
+ ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
+
+ if (qfq_update_class(q, grp, cl)) {
+ /* remember the old finish time, needed to unblock other groups */
+ uint64_t old_F = grp->F;
+ cl = qfq_slot_scan(grp);
+ if (!cl) { /* group gone, remove from ER */
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ // grp->S = grp->F + 1; // XXX debugging only
+ } else {
+ uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ unsigned int s;
+
+ /* group timestamps unchanged, nothing to unblock */
+ if (grp->S == roundedS)
+ goto skip_unblock;
+ grp->S = roundedS;
+ grp->F = roundedS + (2ULL << grp->slot_shift);
+ /* remove from ER and put in the new set */
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ s = qfq_calc_state(q, grp);
+ __set_bit(grp->index, &q->bitmaps[s]);
+ }
+ /* we need to unblock even if the group has gone away */
+ qfq_unblock_groups(q, grp->index, old_F);
+ }
+
+skip_unblock:
+ qfq_update_eligible(q, old_V);
+ NO(if (!q->bitmaps[ER] && q->queued)
+ dump_sched(q, "end dequeue");)
+
+ return m;
+}
+
+/*
+ * Assign a reasonable start time for a new flow k in group i.
+ * Admissible values for \hat(F) are multiples of \sigma_i
+ * no greater than V+\sigma_i . Larger values mean that
+ * we had a wraparound so we consider the timestamp to be stale.
+ *
+ * If F is not stale and F >= V then we set S = F.
+ * Otherwise we should assign S = V, but this may violate
+ * the ordering in ER. So, if we have groups in ER, set S to
+ * the F_j of the first group j which would be blocking us.
+ * We are guaranteed not to move S backward because
+ * otherwise our group i would still be blocked.
+ *
+ * limit and roundedF are 64-bit like the timestamps they are derived
+ * from, and the slot size is built from 1ULL: slot_shift can exceed
+ * 31, so 32-bit variables or a 32-bit shift would truncate the values
+ * compared below.
+ */
+static inline void
+qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
+{
+	unsigned long mask;
+	uint64_t limit, roundedF;
+	int slot_shift = cl->grp->slot_shift;
+
+	roundedF = qfq_round_down(cl->F, slot_shift);
+	limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
+
+	if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
+		/* timestamp was stale */
+		mask = mask_from(q->bitmaps[ER], cl->grp->index);
+		if (mask) {
+			struct qfq_group *next = qfq_ffs(q, mask);
+			if (qfq_gt(roundedF, next->F)) {
+				cl->S = next->F;
+				return;
+			}
+		}
+		cl->S = q->V;
+	} else { /* timestamp is not stale */
+		cl->S = cl->F;
+	}
+}
+
+/*
+ * Enqueue callback: queue m on the class and, if the class was idle,
+ * compute its timestamps and insert it in the bucket list of its
+ * group, updating the group state if needed.
+ * Returns 1 if dn_enqueue() reported that the packet was dropped,
+ * 0 otherwise.
+ */
+static int
+qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
+{
+ struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+ struct qfq_group *grp;
+ struct qfq_class *cl = (struct qfq_class *)_q;
+ uint64_t roundedS;
+ int s;
+
+ NO(q->loops++;)
+ DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
+ _q, cl->inv_w, cl->grp->index);
+ /* XXX verify that the packet obeys the parameters */
+ /* if m is already the head of the queue, skip the actual enqueue */
+ if (m != _q->mq.head) {
+ if (dn_enqueue(_q, m, 0)) /* packet was dropped */
+ return 1;
+ NO(q->queued++;)
+ /* m is not at the head: the class was already backlogged */
+ if (m != _q->mq.head)
+ return 0;
+ }
+ /* If reach this point, queue q was idle */
+ grp = cl->grp;
+ qfq_update_start(q, cl); /* adjust start time */
+ /* compute new finish time and rounded start. */
+ cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+
+ /*
+ * insert cl in the correct bucket.
+ * If cl->S >= grp->S we don't need to adjust the
+ * bucket list and simply go to the insertion phase.
+ * Otherwise grp->S is decreasing, we must make room
+ * in the bucket list, and also recompute the group state.
+ * Finally, if there were no flows in this group and nobody
+ * was in ER make sure to adjust V.
+ */
+ if (grp->full_slots) {
+ if (!qfq_gt(grp->S, cl->S))
+ goto skip_update;
+ /* create a slot for this cl->S */
+ qfq_slot_rotate(q, grp, roundedS);
+ /* group was surely ineligible, remove */
+ __clear_bit(grp->index, &q->bitmaps[IR]);
+ __clear_bit(grp->index, &q->bitmaps[IB]);
+ } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
+ q->V = roundedS;
+
+ /* new group timestamps, then file the group under its new state */
+ grp->S = roundedS;
+ grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
+ s = qfq_calc_state(q, grp);
+ __set_bit(grp->index, &q->bitmaps[s]);
+ ND("new state %d 0x%x", s, q->bitmaps[s]);
+ ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
+skip_update:
+ qfq_slot_insert(grp, cl, roundedS);
+
+ return 0;
+}
+
+
+#if 0
+/*
+ * (Compiled out by the enclosing #if 0.)
+ * Unlink cl from the bucket matching its start time and clear the
+ * bucket's bit in full_slots if the bucket becomes empty.
+ * pprev must point to the link that references cl; the code that
+ * would search for it when pprev is NULL is under #ifdef notyet.
+ */
+static inline void
+qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
+ struct qfq_class *cl, struct qfq_class **pprev)
+{
+ unsigned int i, offset;
+ uint64_t roundedS;
+
+ /* same bucket computation as in qfq_slot_insert() */
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ offset = (roundedS - grp->S) >> grp->slot_shift;
+ i = (grp->front + offset) % QFQ_MAX_SLOTS;
+
+#ifdef notyet
+ if (!pprev) {
+ pprev = &grp->slots[i];
+ while (*pprev && *pprev != cl)
+ pprev = &(*pprev)->next;
+ }
+#endif
+
+ *pprev = cl->next;
+ if (!grp->slots[i])
+ __clear_bit(offset, &grp->full_slots);
+}
+
+/*
+ * (Compiled out by the enclosing #if 0.)
+ * called to forcibly destroy a queue.
+ * If the queue is not in the front bucket, or if it has
+ * other queues in the front bucket, we can simply remove
+ * the queue with no other side effects.
+ * Otherwise we must propagate the event up.
+ * XXX description to be completed.
+ * NOTE(review): this uses cl->index, but struct qfq_class in this
+ * file has no index member (only grp); the code needs updating
+ * before it can be re-enabled.
+ */
+static void
+qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
+ struct qfq_class **pprev)
+{
+ struct qfq_group *grp = &q->groups[cl->index];
+ unsigned long mask;
+ uint64_t roundedS;
+ int s;
+
+ cl->F = cl->S; // not needed if the class goes away.
+ qfq_slot_remove(q, grp, cl, pprev);
+
+ if (!grp->full_slots) {
+ /* nothing left in the group, remove from all sets.
+ * Do ER last because if we were blocking other groups
+ * we must unblock them.
+ */
+ __clear_bit(grp->index, &q->bitmaps[IR]);
+ __clear_bit(grp->index, &q->bitmaps[EB]);
+ __clear_bit(grp->index, &q->bitmaps[IB]);
+
+ if (test_bit(grp->index, &q->bitmaps[ER]) &&
+ !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
+ mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
+ if (mask)
+ mask = ~((1UL << __fls(mask)) - 1);
+ else
+ mask = ~0UL;
+ qfq_move_groups(q, mask, EB, ER);
+ qfq_move_groups(q, mask, IB, IR);
+ }
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ } else if (!grp->slots[grp->front]) {
+ /* front bucket emptied: advance to the next full one and,
+ * if the group start time changed, recompute its state
+ */
+ cl = qfq_slot_scan(grp);
+ roundedS = qfq_round_down(cl->S, grp->slot_shift);
+ if (grp->S != roundedS) {
+ __clear_bit(grp->index, &q->bitmaps[ER]);
+ __clear_bit(grp->index, &q->bitmaps[IR]);
+ __clear_bit(grp->index, &q->bitmaps[EB]);
+ __clear_bit(grp->index, &q->bitmaps[IB]);
+ grp->S = roundedS;
+ grp->F = roundedS + (2ULL << grp->slot_shift);
+ s = qfq_calc_state(q, grp);
+ __set_bit(grp->index, &q->bitmaps[s]);
+ }
+ }
+ qfq_update_eligible(q, q->V);
+}
+#endif
+
+/*
+ * Flowset creation hook: par[0] is the weight (default 1, range
+ * 1..QFQ_MAX_WEIGHT), par[1] the maximum packet length (default 1500,
+ * range 1..2000).
+ * NOTE(review): ipdn_bound_var() presumably clamps out-of-range values
+ * and logs the given label - confirm against its definition.
+ */
+static int
+qfq_new_fsk(struct dn_fsk *f)
+{
+	ipdn_bound_var(f->fs.par, 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
+	ipdn_bound_var(f->fs.par + 1, 1500, 1, 2000, "qfq maxlen");
+	ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
+
+	return 0;
+}
+
+/*
+ * Initialize a new scheduler instance: every group gets its own
+ * index and a slot shift that grows by one with the index, starting
+ * from QFQ_MIN_SLOT_SHIFT for group 0.
+ */
+static int
+qfq_new_sched(struct dn_sch_inst *si)
+{
+	struct qfq_sched *q = (struct qfq_sched *)(si + 1);
+	int i;
+
+	for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
+		q->groups[i].index = i;
+		q->groups[i].slot_shift = QFQ_MIN_SLOT_SHIFT + i;
+	}
+
+	return 0;
+}
+
+/*
+ * QFQ scheduler descriptor: type, name, flags, size of the extra
+ * data areas and the callbacks implemented in this file.
+ * Each instance carries a struct qfq_sched, and each queue is extended
+ * by the members that struct qfq_class adds to struct dn_queue.
+ * Entries set to NULL are callbacks this scheduler does not implement.
+ */
+static struct dn_alg qfq_desc = {
+ _SI( .type = ) DN_SCHED_QFQ,
+ _SI( .name = ) "QFQ",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct qfq_sched),
+ _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
+
+ _SI( .enqueue = ) qfq_enqueue,
+ _SI( .dequeue = ) qfq_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) qfq_new_sched,
+ _SI( .free_sched = ) NULL,
+ _SI( .new_fsk = ) qfq_new_fsk,
+ _SI( .free_fsk = ) NULL,
+ _SI( .new_queue = ) qfq_new_queue,
+ _SI( .free_queue = ) qfq_free_queue,
+};
+
+/* NOTE(review): presumably registers qfq_desc as module "dn_qfq" - confirm in dn_sched.h */
+DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
+
+#ifdef QFQ_DEBUG
+/*
+ * Debugging aid: for every group whose bit is set in mask, print the
+ * non-empty buckets, the full_slots bitmap and the group timestamps.
+ * Arguments are cast to the types the format specifiers expect:
+ * full_slots is an unsigned long, S and F are uint64_t.
+ */
+static void
+dump_groups(struct qfq_sched *q, uint32_t mask)
+{
+	int i, j;
+
+	for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
+		struct qfq_group *g = &q->groups[i];
+
+		if (0 == (mask & (1U << i)))
+			continue;
+		for (j = 0; j < QFQ_MAX_SLOTS; j++) {
+			if (g->slots[j])
+				D(" bucket %d %p", j, (void *)g->slots[j]);
+		}
+		D("full_slots 0x%lx", (unsigned long)g->full_slots);
+		D(" %2d S 0x%20llx F 0x%llx %c", i,
+		    (unsigned long long)g->S, (unsigned long long)g->F,
+		    mask & (1U << i) ? '1' : '0');
+	}
+}
+
+/*
+ * Debugging aid: print the four group bitmaps and then the details of
+ * all groups. msg identifies the caller in the output.
+ * The bitmaps are unsigned long, hence the 'l' length modifier.
+ */
+static void
+dump_sched(struct qfq_sched *q, const char *msg)
+{
+	D("--- in %s: ---", msg);
+	ND("loops %d queued %d V 0x%llx", q->loops, q->queued, q->V);
+	D(" ER 0x%08lx", (unsigned long)q->bitmaps[ER]);
+	D(" EB 0x%08lx", (unsigned long)q->bitmaps[EB]);
+	D(" IR 0x%08lx", (unsigned long)q->bitmaps[IR]);
+	D(" IB 0x%08lx", (unsigned long)q->bitmaps[IB]);
+	dump_groups(q, 0xffffffff);
+}
+#endif /* QFQ_DEBUG */
--- /dev/null
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: dn_sched_rr.c 5621 2010-03-04 16:51:27Z luigi $
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#define DN_SCHED_RR 3 /* scheduler type id used in rr_desc and as queue subtype. XXX where should this be defined? */
+
+/*
+ * Per-queue state of the round robin scheduler. The generic dn_queue
+ * is the first member; the code below casts between struct dn_queue *
+ * and struct rr_queue *.
+ */
+struct rr_queue {
+ struct dn_queue q; /* Standard queue */
+ int status; /* 1: queue is in the list */
+ int credit; /* Number of bytes to transmit */
+ int quantum; /* credit added per round; par[0] * par[1] of the flowset */
+ struct rr_queue *qnext; /* next queue in the circular list */
+};
+
+/* struct rr_schk contains global config parameters
+ * and is right after dn_schk.
+ * rr_config() fills it in; rr_new_fsk() passes the three values to
+ * ipdn_bound_var() when checking the flowset's par[1].
+ */
+struct rr_schk {
+ int min_q; /* Min quantum */
+ int max_q; /* Max quantum */
+ int q_bytes; /* Bytes per quantum */
+};
+
+/* per-instance round robin list, right after dn_sch_inst */
+struct rr_si {
+ /*
+ * head is the queue rr_dequeue() looks at next, tail is the last
+ * queue appended; the list is circular (tail->qnext is head).
+ * Both are NULL when the list is empty.
+ */
+ struct rr_queue *head, *tail; /* Pointer to current queue */
+};
+
+/*
+ * Link queue q at the end of the instance's circular list, flag it
+ * as listed and hand it a full quantum of credit.
+ */
+static inline void
+rr_append(struct rr_queue *q, struct rr_si *si)
+{
+	struct rr_queue *old_tail = si->tail;
+
+	q->credit = q->quantum;		/* start with a fresh quantum */
+	q->status = 1;			/* now part of the rr list */
+
+	if (si->head != NULL)
+		old_tail->qnext = q;	/* chain after the previous tail */
+	else
+		si->head = q;		/* list was empty: q is the head too */
+	si->tail = q;
+	q->qnext = si->head;		/* close the ring */
+}
+
+/*
+ * Unlink the current head of the circular list and clear its
+ * 'status' flag. Does nothing on an empty list.
+ */
+static inline void
+rr_remove_head(struct rr_si *si)
+{
+	struct rr_queue *old_head = si->head;
+
+	if (old_head == NULL)
+		return;			/* nothing queued */
+	old_head->status = 0;
+
+	if (old_head != si->tail) {
+		/* more than one element: advance head, re-close the ring */
+		si->head = old_head->qnext;
+		si->tail->qnext = si->head;
+	} else {
+		/* it was the only element */
+		si->head = NULL;
+		si->tail = NULL;
+	}
+}
+
+/*
+ * Remove queue q from the circular list of instance si.
+ * Nothing happens if q is not marked as listed (status != 1).
+ * The head is handled by rr_remove_head(); otherwise the list is
+ * walked once looking for the predecessor of q.
+ * XXX see if it can be merged with remove_queue()
+ */
+static inline void
+remove_queue_q(struct rr_queue *q, struct rr_si *si)
+{
+	struct rr_queue *prev;
+
+	if (q->status != 1)
+		return;
+	if (q == si->head) {
+		rr_remove_head(si);
+		return;
+	}
+
+	for (prev = si->head; prev != NULL; prev = prev->qnext) {
+		if (prev->qnext == q) {
+			prev->qnext = q->qnext;	/* unlink q */
+			if (q == si->tail)
+				si->tail = prev;
+			q->status = 0;
+			break;
+		}
+		/*
+		 * The list is circular, so qnext is never NULL: stop
+		 * after one full lap instead of spinning forever when
+		 * q is not actually linked in this list.
+		 */
+		if (prev == si->tail)
+			break;
+	}
+}
+
+
+/*
+ * Rotate the circular list by one position: both head and tail move
+ * to their successor. Does nothing on an empty list.
+ */
+static inline void
+next_pointer(struct rr_si *si)
+{
+	if (si->head != NULL) {
+		si->head = si->head->qnext;
+		si->tail = si->tail->qnext;
+	}
+}
+
+/*
+ * Enqueue packet m on queue q. Unless m is already the head of q,
+ * it is first stored through dn_enqueue(). If m ends up as the head
+ * of q and q is not yet in the round robin list, q is appended.
+ * Returns 1 if dn_enqueue() reported a drop, 0 otherwise.
+ */
+static int
+rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+	struct rr_queue *rrq = (struct rr_queue *)q;
+
+	if (m != q->mq.head) {
+		if (dn_enqueue(q, m, 0) != 0)
+			return 1;	/* packet was dropped */
+		if (m != q->mq.head)
+			return 0;	/* other packets are ahead of m */
+	}
+
+	/* m is the head of q: make sure q is in the rr list */
+	if (rrq->status != 1)
+		rr_append(rrq, (struct rr_si *)(_si + 1));
+
+	return 0;
+}
+
+/*
+ * Pick the next packet to send. Starting from the list head:
+ * queues with no packets are dropped from the list; a queue whose
+ * head packet is longer than its credit gets one more quantum and
+ * the list is rotated; otherwise the packet length is charged to
+ * the credit and the packet is returned.
+ * Returns NULL when the list is (or becomes) empty.
+ */
+static struct mbuf *
+rr_dequeue(struct dn_sch_inst *_si)
+{
+	/* private data sits right after the generic instance */
+	struct rr_si *si = (struct rr_si *)(_si + 1);
+	struct rr_queue *cur;
+
+	for (cur = si->head; cur != NULL; cur = si->head) {
+		struct mbuf *pkt = cur->q.mq.head;
+		uint64_t pktlen;
+
+		if (pkt == NULL) {
+			rr_remove_head(si);	/* empty queue, unlist it */
+			continue;
+		}
+		pktlen = pkt->m_pkthdr.len;
+
+		if (pktlen <= cur->credit) {
+			cur->credit -= pktlen;
+			return dn_dequeue(&cur->q);
+		}
+		/* not enough credit: recharge and move to the next queue */
+		cur->credit += cur->quantum;
+		next_pointer(si);
+	}
+
+	return NULL;	/* nothing to send */
+}
+
+/*
+ * Scheduler configuration callback: store the quantum limits
+ * (64..2048 bytes) and the default quantum (1500 bytes) in the
+ * struct rr_schk located after _schk. Always returns 0.
+ */
+static int
+rr_config(struct dn_schk *_schk)
+{
+	struct rr_schk *cfg = (struct rr_schk *)(_schk + 1);
+
+	ND("called");
+
+	cfg->q_bytes = 1500;	/* default quantum */
+	cfg->min_q = 64;	/* lower bound */
+	cfg->max_q = 2048;	/* upper bound */
+
+	return 0;
+}
+
+/*
+ * New scheduler instance callback: start with an empty
+ * round robin list. Always returns 0.
+ */
+static int
+rr_new_sched(struct dn_sch_inst *_si)
+{
+	struct rr_si *rr = (struct rr_si *)(_si + 1);
+
+	ND("called");
+	rr->tail = NULL;
+	rr->head = NULL;
+
+	return 0;
+}
+
+/*
+ * Scheduler instance teardown callback. The instance owns no
+ * resources of its own here, so this only logs and returns 0.
+ */
+static int
+rr_free_sched(struct dn_sch_inst *_si)
+{
+	(void)_si;	/* unused */
+	ND("called");
+	return 0;
+}
+
+/*
+ * New flowset callback: run the two scheduler parameters through
+ * ipdn_bound_var(). par[0] is the weight (arguments 1, 1, 65536),
+ * par[1] is the quantum step (arguments taken from the scheduler's
+ * struct rr_schk). Always returns 0.
+ */
+static int
+rr_new_fsk(struct dn_fsk *fs)
+{
+	struct rr_schk *cfg = (struct rr_schk *)(fs->sched + 1);
+
+	ipdn_bound_var(&fs->fs.par[0], 1, 1, 65536, "RR weight");
+	ipdn_bound_var(&fs->fs.par[1], cfg->q_bytes, cfg->min_q,
+	    cfg->max_q, "RR quantum");
+
+	return 0;
+}
+
+/*
+ * New queue callback: tag the queue with the RR subtype, compute its
+ * quantum as par[0] * par[1] of the flowset, give it a full credit
+ * and, if it already holds packets, append it to the rr list.
+ * Always returns 0.
+ */
+static int
+rr_new_queue(struct dn_queue *_q)
+{
+	struct rr_queue *rrq = (struct rr_queue *)_q;
+
+	_q->ni.oid.subtype = DN_SCHED_RR;
+
+	rrq->status = 0;	/* not listed yet */
+	rrq->quantum = _q->fs->fs.par[0] * _q->fs->fs.par[1];
+	ND("called, q->quantum %d", rrq->quantum);
+	rrq->credit = rrq->quantum;
+
+	if (_q->mq.head == NULL)
+		return 0;	/* empty queue: stays out of the list */
+
+	rr_append(rrq, (struct rr_si *)(_q->_si + 1));
+	return 0;
+}
+
+/*
+ * Queue removal callback: if the queue is currently in the round
+ * robin list, unlink it. Always returns 0.
+ */
+static int
+rr_free_queue(struct dn_queue *_q)
+{
+	struct rr_queue *rrq = (struct rr_queue *)_q;
+
+	ND("called");
+	if (rrq->status != 1)
+		return 0;	/* not listed, nothing to undo */
+
+	remove_queue_q(rrq, (struct rr_si *)(_q->_si + 1));
+	return 0;
+}
+
+/*
+ * RR scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the
+ * private structures and the function pointers.
+ *
+ * schk_datalen must cover struct rr_schk: rr_config() writes it and
+ * rr_new_fsk() reads it at (schk + 1), so declaring 0 bytes here
+ * would make those accesses fall outside the allocated object.
+ */
+static struct dn_alg rr_desc = {
+	_SI( .type = )		DN_SCHED_RR,
+	_SI( .name = )		"RR",
+	_SI( .flags = )		DN_MULTIQUEUE,
+
+	_SI( .schk_datalen = )	sizeof(struct rr_schk),
+	_SI( .si_datalen = )	sizeof(struct rr_si),
+	_SI( .q_datalen = )	sizeof(struct rr_queue) - sizeof(struct dn_queue),
+
+	_SI( .enqueue = )	rr_enqueue,
+	_SI( .dequeue = )	rr_dequeue,
+
+	_SI( .config = )	rr_config,
+	_SI( .destroy = )	NULL,
+	_SI( .new_sched = )	rr_new_sched,
+	_SI( .free_sched = )	rr_free_sched,
+	_SI( .new_fsk = )	rr_new_fsk,
+	_SI( .free_fsk = )	NULL,
+	_SI( .new_queue = )	rr_new_queue,
+	_SI( .free_queue = )	rr_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_rr, &rr_desc);
--- /dev/null
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: dn_sched_wf2q.c 5621 2010-03-04 16:51:27Z luigi $
+ */
+
+#ifdef _KERNEL
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <net/if.h> /* IFNAMSIZ */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ipfw_rule_ref */
+#include <netinet/ip_fw.h> /* flow_id */
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+#else
+#include <dn_test.h>
+#endif
+
+#ifndef MAX64
+/*
+ * Wraparound-aware maximum of two 64-bit keys: the difference is
+ * interpreted as signed, as in the DN_KEY_* comparisons.
+ * The whole expansion is parenthesized so that the macro can be used
+ * inside larger expressions (e.g. 1 + MAX64(a, b)) without the
+ * surrounding operators binding to the condition of the ?: operator.
+ * Note that both arguments may be evaluated more than once.
+ */
+#define MAX64(x,y)	((((int64_t)((y)-(x))) > 0) ? (y) : (x))
+#endif
+
+/*
+ * timestamps are computed on 64 bit using fixed point arithmetic.
+ * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len
+ * and sum of weights, respectively. FRAC_BITS is the number of
+ * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large
+ * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w
+ * using an unsigned 32-bit division, and to avoid wraparounds we need
+ * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64
+ * (">>" and "<<" above presumably read as "much greater/less than",
+ * not as shift operators).
+ * As an example
+ * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19
+ * NOTE(review): the default defined below is 28, not the 26 used
+ * in the example.
+ */
+#ifndef FRAC_BITS
+#define FRAC_BITS 28 /* shift for fixed point arithmetic */
+#define ONE_FP (1UL << FRAC_BITS) /* the value 1 in fixed point; divided by weights to get inverses */
+#endif
+
+/*
+ * Private information for the scheduler instance:
+ * sch_heap (key is Finish time) returns the next queue to serve
+ * ne_heap (key is Start time) stores not-eligible queues
+ * idle_heap (key=start/finish time) stores idle flows. It must
+ * support extract-from-middle.
+ * A flow is in at most one of the three heaps at any time.
+ * The structure is accessed as (struct wf2qp_si *)(_si + 1), i.e. it
+ * sits right after the generic struct dn_sch_inst.
+ * XXX todo: use a more efficient data structure, e.g. a tree sorted
+ * by F with min_subtree(S) in each node
+ */
+struct wf2qp_si {
+ struct dn_heap sch_heap; /* top extract - key Finish time */
+ struct dn_heap ne_heap; /* top extract - key Start time */
+ struct dn_heap idle_heap; /* random extract - key Start=Finish time */
+ uint64_t V; /* virtual time */
+ uint32_t inv_wsum; /* inverse of sum of weights: ONE_FP / wsum */
+ uint32_t wsum; /* sum of weights (par[0]) of the queues accounted for */
+};
+
+/*
+ * Per-queue state of the WF2Q+ scheduler. The generic dn_queue is the
+ * first member; the code casts between struct dn_queue * and
+ * struct wf2qp_queue *.
+ */
+struct wf2qp_queue {
+ struct dn_queue _q;
+ uint64_t S, F; /* start time, finish time. S = F + 1 marks them invalid */
+ uint32_t inv_w; /* ONE_FP / weight */
+ int32_t heap_pos; /* position (index) of struct in heap; its offset is given to heap_init() */
+};
+
+/*
+ * This file implements a WF2Q+ scheduler as it has been in dummynet
+ * since 2000.
+ * The scheduler supports per-flow queues and has O(log N) complexity.
+ *
+ * WF2Q+ has to drain entries from the idle heap in order to keep the
+ * sum of weights up to date. That can happen whenever there is a
+ * chance, periodically, or following some other strategy.
+ *
+ * idle_check() extracts at most n entries from the idle heap. Unless
+ * 'force' is set, it stops at the first entry whose key is not
+ * before the current virtual time V. Each extracted queue gets its
+ * timestamps invalidated and its weight removed from wsum.
+ */
+static void
+idle_check(struct wf2qp_si *si, int n, int force)
+{
+	struct dn_heap *idle = &si->idle_heap;
+
+	for (; n > 0; n--) {
+		struct dn_queue *dq;
+		struct wf2qp_queue *fq;
+
+		if (idle->elements <= 0)
+			break;		/* heap drained */
+		if (!force && !DN_KEY_LT(HEAP_TOP(idle)->key, si->V))
+			break;		/* top entry not expired yet */
+
+		dq = HEAP_TOP(idle)->object;
+		fq = (struct wf2qp_queue *)dq;
+
+		heap_extract(idle, NULL);
+		/* XXX to let the flowset delete the queue we should
+		 * mark it as 'unused' by the scheduler.
+		 */
+		fq->S = fq->F + 1;	/* Mark timestamp as invalid. */
+		si->wsum -= dq->fs->fs.par[0];	/* adjust sum of weights */
+		if (si->wsum > 0)
+			si->inv_wsum = ONE_FP/si->wsum;
+	}
+}
+
+/*
+ * Enqueue packet m on queue q of scheduler instance _si.
+ * Unless m is already the head of q, the packet is first stored with
+ * dn_enqueue(). If m becomes the head of q, the queue's start and
+ * finish times are (re)computed and the queue is inserted in the
+ * not-eligible heap (S > V) or in the scheduler heap.
+ * Returns 1 if dn_enqueue() reported a drop, 0 otherwise.
+ */
+static int
+wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
+{
+ struct dn_fsk *fs = q->fs;
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct wf2qp_queue *alg_fq;
+ uint64_t len = m->m_pkthdr.len;
+
+ if (m != q->mq.head) {
+ if (dn_enqueue(q, m, 0)) /* packet was dropped */
+ return 1;
+ if (m != q->mq.head) /* queue was already busy */
+ return 0;
+ }
+
+ /* If reach this point, queue q was idle */
+ alg_fq = (struct wf2qp_queue *)q;
+
+ if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
+ /* F<S means timestamps are invalid ->brand new queue. */
+ alg_fq->S = si->V; /* init start time */
+ si->wsum += fs->fs.par[0]; /* add weight of new queue. */
+ si->inv_wsum = ONE_FP/si->wsum;
+ } else { /* if it was idle then it was in the idle heap */
+ heap_extract(&si->idle_heap, q);
+ alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */
+ }
+ /* finish time: start time plus packet length scaled by 1/weight */
+ alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
+
+ /* if nothing is backlogged, make sure this flow is eligible */
+ if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
+ si->V = MAX64(alg_fq->S, si->V);
+
+ /*
+ * Look at eligibility. A flow is not eligible if S>V (when
+ * this happens, it means that there is some other flow already
+ * scheduled for the same pipe, so the sch_heap cannot be
+ * empty). If the flow is not eligible we just store it in the
+ * ne_heap. Otherwise, we store in the sch_heap.
+ * Note that for all flows in sch_heap (SCH), S_i <= V,
+ * and for all flows in ne_heap (NEH), S_i > V.
+ * So when we need to compute max(V, min(S_i)) forall i in
+ * SCH+NEH, we only need to look into NEH.
+ */
+ if (DN_KEY_LT(si->V, alg_fq->S)) {
+ /* S>V means flow Not eligible. */
+ if (si->sch_heap.elements == 0)
+ D("++ ouch! not eligible but empty scheduler!");
+ heap_insert(&si->ne_heap, alg_fq->S, q);
+ } else {
+ heap_insert(&si->sch_heap, alg_fq->F, q);
+ }
+ return 0;
+}
+
+/*
+ * Extract the next packet to transmit from scheduler instance _si.
+ * Returns NULL when both the scheduler heap and the not-eligible heap
+ * are empty; in that case the idle heap is drained and V and wsum are
+ * reset to 0. Otherwise the queue on top of the scheduler heap is
+ * served, V is advanced by len * inv_wsum, and the queue is put back
+ * in the idle, scheduler or not-eligible heap.
+ *
+ * XXX invariant: sch > 0 || V >= min(S in neh)
+ */
+static struct mbuf *
+wf2qp_dequeue(struct dn_sch_inst *_si)
+{
+ /* Access scheduler instance private data */
+ struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+ struct mbuf *m;
+ struct dn_queue *q;
+ struct dn_heap *sch = &si->sch_heap;
+ struct dn_heap *neh = &si->ne_heap;
+ struct wf2qp_queue *alg_fq;
+
+ if (sch->elements == 0 && neh->elements == 0) {
+ /* we have nothing to do. We could kill the idle heap
+ * altogether and reset V
+ */
+ idle_check(si, 0x7fffffff, 1);
+ si->V = 0;
+ si->wsum = 0; /* should be set already */
+ return NULL; /* quick return if nothing to do */
+ }
+ idle_check(si, 1, 0); /* drain something from the idle heap */
+
+ /* make sure at least one element is eligible, bumping V
+ * and moving entries that have become eligible.
+ * We need to repeat the first part twice, before and
+ * after extracting the candidate, or enqueue() will
+ * find the data structure in a wrong state.
+ */
+ m = NULL;
+ for(;;) {
+ /*
+ * Compute V = max(V, min(S_i)). Remember that all elements
+ * in sch have by definition S_i <= V so if sch is not empty,
+ * V is surely the max and we must not update it. Conversely,
+ * if sch is empty we only need to look at neh.
+ * We don't need to move the queues, as it will be done at the
+ * next enqueue
+ */
+ if (sch->elements == 0 && neh->elements > 0) {
+ si->V = MAX64(si->V, HEAP_TOP(neh)->key);
+ }
+ /* move queues that have become eligible (S <= V) from neh to sch */
+ while (neh->elements > 0 &&
+ DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
+ q = HEAP_TOP(neh)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+ heap_extract(neh, NULL);
+ heap_insert(sch, alg_fq->F, q);
+ }
+ if (m) /* pkt found in previous iteration */
+ break;
+ /* ok we have at least one eligible pkt */
+ q = HEAP_TOP(sch)->object;
+ alg_fq = (struct wf2qp_queue *)q;
+ m = dn_dequeue(q);
+ heap_extract(sch, NULL); /* Remove queue from heap. */
+ si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
+ alg_fq->S = alg_fq->F; /* Update start time. */
+ if (q->mq.head == 0) { /* not backlogged any more. */
+ heap_insert(&si->idle_heap, alg_fq->F, q);
+ } else { /* Still backlogged. */
+ /* Update F, store in neh or sch */
+ uint64_t len = q->mq.head->m_pkthdr.len;
+ alg_fq->F += len * alg_fq->inv_w;
+ if (DN_KEY_LEQ(alg_fq->S, si->V)) {
+ heap_insert(sch, alg_fq->F, q);
+ } else {
+ heap_insert(neh, alg_fq->S, q);
+ }
+ }
+ }
+ return m;
+}
+
+/*
+ * New scheduler instance callback: initialize the three heaps, each
+ * with heap_init(heap, 16, offset of heap_pos in struct wf2qp_queue).
+ * If any initialization fails, all three heaps are passed to
+ * heap_free() and ENOMEM is returned; otherwise returns 0.
+ */
+static int
+wf2qp_new_sched(struct dn_sch_inst *_si)
+{
+	struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
+	int pos_ofs = offsetof(struct wf2qp_queue, heap_pos);
+
+	/* all heaps support extract from middle */
+	if (heap_init(&si->idle_heap, 16, pos_ofs) == 0 &&
+	    heap_init(&si->sch_heap, 16, pos_ofs) == 0 &&
+	    heap_init(&si->ne_heap, 16, pos_ofs) == 0)
+		return 0;
+
+	/* something failed: release whatever was set up */
+	heap_free(&si->ne_heap);
+	heap_free(&si->sch_heap);
+	heap_free(&si->idle_heap);
+	return ENOMEM;
+}
+
+/*
+ * Scheduler instance teardown callback: hand the three heaps of the
+ * instance to heap_free(). Always returns 0.
+ */
+static int
+wf2qp_free_sched(struct dn_sch_inst *_si)
+{
+	struct wf2qp_si *priv = (struct wf2qp_si *)(_si + 1);
+
+	heap_free(&priv->sch_heap);
+	heap_free(&priv->ne_heap);
+	heap_free(&priv->idle_heap);
+	return 0;
+}
+
+/*
+ * New flowset callback: run par[0] (the weight) through
+ * ipdn_bound_var() with arguments 1, 1, 100. Always returns 0.
+ */
+static int
+wf2qp_new_fsk(struct dn_fsk *fs)
+{
+	ipdn_bound_var(&fs->fs.par[0], 1, 1, 100, "WF2Q+ weight");
+
+	return 0;
+}
+
+/*
+ * New queue callback: tag the queue with the WF2Q+ subtype, mark its
+ * timestamps as invalid (S = F + 1), precompute ONE_FP / weight and,
+ * if the queue already holds packets, enqueue its head packet.
+ * Always returns 0.
+ */
+static int
+wf2qp_new_queue(struct dn_queue *_q)
+{
+	struct wf2qp_queue *fq = (struct wf2qp_queue *)_q;
+
+	_q->ni.oid.subtype = DN_SCHED_WF2QP;
+
+	fq->F = 0;		/* not strictly necessary */
+	fq->S = fq->F + 1;	/* S > F: timestamps are invalid */
+	fq->inv_w = ONE_FP / _q->fs->fs.par[0];
+
+	if (_q->mq.head == NULL)
+		return 0;	/* nothing queued yet */
+
+	wf2qp_enqueue(_q->_si, _q, _q->mq.head);
+	return 0;
+}
+
+/*
+ * Called when the infrastructure removes a queue (e.g. flowset
+ * is reconfigured). Nothing to do if we did not 'own' the queue,
+ * otherwise remove it from the right heap and adjust the sum
+ * of weights. Always returns 0.
+ */
+static int
+wf2qp_free_queue(struct dn_queue *q)
+{
+	struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
+	struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
+
+	/*
+	 * F < S marks invalid timestamps. Use the same wraparound-aware
+	 * test as wf2qp_enqueue(): a plain unsigned 'S >= F + 1' gives
+	 * the wrong answer once F has wrapped past the 64-bit limit.
+	 */
+	if (DN_KEY_LT(alg_fq->F, alg_fq->S))
+		return 0;	/* nothing to do, not in any heap */
+
+	si->wsum -= q->fs->fs.par[0];
+	if (si->wsum > 0)
+		si->inv_wsum = ONE_FP/si->wsum;
+
+	/* extract from the heap. XXX TODO we may need to adjust V
+	 * to make sure the invariants hold.
+	 */
+	if (q->mq.head == NULL) {
+		/* no packets: the queue is in the idle heap */
+		heap_extract(&si->idle_heap, q);
+	} else if (DN_KEY_LT(si->V, alg_fq->S)) {
+		/* S > V: not eligible */
+		heap_extract(&si->ne_heap, q);
+	} else {
+		heap_extract(&si->sch_heap, q);
+	}
+	return 0;
+}
+
+/*
+ * WF2Q+ scheduler descriptor
+ * contains the type of the scheduler, the name, the size of the
+ * structures and function pointers.
+ * NOTE(review): keep the entries in this order; presumably _SI() may
+ * expand to nothing on some compilers, making the initializer
+ * positional -- confirm against the definition of _SI.
+ */
+static struct dn_alg wf2qp_desc = {
+ _SI( .type = ) DN_SCHED_WF2QP,
+ _SI( .name = ) "WF2Q+",
+ _SI( .flags = ) DN_MULTIQUEUE,
+
+ /* we need extra space in the si and the queue */
+ _SI( .schk_datalen = ) 0,
+ _SI( .si_datalen = ) sizeof(struct wf2qp_si),
+ _SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
+ sizeof(struct dn_queue),
+
+ _SI( .enqueue = ) wf2qp_enqueue,
+ _SI( .dequeue = ) wf2qp_dequeue,
+
+ _SI( .config = ) NULL,
+ _SI( .destroy = ) NULL,
+ _SI( .new_sched = ) wf2qp_new_sched,
+ _SI( .free_sched = ) wf2qp_free_sched,
+
+ _SI( .new_fsk = ) wf2qp_new_fsk,
+ _SI( .free_fsk = ) NULL,
+
+ _SI( .new_queue = ) wf2qp_new_queue,
+ _SI( .free_queue = ) wf2qp_free_queue,
+};
+
+
+DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);
void rn_init(int);
int rn_inithead(void **, int);
+int rn_detachhead(void **);
int rn_refines(void *, void *);
struct radix_node
*rn_addmask(void *, int, int),
*/
struct ip {
#if BYTE_ORDER == LITTLE_ENDIAN
- u_int ip_hl:4, /* header length */
+ u_char ip_hl:4, /* header length */
ip_v:4; /* version */
#endif
#if BYTE_ORDER == BIG_ENDIAN
- u_int ip_v:4, /* version */
+ u_char ip_v:4, /* version */
ip_hl:4; /* header length */
#endif
u_char ip_tos; /* type of service */
/*-
- * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
* Portions Copyright (c) 2000 Akamba Corp.
* All rights reserved
*
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40.2.1 2008/04/25 10:26:30 oleg Exp $
+ * $FreeBSD: user/luigi/ipfw3-head/sys/netinet/ip_dummynet.h 203321 2010-01-31 21:39:25Z luigi $
*/
#ifndef _IP_DUMMYNET_H
#define _IP_DUMMYNET_H
/*
- * Definition of dummynet data structures. In the structures, I decided
- * not to use the macros in <sys/queue.h> in the hope of making the code
- * easier to port to other architectures. The type of lists and queue we
- * use here is pretty simple anyways.
- */
-
-/*
- * We start with a heap, which is used in the scheduler to decide when
- * to transmit packets etc.
+ * Definition of the kernel-userland API for dummynet.
*
- * The key for the heap is used for two different values:
+ * Setsockopt() and getsockopt() pass a batch of objects, each
+ * of them starting with a "struct dn_id" which should fully identify
+ * the object and its relation with others in the sequence.
+ * The first object in each request should have
+ * type= DN_CMD_*, id = DN_API_VERSION.
+ * For other objects, type and subtype specify the object, len indicates
+ * the total length including the header, and 'id' identifies the specific
+ * object.
*
- * 1. timer ticks- max 10K/second, so 32 bits are enough;
- *
- * 2. virtual times. These increase in steps of len/x, where len is the
- * packet length, and x is either the weight of the flow, or the
- * sum of all weights.
- * If we limit to max 1000 flows and a max weight of 100, then
- * x needs 17 bits. The packet size is 16 bits, so we can easily
- * overflow if we do not allow errors.
- * So we use a key "dn_key" which is 64 bits. Some macros are used to
- * compare key values and handle wraparounds.
- * MAX64 returns the largest of two key values.
- * MY_M is used as a shift count when doing fixed point arithmetic
- * (a better name would be useful...).
- */
-typedef u_int64_t dn_key ; /* sorting key */
-#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0)
-#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0)
-#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0)
-#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0)
-#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x)
-#define MY_M 16 /* number of left shift to obtain a larger precision */
-
-/*
- * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the
- * virtual time wraps every 15 days.
+ * Most objects are numbered with an identifier in the range 1..65535.
+ * DN_MAX_ID indicates the first value outside the range.
*/
+#define DN_API_VERSION 12500000
+#define DN_MAX_ID 0x10000
-/*
- * The maximum hash table size for queues. This value must be a power
- * of 2.
- */
-#define DN_MAX_HASH_SIZE 65536
+/* Common header that starts every object exchanged with userland. */
+struct dn_id {
+ uint16_t len; /* total obj len including this header */
+ uint8_t type; /* object type or DN_CMD_* command */
+ uint8_t subtype; /* together with type, specifies the object */
+ uint32_t id; /* generic id */
+};
/*
- * A heap entry is made of a key and a pointer to the actual
- * object stored in the heap.
- * The heap is an array of dn_heap_entry entries, dynamically allocated.
- * Current size is "size", with "elements" actually in use.
- * The heap normally supports only ordered insert and extract from the top.
- * If we want to extract an object from the middle of the heap, we
- * have to know where the object itself is located in the heap (or we
- * need to scan the whole array). To this purpose, an object has a
- * field (int) which contains the index of the object itself into the
- * heap. When the object is moved, the field must also be updated.
- * The offset of the index in the object is stored in the 'offset'
- * field in the heap descriptor. The assumption is that this offset
- * is non-zero if we want to support extract from the middle.
- */
-struct dn_heap_entry {
- dn_key key ; /* sorting key. Topmost element is smallest one */
- void *object ; /* object pointer */
-} ;
-
-struct dn_heap {
- int size ;
- int elements ;
- int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */
- struct dn_heap_entry *p ; /* really an array of "size" entries */
-} ;
-
-#ifdef _KERNEL
-/*
- * Packets processed by dummynet have an mbuf tag associated with
- * them that carries their dummynet state. This is used within
- * the dummynet code as well as outside when checking for special
- * processing requirements.
- * Note that the first part is the reinject info and is common to
- * other forms of packet reinjection.
+ * These values are in the type field of struct dn_id.
+ * To preserve the ABI, never rearrange the list or delete
+ * entries with the exception of DN_LAST
*/
-struct dn_pkt_tag {
- struct ipfw_rule_ref rule; /* matching rule */
-
- /* second part, dummynet specific */
- int dn_dir; /* action when packet comes out. */
- /* see ip_fw_private.h */
+enum {
+ /* object types */
+ DN_NONE = 0,
+ DN_LINK = 1,
+ DN_FS,
+ DN_SCH,
+ DN_SCH_I,
+ DN_QUEUE,
+ DN_DELAY_LINE,
+ DN_PROFILE,
+ DN_FLOW, /* struct dn_flow */
+ DN_TEXT, /* opaque text is the object */
+
+ /* commands, numbered from 0x80 */
+ DN_CMD_CONFIG = 0x80, /* objects follow */
+ DN_CMD_DELETE, /* subtype + list of entries */
+ DN_CMD_GET, /* subtype + list of entries */
+ DN_CMD_FLUSH,
+ /* for compatibility with FreeBSD 7.2/8 */
+ DN_COMPAT_PIPE,
+ DN_COMPAT_QUEUE,
+ DN_GET_COMPAT,
+
+ /* special commands for emulation of sysctl variables */
+ DN_SYSCTL_GET,
+ DN_SYSCTL_SET,
+
+ DN_LAST, /* end marker; the only entry whose value may change */
+};
+
+
+enum { /* subtype for schedulers, flowset and the like */
+ DN_SCHED_UNKNOWN = 0,
+ DN_SCHED_FIFO = 1,
+ DN_SCHED_WF2QP = 2,
+ /* others are in individual modules (e.g. DN_SCHED_RR is 3 in dn_sched_rr.c) */
+};
- dn_key output_time; /* when the pkt is due for delivery */
- struct ifnet *ifp; /* interface, for ip_output */
- struct _ip6dn_args ip6opt; /* XXX ipv6 options */
+enum { /* user flags */
+ DN_HAVE_MASK = 0x0001, /* fs or sched has a mask */
+ DN_NOERROR = 0x0002, /* do not report errors */
+ DN_QHT_HASH = 0x0004, /* qht is a hash table */
+ DN_QSIZE_BYTES = 0x0008, /* queue size is in bytes */
+ DN_HAS_PROFILE = 0x0010, /* a link has a profile */
+ DN_IS_RED = 0x0020,
+ DN_IS_GENTLE_RED= 0x0040,
+ DN_PIPE_CMD = 0x1000, /* pipe config... */
};
-#endif /* _KERNEL */
/*
- * Overall structure of dummynet (with WF2Q+):
-
-In dummynet, packets are selected with the firewall rules, and passed
-to two different objects: PIPE or QUEUE.
-
-A QUEUE is just a queue with configurable size and queue management
-policy. It is also associated with a mask (to discriminate among
-different flows), a weight (used to give different shares of the
-bandwidth to different flows) and a "pipe", which essentially
-supplies the transmit clock for all queues associated with that
-pipe.
-
-A PIPE emulates a fixed-bandwidth link, whose bandwidth is
-configurable. The "clock" for a pipe can come from either an
-internal timer, or from the transmit interrupt of an interface.
-A pipe is also associated with one (or more, if masks are used)
-queue, where all packets for that pipe are stored.
-
-The bandwidth available on the pipe is shared by the queues
-associated with that pipe (only one in case the packet is sent
-to a PIPE) according to the WF2Q+ scheduling algorithm and the
-configured weights.
-
-In general, incoming packets are stored in the appropriate queue,
-which is then placed into one of a few heaps managed by a scheduler
-to decide when the packet should be extracted.
-The scheduler (a function called dummynet()) is run at every timer
-tick, and grabs queues from the head of the heaps when they are
-ready for processing.
-
-There are three data structures definining a pipe and associated queues:
-
- + dn_pipe, which contains the main configuration parameters related
- to delay and bandwidth;
- + dn_flow_set, which contains WF2Q+ configuration, flow
- masks, plr and RED configuration;
- + dn_flow_queue, which is the per-flow queue (containing the packets)
-
-Multiple dn_flow_set can be linked to the same pipe, and multiple
-dn_flow_queue can be linked to the same dn_flow_set.
-All data structures are linked in a linear list which is used for
-housekeeping purposes.
-
-During configuration, we create and initialize the dn_flow_set
-and dn_pipe structures (a dn_pipe also contains a dn_flow_set).
-
-At runtime: packets are sent to the appropriate dn_flow_set (either
-WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows),
-which in turn dispatches them to the appropriate dn_flow_queue
-(created dynamically according to the masks).
-
-The transmit clock for fixed rate flows (ready_event()) selects the
-dn_flow_queue to be used to transmit the next packet. For WF2Q,
-wfq_ready_event() extract a pipe which in turn selects the right
-flow using a number of heaps defined into the pipe itself.
-
- *
+ * link template.
*/
+struct dn_link {
+ struct dn_id oid; /* common object header */
+
+ /*
+ * Userland sets bw and delay in bits/s and milliseconds.
+ * The kernel converts this back and forth to bits/tick and ticks.
+ * XXX what about burst ?
+ */
+ int32_t link_nr; /* presumably the link number -- TODO confirm */
+ int bandwidth; /* bit/s or bits/tick. */
+ int delay; /* ms and ticks */
+ uint64_t burst; /* scaled. bits*Hz XXX */
+};
/*
- * per flow queue. This contains the flow identifier, the queue
- * of packets, counters, and parameters used to support both RED and
- * WF2Q+.
- *
- * A dn_flow_queue is created and initialized whenever a packet for
- * a new flow arrives.
+ * A flowset, which is a template for flows. Contains parameters
+ * from the command line: id, target scheduler, queue sizes, plr,
+ * flow masks, buckets for the flow hash, and possibly scheduler-
+ * specific parameters (weight, quantum and so on).
*/
-struct dn_flow_queue {
- struct dn_flow_queue *next ;
- struct ipfw_flow_id id ;
-
- struct mbuf *head, *tail ; /* queue of packets */
- u_int len ;
- u_int len_bytes ;
-
- /*
- * When we emulate MAC overheads, or channel unavailability due
- * to other traffic on a shared medium, we augment the packet at
- * the head of the queue with an 'extra_bits' field representsing
- * the additional delay the packet will be subject to:
- * extra_bits = bw*unavailable_time.
- * With large bandwidth and large delays, extra_bits (and also numbytes)
- * can become very large, so better play safe and use 64 bit
- */
- uint64_t numbytes ; /* credit for transmission (dynamic queues) */
- int64_t extra_bits; /* extra bits simulating unavailable channel */
-
- u_int64_t tot_pkts ; /* statistics counters */
- u_int64_t tot_bytes ;
- u_int32_t drops ;
-
- int hash_slot ; /* debugging/diagnostic */
-
- /* RED parameters */
- int avg ; /* average queue length est. (scaled) */
- int count ; /* arrivals since last RED drop */
- int random ; /* random value (scaled) */
- dn_key idle_time; /* start of queue idle time */
-
- /* WF2Q+ support */
- struct dn_flow_set *fs ; /* parent flow set */
- int heap_pos ; /* position (index) of struct in heap */
- dn_key sched_time ; /* current time when queue enters ready_heap */
-
- dn_key S,F ; /* start time, finish time */
- /*
- * Setting F < S means the timestamp is invalid. We only need
- * to test this when the queue is empty.
- */
-} ;
+struct dn_fs {
+ struct dn_id oid;
+ uint32_t fs_nr; /* the flowset number */
+ uint32_t flags; /* userland flags */
+ int qsize; /* queue size in slots or bytes */
+ int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
+ uint32_t buckets; /* buckets used for the queue hash table */
+
+ struct ipfw_flow_id flow_mask;
+ uint32_t sched_nr; /* the scheduler we attach to */
+ /* generic scheduler parameters. Leave them at -1 if unset.
+ * Now we use 0: weight, 1: lmax, 2: priority
+ */
+ int par[4];
+
+ /* RED/GRED parameters.
+ * weight and probabilities are in the range 0..1 represented
+ * in fixed point arithmetic with SCALE_RED decimal bits.
+ */
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
-/*
- * flow_set descriptor. Contains the "template" parameters for the
- * queue configuration, and pointers to the hash table of dn_flow_queue's.
- *
- * The hash table is an array of lists -- we identify the slot by
- * hashing the flow-id, then scan the list looking for a match.
- * The size of the hash table (buckets) is configurable on a per-queue
- * basis.
- *
- * A dn_flow_set is created whenever a new queue or pipe is created (in the
- * latter case, the structure is located inside the struct dn_pipe).
- */
-struct dn_flow_set {
- SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */
-
- u_short fs_nr ; /* flow_set number */
- u_short flags_fs;
-#define DN_HAVE_FLOW_MASK 0x0001
-#define DN_IS_RED 0x0002
-#define DN_IS_GENTLE_RED 0x0004
-#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
-#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */
-#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
-#define DN_IS_PIPE 0x4000
-#define DN_IS_QUEUE 0x8000
-
- struct dn_pipe *pipe ; /* pointer to parent pipe */
- u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */
-
- int weight ; /* WFQ queue weight */
- int qsize ; /* queue size in slots or bytes */
- int plr ; /* pkt loss rate (2^31-1 means 100%) */
-
- struct ipfw_flow_id flow_mask ;
-
- /* hash table of queues onto this flow_set */
- int rq_size ; /* number of slots */
- int rq_elements ; /* active elements */
- struct dn_flow_queue **rq; /* array of rq_size entries */
-
- u_int32_t last_expired ; /* do not expire too frequently */
- int backlogged ; /* #active queues for this flowset */
-
- /* RED parameters */
-#define SCALE_RED 16
-#define SCALE(x) ( (x) << SCALE_RED )
-#define SCALE_VAL(x) ( (x) >> SCALE_RED )
-#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
- int w_q ; /* queue weight (scaled) */
- int max_th ; /* maximum threshold for queue (scaled) */
- int min_th ; /* minimum threshold for queue (scaled) */
- int max_p ; /* maximum value for p_b (scaled) */
- u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
- u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
- u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
- u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
- u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
- u_int lookup_depth ; /* depth of lookup table */
- int lookup_step ; /* granularity inside the lookup table */
- int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
- int avg_pkt_size ; /* medium packet size */
- int max_pkt_size ; /* max packet size */
};
-SLIST_HEAD(dn_flow_set_head, dn_flow_set);
/*
- * Pipe descriptor. Contains global parameters, delay-line queue,
- * and the flow_set used for fixed-rate queues.
- *
- * For WF2Q+ support it also has 3 heaps holding dn_flow_queue:
- * not_eligible_heap, for queues whose start time is higher
- * than the virtual time. Sorted by start time.
- * scheduler_heap, for queues eligible for scheduling. Sorted by
- * finish time.
- * idle_heap, all flows that are idle and can be removed. We
- * do that on each tick so we do not slow down too much
- * operations during forwarding.
- *
+ * dn_flow collects flow_id and stats for queues and scheduler
+ * instances, and is used to pass this information to userland.
+ * oid.type/oid.subtype describe the object, oid.id is number
+ * of the parent object.
*/
-struct dn_pipe { /* a pipe */
- SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */
-
- int pipe_nr ; /* number */
- int bandwidth; /* really, bytes/tick. */
- int delay ; /* really, ticks */
-
- struct mbuf *head, *tail ; /* packets in delay line */
+struct dn_flow {
+ struct dn_id oid;
+ struct ipfw_flow_id fid;
+ uint64_t tot_pkts; /* statistics counters */
+ uint64_t tot_bytes;
+ uint32_t length; /* Queue length, in packets */
+ uint32_t len_bytes; /* Queue length, in bytes */
+ uint32_t drops;
+};
- /* WF2Q+ */
- struct dn_heap scheduler_heap ; /* top extract - key Finish time*/
- struct dn_heap not_eligible_heap; /* top extract- key Start time */
- struct dn_heap idle_heap ; /* random extract - key Start=Finish time */
- dn_key V ; /* virtual time */
- int sum; /* sum of weights of all active sessions */
+/*
+ * Scheduler template, mostly indicating the name, number,
+ * sched_mask and buckets.
+ */
+struct dn_sch {
+ struct dn_id oid;
+ uint32_t sched_nr; /* N, scheduler number */
+ uint32_t buckets; /* number of buckets for the instances */
+ uint32_t flags; /* have_mask, ... */
+
+ char name[16]; /* null terminated */
+ /* mask to select the appropriate scheduler instance */
+ struct ipfw_flow_id sched_mask; /* M */
+};
- /* Same as in dn_flow_queue, numbytes can become large */
- int64_t numbytes; /* bits I can transmit (more or less). */
- uint64_t burst; /* burst size, scaled: bits * hz */
- dn_key sched_time ; /* time pipe was scheduled in ready_heap */
- dn_key idle_time; /* start of pipe idle time */
+/* A delay profile is attached to a link.
+ * Note that a profile, as any other object, cannot be longer than 2^16
+ */
+#define ED_MAX_SAMPLES_NO 1024
+struct dn_profile {
+ struct dn_id oid;
+ /* fields to simulate a delay profile */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int link_nr;
+ int loss_level;
+ int bandwidth; // XXX use link bandwidth?
+ int samples_no; /* actual length of samples[] */
+ int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
+};
- /*
- * When the tx clock come from an interface (if_name[0] != '\0'), its name
- * is stored below, whereas the ifp is filled when the rule is configured.
- */
- char if_name[IFNAMSIZ];
- struct ifnet *ifp ;
- int ready ; /* set if ifp != NULL and we got a signal from it */
- struct dn_flow_set fs ; /* used with fixed-rate flows */
- /* fields to simulate a delay profile */
+/*
+ * Overall structure of dummynet
-#define ED_MAX_NAME_LEN 32
- char name[ED_MAX_NAME_LEN];
- int loss_level;
- int samples_no;
- int *samples;
-};
+In dummynet, packets are selected with the firewall rules, and passed
+to two different objects: PIPE or QUEUE (bad name).
+
+A QUEUE defines a classifier, which groups packets into flows
+according to a 'mask', puts them into independent queues (one
+per flow) with configurable size and queue management policy,
+and passes flows to a scheduler:
+
+ (flow_mask|sched_mask) sched_mask
+ +---------+ weight Wx +-------------+
+ | |->-[flow]-->--| |-+
+ -->--| QUEUE x | ... | | |
+ | |->-[flow]-->--| SCHEDuler N | |
+ +---------+ | | |
+ ... | +--[LINK N]-->--
+ +---------+ weight Wy | | +--[LINK N]-->--
+ | |->-[flow]-->--| | |
+ -->--| QUEUE y | ... | | |
+ | |->-[flow]-->--| | |
+ +---------+ +-------------+ |
+ +-------------+
+
+Many QUEUE objects can connect to the same scheduler, each
+QUEUE object can have its own set of parameters.
+
+In turn, the SCHEDuler 'forks' multiple instances according
+to a 'sched_mask', each instance manages its own set of queues
+and transmits on a private instance of a configurable LINK.
+
+A PIPE is a simplified version of the above, where there
+is no flow_mask, and each scheduler instance handles a single queue.
+
+The following data structures (visible from userland) describe
+the objects used by dummynet:
+
+ + dn_link, contains the main configuration parameters related
+ to delay and bandwidth;
+ + dn_profile describes a delay profile;
+ + dn_flow describes the flow status (flow id, statistics)
+
+ + dn_sch describes a scheduler
+ + dn_fs describes a flowset (msk, weight, queue parameters)
-/* dn_pipe_max is used to pass pipe configuration from userland onto
- * kernel space and back
+ *
*/
-#define ED_MAX_SAMPLES_NO 1024
-struct dn_pipe_max {
- struct dn_pipe pipe;
- int samples[ED_MAX_SAMPLES_NO];
-};
-
-SLIST_HEAD(dn_pipe_head, dn_pipe);
#endif /* _IP_DUMMYNET_H */
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $FreeBSD: head/sys/netinet/ip_fw.h 200580 2009-12-15 16:15:14Z luigi $
+ * $FreeBSD: user/luigi/ipfw3-head/sys/netinet/ip_fw.h 202072 2010-01-11 10:12:35Z luigi $
*/
#ifndef _IPFW2_H
*/
struct ip_fw {
+#ifdef _X64EMU
+ int32_t pad1;
+#endif
struct ip_fw *x_next; /* linked list of rules */
+#ifdef _X64EMU
+ int32_t pad2;
+#endif
struct ip_fw *next_rule; /* ptr to next [skipto] rule */
/* 'next_rule' is used to pass up 'set_disable' status */
#define RULESIZE(rule) (sizeof(struct ip_fw) + \
((struct ip_fw *)(rule))->cmd_len * 4 - 4)
+#if 1 // should be moved to in.h
/*
* This structure is used as a flow mask and a flow id for various
* parts of the code.
+ * addr_type is used in userland and kernel to mark the address type.
+ * fib is used in the kernel to record the fib in use.
+ * _flags is used in the kernel to store tcp flags for dynamic rules.
*/
struct ipfw_flow_id {
- u_int32_t dst_ip;
- u_int32_t src_ip;
- u_int16_t dst_port;
- u_int16_t src_port;
- u_int8_t fib;
- u_int8_t proto;
- u_int8_t flags; /* protocol-specific flags */
- uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */
- struct in6_addr dst_ip6; /* could also store MAC addr! */
+ uint32_t dst_ip;
+ uint32_t src_ip;
+ uint16_t dst_port;
+ uint16_t src_port;
+ uint8_t fib;
+ uint8_t proto;
+ uint8_t _flags; /* protocol-specific flags */
+ uint8_t addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
+ struct in6_addr dst_ip6;
struct in6_addr src_ip6;
- u_int32_t flow_id6;
- u_int32_t frag_id6;
+ uint32_t flow_id6;
+ uint32_t extra; /* queue/pipe or frag_id */
};
+#endif
#define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6)
--- /dev/null
+/*-
+ * Copyright (c) 1998-2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Binary heap and hash tables, header file
+ *
+ * $FreeBSD: head/sys/netinet/ipfw/dn_heap.h 204865 2010-03-08 11:27:08Z luigi $
+ */
+
+#ifndef _IP_DN_HEAP_H
+#define _IP_DN_HEAP_H
+
+#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0)
+#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0)
+
+/*
+ * This module implements a binary heap supporting random extraction.
+ *
+ * A heap entry contains an uint64_t key and a pointer to object.
+ * DN_KEY_LT(a,b) returns true if key 'a' is smaller than 'b'
+ *
+ * The heap is a struct dn_heap plus a dynamically allocated
+ * array of dn_heap_entry entries. 'size' represents the size of
+ * the array, 'elements' count entries in use. The topmost
+ * element has the smallest key.
+ * The heap supports ordered insert, and extract from the top.
+ * To extract an object from the middle of the heap, the object
+ * must reserve an 'int32_t' to store the position of the object
+ * in the heap itself, and the location of this field must be
+ * passed as an argument to heap_init() -- use -1 if the feature
+ * is not used.
+ */
+struct dn_heap_entry {
+ uint64_t key; /* sorting key, smallest comes first */
+ void *object; /* object pointer */
+};
+
+struct dn_heap {
+ int size; /* the size of the array */
+ int elements; /* elements in use */
+ int ofs; /* offset in the object of heap index */
+ struct dn_heap_entry *p; /* array of "size" entries */
+};
+
+enum {
+ HEAP_SCAN_DEL = 1,
+ HEAP_SCAN_END = 2,
+};
+
+/*
+ * heap_init() reinitializes the heap setting the size and the offset
+ * of the index for random extraction (use -1 if not used).
+ * The 'elements' counter is set to 0.
+ *
+ * SET_HEAP_OFS() indicates where, in the object, is stored the index
+ * for random extractions from the heap.
+ *
+ * heap_free() frees the memory associated to a heap.
+ *
+ * heap_insert() adds a key-pointer pair to the heap
+ *
+ * HEAP_TOP() returns a pointer to the top element of the heap,
+ * but makes no checks on its existence (XXX should we change ?)
+ *
+ * heap_extract() removes the entry at the top. It returns nothing, so
+ * the key and the object pointer should have been read before.
+ *
+ * heap_scan() invokes a callback on each entry of the heap.
+ * The callback can return a combination of HEAP_SCAN_DEL and
+ * HEAP_SCAN_END. HEAP_SCAN_DEL means the current element must
+ * be removed, and HEAP_SCAN_END means to terminate the scan.
+ * heap_scan() returns the number of elements removed.
+ * Because the order is not guaranteed, we should use heap_scan()
+ * only as a last resort mechanism.
+ */
+#define HEAP_TOP(h) ((h)->p)
+#define SET_HEAP_OFS(h, n) do { (h)->ofs = n; } while (0)
+int heap_init(struct dn_heap *h, int size, int ofs);
+int heap_insert(struct dn_heap *h, uint64_t key1, void *p);
+void heap_extract(struct dn_heap *h, void *obj);
+void heap_free(struct dn_heap *h);
+int heap_scan(struct dn_heap *, int (*)(void *, uintptr_t), uintptr_t);
+
+/*------------------------------------------------------
+ * This module implements a generic hash table with support for
+ * running callbacks on the entire table. To avoid allocating
+ * memory during hash table operations, objects must reserve
+ * space for a link field. XXX if the heap is moderately full,
+ * an SLIST suffices, and we can tolerate the cost of a hash
+ * computation on each removal.
+ *
+ * dn_ht_init() initializes the table, setting the number of
+ * buckets, the offset of the link field, the main callbacks.
+ * Callbacks are:
+ *
+ * hash(key, flags, arg) called to return a bucket index.
+ * match(obj, key, flags, arg) called to determine if key
+ * matches the current 'obj' in the heap
+ * newh(key, flags, arg) optional, used to allocate a new
+ * object during insertions.
+ *
+ * dn_ht_free() frees the table or unlinks its elements.
+ * DNHT_REMOVE unlinks the elements, 0 frees the table.
+ * You need two calls to do both.
+ *
+ * dn_ht_find() is the main lookup function, which can also be
+ * used to insert or delete elements in the hash table.
+ * The final 'arg' is passed to all callbacks.
+ *
+ * dn_ht_scan() is used to invoke a callback on all entries of
+ * the heap, or possibly on just one bucket. The callback
+ * is invoked with a pointer to the object, and must return
+ * one of DNHT_SCAN_DEL or DNHT_SCAN_END to request the
+ * removal of the object from the heap and the end of the
+ * scan, respectively.
+ *
+ * dn_ht_scan_bucket() is similar to dn_ht_scan(), except that it scans
+ * only the specified bucket of the table. The bucket is an in-out
+ * parameter and returns a valid bucket number if the original
+ * is invalid.
+ *
+ * A combination of flags can be used to modify the operation
+ * of the dn_ht_find(), and of the callbacks:
+ *
+ * DNHT_KEY_IS_OBJ means the key is the object pointer.
+ * It is usually of interest for the hash and match functions.
+ *
+ * DNHT_MATCH_PTR during a lookup, match pointers instead
+ * of calling match(). Normally used when removing specific
+ * entries. Does not imply KEY_IS_OBJ as the latter _is_ used
+ * by the match function.
+ *
+ * DNHT_INSERT insert the element if not found.
+ * Calls newh() to allocate a new object unless
+ * DNHT_KEY_IS_OBJ is set.
+ *
+ * DNHT_UNIQUE only insert if object not found.
+ * XXX should it imply DNHT_INSERT ?
+ *
+ * DNHT_REMOVE remove objects if we find them.
+ */
+struct dn_ht; /* should be opaque */
+
+struct dn_ht *dn_ht_init(struct dn_ht *, int buckets, int ofs,
+ uint32_t (*hash)(uintptr_t, int, void *),
+ int (*match)(void *, uintptr_t, int, void *),
+ void *(*newh)(uintptr_t, int, void *));
+void dn_ht_free(struct dn_ht *, int flags);
+
+void *dn_ht_find(struct dn_ht *, uintptr_t, int, void *);
+int dn_ht_scan(struct dn_ht *, int (*)(void *, void *), void *);
+int dn_ht_scan_bucket(struct dn_ht *, int * , int (*)(void *, void *), void *);
+int dn_ht_entries(struct dn_ht *);
+
+enum { /* flags values.
+ * first two are returned by the scan callback to indicate
+ * to delete the matching element or to end the scan
+ */
+ DNHT_SCAN_DEL = 0x0001,
+ DNHT_SCAN_END = 0x0002,
+ DNHT_KEY_IS_OBJ = 0x0004, /* key is the obj pointer */
+ DNHT_MATCH_PTR = 0x0008, /* match by pointer, not match() */
+ DNHT_INSERT = 0x0010, /* insert if not found */
+ DNHT_UNIQUE = 0x0020, /* report error if already there */
+ DNHT_REMOVE = 0x0040, /* remove on find or dn_ht_free */
+};
+
+#endif /* _IP_DN_HEAP_H */
--- /dev/null
+/*
+ * Copyright (c) 2010 Riccardo Panicucci, Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * The API to write a packet scheduling algorithm for dummynet.
+ *
+ * $FreeBSD: head/sys/netinet/ipfw/dn_sched.h 204591 2010-03-02 17:40:48Z luigi $
+ */
+
+#ifndef _DN_SCHED_H
+#define _DN_SCHED_H
+
+#define DN_MULTIQUEUE 0x01
+/*
+ * Descriptor for a scheduling algorithm.
+ * Contains all function pointers for a given scheduler
+ * This is typically created when a module is loaded, and stored
+ * in a global list of schedulers.
+ */
+struct dn_alg {
+ uint32_t type; /* the scheduler type */
+ const char *name; /* scheduler name */
+ uint32_t flags; /* DN_MULTIQUEUE if supports multiple queues */
+
+ /*
+ * The following define the size of 3 optional data structures
+ * that may need to be allocated at runtime, and are appended
+ * to each of the base data structures: scheduler, sched.inst,
+ * and queue. We don't have a per-flowset structure.
+ */
+ /* + parameters attached to the template, e.g.
+ * default queue sizes, weights, quantum size, and so on;
+ */
+ size_t schk_datalen;
+
+ /* + per-instance parameters, such as timestamps,
+ * containers for queues, etc;
+ */
+ size_t si_datalen;
+
+ size_t q_datalen; /* per-queue parameters (e.g. S,F) */
+
+ /*
+ * Methods implemented by the scheduler:
+ * enqueue enqueue packet 'm' on scheduler 's', queue 'q'.
+ * q is NULL for !MULTIQUEUE.
+ * Return 0 on success, 1 on drop (packet consumed anyways).
+ * Note that q should be interpreted only as a hint
+ * on the flow that the mbuf belongs to: while a
+ * scheduler will normally enqueue m into q, it is ok
+ * to leave q alone and put the mbuf elsewhere.
+ * This function is called in two cases:
+ * - when a new packet arrives to the scheduler;
+ * - when a scheduler is reconfigured. In this case the
+ * call is issued by the new_queue callback, with a
+ * non empty queue (q) and m pointing to the first
+ * mbuf in the queue. For this reason, the function
+ * should internally check for (m != q->mq.head)
+ * before calling dn_enqueue().
+ *
+ * dequeue Called when scheduler instance 's' can
+ * dequeue a packet. Return NULL if none are available.
+ * XXX what about non work-conserving ?
+ *
+ * config called on 'sched X config ...', normally writes
+ * in the area of size sch_arg
+ *
+ * destroy called on 'sched delete', frees everything
+ * in sch_arg (other parts are handled by more specific
+ * functions)
+ *
+ * new_sched called when a new instance is created, e.g.
+ * to create the local queue for !MULTIQUEUE, set V or
+ * copy parameters for WFQ, and so on.
+ *
+ * free_sched called when deleting an instance, cleans
+ * extra data in the per-instance area.
+ *
+ * new_fsk called when a flowset is linked to a scheduler,
+ * e.g. to validate parameters such as weights etc.
+ * free_fsk when a flowset is unlinked from a scheduler.
+ * (probably unnecessary)
+ *
+ * new_queue called to set the per-queue parameters,
+ * e.g. S and F, adjust sum of weights in the parent, etc.
+ *
+ * The new_queue callback is normally called when
+ * creating a new queue. In some cases (such as a
+ * scheduler change or reconfiguration) it can be called
+ * with a non empty queue. In that case the callback
+ * may need to call the enqueue function, and it
+ * should eventually call enqueue() passing
+ * as m the first element in the queue.
+ *
+ * free_queue actions related to a queue removal, e.g. undo
+ * all the above. If the queue has data in it, also remove
+ * from the scheduler. This can e.g. happen during a reconfigure.
+ */
+ int (*enqueue)(struct dn_sch_inst *, struct dn_queue *,
+ struct mbuf *);
+ struct mbuf * (*dequeue)(struct dn_sch_inst *);
+
+ int (*config)(struct dn_schk *);
+ int (*destroy)(struct dn_schk*);
+ int (*new_sched)(struct dn_sch_inst *);
+ int (*free_sched)(struct dn_sch_inst *);
+ int (*new_fsk)(struct dn_fsk *f);
+ int (*free_fsk)(struct dn_fsk *f);
+ int (*new_queue)(struct dn_queue *q);
+ int (*free_queue)(struct dn_queue *q);
+
+ /* run-time fields */
+ int ref_count; /* XXX number of instances in the system */
+ SLIST_ENTRY(dn_alg) next; /* Next scheduler in the list */
+};
+
+/* MSVC does not support initializers so we need this ugly macro */
+#ifdef _WIN32
+#define _SI(fld)
+#else
+#define _SI(fld) fld
+#endif
+
+/*
+ * Additionally, dummynet exports some functions and macros
+ * to be used by schedulers:
+ */
+
+void dn_free_pkts(struct mbuf *mnext);
+int dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop);
+/* bound a variable between min and max */
+int ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg);
+
+/*
+ * Extract the head of a queue, update stats. Must be the very last
+ * thing done on a dequeue as the queue itself may go away.
+ */
+static __inline struct mbuf*
+dn_dequeue(struct dn_queue *q)
+{
+ struct mbuf *m = q->mq.head;
+ if (m == NULL) /* empty queue: nothing to extract, no stats to update */
+ return NULL;
+ q->mq.head = m->m_nextpkt; /* unlink m; mq.tail and m->m_nextpkt are not modified here */
+ q->ni.length--;
+ q->ni.len_bytes -= m->m_pkthdr.len;
+ if (q->_si) { /* mirror the update in the owner scheduler instance counters */
+ q->_si->ni.length--;
+ q->_si->ni.len_bytes -= m->m_pkthdr.len;
+ }
+ if (q->ni.length == 0) /* queue is now idle */
+ q->q_time = dn_cfg.curr_time;
+ return m;
+}
+
+int dn_sched_modevent(module_t mod, int cmd, void *arg);
+
+#define DECLARE_DNSCHED_MODULE(name, dnsched) \
+ static moduledata_t name##_mod = { \
+ #name, dn_sched_modevent, dnsched \
+ }; \
+ DECLARE_MODULE(name, name##_mod, \
+ SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \
+ MODULE_DEPEND(name, dummynet, 3, 3, 3);
+#endif /* _DN_SCHED_H */
--- /dev/null
+/*-
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * internal dummynet APIs.
+ *
+ * $FreeBSD: head/sys/netinet/ipfw/ip_dn_private.h 204591 2010-03-02 17:40:48Z luigi $
+ */
+
+#ifndef _IP_DN_PRIVATE_H
+#define _IP_DN_PRIVATE_H
+
+/* debugging support
+ * use ND() to remove debugging, D() to print a line,
+ * DX(level, ...) to print above a certain level
+ * If you redefine D() you are expected to redefine all.
+ */
+#ifndef D
+#define ND(fmt, ...) do {} while (0)
+#define D1(fmt, ...) do {} while (0)
+#define D(fmt, ...) printf("%-10s " fmt "\n", \
+ __FUNCTION__, ## __VA_ARGS__)
+#define DX(lev, fmt, ...) do { \
+ if (dn_cfg.debug > lev) D(fmt, ## __VA_ARGS__); } while (0)
+#endif
+
+MALLOC_DECLARE(M_DUMMYNET);
+
+#ifndef FREE_PKT
+#define FREE_PKT(m) m_freem(m)
+#endif
+
+#ifndef __linux__
+#define div64(a, b) ((int64_t)(a) / (int64_t)(b))
+#endif
+
+#define DN_LOCK_INIT() do { \
+ mtx_init(&dn_cfg.uh_mtx, "dn_uh", NULL, MTX_DEF); \
+ mtx_init(&dn_cfg.bh_mtx, "dn_bh", NULL, MTX_DEF); \
+ } while (0)
+#define DN_LOCK_DESTROY() do { \
+ mtx_destroy(&dn_cfg.uh_mtx); \
+ mtx_destroy(&dn_cfg.bh_mtx); \
+ } while (0)
+#if 0 /* not used yet */
+#define DN_UH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_UH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_UH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_UH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_UH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+#endif
+
+/*
+ * NOTE(review): every DN_BH_* macro below operates on dn_cfg.uh_mtx,
+ * not on dn_cfg.bh_mtx, while the DN_UH_* macros are compiled out
+ * ("not used yet"). This looks like a deliberate single-lock setup;
+ * confirm before switching these to bh_mtx.
+ */
+#define DN_BH_RLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_BH_RUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_BH_WLOCK() mtx_lock(&dn_cfg.uh_mtx)
+#define DN_BH_WUNLOCK() mtx_unlock(&dn_cfg.uh_mtx)
+#define DN_BH_LOCK_ASSERT() mtx_assert(&dn_cfg.uh_mtx, MA_OWNED)
+
+SLIST_HEAD(dn_schk_head, dn_schk);
+SLIST_HEAD(dn_sch_inst_head, dn_sch_inst);
+SLIST_HEAD(dn_fsk_head, dn_fsk);
+SLIST_HEAD(dn_queue_head, dn_queue);
+SLIST_HEAD(dn_alg_head, dn_alg);
+
+struct mq { /* a basic queue of packets*/
+ struct mbuf *head, *tail;
+};
+
+/*
+ * Fill the header of a dummynet object: store 'type' and 'len' in
+ * the oid and reset 'subtype' to 0. No other field of *o is written.
+ */
+static inline void
+set_oid(struct dn_id *o, int type, int len)
+{
+ o->type = type;
+ o->len = len;
+ o->subtype = 0;
+}
+
+/*
+ * configuration and global data for a dummynet instance
+ *
+ * When a configuration is modified from userland, 'id' is incremented
+ * so we can use the value to check for stale pointers.
+ */
+struct dn_parms {
+ uint32_t id; /* configuration version */
+
+ /* defaults (sysctl-accessible) */
+ int red_lookup_depth;
+ int red_avg_pkt_size;
+ int red_max_pkt_size;
+ int hash_size;
+ int max_hash_size;
+ long byte_limit; /* max queue sizes */
+ long slot_limit;
+
+ int io_fast;
+ int debug;
+
+ /* timekeeping */
+ struct timeval prev_t; /* last time dummynet_tick ran */
+ struct dn_heap evheap; /* scheduled events */
+
+ /* counters of objects -- used for reporting space */
+ int schk_count;
+ int si_count;
+ int fsk_count;
+ int queue_count;
+
+ /* ticks and other stuff */
+ uint64_t curr_time;
+ /* flowsets and schedulers are in hash tables, with 'hash_size'
+ * buckets. fshash is looked up at every packet arrival
+ * so better be generous if we expect many entries.
+ */
+ struct dn_ht *fshash;
+ struct dn_ht *schedhash;
+ /* list of flowsets without a scheduler -- use sch_chain */
+ struct dn_fsk_head fsu; /* list of unlinked flowsets */
+ struct dn_alg_head schedlist; /* list of algorithms */
+
+ /* Store the fs/sch to scan when draining. The value is the
+ * bucket number of the hash table. Expire can be disabled
+ * with net.inet.ip.dummynet.expire=0, or it happens every
+ * expire ticks.
+ **/
+ int drain_fs;
+ int drain_sch;
+ uint32_t expire;
+ uint32_t expire_cycle; /* tick count */
+
+ /* if the upper half is busy doing something long,
+ * can set the busy flag and we will enqueue packets in
+ * a queue for later processing.
+ */
+ int busy;
+ struct mq pending;
+
+#ifdef _KERNEL
+ /*
+ * This file is normally used in the kernel, unless we do
+ * some userland tests, in which case we do not need a mtx.
+ * uh_mtx arbitrates between system calls and also
+ * protects fshash, schedhash and fsunlinked.
+ * These structures are readonly for the lower half.
+ * bh_mtx protects all other structures which may be
+ * modified upon packet arrivals
+ */
+#if defined( __linux__ ) || defined( _WIN32 )
+ spinlock_t uh_mtx;
+ spinlock_t bh_mtx;
+#else
+ struct mtx uh_mtx;
+ struct mtx bh_mtx;
+#endif
+
+#endif /* _KERNEL */
+};
+
+/*
+ * Delay line, contains all packets on output from a link.
+ * Every scheduler instance has one.
+ */
+struct delay_line {
+ struct dn_id oid;
+ struct dn_sch_inst *si;
+ struct mq mq;
+};
+
+/*
+ * The kernel side of a flowset. It is linked in a hash table
+ * of flowsets, and in a list of children of their parent scheduler.
+ * qht is either the queue or (if HAVE_MASK) a hash table of queues.
+ * Note that the mask to use is the (flow_mask|sched_mask), which
+ * changes as we attach/detach schedulers. So we store it here.
+ *
+ * XXX If we want to add scheduler-specific parameters, we need to
+ * put them in external storage because the scheduler may not be
+ * available when the fsk is created.
+ */
+struct dn_fsk { /* kernel side of a flowset */
+ struct dn_fs fs;
+ SLIST_ENTRY(dn_fsk) fsk_next; /* hash chain for fshash */
+
+ struct ipfw_flow_id fsk_mask;
+
+ /* qht is a hash table of queues, or just a single queue
+ * a bit in fs.flags tells us which one
+ */
+ struct dn_ht *qht;
+ struct dn_schk *sched; /* Sched we are linked to */
+ SLIST_ENTRY(dn_fsk) sch_chain; /* list of fsk attached to sched */
+
+ /* bucket index used by drain routine to drain queues for this
+ * flowset
+ */
+ int drain_bucket;
+ /* Parameters related to RED / GRED */
+ /* original values are in dn_fs*/
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
+
+ u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
+ u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
+ u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
+ u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
+ u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
+ u_int lookup_depth ; /* depth of lookup table */
+ int lookup_step ; /* granularity inside the lookup table */
+ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+ int avg_pkt_size ; /* medium packet size */
+ int max_pkt_size ; /* max packet size */
+};
+
+/*
+ * A queue is created as a child of a flowset unless it belongs to
+ * a !MULTIQUEUE scheduler. It is normally in a hash table in the
+ * flowset. fs always points to the parent flowset.
+ * si normally points to the sch_inst, unless the flowset has been
+ * detached from the scheduler -- in this case si == NULL and we
+ * should not enqueue.
+ */
+struct dn_queue {
+ struct dn_flow ni; /* oid, flow_id, stats */
+ struct mq mq; /* packets queue */
+ struct dn_sch_inst *_si; /* owner scheduler instance */
+ SLIST_ENTRY(dn_queue) q_next; /* hash chain list for qht */
+ struct dn_fsk *fs; /* parent flowset. */
+
+ /* RED parameters */
+ int avg; /* average queue length est. (scaled) */
+ int count; /* arrivals since last RED drop */
+ int random; /* random value (scaled) */
+ uint64_t q_time; /* start of queue idle time */
+
+};
+
+/*
+ * The kernel side of a scheduler. Contains the userland config,
+ * a link, pointer to extra config arguments from command line,
+ * kernel flags, and a pointer to the scheduler methods.
+ * It is stored in a hash table, and holds a list of all
+ * flowsets and scheduler instances.
+ * XXX sch must be at the beginning, see schk_hash().
+ */
+struct dn_schk {
+ struct dn_sch sch;
+ struct dn_alg *fp; /* Pointer to scheduler functions */
+ struct dn_link link; /* The link, embedded */
+ struct dn_profile *profile; /* delay profile, if any */
+ struct dn_id *cfg; /* extra config arguments */
+
+ SLIST_ENTRY(dn_schk) schk_next; /* hash chain for schedhash */
+
+ struct dn_fsk_head fsk_list; /* all fsk linked to me */
+ struct dn_fsk *fs; /* Flowset for !MULTIQUEUE */
+
+ /* bucket index used by the drain routine to drain the scheduler
+ * instance for this flowset.
+ */
+ int drain_bucket;
+
+ /* Hash table of all instances (through sch.sched_mask)
+ * or single instance if no mask. Always valid.
+ */
+ struct dn_ht *siht;
+};
+
+
+/*
+ * Scheduler instance.
+ * Contains variables and all queues relative to this instance.
+ * This struct is created at runtime.
+ */
+struct dn_sch_inst {
+ struct dn_flow ni; /* oid, flowid and stats */
+ SLIST_ENTRY(dn_sch_inst) si_next; /* hash chain for siht */
+ struct delay_line dline;
+ struct dn_schk *sched; /* the template */
+ int kflags; /* DN_ACTIVE */
+
+ int64_t credit; /* bits I can transmit (more or less). */
+ uint64_t sched_time; /* time link was scheduled in ready_heap */
+ uint64_t idle_time; /* start of scheduler instance idle time */
+
+ /* q_count is the number of queues that this instance is using.
+ * The counter is incremented or decremented when
+ * a reference from the queue is created or deleted.
+ * It is used to make sure that a scheduler instance can be safely
+ * deleted by the drain routine. See notes below.
+ */
+ int q_count;
+
+};
+
+/*
+ * NOTE about object drain.
+ * The system will automatically (XXX check when) drain queues and
+ * scheduler instances when they are idle.
+ * A queue is idle when it has no packets; an instance is idle when
+ * it is not in the evheap heap, and the corresponding delay line is empty.
+ * A queue can be safely deleted when it is idle because the scheduler
+ * function xxx_free_queue() will remove any references to it.
+ * An instance can be only deleted when no queues reference it. To be sure
+ * of that, a counter (q_count) stores the number of queues that are pointing
+ * to the instance.
+ *
+ * XXX
+ * Order of scan:
+ * - take all flowset in a bucket for the flowset hash table
+ * - take all queues in a bucket for the flowset
+ * - increment the queue bucket
+ * - scan next flowset bucket
+ * Nothing is done if a bucket contains no entries.
+ *
+ * The same scheme is used for scheduler instances
+ */
+
+
+/* kernel-side flags. Linux has DN_DELETE in fcntl.h
+ */
+enum {
+ /* 1 and 2 are reserved for the SCAN flags */
+ DN_DESTROY = 0x0004, /* destroy */
+ DN_DELETE_FS = 0x0008, /* destroy flowset */
+ DN_DETACH = 0x0010,
+ DN_ACTIVE = 0x0020, /* object is in evheap */
+ DN_F_DLINE = 0x0040, /* object is a delay line */
+ /*
+  * NOTE(review): 0x00C0 is 0x0080 | DN_F_DLINE, so a test such as
+  * (flags & DN_F_DLINE) is also true for DN_F_SCHI -- confirm this
+  * overlap is intended by the users of these two flags.
+  */
+ DN_F_SCHI = 0x00C0, /* object is a sched.instance */
+ DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */
+};
+
+extern struct dn_parms dn_cfg;
+
+int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+void dummynet_task(void *context, int pending);
+void dn_reschedule(void);
+
+struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
+ struct ipfw_flow_id *);
+struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *);
+
+/*
+ * copy_range is a template for requests for ranges of pipes/queues/scheds.
+ * The number of ranges is variable and can be derived by o.len.
+ * As a default, we use a small number of entries so that the struct
+ * fits easily on the stack and is sufficient for most common requests.
+ */
+#define DEFAULT_RANGES 5
+struct copy_range {
+ struct dn_id o;
+ uint32_t r[ 2 * DEFAULT_RANGES ];
+};
+
+struct copy_args {
+ char **start;
+ char *end;
+ int flags;
+ int type;
+ struct copy_range *extra; /* extra filtering */
+};
+
+struct sockopt;
+int ip_dummynet_compat(struct sockopt *sopt);
+int dummynet_get(struct sockopt *sopt, void **compat);
+int dn_c_copy_q (void *_ni, void *arg);
+int dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq);
+int dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq);
+int dn_compat_copy_queue(struct copy_args *a, void *_o);
+int dn_compat_copy_pipe(struct copy_args *a, void *_o);
+int copy_data_helper_compat(void *_o, void *_arg);
+int dn_compat_calc_size(void);
+int do_config(void *p, int l);
+
+/* function to drain idle object */
+void dn_drain_scheduler(void);
+void dn_drain_queue(void);
+
+#endif /* _IP_DN_PRIVATE_H */
#ifdef _KERNEL
-#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
-#define MTAG_IPFW_RULE 1262273568 /* rule reference */
+/*
+ * For platforms that do not have SYSCTL support, we wrap the
+ * SYSCTL_* into a function (one per file) to collect the values
+ * into an array at module initialization. The wrapping macros,
+ * SYSBEGIN() and SYSEND, are empty in the default case.
+ */
+#ifndef SYSBEGIN
+#define SYSBEGIN(x)
+#endif
+#ifndef SYSEND
+#define SYSEND
+#endif
/* Return values from ipfw_chk() */
enum {
struct route_in6 ro_pmtu_or;
};
-/*
- * Reference to an ipfw rule that can be carried outside critical sections.
- * A rule is identified by rulenum:rule_id which is ordered.
- * In version chain_id the rule can be found in slot 'slot', so
- * we don't need a lookup if chain_id == chain->id.
- *
- * On exit from the firewall this structure refers to the rule after
- * the matching one (slot points to the new rule; rulenum:rule_id-1
- * is the matching rule), and additional info (e.g. info often contains
- * the insn argument or tablearg in the low 16 bits, in host format).
- * On entry, the structure is valid if slot>0, and refers to the starting
- * rules. 'info' contains the reason for reinject, e.g. divert port,
- * divert direction, and so on.
- */
-struct ipfw_rule_ref {
- uint32_t slot; /* slot for matching rule */
- uint32_t rulenum; /* matching rule number */
- uint32_t rule_id; /* matching rule id */
- uint32_t chain_id; /* ruleset id */
- uint32_t info; /* see below */
-};
-
-enum {
- IPFW_INFO_MASK = 0x0000ffff,
- IPFW_INFO_OUT = 0x00000000, /* outgoing, just for convenience */
- IPFW_INFO_IN = 0x80000000, /* incoming, overloads dir */
- IPFW_ONEPASS = 0x40000000, /* One-pass, do not reinject */
- IPFW_IS_MASK = 0x30000000, /* which source ? */
- IPFW_IS_DIVERT = 0x20000000,
- IPFW_IS_DUMMYNET =0x10000000,
- IPFW_IS_PIPE = 0x08000000, /* pip1=1, queue = 0 */
-};
/*
* Arguments for calling ipfw_chk() and dummynet_io(). We put them
};
/* wrapper for freeing a packet, in case we need to do more work */
-#ifdef __linux__
+#ifndef FREE_PKT
+#if defined(__linux__) || defined(_WIN32)
#define FREE_PKT(m) netisr_dispatch(-1, m)
#else
#define FREE_PKT(m) m_freem(m)
#endif
+#endif /* !FREE_PKT */
/*
* Function definitions.
int ipfw_chk(struct ip_fw_args *args);
void ipfw_reap_rules(struct ip_fw *head);
+/* In ip_fw_pfil */
+int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
+ struct inpcb *inp);
+
/* In ip_fw_table.c */
struct radix_node;
int ipfw_lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
uint32_t *val);
int ipfw_init_tables(struct ip_fw_chain *ch);
+void ipfw_destroy_tables(struct ip_fw_chain *ch);
int ipfw_flush_table(struct ip_fw_chain *ch, uint16_t tbl);
-void ipfw_flush_tables(struct ip_fw_chain *ch);
int ipfw_add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr,
uint8_t mlen, uint32_t value);
int ipfw_dump_table_entry(struct radix_node *rn, void *arg);
int ipfw_count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt);
int ipfw_dump_table(struct ip_fw_chain *ch, ipfw_table *tbl);
-/* hooks for divert */
-extern void (*ip_divert_ptr)(struct mbuf *m, int incoming);
-
-/* In ip_fw_nat.c */
+/* In ip_fw_nat.c -- XXX to be moved to ip_var.h */
extern struct cfg_nat *(*lookup_nat_ptr)(struct nat_list *, int);
extern ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr;
extern ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
-/* netgraph prototypes */
-
-typedef int ng_ipfw_input_t(struct mbuf **, int, struct ip_fw_args *, int);
-extern ng_ipfw_input_t *ng_ipfw_input_p;
-#define NG_IPFW_LOADED (ng_ipfw_input_p != NULL)
-
-#define TAGSIZ (sizeof(struct ng_ipfw_tag) - sizeof(struct m_tag))
-
-
#endif /* _KERNEL */
#endif /* _IPFW2_PRIVATE_H */
tcp_seq th_seq; /* sequence number */
tcp_seq th_ack; /* acknowledgement number */
#if BYTE_ORDER == LITTLE_ENDIAN
- u_int th_x2:4, /* (unused) */
+ u_char th_x2:4, /* (unused) */
th_off:4; /* data offset */
#endif
#if BYTE_ORDER == BIG_ENDIAN
- u_int th_off:4, /* data offset */
+ u_char th_off:4, /* data offset */
th_x2:4; /* (unused) */
#endif
u_char th_flags;
* ExFreePoolWithTag(ptr, tag)
*/
#define malloc(_size, _type, _flags) my_alloc(_size)
+#define calloc(_size, _type, _flags) my_alloc(_size)
void *my_alloc(int _size);
/* the 'tag' version does not work without -Gz in the linker */
void *m_data;
int m_len; /* length in this mbuf */
int m_flags;
+#ifdef __linux__
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
struct nf_info *queue_entry;
#else
struct nf_queue_entry *queue_entry;
+#endif
+#else /* _WIN32 */
+ int direction; /* could go in rcvif */
+ NDIS_HANDLE context; /* replaces queue_entry or skb ?*/
+ PNDIS_PACKET pkt;
#endif
struct sk_buff *m_skb;
struct {
+#ifdef __linux__
struct net_device *rcvif;
+#else
+ struct ifnet *rcvif;
+#endif
int len; /* total packet len */
SLIST_HEAD (packet_tags, m_tag) tags;
} m_pkthdr;
free(m, M_IPFW);
};
-/* we cannot pullup */
-//#define m_pullup(__m, __i) (m)
+/* m_pullup is not supported, there is a macro in missing.h */
#define M_GETFIB(_m) 0
-#endif /* !__linux__ */
+#endif /* __linux__ || _WIN32 */
/*
* Persistent tags stay with an mbuf until the mbuf is reclaimed. Otherwise
#define SLIST_HEAD_INITIALIZER(head) \
{ NULL }
+#if defined( _WIN32 ) && defined(SLIST_ENTRY)
+#undef SLIST_ENTRY
+#endif
#define SLIST_ENTRY(type) \
struct { \
struct type *sle_next; /* next element */ \
#ifndef _SYS_SYSTM_H_
#define _SYS_SYSTM_H_
+#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
+#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
+
#ifndef _WIN32 /* this is the linux version */
/* callout support, in <sys/callout.h> on FreeBSD */
/*
#define callout_drain(co) del_timer(co)
#define callout_stop(co) del_timer(co)
-#define CALLOUT_ACTIVE 0x0002 /* callout is currently active */
-#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
-
#else /* _WIN32 */
+#include <ndis.h>
/* This is the windows part for callout support */
struct callout {
- int dummy;
+ KTIMER thetimer;
+ KDPC timerdpc;
+ int dpcinitialized;
+ LARGE_INTEGER duetime;
};
+
+void dummynet (void*);
+VOID dummynet_dpc(
+ __in struct _KDPC *Dpc,
+ __in_opt PVOID DeferredContext,
+ __in_opt PVOID SystemArgument1,
+ __in_opt PVOID SystemArgument2
+ );
+
+VOID ipfw_dpc(
+ __in struct _KDPC *Dpc,
+ __in_opt PVOID DeferredContext,
+ __in_opt PVOID SystemArgument1,
+ __in_opt PVOID SystemArgument2
+ );
+
+/* callout_reset must handle two problems:
+ * - dummynet() scheduler must be run always on the same processor
+ * because do_gettimeofday() is based on cpu performance counter, and
+ * _occasionally_ can leap backward in time if we query another cpu.
+ * typically this won't happen that much, and the cpu will almost always
+ * be the same even without the affinity restriction, but better to be sure.
+ * - ipfw_tick() does not have the granularity requirements of dummynet()
+ * but we need to pass a pointer as argument.
+ *
+ * for these reasons, if we are called for dummynet() timer,
+ * KeInitializeDpc is called only once as it should be, and the thread
+ * is forced on cpu0 (which is always present), while if we're called
+ * for ipfw_tick(), we re-initialize the DPC each time, using
+ * parameter DeferredContext to pass the needed pointer. since this
+ * timer is called only once a sec, this won't hurt that much.
+ */
static __inline int
-callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg)
+callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg)
{
+ /*
+  * (Re)arm the kernel timer embedded in 'co' so that its DPC runs
+  * after 'ticks' ticks. Always returns 0.
+  *
+  * The dummynet() timer gets its DPC initialized only once and bound
+  * to processor 0; any other callback re-initializes the DPC on every
+  * call so that 'arg' can be handed over as DeferredContext.
+  */
+ if(fn == &dummynet)
+ {
+ if(co->dpcinitialized == 0)
+ {
+ KeInitializeDpc(&co->timerdpc, dummynet_dpc, NULL);
+ KeSetTargetProcessorDpc(&co->timerdpc, 0);
+ co->dpcinitialized = 1;
+ }
+ }
+ else
+ {
+ KeInitializeDpc(&co->timerdpc, ipfw_dpc, arg);
+ }
+ /*
+  * Negative due time = relative interval, scaled by 10000 per tick.
+  * Widen to 64 bits BEFORE multiplying: the original (-ticks)*10000
+  * was evaluated in int and overflowed for ticks > 214748.
+  */
+ co->duetime.QuadPart = -(LONGLONG)ticks * 10000;
+ KeSetTimer(&co->thetimer, co->duetime, &co->timerdpc);
return 0;
}
-#define callout_init(co, safe)
-#define callout_drain(co)
-#define callout_stop(co)
-#endif /* !_WIN32 */
+static __inline void
+callout_init(struct callout* co, int safe)
+{
+	/* 'safe' is not used by this implementation */
+	(void)safe;
+
+	/* log the address, then set up the kernel timer inside the callout */
+	printf("%s: initializing timer at %p\n",__FUNCTION__,co);
+	KeInitializeTimer(&co->thetimer);
+}
+static __inline int
+callout_drain(struct callout* co)
+{
+	/*
+	 * Keep calling KeCancelTimer() until it reports TRUE.
+	 * NOTE(review): this busy-waits for as long as KeCancelTimer()
+	 * returns FALSE; presumably it relies on the timer being re-armed
+	 * by its own handler -- confirm it cannot spin forever when the
+	 * timer was never set.
+	 */
+	while (KeCancelTimer(&co->thetimer) != TRUE)
+		continue;
+
+	printf("%s: stopping timer at %p\n",__FUNCTION__,co);
+	return 0;
+}
-#if 0
-/* add out timer to the kernel global timer list */
-NTSTATUS
- IoInitializeTimer(
- IN PDEVICE_OBJECT DeviceObject,
- IN PIO_TIMER_ROUTINE TimerRoutine,
- IN PVOID Context
- );
+/* Stop a callout: in this port it is simply an alias for callout_drain(). */
+static __inline int
+callout_stop(struct callout* co)
+{
+ return callout_drain(co);
+}
-/* see differences :
-IoInitializeDpcRequest
- http://dsrg.mff.cuni.cz/~ceres/sch/osy/text/ch04s01s01.php
- example http://www.beyondlogic.org/interrupts/winnt_isr_dpc.htm
-KeInitializeDpc IRQL: Any level
-IoInitializeTimer IRQL: Passive level
-KeInitializeTimer */
-VOID
- KeInitializeDpc(
- IN PRKDPC Dpc,
- IN PKDEFERRED_ROUTINE DeferredRoutine,
- IN PVOID DeferredContext
- );
-#endif /* commented out */
+#endif /* _WIN32 */
#endif /* _SYS_SYSTM_H_ */
/*
* Remap taskqueue to direct calls
*/
+
+#ifdef _WIN32
+struct task {
+ void (*func)(void*, int);
+};
+#define taskqueue_enqueue(tq, ta) (ta)->func(NULL,1)
+#define TASK_INIT(a,b,c,d) do { \
+ (a)->func = (c); } while (0)
+#else
struct task {
void (*func)(void);
};
#define TASK_INIT(a,b,c,d) do { \
(a)->func = (void (*)(void))c; } while (0)
+#endif
#define taskqueue_create_fast(_a, _b, _c, _d) NULL
#define taskqueue_start_threads(_a, _b, _c, _d)
--- /dev/null
+/*-
+ * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: ip_dn_glue.c 6031 2010-04-09 15:25:41Z svn_panicucci $
+ *
+ * Binary compatibility support for /sbin/ipfw RELENG_7 and RELENG_8
+ */
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/time.h>
+#include <sys/taskqueue.h>
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <netinet/in.h>
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ipfw/ip_fw_private.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+
+/* FREEBSD7.2 ip_dummynet.h r191715*/
+
+struct dn_heap_entry7 {
+ int64_t key; /* sorting key. Topmost element is smallest one */
+ void *object; /* object pointer */
+};
+
+struct dn_heap7 {
+ int size;
+ int elements;
+ int offset; /* XXX if > 0 this is the offset of direct ptr to obj */
+ struct dn_heap_entry7 *p; /* really an array of "size" entries */
+};
+
+/* Common to 7.2 and 8 */
+struct dn_flow_set {
+ SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */
+
+ u_short fs_nr ; /* flow_set number */
+ u_short flags_fs;
+#define DNOLD_HAVE_FLOW_MASK 0x0001
+#define DNOLD_IS_RED 0x0002
+#define DNOLD_IS_GENTLE_RED 0x0004
+#define DNOLD_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */
+#define DNOLD_NOERROR 0x0010 /* do not report ENOBUFS on drops */
+#define DNOLD_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */
+#define DNOLD_IS_PIPE 0x4000
+#define DNOLD_IS_QUEUE 0x8000
+
+ struct dn_pipe7 *pipe ; /* pointer to parent pipe */
+ u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */
+
+ int weight ; /* WFQ queue weight */
+ int qsize ; /* queue size in slots or bytes */
+ int plr ; /* pkt loss rate (2^31-1 means 100%) */
+
+ struct ipfw_flow_id flow_mask ;
+
+ /* hash table of queues onto this flow_set */
+ int rq_size ; /* number of slots */
+ int rq_elements ; /* active elements */
+ struct dn_flow_queue7 **rq; /* array of rq_size entries */
+
+ u_int32_t last_expired ; /* do not expire too frequently */
+ int backlogged ; /* #active queues for this flowset */
+
+ /* RED parameters */
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+ int w_q ; /* queue weight (scaled) */
+ int max_th ; /* maximum threshold for queue (scaled) */
+ int min_th ; /* minimum threshold for queue (scaled) */
+ int max_p ; /* maximum value for p_b (scaled) */
+ u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */
+ u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */
+ u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */
+ u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */
+ u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */
+ u_int lookup_depth ; /* depth of lookup table */
+ int lookup_step ; /* granularity inside the lookup table */
+ int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */
+ int avg_pkt_size ; /* medium packet size */
+ int max_pkt_size ; /* max packet size */
+};
+SLIST_HEAD(dn_flow_set_head, dn_flow_set);
+
+#define DN_IS_PIPE 0x4000
+#define DN_IS_QUEUE 0x8000
+struct dn_flow_queue7 {
+ struct dn_flow_queue7 *next ;
+ struct ipfw_flow_id id ;
+
+ struct mbuf *head, *tail ; /* queue of packets */
+ u_int len ;
+ u_int len_bytes ;
+
+ u_long numbytes;
+
+ u_int64_t tot_pkts ; /* statistics counters */
+ u_int64_t tot_bytes ;
+ u_int32_t drops ;
+
+ int hash_slot ; /* debugging/diagnostic */
+
+ /* RED parameters */
+ int avg ; /* average queue length est. (scaled) */
+ int count ; /* arrivals since last RED drop */
+ int random ; /* random value (scaled) */
+ u_int32_t q_time; /* start of queue idle time */
+
+ /* WF2Q+ support */
+ struct dn_flow_set *fs ; /* parent flow set */
+ int heap_pos ; /* position (index) of struct in heap */
+ int64_t sched_time ; /* current time when queue enters ready_heap */
+
+ int64_t S,F ; /* start time, finish time */
+};
+
+struct dn_pipe7 { /* a pipe */
+ SLIST_ENTRY(dn_pipe7) next; /* linked list in a hash slot */
+
+ int pipe_nr ; /* number */
+ int bandwidth; /* really, bytes/tick. */
+ int delay ; /* really, ticks */
+
+ struct mbuf *head, *tail ; /* packets in delay line */
+
+ /* WF2Q+ */
+ struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
+ struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
+ struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
+
+ int64_t V ; /* virtual time */
+ int sum; /* sum of weights of all active sessions */
+
+ int numbytes;
+
+ int64_t sched_time ; /* time pipe was scheduled in ready_heap */
+
+ /*
+ * When the tx clock come from an interface (if_name[0] != '\0'), its name
+ * is stored below, whereas the ifp is filled when the rule is configured.
+ */
+ char if_name[IFNAMSIZ];
+ struct ifnet *ifp ;
+ int ready ; /* set if ifp != NULL and we got a signal from it */
+
+ struct dn_flow_set fs ; /* used with fixed-rate flows */
+};
+SLIST_HEAD(dn_pipe_head7, dn_pipe7);
+
+
+/* FREEBSD8 ip_dummynet.h r196045 */
+struct dn_flow_queue8 {
+ struct dn_flow_queue8 *next ;
+ struct ipfw_flow_id id ;
+
+ struct mbuf *head, *tail ; /* queue of packets */
+ u_int len ;
+ u_int len_bytes ;
+
+ uint64_t numbytes ; /* credit for transmission (dynamic queues) */
+ int64_t extra_bits; /* extra bits simulating unavailable channel */
+
+ u_int64_t tot_pkts ; /* statistics counters */
+ u_int64_t tot_bytes ;
+ u_int32_t drops ;
+
+ int hash_slot ; /* debugging/diagnostic */
+
+ /* RED parameters */
+ int avg ; /* average queue length est. (scaled) */
+ int count ; /* arrivals since last RED drop */
+ int random ; /* random value (scaled) */
+ int64_t idle_time; /* start of queue idle time */
+
+ /* WF2Q+ support */
+ struct dn_flow_set *fs ; /* parent flow set */
+ int heap_pos ; /* position (index) of struct in heap */
+ int64_t sched_time ; /* current time when queue enters ready_heap */
+
+ int64_t S,F ; /* start time, finish time */
+};
+
+struct dn_pipe8 { /* a pipe */
+ SLIST_ENTRY(dn_pipe8) next; /* linked list in a hash slot */
+
+ int pipe_nr ; /* number */
+ int bandwidth; /* really, bytes/tick. */
+ int delay ; /* really, ticks */
+
+ struct mbuf *head, *tail ; /* packets in delay line */
+
+ /* WF2Q+ */
+ struct dn_heap7 scheduler_heap ; /* top extract - key Finish time*/
+ struct dn_heap7 not_eligible_heap; /* top extract- key Start time */
+ struct dn_heap7 idle_heap ; /* random extract - key Start=Finish time */
+
+ int64_t V ; /* virtual time */
+ int sum; /* sum of weights of all active sessions */
+
+ /* Same as in dn_flow_queue, numbytes can become large */
+ int64_t numbytes; /* bits I can transmit (more or less). */
+ uint64_t burst; /* burst size, scaled: bits * hz */
+
+ int64_t sched_time ; /* time pipe was scheduled in ready_heap */
+ int64_t idle_time; /* start of pipe idle time */
+
+ char if_name[IFNAMSIZ];
+ struct ifnet *ifp ;
+ int ready ; /* set if ifp != NULL and we got a signal from it */
+
+ struct dn_flow_set fs ; /* used with fixed-rate flows */
+
+ /* fields to simulate a delay profile */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int loss_level;
+ int samples_no;
+ int *samples;
+};
+
+#define ED_MAX_SAMPLES_NO 1024
+/*
+ * A struct dn_pipe8 followed by room for ED_MAX_SAMPLES_NO profile samples.
+ * dn_compat_config_profile() casts the incoming dn_pipe8 pointer to this
+ * type in order to locate samples[0].
+ */
+struct dn_pipe_max8 {
+ struct dn_pipe8 pipe;
+ int samples[ED_MAX_SAMPLES_NO];
+};
+SLIST_HEAD(dn_pipe_head8, dn_pipe8);
+
+/*
+ * Changes from 7.2 to 8:
+ * dn_pipe:
+ * numbytes from int to int64_t
+ * add burst (uint64_t)
+ * add idle_time (int64_t)
+ * add profile
+ * add struct dn_pipe_max
+ * add flag DN_HAS_PROFILE
+ *
+ * dn_flow_queue
+ * numbytes from u_long to uint64_t
+ * add extra_bits (int64_t)
+ * q_time from u_int32_t to int64_t and name idle_time
+ *
+ * dn_flow_set unchanged
+ *
+ */
+
+/* NOTE:XXX copied from dummynet.c */
+#define O_NEXT(p, len) ((void *)((char *)p + len))
+static void
+oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
+{
+	/* initialize every header field; subtype always starts at 0 */
+	oid->id = id;
+	oid->subtype = 0;
+	oid->type = type;
+	oid->len = len;
+}
+/* make room in the buffer and move the pointer forward */
+static void *
+o_next(struct dn_id **o, int len, int type)
+{
+	struct dn_id *cur = *o;
+
+	/* stamp the header of the object we hand out (id = 0) ... */
+	oid_fill(cur, len, type, 0);
+	/* ... and advance the caller's cursor by 'len' bytes */
+	*o = O_NEXT(cur, len);
+	return cur;
+}
+
+
+static size_t pipesize7 = sizeof(struct dn_pipe7);
+static size_t pipesize8 = sizeof(struct dn_pipe8);
+static size_t pipesizemax8 = sizeof(struct dn_pipe_max8);
+
+/* Indicates the 'ipfw' version:
+ * 1: from FreeBSD 7.2
+ * 0: from FreeBSD 8
+ * -1: unknown (currently unused)
+ *
+ * It is updated when an IP_DUMMYNET_DEL or IP_DUMMYNET_CONFIGURE request arrives.
+ * NOTE: if an IP_DUMMYNET_GET arrives and the 'ipfw' version is unknown,
+ * it is assumed to be the FreeBSD 8 version.
+ */
+static int is7 = 0;
+
+static int
+convertflags2new(int src)
+{
+	/* translate each old (DNOLD_*) flowset flag into its new DN_* bit */
+	return ((src & DNOLD_HAVE_FLOW_MASK) ? DN_HAVE_MASK : 0) |
+	    ((src & DNOLD_QSIZE_IS_BYTES) ? DN_QSIZE_BYTES : 0) |
+	    ((src & DNOLD_NOERROR) ? DN_NOERROR : 0) |
+	    ((src & DNOLD_IS_RED) ? DN_IS_RED : 0) |
+	    ((src & DNOLD_IS_GENTLE_RED) ? DN_IS_GENTLE_RED : 0) |
+	    ((src & DNOLD_HAS_PROFILE) ? DN_HAS_PROFILE : 0);
+}
+
+static int
+convertflags2old(int src)
+{
+	/* translate each new DN_* flowset flag into its old DNOLD_* bit */
+	return ((src & DN_HAVE_MASK) ? DNOLD_HAVE_FLOW_MASK : 0) |
+	    ((src & DN_IS_RED) ? DNOLD_IS_RED : 0) |
+	    ((src & DN_IS_GENTLE_RED) ? DNOLD_IS_GENTLE_RED : 0) |
+	    ((src & DN_NOERROR) ? DNOLD_NOERROR : 0) |
+	    ((src & DN_HAS_PROFILE) ? DNOLD_HAS_PROFILE : 0) |
+	    ((src & DN_QSIZE_BYTES) ? DNOLD_QSIZE_IS_BYTES : 0);
+}
+
+static int
+dn_compat_del(void *v)
+{
+	struct dn_pipe7 *p = (struct dn_pipe7 *) v;
+	struct dn_pipe8 *p8 = (struct dn_pipe8 *) v;
+	struct {
+		struct dn_id oid;
+		uintptr_t a[1];	/* add more if we want a list */
+	} cmd;
+	int pipe_nr, fs_nr;
+
+	/* XXX DN_API_VERSION ??? */
+	oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
+
+	/*
+	 * pipe_nr directly follows the list entry in both layouts, so it
+	 * can be read through either view; the embedded flowset cannot.
+	 */
+	pipe_nr = p->pipe_nr;
+	fs_nr = is7 ? p->fs.fs_nr : p8->fs.fs_nr;
+
+	/* exactly one of pipe number and flowset number must be given */
+	if ((pipe_nr == 0) == (fs_nr == 0))
+		return EINVAL;
+
+	if (pipe_nr == 0) {	/* queue x delete */
+		cmd.oid.subtype = DN_FS;
+		cmd.a[0] = fs_nr;
+	} else {		/* pipe x delete */
+		cmd.oid.subtype = DN_LINK;
+		cmd.a[0] = pipe_nr;
+	}
+
+	return do_config(&cmd, cmd.oid.len);
+}
+
+static int
+dn_compat_config_queue(struct dn_fs *fs, void* v)
+{
+	/* pick the embedded old-style flowset of the 7.2 or 8 pipe layout */
+	struct dn_flow_set *f = is7 ?
+	    &((struct dn_pipe7 *)v)->fs : &((struct dn_pipe8 *)v)->fs;
+
+	/* identity and hash parameters */
+	fs->fs_nr = f->fs_nr;
+	fs->sched_nr = f->parent_nr;
+	fs->flow_mask = f->flow_mask;
+	fs->buckets = f->rq_size;
+
+	/* queue parameters; the old weight travels in par[0] */
+	fs->qsize = f->qsize;
+	fs->plr = f->plr;
+	fs->par[0] = f->weight;
+
+	fs->flags = convertflags2new(f->flags_fs);
+
+	/* RED parameters are copied only when RED or gentle RED is on */
+	if (fs->flags & (DN_IS_GENTLE_RED | DN_IS_RED)) {
+		fs->w_q = f->w_q;
+		fs->max_th = f->max_th;
+		fs->min_th = f->min_th;
+		fs->max_p = f->max_p;
+	}
+
+	return 0;
+}
+
+static int
+dn_compat_config_pipe(struct dn_sch *sch, struct dn_link *p,
+    struct dn_fs *fs, void* v)
+{
+	struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+	struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+	const int nr = p7->pipe_nr;
+
+	/* scheduler and link both take the pipe number */
+	sch->sched_nr = nr;
+	sch->oid.subtype = 0;
+	p->link_nr = nr;
+
+	/* Common to 7 and 8 */
+	p->bandwidth = p7->bandwidth;
+	p->delay = p7->delay;
+	/* FreeBSD 8 has burst */
+	if (!is7)
+		p->burst = p8->burst;
+
+	/*
+	 * Fill the fifo flowset. dn_compat_config_queue() overwrites
+	 * fs_nr and sched_nr, so they are set only after the call.
+	 */
+	dn_compat_config_queue(fs, v);
+	fs->fs_nr = nr + 2*DN_MAX_ID;
+	fs->sched_nr = nr + DN_MAX_ID;
+
+	/* Move scheduler related parameter from fs to sch */
+	sch->buckets = fs->buckets; /*XXX*/
+	fs->buckets = 0;
+	if (fs->flags & DN_HAVE_MASK) {
+		sch->sched_mask = fs->flow_mask;
+		sch->flags |= DN_HAVE_MASK;
+		fs->flags &= ~DN_HAVE_MASK;
+		bzero(&fs->flow_mask, sizeof(struct ipfw_flow_id));
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the new-style profile 'pf' for link 'p' from the FreeBSD 8 pipe
+ * request 'v', which is treated as a struct dn_pipe_max8 (pipe followed
+ * by the samples array). Note that v itself is modified: its 'samples'
+ * pointer is redirected to the trailing array. Always returns 0.
+ */
+static int
+dn_compat_config_profile(struct dn_profile *pf, struct dn_link *p,
+    void *v)
+{
+	struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+
+	/* the samples pointer in the request is replaced by the inline array */
+	p8->samples = &(((struct dn_pipe_max8 *)p8)->samples[0]);
+
+	pf->link_nr = p->link_nr;
+	pf->loss_level = p8->loss_level;
+//	pf->bandwidth = p->bandwidth; //XXX bandwidth redundant?
+	pf->samples_no = p8->samples_no;
+	strncpy(pf->name, p8->name,sizeof(pf->name));
+	/* strncpy() does not terminate when the source fills the buffer */
+	pf->name[sizeof(pf->name) - 1] = '\0';
+	bcopy(p8->samples, pf->samples, sizeof(pf->samples));
+
+	return 0;
+}
+
+/*
+ * Translate an old-style (FreeBSD 7.2 / 8) configuration request 'v'
+ * into a new-style command buffer and pass it to do_config().
+ *
+ * If pipe_nr != 0 the command is 'pipe x config', so the three main
+ * structs (scheduler, link, flowset) are created, plus a profile when a
+ * FreeBSD 8 request carries samples; otherwise only a flowset is created.
+ *
+ * Returns the error from the conversion helpers or from do_config(),
+ * or ENOMEM if the temporary buffer cannot be allocated.
+ */
+static int
+dn_compat_configure(void *v)
+{
+	struct dn_id *buf = NULL, *base;
+	struct dn_sch *sch = NULL;
+	struct dn_link *p = NULL;
+	struct dn_fs *fs = NULL;
+	struct dn_profile *pf = NULL;
+	int lmax;
+	int error;
+
+	struct dn_pipe7 *p7 = (struct dn_pipe7 *)v;
+	struct dn_pipe8 *p8 = (struct dn_pipe8 *)v;
+
+	int i; /* number of object to configure */
+
+	lmax = sizeof(struct dn_id);	/* command header */
+	lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
+		sizeof(struct dn_fs) + sizeof(struct dn_profile);
+
+	/*
+	 * 'base' keeps the start of the allocation, 'buf' is the cursor
+	 * that o_next() moves forward.
+	 */
+	base = buf = malloc(lmax, M_DUMMYNET, M_WAIT|M_ZERO);
+	if (base == NULL)
+		return ENOMEM;
+	o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
+	base->id = DN_API_VERSION;
+
+	/* pipe_nr is the same in p7 and p8 */
+	i = p7->pipe_nr;
+	if (i != 0) { /* pipe config */
+		sch = o_next(&buf, sizeof(*sch), DN_SCH);
+		p = o_next(&buf, sizeof(*p), DN_LINK);
+		fs = o_next(&buf, sizeof(*fs), DN_FS);
+
+		error = dn_compat_config_pipe(sch, p, fs, v);
+		if (error)
+			goto done;
+		if (!is7 && p8->samples_no > 0) {
+			/* Add profiles*/
+			pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
+			error = dn_compat_config_profile(pf, p, v);
+			if (error)
+				goto done;
+		}
+	} else { /* queue config */
+		fs = o_next(&buf, sizeof(*fs), DN_FS);
+		error = dn_compat_config_queue(fs, v);
+		if (error)
+			goto done;
+	}
+	error = do_config(base, (char *)buf - (char *)base);
+
+done:
+	/*
+	 * Release the allocation through 'base': 'buf' has been advanced
+	 * by o_next() and no longer points at the start of the block.
+	 */
+	free(base, M_DUMMYNET);
+	return error;
+}
+
+int
+dn_compat_calc_size(void)
+{
+	int need;
+
+	/*
+	 * Estimate of the space needed to export the configuration in the
+	 * old format. XXX uses the FreeBSD 8 struct sizes.
+	 *  - half of the schedulers (schk_count/2) count as pipes
+	 *  - every flowset (fsk_count)
+	 *  - one queue record per flowset queue (queue_count)
+	 *  - one queue record per scheduler instance (si_count)
+	 */
+	need = dn_cfg.schk_count * sizeof(struct dn_pipe8) / 2;
+	need += dn_cfg.fsk_count * sizeof(struct dn_flow_set);
+	need += dn_cfg.queue_count * sizeof(struct dn_flow_queue8);
+	need += dn_cfg.si_count * sizeof(struct dn_flow_queue8);
+
+	return need;
+}
+
+int
+dn_c_copy_q (void *_ni, void *arg)
+{
+	struct copy_args *a = arg;
+	struct dn_flow *ni = (struct dn_flow *)_ni;
+	char *out = *a->start;		/* where the record is written */
+	struct dn_flow_queue7 *fq7 = (struct dn_flow_queue7 *)out;
+	struct dn_flow_queue8 *fq8 = (struct dn_flow_queue8 *)out;
+	int size;
+
+	/* XXX hash slot not set */
+	/* No difference between 7.2/8 for these three, write via fq7 */
+	fq7->id = ni->fid;
+	fq7->len = ni->length;
+	fq7->len_bytes = ni->len_bytes;
+
+	/* the counters must be stored through the matching layout */
+	if (!is7) {
+		fq8->tot_pkts = ni->tot_pkts;
+		fq8->tot_bytes = ni->tot_bytes;
+		fq8->drops = ni->drops;
+		size = sizeof(struct dn_flow_queue8);
+	} else {
+		fq7->tot_pkts = ni->tot_pkts;
+		fq7->tot_bytes = ni->tot_bytes;
+		fq7->drops = ni->drops;
+		size = sizeof(struct dn_flow_queue7);
+	}
+
+	/* advance the output cursor past the record just written */
+	*a->start = out + size;
+	return 0;
+}
+
+/*
+ * Write scheduler 's' (with its embedded link and its flowset s->fs) at
+ * *a->start as an old-style pipe record: struct dn_pipe7 when is7 is
+ * set, struct dn_pipe8 otherwise. 'nq' is stored as rq_elements.
+ * Advances *a->start past the record and returns 0.
+ * No bounds check is done here.
+ */
+int
+dn_c_copy_pipe(struct dn_schk *s, struct copy_args *a, int nq)
+{
+	struct dn_link *l = &s->link;
+	struct dn_fsk *f = s->fs;
+
+	struct dn_pipe7 *pipe7 = (struct dn_pipe7 *)*a->start;
+	struct dn_pipe8 *pipe8 = (struct dn_pipe8 *)*a->start;
+	struct dn_flow_set *fs;
+	int size = 0;
+
+	if (is7) {
+		fs = &pipe7->fs;
+		size = sizeof(struct dn_pipe7);
+	} else {
+		fs = &pipe8->fs;
+		size = sizeof(struct dn_pipe8);
+	}
+
+	/* These 4 field are the same in pipe7 and pipe8 */
+	/* the list pointer is reused as a tag marking the record as a pipe */
+	pipe7->next.sle_next = (struct dn_pipe7 *)DN_IS_PIPE;
+	pipe7->bandwidth = l->bandwidth;
+	pipe7->delay = l->delay;
+	pipe7->pipe_nr = l->link_nr - DN_MAX_ID;
+
+	if (!is7) {
+		if (s->profile) {
+			struct dn_profile *pf = s->profile;
+			/*
+			 * Bound the copy by the destination buffer and
+			 * always terminate it: strncpy() does not when
+			 * the source fills the whole buffer.
+			 */
+			strncpy(pipe8->name, pf->name,
+			    sizeof(pipe8->name) - 1);
+			pipe8->name[sizeof(pipe8->name) - 1] = '\0';
+			pipe8->loss_level = pf->loss_level;
+			pipe8->samples_no = pf->samples_no;
+		}
+		pipe8->burst = div64(l->burst , 8 * hz);
+	}
+
+	/* mask and bucket count live in the scheduler in the new API */
+	fs->flow_mask = s->sch.sched_mask;
+	fs->rq_size = s->sch.buckets ? s->sch.buckets : 1;
+
+	fs->parent_nr = l->link_nr - DN_MAX_ID;
+	fs->qsize = f->fs.qsize;
+	fs->plr = f->fs.plr;
+	fs->w_q = f->fs.w_q;
+	fs->max_th = f->max_th;
+	fs->min_th = f->min_th;
+	fs->max_p = f->fs.max_p;
+	fs->rq_elements = nq;
+
+	fs->flags_fs = convertflags2old(f->fs.flags);
+
+	*a->start += size;
+	return 0;
+}
+
+
+/*
+ * Emit, in the old format, the pipe described by scheduler '_o'
+ * followed by one record for each of its scheduler instances.
+ * The space required is always computed with the FreeBSD 8 record
+ * sizes.  Returns 1 (and writes nothing) if the room left between
+ * *a->start and a->end is not enough, 0 otherwise.
+ */
+int
+dn_compat_copy_pipe(struct copy_args *a, void *_o)
+{
+	struct dn_schk *sched = (struct dn_schk *)_o;
+	int room = a->end - *a->start;
+	int instances;	/* number of queue records that will follow */
+	int wanted;
+
+	if (sched->sch.flags & DN_HAVE_MASK)
+		instances = dn_ht_entries(sched->siht);
+	else if (sched->siht)
+		instances = 1;
+	else
+		instances = 0;
+
+	/* one struct dn_pipe8 plus one dn_flow_queue8 per instance */
+	wanted = (int)sizeof(struct dn_pipe8) +
+	    (int)sizeof(struct dn_flow_queue8) * instances;
+	if (room < wanted) {
+		D("have %d < need %d", room, wanted);
+		return 1;
+	}
+
+	/* the pipe record first... */
+	dn_c_copy_pipe(sched, a, instances);
+
+	/* ...then the queues: a hash table if masked, else a single item */
+	if (sched->sch.flags & DN_HAVE_MASK) {
+		dn_ht_scan(sched->siht, dn_c_copy_q, a);
+		return 0;
+	}
+	if (sched->siht)
+		dn_c_copy_q(sched->siht, a);
+	return 0;
+}
+
+/*
+ * Write flowset 'f' as an old-format struct dn_flow_set at *a->start
+ * and advance *a->start past it.  'nq' is reported as the number of
+ * queues (rq_elements).  Always returns 0.
+ */
+int
+dn_c_copy_fs(struct dn_fsk *f, struct copy_args *a, int nq)
+{
+	struct dn_flow_set *out = (struct dn_flow_set *)*a->start;
+	struct dn_fs *cfg = &f->fs;
+
+	/* list marker and identification */
+	out->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
+	out->fs_nr = cfg->fs_nr;
+	out->parent_nr = cfg->sched_nr;
+	out->weight = cfg->par[0];
+	out->flags_fs = convertflags2old(cfg->flags);
+
+	/* queue size, loss rate and RED parameters */
+	out->qsize = cfg->qsize;
+	out->plr = cfg->plr;
+	out->w_q = cfg->w_q;
+	out->max_th = f->max_th;
+	out->min_th = f->min_th;
+	out->max_p = cfg->max_p;
+
+	/* mask and hash table information */
+	out->flow_mask = cfg->flow_mask;
+	out->rq_elements = nq;
+	if (cfg->buckets)
+		out->rq_size = cfg->buckets;
+	else
+		out->rq_size = 1;
+
+	*a->start += sizeof(struct dn_flow_set);
+	return 0;
+}
+
+/*
+ * Emit, in the old format, the flowset '_o' followed by one record
+ * for each of its queues.  Returns 1 (and writes nothing) if the room
+ * left between *a->start and a->end is not enough, 0 otherwise.
+ */
+int
+dn_compat_copy_queue(struct copy_args *a, void *_o)
+{
+	struct dn_fsk *flowset = (struct dn_fsk *)_o;
+	int room = a->end - *a->start;
+	int queues;	/* number of queue records that will follow */
+	int wanted;
+
+	if (flowset->fs.flags & DN_HAVE_MASK)
+		queues = dn_ht_entries(flowset->qht);
+	else if (flowset->qht)
+		queues = 1;
+	else
+		queues = 0;
+
+	/* one struct dn_flow_set plus one dn_flow_queue8 per queue */
+	wanted = (int)sizeof(struct dn_flow_set) +
+	    (int)sizeof(struct dn_flow_queue8) * queues;
+	if (room < wanted) {
+		D("have < need");
+		return 1;
+	}
+
+	/* the flowset record first... */
+	dn_c_copy_fs(flowset, a, queues);
+
+	/* ...then the queues: a hash table if masked, else a single item */
+	if (flowset->fs.flags & DN_HAVE_MASK) {
+		dn_ht_scan(flowset->qht, dn_c_copy_q, a);
+		return 0;
+	}
+	if (flowset->qht)
+		dn_c_copy_q(flowset->qht, a);
+
+	return 0;
+}
+
+/*
+ * Scan callback: '_arg' is a struct copy_args whose 'type' field
+ * tells whether '_o' is a scheduler (DN_COMPAT_PIPE) or a flowset
+ * (DN_COMPAT_QUEUE).  Objects that pass the filter are copied out in
+ * the old format; the others are skipped.
+ * Returns DNHT_SCAN_END when a copy routine reports failure,
+ * 0 in all other cases.
+ */
+int
+copy_data_helper_compat(void *_o, void *_arg)
+{
+	struct copy_args *a = _arg;
+	int failed = 0;
+
+	if (a->type == DN_COMPAT_PIPE) {
+		struct dn_schk *sched = _o;
+
+		/*
+		 * Only old-type pipes are exported: copy the pipe
+		 * parameters and, if instances exist, their queues.
+		 */
+		if (sched->sch.oid.subtype == 1 &&
+		    sched->sch.sched_nr > DN_MAX_ID)
+			failed = dn_compat_copy_pipe(a, _o);
+	} else if (a->type == DN_COMPAT_QUEUE) {
+		struct dn_fsk *flowset = _o;
+
+		/* flowsets numbered DN_MAX_ID or above are skipped */
+		if (flowset->fs.fs_nr < DN_MAX_ID)
+			failed = dn_compat_copy_queue(a, _o);
+	}
+
+	if (failed)
+		return DNHT_SCAN_END;
+	return 0;
+}
+
+/*
+ * Main function to manage requests coming from old ipfw binaries.
+ *
+ * The size of the user buffer (sopt->sopt_valsize) is compared with
+ * pipesize7, pipesize8 and pipesizemax8 to select the FreeBSD 7.2 or
+ * the FreeBSD 8 layout (file-level flag 'is7').  Any other size,
+ * including 0, leaves the previously selected layout unchanged.
+ *
+ * Returns 0 on success, EINVAL for an unknown option, or the error
+ * reported by sooptcopyin() / dn_compat_del() / dn_compat_configure().
+ * Failures of IP_DUMMYNET_GET are not reported to the caller.
+ */
+int
+ip_dummynet_compat(struct sockopt *sopt)
+{
+	int error = 0;
+	void *v = NULL;
+	struct dn_id oid;
+
+	/* Length of data, used to find the ipfw version... */
+	int len = sopt->sopt_valsize;
+
+	/* len can be 0 if command was dummynet_flush */
+	if (len == pipesize7) {
+		D("setting compatibility with FreeBSD 7.2");
+		is7 = 1;
+	}
+	else if (len == pipesize8 || len == pipesizemax8) {
+		D("setting compatibility with FreeBSD 8");
+		is7 = 0;
+	}
+
+	switch (sopt->sopt_name) {
+	default:
+		printf("dummynet: -- unknown option %d", sopt->sopt_name);
+		error = EINVAL;
+		break;
+
+	case IP_DUMMYNET_FLUSH:
+		oid_fill(&oid, sizeof(oid), DN_CMD_FLUSH, DN_API_VERSION);
+		do_config(&oid, oid.len);
+		break;
+
+	case IP_DUMMYNET_DEL:
+		/*
+		 * The temporary buffer is released with the same malloc
+		 * type used to allocate it, and on every path, including
+		 * a failed copyin.
+		 */
+		v = malloc(len, M_TEMP, M_WAITOK);
+		error = sooptcopyin(sopt, v, len, len);
+		if (error == 0)
+			error = dn_compat_del(v);
+		free(v, M_TEMP);
+		break;
+
+	case IP_DUMMYNET_CONFIGURE:
+		/* same buffer handling as IP_DUMMYNET_DEL */
+		v = malloc(len, M_TEMP, M_WAITOK);
+		error = sooptcopyin(sopt, v, len, len);
+		if (error == 0)
+			error = dn_compat_configure(v);
+		free(v, M_TEMP);
+		break;
+
+	case IP_DUMMYNET_GET: {
+		void *buf;
+		int ret;
+		int original_size = sopt->sopt_valsize;
+		int size;
+
+		/*
+		 * dummynet_get() hands back the data in 'buf' and its
+		 * length in sopt->sopt_valsize.
+		 */
+		ret = dummynet_get(sopt, &buf);
+		if (ret)
+			return 0;//XXX ? the failure is hidden from the caller
+		size = sopt->sopt_valsize;
+		/* restore the size of the user buffer before the copyout */
+		sopt->sopt_valsize = original_size;
+		D("size=%d, buf=%p", size, buf);
+		ret = sooptcopyout(sopt, buf, size);
+		/* a copyout failure is only logged, 'error' stays 0 */
+		if (ret)
+			printf(" %s ERROR sooptcopyout\n", __FUNCTION__);
+		if (buf)
+			free(buf, M_DUMMYNET);
+	    }
+	}
+
+	return error;
+}
+
+
--- /dev/null
+/*-
+ * Copyright (c) 2010 Luigi Rizzo, Riccardo Panicucci, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Dummynet portions related to packet handling.
+ */
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c 203321 2010-01-31 21:39:25Z luigi $");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/priv.h>
+#include <sys/proc.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/sysctl.h>
+#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
+#include <net/netisr.h>
+#include <netinet/in.h>
+#include <netinet/ip.h> /* ip_len, ip_off */
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
+#include <netinet/ip_fw.h>
+#include <netinet/ipfw/ip_fw_private.h>
+#include <netinet/ipfw/dn_heap.h>
+#include <netinet/ip_dummynet.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+
+#include <netinet/if_ether.h> /* various ether_* routines */
+
+#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
+#include <netinet6/ip6_var.h>
+
+/*
+ * We keep a private variable for the simulation time, but we could
+ * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
+ * instead of dn_cfg.curr_time
+ */
+
+/* Global dummynet state; also exported through the sysctl entries below. */
+struct dn_parms dn_cfg;
+
+/* Tick accounting, updated by dummynet_task() at every run. */
+static long tick_last; /* Last tick duration (usec). */
+static long tick_delta; /* Last vs standard tick diff (usec). */
+static long tick_delta_sum; /* Accumulated tick difference (usec).*/
+static long tick_adjustment; /* Tick adjustments done. */
+static long tick_lost; /* Lost(coalesced) ticks number. */
+/* Adjusted vs non-adjusted curr_time difference (ticks). */
+static long tick_diff;
+
+/* Packet counters. */
+static unsigned long io_pkt; /* packets passed to dummynet_io() */
+static unsigned long io_pkt_fast; /* packets returned on the fast path */
+static unsigned long io_pkt_drop; /* packets dropped by dummynet */
+
+/*
+ * We use a heap to store entities for which we have pending timer events.
+ * The heap is checked at every tick and all entities with expired events
+ * are extracted.
+ */
+
+MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
+
+extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+
+#ifdef SYSCTL_NODE
+
+SYSBEGIN(f4)
+
+SYSCTL_DECL(_net_inet);
+SYSCTL_DECL(_net_inet_ip);
+SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+
+/* parameters */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
+ CTLFLAG_RW, &dn_cfg.hash_size, 0, "Default hash table size");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
+ CTLFLAG_RW, &dn_cfg.slot_limit, 0,
+ "Upper limit in slots for pipe queue.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
+ CTLFLAG_RW, &dn_cfg.byte_limit, 0,
+ "Upper limit in bytes for pipe queue.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
+ CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io.");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
+ CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
+ CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
+ CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes");
+
+/* RED parameters */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
+ CTLFLAG_RD, &dn_cfg.red_lookup_depth, 0, "Depth of RED lookup table");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
+ CTLFLAG_RD, &dn_cfg.red_avg_pkt_size, 0, "RED Medium packet size");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
+ CTLFLAG_RD, &dn_cfg.red_max_pkt_size, 0, "RED Max packet size");
+
+/* time adjustment */
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
+ CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
+ CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
+ CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
+ CTLFLAG_RD, &tick_diff, 0,
+ "Adjusted vs non-adjusted curr_time difference (ticks).");
+SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
+ CTLFLAG_RD, &tick_lost, 0,
+ "Number of ticks coalesced by dummynet taskqueue.");
+
+/* statistics */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
+ CTLFLAG_RD, &dn_cfg.schk_count, 0, "Number of schedulers");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
+ CTLFLAG_RD, &dn_cfg.si_count, 0, "Number of scheduler instances");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
+ CTLFLAG_RD, &dn_cfg.fsk_count, 0, "Number of flowsets");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
+ CTLFLAG_RD, &dn_cfg.queue_count, 0, "Number of queues");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
+ CTLFLAG_RD, &io_pkt, 0,
+ "Number of packets passed to dummynet.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
+ CTLFLAG_RD, &io_pkt_fast, 0,
+ "Number of packets bypassed dummynet scheduler.");
+SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
+ CTLFLAG_RD, &io_pkt_drop, 0,
+ "Number of packets dropped by dummynet.");
+
+SYSEND
+
+#endif
+
+static void dummynet_send(struct mbuf *);
+
+/*
+ * Packets processed by dummynet have an mbuf tag associated with
+ * them that carries their dummynet state.
+ * Outside dummynet, only the 'rule' field is relevant, and it must
+ * be at the beginning of the structure.
+ */
+struct dn_pkt_tag {
+ struct ipfw_rule_ref rule; /* matching rule; must be the first field */
+
+ /* second part, dummynet specific */
+ int dn_dir; /* action when packet comes out: the */
+ /* DIR_* | PROTO_* value dummynet_send() */
+ /* switches on, see ip_fw_private.h */
+ uint64_t output_time; /* when the pkt is due for delivery, */
+ /* on the dn_cfg.curr_time scale */
+ struct ifnet *ifp; /* interface saved from fwa->oif, used */
+ /* by the bridge and layer2 output paths */
+ struct _ip6dn_args ip6opt; /* XXX ipv6 options */
+};
+
+/*
+ * Give access to the dummynet state of packet 'm'.  The state lives
+ * right after the header of the first mbuf tag of the packet, which
+ * is expected to be the dummynet one (this is only checked by the
+ * KASSERT).
+ */
+static struct dn_pkt_tag *
+dn_tag_get(struct mbuf *m)
+{
+	struct m_tag *first = m_tag_first(m);
+
+	KASSERT(first != NULL &&
+	    first->m_tag_cookie == MTAG_ABI_COMPAT &&
+	    first->m_tag_id == PACKET_TAG_DUMMYNET,
+	    ("packet on dummynet queue w/o dummynet tag!"));
+	/* the payload starts right after the tag header */
+	return (struct dn_pkt_tag *)&first[1];
+}
+
+/*
+ * Append packet 'm' at the end of the list 'q'.  q->tail is only
+ * read when q->head is not NULL, so an empty list does not need a
+ * valid tail.
+ */
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+	/* the link to update: the list head, or the last packet */
+	struct mbuf **link;
+
+	link = (q->head == NULL) ? &q->head : &q->tail->m_nextpkt;
+	*link = m;
+	q->tail = m;
+	m->m_nextpkt = NULL;
+}
+
+/*
+ * Release every packet of the m_nextpkt-linked list starting at
+ * 'mnext'.  Kept as a function so that any extra work needed when
+ * discarding packets has a single place to go.
+ */
+void dn_free_pkts(struct mbuf *mnext)
+{
+	while (mnext != NULL) {
+		struct mbuf *victim = mnext;
+
+		/* read the link before the packet is released */
+		mnext = victim->m_nextpkt;
+		FREE_PKT(victim);
+	}
+}
+
+/*
+ * Decide whether RED wants to drop a packet of 'len' bytes arriving
+ * on queue 'q'.  Updates the average (q->avg) and the drop state
+ * (q->count, q->random) of the queue.
+ * 'len' is only used when the queue size is measured in bytes.
+ * Returns 1 if the packet must be dropped, 0 if it is accepted.
+ */
+static int
+red_drops (struct dn_queue *q, int len)
+{
+ /*
+ * RED algorithm
+ *
+ * RED calculates the average queue size (avg) using a low-pass filter
+ * with an exponential weighted (w_q) moving average:
+ * avg <- (1-w_q) * avg + w_q * q_size
+ * where q_size is the queue length (measured in bytes or packets).
+ *
+ * If q_size == 0, we compute the idle time for the link, and set
+ * avg = (1 - w_q)^(idle/s)
+ * where s is the time needed for transmitting a medium-sized packet.
+ *
+ * Now, if avg < min_th the packet is enqueued.
+ * If avg > max_th the packet is dropped. Otherwise, the packet is
+ * dropped with probability P function of avg.
+ */
+
+ struct dn_fsk *fs = q->fs;
+ int64_t p_b = 0;
+
+ /* Queue in bytes or packets? */
+ uint32_t q_size = (fs->fs.flags & DN_QSIZE_BYTES) ?
+ q->ni.len_bytes : q->ni.length;
+
+ /* Average queue size estimation. */
+ if (q_size != 0) {
+ /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
+ int diff = SCALE(q_size) - q->avg;
+ int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
+
+ q->avg += (int)v;
+ } else {
+ /*
+ * Queue is empty, find for how long the queue has been
+ * empty and use a lookup table for computing
+ * (1 - w_q)^(idle_time/s) where s is the time to send a
+ * (small) packet.
+ * XXX check wraps...
+ */
+ if (q->avg) {
+ u_int t = div64((dn_cfg.curr_time - q->q_time), fs->lookup_step);
+
+ /* beyond the end of the table the average is reset to 0 */
+ q->avg = (t < fs->lookup_depth) ?
+ SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
+ }
+ }
+
+ /* Should i drop? */
+ if (q->avg < fs->min_th) {
+ q->count = -1;
+ return (0); /* accept packet */
+ }
+ if (q->avg >= fs->max_th) { /* average queue >= max threshold */
+ if (fs->fs.flags & DN_IS_GENTLE_RED) {
+ /*
+ * According to Gentle-RED, if avg is greater than
+ * max_th the packet is dropped with a probability
+ * p_b = c_3 * avg - c_4
+ * where c_3 = (1 - max_p) / max_th
+ * c_4 = 1 - 2 * max_p
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
+ fs->c_4;
+ } else {
+ /* plain RED: always drop above max_th */
+ q->count = -1;
+ return (1);
+ }
+ } else if (q->avg > fs->min_th) {
+ /*
+ * We compute p_b using the linear dropping function
+ * p_b = c_1 * avg - c_2
+ * where c_1 = max_p / (max_th - min_th)
+ * c_2 = max_p * min_th / (max_th - min_th)
+ */
+ p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
+ }
+ /* when avg == min_th none of the branches above runs and p_b is 0 */
+
+ /* In byte mode, weigh the probability by the size of the packet. */
+ if (fs->fs.flags & DN_QSIZE_BYTES)
+ p_b = div64((p_b * len) , fs->max_pkt_size);
+ /* count was -1: first packet after a reset, just draw a new value */
+ if (++q->count == 0)
+ q->random = random() & 0xffff;
+ else {
+ /*
+ * q->count counts packets arrived since last drop, so a greater
+ * value of q->count means a greater packet drop probability.
+ */
+ if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
+ q->count = 0;
+ /* After a drop we calculate a new random value. */
+ q->random = random() & 0xffff;
+ return (1); /* drop */
+ }
+ }
+ /* End of RED algorithm. */
+
+ return (0); /* accept */
+
+}
+
+/*
+ * Try to store packet 'm' in queue 'q', applying the policy found in
+ * q->fs: forced drop ('drop' != 0), random loss (plr), RED and the
+ * queue size limit, checked in this order.
+ * The byte/packet totals of the queue and of its scheduler instance
+ * are updated before the checks, so they include dropped packets.
+ * Returns 0 if the packet was queued, 1 if it was dropped; in both
+ * cases the caller no longer owns the packet.
+ */
+int
+dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
+{
+	struct dn_fs *cfg;
+	struct dn_flow *si_stats;	/* stats for scheduler instance */
+	uint64_t pktlen;
+
+	if (q->fs == NULL || q->_si == NULL) {
+		printf("%s fs %p si %p, dropping\n",
+		    __FUNCTION__, q->fs, q->_si);
+		FREE_PKT(m);
+		return 1;
+	}
+	cfg = &(q->fs->fs);
+	si_stats = &q->_si->ni;
+	pktlen = m->m_pkthdr.len;
+
+	/* Totals are updated first, whatever happens to the packet. */
+	q->ni.tot_bytes += pktlen;
+	q->ni.tot_pkts++;
+	si_stats->tot_bytes += pktlen;
+	si_stats->tot_pkts++;
+
+	/*
+	 * Reasons to drop, evaluated left to right and stopping at the
+	 * first one that applies: random() and red_drops() are only
+	 * called if the previous tests did not already drop the packet.
+	 */
+	if (drop ||
+	    (cfg->plr && random() < cfg->plr) ||
+	    ((cfg->flags & DN_IS_RED) && red_drops(q, m->m_pkthdr.len)) ||
+	    ((cfg->flags & DN_QSIZE_BYTES) ?
+		(q->ni.len_bytes > cfg->qsize) :
+		(q->ni.length >= cfg->qsize))) {
+		io_pkt_drop++;
+		q->ni.drops++;
+		si_stats->drops++;
+		FREE_PKT(m);
+		return 1;
+	}
+
+	/* Accepted: link the packet and update the current lengths. */
+	mq_append(&q->mq, m);
+	q->ni.length++;
+	q->ni.len_bytes += pktlen;
+	si_stats->length++;
+	si_stats->len_bytes += pktlen;
+	return 0;
+}
+
+/*
+ * Move to 'q' the packets at the head of the delay line 'dline'
+ * whose output_time is not after 'now'.  If a packet that is not yet
+ * due is found, the delay line is put (back) in the event heap with
+ * the output time of that packet as key.
+ * dline->oid.subtype records whether the delay line is in the heap
+ * (1) or not (0).
+ * Runs under scheduler lock.
+ */
+static void
+transmit_event(struct mq *q, struct delay_line *dline, uint64_t now)
+{
+	struct dn_pkt_tag *tag;
+	struct mbuf *pkt;
+
+	dline->oid.subtype = 0;	/* not in heap */
+	for (;;) {
+		pkt = dline->mq.head;
+		if (pkt == NULL)
+			return;		/* delay line fully drained */
+		tag = dn_tag_get(pkt);
+		if (!DN_KEY_LEQ(tag->output_time, now))
+			break;		/* first packet not due yet */
+		dline->mq.head = pkt->m_nextpkt;
+		mq_append(q, pkt);
+	}
+
+	/* leftover packets: wake up when the first of them is due */
+	dline->oid.subtype = 1;	/* in heap */
+	heap_insert(&dn_cfg.evheap, tag->output_time, dline);
+}
+
+/*
+ * Compute the extra "bits" to charge to packet 'm' on scheduler 's'
+ * because of the additional MAC overheads/delays described by the
+ * profile of the scheduler.  A sample is picked at random and
+ * multiplied by the link bandwidth; samples are in milliseconds,
+ * hence the division by 1000.
+ * If the index of the sample is not below the loss level of the
+ * profile, the packet is also marked to be dropped on output.
+ * Returns 0 when there is no profile or the profile has no samples.
+ */
+static uint64_t
+extra_bits(struct mbuf *m, struct dn_schk *s)
+{
+	struct dn_profile *prof = s->profile;
+	struct dn_pkt_tag *tag;
+	uint64_t nbits;
+	int slot;
+
+	if (prof == NULL || prof->samples_no == 0)
+		return 0;
+
+	slot = random() % prof->samples_no;
+	nbits = div64((uint64_t)prof->samples[slot] * s->link.bandwidth, 1000);
+	if (slot < prof->loss_level)
+		return nbits;
+
+	/* sample in the loss region: mark the packet as lost */
+	tag = dn_tag_get(m);
+	if (tag)
+		tag->dn_dir = DIR_DROP;
+	return nbits;
+}
+
+/*
+ * Send traffic from a scheduler instance due by 'now'.
+ * Packets extracted from the scheduler are moved to the delay line of
+ * the instance; if the delay line was idle, packets already due are
+ * then moved to 'q'.  If 'q' is NULL a local queue is used instead.
+ * Return a pointer to the head of the queue, or NULL if the scheduler
+ * asked to be woken up later (see below).
+ */
+static struct mbuf *
+serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now)
+{
+ struct mq def_q;
+ struct dn_schk *s = si->sched;
+ struct mbuf *m = NULL;
+ int delay_line_idle = (si->dline.mq.head == NULL);
+ int done, bw;
+
+ if (q == NULL) {
+ /* only head is set: mq_append() does not read tail when
+  * head is NULL */
+ q = &def_q;
+ q->head = NULL;
+ }
+
+ bw = s->link.bandwidth;
+ si->kflags &= ~DN_ACTIVE;
+
+ /* Accumulate the credit for the time elapsed since the last run;
+  * with no bandwidth limit the credit is simply reset. */
+ if (bw > 0)
+ si->credit += (now - si->sched_time) * bw;
+ else
+ si->credit = 0;
+ si->sched_time = now;
+ done = 0;
+ /* Extract packets as long as the credit is not negative. */
+ while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
+ if (m->m_pkthdr.len < 0) {
+ /* Received a packet with negative length.
+ * the scheduler instance will be waken up after
+ * -m->m_pkthdr.len ticks.
+ */
+ si->kflags |= DN_ACTIVE;
+ heap_insert(&dn_cfg.evheap, now - m->m_pkthdr.len, si);
+
+ /* Delete the fake packet */
+ /* NOTE(review): released with free(), not FREE_PKT() --
+  * presumably the scheduler allocates it with malloc();
+  * confirm. */
+ free(m, M_DUMMYNET);
+
+ /* Don't touch credit, exit from the function */
+ return NULL;
+ } else { /* normal behaviour */
+ uint64_t len_scaled;
+ done++;
+ /* cost of the packet: hz * bits, including the extra
+  * bits from the profile; 0 with unlimited bandwidth */
+ len_scaled = (bw == 0) ? 0 : hz *
+ (m->m_pkthdr.len * 8 + extra_bits(m, s));
+ si->credit -= len_scaled;
+ /* Move packet in the delay line */
+ dn_tag_get(m)->output_time += s->link.delay ;
+ mq_append(&si->dline.mq, m);
+ }
+ }
+ /*
+ * If credit >= 0 the instance is idle, mark time.
+ * Otherwise put back in the heap, and adjust the output
+ * time of the last inserted packet, m, which was too early.
+ */
+ if (si->credit >= 0) {
+ si->idle_time = now;
+ } else {
+ uint64_t t;
+ KASSERT (bw > 0, ("bw=0 and credit<0 ?"));
+ /* ticks needed to bring the credit back to 0, rounded up */
+ t = div64(bw - 1 - si->credit, bw);
+ if (m)
+ dn_tag_get(m)->output_time += t;
+ si->kflags |= DN_ACTIVE;
+ heap_insert(&dn_cfg.evheap, now + t, si);
+ }
+ /* If the delay line was not idle on entry it is left alone here. */
+ if (delay_line_idle && done)
+ transmit_event(q, &si->dline, now);
+ return q->head;
+}
+
+/*
+ * The timer handler for dummynet. Time is computed in ticks,
+ * but the code is tolerant to the actual rate at which this is called.
+ * Once complete, the function reschedules itself for the next tick.
+ *
+ * 'context' is not used; 'pending' is used to count the ticks that
+ * were coalesced into this call.
+ */
+void
+dummynet_task(void *context, int pending)
+{
+ struct timeval t;
+ struct mq q = { NULL, NULL }; /* queue to accumulate results */
+
+ DN_BH_WLOCK();
+
+ /* Update number of lost(coalesced) ticks. */
+ tick_lost += pending - 1;
+
+ getmicrouptime(&t);
+ /* Last tick duration (usec). */
+ tick_last = (t.tv_sec - dn_cfg.prev_t.tv_sec) * 1000000 +
+ (t.tv_usec - dn_cfg.prev_t.tv_usec);
+ /* Last tick vs standard tick difference (usec). */
+ tick_delta = (tick_last * hz - 1000000) / hz;
+ /* Accumulated tick difference (usec). */
+ tick_delta_sum += tick_delta;
+
+ dn_cfg.prev_t = t;
+
+ /*
+ * Adjust curr_time if the accumulated tick difference is
+ * greater than the 'standard' tick. Since curr_time should
+ * be monotonically increasing, we do positive adjustments
+ * as required, and throttle curr_time in case of negative
+ * adjustment.
+ */
+ dn_cfg.curr_time++;
+ if (tick_delta_sum - tick >= 0) {
+ /* we are late by at least one full tick: catch up */
+ int diff = tick_delta_sum / tick;
+
+ dn_cfg.curr_time += diff;
+ tick_diff += diff;
+ tick_delta_sum %= tick;
+ tick_adjustment++;
+ } else if (tick_delta_sum + tick <= 0) {
+ /* we are early by at least one full tick: undo the
+  * increment above, so curr_time does not move */
+ dn_cfg.curr_time--;
+ tick_diff--;
+ tick_delta_sum += tick;
+ tick_adjustment++;
+ }
+
+ /* serve pending events, accumulate in q */
+ for (;;) {
+ struct dn_id *p; /* generic parameter to handler */
+
+ /* stop when the heap is empty or its first event is
+  * still in the future */
+ if (dn_cfg.evheap.elements == 0 ||
+ DN_KEY_LT(dn_cfg.curr_time, HEAP_TOP(&dn_cfg.evheap)->key))
+ break;
+ p = HEAP_TOP(&dn_cfg.evheap)->object;
+ heap_extract(&dn_cfg.evheap, NULL);
+
+ if (p->type == DN_SCH_I) {
+ serve_sched(&q, (struct dn_sch_inst *)p, dn_cfg.curr_time);
+ } else { /* extracted a delay line */
+ transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
+ }
+ }
+ /* Every dn_cfg.expire runs (if not 0) drain schedulers and queues. */
+ if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
+ dn_cfg.expire_cycle = 0;
+ dn_drain_scheduler();
+ dn_drain_queue();
+ }
+
+ DN_BH_WUNLOCK();
+ dn_reschedule();
+ /* the packets collected are sent after releasing the lock */
+ if (q.head != NULL)
+ dummynet_send(q.head);
+}
+
+/*
+ * forward a chain of packets to the proper destination.
+ * 'm' is the head of a list linked through m_nextpkt; each packet is
+ * unlinked and dispatched according to the dn_dir value saved in its
+ * dummynet tag.
+ * This runs outside the dummynet lock.
+ */
+static void
+dummynet_send(struct mbuf *m)
+{
+ struct mbuf *n;
+
+ for (; m != NULL; m = n) {
+ struct ifnet *ifp = NULL; /* gcc 3.4.6 complains */
+ struct m_tag *tag;
+ int dst;
+
+ /* detach the packet from the list before handing it over */
+ n = m->m_nextpkt;
+ m->m_nextpkt = NULL;
+ tag = m_tag_first(m);
+ if (tag == NULL) { /* should not happen */
+ dst = DIR_DROP;
+ } else {
+ struct dn_pkt_tag *pkt = dn_tag_get(m);
+ /* extract the dummynet info, rename the tag
+ * to carry reinject info.
+ */
+ dst = pkt->dn_dir;
+ ifp = pkt->ifp;
+ tag->m_tag_cookie = MTAG_IPFW_RULE;
+ tag->m_tag_id = 0;
+ }
+
+ switch (dst) {
+ case DIR_OUT:
+ SET_HOST_IPLEN(mtod(m, struct ip *));
+ ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
+ break ;
+
+ case DIR_IN :
+ /* put header in network format for ip_input() */
+ //SET_NET_IPLEN(mtod(m, struct ip *));
+ netisr_dispatch(NETISR_IP, m);
+ break;
+
+#ifdef INET6
+ case DIR_IN | PROTO_IPV6:
+ netisr_dispatch(NETISR_IPV6, m);
+ break;
+
+ case DIR_OUT | PROTO_IPV6:
+ /* NOTE(review): the header of an IPv6 packet is accessed
+  * here as a struct ip -- confirm this is intended. */
+ SET_HOST_IPLEN(mtod(m, struct ip *));
+ ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
+ break;
+#endif
+
+ case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
+ /* NOTE(review): when the bridge hook is not set the packet
+  * is not freed here -- confirm whether this leaks it. */
+ if (bridge_dn_p != NULL)
+ ((*bridge_dn_p)(m, ifp));
+ else
+ printf("dummynet: if_bridge not loaded\n");
+
+ break;
+
+ case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
+ /*
+ * The Ethernet code assumes the Ethernet header is
+ * contiguous in the first mbuf header.
+ * Insure this is true.
+ */
+ if (m->m_len < ETHER_HDR_LEN &&
+ (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
+ printf("dummynet/ether: pullup failed, "
+ "dropping packet\n");
+ break;
+ }
+ ether_demux(m->m_pkthdr.rcvif, m);
+ break;
+
+ case DIR_OUT | PROTO_LAYER2: /* DN_TO_ETH_OUT: */
+ ether_output_frame(ifp, m);
+ break;
+
+ case DIR_DROP:
+ /* drop the packet after some time */
+ FREE_PKT(m);
+ break;
+
+ default:
+ printf("dummynet: bad switch %d!\n", dst);
+ FREE_PKT(m);
+ break;
+ }
+ }
+}
+
+/*
+ * Attach a zeroed dummynet tag at the head of the tag list of packet
+ * 'm' and initialize it: matching rule (keeping only the
+ * IPFW_ONEPASS bit of its 'info'), direction 'dir', output interface
+ * and current time as initial output time.
+ * Returns 0 on success, 1 if the tag cannot be allocated.
+ */
+static inline int
+tag_mbuf(struct mbuf *m, int dir, struct ip_fw_args *fwa)
+{
+	struct m_tag *hdr;
+	struct dn_pkt_tag *state;
+
+	hdr = m_tag_get(PACKET_TAG_DUMMYNET,
+	    sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO);
+	if (hdr == NULL)
+		return 1;		/* Cannot allocate packet header. */
+	m_tag_prepend(m, hdr);		/* Attach to mbuf chain. */
+
+	/* the dummynet state follows the tag header */
+	state = (struct dn_pkt_tag *)&hdr[1];
+	state->dn_dir = dir;
+	state->ifp = fwa->oif;
+	/* output_time is updated as the packet moves through dummynet */
+	state->output_time = dn_cfg.curr_time;
+	state->rule = fwa->rule;
+	state->rule.info &= IPFW_ONEPASS;	/* only keep this info */
+	return 0;
+}
+
+
+/*
+ * dummynet hook for packets.
+ * We use the argument to locate the flowset fs and the sched_set sch
+ * associated to it. Then we apply flow_mask and sched_mask to
+ * determine the queue and scheduler instances.
+ *
+ * dir where shall we send the packet after dummynet.
+ * *m0 the mbuf with the packet
+ * fwa the ipfw arguments: rule.info selects the pipe/queue, f_id
+ *     is the flow id used to find instance and queue, oif is the
+ *     output interface saved in the tag.
+ *
+ * On return *m0 is NULL if the packet was queued or dropped, and is
+ * left unchanged if the packet can be sent immediately by the caller.
+ * Returns 0, or ENOBUFS when the packet is dropped and the flowset
+ * (if found) does not have DN_NOERROR set.
+ */
+int
+dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
+{
+ struct mbuf *m = *m0;
+ struct dn_fsk *fs = NULL;
+ struct dn_sch_inst *si;
+ struct dn_queue *q = NULL; /* default */
+
+ /* pipes are looked up at an offset of 2*DN_MAX_ID in the hash */
+ int fs_id = (fwa->rule.info & IPFW_INFO_MASK) +
+ ((fwa->rule.info & IPFW_IS_PIPE) ? 2*DN_MAX_ID : 0);
+ DN_BH_WLOCK();
+ io_pkt++;
+ /* we could actually tag outside the lock, but who cares... */
+ if (tag_mbuf(m, dir, fwa))
+ goto dropit;
+ if (dn_cfg.busy) {
+ /* if the upper half is busy doing something expensive,
+ * lets queue the packet and move forward
+ */
+ mq_append(&dn_cfg.pending, m);
+ m = *m0 = NULL; /* consumed */
+ goto done; /* already active, nothing to do */
+ }
+ /* XXX locate_flowset could be optimised with a direct ref. */
+ fs = dn_ht_find(dn_cfg.fshash, fs_id, 0, NULL);
+ if (fs == NULL)
+ goto dropit; /* This queue/pipe does not exist! */
+ if (fs->sched == NULL) /* should not happen */
+ goto dropit;
+ /* find scheduler instance, possibly applying sched_mask */
+ si = ipdn_si_find(fs->sched, &(fwa->f_id));
+ if (si == NULL)
+ goto dropit;
+ /*
+ * If the scheduler supports multiple queues, find the right one
+ * (otherwise it will be ignored by enqueue).
+ */
+ if (fs->sched->fp->flags & DN_MULTIQUEUE) {
+ q = ipdn_q_find(fs, si, &(fwa->f_id));
+ if (q == NULL)
+ goto dropit;
+ }
+ if (fs->sched->fp->enqueue(si, q, m)) {
+ printf("%s dropped by enqueue\n", __FUNCTION__);
+ /* packet was dropped by enqueue() */
+ /* clear m so that dropit does not free the packet again */
+ m = *m0 = NULL;
+ goto dropit;
+ }
+
+ if (si->kflags & DN_ACTIVE) {
+ m = *m0 = NULL; /* consumed */
+ goto done; /* already active, nothing to do */
+ }
+
+ /* compute the initial allowance */
+ {
+ struct dn_link *p = &fs->sched->link;
+ si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
+ if (p->burst) {
+ /* credit earned while idle, capped to the burst size */
+ uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
+ if (burst > p->burst)
+ burst = p->burst;
+ si->credit += burst;
+ }
+ }
+ /* pass through scheduler and delay line */
+ m = serve_sched(NULL, si, dn_cfg.curr_time);
+
+ /* optimization -- pass it back to ipfw for immediate send */
+ /* XXX Don't call dummynet_send() if scheduler return the packet
+ * just enqueued. This avoid a lock order reversal.
+ *
+ */
+ if (/*dn_cfg.io_fast &&*/ m == *m0 && (dir & PROTO_LAYER2) == 0 ) {
+ /* fast io, rename the tag * to carry reinject info. */
+ struct m_tag *tag = m_tag_first(m);
+
+ tag->m_tag_cookie = MTAG_IPFW_RULE;
+ tag->m_tag_id = 0;
+ io_pkt_fast++;
+ if (m->m_nextpkt != NULL) {
+ printf("dummynet: fast io: pkt chain detected!\n");
+ m->m_nextpkt = NULL;
+ }
+ /* *m0 still points to the packet: the caller sends it */
+ m = NULL;
+ } else {
+ *m0 = NULL;
+ }
+done:
+ DN_BH_WUNLOCK();
+ /* packets returned by the scheduler are sent outside the lock */
+ if (m)
+ dummynet_send(m);
+ return 0;
+
+dropit:
+ io_pkt_drop++;
+ DN_BH_WUNLOCK();
+ if (m)
+ FREE_PKT(m);
+ *m0 = NULL;
+ return (fs && (fs->fs.flags & DN_NOERROR)) ? 0 : ENOBUFS;
+}
/*-
- * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
* Portions Copyright (c) 2000 Akamba Corp.
* All rights reserved
*
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_dummynet.c 200601 2009-12-16 10:48:40Z luigi $");
-
-#define DUMMYNET_DEBUG
-
-#include "opt_inet6.h"
+__FBSDID("$FreeBSD: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dummynet.c 203340 2010-02-01 12:06:37Z luigi $");
/*
- * This module implements IP dummynet, a bandwidth limiter/delay emulator
- * used in conjunction with the ipfw package.
- * Description of the data structures used is in ip_dummynet.h
- * Here you mainly find the following blocks of code:
- * + variable declarations;
- * + heap management functions;
- * + scheduler and dummynet functions;
- * + configuration and initialization.
- *
- * NOTA BENE: critical sections are protected by the "dummynet lock".
- *
- * Most important Changes:
- *
- * 011004: KLDable
- * 010124: Fixed WF2Q behaviour
- * 010122: Fixed spl protection.
- * 000601: WF2Q support
- * 000106: large rewrite, use heaps to handle very many pipes.
- * 980513: initial release
- *
- * include files marked with XXX are probably not needed
+ * Configuration and internal object management for dummynet.
*/
+#include "opt_inet6.h"
+
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
-#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
-#include <net/netisr.h>
#include <netinet/in.h>
-#include <netinet/ip.h> /* ip_len, ip_off */
+#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
#include <netinet/ip_fw.h>
#include <netinet/ipfw/ip_fw_private.h>
+#include <netinet/ipfw/dn_heap.h>
#include <netinet/ip_dummynet.h>
-#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
-
-#include <netinet/if_ether.h> /* various ether_* routines */
-
-#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */
-#include <netinet6/ip6_var.h>
+#include <netinet/ipfw/ip_dn_private.h>
+#include <netinet/ipfw/dn_sched.h>
+
+/* which objects to copy */
+#define DN_C_LINK 0x01
+#define DN_C_SCH 0x02
+#define DN_C_FLOW 0x04
+#define DN_C_FS 0x08
+#define DN_C_QUEUE 0x10
+
+/*
+ * we use this argument in case of a schk_new: it pairs a scheduler
+ * algorithm descriptor with a scheduler configuration record.
+ */
+struct schk_new_arg {
+ struct dn_alg *fp; /* algorithm descriptor (the type find_sched_type() returns) */
+ struct dn_sch *sch; /* scheduler configuration -- presumably the one being installed; confirm at the schk_new call site */
+};
-/*
- * We keep a private variable for the simulation time, but we could
- * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
- */
-static dn_key curr_time = 0 ; /* current simulation time */
+/*---- callout hooks. ----*/
+static struct callout dn_timeout;
+static struct task dn_task;
+static struct taskqueue *dn_tq = NULL;
-static int dn_hash_size = 64 ; /* default hash size */
+/*
+ * Callout handler: enqueue dn_task on the dn_tq taskqueue.
+ * The argument is unused. This is the function that
+ * dn_reschedule() arms on the dn_timeout callout.
+ * dummynet and ipfw_tick can't be static in windows
+ */
+void
+dummynet(void * __unused unused)
+{
-/* statistics on number of queue searches and search steps */
-static long searches, search_steps ;
-static int pipe_expire = 1 ; /* expire queue if empty */
-static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
+ taskqueue_enqueue(dn_tq, &dn_task);
+}
-static long pipe_slot_limit = 100; /* Foot shooting limit for pipe queues. */
-static long pipe_byte_limit = 1024 * 1024;
+/*
+ * (Re)arm the dn_timeout callout so that dummynet() is invoked
+ * with a NULL argument. The delay handed to callout_reset() is 1.
+ */
+void
+dn_reschedule(void)
+{
+ callout_reset(&dn_timeout, 1, dummynet, NULL);
+}
+/*----- end of callout hooks -----*/
-static int red_lookup_depth = 256; /* RED - default lookup table depth */
-static int red_avg_pkt_size = 512; /* RED - default medium packet size */
-static int red_max_pkt_size = 1500; /* RED - default max packet size */
+/*
+ * Return a scheduler descriptor given the type or name.
+ * Walks dn_cfg.schedlist and returns the first entry whose type
+ * equals 'type' or, when 'name' is non-NULL, whose name is equal
+ * to 'name' (strcmp). Returns NULL when no entry matches.
+ */
+static struct dn_alg *
+find_sched_type(int type, char *name)
+{
+ struct dn_alg *d;
-static struct timeval prev_t, t;
-static long tick_last; /* Last tick duration (usec). */
-static long tick_delta; /* Last vs standard tick diff (usec). */
-static long tick_delta_sum; /* Accumulated tick difference (usec).*/
-static long tick_adjustment; /* Tick adjustments done. */
-static long tick_lost; /* Lost(coalesced) ticks number. */
-/* Adjusted vs non-adjusted curr_time difference (ticks). */
-static long tick_diff;
+ SLIST_FOREACH(d, &dn_cfg.schedlist, next) {
+ /* a match on either criterion is enough */
+ if (d->type == type || (name && !strcmp(d->name, name)))
+ return d;
+ }
+ return NULL; /* not found */
+}
-static int io_fast;
-static unsigned long io_pkt;
-static unsigned long io_pkt_fast;
-static unsigned long io_pkt_drop;
+/*
+ * Force *v into the range [lo, hi] and return the resulting value.
+ * A value below 'lo' is replaced by 'dflt' (not by 'lo'); a value
+ * above 'hi' is replaced by 'hi'. When the value is changed and
+ * 'msg' is non-NULL the change is reported with printf, using
+ * 'msg' as the name of the variable. Values already in range are
+ * returned untouched and nothing is printed.
+ */
+int
+ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
+{
+ int oldv = *v;
+ const char *op = NULL;
+ if (oldv < lo) {
+ *v = dflt;
+ op = "Bump";
+ } else if (oldv > hi) {
+ *v = hi;
+ op = "Clamp";
+ } else
+ return *v; /* in range, nothing to do */
+ if (op && msg)
+ printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+ return *v;
+}
+/*---- flow_id mask, hash and compare functions ---*/
/*
- * Three heaps contain queues and pipes that the scheduler handles:
- *
- * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
- *
- * wfq_ready_heap contains the pipes associated with WF2Q flows
- *
- * extract_heap contains pipes associated with delay lines.
- *
+ * The flow_id includes the 5-tuple, the queue/pipe number
+ * which we store in the extra area in host order,
+ * and for ipv6 also the flow_id6.
+ * XXX see if we want the tos byte (can store in 'flags')
*/
+/*
+ * Apply 'mask' to 'id' in place and return 'id'.
+ * Ports, proto and extra are always ANDed with the mask. The
+ * address family is taken from 'id' (IS_IP6_FLOW_ID): IPv6 ids
+ * have both addresses run through APPLY_MASK and flow_id6 ANDed,
+ * all other ids have dst_ip and src_ip ANDed.
+ */
+static struct ipfw_flow_id *
+flow_id_mask(struct ipfw_flow_id *mask, struct ipfw_flow_id *id)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id);
-MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
-
-static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;
-
-static int heap_init(struct dn_heap *h, int size);
-static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
-static void heap_extract(struct dn_heap *h, void *obj);
-static void transmit_event(struct dn_pipe *pipe, struct mbuf **head,
- struct mbuf **tail);
-static void ready_event(struct dn_flow_queue *q, struct mbuf **head,
- struct mbuf **tail);
-static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
- struct mbuf **tail);
-
-#define HASHSIZE 16
-#define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
-static struct dn_pipe_head pipehash[HASHSIZE]; /* all pipes */
-static struct dn_flow_set_head flowsethash[HASHSIZE]; /* all flowsets */
-
-static struct callout dn_timeout;
-
-extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
-
-#ifdef SYSCTL_NODE
-SYSCTL_DECL(_net_inet);
-SYSCTL_DECL(_net_inet_ip);
-
-SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
-#if 0 /* curr_time is 64 bit */
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time,
- CTLFLAG_RD, &curr_time, 0, "Current tick");
-#endif
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
- CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
- CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches,
- CTLFLAG_RD, &searches, 0, "Number of queue searches");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps,
- CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
- CTLFLAG_RW, &dn_max_ratio, 0,
- "Max ratio between dynamic queues and buckets");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
- CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum,
- CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment,
- CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done.");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff,
- CTLFLAG_RD, &tick_diff, 0,
- "Adjusted vs non-adjusted curr_time difference (ticks).");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost,
- CTLFLAG_RD, &tick_lost, 0,
- "Number of ticks coalesced by dummynet taskqueue.");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
- CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
- CTLFLAG_RD, &io_pkt, 0,
- "Number of packets passed to dummynet.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast,
- CTLFLAG_RD, &io_pkt_fast, 0,
- "Number of packets bypassed dummynet scheduler.");
-SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
- CTLFLAG_RD, &io_pkt_drop, 0,
- "Number of packets dropped by dummynet.");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
- CTLFLAG_RW, &pipe_slot_limit, 0, "Upper limit in slots for pipe queue.");
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
- CTLFLAG_RW, &pipe_byte_limit, 0, "Upper limit in bytes for pipe queue.");
-#endif
-
-#ifdef DUMMYNET_DEBUG
-int dummynet_debug = 0;
-#ifdef SYSCTL_NODE
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug,
- 0, "control debugging printfs");
-#endif
-#define DPRINTF(X) if (dummynet_debug) printf X
-#else
-#define DPRINTF(X)
-#endif
-
-static struct task dn_task;
-static struct taskqueue *dn_tq = NULL;
-static void dummynet_task(void *, int);
-
-#if defined( __linux__ ) || defined( _WIN32 )
-static DEFINE_SPINLOCK(dummynet_mtx);
-#else
-static struct mtx dummynet_mtx;
-#endif
-#define DUMMYNET_LOCK_INIT() \
- mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF)
-#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx)
-#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx)
-#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx)
-#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED)
-
-static int config_pipe(struct dn_pipe *p);
-static int ip_dn_ctl(struct sockopt *sopt);
-
-static void dummynet(void *);
-static void dummynet_flush(void);
-static void dummynet_send(struct mbuf *);
-void dummynet_drain(void);
-static int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
+ id->dst_port &= mask->dst_port;
+ id->src_port &= mask->src_port;
+ id->proto &= mask->proto;
+ id->extra &= mask->extra;
+ if (is_v6) {
+ APPLY_MASK(&id->dst_ip6, &mask->dst_ip6);
+ APPLY_MASK(&id->src_ip6, &mask->src_ip6);
+ id->flow_id6 &= mask->flow_id6;
+ } else {
+ id->dst_ip &= mask->dst_ip;
+ id->src_ip &= mask->src_ip;
+ }
+ return id;
+}
-/*
- * Flow queue is idle if:
- * 1) it's empty for at least 1 tick
- * 2) it has invalid timestamp (WF2Q case)
- * 3) parent pipe has no 'exhausted' burst.
- */
-#define QUEUE_IS_IDLE(q) ((q)->head == NULL && (q)->S == (q)->F + 1 && \
- curr_time > (q)->idle_time + 1 && \
- ((q)->numbytes + (curr_time - (q)->idle_time - 1) * \
- (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst))
+/*
+ * computes an OR of two masks, result in dst and also returned.
+ * Note the argument order: source first, destination second.
+ * The address family is decided by looking at 'dst' only.
+ */
+static struct ipfw_flow_id *
+flow_id_or(struct ipfw_flow_id *src, struct ipfw_flow_id *dst)
+{
+ int is_v6 = IS_IP6_FLOW_ID(dst);
-/*
- * Heap management functions.
- *
- * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
- * Some macros help finding parent/children so we can optimize them.
- *
- * heap_init() is called to expand the heap when needed.
- * Increment size in blocks of 16 entries.
- * XXX failure to allocate a new element is a pretty bad failure
- * as we basically stall a whole queue forever!!
- * Returns 1 on error, 0 on success
- */
-#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
-#define HEAP_LEFT(x) ( 2*(x) + 1 )
-#define HEAP_IS_LEFT(x) ( (x) & 1 )
-#define HEAP_RIGHT(x) ( 2*(x) + 2 )
-#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
-#define HEAP_INCREMENT 15
+ dst->dst_port |= src->dst_port;
+ dst->src_port |= src->src_port;
+ dst->proto |= src->proto;
+ dst->extra |= src->extra;
+ if (is_v6) {
+/* OR the four 32-bit words of an IPv6 address; local to this function */
+#define OR_MASK(_d, _s) \
+ (_d)->__u6_addr.__u6_addr32[0] |= (_s)->__u6_addr.__u6_addr32[0]; \
+ (_d)->__u6_addr.__u6_addr32[1] |= (_s)->__u6_addr.__u6_addr32[1]; \
+ (_d)->__u6_addr.__u6_addr32[2] |= (_s)->__u6_addr.__u6_addr32[2]; \
+ (_d)->__u6_addr.__u6_addr32[3] |= (_s)->__u6_addr.__u6_addr32[3];
+ OR_MASK(&dst->dst_ip6, &src->dst_ip6);
+ OR_MASK(&dst->src_ip6, &src->src_ip6);
+#undef OR_MASK
+ dst->flow_id6 |= src->flow_id6;
+ } else {
+ dst->dst_ip |= src->dst_ip;
+ dst->src_ip |= src->src_ip;
+ }
+ return dst;
+}
+/*
+ * Return 1 if any field of mask 'm' is non-zero, 0 otherwise.
+ * Ports, proto and extra are checked first; then, depending on
+ * IS_IP6_FLOW_ID(m), either the IPv6 addresses and flow_id6 or
+ * the IPv4 addresses.
+ */
 static int
-heap_init(struct dn_heap *h, int new_size)
+nonzero_mask(struct ipfw_flow_id *m)
 {
- struct dn_heap_entry *p;
+ if (m->dst_port || m->src_port || m->proto || m->extra)
+ return 1;
+ if (IS_IP6_FLOW_ID(m)) {
+ return
+ m->dst_ip6.__u6_addr.__u6_addr32[0] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[1] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[2] ||
+ m->dst_ip6.__u6_addr.__u6_addr32[3] ||
+ m->src_ip6.__u6_addr.__u6_addr32[0] ||
+ m->src_ip6.__u6_addr.__u6_addr32[1] ||
+ m->src_ip6.__u6_addr.__u6_addr32[2] ||
+ m->src_ip6.__u6_addr.__u6_addr32[3] ||
+ m->flow_id6;
+ } else {
+ return m->dst_ip || m->src_ip;
+ }
+}
- if (h->size >= new_size ) {
- printf("dummynet: %s, Bogus call, have %d want %d\n", __func__,
- h->size, new_size);
- return 0 ;
- }
- new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
- p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT);
- if (p == NULL) {
- printf("dummynet: %s, resize %d failed\n", __func__, new_size );
- return 1 ; /* error */
- }
- if (h->size > 0) {
- bcopy(h->p, p, h->size * sizeof(*p) );
- free(h->p, M_DUMMYNET);
+/*
+ * Fold the fields of a flow id into a 32-bit value using XORs and
+ * shifts. IPv6 ids mix the four 32-bit words of each address with
+ * the ports, extra, proto and flow_id6; all other ids mix the IPv4
+ * addresses with the ports, extra and proto. The value is returned
+ * as is, it is not reduced to a bucket index here.
+ * XXX we may want a better hash function
+ */
+static uint32_t
+flow_id_hash(struct ipfw_flow_id *id)
+{
+ uint32_t i;
+
+ if (IS_IP6_FLOW_ID(id)) {
+ /* view each IPv6 address as an array of 32-bit words */
+ uint32_t *d = (uint32_t *)&id->dst_ip6;
+ uint32_t *s = (uint32_t *)&id->src_ip6;
+ i = (d[0] ) ^ (d[1]) ^
+ (d[2] ) ^ (d[3]) ^
+ (d[0] >> 15) ^ (d[1] >> 15) ^
+ (d[2] >> 15) ^ (d[3] >> 15) ^
+ (s[0] << 1) ^ (s[1] << 1) ^
+ (s[2] << 1) ^ (s[3] << 1) ^
+ (s[0] << 16) ^ (s[1] << 16) ^
+ (s[2] << 16) ^ (s[3] << 16) ^
+ (id->dst_port << 1) ^ (id->src_port) ^
+ (id->extra) ^
+ (id->proto ) ^ (id->flow_id6);
+ } else {
+ i = (id->dst_ip) ^ (id->dst_ip >> 15) ^
+ (id->src_ip << 1) ^ (id->src_ip >> 16) ^
+ (id->extra) ^
+ (id->dst_port << 1) ^ (id->src_port) ^ (id->proto);
 }
- h->p = p ;
- h->size = new_size ;
- return 0 ;
+ return i;
 }
-/*
- * Insert element in heap. Normally, p != NULL, we insert p in
- * a new position and bubble up. If p == NULL, then the element is
- * already in place, and key is the position where to start the
- * bubble-up.
- * Returns 1 on failure (cannot allocate new heap entry)
- *
- * If offset > 0 the position (index, int) of the element in the heap is
- * also stored in the element itself at the given offset in bytes.
- */
-#define SET_OFFSET(heap, node) \
- if (heap->offset > 0) \
- *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
-/*
- * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
- */
-#define RESET_OFFSET(heap, node) \
- if (heap->offset > 0) \
- *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
+/*
+ * Like bcmp, returns 0 if ids match, 1 otherwise.
+ * The comparison to run is chosen from the family of 'id1'. An
+ * IPv4 'id1' against an IPv6 'id2' is reported as a mismatch.
+ * NOTE(review): when 'id1' is IPv6 the family of 'id2' is not
+ * checked explicitly, the fields are simply compared -- confirm
+ * this is intended.
+ */
 static int
-heap_insert(struct dn_heap *h, dn_key key1, void *p)
-{
- int son = h->elements ;
-
- if (p == NULL) /* data already there, set starting point */
- son = key1 ;
- else { /* insert new element at the end, possibly resize */
- son = h->elements ;
- if (son == h->size) /* need resize... */
- if (heap_init(h, h->elements+1) )
- return 1 ; /* failure... */
- h->p[son].object = p ;
- h->p[son].key = key1 ;
- h->elements++ ;
- }
- while (son > 0) { /* bubble up */
- int father = HEAP_FATHER(son) ;
- struct dn_heap_entry tmp ;
-
- if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
- break ; /* found right position */
- /* son smaller than father, swap and repeat */
- HEAP_SWAP(h->p[son], h->p[father], tmp) ;
- SET_OFFSET(h, son);
- son = father ;
- }
- SET_OFFSET(h, son);
- return 0 ;
+flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2)
+{
+ int is_v6 = IS_IP6_FLOW_ID(id1);
+
+ if (!is_v6) {
+ if (IS_IP6_FLOW_ID(id2))
+ return 1; /* different address families */
+
+ return (id1->dst_ip == id2->dst_ip &&
+ id1->src_ip == id2->src_ip &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra) ? 0 : 1;
+ }
+ /* the ipv6 case */
+ return (
+ !bcmp(&id1->dst_ip6,&id2->dst_ip6, sizeof(id1->dst_ip6)) &&
+ !bcmp(&id1->src_ip6,&id2->src_ip6, sizeof(id1->src_ip6)) &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
+ id1->extra == id2->extra &&
+ id1->flow_id6 == id2->flow_id6) ? 0 : 1;
 }
+/*--------- end of flow-id mask, hash and compare ---------*/
-/*
- * remove top element from heap, or obj if obj != NULL
+/*--- support functions for the qht hashtable ----
+ * Entries are hashed by flow-id
*/
-static void
-heap_extract(struct dn_heap *h, void *obj)
+/*
+ * Hash callback for the queue hash table. 'key' is a dn_queue
+ * (its ni.fid is used) when DNHT_KEY_IS_OBJ is set in 'flags',
+ * otherwise it is a pointer to a flow id. 'arg' is unused.
+ * Returns flow_id_hash() of the selected flow id.
+ */
+static uint32_t
+q_hash(uintptr_t key, int flags, void *arg)
 {
- int child, father, max = h->elements - 1 ;
+ /* compute the hash slot from the flow id */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_queue *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
- if (max < 0) {
- printf("dummynet: warning, extract from empty heap 0x%p\n", h);
- return ;
- }
- father = 0 ; /* default: move up smallest child */
- if (obj != NULL) { /* extract specific element, index is at offset */
- if (h->offset <= 0)
- panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
- father = *((int *)((char *)obj + h->offset)) ;
- if (father < 0 || father >= h->elements) {
- printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
- father, h->elements);
- panic("dummynet: heap_extract");
+ return flow_id_hash(id);
+}
+
+/*
+ * Match callback for the queue hash table. Returns non-zero when
+ * the flow id stored in queue 'obj' is equal (flow_id_cmp) to the
+ * flow id designated by 'key': a dn_queue when DNHT_KEY_IS_OBJ is
+ * set in 'flags', a pointer to a flow id otherwise.
+ * 'arg' is unused.
+ */
+static int
+q_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *o = (struct dn_queue *)obj;
+ struct ipfw_flow_id *id2;
+
+ if (flags & DNHT_KEY_IS_OBJ) {
+ /* key is a queue: take the flow id stored in it */
+ id2 = &((struct dn_queue *)key)->ni.fid;
+ } else {
+ id2 = (struct ipfw_flow_id *)key;
 }
- }
- RESET_OFFSET(h, father);
- child = HEAP_LEFT(father) ; /* left child */
- while (child <= max) { /* valid entry */
- if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
- child = child+1 ; /* take right child, otherwise left */
- h->p[father] = h->p[child] ;
- SET_OFFSET(h, father);
- father = child ;
- child = HEAP_LEFT(child) ; /* left child for next loop */
- }
- h->elements-- ;
- if (father != max) {
- /*
- * Fill hole with last entry and bubble up, reusing the insert code
- */
- h->p[father] = h->p[max] ;
- heap_insert(h, father, NULL); /* this one cannot fail */
- }
+ return (0 == flow_id_cmp(&o->ni.fid, id2));
 }
-#if 0
 /*
- * change object position and update references
- * XXX this one is never used!
+ * create a new queue instance for the given 'key'.
+ * 'arg' is a template dn_queue of which only 'fs' and '_si' are
+ * read. The queue is allocated zeroed with M_NOWAIT, with room
+ * for q_datalen extra bytes as declared by the scheduler of the
+ * flowset. 'key' is dereferenced as a flow id only when the
+ * flowset has DN_QHT_HASH set; 'flags' is unused.
+ * On success the q_count of the scheduler instance and
+ * dn_cfg.queue_count are incremented and the scheduler's
+ * new_queue hook, if any, is called (its result is not checked).
+ * Returns the new queue, or NULL if the allocation fails.
 */
-static void
-heap_move(struct dn_heap *h, dn_key new_key, void *object)
-{
- int temp;
- int i ;
- int max = h->elements-1 ;
- struct dn_heap_entry buf ;
-
- if (h->offset <= 0)
- panic("cannot move items on this heap");
-
- i = *((int *)((char *)object + h->offset));
- if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
- h->p[i].key = new_key ;
- for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
- i = temp ) { /* bubble up */
- HEAP_SWAP(h->p[i], h->p[temp], buf) ;
- SET_OFFSET(h, i);
- }
- } else { /* must move down */
- h->p[i].key = new_key ;
- while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
- if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
- temp++ ; /* select child with min key */
- if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
- HEAP_SWAP(h->p[i], h->p[temp], buf) ;
- SET_OFFSET(h, i);
- } else
- break ;
- i = temp ;
+static void *
+q_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_queue *q, *template = arg;
+ struct dn_fsk *fs = template->fs;
+ int size = sizeof(*q) + fs->sched->fp->q_datalen;
+
+ q = malloc(size, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (q == NULL) {
+ D("no memory for new queue");
+ return NULL;
 }
- }
- SET_OFFSET(h, i);
+
+ set_oid(&q->ni.oid, DN_QUEUE, size);
+ if (fs->fs.flags & DN_QHT_HASH)
+ q->ni.fid = *(struct ipfw_flow_id *)key;
+ q->fs = fs;
+ q->_si = template->_si;
+ q->_si->q_count++;
+
+ if (fs->sched->fp->new_queue)
+ fs->sched->fp->new_queue(q);
+ dn_cfg.queue_count++;
+ return q;
 }
-#endif /* heap_move, unused */
 /*
- * heapify() will reorganize data inside an array to maintain the
- * heap property. It is needed when we delete a bunch of entries.
+ * Notify schedulers that a queue is going away.
+ * If (flags & DN_DESTROY), also free the packets.
+ * The version for callbacks is called q_delete_cb().
+ * In all cases the q_count of the scheduler instance is
+ * decremented and q->_si is cleared. With DN_DESTROY the queue
+ * memory itself is also released and dn_cfg.queue_count is
+ * decremented; without it the queue object is left allocated.
 */
 static void
-heapify(struct dn_heap *h)
+dn_delete_queue(struct dn_queue *q, int flags)
 {
- int i ;
+ struct dn_fsk *fs = q->fs;
+
+ // D("fs %p si %p\n", fs, q->_si);
+ /* notify the parent scheduler that the queue is going away */
+ if (fs && fs->sched->fp->free_queue)
+ fs->sched->fp->free_queue(q);
+ q->_si->q_count--;
+ q->_si = NULL;
+ if (flags & DN_DESTROY) {
+ if (q->mq.head)
+ dn_free_pkts(q->mq.head);
+ bzero(q, sizeof(*q)); // safety
+ free(q, M_DUMMYNET);
+ dn_cfg.queue_count--;
+ }
+}
- for (i = 0 ; i < h->elements ; i++ )
- heap_insert(h, i , NULL) ;
+/*
+ * Hash table scan callback wrapping dn_delete_queue(). 'arg'
+ * carries the flags as an integer cast to a pointer. Returns
+ * DNHT_SCAN_DEL when DN_DESTROY is set in the flags, 0 otherwise.
+ */
+static int
+q_delete_cb(void *q, void *arg)
+{
+ int flags = (int)(uintptr_t)arg;
+ dn_delete_queue(q, flags);
+ return (flags & DN_DESTROY) ? DNHT_SCAN_DEL : 0;
 }
 /*
- * cleanup the heap and free data structure
+ * calls dn_delete_queue/q_delete_cb on all queues,
+ * which notifies the parent scheduler and possibly drains packets.
+ * flags & DN_DESTROY: drains queues and destroys qht;
+ * Does nothing when fs->qht is NULL. When the flowset does not
+ * have DN_QHT_HASH set, fs->qht is used as a pointer to a single
+ * dn_queue instead of a hash table.
 */
 static void
-heap_free(struct dn_heap *h)
+qht_delete(struct dn_fsk *fs, int flags)
 {
- if (h->size >0 )
- free(h->p, M_DUMMYNET);
- bzero(h, sizeof(*h) );
+ ND("fs %d start flags %d qht %p",
+ fs->fs.fs_nr, flags, fs->qht);
+ if (!fs->qht)
+ return;
+ if (fs->fs.flags & DN_QHT_HASH) {
+ dn_ht_scan(fs->qht, q_delete_cb, (void *)(uintptr_t)flags);
+ if (flags & DN_DESTROY) {
+ dn_ht_free(fs->qht, 0);
+ fs->qht = NULL;
+ }
+ } else {
+ dn_delete_queue((struct dn_queue *)(fs->qht), flags);
+ if (flags & DN_DESTROY)
+ fs->qht = NULL;
+ }
 }
 /*
- * --- end of heap management functions ---
+ * Find and possibly create the queue for a MULTIQUEUE scheduler.
+ * We never call it for !MULTIQUEUE (the queue is in the sch_inst).
+ * With DN_QHT_HASH the hash table is created on first use and
+ * looked up (inserting if missing) with a copy of 'id' masked by
+ * fs->fsk_mask, so the caller's 'id' is not modified. Without
+ * DN_QHT_HASH, fs->qht holds a single queue, created on first use.
+ * Returns NULL if the hash table cannot be created; the result of
+ * dn_ht_find() or q_new() is otherwise returned as is.
 */
+struct dn_queue *
+ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
+ struct ipfw_flow_id *id)
+{
+ struct dn_queue template;
+
+ /* only these two fields of the template are read by q_new() */
+ template._si = si;
+ template.fs = fs;
+
+ if (fs->fs.flags & DN_QHT_HASH) {
+ struct ipfw_flow_id masked_id;
+ if (fs->qht == NULL) {
+ fs->qht = dn_ht_init(NULL, fs->fs.buckets,
+ offsetof(struct dn_queue, q_next),
+ q_hash, q_match, q_new);
+ if (fs->qht == NULL)
+ return NULL;
+ }
+ masked_id = *id;
+ flow_id_mask(&fs->fsk_mask, &masked_id);
+ return dn_ht_find(fs->qht, (uintptr_t)&masked_id,
+ DNHT_INSERT, &template);
+ } else {
+ if (fs->qht == NULL)
+ fs->qht = q_new(0, 0, &template);
+ return (struct dn_queue *)fs->qht;
+ }
+}
+/*--- end of queue hash table ---*/
-/*
- * Dispose a list of packet. Use an inline functions so if we
- * need to free extra state associated to a packet, this is a
- * central point to do it.
+/*--- support functions for the sch_inst hashtable ----
+ *
+ * These are hashed by flow-id
*/
-
-static __inline void dn_free_pkts(struct mbuf *mnext)
+/*
+ * Hash callback for the scheduler instance hash table. 'key' is a
+ * dn_sch_inst (its ni.fid is used) when DNHT_KEY_IS_OBJ is set in
+ * 'flags', otherwise a pointer to a flow id. 'arg' is unused.
+ * Returns flow_id_hash() of the selected flow id.
+ */
+static uint32_t
+si_hash(uintptr_t key, int flags, void *arg)
 {
- struct mbuf *m;
+ /* compute the hash slot from the flow id */
+ struct ipfw_flow_id *id = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
- while ((m = mnext) != NULL) {
- mnext = m->m_nextpkt;
- FREE_PKT(m);
- }
+ return flow_id_hash(id);
 }
-/*
- * Return the mbuf tag holding the dummynet state. As an optimization
- * this is assumed to be the first tag on the list. If this turns out
- * wrong we'll need to search the list.
- */
-static struct dn_pkt_tag *
-dn_tag_get(struct mbuf *m)
+/*
+ * Match callback for the scheduler instance hash table. Returns
+ * non-zero when the flow id of instance 'obj' is equal
+ * (flow_id_cmp) to the flow id designated by 'key': a dn_sch_inst
+ * when DNHT_KEY_IS_OBJ is set in 'flags', a pointer to a flow id
+ * otherwise. 'arg' is unused.
+ */
+static int
+si_match(void *obj, uintptr_t key, int flags, void *arg)
 {
- struct m_tag *mtag = m_tag_first(m);
- KASSERT(mtag != NULL &&
- mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
- mtag->m_tag_id == PACKET_TAG_DUMMYNET,
- ("packet on dummynet queue w/o dummynet tag!"));
- return (struct dn_pkt_tag *)(mtag+1);
+ struct dn_sch_inst *o = obj;
+ struct ipfw_flow_id *id2;
+
+ id2 = (flags & DNHT_KEY_IS_OBJ) ?
+ &((struct dn_sch_inst *)key)->ni.fid :
+ (struct ipfw_flow_id *)key;
+ return flow_id_cmp(&o->ni.fid, id2) == 0;
 }
 /*
- * Scheduler functions:
- *
- * transmit_event() is called when the delay-line needs to enter
- * the scheduler, either because of existing pkts getting ready,
- * or new packets entering the queue. The event handled is the delivery
- * time of the packet.
- *
- * ready_event() does something similar with fixed-rate queues, and the
- * event handled is the finish time of the head pkt.
- *
- * wfq_ready_event() does something similar with WF2Q queues, and the
- * event handled is the start time of the head pkt.
- *
- * In all cases, we make sure that the data structures are consistent
- * before passing pkts out, because this might trigger recursive
- * invocations of the procedures.
+ * create a new instance for the given 'key'
+ * Allocate memory for instance, delay line and scheduler private data.
+ * 'arg' is the dn_schk the instance belongs to. The allocation is
+ * zeroed, M_NOWAIT, and includes si_datalen extra bytes as declared
+ * by the scheduler algorithm. 'key' is dereferenced as a flow id
+ * only when the scheduler has DN_HAVE_MASK set; 'flags' is unused.
+ * A non-zero return from the scheduler's new_sched hook is treated
+ * as an error. On success dn_cfg.si_count is incremented.
+ * Returns the new instance, or NULL on allocation or hook failure
+ * (in which case the instance memory is released).
 */
-static void
-transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
+static void *
+si_new(uintptr_t key, int flags, void *arg)
 {
- struct mbuf *m;
- struct dn_pkt_tag *pkt;
-
- DUMMYNET_LOCK_ASSERT();
-
- while ((m = pipe->head) != NULL) {
- pkt = dn_tag_get(m);
- if (!DN_KEY_LEQ(pkt->output_time, curr_time))
- break;
-
- pipe->head = m->m_nextpkt;
- if (*tail != NULL)
- (*tail)->m_nextpkt = m;
- else
- *head = m;
- *tail = m;
+ struct dn_schk *s = arg;
+ struct dn_sch_inst *si;
+ int l = sizeof(*si) + s->fp->si_datalen;
+
+ si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (si == NULL)
+ goto error;
+ /* Set length only for the part passed up to userland. */
+ set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
+ set_oid(&(si->dline.oid), DN_DELAY_LINE,
+ sizeof(struct delay_line));
+ /* mark si and dline as outside the event queue */
+ si->ni.oid.id = si->dline.oid.id = -1;
+
+ si->sched = s;
+ si->dline.si = si;
+
+ if (s->fp->new_sched && s->fp->new_sched(si)) {
+ D("new_sched error");
+ goto error;
 }
- if (*tail != NULL)
- (*tail)->m_nextpkt = NULL;
+ if (s->sch.flags & DN_HAVE_MASK)
+ si->ni.fid = *(struct ipfw_flow_id *)key;
- /* If there are leftover packets, put into the heap for next event. */
- if ((m = pipe->head) != NULL) {
- pkt = dn_tag_get(m);
- /*
- * XXX Should check errors on heap_insert, by draining the
- * whole pipe p and hoping in the future we are more successful.
- */
- heap_insert(&extract_heap, pkt->output_time, pipe);
+ dn_cfg.si_count++;
+ return si;
+
+error:
+ if (si) {
+ bzero(si, sizeof(*si)); // safety
+ free(si, M_DUMMYNET);
 }
+ return NULL;
 }
-#ifndef __linux__
-#define div64(a, b) ((int64_t)(a) / (int64_t)(b))
-#endif
 /*
- * Compute how many ticks we have to wait before being able to send
- * a packet. This is computed as the "wire time" for the packet
- * (length + extra bits), minus the credit available, scaled to ticks.
- * Check that the result is not be negative (it could be if we have
- * too much leftover credit in q->numbytes).
+ * Callback from siht to delete all scheduler instances. Remove
+ * si and delay line from the system heap, destroy all queues.
+ * We assume that all flowset have been notified and do not
+ * point to us anymore.
+ * 'arg' is unused. The scheduler's free_sched hook, if any, is
+ * called before the instance memory is released, and
+ * dn_cfg.si_count is decremented. Always returns DNHT_SCAN_DEL.
 */
-static inline dn_key
-set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p)
+static int
+si_destroy(void *_si, void *arg)
 {
- int64_t ret;
-
- ret = div64( (m->m_pkthdr.len * 8 + q->extra_bits) * hz
- - q->numbytes + p->bandwidth - 1 , p->bandwidth);
- if (ret < 0)
- ret = 0;
- return ret;
+ struct dn_sch_inst *si = _si;
+ struct dn_schk *s = si->sched;
+ struct delay_line *dl = &si->dline;
+
+ if (dl->oid.subtype) /* remove delay line from event heap */
+ heap_extract(&dn_cfg.evheap, dl);
+ dn_free_pkts(dl->mq.head); /* drain delay line */
+ if (si->kflags & DN_ACTIVE) /* remove si from event heap */
+ heap_extract(&dn_cfg.evheap, si);
+ if (s->fp->free_sched)
+ s->fp->free_sched(si);
+ bzero(si, sizeof(*si)); /* safety */
+ free(si, M_DUMMYNET);
+ dn_cfg.si_count--;
+ return DNHT_SCAN_DEL;
 }
 /*
- * Convert the additional MAC overheads/delays into an equivalent
- * number of bits for the given data rate. The samples are in milliseconds
- * so we need to divide by 1000.
+ * Find the scheduler instance for this packet. If we need to apply
+ * a mask, do it on a local copy of the flow_id to preserve the original.
+ * Assume siht is always initialized if we have a mask.
+ * Without a mask s->siht points to a single instance, created on
+ * first use with si_new(); NULL is returned if that fails.
 */
-static dn_key
-compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p)
+struct dn_sch_inst *
+ipdn_si_find(struct dn_schk *s, struct ipfw_flow_id *id)
 {
- int index;
- dn_key extra_bits;
- if (!p->samples || p->samples_no == 0)
- return 0;
- index = random() % p->samples_no;
- extra_bits = div64((dn_key)p->samples[index] * p->bandwidth, 1000);
- if (index >= p->loss_level) {
- struct dn_pkt_tag *dt = dn_tag_get(pkt);
- if (dt)
- dt->dn_dir = DIR_DROP;
+ if (s->sch.flags & DN_HAVE_MASK) {
+ struct ipfw_flow_id id_t = *id;
+ flow_id_mask(&s->sch.sched_mask, &id_t);
+ return dn_ht_find(s->siht, (uintptr_t)&id_t,
+ DNHT_INSERT, s);
 }
- return extra_bits;
+ if (!s->siht)
+ s->siht = si_new(0, 0, s);
+ return (struct dn_sch_inst *)s->siht;
 }
-static void
-free_pipe(struct dn_pipe *p)
+/*
+ * callback to flush credit for the scheduler instance: the credit
+ * is set to the burst of the scheduler's link, plus the link
+ * bandwidth when dn_cfg.io_fast is non-zero. 'arg' is unused.
+ * Always returns 0.
+ */
+static int
+si_reset_credit(void *_si, void *arg)
 {
- if (p->samples)
- free(p->samples, M_DUMMYNET);
- free(p, M_DUMMYNET);
+ struct dn_sch_inst *si = _si;
+ struct dn_link *p = &si->sched->link;
+
+ si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
+ return 0;
 }
-/*
- * extract pkt from queue, compute output time (could be now)
- * and put into delay line (p_queue)
- */
+/*
+ * Reset the credit of every instance of scheduler 's' by means of
+ * si_reset_credit(): with DN_HAVE_MASK the instance hash table is
+ * scanned, otherwise s->siht is the single instance and is reset
+ * only if it exists.
+ */
 static void
-move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p,
- int len)
+schk_reset_credit(struct dn_schk *s)
 {
- struct dn_pkt_tag *dt = dn_tag_get(pkt);
-
- q->head = pkt->m_nextpkt ;
- q->len-- ;
- q->len_bytes -= len ;
-
- dt->output_time = curr_time + p->delay ;
-
- if (p->head == NULL)
- p->head = pkt;
- else
- p->tail->m_nextpkt = pkt;
- p->tail = pkt;
- p->tail->m_nextpkt = NULL;
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, si_reset_credit, NULL);
+ else if (s->siht)
+ si_reset_credit(s->siht, NULL);
 }
+/*---- end of sch_inst hashtable ---------------------*/
-/*
- * ready_event() is invoked every time the queue must enter the
- * scheduler, either because the first packet arrives, or because
- * a previously scheduled event fired.
- * On invokation, drain as many pkts as possible (could be 0) and then
- * if there are leftover packets reinsert the pkt in the scheduler.
+/*-------------------------------------------------------
+ * flowset hash (fshash) support. Entries are hashed by fs_nr.
+ * New allocations are put in the fsunlinked list, from which
+ * they are removed when they point to a specific scheduler.
*/
-static void
-ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
+/*
+ * Hash callback for the flowset hash table. The value hashed is
+ * the flowset number: 'key' itself, or fs.fs_nr of the dn_fsk that
+ * 'key' points to when DNHT_KEY_IS_OBJ is set in 'flags'.
+ * 'arg' is unused.
+ */
+static uint32_t
+fsk_hash(uintptr_t key, int flags, void *arg)
 {
- struct mbuf *pkt;
- struct dn_pipe *p = q->fs->pipe;
- int p_was_empty;
-
- DUMMYNET_LOCK_ASSERT();
+ uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_fsk *)key)->fs.fs_nr;
- if (p == NULL) {
- printf("dummynet: ready_event- pipe is gone\n");
- return;
- }
- p_was_empty = (p->head == NULL);
+ return ( (i>>8)^(i>>4)^i );
+}
- /*
- * Schedule fixed-rate queues linked to this pipe:
- * account for the bw accumulated since last scheduling, then
- * drain as many pkts as allowed by q->numbytes and move to
- * the delay line (in p) computing output time.
- * bandwidth==0 (no limit) means we can drain the whole queue,
- * setting len_scaled = 0 does the job.
- */
- q->numbytes += (curr_time - q->sched_time) * p->bandwidth;
- while ((pkt = q->head) != NULL) {
- int len = pkt->m_pkthdr.len;
- dn_key len_scaled = p->bandwidth ? len*8*hz
- + q->extra_bits*hz
- : 0;
-
- if (DN_KEY_GT(len_scaled, q->numbytes))
- break;
- q->numbytes -= len_scaled;
- move_pkt(pkt, q, p, len);
- if (q->head)
- q->extra_bits = compute_extra_bits(q->head, p);
- }
- /*
- * If we have more packets queued, schedule next ready event
- * (can only occur when bandwidth != 0, otherwise we would have
- * flushed the whole queue in the previous loop).
- * To this purpose we record the current time and compute how many
- * ticks to go for the finish time of the packet.
- */
- if ((pkt = q->head) != NULL) { /* this implies bandwidth != 0 */
- dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */
+/*
+ * Match callback for the flowset hash table. Returns non-zero
+ * when the fs_nr of flowset 'obj' equals the flowset number given
+ * by 'key': 'key' itself, or fs.fs_nr of the dn_fsk it points to
+ * when DNHT_KEY_IS_OBJ is set in 'flags'. 'arg' is unused.
+ */
+static int
+fsk_match(void *obj, uintptr_t key, int flags, void *arg)
+{
+ struct dn_fsk *fs = obj;
+ int i = !(flags & DNHT_KEY_IS_OBJ) ? key :
+ ((struct dn_fsk *)key)->fs.fs_nr;
- q->sched_time = curr_time;
- heap_insert(&ready_heap, curr_time + t, (void *)q);
- /*
- * XXX Should check errors on heap_insert, and drain the whole
- * queue on error hoping next time we are luckier.
- */
- } else /* RED needs to know when the queue becomes empty. */
- q->idle_time = curr_time;
+ return (fs->fs.fs_nr == i);
+}
- /*
- * If the delay line was empty call transmit_event() now.
- * Otherwise, the scheduler will take care of it.
- */
- if (p_was_empty)
- transmit_event(p, head, tail);
+/*
+ * Allocate a new, zeroed flowset (M_NOWAIT). On success its oid
+ * is initialized, dn_cfg.fsk_count is incremented and the flowset
+ * is inserted at the head of the dn_cfg.fsu list.
+ * 'key', 'flags' and 'arg' are not used, so fs_nr is not set
+ * here -- presumably the caller fills it in; confirm.
+ * Returns the flowset, or NULL if the allocation fails.
+ */
+static void *
+fsk_new(uintptr_t key, int flags, void *arg)
+{
+ struct dn_fsk *fs;
+
+ fs = malloc(sizeof(*fs), M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (fs) {
+ set_oid(&fs->fs.oid, DN_FS, sizeof(fs->fs));
+ dn_cfg.fsk_count++;
+ fs->drain_bucket = 0;
+ SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain);
+ }
+ return fs;
 }
/*
- * Called when we can transmit packets on WF2Q queues. Take pkts out of
- * the queues at their start time, and enqueue into the delay line.
- * Packets are drained until p->numbytes < 0. As long as
- * len_scaled >= p->numbytes, the packet goes into the delay line
- * with a deadline p->delay. For the last packet, if p->numbytes < 0,
- * there is an additional delay.
+ * detach flowset from its current scheduler. Flags as follows:
+ * DN_DETACH removes from the fsk_list
+ * DN_DESTROY deletes individual queues
+ * DN_DELETE_FS destroys the flowset (otherwise goes in unlinked).
*/
static void
-ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
+fsk_detach(struct dn_fsk *fs, int flags)
{
- int p_was_empty = (p->head == NULL);
- struct dn_heap *sch = &(p->scheduler_heap);
- struct dn_heap *neh = &(p->not_eligible_heap);
- int64_t p_numbytes = p->numbytes;
-
- /*
- * p->numbytes is only 32bits in FBSD7, but we might need 64 bits.
- * Use a local variable for the computations, and write back the
- * results when done, saturating if needed.
- * The local variable has no impact on performance and helps
- * reducing diffs between the various branches.
- */
-
- DUMMYNET_LOCK_ASSERT();
-
- if (p->if_name[0] == 0) /* tx clock is simulated */
- p_numbytes += (curr_time - p->sched_time) * p->bandwidth;
- else { /*
- * tx clock is for real,
- * the ifq must be empty or this is a NOP.
- */
-#ifdef __linux__
- return;
-#else
- if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
- return;
- else {
- DPRINTF(("dummynet: pipe %d ready from %s --\n",
- p->pipe_nr, p->if_name));
- }
-#endif
+ if (flags & DN_DELETE_FS)
+ flags |= DN_DESTROY;
+ ND("fs %d from sched %d flags %s %s %s",
+ fs->fs.fs_nr, fs->fs.sched_nr,
+ (flags & DN_DELETE_FS) ? "DEL_FS":"",
+ (flags & DN_DESTROY) ? "DEL":"",
+ (flags & DN_DETACH) ? "DET":"");
+ if (flags & DN_DETACH) { /* detach from the list */
+ struct dn_fsk_head *h;
+ h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
+ SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
}
-
- /*
- * While we have backlogged traffic AND credit, we need to do
- * something on the queue.
+ /* Free the RED parameters, they will be recomputed on
+ * subsequent attach if needed.
*/
- while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) {
- if (sch->elements > 0) {
- /* Have some eligible pkts to send out. */
- struct dn_flow_queue *q = sch->p[0].object;
- struct mbuf *pkt = q->head;
- struct dn_flow_set *fs = q->fs;
- uint64_t len = pkt->m_pkthdr.len;
- int len_scaled = p->bandwidth ? len * 8 * hz : 0;
-
- heap_extract(sch, NULL); /* Remove queue from heap. */
- p_numbytes -= len_scaled;
- move_pkt(pkt, q, p, len);
-
- p->V += div64((len << MY_M), p->sum); /* Update V. */
- q->S = q->F; /* Update start time. */
- if (q->len == 0) {
- /* Flow not backlogged any more. */
- fs->backlogged--;
- heap_insert(&(p->idle_heap), q->F, q);
- } else {
- /* Still backlogged. */
-
- /*
- * Update F and position in backlogged queue,
- * then put flow in not_eligible_heap
- * (we will fix this later).
- */
- len = (q->head)->m_pkthdr.len;
- q->F += div64((len << MY_M), fs->weight);
- if (DN_KEY_LEQ(q->S, p->V))
- heap_insert(neh, q->S, q);
- else
- heap_insert(sch, q->F, q);
- }
- }
- /*
- * Now compute V = max(V, min(S_i)). Remember that all elements
- * in sch have by definition S_i <= V so if sch is not empty,
- * V is surely the max and we must not update it. Conversely,
- * if sch is empty we only need to look at neh.
- */
- if (sch->elements == 0 && neh->elements > 0)
- p->V = MAX64(p->V, neh->p[0].key);
- /* Move from neh to sch any packets that have become eligible */
- while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) {
- struct dn_flow_queue *q = neh->p[0].object;
- heap_extract(neh, NULL);
- heap_insert(sch, q->F, q);
- }
-
- if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */
- p_numbytes = -1; /* Mark not ready for I/O. */
- break;
- }
- }
- if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0) {
- p->idle_time = curr_time;
- /*
- * No traffic and no events scheduled.
- * We can get rid of idle-heap.
- */
- if (p->idle_heap.elements > 0) {
- int i;
-
- for (i = 0; i < p->idle_heap.elements; i++) {
- struct dn_flow_queue *q;
-
- q = p->idle_heap.p[i].object;
- q->F = 0;
- q->S = q->F + 1;
- }
- p->sum = 0;
- p->V = 0;
- p->idle_heap.elements = 0;
- }
- }
- /*
- * If we are getting clocks from dummynet (not a real interface) and
- * If we are under credit, schedule the next ready event.
- * Also fix the delivery time of the last packet.
- */
- if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */
- dn_key t = 0; /* Number of ticks i have to wait. */
-
- if (p->bandwidth > 0)
- t = div64(p->bandwidth - 1 - p_numbytes, p->bandwidth);
- dn_tag_get(p->tail)->output_time += t;
- p->sched_time = curr_time;
- heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
- /*
- * XXX Should check errors on heap_insert, and drain the whole
- * queue on error hoping next time we are luckier.
- */
+ if (fs->w_q_lookup)
+ free(fs->w_q_lookup, M_DUMMYNET);
+ fs->w_q_lookup = NULL; /* cleared so a later attach recomputes the RED table */
+ qht_delete(fs, flags); /* flags are forwarded unchanged to qht_delete() */
+ if (fs->sched && fs->sched->fp->free_fsk) /* free_fsk is an optional scheduler hook */
+ fs->sched->fp->free_fsk(fs);
+ fs->sched = NULL; /* no longer attached to any scheduler */
+ if (flags & DN_DELETE_FS) {
+ bzero(fs, sizeof(*fs)); /* safety: scrub the whole struct; sizeof(fs) would only cover a pointer */
+ free(fs, M_DUMMYNET);
+ dn_cfg.fsk_count--; /* paired with the increment in fsk_new() */
+ } else {
+ SLIST_INSERT_HEAD(&dn_cfg.fsu, fs, sch_chain); /* keep the flowset, parked on the unlinked list */
}
-
- /* Write back p_numbytes (adjust 64->32bit if necessary). */
- p->numbytes = p_numbytes;
-
- /*
- * If the delay line was empty call transmit_event() now.
- * Otherwise, the scheduler will take care of it.
- */
- if (p_was_empty)
- transmit_event(p, head, tail);
}
/*
- * This is called one tick, after previous run. It is used to
- * schedule next run.
+ * Detach or destroy all flowsets in a list.
+ * flags specifies what to do:
+ * DN_DESTROY: flush all queues
+ * DN_DELETE_FS: DN_DESTROY + destroy flowset
+ * DN_DELETE_FS implies DN_DESTROY
*/
static void
-dummynet(void * __unused unused)
+fsk_detach_list(struct dn_fsk_head *h, int flags)
{
-
- taskqueue_enqueue(dn_tq, &dn_task);
+ struct dn_fsk *fs;
+ int n = 0; /* only for stats */
+
+ ND("head %p flags %x", h, flags);
+ while ((fs = SLIST_FIRST(h))) { /* pop entries until the list is empty */
+ SLIST_REMOVE_HEAD(h, sch_chain); /* unlinked here, so flags presumably never carries DN_DETACH -- TODO confirm */
+ n++;
+ fsk_detach(fs, flags); /* frees fs or re-inserts it on dn_cfg.fsu, depending on DN_DELETE_FS */
+ }
+ ND("done %d flowsets", n);
}
/*
- * The main dummynet processing function.
+ * called on 'queue X delete' -- removes the flowset from fshash,
+ * deletes all queues for the flowset, and removes the flowset.
*/
-static void
-dummynet_task(void *context, int pending)
+static int
+delete_fs(int i, int locked)
{
- struct mbuf *head = NULL, *tail = NULL;
- struct dn_pipe *pipe;
- struct dn_heap *heaps[3];
- struct dn_heap *h;
- void *p; /* generic parameter to handler */
- int i;
-
- DUMMYNET_LOCK();
-
- heaps[0] = &ready_heap; /* fixed-rate queues */
- heaps[1] = &wfq_ready_heap; /* wfq queues */
- heaps[2] = &extract_heap; /* delay line */
-
- /* Update number of lost(coalesced) ticks. */
- tick_lost += pending - 1;
-
- getmicrouptime(&t);
- /* Last tick duration (usec). */
- tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 +
- (t.tv_usec - prev_t.tv_usec);
- /* Last tick vs standard tick difference (usec). */
- tick_delta = (tick_last * hz - 1000000) / hz;
- /* Accumulated tick difference (usec). */
- tick_delta_sum += tick_delta;
-
- prev_t = t;
-
- /*
- * Adjust curr_time if accumulated tick difference greater than
- * 'standard' tick. Since curr_time should be monotonically increasing,
- * we do positive adjustment as required and throttle curr_time in
- * case of negative adjustment.
- */
- curr_time++;
- if (tick_delta_sum - tick >= 0) {
- int diff = tick_delta_sum / tick;
-
- curr_time += diff;
- tick_diff += diff;
- tick_delta_sum %= tick;
- tick_adjustment++;
- } else if (tick_delta_sum + tick <= 0) {
- curr_time--;
- tick_diff--;
- tick_delta_sum += tick;
- tick_adjustment++;
- }
-
- for (i = 0; i < 3; i++) {
- h = heaps[i];
- while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) {
- if (h->p[0].key > curr_time)
- printf("dummynet: warning, "
- "heap %d is %d ticks late\n",
- i, (int)(curr_time - h->p[0].key));
- /* store a copy before heap_extract */
- p = h->p[0].object;
- /* need to extract before processing */
- heap_extract(h, NULL);
- if (i == 0)
- ready_event(p, &head, &tail);
- else if (i == 1) {
- struct dn_pipe *pipe = p;
- if (pipe->if_name[0] != '\0')
- printf("dummynet: bad ready_event_wfq "
- "for pipe %s\n", pipe->if_name);
- else
- ready_event_wfq(p, &head, &tail);
- } else
- transmit_event(p, &head, &tail);
- }
- }
-
- /* Sweep pipes trying to expire idle flow_queues. */
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_FOREACH(pipe, &pipehash[i], next) {
- if (pipe->idle_heap.elements > 0 &&
- DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) {
- struct dn_flow_queue *q =
- pipe->idle_heap.p[0].object;
-
- heap_extract(&(pipe->idle_heap), NULL);
- /* Mark timestamp as invalid. */
- q->S = q->F + 1;
- pipe->sum -= q->fs->weight;
- }
- }
- }
-
- DUMMYNET_UNLOCK();
+ struct dn_fsk *fs;
+ int err = 0;
+
+ if (!locked) /* presumably: non-zero 'locked' means the caller already holds the lock -- TODO confirm */
+ DN_BH_WLOCK();
+ fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); /* look up flowset i, removing it from fshash */
+ ND("fs %d found %p", i, fs);
+ if (fs) {
+ fsk_detach(fs, DN_DETACH | DN_DELETE_FS); /* unlink from its list, drop queues, free the flowset */
+ err = 0;
+ } else
+ err = EINVAL; /* no flowset with that number */
+ if (!locked)
+ DN_BH_WUNLOCK();
+ return err; /* 0 on success, EINVAL if flowset i was not found */
+}
- if (head != NULL)
- dummynet_send(head);
+/*----- end of flowset hashtable support -------------*/
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+/*------------------------------------------------------------
+ * Scheduler hash. When searching by index we pass sched_nr,
+ * otherwise we pass struct dn_sch * which is the first field in
+ * struct dn_schk so we can cast between the two. We use this trick
+ * because, presumably, only a struct dn_sch exists in the create phase (but it should be fixed).
+ */
+static uint32_t
+schk_hash(uintptr_t key, int flags, void *_arg)
+{ /* _arg is unused */
+ uint32_t i = !(flags & DNHT_KEY_IS_OBJ) ? key : /* without DNHT_KEY_IS_OBJ, key is the scheduler number */
+ ((struct dn_schk *)key)->sch.sched_nr; /* otherwise read sched_nr through the pointer in key */
+ return ( (i>>8)^(i>>4)^i ); /* fold shifted copies of the number into the hash value */
}
-static void
-dummynet_send(struct mbuf *m)
+static int
+schk_match(void *obj, uintptr_t key, int flags, void *_arg)
{
- struct mbuf *n;
-
- for (; m != NULL; m = n) {
- struct ifnet *ifp = NULL;
- int dst;
- struct m_tag *tag;
-
- n = m->m_nextpkt;
- m->m_nextpkt = NULL;
- tag = m_tag_first(m);
- if (tag == NULL) {
- dst = DIR_DROP;
- } else {
- struct dn_pkt_tag *pkt = dn_tag_get(m);
- /* extract the dummynet info, rename the tag */
- dst = pkt->dn_dir;
- ifp = pkt->ifp;
- /* rename the tag so it carries reinject info */
- tag->m_tag_cookie = MTAG_IPFW_RULE;
- tag->m_tag_id = 0;
- }
-
- switch (dst) {
- case DIR_OUT:
- SET_HOST_IPLEN(mtod(m, struct ip *));
- ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
- break ;
- case DIR_IN :
- /* put header in network format for ip_input() */
- //SET_NET_IPLEN(mtod(m, struct ip *));
- netisr_dispatch(NETISR_IP, m);
- break;
-#ifdef INET6
- case DIR_IN | PROTO_IPV6:
- netisr_dispatch(NETISR_IPV6, m);
- break;
-
- case DIR_OUT | PROTO_IPV6:
- SET_HOST_IPLEN(mtod(m, struct ip *));
- ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
- break;
-#endif
- case DIR_FWD | PROTO_IFB: /* DN_TO_IFB_FWD: */
- if (bridge_dn_p != NULL)
- ((*bridge_dn_p)(m, ifp));
- else
- printf("dummynet: if_bridge not loaded\n");
-
- break;
- case DIR_IN | PROTO_LAYER2: /* DN_TO_ETH_DEMUX: */
- /*
- * The Ethernet code assumes the Ethernet header is
- * contiguous in the first mbuf header.
- * Insure this is true.
- */
- if (m->m_len < ETHER_HDR_LEN &&
- (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
- printf("dummynet/ether: pullup failed, "
- "dropping packet\n");
- break;
- }
- ether_demux(m->m_pkthdr.rcvif, m);
- break;
- case DIR_OUT | PROTO_LAYER2: /* N_TO_ETH_OUT: */
- ether_output_frame(ifp, m);
- break;
-
- case DIR_DROP:
- /* drop the packet after some time */
- FREE_PKT(m);
- break;
-
- default:
- printf("dummynet: bad switch %d!\n", dst);
- FREE_PKT(m);
- break;
- }
- }
+ struct dn_schk *s = (struct dn_schk *)obj; /* table entry being compared against key */
+ int i = !(flags & DNHT_KEY_IS_OBJ) ? key : /* same key decoding as schk_hash(): number or pointer */
+ ((struct dn_schk *)key)->sch.sched_nr;
+ return (s->sch.sched_nr == i); /* non-zero when the scheduler numbers are equal */
}
/*
- * Unconditionally expire empty queues in case of shortage.
- * Returns the number of queues freed.
+ * Create the entry and initialize with the sched hash if needed.
+ * Leave s->fp unset so we can tell whether a dn_ht_find() returns
+ * a new object or a previously existing one.
*/
-static int
-expire_queues(struct dn_flow_set *fs)
-{
- struct dn_flow_queue *q, *prev ;
- int i, initial_elements = fs->rq_elements ;
-
- if (fs->last_expired == time_uptime)
- return 0 ;
- fs->last_expired = time_uptime ;
- for (i = 0 ; i <= fs->rq_size ; i++) { /* last one is overflow */
- for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) {
- if (!QUEUE_IS_IDLE(q)) {
- prev = q ;
- q = q->next ;
- } else { /* entry is idle, expire it */
- struct dn_flow_queue *old_q = q ;
-
- if (prev != NULL)
- prev->next = q = q->next ;
- else
- fs->rq[i] = q = q->next ;
- fs->rq_elements-- ;
- free(old_q, M_DUMMYNET);
- }
+static void *
+schk_new(uintptr_t key, int flags, void *arg)
+{ /* Allocate a dn_schk from the template in arg; key and flags are not used here. */
+ struct schk_new_arg *a = arg; /* carries the dn_sch template (a->sch) and the scheduler ops (a->fp) */
+ struct dn_schk *s;
+ int l = sizeof(*s) +a->fp->schk_datalen; /* the struct plus fp->schk_datalen extra bytes */
+
+ s = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s == NULL)
+ return NULL;
+ set_oid(&s->link.oid, DN_LINK, sizeof(s->link));
+ s->sch = *a->sch; // copy initial values
+ s->link.link_nr = s->sch.sched_nr; /* the embedded link takes the scheduler's number */
+ SLIST_INIT(&s->fsk_list); /* no flowsets attached yet */
+ /* initialize the hash table or create the single instance */
+ s->fp = a->fp; /* si_new needs this */
+ s->drain_bucket = 0;
+ if (s->sch.flags & DN_HAVE_MASK) { /* the instance table is created only when a mask is set */
+ s->siht = dn_ht_init(NULL, s->sch.buckets,
+ offsetof(struct dn_sch_inst, si_next),
+ si_hash, si_match, si_new);
+ if (s->siht == NULL) {
+ free(s, M_DUMMYNET); /* table creation failed: release the scheduler too */
+ return NULL;
+ }
}
- }
- return initial_elements - fs->rq_elements ;
+ s->fp = NULL; /* mark as a new scheduler */
+ dn_cfg.schk_count++;
+ return s;
}
/*
- * If room, create a new queue and put at head of slot i;
- * otherwise, create or use the default queue.
+ * Callback for sched delete. Notify all attached flowsets to
+ * detach from the scheduler, destroy the internal flowset, and
+ * all instances. The scheduler goes away too.
+ * arg is 0 (only detach flowsets and destroy instances)
+ * DN_DESTROY (detach & delete queues, delete schk)
+ * or DN_DELETE_FS (delete queues and flowsets, delete schk)
*/
-static struct dn_flow_queue *
-create_queue(struct dn_flow_set *fs, int i)
+static int
+schk_delete_cb(void *obj, void *arg)
{
- struct dn_flow_queue *q;
-
- if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
- expire_queues(fs) == 0) {
- /* No way to get room, use or create overflow queue. */
- i = fs->rq_size;
- if (fs->rq[i] != NULL)
- return fs->rq[i];
- }
- q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (q == NULL) {
- printf("dummynet: sorry, cannot allocate queue for new flow\n");
- return (NULL);
+ struct dn_schk *s = obj;
+#if 0
+ int a = (int)arg;
+ ND("sched %d arg %s%s",
+ s->sch.sched_nr,
+ a&DN_DESTROY ? "DEL ":"",
+ a&DN_DELETE_FS ? "DEL_FS":"");
+#endif
+ fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); /* any non-zero arg becomes DN_DESTROY; DN_DELETE_FS is not forwarded */
+ /* no more flowset pointing to us now */
+ if (s->sch.flags & DN_HAVE_MASK)
+ dn_ht_scan(s->siht, si_destroy, NULL); /* NOTE(review): the table itself is not freed before siht is cleared below -- confirm no leak */
+ else if (s->siht)
+ si_destroy(s->siht, NULL); /* presumably siht holds the single instance directly when no mask is set */
+ if (s->profile) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
}
- q->fs = fs;
- q->hash_slot = i;
- q->next = fs->rq[i];
- q->S = q->F + 1; /* hack - mark timestamp as invalid. */
- q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0);
- fs->rq[i] = q;
- fs->rq_elements++;
- return (q);
+ s->siht = NULL;
+ if (s->fp->destroy)
+ s->fp->destroy(s);
+ bzero(s, sizeof(*s)); // safety
+ free(obj, M_DUMMYNET);
+ dn_cfg.schk_count--;
+ return DNHT_SCAN_DEL;
}
/*
- * Given a flow_set and a pkt in last_pkt, find a matching queue
- * after appropriate masking. The queue is moved to front
- * so that further searches take less time.
+ * called on a 'sched X delete' command. Deletes a single scheduler.
+ * This is done by removing from the schedhash, unlinking all
+ * flowsets and deleting their traffic.
*/
-static struct dn_flow_queue *
-find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id)
-{
- int i = 0 ; /* we need i and q for new allocations */
- struct dn_flow_queue *q, *prev;
- int is_v6 = IS_IP6_FLOW_ID(id);
-
- if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
- q = fs->rq[0] ;
- else {
- /* first, do the masking, then hash */
- id->dst_port &= fs->flow_mask.dst_port ;
- id->src_port &= fs->flow_mask.src_port ;
- id->proto &= fs->flow_mask.proto ;
- id->flags = 0 ; /* we don't care about this one */
- if (is_v6) {
- APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6);
- APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6);
- id->flow_id6 &= fs->flow_mask.flow_id6;
-
- i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^
- ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^
- ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^
- ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^
-
- ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^
- ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^
- ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^
- ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^
-
- ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^
- ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^
- ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
- ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
-
- ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^
- ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^
- ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^
- ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^
-
- (id->dst_port << 1) ^ (id->src_port) ^
- (id->proto ) ^
- (id->flow_id6);
- } else {
- id->dst_ip &= fs->flow_mask.dst_ip ;
- id->src_ip &= fs->flow_mask.src_ip ;
-
- i = ( (id->dst_ip) & 0xffff ) ^
- ( (id->dst_ip >> 15) & 0xffff ) ^
- ( (id->src_ip << 1) & 0xffff ) ^
- ( (id->src_ip >> 16 ) & 0xffff ) ^
- (id->dst_port << 1) ^ (id->src_port) ^
- (id->proto );
- }
- i = i % fs->rq_size ;
- /* finally, scan the current list for a match */
- searches++ ;
- for (prev=NULL, q = fs->rq[i] ; q ; ) {
- search_steps++;
- if (is_v6 &&
- IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) &&
- IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) &&
- id->dst_port == q->id.dst_port &&
- id->src_port == q->id.src_port &&
- id->proto == q->id.proto &&
- id->flags == q->id.flags &&
- id->flow_id6 == q->id.flow_id6)
- break ; /* found */
-
- if (!is_v6 && id->dst_ip == q->id.dst_ip &&
- id->src_ip == q->id.src_ip &&
- id->dst_port == q->id.dst_port &&
- id->src_port == q->id.src_port &&
- id->proto == q->id.proto &&
- id->flags == q->id.flags)
- break ; /* found */
-
- /* No match. Check if we can expire the entry */
- if (pipe_expire && QUEUE_IS_IDLE(q)) {
- /* entry is idle and not in any heap, expire it */
- struct dn_flow_queue *old_q = q ;
-
- if (prev != NULL)
- prev->next = q = q->next ;
- else
- fs->rq[i] = q = q->next ;
- fs->rq_elements-- ;
- free(old_q, M_DUMMYNET);
- continue ;
- }
- prev = q ;
- q = q->next ;
- }
- if (q && prev != NULL) { /* found and not in front */
- prev->next = q->next ;
- q->next = fs->rq[i] ;
- fs->rq[i] = q ;
- }
- }
- if (q == NULL) { /* no match, need to allocate a new entry */
- q = create_queue(fs, i);
- if (q != NULL)
- q->id = *id ;
- }
- return q ;
+static int
+delete_schk(int i)
+{ /* Returns 0 on success, EINVAL when scheduler i does not exist. */
+ struct dn_schk *s;
+
+ s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); /* look up scheduler i, removing it from schedhash */
+ ND("%d %p", i, s);
+ if (!s)
+ return EINVAL;
+ delete_fs(i + DN_MAX_ID, 1); /* first delete internal fs */ /* locked=1: delete_fs() takes no lock; result is ignored */
+ /* then detach flowsets, delete traffic */
+ schk_delete_cb(s, (void*)(uintptr_t)DN_DESTROY); /* also frees s */
+ return 0;
}
+/*--- end of schk hashtable support ---*/
static int
-red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
+copy_obj(char **start, char *end, void *_o, const char *msg, int i)
{
- /*
- * RED algorithm
- *
- * RED calculates the average queue size (avg) using a low-pass filter
- * with an exponential weighted (w_q) moving average:
- * avg <- (1-w_q) * avg + w_q * q_size
- * where q_size is the queue length (measured in bytes or * packets).
- *
- * If q_size == 0, we compute the idle time for the link, and set
- * avg = (1 - w_q)^(idle/s)
- * where s is the time needed for transmitting a medium-sized packet.
- *
- * Now, if avg < min_th the packet is enqueued.
- * If avg > max_th the packet is dropped. Otherwise, the packet is
- * dropped with probability P function of avg.
- */
-
- int64_t p_b = 0;
-
- /* Queue in bytes or packets? */
- u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ?
- q->len_bytes : q->len;
-
- DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size));
+ struct dn_id *o = _o;
+ int have = end - *start;
- /* Average queue size estimation. */
- if (q_size != 0) {
- /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */
- int diff = SCALE(q_size) - q->avg;
- int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q);
-
- q->avg += (int)v;
- } else {
- /*
- * Queue is empty, find for how long the queue has been
- * empty and use a lookup table for computing
- * (1 - * w_q)^(idle_time/s) where s is the time to send a
- * (small) packet.
- * XXX check wraps...
- */
- if (q->avg) {
- u_int t = div64(curr_time - q->idle_time,
- fs->lookup_step);
-
- q->avg = (t < fs->lookup_depth) ?
- SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
- }
+ if (have < o->len || o->len == 0 || o->type == 0) {
+ D("(WARN) type %d %s %d have %d need %d",
+ o->type, msg, i, have, o->len);
+ return 1;
}
- DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
-
- /* Should i drop? */
- if (q->avg < fs->min_th) {
- q->count = -1;
- return (0); /* accept packet */
- }
- if (q->avg >= fs->max_th) { /* average queue >= max threshold */
- if (fs->flags_fs & DN_IS_GENTLE_RED) {
- /*
- * According to Gentle-RED, if avg is greater than
- * max_th the packet is dropped with a probability
- * p_b = c_3 * avg - c_4
- * where c_3 = (1 - max_p) / max_th
- * c_4 = 1 - 2 * max_p
- */
- p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) -
- fs->c_4;
- } else {
- q->count = -1;
- DPRINTF(("dummynet: - drop"));
- return (1);
- }
- } else if (q->avg > fs->min_th) {
- /*
- * We compute p_b using the linear dropping function
- * p_b = c_1 * avg - c_2
- * where c_1 = max_p / (max_th - min_th)
- * c_2 = max_p * min_th / (max_th - min_th)
- */
- p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2;
- }
-
- if (fs->flags_fs & DN_QSIZE_IS_BYTES)
- p_b = div64(p_b * len, fs->max_pkt_size);
- if (++q->count == 0)
- q->random = random() & 0xffff;
- else {
- /*
- * q->count counts packets arrived since last drop, so a greater
- * value of q->count means a greater packet drop probability.
- */
- if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) {
- q->count = 0;
- DPRINTF(("dummynet: - red drop"));
- /* After a drop we calculate a new random value. */
- q->random = random() & 0xffff;
- return (1); /* drop */
- }
+ ND("type %d %s %d len %d", o->type, msg, i, o->len);
+ bcopy(_o, *start, o->len); /* raw copy of o->len bytes; the fix-ups below edit the copy only */
+ if (o->type == DN_LINK) {
+ /* Adjust burst parameter for link */
+ struct dn_link *l = (struct dn_link *)*start; /* points at the copy, not at the source object */
+ l->burst = div64(l->burst, 8 * hz); /* divide the copied burst by 8 * hz */
+ } else if (o->type == DN_SCH) {
+ /* Set id->id to the number of instances */
+ struct dn_schk *s = _o; /* NOTE(review): treats _o as the enclosing dn_schk; relies on sch being its first field */
+ struct dn_id *id = (struct dn_id *)(*start);
+ id->id = (s->sch.flags & DN_HAVE_MASK) ? /* with a mask: number of table entries */
+ dn_ht_entries(s->siht) : (s->siht ? 1 : 0); /* without: 1 if siht is set, else 0 */
}
- /* End of RED algorithm. */
-
- return (0); /* accept */
+ *start += o->len;
+ return 0;
}
-static __inline struct dn_flow_set *
-locate_flowset(int fs_nr)
+/* Specific function to copy a queue.
+ * Copies only the user-visible part of a queue (which is in
+ * a struct dn_flow), and sets len accordingly.
+ */
+static int
+copy_obj_q(char **start, char *end, void *_o, const char *msg, int i)
{
- struct dn_flow_set *fs;
-
- SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next)
- if (fs->fs_nr == fs_nr)
- return (fs);
-
- return (NULL);
+ struct dn_id *o = _o;
+ int have = end - *start; /* bytes left in the output buffer */
+ int len = sizeof(struct dn_flow); /* see above comment */
+
+ if (have < len || o->len == 0 || o->type != DN_QUEUE) { /* reject short buffers and non-queue objects */
+ D("ERROR type %d %s %d have %d need %d",
+ o->type, msg, i, have, len);
+ return 1; /* nothing copied, *start unchanged */
+ }
+ ND("type %d %s %d len %d", o->type, msg, i, len);
+ bcopy(_o, *start, len); /* copies len bytes, not o->len */
+ ((struct dn_id*)(*start))->len = len; /* the copied header advertises the truncated length */
+ *start += len; /* advance the caller's write position */
+ return 0;
}
-static __inline struct dn_pipe *
-locate_pipe(int pipe_nr)
+static int
+copy_q_cb(void *obj, void *arg)
{
- struct dn_pipe *pipe;
-
- SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next)
- if (pipe->pipe_nr == pipe_nr)
- return (pipe);
-
- return (NULL);
+ struct dn_queue *q = obj;
+ struct copy_args *a = arg;
+ struct dn_flow *ni = (struct dn_flow *)(*a->start); /* taken before the copy advances *a->start: points at the new record */
+ if (copy_obj_q(a->start, a->end, &q->ni, "queue", -1))
+ return DNHT_SCAN_END; /* copy failed: ask the scan to stop */
+ ni->oid.type = DN_FLOW; /* override the DN_QUEUE */
+ ni->oid.id = si_hash((uintptr_t)&ni->fid, 0, NULL); /* id is the si_hash() of the copied flow id */
+ return 0;
}
-/*
- * dummynet hook for packets. Below 'pipe' is a pipe or a queue
- * depending on whether WF2Q or fixed bw is used.
- *
- * pipe_nr pipe or queue the packet is destined for.
- * dir where shall we send the packet after dummynet.
- * m the mbuf with the packet
- * ifp the 'ifp' parameter from the caller.
- * NULL in ip_input, destination interface in ip_output,
- * rule matching rule, in case of multiple passes
- */
static int
-dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa)
-{
- struct mbuf *m = *m0, *head = NULL, *tail = NULL;
- struct dn_pkt_tag *pkt;
- struct m_tag *mtag;
- struct dn_flow_set *fs = NULL;
- struct dn_pipe *pipe;
- uint64_t len = m->m_pkthdr.len;
- struct dn_flow_queue *q = NULL;
- int is_pipe = fwa->rule.info & IPFW_IS_PIPE;
-
- KASSERT(m->m_nextpkt == NULL,
- ("dummynet_io: mbuf queue passed to dummynet"));
-
- DUMMYNET_LOCK();
- io_pkt++;
- /*
- * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
- */
- if (is_pipe) {
- pipe = locate_pipe(fwa->rule.info & IPFW_INFO_MASK);
- if (pipe != NULL)
- fs = &(pipe->fs);
- } else
- fs = locate_flowset(fwa->rule.info & IPFW_INFO_MASK);
-
- if (fs == NULL)
- goto dropit; /* This queue/pipe does not exist! */
- pipe = fs->pipe;
- if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */
- pipe = locate_pipe(fs->parent_nr);
- if (pipe != NULL)
- fs->pipe = pipe;
- else {
- printf("dummynet: no pipe %d for queue %d, drop pkt\n",
- fs->parent_nr, fs->fs_nr);
- goto dropit;
- }
- }
- q = find_queue(fs, &(fwa->f_id));
- if (q == NULL)
- goto dropit; /* Cannot allocate queue. */
-
- /* Update statistics, then check reasons to drop pkt. */
- q->tot_bytes += len;
- q->tot_pkts++;
- if (fs->plr && random() < fs->plr)
- goto dropit; /* Random pkt drop. */
- if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
- if (q->len_bytes > fs->qsize)
- goto dropit; /* Queue size overflow. */
- } else {
- if (q->len >= fs->qsize)
- goto dropit; /* Queue count overflow. */
- }
- if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len))
- goto dropit;
-
- /* XXX expensive to zero, see if we can remove it. */
- mtag = m_tag_get(PACKET_TAG_DUMMYNET,
- sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO);
- if (mtag == NULL)
- goto dropit; /* Cannot allocate packet header. */
- m_tag_prepend(m, mtag); /* Attach to mbuf chain. */
-
- pkt = (struct dn_pkt_tag *)(mtag + 1);
- /*
- * Ok, i can handle the pkt now...
- * Build and enqueue packet + parameters.
- */
- pkt->rule = fwa->rule;
- pkt->rule.info &= IPFW_ONEPASS; /* only keep this info */
- pkt->dn_dir = dir;
- pkt->ifp = fwa->oif;
-
- if (q->head == NULL)
- q->head = m;
+copy_q(struct copy_args *a, struct dn_fsk *fs, int flags)
+{ /* Copy the queue(s) of flowset fs into the buffer described by a; flags is unused; always returns 0. */
+ if (!fs->qht)
+ return 0; /* no queues to copy */
+ if (fs->fs.flags & DN_QHT_HASH)
+ dn_ht_scan(fs->qht, copy_q_cb, a); /* qht is a hash table: copy every entry */
else
- q->tail->m_nextpkt = m;
- q->tail = m;
- q->len++;
- q->len_bytes += len;
-
- if (q->head != m) /* Flow was not idle, we are done. */
- goto done;
-
- if (is_pipe) { /* Fixed rate queues. */
- if (q->idle_time < curr_time) {
- /* Calculate available burst size. */
- q->numbytes +=
- (curr_time - q->idle_time - 1) * pipe->bandwidth;
- if (q->numbytes > pipe->burst)
- q->numbytes = pipe->burst;
- if (io_fast)
- q->numbytes += pipe->bandwidth;
- }
- } else { /* WF2Q. */
- if (pipe->idle_time < curr_time &&
- pipe->scheduler_heap.elements == 0 &&
- pipe->not_eligible_heap.elements == 0) {
- /* Calculate available burst size. */
- pipe->numbytes +=
- (curr_time - pipe->idle_time - 1) * pipe->bandwidth;
- if (pipe->numbytes > 0 && pipe->numbytes > pipe->burst)
- pipe->numbytes = pipe->burst;
- if (io_fast)
- pipe->numbytes += pipe->bandwidth;
- }
- pipe->idle_time = curr_time;
- }
- /* Necessary for both: fixed rate & WF2Q queues. */
- q->idle_time = curr_time;
-
- /*
- * If we reach this point the flow was previously idle, so we need
- * to schedule it. This involves different actions for fixed-rate or
- * WF2Q queues.
- */
- if (is_pipe) {
- /* Fixed-rate queue: just insert into the ready_heap. */
- dn_key t = 0;
-
- if (pipe->bandwidth) {
- q->extra_bits = compute_extra_bits(m, pipe);
- t = set_ticks(m, q, pipe);
- }
- q->sched_time = curr_time;
- if (t == 0) /* Must process it now. */
- ready_event(q, &head, &tail);
- else
- heap_insert(&ready_heap, curr_time + t , q);
- } else {
- /*
- * WF2Q. First, compute start time S: if the flow was
- * idle (S = F + 1) set S to the virtual time V for the
- * controlling pipe, and update the sum of weights for the pipe;
- * otherwise, remove flow from idle_heap and set S to max(F,V).
- * Second, compute finish time F = S + len / weight.
- * Third, if pipe was idle, update V = max(S, V).
- * Fourth, count one more backlogged flow.
- */
- if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */
- q->S = pipe->V;
- pipe->sum += fs->weight; /* Add weight of new queue. */
- } else {
- heap_extract(&(pipe->idle_heap), q);
- q->S = MAX64(q->F, pipe->V);
- }
- q->F = q->S + div64(len << MY_M, fs->weight);
-
- if (pipe->not_eligible_heap.elements == 0 &&
- pipe->scheduler_heap.elements == 0)
- pipe->V = MAX64(q->S, pipe->V);
- fs->backlogged++;
- /*
- * Look at eligibility. A flow is not eligibile if S>V (when
- * this happens, it means that there is some other flow already
- * scheduled for the same pipe, so the scheduler_heap cannot be
- * empty). If the flow is not eligible we just store it in the
- * not_eligible_heap. Otherwise, we store in the scheduler_heap
- * and possibly invoke ready_event_wfq() right now if there is
- * leftover credit.
- * Note that for all flows in scheduler_heap (SCH), S_i <= V,
- * and for all flows in not_eligible_heap (NEH), S_i > V.
- * So when we need to compute max(V, min(S_i)) forall i in
- * SCH+NEH, we only need to look into NEH.
- */
- if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */
- if (pipe->scheduler_heap.elements == 0)
- printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
- heap_insert(&(pipe->not_eligible_heap), q->S, q);
- } else {
- heap_insert(&(pipe->scheduler_heap), q->F, q);
- if (pipe->numbytes >= 0) { /* Pipe is idle. */
- if (pipe->scheduler_heap.elements != 1)
- printf("dummynet: OUCH! pipe should have been idle!\n");
- DPRINTF(("dummynet: waking up pipe %d at %d\n",
- pipe->pipe_nr, (int)(q->F >> MY_M)));
- pipe->sched_time = curr_time;
- ready_event_wfq(pipe, &head, &tail);
- }
- }
- }
-done:
- if (head == m && (dir & PROTO_LAYER2) == 0 ) {
- /* Fast io. */
- io_pkt_fast++;
- if (m->m_nextpkt != NULL)
- printf("dummynet: fast io: pkt chain detected!\n");
- head = m->m_nextpkt = NULL;
- } else
- *m0 = NULL; /* Normal io. */
-
- DUMMYNET_UNLOCK();
- if (head != NULL)
- dummynet_send(head);
- return (0);
-
-dropit:
- io_pkt_drop++;
- if (q)
- q->drops++;
- DUMMYNET_UNLOCK();
- FREE_PKT(m);
- *m0 = NULL;
- return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
+ copy_q_cb(fs->qht, a);
+ return 0;
}
/*
- * Dispose all packets and flow_queues on a flow_set.
- * If all=1, also remove red lookup table and other storage,
- * including the descriptor itself.
- * For the one in dn_pipe MUST also cleanup ready_heap...
+ * This routine copies only the initial part of a profile: sizeof(struct dn_profile) minus ED_MAX_SAMPLES_NO ints. XXX
*/
-static void
-purge_flow_set(struct dn_flow_set *fs, int all)
+static int
+copy_profile(struct copy_args *a, struct dn_profile *p)
{
- struct dn_flow_queue *q, *qn;
- int i;
+ int have = a->end - *a->start;
+ /* XXX here we check for max length */
+ int profile_len = sizeof(struct dn_profile) -
+ ED_MAX_SAMPLES_NO*sizeof(int);
- DUMMYNET_LOCK_ASSERT();
-
- for (i = 0; i <= fs->rq_size; i++) {
- for (q = fs->rq[i]; q != NULL; q = qn) {
- dn_free_pkts(q->head);
- qn = q->next;
- free(q, M_DUMMYNET);
- }
- fs->rq[i] = NULL;
+ if (p == NULL)
+ return 0;
+ if (have < profile_len) {
+ D("error have %d need %d", have, profile_len);
+ return 1;
}
+ bcopy(p, *a->start, profile_len);
+ ((struct dn_id *)(*a->start))->len = profile_len;
+ *a->start += profile_len;
+ return 0;
+}
- fs->rq_elements = 0;
- if (all) {
- /* RED - free lookup table. */
- if (fs->w_q_lookup != NULL)
- free(fs->w_q_lookup, M_DUMMYNET);
- if (fs->rq != NULL)
- free(fs->rq, M_DUMMYNET);
- /* If this fs is not part of a pipe, free it. */
- if (fs->pipe == NULL || fs != &(fs->pipe->fs))
- free(fs, M_DUMMYNET);
+static int
+copy_flowset(struct copy_args *a, struct dn_fsk *fs, int flags)
+{
+	/* Copy fs->fs into the output area described by a and, when flags != 0, the queues too. */
+	struct dn_fs *dst = (struct dn_fs *)(*a->start);	/* taken before copy_obj() runs */
+	if (fs == NULL)
+		return 0;
+	ND("flowset %d", fs->fs.fs_nr);
+	if (copy_obj(a->start, a->end, &fs->fs, "flowset", fs->fs.fs_nr) != 0)
+		return DNHT_SCAN_END;
+	dst->oid.id = !(fs->fs.flags & DN_QHT_HASH) ? (fs->qht != NULL) : dn_ht_entries(fs->qht);
+	if (flags != 0) {	/* copy queues */
+		copy_q(a, fs, 0);
}
+	return 0;
}
-/*
- * Dispose all packets queued on a pipe (not a flow_set).
- * Also free all resources associated to a pipe, which is about
- * to be deleted.
- */
-static void
-purge_pipe(struct dn_pipe *pipe)
+static int
+copy_si_cb(void *obj, void *arg)
{
+	/* Callback (see copy_si): copy the 'ni' part of scheduler instance obj out as a DN_FLOW object. */
+	struct copy_args *ca = arg;
+	struct dn_sch_inst *inst = obj;
+	struct dn_flow *out = (struct dn_flow *)(*ca->start);	/* taken before copy_obj() runs */
+	if (copy_obj(ca->start, ca->end, &inst->ni, "inst", inst->sched->sch.sched_nr) != 0)
+		return DNHT_SCAN_END;
+	out->oid.id = si_hash((uintptr_t)inst, DNHT_KEY_IS_OBJ, NULL);
+	out->oid.type = DN_FLOW; /* override the DN_SCH_I */
+	return 0;
+}
- purge_flow_set( &(pipe->fs), 1 );
-
- dn_free_pkts(pipe->head);
-
- heap_free( &(pipe->scheduler_heap) );
- heap_free( &(pipe->not_eligible_heap) );
- heap_free( &(pipe->idle_heap) );
+static int
+copy_si(struct copy_args *a, struct dn_schk *s, int flags)
+{	/* Copy out the instance(s) of scheduler s; flags is not used here. Always returns 0. */
+	if (!(s->sch.flags & DN_HAVE_MASK) && s->siht != NULL)
+		copy_si_cb(s->siht, a);	/* no mask: siht itself is handed over as the instance */
+	else if (s->sch.flags & DN_HAVE_MASK)
+		dn_ht_scan(s->siht, copy_si_cb, a);	/* mask: run the callback over siht */
+	return 0;
}
/*
- * Delete all pipes and heaps returning memory. Must also
- * remove references from all ipfw rules to all pipes.
+ * Build the list of flowset numbers attached to a scheduler and copy it to the output buffer.
*/
-static void
-dummynet_flush(void)
+static int
+copy_fsk_list(struct copy_args *a, struct dn_schk *s, int flags)
{
- struct dn_pipe *pipe, *pipe1;
- struct dn_flow_set *fs, *fs1;
- int i;
-
- DUMMYNET_LOCK();
- /* Free heaps so we don't have unwanted events. */
- heap_free(&ready_heap);
- heap_free(&wfq_ready_heap);
- heap_free(&extract_heap);
+ struct dn_fsk *fs;
+ struct dn_id *o;
+ uint32_t *p;
+
+ int n = 0, space = sizeof(*o);
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain) {
+ if (fs->fs.fs_nr < DN_MAX_ID)
+ n++;
+ }
+ space += n * sizeof(uint32_t);
+ DX(3, "sched %d has %d flowsets", s->sch.sched_nr, n);
+ if (a->end - *(a->start) < space)
+ return DNHT_SCAN_END;
+ o = (struct dn_id *)(*(a->start));
+ o->len = space;
+ *a->start += o->len;
+ o->type = DN_TEXT;
+ p = (uint32_t *)(o+1);
+ SLIST_FOREACH(fs, &s->fsk_list, sch_chain)
+ if (fs->fs.fs_nr < DN_MAX_ID)
+ *p++ = fs->fs.fs_nr;
+ return 0;
+}
- /*
- * Now purge all queued pkts and delete all pipes.
- *
- * XXXGL: can we merge the for(;;) cycles into one or not?
- */
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) {
- SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next);
- purge_flow_set(fs, 1);
+static int
+copy_data_helper(void *_o, void *_arg)
+{
+ struct copy_args *a = _arg;
+ uint32_t *r = a->extra->r; /* start of first range */
+ uint32_t *lim; /* first invalid pointer */
+ int n;
+
+ lim = (uint32_t *)((char *)(a->extra) + a->extra->o.len);
+
+ if (a->type == DN_LINK || a->type == DN_SCH) {
+ /* pipe|sched show, we receive a dn_schk */
+ struct dn_schk *s = _o;
+
+ n = s->sch.sched_nr;
+ if (a->type == DN_SCH && n >= DN_MAX_ID)
+ return 0; /* not a scheduler */
+ if (a->type == DN_LINK && n <= DN_MAX_ID)
+ return 0; /* not a pipe */
+
+ /* see if the object is within one of our ranges */
+ for (;r < lim; r+=2) {
+ if (n < r[0] || n > r[1])
+ continue;
+ /* Found a valid entry, copy and we are done */
+ if (a->flags & DN_C_LINK) {
+ if (copy_obj(a->start, a->end,
+ &s->link, "link", n))
+ return DNHT_SCAN_END;
+ if (copy_profile(a, s->profile))
+ return DNHT_SCAN_END;
+ if (copy_flowset(a, s->fs, 0))
+ return DNHT_SCAN_END;
+ }
+ if (a->flags & DN_C_SCH) {
+ if (copy_obj(a->start, a->end,
+ &s->sch, "sched", n))
+ return DNHT_SCAN_END;
+ /* list all attached flowsets */
+ if (copy_fsk_list(a, s, 0))
+ return DNHT_SCAN_END;
}
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) {
- SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next);
- purge_pipe(pipe);
- free_pipe(pipe);
+ if (a->flags & DN_C_FLOW)
+ copy_si(a, s, 0);
+ break;
}
- DUMMYNET_UNLOCK();
+ } else if (a->type == DN_FS) {
+ /* queue show, skip internal flowsets */
+ struct dn_fsk *fs = _o;
+
+ n = fs->fs.fs_nr;
+ if (n >= DN_MAX_ID)
+ return 0;
+ /* see if the object is within one of our ranges */
+ for (;r < lim; r+=2) {
+ if (n < r[0] || n > r[1])
+ continue;
+ if (copy_flowset(a, fs, 0))
+ return DNHT_SCAN_END;
+ copy_q(a, fs, 0);
+ break; /* we are done */
+ }
+ }
+ return 0;
+}
+
+static inline struct dn_schk *
+locate_scheduler(int i)
+{	/* look scheduler number i up in dn_cfg.schedhash, passing flags 0 (no DNHT_INSERT) */
+	return (dn_ht_find(dn_cfg.schedhash, i, 0, NULL));
}
/*
- * setup RED parameters
+ * Compute the fixed-point RED state of flowset fs. Returns 0, or EINVAL/ENOSPC after clearing the RED flags.
*/
static int
-config_red(struct dn_flow_set *p, struct dn_flow_set *x)
+config_red(struct dn_fsk *fs)
{
- int i;
-
- x->w_q = p->w_q;
- x->min_th = SCALE(p->min_th);
- x->max_th = SCALE(p->max_th);
- x->max_p = p->max_p;
-
- x->c_1 = p->max_p / (p->max_th - p->min_th);
- x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
-
- if (x->flags_fs & DN_IS_GENTLE_RED) {
- x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
- x->c_4 = SCALE(1) - 2 * p->max_p;
+	int64_t s, idle, weight, w0;
+	int t, i;
+	D("called");
+	/* w_q, max_th - min_th and (gentle) max_th are divisors below: refuse zeros before touching any state */
+	if (fs->fs.w_q == 0 || fs->fs.max_th == fs->fs.min_th ||
+	    ((fs->fs.flags & DN_IS_GENTLE_RED) && fs->fs.max_th == 0)) {
+		fs->fs.flags &= ~(DN_IS_RED | DN_IS_GENTLE_RED);
+		return (EINVAL);
+	}
+	fs->w_q = fs->fs.w_q;
+	fs->max_p = fs->fs.max_p;
+	/* Doing stuff that was in userland */
+	i = fs->sched->link.bandwidth;
+	s = (i <= 0) ? 0 :
+		hz * dn_cfg.red_avg_pkt_size * 8 * SCALE(1) / i;
+	idle = div64((s * 3) , fs->w_q); /* s, fs->w_q scaled; idle not scaled */
+	fs->lookup_step = (dn_cfg.red_lookup_depth == 0) ? 0 :
+		div64(idle , dn_cfg.red_lookup_depth); /* not scaled; depth 0 is rejected below */
+	if (!fs->lookup_step)
+		fs->lookup_step = 1;
+	w0 = weight = SCALE(1) - fs->w_q; //fs->w_q scaled
+	for (t = fs->lookup_step; t > 1; --t)
+		weight = SCALE_MUL(weight, w0);
+	fs->lookup_weight = (int)(weight); // scaled
+	/* Now doing stuff that was in the kernel */
+	fs->min_th = SCALE(fs->fs.min_th);
+	fs->max_th = SCALE(fs->fs.max_th);
+	fs->c_1 = fs->max_p / (fs->fs.max_th - fs->fs.min_th);
+	fs->c_2 = SCALE_MUL(fs->c_1, SCALE(fs->fs.min_th));
+	if (fs->fs.flags & DN_IS_GENTLE_RED) {
+		fs->c_3 = (SCALE(1) - fs->max_p) / fs->fs.max_th;
+		fs->c_4 = SCALE(1) - 2 * fs->max_p;
}
/* If the lookup table already exist, free and create it again. */
- if (x->w_q_lookup) {
- free(x->w_q_lookup, M_DUMMYNET);
- x->w_q_lookup = NULL;
+	if (fs->w_q_lookup) {
+		free(fs->w_q_lookup, M_DUMMYNET);
+		fs->w_q_lookup = NULL;
}
- if (red_lookup_depth == 0) {
+	if (dn_cfg.red_lookup_depth == 0) {
printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth"
"must be > 0\n");
- free(x, M_DUMMYNET);
+		fs->fs.flags &= ~DN_IS_RED;
+		fs->fs.flags &= ~DN_IS_GENTLE_RED;
return (EINVAL);
}
- x->lookup_depth = red_lookup_depth;
- x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int),
+	fs->lookup_depth = dn_cfg.red_lookup_depth;
+	fs->w_q_lookup = (u_int *)malloc(fs->lookup_depth * sizeof(int),
M_DUMMYNET, M_NOWAIT);
- if (x->w_q_lookup == NULL) {
+	if (fs->w_q_lookup == NULL) {
printf("dummynet: sorry, cannot allocate red lookup table\n");
- free(x, M_DUMMYNET);
+		fs->fs.flags &= ~DN_IS_RED;
+		fs->fs.flags &= ~DN_IS_GENTLE_RED;
return(ENOSPC);
}
/* Fill the lookup table with (1 - w_q)^x */
- x->lookup_step = p->lookup_step;
- x->lookup_weight = p->lookup_weight;
- x->w_q_lookup[0] = SCALE(1) - x->w_q;
-
- for (i = 1; i < x->lookup_depth; i++)
- x->w_q_lookup[i] =
- SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
+	fs->w_q_lookup[0] = SCALE(1) - fs->w_q;
+
+	for (i = 1; i < fs->lookup_depth; i++)
+		fs->w_q_lookup[i] =
+			SCALE_MUL(fs->w_q_lookup[i - 1], fs->lookup_weight);
+
+	if (dn_cfg.red_avg_pkt_size < 1)
+		dn_cfg.red_avg_pkt_size = 512;
+	fs->avg_pkt_size = dn_cfg.red_avg_pkt_size;
+	if (dn_cfg.red_max_pkt_size < 1)
+		dn_cfg.red_max_pkt_size = 1500;
+	fs->max_pkt_size = dn_cfg.red_max_pkt_size;
+	D("exit");
+	return 0;
+}
- if (red_avg_pkt_size < 1)
- red_avg_pkt_size = 512;
- x->avg_pkt_size = red_avg_pkt_size;
- if (red_max_pkt_size < 1)
- red_max_pkt_size = 1500;
- x->max_pkt_size = red_max_pkt_size;
- return (0);
+/* Re-run config_red() on every flowset of scheduler s that has DN_IS_RED set */
+static void
+update_red(struct dn_schk *s)
+{
+	struct dn_fsk *cur;
+
+	SLIST_FOREACH(cur, &s->fsk_list, sch_chain)
+		if (cur != NULL && (cur->fs.flags & DN_IS_RED) != 0)
+			(void)config_red(cur);	/* result ignored */
}
-static int
-alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs)
-{
- if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */
- int l = pfs->rq_size;
-
- if (l == 0)
- l = dn_hash_size;
- if (l < 4)
- l = 4;
- else if (l > DN_MAX_HASH_SIZE)
- l = DN_MAX_HASH_SIZE;
- x->rq_size = l;
- } else /* one is enough for null mask */
- x->rq_size = 1;
- x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (x->rq == NULL) {
- printf("dummynet: sorry, cannot allocate queue\n");
- return (ENOMEM);
- }
- x->rq_elements = 0;
- return 0 ;
+/* Link flowset fs, so far on dn_cfg.fsu, to scheduler s; existing queues are not requeued (XXX). */
+static void
+fsk_attach(struct dn_fsk *fs, struct dn_schk *s)
+{
+	ND("remove fs %d from fsunlinked, link to sched %d",
+		fs->fs.fs_nr, s->sch.sched_nr);
+	/* off the unlinked list, onto the scheduler's list */
+	SLIST_REMOVE(&dn_cfg.fsu, fs, dn_fsk, sch_chain);
+	fs->sched = s;
+	SLIST_INSERT_HEAD(&s->fsk_list, fs, sch_chain);
+	if (s->fp->new_fsk != NULL)	/* optional hook reached through s->fp */
+		s->fp->new_fsk(fs);
+	/* XXX compute fsk_mask */
+	fs->fsk_mask = fs->fs.flow_mask;
+	if ((fs->sched->sch.flags & DN_HAVE_MASK) != 0)
+		flow_id_or(&fs->sched->sch.sched_mask, &fs->fsk_mask);
+	if (fs->qht != NULL) {
+		/*
+		 * The queues should be drained according to the old qht
+		 * type and reinserted according to the new one, which in
+		 * general means reclassifying every single packet.
+		 * That is not implemented: for now we just log and forget
+		 * the pointer, hoping qht is never set at this point.
+		 */
+		D("XXX TODO requeue from fs %d to sch %d",
+			fs->fs.fs_nr, s->sch.sched_nr);
+		fs->qht = NULL;
+	}
+	/* set the new type for qht */
+	if (!nonzero_mask(&fs->fsk_mask))
+		fs->fs.flags &= ~DN_QHT_HASH;
+	else
+		fs->fs.flags |= DN_QHT_HASH;
+
+	/* XXX config_red() can fail... its result is not checked */
+	if ((fs->fs.flags & DN_IS_RED) != 0)
+		(void)config_red(fs);
}
+/* update all flowsets which may refer to this scheduler */
static void
-set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
-{
- x->flags_fs = src->flags_fs;
- x->qsize = src->qsize;
- x->plr = src->plr;
- x->flow_mask = src->flow_mask;
- if (x->flags_fs & DN_QSIZE_IS_BYTES) {
- if (x->qsize > pipe_byte_limit)
- x->qsize = 1024 * 1024;
- } else {
- if (x->qsize == 0)
- x->qsize = 50;
- if (x->qsize > pipe_slot_limit)
- x->qsize = 50;
+update_fs(struct dn_schk *s)
+{
+ struct dn_fsk *fs, *tmp;
+
+ SLIST_FOREACH_SAFE(fs, &dn_cfg.fsu, sch_chain, tmp) {
+ if (s->sch.sched_nr != fs->fs.sched_nr) {
+ D("fs %d for sch %d not %d still unlinked",
+ fs->fs.fs_nr, fs->fs.sched_nr,
+ s->sch.sched_nr);
+ continue;
+ }
+ fsk_attach(fs, s);
}
- /* Configuring RED. */
- if (x->flags_fs & DN_IS_RED)
- config_red(src, x); /* XXX should check errors */
}
/*
- * Setup pipe or queue parameters.
+ * Configuration -- to preserve backward compatibility we use
+ * the following scheme (N is 65536)
+ * NUMBER SCHED LINK FLOWSET
+ * 1 .. N-1 (1)WFQ (2)WFQ (3)queue
+ * N+1 .. 2N-1 (4)FIFO (5)FIFO (6)FIFO for sched 1..N-1
+ * 2N+1 .. 3N-1 -- -- (7)FIFO for sched N+1..2N-1
+ *
+ * "pipe i config" configures #1, #2 and #3
+ * "sched i config" configures #1 and possibly #6
+ * "queue i config" configures #3
+ * #1 is configured with 'pipe i config' or 'sched i config'
+ * #2 is configured with 'pipe i config', and created if not
+ * existing with 'sched i config'
+ * #3 is configured with 'queue i config'
+ * #4 is automatically configured after #1, can only be FIFO
+ * #5 is automatically configured after #2
+ * #6 is automatically created when #1 is !MULTIQUEUE,
+ * and can be updated.
+ * #7 is automatically configured after #2
+ */
+
+/*
+ * configure a link (and its FIFO instance)
*/
static int
-config_pipe(struct dn_pipe *p)
+config_link(struct dn_link *p, struct dn_id *arg)
{
- struct dn_flow_set *pfs = &(p->fs);
- struct dn_flow_queue *q;
- int i, error;
+ int i;
+ if (p->oid.len != sizeof(*p)) {
+ D("invalid pipe len %d", p->oid.len);
+ return EINVAL;
+ }
+ i = p->link_nr;
+ if (i <= 0 || i >= DN_MAX_ID)
+ return EINVAL;
/*
* The config program passes parameters as follows:
* bw = bits/second (0 means no limits),
* delay = ms, must be translated into ticks.
* qsize = slots/bytes
+ * burst = bytes (converted to bits * hz below)
*/
p->delay = (p->delay * hz) / 1000;
/* Scale burst size: bytes -> bits * hz */
p->burst *= 8 * hz;
- /* We need either a pipe number or a flow_set number. */
- if (p->pipe_nr == 0 && pfs->fs_nr == 0)
- return (EINVAL);
- if (p->pipe_nr != 0 && pfs->fs_nr != 0)
- return (EINVAL);
- if (p->pipe_nr != 0) { /* this is a pipe */
- struct dn_pipe *pipe;
-
- DUMMYNET_LOCK();
- pipe = locate_pipe(p->pipe_nr); /* locate pipe */
-
- if (pipe == NULL) { /* new pipe */
- pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
- M_NOWAIT | M_ZERO);
- if (pipe == NULL) {
- DUMMYNET_UNLOCK();
- printf("dummynet: no memory for new pipe\n");
- return (ENOMEM);
- }
- pipe->pipe_nr = p->pipe_nr;
- pipe->fs.pipe = pipe;
- /*
- * idle_heap is the only one from which
- * we extract from the middle.
- */
- pipe->idle_heap.size = pipe->idle_heap.elements = 0;
- pipe->idle_heap.offset =
- offsetof(struct dn_flow_queue, heap_pos);
- } else {
- /* Flush accumulated credit for all queues. */
- for (i = 0; i <= pipe->fs.rq_size; i++) {
- for (q = pipe->fs.rq[i]; q; q = q->next) {
- q->numbytes = p->burst +
- (io_fast ? p->bandwidth : 0);
- }
- }
- }
-
- pipe->bandwidth = p->bandwidth;
- pipe->burst = p->burst;
- pipe->numbytes = pipe->burst + (io_fast ? pipe->bandwidth : 0);
- bcopy(p->if_name, pipe->if_name, sizeof(p->if_name));
- pipe->ifp = NULL; /* reset interface ptr */
- pipe->delay = p->delay;
- set_fs_parms(&(pipe->fs), pfs);
-
- /* Handle changes in the delay profile. */
- if (p->samples_no > 0) {
- if (pipe->samples_no != p->samples_no) {
- if (pipe->samples != NULL)
- free(pipe->samples, M_DUMMYNET);
- pipe->samples =
- malloc(p->samples_no*sizeof(dn_key),
- M_DUMMYNET, M_NOWAIT | M_ZERO);
- if (pipe->samples == NULL) {
- DUMMYNET_UNLOCK();
- printf("dummynet: no memory "
- "for new samples\n");
- return (ENOMEM);
- }
- pipe->samples_no = p->samples_no;
- }
- strncpy(pipe->name,p->name,sizeof(pipe->name));
- pipe->loss_level = p->loss_level;
- for (i = 0; i<pipe->samples_no; ++i)
- pipe->samples[i] = p->samples[i];
- } else if (pipe->samples != NULL) {
- free(pipe->samples, M_DUMMYNET);
- pipe->samples = NULL;
- pipe->samples_no = 0;
- }
+ DN_BH_WLOCK();
+ /* do it twice, base link and FIFO link */
+ for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
+ struct dn_schk *s = locate_scheduler(i);
+ if (s == NULL) {
+ DN_BH_WUNLOCK();
+ D("sched %d not found", i);
+ return EINVAL;
+ }
+ /* remove profile if exists */
+ if (s->profile) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
+ }
+ /* copy all parameters */
+ s->link.oid = p->oid;
+ s->link.link_nr = i;
+ s->link.delay = p->delay;
+ if (s->link.bandwidth != p->bandwidth) {
+ /* XXX bandwidth changes, need to update red params */
+ s->link.bandwidth = p->bandwidth;
+ update_red(s);
+ }
+ s->link.burst = p->burst;
+ schk_reset_credit(s);
+ }
+ dn_cfg.id++;
+ DN_BH_WUNLOCK();
+ return 0;
+}
- if (pipe->fs.rq == NULL) { /* a new pipe */
- error = alloc_hash(&(pipe->fs), pfs);
- if (error) {
- DUMMYNET_UNLOCK();
- free_pipe(pipe);
- return (error);
- }
- SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)],
- pipe, next);
- }
- DUMMYNET_UNLOCK();
- } else { /* config queue */
- struct dn_flow_set *fs;
+/*
+ * Configure a flowset. A caller that already holds the DN_BH write lock passes locked=1.
+ */
+static struct dn_fsk *
+config_fs(struct dn_fs *nfs, struct dn_id *arg, int locked)
+{
+ int i;
+ struct dn_fsk *fs;
- DUMMYNET_LOCK();
- fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
+ if (nfs->oid.len != sizeof(*nfs)) {
+ D("invalid flowset len %d", nfs->oid.len);
+ return NULL;
+ }
+ i = nfs->fs_nr;
+ if (i <= 0 || i >= 3*DN_MAX_ID)
+ return NULL;
+ ND("flowset %d", i);
+ /* XXX other sanity checks */
+ if (nfs->flags & DN_QSIZE_BYTES) {
+ ipdn_bound_var(&nfs->qsize, 16384,
+ 1500, dn_cfg.byte_limit, NULL); // "queue byte size");
+ } else {
+ ipdn_bound_var(&nfs->qsize, 50,
+ 1, dn_cfg.slot_limit, NULL); // "queue slot size");
+ }
+ if (nfs->flags & DN_HAVE_MASK) {
+ /* make sure we have some buckets */
+ ipdn_bound_var(&nfs->buckets, dn_cfg.hash_size,
+ 1, dn_cfg.max_hash_size, "flowset buckets");
+ } else {
+ nfs->buckets = 1; /* we only need 1 */
+ }
+ if (!locked)
+ DN_BH_WLOCK();
+ do { /* exit with break when done */
+ struct dn_schk *s;
+ int flags = nfs->sched_nr ? DNHT_INSERT : 0;
+ int j;
+ int oldc = dn_cfg.fsk_count;
+ fs = dn_ht_find(dn_cfg.fshash, i, flags, NULL);
+ if (fs == NULL) {
+ D("missing sched for flowset %d", i);
+ break;
+ }
+ /* grab some defaults from the existing one */
+ if (nfs->sched_nr == 0) /* reuse */
+ nfs->sched_nr = fs->fs.sched_nr;
+ for (j = 0; j < sizeof(nfs->par)/sizeof(nfs->par[0]); j++) {
+ if (nfs->par[j] == -1) /* reuse */
+ nfs->par[j] = fs->fs.par[j];
+ }
+ if (bcmp(&fs->fs, nfs, sizeof(*nfs)) == 0) {
+ ND("flowset %d unchanged", i);
+ break; /* no change, nothing to do */
+ }
+ if (oldc != dn_cfg.fsk_count) /* new item */
+ dn_cfg.id++;
+ s = locate_scheduler(nfs->sched_nr);
+ /* detach from old scheduler if needed, preserving
+ * queues if we need to reattach. Then update the
+ * configuration, and possibly attach to the new sched.
+ */
+ DX(2, "fs %d changed sched %d@%p to %d@%p",
+ fs->fs.fs_nr,
+ fs->fs.sched_nr, fs->sched, nfs->sched_nr, s);
+ if (fs->sched) {
+ int flags = s ? DN_DETACH : (DN_DETACH | DN_DESTROY);
+ flags |= DN_DESTROY; /* XXX temporary */
+ fsk_detach(fs, flags);
+ }
+ fs->fs = *nfs; /* copy configuration */
+ if (s != NULL)
+ fsk_attach(fs, s);
+ } while (0);
+ if (!locked)
+ DN_BH_WUNLOCK();
+ return fs;
+}
- if (fs == NULL) { /* new */
- if (pfs->parent_nr == 0) { /* need link to a pipe */
- DUMMYNET_UNLOCK();
- return (EINVAL);
- }
- fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
- M_NOWAIT | M_ZERO);
- if (fs == NULL) {
- DUMMYNET_UNLOCK();
- printf(
- "dummynet: no memory for new flow_set\n");
- return (ENOMEM);
- }
- fs->fs_nr = pfs->fs_nr;
- fs->parent_nr = pfs->parent_nr;
- fs->weight = pfs->weight;
- if (fs->weight == 0)
- fs->weight = 1;
- else if (fs->weight > 100)
- fs->weight = 100;
+/*
+ * config/reconfig a scheduler and its FIFO variant.
+ * For !MULTIQUEUE schedulers, also set up the flowset.
+ *
+ * On reconfigurations (detected because s->fp is set),
+ * detach existing flowsets preserving traffic, preserve link,
+ * and delete the old scheduler creating a new one.
+ */
+static int
+config_sched(struct dn_sch *_nsch, struct dn_id *arg)
+{
+ struct dn_schk *s;
+ struct schk_new_arg a; /* argument for schk_new */
+ int i;
+ struct dn_link p; /* copy of oldlink */
+ struct dn_profile *pf = NULL; /* copy of old link profile */
+	/* Used to preserve the mask parameters */
+ struct ipfw_flow_id new_mask;
+ int new_buckets = 0;
+ int new_flags = 0;
+ int pipe_cmd;
+ int err = ENOMEM;
+
+ a.sch = _nsch;
+ if (a.sch->oid.len != sizeof(*a.sch)) {
+ D("bad sched len %d", a.sch->oid.len);
+ return EINVAL;
+ }
+ i = a.sch->sched_nr;
+ if (i <= 0 || i >= DN_MAX_ID)
+ return EINVAL;
+ /* make sure we have some buckets */
+ if (a.sch->flags & DN_HAVE_MASK)
+ ipdn_bound_var(&a.sch->buckets, dn_cfg.hash_size,
+ 1, dn_cfg.max_hash_size, "sched buckets");
+ /* XXX other sanity checks */
+ bzero(&p, sizeof(p));
+
+ pipe_cmd = a.sch->flags & DN_PIPE_CMD;
+ a.sch->flags &= ~DN_PIPE_CMD; //XXX do it even if is not set?
+ if (pipe_cmd) {
+ /* Copy mask parameter */
+ new_mask = a.sch->sched_mask;
+ new_buckets = a.sch->buckets;
+ new_flags = a.sch->flags;
+ }
+ DN_BH_WLOCK();
+again: /* run twice, for wfq and fifo */
+ /*
+ * lookup the type. If not supplied, use the previous one
+ * or default to WF2Q+. Otherwise, return an error.
+ */
+ dn_cfg.id++;
+ a.fp = find_sched_type(a.sch->oid.subtype, a.sch->name);
+ if (a.fp != NULL) {
+ /* found. Lookup or create entry */
+ s = dn_ht_find(dn_cfg.schedhash, i, DNHT_INSERT, &a);
+ } else if (a.sch->oid.subtype == 0 && !a.sch->name[0]) {
+ /* No type. search existing s* or retry with WF2Q+ */
+ s = dn_ht_find(dn_cfg.schedhash, i, 0, &a);
+ if (s != NULL) {
+ a.fp = s->fp;
+ /* Scheduler exists, skip to FIFO scheduler
+ * if command was pipe config...
+ */
+ if (pipe_cmd)
+ goto next;
} else {
- /*
- * Change parent pipe not allowed;
- * must delete and recreate.
+ /* New scheduler, create a wf2q+ with no mask
+ * if command was pipe config...
*/
- if (pfs->parent_nr != 0 &&
- fs->parent_nr != pfs->parent_nr) {
- DUMMYNET_UNLOCK();
- return (EINVAL);
+ if (pipe_cmd) {
+ /* clear mask parameter */
+ bzero(&a.sch->sched_mask, sizeof(new_mask));
+ a.sch->buckets = 0;
+ a.sch->flags &= ~DN_HAVE_MASK;
}
+ a.sch->oid.subtype = DN_SCHED_WF2QP;
+ goto again;
}
-
- set_fs_parms(fs, pfs);
-
- if (fs->rq == NULL) { /* a new flow_set */
- error = alloc_hash(fs, pfs);
- if (error) {
- DUMMYNET_UNLOCK();
- free(fs, M_DUMMYNET);
- return (error);
+ } else {
+ D("invalid scheduler type %d %s",
+ a.sch->oid.subtype, a.sch->name);
+ err = EINVAL;
+ goto error;
+ }
+ /* normalize name and subtype */
+ a.sch->oid.subtype = a.fp->type;
+ bzero(a.sch->name, sizeof(a.sch->name));
+ strlcpy(a.sch->name, a.fp->name, sizeof(a.sch->name));
+ if (s == NULL) {
+ D("cannot allocate scheduler %d", i);
+ goto error;
+ }
+ /* restore existing link if any */
+ if (p.link_nr) {
+ s->link = p;
+ if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
+ s->profile = NULL; /* XXX maybe not needed */
+ } else {
+ s->profile = malloc(sizeof(struct dn_profile),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s->profile == NULL) {
+ D("cannot allocate profile");
+ goto error; //XXX
+ }
+ bcopy(pf, s->profile, sizeof(*pf));
+ }
+ }
+ p.link_nr = 0;
+ if (s->fp == NULL) {
+ DX(2, "sched %d new type %s", i, a.fp->name);
+ } else if (s->fp != a.fp ||
+ bcmp(a.sch, &s->sch, sizeof(*a.sch)) ) {
+ /* already existing. */
+ DX(2, "sched %d type changed from %s to %s",
+ i, s->fp->name, a.fp->name);
+ DX(4, " type/sub %d/%d -> %d/%d",
+ s->sch.oid.type, s->sch.oid.subtype,
+ a.sch->oid.type, a.sch->oid.subtype);
+ if (s->link.link_nr == 0)
+ D("XXX WARNING link 0 for sched %d", i);
+ p = s->link; /* preserve link */
+ if (s->profile) {/* preserve profile */
+ if (!pf)
+ pf = malloc(sizeof(*pf),
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (pf) /* XXX should issue a warning otherwise */
+ bcopy(s->profile, pf, sizeof(*pf));
+ }
+ /* remove from the hash */
+ dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+ /* Detach flowsets, preserve queues. */
+ // schk_delete_cb(s, NULL);
+ // XXX temporarily, kill queues
+ schk_delete_cb(s, (void *)DN_DESTROY);
+ goto again;
+ } else {
+ DX(4, "sched %d unchanged type %s", i, a.fp->name);
+ }
+ /* complete initialization */
+ s->sch = *a.sch;
+ s->fp = a.fp;
+ s->cfg = arg;
+ // XXX schk_reset_credit(s);
+ /* create the internal flowset if needed,
+ * trying to reuse existing ones if available
+ */
+ if (!(s->fp->flags & DN_MULTIQUEUE) && !s->fs) {
+ s->fs = dn_ht_find(dn_cfg.fshash, i, 0, NULL);
+ if (!s->fs) {
+ struct dn_fs fs;
+ bzero(&fs, sizeof(fs));
+ set_oid(&fs.oid, DN_FS, sizeof(fs));
+ fs.fs_nr = i + DN_MAX_ID;
+ fs.sched_nr = i;
+ s->fs = config_fs(&fs, NULL, 1 /* locked */);
+ }
+ if (!s->fs) {
+ schk_delete_cb(s, (void *)DN_DESTROY);
+ D("error creating internal fs for %d", i);
+ goto error;
+ }
+ }
+ /* call init function after the flowset is created */
+ if (s->fp->config)
+ s->fp->config(s);
+ update_fs(s);
+next:
+ if (i < DN_MAX_ID) { /* now configure the FIFO instance */
+ i += DN_MAX_ID;
+ if (pipe_cmd) {
+ /* Restore mask parameter for FIFO */
+ a.sch->sched_mask = new_mask;
+ a.sch->buckets = new_buckets;
+ a.sch->flags = new_flags;
+ } else {
+ /* sched config shouldn't modify the FIFO scheduler */
+ if (dn_ht_find(dn_cfg.schedhash, i, 0, &a) != NULL) {
+ /* FIFO already exist, don't touch it */
+ err = 0; /* and this is not an error */
+ goto error;
}
- SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)],
- fs, next);
}
- DUMMYNET_UNLOCK();
+ a.sch->sched_nr = i;
+ a.sch->oid.subtype = DN_SCHED_FIFO;
+ bzero(a.sch->name, sizeof(a.sch->name));
+ goto again;
}
- return (0);
+ err = 0;
+error:
+ DN_BH_WUNLOCK();
+ if (pf)
+ free(pf, M_DUMMYNET);
+ return err;
}
/*
- * Helper function to remove from a heap queues which are linked to
- * a flow_set about to be deleted.
+ * attach a profile to a link
*/
-static void
-fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
+static int
+config_profile(struct dn_profile *pf, struct dn_id *arg)
{
- int i, found;
+ struct dn_schk *s;
+ int i, olen, err = 0;
- for (i = found = 0 ; i < h->elements ;) {
- if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
- h->elements-- ;
- h->p[i] = h->p[h->elements] ;
- found++ ;
- } else
- i++ ;
- }
- if (found)
- heapify(h);
+ if (pf->oid.len < sizeof(*pf)) {
+ D("short profile len %d", pf->oid.len);
+ return EINVAL;
+ }
+ i = pf->link_nr;
+ if (i <= 0 || i >= DN_MAX_ID)
+ return EINVAL;
+ /* XXX other sanity checks */
+ DN_BH_WLOCK();
+ for (; i < 2*DN_MAX_ID; i += DN_MAX_ID) {
+ s = locate_scheduler(i);
+
+ if (s == NULL) {
+ err = EINVAL;
+ break;
+ }
+ dn_cfg.id++;
+ /*
+ * If we had a profile and the new one does not fit,
+ * or it is deleted, then we need to free memory.
+ */
+ if (s->profile && (pf->samples_no == 0 ||
+ s->profile->oid.len < pf->oid.len)) {
+ free(s->profile, M_DUMMYNET);
+ s->profile = NULL;
+ }
+ if (pf->samples_no == 0)
+ continue;
+ /*
+ * new profile, possibly allocate memory
+ * and copy data.
+ */
+ if (s->profile == NULL)
+ s->profile = malloc(pf->oid.len,
+ M_DUMMYNET, M_NOWAIT | M_ZERO);
+ if (s->profile == NULL) {
+ D("no memory for profile %d", i);
+ err = ENOMEM;
+ break;
+ }
+ /* preserve larger length XXX double check */
+ olen = s->profile->oid.len;
+ if (olen < pf->oid.len)
+ olen = pf->oid.len;
+ bcopy(pf, s->profile, pf->oid.len);
+ s->profile->oid.len = olen;
+ }
+ DN_BH_WUNLOCK();
+ return err;
}
/*
- * helper function to remove a pipe from a heap (can be there at most once)
+ * Delete all objects:
*/
static void
-pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
+dummynet_flush(void)
{
- int i;
- for (i=0; i < h->elements ; i++ ) {
- if (h->p[i].object == p) { /* found it */
- h->elements-- ;
- h->p[i] = h->p[h->elements] ;
- heapify(h);
- break ;
- }
- }
+ /* delete all schedulers and related links/queues/flowsets */
+ dn_ht_scan(dn_cfg.schedhash, schk_delete_cb,
+ (void *)(uintptr_t)DN_DELETE_FS);
+ /* delete all remaining (unlinked) flowsets */
+ DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
+ dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
+ fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
+ /* Reinitialize system heap... */
+ heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
}
/*
- * drain all queues. Called in case of severe mbuf shortage.
+ * Main handler for configuration. We are guaranteed to be called
+ * with an oid which is at least a dn_id.
+ * - the first object is the command (config, delete, flush, ...)
+ * - config_link must be issued after the corresponding config_sched
+ * - parameters (DN_TEXT) for an object must precede the object
+ * processed on a config_sched.
*/
-void
-dummynet_drain(void)
+int
+do_config(void *p, int l)
{
- struct dn_flow_set *fs;
- struct dn_pipe *pipe;
- int i;
+ struct dn_id *next, *o;
+ int err = 0, err2 = 0;
+ struct dn_id *arg = NULL;
+ uintptr_t *a;
+
+ o = p;
+ if (o->id != DN_API_VERSION) {
+ D("invalid api version got %d need %d",
+ o->id, DN_API_VERSION);
+ return EINVAL;
+ }
+ for (; l >= sizeof(*o); o = next) {
+ struct dn_id *prev = arg;
+ if (o->len < sizeof(*o) || l < o->len) {
+ D("bad len o->len %d len %d", o->len, l);
+ err = EINVAL;
+ break;
+ }
+ l -= o->len;
+ next = (struct dn_id *)((char *)o + o->len);
+ err = 0;
+ switch (o->type) {
+ default:
+ D("cmd %d not implemented", o->type);
+ break;
+#ifdef EMULATE_SYSCTL
+ /* sysctl emulation.
+ * if we recognize the command, jump to the correct
+ * handler and return
+ */
+ case DN_SYSCTL_SET:
+ err = kesysctl_emu_set(p,l);
+ return err;
+#endif
+ case DN_CMD_CONFIG: /* simply a header */
+ break;
- DUMMYNET_LOCK_ASSERT();
+ case DN_CMD_DELETE:
+ /* the argument is in the first uintptr_t after o */
+ a = (uintptr_t *)(o+1);
+ if (o->len < sizeof(*o) + sizeof(*a)) {
+ err = EINVAL;
+ break;
+ }
+ switch (o->subtype) {
+ case DN_LINK:
+ /* delete base and derived schedulers */
+ DN_BH_WLOCK();
+ err = delete_schk(*a);
+ err2 = delete_schk(*a + DN_MAX_ID);
+ DN_BH_WUNLOCK();
+ if (!err)
+ err = err2;
+ break;
+
+ default:
+ D("invalid delete type %d",
+ o->subtype);
+ err = EINVAL;
+ break;
- heap_free(&ready_heap);
- heap_free(&wfq_ready_heap);
- heap_free(&extract_heap);
- /* remove all references to this pipe from flow_sets */
- for (i = 0; i < HASHSIZE; i++)
- SLIST_FOREACH(fs, &flowsethash[i], next)
- purge_flow_set(fs, 0);
+ case DN_FS:
+ err = (*a <1 || *a >= DN_MAX_ID) ?
+ EINVAL : delete_fs(*a, 0) ;
+ break;
+ }
+ break;
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_FOREACH(pipe, &pipehash[i], next) {
- purge_flow_set(&(pipe->fs), 0);
- dn_free_pkts(pipe->head);
- pipe->head = pipe->tail = NULL;
+ case DN_CMD_FLUSH:
+ DN_BH_WLOCK();
+ dummynet_flush();
+ DN_BH_WUNLOCK();
+ break;
+ case DN_TEXT: /* store argument the next block */
+ prev = NULL;
+ arg = o;
+ break;
+ case DN_LINK:
+ err = config_link((struct dn_link *)o, arg);
+ break;
+ case DN_PROFILE:
+ err = config_profile((struct dn_profile *)o, arg);
+ break;
+ case DN_SCH:
+ err = config_sched((struct dn_sch *)o, arg);
+ break;
+ case DN_FS:
+ err = (NULL==config_fs((struct dn_fs *)o, arg, 0));
+ break;
+ }
+ if (prev)
+ arg = NULL;
+ if (err != 0)
+ break;
}
- }
+ return err;
}
-/*
- * Fully delete a pipe or a queue, cleaning up associated info.
- */
+/*
+ * Estimate how many bytes are needed to answer the "show" request
+ * described by cmd->subtype, using the object counters in dn_cfg.
+ * The DN_C_* selection bits chosen for the request are stored in
+ * a->flags. Returns the estimate, or -1 if the subtype is not handled.
+ * The value does not include the leading struct dn_id; the caller in
+ * this file adds sizeof(*cmd) itself.
+ */
static int
-delete_pipe(struct dn_pipe *p)
+compute_space(struct dn_id *cmd, struct copy_args *a)
{
+ int x = 0, need = 0;
+ /* size of a profile without its trailing samples array */
+ int profile_size = sizeof(struct dn_profile) -
+ ED_MAX_SAMPLES_NO*sizeof(int);
+
+ /* NOTE about compute space:
+ * NP = dn_cfg.schk_count
+ * NSI = dn_cfg.si_count
+ * NF = dn_cfg.fsk_count
+ * NQ = dn_cfg.queue_count
+ * - ipfw pipe show
+ * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
+ * link, scheduler template, flowset
+ * integrated in scheduler and header
+ * for flowset list
+ * (NSI)*(dn_flow) all scheduler instance (includes
+ * the queue instance)
+ * - ipfw sched show
+ * (NP/2)*(dn_link + dn_sch + dn_id + dn_fs) only half scheduler
+ * link, scheduler template, flowset
+ * integrated in scheduler and header
+ * for flowset list
+ * (NSI * dn_flow) all scheduler instances
+ * (NF * sizeof(uint_32)) space for flowset list linked to scheduler
+ * (NQ * dn_queue) all queue [XXXfor now not listed]
+ * - ipfw queue show
+ * (NF * dn_fs) all flowset
+ * (NQ * dn_queue) all queues
+ */
+ switch (cmd->subtype) {
+ default:
+ return -1;
+ /* XXX where do LINK and SCH differ ? */
+ /* 'ipfw sched show' could list all queues associated to
+ * a scheduler. This feature for now is disabled
+ */
+ case DN_LINK: /* pipe show */
+ x = DN_C_LINK | DN_C_SCH | DN_C_FLOW;
+ need += dn_cfg.schk_count *
+ (sizeof(struct dn_fs) + profile_size) / 2;
+ need += dn_cfg.fsk_count * sizeof(uint32_t);
+ break;
+ case DN_SCH: /* sched show */
+ need += dn_cfg.schk_count *
+ (sizeof(struct dn_fs) + profile_size) / 2;
+ need += dn_cfg.fsk_count * sizeof(uint32_t);
+ x = DN_C_SCH | DN_C_LINK | DN_C_FLOW;
+ break;
+ case DN_FS: /* queue show */
+ x = DN_C_FS | DN_C_QUEUE;
+ break;
+ case DN_GET_COMPAT: /* compatibility mode */
+ /* x stays 0 here, so none of the flag-driven terms below apply */
+ need = dn_compat_calc_size();
+ break;
+ }
+ a->flags = x;
+ /* add the per-class terms selected by the flags chosen above */
+ if (x & DN_C_SCH) {
+ need += dn_cfg.schk_count * sizeof(struct dn_sch) / 2;
+ /* NOTE also, each fs might be attached to a sched */
+ need += dn_cfg.schk_count * sizeof(struct dn_id) / 2;
+ }
+ if (x & DN_C_FS)
+ need += dn_cfg.fsk_count * sizeof(struct dn_fs);
+ if (x & DN_C_LINK) {
+ need += dn_cfg.schk_count * sizeof(struct dn_link) / 2;
+ }
+ /*
+ * When exporting a queue to userland, only pass up the
+ * struct dn_flow, which is the only visible part.
+ */
- if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
- return EINVAL ;
- if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
- return EINVAL ;
- if (p->pipe_nr != 0) { /* this is an old-style pipe */
- struct dn_pipe *pipe;
- struct dn_flow_set *fs;
- int i;
-
- DUMMYNET_LOCK();
- pipe = locate_pipe(p->pipe_nr); /* locate pipe */
+ if (x & DN_C_QUEUE)
+ need += dn_cfg.queue_count * sizeof(struct dn_flow);
+ if (x & DN_C_FLOW)
+ need += dn_cfg.si_count * (sizeof(struct dn_flow));
+ return need;
+}
- if (pipe == NULL) {
- DUMMYNET_UNLOCK();
- return (ENOENT); /* not found */
+/*
+ * Copy the dummynet objects selected by the request to the caller.
+ *
+ * If compat != NULL dummynet_get is called in compatibility mode.
+ * *compat will be the pointer to the buffer to pass to ipfw
+ * (the buffer is then owned by the caller, see the end of the function).
+ *
+ * In native mode the request (a struct dn_id, optionally followed by
+ * ranges) is read from sopt and the reply is written back with
+ * sooptcopyout(). If the user buffer is too small only the header is
+ * returned, with the required size stored in its id field.
+ *
+ * Returns 0 on success or a non-zero error code.
+ */
+int
+dummynet_get(struct sockopt *sopt, void **compat)
+{
+	int have, i, need, error;
+	char *start = NULL, *buf;
+	size_t sopt_valsize;
+	struct dn_id *cmd;
+	struct copy_args a;
+	struct copy_range r;
+	int l = sizeof(struct dn_id);
+
+	bzero(&a, sizeof(a));
+	bzero(&r, sizeof(r));
+
+	/* save and restore original sopt_valsize around copyin */
+	sopt_valsize = sopt->sopt_valsize;
+
+	/* start with the on-stack request; replaced below if too small */
+	cmd = &r.o;
+
+	if (!compat) {
+		/* copy at least an oid, and possibly a full object */
+		error = sooptcopyin(sopt, cmd, sizeof(r), sizeof(*cmd));
+		sopt->sopt_valsize = sopt_valsize;
+		if (error)
+			goto done;
+		l = cmd->len;
+#ifdef EMULATE_SYSCTL
+		/* sysctl emulation. */
+		if (cmd->type == DN_SYSCTL_GET)
+			return kesysctl_emu_get(sopt);
+#endif
+		if (l > sizeof(r)) {
+			/* request larger than default, allocate buffer */
+			cmd = malloc(l, M_DUMMYNET, M_WAITOK);
+			if (cmd == NULL)
+				return ENOMEM; //XXX
+			error = sooptcopyin(sopt, cmd, l, l);
+			sopt->sopt_valsize = sopt_valsize;
+			if (error)
+				goto done;
+		}
+	} else { /* compatibility */
+		error = 0;
+		cmd->type = DN_CMD_GET;
+		cmd->len = sizeof(struct dn_id);
+		cmd->subtype = DN_GET_COMPAT;
+		// cmd->id = sopt_valsize;
+		D("compatibility mode");
+	}
+	a.extra = (struct copy_range *)cmd;
+	if (cmd->len == sizeof(*cmd)) { /* no range, create a default */
+		uint32_t *rp = (uint32_t *)(cmd + 1);
+		cmd->len += 2* sizeof(uint32_t);
+		rp[0] = 1;
+		rp[1] = DN_MAX_ID - 1;
+		if (cmd->subtype == DN_LINK) {
+			rp[0] += DN_MAX_ID;
+			rp[1] += DN_MAX_ID;
+		}
}
+	/* Count space (under lock) and allocate (outside lock).
+	 * Exit with lock held if we manage to get enough buffer.
+	 * Try a few times then give up.
+	 */
+	for (have = 0, i = 0; i < 10; i++) {
+		DN_BH_WLOCK();
+		need = compute_space(cmd, &a);
+
+		/* if there is a range, ignore value from compute_space() */
+		if (l > sizeof(*cmd))
+			need = sopt_valsize - sizeof(*cmd);
+
+		if (need < 0) {
+			DN_BH_WUNLOCK();
+			error = EINVAL;
+			goto done;
+		}
+		need += sizeof(*cmd);
+		cmd->id = need;
+		if (have >= need)
+			break;	/* buffer is large enough, lock still held */
- /* Unlink from list of pipes. */
- SLIST_REMOVE(&pipehash[HASH(pipe->pipe_nr)], pipe, dn_pipe, next);
+		DN_BH_WUNLOCK();
+		if (start)
+			free(start, M_DUMMYNET);
+		start = NULL;
+		if (need > sopt_valsize)
+			break;	/* user buffer too small, lock released */
- /* Remove all references to this pipe from flow_sets. */
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_FOREACH(fs, &flowsethash[i], next) {
- if (fs->pipe == pipe) {
- printf("dummynet: ++ ref to pipe %d from fs %d\n",
- p->pipe_nr, fs->fs_nr);
- fs->pipe = NULL ;
- purge_flow_set(fs, 0);
+		have = need;
+		start = malloc(have, M_DUMMYNET, M_WAITOK | M_ZERO);
+		if (start == NULL) {
+			error = ENOMEM;
+			goto done;
}
- }
}
- fs_remove_from_heap(&ready_heap, &(pipe->fs));
- purge_pipe(pipe); /* remove all data associated to this pipe */
- /* remove reference to here from extract_heap and wfq_ready_heap */
- pipe_remove_from_heap(&extract_heap, pipe);
- pipe_remove_from_heap(&wfq_ready_heap, pipe);
- DUMMYNET_UNLOCK();
- free_pipe(pipe);
- } else { /* this is a WF2Q queue (dn_flow_set) */
- struct dn_flow_set *fs;
-
- DUMMYNET_LOCK();
- fs = locate_flowset(p->fs.fs_nr); /* locate set */
+	if (i == 10) {
+		/*
+		 * Retries exhausted: the loop ended with the lock released
+		 * and a buffer whose size was never re-checked under the
+		 * lock. Give up instead of scanning the tables unlocked
+		 * (and unlocking a lock we do not hold). 'start' is
+		 * released at 'done'.
+		 */
+		if (compat)
+			*compat = NULL;
+		error = ENOBUFS;
+		goto done;
+	}
+	if (start == NULL) {
+		/* user buffer too small: report only the header (native) */
+		if (compat) {
+			*compat = NULL;
+			error = 1; // XXX
+		} else {
+			error = sooptcopyout(sopt, cmd, sizeof(*cmd));
+		}
+		goto done;
+	}
+	ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
+		"%d:%d si %d, %d:%d queues %d",
+		dn_cfg.schk_count, sizeof(struct dn_sch), DN_SCH,
+		dn_cfg.schk_count, sizeof(struct dn_link), DN_LINK,
+		dn_cfg.fsk_count, sizeof(struct dn_fs), DN_FS,
+		dn_cfg.si_count, sizeof(struct dn_flow), DN_SCH_I,
+		dn_cfg.queue_count, sizeof(struct dn_queue), DN_QUEUE);
+	sopt->sopt_valsize = sopt_valsize;
+	a.type = cmd->subtype;
+
+	if (compat == NULL) {
+		/* native reply starts with a copy of the request header */
+		bcopy(cmd, start, sizeof(*cmd));
+		((struct dn_id*)(start))->len = sizeof(struct dn_id);
+		buf = start + sizeof(*cmd);
+	} else
+		buf = start;
+	a.start = &buf;
+	a.end = start + have;
+	/* start copying other objects */
+	if (compat) {
+		a.type = DN_COMPAT_PIPE;
+		dn_ht_scan(dn_cfg.schedhash, copy_data_helper_compat, &a);
+		a.type = DN_COMPAT_QUEUE;
+		dn_ht_scan(dn_cfg.fshash, copy_data_helper_compat, &a);
+	} else if (a.type == DN_FS) {
+		dn_ht_scan(dn_cfg.fshash, copy_data_helper, &a);
+	} else {
+		dn_ht_scan(dn_cfg.schedhash, copy_data_helper, &a);
+	}
+	DN_BH_WUNLOCK();
- if (fs == NULL) {
- DUMMYNET_UNLOCK();
- return (ENOENT); /* not found */
+	if (compat) {
+		*compat = start;
+		sopt->sopt_valsize = buf - start;
+		/* free() is done by ip_dummynet_compat() */
+		start = NULL; //XXX hack
+	} else {
+		error = sooptcopyout(sopt, start, buf - start);
}
+done:
+	/* release the request copy only if it was heap-allocated */
+	if (cmd && cmd != &r.o)
+		free(cmd, M_DUMMYNET);
+	if (start)
+		free(start, M_DUMMYNET);
+	return error;
+}
- /* Unlink from list of flowsets. */
- SLIST_REMOVE( &flowsethash[HASH(fs->fs_nr)], fs, dn_flow_set, next);
+/*
+ * Callback called on scheduler instance to delete it if idle.
+ * _si is the scheduler instance, arg is unused.
+ * Returns the value of si_destroy() when the instance is destroyed,
+ * 0 when it is left in place.
+ */
+static int
+drain_scheduler_cb(void *_si, void *arg)
+{
+ struct dn_sch_inst *si = _si;
- if (fs->pipe != NULL) {
- /* Update total weight on parent pipe and cleanup parent heaps. */
- fs->pipe->sum -= fs->weight * fs->backlogged ;
- fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs);
- fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs);
-#if 1 /* XXX should i remove from idle_heap as well ? */
- fs_remove_from_heap(&(fs->pipe->idle_heap), fs);
-#endif
+ /* keep instances flagged DN_ACTIVE or with packets on the delay line */
+ if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
+ return 0;
+
+ if (si->sched->fp->flags & DN_MULTIQUEUE) {
+ /* multiqueue scheduler: destroy only when it has no queues */
+ if (si->q_count == 0)
+ return si_destroy(si, NULL);
+ else
+ return 0;
+ } else { /* !DN_MULTIQUEUE */
+ /*
+ * NOTE(review): (si+1) still has type struct dn_sch_inst *, so
+ * this reads the 'ni' member at dn_sch_inst's offset within the
+ * memory following si. Confirm this is the intended field of
+ * the object stored there.
+ */
+ if ((si+1)->ni.length == 0)
+ return si_destroy(si, NULL);
+ else
+ return 0;
}
- purge_flow_set(fs, 1);
- DUMMYNET_UNLOCK();
- }
- return 0 ;
+ return 0; /* unreachable */
}
-/*
- * helper function used to copy data from kernel in DUMMYNET_GET
- */
-static char *
-dn_copy_set(struct dn_flow_set *set, char *bp)
-{
- int i, copied = 0 ;
- struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp;
-
- DUMMYNET_LOCK_ASSERT();
-
- for (i = 0 ; i <= set->rq_size ; i++) {
- for (q = set->rq[i] ; q ; q = q->next, qp++ ) {
- if (q->hash_slot != i)
- printf("dummynet: ++ at %d: wrong slot (have %d, "
- "should be %d)\n", copied, q->hash_slot, i);
- if (q->fs != set)
- printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n",
- i, q->fs, set);
- copied++ ;
- bcopy(q, qp, sizeof( *q ) );
- /* cleanup pointers */
- qp->next = NULL ;
- qp->head = qp->tail = NULL ;
- qp->fs = NULL ;
+/*
+ * Callback called on scheduler to check if it has instances.
+ * _s is the scheduler (struct dn_schk), arg is unused.
+ * Runs drain_scheduler_cb() on the scheduler's instances and always
+ * returns 0.
+ */
+static int
+drain_scheduler_sch_cb(void *_s, void *arg)
+{
+ struct dn_schk *s = _s;
+
+ if (s->sch.flags & DN_HAVE_MASK) {
+ /* siht is scanned as a hash table, one bucket per call */
+ dn_ht_scan_bucket(s->siht, &s->drain_bucket,
+ drain_scheduler_cb, NULL);
+ s->drain_bucket++;
+ } else {
+ /* siht is passed directly as the instance; clear it if deleted */
+ if (s->siht) {
+ if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
+ s->siht = NULL;
+ }
}
- }
- if (copied != set->rq_elements)
- printf("dummynet: ++ wrong count, have %d should be %d\n",
- copied, set->rq_elements);
- return (char *)qp ;
-}
-
-static size_t
-dn_calc_size(void)
-{
- struct dn_flow_set *fs;
- struct dn_pipe *pipe;
- size_t size = 0;
- int i;
-
- DUMMYNET_LOCK_ASSERT();
- /*
- * Compute size of data structures: list of pipes and flow_sets.
- */
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_FOREACH(pipe, &pipehash[i], next)
- size += sizeof(*pipe) +
- pipe->fs.rq_elements * sizeof(struct dn_flow_queue);
- SLIST_FOREACH(fs, &flowsethash[i], next)
- size += sizeof (*fs) +
- fs->rq_elements * sizeof(struct dn_flow_queue);
- }
- return size;
+ return 0;
}
-static int
-dummynet_get(struct sockopt *sopt)
-{
- char *buf, *bp ; /* bp is the "copy-pointer" */
- size_t size ;
- struct dn_flow_set *fs;
- struct dn_pipe *pipe;
- int error=0, i ;
-
- /* XXX lock held too long */
- DUMMYNET_LOCK();
- /*
- * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we
- * cannot use this flag while holding a mutex.
- */
- for (i = 0; i < 10; i++) {
- size = dn_calc_size();
- DUMMYNET_UNLOCK();
- buf = malloc(size, M_TEMP, M_WAITOK);
- DUMMYNET_LOCK();
- if (size >= dn_calc_size())
- break;
- free(buf, M_TEMP);
- buf = NULL;
- }
- if (buf == NULL) {
- DUMMYNET_UNLOCK();
- return ENOBUFS ;
- }
- bp = buf;
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_FOREACH(pipe, &pipehash[i], next) {
- struct dn_pipe *pipe_bp = (struct dn_pipe *)bp;
+/*
+ * Periodic drain of schedulers: visit one bucket of the scheduler
+ * hash table per call, then advance the bucket cursor.
+ */
+void
+dn_drain_scheduler(void)
+{
+	dn_ht_scan_bucket(dn_cfg.schedhash,
+	    &dn_cfg.drain_sch, drain_scheduler_sch_cb, NULL);
+	++dn_cfg.drain_sch;
+}
- /*
- * Copy pipe descriptor into *bp, convert delay back to ms,
- * then copy the flow_set descriptor(s) one at a time.
- * After each flow_set, copy the queue descriptor it owns.
- */
- bcopy(pipe, bp, sizeof(*pipe));
- pipe_bp->delay = (pipe_bp->delay * 1000) / hz;
- pipe_bp->burst = div64(pipe_bp->burst, 8 * hz);
- /*
- * XXX the following is a hack based on ->next being the
- * first field in dn_pipe and dn_flow_set. The correct
- * solution would be to move the dn_flow_set to the beginning
- * of struct dn_pipe.
- */
- pipe_bp->next.sle_next = (struct dn_pipe *)DN_IS_PIPE;
- /* Clean pointers. */
- pipe_bp->head = pipe_bp->tail = NULL;
- pipe_bp->fs.next.sle_next = NULL;
- pipe_bp->fs.pipe = NULL;
- pipe_bp->fs.rq = NULL;
- pipe_bp->samples = NULL;
+/*
+ * Callback called on queue to delete if it is idle.
+ * _q is the queue (struct dn_queue), arg is unused.
+ * Returns DNHT_SCAN_DEL if the queue was deleted, 0 otherwise.
+ */
+static int
+drain_queue_cb(void *_q, void *arg)
+{
+ struct dn_queue *q = _q;
- bp += sizeof(*pipe) ;
- bp = dn_copy_set(&(pipe->fs), bp);
+ /* a queue whose ni.length is zero is treated as idle */
+ if (q->ni.length == 0) {
+ dn_delete_queue(q, DN_DESTROY);
+ return DNHT_SCAN_DEL; /* queue is deleted */
}
- }
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_FOREACH(fs, &flowsethash[i], next) {
- struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp;
+ return 0; /* queue isn't deleted */
+}
- bcopy(fs, bp, sizeof(*fs));
- /* XXX same hack as above */
- fs_bp->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
- fs_bp->pipe = NULL;
- fs_bp->rq = NULL;
- bp += sizeof(*fs);
- bp = dn_copy_set(fs, bp);
- }
- }
+/*
+ * Per-flowset drain callback: give the flowset's queue(s) a chance to be
+ * deleted through drain_queue_cb(). _fs is the flowset, arg is unused.
+ * Always returns 0.
+ */
+static int
+drain_queue_fs_cb(void *_fs, void *arg)
+{
+	struct dn_fsk *flowset = _fs;
- DUMMYNET_UNLOCK();
+
+	if (!(flowset->fs.flags & DN_QHT_HASH)) {
+		/*
+		 * No hash table for this flowset: qht is handed to the
+		 * queue callback directly, and cleared if the queue
+		 * was deleted.
+		 */
+		if (flowset->qht != NULL &&
+		    drain_queue_cb(flowset->qht, NULL) == DNHT_SCAN_DEL)
+			flowset->qht = NULL;
+		return 0;
+	}
+
+	/* Flowset has a hash table for queues: scan one bucket per call */
+	dn_ht_scan_bucket(flowset->qht, &flowset->drain_bucket,
+	    drain_queue_cb, NULL);
+	flowset->drain_bucket++;
+	return 0;
+}
- error = sooptcopyout(sopt, buf, size);
- free(buf, M_TEMP);
- return error ;
+/*
+ * Periodic drain of queues: visit one bucket of the flowset hash
+ * table per call, then advance the bucket cursor.
+ */
+void
+dn_drain_queue(void)
+{
+	dn_ht_scan_bucket(dn_cfg.fshash,
+	    &dn_cfg.drain_fs, drain_queue_fs_cb, NULL);
+	++dn_cfg.drain_fs;
}
/*
- * Handler for the various dummynet socket options (get, flush, config, del)
+ * Handler for the various dummynet socket options
*/
static int
ip_dn_ctl(struct sockopt *sopt)
{
- int error;
- struct dn_pipe *p = NULL;
+	void *p = NULL;		/* kernel copy of the argument, freed on exit */
+	int error, l;
- error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
- if (error)
- return (error);
-
- /* Disallow sets in really-really secure mode. */
- if (sopt->sopt_dir == SOPT_SET) {
-#if __FreeBSD_version >= 500034
- error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+	error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET);
if (error)
- return (error);
-#else
- if (securelevel >= 3)
- return (EPERM);
-#endif
- }
+		return (error);
- switch (sopt->sopt_name) {
- default :
- printf("dummynet: -- unknown option %d", sopt->sopt_name);
- error = EINVAL ;
- break;
-
- case IP_DUMMYNET_GET :
- error = dummynet_get(sopt);
- break ;
-
- case IP_DUMMYNET_FLUSH :
- dummynet_flush() ;
- break ;
-
- case IP_DUMMYNET_CONFIGURE :
- p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK);
- error = sooptcopyin(sopt, p, sizeof(struct dn_pipe_max), sizeof *p);
- if (error)
- break ;
- if (p->samples_no > 0)
- p->samples = &(((struct dn_pipe_max *)p)->samples[0]);
+	/* Disallow sets in really-really secure mode. */
+	if (sopt->sopt_dir == SOPT_SET) {
+		error = securelevel_ge(sopt->sopt_td->td_ucred, 3);
+		if (error)
+			return (error);
+	}
- error = config_pipe(p);
- break ;
+	switch (sopt->sopt_name) {
+	default :
+		D("dummynet: unknown option %d", sopt->sopt_name);
+		error = EINVAL;
+		break;
- case IP_DUMMYNET_DEL : /* remove a pipe or queue */
- p = malloc(sizeof(struct dn_pipe), M_TEMP, M_WAITOK);
- error = sooptcopyin(sopt, p, sizeof(struct dn_pipe), sizeof *p);
- if (error)
- break ;
+	/* old-style options are all forwarded to the compat layer */
+	case IP_DUMMYNET_FLUSH:
+	case IP_DUMMYNET_CONFIGURE:
+	case IP_DUMMYNET_DEL:	/* remove a pipe or queue */
+	case IP_DUMMYNET_GET:
+		D("dummynet: compat option %d", sopt->sopt_name);
+		error = ip_dummynet_compat(sopt);
+		break;
- error = delete_pipe(p);
- break ;
- }
+	case IP_DUMMYNET3 :
+		if (sopt->sopt_dir == SOPT_GET) {
+			error = dummynet_get(sopt, NULL);
+			break;
+		}
+		l = sopt->sopt_valsize;
+		/*
+		 * Compare as signed so that a negative l is rejected too,
+		 * and report the failure: 'error' is still 0 at this point,
+		 * so a bare break would signal success.
+		 */
+		if (l < (int)sizeof(struct dn_id) || l > 12000) {
+			D("argument len %d invalid", l);
+			error = EINVAL;
+			break;
+		}
+		p = malloc(l, M_TEMP, M_WAITOK); // XXX can it fail ?
+		error = sooptcopyin(sopt, p, l, l);
+		if (error)
+			break ;
+		error = do_config(p, l);
+		break;
+	}
- if (p != NULL)
- free(p, M_TEMP);
+	if (p != NULL)
+		free(p, M_TEMP);
- return error ;
+	return error ;
}
+
+/*
+ * One-time initialisation of dummynet: set the defaults in dn_cfg,
+ * create the scheduler and flowset hash tables, the event heap and the
+ * lists, initialise the lock, install the control and I/O hooks and
+ * start the periodic callout. Calls after the first one return at once.
+ */
static void
ip_dn_init(void)
{
- int i;
+ /* guard: this function is also called from load_dn_sched() */
+ static int init_done = 0;
+ if (init_done)
+ return;
+ init_done = 1;
if (bootverbose)
- printf("DUMMYNET with IPv6 initialized (040826)\n");
-
- DUMMYNET_LOCK_INIT();
-
- for (i = 0; i < HASHSIZE; i++) {
- SLIST_INIT(&pipehash[i]);
- SLIST_INIT(&flowsethash[i]);
- }
- ready_heap.size = ready_heap.elements = 0;
- ready_heap.offset = 0;
-
- wfq_ready_heap.size = wfq_ready_heap.elements = 0;
- wfq_ready_heap.offset = 0;
-
- extract_heap.size = extract_heap.elements = 0;
- extract_heap.offset = 0;
+ printf("DUMMYNET with IPv6 initialized (100131)\n");
+ /* Set defaults here. MSVC does not accept initializers,
+ * and this is also useful for vimages
+ */
+ /* queue limits */
+ dn_cfg.slot_limit = 100; /* Foot shooting limit for queues. */
+ dn_cfg.byte_limit = 1024 * 1024;
+ dn_cfg.expire = 1;
+
+ /* RED parameters */
+ dn_cfg.red_lookup_depth = 256; /* default lookup table depth */
+ dn_cfg.red_avg_pkt_size = 512; /* default medium packet size */
+ dn_cfg.red_max_pkt_size = 1500; /* default max packet size */
+
+ /* hash tables */
+ dn_cfg.max_hash_size = 1024; /* max in the hash tables */
+ dn_cfg.hash_size = 64; /* default hash size */
+
+ /* create hash tables for schedulers and flowsets.
+ * In both we search by key and by pointer.
+ */
+ dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
+ offsetof(struct dn_schk, schk_next),
+ schk_hash, schk_match, schk_new);
+ dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
+ offsetof(struct dn_fsk, fsk_next),
+ fsk_hash, fsk_match, fsk_new);
+
+ /* bucket index to drain object */
+ dn_cfg.drain_fs = 0;
+ dn_cfg.drain_sch = 0;
+
+ heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
+ SLIST_INIT(&dn_cfg.fsu);
+ SLIST_INIT(&dn_cfg.schedlist);
+
+ DN_LOCK_INIT();
+ /* publish the entry points and arm the callout that runs dummynet() */
ip_dn_ctl_ptr = ip_dn_ctl;
ip_dn_io_ptr = dummynet_io;
callout_reset(&dn_timeout, 1, dummynet, NULL);
/* Initialize curr_time adjustment mechanics. */
- getmicrouptime(&prev_t);
+ getmicrouptime(&dn_cfg.prev_t);
}
#ifdef KLD_MODULE
static void
ip_dn_destroy(void)
{
+ callout_drain(&dn_timeout);
+
+ DN_BH_WLOCK();
ip_dn_ctl_ptr = NULL;
ip_dn_io_ptr = NULL;
- DUMMYNET_LOCK();
- callout_stop(&dn_timeout);
- DUMMYNET_UNLOCK();
+ dummynet_flush();
+ DN_BH_WUNLOCK();
taskqueue_drain(dn_tq, &dn_task);
taskqueue_free(dn_tq);
- dummynet_flush();
+ dn_ht_free(dn_cfg.schedhash, 0);
+ dn_ht_free(dn_cfg.fshash, 0);
+ heap_free(&dn_cfg.evheap);
- DUMMYNET_LOCK_DESTROY();
+ DN_LOCK_DESTROY();
}
#endif /* KLD_MODULE */
dummynet_modevent(module_t mod, int type, void *data)
{
- switch (type) {
- case MOD_LOAD:
+ if (type == MOD_LOAD) {
if (ip_dn_io_ptr) {
- printf("DUMMYNET already loaded\n");
- return EEXIST ;
+ printf("DUMMYNET already loaded\n");
+ return EEXIST ;
}
ip_dn_init();
- break;
-
- case MOD_UNLOAD:
+ return 0;
+ } else if (type == MOD_UNLOAD) {
#if !defined(KLD_MODULE)
printf("dummynet statically compiled, cannot unload\n");
return EINVAL ;
#else
ip_dn_destroy();
+ return 0;
#endif
- break ;
- default:
+ } else
return EOPNOTSUPP;
- break ;
+}
+
+/* modevent helpers for the modules */
+
+/*
+ * Register scheduler algorithm d in dn_cfg.schedlist.
+ * Returns 0 when the algorithm was added, 1 on any failure (NULL
+ * descriptor, missing enqueue/dequeue callback, or a scheduler with
+ * the same name already registered).
+ */
+static int
+load_dn_sched(struct dn_alg *d)
+{
+	struct dn_alg *cur;
+	int duplicate;
+
+	if (d == NULL)
+		return 1; /* error */
+	ip_dn_init();	/* just in case, we need the lock */
+
+	/* both callbacks are mandatory */
+	if (d->enqueue == NULL || d->dequeue == NULL) {
+		D("missing enqueue or dequeue for %s", d->name);
+		return 1;
+	}
+
+	DN_BH_WLOCK();
+	/* look for an already registered scheduler with this name */
+	SLIST_FOREACH(cur, &dn_cfg.schedlist, next) {
+		if (strcmp(cur->name, d->name) == 0) {
+			D("%s already loaded", d->name);
+			break;
+		}
+	}
+	/* the iterator is non-NULL only if the loop stopped on a match */
+	duplicate = (cur != NULL);
+	if (!duplicate)
+		SLIST_INSERT_HEAD(&dn_cfg.schedlist, d, next);
+	DN_BH_WUNLOCK();
+
+	D("dn_sched %s %sloaded", d->name, duplicate ? "not ":"");
+	return duplicate ? 1 : 0;
+}
+
+/*
+ * Remove the scheduler algorithm named s->name from dn_cfg.schedlist.
+ * Returns 0 on success, EBUSY if the registered algorithm still has a
+ * non-zero ref_count, EINVAL if s is NULL or no algorithm with that
+ * name is registered.
+ */
+static int
+unload_dn_sched(struct dn_alg *s)
+{
+	struct dn_alg *tmp, *r;
+	int err = EINVAL;
+
+	/* same guard as load_dn_sched(): never dereference a NULL descriptor */
+	if (s == NULL)
+		return EINVAL;
+
+	D("called for %s", s->name);
+
+	DN_BH_WLOCK();
+	SLIST_FOREACH_SAFE(r, &dn_cfg.schedlist, next, tmp) {
+		if (strcmp(s->name, r->name) != 0)
+			continue;
+		D("ref_count = %d", r->ref_count);
+		/* refuse to unload an algorithm that is still referenced */
+		err = (r->ref_count != 0) ? EBUSY : 0;
+		if (err == 0)
+			SLIST_REMOVE(&dn_cfg.schedlist, r, dn_alg, next);
+		break;
}
- return 0 ;
+	DN_BH_WUNLOCK();
+	D("dn_sched %s %sunloaded", s->name, err ? "not ":"");
+	return err;
+}
+
+/*
+ * Module event handler for scheduler modules: arg is the algorithm
+ * descriptor. MOD_LOAD and MOD_UNLOAD are forwarded to the helpers
+ * above; any other event yields EINVAL.
+ */
+int
+dn_sched_modevent(module_t mod, int cmd, void *arg)
+{
+	struct dn_alg *alg = arg;
+
+	switch (cmd) {
+	case MOD_LOAD:
+		return load_dn_sched(alg);
+	case MOD_UNLOAD:
+		return unload_dn_sched(alg);
+	default:
+		return EINVAL;
+	}
}
static moduledata_t dummynet_mod = {
- "dummynet",
- dummynet_modevent,
- NULL
+ "dummynet", dummynet_modevent, NULL
};
-DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
+
+DECLARE_MODULE(dummynet, dummynet_mod,
+ SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY-1);
MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
MODULE_VERSION(dummynet, 1);
/* end of file */
ipfw_nat_cfg_t *ipfw_nat_get_log_ptr;
#ifdef SYSCTL_NODE
+uint32_t dummy_def = IPFW_DEFAULT_RULE;
+uint32_t dummy_tables_max = IPFW_TABLES_MAX;
+
+SYSBEGIN(f3)
+
SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall");
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass,
CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0,
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit,
CTLFLAG_RW, &VNET_NAME(verbose_limit), 0,
"Set upper limit of matches of ipfw rules logged");
-uint32_t dummy_def = IPFW_DEFAULT_RULE;
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD,
&dummy_def, 0,
"The default/max possible rule number.");
-uint32_t dummy_tables_max = IPFW_TABLES_MAX;
SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD,
&dummy_tables_max, 0,
"The maximum number of tables.");
"Deny packets with unknown IPv6 Extension Headers");
#endif /* INET6 */
+SYSEND
+
#endif /* SYSCTL_NODE */
return(1);
}
} else {
-#if !defined( __linux__ ) && !defined( _WIN32 )
+#ifdef __FreeBSD__ /* and OSX too ? */
struct ifaddr *ia;
if_addr_rlock(ifp);
}
}
if_addr_runlock(ifp);
-#endif
+#endif /* __FreeBSD__ */
}
return(0); /* no match, fail ... */
}
static int
verify_path(struct in_addr src, struct ifnet *ifp, u_int fib)
{
-#if defined( __linux__ ) || defined( _WIN32 )
+#ifndef __FreeBSD__
return 0;
#else
struct route ro;
/* found valid route */
RTFREE(ro.ro_rt);
return 1;
-#endif
+#endif /* __FreeBSD__ */
}
#ifdef INET6
static int
check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
- u_int16_t src_port, struct ucred **uc, int *ugid_lookupp,
- struct inpcb *inp)
+ u_int16_t src_port, int *ugid_lookupp,
+ struct ucred **uc, struct inpcb *inp)
{
-#ifdef __linux__
+#ifndef __FreeBSD__
return cred_check(insn, proto, oif,
dst_ip, dst_port, src_ip, src_port,
(struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
else if (insn->o.opcode == O_JAIL)
match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]);
return match;
-#endif
+#endif /* __FreeBSD__ */
}
/*
* these types of constraints, as well as decrease contention
* on pcb related locks.
*/
-#ifdef __linux__
+#ifndef __FreeBSD__
struct bsd_ucred ucred_cache;
#else
struct ucred *ucred_cache = NULL;
* ulp is NULL if not found.
*/
void *ulp = NULL; /* upper layer protocol pointer. */
+
/* XXX ipv6 variables */
int is_ipv6 = 0;
- u_int16_t ext_hd = 0; /* bits vector for extension header filtering */
+ uint8_t icmp6_type = 0;
+ uint16_t ext_hd = 0; /* bits vector for extension header filtering */
/* end of ipv6 variables */
+
int is_ipv4 = 0;
int done = 0; /* flag to exit the outer loop */
switch (proto) {
case IPPROTO_ICMPV6:
PULLUP_TO(hlen, ulp, struct icmp6_hdr);
- args->f_id.flags = ICMP6(ulp)->icmp6_type;
+ icmp6_type = ICMP6(ulp)->icmp6_type;
break;
case IPPROTO_TCP:
PULLUP_TO(hlen, ulp, struct tcphdr);
dst_port = TCP(ulp)->th_dport;
src_port = TCP(ulp)->th_sport;
- args->f_id.flags = TCP(ulp)->th_flags;
+ /* save flags for dynamic rules */
+ args->f_id._flags = TCP(ulp)->th_flags;
break;
case IPPROTO_SCTP:
return (IP_FW_DENY);
break;
}
- args->f_id.frag_id6 =
+ args->f_id.extra =
ntohl(((struct ip6_frag *)ulp)->ip6f_ident);
ulp = NULL;
break;
PULLUP_TO(hlen, ulp, struct tcphdr);
dst_port = TCP(ulp)->th_dport;
src_port = TCP(ulp)->th_sport;
- args->f_id.flags = TCP(ulp)->th_flags;
+ /* save flags for dynamic rules */
+ args->f_id._flags = TCP(ulp)->th_flags;
break;
case IPPROTO_UDP:
case IPPROTO_ICMP:
PULLUP_TO(hlen, ulp, struct icmphdr);
- args->f_id.flags = ICMP(ulp)->icmp_type;
+ //args->f_id.flags = ICMP(ulp)->icmp_type;
break;
default:
(ipfw_insn_u32 *)cmd,
proto, oif,
dst_ip, dst_port,
- src_ip, src_port, (void *)&ucred_cache,
- &ucred_lookup, (struct inpcb *)args->m);
+ src_ip, src_port, &ucred_lookup,
+#ifdef __FreeBSD__
+ &ucred_cache, args->inp);
+#else
+ (void *)&ucred_cache,
+ (struct inpcb *)args->m);
+#endif
break;
case O_RECV:
key = dst_ip.s_addr;
else if (v == 1)
key = src_ip.s_addr;
+ else if (v == 6) /* dscp */
+ key = (ip->ip_tos >> 2) & 0x3f;
else if (offset != 0)
break;
else if (proto != IPPROTO_TCP &&
(ipfw_insn_u32 *)cmd,
proto, oif,
dst_ip, dst_port,
- src_ip, src_port, (void *)&ucred_cache,
- &ucred_lookup, (struct inpcb *)args->m);
-#ifdef __linux__
- if (v ==4 /* O_UID */)
- key = ucred_cache.uid;
- else if (v == 5 /* O_JAIL */)
- key = ucred_cache.xid;
-#else
+ src_ip, src_port, &ucred_lookup,
+#ifdef __FreeBSD__
+ &ucred_cache, args->inp);
if (v == 4 /* O_UID */)
key = ucred_cache->cr_uid;
else if (v == 5 /* O_JAIL */)
key = ucred_cache->cr_prison->pr_id;
-#endif
+#else /* !__FreeBSD__ */
+ (void *)&ucred_cache,
+ (struct inpcb *)args->m);
+ if (v ==4 /* O_UID */)
+ key = ucred_cache.uid;
+ else if (v == 5 /* O_JAIL */)
+ key = ucred_cache.xid;
+#endif /* !__FreeBSD__ */
key = htonl(key);
} else
break;
INADDR_TO_IFP(src_ip, tif);
match = (tif != NULL);
+ break;
}
+#ifdef INET6
+ /* FALLTHROUGH */
+ case O_IP6_SRC_ME:
+ match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
+#endif
break;
case O_IP_DST_SET:
INADDR_TO_IFP(dst_ip, tif);
match = (tif != NULL);
+ break;
}
+#ifdef INET6
+ /* FALLTHROUGH */
+ case O_IP6_DST_ME:
+ match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
+#endif
break;
+
case O_IP_SRCPORT:
case O_IP_DSTPORT:
/*
}
break;
- case O_IP6_SRC_ME:
- match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6);
- break;
-
- case O_IP6_DST_ME:
- match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6);
- break;
-
case O_FLOW6ID:
match = is_ipv6 &&
flow6id_match(args->f_id.flow_id6,
if (hlen > 0 && is_ipv6 &&
((offset & IP6F_OFF_MASK) == 0) &&
(proto != IPPROTO_ICMPV6 ||
- (is_icmp6_query(args->f_id.flags) == 1)) &&
+ (is_icmp6_query(icmp6_type) == 1)) &&
!(m->m_flags & (M_BCAST|M_MCAST)) &&
!IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) {
send_reject6(
printf("ipfw: ouch!, skip past end of rules, denying packet\n");
}
IPFW_RUNLOCK(chain);
-#ifndef __linux__
+#ifdef __FreeBSD__
if (ucred_cache != NULL)
crfree(ucred_cache);
#endif
IPFW_WLOCK(chain);
ipfw_dyn_uninit(0); /* run the callout_drain */
- ipfw_flush_tables(chain);
+ ipfw_destroy_tables(chain);
reap = NULL;
for (i = 0; i < chain->n_rules; i++) {
rule = chain->map[i];
#define V_ipfw_timeout VNET(ipfw_timeout)
static uma_zone_t ipfw_dyn_rule_zone;
-#if defined( __linux__ ) || defined( _WIN32 )
+#ifndef __FreeBSD__
DEFINE_SPINLOCK(ipfw_dyn_mtx);
#else
static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */
#define V_dyn_max VNET(dyn_max)
#ifdef SYSCTL_NODE
+
+SYSBEGIN(f2)
+
SYSCTL_DECL(_net_inet_ip_fw);
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets,
CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0,
SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive,
CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0,
"Enable keepalives for dyn. rules");
+
+SYSEND
+
#endif /* SYSCTL_NODE */
V_ipfw_dyn_v[i] = q;
}
if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */
- u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST);
+ u_char flags = pkt->_flags & (TH_FIN|TH_SYN|TH_RST);
#define BOTH_SYN (TH_SYN | (TH_SYN << 8))
#define BOTH_FIN (TH_FIN | (TH_FIN << 8))
ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
u_int32_t ack, int flags)
{
-#ifdef __linux__ // XXX to be revised
+#ifndef __FreeBSD__
return NULL;
#else
struct mbuf *m;
h->ip_hl = sizeof(*h) >> 2;
h->ip_tos = IPTOS_LOWDELAY;
h->ip_off = 0;
- h->ip_len = htons(len);
+ /* ip_len must be in host format for ip_output */
+ h->ip_len = len;
h->ip_ttl = V_ip_defttl;
h->ip_sum = 0;
break;
}
return (m);
-#endif /* !__linux__ */
+#endif /* __FreeBSD__ */
}
/*
* This procedure is only used to handle keepalives. It is invoked
* every dyn_keepalive_period
*/
-static void
+ /* dummynet() and ipfw_tick() can't be static in windows */
+void
ipfw_tick(void * vnetx)
{
struct mbuf *m0, *m, *mnext, **mtailp;
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
+#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ipfw/ip_fw_private.h>
#include <netinet/tcp_var.h>
if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG))
snprintf(SNPARGS(fragment, 0),
" (frag %08x:%d@%d%s)",
- args->f_id.frag_id6,
+ args->f_id.extra,
ntohs(ip6->ip6_plen) - hlen,
ntohs(offset & IP6F_OFF_MASK) << 3,
(offset & IP6F_MORE_FRAG) ? "+" : "");
(ipoff & IP_MF) ? "+" : "");
}
}
-#ifndef __linux__
+#ifdef __FreeBSD__
if (oif || m->m_pkthdr.rcvif)
log(LOG_SECURITY | LOG_INFO,
"ipfw: %d %s %s %s via %s%s\n",
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
#include <netinet/ipfw/ip_fw_private.h>
-#include <netinet/ip_dummynet.h>
#include <netgraph/ng_ipfw.h>
#include <machine/in_cksum.h>
int ipfw_chg_hook(SYSCTL_HANDLER_ARGS);
-/* Divert hooks. */
-void (*ip_divert_ptr)(struct mbuf *m, int incoming);
-
-/* ng_ipfw hooks. */
-ng_ipfw_input_t *ng_ipfw_input_p = NULL;
-
/* Forward declarations. */
static int ipfw_divert(struct mbuf **, int, struct ipfw_rule_ref *, int);
#ifdef SYSCTL_NODE
+
+SYSBEGIN(f1)
+
SYSCTL_DECL(_net_inet_ip_fw);
SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0,
ipfw_chg_hook, "I", "Enable ipfw+6");
#endif /* INET6 */
+
+SYSEND
+
#endif /* SYSCTL_NODE */
/*
int ret;
/* all the processing now uses ip_len in net format */
- SET_NET_IPLEN(mtod(*m0, struct ip *));
+ if (mtod(*m0, struct ip *)->ip_v == 4)
+ SET_NET_IPLEN(mtod(*m0, struct ip *));
/* convert dir to IPFW values */
dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT;
case IP_FW_NGTEE:
case IP_FW_NETGRAPH:
- if (!NG_IPFW_LOADED) {
+ if (ng_ipfw_input_p == NULL) {
ret = EACCES;
break; /* i.e. drop */
}
FREE_PKT(*m0);
*m0 = NULL;
}
- if (*m0)
+ if (*m0 && mtod(*m0, struct ip *)->ip_v == 4)
SET_HOST_IPLEN(mtod(*m0, struct ip *));
return ret;
}
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_sockopt.c 200601 2009-12-16 10:48:40Z luigi $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_sockopt.c 206339 2010-04-07 08:23:58Z luigi $");
/*
* Sockopt support for ipfw. The routines here implement
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/ip_var.h> /* hooks */
#include <netinet/ip_fw.h>
#include <netinet/ipfw/ip_fw_private.h>
int i;
i = chain->n_rules + extra;
- map = malloc(i * sizeof(struct ip_fw *), M_IPFW, M_WAITOK);
+ map = malloc(i * sizeof(struct ip_fw *), M_IPFW,
+ locked ? M_NOWAIT : M_WAITOK);
if (map == NULL) {
printf("%s: cannot allocate map\n", __FUNCTION__);
return NULL;
}
}
+/*
+ * Used by del_entry() to check if a rule should be kept.
+ * Returns 1 if the rule must be kept, 0 otherwise.
+ *
+ * Called with cmd = {0,1,5}.
+ * cmd == 0 matches on rule numbers, excludes rules in RESVD_SET if n == 0 ;
+ * cmd == 1 matches on set numbers only, rule numbers are ignored;
+ * cmd == 5 matches on rule and set numbers.
+ *
+ * n == 0 is a wildcard for rule numbers, there is no wildcard for sets.
+ *
+ * Rules to keep are
+ * (default || reserved || !match_set || !match_number)
+ * where
+ * default ::= (rule->rulenum == IPFW_DEFAULT_RULE)
+ * // the default rule is always protected
+ *
+ * reserved ::= (cmd == 0 && n == 0 && rule->set == RESVD_SET)
+ * // RESVD_SET is protected only if cmd == 0 and n == 0 ("ipfw flush")
+ *
+ * match_set ::= (cmd == 0 || rule->set == set)
+ * // set number is ignored for cmd == 0
+ *
+ * match_number ::= (cmd == 1 || n == 0 || n == rule->rulenum)
+ * // number is ignored for cmd == 1 or n == 0
+ *
+ */
+static int
+keep_rule(struct ip_fw *rule, uint8_t cmd, uint8_t set, uint32_t n)
+{
+	/* the default rule is always protected */
+	if (rule->rulenum == IPFW_DEFAULT_RULE)
+		return 1;
+
+	/* a plain flush (cmd 0, wildcard number) spares RESVD_SET */
+	if (cmd == 0 && n == 0 && rule->set == RESVD_SET)
+		return 1;
+
+	/* set does not match; sets are ignored for cmd 0 */
+	if (cmd != 0 && rule->set != set)
+		return 1;
+
+	/* number does not match; ignored for cmd 1 and for n == 0 */
+	if (cmd != 1 && n != 0 && n != rule->rulenum)
+		return 1;
+
+	/* the rule matches the request, do not keep it */
+	return 0;
+}
+
/**
- * Remove all rules with given number, and also do set manipulation.
+ * Remove all rules with given number, or do set manipulation.
* Assumes chain != NULL && *chain != NULL.
*
- * The argument is an u_int32_t. The low 16 bit are the rule or set number,
- * the next 8 bits are the new set, the top 8 bits are the command:
+ * The argument is an uint32_t. The low 16 bit are the rule or set number;
+ * the next 8 bits are the new set; the top 8 bits indicate the command:
*
- * 0 delete rules with given number
- * 1 delete rules with given set number
- * 2 move rules with given number to new set
- * 3 move rules with given set number to new set
- * 4 swap sets with given numbers
- * 5 delete rules with given number and with given set number
+ * 0 delete rules numbered "rulenum"
+ * 1 delete rules in set "rulenum"
+ * 2 move rules "rulenum" to set "new_set"
+ * 3 move rules from set "rulenum" to set "new_set"
+ * 4 swap sets "rulenum" and "new_set"
+ * 5 delete rules "rulenum" and set "new_set"
*/
static int
-del_entry(struct ip_fw_chain *chain, u_int32_t arg)
+del_entry(struct ip_fw_chain *chain, uint32_t arg)
{
struct ip_fw *rule;
- uint32_t rulenum; /* rule or old_set */
+ uint32_t num; /* rule number or old_set */
uint8_t cmd, new_set;
- int start, end = 0, i, ofs, n;
+ int start, end, i, ofs, n;
struct ip_fw **map = NULL;
int error = 0;
- rulenum = arg & 0xffff;
+ num = arg & 0xffff;
cmd = (arg >> 24) & 0xff;
new_set = (arg >> 16) & 0xff;
if (cmd > 5 || new_set > RESVD_SET)
return EINVAL;
if (cmd == 0 || cmd == 2 || cmd == 5) {
- if (rulenum >= IPFW_DEFAULT_RULE)
+ if (num >= IPFW_DEFAULT_RULE)
return EINVAL;
} else {
- if (rulenum > RESVD_SET) /* old_set */
+ if (num > RESVD_SET) /* old_set */
return EINVAL;
}
- IPFW_UH_WLOCK(chain); /* prevent conflicts among the writers */
+ IPFW_UH_WLOCK(chain); /* arbitrate writers */
chain->reap = NULL; /* prepare for deletions */
switch (cmd) {
- case 0: /* delete rules with given number (0 is special means all) */
- case 1: /* delete all rules with given set number, rule->set == rulenum */
- case 5: /* delete rules with given number and with given set number.
- * rulenum - given rule number;
- * new_set - given set number.
- */
- /* locate first rule to delete (start), the one after the
- * last one (end), and count how many rules to delete (n)
+ case 0: /* delete rules "num" (num == 0 matches all) */
+ case 1: /* delete all rules in set N */
+ case 5: /* delete rules with number N and set "new_set". */
+
+ /*
+ * Locate first rule to delete (start), the rule after
+ * the last one to delete (end), and count how many
+ * rules to delete (n). Always use keep_rule() to
+ * determine which rules to keep.
*/
n = 0;
- if (cmd == 1) { /* look for a specific set, must scan all */
- for (start = -1, i = 0; i < chain->n_rules; i++) {
- if (chain->map[start]->set != rulenum)
+ if (cmd == 1) {
+ /* look for a specific set including RESVD_SET.
+ * Must scan the entire range, ignore num.
+ */
+ new_set = num;
+ for (start = -1, end = i = 0; i < chain->n_rules; i++) {
+ if (keep_rule(chain->map[i], cmd, new_set, 0))
continue;
if (start < 0)
start = i;
}
end++; /* first non-matching */
} else {
- start = ipfw_find_rule(chain, rulenum, 0);
+ /* Optimized search on rule numbers */
+ start = ipfw_find_rule(chain, num, 0);
for (end = start; end < chain->n_rules; end++) {
rule = chain->map[end];
- if (rulenum > 0 && rule->rulenum != rulenum)
+ if (num > 0 && rule->rulenum != num)
break;
- if (rule->set != RESVD_SET &&
- (cmd == 0 || rule->set == new_set) )
+ if (!keep_rule(rule, cmd, new_set, num))
n++;
}
}
- if (n == 0 && arg == 0)
- break; /* special case, flush on empty ruleset */
- /* allocate the map, if needed */
- if (n > 0)
- map = get_map(chain, -n, 1 /* locked */);
- if (n == 0 || map == NULL) {
+
+ if (n == 0) {
+ /* A flush request (arg == 0) on empty ruleset
+ * returns with no error. On the contrary,
+ * if there is no match on a specific request,
+ * we return EINVAL.
+ */
+ error = (arg == 0) ? 0 : EINVAL;
+ break;
+ }
+
+ /* We have something to delete. Allocate the new map */
+ map = get_map(chain, -n, 1 /* locked */);
+ if (map == NULL) {
error = EINVAL;
- break;
+ break;
}
- /* copy the initial part of the map */
+
+ /* 1. bcopy the initial part of the map */
if (start > 0)
bcopy(chain->map, map, start * sizeof(struct ip_fw *));
- /* copy active rules between start and end */
+ /* 2. copy active rules between start and end */
for (i = ofs = start; i < end; i++) {
rule = chain->map[i];
- if (!(rule->set != RESVD_SET &&
- (cmd == 0 || rule->set == new_set) ))
- map[ofs++] = chain->map[i];
+ if (keep_rule(rule, cmd, new_set, num))
+ map[ofs++] = rule;
}
- /* finally the tail */
+ /* 3. copy the final part of the map */
bcopy(chain->map + end, map + ofs,
(chain->n_rules - end) * sizeof(struct ip_fw *));
+ /* 4. swap the maps (under BH_LOCK) */
map = swap_map(chain, map, chain->n_rules - n);
- /* now remove the rules deleted */
+ /* 5. now remove the rules deleted from the old map */
for (i = start; i < end; i++) {
+ int l;
rule = map[i];
- if (rule->set != RESVD_SET &&
- (cmd == 0 || rule->set == new_set) ) {
- int l = RULESIZE(rule);
-
- chain->static_len -= l;
- ipfw_remove_dyn_children(rule);
- rule->x_next = chain->reap;
- chain->reap = rule;
- }
+ if (keep_rule(rule, cmd, new_set, num))
+ continue;
+ l = RULESIZE(rule);
+ chain->static_len -= l;
+ ipfw_remove_dyn_children(rule);
+ rule->x_next = chain->reap;
+ chain->reap = rule;
}
break;
- case 2: /* move rules with given number to new set */
- IPFW_UH_WLOCK(chain);
- for (i = 0; i < chain->n_rules; i++) {
+ /*
+ * In the next 3 cases the loop stops at (n_rules - 1)
+ * because the default rule is never eligible..
+ */
+
+ case 2: /* move rules with given RULE number to new set */
+ for (i = 0; i < chain->n_rules - 1; i++) {
rule = chain->map[i];
- if (rule->rulenum == rulenum)
+ if (rule->rulenum == num)
rule->set = new_set;
}
- IPFW_UH_WUNLOCK(chain);
break;
- case 3: /* move rules with given set number to new set */
- IPFW_UH_WLOCK(chain);
- for (i = 0; i < chain->n_rules; i++) {
+ case 3: /* move rules with given SET number to new set */
+ for (i = 0; i < chain->n_rules - 1; i++) {
rule = chain->map[i];
- if (rule->set == rulenum)
+ if (rule->set == num)
rule->set = new_set;
}
- IPFW_UH_WUNLOCK(chain);
break;
case 4: /* swap two sets */
- IPFW_UH_WLOCK(chain);
- for (i = 0; i < chain->n_rules; i++) {
+ for (i = 0; i < chain->n_rules - 1; i++) {
rule = chain->map[i];
- if (rule->set == rulenum)
+ if (rule->set == num)
rule->set = new_set;
else if (rule->set == new_set)
- rule->set = rulenum;
+ rule->set = num;
}
- IPFW_UH_WUNLOCK(chain);
break;
}
+
rule = chain->reap;
chain->reap = NULL;
IPFW_UH_WUNLOCK(chain);
for (i = 0; i < chain->n_rules; i++) {
rule = chain->map[i];
if (rule->rulenum == rulenum) {
- if (cmd == 0 || rule->set == set)
- clear_counters(rule, log_only);
+ if (cmd == 0 || rule->set == set)
+ clear_counters(rule, log_only);
cleared = 1;
}
if (rule->rulenum > rulenum)
break;
- }
+ }
if (!cleared) { /* we did not find any matching rules */
- IPFW_WUNLOCK(chain);
+ IPFW_UH_RUNLOCK(chain);
return (EINVAL);
}
msg = log_only ? "logging count reset" : "cleared";
goto check_size;
case O_NETGRAPH:
case O_NGTEE:
- if (!NG_IPFW_LOADED)
+ if (ng_ipfw_input_p == NULL)
return EINVAL;
else
goto check_size;
return EINVAL;
}
+
+/*
+ * Translation of requests for compatibility with FreeBSD 7.2/8.
+ * a static variable tells us if we have an old client from userland,
+ * and if necessary we translate requests and responses between the
+ * two formats.
+ */
+static int is7 = 0;
+
+/*
+ * Rule header in the layout described here as the FreeBSD 7.2 format.
+ * It is used by convert_rule_to_7() / convert_rule_to_8() and by the
+ * RULESIZE7() macro when talking to an old userland client.
+ * The commented-out 'id' member marks the field that exists only in
+ * the version 8 layout.
+ */
+struct ip_fw7 {
+	struct ip_fw7	*next;		/* linked list of rules */
+	struct ip_fw7	*next_rule;	/* ptr to next [skipto] rule */
+	/* 'next_rule' is used to pass up 'set_disable' status */
+
+	uint16_t	act_ofs;	/* offset of action in 32-bit units */
+	uint16_t	cmd_len;	/* # of 32-bit words in cmd */
+	uint16_t	rulenum;	/* rule number */
+	uint8_t		set;		/* rule set (0..31) */
+	// #define RESVD_SET 31 /* set for default and persistent rules */
+	uint8_t		_pad;		/* padding */
+	// uint32_t id; /* rule id, only in v.8 */
+	/* These fields are present in all rules. */
+	uint64_t	pcnt;		/* Packet counter */
+	uint64_t	bcnt;		/* Byte counter */
+	uint32_t	timestamp;	/* tv_sec of last match */
+
+	/* first command word; RULESIZE7() accounts for cmd_len words */
+	ipfw_insn	cmd[1];		/* storage for commands */
+};
+
+ int convert_rule_to_7(struct ip_fw *rule);
+int convert_rule_to_8(struct ip_fw *rule);
+
+#ifndef RULESIZE7
+#define RULESIZE7(rule) (sizeof(struct ip_fw7) + \
+ ((struct ip_fw7 *)(rule))->cmd_len * 4 - 4)
+#endif
+
+
/*
* Copy the static and dynamic rules to the supplied buffer
* and return the amount of space actually used.
boot_seconds = boottime.tv_sec;
for (i = 0; i < chain->n_rules; i++) {
rule = chain->map[i];
+
+ if (is7) {
+ /* Convert rule to FreeBSD 7.2 format */
+ l = RULESIZE7(rule);
+ if (bp + l + sizeof(uint32_t) <= ep) {
+ int error;
+ bcopy(rule, bp, l + sizeof(uint32_t));
+ error = convert_rule_to_7((struct ip_fw *) bp);
+ if (error)
+ return 0; /*XXX correct? */
+ /*
+ * XXX HACK. Store the disable mask in the "next"
+ * pointer in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
+ bcopy(&V_set_disable,
+ &(((struct ip_fw7 *)bp)->next_rule),
+ sizeof(V_set_disable));
+ if (((struct ip_fw7 *)bp)->timestamp)
+ ((struct ip_fw7 *)bp)->timestamp += boot_seconds;
+ bp += l;
+ }
+ continue; /* go to next rule */
+ }
+
+ /* normal mode, don't touch rules */
l = RULESIZE(rule);
if (bp + l > ep) { /* should not happen */
printf("overflow dumping static rules\n");
}
dst = (struct ip_fw *)bp;
bcopy(rule, dst, l);
- /*
- * XXX HACK. Store the disable mask in the "next"
- * pointer in a wild attempt to keep the ABI the same.
- * Why do we do this on EVERY rule?
- */
+ /*
+ * XXX HACK. Store the disable mask in the "next"
+ * pointer in a wild attempt to keep the ABI the same.
+ * Why do we do this on EVERY rule?
+ */
bcopy(&V_set_disable, &dst->next_rule, sizeof(V_set_disable));
if (dst->timestamp)
dst->timestamp += boot_seconds;
size = chain->static_len;
size += ipfw_dyn_len();
- if (size >= sopt->sopt_valsize)
- break;
- buf = malloc(size, M_TEMP, M_WAITOK);
+ if (size >= sopt->sopt_valsize)
+ break;
+ buf = malloc(size, M_TEMP, M_WAITOK);
if (buf == NULL)
break;
IPFW_UH_RLOCK(chain);
IPFW_UH_RUNLOCK(chain);
if (size >= want)
error = sooptcopyout(sopt, buf, len);
- free(buf, M_TEMP);
+ free(buf, M_TEMP);
if (size >= want)
break;
}
case IP_FW_ADD:
rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK);
error = sooptcopyin(sopt, rule, RULE_MAXSIZE,
- sizeof(struct ip_fw) );
+ sizeof(struct ip_fw7) );
+
+ /*
+ * If the size of commands equals RULESIZE7 then we assume
+ * a FreeBSD7.2 binary is talking to us (set is7=1).
+ * is7 is persistent so the next 'ipfw list' command
+ * will use this format.
+ * NOTE: If wrong version is guessed (this can happen if
+ * the first ipfw command is 'ipfw [pipe] list')
+ * the ipfw binary may crash or loop infinitely...
+ *
+ * The rule buffer is inspected only when the copyin succeeded
+ * (otherwise its content is undefined), and every failure
+ * falls through to the common free() below so the temporary
+ * rule is never leaked by an early return.
+ */
+ if (error == 0 && sopt->sopt_valsize == RULESIZE7(rule)) {
+ is7 = 1;
+ error = convert_rule_to_8(rule);
+ if (error == 0)
+ error = check_ipfw_struct(rule, RULESIZE(rule));
+ } else {
+ is7 = 0;
if (error == 0)
error = check_ipfw_struct(rule, sopt->sopt_valsize);
+ }
if (error == 0) {
/* locking is done within ipfw_add_rule() */
error = ipfw_add_rule(chain, rule);
size = RULESIZE(rule);
- if (!error && sopt->sopt_dir == SOPT_GET)
+ if (!error && sopt->sopt_dir == SOPT_GET) {
+ if (is7) {
+ /* hand the rule back in the format the client sent */
+ error = convert_rule_to_7(rule);
+ size = RULESIZE7(rule);
+ }
+ /* skip the copyout if the conversion failed */
+ if (error == 0)
error = sooptcopyout(sopt, rule, size);
}
+ }
free(rule, M_TEMP);
break;
return (error);
#undef RULE_MAXSIZE
}
+
+
+#define RULE_MAXSIZE (256*sizeof(u_int32_t))
+
+/* Functions to convert rules 7.2 <==> 8.0 */
+/*
+ * Convert, in place, a rule from the version 8 layout (struct ip_fw)
+ * to the 7.2 layout (struct ip_fw7).
+ *
+ * A temporary copy of the version 8 rule is taken first, because the
+ * two layouts overlap in the caller's buffer; header fields and
+ * commands are then rewritten from that copy. Opcodes greater than
+ * O_NAT are decremented because O_REASS does not exist in 7.2.
+ *
+ * Returns 0 on success, ENOMEM if the temporary copy cannot be
+ * allocated, EINVAL if the rule is larger than RULE_MAXSIZE or if a
+ * command has a zero or truncated length. On EINVAL the rule may be
+ * left partially converted.
+ */
+int
+convert_rule_to_7(struct ip_fw *rule)
+{
+	/* Used to modify original rule */
+	struct ip_fw7 *rule7 = (struct ip_fw7 *)rule;
+	/* copy of original rule, version 8 */
+	struct ip_fw *tmp;
+
+	/* Used to copy commands */
+	ipfw_insn *ccmd, *dst;
+	int ll = 0, ccmdlen = 0;
+	int error = 0;
+	size_t len = RULESIZE(rule);
+
+	/* never read or walk past the temporary buffer */
+	if (len > RULE_MAXSIZE)
+		return EINVAL;
+
+	tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
+	if (tmp == NULL)
+		return ENOMEM;
+	/* copy only the bytes that belong to the rule */
+	bcopy(rule, tmp, len);
+
+	/* Copy fields */
+	rule7->_pad = tmp->_pad;
+	rule7->set = tmp->set;
+	rule7->rulenum = tmp->rulenum;
+	rule7->cmd_len = tmp->cmd_len;
+	rule7->act_ofs = tmp->act_ofs;
+	rule7->next_rule = (struct ip_fw7 *)tmp->next_rule;
+	rule7->next = (struct ip_fw7 *)tmp->x_next;
+	rule7->pcnt = tmp->pcnt;
+	rule7->bcnt = tmp->bcnt;
+	rule7->timestamp = tmp->timestamp;
+
+	/* Copy commands */
+	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule7->cmd ;
+	    ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
+		ccmdlen = F_LEN(ccmd);
+
+		/*
+		 * Validate before copying: a zero length would loop
+		 * forever, a length beyond the remaining words would
+		 * copy past the end of the rule.
+		 */
+		if (ccmdlen == 0 || ccmdlen > ll) {
+			printf("ipfw: opcode %d size truncated\n",
+				ccmd->opcode);
+			error = EINVAL;
+			break;
+		}
+
+		bcopy(ccmd, dst, ccmdlen * sizeof(uint32_t));
+
+		if (dst->opcode > O_NAT)
+			/* O_REASS doesn't exist in 7.2 version, so
+			 * decrement opcode if it is after O_REASS
+			 */
+			dst->opcode--;
+	}
+	/* single exit point so the temporary copy is always released */
+	free(tmp, M_TEMP);
+
+	return error;
+}
+
+/*
+ * Convert, in place, a rule from the 7.2 layout (struct ip_fw7) to
+ * the version 8 layout (struct ip_fw).
+ *
+ * The buffer behind 'rule' must be able to hold the converted rule;
+ * the visible caller passes a RULE_MAXSIZE buffer, so the converted
+ * size is checked against RULE_MAXSIZE before anything is written.
+ * cmd_len may come straight from userland and is not trusted.
+ * Opcodes greater than O_NAT are incremented because O_REASS does
+ * not exist in 7.2. The rule id is set to 0.
+ *
+ * Returns 0 on success, ENOMEM if the temporary copy cannot be
+ * allocated, EINVAL if the rule does not fit in RULE_MAXSIZE or if a
+ * command has a zero or truncated length. On EINVAL raised while
+ * copying commands the rule may be left partially converted.
+ */
+int
+convert_rule_to_8(struct ip_fw *rule)
+{
+	/* Used to modify original rule */
+	struct ip_fw7 *rule7 = (struct ip_fw7 *) rule;
+
+	/* Used to copy commands */
+	ipfw_insn *ccmd, *dst;
+	int ll = 0, ccmdlen = 0;
+	int error = 0;
+
+	/* Copy of original rule */
+	struct ip_fw7 *tmp;
+
+	/* both the source and the converted rule must fit in the buffers */
+	if (RULESIZE7(rule7) > RULE_MAXSIZE ||
+	    sizeof(struct ip_fw) + rule7->cmd_len * 4 - 4 > RULE_MAXSIZE)
+		return EINVAL;
+
+	tmp = malloc(RULE_MAXSIZE, M_TEMP, M_NOWAIT | M_ZERO);
+	if (tmp == NULL)
+		return ENOMEM;
+
+	/* copy only the bytes that belong to the rule */
+	bcopy(rule7, tmp, RULESIZE7(rule7));
+
+	for (ll = tmp->cmd_len, ccmd = tmp->cmd, dst = rule->cmd ;
+	    ll > 0 ; ll -= ccmdlen, ccmd += ccmdlen, dst += ccmdlen) {
+		ccmdlen = F_LEN(ccmd);
+
+		/*
+		 * Validate before copying: a zero length would loop
+		 * forever, a length beyond the remaining words would
+		 * copy past the end of the rule.
+		 */
+		if (ccmdlen == 0 || ccmdlen > ll) {
+			printf("ipfw: opcode %d size truncated\n",
+				ccmd->opcode);
+			error = EINVAL;
+			break;
+		}
+
+		bcopy(ccmd, dst, ccmdlen * sizeof(uint32_t));
+
+		if (dst->opcode > O_NAT)
+			/* O_REASS doesn't exist in 7.2 version, so
+			 * increment opcode if it is after O_REASS
+			 */
+			dst->opcode++;
+	}
+
+	/* the header is rewritten only when all commands were converted */
+	if (error == 0) {
+		rule->_pad = tmp->_pad;
+		rule->set = tmp->set;
+		rule->rulenum = tmp->rulenum;
+		rule->cmd_len = tmp->cmd_len;
+		rule->act_ofs = tmp->act_ofs;
+		rule->next_rule = (struct ip_fw *)tmp->next_rule;
+		rule->x_next = (struct ip_fw *)tmp->next;
+		rule->id = 0; /* XXX see if is ok = 0 */
+		rule->pcnt = tmp->pcnt;
+		rule->bcnt = tmp->bcnt;
+		rule->timestamp = tmp->timestamp;
+	}
+
+	/* single exit point so the temporary copy is always released */
+	free (tmp, M_TEMP);
+	return error;
+}
+
/* end of file */
#include <net/vnet.h>
#include <netinet/in.h>
+#include <netinet/ip_var.h> /* struct ipfw_rule_ref */
#include <netinet/ip_fw.h>
+#include <sys/queue.h> /* LIST_HEAD */
#include <netinet/ipfw/ip_fw_private.h>
#ifdef MAC
}
void
-ipfw_flush_tables(struct ip_fw_chain *ch)
+ipfw_destroy_tables(struct ip_fw_chain *ch)
{
uint16_t tbl;
+ struct radix_node_head *rnh;
IPFW_WLOCK_ASSERT(ch);
- for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++)
+ for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) {
ipfw_flush_table(ch, tbl);
+ rnh = ch->tables[tbl];
+ rn_detachhead((void **)&rnh);
+ }
}
int
*/
/*
- * $Id: ipfw2_mod.c 4671 2010-01-04 17:50:51Z luigi $
+ * $Id: ipfw2_mod.c 5797 2010-03-21 16:31:08Z luigi $
*
* The main interface to build ipfw+dummynet as a linux module.
* (and possibly as a windows module as well, though that part
#include <netinet/ipfw/ip_fw_private.h> /* ip_fw_ctl_t, ip_fw_chk_t */
#include <netinet/ip_dummynet.h> /* ip_dn_ctl_t, ip_dn_io_t */
#include <net/pfil.h> /* PFIL_IN, PFIL_OUT */
+
+#ifdef __linux__
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
#warning --- inet_hashtables not present on 2.4
#include <linux/tcp.h>
#else
#include <net/inet_hashtables.h> /* inet_lookup */
#endif
+#endif /* __linux__ */
+
#include <net/route.h> /* inet_iif */
/*
void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
+/* Divert hooks. */
+void (*ip_divert_ptr)(struct mbuf *m, int incoming);
+
+/* ng_ipfw hooks. */
+ng_ipfw_input_t *ng_ipfw_input_p = NULL;
+
/*---
* Glue code to implement the registration of children with the parent.
* Each child should call my_mod_register() when linking, so that
my_mod_register(const char *name, int order,
struct moduledata *mod, void *init, void *uninit)
{
- struct mod_args m = { .name = name, .order = order,
- .mod = mod, .init = init, .uninit = uninit };
+ struct mod_args m;
+
+ m.name = name;
+ m.order = order;
+ m.mod = mod;
+ m.init = init;
+ m.uninit = uninit;
printf("%s %s called\n", __FUNCTION__, name);
if (mod_idx < sizeof(mods) / sizeof(mods[0]))
memset(&t, 0, sizeof(t));
s->sopt_td = &t;
- // printf("%s called with cmd %d len %d\n", __FUNCTION__, cmd, len);
+ //printf("%s called with cmd %d len %d sopt %p user %p\n", __FUNCTION__, cmd, len, s, user);
- if (cmd < IP_DUMMYNET_CONFIGURE && ip_fw_ctl_ptr)
+ if (ip_fw_ctl_ptr && cmd != IP_DUMMYNET3 && (cmd == IP_FW3 ||
+ cmd < IP_DUMMYNET_CONFIGURE))
ret = ip_fw_ctl_ptr(s);
- else if (cmd >= IP_DUMMYNET_CONFIGURE && ip_dn_ctl_ptr)
+ else if (ip_dn_ctl_ptr && (cmd == IP_DUMMYNET3 ||
+ cmd >= IP_DUMMYNET_CONFIGURE))
ret = ip_dn_ctl_ptr(s);
-
+
return -ret; /* errors are < 0 on linux */
}
-#ifdef _WIN32
-
-void
-netisr_dispatch(int __unused num, struct mbuf *m)
-{
-}
-
int
ip_output(struct mbuf *m, struct mbuf __unused *opt,
struct route __unused *ro, int __unused flags,
struct ip_moptions __unused *imo, struct inpcb __unused *inp)
{
netisr_dispatch(0, m);
- return 0;
+ return 0;
}
-#else /* this is the linux glue */
/*
* setsockopt hook has no return value other than the error code.
*/
-static int
+int
do_ipfw_set_ctl(struct sock __unused *sk, int cmd,
void __user *user, unsigned int len)
{
struct sockopt s; /* pass arguments */
-
return ipfw_ctl_h(&s, cmd, SOPT_SET, len, user);
}
/*
* getsockopt can can return a block of data in response.
*/
-static int
+int
do_ipfw_get_ctl(struct sock __unused *sk,
int cmd, void __user *user, int *len)
{
return ret;
}
+#ifdef __linux__
+
/*
* declare our [get|set]sockopt hooks
*/
}
m->m_skb = skb;
- m->m_len = skb->len; /* len in this skbuf */
+ m->m_len = skb->len; /* len from ip header to end */
m->m_pkthdr.len = skb->len; /* total packet len */
m->m_pkthdr.rcvif = info->indev;
m->queue_entry = info;
REINJECT(info, ((num == -1)?NF_DROP:NF_STOP)); /* accept but no more firewall */
}
-int
-ip_output(struct mbuf *m, struct mbuf __unused *opt,
- struct route __unused *ro, int __unused flags,
- struct ip_moptions __unused *imo, struct inpcb __unused *inp)
-{
- netisr_dispatch(0, m);
- return 0;
-}
-
/*
* socket lookup function for linux.
* This code is used to associate uid, gid, jail/xid to packets,
SET_MOD_OWNER
},
};
-#endif /* !__linux__ */
+#endif /* __linux__ */
/* descriptors for the children, until i find a way for the
* linker to produce them
*/
extern moduledata_t *moddesc_ipfw;
extern moduledata_t *moddesc_dummynet;
+extern moduledata_t *moddesc_dn_fifo;
+extern moduledata_t *moddesc_dn_wf2qp;
+extern moduledata_t *moddesc_dn_rr;
+extern moduledata_t *moddesc_dn_qfq;
+extern moduledata_t *moddesc_dn_prio;
extern void *sysinit_ipfw_init;
extern void *sysuninit_ipfw_destroy;
extern void *sysinit_vnet_ipfw_init;
/*
* Module glue - init and exit function.
*/
-static int __init
+int __init
ipfw_module_init(void)
{
int ret = 0;
-
- printf("%s in-hook %d svn id %s\n", __FUNCTION__, IPFW_HOOK_IN, "$Id: ipfw2_mod.c 4671 2010-01-04 17:50:51Z luigi $");
+#ifdef _WIN32
+ unsigned long resolution;
+#endif
rn_init(64);
-
my_mod_register("ipfw", 1, moddesc_ipfw, NULL, NULL);
my_mod_register("sy_ipfw", 2, NULL,
sysinit_ipfw_init, sysuninit_ipfw_destroy);
my_mod_register("sy_Vnet_ipfw", 3, NULL,
sysinit_vnet_ipfw_init, sysuninit_vnet_ipfw_uninit);
my_mod_register("dummynet", 4, moddesc_dummynet, NULL, NULL);
+ my_mod_register("dn_fifo", 5, moddesc_dn_fifo, NULL, NULL);
+ my_mod_register("dn_wf2qp", 6, moddesc_dn_wf2qp, NULL, NULL);
+ my_mod_register("dn_rr", 7, moddesc_dn_rr, NULL, NULL);
+ my_mod_register("dn_qfq", 8, moddesc_dn_qfq, NULL, NULL);
+ my_mod_register("dn_prio", 9, moddesc_dn_prio, NULL, NULL);
init_children();
#ifdef _WIN32
- return ret;
+ resolution = ExSetTimerResolution(1, TRUE);
+ printf("*** ExSetTimerResolution: resolution set to %d n-sec ***\n",resolution);
+#endif
+#ifdef EMULATE_SYSCTL
+ keinit_GST();
+#endif
-#else /* linux hook */
+#ifdef __linux__
/* sockopt register, in order to talk with user space */
ret = nf_register_sockopt(&ipfw_sockopts);
if (ret < 0) {
fini_children();
printf("%s error\n", __FUNCTION__);
+#endif /* __linux__ */
return ret;
-#endif /* linux */
}
/* module shutdown */
-static void __exit
+void __exit
ipfw_module_exit(void)
{
+#ifdef EMULATE_SYSCTL
+ keexit_GST();
+#endif
#ifdef _WIN32
+ ExSetTimerResolution(0,FALSE);
+
#else /* linux hook */
nf_unregister_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops));
/* maybe drain the queue before unregistering ? */
nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) );
nf_unregister_sockopt(&ipfw_sockopts);
-#endif /* linux */
+#endif /* __linux__ */
fini_children();
--- /dev/null
+/*
+ * Copyright (C) 2010 Luigi Rizzo, Francesco Magno, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * kernel variables and functions that are not available in Windows.
+ */
+
+#include <net/pfil.h> /* provides PFIL_IN and PFIL_OUT */
+#include <arpa/inet.h>
+#include <netinet/in.h> /* in_addr */
+#include <ndis.h>
+#include <sys/mbuf.h>
+#include <passthru.h>
+
+/* credentials check */
+/*
+ * Stub: all arguments are ignored and the function always returns 0.
+ * NOTE(review): presumably 0 means "credentials do not match", so
+ * uid/gid based rules never match on this platform -- confirm against
+ * the caller.
+ */
+int
+cred_check(void *_insn, int proto, struct ifnet *oif,
+	struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip,
+	u_int16_t src_port, struct bsd_ucred *u, int *ugid_lookupp,
+	struct sk_buff *skb)
+{
+	return 0;
+}
+
+/*
+ * as good as anywhere, place here the missing calls
+ */
+
+/*
+ * Allocate 'size' bytes from the non-paged pool (tag 'wfpi') and
+ * zero them. Returns NULL when the pool allocation fails.
+ */
+void *
+my_alloc(int size)
+{
+	void *buf;
+
+	buf = ExAllocatePoolWithTag(NonPagedPool, size, 'wfpi');
+	if (buf == NULL)
+		return NULL;
+	memset(buf, 0, size);
+	return buf;
+}
+
+/*
+ * Print the message and then spin forever; this function never returns.
+ * Only the format string itself is printed: the variable arguments are
+ * not consumed, so conversion specifiers in 'fmt' appear literally.
+ */
+void
+panic(const char *fmt, ...)
+{
+	printf("%s", fmt);
+	for (;;);	/* halt here, there is nothing to return to */
+}
+
+int securelevel = 0;
+
+/*
+ * Find first set: return the 1-based position of the least
+ * significant bit set in 'bits', or 0 when no bit is set.
+ */
+int ffs(int bits)
+{
+	int pos = 1;
+
+	if (bits == 0)
+		return (0);
+	/* at least one bit is set, so the scan always terminates */
+	while ((bits & 1) == 0) {
+		bits >>= 1;
+		pos++;
+	}
+	return (pos);
+}
+
+/*
+ * Fill 'tv' with the current system time as seconds and microseconds.
+ *
+ * KeQuerySystemTime() is the base clock. While two successive calls
+ * observe the same system time value, the elapsed time is
+ * interpolated from the performance counter, using the counter and
+ * system time saved at the last observed change.
+ *
+ * NOTE(review): the static state is updated with no locking visible
+ * here -- confirm callers serialize access.
+ * NOTE(review): the value is derived directly from KeQuerySystemTime,
+ * so the epoch is whatever that call uses -- confirm callers do not
+ * expect a Unix epoch.
+ */
+void
+do_gettimeofday(struct timeval *tv)
+{
+	static LARGE_INTEGER prevtime;		//system time in 100-nsec resolution
+	static LARGE_INTEGER prevcount;		//RTC counter value
+	static LARGE_INTEGER freq;		//frequency
+
+	LARGE_INTEGER currtime;
+	LARGE_INTEGER currcount;
+	if (prevtime.QuadPart == 0) { //first time we ask for system time
+		/* take the initial (system time, counter) reference pair */
+		KeQuerySystemTime(&prevtime);
+		prevcount = KeQueryPerformanceCounter(&freq);
+		currtime.QuadPart = prevtime.QuadPart;
+	} else {
+		KeQuerySystemTime(&currtime);
+		currcount = KeQueryPerformanceCounter(&freq);
+		if (currtime.QuadPart == prevtime.QuadPart) {
+			//time has NOT changed, calculate time using ticks and DO NOT update
+			LONGLONG difftime = 0; //difference in 100-nsec
+			LONGLONG diffcount = 0; //clock count difference
+			//printf("time has NOT changed\n");
+			diffcount = currcount.QuadPart - prevcount.QuadPart;
+			/* ticks * 10^7 / (ticks per second) = 100-nsec units */
+			diffcount *= 10000000;
+			difftime = diffcount / freq.QuadPart;
+			currtime.QuadPart += difftime;
+		} else {
+			//time has changed, update and return SystemTime
+			//printf("time has changed\n");
+			prevtime.QuadPart = currtime.QuadPart;
+			prevcount.QuadPart = currcount.QuadPart;
+		}
+	}
+	currtime.QuadPart /= 10; //convert in usec
+	/* split the microsecond count into seconds and remainder */
+	tv->tv_sec = currtime.QuadPart / (LONGLONG)1000000;
+	tv->tv_usec = currtime.QuadPart % (LONGLONG)1000000;
+	//printf("sec %d usec %d\n",tv->tv_sec, tv->tv_usec);
+}
+
+/*
+ * Return the current system time in seconds, truncated to an int.
+ *
+ * KeQuerySystemTime() reports time in 100-nsec units, so one second
+ * is 10,000,000 units; dividing by 1,000,000 would return tenths of
+ * a second and make every timeout based on this value run ten times
+ * too fast.
+ * NOTE(review): despite the name this is derived from the system
+ * time, not from the time since boot, and the cast to int discards
+ * the high bits; callers should only rely on differences.
+ */
+int time_uptime_w32()
+{
+	int ret;
+	LARGE_INTEGER tm;
+	KeQuerySystemTime(&tm);
+	/* 100-nsec units -> seconds */
+	ret = (int)(tm.QuadPart / (LONGLONG)10000000);
+	return ret;
+}
+
+
+/*
+ * Windows version of firewall hook. We receive a partial copy of
+ * the packet which points to the original buffers. In output,
+ * the refcount has been already incremented.
+ * The function reconstructs
+ * the whole packet in a contiguous memory area, builds a fake mbuf,
+ * calls the firewall, does the eventual cleaning and returns
+ * to MiniportSend or ProtocolReceive, which will silently return
+ * (dropping packet) or continue its execution (allowing packet).
+ * The memory area contains:
+ * - the fake mbuf, filled with data needed by ipfw, and information
+ * for reinjection
+ * - the packet data
+ */
+void hexdump(PUCHAR,int, const char *);
+static char _if_in[] = "incoming";
+static char _if_out[] = "outgoing";
+
+/*
+ * Firewall hook for Windows: copy the NDIS packet into a contiguous
+ * area that starts with a fake mbuf, run ipfw on it and tell the
+ * caller what to do with the packet.
+ *
+ * pNdisPacket	packet to be examined
+ * direction	INCOMING or OUTGOING
+ * Context	adapter context (PADAPT), saved in the mbuf and used to
+ *		select the packet pool
+ *
+ * Returns PASS when the packet is not IPv4 or is accepted, DUMMYNET
+ * when the firewall kept the packet (m was consumed and ret == 0),
+ * DROP when the firewall dropped it or resources are missing.
+ */
+int
+ipfw2_qhandler_w32(PNDIS_PACKET pNdisPacket, int direction,
+	NDIS_HANDLE Context)
+{
+	unsigned int		BufferCount = 0;
+	unsigned		TotalPacketLength = 0;
+	PNDIS_BUFFER		pCurrentBuffer = NULL;
+	PNDIS_BUFFER		pNextBuffer = NULL;
+	struct mbuf*		m;
+	unsigned char*		payload = NULL;
+	unsigned int		ofs, l;
+	unsigned short		EtherType = 0;
+	unsigned int		i = 0;
+	int			ret = 0;
+	PNDIS_BUFFER		pNdisBuffer, old_head, old_tail;
+	NDIS_HANDLE		PacketPool;
+	PADAPT			pAdapt;
+	NDIS_STATUS		Status;
+
+	/* In NDIS, packets are a chain of NDIS_BUFFER. We query
+	 * the packet to get a pointer of chain's head, the length
+	 * of the chain, and the length of the packet itself.
+	 * Then allocate a buffer for the mbuf and the payload.
+	 */
+	NdisQueryPacket(pNdisPacket, NULL, &BufferCount,
+		&pCurrentBuffer, &TotalPacketLength);
+	/*
+	 * A frame shorter than the 14-byte MAC header cannot carry IP.
+	 * Let it through like any other non-IP frame instead of
+	 * underflowing the length and reading the EtherType out of bounds.
+	 */
+	if (TotalPacketLength < 14)
+		return PASS;
+	m = malloc(sizeof(struct mbuf) + TotalPacketLength, 0, 0 );
+	if (m == NULL) //resource shortage, drop the packet
+		goto drop_pkt;
+
+	/* set mbuf fields to point past the MAC header.
+	 * Also set additional W32 info
+	 */
+	payload = (unsigned char*)(m + 1);
+	m->m_len = m->m_pkthdr.len = TotalPacketLength-14;
+	m->m_pkthdr.rcvif = (void *)((direction==INCOMING) ? _if_in : NULL);
+	m->m_data = payload + 14; /* past the MAC header */
+	m->direction = direction;
+	m->context = Context;
+	m->pkt = pNdisPacket;
+
+	/*
+	 * Now copy the data from the Windows buffers to the mbuf.
+	 */
+	for (i=0, ofs = 0; i < BufferCount; i++) {
+		unsigned char* src = NULL;
+		NdisQueryBufferSafe(pCurrentBuffer, &src, &l,
+			NormalPagePriority);
+		if (src == NULL) {
+			/* the buffer could not be mapped (low resources) */
+			free(m, 0);
+			goto drop_pkt;
+		}
+		bcopy(src, payload + ofs, l);
+		ofs += l;
+		NdisGetNextBuffer(pCurrentBuffer, &pNextBuffer);
+		pCurrentBuffer = pNextBuffer;
+	}
+	/*
+	 * Identify EtherType. If the packet is not IP, simply allow
+	 * and don't bother the firewall. XXX should be done before.
+	 */
+	EtherType = *(unsigned short*)(payload + 12);
+	EtherType = RtlUshortByteSwap(EtherType);
+	if (EtherType != 0x0800) {
+		//DbgPrint("ethertype = %X, skipping ipfw\n",EtherType);
+		free(m, 0);
+		return PASS;
+	}
+
+	/*
+	 * Now build a buffer descriptor to replace the original chain.
+	 */
+	pAdapt = Context;
+	PacketPool = direction == OUTGOING ?
+		pAdapt->SendPacketPoolHandle : pAdapt->RecvPacketPoolHandle;
+	NdisAllocateBuffer(&Status, &pNdisBuffer,
+		PacketPool, payload, m->m_pkthdr.len+14);
+	if (Status != NDIS_STATUS_SUCCESS) {
+		/* release the copy, nobody else references it yet */
+		free(m, 0);
+		goto drop_pkt;
+	}
+	/*
+	 * Save the old buffer pointers, and put the new one
+	 * into the chain.
+	 */
+	pNdisBuffer->Next = NULL;
+	old_head = NDIS_PACKET_FIRST_NDIS_BUFFER(pNdisPacket);
+	old_tail = NDIS_PACKET_LAST_NDIS_BUFFER(pNdisPacket);
+	NdisReinitializePacket(pNdisPacket);
+	NdisChainBufferAtFront(pNdisPacket, pNdisBuffer);
+#if 0
+	if (direction == INCOMING) {
+		DBGPRINT(("incoming: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), TotalPacketLength));
+	} else {
+		DBGPRINT(("outgoing: proto %u (%s), src %08X, dst %08X, sport %u, dport %u, len %u\n", *(payload+14+9), texify_proto(*(payload+14+9)), *(unsigned int*)(payload+14+12), *(unsigned int*)(payload+14+16), ntohs((*((unsigned short int*)(payload+14+20)))), ntohs((*((unsigned short int*)(payload+14+22)))), TotalPacketLength));
+	}
+#endif
+	if (direction == INCOMING)
+		ret = ipfw_check_hook(NULL, &m, NULL, PFIL_IN, NULL);
+	else
+		ret = ipfw_check_hook(NULL, &m, (struct ifnet*)_if_out, PFIL_OUT, NULL);
+
+	if (m != NULL) {
+		/* Accept. Restore the old buffer chain, free
+		 * the mbuf and return PASS.
+		 */
+		//DBGPRINT(("accepted\n"));
+		NdisReinitializePacket(pNdisPacket);
+		NDIS_PACKET_FIRST_NDIS_BUFFER(pNdisPacket) = old_head;
+		NDIS_PACKET_LAST_NDIS_BUFFER(pNdisPacket) = old_tail;
+		NdisFreeBuffer(pNdisBuffer);
+		m_freem(m);
+		return PASS;
+	} else if (ret == 0) {
+		/* dummynet has kept the packet, will reinject later. */
+		//DBGPRINT(("kept by dummynet\n"));
+		return DUMMYNET;
+	} else {
+		/*
+		 * Packet dropped by ipfw or dummynet. Nothing to do as
+		 * FREE_PKT already freed the fake mbuf
+		 */
+		//DBGPRINT(("dropped by dummynet, ret = %i\n", ret));
+		return DROP;
+	}
+drop_pkt:
+	/* for some reason we cannot proceed. Free any resources
+	 * including those received from above, and return
+	 * faking success. XXX this must be fixed later.
+	 */
+	NdisFreePacket(pNdisPacket);
+	return DROP;
+}
+
+/*
+ * Windows reinjection function.
+ * The packet is already available as m->pkt, so we only
+ * need to send it to the right place.
+ * Normally a ndis intermediate driver allocates
+ * a fresh descriptor, while the actual data's ownership is
+ * retained by the protocol, or the miniport below.
+ * Since an intermediate driver behaves as a miniport driver
+ * at the upper edge (towards the protocol), and as a protocol
+ * driver at the lower edge (towards the NIC), when we handle a
+ * packet we have a reserved area in both directions (we can use
+ * only one for each direction at our own discretion).
+ * Normally this area is used to save a pointer to the original
+ * packet, so when the driver is done with it, the original descriptor
+ * can be retrieved, and the resources freed (packet descriptor,
+ * buffer descriptor(s) and the actual data). In our driver this
+ * area is used to mark the reinjected packets as 'orphan', because
+ * the original descriptor is gone long ago. This way we can handle
+ * correctly the resource freeing when the callback function
+ * is called by NDIS.
+ */
+
+/*
+ * Reinject a packet released by ipfw/dummynet, or simply discard it.
+ *
+ *   num - a negative value means "do not reinject, just free";
+ *         any other value requests reinjection.
+ *   m   - fake mbuf: the raw frame bytes follow the mbuf header in the
+ *         same allocation, m->context is the adapter, m->pkt the NDIS
+ *         packet (may be NULL) and m->direction the travel direction.
+ *
+ * Outgoing packets are passed to NdisSend() and all cleanup is left to
+ * the send-completion handler. Incoming packets are indicated upwards
+ * and then released here, as is everything that cannot be reinjected.
+ */
+void
+netisr_dispatch(int num, struct mbuf *m)
+{
+	unsigned char *frame = (unsigned char *)(m + 1);
+	PADAPT adapter = m->context;
+	PNDIS_PACKET ndis_pkt = m->pkt;
+	PNDIS_BUFFER ndis_buf;
+	NDIS_STATUS send_status;
+
+	if (num >= 0) {
+		const int outgoing = (m->direction == OUTGOING);
+		int ready;
+
+		/* Sample the state of the edge we are about to use under the lock. */
+		NdisAcquireSpinLock(&adapter->Lock);
+		if (outgoing) {
+			/* the adapter below must not be going down */
+			ready = !(adapter->PTDeviceState > NdisDeviceStateD0);
+			if (!ready) {
+				/* reinjection aborted. XXX should we notify up ? */
+				adapter->OutstandingSends--;
+			}
+		} else {
+			/*
+			 * the upper miniport edge must be initialized and
+			 * must not be in a low power state.
+			 * XXX we should notify the error.
+			 */
+			ready = !(!adapter->MiniportHandle ||
+			    adapter->MPDeviceState > NdisDeviceStateD0);
+		}
+		NdisReleaseSpinLock(&adapter->Lock);
+
+		if (ready && outgoing) {
+			/*
+			 * Use the 8-byte protocol reserved area: a NULL
+			 * OriginalPkt marks the packet as 'orphan', pMbuf
+			 * remembers the mbuf, so the SendComplete handler
+			 * recognizes a reinjected packet and frees it correctly.
+			 */
+			PSEND_RSVD rsvd = (PSEND_RSVD)(ndis_pkt->ProtocolReserved);
+
+			rsvd->OriginalPkt = NULL;
+			rsvd->pMbuf = m;
+			NdisSend(&send_status, adapter->BindingHandle, ndis_pkt);
+			if (send_status != NDIS_STATUS_PENDING) {
+				/* completed inline: run the callback ourselves */
+				PtSendComplete(m->context, m->pkt, send_status);
+			}
+			return;	/* never fall into the cleanup below */
+		}
+
+		if (ready) {
+			/*
+			 * The path going up is always synchronous, so the
+			 * 8-byte miniport reserved area is not needed and all
+			 * cleanup happens inline. If the packet came from a
+			 * PtReceivePacket there will be no callback; otherwise
+			 * PtReceiveComplete will be called but will just return
+			 * since everything is already cleaned up.
+			 */
+			ULONG cpu = KeGetCurrentProcessorNumber();
+
+			adapter->ReceivedIndicationFlags[cpu] = TRUE;
+			NdisMEthIndicateReceive(adapter->MiniportHandle, NULL,
+			    frame, 14, frame + 14, m->m_len, m->m_len);
+			NdisMEthIndicateReceiveComplete(adapter->MiniportHandle);
+			adapter->ReceivedIndicationFlags[cpu] = FALSE;
+		}
+	}
+
+	/*
+	 * Release everything. An NDIS_PACKET exists, and must be freed,
+	 * only if the packet came from a PtReceivePacket; otherwise
+	 * m->pkt is NULL.
+	 */
+	if (m->pkt != NULL) {
+		NdisUnchainBufferAtFront(m->pkt, &ndis_buf);
+		NdisFreeBuffer(ndis_buf);
+		NdisFreePacket(m->pkt);
+	}
+	m_freem(m);
+}
+
+/*
+ * Thin front end to m_freem() taking an untyped pointer;
+ * provided for the benefit of protocol.c.
+ */
+void win_freem(void *);
+
+void
+win_freem(void *_m)
+{
+	m_freem((struct mbuf *)_m);
+}
+
+/*
+ * not implemented in linux.
+ * taken from /usr/src/lib/libc/string/strlcpy.c
+ */
+/*
+ * Copy src into dst, which holds siz bytes, writing at most siz-1
+ * characters and always NUL-terminating when siz is not zero.
+ * Returns the length of src; a result >= siz means the copy was truncated.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz)
+{
+	const char *from = src;
+	char *to = dst;
+	size_t room = siz;	/* bytes of dst not yet consumed */
+
+	/* Copy while more than one byte is left: the last one is for the NUL. */
+	while (room > 1) {
+		char c = *from++;
+
+		*to++ = c;
+		if (c == '\0')
+			return (size_t)(from - src - 1);	/* everything fitted */
+		room--;
+	}
+
+	/* dst is full (or has no room at all): terminate it if possible ... */
+	if (siz != 0)
+		*to = '\0';
+	/* ... and walk the rest of src to learn its full length. */
+	while (*from++ != '\0')
+		;
+
+	return (size_t)(from - src - 1);	/* count does not include NUL */
+}
+
+/*
+ * Release every resource attached to a reinjected packet: the buffer
+ * descriptor at the head of the packet, the mbuf, and the packet
+ * descriptor itself; then drop one pending send on the adapter.
+ */
+void CleanupReinjected(PNDIS_PACKET Packet, struct mbuf* m, PADAPT pAdapt)
+{
+	PNDIS_BUFFER head;
+
+	/* locate the first buffer, then detach it from the packet */
+	NdisQueryPacket(Packet, NULL, NULL, &head, NULL);
+	NdisUnchainBufferAtFront(Packet, &head);
+	NdisFreeBuffer(head);
+
+	/* the mbuf goes before the packet descriptor */
+	m_freem(m);
+	NdisFreePacket(Packet);
+
+	ADAPT_DECR_PENDING_SENDS(pAdapt);
+}
+
+/*
+ * Firewall entry point for frames that the NIC indicates with the
+ * old-style call, i.e. a pointer to the MAC header plus a pointer to
+ * the rest of the frame, with no NDIS_PACKET.
+ *
+ *   direction              - stored in the mbuf for later reinjection
+ *   ProtocolBindingContext - stored in the mbuf as its context
+ *   HeaderBuffer/Size      - the MAC header
+ *   LookAheadBuffer/Size   - the data following the MAC header
+ *   PacketSize             - not used here
+ *
+ * Returns PASS when the caller should go on with the frame (non-IP,
+ * too short to carry a 14-byte MAC header, or accepted by the firewall),
+ * DUMMYNET when dummynet kept a copy for later reinjection, and DROP
+ * when the frame was rejected or no memory was available.
+ */
+int
+ipfw2_qhandler_w32_oldstyle(int direction,
+	NDIS_HANDLE ProtocolBindingContext,
+	unsigned char* HeaderBuffer,
+	unsigned int HeaderBufferSize,
+	unsigned char* LookAheadBuffer,
+	unsigned int LookAheadBufferSize,
+	unsigned int PacketSize)
+{
+	struct mbuf* m;
+	unsigned char* payload = NULL;
+	unsigned short EtherType = 0;
+	unsigned int FrameLen;
+	int ret = 0;
+
+	/*
+	 * Everything below assumes a 14-byte MAC header: the EtherType is
+	 * read at offset 12 and the mbuf length is computed as total-14.
+	 * A shorter frame would make that length wrap around and the
+	 * EtherType read run past the allocation, so let such frames
+	 * through untouched, as is done for non-IP traffic.
+	 */
+	FrameLen = HeaderBufferSize + LookAheadBufferSize;
+	if (FrameLen < 14)
+		return PASS;
+
+	/* We are in a special case when NIC signals an incoming
+	 * packet using old style calls. This is done passing
+	 * a pointer to the MAC header and a pointer to the
+	 * rest of the packet.
+	 * We simply allocate space for the mbuf and the
+	 * subsequent payload section.
+	 */
+	m = malloc(sizeof(struct mbuf) + FrameLen, 0, 0 );
+	if (m == NULL) //resource shortage, drop the packet
+		return DROP;
+
+	/* set mbuf fields to point past the MAC header.
+	 * Also set additional W32 info.
+	 * m->pkt here is set to null because the notification
+	 * from the NIC has come with a header+lookahead buffer,
+	 * no NDIS_PACKET has been provided.
+	 */
+	payload = (unsigned char*)(m + 1);
+	m->m_len = m->m_pkthdr.len = FrameLen - 14;
+	m->m_data = payload + 14; /* past the MAC header */
+	m->direction = direction;
+	m->context = ProtocolBindingContext;
+	m->pkt = NULL;
+
+	/*
+	 * Now copy the data from the Windows buffers to the mbuf.
+	 */
+	bcopy(HeaderBuffer, payload, HeaderBufferSize);
+	bcopy(LookAheadBuffer, payload+HeaderBufferSize, LookAheadBufferSize);
+	//hexdump(payload,HeaderBufferSize+LookAheadBufferSize,"qhandler");
+	/*
+	 * Identify EtherType. If the packet is not IP, simply allow
+	 * and don't bother the firewall. XXX should be done before.
+	 */
+	EtherType = *(unsigned short*)(payload + 12);
+	EtherType = RtlUshortByteSwap(EtherType);
+	if (EtherType != 0x0800) {
+		//DbgPrint("ethertype = %X, skipping ipfw\n",EtherType);
+		free(m, 0);
+		return PASS;
+	}
+
+	/* Query the firewall */
+	ret = ipfw_check_hook(NULL, &m, NULL, PFIL_IN, NULL);
+
+	if (m != NULL) {
+		/* Accept. Free the mbuf and return PASS. */
+		//DbgPrint("accepted\n");
+		m_freem(m);
+		return PASS;
+	} else if (ret == 0) {
+		/* dummynet has kept the packet, will reinject later. */
+		//DbgPrint("kept by dummynet\n");
+		return DUMMYNET;
+	} else {
+		/*
+		 * Packet dropped by ipfw or dummynet. Nothing to do as
+		 * FREE_PKT already freed the fake mbuf
+		 */
+		//DbgPrint("dropped by dummynet, ret = %i\n", ret);
+		return DROP;
+	}
+}
+
+/* forward declaration because those functions are used only here,
+ * no point to make them visible in passthru/protocol/miniport */
+int do_ipfw_set_ctl(struct sock __unused *sk, int cmd,
+ void __user *user, unsigned int len);
+int do_ipfw_get_ctl(struct sock __unused *sk, int cmd,
+ void __user *user, int *len);
+
+NTSTATUS
+DevIoControl(
+	IN PDEVICE_OBJECT pDeviceObject,
+	IN PIRP pIrp
+	)
+/*++
+
+Routine Description:
+
+	This is the dispatch routine for handling device ioctl requests.
+	It emulates get/setsockopt for ipfw: the I/O buffer starts with a
+	struct sockopt, followed by sopt_valsize bytes of option data.
+
+Arguments:
+
+	pDeviceObject - Pointer to the device object (unused).
+
+	pIrp - Pointer to the request packet.
+
+Return Value:
+
+	STATUS_SUCCESS for a handled set/get request,
+	STATUS_INVALID_PARAMETER when sopt_valsize claims more data than
+	the caller supplied, STATUS_NOT_SUPPORTED for an unknown control
+	code or a buffer too small to hold a struct sockopt.
+	The IRP is always completed with the returned status.
+
+--*/
+{
+	PIO_STACK_LOCATION pIrpSp;
+	NTSTATUS NtStatus = STATUS_SUCCESS;
+	unsigned long BytesReturned = 0;
+	unsigned long FunctionCode;
+	unsigned long InLen;	/* bytes supplied by the caller */
+	unsigned long OutLen;	/* bytes the caller can receive */
+	unsigned long DataRoom;	/* room after the struct sockopt */
+	unsigned long len;
+	int optlen;
+	struct sockopt *sopt;
+	int ret = 0;
+
+	UNREFERENCED_PARAMETER(pDeviceObject);
+
+	pIrpSp = IoGetCurrentIrpStackLocation(pIrp);
+
+	/*
+	 * Using METHOD_BUFFERED as communication method, the userland
+	 * side calls DeviceIoControl passing an input buffer and an output
+	 * and their respective length (ipfw uses the same length for both).
+	 * The system creates a single I/O buffer, with len=max(inlen,outlen).
+	 * In the kernel we can read information from this buffer (which is
+	 * directly accessible), overwrite it with our results, and set
+	 * IoStatus.Information with the number of bytes that the system must
+	 * copy back to userland.
+	 * In our sockopt emulation, the initial part of the buffer contains
+	 * a struct sockopt, followed by the data area.
+	 */
+	InLen = pIrpSp->Parameters.DeviceIoControl.InputBufferLength;
+	OutLen = pIrpSp->Parameters.DeviceIoControl.OutputBufferLength;
+	FunctionCode = pIrpSp->Parameters.DeviceIoControl.IoControlCode;
+	sopt = pIrp->AssociatedIrp.SystemBuffer;
+
+	if (sopt == NULL || InLen < sizeof(struct sockopt))
+	{
+		/*
+		 * Do not return from here directly: a dispatch routine that
+		 * returns a final status must also complete the IRP.
+		 */
+		NtStatus = STATUS_NOT_SUPPORTED; // XXX find better value
+		goto done;
+	}
+	DataRoom = InLen - (unsigned long)sizeof(struct sockopt);
+
+	switch (FunctionCode)
+	{
+	case IP_FW_SETSOCKOPT:
+	case IP_FW_GETSOCKOPT:
+		/*
+		 * sopt_valsize comes from userland: never let the handlers
+		 * touch memory beyond what the caller actually supplied.
+		 * The field is compared before any narrowing conversion.
+		 */
+		if (sopt->sopt_valsize > DataRoom)
+		{
+			NtStatus = STATUS_INVALID_PARAMETER;
+			break;
+		}
+		len = (unsigned long)sopt->sopt_valsize;
+
+		/*
+		 * NOTE(review): the handlers' return value is not reported
+		 * to userland, exactly as before; confirm this is intended.
+		 */
+		if (FunctionCode == IP_FW_SETSOCKOPT)
+		{
+			ret = do_ipfw_set_ctl(NULL, sopt->sopt_name, sopt+1, len);
+			break;
+		}
+
+		optlen = (int)len;
+		ret = do_ipfw_get_ctl(NULL, sopt->sopt_name, sopt+1, &optlen);
+		len = (unsigned long)optlen;
+		sopt->sopt_valsize = len;
+		/* copy back header + data, but never more than the buffers hold */
+		if (len <= DataRoom)
+			BytesReturned = len + (unsigned long)sizeof(struct sockopt);
+		else
+			BytesReturned = InLen;
+		if (BytesReturned > OutLen)
+			BytesReturned = OutLen;
+		break;
+
+	default:
+		NtStatus = STATUS_NOT_SUPPORTED;
+		break;
+	}
+
+done:
+	pIrp->IoStatus.Information = BytesReturned;
+	pIrp->IoStatus.Status = NtStatus;
+	IoCompleteRequest(pIrp, IO_NO_INCREMENT);
+
+	return NtStatus;
+}
+
+void dummynet(void * __unused unused);
+void ipfw_tick(void * vnetx);
+
+/*
+ * DPC routine that runs one dummynet() pass.
+ * None of the DPC arguments is used.
+ */
+VOID dummynet_dpc(
+	__in struct _KDPC *Dpc,
+	__in_opt PVOID DeferredContext,
+	__in_opt PVOID SystemArgument1,
+	__in_opt PVOID SystemArgument2
+	)
+{
+	UNREFERENCED_PARAMETER(Dpc);
+	UNREFERENCED_PARAMETER(DeferredContext);
+	UNREFERENCED_PARAMETER(SystemArgument1);
+	UNREFERENCED_PARAMETER(SystemArgument2);
+
+	dummynet(NULL);
+}
+
+/*
+ * DPC routine that runs ipfw_tick(), handing it the DPC's
+ * deferred context. The other DPC arguments are not used.
+ */
+VOID ipfw_dpc(
+	__in struct _KDPC *Dpc,
+	__in_opt PVOID DeferredContext,
+	__in_opt PVOID SystemArgument1,
+	__in_opt PVOID SystemArgument2
+	)
+{
+	UNREFERENCED_PARAMETER(Dpc);
+	UNREFERENCED_PARAMETER(SystemArgument1);
+	UNREFERENCED_PARAMETER(SystemArgument2);
+
+	ipfw_tick(DeferredContext);
+}
--- /dev/null
+/*++
+
+Copyright (c) 1992-2000 Microsoft Corporation
+
+Module Name:
+
+ miniport.c
+
+Abstract:
+
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.
+
+Author:
+
+Environment:
+
+
+Revision History:
+
+
+--*/
+
+#include "precomp.h"
+#pragma hdrstop
+
+
+
+NDIS_STATUS
+MPInitialize(
+ OUT PNDIS_STATUS OpenErrorStatus,
+ OUT PUINT SelectedMediumIndex,
+ IN PNDIS_MEDIUM MediumArray,
+ IN UINT MediumArraySize,
+ IN NDIS_HANDLE MiniportAdapterHandle,
+ IN NDIS_HANDLE WrapperConfigurationContext
+ )
+/*++
+
+Routine Description:
+
+ This is the initialize handler which gets called as a result of
+ the BindAdapter handler calling NdisIMInitializeDeviceInstanceEx.
+ The context parameter which we pass there is the adapter structure
+ which we retrieve here.
+
+ On success the adapter is linked into the global adapter list, the
+ ioctl device is registered and a reference is taken on the adapter.
+ On both success and failure MiniportInitPending is cleared and
+ MiniportInitEvent is signalled.
+
+ Arguments:
+
+ OpenErrorStatus Receives a copy of the returned status
+ SelectedMediumIndex Place-holder for what media we are using
+ MediumArray Array of ndis media passed down to us to pick from
+ MediumArraySize Size of the array
+ MiniportAdapterHandle The handle NDIS uses to refer to us
+ WrapperConfigurationContext For use by NdisOpenConfiguration (unused here)
+
+Return Value:
+
+ NDIS_STATUS_SUCCESS unless something goes wrong;
+ NDIS_STATUS_UNSUPPORTED_MEDIA when our medium is not in MediumArray
+
+--*/
+{
+ UINT i;
+ PADAPT pAdapt;
+ NDIS_STATUS Status = NDIS_STATUS_FAILURE;
+ NDIS_MEDIUM Medium;
+
+ UNREFERENCED_PARAMETER(WrapperConfigurationContext);
+
+ do
+ {
+ //
+ // Start off by retrieving our adapter context and storing
+ // the Miniport handle in it.
+ // NOTE(review): pAdapt is used without a NULL check - confirm
+ // NdisIMGetDeviceContext cannot return NULL here.
+ //
+ pAdapt = NdisIMGetDeviceContext(MiniportAdapterHandle);
+ pAdapt->MiniportIsHalted = FALSE;
+
+ DBGPRINT(("==> Miniport Initialize: Adapt %p\n", pAdapt));
+
+ //
+ // Usually we export the medium type of the adapter below as our
+ // virtual miniport's medium type. However if the adapter below us
+ // is a WAN device, then we claim to be of medium type 802.3.
+ //
+ Medium = pAdapt->Medium;
+
+ if (Medium == NdisMediumWan)
+ {
+ Medium = NdisMedium802_3;
+ }
+
+ for (i = 0; i < MediumArraySize; i++)
+ {
+ if (MediumArray[i] == Medium)
+ {
+ *SelectedMediumIndex = i;
+ break;
+ }
+ }
+
+ if (i == MediumArraySize)
+ {
+ Status = NDIS_STATUS_UNSUPPORTED_MEDIA;
+ break;
+ }
+
+
+ //
+ // Set the attributes now. NDIS_ATTRIBUTE_DESERIALIZE enables us
+ // to make up-calls to NDIS without having to call NdisIMSwitchToMiniport
+ // or NdisIMQueueCallBack. This also forces us to protect our data using
+ // spinlocks where appropriate. Also in this case NDIS does not queue
+ // packets on our behalf. Since this is a very simple pass-thru
+ // miniport, we do not have a need to protect anything. However in
+ // a general case there will be a need to use per-adapter spin-locks
+ // for the packet queues at the very least.
+ //
+ NdisMSetAttributesEx(MiniportAdapterHandle,
+ pAdapt,
+ 0, // CheckForHangTimeInSeconds
+ NDIS_ATTRIBUTE_IGNORE_PACKET_TIMEOUT |
+ NDIS_ATTRIBUTE_IGNORE_REQUEST_TIMEOUT|
+ NDIS_ATTRIBUTE_INTERMEDIATE_DRIVER |
+ NDIS_ATTRIBUTE_DESERIALIZE |
+ NDIS_ATTRIBUTE_NO_HALT_ON_SUSPEND,
+ 0);
+
+ pAdapt->MiniportHandle = MiniportAdapterHandle;
+ //
+ // Initialize LastIndicatedStatus to be NDIS_STATUS_MEDIA_CONNECT
+ //
+ pAdapt->LastIndicatedStatus = NDIS_STATUS_MEDIA_CONNECT;
+
+ //
+ // Initialize the power states for both the lower binding (PTDeviceState)
+ // and our miniport edge to Powered On.
+ //
+ pAdapt->MPDeviceState = NdisDeviceStateD0;
+ pAdapt->PTDeviceState = NdisDeviceStateD0;
+
+ //
+ // Add this adapter to the global pAdapt List
+ //
+ NdisAcquireSpinLock(&GlobalLock);
+
+ pAdapt->Next = pAdaptList;
+ pAdaptList = pAdapt;
+
+ NdisReleaseSpinLock(&GlobalLock);
+
+ //
+ // Create an ioctl interface. The result is deliberately ignored:
+ // initialization is reported as successful regardless.
+ //
+ (VOID)PtRegisterDevice();
+
+ Status = NDIS_STATUS_SUCCESS;
+ }
+ while (FALSE);
+
+ //
+ // If we had received an UnbindAdapter notification on the underlying
+ // adapter, we would have blocked that thread waiting for the IM Init
+ // process to complete. Wake up any such thread.
+ // This is done on the failure path as well.
+ //
+ ASSERT(pAdapt->MiniportInitPending == TRUE);
+ pAdapt->MiniportInitPending = FALSE;
+ NdisSetEvent(&pAdapt->MiniportInitEvent);
+
+ if (Status == NDIS_STATUS_SUCCESS)
+ {
+ PtReferenceAdapt(pAdapt);
+ }
+
+ DBGPRINT(("<== Miniport Initialize: Adapt %p, Status %x\n", pAdapt, Status));
+
+ *OpenErrorStatus = Status;
+
+
+ return Status;
+}
+
+
+NDIS_STATUS
+MPSend(
+    IN NDIS_HANDLE MiniportAdapterContext,
+    IN PNDIS_PACKET Packet,
+    IN UINT Flags
+    )
+/*++
+
+Routine Description:
+
+    Send Packet handler. Either this or our SendPackets (array) handler is called
+    based on which one is enabled in our Miniport Characteristics.
+
+    The packet is wrapped in a freshly allocated descriptor, shown to the
+    firewall (ipfw2_qhandler_w32) and, if it passes, sent to the adapter below.
+
+Arguments:
+
+    MiniportAdapterContext Pointer to the adapter
+    Packet Packet to send
+    Flags Unused, passed down below
+
+Return Value:
+
+    Return code from NdisSend, or
+    NDIS_STATUS_FAILURE if either edge is in a low power state or the
+    firewall dropped the packet,
+    NDIS_STATUS_SUCCESS if dummynet kept the packet for later reinjection,
+    the NdisAllocatePacket status if no descriptor could be allocated.
+
+--*/
+{
+    PADAPT pAdapt = (PADAPT)MiniportAdapterContext;
+    NDIS_STATUS Status;
+    PNDIS_PACKET MyPacket;
+    PVOID MediaSpecificInfo = NULL;
+    ULONG MediaSpecificInfoSize = 0;
+
+    //
+    // The driver should fail the send if the virtual miniport is in low
+    // power state
+    //
+    if (pAdapt->MPDeviceState > NdisDeviceStateD0)
+    {
+        return NDIS_STATUS_FAILURE;
+    }
+
+#ifdef NDIS51
+    //
+    // Use NDIS 5.1 packet stacking:
+    //
+    if (0) // XXX IPFW - make sure we don't go in here
+    {
+        PNDIS_PACKET_STACK pStack;
+        BOOLEAN Remaining;
+
+        //
+        // Packet stacks: Check if we can use the same packet for sending down.
+        //
+
+        pStack = NdisIMGetCurrentPacketStack(Packet, &Remaining);
+        if (Remaining)
+        {
+            //
+            // We can reuse "Packet".
+            //
+            // NOTE: if we needed to keep per-packet information in packets
+            // sent down, we can use pStack->IMReserved[].
+            //
+            ASSERT(pStack);
+            //
+            // If the below miniport is going to low power state, stop sending down any packet.
+            //
+            NdisAcquireSpinLock(&pAdapt->Lock);
+            if (pAdapt->PTDeviceState > NdisDeviceStateD0)
+            {
+                NdisReleaseSpinLock(&pAdapt->Lock);
+                return NDIS_STATUS_FAILURE;
+            }
+            pAdapt->OutstandingSends++;
+            NdisReleaseSpinLock(&pAdapt->Lock);
+            NdisSend(&Status,
+                     pAdapt->BindingHandle,
+                     Packet);
+
+            if (Status != NDIS_STATUS_PENDING)
+            {
+                ADAPT_DECR_PENDING_SENDS(pAdapt);
+            }
+
+            return(Status);
+        }
+    }
+#endif // NDIS51
+
+    //
+    // We are either not using packet stacks, or there isn't stack space
+    // in the original packet passed down to us. Allocate a new packet
+    // to wrap the data with.
+    //
+    //
+    // If the below miniport is going to low power state, stop sending down any packet.
+    //
+    NdisAcquireSpinLock(&pAdapt->Lock);
+    if (pAdapt->PTDeviceState > NdisDeviceStateD0)
+    {
+        NdisReleaseSpinLock(&pAdapt->Lock);
+        return NDIS_STATUS_FAILURE;
+
+    }
+    //
+    // From here on every exit that does not leave the send pending
+    // (or parked in dummynet) must undo this increment.
+    //
+    pAdapt->OutstandingSends++;
+    NdisReleaseSpinLock(&pAdapt->Lock);
+
+    NdisAllocatePacket(&Status,
+                       &MyPacket,
+                       pAdapt->SendPacketPoolHandle);
+
+    if (Status == NDIS_STATUS_SUCCESS)
+    {
+        PSEND_RSVD SendRsvd;
+
+        //
+        // Save a pointer to the original packet in our reserved
+        // area in the new packet. This is needed so that we can
+        // get back to the original packet when the new packet's send
+        // is completed.
+        //
+        SendRsvd = (PSEND_RSVD)(MyPacket->ProtocolReserved);
+        SendRsvd->OriginalPkt = Packet;
+
+        NdisGetPacketFlags(MyPacket) = Flags;
+
+        //
+        // Set up the new packet so that it describes the same
+        // data as the original packet.
+        //
+        NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);
+        NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);
+#ifdef WIN9X
+        //
+        // Work around the fact that NDIS does not initialize this
+        // to FALSE on Win9x.
+        //
+        NDIS_PACKET_VALID_COUNTS(MyPacket) = FALSE;
+#endif
+
+        //
+        // Copy the OOB Offset from the original packet to the new
+        // packet.
+        //
+        NdisMoveMemory(NDIS_OOB_DATA_FROM_PACKET(MyPacket),
+                       NDIS_OOB_DATA_FROM_PACKET(Packet),
+                       sizeof(NDIS_PACKET_OOB_DATA));
+
+#ifndef WIN9X
+        //
+        // Copy the right parts of per packet info into the new packet.
+        // This API is not available on Win9x since task offload is
+        // not supported on that platform.
+        //
+        NdisIMCopySendPerPacketInfo(MyPacket, Packet);
+#endif
+
+        //
+        // Copy the Media specific information
+        //
+        NDIS_GET_PACKET_MEDIA_SPECIFIC_INFO(Packet,
+                                            &MediaSpecificInfo,
+                                            &MediaSpecificInfoSize);
+
+        if (MediaSpecificInfo || MediaSpecificInfoSize)
+        {
+            NDIS_SET_PACKET_MEDIA_SPECIFIC_INFO(MyPacket,
+                                                MediaSpecificInfo,
+                                                MediaSpecificInfoSize);
+        }
+#if 1 /* IPFW: query the firewall */
+        /* if dummynet keeps the packet, we mimic success.
+         * otherwise continue as usual.
+         */
+        {
+            int ret = ipfw2_qhandler_w32(MyPacket, OUTGOING,
+                                         MiniportAdapterContext);
+            if (ret != PASS) {
+                if (ret == DROP) {
+                    /*
+                     * Nothing will ever complete this send, so give
+                     * back the pending-send count taken above;
+                     * otherwise the counter never returns to zero.
+                     * NOTE(review): MyPacket is not touched here,
+                     * as before - the firewall path is presumed to
+                     * have released it; verify.
+                     */
+                    ADAPT_DECR_PENDING_SENDS(pAdapt);
+                    return NDIS_STATUS_FAILURE;
+                } else { //dummynet kept the packet
+                    /*
+                     * The pending-send count stays held until the
+                     * reinjected packet is finally completed.
+                     */
+#ifndef WIN9X
+                    NdisIMCopySendCompletePerPacketInfo (Packet, MyPacket);
+#endif
+                    return NDIS_STATUS_SUCCESS; //otherwise simply continue
+                }
+            }
+        }
+#endif /* end of IPFW code */
+
+        NdisSend(&Status,
+                 pAdapt->BindingHandle,
+                 MyPacket);
+
+
+        if (Status != NDIS_STATUS_PENDING)
+        {
+#ifndef WIN9X
+            NdisIMCopySendCompletePerPacketInfo (Packet, MyPacket);
+#endif
+            NdisFreePacket(MyPacket);
+            ADAPT_DECR_PENDING_SENDS(pAdapt);
+        }
+    }
+    else
+    {
+        ADAPT_DECR_PENDING_SENDS(pAdapt);
+        //
+        // We are out of packets. Silently drop it. Alternatively we can deal with it:
+        // - By keeping separate send and receive pools
+        // - Dynamically allocate more pools as needed and free them when not needed
+        //
+    }
+
+    return(Status);
+}
+
+
+VOID
+MPSendPackets(
+ IN NDIS_HANDLE MiniportAdapterContext,
+ IN PPNDIS_PACKET PacketArray,
+ IN UINT NumberOfPackets
+ )
+/*++
+
+Routine Description:
+
+ Send Packet Array handler. Either this or our SendPacket handler is called
+ based on which one is enabled in our Miniport Characteristics.
+
+ Each packet is handled independently: it is either forwarded as is
+ (NDIS 5.1 packet stacking) or wrapped in a new descriptor and sent
+ to the adapter below. Every packet whose send does not end up
+ pending is completed here with NdisMSendComplete.
+
+ NOTE(review): unlike MPSend, this handler does not call
+ ipfw2_qhandler_w32 and does not disable the packet stacking path,
+ so packets sent through it are never shown to the firewall.
+ Presumably only MPSend is registered - verify.
+
+Arguments:
+
+ MiniportAdapterContext Pointer to our adapter
+ PacketArray Set of packets to send
+ NumberOfPackets Self-explanatory
+
+Return Value:
+
+ None
+
+--*/
+{
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;
+ NDIS_STATUS Status;
+ UINT i;
+ PVOID MediaSpecificInfo = NULL;
+ UINT MediaSpecificInfoSize = 0;
+
+
+ for (i = 0; i < NumberOfPackets; i++)
+ {
+ PNDIS_PACKET Packet, MyPacket;
+
+ Packet = PacketArray[i];
+ //
+ // The driver should fail the send if the virtual miniport is in low
+ // power state
+ //
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0)
+ {
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),
+ Packet,
+ NDIS_STATUS_FAILURE);
+ continue;
+ }
+
+#ifdef NDIS51
+
+ //
+ // Use NDIS 5.1 packet stacking:
+ //
+ {
+ PNDIS_PACKET_STACK pStack;
+ BOOLEAN Remaining;
+
+ //
+ // Packet stacks: Check if we can use the same packet for sending down.
+ //
+ pStack = NdisIMGetCurrentPacketStack(Packet, &Remaining);
+ if (Remaining)
+ {
+ //
+ // We can reuse "Packet".
+ //
+ // NOTE: if we needed to keep per-packet information in packets
+ // sent down, we can use pStack->IMReserved[].
+ //
+ ASSERT(pStack);
+ //
+ // If the below miniport is going to low power state, stop sending down any packet.
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),
+ Packet,
+ NDIS_STATUS_FAILURE);
+ }
+ else
+ {
+ pAdapt->OutstandingSends++;
+ NdisReleaseSpinLock(&pAdapt->Lock);
+
+ NdisSend(&Status,
+ pAdapt->BindingHandle,
+ Packet);
+
+ if (Status != NDIS_STATUS_PENDING)
+ {
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),
+ Packet,
+ Status);
+
+ ADAPT_DECR_PENDING_SENDS(pAdapt);
+ }
+ }
+ continue;
+ }
+ }
+#endif
+ do
+ {
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ //
+ // If the below miniport is going to low power state, stop sending down any packet.
+ // Leaving the block with a failure status makes the code after it complete the packet.
+ //
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+ pAdapt->OutstandingSends++;
+ NdisReleaseSpinLock(&pAdapt->Lock);
+
+ NdisAllocatePacket(&Status,
+ &MyPacket,
+ pAdapt->SendPacketPoolHandle);
+
+ if (Status == NDIS_STATUS_SUCCESS)
+ {
+ PSEND_RSVD SendRsvd;
+
+ SendRsvd = (PSEND_RSVD)(MyPacket->ProtocolReserved);
+ SendRsvd->OriginalPkt = Packet;
+
+ NdisGetPacketFlags(MyPacket) = NdisGetPacketFlags(Packet);
+
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);
+#ifdef WIN9X
+ //
+ // Work around the fact that NDIS does not initialize this
+ // to FALSE on Win9x.
+ //
+ NDIS_PACKET_VALID_COUNTS(MyPacket) = FALSE;
+#endif // WIN9X
+
+ //
+ // Copy the OOB data from the original packet to the new
+ // packet.
+ //
+ NdisMoveMemory(NDIS_OOB_DATA_FROM_PACKET(MyPacket),
+ NDIS_OOB_DATA_FROM_PACKET(Packet),
+ sizeof(NDIS_PACKET_OOB_DATA));
+ //
+ // Copy relevant parts of the per packet info into the new packet
+ //
+#ifndef WIN9X
+ NdisIMCopySendPerPacketInfo(MyPacket, Packet);
+#endif
+
+ //
+ // Copy the Media specific information
+ //
+ NDIS_GET_PACKET_MEDIA_SPECIFIC_INFO(Packet,
+ &MediaSpecificInfo,
+ &MediaSpecificInfoSize);
+
+ if (MediaSpecificInfo || MediaSpecificInfoSize)
+ {
+ NDIS_SET_PACKET_MEDIA_SPECIFIC_INFO(MyPacket,
+ MediaSpecificInfo,
+ MediaSpecificInfoSize);
+ }
+
+ NdisSend(&Status,
+ pAdapt->BindingHandle,
+ MyPacket);
+
+ if (Status != NDIS_STATUS_PENDING)
+ {
+#ifndef WIN9X
+ NdisIMCopySendCompletePerPacketInfo (Packet, MyPacket);
+#endif
+ NdisFreePacket(MyPacket);
+ ADAPT_DECR_PENDING_SENDS(pAdapt);
+ }
+ }
+ else
+ {
+ //
+ // The driver cannot allocate a packet.
+ // Undo the pending-send count taken above.
+ //
+ ADAPT_DECR_PENDING_SENDS(pAdapt);
+ }
+ }
+ while (FALSE);
+
+ if (Status != NDIS_STATUS_PENDING)
+ {
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),
+ Packet,
+ Status);
+ }
+ }
+}
+
+
+NDIS_STATUS
+MPQueryInformation(
+ IN NDIS_HANDLE MiniportAdapterContext,
+ IN NDIS_OID Oid,
+ IN PVOID InformationBuffer,
+ IN ULONG InformationBufferLength,
+ OUT PULONG BytesWritten,
+ OUT PULONG BytesNeeded
+ )
+/*++
+
+Routine Description:
+
+ Entry point called by NDIS to query for the value of the specified OID.
+ Typical processing is to forward the query down to the underlying miniport.
+
+ The following OIDs are filtered here:
+
+ OID_PNP_QUERY_POWER - return success right here
+
+ OID_GEN_SUPPORTED_GUIDS - do not forward, otherwise we will show up
+ multiple instances of private GUIDs supported by the underlying miniport.
+
+ OID_PNP_CAPABILITIES - we do send this down to the lower miniport, but
+ the values returned are postprocessed before we complete this request;
+ see PtRequestComplete.
+
+ NOTE on OID_TCP_TASK_OFFLOAD - if this IM driver modifies the contents
+ of data it passes through such that a lower miniport may not be able
+ to perform TCP task offload, then it should not forward this OID down,
+ but fail it here with the status NDIS_STATUS_NOT_SUPPORTED. This is to
+ avoid performing incorrect transformations on data.
+ (As written, the OID is forwarded like any other.)
+
+ If our miniport edge (upper edge) is at a low-power state, fail the request.
+
+ If our protocol edge (lower edge) has been notified of a low-power state,
+ we pend this request until the miniport below has been set to D0. Since
+ requests to miniports are serialized always, at most a single request will
+ be pended.
+
+ A forwarded request that the miniport below finishes synchronously is
+ handed to PtRequestComplete right here, and NDIS_STATUS_PENDING is
+ returned to NDIS.
+
+Arguments:
+
+ MiniportAdapterContext Pointer to the adapter structure
+ Oid Oid for this query
+ InformationBuffer Buffer for information
+ InformationBufferLength Size of this buffer
+ BytesWritten Specifies how much info is written
+ BytesNeeded In case the buffer is smaller than what we need, tell them how much is needed
+
+
+Return Value:
+
+ Return code from the NdisRequest below.
+
+--*/
+{
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;
+ NDIS_STATUS Status = NDIS_STATUS_FAILURE;
+
+ do
+ {
+ if (Oid == OID_PNP_QUERY_POWER)
+ {
+ //
+ // Do not forward this.
+ //
+ Status = NDIS_STATUS_SUCCESS;
+ break;
+ }
+
+ if (Oid == OID_GEN_SUPPORTED_GUIDS)
+ {
+ //
+ // Do not forward this, otherwise we will end up with multiple
+ // instances of private GUIDs that the underlying miniport
+ // supports.
+ //
+ Status = NDIS_STATUS_NOT_SUPPORTED;
+ break;
+ }
+
+ if (Oid == OID_TCP_TASK_OFFLOAD)
+ {
+ //
+ // Fail this -if- this driver performs data transformations
+ // that can interfere with a lower driver's ability to offload
+ // TCP tasks.
+ //
+ // Status = NDIS_STATUS_NOT_SUPPORTED;
+ // break;
+ //
+ }
+ //
+ // If the miniport below is unbinding, just fail any request
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ if (pAdapt->UnbindingInProcess == TRUE)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ //
+ // All other queries are failed, if the miniport is not at D0,
+ //
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0)
+ {
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+
+ pAdapt->Request.RequestType = NdisRequestQueryInformation;
+ pAdapt->Request.DATA.QUERY_INFORMATION.Oid = Oid;
+ pAdapt->Request.DATA.QUERY_INFORMATION.InformationBuffer = InformationBuffer;
+ pAdapt->Request.DATA.QUERY_INFORMATION.InformationBufferLength = InformationBufferLength;
+ pAdapt->BytesNeeded = BytesNeeded;
+ pAdapt->BytesReadOrWritten = BytesWritten;
+
+ //
+ // If the miniport below is unbinding, fail the request
+ // (checked again, as the lock was dropped in between)
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+
+ if (pAdapt->UnbindingInProcess == TRUE)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+ //
+ // If the Protocol device state is OFF, mark this request as being
+ // pended. We queue this until the device state is back to D0.
+ //
+ if ((pAdapt->PTDeviceState > NdisDeviceStateD0)
+ && (pAdapt->StandingBy == FALSE))
+ {
+ pAdapt->QueuedRequest = TRUE;
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_PENDING;
+ break;
+ }
+ //
+ // This is in the process of powering down the system, always fail the request
+ //
+ if (pAdapt->StandingBy == TRUE)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+ pAdapt->OutstandingRequests = TRUE;
+
+ NdisReleaseSpinLock(&pAdapt->Lock);
+
+ //
+ // default case, most requests will be passed to the miniport below
+ //
+ NdisRequest(&Status,
+ pAdapt->BindingHandle,
+ &pAdapt->Request);
+
+
+ if (Status != NDIS_STATUS_PENDING)
+ {
+ PtRequestComplete(pAdapt, &pAdapt->Request, Status);
+ Status = NDIS_STATUS_PENDING;
+ }
+
+ } while (FALSE);
+
+ return(Status);
+
+}
+
+
+VOID
+MPQueryPNPCapabilities(
+    IN OUT PADAPT pAdapt,
+    OUT PNDIS_STATUS pStatus
+    )
+/*++
+
+Routine Description:
+
+    Fix up the answer to an OID_PNP_CAPABILITIES query that was sent to
+    the underlying miniport and has come back completed: the wake-up
+    levels are overwritten with "unspecified" and both power states
+    tracked in the adapter are set to D0.
+
+Arguments:
+
+    pAdapt - Pointer to the adapter structure
+    pStatus - Place to return final status: NDIS_STATUS_SUCCESS, or
+              NDIS_STATUS_RESOURCES when the query buffer cannot hold
+              an NDIS_PNP_CAPABILITIES structure
+
+Return Value:
+
+    None.
+
+--*/
+
+{
+    PNDIS_PNP_CAPABILITIES Caps;
+    PNDIS_PM_WAKE_UP_CAPABILITIES WakeUp;
+
+    //
+    // Buffer too small: only report how much room is required.
+    //
+    if (pAdapt->Request.DATA.QUERY_INFORMATION.InformationBufferLength < sizeof(NDIS_PNP_CAPABILITIES))
+    {
+        *pAdapt->BytesNeeded = sizeof(NDIS_PNP_CAPABILITIES);
+        *pStatus = NDIS_STATUS_RESOURCES;
+        return;
+    }
+
+    Caps = (PNDIS_PNP_CAPABILITIES)(pAdapt->Request.DATA.QUERY_INFORMATION.InformationBuffer);
+    WakeUp = &Caps->WakeUpCapabilities;
+
+    //
+    // An IM driver must overwrite these fields.
+    //
+    WakeUp->MinMagicPacketWakeUp = NdisDeviceStateUnspecified;
+    WakeUp->MinPatternWakeUp = NdisDeviceStateUnspecified;
+    WakeUp->MinLinkChangeWakeUp = NdisDeviceStateUnspecified;
+
+    *pAdapt->BytesReadOrWritten = sizeof(NDIS_PNP_CAPABILITIES);
+    *pAdapt->BytesNeeded = 0;
+
+    //
+    // Internal flags: by default the device is ON at both edges.
+    //
+    pAdapt->MPDeviceState = NdisDeviceStateD0;
+    pAdapt->PTDeviceState = NdisDeviceStateD0;
+
+    *pStatus = NDIS_STATUS_SUCCESS;
+}
+
+
+NDIS_STATUS
+MPSetInformation(
+ IN NDIS_HANDLE MiniportAdapterContext,
+ IN NDIS_OID Oid,
+ __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,
+ IN ULONG InformationBufferLength,
+ OUT PULONG BytesRead,
+ OUT PULONG BytesNeeded
+ )
+/*++
+
+Routine Description:
+
+ Miniport SetInfo handler.
+
+ In the case of OID_PNP_SET_POWER, record the power state and return the OID.
+ Do not pass below
+ If the device is suspended, do not block the SET_POWER_OID
+ as it is used to reactivate the Passthru miniport
+
+
+ PM- If the MP is not ON (DeviceState > D0) return immediately (except for 'query power' and 'set power')
+ If MP is ON, but the PT is not at D0, then queue the request for later processing
+
+ Requests to miniports are always serialized
+
+ When the miniport below completes the forwarded request synchronously,
+ BytesRead and BytesNeeded are copied back and OutstandingRequests is
+ cleared right here.
+
+
+Arguments:
+
+ MiniportAdapterContext Pointer to the adapter structure
+ Oid Oid for this query
+ InformationBuffer Buffer for information
+ InformationBufferLength Size of this buffer
+ BytesRead Specifies how much info is read
+ BytesNeeded In case the buffer is smaller than what we need, tell them how much is needed
+
+Return Value:
+
+ Return code from the NdisRequest below, or the status set by
+ MPProcessSetPowerOid for OID_PNP_SET_POWER.
+
+--*/
+{
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;
+ NDIS_STATUS Status;
+
+ Status = NDIS_STATUS_FAILURE;
+
+ do
+ {
+ //
+ // The Set Power should not be sent to the miniport below the Passthru, but is handled internally
+ //
+ if (Oid == OID_PNP_SET_POWER)
+ {
+ MPProcessSetPowerOid(&Status,
+ pAdapt,
+ InformationBuffer,
+ InformationBufferLength,
+ BytesRead,
+ BytesNeeded);
+ break;
+
+ }
+
+ //
+ // If the miniport below is unbinding, fail the request
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ if (pAdapt->UnbindingInProcess == TRUE)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ //
+ // All other Set Information requests are failed, if the miniport is
+ // not at D0 or is transitioning to a device state greater than D0.
+ //
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0)
+ {
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+
+ // Set up the Request and return the result
+ pAdapt->Request.RequestType = NdisRequestSetInformation;
+ pAdapt->Request.DATA.SET_INFORMATION.Oid = Oid;
+ pAdapt->Request.DATA.SET_INFORMATION.InformationBuffer = InformationBuffer;
+ pAdapt->Request.DATA.SET_INFORMATION.InformationBufferLength = InformationBufferLength;
+ pAdapt->BytesNeeded = BytesNeeded;
+ pAdapt->BytesReadOrWritten = BytesRead;
+
+ //
+ // If the miniport below is unbinding, fail the request
+ // (checked again, as the lock was dropped in between)
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ if (pAdapt->UnbindingInProcess == TRUE)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+
+ //
+ // If the device below is at a low power state, we cannot send it the
+ // request now, and must pend it.
+ //
+ if ((pAdapt->PTDeviceState > NdisDeviceStateD0)
+ && (pAdapt->StandingBy == FALSE))
+ {
+ pAdapt->QueuedRequest = TRUE;
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_PENDING;
+ break;
+ }
+ //
+ // This is in the process of powering down the system, always fail the request
+ //
+ if (pAdapt->StandingBy == TRUE)
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ Status = NDIS_STATUS_FAILURE;
+ break;
+ }
+ pAdapt->OutstandingRequests = TRUE;
+
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ //
+ // Forward the request to the device below.
+ //
+ NdisRequest(&Status,
+ pAdapt->BindingHandle,
+ &pAdapt->Request);
+
+ if (Status != NDIS_STATUS_PENDING)
+ {
+ *BytesRead = pAdapt->Request.DATA.SET_INFORMATION.BytesRead;
+ *BytesNeeded = pAdapt->Request.DATA.SET_INFORMATION.BytesNeeded;
+ pAdapt->OutstandingRequests = FALSE;
+ }
+
+ } while (FALSE);
+
+ return(Status);
+}
+
+
+VOID
+MPProcessSetPowerOid(
+    IN OUT PNDIS_STATUS pNdisStatus,
+    IN PADAPT pAdapt,
+    __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,
+    IN ULONG InformationBufferLength,
+    OUT PULONG BytesRead,
+    OUT PULONG BytesNeeded
+    )
+/*++
+
+Routine Description:
+    This routine does all the processing for a request with a SetPower Oid.
+    The miniport should accept the Set Power and transition to the new state.
+
+    The Set Power should not be passed to the miniport below.
+
+    If the IM miniport is going into a low power state, then there is no
+    guarantee that it will ever be asked to go back to D0 before getting
+    halted. No requests should be pended or queued.
+
+
+Arguments:
+    pNdisStatus             - Receives the status of the operation
+    pAdapt                  - The Adapter structure
+    InformationBuffer       - Holds the new NDIS_DEVICE_POWER_STATE
+    InformationBufferLength - Size of InformationBuffer in bytes
+    BytesRead               - Receives the number of bytes consumed
+    BytesNeeded             - Receives the number of bytes required
+
+
+Return Value:
+    None. The result is reported through *pNdisStatus:
+      NDIS_STATUS_SUCCESS        - the new power state was recorded
+      NDIS_STATUS_INVALID_LENGTH - buffer missing or too small
+      NDIS_STATUS_FAILURE        - a non-D0 state was requested while the
+                                   miniport was already in a non-D0 state
+
+--*/
+{
+    NDIS_DEVICE_POWER_STATE NewDeviceState;
+
+    DBGPRINT(("==>MPProcessSetPowerOid: Adapt %p\n", pAdapt));
+
+    ASSERT (InformationBuffer != NULL);
+
+    *pNdisStatus = NDIS_STATUS_FAILURE;
+
+    do
+    {
+        //
+        // Check for a missing buffer or an invalid length. The NULL test
+        // backs up the ASSERT above, which compiles away in free builds
+        // and would otherwise leave the dereference below unguarded.
+        //
+        if ((InformationBuffer == NULL) ||
+            (InformationBufferLength < sizeof(NDIS_DEVICE_POWER_STATE)))
+        {
+            *pNdisStatus = NDIS_STATUS_INVALID_LENGTH;
+            break;
+        }
+
+        NewDeviceState = (*(PNDIS_DEVICE_POWER_STATE)InformationBuffer);
+
+        //
+        // Check for invalid device state
+        //
+        if ((pAdapt->MPDeviceState > NdisDeviceStateD0) && (NewDeviceState != NdisDeviceStateD0))
+        {
+            //
+            // If the miniport is in a non-D0 state, the miniport can only
+            // receive a Set Power to D0. Reaching this branch is therefore
+            // unexpected; the assertion negates the whole condition so it
+            // flags exactly this case in checked builds.
+            //
+            ASSERT (!((pAdapt->MPDeviceState > NdisDeviceStateD0) && (NewDeviceState != NdisDeviceStateD0)));
+
+            *pNdisStatus = NDIS_STATUS_FAILURE;
+            break;
+        }
+
+        //
+        // Is the miniport transitioning from an On (D0) state to a Low Power State (>D0)?
+        // If so, then set the StandingBy flag - (Block all incoming requests)
+        //
+        if (pAdapt->MPDeviceState == NdisDeviceStateD0 && NewDeviceState > NdisDeviceStateD0)
+        {
+            pAdapt->StandingBy = TRUE;
+        }
+
+        //
+        // If the miniport is transitioning from a low power state to ON (D0), then clear the StandingBy flag.
+        // All incoming requests will be pended until the physical miniport turns ON.
+        //
+        if (pAdapt->MPDeviceState > NdisDeviceStateD0 && NewDeviceState == NdisDeviceStateD0)
+        {
+            pAdapt->StandingBy = FALSE;
+        }
+
+        //
+        // Now update the state in the pAdapt structure
+        //
+        pAdapt->MPDeviceState = NewDeviceState;
+
+        *pNdisStatus = NDIS_STATUS_SUCCESS;
+
+    } while (FALSE);
+
+    if (*pNdisStatus == NDIS_STATUS_SUCCESS)
+    {
+        //
+        // The miniport resumes from a low power state
+        //
+        if (pAdapt->StandingBy == FALSE)
+        {
+            //
+            // Indicate the media status that was recorded but not yet
+            // indicated, if it differs from the last one indicated.
+            //
+            if (pAdapt->LastIndicatedStatus != pAdapt->LatestUnIndicateStatus)
+            {
+                if (pAdapt->MiniportHandle != NULL)
+                {
+                    NdisMIndicateStatus(pAdapt->MiniportHandle,
+                                        pAdapt->LatestUnIndicateStatus,
+                                        (PVOID)NULL,
+                                        0);
+                    NdisMIndicateStatusComplete(pAdapt->MiniportHandle);
+                    pAdapt->LastIndicatedStatus = pAdapt->LatestUnIndicateStatus;
+                }
+            }
+        }
+        else
+        {
+            //
+            // Going to standby: initialize LatestUnIndicateStatus
+            //
+            pAdapt->LatestUnIndicateStatus = pAdapt->LastIndicatedStatus;
+        }
+        *BytesRead = sizeof(NDIS_DEVICE_POWER_STATE);
+        *BytesNeeded = 0;
+    }
+    else
+    {
+        *BytesRead = 0;
+        *BytesNeeded = sizeof (NDIS_DEVICE_POWER_STATE);
+    }
+
+    DBGPRINT(("<==MPProcessSetPowerOid: Adapt %p\n", pAdapt));
+}
+
+
+VOID
+MPReturnPacket(
+    IN NDIS_HANDLE MiniportAdapterContext,
+    IN PNDIS_PACKET Packet
+    )
+/*++
+
+Routine Description:
+
+    Miniport ReturnPacket entry point. It is invoked when the protocols
+    above are done with a packet that this driver indicated up and that
+    they held on to before returning it.
+
+Arguments:
+
+    MiniportAdapterContext - pointer to ADAPT structure
+    Packet - the packet being handed back
+
+Return Value:
+
+    None.
+
+--*/
+{
+    PADAPT pAdapt = (PADAPT)MiniportAdapterContext;
+    PNDIS_PACKET OriginalPacket;
+
+#ifdef NDIS51
+    //
+    // Packet stacking: a packet that does not come from our receive pool
+    // is the lower driver's own packet that was reused for the receive
+    // indication, so it only needs to be passed back down.
+    //
+    if (NdisGetPoolFromPacket(Packet) != pAdapt->RecvPacketPoolHandle)
+    {
+        NdisReturnPackets(&Packet, 1);
+        return;
+    }
+#endif // NDIS51
+
+    //
+    // The packet was allocated from this IM's receive packet pool.
+    // Fetch the original packet from the reserved area before freeing
+    // our wrapper, then return the original to the driver below.
+    //
+    OriginalPacket = ((PRECV_RSVD)(Packet->MiniportReserved))->OriginalPkt;
+
+    NdisFreePacket(Packet);
+    NdisReturnPackets(&OriginalPacket, 1);
+}
+
+
+NDIS_STATUS
+MPTransferData(
+    OUT PNDIS_PACKET Packet,
+    OUT PUINT BytesTransferred,
+    IN NDIS_HANDLE MiniportAdapterContext,
+    IN NDIS_HANDLE MiniportReceiveContext,
+    IN UINT ByteOffset,
+    IN UINT BytesToTransfer
+    )
+/*++
+
+Routine Description:
+
+    Miniport TransferData handler: forwards the request to the binding
+    below through NdisTransferData.
+
+Arguments:
+
+    Packet                  Destination packet
+    BytesTransferred        Receives the amount of data copied
+    MiniportAdapterContext  Pointer to the adapter structure
+    MiniportReceiveContext  Receive context, passed through unchanged
+    ByteOffset              Offset into the packet at which to start copying
+    BytesToTransfer         Number of bytes to copy
+
+Return Value:
+
+    NDIS_STATUS_FAILURE if the device is not on, otherwise the status
+    produced by NdisTransferData.
+
+--*/
+{
+    PADAPT Adapter = (PADAPT)MiniportAdapterContext;
+    NDIS_STATUS TransferStatus;
+
+    if (IsIMDeviceStateOn(Adapter) == FALSE)
+    {
+        //
+        // Nothing can be transferred while the device is OFF
+        //
+        TransferStatus = NDIS_STATUS_FAILURE;
+    }
+    else
+    {
+        NdisTransferData(&TransferStatus,
+                         Adapter->BindingHandle,
+                         MiniportReceiveContext,
+                         ByteOffset,
+                         BytesToTransfer,
+                         Packet,
+                         BytesTransferred);
+    }
+
+    return TransferStatus;
+}
+
+VOID
+MPHalt(
+ IN NDIS_HANDLE MiniportAdapterContext
+ )
+/*++
+
+Routine Description:
+
+ Halt handler. All the hard work for clean-up is done here: the adapter
+ is marked halted, unlinked from the global adapter list, the ioctl
+ device is deregistered, the lower binding (if any) is closed, and the
+ adapter references are dropped.
+
+Arguments:
+
+ MiniportAdapterContext Pointer to the Adapter
+
+Return Value:
+
+ None.
+
+--*/
+{
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;
+ NDIS_STATUS Status;
+ PADAPT *ppCursor;
+
+ DBGPRINT(("==>MiniportHalt: Adapt %p\n", pAdapt));
+
+ pAdapt->MiniportHandle = NULL;
+ pAdapt->MiniportIsHalted = TRUE;
+
+ //
+ // Remove this adapter from the global list. ppCursor walks the
+ // addresses of the link fields, so the head and interior nodes are
+ // unlinked by the same assignment.
+ //
+ NdisAcquireSpinLock(&GlobalLock);
+
+ for (ppCursor = &pAdaptList; *ppCursor != NULL; ppCursor = &(*ppCursor)->Next)
+ {
+ if (*ppCursor == pAdapt)
+ {
+ *ppCursor = pAdapt->Next;
+ break;
+ }
+ }
+
+ NdisReleaseSpinLock(&GlobalLock);
+
+ //
+ // Delete the ioctl interface that was created when the miniport
+ // was created. The returned status is deliberately ignored.
+ //
+ (VOID)PtDeregisterDevice();
+
+ //
+ // If we have a valid bind, close the miniport below the protocol
+ //
+#pragma prefast(suppress: __WARNING_DEREF_NULL_PTR, "pAdapt cannot be NULL")
+ if (pAdapt->BindingHandle != NULL)
+ {
+ //
+ // Close the binding below and wait for it to complete
+ //
+ NdisResetEvent(&pAdapt->Event);
+
+ NdisCloseAdapter(&Status, pAdapt->BindingHandle);
+
+ if (Status == NDIS_STATUS_PENDING)
+ {
+ // A timeout of 0 presumably means "wait forever" (passthru.h
+ // defines WAIT_INFINITE as 0); the final status is then read
+ // from pAdapt->Status -- NOTE(review): presumably stored there
+ // by the close-complete handler, confirm.
+ NdisWaitEvent(&pAdapt->Event, 0);
+ Status = pAdapt->Status;
+ }
+
+ ASSERT (Status == NDIS_STATUS_SUCCESS);
+
+ pAdapt->BindingHandle = NULL;
+
+ // Drop the reference associated with the binding just closed.
+ PtDereferenceAdapt(pAdapt);
+ }
+
+ // Drop one more reference. A non-zero return clears the local pointer,
+ // presumably because the adapter was freed -- TODO confirm against
+ // PtDereferenceAdapt.
+ if (PtDereferenceAdapt(pAdapt))
+ {
+ pAdapt = NULL;
+ }
+
+
+ DBGPRINT(("<== MiniportHalt: pAdapt %p\n", pAdapt));
+}
+
+
+#ifdef NDIS51_MINIPORT
+
+VOID
+MPCancelSendPackets(
+    IN NDIS_HANDLE MiniportAdapterContext,
+    IN PVOID CancelId
+    )
+/*++
+
+Routine Description:
+
+    Miniport entry point for cancelling every send packet tagged with
+    the given CancelId. A driver that queues packets would dequeue the
+    matching ones here and complete each of them through NdisMSendComplete
+    with NDIS_STATUS_REQUEST_ABORTED.
+
+    The cancellation is also propagated with NdisCancelSendPackets on the
+    lower binding of this adapter, so that the miniports below can cancel
+    any matching packets of their own.
+
+Arguments:
+
+    MiniportAdapterContext - pointer to ADAPT structure
+    CancelId - ID of packets to be cancelled.
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT Adapter = (PADAPT)MiniportAdapterContext;
+
+    //
+    // This driver keeps no packet queue on the adapter structure. If it
+    // did, this is where the adapter spinlock would be taken, the packets
+    // whose Id matches CancelId unlinked, the lock released, and
+    // NdisMSendComplete called with NDIS_STATUS_REQUEST_ABORTED for each
+    // unlinked packet.
+    //
+
+    //
+    // Hand the cancellation to the miniport(s) below so they can cancel
+    // whatever they have queued.
+    //
+    NdisCancelSendPackets(Adapter->BindingHandle, CancelId);
+}
+
+VOID
+MPDevicePnPEvent(
+    IN NDIS_HANDLE MiniportAdapterContext,
+    IN NDIS_DEVICE_PNP_EVENT DevicePnPEvent,
+    IN PVOID InformationBuffer,
+    IN ULONG InformationBufferLength
+    )
+/*++
+
+Routine Description:
+
+    Notification of a PnP event directed at our miniport device object.
+    The event is currently ignored.
+
+Arguments:
+
+    MiniportAdapterContext - pointer to ADAPT structure
+    DevicePnPEvent - the event
+    InformationBuffer - points to additional event-specific information
+    InformationBufferLength - length of the above, in bytes
+
+Return Value:
+
+    None
+--*/
+{
+    // TBD - add code/comments about processing this.
+
+    UNREFERENCED_PARAMETER(InformationBufferLength);
+    UNREFERENCED_PARAMETER(InformationBuffer);
+    UNREFERENCED_PARAMETER(DevicePnPEvent);
+    UNREFERENCED_PARAMETER(MiniportAdapterContext);
+}
+
+VOID
+MPAdapterShutdown(
+    IN NDIS_HANDLE MiniportAdapterContext
+    )
+/*++
+
+Routine Description:
+
+    Notification of an impending system shutdown. No action is taken.
+
+Arguments:
+
+    MiniportAdapterContext - pointer to ADAPT structure
+
+Return Value:
+
+    None
+--*/
+{
+    UNREFERENCED_PARAMETER(MiniportAdapterContext);
+}
+
+#endif
+
+
+VOID
+MPFreeAllPacketPools(
+    IN PADAPT pAdapt
+    )
+/*++
+
+Routine Description:
+
+    Release the receive and send packet pools of an adapter, if they
+    exist, and clear the corresponding handles.
+
+Arguments:
+
+    pAdapt - pointer to ADAPT structure
+
+Return Value:
+
+    None
+
+--*/
+{
+    //
+    // Pool used to indicate receives
+    //
+    if (NULL != pAdapt->RecvPacketPoolHandle)
+    {
+        NdisFreePacketPool(pAdapt->RecvPacketPoolHandle);
+        pAdapt->RecvPacketPoolHandle = NULL;
+    }
+
+    //
+    // Pool used to send packets below
+    //
+    if (NULL != pAdapt->SendPacketPoolHandle)
+    {
+        NdisFreePacketPool(pAdapt->SendPacketPoolHandle);
+        pAdapt->SendPacketPoolHandle = NULL;
+    }
+}
+
*/
/*
- * $Id: missing.h 4696 2010-01-07 12:37:09Z marta $
+ * $Id: missing.h 5817 2010-03-23 09:50:56Z svn_panicucci $
*
* Header for kernel variables and functions that are not available in
* userland.
#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
#endif
-#endif /* __linux__ */
#define rw_assert(a, b)
#define rw_destroy(_l)
#define mtx_lock(_l) spin_lock_bh(_l)
#define mtx_unlock(_l) spin_unlock_bh(_l)
+#endif /* __linux__ */
/* end of locking support */
+/*
+ * Reference to an ipfw rule that can be carried outside critical sections.
+ * A rule is identified by rulenum:rule_id which is ordered.
+ * In version chain_id the rule can be found in slot 'slot', so
+ * we don't need a lookup if chain_id == chain->id.
+ *
+ * On exit from the firewall this structure refers to the rule after
+ * the matching one (slot points to the new rule; rulenum:rule_id-1
+ * is the matching rule), and additional info (e.g. info often contains
+ * the insn argument or tablearg in the low 16 bits, in host format).
+ * On entry, the structure is valid if slot>0, and refers to the starting
+ * rules. 'info' contains the reason for reinject, e.g. divert port,
+ * divert direction, and so on.
+ */
+struct ipfw_rule_ref {
+ uint32_t slot; /* slot for matching rule; valid on entry if > 0 */
+ uint32_t rulenum; /* matching rule number */
+ uint32_t rule_id; /* matching rule id (orders rules with equal rulenum) */
+ uint32_t chain_id; /* ruleset id; slot is usable when it equals chain->id */
+ uint32_t info; /* flags and 16-bit argument, see the IPFW_* enum below */
+};
+
+/*
+ * Masks and flag bits used with the 'info' field of struct ipfw_rule_ref
+ * (that field's comment points here).
+ */
+enum {
+ IPFW_INFO_MASK = 0x0000ffff, /* low 16 bits */
+ IPFW_INFO_OUT = 0x00000000, /* outgoing, just for convenience */
+ IPFW_INFO_IN = 0x80000000, /* incoming, overloads dir */
+ IPFW_ONEPASS = 0x40000000, /* One-pass, do not reinject */
+ IPFW_IS_MASK = 0x30000000, /* which source ? */
+ IPFW_IS_DIVERT = 0x20000000, /* source value within IPFW_IS_MASK */
+ IPFW_IS_DUMMYNET =0x10000000, /* source value within IPFW_IS_MASK */
+ IPFW_IS_PIPE = 0x08000000, /* pipe=1, queue = 0 */
+};
+
/* in netinet/in.h */
#define in_nullhost(x) ((x).s_addr == INADDR_ANY)
/* ethernet stuff */
#define ETHERTYPE_IP 0x0800 /* IP protocol */
-#define ETHER_ADDR_LEN 6 /* length of an Ethernet address */
+//#define ETHER_ADDR_LEN 6 /* length of an Ethernet address */
struct ether_header {
u_char ether_dhost[ETHER_ADDR_LEN];
u_char ether_shost[ETHER_ADDR_LEN];
u_short ether_type;
};
-#define ETHER_ADDR_LEN 6 /* length of an Ethernet address */
#define ETHER_TYPE_LEN 2 /* length of the Ethernet type field */
#define ETHER_HDR_LEN (ETHER_ADDR_LEN*2+ETHER_TYPE_LEN)
extern int hz;
extern long tick; /* exists in 2.4 but not in 2.6 */
extern int bootverbose;
-extern time_t time_uptime;
extern struct timeval boottime;
+/* time_uptime is a FreeBSD variable that is incremented once per second */
+#ifdef __linux__
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,37) /* revise boundaries */
+#define time_uptime get_seconds()
+#else /* OpenWRT */
+#define time_uptime CURRENT_TIME
+#endif
+#else /* WIN32 */
+#define time_uptime time_uptime_w32()
+#endif
+
extern int max_linkhdr;
extern int ip_defttl;
extern u_long in_ifaddrhmask; /* mask for hash table */
int dummy; /* windows compiler does not like empty definition */
};
+#ifndef _WIN32
struct route {
struct rtentry *ro_rt;
struct sockaddr ro_dst;
};
+#endif
struct ifaltq {
void *ifq_head;
struct sysctl_oid;
struct sysctl_req;
-/*
- * sysctl are mapped into /sys/module/ipfw_mod parameters
- */
-#define CTLFLAG_RD 1
-#define CTLFLAG_RDTUN 1
-#define CTLFLAG_RW 2
-#define CTLFLAG_SECURE3 0 // unsupported
-#define CTLFLAG_VNET 0 /* unsupported */
-
#ifdef _WIN32
#define module_param_named(_name, _var, _ty, _perm)
-#else
+#else /* !_WIN32 */
/* Linux 2.4 is mostly for openwrt */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
#define module_param_named(_name, _var, _ty, _perm) \
//module_param(_name, _ty, 0644)
#endif
-#endif /* __linux__ */
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
typedef unsigned long uintptr_t;
+
+#ifdef MIPSEL
+/*
+ * Portable __fls() used when MIPSEL is defined: returns the index
+ * (0-based) of the most-significant set bit of 'word', matching what the
+ * "bsr"-based variant computes. Returns 0 when word is 0.
+ */
+static inline unsigned long __fls (unsigned long word)
+{
+	unsigned long bit = 0;
+
+	/* shift right until nothing is left above bit 0 */
+	while (word >>= 1)
+		bit++;
+	return bit;
+}
+#else
+/*
+ * __fls() built on the x86 "bsr" (bit scan reverse) instruction, which
+ * yields the index of the most-significant set bit of its source operand.
+ * The result of bsr is undefined when the source is 0, so callers must
+ * pass a non-zero word.
+ * NOTE(review): this branch is selected for every non-MIPSEL build, so it
+ * presumably assumes an x86 target -- confirm.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+ asm("bsr %1,%0"
+ : "=r" (word)
+ : "rm" (word));
+ return word;
+}
#endif
+#endif /* LINUX < 2.6.25 */
+
+#endif /* !_WIN32 so maybe __linux__ */
+
+#if defined (__linux__) && !defined (EMULATE_SYSCTL)
#define SYSCTL_DECL(_1)
#define SYSCTL_OID(_1, _2, _3, _4, _5, _6, _7, _8)
#define SYSCTL_NODE(_1, _2, _3, _4, _5, _6)
#define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \
_SYSCTL_BASE(_name, _var, uint, _mode)
+#define TUNABLE_INT(_name, _ptr)
+
+#define SYSCTL_VNET_PROC SYSCTL_PROC
+#define SYSCTL_VNET_INT SYSCTL_INT
+
+#endif
+
#define SYSCTL_HANDLER_ARGS \
struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req
int sysctl_handle_int(SYSCTL_HANDLER_ARGS);
int sysctl_handle_long(SYSCTL_HANDLER_ARGS);
-#define TUNABLE_INT(_name, _ptr)
void ether_demux(struct ifnet *ifp, struct mbuf *m);
/* vnet wrappers, in vnet.h and ip_var.h */
//int ipfw_init(void);
//void ipfw_destroy(void);
+
+#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */
+#define MTAG_IPFW_RULE 1262273568 /* rule reference */
+
struct ip_fw_args;
extern int (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa);
#define VNET_PTR(n) (&(n))
#define VNET(n) (n)
-int
-ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
- struct inpcb *inp);
+int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp,
+ int dir, struct inpcb *inp);
+
+/* hooks for divert */
+extern void (*ip_divert_ptr)(struct mbuf *m, int incoming);
extern int (*ip_dn_ctl_ptr)(struct sockopt *);
typedef int ip_fw_ctl_t(struct sockopt *);
extern ip_fw_ctl_t *ip_fw_ctl_ptr;
+/* netgraph prototypes */
+typedef int ng_ipfw_input_t(struct mbuf **, int, struct ip_fw_args *, int);
+extern ng_ipfw_input_t *ng_ipfw_input_p;
+
/* For kernel ipfw_ether and ipfw_bridge. */
struct ip_fw_args;
typedef int ip_fw_chk_t(struct ip_fw_args *args);
#define V_tcbinfo VNET(tcbinfo)
#define V_udbinfo VNET(udbinfo)
-#define SYSCTL_VNET_PROC SYSCTL_PROC
-#define SYSCTL_VNET_INT SYSCTL_INT
-
#endif /* !_MISSING_H_ */
--- /dev/null
+; version section\r
+; DriverVer uses the mm/dd/yyyy date format, followed by the version\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = NetService\r
+ClassGUID = {4D36E974-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Unipi%\r
+DriverVer = 02/26/2010,3.0.0.1\r
+\r
+; manufacturer section\r
+[Manufacturer]\r
+%Unipi% = UNIPI,NTx86\r
+\r
+; control flags section\r
+; optional, unused in netipfw.inf inf, used in netipfw_m.inf\r
+[ControlFlags]\r
+\r
+; models section\r
+[UNIPI] ; Win2k\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+[UNIPI.NTx86] ;For WinXP and later\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+\r
+; ddinstall section\r
+[Ipfw.ndi]\r
+AddReg = Ipfw.ndi.AddReg, Ipfw.AddReg\r
+Characteristics = 0x4410 ; NCF_FILTER | NCF_NDIS_PROTOCOL !--Filter Specific--!!\r
+CopyFiles = Ipfw.Files.Sys\r
+CopyInf = netipfw_m.inf\r
+\r
+; remove section\r
+[Ipfw.ndi.Remove]\r
+DelFiles = Ipfw.Files.Sys\r
+\r
+;ddinstall.services section\r
+[Ipfw.ndi.Services]\r
+AddService = Ipfw,,Ipfw.AddService\r
+\r
+[Ipfw.AddService]\r
+DisplayName = %ServiceDesc%\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\ipfw.sys\r
+AddReg = Ipfw.AddService.AddReg\r
+\r
+[Ipfw.AddService.AddReg]\r
+\r
+;file copy related sections\r
+[SourceDisksNames]\r
+1=%DiskDescription%,"",,\r
+\r
+[SourceDisksFiles]\r
+ipfw.sys=1\r
+\r
+[DestinationDirs]\r
+DefaultDestDir = 12\r
+Ipfw.Files.Sys = 12 ; %windir%\System32\drivers\r
+\r
+; ddinstall->copyfiles points here\r
+[Ipfw.Files.Sys]\r
+ipfw.sys,,,2\r
+\r
+; ddinstall->addreg points here\r
+[Ipfw.ndi.AddReg]\r
+HKR, Ndi, HelpText, , %HELP% ; this is displayed at the bottom of the General page of the Connection Properties dialog box\r
+HKR, Ndi, FilterClass, , failover\r
+HKR, Ndi, FilterDeviceInfId, , unipi_ipfwmp\r
+HKR, Ndi, Service, , Ipfw\r
+HKR, Ndi\Interfaces, UpperRange, , noupper\r
+HKR, Ndi\Interfaces, LowerRange, , nolower\r
+HKR, Ndi\Interfaces, FilterMediaTypes, , "ethernet, tokenring, fddi, wan"\r
+\r
+;strings section\r
+[Strings]\r
+Unipi = "Unipi"\r
+DiskDescription = "Ipfw Driver Disk"\r
+Desc = "ipfw+dummynet"\r
+HELP = "This is ipfw and dummynet network emulator, developed by unipi.it"\r
+ServiceDesc = "ipfw service"\r
--- /dev/null
+; version section\r
+; DriverVer uses the mm/dd/yyyy date format, followed by the version\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = Net\r
+ClassGUID = {4D36E972-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Unipi%\r
+DriverVer = 02/26/2010,3.0.0.1\r
+\r
+; control flags section\r
+; optional, unused in netipfw.inf inf, used in netipfw_m.inf\r
+[ControlFlags]\r
+ExcludeFromSelect = unipi_ipfwmp\r
+\r
+; destinationdirs section, optional\r
+[DestinationDirs]\r
+DefaultDestDir=12\r
+; No files to copy \r
+\r
+; manufacturer section\r
+[Manufacturer]\r
+%Unipi% = UNIPI,NTx86\r
+\r
+; models section\r
+[UNIPI] ; Win2k\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+[UNIPI.NTx86] ;For WinXP and later\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+\r
+; ddinstall section\r
+[IpfwMP.ndi]\r
+AddReg = IpfwMP.ndi.AddReg\r
+Characteristics = 0x29 ;NCF_NOT_USER_REMOVABLE | NCF_VIRTUAL | NCF_HIDDEN\r
+\r
+; ddinstall->addreg points here\r
+[IpfwMP.ndi.AddReg]\r
+HKR, Ndi, Service, 0, IpfwMP\r
+\r
+;ddinstall.services section\r
+[IpfwMP.ndi.Services]\r
+AddService = IpfwMP,0x2, IpfwMP.AddService\r
+\r
+[IpfwMP.AddService]\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\ipfw.sys\r
+AddReg = IpfwMP.AddService.AddReg\r
+\r
+[IpfwMP.AddService.AddReg]\r
+; None\r
+\r
+[Strings]\r
+Unipi = "Unipi"\r
+Desc = "Ipfw Miniport"
\ No newline at end of file
--- /dev/null
+/*++
+
+Copyright (c) 1992-2000 Microsoft Corporation
+
+Module Name:
+
+ passthru.c
+
+Abstract:
+
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.
+
+Author:
+
+Environment:
+
+
+Revision History:
+
+
+--*/
+
+
+#include "precomp.h"
+#pragma hdrstop
+
+#pragma NDIS_INIT_FUNCTION(DriverEntry)
+
+NDIS_HANDLE ProtHandle = NULL;
+NDIS_HANDLE DriverHandle = NULL;
+NDIS_MEDIUM MediumArray[4] =
+ {
+ NdisMedium802_3, // Ethernet
+ NdisMedium802_5, // Token-ring
+ NdisMediumFddi, // Fddi
+ NdisMediumWan // NDISWAN
+ };
+
+NDIS_SPIN_LOCK GlobalLock;
+
+PADAPT pAdaptList = NULL;
+LONG MiniportCount = 0;
+
+NDIS_HANDLE NdisWrapperHandle;
+
+//
+// To support ioctls from user-mode:
+//
+// The macros below build the wide-character device, symbolic-link and
+// protocol names from MODULENAME (not defined in this file; presumably
+// supplied by the build -- TODO confirm). STR/WIDEN go through a second
+// macro level (STR2/WIDEN2) so that their argument is macro-expanded
+// before it is stringified (#) or pasted after the L prefix (##).
+//
+
+#define STR2(x) #x
+#define STR(x) STR2(x)
+#define DOSPREFIX "\\DosDevices\\"
+#define NTPREFIX "\\Device\\"
+#define WIDEN2(x) L ## x
+#define WIDEN(x) WIDEN2(x)
+// Each name is two adjacent wide string literals (prefix, then module name).
+#define LINKNAME_STRING WIDEN(DOSPREFIX) WIDEN(STR(MODULENAME))
+#define NTDEVICE_STRING WIDEN(NTPREFIX) WIDEN(STR(MODULENAME))
+#define PROTOCOLNAME_STRING WIDEN(STR(MODULENAME))
+
+// Outputs of NdisMRegisterDevice for the single, global control device.
+NDIS_HANDLE NdisDeviceHandle = NULL;
+PDEVICE_OBJECT ControlDeviceObject = NULL;
+
+// State of the control device; read and written under GlobalLock by
+// PtRegisterDevice and PtDeregisterDevice.
+enum _DEVICE_STATE
+{
+ PS_DEVICE_STATE_READY = 0, // ready for create/delete
+ PS_DEVICE_STATE_CREATING, // create operation in progress
+ PS_DEVICE_STATE_DELETING // delete operation in progress
+} ControlDeviceState = PS_DEVICE_STATE_READY;
+
+
+
+NTSTATUS
+DriverEntry(
+    IN PDRIVER_OBJECT DriverObject,
+    IN PUNICODE_STRING RegistryPath
+    )
+/*++
+
+Routine Description:
+
+    First entry point to be called, when this driver is loaded.
+    Registers with NDIS as an intermediate driver (miniport first, then
+    protocol) and, once both registrations have succeeded, starts the
+    ipfw/dummynet module.
+
+Arguments:
+
+    DriverObject - pointer to the system's driver object structure
+                   for this driver
+
+    RegistryPath - system's registry path for this driver
+
+Return Value:
+
+    STATUS_SUCCESS if all initialization is successful, STATUS_XXX
+    error code if not. On failure everything set up here is torn down
+    again and ipfw is not started.
+
+--*/
+{
+    NDIS_STATUS Status;
+    NDIS_PROTOCOL_CHARACTERISTICS PChars;
+    NDIS_MINIPORT_CHARACTERISTICS MChars;
+    NDIS_STRING Name;
+
+    Status = NDIS_STATUS_SUCCESS;
+    NdisAllocateSpinLock(&GlobalLock);
+
+    NdisMInitializeWrapper(&NdisWrapperHandle, DriverObject, RegistryPath, NULL);
+
+    do
+    {
+        //
+        // Register the miniport with NDIS. Note that it is the miniport
+        // which was started as a driver and not the protocol. Also the miniport
+        // must be registered prior to the protocol since the protocol's BindAdapter
+        // handler can be initiated anytime and when it is, it must be ready to
+        // start driver instances.
+        //
+
+        NdisZeroMemory(&MChars, sizeof(NDIS_MINIPORT_CHARACTERISTICS));
+
+        MChars.MajorNdisVersion = PASSTHRU_MAJOR_NDIS_VERSION;
+        MChars.MinorNdisVersion = PASSTHRU_MINOR_NDIS_VERSION;
+
+        MChars.InitializeHandler = MPInitialize;
+        MChars.QueryInformationHandler = MPQueryInformation;
+        MChars.SetInformationHandler = MPSetInformation;
+        MChars.ResetHandler = NULL;
+        MChars.TransferDataHandler = MPTransferData;
+        MChars.HaltHandler = MPHalt;
+#ifdef NDIS51_MINIPORT
+        MChars.CancelSendPacketsHandler = MPCancelSendPackets;
+        MChars.PnPEventNotifyHandler = MPDevicePnPEvent;
+        MChars.AdapterShutdownHandler = MPAdapterShutdown;
+#endif // NDIS51_MINIPORT
+
+        //
+        // We will disable the check for hang timeout so we do not
+        // need a check for hang handler!
+        //
+        MChars.CheckForHangHandler = NULL;
+        MChars.ReturnPacketHandler = MPReturnPacket;
+
+        //
+        // Either the Send or the SendPackets handler should be specified.
+        // If SendPackets handler is specified, SendHandler is ignored
+        //
+        MChars.SendHandler = MPSend; // IPFW: use MPSend, not SendPackets
+        MChars.SendPacketsHandler = NULL;
+
+        Status = NdisIMRegisterLayeredMiniport(NdisWrapperHandle,
+                                               &MChars,
+                                               sizeof(MChars),
+                                               &DriverHandle);
+        if (Status != NDIS_STATUS_SUCCESS)
+        {
+            break;
+        }
+
+#ifndef WIN9X
+        NdisMRegisterUnloadHandler(NdisWrapperHandle, PtUnload);
+#endif
+
+        //
+        // Now register the protocol.
+        //
+        NdisZeroMemory(&PChars, sizeof(NDIS_PROTOCOL_CHARACTERISTICS));
+        PChars.MajorNdisVersion = PASSTHRU_PROT_MAJOR_NDIS_VERSION;
+        PChars.MinorNdisVersion = PASSTHRU_PROT_MINOR_NDIS_VERSION;
+
+        //
+        // Make sure the protocol-name matches the service-name
+        // (from the INF) under which this protocol is installed.
+        // This is needed to ensure that NDIS can correctly determine
+        // the binding and call us to bind to miniports below.
+        //
+        NdisInitUnicodeString(&Name, PROTOCOLNAME_STRING); // Protocol name
+        PChars.Name = Name;
+        PChars.OpenAdapterCompleteHandler = PtOpenAdapterComplete;
+        PChars.CloseAdapterCompleteHandler = PtCloseAdapterComplete;
+        PChars.SendCompleteHandler = PtSendComplete;
+        PChars.TransferDataCompleteHandler = PtTransferDataComplete;
+
+        PChars.ResetCompleteHandler = PtResetComplete;
+        PChars.RequestCompleteHandler = PtRequestComplete;
+        PChars.ReceiveHandler = PtReceive;
+        PChars.ReceiveCompleteHandler = PtReceiveComplete;
+        PChars.StatusHandler = PtStatus;
+        PChars.StatusCompleteHandler = PtStatusComplete;
+        PChars.BindAdapterHandler = PtBindAdapter;
+        PChars.UnbindAdapterHandler = PtUnbindAdapter;
+        PChars.UnloadHandler = PtUnloadProtocol;
+
+        PChars.ReceivePacketHandler = PtReceivePacket;
+        PChars.PnPEventHandler= PtPNPHandler;
+
+        NdisRegisterProtocol(&Status,
+                             &ProtHandle,
+                             &PChars,
+                             sizeof(NDIS_PROTOCOL_CHARACTERISTICS));
+
+        if (Status != NDIS_STATUS_SUCCESS)
+        {
+            NdisIMDeregisterLayeredMiniport(DriverHandle);
+            break;
+        }
+
+        NdisIMAssociateMiniport(DriverHandle, ProtHandle);
+    }
+    while (FALSE);
+
+    if (Status != NDIS_STATUS_SUCCESS)
+    {
+        //
+        // Registration failed: undo what was set up above. ipfw must not
+        // be started here, because PtUnload (the only caller of
+        // ipfw_module_exit) is never invoked for a driver whose
+        // DriverEntry returned an error.
+        //
+        NdisTerminateWrapper(NdisWrapperHandle, NULL);
+        NdisFreeSpinLock(&GlobalLock);
+    }
+    else
+    {
+        ipfw_module_init(); // IPFW - start the system
+    }
+
+    return(Status);
+}
+
+
+NDIS_STATUS
+PtRegisterDevice(
+ VOID
+ )
+/*++
+
+Routine Description:
+
+ Register an ioctl interface - a device object to be used for this
+ purpose is created by NDIS when we call NdisMRegisterDevice.
+
+ This routine is called whenever a new miniport instance is
+ initialized. However, we only create one global device object,
+ when the first miniport instance is initialized. This routine
+ handles potential race conditions with PtDeregisterDevice via
+ the ControlDeviceState and MiniportCount variables.
+
+ NOTE: do not call this from DriverEntry; it will prevent the driver
+ from being unloaded (e.g. on uninstall).
+
+Arguments:
+
+ None
+
+Return Value:
+
+ NDIS_STATUS_SUCCESS if we successfully register a device object, or
+ if the device object already existed (MiniportCount was non-zero).
+ Otherwise the status returned by NdisMRegisterDevice.
+
+--*/
+{
+ NDIS_STATUS Status = NDIS_STATUS_SUCCESS;
+ UNICODE_STRING DeviceName;
+ UNICODE_STRING DeviceLinkUnicodeString;
+ PDRIVER_DISPATCH DispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1];
+
+ DBGPRINT(("==>PtRegisterDevice\n"));
+
+ NdisAcquireSpinLock(&GlobalLock);
+
+ // MiniportCount is incremented for every call, even if the
+ // registration below fails.
+ ++MiniportCount;
+
+ if (1 == MiniportCount)
+ {
+ ASSERT(ControlDeviceState != PS_DEVICE_STATE_CREATING);
+
+ //
+ // Another thread could be running PtDeregisterDevice on
+ // behalf of another miniport instance. If so, wait for
+ // it to exit. The lock is dropped around the sleep so that
+ // thread can make progress.
+ //
+ while (ControlDeviceState != PS_DEVICE_STATE_READY)
+ {
+ NdisReleaseSpinLock(&GlobalLock);
+ NdisMSleep(1);
+ NdisAcquireSpinLock(&GlobalLock);
+ }
+
+ ControlDeviceState = PS_DEVICE_STATE_CREATING;
+
+ // The lock is released while the device is registered; the
+ // CREATING state set above marks the operation as in progress.
+ NdisReleaseSpinLock(&GlobalLock);
+
+
+ NdisZeroMemory(DispatchTable, (IRP_MJ_MAXIMUM_FUNCTION+1) * sizeof(PDRIVER_DISPATCH));
+
+ DispatchTable[IRP_MJ_CREATE] = PtDispatch;
+ DispatchTable[IRP_MJ_CLEANUP] = PtDispatch;
+ DispatchTable[IRP_MJ_CLOSE] = PtDispatch;
+ // IPFW: device-control requests are routed to DevIoControl
+ // instead of PtDispatch.
+ DispatchTable[IRP_MJ_DEVICE_CONTROL] = DevIoControl;
+
+
+ NdisInitUnicodeString(&DeviceName, NTDEVICE_STRING);
+ NdisInitUnicodeString(&DeviceLinkUnicodeString, LINKNAME_STRING);
+
+ //
+ // Create a device object and register our dispatch handlers
+ //
+
+ Status = NdisMRegisterDevice(
+ NdisWrapperHandle,
+ &DeviceName,
+ &DeviceLinkUnicodeString,
+ &DispatchTable[0],
+ &ControlDeviceObject,
+ &NdisDeviceHandle
+ );
+
+ // Re-take the lock to publish the READY state; it is released by
+ // the common NdisReleaseSpinLock below.
+ NdisAcquireSpinLock(&GlobalLock);
+
+ ControlDeviceState = PS_DEVICE_STATE_READY;
+ }
+
+ NdisReleaseSpinLock(&GlobalLock);
+
+ DBGPRINT(("<==PtRegisterDevice: %x\n", Status));
+
+ return (Status);
+}
+
+
+NTSTATUS
+PtDispatch(
+    IN PDEVICE_OBJECT DeviceObject,
+    IN PIRP Irp
+    )
+/*++
+Routine Description:
+
+    Dispatch routine for IRPs sent to the control device. Every request
+    is completed immediately with STATUS_SUCCESS.
+
+Arguments:
+
+    DeviceObject - pointer to a device object
+    Irp - pointer to an I/O Request Packet
+
+Return Value:
+
+    NTSTATUS - STATUS_SUCCESS always - change this when adding
+    real code to handle ioctls.
+
+--*/
+{
+    NTSTATUS CompletionStatus = STATUS_SUCCESS;
+    PIO_STACK_LOCATION StackLocation;
+
+    UNREFERENCED_PARAMETER(DeviceObject);
+
+    DBGPRINT(("==>Pt Dispatch\n"));
+    StackLocation = IoGetCurrentIrpStackLocation(Irp);
+
+    //
+    // No major function needs any work yet; the switch is kept as the
+    // place to add ioctl handling for passthru.
+    //
+    switch (StackLocation->MajorFunction)
+    {
+        case IRP_MJ_CREATE:
+        case IRP_MJ_CLEANUP:
+        case IRP_MJ_CLOSE:
+        case IRP_MJ_DEVICE_CONTROL:
+        default:
+            break;
+    }
+
+    Irp->IoStatus.Status = CompletionStatus;
+    IoCompleteRequest(Irp, IO_NO_INCREMENT);
+
+    DBGPRINT(("<== Pt Dispatch\n"));
+
+    return CompletionStatus;
+}
+
+
+NDIS_STATUS
+PtDeregisterDevice(
+ VOID
+ )
+/*++
+
+Routine Description:
+
+ Deregister the ioctl interface. This is called whenever a miniport
+ instance is halted. When the last miniport instance is halted, we
+ request NDIS to delete the device object
+
+Arguments:
+
+ None. The routine operates on the global NdisDeviceHandle, i.e. the
+ handle returned by NdisMRegisterDevice.
+
+Return Value:
+
+ NDIS_STATUS_SUCCESS if everything worked ok (also when there was
+ nothing to deregister); otherwise the status returned by
+ NdisMDeregisterDevice.
+
+--*/
+{
+ NDIS_STATUS Status = NDIS_STATUS_SUCCESS;
+
+ DBGPRINT(("==>PassthruDeregisterDevice\n"));
+
+ NdisAcquireSpinLock(&GlobalLock);
+
+ ASSERT(MiniportCount > 0);
+
+ --MiniportCount;
+
+ if (0 == MiniportCount)
+ {
+ //
+ // All miniport instances have been halted. Deregister
+ // the control device.
+ //
+
+ ASSERT(ControlDeviceState == PS_DEVICE_STATE_READY);
+
+ //
+ // Block PtRegisterDevice() while we release the control
+ // device lock and deregister the device.
+ //
+ ControlDeviceState = PS_DEVICE_STATE_DELETING;
+
+ NdisReleaseSpinLock(&GlobalLock);
+
+ // The handle is NULL if no device was ever registered
+ // successfully; in that case there is nothing to delete.
+ if (NdisDeviceHandle != NULL)
+ {
+ Status = NdisMDeregisterDevice(NdisDeviceHandle);
+ NdisDeviceHandle = NULL;
+ }
+
+ // Re-take the lock to publish the READY state; it is released by
+ // the common NdisReleaseSpinLock below.
+ NdisAcquireSpinLock(&GlobalLock);
+ ControlDeviceState = PS_DEVICE_STATE_READY;
+ }
+
+ NdisReleaseSpinLock(&GlobalLock);
+
+ DBGPRINT(("<== PassthruDeregisterDevice: %x\n", Status));
+ return Status;
+
+}
+
+VOID
+PtUnload(
+    IN PDRIVER_OBJECT       DriverObject
+    )
+//
+// PassThru driver unload function.
+//
+// Tears the driver down in this order: deregisters the protocol edge
+// (PtUnloadProtocol), deregisters the layered miniport, frees the global
+// spin lock and finally shuts down the IPFW/dummynet module.
+//
+// DriverObject is not used.
+//
+{
+    UNREFERENCED_PARAMETER(DriverObject);
+
+    DBGPRINT(("PtUnload: entered\n"));
+
+    PtUnloadProtocol();
+
+    NdisIMDeregisterLayeredMiniport(DriverHandle);
+
+    NdisFreeSpinLock(&GlobalLock);
+
+    // NOTE(review): runs after GlobalLock has been freed; presumably
+    // ipfw_module_exit() does not touch GlobalLock - confirm.
+    ipfw_module_exit(); // IPFW unloading dummynet
+
+    DBGPRINT(("PtUnload: done!\n"));
+}
--- /dev/null
+/*++\r
+\r
+Copyright (c) 1992-2000 Microsoft Corporation\r
+\r
+Module Name:\r
+\r
+ passthru.h\r
+\r
+Abstract:\r
+\r
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.\r
+\r
+Author:\r
+\r
+Environment:\r
+\r
+\r
+Revision History:\r
+\r
+ \r
+--*/\r
+\r
+#ifdef NDIS51_MINIPORT\r
+#define PASSTHRU_MAJOR_NDIS_VERSION 5\r
+#define PASSTHRU_MINOR_NDIS_VERSION 1\r
+#else\r
+#define PASSTHRU_MAJOR_NDIS_VERSION 4\r
+#define PASSTHRU_MINOR_NDIS_VERSION 0\r
+#endif\r
+\r
+#ifdef NDIS51\r
+#define PASSTHRU_PROT_MAJOR_NDIS_VERSION 5\r
+#define PASSTHRU_PROT_MINOR_NDIS_VERSION 0\r
+#else\r
+#define PASSTHRU_PROT_MAJOR_NDIS_VERSION 4\r
+#define PASSTHRU_PROT_MINOR_NDIS_VERSION 0\r
+#endif\r
+\r
+#define MAX_BUNDLEID_LENGTH 50\r
+\r
+#define TAG 'ImPa'\r
+#define WAIT_INFINITE 0\r
+\r
+\r
+\r
+//advance declaration\r
+typedef struct _ADAPT ADAPT, *PADAPT;\r
+\r
+DRIVER_INITIALIZE DriverEntry;\r
+extern\r
+NTSTATUS\r
+DriverEntry(\r
+ IN PDRIVER_OBJECT DriverObject,\r
+ IN PUNICODE_STRING RegistryPath\r
+ );\r
+\r
+DRIVER_DISPATCH PtDispatch;\r
+NTSTATUS\r
+PtDispatch(\r
+ IN PDEVICE_OBJECT DeviceObject,\r
+ IN PIRP Irp\r
+ );\r
+\r
+DRIVER_DISPATCH DevIoControl;\r
+NTSTATUS\r
+DevIoControl(\r
+ IN PDEVICE_OBJECT pDeviceObject,\r
+ IN PIRP pIrp\r
+ );\r
+\r
+NDIS_STATUS\r
+PtRegisterDevice(\r
+ VOID\r
+ );\r
+\r
+NDIS_STATUS\r
+PtDeregisterDevice(\r
+ VOID\r
+ );\r
+\r
+DRIVER_UNLOAD PtUnload;\r
+VOID\r
+PtUnloadProtocol(\r
+ VOID\r
+ );\r
+\r
+//\r
+// Protocol proto-types\r
+//\r
+extern\r
+VOID\r
+PtOpenAdapterComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS Status,\r
+ IN NDIS_STATUS OpenErrorStatus\r
+ );\r
+\r
+extern\r
+VOID\r
+PtCloseAdapterComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtResetComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtRequestComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_REQUEST NdisRequest,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtStatus(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS GeneralStatus,\r
+ IN PVOID StatusBuffer,\r
+ IN UINT StatusBufferSize\r
+ );\r
+\r
+extern\r
+VOID\r
+PtStatusComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext\r
+ );\r
+\r
+extern\r
+VOID\r
+PtSendComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtTransferDataComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN NDIS_STATUS Status,\r
+ IN UINT BytesTransferred\r
+ );\r
+\r
+extern\r
+NDIS_STATUS\r
+PtReceive(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_HANDLE MacReceiveContext,\r
+ IN PVOID HeaderBuffer,\r
+ IN UINT HeaderBufferSize,\r
+ IN PVOID LookAheadBuffer,\r
+ IN UINT LookaheadBufferSize,\r
+ IN UINT PacketSize\r
+ );\r
+\r
+extern\r
+VOID\r
+PtReceiveComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext\r
+ );\r
+\r
+extern\r
+INT\r
+PtReceivePacket(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet\r
+ );\r
+\r
+extern\r
+VOID\r
+PtBindAdapter(\r
+ OUT PNDIS_STATUS Status,\r
+ IN NDIS_HANDLE BindContext,\r
+ IN PNDIS_STRING DeviceName,\r
+ IN PVOID SystemSpecific1,\r
+ IN PVOID SystemSpecific2\r
+ );\r
+\r
+extern\r
+VOID\r
+PtUnbindAdapter(\r
+ OUT PNDIS_STATUS Status,\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_HANDLE UnbindContext\r
+ );\r
+ \r
+VOID\r
+PtUnload(\r
+ IN PDRIVER_OBJECT DriverObject\r
+ );\r
+\r
+\r
+\r
+extern \r
+NDIS_STATUS\r
+PtPNPHandler(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ );\r
+\r
+\r
+\r
+\r
+NDIS_STATUS\r
+PtPnPNetEventReconfigure(\r
+ IN PADAPT pAdapt,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ ); \r
+\r
+NDIS_STATUS \r
+PtPnPNetEventSetPower (\r
+ IN PADAPT pAdapt,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ );\r
+ \r
+\r
+//\r
+// Miniport proto-types\r
+//\r
+NDIS_STATUS\r
+MPInitialize(\r
+ OUT PNDIS_STATUS OpenErrorStatus,\r
+ OUT PUINT SelectedMediumIndex,\r
+ IN PNDIS_MEDIUM MediumArray,\r
+ IN UINT MediumArraySize,\r
+ IN NDIS_HANDLE MiniportAdapterHandle,\r
+ IN NDIS_HANDLE WrapperConfigurationContext\r
+ );\r
+\r
+VOID\r
+MPSendPackets(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PPNDIS_PACKET PacketArray,\r
+ IN UINT NumberOfPackets\r
+ );\r
+\r
+NDIS_STATUS\r
+MPSend(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN UINT Flags\r
+ );\r
+\r
+NDIS_STATUS\r
+MPQueryInformation(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_OID Oid,\r
+ IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesWritten,\r
+ OUT PULONG BytesNeeded\r
+ );\r
+\r
+NDIS_STATUS\r
+MPSetInformation(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_OID Oid,\r
+ __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesRead,\r
+ OUT PULONG BytesNeeded\r
+ );\r
+\r
+VOID\r
+MPReturnPacket(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PNDIS_PACKET Packet\r
+ );\r
+\r
+NDIS_STATUS\r
+MPTransferData(\r
+ OUT PNDIS_PACKET Packet,\r
+ OUT PUINT BytesTransferred,\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_HANDLE MiniportReceiveContext,\r
+ IN UINT ByteOffset,\r
+ IN UINT BytesToTransfer\r
+ );\r
+\r
+VOID\r
+MPHalt(\r
+ IN NDIS_HANDLE MiniportAdapterContext\r
+ );\r
+\r
+\r
+VOID\r
+MPQueryPNPCapabilities( \r
+ OUT PADAPT MiniportProtocolContext, \r
+ OUT PNDIS_STATUS Status\r
+ );\r
+\r
+\r
+#ifdef NDIS51_MINIPORT\r
+\r
+VOID\r
+MPCancelSendPackets(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PVOID CancelId\r
+ );\r
+\r
+VOID\r
+MPAdapterShutdown(\r
+ IN NDIS_HANDLE MiniportAdapterContext\r
+ );\r
+\r
+VOID\r
+MPDevicePnPEvent(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_DEVICE_PNP_EVENT DevicePnPEvent,\r
+ IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength\r
+ );\r
+\r
+#endif // NDIS51_MINIPORT\r
+\r
+VOID\r
+MPFreeAllPacketPools(\r
+ IN PADAPT pAdapt\r
+ );\r
+\r
+\r
+VOID\r
+MPProcessSetPowerOid(\r
+ IN OUT PNDIS_STATUS pNdisStatus,\r
+ IN PADAPT pAdapt,\r
+ __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesRead,\r
+ OUT PULONG BytesNeeded\r
+ );\r
+\r
+VOID\r
+PtReferenceAdapt(\r
+ IN PADAPT pAdapt\r
+ );\r
+\r
+BOOLEAN\r
+PtDereferenceAdapt(\r
+ IN PADAPT pAdapt\r
+ );\r
+\r
+//\r
+// There should be no DbgPrint's in the Free version of the driver\r
+//\r
+#if DBG
+
+//
+// DBGPRINT takes exactly ONE macro argument: a complete, parenthesized
+// DbgPrint argument list, e.g. DBGPRINT(("status %x\n", Status));
+// The output is prefixed with "Passthru: ".
+//
+// The macro expands to a bare { } compound statement, so avoid using it
+// as the unbraced body of an 'if' that is followed by an 'else'.
+//
+#define DBGPRINT(Fmt)                                        \
+    {                                                        \
+        DbgPrint("Passthru: ");                                \
+        DbgPrint Fmt;                                        \
+    }
+
+#else // if DBG
+
+// Free build: DBGPRINT expands to nothing.
+#define DBGPRINT(Fmt)
+
+#endif // if DBG
+\r
+#define NUM_PKTS_IN_POOL 256\r
+\r
+\r
+//\r
+// Protocol reserved part of a sent packet that is allocated by us.\r
+//\r
+typedef struct _SEND_RSVD
+{
+    PNDIS_PACKET    OriginalPkt;    // Packet handed to us from above; PtSendComplete
+                                    // treats NULL as "reinjected packet, no original"
+    struct mbuf*    pMbuf;          // IPFW extension, reference to the mbuf
+} SEND_RSVD, *PSEND_RSVD;
+
+//
+// Miniport reserved part of a received packet that is allocated by
+// us. Note that this should fit into the MiniportReserved space
+// in an NDIS_PACKET (enforced at compile time by the C_ASSERT below).
+//
+typedef struct _RECV_RSVD
+{
+    PNDIS_PACKET    OriginalPkt;
+    struct mbuf*    pMbuf;          // IPFW extension, reference to the mbuf
+} RECV_RSVD, *PRECV_RSVD;
+
+// Compile-time check: RECV_RSVD must not outgrow NDIS_PACKET.MiniportReserved.
+C_ASSERT(sizeof(RECV_RSVD) <= sizeof(((PNDIS_PACKET)0)->MiniportReserved));
+\r
+//\r
+// Event Codes related to the PassthruEvent Structure\r
+//\r
+\r
+typedef enum \r
+{\r
+ Passthru_Invalid,\r
+ Passthru_SetPower,\r
+ Passthru_Unbind\r
+\r
+} PASSSTHRU_EVENT_CODE, *PPASTHRU_EVENT_CODE; \r
+\r
+//\r
+// Passthru event, together with a code recording why the event was set
+//\r
+\r
+typedef struct _PASSTHRU_EVENT\r
+{\r
+ NDIS_EVENT Event;\r
+ PASSSTHRU_EVENT_CODE Code;\r
+\r
+} PASSTHRU_EVENT, *PPASSTHRU_EVENT;\r
+\r
+\r
+//\r
+// Structure used by both the miniport as well as the protocol part of the intermediate driver\r
+// to represent an adapter and its corres. lower bindings\r
+//\r
+typedef struct _ADAPT
+{
+    struct _ADAPT *                Next;
+
+    NDIS_HANDLE                    BindingHandle;    // To the lower miniport
+    NDIS_HANDLE                    MiniportHandle;    // NDIS handle for miniport up-calls
+    NDIS_HANDLE                    SendPacketPoolHandle;    // Packet pool used to pass sends down
+    NDIS_HANDLE                    RecvPacketPoolHandle;    // Packet pool used to indicate receives up
+    NDIS_STATUS                    Status;             // Open/Close completion status
+    NDIS_EVENT                     Event;            // Used by bind/halt for Open/Close Adapter synch.
+    NDIS_MEDIUM                    Medium;
+    NDIS_REQUEST                   Request;        // This is used to wrap a request coming down
+                                                // to us. This exploits the fact that requests
+                                                // are serialized down to us.
+    PULONG                         BytesNeeded;         // Out-parameter of the wrapped request
+    PULONG                         BytesReadOrWritten;  // Out-parameter of the wrapped request
+    BOOLEAN                        ReceivedIndicationFlags[32];
+
+    BOOLEAN                        OutstandingRequests;      // TRUE iff a request is pending
+                                                        // at the miniport below
+    BOOLEAN                        QueuedRequest;            // TRUE iff a request is queued at
+                                                        // this IM miniport
+
+    BOOLEAN                        StandingBy;                // TRUE  - when the miniport or protocol is transitioning
+                                                        //         from D0 to a standby (>D0) state
+                                                        // FALSE - at all other times; the flag is cleared
+                                                        //         after a transition to D0
+    BOOLEAN                        UnbindingInProcess;        // Set under Lock by PtUnbindAdapter
+    NDIS_SPIN_LOCK                 Lock;                    // Guards OutstandingSends and the request/unbind flags
+
+    NDIS_DEVICE_POWER_STATE        MPDeviceState;            // Miniport's Device State
+    NDIS_DEVICE_POWER_STATE        PTDeviceState;            // Protocol's Device State
+    NDIS_STRING                    DeviceName;                // For initializing the miniport edge
+    NDIS_EVENT                     MiniportInitEvent;        // For blocking UnbindAdapter while
+                                                        // an IM Init is in progress.
+    BOOLEAN                        MiniportInitPending;    // TRUE iff IMInit in progress
+    NDIS_STATUS                    LastIndicatedStatus;    // The last indicated media status
+    NDIS_STATUS                    LatestUnIndicateStatus; // The latest suppressed media status
+    ULONG                          OutstandingSends;        // Decremented by ADAPT_DECR_PENDING_SENDS
+    LONG                           RefCount;                // Presumably managed by PtReferenceAdapt /
+                                                        // PtDereferenceAdapt - confirm
+    BOOLEAN                        MiniportIsHalted;
+} ADAPT, *PADAPT;
+\r
+extern NDIS_HANDLE ProtHandle, DriverHandle;\r
+extern NDIS_MEDIUM MediumArray[4];\r
+extern PADAPT pAdaptList;\r
+extern NDIS_SPIN_LOCK GlobalLock;\r
+\r
+\r
+#define ADAPT_MINIPORT_HANDLE(_pAdapt) ((_pAdapt)->MiniportHandle)\r
+#define ADAPT_DECR_PENDING_SENDS(_pAdapt) \\r
+ { \\r
+ NdisAcquireSpinLock(&(_pAdapt)->Lock); \\r
+ (_pAdapt)->OutstandingSends--; \\r
+ NdisReleaseSpinLock(&(_pAdapt)->Lock); \\r
+ }\r
+\r
+//\r
+// Custom Macros to be used by the passthru driver \r
+//\r
+/*\r
+BOOLEAN\r
+IsIMDeviceStateOn(\r
+ PADAPT \r
+ )\r
+\r
+*/\r
+#define IsIMDeviceStateOn(_pP) ((_pP)->MPDeviceState == NdisDeviceStateD0 && (_pP)->PTDeviceState == NdisDeviceStateD0 ) \r
+\r
+#include "winmissing.h"\r
+\r
+//
+// Glue between the passthru driver and the IPFW/dummynet code.
+//
+
+// Module start-up / tear-down; ipfw_module_exit() is called from PtUnload.
+int ipfw_module_init(void);
+void ipfw_module_exit(void);
+
+// Packet hooks. NOTE(review): the meaning of 'direction' and of the return
+// value is defined by the implementation, which is not in this header.
+int ipfw2_qhandler_w32(PNDIS_PACKET pNdisPacket, int direction,
+	NDIS_HANDLE Context);
+int ipfw2_qhandler_w32_oldstyle(int direction, NDIS_HANDLE ProtocolBindingContext,
+	unsigned char* HeaderBuffer, unsigned int HeaderBufferSize,
+	unsigned char* LookAheadBuffer, unsigned int LookAheadBufferSize,
+	unsigned int PacketSize);
+
+// Called by PtSendComplete for a send packet that has no original packet
+// (a reinjected packet).
+void CleanupReinjected(PNDIS_PACKET Packet, struct mbuf* m, PADAPT pAdapt);
+
+void hexdump(PUCHAR,int, const char *);
+
+// Declared with (void) so they are real prototypes: an empty parameter
+// list in C leaves the arguments unchecked.
+void my_init(void);
+void my_exit(void);
\ No newline at end of file
--- /dev/null
+#pragma warning(disable:4214) // bit field types other than int\r
+\r
+#pragma warning(disable:4201) // nameless struct/union\r
+#pragma warning(disable:4115) // named type definition in parentheses\r
+#pragma warning(disable:4127) // conditional expression is constant\r
+#pragma warning(disable:4054) // cast of function pointer to PVOID\r
+#pragma warning(disable:4244) // conversion from 'int' to 'BOOLEAN', possible loss of data\r
+\r
+#include <ndis.h>\r
+#include "passthru.h"\r
+\r
--- /dev/null
+/*++
+
+Copyright(c) 1992-2000 Microsoft Corporation
+
+Module Name:
+
+ protocol.c
+
+Abstract:
+
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.
+
+Author:
+
+Environment:
+
+
+Revision History:
+
+
+--*/
+
+
+#include "precomp.h"
+#pragma hdrstop
+
+//
+// Packet pool sizing used by PtBindAdapter: MIN_PACKET_POOL_SIZE is the
+// initial number of descriptors, and (MAX - MIN) is passed as the
+// overflow count to NdisAllocatePacketPoolEx.
+//
+#define MAX_PACKET_POOL_SIZE 0x0000FFFF
+#define MIN_PACKET_POOL_SIZE 0x000000FF
+
+//
+// NDIS version as 0xMMMMmmmm, where M=Major/m=minor (0x00050001 = 5.1);
+// initially unknown (0). PtBindAdapter fills it in from the "NdisVersion"
+// configuration value the first time it runs.
+//
+ULONG       NdisDotSysVersion =  0x0;
+
+
+// NDIS.SYS 5.1; PtRequestComplete compares NdisDotSysVersion against this.
+#define NDIS_SYS_VERSION_51       0x00050001
+
+
+VOID
+PtBindAdapter(
+    OUT PNDIS_STATUS            Status,
+    IN  NDIS_HANDLE             BindContext,
+    IN  PNDIS_STRING            DeviceName,
+    IN  PVOID                   SystemSpecific1,
+    IN  PVOID                   SystemSpecific2
+    )
+/*++
+
+Routine Description:
+
+    Called by NDIS to bind to a miniport below.
+
+    Reads the binding configuration, allocates and initializes the ADAPT
+    structure together with its send/receive packet pools, opens the lower
+    adapter and finally asks NDIS to initialize our miniport (upper) edge.
+    On failure everything that was set up so far is released again.
+
+Arguments:
+
+    Status            - Return status of bind here.
+    BindContext       - Can be passed to NdisCompleteBindAdapter if this call is pended.
+                        Unused here.
+    DeviceName        - Device name to bind to. This is passed to NdisOpenAdapter.
+    SystemSpecific1   - Passed to NdisOpenProtocolConfiguration to read per-binding information
+    SystemSpecific2   - Unused
+
+Return Value:
+
+    None. The result is reported through *Status; this routine always
+    completes synchronously (it waits for a pended NdisOpenAdapter).
+
+--*/
+{
+    NDIS_HANDLE                     ConfigHandle = NULL;
+    PNDIS_CONFIGURATION_PARAMETER   Param;
+    NDIS_STRING                     DeviceStr = NDIS_STRING_CONST("UpperBindings");
+    NDIS_STRING                     NdisVersionStr = NDIS_STRING_CONST("NdisVersion");
+    PADAPT                          pAdapt = NULL;
+    NDIS_STATUS                     Sts;
+    UINT                            MediumIndex;
+    ULONG                           TotalSize;
+    BOOLEAN                         NoCleanUpNeeded = FALSE;
+
+
+    UNREFERENCED_PARAMETER(BindContext);
+    UNREFERENCED_PARAMETER(SystemSpecific2);
+
+    DBGPRINT(("==> Protocol BindAdapter\n"));
+
+    do
+    {
+        //
+        // Access the configuration section for our binding-specific
+        // parameters.
+        //
+        NdisOpenProtocolConfiguration(Status,
+                                       &ConfigHandle,
+                                       SystemSpecific1);
+
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            break;
+        }
+
+        //
+        // Learn the NDIS.SYS version once; later binds reuse the
+        // cached value.
+        //
+        if (NdisDotSysVersion == 0)
+        {
+            NdisReadConfiguration(Status,
+                                  &Param,
+                                  ConfigHandle,
+                                  &NdisVersionStr,        // "NdisVersion"
+                                  NdisParameterInteger);
+            if (*Status != NDIS_STATUS_SUCCESS)
+            {
+                break;
+            }
+
+            NdisDotSysVersion = Param->ParameterData.IntegerData;
+        }
+
+
+        //
+        // Read the "UpperBindings" reserved key that contains a list
+        // of device names representing our miniport instances corresponding
+        // to this lower binding. Since this is a 1:1 IM driver, this key
+        // contains exactly one name.
+        //
+        // If we want to implement a N:1 mux driver (N adapter instances
+        // over a single lower binding), then UpperBindings will be a
+        // MULTI_SZ containing a list of device names - we would loop through
+        // this list, calling NdisIMInitializeDeviceInstanceEx once for
+        // each name in it.
+        //
+        NdisReadConfiguration(Status,
+                              &Param,
+                              ConfigHandle,
+                              &DeviceStr,
+                              NdisParameterString);
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            break;
+        }
+
+        //
+        // Allocate memory for the Adapter structure. This represents both the
+        // protocol context as well as the adapter structure when the miniport
+        // is initialized.
+        //
+        // In addition to the base structure, allocate space for the device
+        // instance string.
+        //
+        TotalSize = sizeof(ADAPT) + Param->ParameterData.StringData.MaximumLength;
+
+        NdisAllocateMemoryWithTag(&pAdapt, TotalSize, TAG);
+
+        if (pAdapt == NULL)
+        {
+            *Status = NDIS_STATUS_RESOURCES;
+            break;
+        }
+
+        //
+        // Initialize the adapter structure. We copy in the IM device
+        // name as well, because we may need to use it in a call to
+        // NdisIMCancelInitializeDeviceInstance. The string returned
+        // by NdisReadConfiguration is active (i.e. available) only
+        // for the duration of this call to our BindAdapter handler.
+        //
+        NdisZeroMemory(pAdapt, TotalSize);
+        pAdapt->DeviceName.MaximumLength = Param->ParameterData.StringData.MaximumLength;
+        pAdapt->DeviceName.Length = Param->ParameterData.StringData.Length;
+        pAdapt->DeviceName.Buffer = (PWCHAR)((ULONG_PTR)pAdapt + sizeof(ADAPT));
+        NdisMoveMemory(pAdapt->DeviceName.Buffer,
+                       Param->ParameterData.StringData.Buffer,
+                       Param->ParameterData.StringData.MaximumLength);
+
+
+
+        NdisInitializeEvent(&pAdapt->Event);
+        NdisAllocateSpinLock(&pAdapt->Lock);
+
+        //
+        // Allocate a packet pool for sends. We need this to pass sends down.
+        // We cannot use the same packet descriptor that came down to our send
+        // handler (see also NDIS 5.1 packet stacking).
+        //
+        NdisAllocatePacketPoolEx(Status,
+                                   &pAdapt->SendPacketPoolHandle,
+                                   MIN_PACKET_POOL_SIZE,
+                                   MAX_PACKET_POOL_SIZE - MIN_PACKET_POOL_SIZE,
+                                   sizeof(SEND_RSVD));
+
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            break;
+        }
+
+        //
+        // Allocate a packet pool for receives. We need this to indicate receives.
+        // Same consideration as sends (see also NDIS 5.1 packet stacking).
+        //
+        NdisAllocatePacketPoolEx(Status,
+                                   &pAdapt->RecvPacketPoolHandle,
+                                   MIN_PACKET_POOL_SIZE,
+                                   MAX_PACKET_POOL_SIZE - MIN_PACKET_POOL_SIZE,
+                                   PROTOCOL_RESERVED_SIZE_IN_PACKET);
+
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            break;
+        }
+
+        //
+        // Now open the adapter below and complete the initialization
+        //
+        NdisOpenAdapter(Status,
+                          &Sts,
+                          &pAdapt->BindingHandle,
+                          &MediumIndex,
+                          MediumArray,
+                          sizeof(MediumArray)/sizeof(NDIS_MEDIUM),
+                          ProtHandle,
+                          pAdapt,
+                          DeviceName,
+                          0,
+                          NULL);
+
+        if (*Status == NDIS_STATUS_PENDING)
+        {
+            //
+            // PtOpenAdapterComplete stores the final status in
+            // pAdapt->Status and signals the event.
+            //
+            NdisWaitEvent(&pAdapt->Event, 0);
+            *Status = pAdapt->Status;
+        }
+
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            break;
+        }
+
+        //
+        // The binding is open: take the reference that is dropped when
+        // the binding is closed again.
+        //
+        PtReferenceAdapt(pAdapt);
+
+#pragma prefast(suppress: __WARNING_POTENTIAL_BUFFER_OVERFLOW, "Ndis guarantees MediumIndex to be within bounds");
+        pAdapt->Medium = MediumArray[MediumIndex];
+
+        //
+        // Now ask NDIS to initialize our miniport (upper) edge.
+        // Set the flag below to synchronize with a possible call
+        // to our protocol Unbind handler that may come in before
+        // our miniport initialization happens.
+        //
+        pAdapt->MiniportInitPending = TRUE;
+        NdisInitializeEvent(&pAdapt->MiniportInitEvent);
+
+        //
+        // Temporary reference that keeps pAdapt alive across the call
+        // below; it is dropped on both the success and failure paths.
+        //
+        PtReferenceAdapt(pAdapt);
+
+        *Status = NdisIMInitializeDeviceInstanceEx(DriverHandle,
+                                           &pAdapt->DeviceName,
+                                           pAdapt);
+
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            //
+            // If the miniport edge has already been halted, skip the
+            // cleanup at the bottom of this routine.
+            //
+            if (pAdapt->MiniportIsHalted == TRUE)
+            {
+                NoCleanUpNeeded = TRUE;
+            }
+
+            DBGPRINT(("BindAdapter: Adapt %p, IMInitializeDeviceInstance error %x\n",
+                pAdapt, *Status));
+
+            if (PtDereferenceAdapt(pAdapt))
+            {
+                pAdapt = NULL;
+            }
+
+            break;
+        }
+
+        PtDereferenceAdapt(pAdapt);
+
+    } while(FALSE);
+
+    //
+    // Close the configuration handle now - see comments above with
+    // the call to NdisIMInitializeDeviceInstanceEx.
+    //
+    if (ConfigHandle != NULL)
+    {
+        NdisCloseConfiguration(ConfigHandle);
+    }
+
+    if ((*Status != NDIS_STATUS_SUCCESS) && (NoCleanUpNeeded == FALSE))
+    {
+        if (pAdapt != NULL)
+        {
+            if (pAdapt->BindingHandle != NULL)
+            {
+                NDIS_STATUS    LocalStatus;
+
+                //
+                // Close the binding we opened above.
+                //
+
+                NdisResetEvent(&pAdapt->Event);
+
+                NdisCloseAdapter(&LocalStatus, pAdapt->BindingHandle);
+                pAdapt->BindingHandle = NULL;
+
+                if (LocalStatus == NDIS_STATUS_PENDING)
+                {
+                     NdisWaitEvent(&pAdapt->Event, 0);
+                     LocalStatus = pAdapt->Status;
+
+
+                }
+
+                //
+                // Drop the reference taken when the binding was opened.
+                //
+                if (PtDereferenceAdapt(pAdapt))
+                {
+                     pAdapt = NULL;
+                }
+            }
+            else
+            {
+                //
+                // We failed before the lower adapter was opened (packet
+                // pool allocation or NdisOpenAdapter itself). No reference
+                // has been taken on pAdapt yet, so PtDereferenceAdapt
+                // cannot be used to release it; free everything we set
+                // up directly, otherwise it would be leaked.
+                //
+                if (pAdapt->SendPacketPoolHandle != NULL)
+                {
+                    NdisFreePacketPool(pAdapt->SendPacketPoolHandle);
+                    pAdapt->SendPacketPoolHandle = NULL;
+                }
+
+                if (pAdapt->RecvPacketPoolHandle != NULL)
+                {
+                    NdisFreePacketPool(pAdapt->RecvPacketPoolHandle);
+                    pAdapt->RecvPacketPoolHandle = NULL;
+                }
+
+                NdisFreeSpinLock(&pAdapt->Lock);
+                NdisFreeMemory(pAdapt, 0, 0);
+                pAdapt = NULL;
+            }
+        }
+    }
+
+
+    DBGPRINT(("<== Protocol BindAdapter: pAdapt %p, Status %x\n", pAdapt, *Status));
+}
+
+
+VOID
+PtOpenAdapterComplete(
+    IN  NDIS_HANDLE             ProtocolBindingContext,
+    IN  NDIS_STATUS             Status,
+    IN  NDIS_STATUS             OpenErrorStatus
+    )
+/*++
+
+Routine Description:
+
+    Completion routine for the NdisOpenAdapter call issued by
+    PtBindAdapter. Records the final open status in the adapter
+    structure and wakes up the thread waiting on the adapter's event.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter (PADAPT)
+    Status                    Final status of the NdisOpenAdapter call
+    OpenErrorStatus           Secondary status (ignored by us)
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT      pOpenedAdapt = (PADAPT)ProtocolBindingContext;
+
+    UNREFERENCED_PARAMETER(OpenErrorStatus);
+
+    DBGPRINT(("==> PtOpenAdapterComplete: Adapt %p, Status %x\n", pOpenedAdapt, Status));
+
+    //
+    // Store the result before signalling, so the waiter reads the
+    // final status once it resumes.
+    //
+    pOpenedAdapt->Status = Status;
+    NdisSetEvent(&pOpenedAdapt->Event);
+}
+
+
+VOID
+PtUnbindAdapter(
+    OUT PNDIS_STATUS        Status,
+    IN  NDIS_HANDLE            ProtocolBindingContext,
+    IN  NDIS_HANDLE            UnbindContext
+    )
+/*++
+
+Routine Description:
+
+    Called by NDIS when we are required to unbind from the adapter below.
+    This function shares functionality with the miniport's HaltHandler.
+    The code should ensure that NdisCloseAdapter and NdisFreeMemory are
+    called only once between the two functions.
+
+    Steps performed here:
+      1. Flag the adapter as unbinding and fail any queued request.
+      2. Cancel, or wait for, a pending miniport initialization.
+      3. If the miniport edge exists, ask NDIS to de-initialize it (the
+         halt handler does the rest); otherwise close the lower binding
+         and free the adapter ourselves.
+
+Arguments:
+
+    Status                    Placeholder for return status
+    ProtocolBindingContext    Pointer to the adapter structure
+    UnbindContext             Context for NdisUnbindComplete() if this pends
+                              (unused, this routine never pends)
+
+Return Value:
+
+    None. *Status receives the result of NdisIMDeInitializeDeviceInstance
+    (mapped to NDIS_STATUS_FAILURE on any error) or of NdisCloseAdapter.
+
+--*/
+{
+    PADAPT         pAdapt =(PADAPT)ProtocolBindingContext;
+    NDIS_STATUS    LocalStatus;
+
+    UNREFERENCED_PARAMETER(UnbindContext);
+
+    DBGPRINT(("==> PtUnbindAdapter: Adapt %p\n", pAdapt));
+
+    //
+    // Set the flag that the miniport below is unbinding, so the request handlers will
+    // fail any request coming later
+    //
+    NdisAcquireSpinLock(&pAdapt->Lock);
+    pAdapt->UnbindingInProcess = TRUE;
+    if (pAdapt->QueuedRequest == TRUE)
+    {
+        pAdapt->QueuedRequest = FALSE;
+        NdisReleaseSpinLock(&pAdapt->Lock);
+
+        //
+        // Fail the queued request; the lock is dropped first because
+        // PtRequestComplete calls back into NDIS.
+        //
+        PtRequestComplete(pAdapt,
+                         &pAdapt->Request,
+                         NDIS_STATUS_FAILURE );
+
+    }
+    else
+    {
+        NdisReleaseSpinLock(&pAdapt->Lock);
+    }
+#ifndef WIN9X
+    //
+    // Check if we had called NdisIMInitializeDeviceInstanceEx and
+    // we are awaiting a call to MiniportInitialize.
+    //
+    if (pAdapt->MiniportInitPending == TRUE)
+    {
+        //
+        // Try to cancel the pending IMInit process.
+        //
+        LocalStatus = NdisIMCancelInitializeDeviceInstance(
+                        DriverHandle,
+                        &pAdapt->DeviceName);
+
+        if (LocalStatus == NDIS_STATUS_SUCCESS)
+        {
+            //
+            // Successfully cancelled IM Initialization; our
+            // Miniport Initialize routine will not be called
+            // for this device.
+            //
+            pAdapt->MiniportInitPending = FALSE;
+            ASSERT(pAdapt->MiniportHandle == NULL);
+        }
+        else
+        {
+            //
+            // Our Miniport Initialize routine will be called
+            // (may be running on another thread at this time).
+            // Wait for it to finish.
+            //
+            NdisWaitEvent(&pAdapt->MiniportInitEvent, 0);
+            ASSERT(pAdapt->MiniportInitPending == FALSE);
+        }
+
+    }
+#endif // !WIN9X
+
+    //
+    // Call NDIS to remove our device-instance. We do most of the work
+    // inside the HaltHandler.
+    //
+    // The Handle will be NULL if our miniport Halt Handler has been called or
+    // if the IM device was never initialized
+    //
+
+    if (pAdapt->MiniportHandle != NULL)
+    {
+        *Status = NdisIMDeInitializeDeviceInstance(pAdapt->MiniportHandle);
+
+        //
+        // Collapse every error into a plain failure for the caller.
+        //
+        if (*Status != NDIS_STATUS_SUCCESS)
+        {
+            *Status = NDIS_STATUS_FAILURE;
+        }
+    }
+    else
+    {
+        //
+        // We need to do some work here.
+        // Close the binding below us
+        // and release the memory allocated.
+        //
+
+        if(pAdapt->BindingHandle != NULL)
+        {
+            NdisResetEvent(&pAdapt->Event);
+
+            NdisCloseAdapter(Status, pAdapt->BindingHandle);
+
+            //
+            // Wait for it to complete; PtCloseAdapterComplete stores the
+            // final status and signals the event.
+            //
+            if(*Status == NDIS_STATUS_PENDING)
+            {
+                 NdisWaitEvent(&pAdapt->Event, 0);
+                 *Status = pAdapt->Status;
+            }
+            pAdapt->BindingHandle = NULL;
+        }
+        else
+        {
+            //
+            // MiniportHandle and BindingHandle should never both be NULL.
+            //
+            *Status = NDIS_STATUS_FAILURE;
+            ASSERT(0);
+        }
+
+        //
+        // Free the memory here, if it was not released earlier (by calling the HaltHandler)
+        //
+        MPFreeAllPacketPools(pAdapt);
+        NdisFreeSpinLock(&pAdapt->Lock);
+        NdisFreeMemory(pAdapt, 0, 0);
+    }
+
+    //
+    // pAdapt may already have been freed above; it is only printed as a
+    // pointer value here, never dereferenced.
+    //
+    DBGPRINT(("<== PtUnbindAdapter: Adapt %p\n", pAdapt));
+}
+
+VOID
+PtUnloadProtocol(
+    VOID
+)
+//
+// Deregister the protocol edge from NDIS, if it is currently registered,
+// and forget the protocol handle so a second call does nothing.
+//
+{
+    NDIS_STATUS DeregisterStatus;
+
+    if (NULL != ProtHandle)
+    {
+        // The deregistration status is not inspected.
+        NdisDeregisterProtocol(&DeregisterStatus, ProtHandle);
+        ProtHandle = NULL;
+    }
+
+    DBGPRINT(("PtUnloadProtocol: done!\n"));
+}
+
+
+
+VOID
+PtCloseAdapterComplete(
+    IN    NDIS_HANDLE            ProtocolBindingContext,
+    IN    NDIS_STATUS            Status
+    )
+/*++
+
+Routine Description:
+
+    Completion routine for NdisCloseAdapter. Records the final close
+    status in the adapter structure and wakes up the thread waiting on
+    the adapter's event.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter structure (PADAPT)
+    Status                    Completion status of the close
+
+Return Value:
+
+    None.
+
+--*/
+{
+    PADAPT      pClosedAdapt = (PADAPT)ProtocolBindingContext;
+
+    DBGPRINT(("CloseAdapterComplete: Adapt %p, Status %x\n", pClosedAdapt, Status));
+
+    //
+    // Store the result before signalling, so the waiter reads the
+    // final status once it resumes.
+    //
+    pClosedAdapt->Status = Status;
+    NdisSetEvent(&pClosedAdapt->Event);
+}
+
+
+VOID
+PtResetComplete(
+    IN  NDIS_HANDLE            ProtocolBindingContext,
+    IN  NDIS_STATUS            Status
+    )
+/*++
+
+Routine Description:
+
+    Completion for a reset of the lower adapter. This driver never
+    issues a reset, so reaching this handler is unexpected and trips
+    an assertion in checked builds.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter structure (unused)
+    Status                    Completion status (unused)
+
+Return Value:
+
+    None.
+
+--*/
+{
+
+    UNREFERENCED_PARAMETER(ProtocolBindingContext);
+    UNREFERENCED_PARAMETER(Status);
+    //
+    // We never issue a reset, so we should not be here.
+    //
+    ASSERT(0);
+}
+
+
+VOID
+PtRequestComplete(
+    IN  NDIS_HANDLE            ProtocolBindingContext,
+    IN  PNDIS_REQUEST          NdisRequest,
+    IN  NDIS_STATUS            Status
+    )
+/*++
+
+Routine Description:
+
+    Completion handler for the previously posted request. All OIDs
+    are completed by and sent to the same miniport that they were requested for.
+
+    The byte counts of the request are copied back to the caller's
+    out-parameters and the query/set is completed on our miniport edge.
+    For a successful OID_PNP_CAPABILITIES query, MPQueryPNPCapabilities is
+    called first; for a successful OID_GEN_MAC_OPTIONS query on
+    NDIS 5.1 or later, the no-loopback bit is cleared in the result.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter structure
+    NdisRequest               The posted request
+    Status                    Completion status
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT        pAdapt = (PADAPT)ProtocolBindingContext;
+    //
+    // NOTE(review): the OID is read from pAdapt->Request, not from
+    // NdisRequest; presumably NdisRequest always points at pAdapt->Request
+    // (PtUnbindAdapter passes exactly that) - confirm for other callers.
+    //
+    NDIS_OID      Oid = pAdapt->Request.DATA.SET_INFORMATION.Oid ;
+
+    //
+    // Since our request is not outstanding anymore
+    //
+    ASSERT(pAdapt->OutstandingRequests == TRUE);
+
+    pAdapt->OutstandingRequests = FALSE;
+
+    //
+    // Complete the Set or Query, and fill in the buffer for OID_PNP_CAPABILITIES, if need be.
+    //
+    switch (NdisRequest->RequestType)
+    {
+      case NdisRequestQueryInformation:
+
+        //
+        // We never pass OID_PNP_QUERY_POWER down.
+        //
+        ASSERT(Oid != OID_PNP_QUERY_POWER);
+
+        if ((Oid == OID_PNP_CAPABILITIES) && (Status == NDIS_STATUS_SUCCESS))
+        {
+            MPQueryPNPCapabilities(pAdapt, &Status);
+        }
+        *pAdapt->BytesReadOrWritten = NdisRequest->DATA.QUERY_INFORMATION.BytesWritten;
+        *pAdapt->BytesNeeded = NdisRequest->DATA.QUERY_INFORMATION.BytesNeeded;
+
+        if (((Oid == OID_GEN_MAC_OPTIONS)
+              && (Status == NDIS_STATUS_SUCCESS))
+              && (NdisDotSysVersion >= NDIS_SYS_VERSION_51))
+        {
+            //
+            // Only do this on Windows XP or greater (NDIS.SYS v 5.1);
+            // do not do in Windows 2000 (NDIS.SYS v 5.0))
+            //
+
+            //
+            // Remove the no-loopback bit from mac-options. In essence we are
+            // telling NDIS that we can handle loopback. We don't, but the
+            // interface below us does. If we do not do this, then loopback
+            // processing happens both below us and above us. This is wasteful
+            // at best and if Netmon is running, it will see multiple copies
+            // of loopback packets when sniffing above us.
+            //
+            // Only the lowest miniport in a stack of layered miniports should
+            // ever report this bit set to NDIS.
+            //
+            *(PULONG)NdisRequest->DATA.QUERY_INFORMATION.InformationBuffer &= ~NDIS_MAC_OPTION_NO_LOOPBACK;
+        }
+
+        NdisMQueryInformationComplete(pAdapt->MiniportHandle,
+                                      Status);
+        break;
+
+      case NdisRequestSetInformation:
+
+        //
+        // OID_PNP_SET_POWER is not expected to be completed through here.
+        //
+        ASSERT( Oid != OID_PNP_SET_POWER);
+
+        *pAdapt->BytesReadOrWritten = NdisRequest->DATA.SET_INFORMATION.BytesRead;
+        *pAdapt->BytesNeeded = NdisRequest->DATA.SET_INFORMATION.BytesNeeded;
+        NdisMSetInformationComplete(pAdapt->MiniportHandle,
+                                    Status);
+        break;
+
+      default:
+        //
+        // Only query and set requests are expected here.
+        //
+        ASSERT(0);
+        break;
+    }
+
+}
+
+
+VOID
+PtStatus(
+    IN  NDIS_HANDLE         ProtocolBindingContext,
+    IN  NDIS_STATUS         GeneralStatus,
+    IN  PVOID               StatusBuffer,
+    IN  UINT                StatusBufferSize
+    )
+/*++
+
+Routine Description:
+
+    Status handler for the lower-edge (protocol). Forwards the status
+    indication to the upper edge when that is possible, and keeps track
+    of media connect/disconnect events either way.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter structure
+    GeneralStatus             Status code
+    StatusBuffer              Status buffer
+    StatusBufferSize          Size of the status buffer
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT      pAdapt = (PADAPT)ProtocolBindingContext;
+    BOOLEAN     IsMediaEvent;
+
+    //
+    // Nothing to indicate to, and nothing to remember, while the upper
+    // edge miniport is not initialized.
+    //
+    if (pAdapt->MiniportHandle == NULL)
+    {
+        return;
+    }
+
+    IsMediaEvent = (BOOLEAN)((GeneralStatus == NDIS_STATUS_MEDIA_CONNECT) ||
+                             (GeneralStatus == NDIS_STATUS_MEDIA_DISCONNECT));
+
+    if (IsIMDeviceStateOn(pAdapt))
+    {
+        //
+        // Both edges are at D0: remember the media status we are about
+        // to indicate, then pass the indication up.
+        //
+        if (IsMediaEvent)
+        {
+            pAdapt->LastIndicatedStatus = GeneralStatus;
+        }
+
+        NdisMIndicateStatus(pAdapt->MiniportHandle,
+                            GeneralStatus,
+                            StatusBuffer,
+                            StatusBufferSize);
+    }
+    else if (IsMediaEvent)
+    {
+        //
+        // Not powered on: suppress the indication, but save the latest
+        // media status that was not indicated.
+        //
+        pAdapt->LatestUnIndicateStatus = GeneralStatus;
+    }
+
+}
+
+
+VOID
+PtStatusComplete(
+    IN NDIS_HANDLE            ProtocolBindingContext
+    )
+/*++
+
+Routine Description:
+
+    Status-complete handler for the lower edge (protocol). Forwards the
+    notification to the upper edge under the same conditions that
+    PtStatus uses for forwarding a status indication.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter structure
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT      pAdapt = (PADAPT)ProtocolBindingContext;
+
+    //
+    // Drop the notification unless the upper edge miniport is
+    // initialized ...
+    //
+    if (pAdapt->MiniportHandle == NULL)
+    {
+        return;
+    }
+
+    //
+    // ... and both the miniport and the protocol edge are at D0. This
+    // also ignores indications sent by the lower miniport while it is
+    // not at D0.
+    //
+    if ((pAdapt->MPDeviceState != NdisDeviceStateD0) ||
+        (pAdapt->PTDeviceState != NdisDeviceStateD0))
+    {
+        return;
+    }
+
+    NdisMIndicateStatusComplete(pAdapt->MiniportHandle);
+}
+
+
+VOID
+PtSendComplete(
+    IN  NDIS_HANDLE            ProtocolBindingContext,
+    IN  PNDIS_PACKET           Packet,
+    IN  NDIS_STATUS            Status
+    )
+/*++
+
+Routine Description:
+
+    Called by NDIS when the miniport below has completed a send. We should
+    complete the corresponding upper-edge send this represents.
+
+    Three cases are handled:
+      - (NDIS51 only) the packet is not from our send pool: it is the
+        protocol's own packet that was passed down, so complete it as is;
+      - the packet is ours and has no original packet: it is an IPFW
+        reinjected packet, handed to CleanupReinjected;
+      - the packet is ours and wraps an original packet: free ours and
+        complete the original.
+
+Arguments:
+
+    ProtocolBindingContext - Points to ADAPT structure
+    Packet - Low level packet being completed
+    Status - status of send
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT            pAdapt = (PADAPT)ProtocolBindingContext;
+    PNDIS_PACKET      Pkt;
+    NDIS_HANDLE       PoolHandle;    // only used when NDIS51 is defined
+
+#ifdef NDIS51
+    //
+    // Packet stacking:
+    //
+    // Determine if the packet we are completing is the one we allocated. If so, then
+    // get the original packet from the reserved area and complete it and free the
+    // allocated packet. If this is the packet that was sent down to us, then just
+    // complete it
+    //
+    PoolHandle = NdisGetPoolFromPacket(Packet);
+    if (PoolHandle != pAdapt->SendPacketPoolHandle)
+    {
+        //
+        // We had passed down a packet belonging to the protocol above us.
+        //
+        // DBGPRINT(("PtSendComp: Adapt %p, Stacked Packet %p\n", pAdapt, Packet));
+
+        NdisMSendComplete(pAdapt->MiniportHandle,
+                          Packet,
+                          Status);
+    }
+    else
+#endif // NDIS51
+    {
+        PSEND_RSVD        SendRsvd;
+
+        SendRsvd = (PSEND_RSVD)(Packet->ProtocolReserved);
+        Pkt = SendRsvd->OriginalPkt;
+
+#if 1	// IPFW - new code
+        //DbgPrint("SendComplete: packet %p pkt %p\n", Packet, Pkt);
+        //
+        // NOTE(review): this early return skips ADAPT_DECR_PENDING_SENDS;
+        // presumably reinjected sends are not counted in OutstandingSends
+        // (or CleanupReinjected accounts for them) - confirm.
+        //
+        if (Pkt == NULL) { //this is a reinjected packet, with no 'father'
+            CleanupReinjected(Packet, SendRsvd->pMbuf, pAdapt);
+            return;
+        }
+#endif /* IPFW */
+
+#ifndef WIN9X
+        NdisIMCopySendCompletePerPacketInfo (Pkt, Packet);
+#endif
+
+        //
+        // Return our wrapper packet to the pool, then complete the
+        // original packet to the protocol above.
+        //
+        NdisDprFreePacket(Packet);
+
+        NdisMSendComplete(pAdapt->MiniportHandle,
+                                 Pkt,
+                                 Status);
+    }
+    //
+    // Decrease the outstanding send count
+    //
+    ADAPT_DECR_PENDING_SENDS(pAdapt);
+}
+
+
+VOID
+PtTransferDataComplete(
+    IN  NDIS_HANDLE         ProtocolBindingContext,
+    IN  PNDIS_PACKET        Packet,
+    IN  NDIS_STATUS         Status,
+    IN  UINT                BytesTransferred
+    )
+/*++
+
+Routine Description:
+
+    Entry point called by NDIS to indicate completion of a call by us
+    to NdisTransferData. The completion is relayed to the upper edge,
+    provided the miniport edge exists.
+
+    See notes under SendComplete.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to the adapter structure
+    Packet                    Packet the data was transferred into
+    Status                    Completion status of the transfer
+    BytesTransferred          Number of bytes copied into Packet
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT      pAdapt = (PADAPT)ProtocolBindingContext;
+
+    //
+    // Without a miniport handle there is nobody to relay this to.
+    //
+    if (!pAdapt->MiniportHandle)
+    {
+        return;
+    }
+
+    NdisMTransferDataComplete(pAdapt->MiniportHandle,
+                              Packet,
+                              Status,
+                              BytesTransferred);
+}
+
+
+NDIS_STATUS
+PtReceive(
+ IN NDIS_HANDLE ProtocolBindingContext,
+ IN NDIS_HANDLE MacReceiveContext,
+ IN PVOID HeaderBuffer,
+ IN UINT HeaderBufferSize,
+ IN PVOID LookAheadBuffer,
+ IN UINT LookAheadBufferSize,
+ IN UINT PacketSize
+ )
+/*++
+
+Routine Description:
+
+ Handle receive data indicated up by the miniport below. We pass
+ it along to the protocol above us.
+
+ If the miniport below indicates packets, NDIS would more
+ likely call us at our ReceivePacket handler. However we
+ might be called here in certain situations even though
+ the miniport below has indicated a receive packet, e.g.
+ if the miniport had set packet status to NDIS_STATUS_RESOURCES.
+
+ IPFW: before anything is indicated up, the data is handed to the
+ firewall: ipfw2_qhandler_w32 on the packet path, and
+ ipfw2_qhandler_w32_oldstyle on the 802.3/WAN lookahead path. Any
+ verdict other than PASS makes this routine return immediately
+ without indicating the data. The 802.5 and FDDI lookahead paths
+ do not call the firewall.
+
+Arguments:
+
+ <see DDK ref page for ProtocolReceive>
+
+Return Value:
+
+ NDIS_STATUS_SUCCESS if we processed the receive successfully,
+ NDIS_STATUS_XXX error code if we discarded it.
+
+--*/
+{
+ PADAPT pAdapt = (PADAPT)ProtocolBindingContext;
+ PNDIS_PACKET MyPacket, Packet = NULL;
+ NDIS_STATUS Status = NDIS_STATUS_SUCCESS;
+ // Index into the per-processor ReceivedIndicationFlags array used below.
+ ULONG Proc = KeGetCurrentProcessorNumber();
+
+ if ((!pAdapt->MiniportHandle) || (pAdapt->MPDeviceState > NdisDeviceStateD0))
+ {
+ Status = NDIS_STATUS_FAILURE;
+ }
+ else do
+ {
+ //
+ // Get at the packet, if any, indicated up by the miniport below.
+ //
+ Packet = NdisGetReceivedPacket(pAdapt->BindingHandle, MacReceiveContext);
+ if (Packet != NULL)
+ {
+ //
+ // The miniport below did indicate up a packet. Use information
+ // from that packet to construct a new packet to indicate up.
+ //
+
+#ifdef NDIS51
+ //
+ // NDIS 5.1 NOTE: Do not reuse the original packet in indicating
+ // up a receive, even if there is sufficient packet stack space.
+ // If we had to do so, we would have had to overwrite the
+ // status field in the original packet to NDIS_STATUS_RESOURCES,
+ // and it is not allowed for protocols to overwrite this field
+ // in received packets.
+ //
+#endif // NDIS51
+
+ //
+ // Get a packet off the pool and indicate that up
+ //
+ NdisDprAllocatePacket(&Status,
+ &MyPacket,
+ pAdapt->RecvPacketPoolHandle);
+
+ if (Status == NDIS_STATUS_SUCCESS)
+ {
+ //
+ // Make our packet point to data from the original
+ // packet. NOTE: this works only because we are
+ // indicating a receive directly from the context of
+ // our receive indication. If we need to queue this
+ // packet and indicate it from another thread context,
+ // we will also have to allocate a new buffer and copy
+ // over the packet contents, OOB data and per-packet
+ // information. This is because the packet data
+ // is available only for the duration of this
+ // receive indication call.
+ //
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);
+
+ //
+ // Get the original packet (it could be the same packet as the
+ // one received or a different one based on the number of layered
+ // miniports below) and set it on the indicated packet so the OOB
+ // data is visible correctly at protocols above. If the IM driver
+ // modifies the packet in any way it should not set the new packet's
+ // original packet equal to the original packet of the packet that
+ // was indicated to it from the underlying driver, in this case, the
+ // IM driver should also ensure that the related per packet info should
+ // be copied to the new packet.
+ // we can set the original packet to the original packet of the packet
+ // indicated from the underlying driver because the driver doesn't modify
+ // the data content in the packet.
+ //
+ NDIS_SET_ORIGINAL_PACKET(MyPacket, NDIS_GET_ORIGINAL_PACKET(Packet));
+ NDIS_SET_PACKET_HEADER_SIZE(MyPacket, HeaderBufferSize);
+
+ //
+ // Copy packet flags.
+ //
+ NdisGetPacketFlags(MyPacket) = NdisGetPacketFlags(Packet);
+
+ //
+ // Force protocols above to make a copy if they want to hang
+ // on to data in this packet. This is because we are in our
+ // Receive handler (not ReceivePacket) and we can't return a
+ // ref count from here.
+ //
+ NDIS_SET_PACKET_STATUS(MyPacket, NDIS_STATUS_RESOURCES);
+
+ //
+ // By setting NDIS_STATUS_RESOURCES, we also know that we can reclaim
+ // this packet as soon as the call to NdisMIndicateReceivePacket
+ // returns.
+ //
+
+ if (pAdapt->MiniportHandle != NULL)
+ {
+#if 1 /* IPFW: query the firewall */
+ int ret;
+ ret = ipfw2_qhandler_w32(MyPacket, INCOMING,
+ ProtocolBindingContext);
+ //
+ // NOTE(review): this early return bypasses the
+ // NdisDprFreePacket(MyPacket) call further down. Unless
+ // ipfw2_qhandler_w32 takes care of MyPacket for non-PASS
+ // verdicts (not visible here -- TODO confirm), the
+ // descriptor taken from RecvPacketPoolHandle is leaked.
+ // The literal 0 is returned as the NDIS_STATUS; presumably
+ // it equals NDIS_STATUS_SUCCESS -- confirm.
+ //
+ if (ret != PASS)
+ return 0; //otherwise simply continue
+#endif /* end of IPFW code */
+ NdisMIndicateReceivePacket(pAdapt->MiniportHandle, &MyPacket, 1);
+ }
+
+ //
+ // Reclaim the indicated packet. Since we had set its status
+ // to NDIS_STATUS_RESOURCES, we are guaranteed that protocols
+ // above are done with it.
+ //
+ NdisDprFreePacket(MyPacket);
+
+ break;
+ }
+ }
+ else
+ {
+ //
+ // The miniport below us uses the old-style (not packet)
+ // receive indication. Fall through.
+ //
+ }
+
+ //
+ // Fall through if the miniport below us has either not
+ // indicated a packet or we could not allocate one
+ //
+ pAdapt->ReceivedIndicationFlags[Proc] = TRUE;
+ if (pAdapt->MiniportHandle == NULL)
+ {
+ break;
+ }
+ switch (pAdapt->Medium)
+ {
+ case NdisMedium802_3:
+ case NdisMediumWan:
+ //DbgPrint("EthIndicateReceive context %p, header at %p len %u, lookahead at %p len %u, packetsize %u\n",ProtocolBindingContext,HeaderBuffer,HeaderBufferSize,LookAheadBuffer,LookAheadBufferSize,PacketSize);
+ //hexdump(HeaderBuffer,HeaderBufferSize+LookAheadBufferSize,"EthIndicateReceive");
+ {
+ // IPFW: old-style (header + lookahead) firewall query. A non-PASS
+ // verdict is reported to NDIS as success and the frame is not
+ // indicated up; ReceivedIndicationFlags[Proc] was already set above.
+ int ret = ipfw2_qhandler_w32_oldstyle(INCOMING, ProtocolBindingContext, HeaderBuffer, HeaderBufferSize, LookAheadBuffer, LookAheadBufferSize, PacketSize);
+ if (ret != PASS)
+ return NDIS_STATUS_SUCCESS;
+ }
+ NdisMEthIndicateReceive(pAdapt->MiniportHandle,
+ MacReceiveContext,
+ HeaderBuffer,
+ HeaderBufferSize,
+ LookAheadBuffer,
+ LookAheadBufferSize,
+ PacketSize);
+ break;
+
+ case NdisMedium802_5:
+ NdisMTrIndicateReceive(pAdapt->MiniportHandle,
+ MacReceiveContext,
+ HeaderBuffer,
+ HeaderBufferSize,
+ LookAheadBuffer,
+ LookAheadBufferSize,
+ PacketSize);
+ break;
+
+#if FDDI
+ case NdisMediumFddi:
+ NdisMFddiIndicateReceive(pAdapt->MiniportHandle,
+ MacReceiveContext,
+ HeaderBuffer,
+ HeaderBufferSize,
+ LookAheadBuffer,
+ LookAheadBufferSize,
+ PacketSize);
+ break;
+#endif
+ default:
+ ASSERT(FALSE);
+ break;
+ }
+
+ } while(FALSE);
+
+ return Status;
+}
+
+
+VOID
+PtReceiveComplete(
+    IN NDIS_HANDLE ProtocolBindingContext
+    )
+/*++
+
+Routine Description:
+
+    Called by the adapter below us when it is done indicating a batch of
+    received packets. If a receive was flagged on the current processor,
+    the matching medium-specific "indicate receive complete" call is made;
+    the per-processor flag is cleared in every case.
+
+Arguments:
+
+    ProtocolBindingContext    Pointer to our adapter structure.
+
+Return Value:
+
+    None
+
+--*/
+{
+    PADAPT Adapter = (PADAPT)ProtocolBindingContext;
+    ULONG CpuIndex = KeGetCurrentProcessorNumber();
+
+    /* Warning: this is a poor implementation of PtReceiveComplete
+     * made by MS, and it is a well known (but never fixed) issue.
+     * The processor number seen here can differ from the one that
+     * ran PtReceive, so NdisMEthIndicateReceiveComplete is sometimes
+     * not called, which causes poor performance for incoming traffic.
+     * In our driver, PtReceive is called for IP packets ONLY by
+     * particularly old NIC drivers, and the poor performance shows up
+     * even for traffic not handled by ipfw or dummynet.
+     * Fortunately this is quite rare: normally all incoming IP packets
+     * arrive through PtReceivePacket and this callback is never called.
+     * For reinjected traffic a workaround is used: the
+     * ReceivedIndicationFlag is toggled and
+     * NdisMEthIndicateReceiveComplete is called manually per packet.
+     */
+
+    //
+    // Only complete upwards when the miniport edge exists and is in D0 ...
+    //
+    if ((Adapter->MiniportHandle != NULL)
+        && (Adapter->MPDeviceState == NdisDeviceStateD0))
+    {
+        //
+        // ... and a receive was flagged on this processor.
+        //
+        if (Adapter->ReceivedIndicationFlags[CpuIndex])
+        {
+            switch (Adapter->Medium)
+            {
+                case NdisMedium802_3:
+                case NdisMediumWan:
+                    NdisMEthIndicateReceiveComplete(Adapter->MiniportHandle);
+                    break;
+
+                case NdisMedium802_5:
+                    NdisMTrIndicateReceiveComplete(Adapter->MiniportHandle);
+                    break;
+#if FDDI
+                case NdisMediumFddi:
+                    NdisMFddiIndicateReceiveComplete(Adapter->MiniportHandle);
+                    break;
+#endif
+                default:
+                    ASSERT(FALSE);
+                    break;
+            }
+        }
+    }
+
+    //
+    // Always reset the flag for this processor.
+    //
+    Adapter->ReceivedIndicationFlags[CpuIndex] = FALSE;
+}
+
+
+INT
+PtReceivePacket(
+ IN NDIS_HANDLE ProtocolBindingContext,
+ IN PNDIS_PACKET Packet
+ )
+/*++
+
+Routine Description:
+
+ ReceivePacket handler. Called by NDIS if the miniport below supports
+ NDIS 4.0 style receives. Re-package the buffer chain in a new packet
+ and indicate the new packet to protocols above us. Any context for
+ packets indicated up must be kept in the MiniportReserved field.
+
+ NDIS 5.1 - packet stacking - if there is sufficient "stack space" in
+ the packet passed to us, we can use the same packet in a receive
+ indication. (In this version the reuse branch is compiled in but
+ disabled by a constant-false condition, so a new packet is always
+ allocated.)
+
+ IPFW: the re-packaged packet is handed to ipfw2_qhandler_w32 before
+ being indicated up; on any verdict other than PASS the routine
+ returns 0 without indicating it.
+
+Arguments:
+
+ ProtocolBindingContext - Pointer to our adapter structure.
+ Packet - Pointer to the packet
+
+Return Value:
+
+ == 0 -> We are done with the packet
+ != 0 -> We will keep the packet and call NdisReturnPackets() this
+ many times when done.
+--*/
+{
+ PADAPT pAdapt =(PADAPT)ProtocolBindingContext;
+ NDIS_STATUS Status;
+ PNDIS_PACKET MyPacket;
+ BOOLEAN Remaining;
+
+ //
+ // Drop the packet silently if the upper miniport edge isn't initialized or
+ // the miniport edge is in low power state
+ //
+ if ((!pAdapt->MiniportHandle) || (pAdapt->MPDeviceState > NdisDeviceStateD0))
+ {
+ return 0;
+ }
+
+#ifdef NDIS51
+ //
+ // Check if we can reuse the same packet for indicating up.
+ // See also: PtReceive().
+ //
+ (VOID)NdisIMGetCurrentPacketStack(Packet, &Remaining);
+ // The leading "0 &&" makes this condition always false, so the
+ // packet-reuse branch below never executes.
+ if (0 && Remaining)
+ {
+ //
+ // We can reuse "Packet". Indicate it up and be done with it.
+ //
+ Status = NDIS_GET_PACKET_STATUS(Packet);
+ NdisMIndicateReceivePacket(pAdapt->MiniportHandle, &Packet, 1);
+ return((Status != NDIS_STATUS_RESOURCES) ? 1 : 0);
+ }
+#endif // NDIS51
+
+ //
+ // Get a packet off the pool and indicate that up
+ //
+ NdisDprAllocatePacket(&Status,
+ &MyPacket,
+ pAdapt->RecvPacketPoolHandle);
+
+ if (Status == NDIS_STATUS_SUCCESS)
+ {
+ PRECV_RSVD RecvRsvd;
+
+ // Remember the lower packet in our reserved area of the new packet.
+ RecvRsvd = (PRECV_RSVD)(MyPacket->MiniportReserved);
+ RecvRsvd->OriginalPkt = Packet;
+
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);
+
+ //
+ // Get the original packet (it could be the same packet as the one
+ // received or a different one based on the number of layered miniports
+ // below) and set it on the indicated packet so the OOB data is visible
+ // correctly to protocols above us.
+ //
+ NDIS_SET_ORIGINAL_PACKET(MyPacket, NDIS_GET_ORIGINAL_PACKET(Packet));
+
+ //
+ // Set Packet Flags
+ //
+ NdisGetPacketFlags(MyPacket) = NdisGetPacketFlags(Packet);
+
+ Status = NDIS_GET_PACKET_STATUS(Packet);
+
+ NDIS_SET_PACKET_STATUS(MyPacket, Status);
+ NDIS_SET_PACKET_HEADER_SIZE(MyPacket, NDIS_GET_PACKET_HEADER_SIZE(Packet));
+
+ if (pAdapt->MiniportHandle != NULL)
+ {
+#if 1 /* IPFW: query the firewall */
+ int ret;
+ ret = ipfw2_qhandler_w32(MyPacket, INCOMING,
+ ProtocolBindingContext);
+ //
+ // NOTE(review): returning here never frees MyPacket, and it is
+ // not indicated up either. Unless ipfw2_qhandler_w32 disposes of
+ // MyPacket for non-PASS verdicts (not visible here -- TODO
+ // confirm), the descriptor from RecvPacketPoolHandle is leaked.
+ //
+ if (ret != PASS)
+ return 0; //otherwise simply continue
+#endif /* end of IPFW code */
+ NdisMIndicateReceivePacket(pAdapt->MiniportHandle, &MyPacket, 1);
+ }
+
+ //
+ // Check if we had indicated up the packet with NDIS_STATUS_RESOURCES
+ // NOTE -- do not use NDIS_GET_PACKET_STATUS(MyPacket) for this since
+ // it might have changed! Use the value saved in the local variable.
+ //
+ if (Status == NDIS_STATUS_RESOURCES)
+ {
+ //
+ // Our ReturnPackets handler will not be called for this packet.
+ // We should reclaim it right here.
+ //
+ NdisDprFreePacket(MyPacket);
+ }
+
+ return((Status != NDIS_STATUS_RESOURCES) ? 1 : 0);
+ }
+ else
+ {
+ //
+ // We are out of packets. Silently drop it.
+ //
+ return(0);
+ }
+}
+
+
+NDIS_STATUS
+PtPNPHandler(
+    IN NDIS_HANDLE ProtocolBindingContext,
+    IN PNET_PNP_EVENT pNetPnPEvent
+    )
+
+/*++
+Routine Description:
+
+    This is called by NDIS to notify us of a PNP event related to a lower
+    binding. Set-power and reconfigure events are dispatched to their
+    dedicated helper routines.
+
+    NDIS 5.1: every other event is forwarded to the upper protocol(s) by
+    calling NdisIMNotifyPnPEvent, when an adapter with a miniport handle
+    is available.
+
+Arguments:
+
+    ProtocolBindingContext - Pointer to our adapter structure. Can be NULL
+                for "global" notifications
+
+    pNetPnPEvent - Pointer to the PNP event to be processed.
+
+Return Value:
+
+    NDIS_STATUS code indicating status of event processing.
+
+--*/
+{
+    PADAPT Adapter = (PADAPT)ProtocolBindingContext;
+    NDIS_STATUS Result = NDIS_STATUS_SUCCESS;
+
+    DBGPRINT(("PtPnPHandler: Adapt %p, Event %d\n", Adapter, pNetPnPEvent->NetEvent));
+
+    //
+    // Events with a dedicated helper are handled and returned right away.
+    //
+    switch (pNetPnPEvent->NetEvent)
+    {
+        case NetEventSetPower:
+            return PtPnPNetEventSetPower(Adapter, pNetPnPEvent);
+
+        case NetEventReconfigure:
+            return PtPnPNetEventReconfigure(Adapter, pNetPnPEvent);
+
+        default:
+            break;
+    }
+
+#ifdef NDIS51
+    //
+    // Any other event: pass the notification on to protocol(s) above.
+    // Without an adapter or miniport handle the result stays SUCCESS.
+    //
+    if ((Adapter != NULL) && (Adapter->MiniportHandle != NULL))
+    {
+        Result = NdisIMNotifyPnPEvent(Adapter->MiniportHandle, pNetPnPEvent);
+    }
+#endif // NDIS51
+
+    return Result;
+}
+
+
+NDIS_STATUS
+PtPnPNetEventReconfigure(
+    IN PADAPT pAdapt,
+    IN PNET_PNP_EVENT pNetPnPEvent
+    )
+/*++
+Routine Description:
+
+    This routine is called from NDIS to notify our protocol edge of a
+    reconfiguration of parameters for either a specific binding (pAdapt
+    is not NULL), or global parameters if any (pAdapt is NULL).
+
+Arguments:
+
+    pAdapt - Pointer to our adapter structure.
+    pNetPnPEvent - the reconfigure event
+
+Return Value:
+
+    NDIS_STATUS_SUCCESS, or (NDIS 5.1 builds, per-binding event with a
+    miniport handle) the status returned by NdisIMNotifyPnPEvent.
+
+--*/
+{
+    NDIS_STATUS UpperStatus = NDIS_STATUS_SUCCESS;
+
+    if (pAdapt == NULL)
+    {
+        //
+        // Global reconfiguration notification.
+        //
+        // An important event that causes this notification to us is if
+        // one of our upper-edge miniport instances was enabled after being
+        // disabled earlier, e.g. from Device Manager in Win2000. Note that
+        // NDIS calls this because we had set up an association between our
+        // miniport and protocol entities by calling NdisIMAssociateMiniport.
+        //
+        // Since we would have torn down the lower binding for that miniport,
+        // we need NDIS' assistance to re-bind to the lower miniport. The
+        // call to NdisReEnumerateProtocolBindings does exactly that.
+        //
+        NdisReEnumerateProtocolBindings(ProtHandle);
+    }
+#ifdef NDIS51
+    else if (pAdapt->MiniportHandle)
+    {
+        //
+        // Per-binding event: pass it on to protocol(s) above; whatever
+        // they return becomes our return value.
+        //
+        UpperStatus = NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);
+    }
+#endif // NDIS51
+
+    DBGPRINT(("<==PtPNPNetEventReconfigure: pAdapt %p\n", pAdapt));
+
+    return UpperStatus;
+}
+
+
+NDIS_STATUS
+PtPnPNetEventSetPower(
+ IN PADAPT pAdapt,
+ IN PNET_PNP_EVENT pNetPnPEvent
+ )
+/*++
+Routine Description:
+
+ This is a notification to our protocol edge of the power state
+ of the lower miniport. If it is going to a low-power state, we must
+ wait here for all outstanding sends and requests to complete.
+
+ NDIS 5.1: Since we use packet stacking, it is not sufficient to
+ check usage of our local send packet pool to detect whether or not
+ all outstanding sends have completed. For this, use the new API
+ NdisQueryPendingIOCount.
+
+ NDIS 5.1: Use the 5.1 API NdisIMNotifyPnPEvent to pass on PnP
+ notifications to upper protocol(s).
+
+Arguments:
+
+ pAdapt - Pointer to the adapter structure
+ pNetPnPEvent - The Net Pnp Event. this contains the new device state
+
+Return Value:
+
+ NDIS_STATUS_SUCCESS or the status returned by upper-layer protocols.
+
+--*/
+{
+ // The event buffer is interpreted as the new device power state.
+ PNDIS_DEVICE_POWER_STATE pDeviceState =(PNDIS_DEVICE_POWER_STATE)(pNetPnPEvent->Buffer);
+ // NOTE(review): pAdapt is dereferenced here without a NULL check --
+ // confirm this event is never delivered with a NULL binding context.
+ NDIS_DEVICE_POWER_STATE PrevDeviceState = pAdapt->PTDeviceState;
+ NDIS_STATUS Status;
+ NDIS_STATUS ReturnStatus;
+
+ ReturnStatus = NDIS_STATUS_SUCCESS;
+
+ //
+ // Set the Internal Device State, this blocks all new sends or receives
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ pAdapt->PTDeviceState = *pDeviceState;
+
+ //
+ // Check if the miniport below is going to a low power state.
+ //
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)
+ {
+ //
+ // If the miniport below is going to standby, fail all incoming requests
+ //
+ if (PrevDeviceState == NdisDeviceStateD0)
+ {
+ pAdapt->StandingBy = TRUE;
+ }
+
+ NdisReleaseSpinLock(&pAdapt->Lock);
+
+#ifdef NDIS51
+ //
+ // Notify upper layer protocol(s) first.
+ //
+ if (pAdapt->MiniportHandle != NULL)
+ {
+ ReturnStatus = NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);
+ }
+#endif // NDIS51
+
+ //
+ // Wait for outstanding sends and requests to complete.
+ // Both counters are polled without holding pAdapt->Lock.
+ //
+ while (pAdapt->OutstandingSends != 0)
+ {
+ NdisMSleep(2);
+ }
+
+ while (pAdapt->OutstandingRequests == TRUE)
+ {
+ //
+ // sleep till outstanding requests complete
+ //
+ NdisMSleep(2);
+ }
+
+ //
+ // If the below miniport is going to low power state, complete the queued request
+ //
+ NdisAcquireSpinLock(&pAdapt->Lock);
+ if (pAdapt->QueuedRequest)
+ {
+ pAdapt->QueuedRequest = FALSE;
+ // Drop the lock before completing the request with a failure status.
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ PtRequestComplete(pAdapt, &pAdapt->Request, NDIS_STATUS_FAILURE);
+ }
+ else
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ }
+
+
+ ASSERT(NdisPacketPoolUsage(pAdapt->SendPacketPoolHandle) == 0);
+ ASSERT(pAdapt->OutstandingRequests == FALSE);
+ }
+ else
+ {
+ //
+ // The lock taken above is still held here; it is released on both
+ // paths of the QueuedRequest check below.
+ //
+ //
+ // If the physical miniport is powering up (from Low power state to D0),
+ // clear the flag
+ //
+ if (PrevDeviceState > NdisDeviceStateD0)
+ {
+ pAdapt->StandingBy = FALSE;
+ }
+ //
+ // The device below is being turned on. If we had a request
+ // pending, send it down now.
+ //
+ if (pAdapt->QueuedRequest == TRUE)
+ {
+ pAdapt->QueuedRequest = FALSE;
+
+ pAdapt->OutstandingRequests = TRUE;
+ NdisReleaseSpinLock(&pAdapt->Lock);
+
+ NdisRequest(&Status,
+ pAdapt->BindingHandle,
+ &pAdapt->Request);
+
+ // A request that did not pend is completed by us right away.
+ if (Status != NDIS_STATUS_PENDING)
+ {
+ PtRequestComplete(pAdapt,
+ &pAdapt->Request,
+ Status);
+
+ }
+ }
+ else
+ {
+ NdisReleaseSpinLock(&pAdapt->Lock);
+ }
+
+
+#ifdef NDIS51
+ //
+ // Pass on this notification to protocol(s) above
+ //
+ if (pAdapt->MiniportHandle)
+ {
+ ReturnStatus = NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);
+ }
+#endif // NDIS51
+
+ }
+
+ return ReturnStatus;
+}
+
+VOID
+PtReferenceAdapt(
+    IN PADAPT pAdapt
+    )
+/*++
+Routine Description:
+
+    Take one reference on the adapter structure. The reference count is
+    incremented while holding the adapter's spin lock.
+
+Arguments:
+
+    pAdapt - Pointer to the adapter structure
+
+Return Value:
+
+    None
+
+--*/
+{
+    NdisAcquireSpinLock(&pAdapt->Lock);
+
+    //
+    // The count must never have gone negative.
+    //
+    ASSERT(pAdapt->RefCount >= 0);
+    pAdapt->RefCount += 1;
+
+    NdisReleaseSpinLock(&pAdapt->Lock);
+}
+
+
+BOOLEAN
+PtDereferenceAdapt(
+    IN PADAPT pAdapt
+    )
+/*++
+Routine Description:
+
+    Drop one reference on the adapter structure. When the count reaches
+    zero the packet pools, the spin lock and the structure itself are freed.
+
+Arguments:
+
+    pAdapt - Pointer to the adapter structure
+
+Return Value:
+
+    TRUE if this was the last reference and the structure was freed
+    (pAdapt must not be used afterwards), FALSE otherwise.
+
+--*/
+{
+    BOOLEAN LastReference;
+
+    NdisAcquireSpinLock(&pAdapt->Lock);
+
+    ASSERT(pAdapt->RefCount > 0);
+    pAdapt->RefCount -= 1;
+
+    //
+    // Decide under the lock, act after releasing it.
+    //
+    LastReference = (BOOLEAN)(pAdapt->RefCount == 0);
+
+    NdisReleaseSpinLock(&pAdapt->Lock);
+
+    if (!LastReference)
+    {
+        return FALSE;
+    }
+
+    //
+    // Free all resources on this adapter structure.
+    //
+    MPFreeAllPacketPools(pAdapt);
+    NdisFreeSpinLock(&pAdapt->Lock);
+    NdisFreeMemory(pAdapt, 0, 0);
+
+    return TRUE;
+}
+
+
#define log(x, arg...) fprintf(stderr, ## arg)
#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1)
#define min(a, b) ((a) < (b) ? (a) : (b) )
-#include "include/net/radix.h"
+#include <net/radix.h>
#endif /* !_KERNEL */
static int rn_walktree_from(struct radix_node_head *h, void *a, void *m,
m = mm;
}
if (m)
- log(LOG_ERR, "rn_delete: Orphaned Mask %p at %p\n", m, x);
+ log(LOG_ERR,
+ "rn_delete: Orphaned Mask %p at %p\n",
+ m, x);
}
}
/*
return (1);
}
+/*
+ * Release the radix tree head referenced through *head and reset *head
+ * to NULL. Asserts that both head and *head are non-NULL on entry.
+ * Always returns 1.
+ */
+int
+rn_detachhead(void **head)
+{
+	struct radix_node_head *tree_head;
+
+	KASSERT((head != NULL && *head != NULL),
+	    ("%s: head already freed", __func__));
+
+	/* Free <left,root,right> nodes. */
+	tree_head = *head;
+	Free(tree_head);
+
+	/* Clear the caller's pointer so it no longer refers to freed memory. */
+	*head = NULL;
+
+	return (1);
+}
+
+
void
rn_init(int maxk)
{
--- /dev/null
+/*
+ * Copyright (c) 2010 Francesco Magno, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * $Id: winmissing.h 5563 2010-02-26 16:25:23Z svn_magno $
+ * definitions and other things needed to build freebsd kernel
+ * modules in Windows (with the MSVC compiler)
+ */
+
+#ifndef _WINMISSING_H_
+#define _WINMISSING_H_
+
+#include <ntifs.h>
+#include <ntddk.h>
+#include <basetsd.h>
+#include <windef.h>
+#include <stdio.h>
+#include <ndis.h>
+
+typedef UCHAR u_char;
+typedef UCHAR u_int8_t;
+typedef UCHAR uint8_t;
+typedef USHORT u_short;
+typedef USHORT u_int16_t;
+typedef USHORT uint16_t;
+typedef USHORT n_short;
+typedef UINT u_int;
+typedef INT32 int32_t;
+typedef UINT32 u_int32_t;
+typedef UINT32 uint32_t;
+typedef ULONG u_long;
+typedef ULONG n_long;
+typedef UINT64 uint64_t;
+typedef UINT64 u_int64_t;
+typedef INT64 int64_t;
+
+typedef UINT32 in_addr_t;
+typedef UCHAR sa_family_t;
+typedef USHORT in_port_t;
+typedef UINT32 __gid_t;
+typedef UINT32 gid_t;
+typedef UINT32 __uid_t;
+typedef UINT32 uid_t;
+typedef ULONG n_time;
+typedef char* caddr_t;
+
+/* linux_lookup uses __be32 and __be16 in the prototype */
+typedef uint32_t __be32; /* XXX __u32 __bitwise __be32 */
+typedef uint16_t __be16; /* XXX */
+
+//*** DEBUG STUFF ***
+#define printf DbgPrint
+#define log(lev, ...) DbgPrint(__VA_ARGS__)
+const char* texify_cmd(int i);
+const char* texify_proto(unsigned int p);
+//*** end DEBUG STUFF ***
+
+#define snprintf _snprintf
+#define timespec timeval
+/*
+ * Local definition of struct timeval. Because of the "timespec" macro
+ * defined just before this struct, code naming struct timespec gets this
+ * same two-field layout.
+ */
+struct timeval {
+ long tv_sec; /* presumably whole seconds -- TODO confirm */
+ long tv_usec; /* presumably microseconds -- TODO confirm */
+};
+
+/* Local definition of struct in_addr: a single in_addr_t (UINT32) value. */
+struct in_addr {
+ in_addr_t s_addr;
+};
+
+/*
+ * Local definition of struct sockaddr_in. The layout starts with a one-byte
+ * length field followed by a one-byte family (sa_family_t is UCHAR here).
+ */
+struct sockaddr_in {
+ uint8_t sin_len;
+ sa_family_t sin_family;
+ in_port_t sin_port;
+ struct in_addr sin_addr;
+ char sin_zero[8];
+};
+
+/* XXX watch out, windows names are actually longer */
+#define IFNAMSIZ 16
+#define IF_NAMESIZE 16
+
+#define ETHER_ADDR_LEN 6
+
+/* we do not include the windows headers for in6_addr so
+ * we need to provide our own definition for the kernel.
+ */
+/* 16 bytes of address storage, viewable as 8-, 16- or 32-bit units. */
+struct in6_addr {
+ union {
+ uint8_t __u6_addr8[16];
+ uint16_t __u6_addr16[8];
+ uint32_t __u6_addr32[4];
+ } __u6_addr; /* 128-bit IP6 address */
+};
+
+#define htons(x) RtlUshortByteSwap(x)
+#define ntohs(x) RtlUshortByteSwap(x)
+#define htonl(x) RtlUlongByteSwap(x)
+#define ntohl(x) RtlUlongByteSwap(x)
+
+#define ENOSPC 28 /* No space left on device */
+#define EOPNOTSUPP 45 /* Operation not supported */
+#define EACCES 13 /* Permission denied */
+#define ENOENT 2 /* No such file or directory */
+#define EINVAL 22 /* Invalid argument */
+#define EPROTONOSUPPORT 43 /* Protocol not supported */
+#define ENOMEM 12 /* Cannot allocate memory */
+#define EEXIST 17 /* File exists */
+#define ESRCH 3
+#define ENOBUFS 55 /* No buffer space available */
+#define EBUSY 16 /* Module busy */
+
+
+/*
+ * Compiler-compatibility shims.
+ * __unused, __packed, __aligned(), __user, __init and __exit expand to
+ * nothing, so the attributes they name are NOT applied by these macros
+ * (in particular, __packed does not pack and __aligned does not align).
+ * __aligned(x) must expand to nothing, not to ';': a stray semicolon in
+ * the expansion would inject an empty declaration at every use.
+ * __func__ and inline are mapped to the __FUNCTION__ / __inline spellings.
+ */
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#define __unused
+#define __packed
+#define __aligned(x)
+#define __user
+#define __init
+#define __exit
+#define __func__ __FUNCTION__
+#define inline __inline
+
+/* Placeholder: only a dummy member, no real IPv6 socket address fields. */
+struct sockaddr_in6 {
+ int dummy;
+};
+
+//SPINLOCKS
+#define DEFINE_SPINLOCK(x) NDIS_SPIN_LOCK x
+#define mtx_init(m,a,b,c) NdisAllocateSpinLock(m)
+#define mtx_lock(_l) NdisAcquireSpinLock(_l)
+#define mtx_unlock(_l) NdisReleaseSpinLock(_l)
+#define mtx_destroy(m) NdisFreeSpinLock(m)
+#define mtx_assert(a, b)
+
+#define rw_rlock(_l) NdisAcquireSpinLock(_l)
+#define rw_runlock(_l) NdisReleaseSpinLock(_l)
+#define rw_assert(a, b)
+#define rw_wlock(_l) NdisAcquireSpinLock(_l)
+#define rw_wunlock(_l) NdisReleaseSpinLock(_l)
+#define rw_destroy(_l) NdisFreeSpinLock(_l)
+#define rw_init(_l, msg) NdisAllocateSpinLock(_l)
+#define rw_init_flags(_l, s, v) NdisAllocateSpinLock(_l)
+
+#define rwlock_t NDIS_SPIN_LOCK
+#define spinlock_t NDIS_SPIN_LOCK
+
+#define s6_addr __u6_addr.__u6_addr8
+
+
+/* Leading fields of an ICMP message: type, code and checksum. */
+struct icmphdr {
+ u_char icmp_type; /* type of message, see below */
+ u_char icmp_code; /* type sub code */
+ u_short icmp_cksum; /* ones complement cksum of struct */
+};
+
+#define ICMP_ECHO 8 /* echo service */
+
+#define IPOPT_OPTVAL 0 /* option ID */
+#define IPOPT_OLEN 1 /* option length */
+#define IPOPT_EOL 0 /* end of option list */
+#define IPOPT_NOP 1 /* no operation */
+#define IPOPT_LSRR 131 /* loose source route */
+#define IPOPT_SSRR 137 /* strict source route */
+#define IPOPT_RR 7 /* record packet route */
+#define IPOPT_TS 68 /* timestamp */
+
+#define IPPROTO_ICMP 1 /* control message protocol */
+#define IPPROTO_TCP 6 /* tcp */
+#define IPPROTO_UDP 17 /* user datagram protocol */
+#define IPPROTO_ICMPV6 58 /* ICMP6 */
+#define IPPROTO_SCTP 132 /* SCTP */
+#define IPPROTO_HOPOPTS 0 /* IP6 hop-by-hop options */
+#define IPPROTO_ROUTING 43 /* IP6 routing header */
+#define IPPROTO_FRAGMENT 44 /* IP6 fragmentation header */
+#define IPPROTO_DSTOPTS 60 /* IP6 destination option */
+#define IPPROTO_AH 51 /* IP6 Auth Header */
+#define IPPROTO_ESP 50 /* IP6 Encap Sec. Payload */
+#define IPPROTO_NONE 59 /* IP6 no next header */
+#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
+
+#define IPPROTO_IPV6 41
+#define IPPROTO_IPV4 4 /* IPv4 encapsulation */
+
+
+#define INADDR_ANY (uint32_t)0x00000000
+
+#define AF_INET 2 /* internetwork: UDP, TCP, etc. */
+#define AF_LINK 18 /* Link layer interface */
+
+#define IN_CLASSD(i) (((uint32_t)(i) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(i) IN_CLASSD(i)
+
+#define DROP 0
+#define PASS 1
+#define DUMMYNET 2
+#define INCOMING 0
+#define OUTGOING 1
+
+/*
+ * Helper routines declared here; their definitions are not part of this
+ * header. time_uptime_w32 is declared with an explicit (void) parameter
+ * list so that it is a real prototype and calls with arguments are
+ * diagnosed by the compiler.
+ */
+size_t strlcpy(char *dst, const char *src, size_t siz);
+void do_gettimeofday(struct timeval *tv);
+int ffs(int bits);
+int time_uptime_w32(void);
+
+#endif /* _WINMISSING_H_ */
* SUCH DAMAGE.
*/
/*
- * $Id: glue.h 4661 2010-01-04 11:56:12Z luigi $
+ * $Id: glue.h 5822 2010-03-23 10:39:56Z svn_magno $
*
* glue code to adapt the FreeBSD version to linux and windows,
* userland and kernel.
* This is included before any other headers, so we do not have
* a chance to override any #define that should appear in other
* headers.
+ * First handle headers for userland and kernel. Then common code
+ * (including headers that require a specific order of inclusion),
+ * then the user- and kernel- specific parts.
*/
#ifndef _GLUE_H
#define __FBSDID(x)
#endif /* FBSDID */
-/*
- * emulation of FreeBSD's sockopt and thread
- * This was in sockopt.h
- */
-enum sopt_dir { SOPT_GET, SOPT_SET };
+#ifndef KERNEL_MODULE /* Userland headers */
-#ifndef KERNEL_MODULE /* Userland part */
+#if defined(__CYGWIN32__) && !defined(_WIN32)
+#define _WIN32
+#endif
-#include <stdint.h> /* linux needs this in addition to sys/types.h */
+#if defined(TCC) && defined(_WIN32)
+#include <tcc_glue.h>
+#endif /* TCC */
-#include <sys/types.h> /* for size_t */
+#include <stdint.h> /* linux needs it in addition to sys/types.h */
+#include <sys/types.h> /* for size_t */
#include <sys/ioctl.h>
#include <time.h>
#include <errno.h>
-
#include <netinet/ether.h>
-#else /* KERNEL_MODULE, kernel part */
+#else /* KERNEL_MODULE, kernel headers */
-#ifndef _WIN32
+#ifdef __linux__
#include <linux/version.h>
#define ifnet net_device /* remap */
#include <linux/stddef.h> /* linux kernel */
#include <linux/types.h> /* linux kernel */
-
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) // or 2.4.x
#include <linux/linkage.h> /* linux/msg.h require this */
#include <linux/netdevice.h> /* just MAX_ADDR_LEN 8 on 2.4 32 on 2.6, also brings in byteorder */
#endif
+/* on 2.6.22, msg.h requires spinlock_types.h */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) && \
- LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // under 2.6.22 compilation is required by msg.h
+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
#include <linux/spinlock_types.h>
#endif
-#include <linux/msg.h> /* XXX m_type define conflict with include/sys/mbuf.h,
- * so early include this file (to be solved) */
+/* XXX m_type define conflict with include/sys/mbuf.h,
+ * so early include msg.h (to be solved)
+*/
+#include <linux/msg.h>
+
#include <linux/list.h>
#include <linux/in.h> /* struct in_addr */
#include <linux/in6.h> /* struct in6_addr */
*/
#undef LIST_HEAD
-#define IF_NAMESIZE 16
-typedef uint32_t in_addr_t;
+#define IF_NAMESIZE (16)
+typedef uint32_t in_addr_t;
#define printf(fmt, arg...) printk(KERN_ERR fmt, ##arg)
+#endif /* __linux__ */
+
+#endif /* KERNEL_MODULE end of kernel headers */
+
+
+/*
+ * Part 2: common userland and kernel definitions
+ */
+
+#ifndef ETHER_ADDR_LEN
+#define ETHER_ADDR_LEN (6+0) /* length of an Ethernet address */
+#endif
+
+#define ICMP6_DST_UNREACH_NOROUTE 0 /* no route to destination */
+#define ICMP6_DST_UNREACH_ADMIN 1 /* administratively prohibited */
+#define ICMP6_DST_UNREACH_ADDR 3 /* address unreachable */
+#define ICMP6_DST_UNREACH_NOPORT 4 /* port unreachable */
+
+/*
+ * linux: sysctl are mapped into /sys/module/ipfw_mod parameters
+ * windows: they are emulated via get/setsockopt
+ */
+#define CTLFLAG_RD 1
+#define CTLFLAG_RDTUN 1
+#define CTLFLAG_RW 2
+#define CTLFLAG_SECURE3 0 // unsupported
+#define CTLFLAG_VNET 0 /* unsupported */
-#endif /* !_WIN32 */
-#endif /* KERNEL_MODULE */
+/* if needed, queue.h must be included here after list.h */
/*
+ * struct thread is used in linux and windows kernel.
* In windows, we need to emulate the sockopt interface
* so also the userland needs to have the struct sockopt defined.
- * No need to declare struct thread on linux, but we need on windows.
+ * In order to achieve 64 bit compatibility, padding has been inserted.
*/
-
struct thread {
void *sopt_td;
void *td_ucred;
};
+enum sopt_dir { SOPT_GET, SOPT_SET };
+
struct sockopt {
enum sopt_dir sopt_dir; /* is this a get or a set? */
int sopt_level; /* second arg of [gs]etsockopt */
int sopt_name; /* third arg of [gs]etsockopt */
- void *sopt_val; /* fourth arg of [gs]etsockopt */
- size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */
- struct thread *sopt_td; /* calling thread or null if kernel */
+#ifdef _X64EMU
+ void* pad1;
+ void* pad2;
+#endif
+ void *sopt_val; /* fourth arg of [gs]etsockopt */
+ size_t sopt_valsize; /* (almost) fifth arg of [gs]etsockopt */
+#ifdef _X64EMU
+ void* pad3;
+ void* pad4;
+#endif
+ struct thread *sopt_td; /* calling thread or null if kernel */
};
-/* This must be included here after list.h */
-#include <sys/queue.h> /* both the kernel side and nat.c needs this */
+#define INET_ADDRSTRLEN (16) /* missing in netinet/in.h */
+
+/*
+ * List of values used for set/getsockopt options.
+ * The base value on FreeBSD is defined as a macro,
+ * if not available we will use our own enum.
+ * The TABLE_BASE value is used in the kernel.
+ */
+#ifndef IP_FW_TABLE_ADD
+#define _IPFW_SOCKOPT_BASE 100 /* 40 on freebsd */
+enum ipfw_msg_type {
+ IP_FW_TABLE_ADD = _IPFW_SOCKOPT_BASE,
+ IP_FW_TABLE_DEL,
+ IP_FW_TABLE_FLUSH,
+ IP_FW_TABLE_GETSIZE,
+ IP_FW_TABLE_LIST,
+ IP_FW_DYN_GET, /* new addition */
+
+ /* IP_FW3 and IP_DUMMYNET3 are the new API */
+ IP_FW3 = _IPFW_SOCKOPT_BASE + 8,
+ IP_DUMMYNET3,
+
+ IP_FW_ADD = _IPFW_SOCKOPT_BASE + 10,
+ IP_FW_DEL,
+ IP_FW_FLUSH,
+ IP_FW_ZERO,
+ IP_FW_GET,
+ IP_FW_RESETLOG,
+
+ IP_FW_NAT_CFG,
+ IP_FW_NAT_DEL,
+ IP_FW_NAT_GET_CONFIG,
+ IP_FW_NAT_GET_LOG,
+
+ IP_DUMMYNET_CONFIGURE,
+ IP_DUMMYNET_DEL ,
+ IP_DUMMYNET_FLUSH,
+ /* 63 is missing */
+ IP_DUMMYNET_GET = _IPFW_SOCKOPT_BASE + 24,
+ _IPFW_SOCKOPT_END
+};
+#endif /* IP_FW_TABLE_ADD */
+
+/*
+ * Part 3: userland stuff
+ */
#ifndef KERNEL_MODULE
-/* define internals for struct in6_addr netinet/in6.h on FreeBSD */
-#define __u6_addr in6_u
-#define __u6_addr32 u6_addr32
-/* define missing type for ipv6 (linux 2.6.28) */
-#define in6_u __in6_u
+/*
+ * internal names in struct in6_addr (netinet/in6.h) differ,
+ * so we remap the FreeBSD names to the platform-specific ones.
+ */
+#ifndef _WIN32
+#define __u6_addr in6_u
+#define __u6_addr32 u6_addr32
+#define in6_u __in6_u /* missing type for ipv6 (linux 2.6.28) */
+#else /* _WIN32 uses different naming */
+#define __u6_addr __u6
+#define __u6_addr32 __s6_addr32
+#endif /* _WIN32 */
/* missing in linux netinet/ip.h */
-#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */
-#define IPTOS_ECN_CE 0x03 /* congestion experienced */
+#define IPTOS_ECN_ECT0 0x02 /* ECN-capable transport (0) */
+#define IPTOS_ECN_CE 0x03 /* congestion experienced */
/* defined in freebsd netinet/icmp6.h */
-#define ICMP6_MAXTYPE 201
+#define ICMP6_MAXTYPE 201
/* on freebsd sys/socket.h pf specific */
-#define NET_RT_IFLIST 3 /* survey interface list */
+#define NET_RT_IFLIST 3 /* survey interface list */
/* on freebsd net/if.h XXX used */
struct if_data {
-
/* ... */
- u_long ifi_mtu; /* maximum transmission unit */
+ u_long ifi_mtu; /* maximum transmission unit */
};
/*
* This is used in nat.c
*/
struct if_msghdr {
- u_short ifm_msglen; /* to skip over non-understood messages */
+ u_short ifm_msglen; /* to skip over unknown messages */
u_char ifm_version; /* future binary compatibility */
u_char ifm_type; /* message type */
int ifm_addrs; /* like rtm_addrs */
int ifm_flags; /* value of if_flags */
u_short ifm_index; /* index for associated ifp */
- struct if_data ifm_data;/* statistics and other data about if */
+ struct if_data ifm_data;/* stats and other ifdata */
};
/*
- * Message format for use in obtaining information about interface addresses
- * from getkerninfo and the routing socket
+ * Message format for use in obtaining information about interface
+ * addresses from getkerninfo and the routing socket
*/
struct ifa_msghdr {
- u_short ifam_msglen; /* to skip over non-understood messages */
+ u_short ifam_msglen; /* to skip over unknown messages */
u_char ifam_version; /* future binary compatibility */
u_char ifam_type; /* message type */
int ifam_addrs; /* like rtm_addrs */
* linux does not have a reentrant version of qsort,
* so we the FreeBSD stdlib version.
*/
-void
-qsort_r(void *a, size_t n, size_t es, void *thunk,
+void qsort_r(void *a, size_t n, size_t es, void *thunk,
int cmp_t(void *, const void *, const void *));
/* prototypes from libutil */
extern int optreset; /* not present in linux */
size_t strlcpy(char * dst, const char * src, size_t siz);
-long long int
-strtonum(const char *nptr, long long minval, long long maxval,
- const char **errstr);
+long long int strtonum(const char *nptr, long long minval,
+ long long maxval, const char **errstr);
-int
-sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp,
- size_t newlen);
+int sysctlbyname(const char *name, void *oldp, size_t *oldlenp,
+ void *newp, size_t newlen);
-#ifdef __linux__
-/* linux does not have sin_len in sockaddr, we only remap in userland */
+/* no sin_len in sockaddr, we only remap in userland */
#define sin_len sin_zero[0]
-#endif /* __linux__ */
#else /* KERNEL_MODULE */
+/*
+ * Part 4: kernel stuff
+ */
+
/* linux and windows kernel do not have bcopy ? */
#define bcopy(_s, _d, _l) memcpy(_d, _s, _l)
+/* definitions useful for the kernel side */
+struct route_in6 {
+ int dummy;
+};
+
+#ifdef __linux__
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) // or 2.4.x
#include <linux/in6.h>
#define skb_dst(_dummy) skb->dst
#endif
-/* definitions useful for the kernel side */
-
-struct route_in6 { };
+#endif /* __linux__ */
#endif /* KERNEL_MODULE */
-/* missing in netinet/in.h */
+/*
+ * Part 5: windows specific stuff
+ */
-#define INET_ADDRSTRLEN 16
+#ifdef _WIN32
+#ifndef KERNEL_MODULE
+#define CTL_CODE( DeviceType, Function, Method, Access ) ( \
+ ((DeviceType) << 16) | ((Access) << 14) | ((Function) << 2) | (Method) \
+)
+
+#define METHOD_BUFFERED 0
+#define METHOD_IN_DIRECT 1
+#define METHOD_OUT_DIRECT 2
+#define METHOD_NEITHER 3
+#define FILE_ANY_ACCESS 0
+#define FILE_READ_DATA ( 0x0001 ) // file & pipe
+#define FILE_WRITE_DATA ( 0x0002 ) // file & pipe
+#endif /* !KERNEL_MODULE */
+
+#define FILE_DEVICE_IPFW 0x00654324
+#define IP_FW_BASE_CTL 0x840
+#define IP_FW_SETSOCKOPT \
+ CTL_CODE(FILE_DEVICE_IPFW, IP_FW_BASE_CTL + 1, METHOD_BUFFERED, FILE_WRITE_DATA)
+#define IP_FW_GETSOCKOPT \
+ CTL_CODE(FILE_DEVICE_IPFW, IP_FW_BASE_CTL + 2, METHOD_BUFFERED, FILE_ANY_ACCESS)
+
+/*********************************
+* missing declarations in altq.c *
+**********************************/
+
+#define _IOWR(x,y,t) _IOW(x,y,t)
+
+/**********************************
+* missing declarations in ipfw2.c *
+***********************************/
+
+#define ICMP_UNREACH_NET 0 /* bad net */
+#define ICMP_UNREACH_HOST 1 /* bad host */
+#define ICMP_UNREACH_PROTOCOL 2 /* bad protocol */
+#define ICMP_UNREACH_PORT 3 /* bad port */
+#define ICMP_UNREACH_NEEDFRAG 4 /* IP_DF caused drop */
+#define ICMP_UNREACH_SRCFAIL 5 /* src route failed */
+#define ICMP_UNREACH_NET_UNKNOWN 6 /* unknown net */
+#define ICMP_UNREACH_HOST_UNKNOWN 7 /* unknown host */
+#define ICMP_UNREACH_ISOLATED 8 /* src host isolated */
+#define ICMP_UNREACH_NET_PROHIB 9 /* prohibited access */
+#define ICMP_UNREACH_HOST_PROHIB 10 /* ditto */
+#define ICMP_UNREACH_TOSNET 11 /* bad tos for net */
+#define ICMP_UNREACH_TOSHOST 12 /* bad tos for host */
+#define ICMP_UNREACH_FILTER_PROHIB 13 /* admin prohib */
+#define ICMP_UNREACH_HOST_PRECEDENCE 14 /* host prec vio. */
+#define ICMP_UNREACH_PRECEDENCE_CUTOFF 15 /* prec cutoff */
+
+#define __unused
+
+
+struct ether_addr;
+struct ether_addr * ether_aton(const char *a);
+
+/*********************************
+* missing declarations in ipv6.c *
+**********************************/
+
+struct hostent* gethostbyname2(const char *name, int af);
+
+
+/********************
+* windows wrappings *
+*********************/
+
+int my_socket(int domain, int ty, int proto);
+#define socket(_a, _b, _c) my_socket(_a, _b, _c)
+
+#endif /* _WIN32 */
+/*******************
+* SYSCTL emulation *
+********************/
+#if defined (_WIN32) || defined (EMULATE_SYSCTL)
+#define STRINGIFY(x) #x
+
+/* flag is set with the last 2 bits for access, as defined in glue.h
+ * and the rest for type
+ */
+enum {
+ SYSCTLTYPE_INT = 0,
+ SYSCTLTYPE_UINT,
+ SYSCTLTYPE_SHORT,
+ SYSCTLTYPE_USHORT,
+ SYSCTLTYPE_LONG,
+ SYSCTLTYPE_ULONG,
+ SYSCTLTYPE_STRING,
+};
+struct sysctlhead {
+ uint32_t blocklen; //total size of the entry
+ uint32_t namelen; //strlen(name) + '\0'
+ uint32_t flags; //type and access
+ uint32_t datalen;
+};
-/*
- * List of values used for set/getsockopt options.
- * The base value on FreeBSD is defined as a macro,
- * if not available we will use our own enum.
- * The TABLE_BASE value is used in the kernel.
+#ifdef _KERNEL
+
+#ifdef SYSCTL_NODE
+#undef SYSCTL_NODE
+#endif
+#define SYSCTL_NODE(a,b,c,d,e,f)
+#define SYSCTL_DECL(a)
+#define SYSCTL_VNET_PROC(a,b,c,d,e,f,g,h,i)
+
+#define GST_HARD_LIMIT 100
+
+/* In the module, GST is implemented as an array of
+ * sysctlentry, but while passing data to the userland
+ * pointers are useless, the buffer is actually made of:
+ * - sysctlhead (fixed size, containing lengths)
+ * - data (typically 32 bit)
+ * - name (zero-terminated and padded to mod4)
*/
-#ifndef IP_FW_TABLE_ADD
-#define _IPFW_SOCKOPT_BASE 100 /* 40 on freebsd */
-enum ipfw_msg_type {
- IP_FW_TABLE_ADD = _IPFW_SOCKOPT_BASE,
- IP_FW_TABLE_DEL,
- IP_FW_TABLE_FLUSH,
- IP_FW_TABLE_GETSIZE,
- IP_FW_TABLE_LIST,
- IP_FW_DYN_GET, /* new addition */
- /* IP_FW3 and IP_DUMMYNET3 are the new API */
- IP_FW3 = _IPFW_SOCKOPT_BASE + 8,
- IP_DUMMYNET3,
+struct sysctlentry {
+ struct sysctlhead head;
+ char* name;
+ void* data;
+};
- IP_FW_ADD = _IPFW_SOCKOPT_BASE + 10,
- IP_FW_DEL,
- IP_FW_FLUSH,
- IP_FW_ZERO,
- IP_FW_GET,
- IP_FW_RESETLOG,
+struct sysctltable {
+ int count; //number of valid tables
+ int totalsize; //total size of valid entries of all the valid tables
+ void* namebuffer; //a buffer for all chained names
+ struct sysctlentry entry[GST_HARD_LIMIT];
+};
- IP_FW_NAT_CFG,
- IP_FW_NAT_DEL,
- IP_FW_NAT_GET_CONFIG,
- IP_FW_NAT_GET_LOG,
+#ifdef SYSBEGIN
+#undef SYSBEGIN
+#endif
+#define SYSBEGIN(x) void sysctl_addgroup_##x() {
+#ifdef SYSEND
+#undef SYSEND
+#endif
+#define SYSEND }
- IP_DUMMYNET_CONFIGURE,
- IP_DUMMYNET_DEL ,
- IP_DUMMYNET_FLUSH,
- /* 63 is missing */
- IP_DUMMYNET_GET = _IPFW_SOCKOPT_BASE + 24,
- _IPFW_SOCKOPT_END
-};
-#endif /* IP_FW_TABLE_ADD */
+/* XXX remove duplication */
+#define SYSCTL_INT(a,b,c,d,e,f,g) \
+ sysctl_pushback(STRINGIFY(a) "." STRINGIFY(c) + 1, \
+ (d) | (SYSCTLTYPE_INT << 2), sizeof(*e), e)
+
+#define SYSCTL_VNET_INT(a,b,c,d,e,f,g) \
+ sysctl_pushback(STRINGIFY(a) "." STRINGIFY(c) + 1, \
+ (d) | (SYSCTLTYPE_INT << 2), sizeof(*e), e)
+
+#define SYSCTL_UINT(a,b,c,d,e,f,g) \
+ sysctl_pushback(STRINGIFY(a) "." STRINGIFY(c) + 1, \
+ (d) | (SYSCTLTYPE_UINT << 2), sizeof(*e), e)
+
+#define SYSCTL_LONG(a,b,c,d,e,f,g) \
+ sysctl_pushback(STRINGIFY(a) "." STRINGIFY(c) + 1, \
+ (d) | (SYSCTLTYPE_LONG << 2), sizeof(*e), e)
+
+#define SYSCTL_ULONG(a,b,c,d,e,f,g) \
+ sysctl_pushback(STRINGIFY(a) "." STRINGIFY(c) + 1, \
+ (d) | (SYSCTLTYPE_ULONG << 2), sizeof(*e), e)
+#define TUNABLE_INT(a,b)
+
+void keinit_GST(void);
+void keexit_GST(void);
+int kesysctl_emu_set(void* p, int l);
+int kesysctl_emu_get(struct sockopt* sopt);
+void sysctl_pushback(char* name, int flags, int datalen, void* data);
+
+#endif /* _KERNEL */
+
+int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp,
+ size_t newlen);
+#endif /* _WIN32 || EMULATE_SYSCTL */
+#ifdef _WIN32
+int do_cmd(int optname, void *optval, uintptr_t optlen);
+
+#endif /* _WIN32 */
#endif /* !_GLUE_H */
#
# $Id$
#
-# GNUMakefile to build the userland part of ipfw on Linux
+# GNUMakefile to build the userland part of ipfw on Linux and Windows
#
# enable extra debugging information
# Do not set with = or := so we can inherit from the caller
-$(warning Building userland ipfw for $(VER))
+XOSARCH := $(shell uname)
+OSARCH ?= $(XOSARCH)
+$(warning Building userland ipfw for $(VER) $(OSARCH))
+
+#TCC=c:/tesi/tcc
+
+# common flags
EXTRA_CFLAGS += -O1
-EXTRA_CFLAGS += -Wall -Werror
+EXTRA_CFLAGS += -Wall
EXTRA_CFLAGS += -include ../glue.h
EXTRA_CFLAGS += -I ./include_e -I ./include
+TARGET := ipfw
ifneq ($(VER),openwrt)
-OSARCH := $(shell uname)
ifeq ($(OSARCH),Linux)
EXTRA_CFLAGS += -D__BSD_VISIBLE
+ EXTRA_CFLAGS += -Werror
+else # must be Cygwin ?
+ifeq ($(TCC),)
+ EXTRA_CFLAGS += -I/cygdrive/c/WinDDK/7600.16385.0/inc/ddk
+ EXTRA_CFLAGS += -I .
+ EXTRA_CFLAGS += -pipe -Wall
else
- HAVE_NAT := $(shell grep O_NAT /usr/include/netinet/ip_fw.h)
- # EXTRA_CFLAGS += ...
+ # TCC points to the root of tcc tree
+ CC=$(TCC)/tcc.exe
+ EXTRA_CFLAGS += -DTCC -I..
+ EXTRA_CFLAGS += -I$(TCC)/include/winapi -I$(TCC)/include
+ EXTRA_CFLAGS += -nostdinc
+
+ EDIRS += arpa net netinet sys
+ EFILES += err.h grp.h netdb.h pwd.h sysexits.h
+ EFILES += arpa/inet.h
+ EFILES += net/if.h
+ EFILES += netinet/in.h netinet/in_systm.h netinet/ip.h
+ EFILES += netinet/ip_icmp.h
+ EFILES += sys/cdefs.h sys/wait.h
+ EFILES += sys/ioctl.h sys/socket.h
+endif
+ # EXTRA_CFLAGS += -D_WIN32 # see who defines it
+ EXTRA_CFLAGS += -Dsetsockopt=wnd_setsockopt
+ EXTRA_CFLAGS += -Dgetsockopt=wnd_getsockopt
+ EXTRA_CFLAGS += -DEMULATE_SYSCTL
+ EDIRS += net netinet
+ EFILES += net/ethernet.h net/route.h
+ EFILES += netinet/ether.h netinet/icmp6.h
+ EFILES += sys/sysctl.h
+ TARGET := ipfw.exe
endif
endif # !openwrt
CFLAGS += $(EXTRA_CFLAGS)
# Location of OS headers and libraries. After our stuff.
USRDIR?= /usr
-CFLAGS += -I$(USRDIR)/include
-LDFLAGS += -L$(USRDIR)/lib
-
-OBJS = ipfw2.o dummynet.o main.o ipv6.o altq.o qsort_r.o
-OBJS += expand_number.o humanize_number.o
-ifneq ($(HAVE_NAT),)
- OBJS += nat.o
- EXTRA_CFLAGS += -DHAVE_NAT
+ifeq ($(TCC),)
+ CFLAGS += -I$(USRDIR)/include
+ LDFLAGS += -L$(USRDIR)/lib
+else
+ LDFLAGS += -L. -lws2_32
endif
-OBJS += glue.o
-all: ipfw
- echo "VER is $(VER)"
+OBJS = ipfw2.o dummynet.o main.o ipv6.o qsort_r.o
+OBJS += expand_number.o humanize_number.o glue.o
+
+# we don't use ALTQ
+CFLAGS += -DNO_ALTQ
+#OBJS += altq.o
-ipfw: $(OBJS)
+all: $(TARGET)
+ echo "Done build for $(OSARCH) VER $(VER)"
+
+$(TARGET): $(OBJS)
$(CC) $(LDFLAGS) -o $@ $^
-$(OBJS) : ipfw2.h ../glue.h include/netinet include_e
+$(OBJS) : ipfw2.h ../glue.h include_e
# support to create empty dirs and files in include_e/
# EDIRS is the list of directories, EFILES is the list of files.
-
-EDIRS = sys
+EDIRS += sys netinet
+EFILES += sys/sockio.h libutil.h
-EFILES = sys/sockio.h libutil.h
M ?= $(shell pwd)
include_e:
-@rm -rf $(M)/include_e opt_*
-@mkdir -p $(M)/include_e
-@(cd $(M)/include_e; mkdir -p $(EDIRS); touch $(EFILES) )
-
-include/netinet:
- -@rm -rf include/netinet
- -@mkdir -p include/netinet
- -(cd include/netinet; \
+ -@(cd $(M)/include_e/netinet; \
for i in ip_fw.h ip_dummynet.h tcp.h; do \
- ln -s ../../../dummynet2/include/netinet/$$i; done; )
+ cp ../../../dummynet2/include/netinet/$$i .; done; )
clean distclean:
- -rm -f $(OBJS) ipfw
- -rm -rf include/netinet/
+ -rm -f $(OBJS) $(TARGET)
+ -rm -rf include/netinet/ include_e
+++ /dev/null
-/*
- * Copyright (c) 2002-2003 Luigi Rizzo
- * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
- * Copyright (c) 1994 Ugen J.S.Antsilevich
- *
- * Idea and grammar partially left from:
- * Copyright (c) 1993 Daniel Boulet
- *
- * Redistribution and use in source forms, with and without modification,
- * are permitted provided that this entire comment appears intact.
- *
- * Redistribution in binary form may occur without any restrictions.
- * Obviously, it would be nice if you gave credit where credit is due
- * but requiring it would be too onerous.
- *
- * This software is provided ``AS IS'' without any warranties of any kind.
- *
- * NEW command line interface for IP firewall facility
- *
- * $FreeBSD: head/sbin/ipfw/altq.c 187983 2009-02-01 16:00:49Z luigi $
- *
- * altq interface
- */
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/sockio.h>
-
-#include "ipfw2.h"
-
-#include <err.h>
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sysexits.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include <net/if.h> /* IFNAMSIZ */
-#include <net/pfvar.h>
-#include <netinet/in.h>
-#include <netinet/ip_fw.h>
-
-/*
- * Map between current altq queue id numbers and names.
- */
-static TAILQ_HEAD(, pf_altq) altq_entries =
- TAILQ_HEAD_INITIALIZER(altq_entries);
-
-void
-altq_set_enabled(int enabled)
-{
- int pffd;
-
- pffd = open("/dev/pf", O_RDWR);
- if (pffd == -1)
- err(EX_UNAVAILABLE,
- "altq support opening pf(4) control device");
- if (enabled) {
- if (ioctl(pffd, DIOCSTARTALTQ) != 0 && errno != EEXIST)
- err(EX_UNAVAILABLE, "enabling altq");
- } else {
- if (ioctl(pffd, DIOCSTOPALTQ) != 0 && errno != ENOENT)
- err(EX_UNAVAILABLE, "disabling altq");
- }
- close(pffd);
-}
-
-static void
-altq_fetch(void)
-{
- struct pfioc_altq pfioc;
- struct pf_altq *altq;
- int pffd;
- unsigned int mnr;
- static int altq_fetched = 0;
-
- if (altq_fetched)
- return;
- altq_fetched = 1;
- pffd = open("/dev/pf", O_RDONLY);
- if (pffd == -1) {
- warn("altq support opening pf(4) control device");
- return;
- }
- bzero(&pfioc, sizeof(pfioc));
- if (ioctl(pffd, DIOCGETALTQS, &pfioc) != 0) {
- warn("altq support getting queue list");
- close(pffd);
- return;
- }
- mnr = pfioc.nr;
- for (pfioc.nr = 0; pfioc.nr < mnr; pfioc.nr++) {
- if (ioctl(pffd, DIOCGETALTQ, &pfioc) != 0) {
- if (errno == EBUSY)
- break;
- warn("altq support getting queue list");
- close(pffd);
- return;
- }
- if (pfioc.altq.qid == 0)
- continue;
- altq = safe_calloc(1, sizeof(*altq));
- *altq = pfioc.altq;
- TAILQ_INSERT_TAIL(&altq_entries, altq, entries);
- }
- close(pffd);
-}
-
-u_int32_t
-altq_name_to_qid(const char *name)
-{
- struct pf_altq *altq;
-
- altq_fetch();
- TAILQ_FOREACH(altq, &altq_entries, entries)
- if (strcmp(name, altq->qname) == 0)
- break;
- if (altq == NULL)
- errx(EX_DATAERR, "altq has no queue named `%s'", name);
- return altq->qid;
-}
-
-static const char *
-altq_qid_to_name(u_int32_t qid)
-{
- struct pf_altq *altq;
-
- altq_fetch();
- TAILQ_FOREACH(altq, &altq_entries, entries)
- if (qid == altq->qid)
- break;
- if (altq == NULL)
- return NULL;
- return altq->qname;
-}
-
-void
-print_altq_cmd(ipfw_insn_altq *altqptr)
-{
- if (altqptr) {
- const char *qname;
-
- qname = altq_qid_to_name(altqptr->qid);
- if (qname == NULL)
- printf(" altq ?<%u>", altqptr->qid);
- else
- printf(" altq %s", qname);
- }
-}
/*
- * Copyright (c) 2002-2003 Luigi Rizzo
- * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
- * Copyright (c) 1994 Ugen J.S.Antsilevich
- *
- * Idea and grammar partially left from:
- * Copyright (c) 1993 Daniel Boulet
+ * Copyright (c) 2002-2003,2010 Luigi Rizzo
*
* Redistribution and use in source forms, with and without modification,
* are permitted provided that this entire comment appears intact.
*
* This software is provided ``AS IS'' without any warranties of any kind.
*
- * NEW command line interface for IP firewall facility
- *
- * $FreeBSD: head/sbin/ipfw/dummynet.c 187769 2009-01-27 11:06:59Z luigi $
+ * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/dummynet.c 203321 2010-01-31 21:39:25Z luigi $
*
* dummynet support
*/
#include <sys/types.h>
#include <sys/socket.h>
-#include <sys/queue.h>
/* XXX there are several sysctl leftover here */
#include <sys/sysctl.h>
#include <netinet/ip_dummynet.h>
#include <arpa/inet.h> /* inet_ntoa */
+
static struct _s_x dummynet_params[] = {
{ "plr", TOK_PLR },
{ "noerror", TOK_NOERROR },
{ "src-port", TOK_SRCPORT },
{ "proto", TOK_PROTO },
{ "weight", TOK_WEIGHT },
+ { "lmax", TOK_LMAX },
+ { "maxlen", TOK_LMAX },
{ "all", TOK_ALL },
- { "mask", TOK_MASK },
+ { "mask", TOK_MASK }, /* alias for both */
+ { "sched_mask", TOK_SCHED_MASK },
+ { "flow_mask", TOK_FLOW_MASK },
{ "droptail", TOK_DROPTAIL },
{ "red", TOK_RED },
{ "gred", TOK_GRED },
{ "bw", TOK_BW },
{ "bandwidth", TOK_BW },
{ "delay", TOK_DELAY },
+ { "link", TOK_LINK },
{ "pipe", TOK_PIPE },
{ "queue", TOK_QUEUE },
+ { "flowset", TOK_FLOWSET },
+ { "sched", TOK_SCHED },
+ { "pri", TOK_PRI },
+ { "priority", TOK_PRI },
+ { "type", TOK_TYPE },
{ "flow-id", TOK_FLOWID},
{ "dst-ipv6", TOK_DSTIP6},
{ "dst-ip6", TOK_DSTIP6},
{ "src-ipv6", TOK_SRCIP6},
{ "src-ip6", TOK_SRCIP6},
- { "profile", TOK_PIPE_PROFILE},
+ { "profile", TOK_PROFILE},
{ "burst", TOK_BURST},
{ "dummynet-params", TOK_NULL },
{ NULL, 0 } /* terminator */
};
+/*
+ * Address of the byte located len bytes after p, as a void pointer.
+ * Both arguments are parenthesized so that arbitrary expressions
+ * (e.g. "a + 1" or "x ? y : z") can be passed without mis-binding.
+ */
+#define O_NEXT(p, len) ((void *)((char *)(p) + (len)))
+
+/*
+ * Initialize an object header: store the given id, type and length,
+ * and reset the subtype to 0.
+ */
+static void
+oid_fill(struct dn_id *oid, int len, int type, uintptr_t id)
+{
+ oid->id = id;
+ oid->subtype = 0;
+ oid->type = type;
+ oid->len = len;
+}
+
+/*
+ * Stamp an object header at the current position and move the cursor
+ * forward.
+ *
+ * o    - in/out cursor: on entry *o is where the new object starts,
+ *        on return *o has been advanced by len bytes.
+ * len  - total length of the new object, stored in the header and
+ *        used as the advance step.
+ * type - object type stored in the header (subtype and id are set to 0).
+ *
+ * Returns the previous value of *o, i.e. the header just filled in.
+ * No bounds check is done here: the caller must make sure that len
+ * bytes are available at *o.
+ */
+static void *
+o_next(struct dn_id **o, int len, int type)
+{
+ struct dn_id *ret = *o;
+ oid_fill(ret, len, type, 0);
+ *o = O_NEXT(*o, len);
+ return ret;
+}
+
+#if 0
static int
sort_q(void *arg, const void *pa, const void *pb)
{
res = 1;
return (int)(rev ? res : -res);
}
+#endif
+/* print a mask and header for the subsequent list of flows */
static void
-list_queues(struct dn_flow_set *fs, struct dn_flow_queue *q)
+print_mask(struct ipfw_flow_id *id)
{
- int l;
- int index_printed, indexes = 0;
- char buff[255];
- struct protoent *pe;
-
- if (fs->rq_elements == 0)
- return;
-
- if (co.do_sort != 0)
- qsort_r(q, fs->rq_elements, sizeof *q, NULL, sort_q);
+ if (!IS_IP6_FLOW_ID(id)) {
+ printf(" "
+ "mask: %s 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
+ id->extra ? "queue," : "",
+ id->proto,
+ id->src_ip, id->src_port,
+ id->dst_ip, id->dst_port);
+
+ printf("BKT Prot ___Source IP/port____ "
+ "____Dest. IP/port____ "
+ "Tot_pkt/bytes Pkt/Byte Drp\n");
+ } else {
+ char buf[255];
+ printf("\n mask: %sproto: 0x%02x, flow_id: 0x%08x, ",
+ id->extra ? "queue," : "",
+ id->proto, id->flow_id6);
+ inet_ntop(AF_INET6, &(id->src_ip6), buf, sizeof(buf));
+ printf("%s/0x%04x -> ", buf, id->src_port);
+ inet_ntop(AF_INET6, &(id->dst_ip6), buf, sizeof(buf));
+ printf("%s/0x%04x\n", buf, id->dst_port);
+
+ printf("BKT ___Prot___ _flow-id_ "
+ "______________Source IPv6/port_______________ "
+ "_______________Dest. IPv6/port_______________ "
+ "Tot_pkt/bytes Pkt/Byte Drp\n");
+ }
+}
- /* Print IPv4 flows */
- index_printed = 0;
- for (l = 0; l < fs->rq_elements; l++) {
- struct in_addr ina;
+static void
+list_flow(struct dn_flow *ni)
+{
+ char buff[255];
+ struct protoent *pe = NULL;
+ struct in_addr ina;
+ struct ipfw_flow_id *id = &ni->fid;
+ pe = getprotobynumber(id->proto);
/* XXX: Should check for IPv4 flows */
- if (IS_IP6_FLOW_ID(&(q[l].id)))
- continue;
-
- if (!index_printed) {
- index_printed = 1;
- if (indexes > 0) /* currently a no-op */
- printf("\n");
- indexes++;
- printf(" "
- "mask: 0x%02x 0x%08x/0x%04x -> 0x%08x/0x%04x\n",
- fs->flow_mask.proto,
- fs->flow_mask.src_ip, fs->flow_mask.src_port,
- fs->flow_mask.dst_ip, fs->flow_mask.dst_port);
-
- printf("BKT Prot ___Source IP/port____ "
- "____Dest. IP/port____ "
- "Tot_pkt/bytes Pkt/Byte Drp\n");
- }
-
- printf("%3d ", q[l].hash_slot);
- pe = getprotobynumber(q[l].id.proto);
+ printf("%3u%c", (ni->oid.id) & 0xff,
+ id->extra ? '*' : ' ');
+ if (!IS_IP6_FLOW_ID(id)) {
if (pe)
printf("%-4s ", pe->p_name);
else
- printf("%4u ", q[l].id.proto);
- ina.s_addr = htonl(q[l].id.src_ip);
+ printf("%4u ", id->proto);
+ ina.s_addr = htonl(id->src_ip);
printf("%15s/%-5d ",
- inet_ntoa(ina), q[l].id.src_port);
- ina.s_addr = htonl(q[l].id.dst_ip);
+ inet_ntoa(ina), id->src_port);
+ ina.s_addr = htonl(id->dst_ip);
printf("%15s/%-5d ",
- inet_ntoa(ina), q[l].id.dst_port);
- printf("%4llu %8llu %2u %4u %3u\n",
- align_uint64(&q[l].tot_pkts),
- align_uint64(&q[l].tot_bytes),
- q[l].len, q[l].len_bytes, q[l].drops);
- if (co.verbose)
- printf(" S %20llu F %20llu\n",
- align_uint64(&q[l].S), align_uint64(&q[l].F));
- }
-
- /* Print IPv6 flows */
- index_printed = 0;
- for (l = 0; l < fs->rq_elements; l++) {
- if (!IS_IP6_FLOW_ID(&(q[l].id)))
- continue;
-
- if (!index_printed) {
- index_printed = 1;
- if (indexes > 0)
- printf("\n");
- indexes++;
- printf("\n mask: proto: 0x%02x, flow_id: 0x%08x, ",
- fs->flow_mask.proto, fs->flow_mask.flow_id6);
- inet_ntop(AF_INET6, &(fs->flow_mask.src_ip6),
- buff, sizeof(buff));
- printf("%s/0x%04x -> ", buff, fs->flow_mask.src_port);
- inet_ntop( AF_INET6, &(fs->flow_mask.dst_ip6),
- buff, sizeof(buff) );
- printf("%s/0x%04x\n", buff, fs->flow_mask.dst_port);
-
- printf("BKT ___Prot___ _flow-id_ "
- "______________Source IPv6/port_______________ "
- "_______________Dest. IPv6/port_______________ "
- "Tot_pkt/bytes Pkt/Byte Drp\n");
- }
- printf("%3d ", q[l].hash_slot);
- pe = getprotobynumber(q[l].id.proto);
+ inet_ntoa(ina), id->dst_port);
+ } else {
+ /* Print IPv6 flows */
if (pe != NULL)
printf("%9s ", pe->p_name);
else
- printf("%9u ", q[l].id.proto);
- printf("%7d %39s/%-5d ", q[l].id.flow_id6,
- inet_ntop(AF_INET6, &(q[l].id.src_ip6), buff, sizeof(buff)),
- q[l].id.src_port);
+ printf("%9u ", id->proto);
+ printf("%7d %39s/%-5d ", id->flow_id6,
+ inet_ntop(AF_INET6, &(id->src_ip6), buff, sizeof(buff)),
+ id->src_port);
printf(" %39s/%-5d ",
- inet_ntop(AF_INET6, &(q[l].id.dst_ip6), buff, sizeof(buff)),
- q[l].id.dst_port);
- printf(" %4llu %8llu %2u %4u %3u\n",
- align_uint64(&q[l].tot_pkts),
- align_uint64(&q[l].tot_bytes),
- q[l].len, q[l].len_bytes, q[l].drops);
- if (co.verbose)
- printf(" S %20llu F %20llu\n",
- align_uint64(&q[l].S),
- align_uint64(&q[l].F));
+ inet_ntop(AF_INET6, &(id->dst_ip6), buff, sizeof(buff)),
+ id->dst_port);
}
+
+ /* Tcc relies on msvcrt.dll for printf, and
+ * it does not support ANSI %llu syntax
+ */
+#ifndef TCC
+ printf("%4llu %8llu %2u %4u %3u\n",
+ align_uint64(&ni->tot_pkts),
+ align_uint64(&ni->tot_bytes),
+ ni->length, ni->len_bytes, ni->drops);
+#else
+ /* XXX This should be printed correctly, but for some
+ * weird reason, it is not. Making a printf for each
+ * value is a workaround, until we understand what's wrong
+ */
+ /*printf("%4I64u %8I64u %2u %4u %3u\n",
+ align_uint64(&ni->tot_pkts),
+ align_uint64(&ni->tot_bytes),
+ ni->length, ni->len_bytes, ni->drops);*/
+
+ printf("%4I64u ",align_uint64(&ni->tot_pkts));
+ printf("%8I64u ",align_uint64(&ni->tot_bytes));
+ printf("%2u ",ni->length);
+ printf("%4u ",ni->len_bytes);
+ printf("%3u\n",ni->drops);
+#endif
}
static void
-print_flowset_parms(struct dn_flow_set *fs, char *prefix)
+print_flowset_parms(struct dn_fs *fs, char *prefix)
{
int l;
char qs[30];
char red[90]; /* Display RED parameters */
l = fs->qsize;
- if (fs->flags_fs & DN_QSIZE_IS_BYTES) {
+ if (fs->flags & DN_QSIZE_BYTES) {
if (l >= 8192)
sprintf(qs, "%d KB", l / 1024);
else
sprintf(plr, "plr %f", 1.0 * fs->plr / (double)(0x7fffffff));
else
plr[0] = '\0';
- if (fs->flags_fs & DN_IS_RED) /* RED parameters */
+
+ if (fs->flags & DN_IS_RED) /* RED parameters */
sprintf(red,
- "\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
- (fs->flags_fs & DN_IS_GENTLE_RED) ? 'G' : ' ',
+ "\n\t %cRED w_q %f min_th %d max_th %d max_p %f",
+ (fs->flags & DN_IS_GENTLE_RED) ? 'G' : ' ',
1.0 * fs->w_q / (double)(1 << SCALE_RED),
- SCALE_VAL(fs->min_th),
- SCALE_VAL(fs->max_th),
+ fs->min_th,
+ fs->max_th,
1.0 * fs->max_p / (double)(1 << SCALE_RED));
else
sprintf(red, "droptail");
- printf("%s %s%s %d queues (%d buckets) %s\n",
- prefix, qs, plr, fs->rq_elements, fs->rq_size, red);
+ if (prefix[0]) {
+ printf("%s %s%s %d queues (%d buckets) %s\n",
+ prefix, qs, plr, fs->oid.id, fs->buckets, red);
+ prefix[0] = '\0';
+ } else {
+ printf("q%05d %s%s %d flows (%d buckets) sched %d "
+ "weight %d lmax %d pri %d %s\n",
+ fs->fs_nr, qs, plr, fs->oid.id, fs->buckets,
+ fs->sched_nr, fs->par[0], fs->par[1], fs->par[2], red);
+ if (fs->flags & DN_HAVE_MASK)
+ print_mask(&fs->flow_mask);
+ }
}
static void
-print_extra_delay_parms(struct dn_pipe *p)
+print_extra_delay_parms(struct dn_profile *p)
{
double loss;
if (p->samples_no <= 0)
p->name, loss, p->samples_no);
}
-void
-ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[])
+static void
+flush_buf(char *buf)
{
- int rulenum;
- void *next = data;
- struct dn_pipe *p = (struct dn_pipe *) data;
- struct dn_flow_set *fs;
- struct dn_flow_queue *q;
- int l;
-
- if (ac > 0)
- rulenum = strtoul(*av++, NULL, 10);
- else
- rulenum = 0;
- for (; nbytes >= sizeof *p; p = (struct dn_pipe *)next) {
- double b = p->bandwidth;
- char buf[30];
- char prefix[80];
- char burst[5 + 7];
-
- if (SLIST_NEXT(p, next) != (struct dn_pipe *)DN_IS_PIPE)
- break; /* done with pipes, now queues */
-
- /*
- * compute length, as pipe have variable size
- */
- l = sizeof(*p) + p->fs.rq_elements * sizeof(*q);
- next = (char *)p + l;
- nbytes -= l;
-
- if ((rulenum != 0 && rulenum != p->pipe_nr) || co.do_pipe == 2)
- continue;
-
- /*
- * Print rate (or clocking interface)
- */
- if (p->if_name[0] != '\0')
- sprintf(buf, "%s", p->if_name);
- else if (b == 0)
- sprintf(buf, "unlimited");
- else if (b >= 1000000)
- sprintf(buf, "%7.3f Mbit/s", b/1000000);
- else if (b >= 1000)
- sprintf(buf, "%7.3f Kbit/s", b/1000);
- else
- sprintf(buf, "%7.3f bit/s ", b);
-
- sprintf(prefix, "%05d: %s %4d ms ",
- p->pipe_nr, buf, p->delay);
-
- print_flowset_parms(&(p->fs), prefix);
-
- if (humanize_number(burst, sizeof(burst), p->burst,
- "Byte", HN_AUTOSCALE, 0) < 0 || co.verbose)
- printf("\t burst: %ju Byte\n", p->burst);
- else
- printf("\t burst: %s\n", burst);
-
- print_extra_delay_parms(p);
-
- q = (struct dn_flow_queue *)(p+1);
- list_queues(&(p->fs), q);
+ if (buf[0])
+ printf("%s\n", buf);
+ buf[0] = '\0';
+}
+
+/*
+ * generic list routine. We expect objects in a specific order, i.e.
+ * PIPES AND SCHEDULERS:
+ * link; scheduler; internal flowset if any; instances
+ * we can tell a pipe from the number.
+ *
+ * FLOWSETS:
+ * flowset; queues;
+ * link i (int queue); scheduler i; si(i) { flowsets() : queues }
+ */
+static void
+list_pipes(struct dn_id *oid, struct dn_id *end)
+{
+ char buf[160]; /* pending buffer */
+ buf[0] = '\0';
+
+ for (; oid != end; oid = O_NEXT(oid, oid->len)) {
+ if (oid->len < sizeof(*oid))
+ errx(1, "invalid oid len %d\n", oid->len);
+
+ switch (oid->type) {
+ default:
+ flush_buf(buf);
+ printf("unrecognized object %d size %d\n", oid->type, oid->len);
+ break;
+ case DN_TEXT: /* list of attached flowsets */
+ {
+ int i, l;
+ struct {
+ struct dn_id id;
+ uint32_t p[0];
+ } *d = (void *)oid;
+ l = (oid->len - sizeof(*oid))/sizeof(d->p[0]);
+ if (l == 0)
+ break;
+ printf(" Children flowsets: ");
+ for (i = 0; i < l; i++)
+ printf("%u ", d->p[i]);
+ printf("\n");
+ break;
+ }
+ case DN_CMD_GET:
+ if (co.verbose)
+ printf("answer for cmd %d, len %d\n", oid->type, oid->id);
+ break;
+ case DN_SCH: {
+ struct dn_sch *s = (struct dn_sch *)oid;
+ flush_buf(buf);
+ printf(" sched %d type %s flags 0x%x %d buckets %d active\n",
+ s->sched_nr,
+ s->name, s->flags, s->buckets, s->oid.id);
+ if (s->flags & DN_HAVE_MASK)
+ print_mask(&s->sched_mask);
+ }
+ break;
+
+ case DN_FLOW:
+ list_flow((struct dn_flow *)oid);
+ break;
+
+ case DN_LINK: {
+ struct dn_link *p = (struct dn_link *)oid;
+ double b = p->bandwidth;
+ char bwbuf[30];
+ char burst[5 + 7];
+
+ /* This starts a new object so flush buffer */
+ flush_buf(buf);
+ /* data rate */
+ if (b == 0)
+ sprintf(bwbuf, "unlimited ");
+ else if (b >= 1000000)
+ sprintf(bwbuf, "%7.3f Mbit/s", b/1000000);
+ else if (b >= 1000)
+ sprintf(bwbuf, "%7.3f Kbit/s", b/1000);
+ else
+ sprintf(bwbuf, "%7.3f bit/s ", b);
+
+ if (humanize_number(burst, sizeof(burst), p->burst,
+ "", HN_AUTOSCALE, 0) < 0 || co.verbose)
+ sprintf(burst, "%d", (int)p->burst);
+ sprintf(buf, "%05d: %s %4d ms burst %s",
+ p->link_nr % DN_MAX_ID, bwbuf, p->delay, burst);
+ }
+ break;
+
+ case DN_FS:
+ print_flowset_parms((struct dn_fs *)oid, buf);
+ break;
+ case DN_PROFILE:
+ flush_buf(buf);
+ print_extra_delay_parms((struct dn_profile *)oid);
}
- for (fs = next; nbytes >= sizeof *fs; fs = next) {
- char prefix[80];
-
- if (SLIST_NEXT(fs, next) != (struct dn_flow_set *)DN_IS_QUEUE)
- break;
- l = sizeof(*fs) + fs->rq_elements * sizeof(*q);
- next = (char *)fs + l;
- nbytes -= l;
-
- if (rulenum != 0 && ((rulenum != fs->fs_nr && co.do_pipe == 2) ||
- (rulenum != fs->parent_nr && co.do_pipe == 1))) {
- continue;
- }
-
- q = (struct dn_flow_queue *)(fs+1);
- sprintf(prefix, "q%05d: weight %d pipe %d ",
- fs->fs_nr, fs->weight, fs->parent_nr);
- print_flowset_parms(fs, prefix);
- list_queues(fs, q);
+ flush_buf(buf); // XXX does it really go here ?
}
}
/*
- * Delete pipe or queue i
+ * Delete pipe, queue or scheduler i
*/
int
-ipfw_delete_pipe(int pipe_or_queue, int i)
+ipfw_delete_pipe(int do_pipe, int i)
{
- struct dn_pipe p;
-
- memset(&p, 0, sizeof p);
- if (pipe_or_queue == 1)
- p.pipe_nr = i; /* pipe */
- else
- p.fs.fs_nr = i; /* queue */
- i = do_cmd(IP_DUMMYNET_DEL, &p, sizeof p);
+ struct {
+ struct dn_id oid;
+ uintptr_t a[1]; /* add more if we want a list */
+ } cmd;
+ oid_fill((void *)&cmd, sizeof(cmd), DN_CMD_DELETE, DN_API_VERSION);
+ cmd.oid.subtype = (do_pipe == 1) ? DN_LINK :
+ ( (do_pipe == 2) ? DN_FS : DN_SCH);
+ cmd.a[0] = i;
+ i = do_cmd(IP_DUMMYNET3, &cmd, cmd.oid.len);
if (i) {
i = 1;
warn("rule %u: setsockopt(IP_DUMMYNET_DEL)", i);
* The empirical curve may have both vertical and horizontal lines.
* Vertical lines represent constant delay for a range of
* probabilities; horizontal lines correspond to a discontinuty
- * in the delay distribution: the pipe will use the largest delay
+ * in the delay distribution: the link will use the largest delay
* for a given probability.
*
* To pass the curve to dummynet, we must store the parameters
read_bandwidth(char *arg, int *bandwidth, char *if_name, int namelen)
{
if (*bandwidth != -1)
- warn("duplicate token, override bandwidth value!");
+ warnx("duplicate token, override bandwidth value!");
if (arg[0] >= 'a' && arg[0] <= 'z') {
+ if (!if_name) {
+ errx(1, "no if support");
+ }
if (namelen >= IFNAMSIZ)
warn("interface name truncated");
namelen--;
if (*end == 'K' || *end == 'k') {
end++;
bw *= 1000;
- } else if (*end == 'M') {
+ } else if (*end == 'M' || *end == 'm') {
end++;
bw *= 1000000;
}
if ((*end == 'B' &&
- _substrcmp2(end, "Bi", "Bit/s") != 0) ||
+ _substrcmp2(end, "Bi", "Bit/s") != 0) ||
_substrcmp2(end, "by", "bytes") == 0)
bw *= 8;
errx(EX_DATAERR, "bandwidth too large");
*bandwidth = bw;
- if_name[0] = '\0';
+ if (if_name)
+ if_name[0] = '\0';
}
}
#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
static void
-load_extra_delays(const char *filename, struct dn_pipe *p)
+load_extra_delays(const char *filename, struct dn_profile *p,
+ struct dn_link *link)
{
char line[ED_MAX_LINE_LEN];
FILE *f;
struct point points[ED_MAX_SAMPLES_NO];
int points_no = 0;
+ /* XXX link never NULL? */
+ p->link_nr = link->link_nr;
+
profile_name[0] = '\0';
f = fopen(filename, "r");
if (f == NULL)
ED_MAX_SAMPLES_NO);
do_points = 0;
} else if (!strcasecmp(name, ED_TOK_BW)) {
- read_bandwidth(arg, &p->bandwidth, p->if_name, sizeof(p->if_name));
+ char buf[IFNAMSIZ];
+ read_bandwidth(arg, &link->bandwidth, buf, sizeof(buf));
} else if (!strcasecmp(name, ED_TOK_LOSS)) {
if (loss != -1.0)
errx(ED_EFMT("duplicated token: %s"), name);
double y2 = points[i+1].prob * samples;
double x2 = points[i+1].delay;
- int index = y1;
+ int ix = y1;
int stop = y2;
if (x1 == x2) {
- for (; index<stop; ++index)
- p->samples[index] = x1;
+ for (; ix<stop; ++ix)
+ p->samples[ix] = x1;
} else {
double m = (y2-y1)/(x2-x1);
double c = y1 - m*x1;
- for (; index<stop ; ++index)
- p->samples[index] = (index - c)/m;
+ for (; ix<stop ; ++ix)
+ p->samples[ix] = (ix - c)/m;
}
}
p->samples_no = samples;
strncpy(p->name, profile_name, sizeof(p->name));
}
+/*
+ * configuration of pipes, schedulers, flowsets.
+ * When we configure a new scheduler, an empty pipe is created, so:
+ *
+ * do_pipe = 1 -> "pipe N config ..." only for backward compatibility
+ * sched N+Delta type fifo sched_mask ...
+ * pipe N+Delta <parameters>
+ * flowset N+Delta pipe N+Delta (no parameters)
+ * sched N type wf2q+ sched_mask ...
+ * pipe N <parameters>
+ *
+ * do_pipe = 2 -> flowset N config
+ * flowset N parameters
+ *
+ * do_pipe = 3 -> sched N config
+ * sched N parameters (default no pipe)
+ * optional Pipe N config ...
+ * pipe ==>
+ */
void
ipfw_config_pipe(int ac, char **av)
{
- int samples[ED_MAX_SAMPLES_NO];
- struct dn_pipe p;
- int i;
+ int i, j;
char *end;
void *par = NULL;
-
- memset(&p, 0, sizeof p);
- p.bandwidth = -1;
+ struct dn_id *buf, *base;
+ struct dn_sch *sch = NULL;
+ struct dn_link *p = NULL;
+ struct dn_fs *fs = NULL;
+ struct dn_profile *pf = NULL;
+ struct ipfw_flow_id *mask = NULL;
+ int lmax;
+ uint32_t _foo = 0, *flags = &_foo , *buckets = &_foo;
+
+ /*
+ * allocate space for 1 header,
+ * 1 scheduler, 1 link, 1 flowset, 1 profile
+ */
+ lmax = sizeof(struct dn_id); /* command header */
+ lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
+ sizeof(struct dn_fs) + sizeof(struct dn_profile);
av++; ac--;
/* Pipe number */
if (ac && isdigit(**av)) {
i = atoi(*av); av++; ac--;
- if (co.do_pipe == 1)
- p.pipe_nr = i;
- else
- p.fs.fs_nr = i;
+ } else
+ i = -1;
+ if (i <= 0)
+ errx(EX_USAGE, "need a pipe/flowset/sched number");
+ base = buf = safe_calloc(1, lmax);
+ /* all commands start with a 'CONFIGURE' and a version */
+ o_next(&buf, sizeof(struct dn_id), DN_CMD_CONFIG);
+ base->id = DN_API_VERSION;
+
+ switch (co.do_pipe) {
+ case 1: /* "pipe N config ..." */
+ /* Allocate space for the WF2Q+ scheduler, its link
+ * and the FIFO flowset. Set the number, but leave
+ * the scheduler subtype and other parameters to 0
+ * so the kernel will use appropriate defaults.
+ * XXX todo: add a flag to record if a parameter
+ * is actually configured.
+ * If we do a 'pipe config' mask -> sched_mask.
+ * The FIFO scheduler and link are derived from the
+ * WF2Q+ one in the kernel.
+ */
+ sch = o_next(&buf, sizeof(*sch), DN_SCH);
+ p = o_next(&buf, sizeof(*p), DN_LINK);
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+
+ sch->sched_nr = i;
+ sch->oid.subtype = 0; /* defaults to WF2Q+ */
+ mask = &sch->sched_mask;
+ flags = &sch->flags;
+ buckets = &sch->buckets;
+ *flags |= DN_PIPE_CMD;
+
+ p->link_nr = i;
+
+ /* This flowset is only for the FIFO scheduler */
+ fs->fs_nr = i + 2*DN_MAX_ID;
+ fs->sched_nr = i + DN_MAX_ID;
+ break;
+
+ case 2: /* "queue N config ... " */
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+ fs->fs_nr = i;
+ mask = &fs->flow_mask;
+ flags = &fs->flags;
+ buckets = &fs->buckets;
+ break;
+
+ case 3: /* "sched N config ..." */
+ sch = o_next(&buf, sizeof(*sch), DN_SCH);
+ fs = o_next(&buf, sizeof(*fs), DN_FS);
+ sch->sched_nr = i;
+ mask = &sch->sched_mask;
+ flags = &sch->flags;
+ buckets = &sch->buckets;
+ /* fs is used only with !MULTIQUEUE schedulers */
+ fs->fs_nr = i + DN_MAX_ID;
+ fs->sched_nr = i;
+ break;
}
+ /* set to -1 those fields for which we want to reuse existing
+ * values from the kernel.
+ * Also, *_nr and subtype = 0 mean reuse the value from the kernel.
+ * XXX todo: support reuse of the mask.
+ */
+ if (p)
+ p->bandwidth = -1;
+ for (j = 0; j < sizeof(fs->par)/sizeof(fs->par[0]); j++)
+ fs->par[j] = -1;
while (ac > 0) {
double d;
int tok = match_token(dummynet_params, *av);
switch(tok) {
case TOK_NOERROR:
- p.fs.flags_fs |= DN_NOERROR;
+ NEED(fs, "noerror is only for pipes");
+ fs->flags |= DN_NOERROR;
break;
case TOK_PLR:
+ NEED(fs, "plr is only for pipes");
NEED1("plr needs argument 0..1\n");
d = strtod(av[0], NULL);
if (d > 1)
d = 1;
else if (d < 0)
d = 0;
- p.fs.plr = (int)(d*0x7fffffff);
+ fs->plr = (int)(d*0x7fffffff);
ac--; av++;
break;
case TOK_QUEUE:
+ NEED(fs, "queue is only for pipes or flowsets");
NEED1("queue needs queue size\n");
end = NULL;
- p.fs.qsize = strtoul(av[0], &end, 0);
+ fs->qsize = strtoul(av[0], &end, 0);
if (*end == 'K' || *end == 'k') {
- p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
- p.fs.qsize *= 1024;
+ fs->flags |= DN_QSIZE_BYTES;
+ fs->qsize *= 1024;
} else if (*end == 'B' ||
_substrcmp2(end, "by", "bytes") == 0) {
- p.fs.flags_fs |= DN_QSIZE_IS_BYTES;
+ fs->flags |= DN_QSIZE_BYTES;
}
ac--; av++;
break;
case TOK_BUCKETS:
+ NEED(fs, "buckets is only for pipes or flowsets");
NEED1("buckets needs argument\n");
- p.fs.rq_size = strtoul(av[0], NULL, 0);
+ *buckets = strtoul(av[0], NULL, 0);
ac--; av++;
break;
+ case TOK_FLOW_MASK:
+ case TOK_SCHED_MASK:
case TOK_MASK:
+ NEED(mask, "tok_mask");
NEED1("mask needs mask specifier\n");
/*
* per-flow queue, mask is dst_ip, dst_port,
*/
par = NULL;
- bzero(&p.fs.flow_mask, sizeof(p.fs.flow_mask));
+ bzero(mask, sizeof(*mask));
end = NULL;
while (ac >= 1) {
case TOK_ALL:
/*
* special case, all bits significant
+ * except 'extra' (the queue number)
*/
- p.fs.flow_mask.dst_ip = ~0;
- p.fs.flow_mask.src_ip = ~0;
- p.fs.flow_mask.dst_port = ~0;
- p.fs.flow_mask.src_port = ~0;
- p.fs.flow_mask.proto = ~0;
- n2mask(&(p.fs.flow_mask.dst_ip6), 128);
- n2mask(&(p.fs.flow_mask.src_ip6), 128);
- p.fs.flow_mask.flow_id6 = ~0;
- p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
+ mask->dst_ip = ~0;
+ mask->src_ip = ~0;
+ mask->dst_port = ~0;
+ mask->src_port = ~0;
+ mask->proto = ~0;
+ n2mask(&mask->dst_ip6, 128);
+ n2mask(&mask->src_ip6, 128);
+ mask->flow_id6 = ~0;
+ *flags |= DN_HAVE_MASK;
+ goto end_mask;
+
+ case TOK_QUEUE:
+ mask->extra = ~0;
+ *flags |= DN_HAVE_MASK;
goto end_mask;
case TOK_DSTIP:
- p32 = &p.fs.flow_mask.dst_ip;
+ mask->addr_type = 4;
+ p32 = &mask->dst_ip;
break;
case TOK_SRCIP:
- p32 = &p.fs.flow_mask.src_ip;
+ mask->addr_type = 4;
+ p32 = &mask->src_ip;
break;
case TOK_DSTIP6:
- pa6 = &(p.fs.flow_mask.dst_ip6);
+ mask->addr_type = 6;
+ pa6 = &mask->dst_ip6;
break;
case TOK_SRCIP6:
- pa6 = &(p.fs.flow_mask.src_ip6);
+ mask->addr_type = 6;
+ pa6 = &mask->src_ip6;
break;
case TOK_FLOWID:
- p20 = &p.fs.flow_mask.flow_id6;
+ mask->addr_type = 6;
+ p20 = &mask->flow_id6;
break;
case TOK_DSTPORT:
- p16 = &p.fs.flow_mask.dst_port;
+ p16 = &mask->dst_port;
break;
case TOK_SRCPORT:
- p16 = &p.fs.flow_mask.src_port;
+ p16 = &mask->src_port;
break;
case TOK_PROTO:
if (a > 0xFF)
errx(EX_DATAERR,
"proto mask must be 8 bit");
- p.fs.flow_mask.proto = (uint8_t)a;
+ mask->proto = (uint8_t)a;
}
if (a != 0)
- p.fs.flags_fs |= DN_HAVE_FLOW_MASK;
+ *flags |= DN_HAVE_MASK;
ac--; av++;
} /* end while, config masks */
end_mask:
case TOK_RED:
case TOK_GRED:
NEED1("red/gred needs w_q/min_th/max_th/max_p\n");
- p.fs.flags_fs |= DN_IS_RED;
+ fs->flags |= DN_IS_RED;
if (tok == TOK_GRED)
- p.fs.flags_fs |= DN_IS_GENTLE_RED;
+ fs->flags |= DN_IS_GENTLE_RED;
/*
* the format for parameters is w_q/min_th/max_th/max_p
*/
double w_q = strtod(end, NULL);
if (w_q > 1 || w_q <= 0)
errx(EX_DATAERR, "0 < w_q <= 1");
- p.fs.w_q = (int) (w_q * (1 << SCALE_RED));
+ fs->w_q = (int) (w_q * (1 << SCALE_RED));
}
if ((end = strsep(&av[0], "/"))) {
- p.fs.min_th = strtoul(end, &end, 0);
+ fs->min_th = strtoul(end, &end, 0);
if (*end == 'K' || *end == 'k')
- p.fs.min_th *= 1024;
+ fs->min_th *= 1024;
}
if ((end = strsep(&av[0], "/"))) {
- p.fs.max_th = strtoul(end, &end, 0);
+ fs->max_th = strtoul(end, &end, 0);
if (*end == 'K' || *end == 'k')
- p.fs.max_th *= 1024;
+ fs->max_th *= 1024;
}
if ((end = strsep(&av[0], "/"))) {
double max_p = strtod(end, NULL);
if (max_p > 1 || max_p <= 0)
errx(EX_DATAERR, "0 < max_p <= 1");
- p.fs.max_p = (int)(max_p * (1 << SCALE_RED));
+ fs->max_p = (int)(max_p * (1 << SCALE_RED));
}
ac--; av++;
break;
case TOK_DROPTAIL:
- p.fs.flags_fs &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
+ NEED(fs, "droptail is only for flowsets");
+ fs->flags &= ~(DN_IS_RED|DN_IS_GENTLE_RED);
break;
case TOK_BW:
+ NEED(p, "bw is only for links");
NEED1("bw needs bandwidth or interface\n");
- if (co.do_pipe != 1)
- errx(EX_DATAERR, "bandwidth only valid for pipes");
- read_bandwidth(av[0], &p.bandwidth, p.if_name, sizeof(p.if_name));
+ read_bandwidth(av[0], &p->bandwidth, NULL, 0);
ac--; av++;
break;
case TOK_DELAY:
- if (co.do_pipe != 1)
- errx(EX_DATAERR, "delay only valid for pipes");
+ NEED(p, "delay is only for links");
NEED1("delay needs argument 0..10000ms\n");
- p.delay = strtoul(av[0], NULL, 0);
+ p->delay = strtoul(av[0], NULL, 0);
+ ac--; av++;
+ break;
+
+ case TOK_TYPE: {
+ int l;
+ NEED(sch, "type is only for schedulers");
+ NEED1("type needs a string");
+ l = strlen(av[0]);
+ if (l == 0 || l > 15)
+ errx(1, "type %s too long\n", av[0]);
+ strcpy(sch->name, av[0]);
+ sch->oid.subtype = 0; /* use string */
ac--; av++;
break;
+ }
case TOK_WEIGHT:
- if (co.do_pipe == 1)
- errx(EX_DATAERR,"weight only valid for queues");
- NEED1("weight needs argument 0..100\n");
- p.fs.weight = strtoul(av[0], &end, 0);
+ NEED(fs, "weight is only for flowsets");
+ NEED1("weight needs argument\n");
+ fs->par[0] = strtol(av[0], &end, 0);
+ ac--; av++;
+ break;
+
+ case TOK_LMAX:
+ NEED(fs, "lmax is only for flowsets");
+ NEED1("lmax needs argument\n");
+ fs->par[1] = strtol(av[0], &end, 0);
ac--; av++;
break;
+ case TOK_PRI:
+ NEED(fs, "priority is only for flowsets");
+ NEED1("priority needs argument\n");
+ fs->par[2] = strtol(av[0], &end, 0);
+ ac--; av++;
+ break;
+
+ case TOK_SCHED:
case TOK_PIPE:
- if (co.do_pipe == 1)
- errx(EX_DATAERR,"pipe only valid for queues");
- NEED1("pipe needs pipe_number\n");
- p.fs.parent_nr = strtoul(av[0], &end, 0);
+ NEED(fs, "pipe/sched");
+ NEED1("pipe/link/sched needs number\n");
+ fs->sched_nr = strtoul(av[0], &end, 0);
ac--; av++;
break;
- case TOK_PIPE_PROFILE:
- if (co.do_pipe != 1)
- errx(EX_DATAERR, "extra delay only valid for pipes");
+ case TOK_PROFILE:
+ NEED((!pf), "profile already set");
+ NEED(p, "profile");
+ {
NEED1("extra delay needs the file name\n");
- p.samples = &samples[0];
- load_extra_delays(av[0], &p);
+ pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
+ load_extra_delays(av[0], pf, p); //XXX can't fail?
--ac; ++av;
+ }
break;
case TOK_BURST:
- if (co.do_pipe != 1)
- errx(EX_DATAERR, "burst only valid for pipes");
+ NEED(p, "burst");
NEED1("burst needs argument\n");
errno = 0;
- if (expand_number(av[0], (int64_t *)&p.burst) < 0)
+ if (expand_number(av[0], (int64_t *)&p->burst) < 0)
if (errno != ERANGE)
errx(EX_DATAERR,
- "burst: invalid argument");
- if (errno || p.burst > (1ULL << 48) - 1)
+ "burst: invalid argument");
+ if (errno || p->burst > (1ULL << 48) - 1)
errx(EX_DATAERR,
- "burst: out of range (0..2^48-1)");
+ "burst: out of range (0..2^48-1)");
ac--; av++;
break;
errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
}
}
- if (co.do_pipe == 1) {
- if (p.pipe_nr == 0)
- errx(EX_DATAERR, "pipe_nr must be > 0");
- if (p.delay > 10000)
- errx(EX_DATAERR, "delay must be < 10000");
- } else { /* co.do_pipe == 2, queue */
- if (p.fs.parent_nr == 0)
- errx(EX_DATAERR, "pipe must be > 0");
- if (p.fs.weight >100)
- errx(EX_DATAERR, "weight must be <= 100");
- }
- /* check for bandwidth value */
- if (p.bandwidth == -1) {
- p.bandwidth = 0;
- if (p.samples_no > 0)
- errx(EX_DATAERR, "profile requires a bandwidth limit");
+ /* check validity of parameters */
+ if (p) {
+ if (p->delay > 10000)
+ errx(EX_DATAERR, "delay must be < 10000");
+ if (p->bandwidth == -1)
+ p->bandwidth = 0;
}
-
- if (p.fs.flags_fs & DN_QSIZE_IS_BYTES) {
- size_t len;
+ if (fs) {
+ /* XXX accept a 0 scheduler to keep the default */
+ if (fs->flags & DN_QSIZE_BYTES) {
+ size_t len;
long limit;
len = sizeof(limit);
if (sysctlbyname("net.inet.ip.dummynet.pipe_byte_limit",
&limit, &len, NULL, 0) == -1)
limit = 1024*1024;
- if (p.fs.qsize > limit)
+ if (fs->qsize > limit)
errx(EX_DATAERR, "queue size must be < %ldB", limit);
- } else {
+ } else {
size_t len;
long limit;
if (sysctlbyname("net.inet.ip.dummynet.pipe_slot_limit",
&limit, &len, NULL, 0) == -1)
limit = 100;
- if (p.fs.qsize > limit)
+ if (fs->qsize > limit)
errx(EX_DATAERR, "2 <= queue size <= %ld", limit);
- }
- if (p.fs.flags_fs & DN_IS_RED) {
+ }
+
+ if (fs->flags & DN_IS_RED) {
size_t len;
int lookup_depth, avg_pkt_size;
- double s, idle, weight, w_q;
- struct clockinfo ck;
- int t;
+ double w_q;
- if (p.fs.min_th >= p.fs.max_th)
+ if (fs->min_th >= fs->max_th)
errx(EX_DATAERR, "min_th %d must be < than max_th %d",
- p.fs.min_th, p.fs.max_th);
- if (p.fs.max_th == 0)
+ fs->min_th, fs->max_th);
+ if (fs->max_th == 0)
errx(EX_DATAERR, "max_th must be > 0");
len = sizeof(int);
if (sysctlbyname("net.inet.ip.dummynet.red_lookup_depth",
&lookup_depth, &len, NULL, 0) == -1)
- errx(1, "sysctlbyname(\"%s\")",
- "net.inet.ip.dummynet.red_lookup_depth");
+ lookup_depth = 256;
if (lookup_depth == 0)
errx(EX_DATAERR, "net.inet.ip.dummynet.red_lookup_depth"
" must be greater than zero");
len = sizeof(int);
if (sysctlbyname("net.inet.ip.dummynet.red_avg_pkt_size",
&avg_pkt_size, &len, NULL, 0) == -1)
+ avg_pkt_size = 512;
- errx(1, "sysctlbyname(\"%s\")",
- "net.inet.ip.dummynet.red_avg_pkt_size");
if (avg_pkt_size == 0)
errx(EX_DATAERR,
"net.inet.ip.dummynet.red_avg_pkt_size must"
" be greater than zero");
- len = sizeof(struct clockinfo);
- if (sysctlbyname("kern.clockrate", &ck, &len, NULL, 0) == -1)
- errx(1, "sysctlbyname(\"%s\")", "kern.clockrate");
-
/*
* Ticks needed for sending a medium-sized packet.
* Unfortunately, when we are configuring a WF2Q+ queue, we
* correct. But on the other hand, why do we want RED with
* WF2Q+ ?
*/
+#if 0
if (p.bandwidth==0) /* this is a WF2Q+ queue */
s = 0;
else
s = (double)ck.hz * avg_pkt_size * 8 / p.bandwidth;
-
+#endif
/*
* max idle time (in ticks) before avg queue size becomes 0.
* NOTA: (3/w_q) is approx the value x so that
* (1-w_q)^x < 10^-3.
*/
- w_q = ((double)p.fs.w_q) / (1 << SCALE_RED);
+ w_q = ((double)fs->w_q) / (1 << SCALE_RED);
+#if 0 // go in kernel
idle = s * 3. / w_q;
- p.fs.lookup_step = (int)idle / lookup_depth;
- if (!p.fs.lookup_step)
- p.fs.lookup_step = 1;
+ fs->lookup_step = (int)idle / lookup_depth;
+ if (!fs->lookup_step)
+ fs->lookup_step = 1;
weight = 1 - w_q;
- for (t = p.fs.lookup_step; t > 1; --t)
+ for (t = fs->lookup_step; t > 1; --t)
weight *= 1 - w_q;
- p.fs.lookup_weight = (int)(weight * (1 << SCALE_RED));
+ fs->lookup_weight = (int)(weight * (1 << SCALE_RED));
+#endif
+ }
}
- if (p.samples_no <= 0) {
- i = do_cmd(IP_DUMMYNET_CONFIGURE, &p, sizeof p);
- } else {
- struct dn_pipe_max pm;
- int len = sizeof(pm);
-
- memcpy(&pm.pipe, &p, sizeof(pm.pipe));
- memcpy(&pm.samples, samples, sizeof(pm.samples));
- i = do_cmd(IP_DUMMYNET_CONFIGURE, &pm, len);
- }
+ i = do_cmd(IP_DUMMYNET3, base, (char *)buf - (char *)base);
if (i)
err(1, "setsockopt(%s)", "IP_DUMMYNET_CONFIGURE");
}
+
+/*
+ * Ask the kernel to flush the dummynet configuration: the request is
+ * a bare dn_id header carrying DN_CMD_FLUSH. The outcome of do_cmd()
+ * is not examined.
+ */
+void
+dummynet_flush(void)
+{
+	struct dn_id req;	/* header-only request, no payload */
+
+	oid_fill(&req, sizeof(req), DN_CMD_FLUSH, DN_API_VERSION);
+	do_cmd(IP_DUMMYNET3, &req, req.len);
+}
+
+/* Parse input for 'ipfw [pipe|sched|queue] show [range list]'
+ * Returns the number of valid ranges, and possibly stores them
+ * (as pairs low,high) in the array v, which has room for len numbers.
+ *
+ * When v is NULL or len < 2 the function only counts: every range is
+ * written to a local scratch pair and discarded. Once v is full,
+ * further ranges are also redirected to the scratch pair.
+ *
+ * Each argument may hold a single number 'n', a range 'n1-n2', or a
+ * comma separated list of those. Ranges with n2 < n1 or
+ * n2 >= DN_MAX_ID-1 are silently dropped. When co.do_pipe == 1 the
+ * stored values are shifted by DN_MAX_ID.
+ */
+static int
+parse_range(int ac, char *av[], uint32_t *v, int len)
+{
+	int n = 0;
+	char *endptr, *s;
+	uint32_t base[2];	/* scratch pair used when not storing */
+
+	if (v == NULL || len < 2) {
+		v = base;
+		len = 2;
+	}
+
+	/* the loop header assumes one argument is consumed per round */
+	for (s = *av; s != NULL; av++, ac--) {
+		v[0] = strtoul(s, &endptr, 10);
+		v[1] = (*endptr != '-') ? v[0] :
+			strtoul(endptr+1, &endptr, 10);
+		if (*endptr == '\0') { /* prepare for next round */
+			s = (ac > 0) ? *(av+1) : NULL;
+		} else {
+			/*
+			 * We stay inside the same argument, so undo the
+			 * av++/ac-- that the loop header is about to do.
+			 */
+			ac++;
+			av--;
+			if (*endptr != ',') {
+				/*
+				 * Parse error, not a system error: use
+				 * warnx() so no strerror(errno) text is
+				 * appended to the message.
+				 */
+				warnx("invalid number: %s", s);
+				s = ++endptr;
+				continue;
+			}
+			/* continue processing from here */
+			s = ++endptr;
+		}
+		/*
+		 * The values are unsigned, so only the ordering and the
+		 * upper bound need checking; v[0] <= v[1] makes a
+		 * separate upper-bound test on v[0] redundant.
+		 */
+		if (v[1] < v[0] || v[1] >= DN_MAX_ID-1)
+			continue; /* invalid entry */
+		n++;
+		/* translate if 'pipe list' */
+		if (co.do_pipe == 1) {
+			v[0] += DN_MAX_ID;
+			v[1] += DN_MAX_ID;
+		}
+		v = (n*2 < len) ? v + 2 : base;
+	}
+	return n;
+}
+
+/* main entry point for dummynet list functions. co.do_pipe indicates
+ * which function we want to support.
+ * av may contain filtering arguments, either individual entries
+ * or ranges, or lists (space or commas are valid separators).
+ * Format for a range can be n1-n2 or n3 n4 n5 ...
+ * In a range n1 must be <= n2, otherwise the range is ignored.
+ * A number 'n4' is translated into a range 'n4-n4'
+ * All numbers must be > 0 and < DN_MAX_ID-1
+ *
+ * The request is a dn_id header (DN_CMD_GET) optionally followed by
+ * the list of ranges; the reply is handed to list_pipes().
+ * show_counters is not referenced in this function.
+ */
+void
+dummynet_list(int ac, char *av[], int show_counters)
+{
+ struct dn_id *oid, *x = NULL;
+ int ret, i, l;
+ int n; /* # of ranges */
+ int buflen;
+ int max_size; /* largest obj passed up */
+
+ ac--;
+ av++; /* skip 'list' | 'show' word */
+
+ /* first pass: v == NULL, so parse_range only counts */
+ n = parse_range(ac, av, NULL, 0); /* Count # of ranges. */
+
+ /* Allocate space to store ranges */
+ l = sizeof(*oid) + sizeof(uint32_t) * n * 2;
+ oid = safe_calloc(1, l);
+ oid_fill(oid, l, DN_CMD_GET, DN_API_VERSION);
+
+ /* second pass: same arguments, now stored right after the header */
+ if (n > 0) /* store ranges in idx */
+ parse_range(ac, av, (uint32_t *)(oid + 1), n*2);
+ /*
+ * Compute the size of the largest object returned. If the
+ * response leaves at least this much spare space in the
+ * buffer, then surely the response is complete; otherwise
+ * there might be a risk of truncation and we will need to
+ * retry with a larger buffer.
+ * XXX don't bother with smaller structs.
+ */
+ max_size = sizeof(struct dn_fs);
+ if (max_size < sizeof(struct dn_sch))
+ max_size = sizeof(struct dn_sch);
+ if (max_size < sizeof(struct dn_flow))
+ max_size = sizeof(struct dn_flow);
+
+ /* no default case: for other do_pipe values subtype is left as set by oid_fill */
+ switch (co.do_pipe) {
+ case 1:
+ oid->subtype = DN_LINK; /* list pipe */
+ break;
+ case 2:
+ oid->subtype = DN_FS; /* list queue */
+ break;
+ case 3:
+ oid->subtype = DN_SCH; /* list sched */
+ break;
+ }
+
+ /*
+ * Ask the kernel an estimate of the required space (result
+ * in oid.id), unless we are requesting a subset of objects,
+ * in which case the kernel does not give an exact answer.
+ * In any case, space might grow in the meantime due to the
+ * creation of new queues, so we must be prepared to retry.
+ */
+ if (n > 0) {
+ buflen = 4*1024;
+ } else {
+ /* the option name is passed negated for requests that return data */
+ ret = do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l);
+ if (ret != 0 || oid->id <= sizeof(*oid))
+ goto done;
+ buflen = oid->id + max_size;
+ oid->len = sizeof(*oid); /* restore */
+ }
+ /* Try a few times, until the buffer fits */
+ for (i = 0; i < 20; i++) {
+ l = buflen;
+ x = safe_realloc(x, l);
+ /* every attempt starts from a fresh copy of the request */
+ bcopy(oid, x, oid->len);
+ ret = do_cmd(-IP_DUMMYNET3, x, (uintptr_t)&l);
+ if (ret != 0 || x->id <= sizeof(*oid))
+ goto done; /* no response */
+ /* l is the value do_cmd left behind; presumably the reply length -- TODO confirm */
+ if (l + max_size <= buflen)
+ break; /* ok */
+ buflen *= 2; /* double for next attempt */
+ }
+ /* NOTE(review): also reached when all 20 attempts were too small */
+ list_pipes(x, O_NEXT(x, l));
+done:
+ if (x)
+ free(x);
+ free(oid);
+}
*/
/*
- * $Id: glue.c 4540 2009-12-16 17:22:47Z marta $
+ * $Id: glue.c 5881 2010-03-25 14:29:48Z svn_panicucci $
*
- * Userland functions missing in linux
+ * Userland functions missing in linux/Windows
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#ifdef _WIN32
+#include <netdb.h>
+#include <windows.h>
+#endif /* _WIN32 */
+
#ifndef HAVE_NAT
/* dummy nat functions */
void
int optreset; /* missing in linux */
#endif
-#if defined( __linux__ ) || defined(_WIN32)
/*
* not implemented in linux.
* taken from /usr/src/lib/libc/string/strlcpy.c
strtonum(const char *nptr, long long minval, long long maxval,
const char **errstr)
{
- return strtoll(nptr, (char **)errstr, 0);
+ long long ret;
+ int errno_c = errno; /* save actual errno */
+
+ errno = 0;
+#ifdef TCC
+ ret = strtol(nptr, (char **)errstr, 0);
+#else
+ ret = strtoll(nptr, (char **)errstr, 0);
+#endif
+ /* We accept only a string that represents exactly one number (i.e.
+ * the whole string is consumed by the conversion).
+ * The FreeBSD version wants errstr==NULL if no error occurs, otherwise
+ * errstr should point to an error string.
+ * For our purposes, we implement only the "invalid" error; range
+ * errors aren't checked.
+ */
+ if (errno != 0 || nptr == *errstr || **errstr != '\0')
+ *errstr = "invalid";
+ else {
+ *errstr = NULL;
+ errno = errno_c;
+ }
+ return ret;
}
+#if defined (_WIN32) || defined (EMULATE_SYSCTL)
+//XXX missing prerequisites
+#include <net/if.h> //openwrt
+#include <netinet/ip.h> //openwrt
+#include <netinet/ip_fw.h>
+#include <netinet/ip_dummynet.h>
+#endif
+
/*
* set or get system information
* XXX lock acquisition/serialize calls
sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp,
size_t newlen)
{
+#if defined (_WIN32) || defined (EMULATE_SYSCTL)
+	/*
+	 * We embed the sysctl request in the usual sockopt mechanics.
+	 * The sockopt buffer is filled with a dn_id for the IP_DUMMYNET3
+	 * command, using the special DN_SYSCTL_GET and DN_SYSCTL_SET
+	 * subcommands.
+	 * The syntax of this function is compatible with sysctlbyname():
+	 * if newp and newlen are != 0 => this is a set
+	 * else if oldp and oldlen are != 0 => this is a get
+	 *	to avoid too much overhead in the module, the whole
+	 *	sysctl table is returned, and the parsing is done in
+	 *	userland; a probe request is done to retrieve the size
+	 *	needed to transfer the table, before the real request
+	 * if both old and new params = 0 => this is a print
+	 *	this is a special request, done only by main()
+	 *	to implement the extension './ipfw sysctl',
+	 *	a command that bypasses the normal getopt, and that
+	 *	is available on those platforms that use this
+	 *	sysctl emulation.
+	 *	in this case, a negative oldlen signals that *oldp
+	 *	is actually a FILE* to print somewhere else than stdout
+	 *
+	 * Returns 0 on success and -1 on failure; a get for a name that
+	 * is not in the table is a failure, so callers can fall back to
+	 * their defaults. The request buffer is released on every path.
+	 */
+
+	int l;
+	int ret = -1;		/* every exit is a failure until proven otherwise */
+	int to_file, is_get;
+	size_t nlen;
+	struct dn_id *oid = NULL;
+	struct sysctlhead *entry;
+	char *pstring;
+	char *pdata;
+	FILE *fp = stdout;
+
+	/*
+	 * *oldlenp is a size_t, so a plain "< 0" test can never be true:
+	 * look at it as a signed value to detect the FILE* convention.
+	 */
+	to_file = (oldlenp != NULL && (long)*oldlenp < 0);
+	if (to_file && oldp != NULL)
+		fp = (FILE *)oldp;
+
+	if (newp != NULL && newlen != 0) {
+		/* this is a set */
+		if (name == NULL)
+			return -1;
+		nlen = strlen(name) + 1;	/* name is sent with its NUL */
+		l = sizeof(struct dn_id) + sizeof(struct sysctlhead) +
+		    nlen + newlen;
+		oid = malloc(l);
+		if (oid == NULL)
+			return -1;
+		oid->len = l;
+		oid->type = DN_SYSCTL_SET;
+		oid->id = DN_API_VERSION;
+
+		/* layout: dn_id | sysctlhead | data | name */
+		entry = (struct sysctlhead *)(oid + 1);
+		pdata = (char *)(entry + 1);
+		pstring = pdata + newlen;
+
+		/* block length rounded up to a multiple of 4 */
+		entry->blocklen = ((sizeof(struct sysctlhead) + nlen + newlen) + 3) & ~3;
+		entry->namelen = nlen;
+		entry->flags = 0;
+		entry->datalen = newlen;
+
+		bcopy(newp, pdata, newlen);
+		bcopy(name, pstring, nlen);
+
+		if (do_cmd(IP_DUMMYNET3, oid, (uintptr_t)l) == 0)
+			ret = 0;
+		goto out;
+	}
+
+	/* this is a get or a print: first probe for the table size */
+	l = sizeof(struct dn_id);
+	oid = malloc(l);
+	if (oid == NULL)
+		return -1;
+	oid->len = l;
+	oid->type = DN_SYSCTL_GET;
+	oid->id = DN_API_VERSION;
+
+	if (do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l) != 0)
+		goto out;
+
+	/* the probe answers with the needed size in oid->id */
+	l = oid->id;
+	free(oid);
+	oid = NULL;
+	if (l < (int)sizeof(struct dn_id))
+		return -1;	/* cannot even hold the request header */
+
+	oid = malloc(l);
+	if (oid == NULL)
+		return -1;
+	oid->len = l;
+	oid->type = DN_SYSCTL_GET;
+	oid->id = DN_API_VERSION;
+
+	if (do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l) != 0)
+		goto out;
+
+	/* a get needs a name and a real, non-empty output buffer */
+	is_get = (name != NULL && oldp != NULL && !to_file &&
+	    oldlenp != NULL && *oldlenp > 0);
+
+	/* walk the table; an entry with blocklen == 0 terminates it */
+	for (entry = (struct sysctlhead *)(oid + 1); entry->blocklen != 0;
+	    entry = (struct sysctlhead *)((char *)entry + entry->blocklen)) {
+		pdata = (char *)(entry + 1);
+		pstring = pdata + entry->datalen;
+
+		if (is_get) {
+			if (strcmp(name, pstring) != 0)
+				continue;
+			/* match found, sanity check on len */
+			if (*oldlenp < entry->datalen) {
+				printf("%s error: buffer too small\n",__FUNCTION__);
+				goto out;
+			}
+			*oldlenp = entry->datalen;
+			bcopy(pdata, oldp, *oldlenp);
+			ret = 0;
+			goto out;
+		}
+
+		/*
+		 * this is a print: no name selects every entry, otherwise
+		 * the entry must be 'name' itself or live below 'name.'
+		 */
+		if (name != NULL) {
+			size_t n = strlen(name);
+
+			if (strncmp(pstring, name, n) != 0 ||
+			    (pstring[n] != '\0' && pstring[n] != '.'))
+				continue;
+		}
+		fprintf(fp, "%s: ",pstring);
+		switch (entry->flags >> 2) {
+		case SYSCTLTYPE_LONG:
+			fprintf(fp, "%li ", *(long*)(pdata));
+			break;
+		case SYSCTLTYPE_UINT:
+			fprintf(fp, "%u ", *(unsigned int*)(pdata));
+			break;
+		case SYSCTLTYPE_ULONG:
+			fprintf(fp, "%lu ", *(unsigned long*)(pdata));
+			break;
+		case SYSCTLTYPE_INT:
+		default:
+			fprintf(fp, "%i ", *(int*)(pdata));
+		}
+		if ((entry->flags & 0x00000003) == CTLFLAG_RD)
+			fprintf(fp, "\t(read only)\n");
+		else
+			fprintf(fp, "\n");
+	}
+	/* a get that walked the whole table found nothing; a print is done */
+	ret = is_get ? -1 : 0;
+out:
+	free(oid);
+	return ret;
+
+#else /* __linux__ */
FILE *fp;
char *basename = "/sys/module/ipfw_mod/parameters/";
char filename[256]; /* full filename */
}
return ret;
+#endif /* __linux__ */
+}
+
+#ifdef _WIN32
+/*
+ * On windows, set/getsockopt are mapped to DeviceIoControl()
+ */
+/*
+ * setsockopt() replacement: packs the option into a struct sockopt
+ * header followed by a copy of the value, and hands it to the device
+ * through the IP_FW_SETSOCKOPT ioctl. 's' carries the device handle;
+ * 'level' is not used. Returns 0 on success, -1 on failure.
+ */
+int
+wnd_setsockopt(int s, int level, int sopt_name, const void *optval,
+	socklen_t optlen)
+{
+	struct sockopt *req;
+	size_t req_len;
+	DWORD returned;
+	BOOL ok;
+
+	/* one allocation: header immediately followed by the value */
+	req_len = sizeof (struct sockopt) + optlen;
+	req = malloc(req_len);
+	if (req == NULL)
+		return -1;
+
+	req->sopt_dir = SOPT_SET;
+	req->sopt_name = sopt_name;
+	req->sopt_valsize = optlen;
+	req->sopt_val = (void *)(req + 1);
+	memcpy(req->sopt_val, optval, optlen);
+
+	ok = DeviceIoControl((HANDLE)s, IP_FW_SETSOCKOPT, req, req_len,
+		NULL, 0, &returned, NULL);
+	free(req);
+
+	if (!ok)
+		return -1;
+	return 0;
 }
-#endif /* __linux__ || _WIN32 */
+
+/*
+ * getsockopt() replacement: the request (struct sockopt header plus
+ * the caller's buffer contents) is sent with the IP_FW_GETSOCKOPT
+ * ioctl and the same memory is used for the reply.
+ * 's' carries the device handle; 'level' is not used.
+ * On return *optlen holds the size reported in the reply, while the
+ * data copied back into optval never exceeds the size the caller
+ * passed in. Returns 0 on success, -1 on failure.
+ */
+int
+wnd_getsockopt(int s, int level, int sopt_name, void *optval,
+	socklen_t *optlen)
+{
+	socklen_t room = *optlen;	/* capacity of the caller's buffer */
+	socklen_t got;
+	size_t len = sizeof (struct sockopt) + room;
+	struct sockopt *sock;
+	DWORD n;
+	BOOL result;
+	HANDLE _dev_h = (HANDLE)s;
+
+	sock = malloc(len);
+	if (sock == NULL)
+		return -1;
+
+	sock->sopt_dir = SOPT_GET;
+	sock->sopt_name = sopt_name;
+	sock->sopt_valsize = room;
+	sock->sopt_val = (void *)(sock+1);
+
+	memcpy (sock->sopt_val, optval, room);
+
+	result = DeviceIoControl (_dev_h, IP_FW_GETSOCKOPT, sock, len,
+		sock, len, &n, NULL);
+	//printf("len = %i, returned = %u, valsize = %i\n",len,n,sock->sopt_valsize);
+	got = sock->sopt_valsize;
+	*optlen = got;
+	/*
+	 * Both our reply area and the caller's buffer hold 'room' bytes:
+	 * never copy more than that, whatever size the reply announces.
+	 */
+	memcpy (optval, sock->sopt_val, (got < room) ? got : room);
+	free (sock);
+	return (result ? 0 : -1);
+}
+
+/*
+ * socket() replacement: opens the "\\.\Ipfw" device and returns its
+ * handle cast to int, or -1 (after printing the Windows error code)
+ * when the device cannot be opened. The arguments are not used.
+ */
+int
+my_socket(int domain, int ty, int proto)
+{
+	HANDLE dev;
+
+	dev = CreateFile (TEXT("\\\\.\\Ipfw"),
+		GENERIC_READ | GENERIC_WRITE,
+		0, NULL,
+		OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+	if (dev != INVALID_HANDLE_VALUE)
+		return (int)dev;
+
+	printf("%s failed %u, cannot talk to kernel module\n",
+		__FUNCTION__, (unsigned)GetLastError());
+	return -1;
+}
+
+/* gethostbyname2() shim: the address family is ignored. */
+struct hostent* gethostbyname2(const char *name, int af)
+{
+	struct hostent *he;
+
+	(void)af;
+	he = gethostbyname(name);
+	return he;
+}
+
+/* ether_aton() stub: reports that it is unimplemented and returns NULL. */
+struct ether_addr* ether_aton(const char *a)
+{
+	(void)a;	/* no parsing is done */
+	fprintf(stderr, "%s empty\n", __FUNCTION__);
+	return (struct ether_addr *)NULL;
+}
+
+#ifdef TCC
+int opterr = 1, /* if error message should be printed */
+ optind = 1, /* index into parent argv vector */
+ optopt, /* character checked for validity */
+ optreset; /* reset getopt */
+char *optarg; /* argument associated with option */
+
+#define BADCH (int)'?'
+#define BADARG (int)':'
+#define EMSG ""
+
+#define PROGNAME "ipfw"
+/*
+ * getopt --
+ * Parse argc/argv argument vector.
+ *
+ * nargc/nargv: the argument vector to scan.
+ * ostr: the accepted option letters; a letter followed by ':' takes
+ * an argument, and a leading ':' silences the error messages and makes
+ * a missing argument return BADARG instead of BADCH.
+ * Returns the option letter, BADCH/BADARG on error, or -1 when there
+ * are no more options. State is kept in optind, optopt, optarg,
+ * optreset and in the static scanning pointer 'place'.
+ */
+int
+getopt(nargc, nargv, ostr)
+ int nargc;
+ char * const nargv[];
+ const char *ostr;
+{
+ static char *place = EMSG; /* option letter processing */
+ char *oli; /* option letter list index */
+
+ /* start on a new argv word when asked to reset or the previous one is used up */
+ if (optreset || *place == 0) { /* update scanning pointer */
+ optreset = 0;
+ place = nargv[optind];
+ if (optind >= nargc || *place++ != '-') {
+ /* Argument is absent or is not an option */
+ place = EMSG;
+ return (-1);
+ }
+ optopt = *place++;
+ if (optopt == '-' && *place == 0) {
+ /* "--" => end of options */
+ ++optind;
+ place = EMSG;
+ return (-1);
+ }
+ if (optopt == 0) {
+ /* Solitary '-', treat as a '-' option
+ if the program (eg su) is looking for it. */
+ place = EMSG;
+ if (strchr(ostr, '-') == NULL)
+ return (-1);
+ optopt = '-';
+ }
+ } else
+ /* still inside a cluster of letters such as "-abc" */
+ optopt = *place++;
+
+ /* See if option letter is one the caller wanted... */
+ if (optopt == ':' || (oli = strchr(ostr, optopt)) == NULL) {
+ /* unknown letter: move to the next word if this one is finished */
+ if (*place == 0)
+ ++optind;
+ if (opterr && *ostr != ':')
+ (void)fprintf(stderr,
+ "%s: illegal option -- %c\n", PROGNAME,
+ optopt);
+ return (BADCH);
+ }
+
+ /* Does this option need an argument? */
+ if (oli[1] != ':') {
+ /* don't need argument */
+ optarg = NULL;
+ if (*place == 0)
+ ++optind;
+ } else {
+ /* Option-argument is either the rest of this argument or the
+ entire next argument. */
+ if (*place)
+ optarg = place;
+ else if (nargc > ++optind)
+ optarg = nargv[optind];
+ else {
+ /* option-argument absent */
+ place = EMSG;
+ if (*ostr == ':')
+ return (BADARG);
+ if (opterr)
+ (void)fprintf(stderr,
+ "%s: option requires an argument -- %c\n",
+ PROGNAME, optopt);
+ return (BADCH);
+ }
+ /* the argument consumed the rest of the word: restart on the next one */
+ place = EMSG;
+ ++optind;
+ }
+ return (optopt); /* return option letter */
+}
+
+//static FILE *err_file = stderr;
+/*
+ * Print "PROGNAME: <formatted message>\n" on stderr; when ex is
+ * non-zero, terminate the program with exit status eval.
+ */
+void
+verrx(int ex, int eval, const char *fmt, va_list ap)
+{
+	FILE *out = stderr;
+
+	fprintf(out, "%s: ", PROGNAME);
+	if (fmt)
+		vfprintf(out, fmt, ap);
+	fputc('\n', out);
+	if (ex != 0)
+		exit(eval);
+}
+/* Print a formatted message via verrx() and exit with status eval. */
+void
+errx(int eval, const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	verrx(1, eval, fmt, args);	/* ex = 1: verrx() calls exit(eval) */
+	va_end(args);
+}
+
+/* Print a formatted message via verrx() without exiting. */
+void
+warnx(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	verrx(0, 0, fmt, args);		/* ex = 0: message only */
+	va_end(args);
+}
+
+/*
+ * Return the token at *stringp that ends at the first character
+ * found in delim (or at the end of the string). The delimiter is
+ * overwritten with a NUL and *stringp is moved past it; when the end
+ * of the string is reached *stringp becomes NULL. Returns NULL if
+ * *stringp is already NULL.
+ */
+char *
+strsep(char **stringp, const char *delim)
+{
+	char *start = *stringp;
+	char *cur;
+	const char *d;
+
+	if (start == NULL)
+		return (NULL);
+	for (cur = start; ; cur++) {
+		/*
+		 * Compare against every delimiter including delim's own
+		 * terminating NUL, so the end of the string matches too.
+		 */
+		for (d = delim; ; d++) {
+			if (*d == *cur) {
+				if (*cur == '\0') {
+					*stringp = NULL;
+				} else {
+					*cur = '\0';
+					*stringp = cur + 1;
+				}
+				return (start);
+			}
+			if (*d == '\0')
+				break;
+		}
+	}
+	/* NOTREACHED */
+}
+
+/* Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged. */
+static unsigned char
+tolower(unsigned char c)
+{
+	if (c < 'A' || c > 'Z')
+		return c;
+	return c + ('a' - 'A');
+}
+
+/* Return 1 for '0'..'9', 0 otherwise. */
+static int isdigit(unsigned char c)
+{
+	return !(c < '0' || c > '9');
+}
+
+/*
+ * Return 1 if c is a hexadecimal digit, 0 otherwise.
+ * NUL is rejected explicitly: index() would otherwise match the
+ * terminator of the lookup string and report it as a digit.
+ */
+static int isxdigit(unsigned char c)
+{
+	return (c != '\0' && index("0123456789ABCDEFabcdef", c) ? 1 : 0);
+}
+
+/*
+ * Return 1 if c is one of the C-locale whitespace characters
+ * (space, \t, \n, \v, \f, \r), 0 otherwise.
+ * NUL is rejected explicitly: index() would otherwise match the
+ * terminator of the lookup string, and a caller skipping whitespace
+ * would then walk past the end of its input.
+ */
+static int isspace(unsigned char c)
+{
+	return (c != '\0' && index(" \t\n\v\f\r", c) ? 1 : 0);
+}
+
+/* Return 1 when c fits in 7 bits, 0 otherwise. */
+static int isascii(unsigned char c)
+{
+	return ((c & 0x80) == 0);
+}
+
+/* Return 1 for 'a'..'z', 0 otherwise. */
+static int islower(unsigned char c)
+{
+	return !(c < 'a' || c > 'z');
+}
+
+/*
+ * Compare two strings ignoring the case of 'A'..'Z'. Returns 0 when
+ * they are equal, otherwise the difference between the lowercased
+ * values of the first pair of bytes that differ.
+ */
+int
+strcasecmp(const char *s1, const char *s2)
+{
+	const unsigned char *a = (const unsigned char *)s1;
+	const unsigned char *b = (const unsigned char *)s2;
+	int delta;
+
+	for (;; a++, b++) {
+		delta = tolower(*a) - tolower(*b);
+		if (delta != 0)
+			return (delta);
+		if (*a == '\0')		/* both strings ended together */
+			return (0);
+	}
+}
+
+/*
+ * strtoimax() shim built on strtol(): the result is limited to the
+ * range of a long.
+ */
+intmax_t
+strtoimax(const char * restrict nptr, char ** restrict endptr, int base)
+{
+	long value;
+
+	value = strtol(nptr, endptr, base);
+	return value;
+}
+
+/* setservent() stub: does nothing. */
+void
+setservent(int a)
+{
+	(void)a;
+}
+
+/* size in bytes of an IPv4 address: this is how much inet_pton() stores */
+#define NS_INADDRSZ 4
+
+/*
+ * Convert a dotted-quad IPv4 string to its 4-byte binary form.
+ * Only AF_INET is supported: any other family returns -1 with errno
+ * set to EINVAL. Returns 1 and stores NS_INADDRSZ bytes in dst on
+ * success, 0 when src is not a valid address (exactly four decimal
+ * octets, each 0..255, without leading zeros).
+ */
+int
+inet_pton(int af, const char *src, void *dst)
+{
+	static const char digits[] = "0123456789";
+	int saw_digit, octets, ch;
+	u_char tmp[NS_INADDRSZ], *tp;
+
+	if (af != AF_INET) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	saw_digit = 0;
+	octets = 0;
+	*(tp = tmp) = 0;
+	while ((ch = *src++) != '\0') {
+		const char *pch;
+
+		if ((pch = strchr(digits, ch)) != NULL) {
+			u_int new = *tp * 10 + (pch - digits);
+
+			/* a second digit after a leading 0 is not accepted */
+			if (saw_digit && *tp == 0)
+				return (0);
+			if (new > 255)
+				return (0);
+			*tp = new;
+			if (!saw_digit) {
+				if (++octets > 4)
+					return (0);
+				saw_digit = 1;
+			}
+		} else if (ch == '.' && saw_digit) {
+			/* octets < 4 here keeps tp inside tmp[] */
+			if (octets == 4)
+				return (0);
+			*++tp = 0;
+			saw_digit = 0;
+		} else
+			return (0);
+	}
+	if (octets < 4)
+		return (0);
+	/* copy exactly one IPv4 address: dst is only that large */
+	memcpy(dst, tmp, NS_INADDRSZ);
+	return (1);
+}
+
+/*
+ * Format a binary IPv4 address as dotted-decimal text.
+ *
+ * af    address family; only AF_INET is supported.
+ * _src  the four address bytes, first octet first.
+ * dst   output buffer of size bytes.
+ *
+ * Returns dst on success.  Returns NULL with errno set to EINVAL when
+ * af is not AF_INET, or to ENOSPC when the text plus its terminating
+ * NUL does not fit in size bytes.
+ */
+const char *
+inet_ntop(int af, const void *_src, char *dst, socklen_t size)
+{
+	const u_char *octet = _src;
+	char text[sizeof "255.255.255.255"];
+	int len;
+
+	if (af != AF_INET) {
+		errno = EINVAL;
+		return NULL;
+	}
+
+	/* format into a scratch buffer so dst is untouched on failure */
+	len = snprintf(text, sizeof(text), "%u.%u.%u.%u",
+	    octet[0], octet[1], octet[2], octet[3]);
+	if (len <= 0 || (socklen_t) len >= size) {
+		errno = ENOSPC;
+		return (NULL);
+	}
+	strlcpy(dst, text, size);
+	return (dst);
+}
+
+/*%
+ * Check whether "cp" is a valid ascii representation
+ * of an Internet address and convert to a binary address.
+ * Returns 1 if the address is valid, 0 if not.
+ * This replaces inet_addr, the return value from which
+ * cannot distinguish between failure and a local broadcast address.
+ */
+int
+inet_aton(const char *cp, struct in_addr *addr) {
+ u_long val;
+ int base, n;
+ char c;
+ u_int8_t parts[4];
+ u_int8_t *pp = parts;
+ int digit;
+
+ c = *cp;
+ for (;;) {
+ /*
+ * Collect number up to ``.''.
+ * Values are specified as for C:
+ * 0x=hex, 0=octal, isdigit=decimal.
+ */
+ if (!isdigit((unsigned char)c))
+ return (0);
+ val = 0; base = 10; digit = 0;
+ if (c == '0') {
+ c = *++cp;
+ if (c == 'x' || c == 'X')
+ base = 16, c = *++cp;
+ else {
+ /* the leading '0' itself already counts as a digit */
+ base = 8;
+ digit = 1 ;
+ }
+ }
+ for (;;) {
+ if (isascii(c) && isdigit((unsigned char)c)) {
+ if (base == 8 && (c == '8' || c == '9'))
+ return (0);
+ val = (val * base) + (c - '0');
+ c = *++cp;
+ digit = 1;
+ } else if (base == 16 && isascii(c) &&
+ isxdigit((unsigned char)c)) {
+ /*
+ * NOTE(review): this branch relies on isxdigit() returning 0
+ * for '\0'; if it did not, the scan would step past the end
+ * of the string -- confirm the helper's behaviour.
+ */
+ /* hex letter value: c - 'a' + 10 or c - 'A' + 10 */
+ val = (val << 4) |
+ (c + 10 - (islower((unsigned char)c) ? 'a' : 'A'));
+ c = *++cp;
+ digit = 1;
+ } else
+ break;
+ }
+ if (c == '.') {
+ /*
+ * Internet format:
+ * a.b.c.d
+ * a.b.c (with c treated as 16 bits)
+ * a.b (with b treated as 24 bits)
+ */
+ /* at most three dot-terminated parts, each fitting in one byte */
+ if (pp >= parts + 3 || val > 0xffU)
+ return (0);
+ *pp++ = val;
+ c = *++cp;
+ } else
+ break;
+ }
+ /*
+ * Check for trailing characters.
+ */
+ if (c != '\0' && (!isascii(c) || !isspace((unsigned char)c)))
+ return (0);
+ /*
+ * Did we get a valid digit?
+ */
+ if (!digit)
+ return (0);
+ /*
+ * Concoct the address according to
+ * the number of parts specified.
+ */
+ /* parts stored before each dot, plus the final value still in val */
+ n = pp - parts + 1;
+ switch (n) {
+ case 1: /*%< a -- 32 bits */
+ break;
+
+ case 2: /*%< a.b -- 8.24 bits */
+ if (val > 0xffffffU)
+ return (0);
+ val |= parts[0] << 24;
+ break;
+
+ case 3: /*%< a.b.c -- 8.8.16 bits */
+ if (val > 0xffffU)
+ return (0);
+ val |= (parts[0] << 24) | (parts[1] << 16);
+ break;
+
+ case 4: /*%< a.b.c.d -- 8.8.8.8 bits */
+ if (val > 0xffU)
+ return (0);
+ val |= (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8);
+ break;
+ }
+ /* addr may be NULL: the string is then only validated */
+ if (addr != NULL)
+ addr->s_addr = htonl(val);
+ return (1);
+}
+
+#endif /* TCC */
+
+#endif /* _WIN32 */
--- /dev/null
+.\"
+.\" $FreeBSD: head/sbin/ipfw/ipfw.8 205372 2010-03-20 14:42:16Z gavin $
+.\"
+.Dd March 20, 2010
+.Dt IPFW 8
+.Os
+.Sh NAME
+.Nm ipfw
+.Nd User interface for firewall, traffic shaper, packet scheduler,
+in-kernel NAT.
+.Sh SYNOPSIS
+.Ss FIREWALL CONFIGURATION
+.Nm
+.Op Fl cq
+.Cm add
+.Ar rule
+.Nm
+.Op Fl acdefnNStT
+.Op Cm set Ar N
+.Brq Cm list | show
+.Op Ar rule | first-last ...
+.Nm
+.Op Fl f | q
+.Op Cm set Ar N
+.Cm flush
+.Nm
+.Op Fl q
+.Op Cm set Ar N
+.Brq Cm delete | zero | resetlog
+.Op Ar number ...
+.Pp
+.Nm
+.Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
+.Nm
+.Cm set move
+.Op Cm rule
+.Ar number Cm to Ar number
+.Nm
+.Cm set swap Ar number number
+.Nm
+.Cm set show
+.Ss SYSCTL SHORTCUTS
+.Pp
+.Nm
+.Cm enable
+.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
+.Nm
+.Cm disable
+.Brq Cm firewall | altq | one_pass | debug | verbose | dyn_keepalive
+.Pp
+.Ss LOOKUP TABLES
+.Nm
+.Cm table Ar number Cm add Ar addr Ns Oo / Ns Ar masklen Oc Op Ar value
+.Nm
+.Cm table Ar number Cm delete Ar addr Ns Op / Ns Ar masklen
+.Nm
+.Cm table
+.Brq Ar number | all
+.Cm flush
+.Nm
+.Cm table
+.Brq Ar number | all
+.Cm list
+.Pp
+.Ss DUMMYNET CONFIGURATION (TRAFFIC SHAPER AND PACKET SCHEDULER)
+.Nm
+.Brq Cm pipe | queue | sched
+.Ar number
+.Cm config
+.Ar config-options
+.Nm
+.Op Fl s Op Ar field
+.Brq Cm pipe | queue | sched
+.Brq Cm delete | list | show
+.Op Ar number ...
+.Pp
+.Ss IN-KERNEL NAT
+.Nm
+.Op Fl q
+.Cm nat
+.Ar number
+.Cm config
+.Ar config-options
+.Pp
+.Nm
+.Op Fl cfnNqS
+.Oo
+.Fl p Ar preproc
+.Oo
+.Ar preproc-flags
+.Oc
+.Oc
+.Ar pathname
+.Sh DESCRIPTION
+The
+.Nm
+utility is the user interface for controlling the
+.Xr ipfw 4
+firewall, the
+.Xr dummynet 4
+traffic shaper/packet scheduler, and the
+in-kernel NAT services.
+.Pp
+A firewall configuration, or
+.Em ruleset ,
+is made of a list of
+.Em rules
+numbered from 1 to 65535.
+Packets are passed to the firewall
+from a number of different places in the protocol stack
+(depending on the source and destination of the packet,
+it is possible for the firewall to be
+invoked multiple times on the same packet).
+The packet passed to the firewall is compared
+against each of the rules in the
+.Em ruleset ,
+in rule-number order
+(multiple rules with the same number are permitted, in which case
+they are processed in order of insertion).
+When a match is found, the action corresponding to the
+matching rule is performed.
+.Pp
+Depending on the action and certain system settings, packets
+can be reinjected into the firewall at some rule after the
+matching one for further processing.
+.Pp
+A ruleset always includes a
+.Em default
+rule (numbered 65535) which cannot be modified or deleted,
+and matches all packets.
+The action associated with the
+.Em default
+rule can be either
+.Cm deny
+or
+.Cm allow
+depending on how the kernel is configured.
+.Pp
+If the ruleset includes one or more rules with the
+.Cm keep-state
+or
+.Cm limit
+option,
+the firewall will have a
+.Em stateful
+behaviour, i.e., upon a match it will create
+.Em dynamic rules ,
+i.e. rules that match packets with the same 5-tuple
+(protocol, source and destination addresses and ports)
+as the packet which caused their creation.
+Dynamic rules, which have a limited lifetime, are checked
+at the first occurrence of a
+.Cm check-state ,
+.Cm keep-state
+or
+.Cm limit
+rule, and are typically used to open the firewall on-demand to
+legitimate traffic only.
+See the
+.Sx STATEFUL FIREWALL
+and
+.Sx EXAMPLES
+Sections below for more information on the stateful behaviour of
+.Nm .
+.Pp
+All rules (including dynamic ones) have a few associated counters:
+a packet count, a byte count, a log count and a timestamp
+indicating the time of the last match.
+Counters can be displayed or reset with
+.Nm
+commands.
+.Pp
+Each rule belongs to one of 32 different
+.Em sets ,
+and there are
+.Nm
+commands to atomically manipulate sets, such as enable,
+disable, swap sets, move all rules in a set to another
+one, delete all rules in a set.
+These can be useful to
+install temporary configurations, or to test them.
+See Section
+.Sx SETS OF RULES
+for more information on
+.Em sets .
+.Pp
+.Pp
+Rules can be added with the
+.Cm add
+command; deleted individually or in groups with the
+.Cm delete
+command, and globally (except those in set 31) with the
+.Cm flush
+command; displayed, optionally with the content of the
+counters, using the
+.Cm show
+and
+.Cm list
+commands.
+Finally, counters can be reset with the
+.Cm zero
+and
+.Cm resetlog
+commands.
+.Pp
+.Ss COMMAND OPTIONS
+The following general options are available when invoking
+.Nm :
+.Bl -tag -width indent
+.It Fl a
+Show counter values when listing rules.
+The
+.Cm show
+command implies this option.
+.It Fl b
+Only show the action and the comment, not the body of a rule.
+Implies
+.Fl c .
+.It Fl c
+When entering or showing rules, print them in compact form,
+i.e., omitting the "ip from any to any" string
+when this does not carry any additional information.
+.It Fl d
+When listing, show dynamic rules in addition to static ones.
+.It Fl e
+When listing and
+.Fl d
+is specified, also show expired dynamic rules.
+.It Fl f
+Do not ask for confirmation for commands that can cause problems
+if misused,
+.No i.e. Cm flush .
+If there is no tty associated with the process, this is implied.
+.It Fl i
+When listing a table (see the
+.Sx LOOKUP TABLES
+section below for more information on lookup tables), format values
+as IP addresses. By default, values are shown as integers.
+.It Fl n
+Only check syntax of the command strings, without actually passing
+them to the kernel.
+.It Fl N
+Try to resolve addresses and service names in output.
+.It Fl q
+Be quiet when executing the
+.Cm add ,
+.Cm nat ,
+.Cm zero ,
+.Cm resetlog
+or
+.Cm flush
+commands;
+(implies
+.Fl f ) .
+This is useful when updating rulesets by executing multiple
+.Nm
+commands in a script
+(e.g.,
+.Ql sh\ /etc/rc.firewall ) ,
+or by processing a file with many
+.Nm
+rules across a remote login session.
+It also stops a table add or delete
+from failing if the entry already exists or is not present.
+.Pp
+The reason why this option may be important is that
+for some of these actions,
+.Nm
+may print a message; if the action results in blocking the
+traffic to the remote client,
+the remote login session will be closed
+and the rest of the ruleset will not be processed.
+Access to the console would then be required to recover.
+.It Fl S
+When listing rules, show the
+.Em set
+each rule belongs to.
+If this flag is not specified, disabled rules will not be
+listed.
+.It Fl s Op Ar field
+When listing pipes, sort according to one of the four
+counters (total or current packets or bytes).
+.It Fl t
+When listing, show last match timestamp converted with ctime().
+.It Fl T
+When listing, show last match timestamp as seconds from the epoch.
+This form can be more convenient for postprocessing by scripts.
+.El
+.Pp
+.Ss LIST OF RULES AND PREPROCESSING
+To ease configuration, rules can be put into a file which is
+processed using
+.Nm
+as shown in the last synopsis line.
+An absolute
+.Ar pathname
+must be used.
+The file will be read line by line and applied as arguments to the
+.Nm
+utility.
+.Pp
+Optionally, a preprocessor can be specified using
+.Fl p Ar preproc
+where
+.Ar pathname
+is to be piped through.
+Useful preprocessors include
+.Xr cpp 1
+and
+.Xr m4 1 .
+If
+.Ar preproc
+does not start with a slash
+.Pq Ql /
+as its first character, the usual
+.Ev PATH
+name search is performed.
+Care should be taken with this in environments where not all
+file systems are mounted (yet) by the time
+.Nm
+is being run (e.g.\& when they are mounted over NFS).
+Once
+.Fl p
+has been specified, any additional arguments are passed on to the preprocessor
+for interpretation.
+This allows for flexible configuration files (like conditionalizing
+them on the local hostname) and the use of macros to centralize
+frequently required arguments like IP addresses.
+.Pp
+.Ss TRAFFIC SHAPER CONFIGURATION
+The
+.Nm
+.Cm pipe , queue
+and
+.Cm sched
+commands are used to configure the traffic shaper and packet scheduler.
+See the
+.Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
+Section below for details.
+.Pp
+If the world and the kernel get out of sync the
+.Nm
+ABI may break, preventing you from being able to add any rules.
+This can
+adversely affect the booting process.
+You can use
+.Nm
+.Cm disable
+.Cm firewall
+to temporarily disable the firewall to regain access to the network,
+allowing you to fix the problem.
+.Sh PACKET FLOW
+A packet is checked against the active ruleset in multiple places
+in the protocol stack, under control of several sysctl variables.
+These places and variables are shown below, and it is important to
+have this picture in mind in order to design a correct ruleset.
+.Bd -literal -offset indent
+ ^ to upper layers V
+ | |
+ +----------->-----------+
+ ^ V
+ [ip(6)_input] [ip(6)_output] net.inet(6).ip(6).fw.enable=1
+ | |
+ ^ V
+ [ether_demux] [ether_output_frame] net.link.ether.ipfw=1
+ | |
+ +-->--[bdg_forward]-->--+ net.link.bridge.ipfw=1
+ ^ V
+ | to devices |
+.Ed
+.Pp
+The number of
+times the same packet goes through the firewall can
+vary between 0 and 4 depending on packet source and
+destination, and system configuration.
+.Pp
+Note that as packets flow through the stack, headers can be
+stripped or added to it, and so they may or may not be available
+for inspection.
+E.g., incoming packets will include the MAC header when
+.Nm
+is invoked from
+.Cm ether_demux() ,
+but the same packets will have the MAC header stripped off when
+.Nm
+is invoked from
+.Cm ip_input()
+or
+.Cm ip6_input() .
+.Pp
+Also note that each packet is always checked against the complete ruleset,
+irrespective of the place where the check occurs, or the source of the packet.
+If a rule contains some match patterns or actions which are not valid
+for the place of invocation (e.g.\& trying to match a MAC header within
+.Cm ip_input
+or
+.Cm ip6_input ),
+the match pattern will not match, but a
+.Cm not
+operator in front of such patterns
+.Em will
+cause the pattern to
+.Em always
+match on those packets.
+It is thus the responsibility of
+the programmer, if necessary, to write a suitable ruleset to
+differentiate among the possible places.
+.Cm skipto
+rules can be useful here, as an example:
+.Bd -literal -offset indent
+# packets from ether_demux or bdg_forward
+ipfw add 10 skipto 1000 all from any to any layer2 in
+# packets from ip_input
+ipfw add 10 skipto 2000 all from any to any not layer2 in
+# packets from ip_output
+ipfw add 10 skipto 3000 all from any to any not layer2 out
+# packets from ether_output_frame
+ipfw add 10 skipto 4000 all from any to any layer2 out
+.Ed
+.Pp
+(yes, at the moment there is no way to differentiate between
+ether_demux and bdg_forward).
+.Sh SYNTAX
+In general, each keyword or argument must be provided as
+a separate command line argument, with no leading or trailing
+spaces.
+Keywords are case-sensitive, whereas arguments may
+or may not be case-sensitive depending on their nature
+(e.g.\& uid's are, hostnames are not).
+.Pp
+Some arguments (e.g. port or address lists) are comma-separated
+lists of values.
+In this case, spaces after commas ',' are allowed to make
+the line more readable.
+You can also put the entire
+command (including flags) into a single argument.
+E.g., the following forms are equivalent:
+.Bd -literal -offset indent
+ipfw -q add deny src-ip 10.0.0.0/24,127.0.0.1/8
+ipfw -q add deny src-ip 10.0.0.0/24, 127.0.0.1/8
+ipfw "-q add deny src-ip 10.0.0.0/24, 127.0.0.1/8"
+.Ed
+.Sh RULE FORMAT
+The format of firewall rules is the following:
+.Bd -ragged -offset indent
+.Bk -words
+.Op Ar rule_number
+.Op Cm set Ar set_number
+.Op Cm prob Ar match_probability
+.Ar action
+.Op Cm log Op Cm logamount Ar number
+.Op Cm altq Ar queue
+.Oo
+.Bro Cm tag | untag
+.Brc Ar number
+.Oc
+.Ar body
+.Ek
+.Ed
+.Pp
+where the body of the rule specifies which information is used
+for filtering packets, among the following:
+.Pp
+.Bl -tag -width "Source and dest. addresses and ports" -offset XXX -compact
+.It Layer-2 header fields
+When available
+.It IPv4 and IPv6 Protocol
+TCP, UDP, ICMP, etc.
+.It Source and dest. addresses and ports
+.It Direction
+See Section
+.Sx PACKET FLOW
+.It Transmit and receive interface
+By name or address
+.It Misc. IP header fields
+Version, type of service, datagram length, identification,
+fragment flag (non-zero IP offset),
+Time To Live
+.It IP options
+.It IPv6 Extension headers
+Fragmentation, Hop-by-Hop options,
+Routing Headers, Source routing rthdr0, Mobile IPv6 rthdr2, IPSec options.
+.It IPv6 Flow-ID
+.It Misc. TCP header fields
+TCP flags (SYN, FIN, ACK, RST, etc.),
+sequence number, acknowledgment number,
+window
+.It TCP options
+.It ICMP types
+for ICMP packets
+.It ICMP6 types
+for ICMP6 packets
+.It User/group ID
+When the packet can be associated with a local socket.
+.It Divert status
+Whether a packet came from a divert socket (e.g.,
+.Xr natd 8 ) .
+.It Fib annotation state
+Whether a packet has been tagged for using a specific FIB (routing table)
+in future forwarding decisions.
+.El
+.Pp
+Note that some of the above information, e.g.\& source MAC or IP addresses and
+TCP/UDP ports, can be easily spoofed, so filtering on those fields
+alone might not guarantee the desired results.
+.Bl -tag -width indent
+.It Ar rule_number
+Each rule is associated with a
+.Ar rule_number
+in the range 1..65535, with the latter reserved for the
+.Em default
+rule.
+Rules are checked sequentially by rule number.
+Multiple rules can have the same number, in which case they are
+checked (and listed) according to the order in which they have
+been added.
+If a rule is entered without specifying a number, the kernel will
+assign one in such a way that the rule becomes the last one
+before the
+.Em default
+rule.
+Automatic rule numbers are assigned by incrementing the last
+non-default rule number by the value of the sysctl variable
+.Ar net.inet.ip.fw.autoinc_step
+which defaults to 100.
+If this is not possible (e.g.\& because we would go beyond the
+maximum allowed rule number), the number of the last
+non-default value is used instead.
+.It Cm set Ar set_number
+Each rule is associated with a
+.Ar set_number
+in the range 0..31.
+Sets can be individually disabled and enabled, so this parameter
+is of fundamental importance for atomic ruleset manipulation.
+It can be also used to simplify deletion of groups of rules.
+If a rule is entered without specifying a set number,
+set 0 will be used.
+.br
+Set 31 is special in that it cannot be disabled,
+and rules in set 31 are not deleted by the
+.Nm ipfw flush
+command (but you can delete them with the
+.Nm ipfw delete set 31
+command).
+Set 31 is also used for the
+.Em default
+rule.
+.It Cm prob Ar match_probability
+A match is only declared with the specified probability
+(floating point number between 0 and 1).
+This can be useful for a number of applications such as
+random packet drop or
+(in conjunction with
+.Nm dummynet )
+to simulate the effect of multiple paths leading to out-of-order
+packet delivery.
+.Pp
+Note: this condition is checked before any other condition, including
+ones such as keep-state or check-state which might have side effects.
+.It Cm log Op Cm logamount Ar number
+When a packet matches a rule with the
+.Cm log
+keyword, a message will be
+logged to
+.Xr syslogd 8
+with a
+.Dv LOG_SECURITY
+facility.
+The logging only occurs if the sysctl variable
+.Va net.inet.ip.fw.verbose
+is set to 1
+(which is the default when the kernel is compiled with
+.Dv IPFIREWALL_VERBOSE )
+and the number of packets logged so far for that
+particular rule does not exceed the
+.Cm logamount
+parameter.
+If no
+.Cm logamount
+is specified, the limit is taken from the sysctl variable
+.Va net.inet.ip.fw.verbose_limit .
+In both cases, a value of 0 removes the logging limit.
+.Pp
+Once the limit is reached, logging can be re-enabled by
+clearing the logging counter or the packet counter for that entry, see the
+.Cm resetlog
+command.
+.Pp
+Note: logging is done after all other packet matching conditions
+have been successfully verified, and before performing the final
+action (accept, deny, etc.) on the packet.
+.It Cm tag Ar number
+When a packet matches a rule with the
+.Cm tag
+keyword, the numeric tag for the given
+.Ar number
+in the range 1..65534 will be attached to the packet.
+The tag acts as an internal marker (it is not sent out over
+the wire) that can be used to identify these packets later on.
+This can be used, for example, to provide trust between interfaces
+and to start doing policy-based filtering.
+A packet can have multiple tags at the same time.
+Tags are "sticky", meaning once a tag is applied to a packet by a
+matching rule it exists until explicit removal.
+Tags are kept with the packet everywhere within the kernel, but are
+lost when the packet leaves the kernel, for example, when transmitting
+the packet out to the network or sending it to a
+.Xr divert 4
+socket.
+.Pp
+To check for previously applied tags, use the
+.Cm tagged
+rule option.
+To delete a previously applied tag, use the
+.Cm untag
+keyword.
+.Pp
+Note: since tags are kept with the packet everywhere in kernelspace,
+they can be set and unset anywhere in the kernel network subsystem
+(using the
+.Xr mbuf_tags 9
+facility), not only by means of the
+.Xr ipfw 4
+.Cm tag
+and
+.Cm untag
+keywords.
+For example, there can be a specialized
+.Xr netgraph 4
+node doing traffic analyzing and tagging for later inspecting
+in firewall.
+.It Cm untag Ar number
+When a packet matches a rule with the
+.Cm untag
+keyword, the tag with the number
+.Ar number
+is searched among the tags attached to this packet and,
+if found, removed from it.
+Other tags bound to packet, if present, are left untouched.
+.It Cm altq Ar queue
+When a packet matches a rule with the
+.Cm altq
+keyword, the ALTQ identifier for the given
+.Ar queue
+(see
+.Xr altq 4 )
+will be attached.
+Note that this ALTQ tag is only meaningful for packets going "out" of IPFW,
+and not being rejected or going to divert sockets.
+Note that if there is insufficient memory at the time the packet is
+processed, it will not be tagged, so it is wise to make your ALTQ
+"default" queue policy account for this.
+If multiple
+.Cm altq
+rules match a single packet, only the first one adds the ALTQ classification
+tag.
+In doing so, traffic may be shaped by using
+.Cm count Cm altq Ar queue
+rules for classification early in the ruleset, then later applying
+the filtering decision.
+For example,
+.Cm check-state
+and
+.Cm keep-state
+rules may come later and provide the actual filtering decisions in
+addition to the fallback ALTQ tag.
+.Pp
+You must run
+.Xr pfctl 8
+to set up the queues before IPFW will be able to look them up by name,
+and if the ALTQ disciplines are rearranged, the rules containing the
+queue identifiers in the kernel will likely have gone stale and need
+to be reloaded.
+Stale queue identifiers will probably result in misclassification.
+.Pp
+All system ALTQ processing can be turned on or off via
+.Nm
+.Cm enable Ar altq
+and
+.Nm
+.Cm disable Ar altq .
+The usage of
+.Va net.inet.ip.fw.one_pass
+is irrelevant to ALTQ traffic shaping, as the actual rule action is followed
+always after adding an ALTQ tag.
+.El
+.Ss RULE ACTIONS
+A rule can be associated with one of the following actions, which
+will be executed when the packet matches the body of the rule.
+.Bl -tag -width indent
+.It Cm allow | accept | pass | permit
+Allow packets that match rule.
+The search terminates.
+.It Cm check-state
+Checks the packet against the dynamic ruleset.
+If a match is found, execute the action associated with
+the rule which generated this dynamic rule, otherwise
+move to the next rule.
+.br
+.Cm Check-state
+rules do not have a body.
+If no
+.Cm check-state
+rule is found, the dynamic ruleset is checked at the first
+.Cm keep-state
+or
+.Cm limit
+rule.
+.It Cm count
+Update counters for all packets that match rule.
+The search continues with the next rule.
+.It Cm deny | drop
+Discard packets that match this rule.
+The search terminates.
+.It Cm divert Ar port
+Divert packets that match this rule to the
+.Xr divert 4
+socket bound to port
+.Ar port .
+The search terminates.
+.It Cm fwd | forward Ar ipaddr | tablearg Ns Op , Ns Ar port
+Change the next-hop on matching packets to
+.Ar ipaddr ,
+which can be an IP address or a host name.
+The next hop can also be supplied by the last table
+looked up for the packet by using the
+.Cm tablearg
+keyword instead of an explicit address.
+The search terminates if this rule matches.
+.Pp
+If
+.Ar ipaddr
+is a local address, then matching packets will be forwarded to
+.Ar port
+(or the port number in the packet if one is not specified in the rule)
+on the local machine.
+.br
+If
+.Ar ipaddr
+is not a local address, then the port number
+(if specified) is ignored, and the packet will be
+forwarded to the remote address, using the route as found in
+the local routing table for that IP.
+.br
+A
+.Ar fwd
+rule will not match layer-2 packets (those received
+on ether_input, ether_output, or bridged).
+.br
+The
+.Cm fwd
+action does not change the contents of the packet at all.
+In particular, the destination address remains unmodified, so
+packets forwarded to another system will usually be rejected by that system
+unless there is a matching rule on that system to capture them.
+For packets forwarded locally,
+the local address of the socket will be
+set to the original destination address of the packet.
+This makes the
+.Xr netstat 1
+entry look rather weird but is intended for
+use with transparent proxy servers.
+.Pp
+To enable
+.Cm fwd
+a custom kernel needs to be compiled with the option
+.Cd "options IPFIREWALL_FORWARD" .
+.It Cm nat Ar nat_nr
+Pass packet to a
+nat instance
+(for network address translation, address redirect, etc.):
+see the
+.Sx NETWORK ADDRESS TRANSLATION (NAT)
+Section for further information.
+.It Cm pipe Ar pipe_nr
+Pass packet to a
+.Nm dummynet
+.Dq pipe
+(for bandwidth limitation, delay, etc.).
+See the
+.Sx TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
+Section for further information.
+The search terminates; however, on exit from the pipe and if
+the
+.Xr sysctl 8
+variable
+.Va net.inet.ip.fw.one_pass
+is not set, the packet is passed again to the firewall code
+starting from the next rule.
+.It Cm queue Ar queue_nr
+Pass packet to a
+.Nm dummynet
+.Dq queue
+(for bandwidth limitation using WF2Q+).
+.It Cm reject
+(Deprecated).
+Synonym for
+.Cm unreach host .
+.It Cm reset
+Discard packets that match this rule, and if the
+packet is a TCP packet, try to send a TCP reset (RST) notice.
+The search terminates.
+.It Cm reset6
+Discard packets that match this rule, and if the
+packet is a TCP packet, try to send a TCP reset (RST) notice.
+The search terminates.
+.It Cm skipto Ar number | tablearg
+Skip all subsequent rules numbered less than
+.Ar number .
+The search continues with the first rule numbered
+.Ar number
+or higher.
+It is possible to use the
+.Cm tablearg
+keyword with a skipto for a
+.Em computed
+skipto, but care should be used, as no destination caching
+is possible in this case so the rules are always walked to find it,
+starting from the
+.Cm skipto .
+.It Cm tee Ar port
+Send a copy of packets matching this rule to the
+.Xr divert 4
+socket bound to port
+.Ar port .
+The search continues with the next rule.
+.It Cm unreach Ar code
+Discard packets that match this rule, and try to send an ICMP
+unreachable notice with code
+.Ar code ,
+where
+.Ar code
+is a number from 0 to 255, or one of these aliases:
+.Cm net , host , protocol , port ,
+.Cm needfrag , srcfail , net-unknown , host-unknown ,
+.Cm isolated , net-prohib , host-prohib , tosnet ,
+.Cm toshost , filter-prohib , host-precedence
+or
+.Cm precedence-cutoff .
+The search terminates.
+.It Cm unreach6 Ar code
+Discard packets that match this rule, and try to send an ICMPv6
+unreachable notice with code
+.Ar code ,
+where
+.Ar code
+is one of the numbers 0, 1, 3 or 4, or one of these aliases:
+.Cm no-route , admin-prohib , address
+or
+.Cm port .
+The search terminates.
+.It Cm netgraph Ar cookie
+Divert packet into netgraph with given
+.Ar cookie .
+The search terminates.
+If packet is later returned from netgraph it is either
+accepted or continues with the next rule, depending on
+.Va net.inet.ip.fw.one_pass
+sysctl variable.
+.It Cm ngtee Ar cookie
+A copy of packet is diverted into netgraph, original
+packet is either accepted or continues with the next rule, depending on
+.Va net.inet.ip.fw.one_pass
+sysctl variable.
+See
+.Xr ng_ipfw 4
+for more information on
+.Cm netgraph
+and
+.Cm ngtee
+actions.
+.It Cm setfib Ar fibnum
+The packet is tagged so as to use the FIB (routing table)
+.Ar fibnum
+in any subsequent forwarding decisions.
+Initially this is limited to the values 0 through 15, see
+.Xr setfib 1 .
+Processing continues at the next rule.
+.It Cm reass
+Queue and reassemble ip fragments.
+If the packet is not fragmented, counters are updated and processing continues with the next rule.
+If the packet is the last logical fragment, the packet is reassembled and, if
+.Va net.inet.ip.fw.one_pass
+is set to 0, processing continues with the next rule, else packet is allowed to pass and search terminates.
+If the packet is a fragment in the middle, it is consumed and processing stops immediately.
+.Pp
+Fragments handling can be tuned via
+.Va net.inet.ip.maxfragpackets
+and
+.Va net.inet.ip.maxfragsperpacket
+which limit, respectively, the maximum number of processable fragments (default: 800) and
+the maximum number of fragments per packet (default: 16).
+.Pp
+NOTA BENE: since fragments do not contain port numbers, they should be avoided with the
+.Nm reass
+rule.
+Alternatively, direction-based (like
+.Nm in Ns / Ns Nm out )
+and source-based (like
+.Nm via )
+match patterns can be used to select fragments.
+.Pp
+Usually a simple rule like:
+.Bd -literal -offset indent
+# reassemble incoming fragments
+ipfw add reass all from any to any in
+.Ed
+.Pp
+is all you need at the beginning of your ruleset.
+.El
+.Ss RULE BODY
+The body of a rule contains zero or more patterns (such as
+specific source and destination addresses or ports,
+protocol options, incoming or outgoing interfaces, etc.)
+that the packet must match in order to be recognised.
+In general, the patterns are connected by (implicit)
+.Cm and
+operators -- i.e., all must match in order for the
+rule to match.
+Individual patterns can be prefixed by the
+.Cm not
+operator to reverse the result of the match, as in
+.Pp
+.Dl "ipfw add 100 allow ip from not 1.2.3.4 to any"
+.Pp
+Additionally, sets of alternative match patterns
+.Pq Em or-blocks
+can be constructed by putting the patterns in
+lists enclosed between parentheses ( ) or braces { }, and
+using the
+.Cm or
+operator as follows:
+.Pp
+.Dl "ipfw add 100 allow ip from { x or not y or z } to any"
+.Pp
+Only one level of parentheses is allowed.
+Beware that most shells have special meanings for parentheses
+or braces, so it is advisable to put a backslash \\ in front of them
+to prevent such interpretations.
+.Pp
+The body of a rule must in general include a source and destination
+address specifier.
+The keyword
+.Ar any
+can be used in various places to specify that the content of
+a required field is irrelevant.
+.Pp
+The rule body has the following format:
+.Bd -ragged -offset indent
+.Op Ar proto Cm from Ar src Cm to Ar dst
+.Op Ar options
+.Ed
+.Pp
+The first part (proto from src to dst) is for backward
+compatibility with earlier versions of
+.Fx .
+In modern
+.Fx
+any match pattern (including MAC headers, IP protocols,
+addresses and ports) can be specified in the
+.Ar options
+section.
+.Pp
+Rule fields have the following meaning:
+.Bl -tag -width indent
+.It Ar proto : protocol | Cm { Ar protocol Cm or ... }
+.It Ar protocol : Oo Cm not Oc Ar protocol-name | protocol-number
+An IP protocol specified by number or name
+(for a complete list see
+.Pa /etc/protocols ) ,
+or one of the following keywords:
+.Bl -tag -width indent
+.It Cm ip4 | ipv4
+Matches IPv4 packets.
+.It Cm ip6 | ipv6
+Matches IPv6 packets.
+.It Cm ip | all
+Matches any packet.
+.El
+.Pp
+The
+.Cm ipv6
+in
+.Cm proto
+option will be treated as inner protocol.
+And, the
+.Cm ipv4
+is not available in
+.Cm proto
+option.
+.Pp
+The
+.Cm { Ar protocol Cm or ... }
+format (an
+.Em or-block )
+is provided for convenience only but its use is deprecated.
+.It Ar src No and Ar dst : Bro Cm addr | Cm { Ar addr Cm or ... } Brc Op Oo Cm not Oc Ar ports
+An address (or a list, see below)
+optionally followed by
+.Ar ports
+specifiers.
+.Pp
+The second format
+.Em ( or-block
+with multiple addresses) is provided for convenience only and
+its use is discouraged.
+.It Ar addr : Oo Cm not Oc Bro
+.Bl -tag -width indent
+.Cm any | me | me6 |
+.Cm table Ns Pq Ar number Ns Op , Ns Ar value
+.Ar | addr-list | addr-set
+.Brc
+.It Cm any
+matches any IP address.
+.It Cm me
+matches any IP address configured on an interface in the system.
+.It Cm me6
+matches any IPv6 address configured on an interface in the system.
+The address list is evaluated at the time the packet is
+analysed.
+.It Cm table Ns Pq Ar number Ns Op , Ns Ar value
+Matches any IPv4 address for which an entry exists in the lookup table
+.Ar number .
+If an optional 32-bit unsigned
+.Ar value
+is also specified, an entry will match only if it has this value.
+See the
+.Sx LOOKUP TABLES
+section below for more information on lookup tables.
+.El
+.It Ar addr-list : ip-addr Ns Op Ns , Ns Ar addr-list
+.It Ar ip-addr :
+A host or subnet address specified in one of the following ways:
+.Bl -tag -width indent
+.It Ar numeric-ip | hostname
+Matches a single IPv4 address, specified as dotted-quad or a hostname.
+Hostnames are resolved at the time the rule is added to the firewall list.
+.It Ar addr Ns / Ns Ar masklen
+Matches all addresses with base
+.Ar addr
+(specified as an IP address, a network number, or a hostname)
+and mask width of
+.Cm masklen
+bits.
+As an example, 1.2.3.4/25 or 1.2.3.0/25 will match
+all IP numbers from 1.2.3.0 to 1.2.3.127 .
+.It Ar addr Ns : Ns Ar mask
+Matches all addresses with base
+.Ar addr
+(specified as an IP address, a network number, or a hostname)
+and the mask of
+.Ar mask ,
+specified as a dotted quad.
+As an example, 1.2.3.4:255.0.255.0 or 1.0.3.0:255.0.255.0 will match
+1.*.3.*.
+This form is advised only for non-contiguous
+masks.
+It is better to resort to the
+.Ar addr Ns / Ns Ar masklen
+format for contiguous masks, which is more compact and less
+error-prone.
+.El
+.It Ar addr-set : addr Ns Oo Ns / Ns Ar masklen Oc Ns Cm { Ns Ar list Ns Cm }
+.It Ar list : Bro Ar num | num-num Brc Ns Op Ns , Ns Ar list
+Matches all addresses with base address
+.Ar addr
+(specified as an IP address, a network number, or a hostname)
+and whose last byte is in the list between braces { } .
+Note that there must be no spaces between braces and
+numbers (spaces after commas are allowed).
+Elements of the list can be specified as single entries
+or ranges.
+The
+.Ar masklen
+field is used to limit the size of the set of addresses,
+and can have any value between 24 and 32.
+If not specified,
+it will be assumed as 24.
+.br
+This format is particularly useful to handle sparse address sets
+within a single rule.
+Because the matching occurs using a
+bitmask, it takes constant time and dramatically reduces
+the complexity of rulesets.
+.br
+As an example, an address specified as 1.2.3.4/24{128,35-55,89}
+or 1.2.3.0/24{128,35-55,89}
+will match the following IP addresses:
+.br
+1.2.3.128, 1.2.3.35 to 1.2.3.55, 1.2.3.89 .
+.It Ar addr6-list : ip6-addr Ns Op Ns , Ns Ar addr6-list
+.It Ar ip6-addr :
+A host or subnet specified in one of the following ways:
+.Pp
+.Bl -tag -width indent
+.It Ar numeric-ip | hostname
+Matches a single IPv6 address as allowed by
+.Xr inet_pton 3
+or a hostname.
+Hostnames are resolved at the time the rule is added to the firewall
+list.
+.It Ar addr Ns / Ns Ar masklen
+Matches all IPv6 addresses with base
+.Ar addr
+(specified as allowed by
+.Xr inet_pton 3
+or a hostname)
+and mask width of
+.Cm masklen
+bits.
+.El
+.Pp
+No support for sets of IPv6 addresses is provided because IPv6 addresses
+are typically random past the initial prefix.
+.It Ar ports : Bro Ar port | port Ns \&- Ns Ar port Ns Brc Ns Op , Ns Ar ports
+For protocols which support port numbers (such as TCP and UDP), optional
+.Cm ports
+may be specified as one or more ports or port ranges, separated
+by commas but no spaces, and an optional
+.Cm not
+operator.
+The
+.Ql \&-
+notation specifies a range of ports (including boundaries).
+.Pp
+Service names (from
+.Pa /etc/services )
+may be used instead of numeric port values.
+The length of the port list is limited to 30 ports or ranges,
+though one can specify larger ranges by using an
+.Em or-block
+in the
+.Cm options
+section of the rule.
+.Pp
+A backslash
+.Pq Ql \e
+can be used to escape the dash
+.Pq Ql -
+character in a service name (from a shell, the backslash must be
+typed twice to avoid the shell itself interpreting it as an escape
+character).
+.Pp
+.Dl "ipfw add count tcp from any ftp\e\e-data-ftp to any"
+.Pp
+Fragmented packets which have a non-zero offset (i.e., not the first
+fragment) will never match a rule which has one or more port
+specifications.
+See the
+.Cm frag
+option for details on matching fragmented packets.
+.El
+.Ss RULE OPTIONS (MATCH PATTERNS)
+Additional match patterns can be used within
+rules.
+Zero or more of these so-called
+.Em options
+can be present in a rule, optionally prefixed by the
+.Cm not
+operator, and possibly grouped into
+.Em or-blocks .
+.Pp
+The following match patterns can be used (listed in alphabetical order):
+.Bl -tag -width indent
+.It Cm // this is a comment.
+Inserts the specified text as a comment in the rule.
+Everything following // is considered as a comment and stored in the rule.
+You can have comment-only rules, which are listed as having a
+.Cm count
+action followed by the comment.
+.It Cm bridged
+Alias for
+.Cm layer2 .
+.It Cm diverted
+Matches only packets generated by a divert socket.
+.It Cm diverted-loopback
+Matches only packets coming from a divert socket back into the IP stack
+input for delivery.
+.It Cm diverted-output
+Matches only packets going from a divert socket back outward to the IP
+stack output for delivery.
+.It Cm dst-ip Ar ip-address
+Matches IPv4 packets whose destination IP is one of the address(es)
+specified as argument.
+.It Bro Cm dst-ip6 | dst-ipv6 Brc Ar ip6-address
+Matches IPv6 packets whose destination IP is one of the address(es)
+specified as argument.
+.It Cm dst-port Ar ports
+Matches IP packets whose destination port is one of the port(s)
+specified as argument.
+.It Cm established
+Matches TCP packets that have the RST or ACK bits set.
+.It Cm ext6hdr Ar header
+Matches IPv6 packets containing the extended header given by
+.Ar header .
+Supported headers are:
+.Pp
+Fragment
+.Pq Cm frag ,
+Hop-by-hop options
+.Pq Cm hopopt ,
+any type of Routing Header
+.Pq Cm route ,
+Source routing Routing Header Type 0
+.Pq Cm rthdr0 ,
+Mobile IPv6 Routing Header Type 2
+.Pq Cm rthdr2 ,
+Destination options
+.Pq Cm dstopt ,
+IPSec authentication headers
+.Pq Cm ah ,
+and IPsec encapsulated security payload headers
+.Pq Cm esp .
+.It Cm fib Ar fibnum
+Matches a packet that has been tagged to use
+the given FIB (routing table) number.
+.It Cm flow-id Ar labels
+Matches IPv6 packets containing any of the flow labels given in
+.Ar labels .
+.Ar labels
+is a comma separated list of numeric flow labels.
+.It Cm frag
+Matches packets that are fragments and not the first
+fragment of an IP datagram.
+Note that these packets will not have
+the next protocol header (e.g.\& TCP, UDP) so options that look into
+these headers cannot match.
+.It Cm gid Ar group
+Matches all TCP or UDP packets sent by or received for a
+.Ar group .
+A
+.Ar group
+may be specified by name or number.
+.It Cm jail Ar prisonID
+Matches all TCP or UDP packets sent by or received for the
+jail whose prison ID is
+.Ar prisonID .
+.It Cm icmptypes Ar types
+Matches ICMP packets whose ICMP type is in the list
+.Ar types .
+The list may be specified as any combination of
+individual types (numeric) separated by commas.
+.Em Ranges are not allowed .
+The supported ICMP types are:
+.Pp
+echo reply
+.Pq Cm 0 ,
+destination unreachable
+.Pq Cm 3 ,
+source quench
+.Pq Cm 4 ,
+redirect
+.Pq Cm 5 ,
+echo request
+.Pq Cm 8 ,
+router advertisement
+.Pq Cm 9 ,
+router solicitation
+.Pq Cm 10 ,
+time-to-live exceeded
+.Pq Cm 11 ,
+IP header bad
+.Pq Cm 12 ,
+timestamp request
+.Pq Cm 13 ,
+timestamp reply
+.Pq Cm 14 ,
+information request
+.Pq Cm 15 ,
+information reply
+.Pq Cm 16 ,
+address mask request
+.Pq Cm 17
+and address mask reply
+.Pq Cm 18 .
+.It Cm icmp6types Ar types
+Matches ICMP6 packets whose ICMP6 type is in the list of
+.Ar types .
+The list may be specified as any combination of
+individual types (numeric) separated by commas.
+.Em Ranges are not allowed .
+.It Cm in | out
+Matches incoming or outgoing packets, respectively.
+.Cm in
+and
+.Cm out
+are mutually exclusive (in fact,
+.Cm out
+is implemented as
+.Cm not in Ns No ).
+.It Cm ipid Ar id-list
+Matches IPv4 packets whose
+.Cm ip_id
+field has value included in
+.Ar id-list ,
+which is either a single value or a list of values or ranges
+specified in the same way as
+.Ar ports .
+.It Cm iplen Ar len-list
+Matches IP packets whose total length, including header and data, is
+in the set
+.Ar len-list ,
+which is either a single value or a list of values or ranges
+specified in the same way as
+.Ar ports .
+.It Cm ipoptions Ar spec
+Matches packets whose IPv4 header contains the comma separated list of
+options specified in
+.Ar spec .
+The supported IP options are:
+.Pp
+.Cm ssrr
+(strict source route),
+.Cm lsrr
+(loose source route),
+.Cm rr
+(record packet route) and
+.Cm ts
+(timestamp).
+The absence of a particular option may be denoted
+with a
+.Ql \&! .
+.It Cm ipprecedence Ar precedence
+Matches IPv4 packets whose precedence field is equal to
+.Ar precedence .
+.It Cm ipsec
+Matches packets that have IPSEC history associated with them
+(i.e., the packet comes encapsulated in IPSEC, the kernel
+has IPSEC support and IPSEC_FILTERTUNNEL option, and can correctly
+decapsulate it).
+.Pp
+Note that specifying
+.Cm ipsec
+is different from specifying
+.Cm proto Ar ipsec
+as the latter will only look at the specific IP protocol field,
+irrespective of IPSEC kernel support and the validity of the IPSEC data.
+.Pp
+Further note that this flag is silently ignored in kernels without
+IPSEC support.
+It does not affect rule processing when given and the
+rules are handled as if with no
+.Cm ipsec
+flag.
+.It Cm iptos Ar spec
+Matches IPv4 packets whose
+.Cm tos
+field contains the comma separated list of
+service types specified in
+.Ar spec .
+The supported IP types of service are:
+.Pp
+.Cm lowdelay
+.Pq Dv IPTOS_LOWDELAY ,
+.Cm throughput
+.Pq Dv IPTOS_THROUGHPUT ,
+.Cm reliability
+.Pq Dv IPTOS_RELIABILITY ,
+.Cm mincost
+.Pq Dv IPTOS_MINCOST ,
+.Cm congestion
+.Pq Dv IPTOS_ECN_CE .
+The absence of a particular type may be denoted
+with a
+.Ql \&! .
+.It Cm ipttl Ar ttl-list
+Matches IPv4 packets whose time to live is included in
+.Ar ttl-list ,
+which is either a single value or a list of values or ranges
+specified in the same way as
+.Ar ports .
+.It Cm ipversion Ar ver
+Matches IP packets whose IP version field is
+.Ar ver .
+.It Cm keep-state
+Upon a match, the firewall will create a dynamic rule, whose
+default behaviour is to match bidirectional traffic between
+source and destination IP/port using the same protocol.
+The rule has a limited lifetime (controlled by a set of
+.Xr sysctl 8
+variables), and the lifetime is refreshed every time a matching
+packet is found.
+.It Cm layer2
+Matches only layer2 packets, i.e., those passed to
+.Nm
+from ether_demux() and ether_output_frame().
+.It Cm limit Bro Cm src-addr | src-port | dst-addr | dst-port Brc Ar N
+The firewall will only allow
+.Ar N
+connections with the same
+set of parameters as specified in the rule.
+One or more
+of source and destination addresses and ports can be
+specified.
+Currently,
+only IPv4 flows are supported.
+.It Cm lookup Bro Cm dst-ip | dst-port | src-ip | src-port | uid | jail Brc Ar N
+Search for an entry in lookup table
+.Ar N
+that matches the field specified as argument.
+If not found, the match fails.
+Otherwise, the match succeeds and
+.Cm tablearg
+is set to the value extracted from the table.
+.Pp
+This option can be useful to quickly dispatch traffic based on
+certain packet fields.
+See the
+.Sx LOOKUP TABLES
+section below for more information on lookup tables.
+.It Cm { MAC | mac } Ar dst-mac src-mac
+Match packets with a given
+.Ar dst-mac
+and
+.Ar src-mac
+addresses, specified as the
+.Cm any
+keyword (matching any MAC address), or six groups of hex digits
+separated by colons,
+and optionally followed by a mask indicating the significant bits.
+The mask may be specified using either of the following methods:
+.Bl -enum -width indent
+.It
+A slash
+.Pq /
+followed by the number of significant bits.
+For example, an address with 33 significant bits could be specified as:
+.Pp
+.Dl "MAC 10:20:30:40:50:60/33 any"
+.Pp
+.It
+An ampersand
+.Pq &
+followed by a bitmask specified as six groups of hex digits separated
+by colons.
+For example, an address in which the last 16 bits are significant could
+be specified as:
+.Pp
+.Dl "MAC 10:20:30:40:50:60&00:00:00:00:ff:ff any"
+.Pp
+Note that the ampersand character has a special meaning in many shells
+and should generally be escaped.
+.Pp
+.El
+Note that the order of MAC addresses (destination first,
+source second) is
+the same as on the wire, but the opposite of the one used for
+IP addresses.
+.It Cm mac-type Ar mac-type
+Matches packets whose Ethernet Type field
+corresponds to one of those specified as argument.
+.Ar mac-type
+is specified in the same way as
+.Cm port numbers
+(i.e., one or more comma-separated single values or ranges).
+You can use symbolic names for known values such as
+.Em vlan , ipv4, ipv6 .
+Values can be entered as decimal or hexadecimal (if prefixed by 0x),
+and they are always printed as hexadecimal (unless the
+.Cm -N
+option is used, in which case symbolic resolution will be attempted).
+.It Cm proto Ar protocol
+Matches packets with the corresponding IP protocol.
+.It Cm recv | xmit | via Brq Ar ifX | Ar if Ns Cm * | Ar ipno | Ar any
+Matches packets received, transmitted or going through,
+respectively, the interface specified by exact name
+.Ns No ( Ar ifX Ns No ),
+by device name
+.Ns No ( Ar if Ns Ar * Ns No ),
+by IP address, or through some interface.
+.Pp
+The
+.Cm via
+keyword causes the interface to always be checked.
+If
+.Cm recv
+or
+.Cm xmit
+is used instead of
+.Cm via ,
+then only the receive or transmit interface (respectively)
+is checked.
+By specifying both, it is possible to match packets based on
+both receive and transmit interface, e.g.:
+.Pp
+.Dl "ipfw add deny ip from any to any out recv ed0 xmit ed1"
+.Pp
+The
+.Cm recv
+interface can be tested on either incoming or outgoing packets,
+while the
+.Cm xmit
+interface can only be tested on outgoing packets.
+So
+.Cm out
+is required (and
+.Cm in
+is invalid) whenever
+.Cm xmit
+is used.
+.Pp
+A packet might not have a receive or transmit interface: packets
+originating from the local host have no receive interface,
+while packets destined for the local host have no transmit
+interface.
+.It Cm setup
+Matches TCP packets that have the SYN bit set but no ACK bit.
+This is the short form of
+.Dq Li tcpflags\ syn,!ack .
+.It Cm src-ip Ar ip-address
+Matches IPv4 packets whose source IP is one of the address(es)
+specified as an argument.
+.It Cm src-ip6 Ar ip6-address
+Matches IPv6 packets whose source IP is one of the address(es)
+specified as an argument.
+.It Cm src-port Ar ports
+Matches IP packets whose source port is one of the port(s)
+specified as argument.
+.It Cm tagged Ar tag-list
+Matches packets whose tags are included in
+.Ar tag-list ,
+which is either a single value or a list of values or ranges
+specified in the same way as
+.Ar ports .
+Tags can be applied to the packet using
+.Cm tag
+rule action parameter (see its description for details on tags).
+.It Cm tcpack Ar ack
+TCP packets only.
+Match if the TCP header acknowledgment number field is set to
+.Ar ack .
+.It Cm tcpdatalen Ar tcpdatalen-list
+Matches TCP packets whose length of TCP data is
+.Ar tcpdatalen-list ,
+which is either a single value or a list of values or ranges
+specified in the same way as
+.Ar ports .
+.It Cm tcpflags Ar spec
+TCP packets only.
+Match if the TCP header contains the comma separated list of
+flags specified in
+.Ar spec .
+The supported TCP flags are:
+.Pp
+.Cm fin ,
+.Cm syn ,
+.Cm rst ,
+.Cm psh ,
+.Cm ack
+and
+.Cm urg .
+The absence of a particular flag may be denoted
+with a
+.Ql \&! .
+A rule which contains a
+.Cm tcpflags
+specification can never match a fragmented packet which has
+a non-zero offset.
+See the
+.Cm frag
+option for details on matching fragmented packets.
+.It Cm tcpseq Ar seq
+TCP packets only.
+Match if the TCP header sequence number field is set to
+.Ar seq .
+.It Cm tcpwin Ar win
+TCP packets only.
+Match if the TCP header window field is set to
+.Ar win .
+.It Cm tcpoptions Ar spec
+TCP packets only.
+Match if the TCP header contains the comma separated list of
+options specified in
+.Ar spec .
+The supported TCP options are:
+.Pp
+.Cm mss
+(maximum segment size),
+.Cm window
+(tcp window advertisement),
+.Cm sack
+(selective ack),
+.Cm ts
+(rfc1323 timestamp) and
+.Cm cc
+(rfc1644 t/tcp connection count).
+The absence of a particular option may be denoted
+with a
+.Ql \&! .
+.It Cm uid Ar user
+Match all TCP or UDP packets sent by or received for a
+.Ar user .
+A
+.Ar user
+may be matched by name or identification number.
+.It Cm verrevpath
+For incoming packets,
+a routing table lookup is done on the packet's source address.
+If the interface on which the packet entered the system matches the
+outgoing interface for the route,
+the packet matches.
+If the interfaces do not match up,
+the packet does not match.
+All outgoing packets or packets with no incoming interface match.
+.Pp
+The name and functionality of the option is intentionally similar to
+the Cisco IOS command:
+.Pp
+.Dl ip verify unicast reverse-path
+.Pp
+This option can be used to make anti-spoofing rules to reject all
+packets with source addresses not from this interface.
+See also the option
+.Cm antispoof .
+.It Cm versrcreach
+For incoming packets,
+a routing table lookup is done on the packet's source address.
+If a route to the source address exists, but not the default route
+or a blackhole/reject route, the packet matches.
+Otherwise, the packet does not match.
+All outgoing packets match.
+.Pp
+The name and functionality of the option is intentionally similar to
+the Cisco IOS command:
+.Pp
+.Dl ip verify unicast source reachable-via any
+.Pp
+This option can be used to make anti-spoofing rules to reject all
+packets whose source address is unreachable.
+.It Cm antispoof
+For incoming packets, the packet's source address is checked if it
+belongs to a directly connected network.
+If the network is directly connected, then the interface the packet
+came in on is compared to the interface the network is connected to.
+When incoming interface and directly connected interface are not the
+same, the packet does not match.
+Otherwise, the packet does match.
+All outgoing packets match.
+.Pp
+This option can be used to make anti-spoofing rules to reject all
+packets that pretend to be from a directly connected network but do
+not come in through that interface.
+This option is similar to but more restricted than
+.Cm verrevpath
+because it engages only on packets with source addresses of directly
+connected networks instead of all source addresses.
+.El
+.Sh LOOKUP TABLES
+Lookup tables are useful to handle large sparse sets of
+addresses or other search keys (e.g. ports, jail IDs).
+In the rest of this section we will use the term ``address''
+to mean any unsigned value of up to 32 bits.
+There may be up to 128 different lookup tables, numbered 0 to 127.
+.Pp
+Each entry is represented by an
+.Ar addr Ns Op / Ns Ar masklen
+and will match all addresses with base
+.Ar addr
+(specified as an IP address, a hostname or an unsigned integer)
+and mask width of
+.Ar masklen
+bits.
+If
+.Ar masklen
+is not specified, it defaults to 32.
+When looking up an IP address in a table, the most specific
+entry will match.
+Associated with each entry is a 32-bit unsigned
+.Ar value ,
+which can optionally be checked by a rule matching code.
+When adding an entry, if
+.Ar value
+is not specified, it defaults to 0.
+.Pp
+An entry can be added to a table
+.Pq Cm add ,
+or removed from a table
+.Pq Cm delete .
+A table can be examined
+.Pq Cm list
+or flushed
+.Pq Cm flush .
+.Pp
+Internally, each table is stored in a Radix tree, the same way as
+the routing table (see
+.Xr route 4 ) .
+.Pp
+Lookup tables currently support only ports, jail IDs and IPv4 addresses.
+.Pp
+The
+.Cm tablearg
+feature provides the ability to use a value, looked up in the table, as
+the argument for a rule action, action parameter or rule option.
+This can significantly reduce the number of rules in some configurations.
+If two tables are used in a rule, the result of the second (destination)
+is used.
+The
+.Cm tablearg
+argument can be used with the following actions:
+.Cm nat, pipe , queue, divert, tee, netgraph, ngtee, fwd, skipto
+action parameters:
+.Cm tag, untag,
+rule options:
+.Cm limit, tagged.
+.Pp
+When used with
+.Cm fwd
+it is possible to supply table entries with values
+that are in the form of IP addresses or hostnames.
+See the
+.Sx EXAMPLES
+Section for example usage of tables and the tablearg keyword.
+.Pp
+When used with the
+.Cm skipto
+action, the user should be aware that the code will walk the ruleset
+up to a rule equal to, or past, the given number, and should therefore try to keep the
+ruleset compact between the skipto and the target rules.
+.Sh SETS OF RULES
+Each rule belongs to one of 32 different
+.Em sets ,
+numbered 0 to 31.
+Set 31 is reserved for the default rule.
+.Pp
+By default, rules are put in set 0, unless you use the
+.Cm set N
+attribute when entering a new rule.
+Sets can be individually and atomically enabled or disabled,
+so this mechanism permits an easy way to store multiple configurations
+of the firewall and quickly (and atomically) switch between them.
+The command to enable/disable sets is
+.Bd -ragged -offset indent
+.Nm
+.Cm set Oo Cm disable Ar number ... Oc Op Cm enable Ar number ...
+.Ed
+.Pp
+where multiple
+.Cm enable
+or
+.Cm disable
+sections can be specified.
+Command execution is atomic on all the sets specified in the command.
+By default, all sets are enabled.
+.Pp
+When you disable a set, its rules behave as if they do not exist
+in the firewall configuration, with only one exception:
+.Bd -ragged -offset indent
+dynamic rules created from a rule before it had been disabled
+will still be active until they expire.
+In order to delete
+dynamic rules you have to explicitly delete the parent rule
+which generated them.
+.Ed
+.Pp
+The set number of rules can be changed with the command
+.Bd -ragged -offset indent
+.Nm
+.Cm set move
+.Brq Cm rule Ar rule-number | old-set
+.Cm to Ar new-set
+.Ed
+.Pp
+Also, you can atomically swap two rulesets with the command
+.Bd -ragged -offset indent
+.Nm
+.Cm set swap Ar first-set second-set
+.Ed
+.Pp
+See the
+.Sx EXAMPLES
+Section on some possible uses of sets of rules.
+.Sh STATEFUL FIREWALL
+Stateful operation is a way for the firewall to dynamically
+create rules for specific flows when packets that
+match a given pattern are detected.
+Support for stateful
+operation comes through the
+.Cm check-state , keep-state
+and
+.Cm limit
+options of
+.Nm rules .
+.Pp
+Dynamic rules are created when a packet matches a
+.Cm keep-state
+or
+.Cm limit
+rule, causing the creation of a
+.Em dynamic
+rule which will match all and only packets with
+a given
+.Em protocol
+between a
+.Em src-ip/src-port dst-ip/dst-port
+pair of addresses
+.Em ( src
+and
+.Em dst
+are used here only to denote the initial match addresses, but they
+are completely equivalent afterwards).
+Dynamic rules will be checked at the first
+.Cm check-state, keep-state
+or
+.Cm limit
+occurrence, and the action performed upon a match will be the same
+as in the parent rule.
+.Pp
+Note that no additional attributes other than protocol and IP addresses
+and ports are checked on dynamic rules.
+.Pp
+The typical use of dynamic rules is to keep a closed firewall configuration,
+but let the first TCP SYN packet from the inside network install a
+dynamic rule for the flow so that packets belonging to that session
+will be allowed through the firewall:
+.Pp
+.Dl "ipfw add check-state"
+.Dl "ipfw add allow tcp from my-subnet to any setup keep-state"
+.Dl "ipfw add deny tcp from any to any"
+.Pp
+A similar approach can be used for UDP, where a UDP packet coming
+from the inside will install a dynamic rule to let the response through
+the firewall:
+.Pp
+.Dl "ipfw add check-state"
+.Dl "ipfw add allow udp from my-subnet to any keep-state"
+.Dl "ipfw add deny udp from any to any"
+.Pp
+Dynamic rules expire after some time, which depends on the status
+of the flow and the setting of some
+.Cm sysctl
+variables.
+See Section
+.Sx SYSCTL VARIABLES
+for more details.
+For TCP sessions, dynamic rules can be instructed to periodically
+send keepalive packets to refresh the state of the rule when it is
+about to expire.
+.Pp
+See Section
+.Sx EXAMPLES
+for more examples on how to use dynamic rules.
+.Sh TRAFFIC SHAPER (DUMMYNET) CONFIGURATION
+.Nm
+is also the user interface for the
+.Nm dummynet
+traffic shaper, packet scheduler and network emulator, a subsystem that
+can artificially queue, delay or drop packets
+emulating the behaviour of certain network links
+or queueing systems.
+.Pp
+.Nm dummynet
+operates by first using the firewall to select packets
+using any match pattern that can be used in
+.Nm
+rules.
+Matching packets are then passed to either of two
+different objects, which implement the traffic regulation:
+.Bl -hang -offset XXXX
+.It Em pipe
+A
+.Em pipe
+emulates a
+.Em link
+with given bandwidth and propagation delay,
+driven by a FIFO scheduler and a single queue with programmable
+queue size and packet loss rate.
+Packets are appended to the queue as they come out from
+.Nm ipfw ,
+and then transferred in FIFO order to the link at the desired rate.
+.It Em queue
+A
+.Em queue
+is an abstraction used to implement packet scheduling
+using one of several packet scheduling algorithms.
+Packets sent to a
+.Em queue
+are first grouped into flows according to a mask on the 5-tuple.
+Flows are then passed to the scheduler associated to the
+.Em queue ,
+and each flow uses scheduling parameters (weight and others)
+as configured in the
+.Em queue
+itself.
+A scheduler in turn is connected to an emulated link,
+and arbitrates the link's bandwidth among backlogged flows according to
+weights and to the features of the scheduling algorithm in use.
+.El
+.Pp
+In practice,
+.Em pipes
+can be used to set hard limits to the bandwidth that a flow can use, whereas
+.Em queues
+can be used to determine how different flows share the available bandwidth.
+.Pp
+A graphical representation of the binding of queues,
+flows, schedulers and links is below.
+.Bd -literal -offset indent
+ (flow_mask|sched_mask) sched_mask
+ +---------+ weight Wx +-------------+
+ | |->-[flow]-->--| |-+
+ -->--| QUEUE x | ... | | |
+ | |->-[flow]-->--| SCHEDuler N | |
+ +---------+ | | |
+ ... | +--[LINK N]-->--
+ +---------+ weight Wy | | +--[LINK N]-->--
+ | |->-[flow]-->--| | |
+ -->--| QUEUE y | ... | | |
+ | |->-[flow]-->--| | |
+ +---------+ +-------------+ |
+ +-------------+
+.Ed
+It is important to understand the role of the SCHED_MASK
+and FLOW_MASK, which are configured through the commands
+.Dl "ipfw sched N config mask SCHED_MASK ..."
+and
+.Dl "ipfw queue X config mask FLOW_MASK ..." .
+.Pp
+The SCHED_MASK is used to assign flows to one or more
+scheduler instances, one for each
+value of the packet's 5-tuple after applying SCHED_MASK.
+As an example, using ``src-ip 0xffffff00'' creates one instance
+for each /24 source subnet.
+.Pp
+The FLOW_MASK, together with the SCHED_MASK, is used to split
+packets into flows. As an example, using
+``src-ip 0x000000ff''
+together with the previous SCHED_MASK makes a flow for
+each individual source address. In turn, flows for each /24
+subnet will be sent to the same scheduler instance.
+.Pp
+The above diagram holds even for the
+.Em pipe
+case, with the only restriction that a
+.Em pipe
+only supports a SCHED_MASK, and forces the use of a FIFO
+scheduler (these are for backward compatibility reasons;
+in fact, internally, a
+.Nm dummynet's
+pipe is implemented exactly as above).
+.Pp
+There are two modes of
+.Nm dummynet
+operation:
+.Dq normal
+and
+.Dq fast .
+The
+.Dq normal
+mode tries to emulate a real link: the
+.Nm dummynet
+scheduler ensures that the packet will not leave the pipe faster than it
+would on the real link with a given bandwidth.
+The
+.Dq fast
+mode allows certain packets to bypass the
+.Nm dummynet
+scheduler (if packet flow does not exceed pipe's bandwidth).
+This is the reason why the
+.Dq fast
+mode requires fewer CPU cycles per packet (on average) and packet latency
+can be significantly lower in comparison to a real link with the same
+bandwidth.
+The default mode is
+.Dq normal .
+The
+.Dq fast
+mode can be enabled by setting the
+.Va net.inet.ip.dummynet.io_fast
+.Xr sysctl 8
+variable to a non-zero value.
+.Pp
+.Ss PIPE, QUEUE AND SCHEDULER CONFIGURATION
+The
+.Em pipe ,
+.Em queue
+and
+.Em scheduler
+configuration commands are the following:
+.Bd -ragged -offset indent
+.Cm pipe Ar number Cm config Ar pipe-configuration
+.Pp
+.Cm queue Ar number Cm config Ar queue-configuration
+.Pp
+.Cm sched Ar number Cm config Ar sched-configuration
+.Ed
+.Pp
+The following parameters can be configured for a pipe:
+.Pp
+.Bl -tag -width indent -compact
+.It Cm bw Ar bandwidth | device
+Bandwidth, measured in
+.Sm off
+.Op Cm K | M
+.Brq Cm bit/s | Byte/s .
+.Sm on
+.Pp
+A value of 0 (default) means unlimited bandwidth.
+The unit must immediately follow the number, as in
+.Pp
+.Dl "ipfw pipe 1 config bw 300Kbit/s"
+.Pp
+If a device name is specified instead of a numeric value, as in
+.Pp
+.Dl "ipfw pipe 1 config bw tun0"
+.Pp
+then the transmit clock is supplied by the specified device.
+At the moment only the
+.Xr tun 4
+device supports this
+functionality, for use in conjunction with
+.Xr ppp 8 .
+.Pp
+.It Cm delay Ar ms-delay
+Propagation delay, measured in milliseconds.
+The value is rounded to the next multiple of the clock tick
+(typically 10ms, but it is a good practice to run kernels
+with
+.Dq "options HZ=1000"
+to reduce
+the granularity to 1ms or less).
+The default value is 0, meaning no delay.
+.Pp
+.It Cm burst Ar size
+If the data to be sent exceeds the pipe's bandwidth limit
+(and the pipe was previously idle), up to
+.Ar size
+bytes of data are allowed to bypass the
+.Nm dummynet
+scheduler, and will be sent as fast as the physical link allows.
+Any additional data will be transmitted at the rate specified
+by the
+.Nm pipe
+bandwidth.
+The burst size depends on how long the pipe has been idle;
+the effective burst size is calculated as follows:
+MAX(
+.Ar size
+,
+.Nm bw
+* pipe_idle_time).
+.Pp
+.It Cm profile Ar filename
+A file specifying the additional overhead incurred in the transmission
+of a packet on the link.
+.Pp
+Some link types introduce extra delays in the transmission
+of a packet, e.g. because of MAC level framing, contention on
+the use of the channel, MAC level retransmissions and so on.
+From our point of view, the channel is effectively unavailable
+for this extra time, which is constant or variable depending
+on the link type. Additionally, packets may be dropped after this
+time (e.g. on a wireless link after too many retransmissions).
+We can model the additional delay with an empirical curve
+that represents its distribution.
+.Bd -literal -offset indent
+ cumulative probability
+ 1.0 ^
+ |
+ L +-- loss-level x
+ | ******
+ | *
+ | *****
+ | *
+ | **
+ | *
+ +-------*------------------->
+ delay
+.Ed
+The empirical curve may have both vertical and horizontal lines.
+Vertical lines represent constant delay for a range of
+probabilities.
+Horizontal lines correspond to a discontinuity in the delay
+distribution: the pipe will use the largest delay for a
+given probability.
+.Pp
+The file format is the following, with whitespace acting as
+a separator and '#' indicating the beginning of a comment:
+.Bl -tag -width indent
+.It Cm name Ar identifier
+optional name (listed by "ipfw pipe show")
+to identify the delay distribution;
+.It Cm bw Ar value
+the bandwidth used for the pipe.
+If not specified here, it must be present
+explicitly as a configuration parameter for the pipe;
+.It Cm loss-level Ar L
+the probability above which packets are lost.
+(0.0 <= L <= 1.0, default 1.0 i.e. no loss);
+.It Cm samples Ar N
+the number of samples used in the internal
+representation of the curve (2..1024; default 100);
+.It Cm "delay prob" | "prob delay"
+One of these two lines is mandatory and defines
+the format of the following lines with data points.
+.It Ar XXX Ar YYY
+2 or more lines representing points in the curve,
+with either delay or probability first, according
+to the chosen format.
+The unit for delay is milliseconds.
+Data points do not need to be sorted.
+Also, the number of actual lines can be different
+from the value of the "samples" parameter:
+.Nm
+utility will sort and interpolate
+the curve as needed.
+.El
+.Pp
+Example of a profile file:
+.Bd -literal -offset indent
+name bla_bla_bla
+samples 100
+loss-level 0.86
+prob delay
+0 200 # minimum overhead is 200ms
+0.5 200
+0.5 300
+0.8 1000
+0.9 1300
+1 1300
+#configuration file end
+.Ed
+.El
+.Pp
+The following parameters can be configured for a queue:
+.Pp
+.Bl -tag -width indent -compact
+.It Cm pipe Ar pipe_nr
+Connects a queue to the specified pipe.
+Multiple queues (with the same or different weights) can be connected to
+the same pipe, which specifies the aggregate rate for the set of queues.
+.Pp
+.It Cm weight Ar weight
+Specifies the weight to be used for flows matching this queue.
+The weight must be in the range 1..100, and defaults to 1.
+.El
+.Pp
+The following parameters can be configured for a scheduler:
+.Pp
+.Bl -tag -width indent -compact
+.It Cm type Ar {fifo | wf2qp | rr | qfq}
+specifies the scheduling algorithm to use.
+.Bl -tag -width indent -compact
+.It Cm fifo
+is just a FIFO scheduler (which means that all packets
+are stored in the same queue as they arrive to the scheduler).
+FIFO has O(1) per-packet time complexity, with very low
+constants (estimate 60-80ns on a 2GHz desktop machine)
+but gives no service guarantees.
+.It Cm wf2qp
+implements the WF2Q+ algorithm, which is a Weighted Fair Queueing
+algorithm which permits flows to share bandwidth according to
+their weights. Note that weights are not priorities; even a flow
+with a minuscule weight will never starve.
+WF2Q+ has O(log N) per-packet processing cost, where N is the number
+of flows, and is the default algorithm used by previous versions
+of dummynet's queues.
+.It Cm rr
+implements the Deficit Round Robin algorithm, which has O(1) processing
+costs (roughly, 100-150ns per packet)
+and permits bandwidth allocation according to weights, but
+with poor service guarantees.
+.It Cm qfq
+implements the QFQ algorithm, which is a very fast variant of
+WF2Q+, with similar service guarantees and O(1) processing
+costs (roughly, 200-250ns per packet).
+.El
+.El
+.Pp
+In addition to the type, all parameters allowed for a pipe can also
+be specified for a scheduler.
+.Pp
+Finally, the following parameters can be configured for both
+pipes and queues:
+.Pp
+.Bl -tag -width XXXX -compact
+.Pp
+.It Cm buckets Ar hash-table-size
+Specifies the size of the hash table used for storing the
+various queues.
+Default value is 64, controlled by the
+.Xr sysctl 8
+variable
+.Va net.inet.ip.dummynet.hash_size ,
+allowed range is 16 to 65536.
+.Pp
+.It Cm mask Ar mask-specifier
+Packets sent to a given pipe or queue by an
+.Nm
+rule can be further classified into multiple flows, each of which is then
+sent to a different
+.Em dynamic
+pipe or queue.
+A flow identifier is constructed by masking the IP addresses,
+ports and protocol types as specified with the
+.Cm mask
+options in the configuration of the pipe or queue.
+For each different flow identifier, a new pipe or queue is created
+with the same parameters as the original object, and matching packets
+are sent to it.
+.Pp
+Thus, when
+.Em dynamic pipes
+are used, each flow will get the same bandwidth as defined by the pipe,
+whereas when
+.Em dynamic queues
+are used, each flow will share the parent's pipe bandwidth evenly
+with other flows generated by the same queue (note that other queues
+with different weights might be connected to the same pipe).
+.br
+Available mask specifiers are a combination of one or more of the following:
+.Pp
+.Cm dst-ip Ar mask ,
+.Cm dst-ip6 Ar mask ,
+.Cm src-ip Ar mask ,
+.Cm src-ip6 Ar mask ,
+.Cm dst-port Ar mask ,
+.Cm src-port Ar mask ,
+.Cm flow-id Ar mask ,
+.Cm proto Ar mask
+or
+.Cm all ,
+.Pp
+where the latter means all bits in all fields are significant.
+.Pp
+.It Cm noerror
+When a packet is dropped by a
+.Nm dummynet
+queue or pipe, the error
+is normally reported to the caller routine in the kernel, in the
+same way as it happens when a device queue fills up.
+Setting this
+option reports the packet as successfully delivered, which can be
+needed for some experimental setups where you want to simulate
+loss or congestion at a remote router.
+.Pp
+.It Cm plr Ar packet-loss-rate
+Packet loss rate.
+Argument
+.Ar packet-loss-rate
+is a floating-point number between 0 and 1, with 0 meaning no
+loss, 1 meaning 100% loss.
+The loss rate is internally represented on 31 bits.
+.Pp
+.It Cm queue Brq Ar slots | size Ns Cm Kbytes
+Queue size, in
+.Ar slots
+or
+.Cm KBytes .
+Default value is 50 slots, which
+is the typical queue size for Ethernet devices.
+Note that for slow speed links you should keep the queue
+size short or your traffic might be affected by a significant
+queueing delay.
+E.g., 50 max-sized ethernet packets (1500 bytes) mean 600Kbit
+or 20s of queue on a 30Kbit/s pipe.
+Even worse effects can result if you get packets from an
+interface with a much larger MTU, e.g.\& the loopback interface
+with its 16KB packets.
+The
+.Xr sysctl 8
+variables
+.Em net.inet.ip.dummynet.pipe_byte_limit
+and
+.Em net.inet.ip.dummynet.pipe_slot_limit
+control the maximum lengths that can be specified.
+.Pp
+.It Cm red | gred Ar w_q Ns / Ns Ar min_th Ns / Ns Ar max_th Ns / Ns Ar max_p
+Make use of the RED (Random Early Detection) queue management algorithm.
+.Ar w_q
+and
+.Ar max_p
+are floating
+point numbers between 0 and 1 (0 not included), while
+.Ar min_th
+and
+.Ar max_th
+are integer numbers specifying thresholds for queue management
+(thresholds are computed in bytes if the queue has been defined
+in bytes, in slots otherwise).
+The
+.Nm dummynet
+also supports the gentle RED variant (gred).
+Three
+.Xr sysctl 8
+variables can be used to control the RED behaviour:
+.Bl -tag -width indent
+.It Va net.inet.ip.dummynet.red_lookup_depth
+specifies the accuracy in computing the average queue
+when the link is idle (defaults to 256, must be greater than zero)
+.It Va net.inet.ip.dummynet.red_avg_pkt_size
+specifies the expected average packet size (defaults to 512, must be
+greater than zero)
+.It Va net.inet.ip.dummynet.red_max_pkt_size
+specifies the expected maximum packet size, only used when queue
+thresholds are in bytes (defaults to 1500, must be greater than zero).
+.El
+.El
+.Pp
+When used with IPv6 data,
+.Nm dummynet
+currently has several limitations.
+Information necessary to route link-local packets to an
+interface is not available after processing by
+.Nm dummynet
+so those packets are dropped in the output path.
+Care should be taken to ensure that link-local packets are not passed to
+.Nm dummynet .
+.Sh CHECKLIST
+Here are some important points to consider when designing your
+rules:
+.Bl -bullet
+.It
+Remember that you filter both packets going
+.Cm in
+and
+.Cm out .
+Most connections need packets going in both directions.
+.It
+Remember to test very carefully.
+It is a good idea to be near the console when doing this.
+If you cannot be near the console,
+use an auto-recovery script such as the one in
+.Pa /usr/share/examples/ipfw/change_rules.sh .
+.It
+Do not forget the loopback interface.
+.El
+.Sh FINE POINTS
+.Bl -bullet
+.It
+There are circumstances where fragmented datagrams are unconditionally
+dropped.
+TCP packets are dropped if they do not contain at least 20 bytes of
+TCP header, UDP packets are dropped if they do not contain a full 8
+byte UDP header, and ICMP packets are dropped if they do not contain
+4 bytes of ICMP header, enough to specify the ICMP type, code, and
+checksum.
+These packets are simply logged as
+.Dq pullup failed
+since there may not be enough good data in the packet to produce a
+meaningful log entry.
+.It
+Another type of packet is unconditionally dropped, a TCP packet with a
+fragment offset of one.
+This is a valid packet, but it only has one use, to try
+to circumvent firewalls.
+When logging is enabled, these packets are
+reported as being dropped by rule -1.
+.It
+If you are logged in over a network, loading the
+.Xr kld 4
+version of
+.Nm
+is probably not as straightforward as you would think.
+The following command line is recommended:
+.Bd -literal -offset indent
+kldload ipfw && \e
+ipfw add 32000 allow ip from any to any
+.Ed
+.Pp
+Along the same lines, doing an
+.Bd -literal -offset indent
+ipfw flush
+.Ed
+.Pp
+in similar surroundings is also a bad idea.
+.It
+The
+.Nm
+filter list may not be modified if the system security level
+is set to 3 or higher
+(see
+.Xr init 8
+for information on system security levels).
+.El
+.Sh PACKET DIVERSION
+A
+.Xr divert 4
+socket bound to the specified port will receive all packets
+diverted to that port.
+If no socket is bound to the destination port, or if the divert module is
+not loaded, or if the kernel was not compiled with divert socket support,
+the packets are dropped.
+.Sh NETWORK ADDRESS TRANSLATION (NAT)
+.Pp
+.Nm
+supports in-kernel NAT using the kernel version of
+.Xr libalias 3 .
+.Pp
+The nat configuration command is the following:
+.Bd -ragged -offset indent
+.Bk -words
+.Cm nat
+.Ar nat_number
+.Cm config
+.Ar nat-configuration
+.Ek
+.Ed
+.Pp
+The following parameters can be configured:
+.Bl -tag -width indent
+.It Cm ip Ar ip_address
+Define an ip address to use for aliasing.
+.It Cm if Ar nic
+Use ip address of NIC for aliasing, dynamically changing
+it if NIC's ip address changes.
+.It Cm log
+Enable logging on this nat instance.
+.It Cm deny_in
+Deny any incoming connection from outside world.
+.It Cm same_ports
+Try to leave the alias port numbers unchanged from
+the actual local port numbers.
+.It Cm unreg_only
+Traffic on the local network not originating from an
+unregistered address space will be ignored.
+.It Cm reset
+Reset table of the packet aliasing engine on address change.
+.It Cm reverse
+Reverse the way libalias handles aliasing.
+.It Cm proxy_only
+Obey transparent proxy rules only, packet aliasing is not performed.
+.El
+.Pp
+To let the packet continue after being (de)aliased, set the sysctl variable
+.Va net.inet.ip.fw.one_pass
+to 0.
+For more information about aliasing modes, refer to
+.Xr libalias 3 .
+See Section
+.Sx EXAMPLES
+for some examples about nat usage.
+.Ss REDIRECT AND LSNAT SUPPORT IN IPFW
+Redirect and LSNAT support follow closely the syntax used in
+.Xr natd 8 .
+See Section
+.Sx EXAMPLES
+for some examples on how to do redirect and lsnat.
+.Ss SCTP NAT SUPPORT
+SCTP nat can be configured in a similar manner to TCP through the
+.Nm
+command line tool.
+The main difference is that
+.Nm sctp nat
+does not do port translation.
+Since the local and global side ports will be the same,
+there is no need to specify both.
+Ports are redirected as follows:
+.Bd -ragged -offset indent
+.Bk -words
+.Cm nat
+.Ar nat_number
+.Cm config if
+.Ar nic
+.Cm redirect_port sctp
+.Ar ip_address [,addr_list] {[port | port-port] [,ports]}
+.Ek
+.Ed
+.Pp
+Most
+.Nm sctp nat
+configuration can be done in real-time through the
+.Xr sysctl 8
+interface.
+All may be changed dynamically, though the hash_table size will only
+change for new
+.Nm nat
+instances.
+See
+.Sx SYSCTL VARIABLES
+for more info.
+.Sh SYSCTL VARIABLES
+A set of
+.Xr sysctl 8
+variables controls the behaviour of the firewall and
+associated modules
+.Pq Nm dummynet , bridge , sctp nat .
+These are shown below together with their default value
+(but always check with the
+.Xr sysctl 8
+command what value is actually in use) and meaning:
+.Bl -tag -width indent
+.It Va net.inet.ip.alias.sctp.accept_global_ootb_addip: No 0
+Defines how the
+.Nm nat
+responds to receipt of global OOTB ASCONF-AddIP:
+.Bl -tag -width indent
+.It Cm 0
+No response (unless a partially matching association exists -
+ports and vtags match but global address does not)
+.It Cm 1
+.Nm nat
+will accept and process all OOTB global AddIP messages.
+.El
+.Pp
+Option 1 should never be selected as this forms a security risk.
+An attacker can
+establish multiple fake associations by sending AddIP messages.
+.It Va net.inet.ip.alias.sctp.chunk_proc_limit: No 5
+Defines the maximum number of chunks in an SCTP packet that will be parsed for a
+packet that matches an existing association.
+This value is enforced to be greater than or equal to
+.Cm net.inet.ip.alias.sctp.initialising_chunk_proc_limit .
+A high value is
+a DoS risk yet setting too low a value may result in important control chunks in
+the packet not being located and parsed.
+.It Va net.inet.ip.alias.sctp.error_on_ootb: No 1
+Defines when the
+.Nm nat
+responds to any Out-of-the-Blue (OOTB) packets with ErrorM packets.
+An OOTB packet is a packet that arrives with no existing association
+registered in the
+.Nm nat
+and is not an INIT or ASCONF-AddIP packet:
+.Bl -tag -width indent
+.It Cm 0
+ErrorM is never sent in response to OOTB packets.
+.It Cm 1
+ErrorM is only sent to OOTB packets received on the local side.
+.It Cm 2
+ErrorM is sent to the local side and on the global side ONLY if there is a
+partial match (ports and vtags match but the source global IP does not).
+This value is only useful if the
+.Nm nat
+is tracking global IP addresses.
+.It Cm 3
+ErrorM is sent in response to all OOTB packets on both the local and global side
+(DoS risk).
+.El
+.Pp
+At the moment the default is 0, since the ErrorM packet is not yet
+supported by most SCTP stacks.
+When it is supported, and if not tracking
+global addresses, we recommend setting this value to 1 to allow
+multi-homed local hosts to function with the
+.Nm nat .
+To track global addresses, we recommend setting this value to 2 to
+allow global hosts to be informed when they need to (re)send an
+ASCONF-AddIP.
+Value 3 should never be chosen (except for debugging) as the
+.Nm nat
+will respond to all OOTB global packets (a DoS risk).
+.It Va net.inet.ip.alias.sctp.hashtable_size: No 2003
+Size of hash tables used for
+.Nm nat
+lookups (100 < prime_number < 1000001).
+This value sets the
+.Nm hash table
+size for any future created
+.Nm nat
+instance and therefore must be set prior to creating a
+.Nm nat
+instance.
+The table sizes may be changed to suit specific needs.
+If there will be few
+concurrent associations, and memory is scarce, you may make these smaller.
+If there will be many thousands (or millions) of concurrent associations, you
+should make these larger.
+A prime number is best for the table size.
+The sysctl
+update function will adjust your input value to the next highest prime number.
+.It Va net.inet.ip.alias.sctp.holddown_time: No 0
+Hold association in table for this many seconds after receiving a
+SHUTDOWN-COMPLETE.
+This allows endpoints to correct shutdown gracefully if a
+shutdown_complete is lost and retransmissions are required.
+.It Va net.inet.ip.alias.sctp.init_timer: No 15
+Timeout value while waiting for (INIT-ACK|AddIP-ACK).
+This value cannot be 0.
+.It Va net.inet.ip.alias.sctp.initialising_chunk_proc_limit: No 2
+Defines the maximum number of chunks in an SCTP packet that will be parsed when
+no existing association exists that matches that packet.
+Ideally this packet
+will only be an INIT or ASCONF-AddIP packet.
+A higher value may become a DoS
+risk as malformed packets can consume processing resources.
+.It Va net.inet.ip.alias.sctp.param_proc_limit: No 25
+Defines the maximum number of parameters within a chunk that will be parsed in a
+packet.
+As for other similar sysctl variables, larger values pose a DoS risk.
+.It Va net.inet.ip.alias.sctp.log_level: No 0
+Level of detail in the system log messages (0 \- minimal, 1 \- event,
+2 \- info, 3 \- detail, 4 \- debug, 5 \- max debug). May be a good
+option in high loss environments.
+.It Va net.inet.ip.alias.sctp.shutdown_time: No 15
+Timeout value while waiting for SHUTDOWN-COMPLETE.
+This value cannot be 0.
+.It Va net.inet.ip.alias.sctp.track_global_addresses: No 0
+Enables/disables global IP address tracking within the
+.Nm nat
+and places an
+upper limit on the number of addresses tracked for each association:
+.Bl -tag -width indent
+.It Cm 0
+Global tracking is disabled
+.It Cm >1
+Enables tracking, the maximum number of addresses tracked for each
+association is limited to this value
+.El
+.Pp
+This variable is fully dynamic: the new value will be adopted for all newly
+arriving associations, while existing associations are treated as they were previously.
+Global tracking will decrease the number of collisions within the
+.Nm nat
+at a cost
+of increased processing load, memory usage, complexity, and possible
+.Nm nat
+state
+problems in complex networks with multiple
+.Nm nats .
+We recommend not tracking
+global IP addresses, this will still result in a fully functional
+.Nm nat .
+.It Va net.inet.ip.alias.sctp.up_timer: No 300
+Timeout value to keep an association up with no traffic.
+This value cannot be 0.
+.It Va net.inet.ip.dummynet.expire : No 1
+Lazily delete dynamic pipes/queues once they have no pending traffic.
+You can disable this by setting the variable to 0, in which case
+the pipes/queues will only be deleted when the threshold is reached.
+.It Va net.inet.ip.dummynet.hash_size : No 64
+Default size of the hash table used for dynamic pipes/queues.
+This value is used when no
+.Cm buckets
+option is specified when configuring a pipe/queue.
+.It Va net.inet.ip.dummynet.io_fast : No 0
+If set to a non-zero value,
+the
+.Dq fast
+mode of
+.Nm dummynet
+operation (see above) is enabled.
+.It Va net.inet.ip.dummynet.io_pkt
+Number of packets passed to
+.Nm dummynet .
+.It Va net.inet.ip.dummynet.io_pkt_drop
+Number of packets dropped by
+.Nm dummynet .
+.It Va net.inet.ip.dummynet.io_pkt_fast
+Number of packets bypassed by the
+.Nm dummynet
+scheduler.
+.It Va net.inet.ip.dummynet.max_chain_len : No 16
+Target value for the maximum number of pipes/queues in a hash bucket.
+The product
+.Cm max_chain_len*hash_size
+is used to determine the threshold over which empty pipes/queues
+will be expired even when
+.Cm net.inet.ip.dummynet.expire=0 .
+.It Va net.inet.ip.dummynet.red_lookup_depth : No 256
+.It Va net.inet.ip.dummynet.red_avg_pkt_size : No 512
+.It Va net.inet.ip.dummynet.red_max_pkt_size : No 1500
+Parameters used in the computations of the drop probability
+for the RED algorithm.
+.It Va net.inet.ip.dummynet.pipe_byte_limit : No 1048576
+.It Va net.inet.ip.dummynet.pipe_slot_limit : No 100
+The maximum queue size that can be specified in bytes or packets.
+These limits prevent accidental exhaustion of resources such as mbufs.
+If you raise these limits,
+you should make sure the system is configured so that sufficient resources
+are available.
+.It Va net.inet.ip.fw.autoinc_step : No 100
+Delta between rule numbers when auto-generating them.
+The value must be in the range 1..1000.
+.It Va net.inet.ip.fw.curr_dyn_buckets : Va net.inet.ip.fw.dyn_buckets
+The current number of buckets in the hash table for dynamic rules
+(read-only).
+.It Va net.inet.ip.fw.debug : No 1
+Controls debugging messages produced by
+.Nm .
+.It Va net.inet.ip.fw.default_rule : No 65535
+The default rule number (read-only).
+By the design of
+.Nm ,
+the default rule is the last one, so its number can also serve as the highest number allowed for a rule.
+.It Va net.inet.ip.fw.dyn_buckets : No 256
+The number of buckets in the hash table for dynamic rules.
+Must be a power of 2, up to 65536.
+It only takes effect when all dynamic rules have expired, so you
+are advised to use a
+.Cm flush
+command to make sure that the hash table is resized.
+.It Va net.inet.ip.fw.dyn_count : No 3
+Current number of dynamic rules
+(read-only).
+.It Va net.inet.ip.fw.dyn_keepalive : No 1
+Enables generation of keepalive packets for
+.Cm keep-state
+rules on TCP sessions.
+A keepalive is generated to both
+sides of the connection every 5 seconds for the last 20
+seconds of the lifetime of the rule.
+.It Va net.inet.ip.fw.dyn_max : No 8192
+Maximum number of dynamic rules.
+When you hit this limit, no more dynamic rules can be
+installed until old ones expire.
+.It Va net.inet.ip.fw.dyn_ack_lifetime : No 300
+.It Va net.inet.ip.fw.dyn_syn_lifetime : No 20
+.It Va net.inet.ip.fw.dyn_fin_lifetime : No 1
+.It Va net.inet.ip.fw.dyn_rst_lifetime : No 1
+.It Va net.inet.ip.fw.dyn_udp_lifetime : No 5
+.It Va net.inet.ip.fw.dyn_short_lifetime : No 30
+These variables control the lifetime, in seconds, of dynamic
+rules.
+Upon the initial SYN exchange the lifetime is kept short,
+then increased after both SYN have been seen, then decreased
+again during the final FIN exchange or when a RST is received.
+Both
+.Em dyn_fin_lifetime
+and
+.Em dyn_rst_lifetime
+must be strictly lower than 5 seconds, the period of
+repetition of keepalives.
+The firewall enforces that.
+.It Va net.inet.ip.fw.enable : No 1
+Enables the firewall.
+Setting this variable to 0 lets you run your machine without
+firewall even if compiled in.
+.It Va net.inet6.ip6.fw.enable : No 1
+provides the same functionality as above for the IPv6 case.
+.It Va net.inet.ip.fw.one_pass : No 1
+When set, the packet exiting from the
+.Nm dummynet
+pipe or from
+.Xr ng_ipfw 4
+node is not passed through the firewall again.
+Otherwise, after an action, the packet is
+reinjected into the firewall at the next rule.
+.It Va net.inet.ip.fw.tables_max : No 128
+Maximum number of tables (read-only).
+.It Va net.inet.ip.fw.verbose : No 1
+Enables verbose messages.
+.It Va net.inet.ip.fw.verbose_limit : No 0
+Limits the number of messages produced by a verbose firewall.
+.It Va net.inet6.ip6.fw.deny_unknown_exthdrs : No 1
+If enabled, packets with unknown IPv6 Extension Headers will be denied.
+.It Va net.link.ether.ipfw : No 0
+Controls whether layer-2 packets are passed to
+.Nm .
+Default is no.
+.It Va net.link.bridge.ipfw : No 0
+Controls whether bridged packets are passed to
+.Nm .
+Default is no.
+.El
+.Pp
+.Sh EXAMPLES
+There are far too many possible uses of
+.Nm
+so this Section will only give a small set of examples.
+.Pp
+.Ss BASIC PACKET FILTERING
+This command adds an entry which denies all tcp packets from
+.Em cracker.evil.org
+to the telnet port of
+.Em wolf.tambov.su
+from being forwarded by the host:
+.Pp
+.Dl "ipfw add deny tcp from cracker.evil.org to wolf.tambov.su telnet"
+.Pp
+This one disallows any connection from the entire cracker's
+network to my host:
+.Pp
+.Dl "ipfw add deny ip from 123.45.67.0/24 to my.host.org"
+.Pp
+A first and efficient way to limit access (not using dynamic rules)
+is the use of the following rules:
+.Pp
+.Dl "ipfw add allow tcp from any to any established"
+.Dl "ipfw add allow tcp from net1 portlist1 to net2 portlist2 setup"
+.Dl "ipfw add allow tcp from net3 portlist3 to net3 portlist3 setup"
+.Dl "..."
+.Dl "ipfw add deny tcp from any to any"
+.Pp
+The first rule will be a quick match for normal TCP packets,
+but it will not match the initial SYN packet, which will be
+matched by the
+.Cm setup
+rules only for selected source/destination pairs.
+All other SYN packets will be rejected by the final
+.Cm deny
+rule.
+.Pp
+If you administer one or more subnets, you can take advantage
+of the address sets and or-blocks and write extremely
+compact rulesets which selectively enable services to blocks
+of clients, as below:
+.Pp
+.Dl "goodguys=\*q{ 10.1.2.0/24{20,35,66,18} or 10.2.3.0/28{6,3,11} }\*q"
+.Dl "badguys=\*q10.1.2.0/24{8,38,60}\*q"
+.Dl ""
+.Dl "ipfw add allow ip from ${goodguys} to any"
+.Dl "ipfw add deny ip from ${badguys} to any"
+.Dl "... normal policies ..."
+.Pp
+The
+.Cm verrevpath
+option could be used to do automated anti-spoofing by adding the
+following to the top of a ruleset:
+.Pp
+.Dl "ipfw add deny ip from any to any not verrevpath in"
+.Pp
+This rule drops all incoming packets that appear to be coming to the
+system on the wrong interface.
+For example, a packet with a source
+address belonging to a host on a protected internal network would be
+dropped if it tried to enter the system from an external interface.
+.Pp
+The
+.Cm antispoof
+option could be used to do similar but more restricted anti-spoofing
+by adding the following to the top of a ruleset:
+.Pp
+.Dl "ipfw add deny ip from any to any not antispoof in"
+.Pp
+This rule drops all incoming packets that appear to be coming from another
+directly connected system but on the wrong interface.
+For example, a packet with a source address of
+.Li 192.168.0.0/24 ,
+configured on
+.Li fxp0 ,
+but coming in on
+.Li fxp1
+would be dropped.
+.Ss DYNAMIC RULES
+In order to protect a site from flood attacks involving fake
+TCP packets, it is safer to use dynamic rules:
+.Pp
+.Dl "ipfw add check-state"
+.Dl "ipfw add deny tcp from any to any established"
+.Dl "ipfw add allow tcp from my-net to any setup keep-state"
+.Pp
+This will let the firewall install dynamic rules only for
+those connections which start with a regular SYN packet coming
+from the inside of our network.
+Dynamic rules are checked when encountering the first
+.Cm check-state
+or
+.Cm keep-state
+rule.
+A
+.Cm check-state
+rule should usually be placed near the beginning of the
+ruleset to minimize the amount of work scanning the ruleset.
+Your mileage may vary.
+.Pp
+To limit the number of connections a user can open
+you can use the following type of rules:
+.Pp
+.Dl "ipfw add allow tcp from my-net/24 to any setup limit src-addr 10"
+.Dl "ipfw add allow tcp from any to me setup limit src-addr 4"
+.Pp
+The former (assuming it runs on a gateway) will allow each host
+on a /24 network to open at most 10 TCP connections.
+The latter can be placed on a server to make sure that a single
+client does not use more than 4 simultaneous connections.
+.Pp
+.Em BEWARE :
+stateful rules can be subject to denial-of-service attacks
+by a SYN-flood which opens a huge number of dynamic rules.
+The effects of such attacks can be partially limited by
+acting on a set of
+.Xr sysctl 8
+variables which control the operation of the firewall.
+.Pp
+Here is a good usage of the
+.Cm list
+command to see accounting records and timestamp information:
+.Pp
+.Dl ipfw -at list
+.Pp
+or in short form without timestamps:
+.Pp
+.Dl ipfw -a list
+.Pp
+which is equivalent to:
+.Pp
+.Dl ipfw show
+.Pp
+Next rule diverts all incoming packets from 192.168.2.0/24
+to divert port 5000:
+.Pp
+.Dl ipfw divert 5000 ip from 192.168.2.0/24 to any in
+.Pp
+.Ss TRAFFIC SHAPING
+The following rules show some of the applications of
+.Nm
+and
+.Nm dummynet
+for simulations and the like.
+.Pp
+This rule drops random incoming packets with a probability
+of 5%:
+.Pp
+.Dl "ipfw add prob 0.05 deny ip from any to any in"
+.Pp
+A similar effect can be achieved making use of
+.Nm dummynet
+pipes:
+.Pp
+.Dl "ipfw add pipe 10 ip from any to any"
+.Dl "ipfw pipe 10 config plr 0.05"
+.Pp
+We can use pipes to artificially limit bandwidth, e.g.\& on a
+machine acting as a router, if we want to limit traffic from
+local clients on 192.168.2.0/24 we do:
+.Pp
+.Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
+.Dl "ipfw pipe 1 config bw 300Kbit/s queue 50KBytes"
+.Pp
+note that we use the
+.Cm out
+modifier so that the rule is not used twice.
+Remember in fact that
+.Nm
+rules are checked both on incoming and outgoing packets.
+.Pp
+Should we want to simulate a bidirectional link with bandwidth
+limitations, the correct way is the following:
+.Pp
+.Dl "ipfw add pipe 1 ip from any to any out"
+.Dl "ipfw add pipe 2 ip from any to any in"
+.Dl "ipfw pipe 1 config bw 64Kbit/s queue 10Kbytes"
+.Dl "ipfw pipe 2 config bw 64Kbit/s queue 10Kbytes"
+.Pp
+The above can be very useful, e.g.\& if you want to see how
+your fancy Web page will look for a residential user who
+is connected only through a slow link.
+You should not use only one pipe for both directions, unless
+you want to simulate a half-duplex medium (e.g.\& AppleTalk,
+Ethernet, IRDA).
+It is not necessary that both pipes have the same configuration,
+so we can also simulate asymmetric links.
+.Pp
+Should we want to verify network performance with the RED queue
+management algorithm:
+.Pp
+.Dl "ipfw add pipe 1 ip from any to any"
+.Dl "ipfw pipe 1 config bw 500Kbit/s queue 100 red 0.002/30/80/0.1"
+.Pp
+Another typical application of the traffic shaper is to
+introduce some delay in the communication.
+This can significantly affect applications which do a lot of Remote
+Procedure Calls, and where the round-trip-time of the
+connection often becomes a limiting factor much more than
+bandwidth:
+.Pp
+.Dl "ipfw add pipe 1 ip from any to any out"
+.Dl "ipfw add pipe 2 ip from any to any in"
+.Dl "ipfw pipe 1 config delay 250ms bw 1Mbit/s"
+.Dl "ipfw pipe 2 config delay 250ms bw 1Mbit/s"
+.Pp
+Per-flow queueing can be useful for a variety of purposes.
+A very simple one is counting traffic:
+.Pp
+.Dl "ipfw add pipe 1 tcp from any to any"
+.Dl "ipfw add pipe 1 udp from any to any"
+.Dl "ipfw add pipe 1 ip from any to any"
+.Dl "ipfw pipe 1 config mask all"
+.Pp
+The above set of rules will create queues (and collect
+statistics) for all traffic.
+Because the pipes have no limitations, the only effect is
+collecting statistics.
+Note that we need 3 rules, not just the last one, because
+when
+.Nm
+tries to match IP packets it will not consider ports, so we
+would not see connections on separate ports as different
+ones.
+.Pp
+A more sophisticated example is limiting the outbound traffic
+on a net with per-host limits, rather than per-network limits:
+.Pp
+.Dl "ipfw add pipe 1 ip from 192.168.2.0/24 to any out"
+.Dl "ipfw add pipe 2 ip from any to 192.168.2.0/24 in"
+.Dl "ipfw pipe 1 config mask src-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
+.Dl "ipfw pipe 2 config mask dst-ip 0x000000ff bw 200Kbit/s queue 20Kbytes"
+.Ss LOOKUP TABLES
+In the following example, we need to create several traffic bandwidth
+classes and we need different hosts/networks to fall into different classes.
+We create one pipe for each class and configure them accordingly.
+Then we create a single table and fill it with IP subnets and addresses.
+For each subnet/host we set the argument equal to the number of the pipe
+that it should use.
+Then we classify traffic using a single rule:
+.Pp
+.Dl "ipfw pipe 1 config bw 1000Kbyte/s"
+.Dl "ipfw pipe 4 config bw 4000Kbyte/s"
+.Dl "..."
+.Dl "ipfw table 1 add 192.168.2.0/24 1"
+.Dl "ipfw table 1 add 192.168.0.0/27 4"
+.Dl "ipfw table 1 add 192.168.0.2 1"
+.Dl "..."
+.Dl "ipfw add pipe tablearg ip from table(1) to any"
+.Pp
+Using the
+.Cm fwd
+action, the table entries may include hostnames and IP addresses.
+.Pp
+.Dl "ipfw table 1 add 192.168.2.0/24 10.23.2.1"
+.Dl "ipfw table 1 add 192.168.0.0/27 router1.dmz"
+.Dl "..."
+.Dl "ipfw add 100 fwd tablearg ip from any to table(1)"
+.Ss SETS OF RULES
+To add a set of rules atomically, e.g.\& set 18:
+.Pp
+.Dl "ipfw set disable 18"
+.Dl "ipfw add NN set 18 ... # repeat as needed"
+.Dl "ipfw set enable 18"
+.Pp
+To delete a set of rules atomically the command is simply:
+.Pp
+.Dl "ipfw delete set 18"
+.Pp
+To test a ruleset, and to disable it and regain control if something goes wrong:
+.Pp
+.Dl "ipfw set disable 18"
+.Dl "ipfw add NN set 18 ... # repeat as needed"
+.Dl "ipfw set enable 18; echo done; sleep 30 && ipfw set disable 18"
+.Pp
+Here if everything goes well, you press control-C before the "sleep"
+terminates, and your ruleset will be left active.
+Otherwise, e.g.\& if
+you cannot access your box, the ruleset will be disabled after
+the sleep terminates thus restoring the previous situation.
+.Pp
+To show the rules of a specific set:
+.Pp
+.Dl "ipfw set 18 show"
+.Pp
+To show the rules of a disabled set:
+.Pp
+.Dl "ipfw -S set 18 show"
+.Pp
+To clear the counters of a specific rule in a specific set:
+.Pp
+.Dl "ipfw set 18 zero NN"
+.Pp
+To delete a specific rule from a specific set:
+.Pp
+.Dl "ipfw set 18 delete NN"
+.Ss NAT, REDIRECT AND LSNAT
+First redirect all the traffic to nat instance 123:
+.Pp
+.Dl "ipfw add nat 123 all from any to any"
+.Pp
+Then to configure nat instance 123 to alias all the outgoing traffic with ip
+192.168.0.123, blocking all incoming connections, trying to keep
+same ports on both sides, clearing aliasing table on address change
+and keeping a log of traffic/link statistics:
+.Pp
+.Dl "ipfw nat 123 config ip 192.168.0.123 log deny_in reset same_ports"
+.Pp
+Or, to change the address of instance 123 (the aliasing table will be cleared; see the
+reset option):
+.Pp
+.Dl "ipfw nat 123 config ip 10.0.0.1"
+.Pp
+To see configuration of nat instance 123:
+.Pp
+.Dl "ipfw nat 123 show config"
+.Pp
+To show logs of all the instances in range 111-999:
+.Pp
+.Dl "ipfw nat 111-999 show"
+.Pp
+To see configurations of all instances:
+.Pp
+.Dl "ipfw nat show config"
+.Pp
+Or a redirect rule with mixed modes could look like:
+.Pp
+.Dl "ipfw nat 123 config redirect_addr 10.0.0.1 10.0.0.66"
+.Dl " redirect_port tcp 192.168.0.1:80 500"
+.Dl " redirect_proto udp 192.168.1.43 192.168.1.1"
+.Dl " redirect_addr 192.168.0.10,192.168.0.11"
+.Dl " 10.0.0.100 # LSNAT"
+.Dl " redirect_port tcp 192.168.0.1:80,192.168.0.10:22"
+.Dl " 500 # LSNAT"
+.Pp
+or it could be split in:
+.Pp
+.Dl "ipfw nat 1 config redirect_addr 10.0.0.1 10.0.0.66"
+.Dl "ipfw nat 2 config redirect_port tcp 192.168.0.1:80 500"
+.Dl "ipfw nat 3 config redirect_proto udp 192.168.1.43 192.168.1.1"
+.Dl "ipfw nat 4 config redirect_addr 192.168.0.10,192.168.0.11,192.168.0.12"
+.Dl " 10.0.0.100"
+.Dl "ipfw nat 5 config redirect_port tcp"
+.Dl " 192.168.0.1:80,192.168.0.10:22,192.168.0.20:25 500"
+.Pp
+.Sh SEE ALSO
+.Xr cpp 1 ,
+.Xr m4 1 ,
+.Xr altq 4 ,
+.Xr divert 4 ,
+.Xr dummynet 4 ,
+.Xr if_bridge 4 ,
+.Xr ip 4 ,
+.Xr ipfirewall 4 ,
+.Xr ng_ipfw 4 ,
+.Xr protocols 5 ,
+.Xr services 5 ,
+.Xr init 8 ,
+.Xr kldload 8 ,
+.Xr reboot 8 ,
+.Xr sysctl 8 ,
+.Xr syslogd 8
+.Sh HISTORY
+The
+.Nm
+utility first appeared in
+.Fx 2.0 .
+.Nm dummynet
+was introduced in
+.Fx 2.2.8 .
+Stateful extensions were introduced in
+.Fx 4.0 .
+.Nm ipfw2
+was introduced in Summer 2002.
+.Sh AUTHORS
+.An Ugen J. S. Antsilevich ,
+.An Poul-Henning Kamp ,
+.An Alex Nash ,
+.An Archie Cobbs ,
+.An Luigi Rizzo .
+.Pp
+.An -nosplit
+API based upon code written by
+.An Daniel Boulet
+for BSDI.
+.Pp
+Dummynet was introduced by Luigi Rizzo in 1997-1998.
+.Pp
+Some early work (1999-2000) on the
+.Nm dummynet
+traffic shaper was supported by Akamba Corp.
+.Pp
+The ipfw core (ipfw2) has been completely redesigned and
+reimplemented by Luigi Rizzo in summer 2002. Further
+actions and
+options have been added by various developers over the years.
+.Pp
+.An -nosplit
+In-kernel NAT support written by
+.An Paolo Pisati Aq piso@FreeBSD.org
+as part of a Summer of Code 2005 project.
+.Pp
+SCTP
+.Nm nat
+support has been developed by
+.An The Centre for Advanced Internet Architectures (CAIA) Aq http://www.caia.swin.edu.au .
+The primary developers and maintainers are David Hayes and Jason But.
+For further information visit:
+.Aq http://www.caia.swin.edu.au/urp/SONATA
+.Pp
+Delay profiles have been developed by Alessandro Cerri and
+Luigi Rizzo, supported by the
+European Commission within Projects Onelab and Onelab2.
+.Sh BUGS
+The syntax has grown over the years and sometimes it might be confusing.
+Unfortunately, backward compatibility prevents cleaning up mistakes
+made in the definition of the syntax.
+.Pp
+.Em !!! WARNING !!!
+.Pp
+Misconfiguring the firewall can put your computer in an unusable state,
+possibly shutting down network services and requiring console access to
+regain control of it.
+.Pp
+Incoming packet fragments diverted by
+.Cm divert
+are reassembled before delivery to the socket.
+The action used on those packets is the one from the
+rule which matches the first fragment of the packet.
+.Pp
+Packets diverted to userland, and then reinserted by a userland process
+may lose various packet attributes.
+The packet source interface name
+will be preserved if it is shorter than 8 bytes and the userland process
+saves and reuses the sockaddr_in
+(as does
+.Xr natd 8 ) ;
+otherwise, it may be lost.
+If a packet is reinserted in this manner, later rules may be incorrectly
+applied, making the order of
+.Cm divert
+rules in the rule sequence very important.
+.Pp
+Dummynet drops all packets with IPv6 link-local addresses.
+.Pp
+Rules using
+.Cm uid
+or
+.Cm gid
+may not behave as expected.
+In particular, incoming SYN packets may
+have no uid or gid associated with them since they do not yet belong
+to a TCP connection, and the uid/gid associated with a packet may not
+be as expected if the associated process calls
+.Xr setuid 2
+or similar system calls.
+.Pp
+Rule syntax is subject to the command line environment and some patterns
+may need to be escaped with the backslash character
+or quoted appropriately.
+.Pp
+Due to the architecture of
+.Xr libalias 3 ,
+ipfw nat is not compatible with the TCP segmentation offloading (TSO).
+Thus, to reliably nat your network traffic, please disable TSO
+on your NICs using
+.Xr ifconfig 8 .
+.Pp
+ICMP error messages are not implicitly matched by dynamic rules
+for the respective conversations.
+To avoid failures of network error detection and path MTU discovery,
+ICMP error messages may need to be allowed explicitly through static
+rules.
*
* NEW command line interface for IP firewall facility
*
- * $FreeBSD: head/sbin/ipfw/ipfw2.c 187983 2009-02-01 16:00:49Z luigi $
+ * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipfw2.c 203369 2010-02-02 07:39:56Z luigi $
*/
#include <sys/types.h>
int resvd_set_number = RESVD_SET;
#define GET_UINT_ARG(arg, min, max, tok, s_x) do { \
- if (!ac) \
+ if (!av[0]) \
errx(EX_USAGE, "%s: missing argument", match_value(s_x, tok)); \
if (_substrcmp(*av, "tablearg") == 0) { \
arg = IP_FW_TABLEARG; \
} \
\
{ \
- long val; \
+ long _xval; \
char *end; \
\
- val = strtol(*av, &end, 10); \
+ _xval = strtol(*av, &end, 10); \
\
- if (!isdigit(**av) || *end != '\0' || (val == 0 && errno == EINVAL)) \
+ if (!isdigit(**av) || *end != '\0' || (_xval == 0 && errno == EINVAL)) \
errx(EX_DATAERR, "%s: invalid argument: %s", \
match_value(s_x, tok), *av); \
\
- if (errno == ERANGE || val < min || val > max) \
+ if (errno == ERANGE || _xval < min || _xval > max) \
errx(EX_DATAERR, "%s: argument is out of range (%u..%u): %s", \
match_value(s_x, tok), min, max, *av); \
\
- if (val == IP_FW_TABLEARG) \
+ if (_xval == IP_FW_TABLEARG) \
errx(EX_DATAERR, "%s: illegal argument value: %s", \
match_value(s_x, tok), *av); \
- arg = val; \
+ arg = _xval; \
} \
} while (0)
{ NULL, 0 } /* terminator */
};
-/*
+/*
* The 'lookup' instruction accepts one of the following arguments.
* -1 is a terminator for the list.
* Arguments are passed as v[1] in O_DST_LOOKUP options.
*/
static int lookup_key[] = {
TOK_DSTIP, TOK_SRCIP, TOK_DSTPORT, TOK_SRCPORT,
- TOK_UID, TOK_JAIL, -1 };
+ TOK_UID, TOK_JAIL, TOK_DSCP, -1 };
static struct _s_x rule_options[] = {
{ "tagged", TOK_TAGGED },
{ "iplen", TOK_IPLEN },
{ "ipid", TOK_IPID },
{ "ipprecedence", TOK_IPPRECEDENCE },
+ { "dscp", TOK_DSCP },
{ "iptos", TOK_IPTOS },
{ "ipttl", TOK_IPTTL },
{ "ipversion", TOK_IPVER },
/*
* conditionally runs the command.
+ * Selected options or negative -> getsockopt
*/
int
do_cmd(int optname, void *optval, uintptr_t optlen)
err(EX_UNAVAILABLE, "socket");
if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET ||
- optname == IP_FW_DYN_GET ||
optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST ||
optname == IP_FW_TABLE_GETSIZE ||
optname == IP_FW_NAT_GET_CONFIG ||
- optname == IP_FW_NAT_GET_LOG)
+ optname < 0 ||
+ optname == IP_FW_NAT_GET_LOG) {
+ if (optname < 0)
+ optname = -optname;
i = getsockopt(s, IPPROTO_IP, optname, optval,
(socklen_t *)optlen);
- else
+ } else {
i = setsockopt(s, IPPROTO_IP, optname, optval, optlen);
+ }
return i;
}
print_ip(ipfw_insn_ip *cmd, char const *s)
{
struct hostent *he = NULL;
- int len = F_LEN((ipfw_insn *)cmd);
+ uint32_t len = F_LEN((ipfw_insn *)cmd);
uint32_t *a = ((ipfw_insn_u32 *)cmd)->d;
if (cmd->o.opcode == O_IP_DST_LOOKUP && len > F_INSN_SIZE(ipfw_insn_u32)) {
#define HAVE_DSTIP 0x0004
#define HAVE_PROTO4 0x0008
#define HAVE_PROTO6 0x0010
+#define HAVE_IP 0x0100
#define HAVE_OPTIONS 0x8000
-#define HAVE_IP (HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP)
static void
show_prerequisites(int *flags, int want, int cmd __unused)
{
printf("%05u ", rule->rulenum);
if (pcwidth>0 || bcwidth>0)
+
+ /* Tcc relies on msvcrt.dll for printf, and
+ * it does not support ANSI %llu syntax
+ */
+#ifndef TCC
printf("%*llu %*llu ", pcwidth, align_uint64(&rule->pcnt),
bcwidth, align_uint64(&rule->bcnt));
-
+#else
+ printf("%*I64u %*I64u ", pcwidth, align_uint64(&rule->pcnt),
+ bcwidth, align_uint64(&rule->bcnt));
+#endif
if (co.do_time == 2)
printf("%10u ", rule->timestamp);
else if (co.do_time == 1) {
switch(cmd->opcode) {
case O_CHECK_STATE:
printf("check-state");
- flags = HAVE_IP; /* avoid printing anything else */
+ /* avoid printing anything else */
+ flags = HAVE_PROTO | HAVE_SRCIP |
+ HAVE_DSTIP | HAVE_IP;
break;
case O_ACCEPT:
case O_SETFIB:
PRINT_UINT_ARG("setfib ", cmd->arg1);
break;
-
+
case O_REASS:
printf("reass");
break;
else
printf(" log");
}
+#ifndef NO_ALTQ
if (altqptr) {
print_altq_cmd(altqptr);
}
+#endif
if (tagptr) {
if (tagptr->len & F_NOT)
PRINT_UINT_ARG(" untag ", tagptr->arg1);
show_prerequisites(&flags, HAVE_PROTO, 0);
printf(" from any to any");
}
- flags |= HAVE_IP | HAVE_OPTIONS;
+ flags |= HAVE_IP | HAVE_OPTIONS | HAVE_PROTO |
+ HAVE_SRCIP | HAVE_DSTIP;
}
if (co.comment_only)
break;
case O_IP_DSTPORT:
- show_prerequisites(&flags, HAVE_IP, 0);
+ show_prerequisites(&flags,
+ HAVE_PROTO | HAVE_SRCIP |
+ HAVE_DSTIP | HAVE_IP, 0);
case O_IP_SRCPORT:
- show_prerequisites(&flags, HAVE_PROTO|HAVE_SRCIP, 0);
+ show_prerequisites(&flags,
+ HAVE_PROTO | HAVE_SRCIP, 0);
if ((cmd->len & F_OR) && !or_block)
printf(" {");
if (cmd->len & F_NOT)
if ((flags & (HAVE_PROTO4 | HAVE_PROTO6)) &&
!(flags & HAVE_PROTO))
show_prerequisites(&flags,
- HAVE_IP | HAVE_OPTIONS, 0);
+ HAVE_PROTO | HAVE_IP | HAVE_SRCIP |
+ HAVE_DSTIP | HAVE_OPTIONS, 0);
if (flags & HAVE_OPTIONS)
printf(" proto");
if (pe)
((cmd->opcode == O_IP4) &&
(flags & HAVE_PROTO4)))
break;
- show_prerequisites(&flags, HAVE_IP | HAVE_OPTIONS, 0);
+ show_prerequisites(&flags, HAVE_PROTO | HAVE_SRCIP |
+ HAVE_DSTIP | HAVE_IP | HAVE_OPTIONS, 0);
if ((cmd->len & F_OR) && !or_block)
printf(" {");
if (cmd->len & F_NOT && cmd->opcode != O_IN)
or_block = 0;
}
}
- show_prerequisites(&flags, HAVE_IP, 0);
+ show_prerequisites(&flags, HAVE_PROTO | HAVE_SRCIP | HAVE_DSTIP
+ | HAVE_IP, 0);
if (comment)
printf(" // %s", comment);
printf("\n");
bcopy(&d->rule, &rulenum, sizeof(rulenum));
printf("%05d", rulenum);
if (pcwidth>0 || bcwidth>0)
+
+ /* Tcc relies on msvcrt.dll for printf, and
+ * it does not support ANSI %llu syntax
+ */
+#ifndef TCC
printf(" %*llu %*llu (%ds)", pcwidth,
align_uint64(&d->pcnt), bcwidth,
align_uint64(&d->bcnt), d->expire);
+#else
+ /*printf(" %*I64u %*I64u (%ds)", pcwidth,
+ align_uint64(&d->pcnt), bcwidth,
+ align_uint64(&d->bcnt), d->expire);*/
+
+ //XXX workaround here, for multiple I64 on the same printf
+ printf(" %*I64u",pcwidth,align_uint64(&d->pcnt));
+ printf(" %*I64u",bcwidth,align_uint64(&d->bcnt));
+ printf(" (%ds)",d->expire);
+#endif
switch (d->dyn_type) {
case O_LIMIT_PARENT:
printf(" PARENT %d", d->count);
* ipfw set move rule X to Y
*/
void
-ipfw_sets_handler(int ac, char *av[])
+ipfw_sets_handler(char *av[])
{
uint32_t set_disable, masks[2];
int i, nbytes;
uint16_t rulenum;
uint8_t cmd, new_set;
- ac--;
av++;
- if (!ac)
+ if (av[0] == NULL)
errx(EX_USAGE, "set needs command");
if (_substrcmp(*av, "show") == 0) {
- void *data;
+ void *data = NULL;
char const *msg;
+ int nalloc;
+
+ nalloc = nbytes = sizeof(struct ip_fw);
+ while (nbytes >= nalloc) {
+ if (data)
+ free(data);
+ nalloc = nalloc * 2 + 200;
+ nbytes = nalloc;
+ data = safe_calloc(1, nbytes);
+ if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0)
+ err(EX_OSERR, "getsockopt(IP_FW_GET)");
+ }
- nbytes = sizeof(struct ip_fw);
- data = safe_calloc(1, nbytes);
- if (do_cmd(IP_FW_GET, data, (uintptr_t)&nbytes) < 0)
- err(EX_OSERR, "getsockopt(IP_FW_GET)");
bcopy(&((struct ip_fw *)data)->next_rule,
&set_disable, sizeof(set_disable));
}
printf("\n");
} else if (_substrcmp(*av, "swap") == 0) {
- ac--; av++;
- if (ac != 2)
+ av++;
+ if ( av[0] == NULL || av[1] == NULL )
errx(EX_USAGE, "set swap needs 2 set numbers\n");
rulenum = atoi(av[0]);
new_set = atoi(av[1]);
masks[0] = (4 << 24) | (new_set << 16) | (rulenum);
i = do_cmd(IP_FW_DEL, masks, sizeof(uint32_t));
} else if (_substrcmp(*av, "move") == 0) {
- ac--; av++;
- if (ac && _substrcmp(*av, "rule") == 0) {
+ av++;
+ if (av[0] && _substrcmp(*av, "rule") == 0) {
cmd = 2;
- ac--; av++;
+ av++;
} else
cmd = 3;
- if (ac != 3 || _substrcmp(av[1], "to") != 0)
+ if (av[0] == NULL || av[1] == NULL || av[2] == NULL ||
+ av[3] != NULL || _substrcmp(av[1], "to") != 0)
errx(EX_USAGE, "syntax: set move [rule] X to Y\n");
rulenum = atoi(av[0]);
new_set = atoi(av[2]);
_substrcmp(*av, "enable") == 0 ) {
int which = _substrcmp(*av, "enable") == 0 ? 1 : 0;
- ac--; av++;
+ av++;
masks[0] = masks[1] = 0;
- while (ac) {
+ while (av[0]) {
if (isdigit(**av)) {
i = atoi(*av);
if (i < 0 || i > RESVD_SET)
else
errx(EX_DATAERR,
"invalid set command %s\n", *av);
- av++; ac--;
+ av++;
}
if ( (masks[0] & masks[1]) != 0 )
errx(EX_DATAERR,
}
void
-ipfw_sysctl_handler(int ac, char *av[], int which)
+ipfw_sysctl_handler(char *av[], int which)
{
- ac--;
av++;
- if (ac == 0) {
+ if (av[0] == NULL) {
warnx("missing keyword to enable/disable\n");
} else if (_substrcmp(*av, "firewall") == 0) {
sysctlbyname("net.inet.ip.fw.enable", NULL, 0,
} else if (_substrcmp(*av, "dyn_keepalive") == 0) {
sysctlbyname("net.inet.ip.fw.dyn_keepalive", NULL, 0,
&which, sizeof(which));
+#ifndef NO_ALTQ
} else if (_substrcmp(*av, "altq") == 0) {
altq_set_enabled(which);
+#endif
} else {
warnx("unrecognize enable/disable keyword: %s\n", *av);
}
ipfw_list(int ac, char *av[], int show_counters)
{
struct ip_fw *r;
- ipfw_dyn_rule *dynrules = NULL;
- ipfw_dyn_rule *d;
+ ipfw_dyn_rule *dynrules, *d;
#define NEXT(r) ((struct ip_fw *)((char *)r + RULESIZE(r)))
char *lim;
void *data = NULL;
- int bcwidth, n, nbytes, pcwidth, width, nstat;
- int ndyn = 0;
+ int bcwidth, n, nbytes, nstat, ndyn, pcwidth, width;
int exitval = EX_OK;
int lac;
char **lav;
char *endptr;
int seen = 0;
uint8_t set;
- int ocmd = IP_FW_GET;
-
- if (co.do_pipe)
- ocmd = IP_DUMMYNET_GET;
- else if (co.do_dynamic)
- ocmd = IP_FW_DYN_GET;
+ const int ocmd = co.do_pipe ? IP_DUMMYNET_GET : IP_FW_GET;
int nalloc = 1024; /* start somewhere... */
last = 0;
fprintf(stderr, "Testing only, list disabled\n");
return;
}
+ if (co.do_pipe) {
+ dummynet_list(ac, av, show_counters);
+ return;
+ }
ac--;
av++;
co.do_pipe ? "DUMMYNET" : "FW");
}
- if (co.do_pipe) {
- ipfw_list_pipes(data, nbytes, ac, av);
- goto done;
- }
-
/*
* Count static rules. They have variable size so we
* need to scan the list to count them.
*/
- nstat = 0;
- r = data;
-
- if (!co.do_dynamic) {
for (nstat = 1, r = data, lim = (char *)data + nbytes;
r->rulenum < IPFW_DEFAULT_RULE && (char *)r < lim;
++nstat, r = NEXT(r) )
; /* nothing */
- }
/*
* Count dynamic rules. This is easier as they have
* fixed size.
*/
- if (co.do_dynamic) {
- dynrules = (ipfw_dyn_rule *)r ;
- n = (char *)r - (char *)data;
- ndyn = (nbytes - n) / sizeof *dynrules;
- }
+ r = NEXT(r);
+ dynrules = (ipfw_dyn_rule *)r ;
+ n = (char *)r - (char *)data;
+ ndyn = (nbytes - n) / sizeof *dynrules;
/* if showing stats, figure out column widths ahead of time */
bcwidth = pcwidth = 0;
continue;
/* packet counter */
- width = snprintf(NULL, 0, "%llu",
- align_uint64(&r->pcnt));
+
+ /* Tcc relies on msvcrt.dll for printf, and
+ * it does not support ANSI %llu syntax
+ */
+#ifndef TCC
+ width = snprintf(NULL, 0, "%llu", align_uint64(&r->pcnt));
+#else
+ width = snprintf(NULL, 0, "%I64u", align_uint64(&r->pcnt));
+#endif
if (width > pcwidth)
pcwidth = width;
/* byte counter */
- width = snprintf(NULL, 0, "%llu",
- align_uint64(&r->bcnt));
+#ifndef TCC
+ width = snprintf(NULL, 0, "%llu",align_uint64(&r->bcnt));
+#else
+ width = snprintf(NULL, 0, "%I64u",align_uint64(&r->bcnt));
+#endif
if (width > bcwidth)
bcwidth = width;
}
if (set != co.use_set - 1)
continue;
}
- width = snprintf(NULL, 0, "%llu",
- align_uint64(&d->pcnt));
+
+ /* Tcc relies on msvcrt.dll for printf, and
+ * it does not support ANSI %llu syntax
+ */
+#ifndef TCC
+ width = snprintf(NULL, 0, "%llu",align_uint64(&d->pcnt));
+#else
+ width = snprintf(NULL, 0, "%I64u",align_uint64(&d->pcnt));
+#endif
if (width > pcwidth)
pcwidth = width;
- width = snprintf(NULL, 0, "%llu",
- align_uint64(&d->bcnt));
+#ifndef TCC
+ width = snprintf(NULL, 0, "%llu",align_uint64(&d->bcnt));
+#else
+ width = snprintf(NULL, 0, "%I64u",align_uint64(&d->bcnt));
+#endif
if (width > bcwidth)
bcwidth = width;
}
/* display specific rules requested on command line */
- if (!co.do_dynamic) {
for (lac = ac, lav = av; lac != 0; lac--) {
/* convert command line rule # */
last = rnum = strtoul(*lav++, &endptr, 10);
warnx("rule %lu does not exist", rnum);
}
}
- }
if (co.do_dynamic && ndyn) {
printf("## Dynamic rules:\n");
return;
}
/* A single IP can be stored in an optimized format */
- if (d[1] == ~0 && av == NULL && len == 0) {
+ if (d[1] == (uint32_t)~0 && av == NULL && len == 0) {
cmd->o.len |= F_INSN_SIZE(ipfw_insn_u32);
return;
}
void
-ipfw_delete(int ac, char *av[])
+ipfw_delete(char *av[])
{
uint32_t rulenum;
int i;
int exitval = EX_OK;
int do_set = 0;
-
- av++; ac--;
+ av++;
NEED1("missing rule specification");
- if (ac > 0 && _substrcmp(*av, "set") == 0) {
+ if ( *av && _substrcmp(*av, "set") == 0) {
/* Do not allow using the following syntax:
* ipfw set N delete set M
*/
if (co.use_set)
errx(EX_DATAERR, "invalid syntax");
do_set = 1; /* delete set */
- ac--; av++;
+ av++;
}
/* Rule number */
- while (ac && isdigit(**av)) {
- i = atoi(*av); av++; ac--;
+ while (*av && isdigit(**av)) {
+ i = atoi(*av); av++;
if (co.do_nat) {
exitval = do_cmd(IP_FW_NAT_DEL, &i, sizeof i);
if (exitval) {
static void
get_mac_addr_mask(const char *p, uint8_t *addr, uint8_t *mask)
{
- int i, l;
+ int i;
+ size_t l;
char *ap, *ptr, *optr;
struct ether_addr *mac;
const char *macset = "0123456789abcdefABCDEF:";
if (ptr != NULL) { /* we have mask? */
if (p[ptr - optr - 1] == '/') { /* mask len */
- l = strtol(ptr, &ap, 10);
- if (*ap != 0 || l > ETHER_ADDR_LEN * 8 || l < 0)
+ long ml = strtol(ptr, &ap, 10);
+ if (*ap != 0 || ml > ETHER_ADDR_LEN * 8 || ml < 0)
errx(EX_DATAERR, "Incorrect mask length");
- for (i = 0; l > 0 && i < ETHER_ADDR_LEN; l -= 8, i++)
- mask[i] = (l >= 8) ? 0xff: (~0) << (8 - l);
+ for (i = 0; ml > 0 && i < ETHER_ADDR_LEN; ml -= 8, i++)
+ mask[i] = (ml >= 8) ? 0xff: (~0) << (8 - ml);
} else { /* mask */
l = strlen(ptr);
if (strspn(ptr, macset) != l ||
* Takes arguments and copies them into a comment
*/
static void
-fill_comment(ipfw_insn *cmd, int ac, char **av)
+fill_comment(ipfw_insn *cmd, char **av)
{
int i, l;
char *p = (char *)(cmd + 1);
cmd->len = (cmd->len & (F_NOT | F_OR));
/* Compute length of comment string. */
- for (i = 0, l = 0; i < ac; i++)
+ for (i = 0, l = 0; av[i] != NULL; i++)
l += strlen(av[i]) + 1;
if (l == 0)
return;
"comment too long (max 80 chars)");
l = 1 + (l+3)/4;
cmd->len = (cmd->len & (F_NOT | F_OR)) | l;
- for (i = 0; i < ac; i++) {
+ for (i = 0; av[i] != NULL; i++) {
strcpy(p, av[i]);
p += strlen(av[i]);
*p++ = ' ';
* two microinstructions, and returns the pointer to the last one.
*/
static ipfw_insn *
-add_mac(ipfw_insn *cmd, int ac, char *av[])
+add_mac(ipfw_insn *cmd, char *av[])
{
ipfw_insn_mac *mac;
- if (ac < 2)
+ if ( ( av[0] == NULL ) || ( av[1] == NULL ) )
errx(EX_DATAERR, "MAC dst src");
cmd->opcode = O_MACADDR2;
}
static ipfw_insn *
-add_mactype(ipfw_insn *cmd, int ac, char *av)
+add_mactype(ipfw_insn *cmd, char *av)
{
- if (ac < 1)
+ if (!av)
errx(EX_DATAERR, "missing MAC type");
if (strcmp(av, "any") != 0) { /* we have a non-null type */
fill_newports((ipfw_insn_u16 *)cmd, av, IPPROTO_ETHERTYPE);
static ipfw_insn *
add_ports(ipfw_insn *cmd, char *av, u_char proto, int opcode)
{
+ /* XXX "any" is trapped before. Perhaps "to" */
if (_substrcmp(av, "any") == 0) {
return NULL;
} else if (fill_newports((ipfw_insn_u16 *)cmd, av, proto)) {
*ch = '\0';
if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 ||
- inet_pton(AF_INET6, host, &a))
+ inet_pton(AF_INET6, host, &a) == 1)
ret = add_srcip6(cmd, av);
/* XXX: should check for IPv4, not !IPv6 */
if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 ||
- !inet_pton(AF_INET6, host, &a)))
+ inet_pton(AF_INET6, host, &a) != 1))
ret = add_srcip(cmd, av);
if (ret == NULL && strcmp(av, "any") != 0)
ret = cmd;
*ch = '\0';
if (proto == IPPROTO_IPV6 || strcmp(av, "me6") == 0 ||
- inet_pton(AF_INET6, host, &a))
+ inet_pton(AF_INET6, host, &a) == 1)
ret = add_dstip6(cmd, av);
/* XXX: should check for IPv4, not !IPv6 */
if (ret == NULL && (proto == IPPROTO_IP || strcmp(av, "me") == 0 ||
- !inet_pton(AF_INET6, host, &a)))
+ inet_pton(AF_INET6, host, &a) != 1))
ret = add_dstip(cmd, av);
if (ret == NULL && strcmp(av, "any") != 0)
ret = cmd;
*
*/
void
-ipfw_add(int ac, char *av[])
+ipfw_add(char *av[])
{
/*
* rules are added into the 'rulebuf' and then copied in
cmd = (ipfw_insn *)cmdbuf;
action = (ipfw_insn *)actbuf;
- av++; ac--;
+ av++;
/* [rule N] -- Rule number optional */
- if (ac && isdigit(**av)) {
+ if (av[0] && isdigit(**av)) {
rule->rulenum = atoi(*av);
av++;
- ac--;
}
/* [set N] -- set number (0..RESVD_SET), optional */
- if (ac > 1 && _substrcmp(*av, "set") == 0) {
+ if (av[0] && av[1] && _substrcmp(*av, "set") == 0) {
int set = strtoul(av[1], NULL, 10);
if (set < 0 || set > RESVD_SET)
errx(EX_DATAERR, "illegal set %s", av[1]);
rule->set = set;
- av += 2; ac -= 2;
+ av += 2;
}
/* [prob D] -- match probability, optional */
- if (ac > 1 && _substrcmp(*av, "prob") == 0) {
+ if (av[0] && av[1] && _substrcmp(*av, "prob") == 0) {
match_prob = strtod(av[1], NULL);
if (match_prob <= 0 || match_prob > 1)
errx(EX_DATAERR, "illegal match prob. %s", av[1]);
- av += 2; ac -= 2;
+ av += 2;
}
/* action -- mandatory */
NEED1("missing action");
i = match_token(rule_actions, *av);
- ac--; av++;
+ av++;
action->len = 1; /* default */
switch(i) {
case TOK_CHECKSTATE:
action->opcode = O_REJECT;
NEED1("missing reject code");
fill_reject_code(&action->arg1, *av);
- ac--; av++;
+ av++;
break;
case TOK_UNREACH6:
action->opcode = O_UNREACH6;
NEED1("missing unreach code");
fill_unreach6_code(&action->arg1, *av);
- ac--; av++;
+ av++;
break;
case TOK_COUNT:
case TOK_TEE:
action->opcode = O_TEE;
chkarg:
- if (!ac)
+ if (!av[0])
errx(EX_USAGE, "missing argument for %s", *(av - 1));
if (isdigit(**av)) {
action->arg1 = strtoul(*av, NULL, 10);
errx(EX_DATAERR, "illegal divert/tee port");
} else
errx(EX_DATAERR, "illegal argument for %s", *(av - 1));
- ac--; av++;
+ av++;
break;
case TOK_FORWARD: {
p->sa.sin_addr.s_addr = INADDR_ANY;
else
lookup_host(*av, &(p->sa.sin_addr));
- ac--; av++;
+ av++;
break;
}
case TOK_COMMENT:
/* pretend it is a 'count' rule followed by the comment */
action->opcode = O_COUNT;
- ac++; av--; /* go back... */
+ av--; /* go back... */
break;
case TOK_SETFIB:
errx(EX_DATAERR, "fibs not suported.\n");
if (action->arg1 >= numfibs) /* Temporary */
errx(EX_DATAERR, "fib too large.\n");
- ac--; av++;
+ av++;
break;
}
-
+
case TOK_REASS:
action->opcode = O_REASS;
break;
* If they exist, it go first in the cmdbuf, but then it is
* skipped in the copy section to the end of the buffer.
*/
- while (ac != 0 && (i = match_token(rule_action_params, *av)) != -1) {
- ac--; av++;
+ while (av[0] != NULL && (i = match_token(rule_action_params, *av)) != -1) {
+ av++;
switch (i) {
case TOK_LOG:
{
have_log = (ipfw_insn *)c;
cmd->len = F_INSN_SIZE(ipfw_insn_log);
cmd->opcode = O_LOG;
- if (ac && _substrcmp(*av, "logamount") == 0) {
- ac--; av++;
+ if (av[0] && _substrcmp(*av, "logamount") == 0) {
+ av++;
NEED1("logamount requires argument");
l = atoi(*av);
if (l < 0)
errx(EX_DATAERR,
"logamount must be positive");
c->max_log = l;
- ac--; av++;
+ av++;
} else {
len = sizeof(c->max_log);
if (sysctlbyname("net.inet.ip.fw.verbose_limit",
}
break;
+#ifndef NO_ALTQ
case TOK_ALTQ:
{
ipfw_insn_altq *a = (ipfw_insn_altq *)cmd;
cmd->len = F_INSN_SIZE(ipfw_insn_altq);
cmd->opcode = O_ALTQ;
a->qid = altq_name_to_qid(*av);
- ac--; av++;
+ av++;
}
break;
+#endif
case TOK_TAG:
case TOK_UNTAG: {
rule_action_params);
have_tag = cmd;
fill_cmd(cmd, O_TAG, (i == TOK_TAG) ? 0: F_NOT, tag);
- ac--; av++;
+ av++;
break;
}
goto done;
#define OR_START(target) \
- if (ac && (*av[0] == '(' || *av[0] == '{')) { \
+ if (av[0] && (*av[0] == '(' || *av[0] == '{')) { \
if (open_par) \
errx(EX_USAGE, "nested \"(\" not allowed\n"); \
prev = NULL; \
open_par = 1; \
if ( (av[0])[1] == '\0') { \
- ac--; av++; \
+ av++; \
} else \
(*av)++; \
} \
#define CLOSE_PAR \
if (open_par) { \
- if (ac && ( \
+ if (av[0] && ( \
strcmp(*av, ")") == 0 || \
strcmp(*av, "}") == 0)) { \
prev = NULL; \
open_par = 0; \
- ac--; av++; \
+ av++; \
} else \
errx(EX_USAGE, "missing \")\"\n"); \
}
#define NOT_BLOCK \
- if (ac && _substrcmp(*av, "not") == 0) { \
+ if (av[0] && _substrcmp(*av, "not") == 0) { \
if (cmd->len & F_NOT) \
errx(EX_USAGE, "double \"not\" not allowed\n"); \
cmd->len |= F_NOT; \
- ac--; av++; \
+ av++; \
}
#define OR_BLOCK(target) \
- if (ac && _substrcmp(*av, "or") == 0) { \
+ if (av[0] && _substrcmp(*av, "or") == 0) { \
if (prev == NULL || open_par == 0) \
errx(EX_DATAERR, "invalid OR block"); \
prev->len |= F_OR; \
- ac--; av++; \
+ av++; \
goto target; \
} \
CLOSE_PAR;
NEED1("missing protocol");
if (_substrcmp(*av, "MAC") == 0 ||
_substrcmp(*av, "mac") == 0) {
- ac--; av++; /* the "MAC" keyword */
- add_mac(cmd, ac, av); /* exits in case of errors */
+ av++; /* the "MAC" keyword */
+ add_mac(cmd, av); /* exits in case of errors */
cmd = next_cmd(cmd);
- ac -= 2; av += 2; /* dst-mac and src-mac */
+ av += 2; /* dst-mac and src-mac */
NOT_BLOCK;
NEED1("missing mac type");
- if (add_mactype(cmd, ac, av[0]))
+ if (add_mactype(cmd, av[0]))
cmd = next_cmd(cmd);
- ac--; av++; /* any or mac-type */
+ av++; /* any or mac-type */
goto read_options;
}
#endif
NOT_BLOCK;
NEED1("missing protocol");
if (add_proto_compat(cmd, *av, &proto)) {
- av++; ac--;
+ av++;
if (F_LEN(cmd) != 0) {
prev = cmd;
cmd = next_cmd(cmd);
/*
* "from", mandatory
*/
- if (!ac || _substrcmp(*av, "from") != 0)
+ if ((av[0] == NULL) || _substrcmp(*av, "from") != 0)
errx(EX_USAGE, "missing ``from''");
- ac--; av++;
+ av++;
/*
* source IP, mandatory
NOT_BLOCK; /* optional "not" */
NEED1("missing source address");
if (add_src(cmd, *av, proto)) {
- ac--; av++;
+ av++;
if (F_LEN(cmd) != 0) { /* ! any */
prev = cmd;
cmd = next_cmd(cmd);
* source ports, optional
*/
NOT_BLOCK; /* optional "not" */
- if (ac) {
+ if ( av[0] != NULL ) {
if (_substrcmp(*av, "any") == 0 ||
add_ports(cmd, *av, proto, O_IP_SRCPORT)) {
- ac--; av++;
+ av++;
if (F_LEN(cmd) != 0)
cmd = next_cmd(cmd);
}
/*
* "to", mandatory
*/
- if (!ac || _substrcmp(*av, "to") != 0)
+ if ( (av[0] == NULL) || _substrcmp(*av, "to") != 0 )
errx(EX_USAGE, "missing ``to''");
- av++; ac--;
+ av++;
/*
* destination, mandatory
NOT_BLOCK; /* optional "not" */
NEED1("missing dst address");
if (add_dst(cmd, *av, proto)) {
- ac--; av++;
+ av++;
if (F_LEN(cmd) != 0) { /* ! any */
prev = cmd;
cmd = next_cmd(cmd);
* dest. ports, optional
*/
NOT_BLOCK; /* optional "not" */
- if (ac) {
+ if (av[0]) {
if (_substrcmp(*av, "any") == 0 ||
add_ports(cmd, *av, proto, O_IP_DSTPORT)) {
- ac--; av++;
+ av++;
if (F_LEN(cmd) != 0)
cmd = next_cmd(cmd);
}
}
read_options:
- if (ac && first_cmd == cmd) {
+ if (av[0] && first_cmd == cmd) {
/*
* nothing specified so far, store in the rule to ease
* printout later.
rule->_pad = 1;
}
prev = NULL;
- while (ac) {
+ while ( av[0] != NULL ) {
char *s;
ipfw_insn_u32 *cmd32; /* alias for cmd */
s++;
}
i = match_token(rule_options, s);
- ac--; av++;
+ av++;
switch(i) {
case TOK_NOT:
if (cmd->len & F_NOT)
NEED1("recv, xmit, via require interface name"
" or address");
fill_iface((ipfw_insn_if *)cmd, av[0]);
- ac--; av++;
+ av++;
if (F_LEN(cmd) == 0) /* not a valid address */
break;
if (i == TOK_XMIT)
case TOK_ICMPTYPES:
NEED1("icmptypes requires list of types");
fill_icmptypes((ipfw_insn_u32 *)cmd, *av);
- av++; ac--;
+ av++;
break;
case TOK_ICMP6TYPES:
NEED1("icmptypes requires list of types");
fill_icmp6types((ipfw_insn_icmp6 *)cmd, *av);
- av++; ac--;
+ av++;
break;
case TOK_IPTTL:
errx(EX_DATAERR, "invalid ipttl %s", *av);
} else
fill_cmd(cmd, O_IPTTL, 0, strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_IPID:
errx(EX_DATAERR, "invalid ipid %s", *av);
} else
fill_cmd(cmd, O_IPID, 0, strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_IPLEN:
errx(EX_DATAERR, "invalid ip len %s", *av);
} else
fill_cmd(cmd, O_IPLEN, 0, strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_IPVER:
NEED1("ipver requires version");
fill_cmd(cmd, O_IPVER, 0, strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_IPPRECEDENCE:
NEED1("ipprecedence requires value");
fill_cmd(cmd, O_IPPRECEDENCE, 0,
(strtoul(*av, NULL, 0) & 7) << 5);
- ac--; av++;
+ av++;
break;
case TOK_IPOPTS:
NEED1("missing argument for ipoptions");
fill_flags(cmd, O_IPOPT, f_ipopts, *av);
- ac--; av++;
+ av++;
break;
case TOK_IPTOS:
NEED1("missing argument for iptos");
fill_flags(cmd, O_IPTOS, f_iptos, *av);
- ac--; av++;
+ av++;
break;
case TOK_UID:
errx(EX_DATAERR, "uid \"%s\" nonexistent", *av);
cmd32->d[0] = pwd->pw_uid;
cmd->len |= F_INSN_SIZE(ipfw_insn_u32);
- ac--; av++;
+ av++;
}
break;
errx(EX_DATAERR, "gid \"%s\" nonexistent", *av);
cmd32->d[0] = grp->gr_gid;
cmd->len |= F_INSN_SIZE(ipfw_insn_u32);
- ac--; av++;
+ av++;
}
break;
errx(EX_DATAERR, "jail requires prison ID");
cmd32->d[0] = (uint32_t)jid;
cmd->len |= F_INSN_SIZE(ipfw_insn_u32);
- ac--; av++;
+ av++;
}
break;
} else
fill_cmd(cmd, O_TCPDATALEN, 0,
strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_TCPOPTS:
NEED1("missing argument for tcpoptions");
fill_flags(cmd, O_TCPOPTS, f_tcpopts, *av);
- ac--; av++;
+ av++;
break;
case TOK_TCPSEQ:
cmd->len = F_INSN_SIZE(ipfw_insn_u32);
cmd->opcode = (i == TOK_TCPSEQ) ? O_TCPSEQ : O_TCPACK;
cmd32->d[0] = htonl(strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_TCPWIN:
NEED1("tcpwin requires length");
fill_cmd(cmd, O_TCPWIN, 0,
htons(strtoul(*av, NULL, 0)));
- ac--; av++;
+ av++;
break;
case TOK_TCPFLAGS:
NEED1("missing argument for tcpflags");
cmd->opcode = O_TCPFLAGS;
fill_flags(cmd, O_TCPFLAGS, f_tcpflags, *av);
- ac--; av++;
+ av++;
break;
case TOK_KEEPSTATE:
cmd->opcode = O_LIMIT;
c->limit_mask = c->conn_limit = 0;
- while (ac > 0) {
+ while ( av[0] != NULL ) {
if ((val = match_token(limit_masks, *av)) <= 0)
break;
c->limit_mask |= val;
- ac--; av++;
+ av++;
}
if (c->limit_mask == 0)
GET_UINT_ARG(c->conn_limit, IPFW_ARG_MIN, IPFW_ARG_MAX,
TOK_LIMIT, rule_options);
- ac--; av++;
+ av++;
break;
}
case TOK_PROTO:
NEED1("missing protocol");
if (add_proto(cmd, *av, &proto)) {
- ac--; av++;
+ av++;
} else
errx(EX_DATAERR, "invalid protocol ``%s''",
*av);
case TOK_SRCIP:
NEED1("missing source IP");
if (add_srcip(cmd, *av)) {
- ac--; av++;
+ av++;
}
break;
case TOK_DSTIP:
NEED1("missing destination IP");
if (add_dstip(cmd, *av)) {
- ac--; av++;
+ av++;
}
break;
case TOK_SRCIP6:
NEED1("missing source IP6");
if (add_srcip6(cmd, *av)) {
- ac--; av++;
+ av++;
}
break;
case TOK_DSTIP6:
NEED1("missing destination IP6");
if (add_dstip6(cmd, *av)) {
- ac--; av++;
+ av++;
}
break;
NEED1("missing source port");
if (_substrcmp(*av, "any") == 0 ||
add_ports(cmd, *av, proto, O_IP_SRCPORT)) {
- ac--; av++;
+ av++;
} else
errx(EX_DATAERR, "invalid source port %s", *av);
break;
NEED1("missing destination port");
if (_substrcmp(*av, "any") == 0 ||
add_ports(cmd, *av, proto, O_IP_DSTPORT)) {
- ac--; av++;
+ av++;
} else
errx(EX_DATAERR, "invalid destination port %s",
*av);
break;
case TOK_MAC:
- if (add_mac(cmd, ac, av)) {
- ac -= 2; av += 2;
- }
+ if (add_mac(cmd, av))
+ av += 2;
break;
case TOK_MACTYPE:
NEED1("missing mac type");
- if (!add_mactype(cmd, ac, *av))
+ if (!add_mactype(cmd, *av))
errx(EX_DATAERR, "invalid mac type %s", *av);
- ac--; av++;
+ av++;
break;
case TOK_VERREVPATH:
case TOK_EXT6HDR:
fill_ext6hdr( cmd, *av );
- ac--; av++;
+ av++;
break;
case TOK_FLOWID:
errx( EX_USAGE, "flow-id filter is active "
"only for ipv6 protocol\n");
fill_flow6( (ipfw_insn_u32 *) cmd, *av );
- ac--; av++;
+ av++;
break;
case TOK_COMMENT:
- fill_comment(cmd, ac, av);
- av += ac;
- ac = 0;
+ fill_comment(cmd, av);
+ av[0]=NULL;
break;
case TOK_TAGGED:
- if (ac > 0 && strpbrk(*av, "-,")) {
+ if (av[0] && strpbrk(*av, "-,")) {
if (!add_ports(cmd, *av, 0, O_TAGGED))
errx(EX_DATAERR, "tagged: invalid tag"
" list: %s", *av);
TOK_TAGGED, rule_options);
fill_cmd(cmd, O_TAGGED, 0, tag);
}
- ac--; av++;
+ av++;
break;
case TOK_FIB:
NEED1("fib requires fib number");
fill_cmd(cmd, O_FIB, 0, strtoul(*av, NULL, 0));
- ac--; av++;
+ av++;
break;
case TOK_LOOKUP: {
char *p;
int j;
- if (ac < 2)
+ if (!av[0] || !av[1])
errx(EX_USAGE, "format: lookup argument tablenum");
cmd->opcode = O_IP_DST_LOOKUP;
cmd->len |= F_INSN_SIZE(ipfw_insn) + 2;
if (lookup_key[j] <= 0)
errx(EX_USAGE, "format: cannot lookup on %s", *av);
c->d[1] = j; // i converted to option
- ac--; av++;
+ av++;
cmd->arg1 = strtoul(*av, &p, 0);
if (p && *p)
errx(EX_USAGE, "format: lookup argument tablenum");
- ac--; av++;
+ av++;
}
break;
if (c == 'N') /* user said no */
return;
}
+ if (co.do_pipe) {
+ dummynet_flush();
+ return;
+ }
/* `ipfw set N flush` - is the same that `ipfw delete set N` */
if (co.use_set) {
uint32_t arg = ((co.use_set - 1) & 0xffff) | (1 << 24);
}
}
} else if (_substrcmp(*av, "flush") == 0) {
- a = is_all ? tables_max : (ent.tbl + 1);
+ a = is_all ? tables_max : (uint32_t)(ent.tbl + 1);
do {
if (do_cmd(IP_FW_TABLE_FLUSH, &ent.tbl,
sizeof(ent.tbl)) < 0)
err(EX_OSERR, "setsockopt(IP_FW_TABLE_FLUSH)");
} while (++ent.tbl < a);
} else if (_substrcmp(*av, "list") == 0) {
- a = is_all ? tables_max : (ent.tbl + 1);
+ a = is_all ? tables_max : (uint32_t)(ent.tbl + 1);
do {
table_list(ent, is_all);
} while (++ent.tbl < a);
*
* NEW command line interface for IP firewall facility
*
- * $FreeBSD: head/sbin/ipfw/ipfw2.h 187983 2009-02-01 16:00:49Z luigi $
+ * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipfw2.h 203280 2010-01-31 12:21:20Z luigi $
*/
/*
int do_resolv; /* try to resolve all ip to names */
int do_time; /* Show time stamps */
int do_quiet; /* Be quiet in add and flush */
- int do_pipe; /* this cmd refers to a pipe */
+ int do_pipe; /* this cmd refers to a pipe/queue/sched */
int do_nat; /* this cmd refers to a nat config */
int do_dynamic; /* display dynamic rules */
int do_expired; /* display expired dynamic rules */
TOK_ACCEPT,
TOK_COUNT,
TOK_PIPE,
+ TOK_LINK,
TOK_QUEUE,
+ TOK_FLOWSET,
+ TOK_SCHED,
TOK_DIVERT,
TOK_TEE,
TOK_NETGRAPH,
TOK_IPLEN,
TOK_IPID,
TOK_IPPRECEDENCE,
+ TOK_DSCP,
TOK_IPTOS,
TOK_IPTTL,
TOK_IPVER,
TOK_SRCPORT,
TOK_ALL,
TOK_MASK,
+ TOK_FLOW_MASK,
+ TOK_SCHED_MASK,
TOK_BW,
TOK_DELAY,
- TOK_PIPE_PROFILE,
+ TOK_PROFILE,
TOK_BURST,
TOK_RED,
TOK_GRED,
TOK_DROPTAIL,
TOK_PROTO,
+ /* dummynet tokens */
TOK_WEIGHT,
+ TOK_LMAX,
+ TOK_PRI,
+ TOK_TYPE,
+ TOK_SLOTSIZE,
+
TOK_IP,
TOK_IF,
TOK_ALOG,
* the following macro returns an error message if we run out of
* arguments.
*/
-#define NEED1(msg) {if (!ac) errx(EX_USAGE, msg);}
+#define NEED(_p, msg) {if (!_p) errx(EX_USAGE, msg);}
+#define NEED1(msg) {if (!(*av)) errx(EX_USAGE, msg);}
unsigned long long align_uint64(const uint64_t *pll);
extern int resvd_set_number;
/* first-level command handlers */
-void ipfw_add(int ac, char *av[]);
+void ipfw_add(char *av[]);
void ipfw_show_nat(int ac, char **av);
void ipfw_config_pipe(int ac, char **av);
void ipfw_config_nat(int ac, char **av);
-void ipfw_sets_handler(int ac, char *av[]);
+void ipfw_sets_handler(char *av[]);
void ipfw_table_handler(int ac, char *av[]);
-void ipfw_sysctl_handler(int ac, char *av[], int which);
-void ipfw_delete(int ac, char *av[]);
+void ipfw_sysctl_handler(char *av[], int which);
+void ipfw_delete(char *av[]);
void ipfw_flush(int force);
void ipfw_zero(int ac, char *av[], int optname);
void ipfw_list(int ac, char *av[], int show_counters);
void print_altq_cmd(struct _ipfw_insn_altq *altqptr);
/* dummynet.c */
-void ipfw_list_pipes(void *data, uint nbytes, int ac, char *av[]);
+void dummynet_list(int ac, char *av[], int show_counters);
+void dummynet_flush(void);
int ipfw_delete_pipe(int pipe_or_queue, int n);
/* ipv6.c */
*
* NEW command line interface for IP firewall facility
*
- * $FreeBSD: head/sbin/ipfw/ipv6.c 187770 2009-01-27 12:01:30Z luigi $
+ * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipv6.c 187770 2009-01-27 12:01:30Z luigi $
*
* ipv6 support
*/
/*
- * Copyright (c) 2002-2003 Luigi Rizzo
+ * Copyright (c) 2002-2003,2010 Luigi Rizzo
* Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
* Copyright (c) 1994 Ugen J.S.Antsilevich
*
*
* Command line interface for IP firewall facility
*
- * $FreeBSD: head/sbin/ipfw/main.c 187767 2009-01-27 10:18:55Z luigi $
+ * $FreeBSD: head/sbin/ipfw/main.c 206494 2010-04-12 08:27:53Z luigi $
*/
#include <sys/wait.h>
exit(0);
}
-/*
- * Free a the (locally allocated) copy of command line arguments.
- */
-static void
-free_args(int ac, char **av)
-{
- int i;
-
- for (i=0; i < ac; i++)
- free(av[i]);
- free(av);
-}
-
/*
* Called with the arguments, including program name because getopt
* wants it to be present.
* Returns 0 if successful, 1 if empty command, errx() in case of errors.
+ * First thing we do is process parameters creating an argv[] array
+ * which includes the program name and a NULL entry at the end.
+ * If we are called with a single string, we split it on whitespace.
+ * Also, arguments with a trailing ',' are joined to the next one.
+ * The pointers (av[]) and data are in a single chunk of memory.
+ * av[0] points to the original program name, all other entries
+ * point into the allocated chunk.
*/
static int
ipfw_main(int oldac, char **oldav)
{
- int ch, ac, save_ac;
+ int ch, ac;
const char *errstr;
char **av, **save_av;
int do_acct = 0; /* Show packet/byte count */
int try_next = 0; /* set if pipe cmd not found */
+ int av_size; /* compute the av size */
+ char *av_p; /* used to build the av list */
#define WHITESP " \t\f\v\n\r"
if (oldac < 2)
if (oldac == 2) {
/*
- * If we are called with a single string, try to split it into
- * arguments for subsequent parsing.
- * But first, remove spaces after a ',', by copying the string
- * in-place.
+ * If we are called with one argument, try to split it into
+ * words for subsequent parsing. Spaces after a ',' are
+ * removed by copying the string in-place.
*/
char *arg = oldav[1]; /* The string is the first arg. */
int l = strlen(arg);
ac++;
/*
- * Allocate the argument list, including one entry for
- * the program name because getopt expects it.
+ * Allocate the argument list structure as a single block
+ * of memory, containing pointers and the argument
+ * strings. We include one entry for the program name
+ * because getopt expects it, and a NULL at the end
+ * to simplify further parsing.
*/
- av = safe_calloc(ac + 1, sizeof(char *));
+ ac++; /* add 1 for the program name */
+ av_size = (ac+1) * sizeof(char *) + l + 1;
+ av = safe_calloc(av_size, 1);
/*
- * Second, copy arguments from arg[] to av[]. For each one,
+ * Init the argument pointer to the end of the array
+ * and copy arguments from arg[] to av[]. For each one,
* j is the initial character, i is the one past the end.
*/
- for (ac = 1, i = j = 0; i < l; i++)
+ av_p = (char *)&av[ac+1];
+ for (ac = 1, i = j = 0; i < l; i++) {
if (index(WHITESP, arg[i]) != NULL || i == l-1) {
if (i == l-1)
i++;
- av[ac] = safe_calloc(i-j+1, 1);
- bcopy(arg+j, av[ac], i-j);
+ bcopy(arg+j, av_p, i-j);
+ av[ac] = av_p;
+ av_p += i-j; /* the length of the string */
+ *av_p++ = '\0';
ac++;
j = i + 1;
}
+ }
} else {
/*
* If an argument ends with ',' join with the next one.
*/
- int first, i, l;
+ int first, i, l=0;
+
+ /*
+ * Allocate the argument list structure as a single block
+ * of memory, containing both pointers and the argument
+ * strings. We include some space for the program name
+ * because getopt expects it.
+ * We add an extra pointer to the end of the array,
+ * to simplify further parsing.
+ */
+ for (i=0; i<oldac; i++)
+ l+=strlen(oldav[i]);
- av = safe_calloc(oldac, sizeof(char *));
+ av_size = (oldac+1) * sizeof(char *) + l + oldac;
+ av = safe_calloc(av_size, 1);
+
+ /*
+ * Init the argument pointer to the end of the array
+ * and copy arguments from oldav[] to av[]
+ */
+ av_p = (char *)&av[oldac+1];
for (first = i = ac = 1, l = 0; i < oldac; i++) {
char *arg = oldav[i];
int k = strlen(arg);
l += k;
if (arg[k-1] != ',' || i == oldac-1) {
/* Time to copy. */
- av[ac] = safe_calloc(l+1, 1);
+ av[ac] = av_p;
for (l=0; first <= i; first++) {
- strcat(av[ac]+l, oldav[first]);
- l += strlen(oldav[first]);
+ strcat(av_p, oldav[first]);
+ av_p += strlen(oldav[first]);
}
+ *av_p++ = '\0';
ac++;
l = 0;
first = i+1;
}
}
- av[0] = strdup(oldav[0]); /* copy progname from the caller */
+ /*
+ * set the progname pointer to the original string
+ * and terminate the array with null
+ */
+ av[0] = oldav[0];
+ av[ac] = NULL;
+
/* Set the force flag for non-interactive processes */
if (!co.do_force)
co.do_force = !isatty(STDIN_FILENO);
+#ifdef EMULATE_SYSCTL /* sysctl emulation */
+	/*
+	 * "ipfw sysctl ..." is handled here, before getopt() processing.
+	 * Exactly one argument is accepted after "sysctl":
+	 *   -a          sysctlbyname() is called with a NULL name
+	 *               (presumably "list everything"; confirm against
+	 *               the sysctlbyname() emulation)
+	 *   name        sysctlbyname() is called with that name and no
+	 *               old/new buffers
+	 *   name=value  value is parsed as an int and passed as the
+	 *               new value for name
+	 * The return value of sysctlbyname() is not checked and this
+	 * branch always returns 0.
+	 *
+	 * av was allocated above and has not been recorded in save_av
+	 * yet, so it is released here before every return to avoid
+	 * leaking it (ipfw_main may be called once per input line).
+	 */
+	if ( ac >= 2 && !strcmp(av[1], "sysctl")) {
+		char *s;
+		int i;
+
+		if (ac != 3) {
+			printf( "sysctl emulation usage:\n"
+				" ipfw sysctl name[=value]\n"
+				" ipfw sysctl -a\n");
+			free(av);
+			return 0;
+		}
+		s = index(av[2], '=');
+		if (s == NULL) {
+			/* no '=': "-a" maps to a NULL name, else use av[2] */
+			s = !strcmp(av[2], "-a") ? NULL : av[2];
+			sysctlbyname(s, NULL, NULL, NULL, 0);
+		} else {	/* ipfw sysctl x.y.z=value */
+			/* assume an INT value, will extend later */
+			if (s[1] == '\0') {
+				printf("ipfw sysctl: missing value\n\n");
+				free(av);
+				return 0;
+			}
+			*s = '\0';	/* split "name=value" in place */
+			i = strtol(s+1, NULL, 0);
+			sysctlbyname(av[2], NULL, NULL, &i, sizeof(int));
+		}
+		free(av);
+		return 0;
+	}
+#endif
+
/* Save arguments for final freeing of memory. */
- save_ac = ac;
save_av = av;
optind = optreset = 1; /* restart getopt() */
break;
case 'h': /* help */
- free_args(save_ac, save_av);
+ free(save_av);
help();
break; /* NOTREACHED */
break;
default:
- free_args(save_ac, save_av);
+ free(save_av);
return 1;
}
co.do_pipe = 1;
else if (_substrcmp(*av, "queue") == 0)
co.do_pipe = 2;
+ else if (_substrcmp(*av, "flowset") == 0)
+ co.do_pipe = 2;
+ else if (_substrcmp(*av, "sched") == 0)
+ co.do_pipe = 3;
else if (!strncmp(*av, "set", strlen(*av))) {
if (ac > 1 && isdigit(av[1][0])) {
co.use_set = strtonum(av[1], 0, resvd_set_number,
if (co.use_set == 0) {
if (_substrcmp(*av, "add") == 0)
- ipfw_add(ac, av);
+ ipfw_add(av);
else if (co.do_nat && _substrcmp(*av, "show") == 0)
ipfw_show_nat(ac, av);
else if (co.do_pipe && _substrcmp(*av, "config") == 0)
else if (co.do_nat && _substrcmp(*av, "config") == 0)
ipfw_config_nat(ac, av);
else if (_substrcmp(*av, "set") == 0)
- ipfw_sets_handler(ac, av);
+ ipfw_sets_handler(av);
else if (_substrcmp(*av, "table") == 0)
ipfw_table_handler(ac, av);
else if (_substrcmp(*av, "enable") == 0)
- ipfw_sysctl_handler(ac, av, 1);
+ ipfw_sysctl_handler(av, 1);
else if (_substrcmp(*av, "disable") == 0)
- ipfw_sysctl_handler(ac, av, 0);
+ ipfw_sysctl_handler(av, 0);
else
try_next = 1;
}
if (co.use_set || try_next) {
if (_substrcmp(*av, "delete") == 0)
- ipfw_delete(ac, av);
+ ipfw_delete(av);
else if (_substrcmp(*av, "flush") == 0)
ipfw_flush(co.do_force);
else if (_substrcmp(*av, "zero") == 0)
}
/* Free memory allocated in the argument parsing. */
- free_args(save_ac, save_av);
+ free(save_av);
return 0;
}
}
while (fgets(buf, BUFSIZ, f)) { /* read commands */
- char linename[10];
+ char linename[20];
char *args[2];
lineno++;
- sprintf(linename, "Line %d", lineno);
+ snprintf(linename, sizeof(linename), "Line %d", lineno);
setprogname(linename); /* XXX */
args[0] = progname;
args[1] = buf;
int
main(int ac, char *av[])
{
+#if defined(_WIN32) && defined(TCC)
+ {
+ WSADATA wsaData;
+ int ret=0;
+ unsigned short wVersionRequested = MAKEWORD(2, 2);
+ ret = WSAStartup(wVersionRequested, &wsaData);
+ if (ret != 0) {
+ /* Tell the user that we could not find a usable */
+ /* Winsock DLL. */
+ printf("WSAStartup failed with error: %d\n", ret);
+ return 1;
+ }
+ }
+#endif
/*
* If the last argument is an absolute pathname, interpret it
* as a file to be preprocessed.
+++ /dev/null
-/*
- * Copyright (c) 2002-2003 Luigi Rizzo
- * Copyright (c) 1996 Alex Nash, Paul Traina, Poul-Henning Kamp
- * Copyright (c) 1994 Ugen J.S.Antsilevich
- *
- * Idea and grammar partially left from:
- * Copyright (c) 1993 Daniel Boulet
- *
- * Redistribution and use in source forms, with and without modification,
- * are permitted provided that this entire comment appears intact.
- *
- * Redistribution in binary form may occur without any restrictions.
- * Obviously, it would be nice if you gave credit where credit is due
- * but requiring it would be too onerous.
- *
- * This software is provided ``AS IS'' without any warranties of any kind.
- *
- * NEW command line interface for IP firewall facility
- *
- * $FreeBSD: head/sbin/ipfw/nat.c 187770 2009-01-27 12:01:30Z luigi $
- *
- * In-kernel nat support
- */
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/sysctl.h>
-
-#include "ipfw2.h"
-
-#include <ctype.h>
-#include <err.h>
-#include <netdb.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sysexits.h>
-
-#define IPFW_INTERNAL /* Access to protected structures in ip_fw.h. */
-
-#include <net/if.h>
-#include <net/if_dl.h>
-#include <net/route.h> /* def. of struct route */
-#include <netinet/in.h>
-#include <netinet/ip_fw.h>
-#include <arpa/inet.h>
-#include <alias.h>
-
-static struct _s_x nat_params[] = {
- { "ip", TOK_IP },
- { "if", TOK_IF },
- { "log", TOK_ALOG },
- { "deny_in", TOK_DENY_INC },
- { "same_ports", TOK_SAME_PORTS },
- { "unreg_only", TOK_UNREG_ONLY },
- { "reset", TOK_RESET_ADDR },
- { "reverse", TOK_ALIAS_REV },
- { "proxy_only", TOK_PROXY_ONLY },
- { "redirect_addr", TOK_REDIR_ADDR },
- { "redirect_port", TOK_REDIR_PORT },
- { "redirect_proto", TOK_REDIR_PROTO },
- { NULL, 0 } /* terminator */
-};
-
-
-/*
- * Search for interface with name "ifn", and fill n accordingly:
- *
- * n->ip ip address of interface "ifn"
- * n->if_name copy of interface name "ifn"
- */
-static void
-set_addr_dynamic(const char *ifn, struct cfg_nat *n)
-{
- size_t needed;
- int mib[6];
- char *buf, *lim, *next;
- struct if_msghdr *ifm;
- struct ifa_msghdr *ifam;
- struct sockaddr_dl *sdl;
- struct sockaddr_in *sin;
- int ifIndex, ifMTU;
-
- mib[0] = CTL_NET;
- mib[1] = PF_ROUTE;
- mib[2] = 0;
- mib[3] = AF_INET;
- mib[4] = NET_RT_IFLIST;
- mib[5] = 0;
-/*
- * Get interface data.
- */
- if (sysctl(mib, 6, NULL, &needed, NULL, 0) == -1)
- err(1, "iflist-sysctl-estimate");
- buf = safe_calloc(1, needed);
- if (sysctl(mib, 6, buf, &needed, NULL, 0) == -1)
- err(1, "iflist-sysctl-get");
- lim = buf + needed;
-/*
- * Loop through interfaces until one with
- * given name is found. This is done to
- * find correct interface index for routing
- * message processing.
- */
- ifIndex = 0;
- next = buf;
- while (next < lim) {
- ifm = (struct if_msghdr *)next;
- next += ifm->ifm_msglen;
- if (ifm->ifm_version != RTM_VERSION) {
- if (co.verbose)
- warnx("routing message version %d "
- "not understood", ifm->ifm_version);
- continue;
- }
- if (ifm->ifm_type == RTM_IFINFO) {
- sdl = (struct sockaddr_dl *)(ifm + 1);
- if (strlen(ifn) == sdl->sdl_nlen &&
- strncmp(ifn, sdl->sdl_data, sdl->sdl_nlen) == 0) {
- ifIndex = ifm->ifm_index;
- ifMTU = ifm->ifm_data.ifi_mtu;
- break;
- }
- }
- }
- if (!ifIndex)
- errx(1, "unknown interface name %s", ifn);
-/*
- * Get interface address.
- */
- sin = NULL;
- while (next < lim) {
- ifam = (struct ifa_msghdr *)next;
- next += ifam->ifam_msglen;
- if (ifam->ifam_version != RTM_VERSION) {
- if (co.verbose)
- warnx("routing message version %d "
- "not understood", ifam->ifam_version);
- continue;
- }
- if (ifam->ifam_type != RTM_NEWADDR)
- break;
- if (ifam->ifam_addrs & RTA_IFA) {
- int i;
- char *cp = (char *)(ifam + 1);
-
- for (i = 1; i < RTA_IFA; i <<= 1) {
- if (ifam->ifam_addrs & i)
- cp += SA_SIZE((struct sockaddr *)cp);
- }
- if (((struct sockaddr *)cp)->sa_family == AF_INET) {
- sin = (struct sockaddr_in *)cp;
- break;
- }
- }
- }
- if (sin == NULL)
- errx(1, "%s: cannot get interface address", ifn);
-
- n->ip = sin->sin_addr;
- strncpy(n->if_name, ifn, IF_NAMESIZE);
-
- free(buf);
-}
-
-/*
- * XXX - The following functions, macros and definitions come from natd.c:
- * it would be better to move them outside natd.c, in a file
- * (redirect_support.[ch]?) shared by ipfw and natd, but for now i can live
- * with it.
- */
-
-/*
- * Definition of a port range, and macros to deal with values.
- * FORMAT: HI 16-bits == first port in range, 0 == all ports.
- * LO 16-bits == number of ports in range
- * NOTES: - Port values are not stored in network byte order.
- */
-
-#define port_range u_long
-
-#define GETLOPORT(x) ((x) >> 0x10)
-#define GETNUMPORTS(x) ((x) & 0x0000ffff)
-#define GETHIPORT(x) (GETLOPORT((x)) + GETNUMPORTS((x)))
-
-/* Set y to be the low-port value in port_range variable x. */
-#define SETLOPORT(x,y) ((x) = ((x) & 0x0000ffff) | ((y) << 0x10))
-
-/* Set y to be the number of ports in port_range variable x. */
-#define SETNUMPORTS(x,y) ((x) = ((x) & 0xffff0000) | (y))
-
-static void
-StrToAddr (const char* str, struct in_addr* addr)
-{
- struct hostent* hp;
-
- if (inet_aton (str, addr))
- return;
-
- hp = gethostbyname (str);
- if (!hp)
- errx (1, "unknown host %s", str);
-
- memcpy (addr, hp->h_addr, sizeof (struct in_addr));
-}
-
-static int
-StrToPortRange (const char* str, const char* proto, port_range *portRange)
-{
- char* sep;
- struct servent* sp;
- char* end;
- u_short loPort;
- u_short hiPort;
-
- /* First see if this is a service, return corresponding port if so. */
- sp = getservbyname (str,proto);
- if (sp) {
- SETLOPORT(*portRange, ntohs(sp->s_port));
- SETNUMPORTS(*portRange, 1);
- return 0;
- }
-
- /* Not a service, see if it's a single port or port range. */
- sep = strchr (str, '-');
- if (sep == NULL) {
- SETLOPORT(*portRange, strtol(str, &end, 10));
- if (end != str) {
- /* Single port. */
- SETNUMPORTS(*portRange, 1);
- return 0;
- }
-
- /* Error in port range field. */
- errx (EX_DATAERR, "%s/%s: unknown service", str, proto);
- }
-
- /* Port range, get the values and sanity check. */
- sscanf (str, "%hu-%hu", &loPort, &hiPort);
- SETLOPORT(*portRange, loPort);
- SETNUMPORTS(*portRange, 0); /* Error by default */
- if (loPort <= hiPort)
- SETNUMPORTS(*portRange, hiPort - loPort + 1);
-
- if (GETNUMPORTS(*portRange) == 0)
- errx (EX_DATAERR, "invalid port range %s", str);
-
- return 0;
-}
-
-static int
-StrToProto (const char* str)
-{
- if (!strcmp (str, "tcp"))
- return IPPROTO_TCP;
-
- if (!strcmp (str, "udp"))
- return IPPROTO_UDP;
-
- if (!strcmp (str, "sctp"))
- return IPPROTO_SCTP;
- errx (EX_DATAERR, "unknown protocol %s. Expected sctp, tcp or udp", str);
-}
-
-static int
-StrToAddrAndPortRange (const char* str, struct in_addr* addr, char* proto,
- port_range *portRange)
-{
- char* ptr;
-
- ptr = strchr (str, ':');
- if (!ptr)
- errx (EX_DATAERR, "%s is missing port number", str);
-
- *ptr = '\0';
- ++ptr;
-
- StrToAddr (str, addr);
- return StrToPortRange (ptr, proto, portRange);
-}
-
-/* End of stuff taken from natd.c. */
-
-#define INC_ARGCV() do { \
- (*_av)++; \
- (*_ac)--; \
- av = *_av; \
- ac = *_ac; \
-} while(0)
-
-/*
- * The next 3 functions add support for the addr, port and proto redirect and
- * their logic is loosely based on SetupAddressRedirect(), SetupPortRedirect()
- * and SetupProtoRedirect() from natd.c.
- *
- * Every setup_* function fills at least one redirect entry
- * (struct cfg_redir) and zero or more server pool entry (struct cfg_spool)
- * in buf.
- *
- * The format of data in buf is:
- *
- *
- * cfg_nat cfg_redir cfg_spool ...... cfg_spool
- *
- * ------------------------------------- ------------
- * | | .....X ... | | | | .....
- * ------------------------------------- ...... ------------
- * ^
- * spool_cnt n=0 ...... n=(X-1)
- *
- * len points to the amount of available space in buf
- * space counts the memory consumed by every function
- *
- * XXX - Every function get all the argv params so it
- * has to check, in optional parameters, that the next
- * args is a valid option for the redir entry and not
- * another token. Only redir_port and redir_proto are
- * affected by this.
- */
-
-static int
-setup_redir_addr(char *spool_buf, unsigned int len,
- int *_ac, char ***_av)
-{
- char **av, *sep; /* Token separator. */
- /* Temporary buffer used to hold server pool ip's. */
- char tmp_spool_buf[NAT_BUF_LEN];
- int ac, space, lsnat;
- struct cfg_redir *r;
- struct cfg_spool *tmp;
-
- av = *_av;
- ac = *_ac;
- space = 0;
- lsnat = 0;
- if (len >= SOF_REDIR) {
- r = (struct cfg_redir *)spool_buf;
- /* Skip cfg_redir at beginning of buf. */
- spool_buf = &spool_buf[SOF_REDIR];
- space = SOF_REDIR;
- len -= SOF_REDIR;
- } else
- goto nospace;
- r->mode = REDIR_ADDR;
- /* Extract local address. */
- if (ac == 0)
- errx(EX_DATAERR, "redirect_addr: missing local address");
- sep = strchr(*av, ',');
- if (sep) { /* LSNAT redirection syntax. */
- r->laddr.s_addr = INADDR_NONE;
- /* Preserve av, copy spool servers to tmp_spool_buf. */
- strncpy(tmp_spool_buf, *av, strlen(*av)+1);
- lsnat = 1;
- } else
- StrToAddr(*av, &r->laddr);
- INC_ARGCV();
-
- /* Extract public address. */
- if (ac == 0)
- errx(EX_DATAERR, "redirect_addr: missing public address");
- StrToAddr(*av, &r->paddr);
- INC_ARGCV();
-
- /* Setup LSNAT server pool. */
- if (sep) {
- sep = strtok(tmp_spool_buf, ",");
- while (sep != NULL) {
- tmp = (struct cfg_spool *)spool_buf;
- if (len < SOF_SPOOL)
- goto nospace;
- len -= SOF_SPOOL;
- space += SOF_SPOOL;
- StrToAddr(sep, &tmp->addr);
- tmp->port = ~0;
- r->spool_cnt++;
- /* Point to the next possible cfg_spool. */
- spool_buf = &spool_buf[SOF_SPOOL];
- sep = strtok(NULL, ",");
- }
- }
- return(space);
-nospace:
- errx(EX_DATAERR, "redirect_addr: buf is too small\n");
-}
-
-static int
-setup_redir_port(char *spool_buf, unsigned int len,
- int *_ac, char ***_av)
-{
- char **av, *sep, *protoName;
- char tmp_spool_buf[NAT_BUF_LEN];
- int ac, space, lsnat;
- struct cfg_redir *r;
- struct cfg_spool *tmp;
- u_short numLocalPorts;
- port_range portRange;
-
- av = *_av;
- ac = *_ac;
- space = 0;
- lsnat = 0;
- numLocalPorts = 0;
-
- if (len >= SOF_REDIR) {
- r = (struct cfg_redir *)spool_buf;
- /* Skip cfg_redir at beginning of buf. */
- spool_buf = &spool_buf[SOF_REDIR];
- space = SOF_REDIR;
- len -= SOF_REDIR;
- } else
- goto nospace;
- r->mode = REDIR_PORT;
- /*
- * Extract protocol.
- */
- if (ac == 0)
- errx (EX_DATAERR, "redirect_port: missing protocol");
- r->proto = StrToProto(*av);
- protoName = *av;
- INC_ARGCV();
-
- /*
- * Extract local address.
- */
- if (ac == 0)
- errx (EX_DATAERR, "redirect_port: missing local address");
-
- sep = strchr(*av, ',');
- /* LSNAT redirection syntax. */
- if (sep) {
- r->laddr.s_addr = INADDR_NONE;
- r->lport = ~0;
- numLocalPorts = 1;
- /* Preserve av, copy spool servers to tmp_spool_buf. */
- strncpy(tmp_spool_buf, *av, strlen(*av)+1);
- lsnat = 1;
- } else {
- /*
- * The sctp nat does not allow the port numbers to be mapped to
- * new port numbers. Therefore, no ports are to be specified
- * in the target port field.
- */
- if (r->proto == IPPROTO_SCTP) {
- if (strchr (*av, ':'))
- errx(EX_DATAERR, "redirect_port:"
- "port numbers do not change in sctp, so do not "
- "specify them as part of the target");
- else
- StrToAddr(*av, &r->laddr);
- } else {
- if (StrToAddrAndPortRange (*av, &r->laddr, protoName,
- &portRange) != 0)
- errx(EX_DATAERR, "redirect_port:"
- "invalid local port range");
-
- r->lport = GETLOPORT(portRange);
- numLocalPorts = GETNUMPORTS(portRange);
- }
- }
- INC_ARGCV();
-
- /*
- * Extract public port and optionally address.
- */
- if (ac == 0)
- errx (EX_DATAERR, "redirect_port: missing public port");
-
- sep = strchr (*av, ':');
- if (sep) {
- if (StrToAddrAndPortRange (*av, &r->paddr, protoName,
- &portRange) != 0)
- errx(EX_DATAERR, "redirect_port:"
- "invalid public port range");
- } else {
- r->paddr.s_addr = INADDR_ANY;
- if (StrToPortRange (*av, protoName, &portRange) != 0)
- errx(EX_DATAERR, "redirect_port:"
- "invalid public port range");
- }
-
- r->pport = GETLOPORT(portRange);
- if (r->proto == IPPROTO_SCTP) { /* so the logic below still works */
- numLocalPorts = GETNUMPORTS(portRange);
- r->lport = r->pport;
- }
- r->pport_cnt = GETNUMPORTS(portRange);
- INC_ARGCV();
-
- /*
- * Extract remote address and optionally port.
- */
- /*
- * NB: isalpha(**av) => we've to check that next parameter is really an
- * option for this redirect entry, else stop here processing arg[cv].
- */
- if (ac != 0 && !isalpha(**av)) {
- sep = strchr (*av, ':');
- if (sep) {
- if (StrToAddrAndPortRange (*av, &r->raddr, protoName,
- &portRange) != 0)
- errx(EX_DATAERR, "redirect_port:"
- "invalid remote port range");
- } else {
- SETLOPORT(portRange, 0);
- SETNUMPORTS(portRange, 1);
- StrToAddr (*av, &r->raddr);
- }
- INC_ARGCV();
- } else {
- SETLOPORT(portRange, 0);
- SETNUMPORTS(portRange, 1);
- r->raddr.s_addr = INADDR_ANY;
- }
- r->rport = GETLOPORT(portRange);
- r->rport_cnt = GETNUMPORTS(portRange);
-
- /*
- * Make sure port ranges match up, then add the redirect ports.
- */
- if (numLocalPorts != r->pport_cnt)
- errx(EX_DATAERR, "redirect_port:"
- "port ranges must be equal in size");
-
- /* Remote port range is allowed to be '0' which means all ports. */
- if (r->rport_cnt != numLocalPorts &&
- (r->rport_cnt != 1 || r->rport != 0))
- errx(EX_DATAERR, "redirect_port: remote port must"
- "be 0 or equal to local port range in size");
-
- /*
- * Setup LSNAT server pool.
- */
- if (lsnat) {
- sep = strtok(tmp_spool_buf, ",");
- while (sep != NULL) {
- tmp = (struct cfg_spool *)spool_buf;
- if (len < SOF_SPOOL)
- goto nospace;
- len -= SOF_SPOOL;
- space += SOF_SPOOL;
- /*
- * The sctp nat does not allow the port numbers to be mapped to new port numbers
- * Therefore, no ports are to be specified in the targetport field
- */
- if (r->proto == IPPROTO_SCTP) {
- if (strchr (sep, ':')) {
- errx(EX_DATAERR, "redirect_port:"
- "port numbers do not change in "
- "sctp, so do not specify them as "
- "part of the target");
- } else {
- StrToAddr(sep, &tmp->addr);
- tmp->port = r->pport;
- }
- } else {
- if (StrToAddrAndPortRange(sep, &tmp->addr,
- protoName, &portRange) != 0)
- errx(EX_DATAERR, "redirect_port:"
- "invalid local port range");
- if (GETNUMPORTS(portRange) != 1)
- errx(EX_DATAERR, "redirect_port:"
- " local port must be single in "
- "this context");
- tmp->port = GETLOPORT(portRange);
- }
- r->spool_cnt++;
- /* Point to the next possible cfg_spool. */
- spool_buf = &spool_buf[SOF_SPOOL];
- sep = strtok(NULL, ",");
- }
- }
- return (space);
-nospace:
- errx(EX_DATAERR, "redirect_port: buf is too small\n");
-}
-
-static int
-setup_redir_proto(char *spool_buf, unsigned int len,
- int *_ac, char ***_av)
-{
- char **av;
- int ac, space;
- struct protoent *protoent;
- struct cfg_redir *r;
-
- av = *_av;
- ac = *_ac;
- if (len >= SOF_REDIR) {
- r = (struct cfg_redir *)spool_buf;
- /* Skip cfg_redir at beginning of buf. */
- spool_buf = &spool_buf[SOF_REDIR];
- space = SOF_REDIR;
- len -= SOF_REDIR;
- } else
- goto nospace;
- r->mode = REDIR_PROTO;
- /*
- * Extract protocol.
- */
- if (ac == 0)
- errx(EX_DATAERR, "redirect_proto: missing protocol");
-
- protoent = getprotobyname(*av);
- if (protoent == NULL)
- errx(EX_DATAERR, "redirect_proto: unknown protocol %s", *av);
- else
- r->proto = protoent->p_proto;
-
- INC_ARGCV();
-
- /*
- * Extract local address.
- */
- if (ac == 0)
- errx(EX_DATAERR, "redirect_proto: missing local address");
- else
- StrToAddr(*av, &r->laddr);
-
- INC_ARGCV();
-
- /*
- * Extract optional public address.
- */
- if (ac == 0) {
- r->paddr.s_addr = INADDR_ANY;
- r->raddr.s_addr = INADDR_ANY;
- } else {
- /* see above in setup_redir_port() */
- if (!isalpha(**av)) {
- StrToAddr(*av, &r->paddr);
- INC_ARGCV();
-
- /*
- * Extract optional remote address.
- */
- /* see above in setup_redir_port() */
- if (ac!=0 && !isalpha(**av)) {
- StrToAddr(*av, &r->raddr);
- INC_ARGCV();
- }
- }
- }
- return (space);
-nospace:
- errx(EX_DATAERR, "redirect_proto: buf is too small\n");
-}
-
-static void
-print_nat_config(unsigned char *buf)
-{
- struct cfg_nat *n;
- int i, cnt, flag, off;
- struct cfg_redir *t;
- struct cfg_spool *s;
- struct protoent *p;
-
- n = (struct cfg_nat *)buf;
- flag = 1;
- off = sizeof(*n);
- printf("ipfw nat %u config", n->id);
- if (strlen(n->if_name) != 0)
- printf(" if %s", n->if_name);
- else if (n->ip.s_addr != 0)
- printf(" ip %s", inet_ntoa(n->ip));
- while (n->mode != 0) {
- if (n->mode & PKT_ALIAS_LOG) {
- printf(" log");
- n->mode &= ~PKT_ALIAS_LOG;
- } else if (n->mode & PKT_ALIAS_DENY_INCOMING) {
- printf(" deny_in");
- n->mode &= ~PKT_ALIAS_DENY_INCOMING;
- } else if (n->mode & PKT_ALIAS_SAME_PORTS) {
- printf(" same_ports");
- n->mode &= ~PKT_ALIAS_SAME_PORTS;
- } else if (n->mode & PKT_ALIAS_UNREGISTERED_ONLY) {
- printf(" unreg_only");
- n->mode &= ~PKT_ALIAS_UNREGISTERED_ONLY;
- } else if (n->mode & PKT_ALIAS_RESET_ON_ADDR_CHANGE) {
- printf(" reset");
- n->mode &= ~PKT_ALIAS_RESET_ON_ADDR_CHANGE;
- } else if (n->mode & PKT_ALIAS_REVERSE) {
- printf(" reverse");
- n->mode &= ~PKT_ALIAS_REVERSE;
- } else if (n->mode & PKT_ALIAS_PROXY_ONLY) {
- printf(" proxy_only");
- n->mode &= ~PKT_ALIAS_PROXY_ONLY;
- }
- }
- /* Print all the redirect's data configuration. */
- for (cnt = 0; cnt < n->redir_cnt; cnt++) {
- t = (struct cfg_redir *)&buf[off];
- off += SOF_REDIR;
- switch (t->mode) {
- case REDIR_ADDR:
- printf(" redirect_addr");
- if (t->spool_cnt == 0)
- printf(" %s", inet_ntoa(t->laddr));
- else
- for (i = 0; i < t->spool_cnt; i++) {
- s = (struct cfg_spool *)&buf[off];
- if (i)
- printf(",");
- else
- printf(" ");
- printf("%s", inet_ntoa(s->addr));
- off += SOF_SPOOL;
- }
- printf(" %s", inet_ntoa(t->paddr));
- break;
- case REDIR_PORT:
- p = getprotobynumber(t->proto);
- printf(" redirect_port %s ", p->p_name);
- if (!t->spool_cnt) {
- printf("%s:%u", inet_ntoa(t->laddr), t->lport);
- if (t->pport_cnt > 1)
- printf("-%u", t->lport +
- t->pport_cnt - 1);
- } else
- for (i=0; i < t->spool_cnt; i++) {
- s = (struct cfg_spool *)&buf[off];
- if (i)
- printf(",");
- printf("%s:%u", inet_ntoa(s->addr),
- s->port);
- off += SOF_SPOOL;
- }
-
- printf(" ");
- if (t->paddr.s_addr)
- printf("%s:", inet_ntoa(t->paddr));
- printf("%u", t->pport);
- if (!t->spool_cnt && t->pport_cnt > 1)
- printf("-%u", t->pport + t->pport_cnt - 1);
-
- if (t->raddr.s_addr) {
- printf(" %s", inet_ntoa(t->raddr));
- if (t->rport) {
- printf(":%u", t->rport);
- if (!t->spool_cnt && t->rport_cnt > 1)
- printf("-%u", t->rport +
- t->rport_cnt - 1);
- }
- }
- break;
- case REDIR_PROTO:
- p = getprotobynumber(t->proto);
- printf(" redirect_proto %s %s", p->p_name,
- inet_ntoa(t->laddr));
- if (t->paddr.s_addr != 0) {
- printf(" %s", inet_ntoa(t->paddr));
- if (t->raddr.s_addr)
- printf(" %s", inet_ntoa(t->raddr));
- }
- break;
- default:
- errx(EX_DATAERR, "unknown redir mode");
- break;
- }
- }
- printf("\n");
-}
-
-void
-ipfw_config_nat(int ac, char **av)
-{
- struct cfg_nat *n; /* Nat instance configuration. */
- int i, len, off, tok;
- char *id, buf[NAT_BUF_LEN]; /* Buffer for serialized data. */
-
- len = NAT_BUF_LEN;
- /* Offset in buf: save space for n at the beginning. */
- off = sizeof(*n);
- memset(buf, 0, sizeof(buf));
- n = (struct cfg_nat *)buf;
-
- av++; ac--;
- /* Nat id. */
- if (ac && isdigit(**av)) {
- id = *av;
- i = atoi(*av);
- ac--; av++;
- n->id = i;
- } else
- errx(EX_DATAERR, "missing nat id");
- if (ac == 0)
- errx(EX_DATAERR, "missing option");
-
- while (ac > 0) {
- tok = match_token(nat_params, *av);
- ac--; av++;
- switch (tok) {
- case TOK_IP:
- if (ac == 0)
- errx(EX_DATAERR, "missing option");
- if (!inet_aton(av[0], &(n->ip)))
- errx(EX_DATAERR, "bad ip address ``%s''",
- av[0]);
- ac--; av++;
- break;
- case TOK_IF:
- if (ac == 0)
- errx(EX_DATAERR, "missing option");
- set_addr_dynamic(av[0], n);
- ac--; av++;
- break;
- case TOK_ALOG:
- n->mode |= PKT_ALIAS_LOG;
- break;
- case TOK_DENY_INC:
- n->mode |= PKT_ALIAS_DENY_INCOMING;
- break;
- case TOK_SAME_PORTS:
- n->mode |= PKT_ALIAS_SAME_PORTS;
- break;
- case TOK_UNREG_ONLY:
- n->mode |= PKT_ALIAS_UNREGISTERED_ONLY;
- break;
- case TOK_RESET_ADDR:
- n->mode |= PKT_ALIAS_RESET_ON_ADDR_CHANGE;
- break;
- case TOK_ALIAS_REV:
- n->mode |= PKT_ALIAS_REVERSE;
- break;
- case TOK_PROXY_ONLY:
- n->mode |= PKT_ALIAS_PROXY_ONLY;
- break;
- /*
- * All the setup_redir_* functions work directly in the final
- * buffer, see above for details.
- */
- case TOK_REDIR_ADDR:
- case TOK_REDIR_PORT:
- case TOK_REDIR_PROTO:
- switch (tok) {
- case TOK_REDIR_ADDR:
- i = setup_redir_addr(&buf[off], len, &ac, &av);
- break;
- case TOK_REDIR_PORT:
- i = setup_redir_port(&buf[off], len, &ac, &av);
- break;
- case TOK_REDIR_PROTO:
- i = setup_redir_proto(&buf[off], len, &ac, &av);
- break;
- }
- n->redir_cnt++;
- off += i;
- len -= i;
- break;
- default:
- errx(EX_DATAERR, "unrecognised option ``%s''", av[-1]);
- }
- }
-
- i = do_cmd(IP_FW_NAT_CFG, buf, off);
- if (i)
- err(1, "setsockopt(%s)", "IP_FW_NAT_CFG");
-
- if (!co.do_quiet) {
- /* After every modification, we show the resultant rule. */
- int _ac = 3;
- const char *_av[] = {"show", "config", id};
- ipfw_show_nat(_ac, (char **)(void *)_av);
- }
-}
-
-
-void
-ipfw_show_nat(int ac, char **av)
-{
- struct cfg_nat *n;
- struct cfg_redir *e;
- int cmd, i, nbytes, do_cfg, do_rule, frule, lrule, nalloc, size;
- int nat_cnt, redir_cnt, r;
- uint8_t *data, *p;
- char *endptr;
-
- do_rule = 0;
- nalloc = 1024;
- size = 0;
- data = NULL;
- frule = 0;
- lrule = IPFW_DEFAULT_RULE; /* max ipfw rule number */
- ac--; av++;
-
- if (co.test_only)
- return;
-
- /* Parse parameters. */
- for (cmd = IP_FW_NAT_GET_LOG, do_cfg = 0; ac != 0; ac--, av++) {
- if (!strncmp(av[0], "config", strlen(av[0]))) {
- cmd = IP_FW_NAT_GET_CONFIG, do_cfg = 1;
- continue;
- }
- /* Convert command line rule #. */
- frule = lrule = strtoul(av[0], &endptr, 10);
- if (*endptr == '-')
- lrule = strtoul(endptr+1, &endptr, 10);
- if (lrule == 0)
- err(EX_USAGE, "invalid rule number: %s", av[0]);
- do_rule = 1;
- }
-
- nbytes = nalloc;
- while (nbytes >= nalloc) {
- nalloc = nalloc * 2;
- nbytes = nalloc;
- data = safe_realloc(data, nbytes);
- if (do_cmd(cmd, data, (uintptr_t)&nbytes) < 0)
- err(EX_OSERR, "getsockopt(IP_FW_GET_%s)",
- (cmd == IP_FW_NAT_GET_LOG) ? "LOG" : "CONFIG");
- }
- if (nbytes == 0)
- exit(0);
- if (do_cfg) {
- nat_cnt = *((int *)data);
- for (i = sizeof(nat_cnt); nat_cnt; nat_cnt--) {
- n = (struct cfg_nat *)&data[i];
- if (frule <= n->id && lrule >= n->id)
- print_nat_config(&data[i]);
- i += sizeof(struct cfg_nat);
- for (redir_cnt = 0; redir_cnt < n->redir_cnt; redir_cnt++) {
- e = (struct cfg_redir *)&data[i];
- i += sizeof(struct cfg_redir) + e->spool_cnt *
- sizeof(struct cfg_spool);
- }
- }
- } else {
- for (i = 0; 1; i += LIBALIAS_BUF_SIZE + sizeof(int)) {
- p = &data[i];
- if (p == data + nbytes)
- break;
- bcopy(p, &r, sizeof(int));
- if (do_rule) {
- if (!(frule <= r && lrule >= r))
- continue;
- }
- printf("nat %u: %s\n", r, p+sizeof(int));
- }
- }
-}
--- /dev/null
+#/bin/bash
+
+COMMAND=ipfw
+
+
+echo .########## Set $COMMAND mode .##########
+$COMMAND add allow ip from any to any
+$COMMAND -q flush
+
+echo .########## empty rules .##########
+$COMMAND list
+$COMMAND add allow ip from any to any
+$COMMAND add allow ip from any to { 1.2.3.4 or 2.3.4.5 }
+$COMMAND add allow { dst-ip 1.2.3.4 or dst-ip 2.3.4.5 }
+
+echo .########## listing 3 rules .##########
+$COMMAND list
+
+$COMMAND delete 200
+echo .########## listing 2 rules .##########
+$COMMAND list
+
+$COMMAND table 10 add 1.2.3.4
+$COMMAND table 10 add 1.2.3.5
+$COMMAND table 10 add 1.2.3.6
+$COMMAND table 10 add 1.2.3.7/13
+$COMMAND table 10 add 1.2.3.7/20
+$COMMAND table 10 add 1.2.3.7/28
+
+echo .########## listing table 10 with 6 elements .##########
+$COMMAND table 10 list
+$COMMAND table 10 delete 1.2.3.6
+
+echo .########## listing table 10 with 5 elements .##########
+$COMMAND table 10 list
+$COMMAND table 10 flush
+
+echo .########## table 10 empty .##########
+$COMMAND table 10 list
+
+echo .########## move rule 100 to set 1 300 to 3 .##########
+$COMMAND set move rule 100 to 1
+$COMMAND set move rule 300 to 3
+$COMMAND -S show
+
+echo .########## move rule 200 to 2 but 200 do not exist .######
+$COMMAND set move rule 200 to 2
+
+echo .########## add some rules .##########
+$COMMAND add 200 queue 2 proto ip
+$COMMAND add 300 queue 5 proto ip
+$COMMAND add 400 queue 40 proto ip
+$COMMAND add 400 queue 50 proto ip
+
+echo .########## move rule 200 to 2 .######
+$COMMAND set move rule 200 to 2
+
+echo .########## move rule 400 to 5 .######
+$COMMAND set move rule 400 to 5
+
+echo .########## set 5 show 2 rules .######
+$COMMAND set 5 show
+
+echo .########## flush set 5 .######
+$COMMAND -q set 5 flush
+
+echo .########## set 5 show 0 rule .######
+$COMMAND set 5 show
+
+echo .########## disable set 1 .######
+$COMMAND set disable 1
+
+echo .########## show all rules except set 1 .######
+$COMMAND -S show
+
+echo .########## enable set 1 .######
+$COMMAND set enable 1
+
+echo .########## show all rules .######
+$COMMAND -S show
+
+
+
--- /dev/null
+LIBRARY ws2_32.dll\r
+\r
+EXPORTS\r
+FreeAddrInfoW\r
+GetAddrInfoW\r
+GetNameInfoW\r
+WEP\r
+WPUCompleteOverlappedRequest\r
+WSAAccept\r
+WSAAddressToStringA\r
+WSAAddressToStringW\r
+WSAAsyncGetHostByAddr\r
+WSAAsyncGetHostByName\r
+WSAAsyncGetProtoByName\r
+WSAAsyncGetProtoByNumber\r
+WSAAsyncGetServByName\r
+WSAAsyncGetServByPort\r
+WSAAsyncSelect\r
+WSACancelAsyncRequest\r
+WSACancelBlockingCall\r
+WSACleanup\r
+WSACloseEvent\r
+WSAConnect\r
+WSACreateEvent\r
+WSADuplicateSocketA\r
+WSADuplicateSocketW\r
+WSAEnumNameSpaceProvidersA\r
+WSAEnumNameSpaceProvidersW\r
+WSAEnumNetworkEvents\r
+WSAEnumProtocolsA\r
+WSAEnumProtocolsW\r
+WSAEventSelect\r
+WSAGetLastError\r
+WSAGetOverlappedResult\r
+WSAGetQOSByName\r
+WSAGetServiceClassInfoA\r
+WSAGetServiceClassInfoW\r
+WSAGetServiceClassNameByClassIdA\r
+WSAGetServiceClassNameByClassIdW\r
+WSAHtonl\r
+WSAHtons\r
+WSAInstallServiceClassA\r
+WSAInstallServiceClassW\r
+WSAIoctl\r
+WSAIsBlocking\r
+WSAJoinLeaf\r
+WSALookupServiceBeginA\r
+WSALookupServiceBeginW\r
+WSALookupServiceEnd\r
+WSALookupServiceNextA\r
+WSALookupServiceNextW\r
+WSANSPIoctl\r
+WSANtohl\r
+WSANtohs\r
+WSAProviderConfigChange\r
+WSARecv\r
+WSARecvDisconnect\r
+WSARecvFrom\r
+WSARemoveServiceClass\r
+WSAResetEvent\r
+WSASend\r
+WSASendDisconnect\r
+WSASendTo\r
+WSASetBlockingHook\r
+WSASetEvent\r
+WSASetLastError\r
+WSASetServiceA\r
+WSASetServiceW\r
+WSASocketA\r
+WSASocketW\r
+WSAStartup\r
+WSAStringToAddressA\r
+WSAStringToAddressW\r
+WSAUnhookBlockingHook\r
+WSAWaitForMultipleEvents\r
+WSApSetPostRoutine\r
+WSCDeinstallProvider\r
+WSCEnableNSProvider\r
+WSCEnumProtocols\r
+WSCGetProviderPath\r
+WSCInstallNameSpace\r
+WSCInstallProvider\r
+WSCUnInstallNameSpace\r
+WSCUpdateProvider\r
+WSCWriteNameSpaceOrder\r
+WSCWriteProviderOrder\r
+__WSAFDIsSet\r
+accept\r
+bind\r
+closesocket\r
+connect\r
+freeaddrinfo\r
+getaddrinfo\r
+gethostbyaddr\r
+gethostbyname\r
+gethostname\r
+getnameinfo\r
+getpeername\r
+getprotobyname\r
+getprotobynumber\r
+getservbyname\r
+getservbyport\r
+getsockname\r
+getsockopt\r
+htonl\r
+htons\r
+inet_addr\r
+inet_ntoa\r
+ioctlsocket\r
+listen\r
+ntohl\r
+ntohs\r
+recv\r
+recvfrom\r
+select\r
+send\r
+sendto\r
+setsockopt\r
+shutdown\r
+socket\r
--- /dev/null
+#\r
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source\r
+# file to this component. This file merely indirects to the real make file\r
+# that is shared by all the components of NT\r
+#\r
+\r
+# Unconditional include, commented out in favour of the version check below.\r
+#!INCLUDE $(NTMAKEENV)\makefile.def\r
+\r
+\r
+# Select the shared makefile according to the targeted OS version:\r
+# - _NT_TARGET_VERSION defined and >= 0x501: include makefile.def\r
+# - _NT_TARGET_VERSION defined and < 0x501: include makefile.plt instead\r
+# and print a build warning that this sample is not valid for the target\r
+# - _NT_TARGET_VERSION not defined: include makefile.def\r
+# NOTE(review): 0x501 presumably denotes Windows XP -- confirm against the DDK.\r
+!IF DEFINED(_NT_TARGET_VERSION)\r
+! IF $(_NT_TARGET_VERSION)>=0x501\r
+! INCLUDE $(NTMAKEENV)\makefile.def\r
+! ELSE\r
+! INCLUDE $(NTMAKEENV)\makefile.plt\r
+# Only warn once per directory: the message is emitted during PASS1 only\r
+! IF "$(BUILD_PASS)"=="PASS1"\r
+! message BUILDMSG: Warning : The sample "$(MAKEDIR)" is not valid for the current OS target.\r
+! ENDIF\r
+! ENDIF\r
+!ELSE\r
+! INCLUDE $(NTMAKEENV)\makefile.def\r
+!ENDIF\r
--- /dev/null
+/*++\r
+\r
+Copyright (c) 1992-2000 Microsoft Corporation\r
+\r
+Module Name:\r
+\r
+ miniport.c\r
+\r
+Abstract:\r
+\r
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.\r
+\r
+Author:\r
+\r
+Environment:\r
+\r
+\r
+Revision History:\r
+\r
+\r
+--*/\r
+\r
+#include "precomp.h"\r
+#pragma hdrstop\r
+\r
+\r
+\r
+NDIS_STATUS\r
+MPInitialize(\r
+ OUT PNDIS_STATUS OpenErrorStatus,\r
+ OUT PUINT SelectedMediumIndex,\r
+ IN PNDIS_MEDIUM MediumArray,\r
+ IN UINT MediumArraySize,\r
+ IN NDIS_HANDLE MiniportAdapterHandle,\r
+ IN NDIS_HANDLE WrapperConfigurationContext\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ This is the initialize handler which gets called as a result of\r
+ the BindAdapter handler calling NdisIMInitializeDeviceInstanceEx.\r
+ The context parameter which we pass there is the adapter structure\r
+ which we retrieve here.\r
+\r
+ On success the adapter is linked into the global adapter list, the\r
+ ioctl device is registered and a reference is taken on the adapter.\r
+ In all cases MiniportInitPending is cleared and MiniportInitEvent is\r
+ signalled before returning.\r
+\r
+Arguments:\r
+\r
+ OpenErrorStatus Receives a copy of the status we return\r
+ SelectedMediumIndex Place-holder for what media we are using\r
+ MediumArray Array of ndis media passed down to us to pick from\r
+ MediumArraySize Size of the array\r
+ MiniportAdapterHandle The handle NDIS uses to refer to us\r
+ WrapperConfigurationContext Not referenced here (for use by NdisOpenConfiguration)\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_SUCCESS if initialization completed\r
+ NDIS_STATUS_UNSUPPORTED_MEDIA if our medium is not found in MediumArray\r
+\r
+--*/\r
+{\r
+ UINT i;\r
+ PADAPT pAdapt;\r
+ NDIS_STATUS Status = NDIS_STATUS_FAILURE;\r
+ NDIS_MEDIUM Medium;\r
+\r
+ UNREFERENCED_PARAMETER(WrapperConfigurationContext);\r
+ \r
+ //\r
+ // Single-pass do/while(FALSE): "break" is used to bail out to the\r
+ // common completion code below.\r
+ //\r
+ do\r
+ {\r
+ //\r
+ // Start off by retrieving our adapter context and storing\r
+ // the Miniport handle in it.\r
+ //\r
+ pAdapt = NdisIMGetDeviceContext(MiniportAdapterHandle);\r
+ pAdapt->MiniportIsHalted = FALSE;\r
+\r
+ DBGPRINT(("==> Miniport Initialize: Adapt %p\n", pAdapt));\r
+\r
+ //\r
+ // Usually we export the medium type of the adapter below as our\r
+ // virtual miniport's medium type. However if the adapter below us\r
+ // is a WAN device, then we claim to be of medium type 802.3.\r
+ //\r
+ Medium = pAdapt->Medium;\r
+\r
+ if (Medium == NdisMediumWan)\r
+ {\r
+ Medium = NdisMedium802_3;\r
+ }\r
+\r
+ //\r
+ // Look for our medium in the array offered by NDIS.\r
+ //\r
+ for (i = 0; i < MediumArraySize; i++)\r
+ {\r
+ if (MediumArray[i] == Medium)\r
+ {\r
+ *SelectedMediumIndex = i;\r
+ break;\r
+ }\r
+ }\r
+\r
+ //\r
+ // The loop ran to completion: no matching medium was offered.\r
+ //\r
+ if (i == MediumArraySize)\r
+ {\r
+ Status = NDIS_STATUS_UNSUPPORTED_MEDIA;\r
+ break;\r
+ }\r
+\r
+\r
+ //\r
+ // Set the attributes now. NDIS_ATTRIBUTE_DESERIALIZE enables us\r
+ // to make up-calls to NDIS without having to call NdisIMSwitchToMiniport\r
+ // or NdisIMQueueCallBack. This also forces us to protect our data using\r
+ // spinlocks where appropriate. Also in this case NDIS does not queue\r
+ // packets on our behalf. Since this is a very simple pass-thru\r
+ // miniport, we do not have a need to protect anything. However in\r
+ // a general case there will be a need to use per-adapter spin-locks\r
+ // for the packet queues at the very least.\r
+ //\r
+ NdisMSetAttributesEx(MiniportAdapterHandle,\r
+ pAdapt,\r
+ 0, // CheckForHangTimeInSeconds\r
+ NDIS_ATTRIBUTE_IGNORE_PACKET_TIMEOUT |\r
+ NDIS_ATTRIBUTE_IGNORE_REQUEST_TIMEOUT|\r
+ NDIS_ATTRIBUTE_INTERMEDIATE_DRIVER |\r
+ NDIS_ATTRIBUTE_DESERIALIZE |\r
+ NDIS_ATTRIBUTE_NO_HALT_ON_SUSPEND,\r
+ 0);\r
+\r
+ pAdapt->MiniportHandle = MiniportAdapterHandle;\r
+ //\r
+ // Initialize LastIndicatedStatus to be NDIS_STATUS_MEDIA_CONNECT\r
+ //\r
+ pAdapt->LastIndicatedStatus = NDIS_STATUS_MEDIA_CONNECT;\r
+ \r
+ //\r
+ // Initialize the power states for both the lower binding (PTDeviceState)\r
+ // and our miniport edge to Powered On.\r
+ //\r
+ pAdapt->MPDeviceState = NdisDeviceStateD0;\r
+ pAdapt->PTDeviceState = NdisDeviceStateD0;\r
+\r
+ //\r
+ // Add this adapter to the global pAdapt List\r
+ //\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+\r
+ pAdapt->Next = pAdaptList;\r
+ pAdaptList = pAdapt;\r
+\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+ \r
+ //\r
+ // Create an ioctl interface. The result is deliberately discarded,\r
+ // so a registration failure does not fail the initialization.\r
+ //\r
+ (VOID)PtRegisterDevice();\r
+\r
+ Status = NDIS_STATUS_SUCCESS;\r
+ }\r
+ while (FALSE);\r
+\r
+ //\r
+ // If we had received an UnbindAdapter notification on the underlying\r
+ // adapter, we would have blocked that thread waiting for the IM Init\r
+ // process to complete. Wake up any such thread.\r
+ //\r
+ ASSERT(pAdapt->MiniportInitPending == TRUE);\r
+ pAdapt->MiniportInitPending = FALSE;\r
+ NdisSetEvent(&pAdapt->MiniportInitEvent);\r
+\r
+ //\r
+ // Take a reference on the adapter only if initialization succeeded.\r
+ //\r
+ if (Status == NDIS_STATUS_SUCCESS)\r
+ {\r
+ PtReferenceAdapt(pAdapt);\r
+ }\r
+\r
+ DBGPRINT(("<== Miniport Initialize: Adapt %p, Status %x\n", pAdapt, Status));\r
+\r
+ *OpenErrorStatus = Status;\r
+\r
+ \r
+ return Status;\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+MPSend(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN UINT Flags\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Send Packet handler. Either this or our SendPackets (array) handler is called\r
+ based on which one is enabled in our Miniport Characteristics.\r
+\r
+ When built with NDIS51 and the packet has stack space remaining, the\r
+ original packet is sent down as is. Otherwise a new packet is allocated\r
+ from SendPacketPoolHandle, made to describe the same buffers as the\r
+ original, and sent down in its place.\r
+\r
+Arguments:\r
+\r
+ MiniportAdapterContext Pointer to the adapter\r
+ Packet Packet to send\r
+ Flags Copied into the flags of the newly allocated packet;\r
+ not used when the original packet is reused\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_FAILURE if MPDeviceState or PTDeviceState is above D0,\r
+ the status from NdisAllocatePacket if the allocation fails,\r
+ otherwise the return code from NdisSend\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;\r
+ NDIS_STATUS Status;\r
+ PNDIS_PACKET MyPacket;\r
+ PVOID MediaSpecificInfo = NULL;\r
+ ULONG MediaSpecificInfoSize = 0;\r
+\r
+ //\r
+ // The driver should fail the send if the virtual miniport is in low \r
+ // power state\r
+ //\r
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0)\r
+ {\r
+ return NDIS_STATUS_FAILURE;\r
+ }\r
+\r
+#ifdef NDIS51\r
+ //\r
+ // Use NDIS 5.1 packet stacking:\r
+ //\r
+ {\r
+ PNDIS_PACKET_STACK pStack;\r
+ BOOLEAN Remaining;\r
+\r
+ //\r
+ // Packet stacks: Check if we can use the same packet for sending down.\r
+ //\r
+\r
+ pStack = NdisIMGetCurrentPacketStack(Packet, &Remaining);\r
+ if (Remaining)\r
+ {\r
+ //\r
+ // We can reuse "Packet".\r
+ //\r
+ // NOTE: if we needed to keep per-packet information in packets\r
+ // sent down, we can use pStack->IMReserved[].\r
+ //\r
+ ASSERT(pStack);\r
+ //\r
+ // If the below miniport is going to low power state, stop sending down any packet.\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ return NDIS_STATUS_FAILURE;\r
+ }\r
+ //\r
+ // Count the send while still holding the lock; the count is\r
+ // dropped again below if the send does not pend.\r
+ //\r
+ pAdapt->OutstandingSends++;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ NdisSend(&Status,\r
+ pAdapt->BindingHandle,\r
+ Packet);\r
+\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+ ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+ }\r
+\r
+ return(Status);\r
+ }\r
+ }\r
+#endif // NDIS51\r
+\r
+ //\r
+ // We are either not using packet stacks, or there isn't stack space\r
+ // in the original packet passed down to us. Allocate a new packet\r
+ // to wrap the data with.\r
+ //\r
+ //\r
+ // If the below miniport is going to low power state, stop sending down any packet.\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ return NDIS_STATUS_FAILURE;\r
+ \r
+ }\r
+ pAdapt->OutstandingSends++;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ \r
+ NdisAllocatePacket(&Status,\r
+ &MyPacket,\r
+ pAdapt->SendPacketPoolHandle);\r
+\r
+ if (Status == NDIS_STATUS_SUCCESS)\r
+ {\r
+ PSEND_RSVD SendRsvd;\r
+\r
+ //\r
+ // Save a pointer to the original packet in our reserved\r
+ // area in the new packet. This is needed so that we can\r
+ // get back to the original packet when the new packet's send\r
+ // is completed.\r
+ //\r
+ SendRsvd = (PSEND_RSVD)(MyPacket->ProtocolReserved);\r
+ SendRsvd->OriginalPkt = Packet;\r
+\r
+ NdisGetPacketFlags(MyPacket) = Flags;\r
+\r
+ //\r
+ // Set up the new packet so that it describes the same\r
+ // data as the original packet.\r
+ //\r
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);\r
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);\r
+#ifdef WIN9X\r
+ //\r
+ // Work around the fact that NDIS does not initialize this\r
+ // to FALSE on Win9x.\r
+ //\r
+ NDIS_PACKET_VALID_COUNTS(MyPacket) = FALSE;\r
+#endif\r
+\r
+ //\r
+ // Copy the OOB data from the original packet to the new\r
+ // packet.\r
+ //\r
+ NdisMoveMemory(NDIS_OOB_DATA_FROM_PACKET(MyPacket),\r
+ NDIS_OOB_DATA_FROM_PACKET(Packet),\r
+ sizeof(NDIS_PACKET_OOB_DATA));\r
+\r
+#ifndef WIN9X\r
+ //\r
+ // Copy the right parts of per packet info into the new packet.\r
+ // This API is not available on Win9x since task offload is\r
+ // not supported on that platform.\r
+ //\r
+ NdisIMCopySendPerPacketInfo(MyPacket, Packet);\r
+#endif\r
+ \r
+ //\r
+ // Copy the Media specific information\r
+ //\r
+ NDIS_GET_PACKET_MEDIA_SPECIFIC_INFO(Packet,\r
+ &MediaSpecificInfo,\r
+ &MediaSpecificInfoSize);\r
+\r
+ if (MediaSpecificInfo || MediaSpecificInfoSize)\r
+ {\r
+ NDIS_SET_PACKET_MEDIA_SPECIFIC_INFO(MyPacket,\r
+ MediaSpecificInfo,\r
+ MediaSpecificInfoSize);\r
+ }\r
+\r
+ NdisSend(&Status,\r
+ pAdapt->BindingHandle,\r
+ MyPacket);\r
+\r
+\r
+ //\r
+ // The send completed synchronously: copy the completion info back\r
+ // to the original packet, free our packet and drop the send count.\r
+ //\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+#ifndef WIN9X\r
+ NdisIMCopySendCompletePerPacketInfo (Packet, MyPacket);\r
+#endif\r
+ NdisFreePacket(MyPacket);\r
+ ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+ //\r
+ // We are out of packets. The packet is not sent and the failing status\r
+ // from NdisAllocatePacket is returned. Alternatively we can deal with it:\r
+ // - By keeping separate send and receive pools\r
+ // - Dynamically allocate more pools as needed and free them when not needed\r
+ //\r
+ }\r
+\r
+ return(Status);\r
+}\r
+\r
+\r
+VOID\r
+MPSendPackets(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PPNDIS_PACKET PacketArray,\r
+ IN UINT NumberOfPackets\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Send Packet Array handler. Either this or our SendPacket handler is called\r
+ based on which one is enabled in our Miniport Characteristics.\r
+\r
+ Each packet is handled independently. Unless the send to the lower\r
+ binding returns NDIS_STATUS_PENDING, the packet is completed here\r
+ with NdisMSendComplete, carrying either the send status or a failure\r
+ status.\r
+\r
+Arguments:\r
+\r
+ MiniportAdapterContext Pointer to our adapter\r
+ PacketArray Set of packets to send\r
+ NumberOfPackets Self-explanatory\r
+\r
+Return Value:\r
+\r
+ None\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;\r
+ NDIS_STATUS Status;\r
+ UINT i;\r
+ PVOID MediaSpecificInfo = NULL;\r
+ UINT MediaSpecificInfoSize = 0;\r
+ \r
+\r
+ for (i = 0; i < NumberOfPackets; i++)\r
+ {\r
+ PNDIS_PACKET Packet, MyPacket;\r
+\r
+ Packet = PacketArray[i];\r
+ //\r
+ // The driver should fail the send if the virtual miniport is in low \r
+ // power state\r
+ //\r
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0)\r
+ {\r
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),\r
+ Packet,\r
+ NDIS_STATUS_FAILURE);\r
+ continue;\r
+ }\r
+\r
+#ifdef NDIS51\r
+\r
+ //\r
+ // Use NDIS 5.1 packet stacking:\r
+ //\r
+ {\r
+ PNDIS_PACKET_STACK pStack;\r
+ BOOLEAN Remaining;\r
+\r
+ //\r
+ // Packet stacks: Check if we can use the same packet for sending down.\r
+ //\r
+ pStack = NdisIMGetCurrentPacketStack(Packet, &Remaining);\r
+ if (Remaining)\r
+ {\r
+ //\r
+ // We can reuse "Packet".\r
+ //\r
+ // NOTE: if we needed to keep per-packet information in packets\r
+ // sent down, we can use pStack->IMReserved[].\r
+ //\r
+ ASSERT(pStack);\r
+ //\r
+ // If the below miniport is going to low power state, stop sending down any packet.\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),\r
+ Packet,\r
+ NDIS_STATUS_FAILURE);\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // Count the send while still holding the lock; the count\r
+ // is dropped again below if the send does not pend.\r
+ //\r
+ pAdapt->OutstandingSends++;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ \r
+ NdisSend(&Status,\r
+ pAdapt->BindingHandle,\r
+ Packet);\r
+ \r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),\r
+ Packet,\r
+ Status);\r
+ \r
+ ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+ }\r
+ }\r
+ //\r
+ // This packet has been dealt with; move on to the next one.\r
+ //\r
+ continue;\r
+ }\r
+ }\r
+#endif\r
+ //\r
+ // We are either not using packet stacks, or there is no stack space\r
+ // left in this packet: wrap its data in a newly allocated packet.\r
+ // Single-pass do/while(FALSE) so that "break" reaches the common\r
+ // completion code below.\r
+ //\r
+ do \r
+ {\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ //\r
+ // If the below miniport is going to low power state, stop sending down any packet.\r
+ //\r
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ pAdapt->OutstandingSends++;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ \r
+ NdisAllocatePacket(&Status,\r
+ &MyPacket,\r
+ pAdapt->SendPacketPoolHandle);\r
+\r
+ if (Status == NDIS_STATUS_SUCCESS)\r
+ {\r
+ PSEND_RSVD SendRsvd;\r
+\r
+ //\r
+ // Remember the original packet in the reserved area of the\r
+ // new packet, and make the new packet carry the same flags\r
+ // and describe the same buffers as the original.\r
+ //\r
+ SendRsvd = (PSEND_RSVD)(MyPacket->ProtocolReserved);\r
+ SendRsvd->OriginalPkt = Packet;\r
+\r
+ NdisGetPacketFlags(MyPacket) = NdisGetPacketFlags(Packet);\r
+\r
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);\r
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);\r
+#ifdef WIN9X\r
+ //\r
+ // Work around the fact that NDIS does not initialize this\r
+ // to FALSE on Win9x.\r
+ //\r
+ NDIS_PACKET_VALID_COUNTS(MyPacket) = FALSE;\r
+#endif // WIN9X\r
+\r
+ //\r
+ // Copy the OOB data from the original packet to the new\r
+ // packet.\r
+ //\r
+ NdisMoveMemory(NDIS_OOB_DATA_FROM_PACKET(MyPacket),\r
+ NDIS_OOB_DATA_FROM_PACKET(Packet),\r
+ sizeof(NDIS_PACKET_OOB_DATA));\r
+ //\r
+ // Copy relevant parts of the per packet info into the new packet\r
+ //\r
+#ifndef WIN9X\r
+ NdisIMCopySendPerPacketInfo(MyPacket, Packet);\r
+#endif\r
+\r
+ //\r
+ // Copy the Media specific information\r
+ //\r
+ NDIS_GET_PACKET_MEDIA_SPECIFIC_INFO(Packet,\r
+ &MediaSpecificInfo,\r
+ &MediaSpecificInfoSize);\r
+\r
+ if (MediaSpecificInfo || MediaSpecificInfoSize)\r
+ {\r
+ NDIS_SET_PACKET_MEDIA_SPECIFIC_INFO(MyPacket,\r
+ MediaSpecificInfo,\r
+ MediaSpecificInfoSize);\r
+ }\r
+\r
+ NdisSend(&Status,\r
+ pAdapt->BindingHandle,\r
+ MyPacket);\r
+\r
+ //\r
+ // The send completed synchronously: copy the completion info\r
+ // back to the original packet, free ours and drop the count.\r
+ //\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+#ifndef WIN9X\r
+ NdisIMCopySendCompletePerPacketInfo (Packet, MyPacket);\r
+#endif\r
+ NdisFreePacket(MyPacket);\r
+ ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // The driver cannot allocate a packet.\r
+ // \r
+ ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+ }\r
+ }\r
+ while (FALSE);\r
+\r
+ //\r
+ // Anything that is not pending is completed back to NDIS with the\r
+ // status determined above.\r
+ //\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+ NdisMSendComplete(ADAPT_MINIPORT_HANDLE(pAdapt),\r
+ Packet,\r
+ Status);\r
+ }\r
+ }\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+MPQueryInformation(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_OID Oid,\r
+ IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesWritten,\r
+ OUT PULONG BytesNeeded\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Entry point called by NDIS to query for the value of the specified OID.\r
+ Typical processing is to forward the query down to the underlying miniport.\r
+\r
+ The following OIDs are filtered here:\r
+\r
+ OID_PNP_QUERY_POWER - return success right here\r
+\r
+ OID_GEN_SUPPORTED_GUIDS - do not forward, otherwise we will show up\r
+ multiple instances of private GUIDs supported by the underlying miniport.\r
+\r
+ OID_PNP_CAPABILITIES - we do send this down to the lower miniport, but\r
+ the values returned are postprocessed before we complete this request;\r
+ see PtRequestComplete.\r
+\r
+ NOTE on OID_TCP_TASK_OFFLOAD - if this IM driver modifies the contents\r
+ of data it passes through such that a lower miniport may not be able\r
+ to perform TCP task offload, then it should not forward this OID down,\r
+ but fail it here with the status NDIS_STATUS_NOT_SUPPORTED. This is to\r
+ avoid performing incorrect transformations on data.\r
+\r
+ If our miniport edge (upper edge) is at a low-power state, fail the request.\r
+\r
+ If our protocol edge (lower edge) has been notified of a low-power state,\r
+ we pend this request until the miniport below has been set to D0. Since\r
+ requests to miniports are serialized always, at most a single request will\r
+ be pended.\r
+\r
+Arguments:\r
+\r
+ MiniportAdapterContext Pointer to the adapter structure\r
+ Oid Oid for this query\r
+ InformationBuffer Buffer for information\r
+ InformationBufferLength Size of this buffer\r
+ BytesWritten Specifies how much info is written\r
+ BytesNeeded In case the buffer is smaller than what we need, tell them how much is needed\r
+\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_SUCCESS for OID_PNP_QUERY_POWER\r
+ NDIS_STATUS_NOT_SUPPORTED for OID_GEN_SUPPORTED_GUIDS\r
+ NDIS_STATUS_FAILURE if the binding is being unbound, the miniport is\r
+ not at D0, or the system is standing by\r
+ NDIS_STATUS_PENDING if the request was queued or forwarded below; a\r
+ request that NdisRequest completes synchronously is completed through\r
+ PtRequestComplete and still reported as pending\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;\r
+ NDIS_STATUS Status = NDIS_STATUS_FAILURE;\r
+\r
+ do\r
+ {\r
+ if (Oid == OID_PNP_QUERY_POWER)\r
+ {\r
+ //\r
+ // Do not forward this.\r
+ //\r
+ Status = NDIS_STATUS_SUCCESS;\r
+ break;\r
+ }\r
+\r
+ if (Oid == OID_GEN_SUPPORTED_GUIDS)\r
+ {\r
+ //\r
+ // Do not forward this, otherwise we will end up with multiple\r
+ // instances of private GUIDs that the underlying miniport\r
+ // supports.\r
+ //\r
+ Status = NDIS_STATUS_NOT_SUPPORTED;\r
+ break;\r
+ }\r
+\r
+ if (Oid == OID_TCP_TASK_OFFLOAD)\r
+ {\r
+ //\r
+ // Fail this -if- this driver performs data transformations\r
+ // that can interfere with a lower driver's ability to offload\r
+ // TCP tasks.\r
+ //\r
+ // Status = NDIS_STATUS_NOT_SUPPORTED;\r
+ // break;\r
+ //\r
+ // As written this branch is empty, so the OID is forwarded\r
+ // like any other.\r
+ //\r
+ }\r
+ //\r
+ // If the miniport below is unbinding, just fail any request\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ if (pAdapt->UnbindingInProcess == TRUE)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ //\r
+ // All other queries are failed, if the miniport is not at D0,\r
+ //\r
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0) \r
+ {\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+\r
+ //\r
+ // Fill in the per-adapter request structure and remember where the\r
+ // caller wants the byte counts reported.\r
+ //\r
+ pAdapt->Request.RequestType = NdisRequestQueryInformation;\r
+ pAdapt->Request.DATA.QUERY_INFORMATION.Oid = Oid;\r
+ pAdapt->Request.DATA.QUERY_INFORMATION.InformationBuffer = InformationBuffer;\r
+ pAdapt->Request.DATA.QUERY_INFORMATION.InformationBufferLength = InformationBufferLength;\r
+ pAdapt->BytesNeeded = BytesNeeded;\r
+ pAdapt->BytesReadOrWritten = BytesWritten;\r
+\r
+ //\r
+ // If the miniport below is unbinding, fail the request\r
+ // (checked again because the lock was released above)\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ \r
+ if (pAdapt->UnbindingInProcess == TRUE)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ //\r
+ // If the Protocol device state is OFF, mark this request as being \r
+ // pended. We queue this until the device state is back to D0. \r
+ //\r
+ if ((pAdapt->PTDeviceState > NdisDeviceStateD0) \r
+ && (pAdapt->StandingBy == FALSE))\r
+ {\r
+ pAdapt->QueuedRequest = TRUE;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_PENDING;\r
+ break;\r
+ }\r
+ //\r
+ // This is in the process of powering down the system, always fail the request\r
+ // \r
+ if (pAdapt->StandingBy == TRUE)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ pAdapt->OutstandingRequests = TRUE;\r
+ \r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+\r
+ //\r
+ // default case, most requests will be passed to the miniport below\r
+ //\r
+ NdisRequest(&Status,\r
+ pAdapt->BindingHandle,\r
+ &pAdapt->Request);\r
+\r
+\r
+ //\r
+ // The request completed synchronously: run the completion routine\r
+ // ourselves and report the request as pending to our caller.\r
+ //\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+ PtRequestComplete(pAdapt, &pAdapt->Request, Status);\r
+ Status = NDIS_STATUS_PENDING;\r
+ }\r
+\r
+ } while (FALSE);\r
+\r
+ return(Status);\r
+\r
+}\r
+\r
+\r
+VOID\r
+MPQueryPNPCapabilities(\r
+    IN OUT PADAPT            pAdapt,\r
+    OUT PNDIS_STATUS         pStatus\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Post-processing step for an OID_PNP_CAPABILITIES query that was sent\r
+    down to the underlying miniport and has come back completed.\r
+\r
+    When the caller's buffer can hold an NDIS_PNP_CAPABILITIES structure,\r
+    the three wake-up fields are overwritten with\r
+    NdisDeviceStateUnspecified, the byte counts are filled in, both\r
+    device-state fields in the adapter are set to D0 and success is\r
+    reported. Otherwise only the required size is reported.\r
+\r
+Arguments:\r
+\r
+    pAdapt  - Pointer to the adapter structure; its Request, BytesNeeded\r
+              and BytesReadOrWritten members describe the query\r
+    pStatus - Receives NDIS_STATUS_SUCCESS, or NDIS_STATUS_RESOURCES when\r
+              the information buffer is too small\r
+\r
+Return Value:\r
+\r
+    None.\r
+\r
+--*/\r
+\r
+{\r
+    PNDIS_PNP_CAPABILITIES          pCaps;\r
+    PNDIS_PM_WAKE_UP_CAPABILITIES   pWakeUp;\r
+\r
+    //\r
+    // Buffer too small for the capabilities structure: tell the caller\r
+    // how much room is required and stop here.\r
+    //\r
+    if (pAdapt->Request.DATA.QUERY_INFORMATION.InformationBufferLength < sizeof(NDIS_PNP_CAPABILITIES))\r
+    {\r
+        *pAdapt->BytesNeeded = sizeof(NDIS_PNP_CAPABILITIES);\r
+        *pStatus = NDIS_STATUS_RESOURCES;\r
+        return;\r
+    }\r
+\r
+    pCaps = (PNDIS_PNP_CAPABILITIES)(pAdapt->Request.DATA.QUERY_INFORMATION.InformationBuffer);\r
+    pWakeUp = &pCaps->WakeUpCapabilities;\r
+\r
+    //\r
+    // An IM driver must overwrite these wake-up fields.\r
+    //\r
+    pWakeUp->MinMagicPacketWakeUp = NdisDeviceStateUnspecified;\r
+    pWakeUp->MinPatternWakeUp = NdisDeviceStateUnspecified;\r
+    pWakeUp->MinLinkChangeWakeUp = NdisDeviceStateUnspecified;\r
+\r
+    *pAdapt->BytesReadOrWritten = sizeof(NDIS_PNP_CAPABILITIES);\r
+    *pAdapt->BytesNeeded = 0;\r
+\r
+    //\r
+    // Internal power flags: by default the device is ON.\r
+    //\r
+    pAdapt->MPDeviceState = NdisDeviceStateD0;\r
+    pAdapt->PTDeviceState = NdisDeviceStateD0;\r
+\r
+    *pStatus = NDIS_STATUS_SUCCESS;\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+MPSetInformation(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_OID Oid,\r
+ __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesRead,\r
+ OUT PULONG BytesNeeded\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Miniport SetInfo handler.\r
+\r
+ In the case of OID_PNP_SET_POWER, record the power state and return the OID. \r
+ Do not pass below\r
+ If the device is suspended, do not block the SET_POWER_OID \r
+ as it is used to reactivate the Passthru miniport\r
+\r
+ \r
+ PM- If the MP is not ON (DeviceState > D0) return immediately (except for 'query power' and 'set power')\r
+ If MP is ON, but the PT is not at D0, then queue the request for later processing\r
+\r
+ Requests to miniports are always serialized\r
+\r
+\r
+Arguments:\r
+\r
+ MiniportAdapterContext Pointer to the adapter structure\r
+ Oid Oid for this query\r
+ InformationBuffer Buffer for information\r
+ InformationBufferLength Size of this buffer\r
+ BytesRead Specifies how much info is read\r
+ BytesNeeded In case the buffer is smaller than what we need, tell them how much is needed\r
+\r
+Return Value:\r
+\r
+ For OID_PNP_SET_POWER: the status produced by MPProcessSetPowerOid.\r
+ NDIS_STATUS_FAILURE if the lower binding is unbinding, the miniport is\r
+ not at D0, or the adapter is standing by.\r
+ NDIS_STATUS_PENDING if the request was marked as queued because the\r
+ protocol edge is not at D0.\r
+ Otherwise the return code from the NdisRequest below.\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;\r
+ NDIS_STATUS Status;\r
+\r
+ Status = NDIS_STATUS_FAILURE;\r
+\r
+ //\r
+ // Single-pass do/while(FALSE): every 'break' below jumps to the\r
+ // common return at the bottom.\r
+ //\r
+ do\r
+ {\r
+ //\r
+ // The Set Power should not be sent to the miniport below the Passthru, but is handled internally\r
+ //\r
+ if (Oid == OID_PNP_SET_POWER)\r
+ {\r
+ MPProcessSetPowerOid(&Status, \r
+ pAdapt, \r
+ InformationBuffer, \r
+ InformationBufferLength, \r
+ BytesRead, \r
+ BytesNeeded);\r
+ break;\r
+\r
+ }\r
+\r
+ //\r
+ // If the miniport below is unbinding, fail the request\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock); \r
+ if (pAdapt->UnbindingInProcess == TRUE)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ //\r
+ // All other Set Information requests are failed, if the miniport is\r
+ // not at D0 or is transitioning to a device state greater than D0.\r
+ // (MPDeviceState is read here after the lock has been released.)\r
+ //\r
+ if (pAdapt->MPDeviceState > NdisDeviceStateD0)\r
+ {\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+\r
+ // Set up the Request and return the result\r
+ // The caller's output pointers are saved in the adapter as well.\r
+ pAdapt->Request.RequestType = NdisRequestSetInformation;\r
+ pAdapt->Request.DATA.SET_INFORMATION.Oid = Oid;\r
+ pAdapt->Request.DATA.SET_INFORMATION.InformationBuffer = InformationBuffer;\r
+ pAdapt->Request.DATA.SET_INFORMATION.InformationBufferLength = InformationBufferLength;\r
+ pAdapt->BytesNeeded = BytesNeeded;\r
+ pAdapt->BytesReadOrWritten = BytesRead;\r
+\r
+ //\r
+ // If the miniport below is unbinding, fail the request.\r
+ // This is a second check: the lock was dropped after the first one,\r
+ // and this time it stays held across the remaining state tests.\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock); \r
+ if (pAdapt->UnbindingInProcess == TRUE)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ \r
+ //\r
+ // If the device below is at a low power state, we cannot send it the\r
+ // request now, and must pend it.\r
+ // Only the QueuedRequest flag is set here; the request itself is the\r
+ // one already stored in pAdapt->Request above.\r
+ //\r
+ if ((pAdapt->PTDeviceState > NdisDeviceStateD0) \r
+ && (pAdapt->StandingBy == FALSE))\r
+ {\r
+ pAdapt->QueuedRequest = TRUE;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_PENDING;\r
+ break;\r
+ }\r
+ //\r
+ // This is in the process of powering down the system, always fail the request\r
+ // \r
+ if (pAdapt->StandingBy == TRUE)\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ Status = NDIS_STATUS_FAILURE;\r
+ break;\r
+ }\r
+ //\r
+ // Mark a request as in flight while the lock is still held.\r
+ //\r
+ pAdapt->OutstandingRequests = TRUE;\r
+ \r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ //\r
+ // Forward the request to the device below.\r
+ //\r
+ NdisRequest(&Status,\r
+ pAdapt->BindingHandle,\r
+ &pAdapt->Request);\r
+\r
+ //\r
+ // Completed synchronously: copy the byte counts back to the caller and\r
+ // clear the in-flight flag. NOTE(review): for the pending case this is\r
+ // presumably done by the request-complete handler - not visible here.\r
+ //\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+ *BytesRead = pAdapt->Request.DATA.SET_INFORMATION.BytesRead;\r
+ *BytesNeeded = pAdapt->Request.DATA.SET_INFORMATION.BytesNeeded;\r
+ pAdapt->OutstandingRequests = FALSE;\r
+ }\r
+\r
+ } while (FALSE);\r
+\r
+ return(Status);\r
+}\r
+\r
+\r
+VOID\r
+MPProcessSetPowerOid(\r
+    IN OUT PNDIS_STATUS                             pNdisStatus,\r
+    IN PADAPT                                       pAdapt,\r
+    __in_bcount(InformationBufferLength) IN PVOID   InformationBuffer,\r
+    IN ULONG                                        InformationBufferLength,\r
+    OUT PULONG                                      BytesRead,\r
+    OUT PULONG                                      BytesNeeded\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+    This routine does all the processing for a request with a SetPower Oid.\r
+    The miniport should accept the Set Power and transition to the new state.\r
+\r
+    The Set Power should not be passed to the miniport below.\r
+\r
+    If the IM miniport is going into a low power state, then there is no\r
+    guarantee that it will ever be asked to go back to D0 before getting\r
+    halted. No requests should be pended or queued.\r
+\r
+Arguments:\r
+    pNdisStatus             - Receives the status of the operation:\r
+                              NDIS_STATUS_SUCCESS when the new state was\r
+                              recorded, NDIS_STATUS_INVALID_LENGTH when the\r
+                              buffer is too small, NDIS_STATUS_FAILURE when\r
+                              the requested transition is not allowed\r
+    pAdapt                  - The Adapter structure\r
+    InformationBuffer       - The New DeviceState\r
+    InformationBufferLength - Size of InformationBuffer in bytes\r
+    BytesRead               - No of bytes read\r
+    BytesNeeded             - No of bytes needed\r
+\r
+Return Value:\r
+    None. The result is reported through pNdisStatus.\r
+\r
+--*/\r
+{\r
+    NDIS_DEVICE_POWER_STATE NewDeviceState;\r
+\r
+    DBGPRINT(("==>MPProcessSetPowerOid: Adapt %p\n", pAdapt));\r
+\r
+    ASSERT (InformationBuffer != NULL);\r
+\r
+    *pNdisStatus = NDIS_STATUS_FAILURE;\r
+\r
+    do\r
+    {\r
+        //\r
+        // Check for invalid length\r
+        //\r
+        if (InformationBufferLength < sizeof(NDIS_DEVICE_POWER_STATE))\r
+        {\r
+            *pNdisStatus = NDIS_STATUS_INVALID_LENGTH;\r
+            break;\r
+        }\r
+\r
+        NewDeviceState = (*(PNDIS_DEVICE_POWER_STATE)InformationBuffer);\r
+\r
+        //\r
+        // Check for invalid device state\r
+        //\r
+        if ((pAdapt->MPDeviceState > NdisDeviceStateD0) && (NewDeviceState != NdisDeviceStateD0))\r
+        {\r
+            //\r
+            // If the miniport is in a non-D0 state, the miniport can only\r
+            // receive a Set Power to D0. Reaching this point violates that\r
+            // rule, so the assertion below always fires in checked builds.\r
+            // The negation covers the whole conjunction; the earlier form\r
+            // negated only the first operand.\r
+            //\r
+            ASSERT (!((pAdapt->MPDeviceState > NdisDeviceStateD0) && (NewDeviceState != NdisDeviceStateD0)));\r
+\r
+            *pNdisStatus = NDIS_STATUS_FAILURE;\r
+            break;\r
+        }\r
+\r
+        //\r
+        // Is the miniport transitioning from an On (D0) state to a Low Power State (>D0)?\r
+        // If so, then set the StandingBy Flag - (Block all incoming requests)\r
+        //\r
+        if (pAdapt->MPDeviceState == NdisDeviceStateD0 && NewDeviceState > NdisDeviceStateD0)\r
+        {\r
+            pAdapt->StandingBy = TRUE;\r
+        }\r
+\r
+        //\r
+        // If the miniport is transitioning from a low power state to ON (D0), then clear the StandingBy flag\r
+        // All incoming requests will be pended until the physical miniport turns ON.\r
+        //\r
+        if (pAdapt->MPDeviceState > NdisDeviceStateD0 && NewDeviceState == NdisDeviceStateD0)\r
+        {\r
+            pAdapt->StandingBy = FALSE;\r
+        }\r
+\r
+        //\r
+        // Now update the state in the pAdapt structure;\r
+        //\r
+        pAdapt->MPDeviceState = NewDeviceState;\r
+\r
+        *pNdisStatus = NDIS_STATUS_SUCCESS;\r
+\r
+    } while (FALSE);\r
+\r
+    if (*pNdisStatus == NDIS_STATUS_SUCCESS)\r
+    {\r
+        //\r
+        // The miniport resumes from a low power state\r
+        //\r
+        if (pAdapt->StandingBy == FALSE)\r
+        {\r
+            //\r
+            // If a status recorded while we were not indicating differs\r
+            // from the last one indicated, indicate it now (provided the\r
+            // miniport handle is still valid).\r
+            //\r
+            if (pAdapt->LastIndicatedStatus != pAdapt->LatestUnIndicateStatus)\r
+            {\r
+                if (pAdapt->MiniportHandle != NULL)\r
+                {\r
+                    NdisMIndicateStatus(pAdapt->MiniportHandle,\r
+                                        pAdapt->LatestUnIndicateStatus,\r
+                                        (PVOID)NULL,\r
+                                        0);\r
+                    NdisMIndicateStatusComplete(pAdapt->MiniportHandle);\r
+                    pAdapt->LastIndicatedStatus = pAdapt->LatestUnIndicateStatus;\r
+                }\r
+            }\r
+        }\r
+        else\r
+        {\r
+            //\r
+            // Initialize LatestUnIndicatedStatus\r
+            //\r
+            pAdapt->LatestUnIndicateStatus = pAdapt->LastIndicatedStatus;\r
+        }\r
+        *BytesRead = sizeof(NDIS_DEVICE_POWER_STATE);\r
+        *BytesNeeded = 0;\r
+    }\r
+    else\r
+    {\r
+        *BytesRead = 0;\r
+        *BytesNeeded = sizeof (NDIS_DEVICE_POWER_STATE);\r
+    }\r
+\r
+    DBGPRINT(("<==MPProcessSetPowerOid: Adapt %p\n", pAdapt));\r
+}\r
+\r
+\r
+VOID\r
+MPReturnPacket(\r
+    IN NDIS_HANDLE      MiniportAdapterContext,\r
+    IN PNDIS_PACKET     Packet\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    NDIS Miniport entry point invoked when the protocols above have\r
+    finished with a packet that we indicated up and that they held on to\r
+    for a later return.\r
+\r
+Arguments:\r
+\r
+    MiniportAdapterContext - pointer to ADAPT structure\r
+    Packet                 - packet being returned.\r
+\r
+Return Value:\r
+\r
+    None.\r
+\r
+--*/\r
+{\r
+    PADAPT          pAdapt = (PADAPT)MiniportAdapterContext;\r
+    PRECV_RSVD      pReserved;\r
+    PNDIS_PACKET    pOriginal;\r
+\r
+#ifdef NDIS51\r
+    //\r
+    // Packet stacking: a packet that does not come from our receive pool\r
+    // is the original packet, re-used in the receive indication. Hand it\r
+    // straight back to the miniport below us.\r
+    //\r
+    if (NdisGetPoolFromPacket(Packet) != pAdapt->RecvPacketPoolHandle)\r
+    {\r
+        NdisReturnPackets(&Packet, 1);\r
+        return;\r
+    }\r
+#endif // NDIS51\r
+\r
+    //\r
+    // The packet was allocated from this IM's receive packet pool: pick up\r
+    // the original packet saved in its reserved area, free our packet and\r
+    // return the original to the driver below.\r
+    //\r
+    pReserved = (PRECV_RSVD)(Packet->MiniportReserved);\r
+    pOriginal = pReserved->OriginalPkt;\r
+\r
+    NdisFreePacket(Packet);\r
+    NdisReturnPackets(&pOriginal, 1);\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+MPTransferData(\r
+    OUT PNDIS_PACKET    Packet,\r
+    OUT PUINT           BytesTransferred,\r
+    IN NDIS_HANDLE      MiniportAdapterContext,\r
+    IN NDIS_HANDLE      MiniportReceiveContext,\r
+    IN UINT             ByteOffset,\r
+    IN UINT             BytesToTransfer\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Transfer-data handler of the miniport edge. The call is relayed to the\r
+    lower binding through NdisTransferData unless the IM device is OFF.\r
+\r
+Arguments:\r
+\r
+    Packet                  Destination packet\r
+    BytesTransferred        Receives the amount of data that was copied\r
+    MiniportAdapterContext  Pointer to the adapter structure\r
+    MiniportReceiveContext  Receive context, passed through unchanged\r
+    ByteOffset              Offset into the packet at which copying starts\r
+    BytesToTransfer         Number of bytes to copy\r
+\r
+Return Value:\r
+\r
+    NDIS_STATUS_FAILURE when the device is OFF, otherwise the status set\r
+    by NdisTransferData.\r
+\r
+--*/\r
+{\r
+    NDIS_STATUS     Status;\r
+    PADAPT          pAdapt;\r
+\r
+    pAdapt = (PADAPT)MiniportAdapterContext;\r
+\r
+    if (IsIMDeviceStateOn(pAdapt) == FALSE)\r
+    {\r
+        //\r
+        // Device is OFF: nothing is forwarded.\r
+        //\r
+        Status = NDIS_STATUS_FAILURE;\r
+    }\r
+    else\r
+    {\r
+        NdisTransferData(&Status,\r
+                         pAdapt->BindingHandle,\r
+                         MiniportReceiveContext,\r
+                         ByteOffset,\r
+                         BytesToTransfer,\r
+                         Packet,\r
+                         BytesTransferred);\r
+    }\r
+\r
+    return(Status);\r
+}\r
+\r
+VOID\r
+MPHalt(\r
+ IN NDIS_HANDLE MiniportAdapterContext\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Halt handler. All the hard-work for clean-up is done here:\r
+ - marks the miniport as halted and clears its handle,\r
+ - unlinks the adapter from the global adapter list,\r
+ - deregisters the ioctl device,\r
+ - closes the lower binding (if any) and waits for the close to finish,\r
+ - drops references on the adapter structure.\r
+\r
+Arguments:\r
+\r
+ MiniportAdapterContext Pointer to the Adapter\r
+\r
+Return Value:\r
+\r
+ None.\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)MiniportAdapterContext;\r
+ NDIS_STATUS Status;\r
+ PADAPT *ppCursor;\r
+\r
+ DBGPRINT(("==>MiniportHalt: Adapt %p\n", pAdapt));\r
+\r
+ pAdapt->MiniportHandle = NULL;\r
+ pAdapt->MiniportIsHalted = TRUE;\r
+\r
+ //\r
+ // Remove this adapter from the global list\r
+ // ppCursor walks the addresses of the link fields, so the matching\r
+ // entry can be unlinked without tracking a separate 'previous' node.\r
+ //\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+\r
+ for (ppCursor = &pAdaptList; *ppCursor != NULL; ppCursor = &(*ppCursor)->Next)\r
+ {\r
+ if (*ppCursor == pAdapt)\r
+ {\r
+ *ppCursor = pAdapt->Next;\r
+ break;\r
+ }\r
+ }\r
+\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+\r
+ //\r
+ // Delete the ioctl interface that was created when the miniport\r
+ // was created.\r
+ //\r
+ (VOID)PtDeregisterDevice();\r
+\r
+ //\r
+ // If we have a valid bind, close the miniport below the protocol\r
+ //\r
+#pragma prefast(suppress: __WARNING_DEREF_NULL_PTR, "pAdapt cannot be NULL")\r
+ if (pAdapt->BindingHandle != NULL)\r
+ {\r
+ //\r
+ // Close the binding below. and wait for it to complete\r
+ //\r
+ NdisResetEvent(&pAdapt->Event);\r
+\r
+ NdisCloseAdapter(&Status, pAdapt->BindingHandle);\r
+\r
+ if (Status == NDIS_STATUS_PENDING)\r
+ {\r
+ //\r
+ // A timeout of 0 asks NdisWaitEvent to wait without a time limit.\r
+ // NOTE(review): pAdapt->Status is presumably filled in by the\r
+ // close-complete handler before it signals the event - confirm.\r
+ //\r
+ NdisWaitEvent(&pAdapt->Event, 0);\r
+ Status = pAdapt->Status;\r
+ }\r
+\r
+ ASSERT (Status == NDIS_STATUS_SUCCESS);\r
+\r
+ pAdapt->BindingHandle = NULL;\r
+ \r
+ //\r
+ // Drop one reference now that the binding is closed.\r
+ // NOTE(review): presumably the reference taken when the binding was\r
+ // opened - the matching reference is not visible here.\r
+ //\r
+ PtDereferenceAdapt(pAdapt);\r
+ }\r
+\r
+ //\r
+ // Drop another reference. A TRUE result clears the local pointer so the\r
+ // trace below prints NULL; presumably TRUE means the structure has been\r
+ // freed - verify against PtDereferenceAdapt.\r
+ //\r
+ if (PtDereferenceAdapt(pAdapt))\r
+ {\r
+ pAdapt = NULL;\r
+ }\r
+ \r
+ \r
+ DBGPRINT(("<== MiniportHalt: pAdapt %p\n", pAdapt));\r
+}\r
+\r
+\r
+#ifdef NDIS51_MINIPORT\r
+\r
+VOID\r
+MPCancelSendPackets(\r
+    IN NDIS_HANDLE      MiniportAdapterContext,\r
+    IN PVOID            CancelId\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Miniport entry point for cancelling every send packet tagged with the\r
+    given CancelId.\r
+\r
+    A driver that queues send packets itself should dequeue the matching\r
+    ones and complete each of them through NdisMSendComplete with\r
+    NDIS_STATUS_REQUEST_ABORTED. No such queue is examined in this routine.\r
+\r
+    The cancellation is also passed to the lower binding with\r
+    NdisCancelSendPackets so that the miniport(s) below can cancel any\r
+    matching packets they hold.\r
+\r
+Arguments:\r
+\r
+    MiniportAdapterContext - pointer to ADAPT structure\r
+    CancelId               - ID of packets to be cancelled.\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    //\r
+    // If packets were queued on the adapter structure, this is where one\r
+    // would take its spinlock, unlink the packets whose Id equals CancelId,\r
+    // drop the spinlock and call NdisMSendComplete with\r
+    // NDIS_STATUS_REQUEST_ABORTED for each unlinked packet.\r
+    //\r
+\r
+    //\r
+    // Hand the cancellation down so the miniport(s) below can cancel any\r
+    // packets they might have queued.\r
+    //\r
+    NdisCancelSendPackets(((PADAPT)MiniportAdapterContext)->BindingHandle, CancelId);\r
+}\r
+\r
+VOID\r
+MPDevicePnPEvent(\r
+    IN NDIS_HANDLE              MiniportAdapterContext,\r
+    IN NDIS_DEVICE_PNP_EVENT    DevicePnPEvent,\r
+    IN PVOID                    InformationBuffer,\r
+    IN ULONG                    InformationBufferLength\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Notification handler for PnP events directed to our miniport device\r
+    object. The current implementation ignores every event.\r
+\r
+Arguments:\r
+\r
+    MiniportAdapterContext  - pointer to ADAPT structure (unused)\r
+    DevicePnPEvent          - the event (unused)\r
+    InformationBuffer       - Points to additional event-specific\r
+                              information (unused)\r
+    InformationBufferLength - length of above (unused)\r
+\r
+Return Value:\r
+\r
+    None\r
+--*/\r
+{\r
+    // TBD - add code/comments about processing this.\r
+\r
+    //\r
+    // No processing yet: just mark all parameters as intentionally unused.\r
+    //\r
+    UNREFERENCED_PARAMETER(InformationBufferLength);\r
+    UNREFERENCED_PARAMETER(InformationBuffer);\r
+    UNREFERENCED_PARAMETER(DevicePnPEvent);\r
+    UNREFERENCED_PARAMETER(MiniportAdapterContext);\r
+}\r
+\r
+VOID\r
+MPAdapterShutdown(\r
+    IN NDIS_HANDLE      MiniportAdapterContext\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Notification handler for an impending system shutdown. The current\r
+    implementation takes no action.\r
+\r
+Arguments:\r
+\r
+    MiniportAdapterContext - pointer to ADAPT structure (unused)\r
+\r
+Return Value:\r
+\r
+    None\r
+--*/\r
+{\r
+    //\r
+    // Nothing to do; the context is deliberately left untouched.\r
+    //\r
+    UNREFERENCED_PARAMETER(MiniportAdapterContext);\r
+}\r
+\r
+#endif\r
+\r
+\r
+VOID\r
+MPFreeAllPacketPools(\r
+    IN PADAPT           pAdapt\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Release the receive and send packet pools owned by the given adapter.\r
+    A pool whose handle is already NULL is skipped; each handle is reset\r
+    to NULL once its pool has been freed.\r
+\r
+Arguments:\r
+\r
+    pAdapt - pointer to ADAPT structure\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    NDIS_HANDLE     hPool;\r
+\r
+    //\r
+    // Pool used to indicate receives\r
+    //\r
+    hPool = pAdapt->RecvPacketPoolHandle;\r
+    if (NULL != hPool)\r
+    {\r
+        NdisFreePacketPool(hPool);\r
+        pAdapt->RecvPacketPoolHandle = NULL;\r
+    }\r
+\r
+    //\r
+    // Pool used to send packets below\r
+    //\r
+    hPool = pAdapt->SendPacketPoolHandle;\r
+    if (NULL != hPool)\r
+    {\r
+        NdisFreePacketPool(hPool);\r
+        pAdapt->SendPacketPoolHandle = NULL;\r
+    }\r
+}\r
+\r
--- /dev/null
+; -- NETSF.INF --\r
+;\r
+; Passthru driver INF file - this is the INF for the service (protocol)\r
+; part.\r
+;\r
+; Copyright (c) 1993-2001, Microsoft Corporation\r
+;\r
+; ----------------------------------------------------------------------\r
+; Notes:\r
+; 0. The term "filter" is used in this INF to refer to an NDIS IM driver that\r
+; implements a 1:1 relationship between upper and lower bindings.\r
+;\r
+; 1. Items specifically required for a filter have been marked with\r
+; "!!--Filter Specific--!!" keyword\r
+; 2. In general a filter DOES NOT require a notify object for proper installation.\r
+; A notify object is only required if one wants to have better control\r
+; over binding operations or if one wants to receive notifications\r
+; when other components get installed/removed/bound/unbound.\r
+; Since Windows 2000 systems do not have support for CopyINF directive,\r
+; a notify object is required to programmatically copy the miniport INF \r
+; file to the system INF directory. Previous versions of this INF file\r
+; erroneously used to copy the INF files directly by using the CopyFiles \r
+; directive.\r
+; On Windows XP, you can install a filter IM without a notify object.\r
+; by following the instructions in (4).\r
+;\r
+; 3. If you want to use this INF file with your own IM driver, please\r
+; make the following modifications:\r
+; File netsf.inf\r
+; --------------\r
+; a. In section [SourceDisksFiles] and [Passthru.Files.Sys]\r
+; change passthru.sys to the name of your own driver binary.\r
+; b. In section [Passthru.ndi.AddReg], change values of\r
+; BindForm and MiniportId to appropriate values.\r
+; File netsf_m.inf\r
+; ----------------\r
+; a. Replace MS_PassthruMP with InfId of your miniport.\r
+; b. In section [PassthruMP.AddService],\r
+; change ServiceBinary appropriately.\r
+; c. In section [PassthruMP.ndi.AddReg],\r
+; change "Passthru" in the line having "Service"\r
+; to reflect the appropriate name\r
+;\r
+;\r
+; ----------------------------------------------------------------------\r
+\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = NetService\r
+ClassGUID = {4D36E974-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Msft%\r
+DriverVer =10/01/2002,6.0.5019.0\r
+\r
+[Manufacturer]\r
+%Msft% = MSFT,NTx86,NTia64,NTamd64\r
+\r
+[ControlFlags]\r
+\r
+;=========================================================================\r
+;\r
+;=========================================================================\r
+;For Win2K\r
+\r
+[MSFT]\r
+%Passthru_Desc% = Passthru.ndi, ms_passthru\r
+ \r
+;For WinXP and later\r
+\r
+[MSFT.NTx86]\r
+%Passthru_Desc% = Passthru.ndi, ms_passthru\r
+\r
+[MSFT.NTia64]\r
+%Passthru_Desc% = Passthru.ndi, ms_passthru\r
+\r
+[MSFT.NTamd64]\r
+%Passthru_Desc% = Passthru.ndi, ms_passthru\r
+\r
+\r
+[Passthru.ndi]\r
+AddReg = Passthru.ndi.AddReg, Passthru.AddReg\r
+Characteristics = 0x4410 ; NCF_FILTER | NCF_NDIS_PROTOCOL !!--Filter Specific--!!\r
+CopyFiles = Passthru.Files.Sys\r
+CopyInf = netsf_m.inf\r
+\r
+[Passthru.ndi.Remove]\r
+DelFiles = Passthru.Files.Sys\r
+\r
+[Passthru.ndi.Services]\r
+AddService = Passthru,, Passthru.AddService\r
+\r
+[Passthru.AddService]\r
+DisplayName = %PassthruService_Desc%\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\passthru.sys\r
+AddReg = Passthru.AddService.AddReg\r
+\r
+\r
+[Passthru.AddService.AddReg]\r
+; ----------------------------------------------------------------------\r
+; Add any miniport-specific parameters here. These are params that your\r
+; filter device is going to use.\r
+;\r
+;HKR, Parameters, ParameterName, 0x10000, "MultiSz", "Parameter", "Value"\r
+;HKR, Parameters, ParameterName2, 0x10001, 4\r
+\r
+\r
+; ----------------------------------------------------------------------\r
+; File copy\r
+;\r
+[SourceDisksNames]\r
+1=%DiskDescription%,"",,\r
+\r
+[SourceDisksFiles]\r
+passthru.sys=1\r
+\r
+[DestinationDirs]\r
+DefaultDestDir = 12\r
+Passthru.Files.Sys = 12 ; %windir%\System32\drivers\r
+\r
+[Passthru.Files.Sys]\r
+passthru.sys,,,2\r
+\r
+; ----------------------------------------------------------------------\r
+; Filter Install\r
+;\r
+\r
+[Passthru.ndi.AddReg]\r
+HKR, Ndi, HelpText, , %Passthru_HELP%\r
+\r
+; ----------------------------------------------------------------------\r
+; !!--Filter Specific--!!\r
+;\r
+; Note:\r
+; 1. Other components may also have UpperRange/LowerRange but for filters\r
+; the value of both of them must be noupper/nolower\r
+; 2. The value FilterClass is required.\r
+; 3. The value Service is required\r
+; 4. FilterDeviceInfId is the InfId of the filter device (miniport) that will\r
+; be installed for each filtered adapter.\r
+; In this case this is ms_passthrump (refer to netsf_m.inf)\r
+;\r
+HKR, Ndi, FilterClass, , failover\r
+HKR, Ndi, FilterDeviceInfId, , ms_passthrump\r
+HKR, Ndi, Service, , Passthru\r
+HKR, Ndi\Interfaces, UpperRange, , noupper\r
+HKR, Ndi\Interfaces, LowerRange, , nolower\r
+HKR, Ndi\Interfaces, FilterMediaTypes, , "ethernet, tokenring, fddi, wan"\r
+\r
+[Passthru.AddReg]\r
+; The following key is Required\r
+; The following key is Passthru specific\r
+HKR, Parameters, Param1, 0, 4\r
+\r
+; ----------------------------------------------------------------------\r
+[Strings]\r
+Msft = "Microsoft"\r
+DiskDescription = "Microsoft Passthru Driver Disk"\r
+\r
+Passthru_Desc = "Passthru Driver"\r
+Passthru_HELP = "Passthru Driver"\r
+PassthruService_Desc = "Passthru Service"\r
+\r
+\r
--- /dev/null
+; -- NETSF_M.INF --\r
+;\r
+; Passthru Miniport INF file\r
+;\r
+; Copyright (c) 1993-1999, Microsoft Corporation\r
+\r
+; ----------------------------------------------------------------------\r
+; Notes:\r
+; 0. The term "filter" is used here to refer to an NDIS IM driver that\r
+; implements a 1:1 relationship between upper and lower bindings.\r
+; 1. Items specifically required for a filter have been marked with\r
+; "!!--Filter Specific--!!" keyword\r
+; 2. A filter DOES NOT require a notify object for proper installation.\r
+; A notify object is only required if one wants to have better control\r
+; over binding operations or if one wants to receive notifications\r
+; when other components get installed/removed/bound/unbound.\r
+; This sample uses a notify object as an example only. If you do not\r
+; want to use a notify object, please comment out the lines that add\r
+; ClsId and ComponentDll registry keys.\r
+; ----------------------------------------------------------------------\r
+\r
+[Version]\r
+signature = "$Windows NT$"\r
+Class = Net\r
+ClassGUID = {4d36e972-e325-11ce-bfc1-08002be10318}\r
+Provider = %Msft%\r
+DriverVer =10/01/2002,6.0.5019.0\r
+\r
+[ControlFlags]\r
+ExcludeFromSelect = ms_passthrump\r
+\r
+[DestinationDirs]\r
+DefaultDestDir=12\r
+; No files to copy \r
+\r
+[Manufacturer]\r
+%Msft% = MSFT,NTx86,NTia64,NTamd64\r
+\r
+;For Win2K\r
+\r
+[MSFT]\r
+%PassthruMP_Desc% = PassthruMP.ndi, ms_passthrump\r
+\r
+;For WinXP and later\r
+\r
+[MSFT.NTx86]\r
+%PassthruMP_Desc% = PassthruMP.ndi, ms_passthrump\r
+\r
+[MSFT.NTia64]\r
+%PassthruMP_Desc% = PassthruMP.ndi, ms_passthrump\r
+\r
+[MSFT.NTamd64]\r
+%PassthruMP_Desc% = PassthruMP.ndi, ms_passthrump\r
+\r
+\r
+[PassthruMP.ndi]\r
+AddReg = PassthruMP.ndi.AddReg\r
+Characteristics = 0x29 ;NCF_NOT_USER_REMOVABLE | NCF_VIRTUAL | NCF_HIDDEN\r
+\r
+[PassthruMP.ndi.AddReg]\r
+HKR, Ndi, Service, 0, PassthruMP\r
+\r
+[PassthruMP.ndi.Services]\r
+AddService = PassthruMP,0x2, PassthruMP.AddService\r
+\r
+\r
+[PassthruMP.AddService]\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\passthru.sys\r
+AddReg = PassthruMP.AddService.AddReg\r
+\r
+\r
+[PassthruMP.AddService.AddReg]\r
+; ----------------------------------------------------------------------\r
+; Add any miniport-specific parameters here. These are params that your\r
+; filter device is going to use.\r
+;\r
+;HKR, Parameters, ParameterName, 0x10000, "MultiSz", "Parameter", "Value"\r
+;HKR, Parameters, ParameterName2, 0x10001, 4\r
+\r
+[Strings]\r
+Msft = "Microsoft"\r
+PassthruMP_Desc = "Passthru Miniport"\r
+\r
+[SourceDisksNames]\r
+;None \r
+\r
+[SourceDisksFiles]\r
+;None\r
+\r
+\r
--- /dev/null
+/*++\r
+\r
+Copyright (c) 1992-2000 Microsoft Corporation\r
+ \r
+Module Name:\r
+ \r
+ passthru.c\r
+\r
+Abstract:\r
+\r
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.\r
+\r
+Author:\r
+\r
+Environment:\r
+\r
+\r
+Revision History:\r
+\r
+\r
+--*/\r
+\r
+\r
+#include "precomp.h"\r
+#pragma hdrstop\r
+\r
+#pragma NDIS_INIT_FUNCTION(DriverEntry)\r
+\r
+//\r
+// Handles obtained in DriverEntry: ProtHandle is filled in by\r
+// NdisRegisterProtocol, DriverHandle by NdisIMRegisterLayeredMiniport.\r
+//\r
+NDIS_HANDLE ProtHandle = NULL;\r
+NDIS_HANDLE DriverHandle = NULL;\r
+//\r
+// Media types listed by this driver.\r
+//\r
+NDIS_MEDIUM MediumArray[4] =\r
+ {\r
+ NdisMedium802_3, // Ethernet\r
+ NdisMedium802_5, // Token-ring\r
+ NdisMediumFddi, // Fddi\r
+ NdisMediumWan // NDISWAN\r
+ };\r
+\r
+//\r
+// Spin lock allocated in DriverEntry. It is held while the adapter list,\r
+// MiniportCount and ControlDeviceState are examined or changed.\r
+//\r
+NDIS_SPIN_LOCK GlobalLock;\r
+\r
+//\r
+// Head of the list of adapters (linked through ADAPT.Next), and the\r
+// count of miniport instances maintained by PtRegisterDevice.\r
+//\r
+PADAPT pAdaptList = NULL;\r
+LONG MiniportCount = 0;\r
+\r
+//\r
+// Wrapper handle initialized by NdisMInitializeWrapper in DriverEntry.\r
+//\r
+NDIS_HANDLE NdisWrapperHandle;\r
+\r
+//\r
+// To support ioctls from user-mode:\r
+//\r
+\r
+#define LINKNAME_STRING L"\\DosDevices\\Passthru"\r
+#define NTDEVICE_STRING L"\\Device\\Passthru"\r
+\r
+//\r
+// Device handle and device object filled in by NdisMRegisterDevice\r
+// (see PtRegisterDevice).\r
+//\r
+NDIS_HANDLE NdisDeviceHandle = NULL;\r
+PDEVICE_OBJECT ControlDeviceObject = NULL;\r
+\r
+//\r
+// State of the single control device; create and delete operations use\r
+// it to avoid running at the same time.\r
+//\r
+enum _DEVICE_STATE\r
+{\r
+ PS_DEVICE_STATE_READY = 0, // ready for create/delete\r
+ PS_DEVICE_STATE_CREATING, // create operation in progress\r
+ PS_DEVICE_STATE_DELETING // delete operation in progress\r
+} ControlDeviceState = PS_DEVICE_STATE_READY;\r
+\r
+\r
+\r
+NTSTATUS\r
+DriverEntry(\r
+    IN PDRIVER_OBJECT       DriverObject,\r
+    IN PUNICODE_STRING      RegistryPath\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    First entry point to be called, when this driver is loaded.\r
+    Register with NDIS as an intermediate driver: the miniport edge is\r
+    registered first, then the protocol edge, and finally the two are\r
+    associated with each other.\r
+\r
+    On any failure the NDIS wrapper is terminated and the global spin\r
+    lock allocated at the top of this routine is released again.\r
+\r
+Arguments:\r
+\r
+    DriverObject - pointer to the system's driver object structure\r
+                   for this driver\r
+\r
+    RegistryPath - system's registry path for this driver\r
+\r
+Return Value:\r
+\r
+    STATUS_SUCCESS if all initialization is successful, STATUS_XXX\r
+    error code if not.\r
+\r
+--*/\r
+{\r
+    NDIS_STATUS                     Status;\r
+    NDIS_PROTOCOL_CHARACTERISTICS   PChars;\r
+    NDIS_MINIPORT_CHARACTERISTICS   MChars;\r
+    NDIS_STRING                     Name;\r
+\r
+    Status = NDIS_STATUS_SUCCESS;\r
+    NdisAllocateSpinLock(&GlobalLock);\r
+\r
+    NdisMInitializeWrapper(&NdisWrapperHandle, DriverObject, RegistryPath, NULL);\r
+\r
+    do\r
+    {\r
+        //\r
+        // Register the miniport with NDIS. Note that it is the miniport\r
+        // which was started as a driver and not the protocol. Also the miniport\r
+        // must be registered prior to the protocol since the protocol's BindAdapter\r
+        // handler can be initiated anytime and when it is, it must be ready to\r
+        // start driver instances.\r
+        //\r
+\r
+        NdisZeroMemory(&MChars, sizeof(NDIS_MINIPORT_CHARACTERISTICS));\r
+\r
+        MChars.MajorNdisVersion = PASSTHRU_MAJOR_NDIS_VERSION;\r
+        MChars.MinorNdisVersion = PASSTHRU_MINOR_NDIS_VERSION;\r
+\r
+        MChars.InitializeHandler = MPInitialize;\r
+        MChars.QueryInformationHandler = MPQueryInformation;\r
+        MChars.SetInformationHandler = MPSetInformation;\r
+        MChars.ResetHandler = NULL;\r
+        MChars.TransferDataHandler = MPTransferData;\r
+        MChars.HaltHandler = MPHalt;\r
+#ifdef NDIS51_MINIPORT\r
+        MChars.CancelSendPacketsHandler = MPCancelSendPackets;\r
+        MChars.PnPEventNotifyHandler = MPDevicePnPEvent;\r
+        MChars.AdapterShutdownHandler = MPAdapterShutdown;\r
+#endif // NDIS51_MINIPORT\r
+\r
+        //\r
+        // We will disable the check for hang timeout so we do not\r
+        // need a check for hang handler!\r
+        //\r
+        MChars.CheckForHangHandler = NULL;\r
+        MChars.ReturnPacketHandler = MPReturnPacket;\r
+\r
+        //\r
+        // Either the Send or the SendPackets handler should be specified.\r
+        // If SendPackets handler is specified, SendHandler is ignored\r
+        //\r
+        MChars.SendHandler = NULL;    // MPSend;\r
+        MChars.SendPacketsHandler = MPSendPackets;\r
+\r
+        Status = NdisIMRegisterLayeredMiniport(NdisWrapperHandle,\r
+                                               &MChars,\r
+                                               sizeof(MChars),\r
+                                               &DriverHandle);\r
+        if (Status != NDIS_STATUS_SUCCESS)\r
+        {\r
+            break;\r
+        }\r
+\r
+#ifndef WIN9X\r
+        NdisMRegisterUnloadHandler(NdisWrapperHandle, PtUnload);\r
+#endif\r
+\r
+        //\r
+        // Now register the protocol.\r
+        //\r
+        NdisZeroMemory(&PChars, sizeof(NDIS_PROTOCOL_CHARACTERISTICS));\r
+        PChars.MajorNdisVersion = PASSTHRU_PROT_MAJOR_NDIS_VERSION;\r
+        PChars.MinorNdisVersion = PASSTHRU_PROT_MINOR_NDIS_VERSION;\r
+\r
+        //\r
+        // Make sure the protocol-name matches the service-name\r
+        // (from the INF) under which this protocol is installed.\r
+        // This is needed to ensure that NDIS can correctly determine\r
+        // the binding and call us to bind to miniports below.\r
+        //\r
+        NdisInitUnicodeString(&Name, L"Passthru");    // Protocol name\r
+        PChars.Name = Name;\r
+        PChars.OpenAdapterCompleteHandler = PtOpenAdapterComplete;\r
+        PChars.CloseAdapterCompleteHandler = PtCloseAdapterComplete;\r
+        PChars.SendCompleteHandler = PtSendComplete;\r
+        PChars.TransferDataCompleteHandler = PtTransferDataComplete;\r
+\r
+        PChars.ResetCompleteHandler = PtResetComplete;\r
+        PChars.RequestCompleteHandler = PtRequestComplete;\r
+        PChars.ReceiveHandler = PtReceive;\r
+        PChars.ReceiveCompleteHandler = PtReceiveComplete;\r
+        PChars.StatusHandler = PtStatus;\r
+        PChars.StatusCompleteHandler = PtStatusComplete;\r
+        PChars.BindAdapterHandler = PtBindAdapter;\r
+        PChars.UnbindAdapterHandler = PtUnbindAdapter;\r
+        PChars.UnloadHandler = PtUnloadProtocol;\r
+\r
+        PChars.ReceivePacketHandler = PtReceivePacket;\r
+        PChars.PnPEventHandler= PtPNPHandler;\r
+\r
+        NdisRegisterProtocol(&Status,\r
+                             &ProtHandle,\r
+                             &PChars,\r
+                             sizeof(NDIS_PROTOCOL_CHARACTERISTICS));\r
+\r
+        if (Status != NDIS_STATUS_SUCCESS)\r
+        {\r
+            //\r
+            // Undo the miniport registration made above.\r
+            //\r
+            NdisIMDeregisterLayeredMiniport(DriverHandle);\r
+            break;\r
+        }\r
+\r
+        NdisIMAssociateMiniport(DriverHandle, ProtHandle);\r
+    }\r
+    while (FALSE);\r
+\r
+    if (Status != NDIS_STATUS_SUCCESS)\r
+    {\r
+        NdisTerminateWrapper(NdisWrapperHandle, NULL);\r
+\r
+        //\r
+        // Initialization failed, so nothing will use GlobalLock after\r
+        // this point: release the spin lock allocated at the top of this\r
+        // routine instead of leaving it allocated.\r
+        //\r
+        NdisFreeSpinLock(&GlobalLock);\r
+    }\r
+\r
+    return(Status);\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+PtRegisterDevice(\r
+ VOID\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Register an ioctl interface - a device object to be used for this\r
+ purpose is created by NDIS when we call NdisMRegisterDevice.\r
+\r
+ This routine is called whenever a new miniport instance is\r
+ initialized. However, we only create one global device object,\r
+ when the first miniport instance is initialized. This routine\r
+ handles potential race conditions with PtDeregisterDevice via\r
+ the ControlDeviceState and MiniportCount variables.\r
+\r
+ NOTE: do not call this from DriverEntry; it will prevent the driver\r
+ from being unloaded (e.g. on uninstall).\r
+\r
+Arguments:\r
+\r
+ None\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_SUCCESS if we successfully register a device object, or if\r
+ this is not the first miniport instance (no registration is attempted\r
+ then). Otherwise the status returned by NdisMRegisterDevice.\r
+\r
+--*/\r
+{\r
+ NDIS_STATUS Status = NDIS_STATUS_SUCCESS;\r
+ UNICODE_STRING DeviceName;\r
+ UNICODE_STRING DeviceLinkUnicodeString;\r
+ PDRIVER_DISPATCH DispatchTable[IRP_MJ_MAXIMUM_FUNCTION+1];\r
+\r
+ DBGPRINT(("==>PtRegisterDevice\n"));\r
+\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+\r
+ //\r
+ // Count this miniport instance. NOTE(review): the count is not\r
+ // decremented in this routine if NdisMRegisterDevice fails below.\r
+ //\r
+ ++MiniportCount;\r
+ \r
+ //\r
+ // Only the first instance creates the device object.\r
+ //\r
+ if (1 == MiniportCount)\r
+ {\r
+ ASSERT(ControlDeviceState != PS_DEVICE_STATE_CREATING);\r
+\r
+ //\r
+ // Another thread could be running PtDeregisterDevice on\r
+ // behalf of another miniport instance. If so, wait for\r
+ // it to exit.\r
+ // The lock is dropped around each sleep and re-taken before the\r
+ // state is tested again.\r
+ //\r
+ while (ControlDeviceState != PS_DEVICE_STATE_READY)\r
+ {\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+ NdisMSleep(1);\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+ }\r
+\r
+ //\r
+ // Publish the CREATING state, then release the lock so that\r
+ // NdisMRegisterDevice is called without the spin lock held.\r
+ //\r
+ ControlDeviceState = PS_DEVICE_STATE_CREATING;\r
+\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+\r
+ \r
+ NdisZeroMemory(DispatchTable, (IRP_MJ_MAXIMUM_FUNCTION+1) * sizeof(PDRIVER_DISPATCH));\r
+\r
+ //\r
+ // All four handled IRP major codes go to the same dispatch routine;\r
+ // every other entry stays zeroed.\r
+ //\r
+ DispatchTable[IRP_MJ_CREATE] = PtDispatch;\r
+ DispatchTable[IRP_MJ_CLEANUP] = PtDispatch;\r
+ DispatchTable[IRP_MJ_CLOSE] = PtDispatch;\r
+ DispatchTable[IRP_MJ_DEVICE_CONTROL] = PtDispatch;\r
+ \r
+\r
+ NdisInitUnicodeString(&DeviceName, NTDEVICE_STRING);\r
+ NdisInitUnicodeString(&DeviceLinkUnicodeString, LINKNAME_STRING);\r
+\r
+ //\r
+ // Create a device object and register our dispatch handlers\r
+ //\r
+ \r
+ Status = NdisMRegisterDevice(\r
+ NdisWrapperHandle, \r
+ &DeviceName,\r
+ &DeviceLinkUnicodeString,\r
+ &DispatchTable[0],\r
+ &ControlDeviceObject,\r
+ &NdisDeviceHandle\r
+ );\r
+\r
+ //\r
+ // Re-take the lock to return to the READY state; it is released by\r
+ // the common NdisReleaseSpinLock below. The state goes back to READY\r
+ // whether or not the registration succeeded.\r
+ //\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+\r
+ ControlDeviceState = PS_DEVICE_STATE_READY;\r
+ }\r
+\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+\r
+ DBGPRINT(("<==PtRegisterDevice: %x\n", Status));\r
+\r
+ return (Status);\r
+}\r
+\r
+\r
+NTSTATUS\r
+PtDispatch(\r
+    IN PDEVICE_OBJECT    DeviceObject,\r
+    IN PIRP              Irp\r
+    )\r
+/*++\r
+Routine Description:\r
+\r
+    Dispatch routine for IRPs sent to this device. Every request is\r
+    completed immediately with STATUS_SUCCESS; the switch below is the\r
+    place to hook in real ioctl handling.\r
+\r
+Arguments:\r
+\r
+    DeviceObject - pointer to a device object (unused)\r
+    Irp          - pointer to the I/O Request Packet to complete\r
+\r
+Return Value:\r
+\r
+    NTSTATUS - currently always STATUS_SUCCESS; change this when adding\r
+    real code to handle ioctls.\r
+\r
+--*/\r
+{\r
+    NTSTATUS              ntStatus = STATUS_SUCCESS;\r
+    PIO_STACK_LOCATION    pStackLocation;\r
+\r
+    UNREFERENCED_PARAMETER(DeviceObject);\r
+\r
+    DBGPRINT(("==>Pt Dispatch\n"));\r
+\r
+    pStackLocation = IoGetCurrentIrpStackLocation(Irp);\r
+\r
+    switch (pStackLocation->MajorFunction)\r
+    {\r
+        case IRP_MJ_DEVICE_CONTROL:\r
+            //\r
+            // Add code here to handle ioctl commands sent to passthru.\r
+            //\r
+            break;\r
+\r
+        case IRP_MJ_CREATE:\r
+        case IRP_MJ_CLEANUP:\r
+        case IRP_MJ_CLOSE:\r
+        default:\r
+            //\r
+            // Nothing to do for these requests.\r
+            //\r
+            break;\r
+    }\r
+\r
+    //\r
+    // Complete the request with the status chosen above.\r
+    //\r
+    Irp->IoStatus.Status = ntStatus;\r
+    IoCompleteRequest(Irp, IO_NO_INCREMENT);\r
+\r
+    DBGPRINT(("<== Pt Dispatch\n"));\r
+\r
+    return ntStatus;\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+PtDeregisterDevice(\r
+ VOID\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Deregister the ioctl interface. This is called whenever a miniport\r
+ instance is halted. When the last miniport instance is halted, we\r
+ request NDIS to delete the device object\r
+\r
+Arguments:\r
+\r
+ None. The handle returned by NdisMRegisterDevice is read from (and\r
+ cleared in) the NdisDeviceHandle variable rather than passed in.\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_SUCCESS if everything worked ok, including the cases\r
+ where other miniport instances remain or NdisDeviceHandle is NULL\r
+ (nothing is deregistered then). Otherwise the status returned by\r
+ NdisMDeregisterDevice.\r
+\r
+--*/\r
+{\r
+ NDIS_STATUS Status = NDIS_STATUS_SUCCESS;\r
+\r
+ DBGPRINT(("==>PassthruDeregisterDevice\n"));\r
+\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+\r
+ ASSERT(MiniportCount > 0);\r
+\r
+ --MiniportCount;\r
+ \r
+ if (0 == MiniportCount)\r
+ {\r
+ //\r
+ // All miniport instances have been halted. Deregister\r
+ // the control device.\r
+ //\r
+\r
+ ASSERT(ControlDeviceState == PS_DEVICE_STATE_READY);\r
+\r
+ //\r
+ // Block PtRegisterDevice() while we release the control\r
+ // device lock and deregister the device.\r
+ // \r
+ ControlDeviceState = PS_DEVICE_STATE_DELETING;\r
+\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+\r
+ //\r
+ // NdisMDeregisterDevice is called without GlobalLock held. The\r
+ // handle is cleared regardless of the status it returns.\r
+ //\r
+ if (NdisDeviceHandle != NULL)\r
+ {\r
+ Status = NdisMDeregisterDevice(NdisDeviceHandle);\r
+ NdisDeviceHandle = NULL;\r
+ }\r
+\r
+ //\r
+ // Re-take the lock and return the state to READY so that a\r
+ // PtRegisterDevice() waiting on the state can proceed.\r
+ //\r
+ NdisAcquireSpinLock(&GlobalLock);\r
+ ControlDeviceState = PS_DEVICE_STATE_READY;\r
+ }\r
+\r
+ NdisReleaseSpinLock(&GlobalLock);\r
+\r
+ DBGPRINT(("<== PassthruDeregisterDevice: %x\n", Status));\r
+ return Status;\r
+ \r
+}\r
+\r
+VOID\r
+PtUnload(\r
+ IN PDRIVER_OBJECT DriverObject\r
+ )\r
+//\r
+// PassThru driver unload function\r
+//\r
+// Performs the teardown in a fixed order: calls PtUnloadProtocol(),\r
+// deregisters the layered miniport identified by DriverHandle, and\r
+// finally frees GlobalLock. DriverObject is not used.\r
+//\r
+{\r
+ UNREFERENCED_PARAMETER(DriverObject);\r
+ \r
+ DBGPRINT(("PtUnload: entered\n"));\r
+ \r
+ // PtUnloadProtocol is defined elsewhere; it runs before the miniport is deregistered.\r
+ PtUnloadProtocol();\r
+ \r
+ NdisIMDeregisterLayeredMiniport(DriverHandle);\r
+ \r
+ // GlobalLock is freed last, after both calls above have returned.\r
+ NdisFreeSpinLock(&GlobalLock);\r
+\r
+ DBGPRINT(("PtUnload: done!\n"));\r
+}\r
+\r
--- /dev/null
+/*++\r
+\r
+Copyright (c) 1992-2000 Microsoft Corporation\r
+\r
+Module Name:\r
+\r
+ passthru.h\r
+\r
+Abstract:\r
+\r
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.\r
+\r
+Author:\r
+\r
+Environment:\r
+\r
+\r
+Revision History:\r
+\r
+ \r
+--*/\r
+\r
+//\r
+// PASSTHRU_MAJOR/MINOR_NDIS_VERSION: 5.1 when NDIS51_MINIPORT is defined,\r
+// 4.0 otherwise. (Presumably the version used for the miniport edge; the\r
+// point of use is outside this header.)\r
+//\r
+#ifdef NDIS51_MINIPORT\r
+#define PASSTHRU_MAJOR_NDIS_VERSION 5\r
+#define PASSTHRU_MINOR_NDIS_VERSION 1\r
+#else\r
+#define PASSTHRU_MAJOR_NDIS_VERSION 4\r
+#define PASSTHRU_MINOR_NDIS_VERSION 0\r
+#endif\r
+\r
+//\r
+// PASSTHRU_PROT_MAJOR/MINOR_NDIS_VERSION: 5.0 when NDIS51 is defined,\r
+// 4.0 otherwise. Note the minor version is 0 in both branches.\r
+//\r
+#ifdef NDIS51\r
+#define PASSTHRU_PROT_MAJOR_NDIS_VERSION 5\r
+#define PASSTHRU_PROT_MINOR_NDIS_VERSION 0\r
+#else\r
+#define PASSTHRU_PROT_MAJOR_NDIS_VERSION 4\r
+#define PASSTHRU_PROT_MINOR_NDIS_VERSION 0\r
+#endif\r
+\r
+#define MAX_BUNDLEID_LENGTH 50\r
+\r
+// Multi-character constant; presumably a memory allocation tag - confirm at the call sites.\r
+#define TAG 'ImPa'\r
+// NOTE(review): "infinite" is encoded as 0 here; confirm how callers interpret it.\r
+#define WAIT_INFINITE 0\r
+\r
+\r
+\r
+//advance declaration\r
+typedef struct _ADAPT ADAPT, *PADAPT;\r
+\r
+DRIVER_INITIALIZE DriverEntry;\r
+extern\r
+NTSTATUS\r
+DriverEntry(\r
+ IN PDRIVER_OBJECT DriverObject,\r
+ IN PUNICODE_STRING RegistryPath\r
+ );\r
+\r
+DRIVER_DISPATCH PtDispatch;\r
+NTSTATUS\r
+PtDispatch(\r
+ IN PDEVICE_OBJECT DeviceObject,\r
+ IN PIRP Irp\r
+ );\r
+\r
+NDIS_STATUS\r
+PtRegisterDevice(\r
+ VOID\r
+ );\r
+\r
+NDIS_STATUS\r
+PtDeregisterDevice(\r
+ VOID\r
+ );\r
+\r
+DRIVER_UNLOAD PtUnload;\r
+VOID\r
+PtUnloadProtocol(\r
+ VOID\r
+ );\r
+\r
+//\r
+// Protocol proto-types\r
+//\r
+extern\r
+VOID\r
+PtOpenAdapterComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS Status,\r
+ IN NDIS_STATUS OpenErrorStatus\r
+ );\r
+\r
+extern\r
+VOID\r
+PtCloseAdapterComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtResetComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtRequestComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_REQUEST NdisRequest,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtStatus(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_STATUS GeneralStatus,\r
+ IN PVOID StatusBuffer,\r
+ IN UINT StatusBufferSize\r
+ );\r
+\r
+extern\r
+VOID\r
+PtStatusComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext\r
+ );\r
+\r
+extern\r
+VOID\r
+PtSendComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN NDIS_STATUS Status\r
+ );\r
+\r
+extern\r
+VOID\r
+PtTransferDataComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN NDIS_STATUS Status,\r
+ IN UINT BytesTransferred\r
+ );\r
+\r
+extern\r
+NDIS_STATUS\r
+PtReceive(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_HANDLE MacReceiveContext,\r
+ IN PVOID HeaderBuffer,\r
+ IN UINT HeaderBufferSize,\r
+ IN PVOID LookAheadBuffer,\r
+ IN UINT LookaheadBufferSize,\r
+ IN UINT PacketSize\r
+ );\r
+\r
+extern\r
+VOID\r
+PtReceiveComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext\r
+ );\r
+\r
+extern\r
+INT\r
+PtReceivePacket(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet\r
+ );\r
+\r
+extern\r
+VOID\r
+PtBindAdapter(\r
+ OUT PNDIS_STATUS Status,\r
+ IN NDIS_HANDLE BindContext,\r
+ IN PNDIS_STRING DeviceName,\r
+ IN PVOID SystemSpecific1,\r
+ IN PVOID SystemSpecific2\r
+ );\r
+\r
+extern\r
+VOID\r
+PtUnbindAdapter(\r
+ OUT PNDIS_STATUS Status,\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_HANDLE UnbindContext\r
+ );\r
+ \r
+VOID\r
+PtUnload(\r
+ IN PDRIVER_OBJECT DriverObject\r
+ );\r
+\r
+\r
+\r
+extern \r
+NDIS_STATUS\r
+PtPNPHandler(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ );\r
+\r
+\r
+\r
+\r
+NDIS_STATUS\r
+PtPnPNetEventReconfigure(\r
+ IN PADAPT pAdapt,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ ); \r
+\r
+NDIS_STATUS \r
+PtPnPNetEventSetPower (\r
+ IN PADAPT pAdapt,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ );\r
+ \r
+\r
+//\r
+// Miniport proto-types\r
+//\r
+NDIS_STATUS\r
+MPInitialize(\r
+ OUT PNDIS_STATUS OpenErrorStatus,\r
+ OUT PUINT SelectedMediumIndex,\r
+ IN PNDIS_MEDIUM MediumArray,\r
+ IN UINT MediumArraySize,\r
+ IN NDIS_HANDLE MiniportAdapterHandle,\r
+ IN NDIS_HANDLE WrapperConfigurationContext\r
+ );\r
+\r
+VOID\r
+MPSendPackets(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PPNDIS_PACKET PacketArray,\r
+ IN UINT NumberOfPackets\r
+ );\r
+\r
+NDIS_STATUS\r
+MPSend(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PNDIS_PACKET Packet,\r
+ IN UINT Flags\r
+ );\r
+\r
+NDIS_STATUS\r
+MPQueryInformation(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_OID Oid,\r
+ IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesWritten,\r
+ OUT PULONG BytesNeeded\r
+ );\r
+\r
+NDIS_STATUS\r
+MPSetInformation(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_OID Oid,\r
+ __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesRead,\r
+ OUT PULONG BytesNeeded\r
+ );\r
+\r
+VOID\r
+MPReturnPacket(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PNDIS_PACKET Packet\r
+ );\r
+\r
+NDIS_STATUS\r
+MPTransferData(\r
+ OUT PNDIS_PACKET Packet,\r
+ OUT PUINT BytesTransferred,\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_HANDLE MiniportReceiveContext,\r
+ IN UINT ByteOffset,\r
+ IN UINT BytesToTransfer\r
+ );\r
+\r
+VOID\r
+MPHalt(\r
+ IN NDIS_HANDLE MiniportAdapterContext\r
+ );\r
+\r
+\r
+VOID\r
+MPQueryPNPCapabilities( \r
+ OUT PADAPT MiniportProtocolContext, \r
+ OUT PNDIS_STATUS Status\r
+ );\r
+\r
+\r
+#ifdef NDIS51_MINIPORT\r
+\r
+VOID\r
+MPCancelSendPackets(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN PVOID CancelId\r
+ );\r
+\r
+VOID\r
+MPAdapterShutdown(\r
+ IN NDIS_HANDLE MiniportAdapterContext\r
+ );\r
+\r
+VOID\r
+MPDevicePnPEvent(\r
+ IN NDIS_HANDLE MiniportAdapterContext,\r
+ IN NDIS_DEVICE_PNP_EVENT DevicePnPEvent,\r
+ IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength\r
+ );\r
+\r
+#endif // NDIS51_MINIPORT\r
+\r
+VOID\r
+MPFreeAllPacketPools(\r
+ IN PADAPT pAdapt\r
+ );\r
+\r
+\r
+VOID\r
+MPProcessSetPowerOid(\r
+ IN OUT PNDIS_STATUS pNdisStatus,\r
+ IN PADAPT pAdapt,\r
+ __in_bcount(InformationBufferLength) IN PVOID InformationBuffer,\r
+ IN ULONG InformationBufferLength,\r
+ OUT PULONG BytesRead,\r
+ OUT PULONG BytesNeeded\r
+ );\r
+\r
+VOID\r
+PtReferenceAdapt(\r
+ IN PADAPT pAdapt\r
+ );\r
+\r
+BOOLEAN\r
+PtDereferenceAdapt(\r
+ IN PADAPT pAdapt\r
+ );\r
+\r
+//\r
+// There should be no DbgPrint's in the Free version of the driver\r
+//\r
+// DBGPRINT takes the DbgPrint argument list wrapped in an extra pair of\r
+// parentheses, e.g. DBGPRINT(("status %x\n", Status));\r
+//\r
+// When DBG is non-zero it prints the "Passthru: " prefix and then the\r
+// message through two separate DbgPrint calls; otherwise it expands to\r
+// nothing.\r
+//\r
+// The DBG expansion is a bare brace block, so "DBGPRINT((...));" leaves an\r
+// empty statement after the block. Do not use it as the unbraced body of\r
+// an "if" that is followed by "else".\r
+//\r
+#if DBG\r
+\r
+#define DBGPRINT(Fmt) \\r
+ { \\r
+ DbgPrint("Passthru: "); \\r
+ DbgPrint Fmt; \\r
+ }\r
+\r
+#else // if DBG\r
+\r
+#define DBGPRINT(Fmt) \r
+\r
+#endif // if DBG \r
+\r
+#define NUM_PKTS_IN_POOL 256\r
+\r
+\r
+//\r
+// Protocol reserved part of a sent packet that is allocated by us.\r
+// Holds a pointer to the original packet.\r
+//\r
+typedef struct _SEND_RSVD\r
+{\r
+ PNDIS_PACKET OriginalPkt;\r
+} SEND_RSVD, *PSEND_RSVD;\r
+\r
+//\r
+// Miniport reserved part of a received packet that is allocated by\r
+// us. Note that this should fit into the MiniportReserved space\r
+// in an NDIS_PACKET.\r
+//\r
+typedef struct _RECV_RSVD\r
+{\r
+ PNDIS_PACKET OriginalPkt;\r
+} RECV_RSVD, *PRECV_RSVD;\r
+\r
+//\r
+// Compile-time check that RECV_RSVD fits in NDIS_PACKET's MiniportReserved\r
+// field. No equivalent check for SEND_RSVD is visible in this header.\r
+//\r
+C_ASSERT(sizeof(RECV_RSVD) <= sizeof(((PNDIS_PACKET)0)->MiniportReserved));\r
+\r
+//\r
+// Event Codes related to the PassthruEvent Structure\r
+//\r
+// NOTE: the typedef names are spelled inconsistently (PASSSTHRU_EVENT_CODE\r
+// with three S's, PPASTHRU_EVENT_CODE with one). They are kept as-is\r
+// because other code may reference these exact identifiers.\r
+//\r
+\r
+typedef enum \r
+{\r
+ Passthru_Invalid,\r
+ Passthru_SetPower,\r
+ Passthru_Unbind\r
+\r
+} PASSSTHRU_EVENT_CODE, *PPASTHRU_EVENT_CODE; \r
+\r
+//\r
+// Passthru event paired with a code stating why the event was set\r
+// (presumably; confirm against the users of this structure)\r
+//\r
+\r
+typedef struct _PASSTHRU_EVENT\r
+{\r
+ NDIS_EVENT Event;\r
+ PASSSTHRU_EVENT_CODE Code;\r
+\r
+} PASSTHRU_EVENT, *PPASSTHRU_EVENT;\r
+\r
+\r
+//\r
+// Structure used by both the miniport as well as the protocol part of the intermediate driver\r
+// to represent an adapter and its corresponding lower bindings\r
+//\r
+typedef struct _ADAPT\r
+{\r
+ struct _ADAPT * Next;\r
+ \r
+ NDIS_HANDLE BindingHandle; // To the lower miniport\r
+ NDIS_HANDLE MiniportHandle; // NDIS handle for miniport up-calls\r
+ NDIS_HANDLE SendPacketPoolHandle;\r
+ NDIS_HANDLE RecvPacketPoolHandle;\r
+ NDIS_STATUS Status; // Open Status\r
+ NDIS_EVENT Event; // Used by bind/halt for Open/Close Adapter synch.\r
+ NDIS_MEDIUM Medium;\r
+ NDIS_REQUEST Request; // This is used to wrap a request coming down\r
+ // to us. This exploits the fact that requests\r
+ // are serialized down to us.\r
+ PULONG BytesNeeded;\r
+ PULONG BytesReadOrWritten;\r
+ BOOLEAN ReceivedIndicationFlags[32]; // NOTE(review): meaning of the 32 slots is not visible in this header\r
+ \r
+ BOOLEAN OutstandingRequests; // TRUE iff a request is pending\r
+ // at the miniport below\r
+ BOOLEAN QueuedRequest; // TRUE iff a request is queued at\r
+ // this IM miniport\r
+\r
+ BOOLEAN StandingBy; // True - When the miniport or protocol is transitioning from a D0 to Standby (>D0) State\r
+ // False - At all other times, - Flag is cleared after a transition to D0\r
+ BOOLEAN UnbindingInProcess;\r
+ NDIS_SPIN_LOCK Lock; // Taken by ADAPT_DECR_PENDING_SENDS around OutstandingSends\r
+\r
+ NDIS_DEVICE_POWER_STATE MPDeviceState; // Miniport's Device State \r
+ NDIS_DEVICE_POWER_STATE PTDeviceState; // Protocol's Device State \r
+ NDIS_STRING DeviceName; // For initializing the miniport edge\r
+ NDIS_EVENT MiniportInitEvent; // For blocking UnbindAdapter while\r
+ // an IM Init is in progress.\r
+ BOOLEAN MiniportInitPending; // TRUE iff IMInit in progress\r
+ NDIS_STATUS LastIndicatedStatus; // The last indicated media status\r
+ NDIS_STATUS LatestUnIndicateStatus; // The latest suppressed media status\r
+ ULONG OutstandingSends;\r
+ LONG RefCount;\r
+ BOOLEAN MiniportIsHalted;\r
+} ADAPT, *PADAPT;\r
+\r
+extern NDIS_HANDLE ProtHandle, DriverHandle;\r
+extern NDIS_MEDIUM MediumArray[4];\r
+extern PADAPT pAdaptList;\r
+extern NDIS_SPIN_LOCK GlobalLock;\r
+\r
+\r
+// Evaluates to the MiniportHandle field of the given adapter.\r
+#define ADAPT_MINIPORT_HANDLE(_pAdapt) ((_pAdapt)->MiniportHandle)\r
+//\r
+// Decrements OutstandingSends while holding the adapter's Lock.\r
+// Expands to a bare brace block, so avoid using it as the unbraced body\r
+// of an "if" that is followed by "else".\r
+//\r
+#define ADAPT_DECR_PENDING_SENDS(_pAdapt) \\r
+ { \\r
+ NdisAcquireSpinLock(&(_pAdapt)->Lock); \\r
+ (_pAdapt)->OutstandingSends--; \\r
+ NdisReleaseSpinLock(&(_pAdapt)->Lock); \\r
+ }\r
+\r
+//\r
+// Custom Macros to be used by the passthru driver \r
+//\r
+/*\r
+BOOLEAN\r
+IsIMDeviceStateOn(\r
+ PADAPT \r
+ )\r
+\r
+ Non-zero only when both MPDeviceState and PTDeviceState of the adapter\r
+ equal NdisDeviceStateD0. The argument is evaluated twice.\r
+\r
+*/\r
+#define IsIMDeviceStateOn(_pP) ((_pP)->MPDeviceState == NdisDeviceStateD0 && (_pP)->PTDeviceState == NdisDeviceStateD0 ) \r
+\r
--- /dev/null
+<html xmlns:v="urn:schemas-microsoft-com:vml"\r
+xmlns:o="urn:schemas-microsoft-com:office:office"\r
+xmlns:w="urn:schemas-microsoft-com:office:word"\r
+xmlns:st1="urn:schemas-microsoft-com:office:smarttags"\r
+xmlns="http://www.w3.org/TR/REC-html40">\r
+\r
+<head>\r
+<meta http-equiv=Content-Type content="text/html; charset=windows-1252">\r
+<meta name=ProgId content=Word.Document>\r
+<meta name=Generator content="Microsoft Word 10">\r
+<meta name=Originator content="Microsoft Word 10">\r
+<link rel=File-List href="passthru_files/filelist.xml">\r
+<title>passthru</title>\r
+<o:SmartTagType namespaceuri="urn:schemas-microsoft-com:office:smarttags"\r
+ name="place"/>\r
+<o:SmartTagType namespaceuri="urn:schemas-microsoft-com:office:smarttags"\r
+ name="PlaceType"/>\r
+<o:SmartTagType namespaceuri="urn:schemas-microsoft-com:office:smarttags"\r
+ name="PlaceName"/>\r
+<!--[if gte mso 9]><xml>\r
+ <w:WordDocument>\r
+ <w:SpellingState>Clean</w:SpellingState>\r
+ <w:GrammarState>Clean</w:GrammarState>\r
+ <w:Compatibility>\r
+ <w:UseFELayout/>\r
+ </w:Compatibility>\r
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>\r
+ </w:WordDocument>\r
+</xml><![endif]--><!--[if !mso]><object\r
+ classid="clsid:38481807-CA0E-42D2-BF39-B33AF135CC4D" id=ieooui></object>\r
+<style>\r
+st1\:*{behavior:url(#ieooui) }\r
+</style>\r
+<![endif]-->\r
+<style>\r
+<!--\r
+ /* Font Definitions */\r
+ @font-face\r
+ {font-family:"MS Mincho";\r
+ panose-1:2 2 6 9 4 2 5 8 3 4;\r
+ mso-font-alt:"\FF2D\FF33 \660E\671D";\r
+ mso-font-charset:128;\r
+ mso-generic-font-family:modern;\r
+ mso-font-pitch:fixed;\r
+ mso-font-signature:-1610612033 1757936891 16 0 131231 0;}\r
+@font-face\r
+ {font-family:Verdana;\r
+ panose-1:2 11 6 4 3 5 4 4 2 4;\r
+ mso-font-charset:0;\r
+ mso-generic-font-family:swiss;\r
+ mso-font-pitch:variable;\r
+ mso-font-signature:536871559 0 0 0 415 0;}\r
+@font-face\r
+ {font-family:"\@MS Mincho";\r
+ panose-1:2 2 6 9 4 2 5 8 3 4;\r
+ mso-font-charset:128;\r
+ mso-generic-font-family:modern;\r
+ mso-font-pitch:fixed;\r
+ mso-font-signature:-1610612033 1757936891 16 0 131231 0;}\r
+@font-face\r
+ {font-family:"MS Sans Serif";\r
+ panose-1:0 0 0 0 0 0 0 0 0 0;\r
+ mso-font-charset:0;\r
+ mso-generic-font-family:swiss;\r
+ mso-font-format:other;\r
+ mso-font-pitch:variable;\r
+ mso-font-signature:3 0 0 0 1 0;}\r
+ /* Style Definitions */\r
+ p.MsoNormal, li.MsoNormal, div.MsoNormal\r
+ {mso-style-parent:"";\r
+ margin:0in;\r
+ margin-bottom:.0001pt;\r
+ mso-pagination:widow-orphan;\r
+ font-size:12.0pt;\r
+ font-family:"Times New Roman";\r
+ mso-fareast-font-family:"Times New Roman";\r
+ color:black;}\r
+h2\r
+ {mso-margin-top-alt:auto;\r
+ margin-right:0in;\r
+ mso-margin-bottom-alt:auto;\r
+ margin-left:0in;\r
+ mso-pagination:widow-orphan;\r
+ mso-outline-level:2;\r
+ font-size:18.0pt;\r
+ font-family:"Times New Roman";\r
+ mso-fareast-font-family:"MS Mincho";\r
+ color:black;\r
+ font-weight:bold;}\r
+h3\r
+ {mso-margin-top-alt:auto;\r
+ margin-right:0in;\r
+ mso-margin-bottom-alt:auto;\r
+ margin-left:0in;\r
+ mso-pagination:widow-orphan;\r
+ mso-outline-level:3;\r
+ font-size:13.5pt;\r
+ font-family:"Times New Roman";\r
+ mso-fareast-font-family:"MS Mincho";\r
+ color:black;\r
+ font-weight:bold;}\r
+h4\r
+ {mso-margin-top-alt:auto;\r
+ margin-right:0in;\r
+ mso-margin-bottom-alt:auto;\r
+ margin-left:0in;\r
+ mso-pagination:widow-orphan;\r
+ mso-outline-level:4;\r
+ font-size:12.0pt;\r
+ font-family:"Times New Roman";\r
+ mso-fareast-font-family:"MS Mincho";\r
+ color:black;\r
+ font-weight:bold;}\r
+a:link, span.MsoHyperlink\r
+ {color:blue;\r
+ text-decoration:underline;\r
+ text-underline:single;}\r
+a:visited, span.MsoHyperlinkFollowed\r
+ {color:purple;\r
+ text-decoration:underline;\r
+ text-underline:single;}\r
+p\r
+ {mso-margin-top-alt:auto;\r
+ margin-right:0in;\r
+ mso-margin-bottom-alt:auto;\r
+ margin-left:0in;\r
+ mso-pagination:widow-orphan;\r
+ font-size:12.0pt;\r
+ font-family:"Times New Roman";\r
+ mso-fareast-font-family:"Times New Roman";\r
+ color:black;}\r
+pre\r
+ {margin:0in;\r
+ margin-bottom:.0001pt;\r
+ mso-pagination:widow-orphan;\r
+ tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt;\r
+ font-size:10.0pt;\r
+ font-family:"Courier New";\r
+ mso-fareast-font-family:"Courier New";\r
+ color:black;}\r
+span.SpellE\r
+ {mso-style-name:"";\r
+ mso-spl-e:yes;}\r
+span.GramE\r
+ {mso-style-name:"";\r
+ mso-gram-e:yes;}\r
+@page Section1\r
+ {size:8.5in 11.0in;\r
+ margin:1.0in 1.25in 1.0in 1.25in;\r
+ mso-header-margin:.5in;\r
+ mso-footer-margin:.5in;\r
+ mso-paper-source:0;}\r
+div.Section1\r
+ {page:Section1;}\r
+-->\r
+</style>\r
+<!--[if gte mso 10]>\r
+<style>\r
+ /* Style Definitions */\r
+ table.MsoNormalTable\r
+ {mso-style-name:"Table Normal";\r
+ mso-tstyle-rowband-size:0;\r
+ mso-tstyle-colband-size:0;\r
+ mso-style-noshow:yes;\r
+ mso-style-parent:"";\r
+ mso-padding-alt:0in 5.4pt 0in 5.4pt;\r
+ mso-para-margin:0in;\r
+ mso-para-margin-bottom:.0001pt;\r
+ mso-pagination:widow-orphan;\r
+ font-size:10.0pt;\r
+ font-family:"Times New Roman";}\r
+</style>\r
+<![endif]-->\r
+<meta name=Template content="C:\Program Files\Microsoft Office\Office\html.dot">\r
+<!--[if gte mso 9]><xml>\r
+ <o:shapedefaults v:ext="edit" spidmax="3074"/>\r
+</xml><![endif]--><!--[if gte mso 9]><xml>\r
+ <o:shapelayout v:ext="edit">\r
+ <o:idmap v:ext="edit" data="1"/>\r
+ </o:shapelayout></xml><![endif]-->\r
+</head>\r
+\r
+<body bgcolor=white lang=EN-US link=blue vlink=purple style='tab-interval:.5in'>\r
+\r
+<div class=Section1>\r
+\r
+<h2><a name=MYSAMPLE></a><a name=top></a><span style='mso-bookmark:MYSAMPLE'>\r
+\r
+<!doctype HTML>\r
+\r
+<span style='font-family:Verdana'><! ---------------- Snip Snip ---------------- >PASSTHRU.SYS\r
+- Sample NDIS Intermediate Driver</span></span><span style='font-family:Verdana'><o:p></o:p></span></h2>\r
+\r
+<h3><span style='font-family:Verdana'>SUMMARY<o:p></o:p></span></h3>\r
+\r
+<p><st1:place><st1:PlaceName><span class=SpellE><b><span style='font-family:\r
+ Verdana'>Passthru</span></b></span></st1:PlaceName><b><span style='font-family:\r
+ Verdana'> </span></b><st1:PlaceName><b><span style='font-family:Verdana'>Intermediate</span></b></st1:PlaceName><b><span\r
+ style='font-family:Verdana'> </span></b><st1:PlaceType><b><span\r
+ style='font-family:Verdana'>Miniport</span></b></st1:PlaceType></st1:place><b><span\r
+style='font-family:Verdana'> Driver<o:p></o:p></span></b></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>The <span class=SpellE>Passthru</span>\r
+sample is a do-nothing pass-through NDIS 5 driver that demonstrates the basic\r
+principles underlying an NDIS Intermediate Miniport (IM) driver. This driver\r
+exposes a virtual adapter for each binding to a real or virtual NDIS adapter.\r
+Protocols bind to these virtual adapters as if they are real adapters. <o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>The <span class=SpellE>Passthru</span>\r
+driver re-packages and sends down all requests and sends submitted to this\r
+virtual adapter. The <span class=SpellE>Passthru</span> driver can be modified\r
+to change the data before passing it along. For example, it could\r
+encrypt/compress outgoing and decrypt/decompress incoming data.<o:p></o:p></span></p>\r
+\r
+<p><span class=SpellE><span style='font-size:10.0pt;font-family:Verdana'>Passthru</span></span><span\r
+style='font-size:10.0pt;font-family:Verdana'> also re-packages and indicates up\r
+all received data and status indications that it receives at its lower\r
+(protocol) edge.<o:p></o:p></span></p>\r
+\r
+<h3><span style='font-family:Verdana'>BUILDING THE SAMPLE<o:p></o:p></span></h3>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Run the <b>build</b>\r
+command from this directory to build the sample&mdash;it creates the binary <span\r
+class=SpellE>Passthru.sys</span>. <o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>To install this driver on\r
+Windows® 2000, use the PASSTHRU sample notification object and <span\r
+class=SpellE>INFs</span>, also found in this DDK.<o:p></o:p></span></p>\r
+\r
+<h3><span style='font-family:Verdana'>INSTALLING THE SAMPLE<o:p></o:p></span></h3>\r
+\r
+<p><span class=SpellE><span style='font-size:10.0pt;font-family:Verdana'>Passthru</span></span><span\r
+style='font-size:10.0pt;font-family:Verdana'> is installed as a service (called\r
+&ldquo;<span class=SpellE>Passthru</span> Driver&rdquo; in the supplied <span class=SpellE>INFs</span>/notification\r
+object). To install, follow the steps below.<o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Prepare a floppy disk (or\r
+installation directory) that contains these files: <span class=SpellE>netsf.inf</span>,\r
+<span class=SpellE>netsf_m.inf</span> and <span class=SpellE>passthru.sys</span>.<o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>On the desktop,\r
+right-click the <b>My Network Places</b> icon and choose <b>Properties</b>. <o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Right-click on the\r
+relevant Local Area Connection icon and choose <b>Properties</b>. <o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Click <b>Install</b>,\r
+then <b>Service</b>, then <b>Add</b>, <span class=GramE>then</span> <b>Have Disk</b>.\r
+<o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Browse to the\r
+drive/directory containing the files listed above. Click <b>OK</b>. This should\r
+show &ldquo;<span class=SpellE>Passthru</span> Driver&rdquo; in a list of Network Services.\r
+Highlight this and click <b>OK</b>. This should install the <span class=SpellE>Passthru</span>\r
+driver. <o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Click <b>OK</b> or <span\r
+class=GramE><b>Yes</b></span> each time the system prompts with a warning\r
+regarding installation of unsigned files. This is necessary because binaries\r
+generated via the DDK build environment are not signed.<o:p></o:p></span></p>\r
+\r
+<p><span style='font-size:10.0pt;font-family:Verdana'>Two .INF files are needed\r
+rather than one because <span class=SpellE>Passthru</span> is installed both as\r
+a protocol and a miniport.<o:p></o:p></span></p>\r
+\r
+<h3><span style='font-family:Verdana'>CODE TOUR<o:p></o:p></span></h3>\r
+\r
+<h4><span style='font-family:Verdana'>File Manifest<o:p></o:p></span></h4>\r
+\r
+<pre><u>File<span style='mso-tab-count:2'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span>Description<o:p></o:p></u></pre><pre><o:p> </o:p></pre><pre><span\r
+class=SpellE>Makefile</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span>Used during compilation to create the object and sys files</pre><pre><span\r
+class=SpellE>Miniport.c</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;</span>Miniport related functions of the <span\r
+class=SpellE>passthru</span> driver</pre><pre><span class=SpellE>Netsf.inf</span><span\r
+style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span>Installation INF for the service (protocol side installation)</pre><pre><span\r
+class=SpellE>Netsf_m.inf</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;</span>Installation INF for the miniport (virtual device installation)</pre><pre><span\r
+class=SpellE>Passthru.c</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;</span><span\r
+class=SpellE>DriverEntry</span> routine and any routines common to the <span\r
+class=SpellE>passthru</span> miniport and protocol </pre><pre><span\r
+class=SpellE>Passthru.h</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;</span>Prototypes of all functions and data structures used by the <span\r
+class=SpellE>Passthru</span> driver</pre><pre>Passthru.htm<span\r
+style='mso-tab-count:1'>&nbsp;&nbsp;</span>Documentation for the <span class=SpellE>Passthru</span> driver (this file)</pre><pre><span\r
+class=SpellE>Passthru.rc</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;</span>Resource <span\r
+class=GramE>file</span> for the <span class=SpellE>Passthru</span> driver</pre><pre><span\r
+class=SpellE>Precomp.h</span><span style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span\r
+class=SpellE>Precompile</span> header file</pre><pre><span class=SpellE>Protocol.c</span><span\r
+style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;</span>Protocol related functions of the <span\r
+class=SpellE>Passthru</span> driver</pre><pre>Sources<span style='mso-tab-count:\r
+2'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span>List of source files that are compiled and linked to create the <span\r
+class=SpellE>passthru</span> driver. This can be modified to create binaries that operate on previous Windows versions (e.g. Windows 2000).</pre>\r
+\r
+<h4 style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-family:Verdana'>Programming Tour<o:p></o:p></span></h4>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>Basic steps in initializing and\r
+halting of <span class=SpellE>Passthru</span> driver:<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>1) During <span class=SpellE>DriverEntry</span>,\r
+the <span class=SpellE>Passthru</span> driver registers as a protocol and an\r
+Intermediate miniport driver.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>2) Later on, NDIS calls <span\r
+class=SpellE>Passthru&rsquo;s</span> <span class=SpellE>BindAdapterHandler</span>, <span\r
+class=SpellE>PtBindAdapter</span>, for each underlying NDIS adapter to which it\r
+is configured to bind.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>3) In the context of <span\r
+class=SpellE>BindAdapterHandler</span> and after successfully opening a binding\r
+to the underlying adapter, the <span class=SpellE>Passthru</span> driver\r
+queries the reserved keyword "<span class=SpellE>UpperBindings</span>"\r
+to get a list of device names for the virtual adapters that this particular\r
+binding is to expose. Since this driver implements a 1:1 relationship between\r
+lower bindings and virtual adapters, this list contains a single name. &ldquo;<span\r
+class=SpellE>Mux</span>&rdquo; IM drivers that expose multiple virtual adapters over\r
+a single underlying adapter will process multiple entries in <span\r
+class=SpellE>UpperBindings</span>.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>4) For each device name, the <span\r
+class=SpellE>Passthru</span> driver calls <span class=SpellE>NdisIMInitializeDeviceInstanceEx</span>.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>5) In response, NDIS will\r
+eventually call back <span class=SpellE>Passthru</span> miniport&rsquo;s <span\r
+class=SpellE>MiniportInitialize</span> entry point, <span class=SpellE>MPInitialize</span>.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>6) After <span class=SpellE>MPInitialize</span>\r
+successfully returns, NDIS takes care of getting upper-layer protocols to bind\r
+to the newly created virtual adapter(s).<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>7) All requests and sends coming\r
+from upper-layer protocols for the <span class=SpellE>Passthru</span> miniport\r
+driver are repackaged and sent down to NDIS, to be passed to the underlying\r
+NDIS adapter.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>8) All indications arriving from\r
+bindings to an underlying NDIS adapter are forwarded up as if they were generated\r
+from <span class=SpellE>Passthru&rsquo;s</span> virtual adapters.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>9) NDIS calls the <span\r
+class=SpellE>Passthru</span> driver&#8217;s <span class=SpellE>ProtocolUnbind</span>\r
+entry point to request it to close the binding between an underlying adapter\r
+and <span class=SpellE>Passthru</span> protocol. In processing this, the <span\r
+class=SpellE>Passthru</span> driver first calls <span class=SpellE>NdisIMDeInitializeDeviceInstance</span>\r
+for the virtual adapter(s) representing that particular binding.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>10) NDIS in turn will close all\r
+the bindings between upper-layer protocols and virtual <span class=SpellE>Passthru</span>\r
+adapter.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>11) After all the bindings are\r
+closed, NDIS calls the <span class=SpellE>Passthru</span> driver&#8217;s <span\r
+class=SpellE>MiniportHalt</span> entry point (<span class=SpellE>MPHalt</span>)\r
+for the virtual adapter.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>12) The <span class=SpellE>Passthru</span>\r
+protocol then closes the binding to the underlying adapter by calling <span\r
+class=SpellE>NdisCloseAdapter</span>, and completes the unbind request issued\r
+in step 9.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>13) <b>Handling Power Management</b><o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>13.1 During initialization, the <span\r
+class=SpellE>Passthru</span> miniport should set the Attribute '<i>NDIS_ATTRIBUTE_NO_HALT_ON_SUSPEND</i>'\r
+in its call to <span class=SpellE>NdisMSetAttributesEx</span>. <o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>13.2 When the <span class=SpellE>Passthru</span>\r
+miniport is requested to report its Plug and Play capabilities\r
+(OID_PNP_CAPABILITIES), the <span class=SpellE>Passthru</span> miniport must\r
+pass the request to the underlying miniport. If this request succeeds, then the\r
+<span class=SpellE>Passthru</span> miniport should overwrite the following\r
+fields before successfully completing the original request: <o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>NDIS_DEVICE_POWER_STATE<span\r
+style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span class=SpellE>MinMagicPacketWakeUp</span>\r
+= <span class=SpellE>NdisDeviceStateUnspecified</span>;<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>NDIS_DEVICE_POWER_STATE<span\r
+style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span class=SpellE>MinPatternWakeUp</span>\r
+= <span class=SpellE>NdisDeviceStateUnspecified</span>;<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>NDIS_DEVICE_POWER_STATE<span\r
+style='mso-tab-count:1'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><span class=SpellE>MinLinkChangeWakeUp</span>\r
+= <span class=SpellE>NdisDeviceStateUnspecified</span>;<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>If the miniport below the <span\r
+class=SpellE>Passthru</span> protocol fails this request, then the status that\r
+was returned should be used to respond to the original request that was made to\r
+the <span class=SpellE>Passthru</span> miniport. <o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>13.3 OID_PNP_SET_POWER and OID_PNP_QUERY_POWER\r
+should not be passed to the miniport below the <span class=SpellE>Passthru</span>\r
+protocol, as those <span class=SpellE>miniports</span> will receive independent\r
+requests from NDIS.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>13.4 NDIS calls the <span\r
+class=SpellE>Passthru</span> driver&#8217;s <span class=SpellE>ProtocolPnPEvent</span>\r
+entry point (<span class=SpellE>PtPnPHandler</span>) whenever the underlying adapter\r
+is transitioned to a different power state. If the underlying adapter is\r
+transitioning to a low power state, the IM driver should wait for all\r
+outstanding sends and requests to complete.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>14) <b>NDIS 5.1 Features</b><o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>14.1 All NDIS 5.1 features in <span\r
+class=SpellE>Passthru</span> are identified by #<span class=SpellE>ifdef</span>\r
+NDIS51 compiler directives. The following major features are illustrated (refer\r
+to the DDK documentation for more information on these):<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><b><span\r
+style='font-size:10.0pt;font-family:Verdana'>Packet stacking</span></b><span\r
+style='font-size:10.0pt;font-family:Verdana'>: this allows an IM driver to\r
+reuse a packet submitted to its protocol or miniport edge to forward data down\r
+(or up) to the adjacent layer.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><b><span\r
+style='font-size:10.0pt;font-family:Verdana'>Canceling Sends</span></b><span\r
+style='font-size:10.0pt;font-family:Verdana'>: <span class=SpellE>Passthru</span>\r
+propagates send cancellations from protocols above it to lower <span\r
+class=SpellE>miniports</span>.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><b><span\r
+style='font-size:10.0pt;font-family:Verdana'>PnP Event Propagation</span></b><span\r
+style='font-size:10.0pt;font-family:Verdana'>: <span class=SpellE>Passthru</span>\r
+propagates PnP events arriving at its protocol (lower) edge to higher layer\r
+protocols that are bound to its virtual adapter.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+class=SpellE><b><span style='font-size:10.0pt;font-family:Verdana'>NdisQueryPendingIOCount</span></b></span><span\r
+style='font-size:10.0pt;font-family:Verdana'>: <span class=SpellE>Passthru</span>\r
+uses this new API to determine if any I/O operations are in progress on its\r
+lower binding.<o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'>15) For Win2K SP2 and <span\r
+class=SpellE>WinXP</span>, the <span class=SpellE>Passthru</span> sample no\r
+longer requires a Notify Object. The Notify Object has been removed. <o:p></o:p></span></p>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:10.0pt;font-family:Verdana'><span\r
+style='mso-spacerun:yes'>&nbsp;</span><o:p></o:p></span></p>\r
+\r
+<p align=center style='text-align:center;tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><a\r
+href="#top"><span style='font-size:10.0pt;font-family:Verdana'>Top of page</span></a><span\r
+style='font-size:10.0pt;font-family:Verdana'> <o:p></o:p></span></p>\r
+\r
+<table class=MsoNormalTable border=0 cellspacing=0 cellpadding=0 width=624\r
+ style='width:6.5in;mso-cellspacing:0in;mso-padding-alt:0in 0in 0in 0in'>\r
+ <tr style='mso-yfti-irow:0;mso-yfti-lastrow:yes;height:1.5pt'>\r
+ <td style='background:aqua;padding:.75pt .75pt .75pt .75pt;height:1.5pt'>\r
+ <p class=MsoNormal><o:p> </o:p></p>\r
+ </td>\r
+ </tr>\r
+</table>\r
+\r
+<p style='tab-stops:45.8pt 91.6pt 137.4pt 183.2pt 229.0pt 274.8pt 320.6pt 366.4pt 412.2pt 458.0pt 503.8pt 549.6pt 595.4pt 641.2pt 687.0pt 732.8pt'><span\r
+style='font-size:7.5pt;font-family:"MS Sans Serif"'>© 1999 Microsoft\r
+Corporation</span><span style='font-size:10.0pt;font-family:Verdana'> <o:p></o:p></span></p>\r
+\r
+</div>\r
+\r
+</body>\r
+\r
+</html>\r
+\r
--- /dev/null
+/* Version resource script: defines the VER_* macros that common.ver consumes. */\r
+#include <windows.h>\r
+#include <ntverp.h>\r
+\r
+/*-----------------------------------------------*/\r
+/* the following lines are specific to this file */\r
+/*-----------------------------------------------*/\r
+\r
+/* VER_FILETYPE, VER_FILESUBTYPE, VER_FILEDESCRIPTION_STR\r
+ * and VER_INTERNALNAME_STR must be defined before including COMMON.VER\r
+ * The strings don't need a '\0', since common.ver has them.\r
+ */\r
+#define VER_FILETYPE VFT_DRV\r
+/* possible values: VFT_UNKNOWN\r
+ VFT_APP\r
+ VFT_DLL\r
+ VFT_DRV\r
+ VFT_FONT\r
+ VFT_VXD\r
+ VFT_STATIC_LIB\r
+*/\r
+#define VER_FILESUBTYPE VFT2_DRV_NETWORK\r
+/* possible values VFT2_UNKNOWN\r
+ VFT2_DRV_PRINTER\r
+ VFT2_DRV_KEYBOARD\r
+ VFT2_DRV_LANGUAGE\r
+ VFT2_DRV_DISPLAY\r
+ VFT2_DRV_MOUSE\r
+ VFT2_DRV_NETWORK\r
+ VFT2_DRV_SYSTEM\r
+ VFT2_DRV_INSTALLABLE\r
+ VFT2_DRV_SOUND\r
+ VFT2_DRV_COMM\r
+*/\r
+/* NOTE(review): the description below says "NDIS 4.0" although the driver\r
+ * sources reference NDIS 5.1 features - confirm the intended wording.\r
+ */\r
+#define VER_FILEDESCRIPTION_STR "Sample NDIS 4.0 Intermediate Miniport Driver"\r
+#define VER_INTERNALNAME_STR "PASSTHRU.SYS"\r
+#define VER_ORIGINALFILENAME_STR "PASSTHRU.SYS"\r
+#define VER_LANGNEUTRAL\r
+\r
+/* Must come last: common.ver uses the macros defined above. */\r
+#include "common.ver"\r
+\r
+\1a\r
--- /dev/null
+#pragma warning(disable:4214) // bit field types other than int\r
+\r
+#pragma warning(disable:4201) // nameless struct/union\r
+#pragma warning(disable:4115) // named type definition in parentheses\r
+#pragma warning(disable:4127) // conditional expression is constant\r
+#pragma warning(disable:4054) // cast of function pointer to PVOID\r
+#pragma warning(disable:4244) // conversion from 'int' to 'BOOLEAN', possible loss of data\r
+\r
+#include <ndis.h>\r
+#include "passthru.h"\r
+\r
--- /dev/null
+/*++\r
+\r
+Copyright(c) 1992-2000 Microsoft Corporation\r
+\r
+Module Name:\r
+\r
+ protocol.c\r
+\r
+Abstract:\r
+\r
+ Ndis Intermediate Miniport driver sample. This is a passthru driver.\r
+\r
+Author:\r
+\r
+Environment:\r
+\r
+\r
+Revision History:\r
+\r
+\r
+--*/\r
+\r
+\r
+#include "precomp.h"\r
+#pragma hdrstop\r
+\r
+//\r
+// Packet pool sizing used by PtBindAdapter: MIN_PACKET_POOL_SIZE is passed as\r
+// the initial descriptor count and (MAX - MIN) as the overflow count to\r
+// NdisAllocatePacketPoolEx.\r
+//\r
+#define MAX_PACKET_POOL_SIZE 0x0000FFFF\r
+#define MIN_PACKET_POOL_SIZE 0x000000FF\r
+\r
+//\r
+// NDIS version as 0xMMMMmmmm, where M=Major/m=minor (0x00050001 = 5.1); \r
+// initially unknown (0). Filled in by PtBindAdapter from the "NdisVersion"\r
+// configuration value the first time a binding is processed.\r
+// \r
+ULONG NdisDotSysVersion = 0x0;\r
+\r
+\r
+// Value of NdisDotSysVersion that corresponds to NDIS.SYS 5.1.\r
+#define NDIS_SYS_VERSION_51 0x00050001\r
+\r
+\r
+VOID\r
+PtBindAdapter(\r
+ OUT PNDIS_STATUS Status,\r
+ IN NDIS_HANDLE BindContext,\r
+ IN PNDIS_STRING DeviceName,\r
+ IN PVOID SystemSpecific1,\r
+ IN PVOID SystemSpecific2\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Called by NDIS to bind to a miniport below.\r
+\r
+ The routine reads the binding configuration, allocates and initializes\r
+ the ADAPT structure, allocates the send and receive packet pools, opens\r
+ the lower adapter and finally asks NDIS to initialize the upper-edge\r
+ (miniport) device instance.\r
+\r
+Arguments:\r
+\r
+ Status - Return status of bind here.\r
+ BindContext - Can be passed to NdisCompleteBindAdapter if this call is pended.\r
+ (unused by this implementation)\r
+ DeviceName - Device name to bind to. This is passed to NdisOpenAdapter.\r
+ SystemSpecific1 - Can be passed to NdisOpenProtocolConfiguration to read per-binding information\r
+ SystemSpecific2 - Unused\r
+\r
+Return Value:\r
+\r
+ None directly; the result is reported through *Status:\r
+\r
+ NDIS_STATUS_PENDING if this call is pended. In this case call NdisCompleteBindAdapter\r
+ to complete.\r
+ Anything else Completes this call synchronously\r
+\r
+ This implementation waits for a pended NdisOpenAdapter itself, so the\r
+ open step never leaves *Status as NDIS_STATUS_PENDING.\r
+\r
+--*/\r
+{\r
+ NDIS_HANDLE ConfigHandle = NULL;\r
+ PNDIS_CONFIGURATION_PARAMETER Param;\r
+ NDIS_STRING DeviceStr = NDIS_STRING_CONST("UpperBindings");\r
+ NDIS_STRING NdisVersionStr = NDIS_STRING_CONST("NdisVersion");\r
+ PADAPT pAdapt = NULL;\r
+ NDIS_STATUS Sts;\r
+ UINT MediumIndex;\r
+ ULONG TotalSize;\r
+ BOOLEAN NoCleanUpNeeded = FALSE;\r
+\r
+\r
+ UNREFERENCED_PARAMETER(BindContext);\r
+ UNREFERENCED_PARAMETER(SystemSpecific2);\r
+ \r
+ DBGPRINT(("==> Protocol BindAdapter\n"));\r
+\r
+ //\r
+ // Single-pass do/while(FALSE): any failure breaks out to the common\r
+ // cleanup code that follows the loop.\r
+ //\r
+ do\r
+ {\r
+ //\r
+ // Access the configuration section for our binding-specific\r
+ // parameters.\r
+ //\r
+ NdisOpenProtocolConfiguration(Status,\r
+ &ConfigHandle,\r
+ SystemSpecific1);\r
+\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ break;\r
+ }\r
+ //\r
+ // Read the NDIS version only once; it is cached in the global\r
+ // NdisDotSysVersion for all later bindings.\r
+ //\r
+ if (NdisDotSysVersion == 0)\r
+ {\r
+ NdisReadConfiguration(Status,\r
+ &Param,\r
+ ConfigHandle,\r
+ &NdisVersionStr, // "NdisVersion"\r
+ NdisParameterInteger);\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ break;\r
+ }\r
+ \r
+ NdisDotSysVersion = Param->ParameterData.IntegerData;\r
+ }\r
+ \r
+\r
+ //\r
+ // Read the "UpperBindings" reserved key that contains a list\r
+ // of device names representing our miniport instances corresponding\r
+ // to this lower binding. Since this is a 1:1 IM driver, this key\r
+ // contains exactly one name.\r
+ //\r
+ // If we want to implement a N:1 mux driver (N adapter instances\r
+ // over a single lower binding), then UpperBindings will be a\r
+ // MULTI_SZ containing a list of device names - we would loop through\r
+ // this list, calling NdisIMInitializeDeviceInstanceEx once for\r
+ // each name in it.\r
+ //\r
+ NdisReadConfiguration(Status,\r
+ &Param,\r
+ ConfigHandle,\r
+ &DeviceStr,\r
+ NdisParameterString);\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ break;\r
+ }\r
+\r
+ //\r
+ // Allocate memory for the Adapter structure. This represents both the\r
+ // protocol context as well as the adapter structure when the miniport\r
+ // is initialized.\r
+ //\r
+ // In addition to the base structure, allocate space for the device\r
+ // instance string.\r
+ //\r
+ TotalSize = sizeof(ADAPT) + Param->ParameterData.StringData.MaximumLength;\r
+\r
+ NdisAllocateMemoryWithTag(&pAdapt, TotalSize, TAG);\r
+\r
+ if (pAdapt == NULL)\r
+ {\r
+ *Status = NDIS_STATUS_RESOURCES;\r
+ break;\r
+ }\r
+\r
+ //\r
+ // Initialize the adapter structure. We copy in the IM device\r
+ // name as well, because we may need to use it in a call to\r
+ // NdisIMCancelInitializeDeviceInstance. The string returned\r
+ // by NdisReadConfiguration is active (i.e. available) only\r
+ // for the duration of this call to our BindAdapter handler.\r
+ //\r
+ // The name buffer lives immediately after the ADAPT structure in\r
+ // the same allocation.\r
+ //\r
+ NdisZeroMemory(pAdapt, TotalSize);\r
+ pAdapt->DeviceName.MaximumLength = Param->ParameterData.StringData.MaximumLength;\r
+ pAdapt->DeviceName.Length = Param->ParameterData.StringData.Length;\r
+ pAdapt->DeviceName.Buffer = (PWCHAR)((ULONG_PTR)pAdapt + sizeof(ADAPT));\r
+ NdisMoveMemory(pAdapt->DeviceName.Buffer,\r
+ Param->ParameterData.StringData.Buffer,\r
+ Param->ParameterData.StringData.MaximumLength);\r
+\r
+\r
+\r
+ NdisInitializeEvent(&pAdapt->Event);\r
+ NdisAllocateSpinLock(&pAdapt->Lock);\r
+\r
+ //\r
+ // Allocate a packet pool for sends. We need this to pass sends down.\r
+ // We cannot use the same packet descriptor that came down to our send\r
+ // handler (see also NDIS 5.1 packet stacking).\r
+ //\r
+ NdisAllocatePacketPoolEx(Status,\r
+ &pAdapt->SendPacketPoolHandle,\r
+ MIN_PACKET_POOL_SIZE,\r
+ MAX_PACKET_POOL_SIZE - MIN_PACKET_POOL_SIZE,\r
+ sizeof(SEND_RSVD));\r
+\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ break;\r
+ }\r
+\r
+ //\r
+ // Allocate a packet pool for receives. We need this to indicate receives.\r
+ // Same consideration as sends (see also NDIS 5.1 packet stacking).\r
+ //\r
+ NdisAllocatePacketPoolEx(Status,\r
+ &pAdapt->RecvPacketPoolHandle,\r
+ MIN_PACKET_POOL_SIZE,\r
+ MAX_PACKET_POOL_SIZE - MIN_PACKET_POOL_SIZE,\r
+ PROTOCOL_RESERVED_SIZE_IN_PACKET);\r
+\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ break;\r
+ }\r
+\r
+ //\r
+ // Now open the adapter below and complete the initialization\r
+ //\r
+ NdisOpenAdapter(Status,\r
+ &Sts,\r
+ &pAdapt->BindingHandle,\r
+ &MediumIndex,\r
+ MediumArray,\r
+ sizeof(MediumArray)/sizeof(NDIS_MEDIUM),\r
+ ProtHandle,\r
+ pAdapt,\r
+ DeviceName,\r
+ 0,\r
+ NULL);\r
+\r
+ //\r
+ // If the open pended, block until PtOpenAdapterComplete stores the\r
+ // final status in pAdapt->Status and signals pAdapt->Event.\r
+ //\r
+ if (*Status == NDIS_STATUS_PENDING)\r
+ {\r
+ NdisWaitEvent(&pAdapt->Event, 0);\r
+ *Status = pAdapt->Status;\r
+ }\r
+\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ break;\r
+ }\r
+ //\r
+ // First reference: taken only once the lower binding is open.\r
+ //\r
+ PtReferenceAdapt(pAdapt);\r
+\r
+#pragma prefast(suppress: __WARNING_POTENTIAL_BUFFER_OVERFLOW, "Ndis guarantees MediumIndex to be within bounds");\r
+ pAdapt->Medium = MediumArray[MediumIndex];\r
+\r
+ //\r
+ // Now ask NDIS to initialize our miniport (upper) edge.\r
+ // Set the flag below to synchronize with a possible call\r
+ // to our protocol Unbind handler that may come in before\r
+ // our miniport initialization happens.\r
+ //\r
+ pAdapt->MiniportInitPending = TRUE;\r
+ NdisInitializeEvent(&pAdapt->MiniportInitEvent);\r
+\r
+ //\r
+ // Temporary second reference held across the call below; it is\r
+ // dropped again on both the success and the failure path.\r
+ //\r
+ PtReferenceAdapt(pAdapt);\r
+\r
+ *Status = NdisIMInitializeDeviceInstanceEx(DriverHandle,\r
+ &pAdapt->DeviceName,\r
+ pAdapt);\r
+\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ //\r
+ // If the miniport edge was already halted, skip the cleanup\r
+ // after the loop (presumably the halt path has already torn\r
+ // the binding down - confirm against the halt handler).\r
+ //\r
+ if (pAdapt->MiniportIsHalted == TRUE)\r
+ {\r
+ NoCleanUpNeeded = TRUE;\r
+ }\r
+ \r
+ DBGPRINT(("BindAdapter: Adapt %p, IMInitializeDeviceInstance error %x\n",\r
+ pAdapt, *Status));\r
+ \r
+ //\r
+ // A TRUE return is treated as "pAdapt is gone": the pointer is\r
+ // cleared so the cleanup code below does not touch it.\r
+ //\r
+ if (PtDereferenceAdapt(pAdapt))\r
+ {\r
+ pAdapt = NULL;\r
+ }\r
+ \r
+ break;\r
+ }\r
+ \r
+ PtDereferenceAdapt(pAdapt);\r
+\r
+ } while(FALSE);\r
+\r
+ //\r
+ // Close the configuration handle now. Everything we needed from it\r
+ // (NdisVersion and the UpperBindings device name) has already been\r
+ // copied out above.\r
+ //\r
+ if (ConfigHandle != NULL)\r
+ {\r
+ NdisCloseConfiguration(ConfigHandle);\r
+ }\r
+\r
+ //\r
+ // Failure cleanup.\r
+ //\r
+ // NOTE(review): when pAdapt was allocated but BindingHandle is still NULL\r
+ // (e.g. a packet pool allocation or NdisOpenAdapter failed), nothing in\r
+ // this routine frees pAdapt, its spin lock or the packet pools. This\r
+ // looks like a leak on those paths - confirm and fix together with the\r
+ // reference-counting helpers.\r
+ //\r
+ if ((*Status != NDIS_STATUS_SUCCESS) && (NoCleanUpNeeded == FALSE))\r
+ {\r
+ if (pAdapt != NULL)\r
+ {\r
+ if (pAdapt->BindingHandle != NULL)\r
+ {\r
+ NDIS_STATUS LocalStatus;\r
+\r
+ //\r
+ // Close the binding we opened above.\r
+ //\r
+\r
+ NdisResetEvent(&pAdapt->Event);\r
+ \r
+ NdisCloseAdapter(&LocalStatus, pAdapt->BindingHandle);\r
+ pAdapt->BindingHandle = NULL;\r
+\r
+ //\r
+ // PtCloseAdapterComplete signals pAdapt->Event when a\r
+ // pended close finishes.\r
+ //\r
+ if (LocalStatus == NDIS_STATUS_PENDING)\r
+ {\r
+ NdisWaitEvent(&pAdapt->Event, 0);\r
+ LocalStatus = pAdapt->Status;\r
+\r
+ \r
+ }\r
+ //\r
+ // Drop the reference taken after NdisOpenAdapter succeeded.\r
+ //\r
+ if (PtDereferenceAdapt(pAdapt))\r
+ {\r
+ pAdapt = NULL;\r
+ }\r
+ }\r
+ }\r
+ }\r
+\r
+\r
+ DBGPRINT(("<== Protocol BindAdapter: pAdapt %p, Status %x\n", pAdapt, *Status));\r
+}\r
+\r
+\r
+VOID\r
+PtOpenAdapterComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN NDIS_STATUS Status,\r
+    IN NDIS_STATUS OpenErrorStatus\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Completion handler for an NdisOpenAdapter call that pended inside\r
+    PtBindAdapter. Records the final status in the adapter structure and\r
+    signals the event the binder is waiting on.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext      Pointer to the adapter structure\r
+    Status                      Final status of the NdisOpenAdapter call\r
+    OpenErrorStatus             Secondary status (not used)\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    PADAPT Adapter;\r
+\r
+    UNREFERENCED_PARAMETER(OpenErrorStatus);\r
+\r
+    Adapter = (PADAPT)ProtocolBindingContext;\r
+\r
+    DBGPRINT(("==> PtOpenAdapterComplete: Adapt %p, Status %x\n", Adapter, Status));\r
+\r
+    //\r
+    // Publish the result first, then wake the waiter.\r
+    //\r
+    Adapter->Status = Status;\r
+    NdisSetEvent(&Adapter->Event);\r
+}\r
+\r
+\r
+VOID\r
+PtUnbindAdapter(\r
+ OUT PNDIS_STATUS Status,\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_HANDLE UnbindContext\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Called by NDIS when we are required to unbind from the adapter below.\r
+ This function shares functionality with the miniport's HaltHandler.\r
+ The code should ensure that NdisCloseAdapter and NdisFreeMemory are called\r
+ only once between the two functions.\r
+\r
+Arguments:\r
+\r
+ Status Placeholder for return status\r
+ ProtocolBindingContext Pointer to the adapter structure\r
+ UnbindContext Context for NdisUnbindComplete() if this pends\r
+ (unused by this implementation)\r
+\r
+Return Value:\r
+\r
+ None directly; the result is reported through *Status:\r
+\r
+ - If the miniport instance exists: NDIS_STATUS_SUCCESS when\r
+ NdisIMDeInitializeDeviceInstance succeeds, otherwise NDIS_STATUS_FAILURE.\r
+ - Otherwise: the status of NdisCloseAdapter, or NDIS_STATUS_FAILURE if\r
+ there was no open binding either.\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt =(PADAPT)ProtocolBindingContext;\r
+ NDIS_STATUS LocalStatus;\r
+\r
+ UNREFERENCED_PARAMETER(UnbindContext);\r
+ \r
+ DBGPRINT(("==> PtUnbindAdapter: Adapt %p\n", pAdapt));\r
+\r
+ //\r
+ // Set the flag that the miniport below is unbinding, so the request handlers will\r
+ // fail any request coming later.\r
+ //\r
+ // A request that is still queued is completed here with\r
+ // NDIS_STATUS_FAILURE; the lock is released before calling the\r
+ // completion routine.\r
+ // \r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ pAdapt->UnbindingInProcess = TRUE;\r
+ if (pAdapt->QueuedRequest == TRUE)\r
+ {\r
+ pAdapt->QueuedRequest = FALSE;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+\r
+ PtRequestComplete(pAdapt,\r
+ &pAdapt->Request,\r
+ NDIS_STATUS_FAILURE );\r
+\r
+ }\r
+ else\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ }\r
+#ifndef WIN9X\r
+ //\r
+ // Check if we had called NdisIMInitializeDeviceInstanceEx and\r
+ // we are awaiting a call to MiniportInitialize.\r
+ //\r
+ if (pAdapt->MiniportInitPending == TRUE)\r
+ {\r
+ //\r
+ // Try to cancel the pending IMInit process.\r
+ //\r
+ LocalStatus = NdisIMCancelInitializeDeviceInstance(\r
+ DriverHandle,\r
+ &pAdapt->DeviceName);\r
+\r
+ if (LocalStatus == NDIS_STATUS_SUCCESS)\r
+ {\r
+ //\r
+ // Successfully cancelled IM Initialization; our\r
+ // Miniport Initialize routine will not be called\r
+ // for this device.\r
+ //\r
+ pAdapt->MiniportInitPending = FALSE;\r
+ ASSERT(pAdapt->MiniportHandle == NULL);\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // Our Miniport Initialize routine will be called\r
+ // (may be running on another thread at this time).\r
+ // Wait for it to finish.\r
+ //\r
+ NdisWaitEvent(&pAdapt->MiniportInitEvent, 0);\r
+ ASSERT(pAdapt->MiniportInitPending == FALSE);\r
+ }\r
+\r
+ }\r
+#endif // !WIN9X\r
+\r
+ //\r
+ // Call NDIS to remove our device-instance. We do most of the work\r
+ // inside the HaltHandler.\r
+ //\r
+ // The Handle will be NULL if our miniport Halt Handler has been called or\r
+ // if the IM device was never initialized\r
+ //\r
+ \r
+ if (pAdapt->MiniportHandle != NULL)\r
+ {\r
+ *Status = NdisIMDeInitializeDeviceInstance(pAdapt->MiniportHandle);\r
+\r
+ //\r
+ // Any failure is collapsed to the generic NDIS_STATUS_FAILURE.\r
+ //\r
+ if (*Status != NDIS_STATUS_SUCCESS)\r
+ {\r
+ *Status = NDIS_STATUS_FAILURE;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // We need to do some work here. \r
+ // Close the binding below us \r
+ // and release the memory allocated.\r
+ //\r
+ \r
+ if(pAdapt->BindingHandle != NULL)\r
+ {\r
+ NdisResetEvent(&pAdapt->Event);\r
+\r
+ NdisCloseAdapter(Status, pAdapt->BindingHandle);\r
+\r
+ //\r
+ // Wait for it to complete; PtCloseAdapterComplete stores the\r
+ // final status in pAdapt->Status and signals pAdapt->Event.\r
+ //\r
+ if(*Status == NDIS_STATUS_PENDING)\r
+ {\r
+ NdisWaitEvent(&pAdapt->Event, 0);\r
+ *Status = pAdapt->Status;\r
+ }\r
+ pAdapt->BindingHandle = NULL;\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // MiniportHandle and BindingHandle should never both be NULL.\r
+ //\r
+ *Status = NDIS_STATUS_FAILURE;\r
+ ASSERT(0);\r
+ }\r
+\r
+ //\r
+ // Free the memory here, if it was not released earlier (by calling the HaltHandler).\r
+ // pAdapt must not be dereferenced after this point; the DBGPRINT\r
+ // below only prints the pointer value.\r
+ //\r
+ MPFreeAllPacketPools(pAdapt);\r
+ NdisFreeSpinLock(&pAdapt->Lock);\r
+ NdisFreeMemory(pAdapt, 0, 0);\r
+ }\r
+\r
+ DBGPRINT(("<== PtUnbindAdapter: Adapt %p\n", pAdapt));\r
+}\r
+\r
+VOID\r
+PtUnloadProtocol(\r
+    VOID\r
+)\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Deregisters the protocol edge from NDIS if it is currently registered\r
+    (ProtHandle is non-NULL) and clears the global handle.\r
+\r
+Arguments:\r
+\r
+    None\r
+\r
+Return Value:\r
+\r
+    None. The status returned by NdisDeregisterProtocol is not examined.\r
+\r
+--*/\r
+{\r
+    NDIS_STATUS DeregisterStatus;\r
+\r
+    if (NULL != ProtHandle)\r
+    {\r
+        NdisDeregisterProtocol(&DeregisterStatus, ProtHandle);\r
+\r
+        //\r
+        // Forget the handle so that a repeated call does nothing.\r
+        //\r
+        ProtHandle = NULL;\r
+    }\r
+\r
+    DBGPRINT(("PtUnloadProtocol: done!\n"));\r
+}\r
+\r
+\r
+\r
+VOID\r
+PtCloseAdapterComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN NDIS_STATUS Status\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Completion handler for a pended NdisCloseAdapter call. Stores the final\r
+    status in the adapter structure and signals the adapter's event so the\r
+    thread waiting on it can continue.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext      Pointer to the adapter structure\r
+    Status                      Completion status of the close\r
+\r
+Return Value:\r
+\r
+    None.\r
+\r
+--*/\r
+{\r
+    PADAPT Adapter;\r
+\r
+    Adapter = (PADAPT)ProtocolBindingContext;\r
+\r
+    DBGPRINT(("CloseAdapterComplete: Adapt %p, Status %x\n", Adapter, Status));\r
+\r
+    //\r
+    // Publish the result first, then wake the waiter.\r
+    //\r
+    Adapter->Status = Status;\r
+    NdisSetEvent(&Adapter->Event);\r
+}\r
+\r
+\r
+VOID\r
+PtResetComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN NDIS_STATUS Status\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Reset-completion handler for the protocol edge. This driver never\r
+    issues a reset, so reaching this routine is unexpected and trips an\r
+    assertion.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext      Pointer to the adapter structure (unused)\r
+    Status                      Completion status (unused)\r
+\r
+Return Value:\r
+\r
+    None.\r
+\r
+--*/\r
+{\r
+    UNREFERENCED_PARAMETER(Status);\r
+    UNREFERENCED_PARAMETER(ProtocolBindingContext);\r
+\r
+    //\r
+    // No reset is ever requested by us; flag the unexpected call.\r
+    //\r
+    ASSERT(0);\r
+}\r
+\r
+\r
+VOID\r
+PtRequestComplete(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_REQUEST NdisRequest,\r
+ IN NDIS_STATUS Status\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Completion handler for the previously posted request. All OIDs\r
+ are completed by and sent to the same miniport that they were requested for.\r
+ If Oid == OID_PNP_CAPABILITIES and the request succeeded, the returned\r
+ data is post-processed by MPQueryPNPCapabilities before the query is\r
+ completed. For OID_GEN_MAC_OPTIONS on NDIS 5.1 or later the\r
+ NDIS_MAC_OPTION_NO_LOOPBACK bit is cleared in the returned options.\r
+\r
+Arguments:\r
+\r
+ ProtocolBindingContext Pointer to the adapter structure\r
+ NdisRequest The posted request\r
+ Status Completion status\r
+\r
+Return Value:\r
+\r
+ None\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)ProtocolBindingContext;\r
+ //\r
+ // NOTE(review): the OID is read from pAdapt->Request rather than from the\r
+ // NdisRequest argument, and through the SET_INFORMATION member even for\r
+ // queries - presumably NdisRequest always points at pAdapt->Request and\r
+ // Oid sits at the same place in both members; confirm.\r
+ //\r
+ NDIS_OID Oid = pAdapt->Request.DATA.SET_INFORMATION.Oid ;\r
+\r
+ //\r
+ // Since our request is not outstanding anymore\r
+ //\r
+ ASSERT(pAdapt->OutstandingRequests == TRUE);\r
+\r
+ pAdapt->OutstandingRequests = FALSE;\r
+\r
+ //\r
+ // Complete the Set or Query, and fill in the buffer for OID_PNP_CAPABILITIES, if need be.\r
+ //\r
+ switch (NdisRequest->RequestType)\r
+ {\r
+ case NdisRequestQueryInformation:\r
+\r
+ //\r
+ // We never pass OID_PNP_QUERY_POWER down.\r
+ //\r
+ ASSERT(Oid != OID_PNP_QUERY_POWER);\r
+\r
+ if ((Oid == OID_PNP_CAPABILITIES) && (Status == NDIS_STATUS_SUCCESS))\r
+ {\r
+ MPQueryPNPCapabilities(pAdapt, &Status);\r
+ }\r
+ //\r
+ // Hand the byte counts back through the pointers saved in the\r
+ // adapter structure.\r
+ //\r
+ *pAdapt->BytesReadOrWritten = NdisRequest->DATA.QUERY_INFORMATION.BytesWritten;\r
+ *pAdapt->BytesNeeded = NdisRequest->DATA.QUERY_INFORMATION.BytesNeeded;\r
+\r
+ if (((Oid == OID_GEN_MAC_OPTIONS) \r
+ && (Status == NDIS_STATUS_SUCCESS))\r
+ && (NdisDotSysVersion >= NDIS_SYS_VERSION_51))\r
+ {\r
+ //\r
+ // Only do this on Windows XP or greater (NDIS.SYS v 5.1); \r
+ // do not do in Windows 2000 (NDIS.SYS v 5.0))\r
+ //\r
+ \r
+ //\r
+ // Remove the no-loopback bit from mac-options. In essence we are\r
+ // telling NDIS that we can handle loopback. We don't, but the\r
+ // interface below us does. If we do not do this, then loopback\r
+ // processing happens both below us and above us. This is wasteful\r
+ // at best and if Netmon is running, it will see multiple copies\r
+ // of loopback packets when sniffing above us.\r
+ //\r
+ // Only the lowest miniport in a stack of layered miniports should\r
+ // ever report this bit set to NDIS.\r
+ //\r
+ *(PULONG)NdisRequest->DATA.QUERY_INFORMATION.InformationBuffer &= ~NDIS_MAC_OPTION_NO_LOOPBACK;\r
+ }\r
+\r
+ NdisMQueryInformationComplete(pAdapt->MiniportHandle,\r
+ Status);\r
+ break;\r
+\r
+ case NdisRequestSetInformation:\r
+\r
+ //\r
+ // We never pass OID_PNP_SET_POWER down.\r
+ //\r
+ ASSERT( Oid != OID_PNP_SET_POWER);\r
+\r
+ *pAdapt->BytesReadOrWritten = NdisRequest->DATA.SET_INFORMATION.BytesRead;\r
+ *pAdapt->BytesNeeded = NdisRequest->DATA.SET_INFORMATION.BytesNeeded;\r
+ NdisMSetInformationComplete(pAdapt->MiniportHandle,\r
+ Status);\r
+ break;\r
+\r
+ default:\r
+ //\r
+ // Only query and set requests are expected here.\r
+ //\r
+ ASSERT(0);\r
+ break;\r
+ }\r
+ \r
+}\r
+\r
+\r
+VOID\r
+PtStatus(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN NDIS_STATUS GeneralStatus,\r
+    IN PVOID StatusBuffer,\r
+    IN UINT StatusBufferSize\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Status handler for the lower edge (protocol). A status indication from\r
+    the miniport below is forwarded to the protocols above only while the\r
+    upper-edge miniport exists and both edges are in D0. Otherwise, media\r
+    connect/disconnect indications are remembered for later.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext      Pointer to the adapter structure\r
+    GeneralStatus               Status code\r
+    StatusBuffer                Status buffer\r
+    StatusBufferSize            Size of the status buffer\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    PADAPT Adapter = (PADAPT)ProtocolBindingContext;\r
+\r
+    //\r
+    // Without an initialized upper-edge miniport there is nothing to\r
+    // indicate to and nothing to remember.\r
+    //\r
+    if (Adapter->MiniportHandle == NULL)\r
+    {\r
+        return;\r
+    }\r
+\r
+    if ((Adapter->MPDeviceState != NdisDeviceStateD0) ||\r
+        (Adapter->PTDeviceState != NdisDeviceStateD0))\r
+    {\r
+        //\r
+        // One of the edges is not powered on: do not indicate, only\r
+        // save the latest media status.\r
+        //\r
+        if ((GeneralStatus == NDIS_STATUS_MEDIA_CONNECT) ||\r
+            (GeneralStatus == NDIS_STATUS_MEDIA_DISCONNECT))\r
+        {\r
+            Adapter->LatestUnIndicateStatus = GeneralStatus;\r
+        }\r
+\r
+        return;\r
+    }\r
+\r
+    //\r
+    // Both edges are at D0: track media status changes and pass the\r
+    // indication up.\r
+    //\r
+    if ((GeneralStatus == NDIS_STATUS_MEDIA_CONNECT) ||\r
+        (GeneralStatus == NDIS_STATUS_MEDIA_DISCONNECT))\r
+    {\r
+        Adapter->LastIndicatedStatus = GeneralStatus;\r
+    }\r
+\r
+    NdisMIndicateStatus(Adapter->MiniportHandle,\r
+                        GeneralStatus,\r
+                        StatusBuffer,\r
+                        StatusBufferSize);\r
+}\r
+\r
+\r
+VOID\r
+PtStatusComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Status-complete handler for the lower edge (protocol). Forwards the\r
+    indication to the protocols above, under the same conditions used for\r
+    forwarding status indications.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext      Pointer to the adapter structure\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    PADAPT Adapter = (PADAPT)ProtocolBindingContext;\r
+\r
+    //\r
+    // Nothing to pass up until the upper-edge miniport is initialized.\r
+    //\r
+    if (Adapter->MiniportHandle == NULL)\r
+    {\r
+        return;\r
+    }\r
+\r
+    //\r
+    // Ignore the indication unless both the miniport edge and the\r
+    // protocol edge are powered on (D0).\r
+    //\r
+    if ((Adapter->MPDeviceState != NdisDeviceStateD0) ||\r
+        (Adapter->PTDeviceState != NdisDeviceStateD0))\r
+    {\r
+        return;\r
+    }\r
+\r
+    NdisMIndicateStatusComplete(Adapter->MiniportHandle);\r
+}\r
+\r
+\r
+VOID\r
+PtSendComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN PNDIS_PACKET Packet,\r
+    IN NDIS_STATUS Status\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Called by NDIS when the miniport below had completed a send. We should\r
+    complete the corresponding upper-edge send this represents.\r
+\r
+    Locals are declared in the scope that uses them, so builds without\r
+    NDIS51 defined no longer carry an unreferenced PoolHandle variable.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext - Points to ADAPT structure\r
+    Packet - Low level packet being completed\r
+    Status - status of send\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    PADAPT pAdapt = (PADAPT)ProtocolBindingContext;\r
+\r
+#ifdef NDIS51\r
+    //\r
+    // Packet stacking:\r
+    //\r
+    // Determine if the packet we are completing is the one we allocated. If so, then\r
+    // get the original packet from the reserved area, complete it and free the\r
+    // allocated packet. If this is the packet that was sent down to us, then just\r
+    // complete it.\r
+    //\r
+    if (NdisGetPoolFromPacket(Packet) != pAdapt->SendPacketPoolHandle)\r
+    {\r
+        //\r
+        // We had passed down a packet belonging to the protocol above us.\r
+        //\r
+        // DBGPRINT(("PtSendComp: Adapt %p, Stacked Packet %p\n", pAdapt, Packet));\r
+\r
+        NdisMSendComplete(pAdapt->MiniportHandle,\r
+                          Packet,\r
+                          Status);\r
+    }\r
+    else\r
+#endif // NDIS51\r
+    {\r
+        //\r
+        // The packet came from our own send pool: its reserved area holds\r
+        // the original packet handed to us by the protocol above.\r
+        //\r
+        PSEND_RSVD SendRsvd = (PSEND_RSVD)(Packet->ProtocolReserved);\r
+        PNDIS_PACKET Pkt = SendRsvd->OriginalPkt;\r
+\r
+#ifndef WIN9X\r
+        NdisIMCopySendCompletePerPacketInfo (Pkt, Packet);\r
+#endif\r
+\r
+        //\r
+        // Release our descriptor before completing the original packet.\r
+        //\r
+        NdisDprFreePacket(Packet);\r
+\r
+        NdisMSendComplete(pAdapt->MiniportHandle,\r
+                          Pkt,\r
+                          Status);\r
+    }\r
+    //\r
+    // Decrease the outstanding send count\r
+    //\r
+    ADAPT_DECR_PENDING_SENDS(pAdapt);\r
+}\r
+\r
+\r
+VOID\r
+PtTransferDataComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN PNDIS_PACKET Packet,\r
+    IN NDIS_STATUS Status,\r
+    IN UINT BytesTransferred\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Entry point called by NDIS to indicate completion of a call by us\r
+    to NdisTransferData. The completion is handed to the upper edge with\r
+    NdisMTransferDataComplete; it is dropped if our miniport handle is\r
+    not set.\r
+\r
+    See notes under SendComplete.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext - Pointer to our adapter structure (ADAPT)\r
+    Packet                 - Packet passed through to the upper edge\r
+    Status                 - Completion status, passed through unchanged\r
+    BytesTransferred       - Byte count, passed through unchanged\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    PADAPT Adapter = (PADAPT)ProtocolBindingContext;\r
+\r
+    //\r
+    // No upper-edge miniport: nobody to complete the transfer to.\r
+    //\r
+    if (!Adapter->MiniportHandle)\r
+    {\r
+        return;\r
+    }\r
+\r
+    NdisMTransferDataComplete(Adapter->MiniportHandle,\r
+                              Packet,\r
+                              Status,\r
+                              BytesTransferred);\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+PtReceive(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN NDIS_HANDLE MacReceiveContext,\r
+ IN PVOID HeaderBuffer,\r
+ IN UINT HeaderBufferSize,\r
+ IN PVOID LookAheadBuffer,\r
+ IN UINT LookAheadBufferSize,\r
+ IN UINT PacketSize\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ Handle receive data indicated up by the miniport below. We pass\r
+ it along to the protocol above us.\r
+\r
+ If the miniport below indicates packets, NDIS would more\r
+ likely call us at our ReceivePacket handler. However we\r
+ might be called here in certain situations even though\r
+ the miniport below has indicated a receive packet, e.g.\r
+ if the miniport had set packet status to NDIS_STATUS_RESOURCES.\r
+\r
+ Two paths exist: if a packet can be retrieved with\r
+ NdisGetReceivedPacket and one of our own packets can be allocated,\r
+ a packet-style indication is made; otherwise the header/lookahead\r
+ buffers are indicated with the media-specific old-style call and\r
+ the per-processor ReceivedIndicationFlags entry is set so that\r
+ PtReceiveComplete() forwards the matching receive-complete.\r
+\r
+Arguments:\r
+\r
+ <see DDK ref page for ProtocolReceive>\r
+\r
+ ProtocolBindingContext - Pointer to our adapter structure (ADAPT)\r
+ The remaining arguments are passed through unchanged to the\r
+ Ndis*IndicateReceive call on the old-style path.\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_SUCCESS if we processed the receive successfully,\r
+ NDIS_STATUS_XXX error code if we discarded it.\r
+\r
+ NOTE(review): when NdisDprAllocatePacket fails, Status keeps that\r
+ failure code and is returned even though the data is still indicated\r
+ up through the old-style path - confirm this is intended.\r
+\r
+--*/\r
+{\r
+ PADAPT pAdapt = (PADAPT)ProtocolBindingContext;\r
+ PNDIS_PACKET MyPacket, Packet = NULL;\r
+ NDIS_STATUS Status = NDIS_STATUS_SUCCESS;\r
+ ULONG Proc = KeGetCurrentProcessorNumber(); \r
+ \r
+ //\r
+ // Refuse the receive if the upper edge is not initialized or not at D0.\r
+ //\r
+ if ((!pAdapt->MiniportHandle) || (pAdapt->MPDeviceState > NdisDeviceStateD0))\r
+ {\r
+ Status = NDIS_STATUS_FAILURE;\r
+ }\r
+ else do\r
+ {\r
+ //\r
+ // Get at the packet, if any, indicated up by the miniport below.\r
+ //\r
+ Packet = NdisGetReceivedPacket(pAdapt->BindingHandle, MacReceiveContext);\r
+ if (Packet != NULL)\r
+ {\r
+ //\r
+ // The miniport below did indicate up a packet. Use information\r
+ // from that packet to construct a new packet to indicate up.\r
+ //\r
+\r
+#ifdef NDIS51\r
+ //\r
+ // NDIS 5.1 NOTE: Do not reuse the original packet in indicating\r
+ // up a receive, even if there is sufficient packet stack space.\r
+ // If we had to do so, we would have had to overwrite the\r
+ // status field in the original packet to NDIS_STATUS_RESOURCES,\r
+ // and it is not allowed for protocols to overwrite this field\r
+ // in received packets.\r
+ //\r
+#endif // NDIS51\r
+\r
+ //\r
+ // Get a packet off the pool and indicate that up\r
+ //\r
+ NdisDprAllocatePacket(&Status,\r
+ &MyPacket,\r
+ pAdapt->RecvPacketPoolHandle);\r
+\r
+ if (Status == NDIS_STATUS_SUCCESS)\r
+ {\r
+ //\r
+ // Make our packet point to data from the original\r
+ // packet. NOTE: this works only because we are\r
+ // indicating a receive directly from the context of\r
+ // our receive indication. If we need to queue this\r
+ // packet and indicate it from another thread context,\r
+ // we will also have to allocate a new buffer and copy\r
+ // over the packet contents, OOB data and per-packet\r
+ // information. This is because the packet data\r
+ // is available only for the duration of this\r
+ // receive indication call.\r
+ //\r
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);\r
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);\r
+\r
+ //\r
+ // Get the original packet (it could be the same packet as the\r
+ // one received or a different one based on the number of layered\r
+ // miniports below) and set it on the indicated packet so the OOB\r
+ // data is visible correctly at protocols above. If the IM driver\r
+ // modifies the packet in any way it should not set the new packet's\r
+ // original packet equal to the original packet of the packet that\r
+ // was indicated to it from the underlying driver; in this case, the\r
+ // IM driver should also ensure that the related per packet info is\r
+ // copied to the new packet.\r
+ // We can set the original packet to the original packet of the packet\r
+ // indicated from the underlying driver because the driver doesn't modify\r
+ // the data content in the packet.\r
+ //\r
+ NDIS_SET_ORIGINAL_PACKET(MyPacket, NDIS_GET_ORIGINAL_PACKET(Packet));\r
+ NDIS_SET_PACKET_HEADER_SIZE(MyPacket, HeaderBufferSize);\r
+\r
+ //\r
+ // Copy packet flags.\r
+ //\r
+ NdisGetPacketFlags(MyPacket) = NdisGetPacketFlags(Packet);\r
+\r
+ //\r
+ // Force protocols above to make a copy if they want to hang\r
+ // on to data in this packet. This is because we are in our\r
+ // Receive handler (not ReceivePacket) and we can't return a\r
+ // ref count from here.\r
+ //\r
+ NDIS_SET_PACKET_STATUS(MyPacket, NDIS_STATUS_RESOURCES);\r
+\r
+ //\r
+ // By setting NDIS_STATUS_RESOURCES, we also know that we can reclaim\r
+ // this packet as soon as the call to NdisMIndicateReceivePacket\r
+ // returns.\r
+ //\r
+\r
+ if (pAdapt->MiniportHandle != NULL)\r
+ {\r
+ NdisMIndicateReceivePacket(pAdapt->MiniportHandle, &MyPacket, 1);\r
+ }\r
+\r
+ //\r
+ // Reclaim the indicated packet. Since we had set its status\r
+ // to NDIS_STATUS_RESOURCES, we are guaranteed that protocols\r
+ // above are done with it.\r
+ //\r
+ NdisDprFreePacket(MyPacket);\r
+\r
+ //\r
+ // Packet path done: leave the do/while and return Status\r
+ // (NDIS_STATUS_SUCCESS here).\r
+ //\r
+ break;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // The miniport below us uses the old-style (not packet)\r
+ // receive indication. Fall through.\r
+ //\r
+ }\r
+\r
+ //\r
+ // Fall through if the miniport below us has either not\r
+ // indicated a packet or we could not allocate one.\r
+ // Remember, per processor, that an old-style indication is in\r
+ // progress; PtReceiveComplete() tests and clears this flag.\r
+ //\r
+ pAdapt->ReceivedIndicationFlags[Proc] = TRUE;\r
+ if (pAdapt->MiniportHandle == NULL)\r
+ {\r
+ break;\r
+ }\r
+ //\r
+ // Old-style indication, dispatched on the medium of the binding.\r
+ //\r
+ switch (pAdapt->Medium)\r
+ {\r
+ case NdisMedium802_3:\r
+ case NdisMediumWan:\r
+ NdisMEthIndicateReceive(pAdapt->MiniportHandle,\r
+ MacReceiveContext,\r
+ HeaderBuffer,\r
+ HeaderBufferSize,\r
+ LookAheadBuffer,\r
+ LookAheadBufferSize,\r
+ PacketSize);\r
+ break;\r
+\r
+ case NdisMedium802_5:\r
+ NdisMTrIndicateReceive(pAdapt->MiniportHandle,\r
+ MacReceiveContext,\r
+ HeaderBuffer,\r
+ HeaderBufferSize,\r
+ LookAheadBuffer,\r
+ LookAheadBufferSize,\r
+ PacketSize);\r
+ break;\r
+\r
+#if FDDI\r
+ case NdisMediumFddi:\r
+ NdisMFddiIndicateReceive(pAdapt->MiniportHandle,\r
+ MacReceiveContext,\r
+ HeaderBuffer,\r
+ HeaderBufferSize,\r
+ LookAheadBuffer,\r
+ LookAheadBufferSize,\r
+ PacketSize);\r
+ break;\r
+#endif\r
+ default:\r
+ // Unsupported medium: nothing is indicated.\r
+ ASSERT(FALSE);\r
+ break;\r
+ }\r
+\r
+ } while(FALSE);\r
+\r
+ return Status;\r
+}\r
+\r
+\r
+VOID\r
+PtReceiveComplete(\r
+    IN NDIS_HANDLE ProtocolBindingContext\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Called by the adapter below us when it is done indicating a batch of\r
+    received packets. If an old-style receive indication was flagged for\r
+    the current processor (see PtReceive), the media-specific\r
+    receive-complete is indicated to the protocols above. The\r
+    per-processor flag is cleared on every call.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext - Pointer to our adapter structure.\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    PADAPT Adapter = (PADAPT)ProtocolBindingContext;\r
+    ULONG  Cpu = KeGetCurrentProcessorNumber();\r
+\r
+    //\r
+    // Forward only when the upper edge exists, is at D0, and this\r
+    // processor actually made an old-style indication.\r
+    //\r
+    if ((Adapter->MiniportHandle != NULL) &&\r
+        (Adapter->MPDeviceState == NdisDeviceStateD0) &&\r
+        (Adapter->ReceivedIndicationFlags[Cpu]))\r
+    {\r
+        switch (Adapter->Medium)\r
+        {\r
+            case NdisMedium802_3:\r
+            case NdisMediumWan:\r
+                NdisMEthIndicateReceiveComplete(Adapter->MiniportHandle);\r
+                break;\r
+\r
+            case NdisMedium802_5:\r
+                NdisMTrIndicateReceiveComplete(Adapter->MiniportHandle);\r
+                break;\r
+\r
+#if FDDI\r
+            case NdisMediumFddi:\r
+                NdisMFddiIndicateReceiveComplete(Adapter->MiniportHandle);\r
+                break;\r
+#endif\r
+\r
+            default:\r
+                ASSERT(FALSE);\r
+                break;\r
+        }\r
+    }\r
+\r
+    //\r
+    // Always reset the flag, whether or not anything was forwarded.\r
+    //\r
+    Adapter->ReceivedIndicationFlags[Cpu] = FALSE;\r
+}\r
+\r
+\r
+INT\r
+PtReceivePacket(\r
+ IN NDIS_HANDLE ProtocolBindingContext,\r
+ IN PNDIS_PACKET Packet\r
+ )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+ ReceivePacket handler. Called by NDIS if the miniport below supports\r
+ NDIS 4.0 style receives. Re-package the buffer chain in a new packet\r
+ and indicate the new packet to protocols above us. Any context for\r
+ packets indicated up must be kept in the MiniportReserved field.\r
+\r
+ NDIS 5.1 - packet stacking - if there is sufficient "stack space" in\r
+ the packet passed to us, we can use the same packet in a receive\r
+ indication.\r
+\r
+Arguments:\r
+\r
+ ProtocolBindingContext - Pointer to our adapter structure.\r
+ Packet - Pointer to the packet\r
+\r
+Return Value:\r
+\r
+ == 0 -> We are done with the packet\r
+ != 0 -> We will keep the packet and call NdisReturnPackets() this\r
+ many times when done.\r
+--*/\r
+{\r
+ PADAPT pAdapt =(PADAPT)ProtocolBindingContext;\r
+ NDIS_STATUS Status;\r
+ PNDIS_PACKET MyPacket;\r
+ BOOLEAN Remaining;\r
+\r
+ //\r
+ // Drop the packet silently if the upper miniport edge isn't initialized or\r
+ // the miniport edge is in low power state\r
+ //\r
+ if ((!pAdapt->MiniportHandle) || (pAdapt->MPDeviceState > NdisDeviceStateD0))\r
+ {\r
+ return 0;\r
+ }\r
+\r
+#ifdef NDIS51\r
+ //\r
+ // Check if we can reuse the same packet for indicating up.\r
+ // See also: PtReceive(). \r
+ //\r
+ (VOID)NdisIMGetCurrentPacketStack(Packet, &Remaining);\r
+ if (Remaining)\r
+ {\r
+ //\r
+ // We can reuse "Packet". Indicate it up and be done with it.\r
+ //\r
+ Status = NDIS_GET_PACKET_STATUS(Packet);\r
+ NdisMIndicateReceivePacket(pAdapt->MiniportHandle, &Packet, 1);\r
+ return((Status != NDIS_STATUS_RESOURCES) ? 1 : 0);\r
+ }\r
+#endif // NDIS51\r
+\r
+ //\r
+ // Get a packet off the pool and indicate that up\r
+ //\r
+ NdisDprAllocatePacket(&Status,\r
+ &MyPacket,\r
+ pAdapt->RecvPacketPoolHandle);\r
+\r
+ if (Status == NDIS_STATUS_SUCCESS)\r
+ {\r
+ PRECV_RSVD RecvRsvd;\r
+\r
+ RecvRsvd = (PRECV_RSVD)(MyPacket->MiniportReserved);\r
+ RecvRsvd->OriginalPkt = Packet;\r
+\r
+ NDIS_PACKET_FIRST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_FIRST_NDIS_BUFFER(Packet);\r
+ NDIS_PACKET_LAST_NDIS_BUFFER(MyPacket) = NDIS_PACKET_LAST_NDIS_BUFFER(Packet);\r
+\r
+ //\r
+ // Get the original packet (it could be the same packet as the one\r
+ // received or a different one based on the number of layered miniports\r
+ // below) and set it on the indicated packet so the OOB data is visible\r
+ // correctly to protocols above us.\r
+ //\r
+ NDIS_SET_ORIGINAL_PACKET(MyPacket, NDIS_GET_ORIGINAL_PACKET(Packet));\r
+\r
+ //\r
+ // Set Packet Flags\r
+ //\r
+ NdisGetPacketFlags(MyPacket) = NdisGetPacketFlags(Packet);\r
+\r
+ Status = NDIS_GET_PACKET_STATUS(Packet);\r
+\r
+ NDIS_SET_PACKET_STATUS(MyPacket, Status);\r
+ NDIS_SET_PACKET_HEADER_SIZE(MyPacket, NDIS_GET_PACKET_HEADER_SIZE(Packet));\r
+\r
+ if (pAdapt->MiniportHandle != NULL)\r
+ {\r
+ NdisMIndicateReceivePacket(pAdapt->MiniportHandle, &MyPacket, 1);\r
+ }\r
+\r
+ //\r
+ // Check if we had indicated up the packet with NDIS_STATUS_RESOURCES\r
+ // NOTE -- do not use NDIS_GET_PACKET_STATUS(MyPacket) for this since\r
+ // it might have changed! Use the value saved in the local variable.\r
+ //\r
+ if (Status == NDIS_STATUS_RESOURCES)\r
+ {\r
+ //\r
+ // Our ReturnPackets handler will not be called for this packet.\r
+ // We should reclaim it right here.\r
+ //\r
+ NdisDprFreePacket(MyPacket);\r
+ }\r
+\r
+ return((Status != NDIS_STATUS_RESOURCES) ? 1 : 0);\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // We are out of packets. Silently drop it.\r
+ //\r
+ return(0);\r
+ }\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+PtPNPHandler(\r
+    IN NDIS_HANDLE ProtocolBindingContext,\r
+    IN PNET_PNP_EVENT pNetPnPEvent\r
+    )\r
+\r
+/*++\r
+Routine Description:\r
+\r
+    This is called by NDIS to notify us of a PNP event related to a lower\r
+    binding. Set-power and reconfigure events are dispatched to their\r
+    helper routines; every other event is, under NDIS 5.1, forwarded to\r
+    the upper protocol(s) by calling NdisIMNotifyPnPEvent.\r
+\r
+Arguments:\r
+\r
+    ProtocolBindingContext - Pointer to our adapter structure. Can be NULL\r
+                             for "global" notifications\r
+\r
+    pNetPnPEvent - Pointer to the PNP event to be processed.\r
+\r
+Return Value:\r
+\r
+    NDIS_STATUS code indicating status of event processing.\r
+\r
+--*/\r
+{\r
+    PADAPT pAdapt = (PADAPT)ProtocolBindingContext;\r
+\r
+    DBGPRINT(("PtPnPHandler: Adapt %p, Event %d\n", pAdapt, pNetPnPEvent->NetEvent));\r
+\r
+    switch (pNetPnPEvent->NetEvent)\r
+    {\r
+        case NetEventSetPower:\r
+            return PtPnPNetEventSetPower(pAdapt, pNetPnPEvent);\r
+\r
+        case NetEventReconfigure:\r
+            return PtPnPNetEventReconfigure(pAdapt, pNetPnPEvent);\r
+\r
+        default:\r
+            break;\r
+    }\r
+\r
+#ifdef NDIS51\r
+    //\r
+    // Any other event: pass the notification on to protocol(s) above,\r
+    // provided we have a binding with an initialized upper edge.\r
+    //\r
+    if (pAdapt && pAdapt->MiniportHandle)\r
+    {\r
+        return NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);\r
+    }\r
+#endif // NDIS51\r
+\r
+    return NDIS_STATUS_SUCCESS;\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+PtPnPNetEventReconfigure(\r
+    IN PADAPT pAdapt,\r
+    IN PNET_PNP_EVENT pNetPnPEvent\r
+    )\r
+/*++\r
+Routine Description:\r
+\r
+    This routine is called from NDIS to notify our protocol edge of a\r
+    reconfiguration of parameters for either a specific binding (pAdapt\r
+    is not NULL), or global parameters if any (pAdapt is NULL).\r
+\r
+Arguments:\r
+\r
+    pAdapt - Pointer to our adapter structure, or NULL.\r
+    pNetPnPEvent - the reconfigure event\r
+\r
+Return Value:\r
+\r
+    NDIS_STATUS_SUCCESS, or (NDIS 5.1, binding-specific event with an\r
+    initialized upper edge) the status returned by NdisIMNotifyPnPEvent.\r
+\r
+--*/\r
+{\r
+    NDIS_STATUS Status = NDIS_STATUS_SUCCESS;\r
+\r
+    if (pAdapt == NULL)\r
+    {\r
+        //\r
+        // Global reconfiguration notification.\r
+        //\r
+        // An important event that causes this notification to us is if\r
+        // one of our upper-edge miniport instances was enabled after being\r
+        // disabled earlier, e.g. from Device Manager in Win2000. Note that\r
+        // NDIS calls this because we had set up an association between our\r
+        // miniport and protocol entities by calling NdisIMAssociateMiniport.\r
+        //\r
+        // Since we would have torn down the lower binding for that miniport,\r
+        // we need NDIS' assistance to re-bind to the lower miniport. The\r
+        // call to NdisReEnumerateProtocolBindings does exactly that.\r
+        //\r
+        NdisReEnumerateProtocolBindings(ProtHandle);\r
+    }\r
+#ifdef NDIS51\r
+    else if (pAdapt->MiniportHandle)\r
+    {\r
+        //\r
+        // Binding-specific event: pass it on to protocol(s) above and\r
+        // report whatever they returned.\r
+        //\r
+        Status = NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);\r
+    }\r
+#endif // NDIS51\r
+\r
+    DBGPRINT(("<==PtPNPNetEventReconfigure: pAdapt %p\n", pAdapt));\r
+\r
+    return Status;\r
+}\r
+\r
+\r
+NDIS_STATUS\r
+PtPnPNetEventSetPower(\r
+ IN PADAPT pAdapt,\r
+ IN PNET_PNP_EVENT pNetPnPEvent\r
+ )\r
+/*++\r
+Routine Description:\r
+\r
+ This is a notification to our protocol edge of the power state\r
+ of the lower miniport. If it is going to a low-power state, we must\r
+ wait here for all outstanding sends and requests to complete.\r
+\r
+ NDIS 5.1: Since we use packet stacking, it is not sufficient to\r
+ check usage of our local send packet pool to detect whether or not\r
+ all outstanding sends have completed. For this, use the new API\r
+ NdisQueryPendingIOCount.\r
+\r
+ NDIS 5.1: Use the 5.1 API NdisIMNotifyPnPEvent to pass on PnP\r
+ notifications to upper protocol(s).\r
+\r
+Arguments:\r
+\r
+ pAdapt - Pointer to the adapter structure\r
+ pNetPnPEvent - The Net Pnp Event. This contains the new device state\r
+\r
+ NOTE(review): pAdapt is dereferenced unconditionally below, while\r
+ PtPNPHandler documents that its binding context can be NULL for\r
+ global notifications - confirm a set-power event never arrives\r
+ with a NULL context.\r
+\r
+Return Value:\r
+\r
+ NDIS_STATUS_SUCCESS or the status returned by upper-layer protocols.\r
+\r
+--*/\r
+{\r
+ PNDIS_DEVICE_POWER_STATE pDeviceState =(PNDIS_DEVICE_POWER_STATE)(pNetPnPEvent->Buffer);\r
+ NDIS_DEVICE_POWER_STATE PrevDeviceState = pAdapt->PTDeviceState; \r
+ NDIS_STATUS Status;\r
+ NDIS_STATUS ReturnStatus;\r
+\r
+ ReturnStatus = NDIS_STATUS_SUCCESS;\r
+\r
+ //\r
+ // Set the Internal Device State, this blocks all new sends or receives.\r
+ // The lock taken here is released on every path below before any\r
+ // call out of this routine.\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ pAdapt->PTDeviceState = *pDeviceState;\r
+\r
+ //\r
+ // Check if the miniport below is going to a low power state.\r
+ //\r
+ if (pAdapt->PTDeviceState > NdisDeviceStateD0)\r
+ {\r
+ //\r
+ // If the miniport below is going to standby, fail all incoming requests\r
+ //\r
+ if (PrevDeviceState == NdisDeviceStateD0)\r
+ {\r
+ pAdapt->StandingBy = TRUE;\r
+ }\r
+\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+\r
+#ifdef NDIS51\r
+ //\r
+ // Notify upper layer protocol(s) first.\r
+ //\r
+ if (pAdapt->MiniportHandle != NULL)\r
+ {\r
+ ReturnStatus = NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);\r
+ }\r
+#endif // NDIS51\r
+\r
+ //\r
+ // Wait for outstanding sends and requests to complete.\r
+ // Both loops poll the counters without holding the lock.\r
+ //\r
+ while (pAdapt->OutstandingSends != 0)\r
+ {\r
+ NdisMSleep(2);\r
+ }\r
+\r
+ while (pAdapt->OutstandingRequests == TRUE)\r
+ {\r
+ //\r
+ // sleep till outstanding requests complete\r
+ //\r
+ NdisMSleep(2);\r
+ }\r
+\r
+ //\r
+ // If the below miniport is going to low power state, complete the queued request\r
+ // (with NDIS_STATUS_FAILURE). The flag is cleared under the lock and the\r
+ // lock is dropped before calling PtRequestComplete.\r
+ //\r
+ NdisAcquireSpinLock(&pAdapt->Lock);\r
+ if (pAdapt->QueuedRequest)\r
+ {\r
+ pAdapt->QueuedRequest = FALSE;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ PtRequestComplete(pAdapt, &pAdapt->Request, NDIS_STATUS_FAILURE);\r
+ }\r
+ else\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ }\r
+ \r
+\r
+ ASSERT(NdisPacketPoolUsage(pAdapt->SendPacketPoolHandle) == 0);\r
+ ASSERT(pAdapt->OutstandingRequests == FALSE);\r
+ }\r
+ else\r
+ {\r
+ //\r
+ // If the physical miniport is powering up (from Low power state to D0),\r
+ // clear the flag\r
+ //\r
+ if (PrevDeviceState > NdisDeviceStateD0)\r
+ {\r
+ pAdapt->StandingBy = FALSE;\r
+ }\r
+ //\r
+ // The device below is being turned on. If we had a request\r
+ // pending, send it down now.\r
+ //\r
+ if (pAdapt->QueuedRequest == TRUE)\r
+ {\r
+ pAdapt->QueuedRequest = FALSE;\r
+ \r
+ pAdapt->OutstandingRequests = TRUE;\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+\r
+ NdisRequest(&Status,\r
+ pAdapt->BindingHandle,\r
+ &pAdapt->Request);\r
+\r
+ //\r
+ // A request that did not pend is completed by us right away.\r
+ //\r
+ if (Status != NDIS_STATUS_PENDING)\r
+ {\r
+ PtRequestComplete(pAdapt,\r
+ &pAdapt->Request,\r
+ Status);\r
+ \r
+ }\r
+ }\r
+ else\r
+ {\r
+ NdisReleaseSpinLock(&pAdapt->Lock);\r
+ }\r
+\r
+\r
+#ifdef NDIS51\r
+ //\r
+ // Pass on this notification to protocol(s) above\r
+ //\r
+ if (pAdapt->MiniportHandle)\r
+ {\r
+ ReturnStatus = NdisIMNotifyPnPEvent(pAdapt->MiniportHandle, pNetPnPEvent);\r
+ }\r
+#endif // NDIS51\r
+\r
+ }\r
+\r
+ return ReturnStatus;\r
+}\r
+\r
+VOID\r
+PtReferenceAdapt(\r
+    IN PADAPT pAdapt\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Takes one reference on the adapter structure. The reference count is\r
+    incremented while holding the adapter's spin lock.\r
+\r
+Arguments:\r
+\r
+    pAdapt - Pointer to the adapter structure\r
+\r
+Return Value:\r
+\r
+    None\r
+\r
+--*/\r
+{\r
+    NdisAcquireSpinLock(&pAdapt->Lock);\r
+\r
+    ASSERT(pAdapt->RefCount >= 0);\r
+    pAdapt->RefCount += 1;\r
+\r
+    NdisReleaseSpinLock(&pAdapt->Lock);\r
+}\r
+\r
+\r
+BOOLEAN\r
+PtDereferenceAdapt(\r
+    IN PADAPT pAdapt\r
+    )\r
+/*++\r
+\r
+Routine Description:\r
+\r
+    Drops one reference on the adapter structure. When the count reaches\r
+    zero the packet pools, the spin lock and the structure itself are\r
+    freed; pAdapt must not be used after a TRUE return.\r
+\r
+Arguments:\r
+\r
+    pAdapt - Pointer to the adapter structure\r
+\r
+Return Value:\r
+\r
+    TRUE if this was the last reference and the adapter was freed,\r
+    FALSE otherwise.\r
+\r
+--*/\r
+{\r
+    NdisAcquireSpinLock(&pAdapt->Lock);\r
+\r
+    ASSERT(pAdapt->RefCount > 0);\r
+\r
+    pAdapt->RefCount--;\r
+\r
+    if (pAdapt->RefCount != 0)\r
+    {\r
+        //\r
+        // Still referenced elsewhere.\r
+        //\r
+        NdisReleaseSpinLock(&pAdapt->Lock);\r
+        return FALSE;\r
+    }\r
+\r
+    //\r
+    // Last reference gone. Drop the lock before tearing it down, then\r
+    // free all resources on this adapter structure.\r
+    //\r
+    NdisReleaseSpinLock(&pAdapt->Lock);\r
+\r
+    MPFreeAllPacketPools(pAdapt);\r
+    NdisFreeSpinLock(&pAdapt->Lock);\r
+    NdisFreeMemory(pAdapt, 0, 0);\r
+\r
+    return TRUE;\r
+}\r
+\r
+\r
--- /dev/null
+#\r
+# Build description for the passthru driver (TARGETNAME/TARGETTYPE below).\r
+#\r
+TARGETNAME=passthru\r
+TARGETTYPE=DRIVER\r
+\r
+C_DEFINES=$(C_DEFINES) -DNDIS_MINIPORT_DRIVER -DNDIS_WDM=1\r
+\r
+# Warning level 4, warnings treated as errors.\r
+MSC_WARNING_LEVEL=/WX /W4\r
+\r
+# Select the NDIS version from the build environment: NDIS 4.0 for Win2K,\r
+# NDIS 5.1 otherwise. The sources test these with #ifdef NDIS51.\r
+!if "$(DDK_TARGET_OS)"=="Win2K"\r
+#\r
+# The driver is built in the Win2K build environment\r
+#\r
+C_DEFINES=$(C_DEFINES) -DNDIS40_MINIPORT=1\r
+C_DEFINES=$(C_DEFINES) -DNDIS40=1\r
+!else \r
+#\r
+# The driver is built in the XP or .NET build environment\r
+# So let us build NDIS 5.1 version.\r
+#\r
+C_DEFINES=$(C_DEFINES) -DNDIS51_MINIPORT=1\r
+C_DEFINES=$(C_DEFINES) -DNDIS51=1\r
+!endif\r
+\r
+# Uncomment the following to build for Win98/SE/WinMe\r
+# This causes several APIs that are not present in Win9X to be\r
+# ifdef'ed out.\r
+# C_DEFINES=$(C_DEFINES) -DWIN9X=1\r
+\r
+PRECOMPILED_INCLUDE=precomp.h\r
+\r
+TARGETLIBS=$(DDK_LIB_PATH)\ndis.lib\r
+\r
+# No additional include directories.\r
+INCLUDES=\r
+\r
+SOURCES=\\r
+ miniport.c \\r
+ passthru.c \\r
+ passthru.rc \\r
+ protocol.c\r
+\r
# Runs every 5 minutes and clean ipfw expired rules
-# $Id:$
+# $Id: ipfw.cron 6069 2010-04-15 09:35:33Z marta $
*/5 * * * * root echo "super killexpired" | /vsys/ipfw-be root > /dev/null 2>&1
-# $Id:$
+# $Id: planetlab-tags.mk 4533 2009-12-16 14:39:23Z luigi $
# These are good to build the ipfw modules from svn on kernels 2.6.22
linux-2.6-SVNBRANCH := 22
linux-2.6-SVNPATH := http://svn.planet-lab.org/svn/linux-2.6/tags/linux-2.6-22-39-1
-ipfwsrc-SVNPATH := svn+ssh://luigi%40onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw_mod
+ipfwsrc-SVNPATH := svn+ssh://luigi%40onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw3
-# $Id:$
+# $Id: planetlab.mk 4533 2009-12-16 14:39:23Z luigi $
# .mk file to build a module
kernel-MODULES := linux-2.6
kernel-SPEC := kernel-2.6.spec
#
# Marta Carbone <marta.carbone@iet.unipi.it>
# 2009 - Universita` di Pisa
-# $Id:$
#
# This is a sample hook file in charge to collect
# statistical information on netconfig usage. It dumps
--- /dev/null
+/*
+ * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
+ * headers to build userland ipfw under tcc.
+ */
+
+#ifndef _TCC_GLUE_H
+#define _TCC_GLUE_H
+
+/*
+ * Basic type aliases, declared here instead of coming from system headers.
+ * NOTE(review): the widths implied by the names (int = 32 bit,
+ * short = 16 bit, long long = 64 bit) presumably assume a 32-bit x86
+ * target -- confirm before building for anything else.
+ */
+//#define __restrict
+#define NULL ((void *)0)
+typedef int size_t;	/* NOTE(review): signed here; the standard size_t is unsigned */
+typedef unsigned char u_char;
+typedef unsigned char uint8_t;
+typedef unsigned char u_int8_t;
+typedef unsigned short u_short;
+typedef unsigned short uint16_t;
+typedef unsigned short u_int16_t;
+typedef int __int32_t;
+typedef int int32_t;
+typedef int socklen_t;
+typedef int pid_t;
+typedef unsigned int time_t;	/* NOTE(review): unsigned here -- confirm this is intended */
+typedef unsigned int uint;
+typedef unsigned int u_int;
+typedef unsigned int uint32_t;
+typedef unsigned int u_int32_t;
+typedef unsigned int gid_t;
+typedef unsigned int uid_t;
+typedef unsigned long u_long;
+typedef unsigned long uintptr_t;
+typedef long long int int64_t;
+typedef unsigned long long int uint64_t;
+typedef unsigned long long int u_int64_t;
+
+/* IPv4 address and socket address, declared locally. */
+typedef uint32_t in_addr_t;
+struct in_addr {
+ uint32_t s_addr;
+};
+/*
+ * NOTE(review): this layout starts with a one-byte length followed by a
+ * one-byte family; presumably it mirrors the BSD sockaddr_in and is not
+ * meant to be handed to the native socket API -- confirm.
+ */
+struct sockaddr_in {
+ uint8_t _sin_len;
+ uint8_t sin_family;
+ uint16_t sin_port;
+ struct in_addr sin_addr;
+ char sin_zero[8];
+};
+#define IFNAMSIZ 16
+#define INET6_ADDRSTRLEN 64
+
+/* IPv6 address: the same 16 bytes viewed as bytes, 16-bit or 32-bit words. */
+struct in6_addr {
+ union {
+ uint8_t __s6_addr8[16];
+ uint16_t __s6_addr16[8];
+ uint32_t __s6_addr32[4];
+ } __u6; // _addr; /* 128-bit IP6 address */
+};
+
+
+/* Byte order is fixed to little endian. */
+#define LITTLE_ENDIAN 1234
+#define BYTE_ORDER LITTLE_ENDIAN
+
+/*
+ * Exit codes, errno values, protocol numbers and socket constants.
+ * to be revised
+ * NOTE(review): these are locally chosen values (e.g. EEXIST 1,
+ * IPPROTO_IP 1) and presumably do not match the host system headers;
+ * verify before passing any of them to a real system call.
+ */
+#define EX_OK 0
+#define EX_DATAERR 1
+#define EX_OSERR 2
+#define EX_UNAVAILABLE 3
+#define EX_USAGE 4
+#define EX_NOHOST 5
+
+#define EEXIST 1
+#define EINVAL 2
+#define ERANGE 3
+#define ESRCH 4
+
+#define IPPROTO_IP 1
+#define IPPROTO_IPV6 2
+#define IPPROTO_RAW 100
+
+#define IPTOS_LOWDELAY 100
+#define IPTOS_MINCOST 101
+#define IPTOS_RELIABILITY 102
+#define IPTOS_THROUGHPUT 103
+#define SOCK_RAW 12
+#define AF_INET 2
+#define AF_INET6 28
+
+#define INADDR_ANY 0
+
+
+/*
+ * BSD string/memory helpers mapped onto their ISO C equivalents.
+ * Note that bcopy() takes (src, dst) while memcpy() takes (dst, src),
+ * hence the swapped arguments.
+ */
+#define bcmp(src, dst, len) memcmp(src, dst, len)
+#define bcopy(src, dst, len) memcpy(dst, src, len)
+#define bzero(p, len) memset(p, 0, len)
+#define index(s, c) strchr(s, c)
+
+char *strsep(char **stringp, const char *delim);
+
+/*
+ * err(3)-style reporting. The *x variants are simply aliased to the
+ * plain ones, so warnx()/errx() behave exactly like warn()/err() here.
+ */
+void warn(const char *, ...);
+//void warnx(const char *, ...);
+#define warnx warn
+void err(int, const char *, ...);
+#define errx err
+
+uint16_t htons(uint16_t)__attribute__ ((stdcall));
+uint16_t ntohs(uint16_t)__attribute__ ((stdcall));
+uint32_t htonl(uint32_t)__attribute__ ((stdcall));
+uint32_t ntohl(uint32_t)__attribute__ ((stdcall));
+int inet_aton(const char *cp, struct in_addr *pin);
+char * inet_ntoa(struct in_addr);
+const char * inet_ntop(int af, const void * src, char * dst,
+ socklen_t size);
+int inet_pton(int af, const char * src, void * dst);
+
+/*
+ * Minimal group/passwd records. The lookup functions below are stubbed
+ * out as macros that always yield NULL, i.e. every user or group lookup
+ * fails.
+ */
+struct group {
+ gid_t gr_gid;
+ char gr_name[16];
+};
+struct passwd {
+ uid_t pw_uid;
+ char pw_name[16];
+};
+
+#define getpwnam(s) (NULL)
+#define getpwuid(s) (NULL)
+
+#define getgrnam(x) (NULL)
+#define getgrgid(x) (NULL)
+
+int getopt(int argc, char * const argv[], const char *optstring);
+
+int getsockopt(int s, int level, int optname, void * optval,
+ socklen_t * optlen);
+
+int setsockopt(int s, int level, int optname, const void *optval,
+ socklen_t optlen);
+
+/* Record types returned by the getproto*(), getserv*() and gethost*() calls. */
+struct protoent {
+ char *p_name; /* official protocol name */
+ char **p_aliases; /* alias list */
+ short p_proto; /* protocol # */
+};
+
+struct servent {
+ char *s_name; /* official service name */
+ char **s_aliases; /* alias list */
+ short s_port; /* port # */
+ char *s_proto; /* protocol to use */
+};
+
+struct hostent {
+ char *h_name; /* official name of host */
+ char **h_aliases; /* alias list */
+ short h_addrtype; /* host address type */
+ short h_length; /* length of address */
+ char **h_addr_list; /* list of addresses */
+#define h_addr h_addr_list[0] /* address, for backward compat */
+};
+
+struct hostent* gethostbyaddr(const char* addr, int len, int type)__attribute__ ((stdcall));
+struct hostent* gethostbyname(const char *name)__attribute__ ((stdcall));
+
+struct protoent* getprotobynumber(int number)__attribute__ ((stdcall));
+struct protoent* getprotobyname(const char* name)__attribute__ ((stdcall));
+
+struct servent* getservbyport(int port, const char* proto)__attribute__ ((stdcall));
+struct servent* getservbyname(const char* name, const char* proto) __attribute__ ((stdcall));
+
+extern int optind;
+extern char *optarg;
+
+#include <windef.h>
+
+/* Sizes of the two text fields in WSADATA (each is stored with one extra byte). */
+#define WSADESCRIPTION_LEN 256
+#define WSASYS_STATUS_LEN 128
+
+/* Structure passed to WSAStartup(), declared locally. */
+typedef struct WSAData {
+ WORD wVersion;
+ WORD wHighVersion;
+ char szDescription[WSADESCRIPTION_LEN+1];
+ char szSystemStatus[WSASYS_STATUS_LEN+1];
+ unsigned short iMaxSockets;
+ unsigned short iMaxUdpDg;
+ char FAR * lpVendorInfo;
+} WSADATA, * LPWSADATA;
+
+int WSAStartup(
+ WORD wVersionRequested,
+ LPWSADATA lpWSAData
+ );
+
+int
+WSACleanup(void);
+
+int WSAGetLastError();
+
+/*
+ * return error on process handling
+ * Every process-related call is replaced by a macro that evaluates to -1
+ * without evaluating its arguments.
+ */
+#define pipe(f) (-1)
+#define kill(p, s) (-1)
+#define waitpid(w,s,o) (-1)
+#define fork(x) (-1)
+#define execvp(f, a) (-1)
+
+/*
+ * wait(2) status decoding: the low 7 bits (0177) carry the termination
+ * status, the exit code is taken from bit 8 upwards.
+ */
+#define _W_INT(i) (i)
+#define _WSTATUS(x) (_W_INT(x) & 0177)
+#define WIFEXITED(x) (_WSTATUS(x) == 0)
+#define WEXITSTATUS(x) (_W_INT(x) >> 8)
+#define _WSTOPPED 0177 /* _WSTATUS if process is stopped */
+#define WIFSIGNALED(x) (_WSTATUS(x) != _WSTOPPED && _WSTATUS(x) != 0)
+#define WTERMSIG(x) (_WSTATUS(x))
+
+#endif /* _TCC_GLUE_H */
--- /dev/null
+#
+# $Id: Makefile 5626 2010-03-04 21:55:22Z luigi $
+#
+# Makefile for building userland tests
+# this is written in a form compatible with gmake
+
+# Sources of the scheduler test program
+SCHED_SRCS = test_dn_sched.c
+SCHED_SRCS += dn_sched_fifo.c
+SCHED_SRCS += dn_sched_wf2q.c
+SCHED_SRCS += dn_sched_qfq.c
+SCHED_SRCS += dn_sched_rr.c
+SCHED_SRCS += dn_heap.c
+SCHED_SRCS += main.c
+
+SCHED_OBJS = $(SCHED_SRCS:.c=.o)
+
+# Sources of the heap test program
+HEAP_SRCS = dn_heap.c test_dn_heap.c
+HEAP_OBJS = $(HEAP_SRCS:.c=.o)
+
+# Prerequisites are also searched for in ../dummynet2
+VPATH = .:../dummynet2
+
+#CFLAGS = -I../dummynet2/include -I. -Wall -Werror -O3 -DIPFW
+CFLAGS = -I. -I../dummynet2/include/netinet/ipfw -DIPFW
+CFLAGS += -Wall -Werror
+CFLAGS += -g -O3
+
+# test_heap is not built by default.
+# (Kept on its own line: a comment after the value would leave trailing
+# whitespace in TARGETS.)
+TARGETS = test_sched
+
+# These targets are commands, not files
+.PHONY: all clean tgz
+
+all: $(TARGETS)
+
+test_heap : $(HEAP_OBJS)
+	$(CC) -o $@ $(HEAP_OBJS)
+
+test_sched : $(SCHED_OBJS)
+	$(CC) -o $@ $(SCHED_OBJS)
+
+$(SCHED_OBJS): dn_test.h
+main.o: mylist.h
+
+# Remove everything this Makefile can build, including the optional
+# test_heap; -f keeps this quiet when there is nothing to remove.
+clean:
+	rm -f *.o $(TARGETS) test_heap *.core
+
+ALLSRCS = $(SCHED_SRCS) dn_test.h mylist.h \
+	dn_sched.h dn_heap.h ip_dn_private.h Makefile
+TMPBASE = /tmp/testXYZ
+TMPDIR = $(TMPBASE)/test
+
+# Collect the test sources into /tmp/test.tgz.
+# The two cp commands are allowed to fail (leading '-') because each file
+# in ALLSRCS exists in only one of the two directories tried.
+tgz:
+	rm -rf $(TMPDIR)
+	mkdir -p $(TMPDIR)
+	-cp -p $(ALLSRCS) $(TMPDIR)
+	-(cd .. && cp -p $(ALLSRCS) $(TMPDIR))
+	ls -la $(TMPDIR)
+	(cd $(TMPBASE) && tar cvzf /tmp/test.tgz test)
--- /dev/null
+#!/bin/sh
+
+IPFW=./ipfw/ipfw
+PING=/bin/ping
+RH=127.0.0.1 # remote host
+R=10 # test rule number
+P=1 # test pipe number
+
+# Report a failed check and stop the test run.
+# Prints its arguments on stderr, removes the test rule and pipe, and exits
+# with status 1 so that a failure is never followed by "...OK".
+abort()
+{
+	echo "$*" >&2
+	clean
+	exit 1
+}
+
+#insmod dummynet2/ipfw_mod.ko
+#$IPFW show > /dev/null
+#$IPFW pipe show
+echo "Flushing rules, do you agree ?"
+$IPFW flush
+
+# test_msg rule counter
+clean()
+{
+ $IPFW delete $R 2> /dev/null
+ $IPFW pipe $P delete 2> /dev/null
+}
+
+# simple counter/allow test
+echo -n "counter/allow test..."
+clean
+$IPFW add $R allow icmp from any to 127.0.0.1 > /dev/null
+$PING -f -c100 $RH > /dev/null
+counter=`$IPFW show | grep $R | head -n 1 | cut -d " " -f3`
+[ ! $counter -eq 400 ] && abort "Wrong counter $counter 400"
+echo "...OK"
+
+# simple drop test
+echo -n "deny test..."
+clean
+$IPFW add $R deny icmp from any to 127.0.0.1 > /dev/null
+$PING -f -c10 -W 1 $RH > /dev/null
+counter=`$IPFW show | grep $R | head -n 1 | cut -d " " -f4`
+[ ! $counter -eq 10 ] && abort "Wrong counter $counter 10"
+echo "...OK"
+
+# pipe delay test
+echo -n "pipe delay test..."
+clean
+$IPFW pipe $P config delay 2000ms >/dev/null
+$IPFW add $R pipe $P icmp from any to $RH >/dev/null
+$PING -f -c10 -W 1 $RH > /dev/null
+counter1=`$IPFW show | grep $R | head -n 1 | cut -d " " -f4`
+sleep 2
+counter2=`$IPFW show | grep $R | head -n 1 | cut -d " " -f4`
+[ ! $counter1 -eq 10 ] && abort "Wrong counter $counter 10"
+[ ! $counter2 -eq 20 ] && abort "Wrong counter $counter 20"
+echo "...OK"
+
+# pipe bw test
+echo -n "pipe bw test..."
+clean
+$IPFW pipe $P config bw 2Kbit/s >/dev/null
+$IPFW add $R pipe $P icmp from any to $RH >/dev/null
+$PING -i 0.1 -c10 -W 1 $RH > /dev/null
+counter=`$IPFW show | grep $R | head -n 1 | cut -d " " -f4`
+[ $counter -gt 30 ] && abort "Wrong counter $counter should be < 30"
+sleep 1
+counter=`$IPFW show | grep $R | head -n 1 | cut -d " " -f4`
+[ $counter -gt 30 ] && abort "Wrong counter $counter should be < 30"
+echo "...OK"
+
+# Final clean
+clean
--- /dev/null
+/*
+ * $Id: dn_test.h 5626 2010-03-04 21:55:22Z luigi $
+ *
+ * userspace compatibility code for dummynet schedulers
+ */
+
+#ifndef _DN_TEST_H
+#define _DN_TEST_H
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h> /* bzero, ffs, ... */
+#include <string.h> /* strcmp */
+#include <errno.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+
+extern int debug;
+#define ND(fmt, args...) do {} while (0)
+#define D1(fmt, args...) do {} while (0)
+#define D(fmt, args...) fprintf(stderr, "%-8s " fmt "\n", \
+ __FUNCTION__, ## args)
+#define DX(lev, fmt, args...) do { \
+ if (debug > lev) D(fmt, ## args); } while (0)
+
+
+#define offsetof(t,m) (int)((&((t *)0L)->m))
+
+#include <mylist.h>
+
+/* prevent include of other system headers */
+#define _NETINET_IP_VAR_H_ /* ip_fw_args */
+#define _IPFW2_H
+#define _SYS_MBUF_H_
+
+enum {
+ DN_QUEUE,
+};
+
+enum {
+ DN_SCHED_FIFO,
+ DN_SCHED_WF2QP,
+};
+
+struct dn_id {
+ int type, subtype, len, id;
+};
+struct dn_fs {
+ int par[4]; /* flowset parameters */
+
+ /* simulation entries.
+ * 'index' is not strictly necessary
+ * y is used for the inverse mapping ,
+ */
+ int index;
+ int y; /* inverse mapping */
+ int base_y; /* inverse mapping */
+ int next_y; /* inverse mapping */
+ int n_flows;
+ int first_flow;
+ int next_flow; /* first_flow + n_flows */
+ /*
+ * when generating, let 'cur' go from 0 to n_flows-1,
+ * then point to flow first_flow + cur
+ */
+ int cur;
+};
+struct dn_sch {
+};
+struct dn_flow {
+ struct dn_id oid;
+ int length;
+ int len_bytes;
+ int drops;
+ uint64_t tot_bytes;
+ uint32_t flow_id;
+ struct list_head h; /* used by the generator */
+};
+struct dn_link {
+};
+
+struct ip_fw_args {
+};
+
+struct mbuf {
+ struct {
+ int len;
+ } m_pkthdr;
+ struct mbuf *m_nextpkt;
+ int flow_id; /* for testing, index of a flow */
+ //int flowset_id; /* for testing, index of a flowset */
+ void *cfg; /* config args */
+};
+
+#define MALLOC_DECLARE(x)
+/*
+ * Userspace KASSERT: when x is false, print the printf-style argument
+ * list y (given with its own parentheses) and terminate the program.
+ * The inner braces matter: without them exit() runs on every use.
+ */
+#define KASSERT(x, y)	do { if (!(x)) { printf y ; exit(0); } } while (0)
+struct ipfw_flow_id {
+};
+
+typedef void * module_t;
+struct _md_t {
+ const char *name;
+ int (*f)(module_t, int, void *);
+ void *p;
+};
+typedef struct _md_t moduledata_t;
+#define DECLARE_MODULE(name, b, c, d) \
+ moduledata_t *_g_##name = & b
+#define MODULE_DEPEND(a, b, c, d, e)
+
+#ifdef IPFW
+#include <dn_heap.h>
+#include <ip_dn_private.h>
+#include <dn_sched.h>
+#else
+struct dn_queue {
+ struct dn_fsk *fs; /* parent flowset. */
+ struct dn_sch_inst *_si; /* parent sched instance. */
+};
+struct dn_schk {
+};
+struct dn_fsk {
+ struct dn_fs fs;
+ struct dn_schk *sched;
+};
+struct dn_sch_inst {
+ struct dn_schk *sched;
+};
+struct dn_alg {
+ int type;
+ const char *name;
+ void *enqueue, *dequeue;
+ int q_datalen, si_datalen, schk_datalen;
+ int (*config)(struct dn_schk *);
+ int (*new_sched)(struct dn_sch_inst *);
+ int (*new_fsk)(struct dn_fsk *);
+ int (*new_queue)(struct dn_queue *q);
+};
+
+#endif
+
+#ifndef __FreeBSD__
+int fls(int);
+#endif
+
+/* Append mbuf m at the tail of queue q and terminate the chain at m. */
+static inline void
+mq_append(struct mq *q, struct mbuf *m)
+{
+	/* slot that must point to m: the head if empty, else the old tail's link */
+	struct mbuf **link = (q->head == NULL) ? &q->head : &q->tail->m_nextpkt;
+
+	*link = m;
+	q->tail = m;
+	m->m_nextpkt = NULL;
+}
+
+#endif /* _DN_TEST_H */
--- /dev/null
+/*
+ * $Id: main.c 5626 2010-03-04 21:55:22Z luigi $
+ *
+ * Testing program for schedulers
+ *
+ * The framework include a simple controller which, at each
+ * iteration, decides whether we can enqueue and/or dequeue.
+ * Then the mainloop runs the required number of tests,
+ * keeping track of statistics.
+ */
+
+#include "dn_test.h"
+
+struct q_list {
+ struct list_head h;
+};
+
+struct cfg_s {
+ int ac;
+ char * const *av;
+
+ const char *name;
+ int loops;
+ struct timeval time;
+
+ /* running counters */
+ uint32_t _enqueue;
+ uint32_t drop;
+ uint32_t pending;
+ uint32_t dequeue;
+
+ /* generator parameters */
+ int th_min, th_max;
+ int maxburst;
+ int lmin, lmax; /* packet len */
+ int flows; /* number of flows */
+ int flowsets; /* number of flowsets */
+ int wsum; /* sum of weights of all flows */
+ int max_y; /* max random number in the generation */
+ int cur_y, cur_fs; /* used in generation, between 0 and max_y - 1 */
+ const char *fs_config; /* flowset config */
+ int can_dequeue;
+ int burst; /* count of packets sent in a burst */
+ struct mbuf *tosend; /* packet to send -- also flag to enqueue */
+
+ struct mbuf *freelist;
+
+ struct mbuf *head, *tail; /* a simple tailq */
+
+ /* scheduler hooks */
+ int (*enq)(struct dn_sch_inst *, struct dn_queue *,
+ struct mbuf *);
+ struct mbuf * (*deq)(struct dn_sch_inst *);
+ /* size of the three fields including sched-specific areas */
+ int schk_len;
+ int q_len; /* size of a queue including sched-fields */
+ int si_len; /* size of a sch_inst including sched-fields */
+ char *q; /* array of flow queues */
+ /* use a char* because size is variable */
+ struct dn_fsk *fs; /* array of flowsets */
+ struct dn_sch_inst *si;
+ struct dn_schk *sched;
+
+ /* generator state */
+ int state; /* 0 = going up, 1: going down */
+
+ /*
+ * We keep lists for each backlog level, and always serve
+ * the one with shortest backlog. llmask contains a bitmap
+ * of lists, and ll are the heads of the lists. The last
+ * entry (BACKLOG) contains all entries considered 'full'
+ * XXX to optimize things, entry i could contain queues with
+ * 2^{i-1}+1 .. 2^i entries.
+ */
+#define BACKLOG 30
+ uint32_t llmask;
+ struct list_head ll[BACKLOG + 10];
+};
+
+/* FI2Q and Q2FI converts from flow_id to dn_queue and back.
+ * We cannot easily use pointer arithmetic because it is variable size.
+ */
+#define FI2Q(c, i) ((struct dn_queue *)((c)->q + (c)->q_len * (i)))
+#define Q2FI(c, q) (((char *)(q) - (c)->q)/(c)->q_len)
+
+int debug = 0;
+
+struct dn_parms dn_cfg;
+
+static void controller(struct cfg_s *c);
+
+/*
+ * Release packet m: count it as a drop, move its queue to the list
+ * that matches the queue's current length (only while that length is
+ * below BACKLOG), and push the mbuf on the freelist.
+ * Always returns 0.
+ */
+int
+drop(struct cfg_s *c, struct mbuf *m)
+{
+	struct dn_queue *q = FI2Q(c, m->flow_id);
+	int backlog;
+
+	c->drop++;
+	backlog = q->ni.length;	// XXX or ffs...
+
+	ND("q %p id %d current length %d", q, m->flow_id, backlog);
+	if (backlog < BACKLOG) {
+		struct list_head *entry = &q->ni.h;
+
+		/* clear the bit of the next level, set the bit of this one */
+		c->llmask = (c->llmask & ~(1<<(backlog+1))) | (1<<(backlog));
+		list_del(entry);
+		list_add_tail(entry, &c->ll[backlog]);
+	}
+	m->m_nextpkt = c->freelist;
+	c->freelist = m;
+	return 0;
+}
+
+/*
+ * enqueue returns non-zero when the packet is dropped.
+ * With a scheduler hook installed the decision is delegated to it,
+ * otherwise the packet is appended to the built-in head/tail queue.
+ */
+static int
+enqueue(struct cfg_s *c, void *_m)
+{
+	struct mbuf *pkt = _m;
+
+	if (c->enq != NULL)
+		return c->enq(c->si, FI2Q(c, pkt->flow_id), pkt);
+
+	if (c->head != NULL)
+		c->tail->m_nextpkt = pkt;
+	else
+		c->head = pkt;
+	c->tail = pkt;
+	return 0;	/* default - success */
+}
+
+/*
+ * dequeue returns NON-NULL when a packet is available.
+ * With a scheduler hook installed the packet comes from it, otherwise
+ * the head of the built-in queue is unlinked and returned.
+ */
+static void *
+dequeue(struct cfg_s *c)
+{
+	struct mbuf *pkt;
+
+	if (c->deq != NULL)
+		return c->deq(c->si);
+
+	pkt = c->head;
+	if (pkt == NULL)
+		return NULL;
+	c->head = pkt->m_nextpkt;
+	pkt->m_nextpkt = NULL;
+	return pkt;
+}
+
+/*
+ * Run c->loops iterations. In each one the controller decides whether
+ * a packet must be enqueued (c->tosend) and whether a dequeue is
+ * allowed (c->can_dequeue); the counters in c are updated accordingly.
+ * Always returns 0.
+ */
+static int
+mainloop(struct cfg_s *c)
+{
+	struct mbuf *pkt;
+	int iter = 0;
+
+	while (iter < c->loops) {
+		/* implement hysteresis */
+		controller(c);
+		DX(3, "loop %d enq %d send %p rx %d",
+		    iter, c->_enqueue, c->tosend, c->can_dequeue);
+		pkt = c->tosend;
+		if (pkt != NULL) {
+			c->_enqueue++;
+			if (enqueue(c, pkt) == 0) {
+				ND("enqueue ok");
+				c->pending++;
+			} else {
+				drop(c, pkt);
+				ND("loop %d enqueue fail", iter );
+			}
+		}
+		if (c->can_dequeue) {
+			c->dequeue++;
+			pkt = dequeue(c);
+			if (pkt != NULL) {
+				c->pending--;
+				/* drop() recycles the mbuf but also counts a drop */
+				drop(c, pkt);
+				c->drop--;	/* compensate */
+			}
+		}
+		iter++;
+	}
+	DX(1, "mainloop ends %d", iter);
+	return 0;
+}
+
+/*
+ * Print the byte total of every flow queue followed by the loop count.
+ * The output goes through DX(1, ...), so it shows up only when
+ * debug > 1. Always returns 0.
+ */
+int
+dump(struct cfg_s *c)
+{
+	int i;
+	struct dn_queue *q;
+
+	for (i=0; i < c->flows; i++) {
+		q = FI2Q(c, i);
+		/* cast so the argument matches %llu whatever the ABI calls 64 bits */
+		DX(1, "queue %4d tot %10llu", i,
+		    (unsigned long long)q->ni.tot_bytes);
+	}
+	DX(1, "done %d loops\n", c->loops);
+	return 0;
+}
+
+/*
+ * Interpret a number in human form.
+ *
+ * s is parsed with strtol() (base auto-detected); an empty or NULL
+ * string, or a negative value, yields 0. One suffix character is
+ * understood: K and M scale by 10^3 and 10^6, k and m by 2^10 and 2^20,
+ * n negates the value (callers treat it as "multiply by n"), w is
+ * accepted and ignored. Any other character is reported and left in
+ * place. When next is not NULL it is set to the text that follows the
+ * number (after a recognized suffix), or to NULL if nothing follows.
+ * key is only used in diagnostics.
+ */
+static long
+getnum(const char *s, char **next, const char *key)
+{
+	char *end = NULL;
+	long val = -1;		/* stays negative for an empty string */
+	int known = 1;		/* cleared when the suffix is not recognized */
+
+	if (next)	/* default */
+		*next = NULL;
+	if (s && *s) {
+		DX(3, "token is <%s> %s", s, key ? key : "-");
+		val = strtol(s, &end, 0);
+	} else {
+		DX(3, "empty string");
+	}
+	if (val < 0) {
+		DX(2, "invalid %s for %s", s ? s : "NULL", (key ? key : "") );
+		return 0;	// invalid
+	}
+	if (!end || !*end)
+		return val;
+	switch (*end) {
+	case 'n':
+		val = -val;	/* multiply by n */
+		break;
+	case 'K':
+		val = val*1000;
+		break;
+	case 'M':
+		val = val*1000000;
+		break;
+	case 'k':
+		val = val*1024;
+		break;
+	case 'm':
+		val = val*1024*1024;
+		break;
+	case 'w':
+		break;
+	default:	/* not recognized */
+		D("suffix %s for %s, next %p", end, key, next);
+		known = 0;
+		break;
+	}
+	if (known)	/* skip the suffix only when it was consumed */
+		end++;
+	DX(3, "suffix now %s for %s, next %p", end, key, next);
+	if (next && *end) {
+		DX(3, "setting next to %s for %s", end, key);
+		*next = end;
+	}
+	return val;
+}
+
+/*
+ * flowsets are a comma-separated list of
+ *	weight:maxlen:flows
+ * indicating how many flows are hooked to that fs.
+ * Both weight and range can be min-max-steps.
+ * In a first pass we just count the number of flowsets and flows,
+ * in a second pass we complete the setup.
+ *
+ * Pass 0 records fs in c->fs_config; every pass then parses
+ * c->fs_config, so fs is not looked at in later passes.
+ * Parsing is done on a private copy of the string, which is released
+ * on every exit path.
+ */
+static void
+parse_flowsets(struct cfg_s *c, const char *fs, int pass)
+{
+	char *s, *cur, *next;
+	int n_flows = 0, n_fs = 0, wsum = 0;
+	int i, j;
+	struct dn_fs *prev = NULL;
+
+	DX(3, "--- pass %d flows %d flowsets %d", pass, c->flows, c->flowsets);
+	if (pass == 0)
+		c->fs_config = fs;
+	s = c->fs_config ? strdup(c->fs_config) : NULL;
+	if (s == NULL) {
+		if (pass == 0)
+			D("no fsconfig");
+		return;
+	}
+	for (next = s; (cur = strsep(&next, ","));) {
+		char *p = NULL;
+		int w, w_h, w_steps, wi;
+		int len, len_h, l_steps, li;
+		int flows;
+
+		w = getnum(strsep(&cur, ":"), &p, "weight");
+		if (w <= 0)
+			w = 1;
+		w_h = p ? getnum(p+1, &p, "weight_max") : w;
+		w_steps = p ? getnum(p+1, &p, "w_steps") : (w_h == w ?1:2);
+		len = getnum(strsep(&cur, ":"), &p, "len");
+		if (len <= 0)
+			len = 1000;
+		len_h = p ? getnum(p+1, &p, "len_max") : len;
+		l_steps = p ? getnum(p+1, &p, "l_steps") : (len_h == len ? 1 : 2);
+		flows = getnum(strsep(&cur, ":"), NULL, "flows");
+		if (flows == 0)
+			flows = 1;
+		DX(4, "weight %d..%d (%d) len %d..%d (%d) flows %d",
+		    w, w_h, w_steps, len, len_h, l_steps, flows);
+		if (w == 0 || w_h < w || len == 0 || len_h < len ||
+		    flows == 0) {
+			/* print the string being parsed, valid in every pass */
+			DX(4,"wrong parameters %s", c->fs_config);
+			free(s);
+			return;
+		}
+		n_flows += flows * w_steps * l_steps;
+		for (i = 0; i < w_steps; i++) {
+			wi = w + ((w_h - w)* i)/(w_steps == 1 ? 1 : (w_steps-1));
+			for (j = 0; j < l_steps; j++, n_fs++) {
+				struct dn_fs *f;
+				int x;
+
+				li = len + ((len_h - len)* j)/(l_steps == 1 ? 1 : (l_steps-1));
+				x = (wi*2048)/li;
+				DX(3, "----- fs %4d weight %4d lmax %4d X %4d flows %d",
+				    n_fs, wi, li, x, flows);
+				if (pass == 0)
+					continue;
+				if (c->fs == NULL || c->flowsets <= n_fs) {
+					D("error in number of flowsets");
+					free(s);
+					return;
+				}
+				/* index the array only after the checks above */
+				f = &c->fs[n_fs].fs;
+				wsum += wi * flows;
+				f->par[0] = wi;
+				f->par[1] = li;
+				f->index = n_fs;
+				f->n_flows = flows;
+				f->cur = f->first_flow = prev==NULL ? 0 : prev->next_flow;
+				f->next_flow = f->first_flow + f->n_flows;
+				f->y = x * flows;
+				f->base_y = (prev == NULL) ? 0 : prev->next_y;
+				f->next_y = f->base_y + f->y;
+				prev = f;
+			}
+		}
+	}
+	free(s);	/* nothing stored in c points into the copy */
+	c->max_y = prev ? prev->base_y + prev->y : 0;
+	c->flows = n_flows;
+	c->flowsets = n_fs;
+	c->wsum = wsum;
+	if (pass == 0)
+		return;
+
+	/* now link all flows to their parent flowsets */
+	DX(1,"%d flows on %d flowsets max_y %d", c->flows, c->flowsets, c->max_y);
+	for (i=0; i < c->flowsets; i++) {
+		struct dn_fs *f = &c->fs[i].fs;
+		DX(1, "fs %3d w %5d l %4d flow %5d .. %5d y %6d .. %6d",
+		    i, f->par[0], f->par[1],
+		    f->first_flow, f->next_flow,
+		    f->base_y, f->next_y);
+		for (j = f->first_flow; j < f->next_flow; j++) {
+			struct dn_queue *q = FI2Q(c, j);
+			q->fs = &c->fs[i];
+		}
+	}
+}
+
+/*
+ * Set up the test from the command line saved in c->ac / c->av.
+ *
+ * Options come as "-name value" pairs: -n, -d, -alg, -len, -burst,
+ * -qmax, -qmin, -flows, -flowsets; unknown names are reported and
+ * skipped together with their value. After parsing, defaults are
+ * applied, the queues, flowsets, scheduler instance and scheduler are
+ * allocated, the scheduler callbacks (if an algorithm was selected) are
+ * invoked, and every flow is put on the list for backlog 0.
+ * Exits with status 1 when an allocation fails, otherwise returns 0.
+ */
+static int
+init(struct cfg_s *c)
+{
+	int i;
+	int ac = c->ac;
+	char * const *av = c->av;
+	moduledata_t *mod = NULL;
+	struct dn_alg *p = NULL;
+
+	c->si_len = sizeof(struct dn_sch_inst);
+	c->q_len = sizeof(struct dn_queue);
+
+	c->th_min = 0;
+	c->th_max = -20;/* 20 packets per flow */
+	c->lmin = c->lmax = 1280;	/* packet len */
+	c->flows = 1;
+	c->flowsets = 1;
+	c->name = "null";
+	ac--; av++;
+	while (ac > 1) {
+		if (!strcmp(*av, "-n")) {
+			c->loops = getnum(av[1], NULL, av[0]);
+		} else if (!strcmp(*av, "-d")) {
+			debug = atoi(av[1]);
+		} else if (!strcmp(*av, "-alg")) {
+			extern moduledata_t *_g_dn_fifo;
+			extern moduledata_t *_g_dn_wf2qp;
+			extern moduledata_t *_g_dn_rr;
+			extern moduledata_t *_g_dn_qfq;
+#ifdef WITH_KPS
+			extern moduledata_t *_g_dn_kps;
+#endif
+			if (!strcmp(av[1], "rr"))
+				mod = _g_dn_rr;
+			else if (!strcmp(av[1], "wf2qp"))
+				mod = _g_dn_wf2qp;
+			else if (!strcmp(av[1], "fifo"))
+				mod = _g_dn_fifo;
+			else if (!strcmp(av[1], "qfq"))
+				mod = _g_dn_qfq;
+#ifdef WITH_KPS
+			else if (!strcmp(av[1], "kps"))
+				mod = _g_dn_kps;
+#endif
+			else
+				mod = NULL;
+			c->name = mod ? mod->name : "NULL";
+			DX(3, "using scheduler %s", c->name);
+		} else if (!strcmp(*av, "-len")) {
+			c->lmin = getnum(av[1], NULL, av[0]);
+			c->lmax = c->lmin;
+			DX(3, "setting len to %d", c->lmin);
+		} else if (!strcmp(*av, "-burst")) {
+			c->maxburst = getnum(av[1], NULL, av[0]);
+			DX(3, "setting burst to %d", c->maxburst);
+		} else if (!strcmp(*av, "-qmax")) {
+			c->th_max = getnum(av[1], NULL, av[0]);
+			DX(3, "setting max to %d", c->th_max);
+		} else if (!strcmp(*av, "-qmin")) {
+			c->th_min = getnum(av[1], NULL, av[0]);
+			DX(3, "setting min to %d", c->th_min);
+		} else if (!strcmp(*av, "-flows")) {
+			c->flows = getnum(av[1], NULL, av[0]);
+			DX(3, "setting flows to %d", c->flows);
+		} else if (!strcmp(*av, "-flowsets")) {
+			parse_flowsets(c, av[1], 0);
+			DX(3, "setting flowsets to %d", c->flowsets);
+		} else {
+			D("option %s not recognised, ignore", *av);
+		}
+		ac -= 2; av += 2;
+	}
+	if (c->maxburst <= 0)
+		c->maxburst = 1;
+	if (c->loops <= 0)
+		c->loops = 1;
+	if (c->flows <= 0)
+		c->flows = 1;
+	if (c->flowsets <= 0)
+		c->flowsets = 1;
+	if (c->lmin <= 0)
+		c->lmin = 1;
+	if (c->lmax <= 0)
+		c->lmax = 1;
+	/* multiply by N: negative thresholds are per-flow values */
+	if (c->th_min < 0)
+		c->th_min = c->flows * -c->th_min;
+	if (c->th_max < 0)
+		c->th_max = c->flows * -c->th_max;
+	if (c->th_max <= c->th_min)
+		c->th_max = c->th_min + 1;
+	if (mod) {
+		p = mod->p;
+		DX(3, "using module %s f %p p %p", mod->name, mod->f, mod->p);
+		DX(3, "modname %s ty %d", p->name, p->type);
+		c->enq = p->enqueue;
+		c->deq = p->dequeue;
+		c->si_len += p->si_datalen;
+		c->q_len += p->q_datalen;
+		c->schk_len += p->schk_datalen;
+	}
+	/* allocate queues, flowsets and one scheduler */
+	c->q = calloc(c->flows, c->q_len);
+	c->fs = calloc(c->flowsets, sizeof(struct dn_fsk));
+	c->si = calloc(1, c->si_len);
+	c->sched = calloc(c->flows, c->schk_len);
+	/* c->si is dereferenced right below, so it must be checked too */
+	if (c->q == NULL || c->fs == NULL || c->si == NULL) {
+		D("error allocating memory for flows");
+		exit(1);
+	}
+	c->si->sched = c->sched;
+	if (p) {
+		if (p->config)
+			p->config(c->sched);
+		if (p->new_sched)
+			p->new_sched(c->si);
+	}
+	/*
+	 * parse_flowsets links queues to their flowsets.
+	 * Pass the saved configuration: av has been advanced past the
+	 * parsed options, so av[1] may lie beyond the end of the
+	 * argument vector here.
+	 */
+	parse_flowsets(c, c->fs_config, 1);
+	/* complete the work calling new_fsk */
+	for (i = 0; i < c->flowsets; i++) {
+		if (c->fs[i].fs.par[1] == 0)
+			c->fs[i].fs.par[1] = 1000;	/* default pkt len */
+		c->fs[i].sched = c->sched;
+		if (p && p->new_fsk)
+			p->new_fsk(&c->fs[i]);
+	}
+
+	/* initialize the lists for the generator, and put
+	 * all flows in the list for backlog = 0
+	 */
+	for (i=0; i <= BACKLOG+5; i++)
+		INIT_LIST_HEAD(&c->ll[i]);
+
+	for (i = 0; i < c->flows; i++) {
+		struct dn_queue *q = FI2Q(c, i);
+		if (q->fs == NULL)
+			q->fs = &c->fs[0]; /* XXX */
+		q->_si = c->si;
+		if (p && p->new_queue)
+			p->new_queue(q);
+		INIT_LIST_HEAD(&q->ni.h);
+		list_add_tail(&q->ni.h, &c->ll[0]);
+	}
+	c->llmask = 1;
+	return 0;
+}
+
+
+/*
+ * Entry point of the scheduler test: configure the run from the
+ * command line, time mainloop() with gettimeofday(), then print one
+ * summary line (including the elapsed time and the time per enqueue in
+ * nanoseconds) followed by the debug dumps. Always returns 0.
+ */
+int
+main(int ac, char *av[])
+{
+	struct cfg_s c;
+	struct timeval end;
+	double ll;
+	int i;
+	char msg[40];
+
+	bzero(&c, sizeof(c));
+	c.ac = ac;
+	c.av = av;
+	init(&c);
+	gettimeofday(&c.time, NULL);
+	mainloop(&c);
+	gettimeofday(&end, NULL);
+	end.tv_sec -= c.time.tv_sec;
+	end.tv_usec -= c.time.tv_usec;
+	if (end.tv_usec < 0) {
+		end.tv_usec += 1000000;
+		end.tv_sec--;
+	}
+	c.time = end;
+	/* do the math in floating point: the integer product can overflow
+	 * where tv_sec is only 32 bits wide */
+	ll = (double)end.tv_sec * 1000000 + end.tv_usec;
+	ll *= 1000;	/* convert to nanoseconds */
+	/* report 0 rather than inf/nan when nothing was enqueued */
+	ll = c._enqueue ? ll / c._enqueue : 0;
+	snprintf(msg, sizeof(msg), "1::%d", c.flows);
+	D("%-8s n %d %d time %d.%06d %8.3f qlen %d %d flows %s drops %d",
+	    c.name, c._enqueue, c.loops,
+	    (int)c.time.tv_sec, (int)c.time.tv_usec, ll,
+	    c.th_min, c.th_max,
+	    c.fs_config ? c.fs_config : msg, c.drop);
+	dump(&c);
+	DX(1, "done ac %d av %p", ac, av);
+	for (i=0; i < ac; i++)
+		DX(1, "arg %d %s", i, av[i]);
+	return 0;
+}
+
+/*
+ * The controller decides whether in this iteration we should send
+ * (the packet is in c->tosend) and/or receive (flag c->can_dequeue)
+ */
+static void
+controller(struct cfg_s *c)
+{
+ struct mbuf *m;
+ struct dn_fs *fs;
+ int flow_id;
+
+ /* histeresis between max and min */
+ if (c->state == 0 && c->pending >= c->th_max)
+ c->state = 1;
+ else if (c->state == 1 && c->pending <= c->th_min)
+ c->state = 0;
+ ND(1, "state %d pending %2d", c->state, c->pending);
+ c->can_dequeue = c->state;
+ c->tosend = NULL;
+ if (c->state)
+ return;
+
+ if (1) {
+ int i;
+ struct dn_queue *q;
+ struct list_head *h;
+
+ i = ffs(c->llmask) - 1;
+ if (i < 0) {
+ DX(2, "no candidate");
+ c->can_dequeue = 1;
+ return;
+ }
+ h = &c->ll[i];
+ ND(1, "backlog %d p %p prev %p next %p", i, h, h->prev, h->next);
+ q = list_first_entry(h, struct dn_queue, ni.h);
+ list_del(&q->ni.h);
+ flow_id = Q2FI(c, q);
+ DX(2, "extracted flow %p %d backlog %d", q, flow_id, i);
+ if (list_empty(h)) {
+ ND(2, "backlog %d empty", i);
+ c->llmask &= ~(1<<i);
+ }
+ ND(1, "before %d p %p prev %p next %p", i+1, h+1, h[1].prev, h[1].next);
+ list_add_tail(&q->ni.h, h+1);
+ ND(1, " after %d p %p prev %p next %p", i+1, h+1, h[1].prev, h[1].next);
+ if (i < BACKLOG) {
+ ND(2, "backlog %d full", i+1);
+ c->llmask |= 1<<(1+i);
+ }
+ fs = &q->fs->fs;
+ c->cur_fs = q->fs - c->fs;
+ fs->cur = flow_id;
+ } else {
+ /* XXX this does not work ? */
+ /* now decide whom to send the packet, and the length */
+ /* lookup in the flow table */
+ if (c->cur_y >= c->max_y) { /* handle wraparound */
+ c->cur_y = 0;
+ c->cur_fs = 0;
+ }
+ fs = &c->fs[c->cur_fs].fs;
+ flow_id = fs->cur++;
+ if (fs->cur >= fs->next_flow)
+ fs->cur = fs->first_flow;
+ c->cur_y++;
+ if (c->cur_y >= fs->next_y)
+ c->cur_fs++;
+ }
+
+ /* construct a packet */
+ if (c->freelist) {
+ m = c->tosend = c->freelist;
+ c->freelist = c->freelist->m_nextpkt;
+ } else {
+ m = c->tosend = calloc(1, sizeof(struct mbuf));
+ }
+ if (m == NULL)
+ return;
+
+ m->cfg = c;
+ m->m_nextpkt = NULL;
+ m->m_pkthdr.len = fs->par[1]; // XXX maxlen
+ m->flow_id = flow_id;
+
+ ND(2,"y %6d flow %5d fs %3d weight %4d len %4d",
+ c->cur_y, m->flow_id, c->cur_fs,
+ fs->par[0], m->m_pkthdr.len);
+
+}
+
+/*
+Packet allocation:
+to achieve a distribution that matches weights, for each X=w/lmax class
+we should generate a number of packets proportional to Y = X times the number
+of flows in the class.
+So we construct an array with the cumulative distribution of Y's,
+and use it to identify the flow via inverse mapping (if the Y's are
+not too many we can use an array for the lookup). In practice,
+each flow will have X entries [virtually] pointing to it.
+
+*/
--- /dev/null
+#!/bin/sh
+# This script executes the commands CMD1 and CMD2 N times,
+# saving the memory usage to a file at selected iterations.
+# The value of the Dirty memory should not increase
+# between tests.
+
+BASE_NAME=ipfw_r5808_
+N=10000
+CMD1="/sbin/insmod ../dummynet2/ipfw_mod.ko"
+CMD2="/sbin/rmmod ipfw_mod"
+
+# main
+# remove any previous loaded module
+/sbin/rmmod ipfw_mod
+
+# pre
+
+# load and unload the module N times; keep a copy of /proc/meminfo
+# after the 10th, 100th and 1000th cycle
+for n in `seq $N`; do
+	$CMD1
+	$CMD2
+	case $n in
+	10|100|1000)
+		cat /proc/meminfo > /tmp/${BASE_NAME}_${n}
+		;;
+	esac
+done;
+
+# post
--- /dev/null
+/*
+ * $Id: mylist.h 5626 2010-03-04 21:55:22Z luigi $
+ *
+ * linux-like bidirectional lists
+ */
+
+#ifndef _MYLIST_H
+#define _MYLIST_H
+/* Node of a circular doubly linked list; also used as the list head. */
+struct list_head {
+	struct list_head *prev, *next;
+};
+
+/* Make l an empty list: both links point back to l itself. */
+#define INIT_LIST_HEAD(l) do {  (l)->prev = (l)->next = (l); } while (0)
+/* True when head l has no entries. Every use of the argument is
+ * parenthesized so that expressions such as "a ? b : c" expand safely. */
+#define list_empty(l)	( (l)->next == (l) )
+static inline void
+__list_add(struct list_head *o, struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = o;
+ o->next = next;
+ o->prev = prev;
+ prev->next = o;
+}
+
+static inline void
+list_add_tail(struct list_head *o, struct list_head *head)
+{
+ __list_add(o, head->prev, head);
+}
+
+#define list_first_entry(pL, ty, member) \
+ (ty *)((char *)((pL)->next) - offsetof(ty, member))
+
+static inline void
+__list_del(struct list_head *prev, struct list_head *next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+static inline void
+list_del(struct list_head *entry)
+{
+ ND("called on %p", entry);
+ __list_del(entry->prev, entry->next);
+ entry->next = entry->prev = NULL;
+}
+
+#endif /* _MYLIST_H */
--- /dev/null
+/*-
+ * Copyright (c) 1998-2002,2010 Luigi Rizzo, Universita` di Pisa
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Userland code for testing binary heaps and hash tables
+ *
+ * $Id: test_dn_heap.c 5626 2010-03-04 21:55:22Z luigi $
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+
+#include <stdio.h>
+#include <strings.h>
+#include <stdlib.h>
+
+#include "dn_heap.h"
+#define log(x, arg...) fprintf(stderr, ## arg)
+#define panic(x...) fprintf(stderr, ## x), exit(1)
+
+#include <string.h>
+
+struct x {
+ struct x *ht_link;
+ char buf[0];
+};
+
+uint32_t hf(uintptr_t key, int flags, void *arg)
+{
+ return (flags & DNHT_KEY_IS_OBJ) ?
+ ((struct x *)key)->buf[0] : *(char *)key;
+}
+
+int matchf(void *obj, uintptr_t key, int flags, void *arg)
+{
+ char *s = (flags & DNHT_KEY_IS_OBJ) ?
+ ((struct x *)key)->buf : (char *)key;
+ return (strcmp(((struct x *)obj)->buf, s) == 0);
+}
+
+void *newfn(uintptr_t key, int flags, void *arg)
+{
+ char *s = (char *)key;
+ struct x *p = malloc(sizeof(*p) + 1 + strlen(s));
+ if (p)
+ strcpy(p->buf, s);
+ return p;
+}
+
+char *strings[] = {
+ "undici", "unico", "doppio", "devoto",
+ "uno", "due", "tre", "quattro", "cinque", "sei",
+ "uno", "due", "tre", "quattro", "cinque", "sei",
+ NULL,
+};
+
+/*
+ * Scan callback (passed to dn_ht_scan() below): print the string stored
+ * in the element and return the integer carried in the opaque arg.
+ */
+int doprint(void *_x, void *arg)
+{
+	struct x *x = _x;
+	printf("found element <%s>\n", x->buf);
+	/* go through uintptr_t so the pointer-to-int narrowing is explicit */
+	return (int)(uintptr_t)arg;
+}
+
+/*
+ * Exercise the hash table in two ways: first with a table that
+ * allocates its elements through newfn(), then with a table that is
+ * handed pre-built objects (DNHT_KEY_IS_OBJ). Finally remove one entry
+ * by object (x) and one by string key (x1), each twice, printing what
+ * every call returns.
+ * NOTE(review): neither table nor its elements are released here.
+ */
+static void
+test_hash()
+{
+	char **p;
+	struct dn_ht *h;
+	uintptr_t x = 0;
+	uintptr_t x1 = 0;
+
+	/* first, find and allocate */
+	h = dn_ht_init(NULL, 10, 0, hf, matchf, newfn);
+
+	for (p = strings; *p; p++) {
+		dn_ht_find(h, (uintptr_t)*p, DNHT_INSERT, NULL);
+	}
+	dn_ht_scan(h, doprint, 0);
+	printf("/* second -- find without allocate */\n");
+	h = dn_ht_init(NULL, 10, 0, hf, matchf, NULL);
+	for (p = strings; *p; p++) {
+		void **y = newfn((uintptr_t)*p, 0, NULL);
+		if (x == 0)
+			x = (uintptr_t)y;	/* first object built */
+		else {
+			if (x1 == 0)
+				x1 = (uintptr_t)*p;	/* second string key */
+		}
+		dn_ht_find(h, (uintptr_t)y, DNHT_INSERT | DNHT_KEY_IS_OBJ, NULL);
+	}
+	dn_ht_scan(h, doprint, 0);
+	printf("remove %p gives %p\n", (void *)x,
+	    dn_ht_find(h, x, DNHT_KEY_IS_OBJ | DNHT_REMOVE, NULL));
+	printf("remove %p gives %p\n", (void *)x,
+	    dn_ht_find(h, x, DNHT_KEY_IS_OBJ | DNHT_REMOVE, NULL));
+	/* the key removed here is x1, so that is the one to print */
+	printf("remove %p gives %p\n", (void *)x1,
+	    dn_ht_find(h, x1, DNHT_REMOVE, NULL));
+	printf("remove %p gives %p\n", (void *)x1,
+	    dn_ht_find(h, x1, DNHT_REMOVE, NULL));
+	dn_ht_scan(h, doprint, 0);
+}
+
+/*
+ * Run the hash table test and return.
+ *
+ * NOTE(review): the early "return 0" makes the heap test below
+ * unreachable -- confirm whether that is intentional.
+ *
+ * Heap test arguments: argv[1] = number of elements (default 100),
+ * argv[2] = number of cycles (default 1000000), argv[3] non-zero =
+ * insert keys n-i instead of random ones. Each cycle fills the heap and
+ * extracts everything, checking that keys never decrease.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct dn_heap h;
+	int i, n, n2, n3;
+
+	test_hash();
+	return 0;
+
+	/* n = elements, n2 = cycles */
+	n = (argc > 1) ? atoi(argv[1]) : 0;
+	if (n <= 0 || n > 1000000)
+		n = 100;
+	n2 = (argc > 2) ? atoi(argv[2]) : 0;
+	if (n2 <= 0)
+		n2 = 1000000;	/* default for the cycles, not the elements */
+	n3 = (argc > 3) ? atoi(argv[3]) : 0;
+	bzero(&h, sizeof(h));
+	heap_init(&h, n, -1);
+	while (n2-- > 0) {
+		uint64_t prevk = 0;
+		for (i=0; i < n; i++)
+			heap_insert(&h, n3 ? n-i: random(), (void *)(100+i));
+
+		for (i=0; h.elements > 0; i++) {
+			uint64_t k = h.p[0].key;
+			if (k < prevk)
+				panic("wrong sequence\n");
+			prevk = k;
+			if (0)
+				printf("%d key %llu, val %p\n",
+				    i, h.p[0].key, h.p[0].object);
+			heap_extract(&h, NULL);
+		}
+	}
+	return 0;
+}
--- /dev/null
+/*
+ * $Id: test_dn_sched.c 5626 2010-03-04 21:55:22Z luigi $
+ *
+ * library functions for userland testing of dummynet schedulers
+ */
+
+#include "dn_test.h"
+
+void
+m_freem(struct mbuf *m)
+{
+ printf("free %p\n", m);
+}
+
+int
+dn_sched_modevent(module_t mod, int cmd, void *arg)
+{
+ return 0;
+}
+
+/* Walk the m_nextpkt chain that starts at m, passing each mbuf to m_freem(). */
+void
+dn_free_pkts(struct mbuf *m)
+{
+	struct mbuf *following;
+
+	for (; m != NULL; m = following) {
+		/* read the link before the mbuf is handed over */
+		following = m->m_nextpkt;
+		m_freem(m);
+	}
+}
+
+int
+dn_delete_queue(void *_q, void *do_free)
+{
+ struct dn_queue *q = _q;
+ if (q->mq.head)
+ dn_free_pkts(q->mq.head);
+ free(q);
+ return 0;
+}
+
+/*
+ * Simplified enqueue for testing purposes: no statistics beyond the
+ * counters below, no random loss.
+ * The packet is refused when the caller passes drop != 0 or when the
+ * queue already holds 200 packets; otherwise it is appended to q->mq
+ * and the queue length and byte total are updated.
+ * Return 0 on success, 1 on drop. On a drop only the drop counter
+ * changes; the mbuf itself is not touched here.
+ */
+int
+dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop)
+{
+	if (drop || q->ni.length >= 200) {
+		q->ni.drops++;
+		return 1;
+	}
+	mq_append(&q->mq, m);
+	q->ni.length++;
+	q->ni.tot_bytes += m->m_pkthdr.len;
+	return 0;
+}
+
+/*
+ * Force *v into [lo, hi]: a value below lo is replaced by dflt, a
+ * value above hi by hi. Returns the resulting *v. msg is not used.
+ */
+int
+ipdn_bound_var(int *v, int dflt, int lo, int hi, const char *msg)
+{
+	int val = *v;
+
+	if (val < lo)
+		val = dflt;
+	else if (val > hi)
+		val = hi;
+	*v = val;
+	return val;
+}
+
+#ifndef __FreeBSD__
+/*
+ * Find last set: 1-based position of the most significant 1 bit of
+ * mask (taken as an unsigned value), or 0 when mask is 0.
+ */
+int
+fls(int mask)
+{
+	unsigned int bits = (unsigned int)mask;
+	int pos = 0;
+
+	while (bits != 0) {
+		bits >>= 1;
+		pos++;
+	}
+	return (pos);
+}
+#endif
--- /dev/null
+#!/bin/bash
+
+# bash script to set a suitable environment to call MSVC's build
+# to build a 64-bit version of the kernel.
+
+#############################################################
+# edit theese variables to meet your configuration #
+# - DRIVE is the hard drive letter where DDK is installed #
+# - DDK is the path to the DDK's root directory #
+# - CYGDDK is the complete cygwin path to DDK #
+#############################################################
+if [ $# -ne 3 ]; then
+echo "invalid params" && exit 1
+fi
+DRIVE=$1
+DDK=$2
+CYGDDK=/cygdrive/c/${DDK}
+TARGETOS=$3
+
+if [ "$TARGETOS" = "wnet" ]; then
+export DDK_TARGET_OS=WinNET
+export _NT_TARGET_VERSION=0x502
+fi
+
+if [ "$TARGETOS" = "wlh" ]; then
+export DDK_TARGET_OS=WinLH
+export _NT_TARGET_VERSION=0x600
+fi
+
+if [ "$TARGETOS" = "win7" ]; then
+export DDK_TARGET_OS=Win7
+export _NT_TARGET_VERSION=0x601
+fi
+
+
+#############################################################
+# don't edit anything else below this point #
+#############################################################
+
+D=${DRIVE}${DDK}
+DB=${D}/bin
+DI=${D}/inc
+DL=${D}/lib
+
+
+export AMD64=1
+export ATL_INC_PATH=$DI
+export ATL_INC_ROOT=$DI
+export ATL_LIB_PATH=${DL}/atl/*
+export BASEDIR=$D
+export BUFFER_OVERFLOW_CHECKS=1
+export BUILD_ALLOW_COMPILER_WARNINGS=1
+export BUILD_ALT_DIR=chk_${TARGETOS}_AMD64
+export BUILD_DEFAULT="-ei -nmake -i -nosqm"
+export BUILD_DEFAULT_TARGETS="-amd64"
+export BUILD_MAKE_PROGRAM=nmake.exe
+export BUILD_MULTIPROCESSOR=1
+export BUILD_OPTIONS=" ~imca ~toastpkg"
+export COFFBASE_TXT_FILE=${DB}/coffbase.txt
+export CPU=AMD64
+export CRT_INC_PATH=${DI}/crt
+export CRT_LIB_PATH=${DL}/crt/*
+export DDKBUILDENV=chk
+export DDK_INC_PATH=${DI}/ddk
+export DDK_LIB_DEST=${DL}/${TARGETOS}
+export DDK_LIB_PATH=${DL}/${TARGETOS}/*
+export DEPRECATE_DDK_FUNCTIONS=1
+export DRIVER_INC_PATH=${DI}/ddk
+export HALKIT_INC_PATH=${DI}/ddk
+export HALKIT_LIB_PATH=${DL}/${TARGETOS}/*
+export IFSKIT_INC_PATH=${DI}/ddk
+export IFSKIT_LIB_DEST=${DL}/${TARGETOS}
+export IFSKIT_LIB_PATH=${DL}/${TARGETOS}/*
+export Include=${DI}/api
+export KMDF_INC_PATH=${DI}/wdf/kmdf
+export KMDF_LIB_PATH=${DL}/wdf/kmdf/*
+export LANGUAGE_NEUTRAL=0
+export Lib=${DL}
+export LINK_LIB_IGNORE=4198
+export MFC_INC_PATH=${DI}/mfc42
+export MFC_LIB_PATH=${DL}/mfc/*
+export MSC_OPTIMIZATION="/Od /Oi"
+export NEW_CRTS=1
+export NO_BINPLACE=TRUE
+export NO_BROWSER_FILE=TRUE
+export NTDBGFILES=1
+export NTDEBUG=ntsd
+export NTDEBUGTYPE=both
+export NTMAKEENV=${DB}
+export OAK_INC_PATH=${DI}/api
+
+export PATH="${CYGDDK}/bin/amd64:${CYGDDK}/tools/sdv/bin:${CYGDDK}/tools/pfd/bin/bin/x86_AMD64\
+:${CYGDDK}/bin/SelfSign:${CYGDDK}/bin/x86/amd64:${CYGDDK}/bin/x86\
+:${CYGDDK}/tools/pfd/bin/bin/AMD64:${CYGDDK}/tools/tracing/amd64:$PATH"
+
+export PATHEXT=".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC"
+export PROJECT_ROOT=${D}/src
+export PUBLIC_ROOT=${D}
+export RAZZLETOOLPATH=${DB}
+export RCNOFONTMAP=1
+export SDK_INC_PATH=${DI}/api
+export SDK_LIB_DEST=${DL}/${TARGETOS}
+export SDK_LIB_PATH=${DL}/${TARGETOS}/*
+export SDV=${D}/tools/sdv
+export separate_object_root=FALSE
+export TEMP=tmpbuild
+export TMP=tmpbuild
+export UMDF_INC_PATH=${DI}/wdf/umdf
+export USE_OBJECT_ROOT=1
+export WDM_INC_PATH=${DI}/ddk
+export WPP_CONFIG_PATH=${DB}/wppconfig
+export _AMD64bit=true
+export _BUILDARCH=AMD64
+export _BuildType=chk
+export _NTDRIVE=${DRIVE}
+export _NTROOT=${DDK}
+cd dummynet2-64 && build -cefg
+cp objchk_${TARGETOS}_amd64/amd64/ipfw.sys ../binary64/ipfw.sys
\ No newline at end of file
--- /dev/null
+; version section\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = NetService\r
+ClassGUID = {4D36E974-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Unipi%\r
+DriverVer = 26/02/2010,3.0.0.1\r
+\r
+; manufacturer section\r
+[Manufacturer]\r
+%Unipi% = UNIPI,NTx86,NTamd64\r
+\r
+; control flags section\r
+; optional, unused in netipfw.inf inf, used in netipfw_m.inf\r
+[ControlFlags]\r
+\r
+; models section\r
+[UNIPI] ; Win2k\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+[UNIPI.NTx86] ;For WinXP and later\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+[UNIPI.NTamd64] ;For x64\r
+%Desc% = Ipfw.ndi, unipi_ipfw\r
+\r
+; ddinstall section\r
+[Ipfw.ndi]\r
+AddReg = Ipfw.ndi.AddReg, Ipfw.AddReg\r
+Characteristics = 0x4410 ; NCF_FILTER | NCF_NDIS_PROTOCOL !--Filter Specific--!!\r
+CopyFiles = Ipfw.Files.Sys\r
+CopyInf = netipfw_m.inf\r
+\r
+; remove section\r
+[Ipfw.ndi.Remove]\r
+DelFiles = Ipfw.Files.Sys\r
+\r
+;ddinstall.services section\r
+[Ipfw.ndi.Services]\r
+AddService = Ipfw,,Ipfw.AddService\r
+\r
+[Ipfw.AddService]\r
+DisplayName = %ServiceDesc%\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+ServiceBinary = %12%\ipfw.sys\r
+AddReg = Ipfw.AddService.AddReg\r
+\r
+[Ipfw.AddService.AddReg]\r
+\r
+;file copy related sections\r
+; A single source disk (id 1), described by the DiskDescription string.\r
+[SourceDisksNames]\r
+1=%DiskDescription%,"",,\r
+\r
+; ipfw.sys is located on source disk 1\r
+[SourceDisksFiles]\r
+ipfw.sys=1\r
+\r
+; dirid 12 is the target for every file-list section in this INF\r
+[DestinationDirs]\r
+DefaultDestDir = 12\r
+Ipfw.Files.Sys = 12 ; %windir%\System32\drivers\r
+\r
+; ddinstall->copyfiles points here\r
+; the trailing 2 is the copy flag COPYFLG_NOSKIP (the user may not skip the file)\r
+[Ipfw.Files.Sys]\r
+ipfw.sys,,,2\r
+\r
+; ddinstall->addreg points here\r
+; Values written under the component's Ndi key. The flags field is left\r
+; empty on every entry, so each value is stored as a plain string.\r
+[Ipfw.ndi.AddReg]\r
+HKR, Ndi, HelpText, , %HELP% ; this is displayed at the bottom of the General page of the Connection Properties dialog box\r
+HKR, Ndi, FilterClass, , failover\r
+; unipi_ipfwmp is the component id declared by netipfw_m.inf\r
+HKR, Ndi, FilterDeviceInfId, , unipi_ipfwmp\r
+; must match the service name registered by the AddService directive\r
+HKR, Ndi, Service, , Ipfw\r
+HKR, Ndi\Interfaces, UpperRange, , noupper\r
+HKR, Ndi\Interfaces, LowerRange, , nolower\r
+; media types the filter is offered for\r
+HKR, Ndi\Interfaces, FilterMediaTypes, , "ethernet, tokenring, fddi, wan"\r
+\r
+;strings section\r
+; Text substituted for the percent-delimited tokens used elsewhere in this INF.\r
+[Strings]\r
+Unipi = "Unipi"\r
+DiskDescription = "Ipfw Driver Disk"\r
+Desc = "ipfw+dummynet"\r
+HELP = "This is ipfw and dummynet network emulator, developed by unipi.it"\r
+ServiceDesc = "ipfw service"\r
--- /dev/null
+; version section\r
+; Declares this INF as a Net-class package provided by %Unipi%.\r
+[Version]\r
+Signature = "$Windows NT$"\r
+Class = Net\r
+ClassGUID = {4D36E972-E325-11CE-BFC1-08002BE10318}\r
+Provider = %Unipi%\r
+; DriverVer takes the date as mm/dd/yyyy, followed by the w.x.y.z version\r
+DriverVer = 02/26/2010,3.0.0.1\r
+\r
+; control flags section\r
+; optional, unused in netipfw.inf, used in netipfw_m.inf\r
+[ControlFlags]\r
+; keeps the miniport out of the lists where a user picks a device manually\r
+ExcludeFromSelect = unipi_ipfwmp\r
+\r
+; destinationdirs section, optional\r
+; dirid 12 is the drivers directory\r
+[DestinationDirs]\r
+DefaultDestDir=12\r
+; No files to copy \r
+\r
+; manufacturer section\r
+; Names the models section UNIPI and its NTx86 / NTamd64 decorated variants.\r
+[Manufacturer]\r
+%Unipi% = UNIPI,NTx86,NTamd64\r
+\r
+; models section\r
+; All three variants map the same description to the IpfwMP.ndi install\r
+; section and the unipi_ipfwmp component id.\r
+[UNIPI] ; Win2k\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+[UNIPI.NTx86] ;For WinXP and later\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+[UNIPI.NTamd64] ;For x64\r
+%Desc% = IpfwMP.ndi, unipi_ipfwmp\r
+\r
+; ddinstall section\r
+; Installs the miniport: registry entries only, no files are copied here.\r
+[IpfwMP.ndi]\r
+AddReg = IpfwMP.ndi.AddReg\r
+; 0x29 = 0x20 (NCF_NOT_USER_REMOVABLE) | 0x08 (NCF_HIDDEN) | 0x01 (NCF_VIRTUAL)\r
+Characteristics = 0x29 ;NCF_NOT_USER_REMOVABLE | NCF_VIRTUAL | NCF_HIDDEN\r
+\r
+; ddinstall->addreg points here\r
+; flags value 0 stores the entry as a plain string; the name must match the\r
+; service registered by the AddService directive\r
+[IpfwMP.ndi.AddReg]\r
+HKR, Ndi, Service, 0, IpfwMP\r
+\r
+;ddinstall.services section\r
+; Registers the IpfwMP service; flag 0x2 (SPSVCINST_ASSOCSERVICE) associates\r
+; it with the device as its function driver.\r
+[IpfwMP.ndi.Services]\r
+AddService = IpfwMP,0x2, IpfwMP.AddService\r
+\r
+; service-install section referenced by the AddService directive\r
+[IpfwMP.AddService]\r
+ServiceType = 1 ;SERVICE_KERNEL_DRIVER\r
+StartType = 3 ;SERVICE_DEMAND_START\r
+ErrorControl = 1 ;SERVICE_ERROR_NORMAL\r
+; ipfw.sys in the drivers directory (dirid 12); this INF does not copy it\r
+ServiceBinary = %12%\ipfw.sys\r
+AddReg = IpfwMP.AddService.AddReg\r
+\r
+; registry entries for the service\r
+[IpfwMP.AddService.AddReg]\r
+; None\r
+\r
+; strings section\r
+; Text substituted for the percent-delimited tokens used elsewhere in this INF.\r
+[Strings]\r
+Unipi = "Unipi"\r
+Desc = "Ipfw Miniport"
\ No newline at end of file
--- /dev/null
+# Kernel-mode driver target; the resulting binary is ipfw.sys.\r
+TARGETTYPE=DRIVER\r
+TARGETNAME=ipfw\r
+\r
+# Compiler warning level.\r
+MSC_WARNING_LEVEL=/W2\r
+\r
+# Build as an NDIS miniport / WDM driver.\r
+C_DEFINES=$(C_DEFINES) -DNDIS_MINIPORT_DRIVER -DNDIS_WDM=1\r
+\r
+# Target NDIS 5.1, since the driver is built in the XP or .NET\r
+# build environment.\r
+C_DEFINES=$(C_DEFINES) -DNDIS51_MINIPORT=1 -DNDIS51=1\r
+\r
+# Preprocessor macros that enable the dummynet code paths.\r
+C_DEFINES=$(C_DEFINES) /D_WIN32 /DMODULENAME=Ipfw /D_BSD_SOURCE /DKERNEL_MODULE /D_KERNEL /DKLD_MODULE /D__BSD_VISIBLE\r
+C_DEFINES=$(C_DEFINES) /DIPFIREWALL_DEFAULT_TO_ACCEPT /D__LITTLE_ENDIAN /DSYSCTL_NODE /DEMULATE_SYSCTL\r
+# Headers force-included (-FI) into every compiled file.\r
+C_DEFINES=$(C_DEFINES) -FIwinmissing.h -FImissing.h -FI../glue.h\r
+C_DEFINES=$(C_DEFINES) /DWIN32_LEAN_AND_MEAN=1\r
+\r
+# Header search path.\r
+INCLUDES=include; include_e\r
+\r
+# Library the driver links against.\r
+TARGETLIBS=$(DDK_LIB_PATH)\ndis.lib\r
+\r
+# Files compiled into the driver.\r
+SOURCES= ip_fw2.c ip_fw_pfil.c ip_fw_sockopt.c ip_fw_dynamic.c ip_fw_table.c ip_fw_log.c radix.c in_cksum.c ip_dummynet.c ip_dn_io.c ip_dn_glue.c dn_heap.c dn_sched_fifo.c dn_sched_wf2q.c dn_sched_rr.c dn_sched_qfq.c dn_sched_prio.c ipfw2_mod.c bsd_compat.c md_win.c miniport.c protocol.c passthru.c debug.c
\ No newline at end of file