-# $Id$
+# $Id: Makefile 8654 2011-05-23 08:39:50Z marta $
#
# Top level makefile for building ipfw kernel and userspace.
# You can run it manually or also under the Planetlab build.
--exclude tcc-0.9.25-bsd \
--exclude original_passthru \
--exclude ipfw3.diff --exclude add_rules \
+ --exclude test --exclude test_ \
ipfw3 )
bindist:
@echo "and commit with:"
@echo "(cd /tmp/pl-tmp/pl/trunk; svn ci -m 'Update from the mail ipfw repo.')"
+openwrt_release:
+ # create a temporary directory
+ $(eval TMPDIR := $(shell mktemp -d -p /tmp/ ipfw3_openwrt_XXXXX))
+ # create the source destination directory
+ $(eval IPFWDIR := ipfw3-$(DATE))
+ $(eval DSTDIR := $(TMPDIR)/$(IPFWDIR))
+ mkdir $(DSTDIR)
+ # copy the package, clean objects and svn info
+ cp -r ./ipfw ./dummynet2 glue.h Makefile ./configuration README $(DSTDIR)
+ (cd $(DSTDIR); make -s distclean; find . -name .svn | xargs rm -rf)
+ (cd $(TMPDIR); tar czf $(IPFWDIR).tar.gz $(IPFWDIR))
+
+ # create the port files in /tmp/ipfw3-port
+ $(eval PORTDIR := $(TMPDIR)/ipfw3)
+ mkdir -p $(PORTDIR)/patches
+ # generate the Makefile, PKG_VERSION and PKG_MD5SUM
+ md5sum $(DSTDIR).tar.gz | cut -d ' ' -f 1 > $(TMPDIR)/md5sum
+ cat ./OPENWRT/Makefile | \
+ sed s/PKG_VERSION:=/PKG_VERSION:=$(DATE)/ | \
+ sed s/PKG_MD5SUM:=/PKG_MD5SUM:=`cat $(TMPDIR)/md5sum`/ \
+ > $(PORTDIR)/Makefile
+
+ @echo ""
+	@echo "The openwrt port is in $(PORTDIR)"
+ @echo "The source file should be copied to the public server:"
+ @echo "scp $(DSTDIR).tar.gz marta@info.iet.unipi.it:~marta/public_html/dummynet"
+ @echo "after this the temporary directory $(TMPDIR) can be removed."
+
install:
#
-# $Id: README 6070 2010-04-15 11:58:21Z marta $
+# $Id: README 8977 2011-07-04 11:47:59Z luigi $
#
This directory contains a port of ipfw and dummynet to Linux/OpenWrt
=================== BUILD INSTRUCTIONS ==========================
-***** Windows XP ******
+***** Windows (XP, Windows7) ******
You can find a pre-built version in the binary/ subdirectory.
To build your own version of the package you need:
- - MSVC DDK available from ...
- http://www.microsoft.com/whdc/DevTools/WDK/WDKpkg.mspx
+ - MSVC DDK available from
+ http://msdn.microsoft.com/en-us/windows/hardware/gg487463.aspx
- optionally, DbgView if you want to see diagnostic
http://technet.microsoft.com/en-us/sysinternals/bb896647.aspx
- cygwin, http://www.cygwin.com/
with base packages, make, c compiler, possibly an editor
- and subversion.
+ and subversion (suggest: tortoiseSvn)
Edit Makefile in the root directory, and set configuration
variables to match your current system (hard drive
ipfw.sys (an NDIS intermediate filter driver)
dummynet.inf and dummynet_m.inf (installer files)
+ Cross compilation of the userland side under FreeBSD is possible with
+ gmake TCC=`pwd`/tcc-0.9.25-bsd/win32 CC=`pwd`/tcc-0.9.25-bsd/win32/bin/wintcc
+ (wintcc is a custom version of tcc which produces Windows code)
+
***** Windows crosscompilation for 64 bit using DDK ******
Edit root directory's Makefile and set target
operating system
sudo yum -y install subversion rpm-build rpm-devel m4 redhat-rpm-config make gcc
# new build installation requires the gnupg package
sudo yum -y install gnupg
+ # the linux kernel and the ipfw source can be fetched by git
+ sudo yum -y install git
# create and move to a work directory
mkdir -p test
# extract a planetlab distribution to directory XYZ
- (cd test; svn co http://svn.planet-lab.org/svn/build/trunk XYZ)
- # copy the planetlab/*mk files here, overriding existing ones
- cp planetlab/*mk test/XYZ
+ (cd test; git clone git://git.onelab.eu/build ./XYZ)
# download the specfiles and do some patching.
# Results are into SPEC/ (takes 5 minutes)
- (cd test/XYZ; make stage1=true PLDISTRO=planetlab )
+ (cd test/XYZ; make stage1=true PLDISTRO=onelab)
# Building the slice code is fast, the root code takes longer
# as it needs to rebuild the whole kernel
- (cd test/XYZ; sudo make ipfwslice ipfwroot)
+ (cd test/XYZ; sudo make ipfwslice PLDISTRO=onelab)
+ (cd test/XYZ; sudo make ipfwroot PLDISTRO=onelab)
The kernel dependency phase is a bit time consuming, but does not
need to be redone if we are changing the ipfw sources only.
--- /dev/null
+This directory contains some ipfw configurations and a script
+to safely change the firewall rules.
+
+The firewall configuration comes from the FreeBSD initial script.
+The change_rules_linux.sh script allows you to change the ipfw rules and,
+in case of a misconfiguration which prevents reaching the remote
+host, to restore the old ruleset.
+
+To configure the firewall behavior, edit the ipfw.conf file and
+execute the ./change_rules_linux.sh script.
+
+The ipfw program executable should be located in /sbin (XXX)
+
+XXX seems we use something which is not compatible with dash
--- /dev/null
+#!/bin/sh
+#
+# Copyright (c) 2000 Alexandre Peixoto
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: src/share/examples/ipfw/change_rules.sh,v 1.6 2003/09/07 07:52:56 jmg Exp $
+
+# Change ipfw(8) rules with safety guarantees for remote operation
+#
+# Invoke this script to edit ${firewall_script}. It will call ${EDITOR},
+# or vi(1) if the environment variable is not set, for you to edit
+# ${firewall_script}, ask for confirmation, and then run
+# ${firewall_script}. You can then examine the output of ipfw list and
+# confirm whether you want the new version or not.
+#
+# If no answer is received in 30 seconds, the previous
+# ${firewall_script} is run, restoring the old rules (this assumes ipfw
+# flush is present in it).
+#
+# If the new rules are confirmed, they'll replace ${firewall_script} and
+# the previous ones will be copied to ${firewall_script}.{date}. Mail
+# will also be sent to root with a unified diff of the rule change.
+#
+# Unapproved rules are kept in ${firewall_script}.new, and you are
+# offered the option of changing them instead of the present rules when
+# you call this script.
+#
+# This script could be improved by using version control
+# software.
+
+# XXX on linux /etc/rc.conf defines:
+# firewall_type and firewall_script
+
+if [ -r /etc/defaults/rc.conf ]; then
+ . /etc/defaults/rc.conf
+ source_rc_confs
+elif [ -r /etc/rc.conf ]; then
+ . /etc/rc.conf
+fi
+
+EDITOR=${EDITOR:-/usr/bin/vi}
+PAGER=${PAGER:-/usr/bin/more}
+
+# on linux the default mktemp invocation behavior
+# is different, we should change the temporary file creation
+tempfoo=`basename $0`
+#TMPFILE=`mktemp -t ${tempfoo}` || exit 1
+TMPFILE=`mktemp -t ${tempfoo}.XXXXX` || exit 1
+
+get_yes_no() {
+ while true
+ do
+ echo -n "$1 (Y/N) ? "
+ read -t 30 a
+ if [ $? != 0 ]; then
+ a="No";
+ return;
+ fi
+ case $a in
+ [Yy]) a="Yes";
+ return;;
+ [Nn]) a="No";
+ return;;
+ *);;
+ esac
+ done
+}
+
+restore_rules() {
+ nohup sh ${firewall_script} </dev/null >/dev/null 2>&1
+ rm ${TMPFILE}
+ exit 1
+}
+
+case "${firewall_type}" in
+[Cc][Ll][Ii][Ee][Nn][Tt]|\
+[Cc][Ll][Oo][Ss][Ee][Dd]|\
+[Oo][Pp][Ee][Nn]|\
+[Ss][Ii][Mm][Pp][Ll][Ee]|\
+[Uu][Nn][Kk][Nn][Oo][Ww][Nn])
+ edit_file="${firewall_script}"
+ rules_edit=no
+ ;;
+*)
+ if [ -r "${firewall_type}" ]; then
+ edit_file="${firewall_type}"
+ rules_edit=yes
+ fi
+ ;;
+esac
+
+if [ -f ${edit_file}.new ]; then
+ get_yes_no "A new rules file already exists, do you want to use it"
+ [ $a = 'No' ] && cp ${edit_file} ${edit_file}.new
+else
+ cp ${edit_file} ${edit_file}.new
+fi
+
+trap restore_rules SIGHUP
+
+${EDITOR} ${edit_file}.new
+
+get_yes_no "Do you want to install the new rules"
+
+[ $a = 'No' ] && exit 1
+
+cat <<!
+The rules will be changed now. If the message 'Type y to keep the new
+rules' does not appear on the screen or the y key is not pressed in 30
+seconds, the original rules will be restored.
+The TCP/IP connections might be broken during the change. If so, restore
+the ssh/telnet connection being used.
+!
+
+if [ ${rules_edit} = yes ]; then
+ nohup sh ${firewall_script} ${firewall_type}.new \
+ < /dev/null > ${TMPFILE} 2>&1
+else
+ nohup sh ${firewall_script}.new \
+ < /dev/null > ${TMPFILE} 2>&1
+fi
+sleep 2;
+get_yes_no "Would you like to see the resulting new rules"
+[ $a = 'Yes' ] && ${PAGER} ${TMPFILE}
+get_yes_no "Type y to keep the new rules"
+[ $a != 'Yes' ] && restore_rules
+
+DATE=`date "+%Y%m%d%H%M"`
+cp ${edit_file} ${edit_file}.$DATE
+mv ${edit_file}.new ${edit_file}
+cat <<!
+The new rules are now installed. The previous rules have been preserved in
+the file ${edit_file}.$DATE
+!
+diff -F "^# .*[A-Za-z]" -u ${edit_file}.$DATE ${edit_file} \
+ | mail -s "`hostname` Firewall rule change" root
+rm ${TMPFILE}
+exit 0
--- /dev/null
+#!/bin/sh
+#
+# marta
+# linux wrapper for the FreeBSD change rules program
+# This file load the linux configuration and calls the
+# original change rules program
+
+if [ -r ./ipfw.conf ]; then
+ . ./ipfw.conf
+fi
+
+. ./change_rules.sh
--- /dev/null
+# ipfw and dummynet configuration file for linux
+# XXX TO BE TESTED ON LINUX
+
+# The firewall_type variable is used to configure the firewall behavior.
+# A detailed description of how the following types work is in rc.firewall
+#
+# open - will allow anyone in
+# client - will try to protect just this machine
+# simple - will try to protect a whole network
+# closed - totally disables IP services except via lo0 interface
+# workstation - will try to protect just this machine using stateful
+# firewalling. See below for rc.conf variables used
+# UNKNOWN - disables the loading of firewall rules.
+# filename - will load the rules in the given filename (full path required)
+
+# firewall_type=open
+
+# The following file is an example on how to use a filename to define a firewall
+# and how to configure a simple dummynet pipe to ... XXX shape traffic... etc...
+firewall_type=/home/marta/SVN/ports-luigi/dummynet-branches/ipfw3/configuration/ipfw.rules
+
+# Environment variables expected by the change rules script
+EDITOR=/usr/bin/vi
+PAGER=/bin/more
+
+# The following variable should point to the rc.firewall script
+# XXX TEST
+#firewall_script=`echo "please edit the firewall_script variable in ipfw.conf"`;
+firewall_script="/home/marta/SVN/ports-luigi/dummynet-branches/ipfw3/configuration/rc.firewall"
--- /dev/null
+# This is a simple configuration file
+# add dummynet pipes and a firewall section
+
+# flush all rules ...
+# flush
+
+# dummynet configuration
+
+# firewall configuration
+add 1 allow all from any to any
+# ...
+add 65000 deny all from any to any
--- /dev/null
+#!/bin/sh -
+# Copyright (c) 1996 Poul-Henning Kamp
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD: src/etc/rc.firewall,v 1.52.4.1 2008/01/29 00:22:32 dougb Exp $
+#
+
+#
+# Setup system for ipfw(4) firewall service.
+#
+
+# Suck in the configuration variables.
+if [ -z "${source_rc_confs_defined}" ]; then
+ if [ -r /etc/defaults/rc.conf ]; then
+ . /etc/defaults/rc.conf
+ source_rc_confs
+ elif [ -r /etc/rc.conf ]; then
+ . /etc/rc.conf
+ fi
+fi
+
+############
+# Define the firewall type in /etc/rc.conf. Valid values are:
+# open - will allow anyone in
+# client - will try to protect just this machine
+# simple - will try to protect a whole network
+# closed - totally disables IP services except via lo0 interface
+# workstation - will try to protect just this machine using stateful
+# firewalling. See below for rc.conf variables used
+# UNKNOWN - disables the loading of firewall rules.
+# filename - will load the rules in the given filename (full path required)
+#
+# For ``client'' and ``simple'' the entries below should be customized
+# appropriately.
+
+############
+#
+# If you don't know enough about packet filtering, we suggest that you
+# take time to read this book:
+#
+# Building Internet Firewalls, 2nd Edition
+# Brent Chapman and Elizabeth Zwicky
+#
+# O'Reilly & Associates, Inc
+# ISBN 1-56592-871-7
+# http://www.ora.com/
+# http://www.oreilly.com/catalog/fire2/
+#
+# For a more advanced treatment of Internet Security read:
+#
+# Firewalls and Internet Security: Repelling the Wily Hacker, 2nd Edition
+# William R. Cheswick, Steven M. Bellowin, Aviel D. Rubin
+#
+# Addison-Wesley / Prentice Hall
+# ISBN 0-201-63466-X
+# http://www.pearsonhighered.com/
+# http://www.pearsonhighered.com/educator/academic/product/0,3110,020163466X,00.html
+#
+
+setup_loopback () {
+ ############
+ # Only in rare cases do you want to change these rules
+ #
+ ${fwcmd} add 100 pass all from any to any via lo0
+ ${fwcmd} add 200 deny all from any to 127.0.0.0/8
+ ${fwcmd} add 300 deny ip from 127.0.0.0/8 to any
+}
+
+if [ -n "${1}" ]; then
+ firewall_type="${1}"
+fi
+
+############
+# Set quiet mode if requested
+#
+case ${firewall_quiet} in
+[Yy][Ee][Ss])
+ fwcmd="/sbin/ipfw -q"
+ ;;
+*)
+ fwcmd="/sbin/ipfw"
+ ;;
+esac
+
+############
+# Flush out the list before we begin.
+#
+${fwcmd} -f flush
+
+setup_loopback
+
+############
+# Network Address Translation. All packets are passed to natd(8)
+# before they encounter your remaining rules. The firewall rules
+# will then be run again on each packet after translation by natd
+# starting at the rule number following the divert rule.
+#
+# For ``simple'' firewall type the divert rule should be put to a
+# different place to not interfere with address-checking rules.
+#
+case ${firewall_type} in
+[Oo][Pp][Ee][Nn]|[Cc][Ll][Ii][Ee][Nn][Tt])
+ case ${natd_enable} in
+ [Yy][Ee][Ss])
+ if [ -n "${natd_interface}" ]; then
+ ${fwcmd} add 50 divert natd ip4 from any to any via ${natd_interface}
+ fi
+ ;;
+ esac
+ case ${firewall_nat_enable} in
+ [Yy][Ee][Ss])
+ if [ -n "${firewall_nat_interface}" ]; then
+ ${fwcmd} nat 123 config if ${firewall_nat_interface} log
+ ${fwcmd} add 50 nat 123 ip4 from any to any via ${firewall_nat_interface}
+ fi
+ ;;
+ esac
+esac
+
+############
+# If you just configured ipfw in the kernel as a tool to solve network
+# problems or you just want to disallow some particular kinds of traffic
+# then you will want to change the default policy to open. You can also
+# do this as your only action by setting the firewall_type to ``open''.
+#
+# ${fwcmd} add 65000 pass all from any to any
+
+
+# Prototype setups.
+#
+case ${firewall_type} in
+[Oo][Pp][Ee][Nn])
+ ${fwcmd} add 65000 pass all from any to any
+ ;;
+
+[Cc][Ll][Ii][Ee][Nn][Tt])
+ ############
+ # This is a prototype setup that will protect your system somewhat
+ # against people from outside your own network.
+ ############
+
+ # set these to your network and netmask and ip
+ net="192.0.2.0"
+ mask="255.255.255.0"
+ ip="192.0.2.1"
+
+ # Allow any traffic to or from my own net.
+ ${fwcmd} add pass all from ${ip} to ${net}:${mask}
+ ${fwcmd} add pass all from ${net}:${mask} to ${ip}
+
+ # Allow TCP through if setup succeeded
+ ${fwcmd} add pass tcp from any to any established
+
+ # Allow IP fragments to pass through
+ ${fwcmd} add pass all from any to any frag
+
+ # Allow setup of incoming email
+ ${fwcmd} add pass tcp from any to me 25 setup
+
+ # Allow setup of outgoing TCP connections only
+ ${fwcmd} add pass tcp from me to any setup
+
+ # Disallow setup of all other TCP connections
+ ${fwcmd} add deny tcp from any to any setup
+
+ # Allow DNS queries out in the world
+ ${fwcmd} add pass udp from me to any 53 keep-state
+
+ # Allow NTP queries out in the world
+ ${fwcmd} add pass udp from me to any 123 keep-state
+
+ # Everything else is denied by default, unless the
+ # IPFIREWALL_DEFAULT_TO_ACCEPT option is set in your kernel
+ # config file.
+ ;;
+
+[Ss][Ii][Mm][Pp][Ll][Ee])
+ ############
+ # This is a prototype setup for a simple firewall. Configure this
+ # machine as a DNS and NTP server, and point all the machines
+ # on the inside at this machine for those services.
+ ############
+
+ # set these to your outside interface network and netmask and ip
+ oif="ed0"
+ onet="192.0.2.0"
+ omask="255.255.255.240"
+ oip="192.0.2.1"
+
+ # set these to your inside interface network and netmask and ip
+ iif="ed1"
+ inet="192.0.2.16"
+ imask="255.255.255.240"
+ iip="192.0.2.17"
+
+ # Stop spoofing
+ ${fwcmd} add deny all from ${inet}:${imask} to any in via ${oif}
+ ${fwcmd} add deny all from ${onet}:${omask} to any in via ${iif}
+
+ # Stop RFC1918 nets on the outside interface
+ ${fwcmd} add deny all from any to 10.0.0.0/8 via ${oif}
+ ${fwcmd} add deny all from any to 172.16.0.0/12 via ${oif}
+ ${fwcmd} add deny all from any to 192.168.0.0/16 via ${oif}
+
+ # Stop draft-manning-dsua-03.txt (1 May 2000) nets (includes RESERVED-1,
+ # DHCP auto-configuration, NET-TEST, MULTICAST (class D), and class E)
+ # on the outside interface
+ ${fwcmd} add deny all from any to 0.0.0.0/8 via ${oif}
+ ${fwcmd} add deny all from any to 169.254.0.0/16 via ${oif}
+ ${fwcmd} add deny all from any to 192.0.2.0/24 via ${oif}
+ ${fwcmd} add deny all from any to 224.0.0.0/4 via ${oif}
+ ${fwcmd} add deny all from any to 240.0.0.0/4 via ${oif}
+
+ # Network Address Translation. This rule is placed here deliberately
+ # so that it does not interfere with the surrounding address-checking
+ # rules. If for example one of your internal LAN machines had its IP
+ # address set to 192.0.2.1 then an incoming packet for it after being
+ # translated by natd(8) would match the `deny' rule above. Similarly
+ # an outgoing packet originated from it before being translated would
+ # match the `deny' rule below.
+ case ${natd_enable} in
+ [Yy][Ee][Ss])
+ if [ -n "${natd_interface}" ]; then
+ ${fwcmd} add divert natd all from any to any via ${natd_interface}
+ fi
+ ;;
+ esac
+
+ # Stop RFC1918 nets on the outside interface
+ ${fwcmd} add deny all from 10.0.0.0/8 to any via ${oif}
+ ${fwcmd} add deny all from 172.16.0.0/12 to any via ${oif}
+ ${fwcmd} add deny all from 192.168.0.0/16 to any via ${oif}
+
+ # Stop draft-manning-dsua-03.txt (1 May 2000) nets (includes RESERVED-1,
+ # DHCP auto-configuration, NET-TEST, MULTICAST (class D), and class E)
+ # on the outside interface
+ ${fwcmd} add deny all from 0.0.0.0/8 to any via ${oif}
+ ${fwcmd} add deny all from 169.254.0.0/16 to any via ${oif}
+ ${fwcmd} add deny all from 192.0.2.0/24 to any via ${oif}
+ ${fwcmd} add deny all from 224.0.0.0/4 to any via ${oif}
+ ${fwcmd} add deny all from 240.0.0.0/4 to any via ${oif}
+
+ # Allow TCP through if setup succeeded
+ ${fwcmd} add pass tcp from any to any established
+
+ # Allow IP fragments to pass through
+ ${fwcmd} add pass all from any to any frag
+
+ # Allow setup of incoming email
+ ${fwcmd} add pass tcp from any to ${oip} 25 setup
+
+ # Allow access to our DNS
+ ${fwcmd} add pass tcp from any to ${oip} 53 setup
+ ${fwcmd} add pass udp from any to ${oip} 53
+ ${fwcmd} add pass udp from ${oip} 53 to any
+
+ # Allow access to our WWW
+ ${fwcmd} add pass tcp from any to ${oip} 80 setup
+
+ # Reject&Log all setup of incoming connections from the outside
+ ${fwcmd} add deny log tcp from any to any in via ${oif} setup
+
+ # Allow setup of any other TCP connection
+ ${fwcmd} add pass tcp from any to any setup
+
+ # Allow DNS queries out in the world
+ ${fwcmd} add pass udp from ${oip} to any 53 keep-state
+
+ # Allow NTP queries out in the world
+ ${fwcmd} add pass udp from ${oip} to any 123 keep-state
+
+ # Everything else is denied by default, unless the
+ # IPFIREWALL_DEFAULT_TO_ACCEPT option is set in your kernel
+ # config file.
+ ;;
+
+[Ww][Oo][Rr][Kk][Ss][Tt][Aa][Tt][Ii][Oo][Nn])
+ # Configuration:
+ # firewall_myservices: List of TCP ports on which this host
+ # offers services.
+ # firewall_allowservices: List of IPs which has access to
+ # $firewall_myservices.
+ # firewall_trusted: List of IPs which has full access
+# to this host. Be very careful
+ # when setting this. This option can
+ # seriously degrade the level of
+ # protection provided by the firewall.
+ # firewall_logdeny: Boolean (YES/NO) specifying if the
+ # default denied packets should be
+ # logged (in /var/log/security).
+ # firewall_nologports: List of TCP/UDP ports for which
+# denied incoming packets are not
+ # logged.
+
+ # Allow packets for which a state has been built.
+ ${fwcmd} add check-state
+
+ # For services permitted below.
+ ${fwcmd} add pass tcp from me to any established
+
+ # Allow any connection out, adding state for each.
+ ${fwcmd} add pass tcp from me to any setup keep-state
+ ${fwcmd} add pass udp from me to any keep-state
+ ${fwcmd} add pass icmp from me to any keep-state
+
+ # Allow DHCP.
+ ${fwcmd} add pass udp from 0.0.0.0 68 to 255.255.255.255 67 out
+ ${fwcmd} add pass udp from any 67 to me 68 in
+ ${fwcmd} add pass udp from any 67 to 255.255.255.255 68 in
+ # Some servers will ping the IP while trying to decide if it's
+ # still in use.
+ ${fwcmd} add pass icmp from any to any icmptype 8
+
+ # Allow "mandatory" ICMP in.
+ ${fwcmd} add pass icmp from any to any icmptype 3,4,11
+
+ # Add permits for this workstations published services below
+ # Only IPs and nets in firewall_allowservices is allowed in.
+ # If you really wish to let anyone use services on your
+ # workstation, then set "firewall_allowservices='any'" in /etc/rc.conf
+ #
+ # Note: We don't use keep-state as that would allow DoS of
+ # our statetable.
+ # You can add 'keep-state' to the lines for slightly
+ # better performance if you fell that DoS of your
+ # workstation won't be a problem.
+ #
+ for i in ${firewall_allowservices} ; do
+ for j in ${firewall_myservices} ; do
+ ${fwcmd} add pass tcp from $i to me $j
+ done
+ done
+
+ # Allow all connections from trusted IPs.
+ # Playing with the content of firewall_trusted could seriously
+ # degrade the level of protection provided by the firewall.
+ for i in ${firewall_trusted} ; do
+ ${fwcmd} add pass ip from $i to me
+ done
+
+ ${fwcmd} add 65000 count ip from any to any
+
+ # Drop packets to ports where we don't want logging
+ for i in ${firewall_nologports} ; do
+ ${fwcmd} add deny { tcp or udp } from any to any $i in
+ done
+
+	# Broadcasts and multicasts
+ ${fwcmd} add deny ip from any to 255.255.255.255
+ ${fwcmd} add deny ip from any to 224.0.0.0/24 in # XXX
+
+ # Noise from routers
+ ${fwcmd} add deny udp from any to any 520 in
+
+ # Noise from webbrowsing.
+	# The stateful filter is a bit aggressive, and will cause some
+ # connection teardowns to be logged.
+ ${fwcmd} add deny tcp from any 80,443 to any 1024-65535 in
+
+ # Deny and (if wanted) log the rest unconditionally.
+ log=""
+ if [ ${firewall_logdeny:-x} = "YES" -o ${firewall_logdeny:-x} = "yes" ] ; then
+ log="log logamount 500" # The default of 100 is too low.
+ sysctl net.inet.ip.fw.verbose=1 >/dev/null
+ fi
+ ${fwcmd} add deny $log ip from any to any
+ ;;
+
+[Cc][Ll][Oo][Ss][Ee][Dd])
+ ${fwcmd} add 65000 deny ip from any to any
+ ;;
+[Uu][Nn][Kk][Nn][Oo][Ww][Nn])
+ ;;
+*)
+ if [ -r "${firewall_type}" ]; then
+ ${fwcmd} ${firewall_flags} ${firewall_type}
+ fi
+ ;;
+esac
-# $Id: Makefile 5858 2010-03-24 16:16:19Z svn_magno $
+# $Id: Makefile 11277 2012-06-10 17:44:15Z marta $
# gnu Makefile to build linux/Windows module for ipfw+dummynet.
#
# The defaults are set to build without modifications on PlanetLab
WARN := -O1 -Wall -Werror -DDEBUG_SPINLOCK -DDEBUG_MUTEXES
# The main target
+ # Required by GCC 4.6
+ ccflags-y += -Wno-unused-but-set-variable
+
# Required by kernel <= 2.6.22, ccflags-y is used on newer version
LINUX_VERSION_CODE := $(shell grep LINUX_VERSION_CODE $(KERNELPATH)/include/linux/version.h|cut -d " " -f3)
ifeq ($(shell if [ -z $(LINUX_VERSION_CODE) ] ; then echo "true"; fi),true)
$(warning "---- Perhaps you miss a (cd $(KERNELPATH); make oldconfig; make prepare; make scripts)");
endif
- ifeq ($(shell if [ $(LINUX_VERSION_CODE) -le 132630 ] ; then echo "true"; fi),true)
+ ifeq ($(shell if [ "$(LINUX_VERSION_CODE)" -le 132630 ] ; then echo "true"; fi),true)
EXTRA_CFLAGS += $(ccflags-y)
endif
EFILES += netinet6/ip6_var.h
-EFILES += sys/_lock.h sys/_rwlock.h sys/_mutex.h sys/jail.h
+EFILES += sys/_lock.h sys/_rwlock.h sys/rmlock.h sys/_mutex.h sys/jail.h
EFILES += sys/condvar.h sys/eventhandler.h sys/domain.h
EFILES += sys/limits.h sys/lock.h sys/mutex.h sys/priv.h
EFILES += sys/proc.h sys/rwlock.h sys/socket.h sys/socketvar.h
*/
/*
- * $Id: bsd_compat.c 5813 2010-03-22 18:05:13Z svn_magno $
+ * $Id: bsd_compat.c 6320 2010-05-24 11:54:36Z svn_panicucci $
*
* kernel variables and functions that are not available in linux.
*/
int bootverbose = 0;
struct timeval boottime;
-int ip_defttl;
+int ip_defttl = 64; /* XXX set default value */
+int max_linkhdr = 16;
int fw_one_pass = 1;
u_long in_ifaddrhmask; /* mask for hash table */
struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */
return 1; /* no match */
}
+
+/*
+ * linux 2.6.33 defines these functions to access to
+ * skbuff internal structures. Define the missing
+ * function for the previous versions too.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
+{
+ skb->dst = dst;
+}
+
+inline struct dst_entry *skb_dst(const struct sk_buff *skb)
+{
+ return (struct dst_entry *)skb->dst;
+}
+#endif
+
+
/* support for sysctl emulation.
* XXX this is actually MI code that should be enabled also on openwrt
*/
/*
* Binary heap and hash tables, used in dummynet
*
- * $Id: dn_heap.c 5646 2010-03-08 12:48:30Z luigi $
+ * $Id: dn_heap.c 7119 2010-07-15 13:51:07Z luigi $
*/
#include <sys/cdefs.h>
return ht ? ht->entries : 0;
}
-/* lookup and optionally create or delete element */
+/*
+ * Helper function to scan a bucket in the hash table, it
+ * can only be called on a non-empty bucket for a valid table.
+ *
+ * In lookup and scan, consider ht->ht[i] as pointing to the tail
+ * of the queue (head is NEXTP(tail). The 'empty' value is irrelevant.
+ * While searching, start analysing p = head, end when p == tail.
+ * Note that 'tail' is a cache of the _original_ ht->ht[i]
+ * and is used to check for loop termination. If you remove
+ * it, you must also adjust 'p' when deleting the 'tail' element.
+ */
+#define NEXT(_h, _p) *((void **)((char *)(_p) + (_h)->ofs))
+static int
+dn_ht_scan_body(struct dn_ht *ht, int *bucket,
+ int (*fn)(void *, void *), void *arg)
+{
+ int ret, found = 0, i = *bucket;
+ void *tail, *pp, *p, *nextp;
+
+ pp = tail = ht->ht[i];
+ do {
+ p = NEXT(ht, pp);
+ nextp = NEXT(ht, p);
+ ret = fn(p, arg);
+ if ((ret & DNHT_SCAN_DEL) == 0) {
+ pp = p; /* prepare for next loop */
+ } else {
+ found++;
+ ht->entries--;
+ /* skip current element */
+ if (pp != p)
+ /* pp == p implies p == tail */
+ NEXT(ht, pp) = nextp;
+ if (p == tail)
+ ht->ht[i] = (pp != p) ? pp : NULL;
+ }
+ if (ret & DNHT_SCAN_END) {
+ /* Update ht->ht[i] before returning */
+ ht->ht[i] = (ht->ht[i] == NULL) ? NULL : pp;
+ return found;
+ }
+ } while (p != tail);
+
+ (*bucket)++;
+ return found;
+}
+
+/*
+ * lookup and optionally create or delete element.
+ * This is an optimized version of the scan so it is coded
+ * inline.
+ */
void *
dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg)
{
- int i;
- void **pp, *p;
+ int i, found;
+	void *tail, *pp, *p;	/* pp is the prev element, p is the current one */
if (ht == NULL) /* easy on an empty hash */
return NULL;
i = (ht->buckets == 1) ? 0 :
(ht->hash(key, flags, arg) & ht->buckets);
- for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) {
- if (flags & DNHT_MATCH_PTR) {
- if (key == (uintptr_t)p)
- break;
- } else if (ht->match(p, key, flags, arg)) /* found match */
- break;
+ pp = tail = ht->ht[i];
+ if (tail) { /* non empty, try a lookup */
+ do {
+ p = NEXT(ht, pp);
+ found = (flags & DNHT_MATCH_PTR) ? key == (uintptr_t)p :
+ ht->match(p, key, flags, arg);
+ if (!found)
+ continue;
+ if (flags & DNHT_REMOVE) {
+ ht->entries--;
+ if (p != pp) /* skip current element */
+ NEXT(ht, pp) = NEXT(ht, p);
+ if (p == tail)
+ ht->ht[i] = (pp != p) ? pp : NULL;
+ }
+ return p;
+ } while ( (pp = p) != tail);
}
+ /* not found */
+ if ((flags & DNHT_INSERT) == 0)
+ return NULL;
+ p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
if (p) {
- if (flags & DNHT_REMOVE) {
- /* link in the next element */
- *pp = *(void **)((char *)p + ht->ofs);
- *(void **)((char *)p + ht->ofs) = NULL;
- ht->entries--;
- }
- } else if (flags & DNHT_INSERT) {
- // printf("%s before calling new, bucket %d ofs %d\n",
- // __FUNCTION__, i, ht->ofs);
- p = ht->newh ? ht->newh(key, flags, arg) : (void *)key;
- // printf("%s newh returns %p\n", __FUNCTION__, p);
- if (p) {
- ht->entries++;
- *(void **)((char *)p + ht->ofs) = ht->ht[i];
- ht->ht[i] = p;
+ ht->entries++;
+ if (tail == NULL) {
+ ht->ht[i] = NEXT(ht, p) = p;
+ } else {
+ NEXT(ht, p) = NEXT(ht, tail);
+ NEXT(ht, tail) = p;
}
}
+
return p;
}
/*
- * do a scan with the option to delete the object. Extract next before
- * running the callback because the element may be destroyed there.
+ * do a scan with the option to delete the object.
+ * Similar to the lookup, but the match function is different,
+ * and we extract 'next' before running the callback because
+ * the element may be destroyed there.
*/
int
dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg)
{
- int i, ret, found = 0;
- void **curp, *cur, *next;
+ int i, bucket, found = 0;
if (ht == NULL || fn == NULL)
return 0;
for (i = 0; i <= ht->buckets; i++) {
- curp = &ht->ht[i];
- while ( (cur = *curp) != NULL) {
- next = *(void **)((char *)cur + ht->ofs);
- ret = fn(cur, arg);
- if (ret & DNHT_SCAN_DEL) {
- found++;
- ht->entries--;
- *curp = next;
- } else {
- curp = (void **)((char *)cur + ht->ofs);
- }
- if (ret & DNHT_SCAN_END)
+ if (ht->ht[i] == NULL)
+ continue; /* empty bucket */
+ bucket = i;
+ found += dn_ht_scan_body(ht, &bucket, fn, arg);
+ if (bucket == i) /* early exit */
return found;
- }
}
return found;
}
/*
- * Similar to dn_ht_scan(), except thah the scan is performed only
+ * Similar to dn_ht_scan(), except that the scan is performed only
* in the bucket 'bucket'. The function returns a correct bucket number if
- * the original is invalid
+ * the original is invalid.
+ * If the callback returns DNHT_SCAN_END, the function moves the ht->ht[i]
+ * pointer to the last entry processed. Moreover, the bucket number passed
+ * by the caller is decremented, because usually the caller increments it.
*/
int
dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *),
void *arg)
{
- int i, ret, found = 0;
- void **curp, *cur, *next;
-
if (ht == NULL || fn == NULL)
return 0;
- if (*bucket > ht->buckets)
+ if (*bucket > ht->buckets || *bucket < 0)
*bucket = 0;
- i = *bucket;
-
- curp = &ht->ht[i];
- while ( (cur = *curp) != NULL) {
- next = *(void **)((char *)cur + ht->ofs);
- ret = fn(cur, arg);
- if (ret & DNHT_SCAN_DEL) {
- found++;
- ht->entries--;
- *curp = next;
- } else {
- curp = (void **)((char *)cur + ht->ofs);
- }
- if (ret & DNHT_SCAN_END)
- return found;
- }
- return found;
+ if (ht->ht[*bucket] == NULL) {
+ (*bucket)++;
+ return 0;
+ } else
+ return dn_ht_scan_body(ht, bucket, fn, arg);
}
-
*/
/*
- * $Id: dn_sched_prio.c 5797 2010-03-21 16:31:08Z luigi $
+ * $Id: dn_sched_prio.c 6338 2010-05-26 15:06:34Z svn_panicucci $
*/
#ifdef _KERNEL
#include <sys/malloc.h>
}
static int
-prio_free_queue(struct dn_queue *q)
+prio_free_queue(struct dn_queue *q, int safe)
{
int prio = q->fs->fs.par[0];
struct prio_si *si = (struct prio_si *)(q->_si + 1);
*/
/*
- * $Id: dn_sched_qfq.c 5621 2010-03-04 16:51:27Z luigi $
+ * $Id: dn_sched_qfq.c 6552 2010-06-15 11:24:59Z svn_panicucci $
*/
#ifdef _KERNEL
* bitmaps ops are critical. Some linux versions have __fls
* and the bitmap ops. Some machines have ffs
*/
-#if defined(_WIN32)
+#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
int fls(unsigned int n)
{
int i = 0;
}
#endif
-#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32)
+#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
static inline unsigned long __fls(unsigned long word)
{
return fls(word) - 1;
/* remove an empty queue */
static int
-qfq_free_queue(struct dn_queue *_q)
+qfq_free_queue(struct dn_queue *_q, int safe)
{
struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
struct qfq_class *cl = (struct qfq_class *)_q;
*/
/*
- * $Id: dn_sched_rr.c 5621 2010-03-04 16:51:27Z luigi $
+ * $Id: dn_sched_rr.c 6338 2010-05-26 15:06:34Z svn_panicucci $
*/
#ifdef _KERNEL
if (si->head == NULL)
return; /* empty queue */
si->head->status = 0;
-
+
if (si->head == si->tail) {
si->head = si->tail = NULL;
return;
remove_queue_q(struct rr_queue *q, struct rr_si *si)
{
struct rr_queue *prev;
-
+
if (q->status != 1)
return;
if (q == si->head) {
si->tail = si->tail->qnext;
}
-static int
+static int
rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
{
struct rr_si *si;
return 0;
}
- /* If reach this point, queue q was idle */
+ /* If reach this point, queue q was idle */
si = (struct rr_si *)(_si + 1);
rrq = (struct rr_queue *)q;
}
static int
-rr_free_queue(struct dn_queue *_q)
+rr_free_queue(struct dn_queue *_q, int safe)
{
struct rr_queue *q = (struct rr_queue *)_q;
ND("called");
+ if (safe) /* Delete only if status == 0 */
+ return q->status;
+
if (q->status == 1) {
struct rr_si *si = (struct rr_si *)(_q->_si + 1);
remove_queue_q(q, si);
*/
/*
- * $Id: dn_sched_wf2q.c 5621 2010-03-04 16:51:27Z luigi $
+ * $Id: dn_sched_wf2q.c 6338 2010-05-26 15:06:34Z svn_panicucci $
*/
#ifdef _KERNEL
}
}
-static int
+static int
wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
{
struct dn_fsk *fs = q->fs;
return 0;
}
- /* If reach this point, queue q was idle */
+ /* If reach this point, queue q was idle */
alg_fq = (struct wf2qp_queue *)q;
if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
* of weights.
*/
static int
-wf2qp_free_queue(struct dn_queue *q)
+wf2qp_free_queue(struct dn_queue *q, int safe)
{
struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
-
+
if (alg_fq->S >= alg_fq->F + 1)
return 0; /* nothing to do, not in any heap */
+
+ /* queue is in a scheduler heap */
+ if (safe) /* do not delete in safe mode */
+ return 1;
+
si->wsum -= q->fs->fs.par[0];
if (si->wsum > 0)
si->inv_wsum = ONE_FP/si->wsum;
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/lock.h>
-#include <sys/rwlock.h>
+#include <sys/rmlock.h>
struct mbuf;
struct ifnet;
*/
struct packet_filter_hook {
TAILQ_ENTRY(packet_filter_hook) pfil_link;
- int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *);
+ int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
+ struct inpcb *);
void *pfil_arg;
- int pfil_flags;
};
#define PFIL_IN 0x00000001
#if defined( __linux__ ) || defined( _WIN32 )
rwlock_t ph_mtx;
#else
- struct rwlock ph_mtx;
+ struct rmlock ph_lock;
#endif
union {
u_long phu_val;
LIST_ENTRY(pfil_head) ph_list;
};
+int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
+ int, struct inpcb *), void *, int, struct pfil_head *);
+int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
+ int, struct inpcb *), void *, int, struct pfil_head *);
int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
int, struct inpcb *inp);
-int pfil_add_hook(int (*func)(void *, struct mbuf **,
- struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *);
-int pfil_remove_hook(int (*func)(void *, struct mbuf **,
- struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *);
-
int pfil_head_register(struct pfil_head *);
int pfil_head_unregister(struct pfil_head *);
struct pfil_head *pfil_head_get(int, u_long);
#define PFIL_HOOKED(p) ((p)->ph_nhooks > 0)
-#define PFIL_RLOCK(p) rw_rlock(&(p)->ph_mtx)
-#define PFIL_WLOCK(p) rw_wlock(&(p)->ph_mtx)
-#define PFIL_RUNLOCK(p) rw_runlock(&(p)->ph_mtx)
-#define PFIL_WUNLOCK(p) rw_wunlock(&(p)->ph_mtx)
+#define PFIL_LOCK_INIT(p) \
+ rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE)
+#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock)
+#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t))
+#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock)
+#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t))
+#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock)
#define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock)
#define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock)
static __inline struct packet_filter_hook *
pfil_hook_get(int dir, struct pfil_head *ph)
{
+
if (dir == PFIL_IN)
return (TAILQ_FIRST(&ph->ph_in));
else if (dir == PFIL_OUT)
struct radix_node_head {
struct radix_node *rnh_treetop;
+ int rnh_addrsize; /* permit, but not require fixed keys */
+ int rnh_pktsize; /* permit, but not require fixed keys */
struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
(void *v, void *mask,
struct radix_node_head *head, struct radix_node nodes[]);
+ struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
+ (void *v, void *mask,
+ struct radix_node_head *head, struct radix_node nodes[]);
struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
(void *v, void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
+ (void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
(void *v, struct radix_node_head *head);
struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
(void *v, void *mask, struct radix_node_head *head);
+ struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
+ (void *v, struct radix_node_head *head);
int (*rnh_walktree) /* traverse tree */
(struct radix_node_head *head, walktree_f_t *f, void *w);
int (*rnh_walktree_from) /* traverse tree below a */
#define LITTLE_ENDIAN 1234
#define BIG_ENDIAN 4321
#if defined(__BIG_ENDIAN)
-#error we are in bigendian
+#define BYTE_ORDER BIG_ENDIAN
+//#warning we are in bigendian
#elif defined(__LITTLE_ENDIAN)
//#warning we are in littleendian
#define BYTE_ORDER LITTLE_ENDIAN
struct in_addr ip_src,ip_dst; /* source and dest address */
} __packed __aligned(4);
+#define IPTOS_LOWDELAY 0x10
+
#endif /* _NETINET_IP_H_ */
DN_LAST,
};
-
+
enum { /* subtype for schedulers, flowset and the like */
DN_SCHED_UNKNOWN = 0,
DN_SCHED_FIFO = 1,
*/
struct dn_fs {
struct dn_id oid;
- uint32_t fs_nr; /* the flowset number */
- uint32_t flags; /* userland flags */
- int qsize; /* queue size in slots or bytes */
- int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
+ uint32_t fs_nr; /* the flowset number */
+ uint32_t flags; /* userland flags */
+ int qsize; /* queue size in slots or bytes */
+ int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */
uint32_t buckets; /* buckets used for the queue hash table */
struct ipfw_flow_id flow_mask;
* weight and probabilities are in the range 0..1 represented
* in fixed point arithmetic with SCALE_RED decimal bits.
*/
-#define SCALE_RED 16
-#define SCALE(x) ( (x) << SCALE_RED )
-#define SCALE_VAL(x) ( (x) >> SCALE_RED )
-#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
+#define SCALE_RED 16
+#define SCALE(x) ( (x) << SCALE_RED )
+#define SCALE_VAL(x) ( (x) >> SCALE_RED )
+#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED )
int w_q ; /* queue weight (scaled) */
int max_th ; /* maximum threshold for queue (scaled) */
int min_th ; /* minimum threshold for queue (scaled) */
struct dn_profile {
struct dn_id oid;
/* fields to simulate a delay profile */
-#define ED_MAX_NAME_LEN 32
- char name[ED_MAX_NAME_LEN];
- int link_nr;
- int loss_level;
- int bandwidth; // XXX use link bandwidth?
- int samples_no; /* actual length of samples[] */
- int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */
+#define ED_MAX_NAME_LEN 32
+ char name[ED_MAX_NAME_LEN];
+ int link_nr;
+ int loss_level;
+ int bandwidth; // XXX use link bandwidth?
+ int samples_no; /* actual len of samples[] */
+ int samples[0]; /* may be shorter */
};
-
-
/*
* Overall structure of dummynet
uint32_t src_ip;
uint16_t dst_port;
uint16_t src_port;
- uint8_t fib;
- uint8_t proto;
+ uint8_t fib;
+ uint8_t proto;
uint8_t _flags; /* protocol-specific flags */
uint8_t addr_type; /* 4=ip4, 6=ip6, 1=ether ? */
struct in6_addr dst_ip6;
* free_queue actions related to a queue removal, e.g. undo
* all the above. If the queue has data in it, also remove
* from the scheduler. This can e.g. happen during a reconfigure.
+ * If safe == 1 remove the queue only if the scheduler no longer
+ * needs it, otherwise delete it even if the scheduler is using
+ * it. Usually, the safe flag is set when the drain routine is
+ * running to delete idle queues.
*/
int (*enqueue)(struct dn_sch_inst *, struct dn_queue *,
struct mbuf *);
int (*new_fsk)(struct dn_fsk *f);
int (*free_fsk)(struct dn_fsk *f);
int (*new_queue)(struct dn_queue *q);
- int (*free_queue)(struct dn_queue *q);
+ int (*free_queue)(struct dn_queue *q, int safe);
/* run-time fields */
int ref_count; /* XXX number of instances in the system */
if (m == NULL)
return NULL;
q->mq.head = m->m_nextpkt;
+
+ /* Update stats for the queue */
q->ni.length--;
q->ni.len_bytes -= m->m_pkthdr.len;
+ /* When the queue becomes idle, update idle_time (used by RED)
+ * and also update the count of idle queues (for garbage collection).
+ */
+ if (q->ni.length == 0) {
+ dn_cfg.idle_queue++;
+ q->q_time = dn_cfg.curr_time;
+ }
if (q->_si) {
- q->_si->ni.length--;
- q->_si->ni.len_bytes -= m->m_pkthdr.len;
+ struct dn_flow *ni = &(q->_si->ni);
+ /* update stats for the scheduler instance, and keep track
+ * of idle scheduler instances if needed
+ */
+ ni->length--;
+ ni->len_bytes -= m->m_pkthdr.len;
+ if (ni->length == 0)
+ dn_cfg.idle_si++;
}
- if (q->ni.length == 0) /* queue is now idle */
- q->q_time = dn_cfg.curr_time;
return m;
}
MALLOC_DECLARE(M_DUMMYNET);
-#ifndef FREE_PKT
-#define FREE_PKT(m) m_freem(m)
-#endif
-
#ifndef __linux__
#define div64(a, b) ((int64_t)(a) / (int64_t)(b))
#endif
o->subtype = 0;
};
+uint64_t readTSC (void);
+/*
+ * see if tsc (or other timer) is supported.
+ * - FreeBSD has rdtsc macro for i386 and amd64
+ * - Linux has rdtscll and/or rdtsc (also for openWRT patched kernel source)
+ * - Windows has KeQueryPerformanceCounter() function that use tsc or other
+ * timer
+ */
+#if defined(rdtscll) || defined(rdtsc) || defined(_WIN32)
+#define HAVE_TSC
+#endif
/*
* configuration and global data for a dummynet instance
*
int queue_count;
/* ticks and other stuff */
- uint64_t curr_time;
+ uint64_t curr_time; /* in ticks */
+
+ /*
+ * Variables to manage the time spent in the drain routines.
+ * max_drain is max the fraction of a tick (0..100) to be used
+ * for draining.
+ * We also need some variables to store the average number of
+ * timecounter ticks between calls to the periodic task, etc.
+ */
+ int drain_ratio;
+ uint64_t cycle_task_new; /* TSC when dummynet_task() starts */
+ uint64_t cycle_task_old; /* TSC when prev. dummynet_task() starts */
+ uint64_t cycle_task;
+	uint64_t cycle_task_avg;	/* Moving average of cycle_task */
+
/* flowsets and schedulers are in hash tables, with 'hash_size'
* buckets. fshash is looked up at every packet arrival
* so better be generous if we expect many entries.
struct dn_fsk_head fsu; /* list of unlinked flowsets */
struct dn_alg_head schedlist; /* list of algorithms */
- /* Store the fs/sch to scan when draining. The value is the
- * bucket number of the hash table. Expire can be disabled
- * with net.inet.ip.dummynet.expire=0, or it happens every
- * expire ticks.
- **/
- int drain_fs;
- int drain_sch;
- uint32_t expire;
- uint32_t expire_cycle; /* tick count */
-
+ /* Counter of idle objects -- used by drain routine
+ * We scan when idle_queue (or idle_si) > expire_object.
+ * The drain routine is called every 'expire' cycles (the counter
+ * used is expire_cycle).
+ * We can disable the expire routine by setting expire to 0.
+ * An object is kept alive for at least object_idle_tick after it
+ * becomes idle. During the scan, we count the number of objects
+ * that are idle but not ready in 'idle_si_wait' and 'idle_queue_wait'
+ */
+ int idle_queue;
+ int idle_queue_wait; /* idle but not expired yet */
+ int idle_si;
+ int idle_si_wait; /* idle but not expired yet */
+ uint32_t expire_object; /* threshold for expires */
+ uint32_t expire; /* how often to expire */
+ uint32_t expire_cycle;
+ uint32_t object_idle_tick; /* lifetime of objs */
+ uint32_t expire_object_examined; /* Burst of object examined */
+
+ /* drain_fs and drain_sch point to the next bucket to scan when
+ * draining.
+ */
+ uint32_t drain_fs;
+ uint32_t drain_sch;
+
+ int init_done;
+
/* if the upper half is busy doing something long,
* can set the busy flag and we will enqueue packets in
* a queue for later processing.
* The counter is incremented or decremented when
* a reference from the queue is created or deleted.
* It is used to make sure that a scheduler instance can be safely
- * deleted by the drain routine. See notes below.
+ * deleted by the drain routine.
*/
int q_count;
};
-/*
- * NOTE about object drain.
- * The system will automatically (XXX check when) drain queues and
- * scheduler instances when they are idle.
- * A queue is idle when it has no packets; an instance is idle when
- * it is not in the evheap heap, and the corresponding delay line is empty.
- * A queue can be safely deleted when it is idle because of the scheduler
- * function xxx_free_queue() will remove any references to it.
- * An instance can be only deleted when no queues reference it. To be sure
- * of that, a counter (q_count) stores the number of queues that are pointing
- * to the instance.
- *
- * XXX
- * Order of scan:
- * - take all flowset in a bucket for the flowset hash table
- * - take all queues in a bucket for the flowset
- * - increment the queue bucket
- * - scan next flowset bucket
- * Nothing is done if a bucket contains no entries.
- *
- * The same schema is used for sceduler instances
- */
-
-/* kernel-side flags. Linux has DN_DELETE in fcntl.h
+/* kernel-side flags. Linux has DN_DELETE in fcntl.h
*/
enum {
/* 1 and 2 are reserved for the SCAN flags */
DN_DETACH = 0x0010,
DN_ACTIVE = 0x0020, /* object is in evheap */
DN_F_DLINE = 0x0040, /* object is a delay line */
- DN_F_SCHI = 0x00C0, /* object is a sched.instance */
+ DN_DEL_SAFE = 0x0080, /* delete a queue only if no longer needed
+ * by scheduler */
DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */
};
extern struct dn_parms dn_cfg;
+//VNET_DECLARE(struct dn_parms, _base_dn_cfg);
+//#define dn_cfg VNET(_base_dn_cfg)
int dummynet_io(struct mbuf **, int , struct ip_fw_args *);
void dummynet_task(void *context, int pending);
void dn_reschedule(void);
-struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *,
- struct ipfw_flow_id *);
+struct dn_queue *ipdn_q_find(struct dn_fsk *, struct ipfw_flow_id *);
struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *);
/*
PROTO_IPV4 = 0x08,
PROTO_IPV6 = 0x10,
PROTO_IFB = 0x0c, /* layer2 + ifbridge */
- /* PROTO_OLDBDG = 0x14, unused, old bridge */
+ /* PROTO_OLDBDG = 0x14, unused, old bridge */
};
/* wrapper for freeing a packet, in case we need to do more work */
LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */
struct radix_node_head *tables[IPFW_TABLES_MAX];
#if defined( __linux__ ) || defined( _WIN32 )
- spinlock_t rwmtx;
- spinlock_t uh_lock;
+ spinlock_t rwmtx;
+ spinlock_t uh_lock;
#else
struct rwlock rwmtx;
struct rwlock uh_lock; /* lock for upper half */
u_short uh_sum; /* udp checksum */
};
+/*
+ * User-settable options (used with setsockopt).
+ */
+#define UDP_ENCAP 0x01
+
+
+/*
+ * UDP Encapsulation of IPsec Packets options.
+ */
+/* Encapsulation types. */
+#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
+#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */
+
+/* Default ESP in UDP encapsulation port. */
+#define UDP_ENCAP_ESPINUDP_PORT 500
+
+/* Maximum UDP fragment size for ESP over UDP. */
+#define UDP_ENCAP_ESPINUDP_MAXFRAGLEN 552
+
#endif
static __inline struct m_tag *
m_tag_locate(struct mbuf *m, u_int32_t n, int x, struct m_tag *t)
{
- return NULL;
+ struct m_tag *tag;
+
+ tag = m_tag_first(m);
+ if (tag == NULL)
+ return NULL;
+
+ if (tag->m_tag_cookie != n || tag->m_tag_id != x)
+ return NULL;
+ else
+ return tag;
};
#define M_SETFIB(_m, _fib) /* nothing on linux */
+
static __inline void
m_freem(struct mbuf *m)
{
#define M_GETFIB(_m) 0
+/* macro used to create a new mbuf */
+#define MT_DATA 1 /* dynamic (data) allocation */
+#define MSIZE 256 /* size of an mbuf */
+#define MGETHDR(_m, _how, _type) ((_m) = m_gethdr((_how), (_type)))
+
+/* allocate and init a new mbuf using the same structure of FreeBSD */
+static __inline struct mbuf *
+m_gethdr(int how, short type)
+{
+ struct mbuf *m;
+
+ m = malloc(MSIZE, M_IPFW, M_NOWAIT);
+
+ if (m == NULL) {
+ return m;
+ }
+
+ /* here we have MSIZE - sizeof(struct mbuf) available */
+ m->m_data = m + 1;
+
+ return m;
+}
+
#endif /* __linux__ || _WIN32 */
/*
#endif
#define callout timer_list
static __inline int
-callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg)
+callout_reset_on(struct callout *co, int ticks, void (*fn)(void *), void *arg, int cpu)
{
co->expires = jiffies + ticks;
co->function = (void (*)(unsigned long))fn;
co->data = (unsigned long)arg;
+ /*
+ * Linux 2.6.31 and above has add_timer_on(co, cpu),
+ * otherwise add_timer() always schedules a callout on the same
+ * CPU used the first time, so we don't need more.
+ */
add_timer(co);
return 0;
}
* timer is called only once a sec, this won't hurt that much.
*/
static __inline int
-callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg)
+callout_reset_on(struct callout *co, int ticks, void (*fn)(void *), void *arg, int cpu)
{
if(fn == &dummynet)
{
if(co->dpcinitialized == 0)
{
KeInitializeDpc(&co->timerdpc, dummynet_dpc, NULL);
- KeSetTargetProcessorDpc(&co->timerdpc, 0);
+ KeSetTargetProcessorDpc(&co->timerdpc, cpu);
co->dpcinitialized = 1;
}
}
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/sysctl.h>
+
#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */
#include <net/netisr.h>
+#include <net/vnet.h>
+
#include <netinet/in.h>
#include <netinet/ip.h> /* ip_len, ip_off */
#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */
*/
struct dn_parms dn_cfg;
+//VNET_DEFINE(struct dn_parms, _base_dn_cfg);
static long tick_last; /* Last tick duration (usec). */
static long tick_delta; /* Last vs standard tick diff (usec). */
SYSCTL_DECL(_net_inet_ip);
SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
+/* wrapper to pass dn_cfg fields to SYSCTL_* */
+//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x))
+#define DC(x) (&(dn_cfg.x))
/* parameters */
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
- CTLFLAG_RW, &dn_cfg.hash_size, 0, "Default hash table size");
+ CTLFLAG_RW, DC(hash_size), 0, "Default hash table size");
SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit,
- CTLFLAG_RW, &dn_cfg.slot_limit, 0,
+ CTLFLAG_RW, DC(slot_limit), 0,
"Upper limit in slots for pipe queue.");
SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit,
- CTLFLAG_RW, &dn_cfg.byte_limit, 0,
+ CTLFLAG_RW, DC(byte_limit), 0,
"Upper limit in bytes for pipe queue.");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast,
- CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io.");
+ CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io.");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug,
- CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
- CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes");
-SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
- CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes");
+ CTLFLAG_RW, DC(debug), 0, "Dummynet debug level");
/* RED parameters */
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
- CTLFLAG_RD, &dn_cfg.red_lookup_depth, 0, "Depth of RED lookup table");
+ CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
- CTLFLAG_RD, &dn_cfg.red_avg_pkt_size, 0, "RED Medium packet size");
+ CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
- CTLFLAG_RD, &dn_cfg.red_max_pkt_size, 0, "RED Max packet size");
+ CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED Max packet size");
/* time adjustment */
SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta,
CTLFLAG_RD, &tick_lost, 0,
"Number of ticks coalesced by dummynet taskqueue.");
+/* Drain parameters */
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
+ CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle,
+ CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_object,
+ CTLFLAG_RW, DC(expire_object), 0, "Min # of objects before start drain routine");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, object_idle_tick,
+ CTLFLAG_RD, DC(object_idle_tick), 0, "Time (in ticks) to cosiderer an object as idle");
+SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, drain_ratio,
+ CTLFLAG_RD, DC(drain_ratio), 0, "% of dummynet_task() to dedicate to drain routine");
+
/* statistics */
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count,
- CTLFLAG_RD, &dn_cfg.schk_count, 0, "Number of schedulers");
+ CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count,
- CTLFLAG_RD, &dn_cfg.si_count, 0, "Number of scheduler instances");
+ CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count,
- CTLFLAG_RD, &dn_cfg.fsk_count, 0, "Number of flowsets");
+ CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count,
- CTLFLAG_RD, &dn_cfg.queue_count, 0, "Number of queues");
+ CTLFLAG_RD, DC(queue_count), 0, "Number of queues");
SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt,
CTLFLAG_RD, &io_pkt, 0,
"Number of packets passed to dummynet.");
SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop,
CTLFLAG_RD, &io_pkt_drop, 0,
"Number of packets dropped by dummynet.");
-
+#undef DC
SYSEND
#endif
goto drop;
}
mq_append(&q->mq, m);
+ if (q->ni.length == 0) { /* queue was idle */
+ dn_cfg.idle_queue--;
+ if (ni->length == 0) /* scheduler was idle */
+ dn_cfg.idle_si--;
+ }
q->ni.length++;
q->ni.len_bytes += len;
ni->length++;
si->sched_time = now;
done = 0;
while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) {
+ uint64_t len_scaled;
+
+ /*
+ * Some schedulers might want wake up the scheduler later.
+ * To suppor this the caller returns an mbuf with len < 0
+ * this will result in a new wake up of the scheduler
+ * instance between m->m_pkthdr.len ticks.
+ */
if (m->m_pkthdr.len < 0) {
- /* Received a packet with negative length.
- * the scheduler instance will be waken up after
- * -m->m_pkthdr.len ticks.
- */
si->kflags |= DN_ACTIVE;
heap_insert(&dn_cfg.evheap, now - m->m_pkthdr.len, si);
-
- /* Delete the fake packet */
- free(m, M_DUMMYNET);
-
- /* Dont' touch credit, exit from the function */
+ if (delay_line_idle && done)
+ transmit_event(q, &si->dline, now);
return NULL;
- } else { /* normal behaviour */
- uint64_t len_scaled;
- done++;
- len_scaled = (bw == 0) ? 0 : hz *
- (m->m_pkthdr.len * 8 + extra_bits(m, s));
- si->credit -= len_scaled;
- /* Move packet in the delay line */
- dn_tag_get(m)->output_time += s->link.delay ;
- mq_append(&si->dline.mq, m);
}
+
+ /* a regular mbuf received */
+ done++;
+ if (bw == 0) printf("bw is null\n");
+ len_scaled = (bw == 0) ? 0 : hz *
+ (m->m_pkthdr.len * 8 + extra_bits(m, s));
+ si->credit -= len_scaled;
+ /* Move packet in the delay line */
+ dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay;
+ mq_append(&si->dline.mq, m);
}
+
/*
* If credit >= 0 the instance is idle, mark time.
* Otherwise put back in the heap, and adjust the output
return q->head;
}
+/*
+ * Support function to read the TSC (or equivalent). We use this
+ * high resolution timer to adapt the amount of work done for
+ * expiring the clock.
+ * Supports Linux and FreeBSD both i386 and amd64 platform
+ * Supports OpenWRT mips architecture
+ *
+ * On SMP no special work is needed:
+ * - In Linux 2.6 timers always run on the same cpu that added them. See
+ * (http://book.opensourceproject.org.cn/kernel/kernel3rd/opensource/0596005652/understandlk-chp-6-sect-5.html)
+ * - FreeBSD8 has a new callout_reset_on() with specify the cpu on which
+ * the timer must be run
+ * - Windows runs dummynet_task() on cpu0.
+ *
+ * - Linux 2.4 doesn't assure to run a timer in the same cpu every time.
+ */
+#ifdef HAVE_TSC
+uint64_t
+readTSC (void)
+{
+ uint64_t a=0;
+
+#ifdef __linux__
+ /* Linux and openwrt have a macro to read the tsc for i386 and
+ * amd64.
+ * Openwrt have patched the kernel and allow use of tsc with mips
+ * and other platforms
+ * rdtscll() is a macro defined in include/asm-xxx/msr.h,
+ * where xxx is the architecture (x86, mips).
+ */
+ rdtscll(a);
+#elif defined(_WIN32)
+ /* Microsoft recommends the use of KeQueryPerformanceCounter()
+ * insteead of rdtsc().
+ */
+ KeQueryPerformanceCounter((PLARGE_INTEGER)&a); //XXX not tested!
+#elif defined(__FreeBSD__)
+ /* FreeBSD (i386/amd64) has macro rdtsc() defined in machine/cpufunc.h.
+	 * We could use the macro instead of explicit assembly XXX
+ */
+ return rdtsc();
+#endif
+ return a;
+}
+#endif /* HAVE_TSC */
+
+/*
+ * compute avg task period.
+ * We could do something more complex, possibly.
+ */
+static void
+do_update_cycle(void)
+{
+#ifdef HAVE_TSC
+ uint64_t tmp = readTSC();
+#if defined (LINUX_24) && defined(CONFIG_SMP)
+ /* on LINUX24 and SMP, we have no guarantees on which cpu runs
+ * the timer callbacks. If the difference between new and
+ * old value is negative, we assume that the values come from
+ * different cpus so we adjust 'new' accordingly.
+ */
+ if (tmp <= dn_cfg.cycle_task_new)
+ dn_cfg.cycle_task_new = tmp - dn_cfg.cycle_task;
+#endif /* !(linux24 && SMP) */
+ dn_cfg.cycle_task_old = dn_cfg.cycle_task_new;
+ dn_cfg.cycle_task_new = tmp;
+ dn_cfg.cycle_task = dn_cfg.cycle_task_new - dn_cfg.cycle_task_old;
+
+ /* Update the average
+	 * avg = (2^N * avg + new - avg) / 2^N
+	 * N==4 seems to be a good compromise between clock change
+ * and 'spurious' cycle_task value
+ */
+#define DN_N 4
+ dn_cfg.cycle_task_avg = (dn_cfg.cycle_task_avg << DN_N) +
+ dn_cfg.cycle_task - dn_cfg.cycle_task_avg;
+ dn_cfg.cycle_task_avg = dn_cfg.cycle_task_avg >> DN_N;
+#undef DN_N
+
+#endif /* HAVE_TSC */
+}
+
+static void
+do_drain(void)
+{
+#ifdef HAVE_TSC
+ uint64_t dt_max;
+#endif
+ if (!dn_cfg.expire || ++dn_cfg.expire_cycle < dn_cfg.expire)
+ return;
+ /* It's time to check if drain routines should be called */
+ dn_cfg.expire_cycle = 0;
+
+ dn_cfg.idle_queue_wait = 0;
+ dn_cfg.idle_si_wait = 0;
+ /* Do a drain cycle even if there isn't time to do it */
+#ifdef HAVE_TSC
+ dt_max = dn_cfg.cycle_task_avg * dn_cfg.drain_ratio;
+#endif
+ for (;;) {
+ int done = 0;
+
+ if (dn_cfg.idle_queue > dn_cfg.expire_object &&
+ dn_cfg.idle_queue_wait < dn_cfg.idle_queue) {
+ dn_drain_queue();
+ done = 1;
+ }
+ if (dn_cfg.idle_si > dn_cfg.expire_object &&
+ dn_cfg.idle_si_wait < dn_cfg.idle_si) {
+ dn_drain_scheduler();
+ done = 1;
+ }
+ /* time to end ? */
+#ifndef HAVE_TSC
+ /* If tsc does not exist, do only one drain cycle and exit */
+ break;
+#else
+ /* Exit when nothing was done or we have consumed all time */
+ if ( (done == 0) ||
+ ((readTSC() - dn_cfg.cycle_task_new) * 100 > dt_max) )
+ break;
+#endif /* HAVE_TSC */
+ }
+}
+
/*
* The timer handler for dummynet. Time is computed in ticks, but
* but the code is tolerant to the actual rate at which this is called.
{
struct timeval t;
struct mq q = { NULL, NULL }; /* queue to accumulate results */
-
+
+ CURVNET_SET((struct vnet *)context);
+
+ do_update_cycle(); /* compute avg. tick duration */
+
DN_BH_WLOCK();
/* Update number of lost(coalesced) ticks. */
transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time);
}
}
- if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) {
- dn_cfg.expire_cycle = 0;
- dn_drain_scheduler();
- dn_drain_queue();
- }
+ do_drain();
DN_BH_WUNLOCK();
dn_reschedule();
if (q.head != NULL)
dummynet_send(q.head);
+ CURVNET_RESTORE();
}
/*
goto dropit; /* This queue/pipe does not exist! */
if (fs->sched == NULL) /* should not happen */
goto dropit;
- /* find scheduler instance, possibly applying sched_mask */
- si = ipdn_si_find(fs->sched, &(fwa->f_id));
- if (si == NULL)
- goto dropit;
/*
* If the scheduler supports multiple queues, find the right one
* (otherwise it will be ignored by enqueue).
*/
if (fs->sched->fp->flags & DN_MULTIQUEUE) {
- q = ipdn_q_find(fs, si, &(fwa->f_id));
+ q = ipdn_q_find(fs, &(fwa->f_id));
if (q == NULL)
goto dropit;
- }
+		/* The scheduler instance lookup is done only for a new queue.
+ * The callback q_new() will create the scheduler instance
+ * if needed.
+ */
+ si = q->_si;
+ } else
+ si = ipdn_si_find(fs->sched, &(fwa->f_id));
+
+ if (si == NULL)
+ goto dropit;
if (fs->sched->fp->enqueue(si, q, m)) {
- printf("%s dropped by enqueue\n", __FUNCTION__);
/* packet was dropped by enqueue() */
m = *m0 = NULL;
goto dropit;
}
/* compute the initial allowance */
- {
+ if (si->idle_time < dn_cfg.curr_time) {
+ /* Do this only on the first packet on an idle pipe */
struct dn_link *p = &fs->sched->link;
+
+ si->sched_time = dn_cfg.curr_time;
si->credit = dn_cfg.io_fast ? p->bandwidth : 0;
if (p->burst) {
uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth;
void
dn_reschedule(void)
{
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_reset_on(&dn_timeout, 1, dummynet, NULL, 0);
}
/*----- end of callout hooks -----*/
return 1; /* different address families */
return (id1->dst_ip == id2->dst_ip &&
- id1->src_ip == id2->src_ip &&
- id1->dst_port == id2->dst_port &&
- id1->src_port == id2->src_port &&
- id1->proto == id2->proto &&
+ id1->src_ip == id2->src_ip &&
+ id1->dst_port == id2->dst_port &&
+ id1->src_port == id2->src_port &&
+ id1->proto == id2->proto &&
id1->extra == id2->extra) ? 0 : 1;
}
/* the ipv6 case */
if (fs->fs.flags & DN_QHT_HASH)
q->ni.fid = *(struct ipfw_flow_id *)key;
q->fs = fs;
- q->_si = template->_si;
+ q->_si = ipdn_si_find(q->fs->sched, &(template->ni.fid));
+ if (q->_si == NULL) {
+ D("no memory for new si");
+ free (q, M_DUMMYNET);
+ return NULL;
+ }
+
q->_si->q_count++;
if (fs->sched->fp->new_queue)
fs->sched->fp->new_queue(q);
dn_cfg.queue_count++;
+ dn_cfg.idle_queue++;
return q;
}
* Notify schedulers that a queue is going away.
* If (flags & DN_DESTROY), also free the packets.
* The version for callbacks is called q_delete_cb().
+ * Returns 1 if the queue is NOT deleted (usually when
+ * the drain routine tries to delete a queue that a scheduler
+ * instance needs), 0 otherwise.
+ * NOTE: flag DN_DEL_SAFE means that the queue should be
+ * deleted only if the scheduler no longer needs it
*/
-static void
+static int
dn_delete_queue(struct dn_queue *q, int flags)
{
struct dn_fsk *fs = q->fs;
// D("fs %p si %p\n", fs, q->_si);
/* notify the parent scheduler that the queue is going away */
if (fs && fs->sched->fp->free_queue)
- fs->sched->fp->free_queue(q);
+ if (fs->sched->fp->free_queue(q, flags & DN_DEL_SAFE) == 1)
+ return 1; /* queue NOT deleted */
q->_si->q_count--;
q->_si = NULL;
if (flags & DN_DESTROY) {
if (q->mq.head)
dn_free_pkts(q->mq.head);
+ else
+ dn_cfg.idle_queue--;
bzero(q, sizeof(*q)); // safety
free(q, M_DUMMYNET);
dn_cfg.queue_count--;
}
+ return 0;
}
static int
* We never call it for !MULTIQUEUE (the queue is in the sch_inst).
*/
struct dn_queue *
-ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si,
- struct ipfw_flow_id *id)
+ipdn_q_find(struct dn_fsk *fs, struct ipfw_flow_id *id)
{
struct dn_queue template;
- template._si = si;
template.fs = fs;
if (fs->fs.flags & DN_QHT_HASH) {
return flow_id_cmp(&o->ni.fid, id2) == 0;
}
+static int si_reset_credit(void *_si, void *arg); // XXX si_new use this
+
/*
* create a new instance for the given 'key'
* Allocate memory for instance, delay line and scheduler private data.
si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO);
if (si == NULL)
goto error;
+
/* Set length only for the part passed up to userland. */
set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow));
set_oid(&(si->dline.oid), DN_DELAY_LINE,
if (s->sch.flags & DN_HAVE_MASK)
si->ni.fid = *(struct ipfw_flow_id *)key;
+ si_reset_credit(si, NULL);
dn_cfg.si_count++;
+ dn_cfg.idle_si++;
return si;
error:
if (dl->oid.subtype) /* remove delay line from event heap */
heap_extract(&dn_cfg.evheap, dl);
+ if (si->ni.length == 0)
+ dn_cfg.idle_si--;
dn_free_pkts(dl->mq.head); /* drain delay line */
if (si->kflags & DN_ACTIVE) /* remove si from event heap */
heap_extract(&dn_cfg.evheap, si);
struct dn_sch_inst *si = _si;
struct dn_link *p = &si->sched->link;
+ si->idle_time = dn_cfg.curr_time;
si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0);
return 0;
}
h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu;
SLIST_REMOVE(h, fs, dn_fsk, sch_chain);
}
- /* Free the RED parameters, they will be recomputed on
+ /* Free the RED parameters, they will be recomputed on
* subsequent attach if needed.
*/
if (fs->w_q_lookup)
if (!locked)
DN_BH_WLOCK();
fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL);
+ if (dn_ht_entries(dn_cfg.fshash) == 0) {
+ dn_ht_free(dn_cfg.fshash, 0);
+ dn_cfg.fshash = NULL;
+ }
ND("fs %d found %p", i, fs);
if (fs) {
fsk_detach(fs, DN_DETACH | DN_DELETE_FS);
#endif
fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0);
/* no more flowset pointing to us now */
- if (s->sch.flags & DN_HAVE_MASK)
+ if (s->sch.flags & DN_HAVE_MASK) {
dn_ht_scan(s->siht, si_destroy, NULL);
+ dn_ht_free(s->siht, 0);
+ }
else if (s->siht)
si_destroy(s->siht, NULL);
if (s->profile) {
struct dn_schk *s;
s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL);
+ if (dn_ht_entries(dn_cfg.schedhash) == 0) {
+ dn_ht_free(dn_cfg.schedhash, 0);
+ dn_cfg.schedhash = NULL;
+ }
ND("%d %p", i, s);
if (!s)
return EINVAL;
/*
* This routine only copies the initial part of a profile ? XXX
+ * XXX marta: I think this routine is called to print a summary
+ * of the pipe configuration and does not need to show the
+ * profile samples list.
*/
static int
copy_profile(struct copy_args *a, struct dn_profile *p)
{
int have = a->end - *a->start;
/* XXX here we check for max length */
- int profile_len = sizeof(struct dn_profile) -
- ED_MAX_SAMPLES_NO*sizeof(int);
+ int profile_len = sizeof(struct dn_profile);
if (p == NULL)
return 0;
return 0; /* not a pipe */
/* see if the object is within one of our ranges */
- for (;r < lim; r+=2) {
+ for (;r < lim; r += 2) {
if (n < r[0] || n > r[1])
continue;
/* Found a valid entry, copy and we are done */
- if (a->flags & DN_C_LINK) {
- if (copy_obj(a->start, a->end,
+ if (a->flags & DN_C_LINK) {
+ if (copy_obj(a->start, a->end,
&s->link, "link", n))
- return DNHT_SCAN_END;
- if (copy_profile(a, s->profile))
- return DNHT_SCAN_END;
- if (copy_flowset(a, s->fs, 0))
- return DNHT_SCAN_END;
- }
- if (a->flags & DN_C_SCH) {
- if (copy_obj(a->start, a->end,
+ return DNHT_SCAN_END;
+ if (copy_profile(a, s->profile))
+ return DNHT_SCAN_END;
+ if (copy_flowset(a, s->fs, 0))
+ return DNHT_SCAN_END;
+ }
+ if (a->flags & DN_C_SCH) {
+ if (copy_obj(a->start, a->end,
&s->sch, "sched", n))
- return DNHT_SCAN_END;
- /* list all attached flowsets */
- if (copy_fsk_list(a, s, 0))
- return DNHT_SCAN_END;
- }
+ return DNHT_SCAN_END;
+ /* list all attached flowsets */
+ if (copy_fsk_list(a, s, 0))
+ return DNHT_SCAN_END;
+ }
if (a->flags & DN_C_FLOW)
- copy_si(a, s, 0);
+ copy_si(a, s, 0);
break;
}
} else if (a->type == DN_FS) {
if (n >= DN_MAX_ID)
return 0;
/* see if the object is within one of our ranges */
- for (;r < lim; r+=2) {
+ for (;r < lim; r += 2) {
if (n < r[0] || n > r[1])
continue;
- if (copy_flowset(a, fs, 0))
- return DNHT_SCAN_END;
- copy_q(a, fs, 0);
+ if (copy_flowset(a, fs, 0))
+ return DNHT_SCAN_END;
+ copy_q(a, fs, 0);
break; /* we are done */
- }
}
+ }
return 0;
}
}
if (!locked)
DN_BH_WLOCK();
+ if (dn_cfg.fshash == NULL)
+ dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
+ offsetof(struct dn_fsk, fsk_next),
+ fsk_hash, fsk_match, fsk_new);
do { /* exit with break when done */
struct dn_schk *s;
int flags = nfs->sched_nr ? DNHT_INSERT : 0;
new_flags = a.sch->flags;
}
DN_BH_WLOCK();
+ if (dn_cfg.schedhash == NULL)
+ dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
+ offsetof(struct dn_schk, schk_next),
+ schk_hash, schk_match, schk_new);
again: /* run twice, for wfq and fifo */
/*
* lookup the type. If not supplied, use the previous one
if (!pf || pf->link_nr != p.link_nr) { /* no saved value */
s->profile = NULL; /* XXX maybe not needed */
} else {
- s->profile = malloc(sizeof(struct dn_profile),
+ /* Size the allocation on the saved profile 'pf' (the data we
+ * copy below). The old s->profile may be NULL or stale here,
+ * so it must not be dereferenced.
+ */
+ size_t pf_size = sizeof(struct dn_profile) +
+ pf->samples_no * sizeof(int);
+
+ s->profile = malloc(pf_size,
 M_DUMMYNET, M_NOWAIT | M_ZERO);
if (s->profile == NULL) {
D("cannot allocate profile");
goto error; //XXX
}
- bcopy(pf, s->profile, sizeof(*pf));
+ bcopy(pf, s->profile, pf_size);
}
}
p.link_nr = 0;
bcopy(pf, s->profile, pf->oid.len);
s->profile->oid.len = olen;
}
+
DN_BH_WUNLOCK();
return err;
}
DX(4, "still %d unlinked fs", dn_cfg.fsk_count);
dn_ht_free(dn_cfg.fshash, DNHT_REMOVE);
fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS);
+
+ dn_ht_free(dn_cfg.schedhash, DNHT_REMOVE);
/* Reinitialize system heap... */
heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
}
default:
D("cmd %d not implemented", o->type);
break;
+
#ifdef EMULATE_SYSCTL
/* sysctl emulation.
* if we recognize the command, jump to the correct
* handler and return
*/
case DN_SYSCTL_SET:
- err = kesysctl_emu_set(p,l);
+ err = kesysctl_emu_set(p, l);
return err;
#endif
+
case DN_CMD_CONFIG: /* simply a header */
break;
compute_space(struct dn_id *cmd, struct copy_args *a)
{
int x = 0, need = 0;
- int profile_size = sizeof(struct dn_profile) -
- ED_MAX_SAMPLES_NO*sizeof(int);
+ int profile_size = sizeof(struct dn_profile);
/* NOTE about compute space:
* NP = dn_cfg.schk_count
}
need += sizeof(*cmd);
cmd->id = need;
- if (have >= need)
+ if (have >= need) /* got space, hold the lock */
break;
DN_BH_WUNLOCK();
} else {
error = sooptcopyout(sopt, cmd, sizeof(*cmd));
}
+ /* not enough memory, release the lock and give up */
+ /* XXX marta: here we hold the lock */
goto done;
}
ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, "
free(cmd, M_DUMMYNET);
if (start)
free(start, M_DUMMYNET);
+
return error;
}
+/*
+ * Functions to drain idle objects -- see dummynet_task() for some notes
+ */
/* Callback called on scheduler instance to delete it if idle */
static int
-drain_scheduler_cb(void *_si, void *arg)
+drain_scheduler_cb(void *_si, void *_arg)
{
struct dn_sch_inst *si = _si;
+ int *arg = _arg;
+ int empty;
+
+ /* Bound the work per invocation: (*arg)++ bumps the shared
+ * examined-objects counter. Note the parentheses: *arg++ would
+ * advance the pointer instead of incrementing the counter.
+ */
+ if ( (*arg)++ > dn_cfg.expire_object_examined)
+ return DNHT_SCAN_END;
if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL)
return 0;
- if (si->sched->fp->flags & DN_MULTIQUEUE) {
- if (si->q_count == 0)
- return si_destroy(si, NULL);
- else
- return 0;
- } else { /* !DN_MULTIQUEUE */
- if ((si+1)->ni.length == 0)
- return si_destroy(si, NULL);
+ /*
+ * if the scheduler is multiqueue, q_count also reflects empty
+ * queues that point to si, so we need to check si->q_count to
+ * tell whether we can remove the instance.
+ */
+ if (si->ni.length == 0) {
+ /* si was marked as idle:
+ * remove it or increment idle_si_wait counter
+ */
+ empty = (si->sched->fp->flags & DN_MULTIQUEUE) ?
+ (si->q_count == 0) : 1;
+ if (empty &&
+ (si->idle_time < dn_cfg.curr_time - dn_cfg.object_idle_tick))
+ return si_destroy(si, NULL);
else
- return 0;
+ dn_cfg.idle_si_wait++;
}
- return 0; /* unreachable */
+ return 0;
}
/* Callback called on scheduler to check if it has instances */
static int
-drain_scheduler_sch_cb(void *_s, void *arg)
+drain_scheduler_sch_cb(void *_s, void *_arg)
{
struct dn_schk *s = _s;
+ int *arg = _arg;
if (s->sch.flags & DN_HAVE_MASK) {
dn_ht_scan_bucket(s->siht, &s->drain_bucket,
- drain_scheduler_cb, NULL);
- s->drain_bucket++;
+ drain_scheduler_cb, _arg);
} else {
if (s->siht) {
- if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL)
+ if (drain_scheduler_cb(s->siht, _arg) == DNHT_SCAN_DEL)
s->siht = NULL;
}
}
- return 0;
+ /* (*arg)++ counts this scheduler as one examined object;
+ * *arg++ would advance the pointer instead of the counter.
+ */
+ return ( (*arg)++ > dn_cfg.expire_object_examined) ? DNHT_SCAN_END : 0;
}
/* Called every tick, try to delete a 'bucket' of scheduler */
void
dn_drain_scheduler(void)
{
+ int arg = 0;
+
dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch,
- drain_scheduler_sch_cb, NULL);
- dn_cfg.drain_sch++;
+ drain_scheduler_sch_cb, &arg);
}
/* Callback called on queue to delete if it is idle */
static int
-drain_queue_cb(void *_q, void *arg)
+drain_queue_cb(void *_q, void *_arg)
{
struct dn_queue *q = _q;
+ int *arg = _arg;
+
+ /* Bound the work per invocation: (*arg)++ bumps the shared
+ * examined-objects counter (not the pointer, as *arg++ would).
+ */
+ if ( (*arg)++ > dn_cfg.expire_object_examined)
+ return DNHT_SCAN_END;
if (q->ni.length == 0) {
- dn_delete_queue(q, DN_DESTROY);
- return DNHT_SCAN_DEL; /* queue is deleted */
+ if (q->q_time < dn_cfg.curr_time - dn_cfg.object_idle_tick) {
+ if (dn_delete_queue(q, DN_DESTROY | DN_DEL_SAFE) == 0)
+ return DNHT_SCAN_DEL; /* queue is deleted */
+ } else
+ dn_cfg.idle_queue_wait++;
}
return 0; /* queue isn't deleted */
/* Callback called on flowset used to check if it has queues */
static int
-drain_queue_fs_cb(void *_fs, void *_arg)
+drain_queue_fs_cb(void *_fs, void *_arg)
{
struct dn_fsk *fs = _fs;
+ int *arg = _arg;
if (fs->fs.flags & DN_QHT_HASH) {
/* Flowset has a hash table for queues */
dn_ht_scan_bucket(fs->qht, &fs->drain_bucket,
- drain_queue_cb, NULL);
- fs->drain_bucket++;
+ drain_queue_cb, _arg);
} else {
/* No hash table for this flowset, null the pointer
 * if the queue is deleted
 */
if (fs->qht) {
- if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL)
+ if (drain_queue_cb(fs->qht, _arg) == DNHT_SCAN_DEL)
fs->qht = NULL;
}
}
- return 0;
+ /* (*arg)++ counts this flowset as one examined object;
+ * *arg++ would advance the pointer instead of the counter.
+ */
+ return ( (*arg)++ > dn_cfg.expire_object_examined) ? DNHT_SCAN_END : 0;
}
/* Called every tick, try to delete a 'bucket' of queue */
void
dn_drain_queue(void)
{
+ int arg = 0;
+
/* scan a bucket of flowset */
dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs,
- drain_queue_fs_cb, NULL);
- dn_cfg.drain_fs++;
+ drain_queue_fs_cb, &arg);
}
/*
static void
ip_dn_init(void)
{
- static int init_done = 0;
-
- if (init_done)
+ if (dn_cfg.init_done)
return;
- init_done = 1;
- if (bootverbose)
- printf("DUMMYNET with IPv6 initialized (100131)\n");
-
+ printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet);
+ dn_cfg.init_done = 1;
/* Set defaults here. MSVC does not accept initializers,
* and this is also useful for vimages
*/
/* hash tables */
dn_cfg.max_hash_size = 1024; /* max in the hash tables */
- dn_cfg.hash_size = 64; /* default hash size */
- /* create hash tables for schedulers and flowsets.
- * In both we search by key and by pointer.
- */
- dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size,
- offsetof(struct dn_schk, schk_next),
- schk_hash, schk_match, schk_new);
- dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size,
- offsetof(struct dn_fsk, fsk_next),
- fsk_hash, fsk_match, fsk_new);
+ if (dn_cfg.hash_size == 0) /* XXX or <= 0 ? */
+ dn_cfg.hash_size = 64; /* default hash size */
+ /* hash tables for schedulers and flowsets are created
+ * when the first scheduler/flowset is inserted.
+ * This is done to allow to use the right hash_size value.
+ * When the last object is deleted, the table is destroyed,
+ * so a new hash_size value can be used.
+ * XXX rehash is not supported for now
+ */
+ dn_cfg.schedhash = NULL;
+ dn_cfg.fshash = NULL;
/* bucket index to drain object */
dn_cfg.drain_fs = 0;
dn_cfg.drain_sch = 0;
+ if (dn_cfg.expire_object == 0)
+ dn_cfg.expire_object = 50;
+ if (dn_cfg.object_idle_tick == 0)
+ dn_cfg.object_idle_tick = 1000;
+ if (dn_cfg.expire_object_examined == 0)
+ dn_cfg.expire_object_examined = 10;
+ if (dn_cfg.drain_ratio == 0)
+ dn_cfg.drain_ratio = 1;
+
+ // XXX what if we don't have a tsc ?
+#ifdef HAVE_TSC
+ dn_cfg.cycle_task_new = dn_cfg.cycle_task_old = readTSC();
+#endif
heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id));
SLIST_INIT(&dn_cfg.fsu);
SLIST_INIT(&dn_cfg.schedlist);
DN_LOCK_INIT();
- ip_dn_ctl_ptr = ip_dn_ctl;
- ip_dn_io_ptr = dummynet_io;
- TASK_INIT(&dn_task, 0, dummynet_task, NULL);
+ TASK_INIT(&dn_task, 0, dummynet_task, curvnet);
dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT,
taskqueue_thread_enqueue, &dn_tq);
taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet");
callout_init(&dn_timeout, CALLOUT_MPSAFE);
- callout_reset(&dn_timeout, 1, dummynet, NULL);
+ callout_reset_on(&dn_timeout, 1, dummynet, NULL, 0);
/* Initialize curr_time adjustment mechanics. */
getmicrouptime(&dn_cfg.prev_t);
#ifdef KLD_MODULE
static void
-ip_dn_destroy(void)
+ip_dn_destroy(int last)
{
callout_drain(&dn_timeout);
DN_BH_WLOCK();
- ip_dn_ctl_ptr = NULL;
- ip_dn_io_ptr = NULL;
+ if (last) {
+ printf("%s removing last instance\n", __FUNCTION__);
+ ip_dn_ctl_ptr = NULL;
+ ip_dn_io_ptr = NULL;
+ }
dummynet_flush();
DN_BH_WUNLOCK();
return EEXIST ;
}
ip_dn_init();
+ ip_dn_ctl_ptr = ip_dn_ctl;
+ ip_dn_io_ptr = dummynet_io;
return 0;
} else if (type == MOD_UNLOAD) {
#if !defined(KLD_MODULE)
printf("dummynet statically compiled, cannot unload\n");
return EINVAL ;
#else
- ip_dn_destroy();
+ ip_dn_destroy(1 /* last */);
return 0;
#endif
} else
"dummynet", dummynet_modevent, NULL
};
-DECLARE_MODULE(dummynet, dummynet_mod,
- SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY-1);
+#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN
+#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */
+DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD);
MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
MODULE_VERSION(dummynet, 1);
+
+/*
+ * Starting up. Done in order after dummynet_modevent() has been called.
+ * VNET_SYSINIT is also called for each existing vnet and each new vnet.
+ */
+//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL);
+
+/*
+ * Shutdown handlers up shop. These are done in REVERSE ORDER, but still
+ * after dummynet_modevent() has been called. Not called on reboot.
+ * VNET_SYSUNINIT is also called for each exiting vnet as it exits.
+ * or when the module is unloaded.
+ */
+//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL);
+
/* end of file */
{
#ifndef __FreeBSD__
return cred_check(insn, proto, oif,
- dst_ip, dst_port, src_ip, src_port,
- (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
+ dst_ip, dst_port, src_ip, src_port,
+ (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb);
#else /* FreeBSD */
struct inpcbinfo *pi;
int wildcard;
}
case O_LOG:
- ipfw_log(f, hlen, args, m,
+ ipfw_log(f, hlen, args, m,
oif, offset, tablearg, ip);
match = 1;
break;
break;
case O_SKIPTO:
- f->pcnt++; /* update stats */
- f->bcnt += pktlen;
- f->timestamp = time_uptime;
+ f->pcnt++; /* update stats */
+ f->bcnt += pktlen;
+ f->timestamp = time_uptime;
/* If possible use cached f_pos (in f->next_rule),
* whose version is written in f->next_rule
* (horrible hacks to avoid changing the ABI).
if (cmd->arg1 != IP_FW_TABLEARG &&
(uintptr_t)f->x_next == chain->id) {
f_pos = (uintptr_t)f->next_rule;
- } else {
+ } else {
int i = (cmd->arg1 == IP_FW_TABLEARG) ?
tablearg : cmd->arg1;
/* make sure we do not jump backward */
f->x_next =
(void *)(uintptr_t)chain->id;
}
- }
- /*
+ }
+ /*
* Skip disabled rules, and re-enter
* the inner loop with the correct
* f_pos, f, l and cmd.
- * Also clear cmdlen and skip_or
- */
+ * Also clear cmdlen and skip_or
+ */
for (; f_pos < chain->n_rules - 1 &&
(V_set_disable &
(1 << chain->map[f_pos]->set));
f_pos++)
;
- /* prepare to enter the inner loop */
+ /* Re-enter the inner loop at the skipto rule. */
f = chain->map[f_pos];
- l = f->cmd_len;
- cmd = f->cmd;
- match = 1;
- cmdlen = 0;
- skip_or = 0;
- break;
+ l = f->cmd_len;
+ cmd = f->cmd;
+ match = 1;
+ cmdlen = 0;
+ skip_or = 0;
+ continue;
+ break; /* not reached */
case O_REJECT:
/*
set_match(args, f_pos, chain);
args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ?
tablearg : cmd->arg1;
+ if (V_fw_one_pass)
+ args->rule.info |= IPFW_ONEPASS;
retval = (cmd->opcode == O_NETGRAPH) ?
IP_FW_NETGRAPH : IP_FW_NGTEE;
l = 0; /* exit inner loop */
ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq,
u_int32_t ack, int flags)
{
-#ifndef __FreeBSD__
- return NULL;
-#else
- struct mbuf *m;
+ struct mbuf *m = NULL; /* stupid compiler */
int len, dir;
struct ip *h = NULL; /* stupid compiler */
#ifdef INET6
}
return (m);
-#endif /* __FreeBSD__ */
}
/*
}
#endif
done:
- callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period * hz,
- ipfw_tick, vnetx);
+ callout_reset_on(&V_ipfw_timeout, V_dyn_keepalive_period * hz,
+ ipfw_tick, vnetx, 0);
CURVNET_RESTORE();
}
V_dyn_max = 4096; /* max # of dynamic rules */
callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE);
- callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet);
+ callout_reset_on(&V_ipfw_timeout, hz, ipfw_tick, curvnet, 0);
}
void
*/
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_log.c 200601 2009-12-16 10:48:40Z luigi $");
+__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_log.c 209845 2010-07-09 11:27:33Z glebius $");
/*
* Logging support for ipfw
return EINVAL;
}
+static int
+ipfw_log_output(struct ifnet *ifp, struct mbuf *m,
+ struct sockaddr *dst, struct route *ro)
+{
+ if (m != NULL)
+ m_freem(m);
+ return EINVAL;
+}
+
+static void
+ipfw_log_start(struct ifnet* ifp)
+{
+ panic("ipfw_log_start() must not be called");
+}
+
+static const u_char ipfwbroadcastaddr[6] =
+ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+
void
ipfw_log_bpf(int onoff)
{
ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_init = (void *)log_dummy;
ifp->if_ioctl = log_dummy;
- ifp->if_start = (void *)log_dummy;
- ifp->if_output = (void *)log_dummy;
+ ifp->if_start = ipfw_log_start;
+ ifp->if_output = ipfw_log_output;
ifp->if_addrlen = 6;
ifp->if_hdrlen = 14;
if_attach(ifp);
+ ifp->if_broadcastaddr = ipfwbroadcastaddr;
ifp->if_baudrate = IF_Mbps(10);
bpfattach(ifp, DLT_EN10MB, 14);
log_if = ifp;
if (V_fw_verbose == 0) {
#ifndef WITHOUT_BPF
- struct m_hdr mh;
if (log_if == NULL || log_if->if_bpf == NULL)
return;
- /* BPF treats the "mbuf" as read-only */
- mh.mh_next = m;
- mh.mh_len = ETHER_HDR_LEN;
- if (args->eh) { /* layer2, use orig hdr */
- mh.mh_data = (char *)args->eh;
- } else {
- /* add fake header. Later we will store
- * more info in the header
+
+ if (args->eh) /* layer2, use orig hdr */
+ BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m);
+ else
+ /* Add fake header. Later we will store
+ * more info in the header.
*/
- mh.mh_data = "DDDDDDSSSSSS\x08\x00";
- }
- BPF_MTAP(log_if, (struct mbuf *)&mh);
+ BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m);
#endif /* !WITHOUT_BPF */
return;
}
static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag);
#define V_ifaddr_event_tag VNET(ifaddr_event_tag)
-static void
+static void
ifaddr_change(void *arg __unused, struct ifnet *ifp)
{
struct cfg_nat *ptr;
/* ...using nic 'ifp->if_xname' as dynamic alias address. */
if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0)
continue;
- if_addr_rlock(ifp);
- TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
- if (ifa->ifa_addr == NULL)
- continue;
- if (ifa->ifa_addr->sa_family != AF_INET)
- continue;
- ptr->ip = ((struct sockaddr_in *)
- (ifa->ifa_addr))->sin_addr;
- LibAliasSetAddress(ptr->lib, ptr->ip);
- }
- if_addr_runlock(ifp);
+ if_addr_rlock(ifp);
+ TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+ if (ifa->ifa_addr == NULL)
+ continue;
+ if (ifa->ifa_addr->sa_family != AF_INET)
+ continue;
+ ptr->ip = ((struct sockaddr_in *)
+ (ifa->ifa_addr))->sin_addr;
+ LibAliasSetAddress(ptr->lib, ptr->ip);
}
+ if_addr_runlock(ifp);
+ }
IPFW_WUNLOCK(chain);
}
free(r, M_IPFW);
break;
default:
- printf("unknown redirect mode: %u\n", r->mode);
+ printf("unknown redirect mode: %u\n", r->mode);
/* XXX - panic?!?!? */
- break;
+ break;
}
}
}
remotePortCopy = 0;
r->alink[i] = LibAliasRedirectPort(ptr->lib,
r->laddr, htons(r->lport + i), r->raddr,
- htons(remotePortCopy), r->paddr,
+ htons(remotePortCopy), r->paddr,
htons(r->pport + i), r->proto);
if (r->alink[i] == NULL) {
r->alink[0] = NULL;
break;
default:
printf("unknown redirect mode: %u\n", r->mode);
- break;
+ break;
}
/* XXX perhaps return an error instead of panic ? */
if (r->alink[0] == NULL)
panic("LibAliasRedirect* returned NULL");
/* LSNAT handling. */
- for (i = 0; i < r->spool_cnt; i++) {
- ser_s = (struct cfg_spool *)&buf[off];
+ for (i = 0; i < r->spool_cnt; i++) {
+ ser_s = (struct cfg_spool *)&buf[off];
s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO);
- memcpy(s, ser_s, SOF_SPOOL);
- LibAliasAddServer(ptr->lib, r->alink[0],
- s->addr, htons(s->port));
- off += SOF_SPOOL;
- /* Hook spool entry. */
+ memcpy(s, ser_s, SOF_SPOOL);
+ LibAliasAddServer(ptr->lib, r->alink[0],
+ s->addr, htons(s->port));
+ off += SOF_SPOOL;
+ /* Hook spool entry. */
LIST_INSERT_HEAD(&r->spool_chain, s, _next);
- }
+ }
/* And finally hook this redir entry. */
LIST_INSERT_HEAD(&ptr->redir_chain, r, _next);
}
}
ip = mtod(mcl, struct ip *);
- /*
+ /*
* XXX - Libalias checksum offload 'duct tape':
- *
+ *
* locally generated packets have only pseudo-header checksum
* calculated and libalias will break it[1], so mark them for
* later fix. Moreover there are cases when libalias modifies
* it can handle delayed checksum and tso)
*/
- if (mcl->m_pkthdr.rcvif == NULL &&
+ if (mcl->m_pkthdr.rcvif == NULL &&
mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA)
ldt = 1;
c = mtod(mcl, char *);
if (args->oif == NULL)
- retval = LibAliasIn(t->lib, c,
+ retval = LibAliasIn(t->lib, c,
mcl->m_len + M_TRAILINGSPACE(mcl));
else
- retval = LibAliasOut(t->lib, c,
+ retval = LibAliasOut(t->lib, c,
mcl->m_len + M_TRAILINGSPACE(mcl));
if (retval == PKT_ALIAS_RESPOND) {
- m->m_flags |= M_SKIP_FIREWALL;
- retval = PKT_ALIAS_OK;
+ m->m_flags |= M_SKIP_FIREWALL;
+ retval = PKT_ALIAS_OK;
}
if (retval != PKT_ALIAS_OK &&
retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
}
mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
- /*
- * XXX - libalias checksum offload
- * 'duct tape' (see above)
+ /*
+ * XXX - libalias checksum offload
+ * 'duct tape' (see above)
*/
- if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
+ if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
ip->ip_p == IPPROTO_TCP) {
- struct tcphdr *th;
+ struct tcphdr *th;
th = (struct tcphdr *)(ip + 1);
- if (th->th_x2)
+ if (th->th_x2)
ldt = 1;
}
struct udphdr *uh;
u_short cksum;
- /* XXX check if ip_len can stay in net format */
- cksum = in_pseudo(
- ip->ip_src.s_addr,
- ip->ip_dst.s_addr,
- htons(ip->ip_p + ntohs(ip->ip_len) - (ip->ip_hl << 2))
- );
-
+ ip->ip_len = ntohs(ip->ip_len);
+ cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+ htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2)));
+
switch (ip->ip_p) {
case IPPROTO_TCP:
th = (struct tcphdr *)(ip + 1);
- /*
- * Maybe it was set in
- * libalias...
+ /*
+ * Maybe it was set in
+ * libalias...
*/
th->th_x2 = 0;
th->th_sum = cksum;
- mcl->m_pkthdr.csum_data =
+ mcl->m_pkthdr.csum_data =
offsetof(struct tcphdr, th_sum);
break;
case IPPROTO_UDP:
uh = (struct udphdr *)(ip + 1);
uh->uh_sum = cksum;
- mcl->m_pkthdr.csum_data =
+ mcl->m_pkthdr.csum_data =
offsetof(struct udphdr, uh_sum);
- break;
+ break;
}
/* No hw checksum offloading: do it ourselves */
if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) {
in_delayed_cksum(mcl);
mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
+ ip->ip_len = htons(ip->ip_len);
}
args->m = mcl;
return (IP_FW_NAT);
return res;
}
-static int
+static int
ipfw_nat_cfg(struct sockopt *sopt)
{
struct cfg_nat *ptr, *ser_n;
ser_n = (struct cfg_nat *)buf;
/* check valid parameter ser_n->id > 0 ? */
- /*
+ /*
* Find/create nat rule.
*/
IPFW_WLOCK(chain);
ptr = lookup_nat(&chain->nat, ser_n->id);
if (ptr == NULL) {
/* New rule: allocate and init new instance. */
- ptr = malloc(sizeof(struct cfg_nat),
+ ptr = malloc(sizeof(struct cfg_nat),
M_IPFW, M_NOWAIT | M_ZERO);
if (ptr == NULL) {
IPFW_WUNLOCK(chain);
}
IPFW_WUNLOCK(chain);
- /*
+ /*
* Basic nat configuration.
*/
ptr->id = ser_n->id;
- /*
- * XXX - what if this rule doesn't nat any ip and just
- * redirect?
+ /*
+ * XXX - what if this rule doesn't nat any ip and just
+ * redirect?
* do we set aliasaddress to 0.0.0.0?
*/
ptr->ip = ser_n->ip;
LibAliasSetAddress(ptr->lib, ptr->ip);
memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE);
- /*
+ /*
* Redir and LSNAT configuration.
*/
/* Delete old cfgs. */
struct cfg_nat *ptr;
struct ip_fw_chain *chain = &V_layer3_chain;
int i;
-
+
sooptcopyin(sopt, &i, sizeof i, sizeof i);
/* XXX validate i */
IPFW_WLOCK(chain);
static int
ipfw_nat_get_cfg(struct sockopt *sopt)
-{
+{
uint8_t *data;
struct cfg_nat *n;
struct cfg_redir *r;
int nat_cnt, off;
struct ip_fw_chain *chain;
int err = ENOSPC;
-
+
chain = &V_layer3_chain;
nat_cnt = 0;
off = sizeof(nat_cnt);
nat_cnt++;
if (off + SOF_NAT >= NAT_BUF_LEN)
goto nospace;
- bcopy(n, &data[off], SOF_NAT);
- off += SOF_NAT;
- LIST_FOREACH(r, &n->redir_chain, _next) {
+ bcopy(n, &data[off], SOF_NAT);
+ off += SOF_NAT;
+ LIST_FOREACH(r, &n->redir_chain, _next) {
if (off + SOF_REDIR >= NAT_BUF_LEN)
goto nospace;
bcopy(r, &data[off], SOF_REDIR);
- off += SOF_REDIR;
+ off += SOF_REDIR;
LIST_FOREACH(s, &r->spool_chain, _next) {
if (off + SOF_SPOOL >= NAT_BUF_LEN)
- goto nospace;
+ goto nospace;
bcopy(s, &data[off], SOF_SPOOL);
off += SOF_SPOOL;
- }
}
+ }
}
err = 0; /* all good */
nospace:
IPFW_RUNLOCK(chain);
if (err == 0) {
- bcopy(&nat_cnt, data, sizeof(nat_cnt));
- sooptcopyout(sopt, data, NAT_BUF_LEN);
+ bcopy(&nat_cnt, data, sizeof(nat_cnt));
+ sooptcopyout(sopt, data, NAT_BUF_LEN);
} else {
- printf("serialized data buffer not big enough:"
- "please increase NAT_BUF_LEN\n");
+ printf("serialized data buffer not big enough:"
+ "please increase NAT_BUF_LEN\n");
}
free(data, M_IPFW);
return (err);
/* one pass to count, one to copy the data */
i = 0;
LIST_FOREACH(ptr, &chain->nat, _next) {
- if (ptr->lib->logDesc == NULL)
+ if (ptr->lib->logDesc == NULL)
continue;
i++;
}
size = i * (LIBALIAS_BUF_SIZE + sizeof(int));
data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO);
- if (data == NULL) {
+ if (data == NULL) {
IPFW_RUNLOCK(chain);
- return (ENOSPC);
- }
+ return (ENOSPC);
+ }
i = 0;
LIST_FOREACH(ptr, &chain->nat, _next) {
if (ptr->lib->logDesc == NULL)
{
struct cfg_nat *ptr, *ptr_temp;
struct ip_fw_chain *chain;
-
+
chain = &V_layer3_chain;
IPFW_WLOCK(chain);
LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) {
* The pfilter hook to pass packets to ipfw_chk and then to
* dummynet, divert, netgraph or other modules.
* The packet may be consumed.
- */
+ */
int
ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir,
struct inpcb *inp)
if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */
goto again; /* continue with packet */
break;
-
+
case IP_FW_NAT:
+ /* honor one-pass in case of successful nat */
+ if (V_fw_one_pass)
+ break; /* ret is already 0 */
+ goto again;
+
case IP_FW_REASS:
goto again; /* continue with packet */
/* Cloning needed for tee? */
if (tee == 0) {
- clone = *m0; /* use the original mbuf */
+ clone = *m0; /* use the original mbuf */
*m0 = NULL;
} else {
clone = m_dup(*m0, M_DONTWAIT);
*/
/*
- * $Id: ipfw2_mod.c 5797 2010-03-21 16:31:08Z luigi $
+ * $Id: ipfw2_mod.c 10302 2012-01-19 21:49:23Z marta $
*
* The main interface to build ipfw+dummynet as a linux module.
* (and possibly as a windows module as well, though that part
#include <net/netfilter/nf_queue.h> /* nf_queue */
#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
#define __read_mostly
#endif
#ifdef __linux__
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
-#warning --- inet_hashtables not present on 2.4
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13)
+/* XXX was < 2.6.0: inet_hashtables.h is introduced in 2.6.14 */
+// #warning --- inet_hashtables not present on 2.4
#include <linux/tcp.h>
#include <net/route.h>
#include <net/sock.h>
return -ret; /* errors are < 0 on linux */
}
+/*
+ * Convert an mbuf into an skbuff
+ * At the moment this only works for ip packets fully contained
+ * in a single mbuf. We assume that on entry ip_len and ip_off are
+ * in host format, and the ip checksum is not computed.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* check boundary */
+int dst_output(struct skbuff *s)
+{
+ return 0;
+}
+
+struct sk_buff *
+mbuf2skbuff(struct mbuf* m)
+{
+ return NULL;
+}
+#else
+struct sk_buff *
+mbuf2skbuff(struct mbuf* m)
+{
+ struct sk_buff *skb;
+ size_t len = m->m_pkthdr.len;
+
+ /* used to lookup the routing table */
+ struct rtable *r;
+ struct flowi fl;
+ int ret = 0; /* success for ip_route_output_key() */
+
+ struct ip *ip = mtod(m, struct ip *);
+
+ /* XXX ip_output has ip_len and ip_off in network format,
+ * linux expects host format */
+ ip->ip_len = ntohs(ip->ip_len);
+ ip->ip_off = ntohs(ip->ip_off);
+
+ ip->ip_sum = 0;
+ ip->ip_sum = in_cksum(m, ip->ip_hl<<2);
+
+ /* fill flowi struct, we need just the dst addr, see XXX */
+ bzero(&fl, sizeof(fl));
+ flow_daddr.daddr = ip->ip_dst.s_addr;
+
+ /*
+ * ip_route_output_key() should increment
+ * r->u.dst.__use and call a dst_hold(dst)
+ * XXX verify how we release the resources.
+ */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38) /* check boundary */
+ r = ip_route_output_key(&init_net, &fl.u.ip4);
+#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) /* check boundary */
+ ret = ip_route_output_key(&init_net, &r, &fl);
+#else
+ ret = ip_route_output_key(&r, &fl);
+#endif
+ if (ret != 0 || r == NULL ) {
+ printf("NO ROUTE FOUND\n");
+ return NULL;
+ }
+
+ /* allocate the skbuff and the data */
+ skb = alloc_skb(len + sizeof(struct ethhdr), GFP_ATOMIC);
+ if (skb == NULL) {
+ printf("%s: can not allocate SKB buffers.\n", __FUNCTION__);
+ return NULL;
+ }
+
+ skb->protocol = htons(ETH_P_IP); // XXX 8 or 16 bit ?
+ /* sk_dst_set XXX take the lock (?) */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36)
+ skb_dst_set(skb, &r->u.dst);
+#else
+ skb_dst_set(skb, &r->dst);
+#endif
+ skb->dev = skb_dst(skb)->dev;
+
+ /* reserve space for ethernet header */
+ skb_reserve(skb, sizeof(struct ethhdr));
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22)
+ skb_reset_network_header(skb); // skb->network_header = skb->data - skb->head
+#else
+ skb->nh.raw = skb->data;
+#endif
+ /* set skbuff tail pointers and copy content */
+ skb_put(skb, len);
+ memcpy(skb->data, m->m_data, len);
+
+ return skb;
+}
+#endif /* keepalives not supported on linux 2.4 */
+
+/*
+ * This function is called to reinject packets to the
+ * kernel stack within the linux netfilter system
+ * or to send a new created mbuf.
+ * In the first case we have a valid sk_buff pointer
+ * encapsulated within the fake mbuf, so we can call
+ * the reinject function through netisr_dispatch.
+ * In the latter case we need to build an sk_buff from scratch,
+ * before sending out the packet.
+ */
int
ip_output(struct mbuf *m, struct mbuf __unused *opt,
struct route __unused *ro, int __unused flags,
struct ip_moptions __unused *imo, struct inpcb __unused *inp)
{
- netisr_dispatch(0, m);
- return 0;
+ if ( m->m_skb != NULL ) { /* reinjected packet, just call dispatch */
+ netisr_dispatch(0, m);
+ } else {
+ /* self-generated packet, wrap as appropriate and send */
+#ifdef __linux__
+ struct sk_buff *skb = mbuf2skbuff(m);
+
+ if (skb != NULL)
+ dst_output(skb);
+#else /* Windows */
+#endif
+ FREE_PKT(m);
+ }
+ return 0;
}
/*
return NF_QUEUE;
}
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) /* XXX was 2.6.0 */
#define NF_STOP NF_ACCEPT
#endif
#define nf_queue_entry nf_info /* for simplicity */
/* also, 2.4 and perhaps something else have different arguments */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* unsure on the exact boundary */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) /* XXX unsure */
/* on 2.4 we use nf_info */
#define QH_ARGS struct sk_buff *skb, struct nf_info *info, void *data
-#else /* 2.6.1.. 2.6.24 */
+#else /* 2.6.14 .. 2.6.24 */
#define QH_ARGS struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data
#endif
m->m_pkthdr.len = skb->len; /* total packet len */
m->m_pkthdr.rcvif = info->indev;
m->queue_entry = info;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) /* XXX was 2.6.0 */
m->m_data = skb->nh.iph;
#else
m->m_data = skb_network_header(skb);
struct ip_moptions;
struct inpcb;
-
/* XXX should include prototypes for netisr_dispatch and ip_output */
/*
* The reinjection routine after a packet comes out from dummynet.
* We must update the skb timestamp so ping reports the right time.
+ * This routine is also used (with num == -1) as FREE_PKT. XXX
*/
void
netisr_dispatch(int num, struct mbuf *m)
struct nf_queue_entry *info = m->queue_entry;
struct sk_buff *skb = m->m_skb; /* always used */
+ /*
+ * This function can be called by the FREE_PKT()
+ * used when ipfw generates its own mbuf packets
+ * or by the mbuf2skbuff() function.
+ */
m_freem(m);
- KASSERT((info != NULL), ("%s info null!\n", __FUNCTION__));
+ /* XXX check
+ * info is NULL in the case of a real mbuf
+ * (one created by the ipfw code without a
+ * valid sk_buff pointer)
+ */
+ if (info == NULL)
+ return;
+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) // XXX above 2.6.x ?
__net_timestamp(skb); /* update timestamp */
#endif
const __be32 daddr, const __be16 dport,
struct sk_buff *skb, int dir, struct bsd_ucred *u)
{
-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,13) /* XXX was 2.6.0 */
return -1;
#else
struct sock *sk;
*
* the unregister function changed arguments between 2.6.22 and 2.6.24
*/
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14)
+struct nf_queue_handler ipfw2_queue_handler_desc = {
+ .outfn = ipfw2_queue_handler,
+ .name = "ipfw2 dummynet queue",
+};
+#define REG_QH_ARG(fn) &(fn ## _desc)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) /* XXX was 2.6.0 */
static int
nf_register_hooks(struct nf_hook_ops *ops, int n)
{
nf_unregister_hook(ops + i);
}
}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) /* XXX was 2.6.0 */
#define REG_QH_ARG(fn) fn, NULL /* argument for nf_[un]register_queue_handler */
+#endif
#define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */
#define SET_MOD_OWNER
-#else /* linux >= 2.6.0 */
-
-struct nf_queue_handler ipfw2_queue_handler_desc = {
- .outfn = ipfw2_queue_handler,
- .name = "ipfw2 dummynet queue",
-};
-#define REG_QH_ARG(fn) &(fn ## _desc)
+#else /* linux > 2.6.17 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
#define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */
*/
/*
- * $Id: missing.h 5817 2010-03-23 09:50:56Z svn_panicucci $
+ * $Id: missing.h 11275 2012-06-10 17:27:40Z marta $
*
* Header for kernel variables and functions that are not available in
* userland.
#ifndef _MISSING_H_
#define _MISSING_H_
+/* sysctl.h and module.h are included before cdefs.h
+ * because cdefs.h defines __unused */
+
+#include <linux/sysctl.h>
+#include <linux/module.h>
#include <sys/cdefs.h>
+#include <linux/moduleparam.h>
/* portability features, to be set before the rest: */
#define HAVE_NET_IPLEN /* iplen/ipoff in net format */
#include <sys/socket.h> /* bsd-compat.c */
#include <netinet/in.h> /* bsd-compat.c */
#include <netinet/ip.h> /* local version */
+#define INADDR_TO_IFP(a, b) b = NULL
#else /* __linux__ */
/* The time_uptime a FreeBSD variable increased each second */
#ifdef __linux__
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,37) /* revise boundaries */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,37) /* revise boundaries */
#define time_uptime get_seconds()
#else /* OpenWRT */
#define time_uptime CURRENT_TIME
int in_cksum(struct mbuf *m, int len);
#define divert_cookie(mtag) 0
#define divert_info(mtag) 0
-#define INADDR_TO_IFP(a, b) b = NULL
#define pf_find_mtag(a) NULL
#define pf_get_mtag(a) NULL
#ifndef _WIN32
#define AF_LINK AF_ASH /* ? our sys/socket.h */
#endif
+/* search the local ip addresses, used for the "me" keyword */
+#include <linux/inetdevice.h>
+#define INADDR_TO_IFP(ip, b) \
+ b = ip_dev_find((struct net *)&init_net, ip.s_addr)
+
/* we don't pullup, either success or free and fail */
#define m_pullup(m, x) \
((m)->m_len >= x ? (m) : (FREE_PKT(m), NULL))
struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
#endif /* Linux < 2.6 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) && \
+ LINUX_VERSION_CODE > KERNEL_VERSION(2,6,16) /* XXX NOT sure, in 2.6.9 give an error */
#define module_param_named(_name, _var, _ty, _perm) \
//module_param(_name, _ty, 0644)
#endif
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
typedef unsigned long uintptr_t;
#ifdef __i386__
#define VNET_PTR(n) (&(n))
#define VNET(n) (n)
+VNET_DECLARE(int, ip_defttl);
+#define V_ip_defttl VNET(ip_defttl);
+
int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp,
int dir, struct inpcb *inp);
if (m->rm_flags & RNF_NORMAL) {
mmask = m->rm_leaf->rn_mask;
if (tt->rn_flags & RNF_NORMAL) {
+#if !defined(RADIX_MPATH)
log(LOG_ERR,
"Non-unique normal route, mask not entered\n");
+#endif
return tt;
}
} else
* SUCH DAMAGE.
*/
/*
- * $Id: glue.h 5822 2010-03-23 10:39:56Z svn_magno $
+ * $Id: glue.h 11277 2012-06-10 17:44:15Z marta $
*
* glue code to adapt the FreeBSD version to linux and windows,
* userland and kernel.
#ifndef _GLUE_H
#define _GLUE_H
+
/*
* common definitions to allow portability
*/
#include <sys/ioctl.h>
#include <time.h>
#include <errno.h>
-#include <netinet/ether.h>
+#ifdef __linux__
+#include <netinet/ether.h> /* linux only 20111031 */
+#endif
#else /* KERNEL_MODULE, kernel headers */
#ifdef __linux__
+
#include <linux/version.h>
#define ifnet net_device /* remap */
#endif
/* on 2.6.22, msg.h requires spinlock_types.h */
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) && \
+/* XXX spinlock_types.h was introduced in 2.6.14 */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) && \
LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
#include <linux/spinlock_types.h>
#endif
/* on freebsd sys/socket.h pf specific */
#define NET_RT_IFLIST 3 /* survey interface list */
+#if defined(__linux__) || defined(__CYGWIN32__)
/* on freebsd net/if.h XXX used */
struct if_data {
/* ... */
int profhz; /* profiling clock frequency */
};
+/* no sin_len in sockaddr, we only remap in userland */
+#define sin_len sin_zero[0]
+
+#endif /* Linux/Win */
+
/*
* linux does not have a reentrant version of qsort,
* so we the FreeBSD stdlib version.
int sysctlbyname(const char *name, void *oldp, size_t *oldlenp,
void *newp, size_t newlen);
-/* no sin_len in sockaddr, we only remap in userland */
-#define sin_len sin_zero[0]
#else /* KERNEL_MODULE */
#include <linux/in6.h>
#endif
-/* skb_dst() was introduced from linux 2.6.31 */
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) // or 2.4.x
-#define skb_dst(_dummy) skb->dst
+/* skb_dst() and skb_dst_set() was introduced from linux 2.6.31 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst);
+struct dst_entry *skb_dst(const struct sk_buff *skb);
+#endif
+
+/* The struct flowi changed */
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38) // check boundaries
+#define flow_daddr fl.u.ip4
+#else
+#define flow_daddr fl.nl_u.ip4_u
#endif
#endif /* __linux__ */
+/*
+ * Do not load prio_heap.h header because of conflicting names
+ * with our heap functions defined in include/netinet/ipfw/dn_heap.h
+ */
+#define _LINUX_PRIO_HEAP_H
+/*
+ * The following define prevents the ipv6.h header from being loaded.
+ * Starting from the 2.6.38 kernel the ipv6.h file, which is included
+ * by include/net/inetpeer.h in turn included by net/route.h
+ * include the system tcp.h file while we want to include
+ * our include/net/tcp.h instead.
+ */
+#ifndef _NET_IPV6_H
+#define _NET_IPV6_H
+static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2)
+{
+ memcpy(a1, a2, sizeof(struct in6_addr));
+}
+#endif /* _NET_IPV6_H */
+
#endif /* KERNEL_MODULE */
/*
#
+# $Id: Makefile 11277 2012-06-10 17:44:15Z marta $
+#
# GNUMakefile to build the userland part of ipfw on Linux and Windows
#
# enable extra debugging information
# Do not set with = or := so we can inherit from the caller
XOSARCH := $(shell uname)
OSARCH ?= $(XOSARCH)
-$(warning Building userland ipfw for $(VER) $(OSARCH))
+OSARCH := $(shell uname)
+OSARCH := $(findstring $(OSARCH),FreeBSD Linux Darwin)
+ifeq ($(OSARCH),)
+ OSARCH := Windows
+endif
-# utility to figure if gcc has a given option
-#################### extract from Kbuild.include
-# try-run
-# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
-# Exit code chooses option. "$$TMP" is can be used as temporary file and
-# is automatically cleaned up.
-try-run = $(shell set -e; \
- TMP="$(TMPOUT).$$$$.tmp"; \
- TMPO="$(TMPOUT).$$$$.o"; \
- if ($(1)) >/dev/null 2>&1; \
- then echo "$(2)"; \
- else echo "$(3)"; \
- fi; \
- rm -f "$$TMP" "$$TMPO")
-
-# cc-option
-# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
-
-cc-option = $(call try-run,\
- $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
-####################
+$(warning Building userland ipfw for $(VER) $(OSARCH))
#TCC=c:/tesi/tcc
ifeq ($(OSARCH),Linux)
EXTRA_CFLAGS += -D__BSD_VISIBLE
EXTRA_CFLAGS += -Werror
- EXTRA_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable)
-else # must be Cygwin ?
+ # Required by GCC 4.6
+ EXTRA_CFLAGS += -Wno-unused-but-set-variable
+endif
+ifeq ($(OSARCH),FreeBSD)
+ EXTRA_CFLAGS += -D__BSD_VISIBLE
+ EXTRA_CFLAGS += -Werror
+endif
+ifeq ($(OSARCH),Darwin)
+ EXTRA_CFLAGS += -D__BSD_VISIBLE
+ EXTRA_CFLAGS += -Werror
+endif
+# must be Cygwin ?
+ifeq ($(OSARCH),Windows)
ifeq ($(TCC),)
EXTRA_CFLAGS += -I/cygdrive/c/WinDDK/7600.16385.0/inc/ddk
EXTRA_CFLAGS += -I .
EFILES += netinet/ip_icmp.h
EFILES += sys/cdefs.h sys/wait.h
EFILES += sys/ioctl.h sys/socket.h
+
endif
# EXTRA_CFLAGS += -D_WIN32 # see who defines it
EXTRA_CFLAGS += -Dsetsockopt=wnd_setsockopt
CFLAGS += -I$(USRDIR)/include
LDFLAGS += -L$(USRDIR)/lib
else
- LDFLAGS += -L. -lws2_32
+ LDFLAGS += -L. -L$(TCC)/lib -lws2_32
endif
OBJS = ipfw2.o dummynet.o main.o ipv6.o qsort_r.o
*
* This software is provided ``AS IS'' without any warranties of any kind.
*
- * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/dummynet.c 203321 2010-01-31 21:39:25Z luigi $
+ * $FreeBSD: head/sbin/ipfw/dummynet.c 206843 2010-04-19 15:11:45Z luigi $
*
* dummynet support
*/
return ret;
}
+/* handle variable length structures moving back the pointer and fixing length */
+static void *
+o_compact(struct dn_id **o, int len, int real_length, int type)
+{
+ struct dn_id *ret = *o;
+
+ ret = O_NEXT(*o, -len);
+ oid_fill(ret, real_length, type, 0);
+ *o = O_NEXT(ret, real_length);
+ return ret;
+}
+
#if 0
static int
sort_q(void *arg, const void *pa, const void *pb)
id->proto,
id->src_ip, id->src_port,
id->dst_ip, id->dst_port);
-
- printf("BKT Prot ___Source IP/port____ "
- "____Dest. IP/port____ "
- "Tot_pkt/bytes Pkt/Byte Drp\n");
} else {
char buf[255];
printf("\n mask: %sproto: 0x%02x, flow_id: 0x%08x, ",
printf("%s/0x%04x -> ", buf, id->src_port);
inet_ntop(AF_INET6, &(id->dst_ip6), buf, sizeof(buf));
printf("%s/0x%04x\n", buf, id->dst_port);
+ }
+}
+static void
+print_header(struct ipfw_flow_id *id)
+{
+ if (!IS_IP6_FLOW_ID(id))
+ printf("BKT Prot ___Source IP/port____ "
+ "____Dest. IP/port____ "
+ "Tot_pkt/bytes Pkt/Byte Drp\n");
+ else
printf("BKT ___Prot___ _flow-id_ "
"______________Source IPv6/port_______________ "
"_______________Dest. IPv6/port_______________ "
"Tot_pkt/bytes Pkt/Byte Drp\n");
- }
}
static void
-list_flow(struct dn_flow *ni)
+list_flow(struct dn_flow *ni, int *print)
{
char buff[255];
struct protoent *pe = NULL;
struct in_addr ina;
struct ipfw_flow_id *id = &ni->fid;
+ if (*print) {
+ print_header(&ni->fid);
+ *print = 0;
+ }
pe = getprotobynumber(id->proto);
/* XXX: Should check for IPv4 flows */
printf("%3u%c", (ni->oid.id) & 0xff,
inet_ntop(AF_INET6, &(id->dst_ip6), buff, sizeof(buff)),
id->dst_port);
}
-
- /* Tcc relies on msvcrt.dll for printf, and
- * it does not support ANSI %llu syntax
- */
-#ifndef TCC
- printf("%4llu %8llu %2u %4u %3u\n",
- align_uint64(&ni->tot_pkts),
- align_uint64(&ni->tot_bytes),
+ pr_u64(&ni->tot_pkts, 4);
+ pr_u64(&ni->tot_bytes, 8);
+ printf("%2u %4u %3u\n",
ni->length, ni->len_bytes, ni->drops);
-#else
- /* XXX This should be printed correctly, but for some
- * weird reason, it is not. Making a printf for each
- * value is a workaround, until we don't undestand what's wrong
- */
- /*printf("%4I64u %8I64u %2u %4u %3u\n",
- align_uint64(&ni->tot_pkts),
- align_uint64(&ni->tot_bytes),
- ni->length, ni->len_bytes, ni->drops);*/
-
- printf("%4I64u ",align_uint64(&ni->tot_pkts));
- printf("%8I64u ",align_uint64(&ni->tot_bytes));
- printf("%2u ",ni->length);
- printf("%4u ",ni->len_bytes);
- printf("%3u\n",ni->drops);
-#endif
}
static void
list_pipes(struct dn_id *oid, struct dn_id *end)
{
char buf[160]; /* pending buffer */
+ int toPrint = 1; /* print header */
+
buf[0] = '\0';
-
for (; oid != end; oid = O_NEXT(oid, oid->len)) {
if (oid->len < sizeof(*oid))
errx(1, "invalid oid len %d\n", oid->len);
s->sched_nr,
s->name, s->flags, s->buckets, s->oid.id);
if (s->flags & DN_HAVE_MASK)
- print_mask(&s->sched_mask);
+ print_mask(&s->sched_mask);
}
break;
case DN_FLOW:
- list_flow((struct dn_flow *)oid);
+ list_flow((struct dn_flow *)oid, &toPrint);
break;
case DN_LINK: {
print_extra_delay_parms((struct dn_profile *)oid);
}
flush_buf(buf); // XXX does it really go here ?
- }
+ }
}
/*
#define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno
+/*
+ * Interpolate a set of probability-value tuples.
+ *
+ * This function takes as input a set of <prob, value> tuples
+ * and samples the interpolated curve described by the tuples.
+ *
+ * The user-defined points are stored in the points structure.
+ * The number of points is stored in points_no.
+ * The user-defined sampling value is stored in samples_no.
+ * The resulting samples are in the "samples" pointer.
+ *
+ * We assume that the last point, for the '1' value of the
+ * probability, should be defined. (XXX add checks for this)
+ *
+ * The input data are points and points_no.
+ * The output data are s (the array of s_no samples)
+ * and s_no (the number of samples)
+ *
+ */
+static void
+interpolate_samples(struct point *p, int points_no,
+ int *samples, int samples_no, const char *filename)
+{
+ double dy; /* delta on the y axis */
+ double y; /* current value of y */
+ double x; /* current value of x */
+ double m; /* the y slope */
+ int i; /* samples index */
+ int curr; /* points current index */
+
+ /* make sure that there are enough points. */
+ /* XXX Duplicated should be removed */
+ if (points_no < 3)
+ errx(EX_DATAERR, "%s too few samples, need at least %d",
+ filename, 3);
+
+ qsort(p, points_no, sizeof(struct point), compare_points);
+
+ dy = 1.0/samples_no;
+ y = 0;
+
+ for (i=0, curr = 0; i < samples_no; i++, y+=dy) {
+ /* This statement moves the curr pointer to the next point
+ * skipping the points with the same x value. We are
+ * guaranteed to exit from the loop because the
+ * last possible value of y is strictly less than 1
+ * and the last possible value of the y points is 1 */
+ while ( y >= p[curr+1].prob ) curr++;
+
+ /* compute the slope of the curve */
+ m = (p[curr+1].delay - p[curr].delay) / (p[curr+1].prob - p[curr].prob);
+ /* compute the x value starting from the current point */
+ x = p[curr].delay + (y - p[curr].prob) * m;
+ samples[i] = x;
+ }
+
+ /* add the last sample */
+ samples[i] = p[curr+1].delay;
+}
+
+/*
+ * p is the link (old pipe)
+ * pf is the profile
+ */
static void
load_extra_delays(const char *filename, struct dn_profile *p,
struct dn_link *link)
char line[ED_MAX_LINE_LEN];
FILE *f;
int lineno = 0;
- int i;
int samples = -1;
double loss = -1.0;
p->link_nr = link->link_nr;
profile_name[0] = '\0';
+
f = fopen(filename, "r");
if (f == NULL)
err(EX_UNAVAILABLE, "fopen: %s", filename);
else
arg = s;
}
- if (name == NULL) /* empty line */
+
+ if ((name == NULL) || (*name == '#')) /* empty line */
continue;
- if (arg == NULL)
- errx(ED_EFMT("missing arg for %s"), name);
if (!strcasecmp(name, ED_TOK_SAMPLES)) {
if (samples > 0)
if (atoi(arg) <=0)
errx(ED_EFMT("invalid number of samples"));
samples = atoi(arg);
- if (samples>ED_MAX_SAMPLES_NO)
+ if (samples>=ED_MAX_SAMPLES_NO-1)
errx(ED_EFMT("too many samples, maximum is %d"),
- ED_MAX_SAMPLES_NO);
+ ED_MAX_SAMPLES_NO-1);
do_points = 0;
} else if (!strcasecmp(name, ED_TOK_BW)) {
char buf[IFNAMSIZ];
read_bandwidth(arg, &link->bandwidth, buf, sizeof(buf));
+ p->bandwidth = link->bandwidth;
} else if (!strcasecmp(name, ED_TOK_LOSS)) {
if (loss != -1.0)
errx(ED_EFMT("duplicated token: %s"), name);
loss = 1;
}
- /* make sure that there are enough points. */
- if (points_no < ED_MIN_SAMPLES_NO)
- errx(ED_EFMT("too few samples, need at least %d"),
- ED_MIN_SAMPLES_NO);
-
- qsort(points, points_no, sizeof(struct point), compare_points);
-
- /* interpolation */
- for (i = 0; i<points_no-1; ++i) {
- double y1 = points[i].prob * samples;
- double x1 = points[i].delay;
- double y2 = points[i+1].prob * samples;
- double x2 = points[i+1].delay;
+ interpolate_samples(points, points_no, p->samples, samples, filename);
- int ix = y1;
- int stop = y2;
-
- if (x1 == x2) {
- for (; ix<stop; ++ix)
- p->samples[ix] = x1;
- } else {
- double m = (y2-y1)/(x2-x1);
- double c = y1 - m*x1;
- for (; ix<stop ; ++ix)
- p->samples[ix] = (ix - c)/m;
- }
- }
- p->samples_no = samples;
+ p->samples_no = samples++;
p->loss_level = loss * samples;
strncpy(p->name, profile_name, sizeof(p->name));
}
struct ipfw_flow_id *mask = NULL;
int lmax;
uint32_t _foo = 0, *flags = &_foo , *buckets = &_foo;
+ size_t max_pf_size = sizeof(struct dn_profile) + ED_MAX_SAMPLES_NO * sizeof(int);
/*
* allocate space for 1 header,
*/
lmax = sizeof(struct dn_id); /* command header */
lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) +
- sizeof(struct dn_fs) + sizeof(struct dn_profile);
+ sizeof(struct dn_fs);
+ lmax += max_pf_size;
av++; ac--;
/* Pipe number */
break;
case TOK_PROFILE:
+ {
+ size_t real_length;
+
NEED((!pf), "profile already set");
NEED(p, "profile");
- {
NEED1("extra delay needs the file name\n");
- pf = o_next(&buf, sizeof(*pf), DN_PROFILE);
+
+ /* load the profile structure using the DN_API */
+ pf = o_next(&buf, max_pf_size, DN_PROFILE);
load_extra_delays(av[0], pf, p); //XXX can't fail?
+
+ /* compact the dn_id structure */
+ real_length = sizeof(struct dn_profile) +
+ pf->samples_no * sizeof(int);
+ o_compact(&buf, max_pf_size, real_length, DN_PROFILE);
--ac; ++av;
}
break;
}
if (fs) {
/* XXX accept a 0 scheduler to keep the default */
- if (fs->flags & DN_QSIZE_BYTES) {
- size_t len;
+ if (fs->flags & DN_QSIZE_BYTES) {
+ size_t len;
long limit;
len = sizeof(limit);
} else {
ret = do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l);
if (ret != 0 || oid->id <= sizeof(*oid))
- goto done;
+ goto done;
buflen = oid->id + max_size;
oid->len = sizeof(*oid); /* restore */
}
.\"
-.\" $FreeBSD: head/sbin/ipfw/ipfw.8 205372 2010-03-20 14:42:16Z gavin $
+.\" $FreeBSD: head/sbin/ipfw/ipfw.8 211936 2010-08-28 16:32:01Z brucec $
.\"
-.Dd March 20, 2010
+.Dd July 27, 2010
.Dt IPFW 8
.Os
.Sh NAME
sysctl variable.
.It Cm ngtee Ar cookie
A copy of packet is diverted into netgraph, original
-packet is either accepted or continues with the next rule, depending on
-.Va net.inet.ip.fw.one_pass
-sysctl variable.
+packet continues with the next rule.
See
.Xr ng_ipfw 4
for more information on
.Pp
The SCHED_MASK is used to assign flows to one or more
scheduler instances, one for each
-value of the packet's 5-fuple after applying SCHED_MASK.
+value of the packet's 5-tuple after applying SCHED_MASK.
As an example, using ``src-ip 0xffffff00'' creates one instance
for each /24 destination subnet.
.Pp
to the chosen format.
The unit for delay is milliseconds.
Data points do not need to be sorted.
-Also, tne number of actual lines can be different
+Also, the number of actual lines can be different
from the value of the "samples" parameter:
.Nm
utility will sort and interpolate
interface is not available after processing by
.Nm dummynet
so those packets are dropped in the output path.
-Care should be taken to insure that link-local packets are not passed to
+Care should be taken to ensure that link-local packets are not passed to
.Nm dummynet .
.Sh CHECKLIST
Here are some important points to consider when designing your
*
* NEW command line interface for IP firewall facility
*
- * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipfw2.c 203369 2010-02-02 07:39:56Z luigi $
+ * $FreeBSD: head/sbin/ipfw/ipfw2.c 206843 2010-04-19 15:11:45Z luigi $
*/
#include <sys/types.h>
{ NULL, 0 } /* terminator */
};
-/*
- * The following is used to generate a printable argument for
- * 64-bit numbers, irrespective of platform alignment and bit size.
- * Because all the printf in this program use %llu as a format,
- * we just return an unsigned long long, which is larger than
- * we need in certain cases, but saves the hassle of using
- * PRIu64 as a format specifier.
- * We don't care about inlining, this is not performance critical code.
+/*
+ * Helper routine to print a possibly unaligned uint64_t on
+ * various platform. If width > 0, print the value with
+ * the desired width, followed by a space;
+ * otherwise, return the required width.
*/
-unsigned long long
-align_uint64(const uint64_t *pll)
+int
+pr_u64(uint64_t *pd, int width)
{
- uint64_t ret;
-
- bcopy (pll, &ret, sizeof(ret));
- return ret;
+#ifdef TCC
+#define U64_FMT "I64"
+#else
+#define U64_FMT "llu"
+#endif
+ uint64_t u;
+ unsigned long long d;
+
+ bcopy (pd, &u, sizeof(u));
+ d = u;
+ return (width > 0) ?
+ printf("%*" U64_FMT " ", width, d) :
+ snprintf(NULL, 0, "%" U64_FMT, d) ;
+#undef U64_FMT
}
void *
}
printf("%05u ", rule->rulenum);
- if (pcwidth>0 || bcwidth>0)
+ if (pcwidth > 0 || bcwidth > 0) {
+ pr_u64(&rule->pcnt, pcwidth);
+ pr_u64(&rule->bcnt, bcwidth);
+ }
- /* Tcc relies on msvcrt.dll for printf, and
- * it does not support ANSI %llu syntax
- */
-#ifndef TCC
- printf("%*llu %*llu ", pcwidth, align_uint64(&rule->pcnt),
- bcwidth, align_uint64(&rule->bcnt));
-#else
- printf("%*I64u %*I64u ", pcwidth, align_uint64(&rule->pcnt),
- bcwidth, align_uint64(&rule->bcnt));
-#endif
if (co.do_time == 2)
printf("%10u ", rule->timestamp);
else if (co.do_time == 1) {
}
bcopy(&d->rule, &rulenum, sizeof(rulenum));
printf("%05d", rulenum);
- if (pcwidth>0 || bcwidth>0)
-
- /* Tcc relies on msvcrt.dll for printf, and
- * it does not support ANSI %llu syntax
- */
-#ifndef TCC
- printf(" %*llu %*llu (%ds)", pcwidth,
- align_uint64(&d->pcnt), bcwidth,
- align_uint64(&d->bcnt), d->expire);
-#else
- /*printf(" %*I64u %*I64u (%ds)", pcwidth,
- align_uint64(&d->pcnt), bcwidth,
- align_uint64(&d->bcnt), d->expire);*/
-
- //XXX workaround here, for multiple I64 on the same printf
- printf(" %*I64u",pcwidth,align_uint64(&d->pcnt));
- printf(" %*I64u",bcwidth,align_uint64(&d->bcnt));
- printf(" (%ds)",d->expire);
-#endif
+ if (pcwidth > 0 || bcwidth > 0) {
+ printf(" ");
+ pr_u64(&d->pcnt, pcwidth);
+ pr_u64(&d->bcnt, bcwidth);
+ printf("(%ds)", d->expire);
+ }
switch (d->dyn_type) {
case O_LIMIT_PARENT:
printf(" PARENT %d", d->count);
} else if (_substrcmp(*av, "firewall") == 0) {
sysctlbyname("net.inet.ip.fw.enable", NULL, 0,
&which, sizeof(which));
+ sysctlbyname("net.inet6.ip6.fw.enable", NULL, 0,
+ &which, sizeof(which));
} else if (_substrcmp(*av, "one_pass") == 0) {
sysctlbyname("net.inet.ip.fw.one_pass", NULL, 0,
&which, sizeof(which));
continue;
/* packet counter */
-
- /* Tcc relies on msvcrt.dll for printf, and
- * it does not support ANSI %llu syntax
- */
-#ifndef TCC
- width = snprintf(NULL, 0, "%llu", align_uint64(&r->pcnt));
-#else
- width = snprintf(NULL, 0, "%I64u", align_uint64(&r->pcnt));
-#endif
+ width = pr_u64(&r->pcnt, 0);
if (width > pcwidth)
pcwidth = width;
/* byte counter */
-#ifndef TCC
- width = snprintf(NULL, 0, "%llu",align_uint64(&r->bcnt));
-#else
- width = snprintf(NULL, 0, "%I64u",align_uint64(&r->bcnt));
-#endif
+ width = pr_u64(&r->bcnt, 0);
if (width > bcwidth)
bcwidth = width;
}
if (set != co.use_set - 1)
continue;
}
-
- /* Tcc relies on msvcrt.dll for printf, and
- * it does not support ANSI %llu syntax
- */
-#ifndef TCC
- width = snprintf(NULL, 0, "%llu",align_uint64(&d->pcnt));
-#else
- width = snprintf(NULL, 0, "%I64u",align_uint64(&d->pcnt));
-#endif
+ width = pr_u64(&d->pcnt, 0);
if (width > pcwidth)
pcwidth = width;
-#ifndef TCC
- width = snprintf(NULL, 0, "%llu",align_uint64(&d->bcnt));
-#else
- width = snprintf(NULL, 0, "%I64u",align_uint64(&d->bcnt));
-#endif
+ width = pr_u64(&d->bcnt, 0);
if (width > bcwidth)
bcwidth = width;
}
*
* NEW command line interface for IP firewall facility
*
- * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipfw2.h 203280 2010-01-31 12:21:20Z luigi $
+ * $FreeBSD: head/sbin/ipfw/ipfw2.h 206843 2010-04-19 15:11:45Z luigi $
*/
/*
#define NEED(_p, msg) {if (!_p) errx(EX_USAGE, msg);}
#define NEED1(msg) {if (!(*av)) errx(EX_USAGE, msg);}
-unsigned long long align_uint64(const uint64_t *pll);
+int pr_u64(uint64_t *pd, int width);
/* memory allocation support */
void *safe_calloc(size_t number, size_t size);
* to make simpler further parsing.
*/
for (i=0; i<oldac; i++)
- l+=strlen(oldav[i]);
+ l += strlen(oldav[i]);
av_size = (oldac+1) * sizeof(char *) + l + oldac;
av = safe_calloc(av_size, 1);
# Runs every 5 minutes and clean ipfw expired rules
-# $Id: ipfw.cron 6069 2010-04-15 09:35:33Z marta $
*/5 * * * * root echo "super killexpired" | /vsys/ipfw-be root > /dev/null 2>&1
#
-# $Id: ipfwslice.spec 16174 2009-12-15 13:38:15Z marta $
-#
# TODO:
# restart crond
# modprobe ipfw_mod.ko (depmod ?)
-# $Id: planetlab-tags.mk 4533 2009-12-16 14:39:23Z luigi $
# These are good to build the ipfw modules from svn on kernels 2.6.22
linux-2.6-SVNBRANCH := 22
linux-2.6-SVNPATH := http://svn.planet-lab.org/svn/linux-2.6/tags/linux-2.6-22-39-1