From: Thierry Parmentelat Date: Mon, 11 Jun 2012 07:01:50 +0000 (+0200) Subject: integrated X-Git-Tag: ipfw-20120610-1~6 X-Git-Url: http://git.onelab.eu/?p=ipfw.git;a=commitdiff_plain;h=28a7fe9d930667786b902af6697c01eb87694173 integrated http://info.iet.unipi.it/~marta/dummynet/ipfw3-20120610.tar.gz should build under 3.3/f16 kernels as well as older ones --- diff --git a/Makefile b/Makefile index 21530fc..b2ed479 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# $Id$ +# $Id: Makefile 8654 2011-05-23 08:39:50Z marta $ # # Top level makefile for building ipfw kernel and userspace. # You can run it manually or also under the Planetlab build. @@ -62,6 +62,7 @@ snapshot: --exclude tcc-0.9.25-bsd \ --exclude original_passthru \ --exclude ipfw3.diff --exclude add_rules \ + --exclude test --exclude test_ \ ipfw3 ) bindist: @@ -119,4 +120,32 @@ planetlab_update: @echo "and commit with:" @echo "(cd /tmp/pl-tmp/pl/trunk; svn ci -m 'Update from the mail ipfw repo.')" +openwrt_release: + # create a temporary directory + $(eval TMPDIR := $(shell mktemp -d -p /tmp/ ipfw3_openwrt_XXXXX)) + # create the source destination directory + $(eval IPFWDIR := ipfw3-$(DATE)) + $(eval DSTDIR := $(TMPDIR)/$(IPFWDIR)) + mkdir $(DSTDIR) + # copy the package, clean objects and svn info + cp -r ./ipfw ./dummynet2 glue.h Makefile ./configuration README $(DSTDIR) + (cd $(DSTDIR); make -s distclean; find . 
-name .svn | xargs rm -rf) + (cd $(TMPDIR); tar czf $(IPFWDIR).tar.gz $(IPFWDIR)) + + # create the port files in /tmp/ipfw3-port + $(eval PORTDIR := $(TMPDIR)/ipfw3) + mkdir -p $(PORTDIR)/patches + # generate the Makefile, PKG_VERSION and PKG_MD5SUM + md5sum $(DSTDIR).tar.gz | cut -d ' ' -f 1 > $(TMPDIR)/md5sum + cat ./OPENWRT/Makefile | \ + sed s/PKG_VERSION:=/PKG_VERSION:=$(DATE)/ | \ + sed s/PKG_MD5SUM:=/PKG_MD5SUM:=`cat $(TMPDIR)/md5sum`/ \ + > $(PORTDIR)/Makefile + + @echo "" + @echo "The openwrt port is in $(TMPDIR)/ipfw3-port" + @echo "The source file should be copied to the public server:" + @echo "scp $(DSTDIR).tar.gz marta@info.iet.unipi.it:~marta/public_html/dummynet" + @echo "after this the temporary directory $(TMPDIR) can be removed." + install: diff --git a/README b/README index c65b91a..9c33bab 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ # -# $Id: README 6070 2010-04-15 11:58:21Z marta $ +# $Id: README 8977 2011-07-04 11:47:59Z luigi $ # This directory contains a port of ipfw and dummynet to Linux/OpenWrt @@ -78,18 +78,18 @@ Windows: =================== BUILD INSTRUCTIONS ========================== -***** Windows XP ****** +***** Windows (XPi, Windows7) ****** You can find a pre-built version in the binary/ subdirectory. To build your own version of the package you need: - - MSVC DDK available from ... - http://www.microsoft.com/whdc/DevTools/WDK/WDKpkg.mspx + - MSVC DDK available from + http://msdn.microsoft.com/en-us/windows/hardware/gg487463.aspx - optionally, DbgView if you want to see diagnostic http://technet.microsoft.com/en-us/sysinternals/bb896647.aspx - cygwin, http://www.cygwin.com/ with base packages, make, c compiler, possibly an editor - and subversion. 
+ and subversion (suggest: tortoiseSvn) Edit Makefile in the root directory, and set configuration variables to match your current system (hard drive @@ -101,6 +101,10 @@ Windows: ipfw.sys (an NDIS intermediate filter driver) dummynet.inf and dummynet_m.inf (installer files) + Cross compilation of the userland side under FreeBSD is possible with + gmake TCC=`pwd`/tcc-0.9.25-bsd/win32 CC=`pwd`/tcc-0.9.25-bsd/win32/bin/wintcc + (wintcc is a custom version of tcc which produces Windows code) + ***** Windows crosscompilation for 64 bit using DDK ****** Edit root directory's Makefile and set target operating system @@ -249,19 +253,20 @@ nodes with dummynet emulation capabilities. sudo yum -y install subversion rpm-build rpm-devel m4 redhat-rpm-config make gcc # new build installation requires the gnupg package sudo yum -y install gnupg + # the linux kernel and the ipfw source can be fetched by git + sudo yum -y install git # create and move to a work directory mkdir -p test # extract a planetlab distribution to directory XYZ - (cd test; svn co http://svn.planet-lab.org/svn/build/trunk XYZ) - # copy the planetlab/*mk files here, overriding existing ones - cp planetlab/*mk test/XYZ + (cd test; git clone git://git.onelab.eu/build ./XYZ) # download the specfiles and do some patching. # Results are into SPEC/ (takes 5 minutes) - (cd test/XYZ; make stage1=true PLDISTRO=planetlab ) + (cd test/XYZ; make stage1=true PLDISTRO=onelab) # Building the slice code is fast, the root code takes longer # as it needs to rebuild the whole kernel - (cd test/XYZ; sudo make ipfwslice ipfwroot) + (cd test/XYZ; sudo make ipfwslice PLDISTRO=onelab) + (cd test/XYZ; sudo make ipfwroot PLDISTRO=onelab) The kernel dependency phase is a bit time consuming, but does not need to be redone if we are changing the ipfw sources only. 
diff --git a/configuration/README b/configuration/README new file mode 100644 index 0000000..778f7aa --- /dev/null +++ b/configuration/README @@ -0,0 +1,14 @@ +This directory contains some ipfw configurations and a script +to safely change the firewall rules. + +The firewall configuration comes from the FreeBSD initial script. +The change_rules_linux.sh allows to change the ipfw rules and +in case of a misconfiguration which prevents to reach the remote +host, to restore the old ruleset. + +To configure the firewall behavior, edit the ipfw.conf file and +execute the ./change_rules_linux.sh script. + +The ipfw program executable should be located in /sbin (XXX) + +XXX seems we use something which is not compatible with dash diff --git a/configuration/change_rules.sh b/configuration/change_rules.sh new file mode 100755 index 0000000..8f23369 --- /dev/null +++ b/configuration/change_rules.sh @@ -0,0 +1,159 @@ +#!/bin/sh +# +# Copyright (c) 2000 Alexandre Peixoto +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD: src/share/examples/ipfw/change_rules.sh,v 1.6 2003/09/07 07:52:56 jmg Exp $ + +# Change ipfw(8) rules with safety guarantees for remote operation +# +# Invoke this script to edit ${firewall_script}. It will call ${EDITOR}, +# or vi(1) if the environment variable is not set, for you to edit +# ${firewall_script}, ask for confirmation, and then run +# ${firewall_script}. You can then examine the output of ipfw list and +# confirm whether you want the new version or not. +# +# If no answer is received in 30 seconds, the previous +# ${firewall_script} is run, restoring the old rules (this assumes ipfw +# flush is present in it). +# +# If the new rules are confirmed, they'll replace ${firewall_script} and +# the previous ones will be copied to ${firewall_script}.{date}. Mail +# will also be sent to root with a unified diff of the rule change. +# +# Unapproved rules are kept in ${firewall_script}.new, and you are +# offered the option of changing them instead of the present rules when +# you call this script. +# +# This script could be improved by using version control +# software. + +# XXX on linux /etc/rc.conf defines: +# firewall_type and firewall_script + +if [ -r /etc/defaults/rc.conf ]; then + . /etc/defaults/rc.conf + source_rc_confs +elif [ -r /etc/rc.conf ]; then + . 
/etc/rc.conf +fi + +EDITOR=${EDITOR:-/usr/bin/vi} +PAGER=${PAGER:-/usr/bin/more} + +# on linux the default mktemp invocation behavior +# is different, we should change the temporary file creation +tempfoo=`basename $0` +#TMPFILE=`mktemp -t ${tempfoo}` || exit 1 +TMPFILE=`mktemp -t ${tempfoo}.XXXXX` || exit 1 + +get_yes_no() { + while true + do + echo -n "$1 (Y/N) ? " + read -t 30 a + if [ $? != 0 ]; then + a="No"; + return; + fi + case $a in + [Yy]) a="Yes"; + return;; + [Nn]) a="No"; + return;; + *);; + esac + done +} + +restore_rules() { + nohup sh ${firewall_script} /dev/null 2>&1 + rm ${TMPFILE} + exit 1 +} + +case "${firewall_type}" in +[Cc][Ll][Ii][Ee][Nn][Tt]|\ +[Cc][Ll][Oo][Ss][Ee][Dd]|\ +[Oo][Pp][Ee][Nn]|\ +[Ss][Ii][Mm][Pp][Ll][Ee]|\ +[Uu][Nn][Kk][Nn][Oo][Ww][Nn]) + edit_file="${firewall_script}" + rules_edit=no + ;; +*) + if [ -r "${firewall_type}" ]; then + edit_file="${firewall_type}" + rules_edit=yes + fi + ;; +esac + +if [ -f ${edit_file}.new ]; then + get_yes_no "A new rules file already exists, do you want to use it" + [ $a = 'No' ] && cp ${edit_file} ${edit_file}.new +else + cp ${edit_file} ${edit_file}.new +fi + +trap restore_rules SIGHUP + +${EDITOR} ${edit_file}.new + +get_yes_no "Do you want to install the new rules" + +[ $a = 'No' ] && exit 1 + +cat < ${TMPFILE} 2>&1 +else + nohup sh ${firewall_script}.new \ + < /dev/null > ${TMPFILE} 2>&1 +fi +sleep 2; +get_yes_no "Would you like to see the resulting new rules" +[ $a = 'Yes' ] && ${PAGER} ${TMPFILE} +get_yes_no "Type y to keep the new rules" +[ $a != 'Yes' ] && restore_rules + +DATE=`date "+%Y%m%d%H%M"` +cp ${edit_file} ${edit_file}.$DATE +mv ${edit_file}.new ${edit_file} +cat </dev/null + fi + ${fwcmd} add deny $log ip from any to any + ;; + +[Cc][Ll][Oo][Ss][Ee][Dd]) + ${fwcmd} add 65000 deny ip from any to any + ;; +[Uu][Nn][Kk][Nn][Oo][Ww][Nn]) + ;; +*) + if [ -r "${firewall_type}" ]; then + ${fwcmd} ${firewall_flags} ${firewall_type} + fi + ;; +esac diff --git a/dummynet2/Makefile 
b/dummynet2/Makefile index 3d4a42b..caee67b 100644 --- a/dummynet2/Makefile +++ b/dummynet2/Makefile @@ -1,4 +1,4 @@ -# $Id: Makefile 5858 2010-03-24 16:16:19Z svn_magno $ +# $Id: Makefile 11277 2012-06-10 17:44:15Z marta $ # gnu Makefile to build linux/Windows module for ipfw+dummynet. # # The defaults are set to build without modifications on PlanetLab @@ -260,12 +260,15 @@ endif WARN := -O1 -Wall -Werror -DDEBUG_SPINLOCK -DDEBUG_MUTEXES # The main target + # Required by GCC 4.6 + ccflags-y += -Wno-unused-but-set-variable + # Required by kernel <= 2.6.22, ccflags-y is used on newer version LINUX_VERSION_CODE := $(shell grep LINUX_VERSION_CODE $(KERNELPATH)/include/linux/version.h|cut -d " " -f3) ifeq ($(shell if [ -z $(LINUX_VERSION_CODE) ] ; then echo "true"; fi),true) $(warning "---- Perhaps you miss a (cd $(KERNELPATH); make oldconfig; make prepare; make scripts)"); endif - ifeq ($(shell if [ $(LINUX_VERSION_CODE) -le 132630 ] ; then echo "true"; fi),true) + ifeq ($(shell if [ "$(LINUX_VERSION_CODE)" -le 132630 ] ; then echo "true"; fi),true) EXTRA_CFLAGS += $(ccflags-y) endif @@ -330,7 +333,7 @@ EFILES += netinet/udp_var.h EFILES += netinet6/ip6_var.h -EFILES += sys/_lock.h sys/_rwlock.h sys/_mutex.h sys/jail.h +EFILES += sys/_lock.h sys/_rwlock.h sys/rmlock.h sys/_mutex.h sys/jail.h EFILES += sys/condvar.h sys/eventhandler.h sys/domain.h EFILES += sys/limits.h sys/lock.h sys/mutex.h sys/priv.h EFILES += sys/proc.h sys/rwlock.h sys/socket.h sys/socketvar.h diff --git a/dummynet2/bsd_compat.c b/dummynet2/bsd_compat.c index 21d19b6..1397951 100644 --- a/dummynet2/bsd_compat.c +++ b/dummynet2/bsd_compat.c @@ -24,7 +24,7 @@ */ /* - * $Id: bsd_compat.c 5813 2010-03-22 18:05:13Z svn_magno $ + * $Id: bsd_compat.c 6320 2010-05-24 11:54:36Z svn_panicucci $ * * kernel variables and functions that are not available in linux. */ @@ -48,7 +48,8 @@ long tick = 1000; /* XXX is this 100000/hz ? 
*/ int bootverbose = 0; struct timeval boottime; -int ip_defttl; +int ip_defttl = 64; /* XXX set default value */ +int max_linkhdr = 16; int fw_one_pass = 1; u_long in_ifaddrhmask; /* mask for hash table */ struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ @@ -365,6 +366,25 @@ fnmatch(const char *pattern, const char *string, int flags) return 1; /* no match */ } + +/* + * linux 2.6.33 defines these functions to access to + * skbuff internal structures. Define the missing + * function for the previous versions too. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) +inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) +{ + skb->dst = dst; +} + +inline struct dst_entry *skb_dst(const struct sk_buff *skb) +{ + return (struct dst_entry *)skb->dst; +} +#endif + + /* support for sysctl emulation. * XXX this is actually MI code that should be enabled also on openwrt */ diff --git a/dummynet2/dn_heap.c b/dummynet2/dn_heap.c index 390ae8d..a56d185 100644 --- a/dummynet2/dn_heap.c +++ b/dummynet2/dn_heap.c @@ -27,7 +27,7 @@ /* * Binary heap and hash tables, used in dummynet * - * $Id: dn_heap.c 5646 2010-03-08 12:48:30Z luigi $ + * $Id: dn_heap.c 7119 2010-07-15 13:51:07Z luigi $ */ #include @@ -442,109 +442,147 @@ dn_ht_entries(struct dn_ht *ht) return ht ? ht->entries : 0; } -/* lookup and optionally create or delete element */ +/* + * Helper function to scan a bucket in the hash table, it + * can only be called on a non-empty bucket for a valid table. + * + * In lookup and scan, consider ht->ht[i] as pointing to the tail + * of the queue (head is NEXTP(tail). The 'empty' value is irrelevant. + * While searching, start analysing p = head, end when p == tail. + * Note that 'tail' is a cache of the _original_ ht->ht[i] + * and is used to check for loop termination. If you remove + * it, you must also adjust 'p' when deleting the 'tail' element. 
+ */ +#define NEXT(_h, _p) *((void **)((char *)(_p) + (_h)->ofs)) +static int +dn_ht_scan_body(struct dn_ht *ht, int *bucket, + int (*fn)(void *, void *), void *arg) +{ + int ret, found = 0, i = *bucket; + void *tail, *pp, *p, *nextp; + + pp = tail = ht->ht[i]; + do { + p = NEXT(ht, pp); + nextp = NEXT(ht, p); + ret = fn(p, arg); + if ((ret & DNHT_SCAN_DEL) == 0) { + pp = p; /* prepare for next loop */ + } else { + found++; + ht->entries--; + /* skip current element */ + if (pp != p) + /* pp == p implies p == tail */ + NEXT(ht, pp) = nextp; + if (p == tail) + ht->ht[i] = (pp != p) ? pp : NULL; + } + if (ret & DNHT_SCAN_END) { + /* Update ht->ht[i] before returning */ + ht->ht[i] = (ht->ht[i] == NULL) ? NULL : pp; + return found; + } + } while (p != tail); + + (*bucket)++; + return found; +} + +/* + * lookup and optionally create or delete element. + * This is an optimized version of the scan so it is coded + * inline. + */ void * dn_ht_find(struct dn_ht *ht, uintptr_t key, int flags, void *arg) { - int i; - void **pp, *p; + int i, found; + void *tail, *pp, *p; /* pp is the prev element, p is current */ if (ht == NULL) /* easy on an empty hash */ return NULL; i = (ht->buckets == 1) ? 0 : (ht->hash(key, flags, arg) & ht->buckets); - for (pp = &ht->ht[i]; (p = *pp); pp = (void **)((char *)p + ht->ofs)) { - if (flags & DNHT_MATCH_PTR) { - if (key == (uintptr_t)p) - break; - } else if (ht->match(p, key, flags, arg)) /* found match */ - break; + pp = tail = ht->ht[i]; + if (tail) { /* non empty, try a lookup */ + do { + p = NEXT(ht, pp); + found = (flags & DNHT_MATCH_PTR) ? key == (uintptr_t)p : + ht->match(p, key, flags, arg); + if (!found) + continue; + if (flags & DNHT_REMOVE) { + ht->entries--; + if (p != pp) /* skip current element */ + NEXT(ht, pp) = NEXT(ht, p); + if (p == tail) + ht->ht[i] = (pp != p) ? pp : NULL; + } + return p; + } while ( (pp = p) != tail); } + /* not found */ + if ((flags & DNHT_INSERT) == 0) + return NULL; + p = ht->newh ? 
ht->newh(key, flags, arg) : (void *)key; if (p) { - if (flags & DNHT_REMOVE) { - /* link in the next element */ - *pp = *(void **)((char *)p + ht->ofs); - *(void **)((char *)p + ht->ofs) = NULL; - ht->entries--; - } - } else if (flags & DNHT_INSERT) { - // printf("%s before calling new, bucket %d ofs %d\n", - // __FUNCTION__, i, ht->ofs); - p = ht->newh ? ht->newh(key, flags, arg) : (void *)key; - // printf("%s newh returns %p\n", __FUNCTION__, p); - if (p) { - ht->entries++; - *(void **)((char *)p + ht->ofs) = ht->ht[i]; - ht->ht[i] = p; + ht->entries++; + if (tail == NULL) { + ht->ht[i] = NEXT(ht, p) = p; + } else { + NEXT(ht, p) = NEXT(ht, tail); + NEXT(ht, tail) = p; } } + return p; } /* - * do a scan with the option to delete the object. Extract next before - * running the callback because the element may be destroyed there. + * do a scan with the option to delete the object. + * Similar to the lookup, but the match function is different, + * and we extract 'next' before running the callback because + * the element may be destroyed there. */ int dn_ht_scan(struct dn_ht *ht, int (*fn)(void *, void *), void *arg) { - int i, ret, found = 0; - void **curp, *cur, *next; + int i, bucket, found = 0; if (ht == NULL || fn == NULL) return 0; for (i = 0; i <= ht->buckets; i++) { - curp = &ht->ht[i]; - while ( (cur = *curp) != NULL) { - next = *(void **)((char *)cur + ht->ofs); - ret = fn(cur, arg); - if (ret & DNHT_SCAN_DEL) { - found++; - ht->entries--; - *curp = next; - } else { - curp = (void **)((char *)cur + ht->ofs); - } - if (ret & DNHT_SCAN_END) + if (ht->ht[i] == NULL) + continue; /* empty bucket */ + bucket = i; + found += dn_ht_scan_body(ht, &bucket, fn, arg); + if (bucket == i) /* early exit */ return found; - } } return found; } /* - * Similar to dn_ht_scan(), except thah the scan is performed only + * Similar to dn_ht_scan(), except that the scan is performed only * in the bucket 'bucket'. 
The function returns a correct bucket number if - * the original is invalid + * the original is invalid. + * If the callback returns DNHT_SCAN_END, the function move the ht->ht[i] + * pointer to the last entry processed. Moreover, the bucket number passed + * by caller is decremented, because usually the caller increment it. */ int dn_ht_scan_bucket(struct dn_ht *ht, int *bucket, int (*fn)(void *, void *), void *arg) { - int i, ret, found = 0; - void **curp, *cur, *next; - if (ht == NULL || fn == NULL) return 0; - if (*bucket > ht->buckets) + if (*bucket > ht->buckets || *bucket < 0) *bucket = 0; - i = *bucket; - - curp = &ht->ht[i]; - while ( (cur = *curp) != NULL) { - next = *(void **)((char *)cur + ht->ofs); - ret = fn(cur, arg); - if (ret & DNHT_SCAN_DEL) { - found++; - ht->entries--; - *curp = next; - } else { - curp = (void **)((char *)cur + ht->ofs); - } - if (ret & DNHT_SCAN_END) - return found; - } - return found; + if (ht->ht[*bucket] == NULL) { + (*bucket)++; + return 0; + } else + return dn_ht_scan_body(ht, bucket, fn, arg); } - diff --git a/dummynet2/dn_sched_prio.c b/dummynet2/dn_sched_prio.c index 048945f..72af5da 100755 --- a/dummynet2/dn_sched_prio.c +++ b/dummynet2/dn_sched_prio.c @@ -25,7 +25,7 @@ */ /* - * $Id: dn_sched_prio.c 5797 2010-03-21 16:31:08Z luigi $ + * $Id: dn_sched_prio.c 6338 2010-05-26 15:06:34Z svn_panicucci $ */ #ifdef _KERNEL #include @@ -187,7 +187,7 @@ prio_new_queue(struct dn_queue *q) } static int -prio_free_queue(struct dn_queue *q) +prio_free_queue(struct dn_queue *q, int safe) { int prio = q->fs->fs.par[0]; struct prio_si *si = (struct prio_si *)(q->_si + 1); diff --git a/dummynet2/dn_sched_qfq.c b/dummynet2/dn_sched_qfq.c index 13bf659..eddb472 100644 --- a/dummynet2/dn_sched_qfq.c +++ b/dummynet2/dn_sched_qfq.c @@ -25,7 +25,7 @@ */ /* - * $Id: dn_sched_qfq.c 5621 2010-03-04 16:51:27Z luigi $ + * $Id: dn_sched_qfq.c 6552 2010-06-15 11:24:59Z svn_panicucci $ */ #ifdef _KERNEL @@ -61,7 +61,7 @@ typedef unsigned long 
bitmap; * bitmaps ops are critical. Some linux versions have __fls * and the bitmap ops. Some machines have ffs */ -#if defined(_WIN32) +#if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24)) int fls(unsigned int n) { int i = 0; @@ -71,7 +71,7 @@ int fls(unsigned int n) } #endif -#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) +#if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24)) static inline unsigned long __fls(unsigned long word) { return fls(word) - 1; @@ -319,7 +319,7 @@ qfq_new_queue(struct dn_queue *_q) /* remove an empty queue */ static int -qfq_free_queue(struct dn_queue *_q) +qfq_free_queue(struct dn_queue *_q, int safe) { struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1); struct qfq_class *cl = (struct qfq_class *)_q; diff --git a/dummynet2/dn_sched_rr.c b/dummynet2/dn_sched_rr.c index 59c36ac..2b58cf0 100644 --- a/dummynet2/dn_sched_rr.c +++ b/dummynet2/dn_sched_rr.c @@ -25,7 +25,7 @@ */ /* - * $Id: dn_sched_rr.c 5621 2010-03-04 16:51:27Z luigi $ + * $Id: dn_sched_rr.c 6338 2010-05-26 15:06:34Z svn_panicucci $ */ #ifdef _KERNEL @@ -94,7 +94,7 @@ rr_remove_head(struct rr_si *si) if (si->head == NULL) return; /* empty queue */ si->head->status = 0; - + if (si->head == si->tail) { si->head = si->tail = NULL; return; @@ -111,7 +111,7 @@ static inline void remove_queue_q(struct rr_queue *q, struct rr_si *si) { struct rr_queue *prev; - + if (q->status != 1) return; if (q == si->head) { @@ -141,7 +141,7 @@ next_pointer(struct rr_si *si) si->tail = si->tail->qnext; } -static int +static int rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) { struct rr_si *si; @@ -154,7 +154,7 @@ rr_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) return 0; } - /* If reach this point, queue q was idle */ + /* If reach this point, queue q was idle */ si = (struct rr_si *)(_si + 1); rrq = (struct rr_queue *)q; @@ -264,11 +264,14 @@ 
rr_new_queue(struct dn_queue *_q) } static int -rr_free_queue(struct dn_queue *_q) +rr_free_queue(struct dn_queue *_q, int safe) { struct rr_queue *q = (struct rr_queue *)_q; ND("called"); + if (safe) /* Delete only if status == 0 */ + return q->status; + if (q->status == 1) { struct rr_si *si = (struct rr_si *)(_q->_si + 1); remove_queue_q(q, si); diff --git a/dummynet2/dn_sched_wf2q.c b/dummynet2/dn_sched_wf2q.c index e221989..c42969e 100644 --- a/dummynet2/dn_sched_wf2q.c +++ b/dummynet2/dn_sched_wf2q.c @@ -26,7 +26,7 @@ */ /* - * $Id: dn_sched_wf2q.c 5621 2010-03-04 16:51:27Z luigi $ + * $Id: dn_sched_wf2q.c 6338 2010-05-26 15:06:34Z svn_panicucci $ */ #ifdef _KERNEL @@ -125,7 +125,7 @@ idle_check(struct wf2qp_si *si, int n, int force) } } -static int +static int wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) { struct dn_fsk *fs = q->fs; @@ -140,7 +140,7 @@ wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) return 0; } - /* If reach this point, queue q was idle */ + /* If reach this point, queue q was idle */ alg_fq = (struct wf2qp_queue *)q; if (DN_KEY_LT(alg_fq->F, alg_fq->S)) { @@ -314,13 +314,18 @@ wf2qp_new_queue(struct dn_queue *_q) * of weights. 
*/ static int -wf2qp_free_queue(struct dn_queue *q) +wf2qp_free_queue(struct dn_queue *q, int safe) { struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1); - + if (alg_fq->S >= alg_fq->F + 1) return 0; /* nothing to do, not in any heap */ + + /* queue is in a scheduler heap */ + if (safe) /* do not delete in safe mode */ + return 1; + si->wsum -= q->fs->fs.par[0]; if (si->wsum > 0) si->inv_wsum = ONE_FP/si->wsum; diff --git a/dummynet2/include/net/pfil.h b/dummynet2/include/net/pfil.h index 19a3d9c..af26a79 100644 --- a/dummynet2/include/net/pfil.h +++ b/dummynet2/include/net/pfil.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include struct mbuf; struct ifnet; @@ -49,9 +49,9 @@ struct inpcb; */ struct packet_filter_hook { TAILQ_ENTRY(packet_filter_hook) pfil_link; - int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *); + int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int, + struct inpcb *); void *pfil_arg; - int pfil_flags; }; #define PFIL_IN 0x00000001 @@ -72,7 +72,7 @@ struct pfil_head { #if defined( __linux__ ) || defined( _WIN32 ) rwlock_t ph_mtx; #else - struct rwlock ph_mtx; + struct rmlock ph_lock; #endif union { u_long phu_val; @@ -83,30 +83,33 @@ struct pfil_head { LIST_ENTRY(pfil_head) ph_list; }; +int pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, + int, struct inpcb *), void *, int, struct pfil_head *); +int pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, + int, struct inpcb *), void *, int, struct pfil_head *); int pfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); -int pfil_add_hook(int (*func)(void *, struct mbuf **, - struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *); -int pfil_remove_hook(int (*func)(void *, struct mbuf **, - struct ifnet *, int, struct inpcb *), void *, int, struct pfil_head *); - int pfil_head_register(struct pfil_head *); int 
pfil_head_unregister(struct pfil_head *); struct pfil_head *pfil_head_get(int, u_long); #define PFIL_HOOKED(p) ((p)->ph_nhooks > 0) -#define PFIL_RLOCK(p) rw_rlock(&(p)->ph_mtx) -#define PFIL_WLOCK(p) rw_wlock(&(p)->ph_mtx) -#define PFIL_RUNLOCK(p) rw_runlock(&(p)->ph_mtx) -#define PFIL_WUNLOCK(p) rw_wunlock(&(p)->ph_mtx) +#define PFIL_LOCK_INIT(p) \ + rm_init_flags(&(p)->ph_lock, "PFil hook read/write mutex", RM_RECURSE) +#define PFIL_LOCK_DESTROY(p) rm_destroy(&(p)->ph_lock) +#define PFIL_RLOCK(p, t) rm_rlock(&(p)->ph_lock, (t)) +#define PFIL_WLOCK(p) rm_wlock(&(p)->ph_lock) +#define PFIL_RUNLOCK(p, t) rm_runlock(&(p)->ph_lock, (t)) +#define PFIL_WUNLOCK(p) rm_wunlock(&(p)->ph_lock) #define PFIL_LIST_LOCK() mtx_lock(&pfil_global_lock) #define PFIL_LIST_UNLOCK() mtx_unlock(&pfil_global_lock) static __inline struct packet_filter_hook * pfil_hook_get(int dir, struct pfil_head *ph) { + if (dir == PFIL_IN) return (TAILQ_FIRST(&ph->ph_in)); else if (dir == PFIL_OUT) diff --git a/dummynet2/include/net/radix.h b/dummynet2/include/net/radix.h index a69b844..e5b8ecc 100644 --- a/dummynet2/include/net/radix.h +++ b/dummynet2/include/net/radix.h @@ -105,15 +105,24 @@ typedef int walktree_f_t(struct radix_node *, void *); struct radix_node_head { struct radix_node *rnh_treetop; + int rnh_addrsize; /* permit, but not require fixed keys */ + int rnh_pktsize; /* permit, but not require fixed keys */ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ (void *v, void *mask, struct radix_node_head *head, struct radix_node nodes[]); + struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ + (void *v, void *mask, + struct radix_node_head *head, struct radix_node nodes[]); struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ (void *v, void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ + (void *v, void *mask, struct radix_node_head *head); struct radix_node *(*rnh_matchaddr) /* locate based on 
sockaddr */ (void *v, struct radix_node_head *head); struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ (void *v, void *mask, struct radix_node_head *head); + struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ + (void *v, struct radix_node_head *head); int (*rnh_walktree) /* traverse tree */ (struct radix_node_head *head, walktree_f_t *f, void *w); int (*rnh_walktree_from) /* traverse tree below a */ diff --git a/dummynet2/include/netinet/ip.h b/dummynet2/include/netinet/ip.h index e88551f..c9da4d8 100644 --- a/dummynet2/include/netinet/ip.h +++ b/dummynet2/include/netinet/ip.h @@ -4,7 +4,8 @@ #define LITTLE_ENDIAN 1234 #define BIG_ENDIAN 4321 #if defined(__BIG_ENDIAN) -#error we are in bigendian +#define BYTE_ORDER BIG_ENDIAN +//#warning we are in bigendian #elif defined(__LITTLE_ENDIAN) //#warning we are in littleendian #define BYTE_ORDER LITTLE_ENDIAN @@ -43,4 +44,6 @@ struct ip { struct in_addr ip_src,ip_dst; /* source and dest address */ } __packed __aligned(4); +#define IPTOS_LOWDELAY 0x10 + #endif /* _NETINET_IP_H_ */ diff --git a/dummynet2/include/netinet/ip_dummynet.h b/dummynet2/include/netinet/ip_dummynet.h index 6795d7c..961f850 100644 --- a/dummynet2/include/netinet/ip_dummynet.h +++ b/dummynet2/include/netinet/ip_dummynet.h @@ -88,7 +88,7 @@ enum { DN_LAST, }; - + enum { /* subtype for schedulers, flowset and the like */ DN_SCHED_UNKNOWN = 0, DN_SCHED_FIFO = 1, @@ -132,10 +132,10 @@ struct dn_link { */ struct dn_fs { struct dn_id oid; - uint32_t fs_nr; /* the flowset number */ - uint32_t flags; /* userland flags */ - int qsize; /* queue size in slots or bytes */ - int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ + uint32_t fs_nr; /* the flowset number */ + uint32_t flags; /* userland flags */ + int qsize; /* queue size in slots or bytes */ + int32_t plr; /* PLR, pkt loss rate (2^31-1 means 100%) */ uint32_t buckets; /* buckets used for the queue hash table */ struct ipfw_flow_id flow_mask; @@ -149,10 +149,10 @@ 
struct dn_fs { * weight and probabilities are in the range 0..1 represented * in fixed point arithmetic with SCALE_RED decimal bits. */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) +#define SCALE_RED 16 +#define SCALE(x) ( (x) << SCALE_RED ) +#define SCALE_VAL(x) ( (x) >> SCALE_RED ) +#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) int w_q ; /* queue weight (scaled) */ int max_th ; /* maximum threshold for queue (scaled) */ int min_th ; /* minimum threshold for queue (scaled) */ @@ -200,17 +200,15 @@ struct dn_sch { struct dn_profile { struct dn_id oid; /* fields to simulate a delay profile */ -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int link_nr; - int loss_level; - int bandwidth; // XXX use link bandwidth? - int samples_no; /* actual length of samples[] */ - int samples[ED_MAX_SAMPLES_NO]; /* may be shorter */ +#define ED_MAX_NAME_LEN 32 + char name[ED_MAX_NAME_LEN]; + int link_nr; + int loss_level; + int bandwidth; // XXX use link bandwidth? + int samples_no; /* actual len of samples[] */ + int samples[0]; /* may be shorter */ }; - - /* * Overall structure of dummynet diff --git a/dummynet2/include/netinet/ip_fw.h b/dummynet2/include/netinet/ip_fw.h index d037b45..5e77119 100644 --- a/dummynet2/include/netinet/ip_fw.h +++ b/dummynet2/include/netinet/ip_fw.h @@ -506,8 +506,8 @@ struct ipfw_flow_id { uint32_t src_ip; uint16_t dst_port; uint16_t src_port; - uint8_t fib; - uint8_t proto; + uint8_t fib; + uint8_t proto; uint8_t _flags; /* protocol-specific flags */ uint8_t addr_type; /* 4=ip4, 6=ip6, 1=ether ? 
*/ struct in6_addr dst_ip6; diff --git a/dummynet2/include/netinet/ipfw/dn_sched.h b/dummynet2/include/netinet/ipfw/dn_sched.h index 3c75b64..a755e86 100644 --- a/dummynet2/include/netinet/ipfw/dn_sched.h +++ b/dummynet2/include/netinet/ipfw/dn_sched.h @@ -119,6 +119,10 @@ struct dn_alg { * free_queue actions related to a queue removal, e.g. undo * all the above. If the queue has data in it, also remove * from the scheduler. This can e.g. happen during a reconfigure. + * If safe == 1 remove the queue only if the scheduler no longer + * need it, otherwise delete it even if the scheduler is using + * it. Usually, the flag safe is set when the drain routine is + * running to delete idle queues. */ int (*enqueue)(struct dn_sch_inst *, struct dn_queue *, struct mbuf *); @@ -131,7 +135,7 @@ struct dn_alg { int (*new_fsk)(struct dn_fsk *f); int (*free_fsk)(struct dn_fsk *f); int (*new_queue)(struct dn_queue *q); - int (*free_queue)(struct dn_queue *q); + int (*free_queue)(struct dn_queue *q, int safe); /* run-time fields */ int ref_count; /* XXX number of instances in the system */ @@ -166,14 +170,27 @@ dn_dequeue(struct dn_queue *q) if (m == NULL) return NULL; q->mq.head = m->m_nextpkt; + + /* Update stats for the queue */ q->ni.length--; q->ni.len_bytes -= m->m_pkthdr.len; + /* When the queue becomes idle, update idle_time (used by RED) + * and also update the count of idle queues (for garbage collection). 
+ */ + if (q->ni.length == 0) { + dn_cfg.idle_queue++; + q->q_time = dn_cfg.curr_time; + } if (q->_si) { - q->_si->ni.length--; - q->_si->ni.len_bytes -= m->m_pkthdr.len; + struct dn_flow *ni = &(q->_si->ni); + /* update stats for the scheduler instance, and keep track + * of idle scheduler instances if needed + */ + ni->length--; + ni->len_bytes -= m->m_pkthdr.len; + if (ni->length == 0) + dn_cfg.idle_si++; } - if (q->ni.length == 0) /* queue is now idle */ - q->q_time = dn_cfg.curr_time; return m; } diff --git a/dummynet2/include/netinet/ipfw/ip_dn_private.h b/dummynet2/include/netinet/ipfw/ip_dn_private.h index 47cc5e8..ecb4fe2 100644 --- a/dummynet2/include/netinet/ipfw/ip_dn_private.h +++ b/dummynet2/include/netinet/ipfw/ip_dn_private.h @@ -49,10 +49,6 @@ MALLOC_DECLARE(M_DUMMYNET); -#ifndef FREE_PKT -#define FREE_PKT(m) m_freem(m) -#endif - #ifndef __linux__ #define div64(a, b) ((int64_t)(a) / (int64_t)(b)) #endif @@ -97,6 +93,17 @@ set_oid(struct dn_id *o, int type, int len) o->subtype = 0; }; +uint64_t readTSC (void); +/* + * see if tsc (ot other timer) is supported. + * - FreeBSD has rdtsc macro for i386 and amd64 + * - Linux has rdtscll and/or rdtsc (also for openWRT patched kernel source) + * - Windows has KeQueryPerformanceCounter() function that use tsc or other + * timer + */ +#if defined(rdtscll) || defined(rdtsc) || defined(_WIN32) +#define HAVE_TSC +#endif /* * configuration and global data for a dummynet instance * @@ -129,7 +136,21 @@ struct dn_parms { int queue_count; /* ticks and other stuff */ - uint64_t curr_time; + uint64_t curr_time; /* in ticks */ + + /* + * Variables to manage the time spent in the drain routines. + * max_drain is max the fraction of a tick (0..100) to be used + * for draining. + * We also need some variables to store the average number of + * timecounter ticks between calls to the periodic task, etc. 
+ */ + int drain_ratio; + uint64_t cycle_task_new; /* TSC when dummynet_task() starts */ + uint64_t cycle_task_old; /* TSC when prev. dummynet_task() starts */ + uint64_t cycle_task; + uint64_t cycle_task_avg; /* Moving average of cicle_task */ + /* flowsets and schedulers are in hash tables, with 'hash_size' * buckets. fshash is looked up at every packet arrival * so better be generous if we expect many entries. @@ -140,16 +161,33 @@ struct dn_parms { struct dn_fsk_head fsu; /* list of unlinked flowsets */ struct dn_alg_head schedlist; /* list of algorithms */ - /* Store the fs/sch to scan when draining. The value is the - * bucket number of the hash table. Expire can be disabled - * with net.inet.ip.dummynet.expire=0, or it happens every - * expire ticks. - **/ - int drain_fs; - int drain_sch; - uint32_t expire; - uint32_t expire_cycle; /* tick count */ - + /* Counter of idle objects -- used by drain routine + * We scan when idle_queue (or idle_si) > expire_object. + * The drain routine is called every 'expire' cycles (the counter + * used is expire_cycle). + * We can disable the expire routine by setting expire to 0. + * An object is kept alive for at least object_idle_tick after it + * becomes idle. During the scan, we count the number of objects + * that are idle but not ready in 'idle_si_wait' and 'idle_queue_wait' + */ + int idle_queue; + int idle_queue_wait; /* idle but not expired yet */ + int idle_si; + int idle_si_wait; /* idle but not expired yet */ + uint32_t expire_object; /* threshold for expires */ + uint32_t expire; /* how often to expire */ + uint32_t expire_cycle; + uint32_t object_idle_tick; /* lifetime of objs */ + uint32_t expire_object_examined; /* Burst of object examined */ + + /* drain_fs and drain_sch point to the next bucket to scan when + * draining. 
+ */ + uint32_t drain_fs; + uint32_t drain_sch; + + int init_done; + /* if the upper half is busy doing something long, * can set the busy flag and we will enqueue packets in * a queue for later processing. @@ -310,37 +348,14 @@ struct dn_sch_inst { * The counter is incremented or decremented when * a reference from the queue is created or deleted. * It is used to make sure that a scheduler instance can be safely - * deleted by the drain routine. See notes below. + * deleted by the drain routine. */ int q_count; }; -/* - * NOTE about object drain. - * The system will automatically (XXX check when) drain queues and - * scheduler instances when they are idle. - * A queue is idle when it has no packets; an instance is idle when - * it is not in the evheap heap, and the corresponding delay line is empty. - * A queue can be safely deleted when it is idle because of the scheduler - * function xxx_free_queue() will remove any references to it. - * An instance can be only deleted when no queues reference it. To be sure - * of that, a counter (q_count) stores the number of queues that are pointing - * to the instance. - * - * XXX - * Order of scan: - * - take all flowset in a bucket for the flowset hash table - * - take all queues in a bucket for the flowset - * - increment the queue bucket - * - scan next flowset bucket - * Nothing is done if a bucket contains no entries. - * - * The same schema is used for sceduler instances - */ - -/* kernel-side flags. Linux has DN_DELETE in fcntl.h +/* kernel-side flags. 
Linux has DN_DELETE in fcntl.h */ enum { /* 1 and 2 are reserved for the SCAN flags */ @@ -349,18 +364,20 @@ enum { DN_DETACH = 0x0010, DN_ACTIVE = 0x0020, /* object is in evheap */ DN_F_DLINE = 0x0040, /* object is a delay line */ - DN_F_SCHI = 0x00C0, /* object is a sched.instance */ + DN_DEL_SAFE = 0x0080, /* delete a queue only if no longer needed + * by scheduler */ DN_QHT_IS_Q = 0x0100, /* in flowset, qht is a single queue */ }; extern struct dn_parms dn_cfg; +//VNET_DECLARE(struct dn_parms, _base_dn_cfg); +//#define dn_cfg VNET(_base_dn_cfg) int dummynet_io(struct mbuf **, int , struct ip_fw_args *); void dummynet_task(void *context, int pending); void dn_reschedule(void); -struct dn_queue *ipdn_q_find(struct dn_fsk *, struct dn_sch_inst *, - struct ipfw_flow_id *); +struct dn_queue *ipdn_q_find(struct dn_fsk *, struct ipfw_flow_id *); struct dn_sch_inst *ipdn_si_find(struct dn_schk *, struct ipfw_flow_id *); /* diff --git a/dummynet2/include/netinet/ipfw/ip_fw_private.h b/dummynet2/include/netinet/ipfw/ip_fw_private.h index 5bf3416..334face 100644 --- a/dummynet2/include/netinet/ipfw/ip_fw_private.h +++ b/dummynet2/include/netinet/ipfw/ip_fw_private.h @@ -127,7 +127,7 @@ enum { PROTO_IPV4 = 0x08, PROTO_IPV6 = 0x10, PROTO_IFB = 0x0c, /* layer2 + ifbridge */ - /* PROTO_OLDBDG = 0x14, unused, old bridge */ + /* PROTO_OLDBDG = 0x14, unused, old bridge */ }; /* wrapper for freeing a packet, in case we need to do more work */ @@ -218,8 +218,8 @@ struct ip_fw_chain { LIST_HEAD(nat_list, cfg_nat) nat; /* list of nat entries */ struct radix_node_head *tables[IPFW_TABLES_MAX]; #if defined( __linux__ ) || defined( _WIN32 ) - spinlock_t rwmtx; - spinlock_t uh_lock; + spinlock_t rwmtx; + spinlock_t uh_lock; #else struct rwlock rwmtx; struct rwlock uh_lock; /* lock for upper half */ diff --git a/dummynet2/include/netinet/udp.h b/dummynet2/include/netinet/udp.h index aed3099..cd75bd1 100644 --- a/dummynet2/include/netinet/udp.h +++ b/dummynet2/include/netinet/udp.h @@ 
-45,4 +45,23 @@ struct udphdr { u_short uh_sum; /* udp checksum */ }; +/* + * User-settable options (used with setsockopt). + */ +#define UDP_ENCAP 0x01 + + +/* + * UDP Encapsulation of IPsec Packets options. + */ +/* Encapsulation types. */ +#define UDP_ENCAP_ESPINUDP_NON_IKE 1 /* draft-ietf-ipsec-nat-t-ike-00/01 */ +#define UDP_ENCAP_ESPINUDP 2 /* draft-ietf-ipsec-udp-encaps-02+ */ + +/* Default ESP in UDP encapsulation port. */ +#define UDP_ENCAP_ESPINUDP_PORT 500 + +/* Maximum UDP fragment size for ESP over UDP. */ +#define UDP_ENCAP_ESPINUDP_MAXFRAGLEN 552 + #endif diff --git a/dummynet2/include/sys/mbuf.h b/dummynet2/include/sys/mbuf.h index a752ebd..e65bbb6 100644 --- a/dummynet2/include/sys/mbuf.h +++ b/dummynet2/include/sys/mbuf.h @@ -164,10 +164,20 @@ m_tag_delete(struct mbuf *m, struct m_tag *t) static __inline struct m_tag * m_tag_locate(struct mbuf *m, u_int32_t n, int x, struct m_tag *t) { - return NULL; + struct m_tag *tag; + + tag = m_tag_first(m); + if (tag == NULL) + return NULL; + + if (tag->m_tag_cookie != n || tag->m_tag_id != x) + return NULL; + else + return tag; }; #define M_SETFIB(_m, _fib) /* nothing on linux */ + static __inline void m_freem(struct mbuf *m) { @@ -187,6 +197,29 @@ m_freem(struct mbuf *m) #define M_GETFIB(_m) 0 +/* macro used to create a new mbuf */ +#define MT_DATA 1 /* dynamic (data) allocation */ +#define MSIZE 256 /* size of an mbuf */ +#define MGETHDR(_m, _how, _type) ((_m) = m_gethdr((_how), (_type))) + +/* allocate and init a new mbuf using the same structure of FreeBSD */ +static __inline struct mbuf * +m_gethdr(int how, short type) +{ + struct mbuf *m; + + m = malloc(MSIZE, M_IPFW, M_NOWAIT); + + if (m == NULL) { + return m; + } + + /* here we have MSIZE - sizeof(struct mbuf) available */ + m->m_data = m + 1; + + return m; +} + #endif /* __linux__ || _WIN32 */ /* diff --git a/dummynet2/include/sys/systm.h b/dummynet2/include/sys/systm.h index db8ef7a..e98335e 100644 --- a/dummynet2/include/sys/systm.h +++ 
b/dummynet2/include/sys/systm.h @@ -15,11 +15,16 @@ #endif #define callout timer_list static __inline int -callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg) +callout_reset_on(struct callout *co, int ticks, void (*fn)(void *), void *arg, int cpu) { co->expires = jiffies + ticks; co->function = (void (*)(unsigned long))fn; co->data = (unsigned long)arg; + /* + * Linux 2.6.31 and above has add_timer_on(co, cpu), + * otherwise add_timer() always schedules a callout on the same + * CPU used the first time, so we don't need more. + */ add_timer(co); return 0; } @@ -71,14 +76,14 @@ VOID ipfw_dpc( * timer is called only once a sec, this won't hurt that much. */ static __inline int -callout_reset(struct callout *co, int ticks, void (*fn)(void *), void *arg) +callout_reset_on(struct callout *co, int ticks, void (*fn)(void *), void *arg, int cpu) { if(fn == &dummynet) { if(co->dpcinitialized == 0) { KeInitializeDpc(&co->timerdpc, dummynet_dpc, NULL); - KeSetTargetProcessorDpc(&co->timerdpc, 0); + KeSetTargetProcessorDpc(&co->timerdpc, cpu); co->dpcinitialized = 1; } } diff --git a/dummynet2/ip_dn_io.c b/dummynet2/ip_dn_io.c index 3450466..6672424 100644 --- a/dummynet2/ip_dn_io.c +++ b/dummynet2/ip_dn_io.c @@ -45,8 +45,11 @@ __FBSDID("$FreeBSD: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c 203321 201 #include #include #include + #include /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ #include +#include + #include #include /* ip_len, ip_off */ #include /* ip_output(), IP_FORWARDING */ @@ -69,6 +72,7 @@ __FBSDID("$FreeBSD: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_dn_io.c 203321 201 */ struct dn_parms dn_cfg; +//VNET_DEFINE(struct dn_parms, _base_dn_cfg); static long tick_last; /* Last tick duration (usec). */ static long tick_delta; /* Last vs standard tick diff (usec). 
*/ @@ -100,31 +104,30 @@ SYSCTL_DECL(_net_inet); SYSCTL_DECL(_net_inet_ip); SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); +/* wrapper to pass dn_cfg fields to SYSCTL_* */ +//#define DC(x) (&(VNET_NAME(_base_dn_cfg).x)) +#define DC(x) (&(dn_cfg.x)) /* parameters */ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, - CTLFLAG_RW, &dn_cfg.hash_size, 0, "Default hash table size"); + CTLFLAG_RW, DC(hash_size), 0, "Default hash table size"); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, - CTLFLAG_RW, &dn_cfg.slot_limit, 0, + CTLFLAG_RW, DC(slot_limit), 0, "Upper limit in slots for pipe queue."); SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, - CTLFLAG_RW, &dn_cfg.byte_limit, 0, + CTLFLAG_RW, DC(byte_limit), 0, "Upper limit in bytes for pipe queue."); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, - CTLFLAG_RW, &dn_cfg.io_fast, 0, "Enable fast dummynet io."); + CTLFLAG_RW, DC(io_fast), 0, "Enable fast dummynet io."); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, - CTLFLAG_RW, &dn_cfg.debug, 0, "Dummynet debug level"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, - CTLFLAG_RW, &dn_cfg.expire, 0, "Expire empty queues/pipes"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle, - CTLFLAG_RD, &dn_cfg.expire_cycle, 0, "Expire cycle for queues/pipes"); + CTLFLAG_RW, DC(debug), 0, "Dummynet debug level"); /* RED parameters */ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, - CTLFLAG_RD, &dn_cfg.red_lookup_depth, 0, "Depth of RED lookup table"); + CTLFLAG_RD, DC(red_lookup_depth), 0, "Depth of RED lookup table"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, - CTLFLAG_RD, &dn_cfg.red_avg_pkt_size, 0, "RED Medium packet size"); + CTLFLAG_RD, DC(red_avg_pkt_size), 0, "RED Medium packet size"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, - CTLFLAG_RD, &dn_cfg.red_max_pkt_size, 0, "RED Max packet size"); + CTLFLAG_RD, DC(red_max_pkt_size), 0, "RED 
Max packet size"); /* time adjustment */ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta, @@ -140,15 +143,27 @@ SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, CTLFLAG_RD, &tick_lost, 0, "Number of ticks coalesced by dummynet taskqueue."); +/* Drain parameters */ +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, + CTLFLAG_RW, DC(expire), 0, "Expire empty queues/pipes"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_cycle, + CTLFLAG_RD, DC(expire_cycle), 0, "Expire cycle for queues/pipes"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire_object, + CTLFLAG_RW, DC(expire_object), 0, "Min # of objects before start drain routine"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, object_idle_tick, + CTLFLAG_RD, DC(object_idle_tick), 0, "Time (in ticks) to cosiderer an object as idle"); +SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, drain_ratio, + CTLFLAG_RD, DC(drain_ratio), 0, "% of dummynet_task() to dedicate to drain routine"); + /* statistics */ SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, schk_count, - CTLFLAG_RD, &dn_cfg.schk_count, 0, "Number of schedulers"); + CTLFLAG_RD, DC(schk_count), 0, "Number of schedulers"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, si_count, - CTLFLAG_RD, &dn_cfg.si_count, 0, "Number of scheduler instances"); + CTLFLAG_RD, DC(si_count), 0, "Number of scheduler instances"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, fsk_count, - CTLFLAG_RD, &dn_cfg.fsk_count, 0, "Number of flowsets"); + CTLFLAG_RD, DC(fsk_count), 0, "Number of flowsets"); SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, queue_count, - CTLFLAG_RD, &dn_cfg.queue_count, 0, "Number of queues"); + CTLFLAG_RD, DC(queue_count), 0, "Number of queues"); SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, CTLFLAG_RD, &io_pkt, 0, "Number of packets passed to dummynet."); @@ -158,7 +173,7 @@ SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, CTLFLAG_RD, &io_pkt_drop, 0, "Number of packets dropped 
by dummynet."); - +#undef DC SYSEND #endif @@ -364,6 +379,11 @@ dn_enqueue(struct dn_queue *q, struct mbuf* m, int drop) goto drop; } mq_append(&q->mq, m); + if (q->ni.length == 0) { /* queue was idle */ + dn_cfg.idle_queue--; + if (ni->length == 0) /* scheduler was idle */ + dn_cfg.idle_si--; + } q->ni.length++; q->ni.len_bytes += len; ni->length++; @@ -455,30 +475,33 @@ serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now) si->sched_time = now; done = 0; while (si->credit >= 0 && (m = s->fp->dequeue(si)) != NULL) { + uint64_t len_scaled; + + /* + * Some schedulers might want wake up the scheduler later. + * To suppor this the caller returns an mbuf with len < 0 + * this will result in a new wake up of the scheduler + * instance between m->m_pkthdr.len ticks. + */ if (m->m_pkthdr.len < 0) { - /* Received a packet with negative length. - * the scheduler instance will be waken up after - * -m->m_pkthdr.len ticks. - */ si->kflags |= DN_ACTIVE; heap_insert(&dn_cfg.evheap, now - m->m_pkthdr.len, si); - - /* Delete the fake packet */ - free(m, M_DUMMYNET); - - /* Dont' touch credit, exit from the function */ + if (delay_line_idle && done) + transmit_event(q, &si->dline, now); return NULL; - } else { /* normal behaviour */ - uint64_t len_scaled; - done++; - len_scaled = (bw == 0) ? 0 : hz * - (m->m_pkthdr.len * 8 + extra_bits(m, s)); - si->credit -= len_scaled; - /* Move packet in the delay line */ - dn_tag_get(m)->output_time += s->link.delay ; - mq_append(&si->dline.mq, m); } + + /* a regular mbuf received */ + done++; + if (bw == 0) printf("bw is null\n"); + len_scaled = (bw == 0) ? 0 : hz * + (m->m_pkthdr.len * 8 + extra_bits(m, s)); + si->credit -= len_scaled; + /* Move packet in the delay line */ + dn_tag_get(m)->output_time = dn_cfg.curr_time + s->link.delay; + mq_append(&si->dline.mq, m); } + /* * If credit >= 0 the instance is idle, mark time. 
* Otherwise put back in the heap, and adjust the output @@ -500,6 +523,131 @@ serve_sched(struct mq *q, struct dn_sch_inst *si, uint64_t now) return q->head; } +/* + * Support function to read the TSC (or equivalent). We use this + * high resolution timer to adapt the amount of work done for + * expiring the clock. + * Supports Linux and FreeBSD both i386 and amd64 platform + * Supports OpenWRT mips architecture + * + * On SMP no special work is needed: + * - In linux 2.6 a timer always runs on the cpu that added it. See + * (http://book.opensourceproject.org.cn/kernel/kernel3rd/opensource/0596005652/understandlk-chp-6-sect-5.html) + * - FreeBSD8 has a new callout_reset_on() which specifies the cpu on which + * the timer must be run + * - Windows runs dummynet_task() on cpu0. + * + * - Linux 2.4 does not guarantee that a timer runs on the same cpu every time. + */ +#ifdef HAVE_TSC +uint64_t +readTSC (void) +{ + uint64_t a=0; + +#ifdef __linux__ + /* Linux and openwrt have a macro to read the tsc for i386 and + * amd64. + * Openwrt have patched the kernel and allow use of tsc with mips + * and other platforms + * rdtscll() is a macro defined in include/asm-xxx/msr.h, + * where xxx is the architecture (x86, mips). + */ + rdtscll(a); +#elif defined(_WIN32) + /* Microsoft recommends the use of KeQueryPerformanceCounter() + * instead of rdtsc(); the counter is its return value (the optional argument only receives the frequency). + */ + a = (uint64_t)KeQueryPerformanceCounter(NULL).QuadPart; //XXX not tested! +#elif defined(__FreeBSD__) + /* FreeBSD (i386/amd64) has macro rdtsc() defined in machine/cpufunc.h. + * We could use the macro instead of explicit assembly XXX + */ + return rdtsc(); +#endif + return a; +} +#endif /* HAVE_TSC */ + +/* + * compute avg task period. + * We could do something more complex, possibly. + */ +static void +do_update_cycle(void) +{ +#ifdef HAVE_TSC + uint64_t tmp = readTSC(); +#if defined (LINUX_24) && defined(CONFIG_SMP) + /* on LINUX24 and SMP, we have no guarantees on which cpu runs + * the timer callbacks.
If the difference between new and + * old value is negative, we assume that the values come from + * different cpus so we adjust 'new' accordingly. + */ + if (tmp <= dn_cfg.cycle_task_new) + dn_cfg.cycle_task_new = tmp - dn_cfg.cycle_task; +#endif /* linux24 && SMP */ + dn_cfg.cycle_task_old = dn_cfg.cycle_task_new; + dn_cfg.cycle_task_new = tmp; + dn_cfg.cycle_task = dn_cfg.cycle_task_new - dn_cfg.cycle_task_old; + + /* Update the average + * avg = (2^N * avg + new - avg) / 2^N + * N==4 seems to be a good compromise between clock change + * and 'spurious' cycle_task value + */ +#define DN_N 4 + dn_cfg.cycle_task_avg = (dn_cfg.cycle_task_avg << DN_N) + + dn_cfg.cycle_task - dn_cfg.cycle_task_avg; + dn_cfg.cycle_task_avg = dn_cfg.cycle_task_avg >> DN_N; +#undef DN_N + +#endif /* HAVE_TSC */ +} + +static void +do_drain(void) +{ +#ifdef HAVE_TSC + uint64_t dt_max; +#endif + if (!dn_cfg.expire || ++dn_cfg.expire_cycle < dn_cfg.expire) + return; + /* It's time to check if drain routines should be called */ + dn_cfg.expire_cycle = 0; + + dn_cfg.idle_queue_wait = 0; + dn_cfg.idle_si_wait = 0; + /* Do a drain cycle even if there isn't time to do it */ +#ifdef HAVE_TSC + dt_max = dn_cfg.cycle_task_avg * dn_cfg.drain_ratio; +#endif + for (;;) { + int done = 0; + + if (dn_cfg.idle_queue > dn_cfg.expire_object && + dn_cfg.idle_queue_wait < dn_cfg.idle_queue) { + dn_drain_queue(); + done = 1; + } + if (dn_cfg.idle_si > dn_cfg.expire_object && + dn_cfg.idle_si_wait < dn_cfg.idle_si) { + dn_drain_scheduler(); + done = 1; + } + /* time to end ? */ +#ifndef HAVE_TSC + /* If tsc does not exist, do only one drain cycle and exit */ + break; +#else + /* Exit when nothing was done or we have consumed all time */ + if ( (done == 0) || + ((readTSC() - dn_cfg.cycle_task_new) * 100 > dt_max) ) + break; +#endif /* HAVE_TSC */ + } +} + /* * The timer handler for dummynet. Time is computed in ticks, but * but the code is tolerant to the actual rate at which this is called.
@@ -510,7 +658,11 @@ dummynet_task(void *context, int pending) { struct timeval t; struct mq q = { NULL, NULL }; /* queue to accumulate results */ - + + CURVNET_SET((struct vnet *)context); + + do_update_cycle(); /* compute avg. tick duration */ + DN_BH_WLOCK(); /* Update number of lost(coalesced) ticks. */ @@ -565,16 +717,13 @@ dummynet_task(void *context, int pending) transmit_event(&q, (struct delay_line *)p, dn_cfg.curr_time); } } - if (dn_cfg.expire && ++dn_cfg.expire_cycle >= dn_cfg.expire) { - dn_cfg.expire_cycle = 0; - dn_drain_scheduler(); - dn_drain_queue(); - } + do_drain(); DN_BH_WUNLOCK(); dn_reschedule(); if (q.head != NULL) dummynet_send(q.head); + CURVNET_RESTORE(); } /* @@ -732,21 +881,25 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) goto dropit; /* This queue/pipe does not exist! */ if (fs->sched == NULL) /* should not happen */ goto dropit; - /* find scheduler instance, possibly applying sched_mask */ - si = ipdn_si_find(fs->sched, &(fwa->f_id)); - if (si == NULL) - goto dropit; /* * If the scheduler supports multiple queues, find the right one * (otherwise it will be ignored by enqueue). */ if (fs->sched->fp->flags & DN_MULTIQUEUE) { - q = ipdn_q_find(fs, si, &(fwa->f_id)); + q = ipdn_q_find(fs, &(fwa->f_id)); if (q == NULL) goto dropit; - } + /* The scheduler instance lookup is done only for new queue. + * The callback q_new() will create the scheduler instance + * if needed. 
+ */ + si = q->_si; + } else + si = ipdn_si_find(fs->sched, &(fwa->f_id)); + + if (si == NULL) + goto dropit; if (fs->sched->fp->enqueue(si, q, m)) { - printf("%s dropped by enqueue\n", __FUNCTION__); /* packet was dropped by enqueue() */ m = *m0 = NULL; goto dropit; @@ -758,8 +911,11 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) } /* compute the initial allowance */ - { + if (si->idle_time < dn_cfg.curr_time) { + /* Do this only on the first packet on an idle pipe */ struct dn_link *p = &fs->sched->link; + + si->sched_time = dn_cfg.curr_time; si->credit = dn_cfg.io_fast ? p->bandwidth : 0; if (p->burst) { uint64_t burst = (dn_cfg.curr_time - si->idle_time) * p->bandwidth; diff --git a/dummynet2/ip_dummynet.c b/dummynet2/ip_dummynet.c index 817e07f..f5b6831 100644 --- a/dummynet2/ip_dummynet.c +++ b/dummynet2/ip_dummynet.c @@ -87,7 +87,7 @@ dummynet(void * __unused unused) void dn_reschedule(void) { - callout_reset(&dn_timeout, 1, dummynet, NULL); + callout_reset_on(&dn_timeout, 1, dummynet, NULL, 0); } /*----- end of callout hooks -----*/ @@ -237,10 +237,10 @@ flow_id_cmp(struct ipfw_flow_id *id1, struct ipfw_flow_id *id2) return 1; /* different address families */ return (id1->dst_ip == id2->dst_ip && - id1->src_ip == id2->src_ip && - id1->dst_port == id2->dst_port && - id1->src_port == id2->src_port && - id1->proto == id2->proto && + id1->src_ip == id2->src_ip && + id1->dst_port == id2->dst_port && + id1->src_port == id2->src_port && + id1->proto == id2->proto && id1->extra == id2->extra) ? 
0 : 1; } /* the ipv6 case */ @@ -304,12 +304,19 @@ q_new(uintptr_t key, int flags, void *arg) if (fs->fs.flags & DN_QHT_HASH) q->ni.fid = *(struct ipfw_flow_id *)key; q->fs = fs; - q->_si = template->_si; + q->_si = ipdn_si_find(q->fs->sched, &(template->ni.fid)); + if (q->_si == NULL) { + D("no memory for new si"); + free (q, M_DUMMYNET); + return NULL; + } + q->_si->q_count++; if (fs->sched->fp->new_queue) fs->sched->fp->new_queue(q); dn_cfg.queue_count++; + dn_cfg.idle_queue++; return q; } @@ -317,8 +324,13 @@ q_new(uintptr_t key, int flags, void *arg) * Notify schedulers that a queue is going away. * If (flags & DN_DESTROY), also free the packets. * The version for callbacks is called q_delete_cb(). + * Returns 1 if the queue is NOT deleted (usually when + * the drain routine try to delete a queue that a scheduler + * instance needs), 0 otherwise. + * NOTE: flag DN_DEL_SAFE means that the queue should be + * deleted only if the scheduler no longer needs it */ -static void +static int dn_delete_queue(struct dn_queue *q, int flags) { struct dn_fsk *fs = q->fs; @@ -326,16 +338,20 @@ dn_delete_queue(struct dn_queue *q, int flags) // D("fs %p si %p\n", fs, q->_si); /* notify the parent scheduler that the queue is going away */ if (fs && fs->sched->fp->free_queue) - fs->sched->fp->free_queue(q); + if (fs->sched->fp->free_queue(q, flags & DN_DEL_SAFE) == 1) + return 1; /* queue NOT deleted */ q->_si->q_count--; q->_si = NULL; if (flags & DN_DESTROY) { if (q->mq.head) dn_free_pkts(q->mq.head); + else + dn_cfg.idle_queue--; bzero(q, sizeof(*q)); // safety free(q, M_DUMMYNET); dn_cfg.queue_count--; } + return 0; } static int @@ -376,12 +392,10 @@ qht_delete(struct dn_fsk *fs, int flags) * We never call it for !MULTIQUEUE (the queue is in the sch_inst). 
*/ struct dn_queue * -ipdn_q_find(struct dn_fsk *fs, struct dn_sch_inst *si, - struct ipfw_flow_id *id) +ipdn_q_find(struct dn_fsk *fs, struct ipfw_flow_id *id) { struct dn_queue template; - template._si = si; template.fs = fs; if (fs->fs.flags & DN_QHT_HASH) { @@ -432,6 +446,8 @@ si_match(void *obj, uintptr_t key, int flags, void *arg) return flow_id_cmp(&o->ni.fid, id2) == 0; } +static int si_reset_credit(void *_si, void *arg); // XXX si_new use this + /* * create a new instance for the given 'key' * Allocate memory for instance, delay line and scheduler private data. @@ -446,6 +462,7 @@ si_new(uintptr_t key, int flags, void *arg) si = malloc(l, M_DUMMYNET, M_NOWAIT | M_ZERO); if (si == NULL) goto error; + /* Set length only for the part passed up to userland. */ set_oid(&si->ni.oid, DN_SCH_I, sizeof(struct dn_flow)); set_oid(&(si->dline.oid), DN_DELAY_LINE, @@ -463,7 +480,9 @@ si_new(uintptr_t key, int flags, void *arg) if (s->sch.flags & DN_HAVE_MASK) si->ni.fid = *(struct ipfw_flow_id *)key; + si_reset_credit(si, NULL); dn_cfg.si_count++; + dn_cfg.idle_si++; return si; error: @@ -489,6 +508,8 @@ si_destroy(void *_si, void *arg) if (dl->oid.subtype) /* remove delay line from event heap */ heap_extract(&dn_cfg.evheap, dl); + if (si->ni.length == 0) + dn_cfg.idle_si--; dn_free_pkts(dl->mq.head); /* drain delay line */ if (si->kflags & DN_ACTIVE) /* remove si from event heap */ heap_extract(&dn_cfg.evheap, si); @@ -527,6 +548,7 @@ si_reset_credit(void *_si, void *arg) struct dn_sch_inst *si = _si; struct dn_link *p = &si->sched->link; + si->idle_time = dn_cfg.curr_time; si->credit = p->burst + (dn_cfg.io_fast ? p->bandwidth : 0); return 0; } @@ -601,7 +623,7 @@ fsk_detach(struct dn_fsk *fs, int flags) h = fs->sched ? &fs->sched->fsk_list : &dn_cfg.fsu; SLIST_REMOVE(h, fs, dn_fsk, sch_chain); } - /* Free the RED parameters, they will be recomputed on + /* Free the RED parameters, they will be recomputed on * subsequent attach if needed. 
*/ if (fs->w_q_lookup) @@ -655,6 +677,10 @@ delete_fs(int i, int locked) if (!locked) DN_BH_WLOCK(); fs = dn_ht_find(dn_cfg.fshash, i, DNHT_REMOVE, NULL); + if (dn_ht_entries(dn_cfg.fshash) == 0) { + dn_ht_free(dn_cfg.fshash, 0); + dn_cfg.fshash = NULL; + } ND("fs %d found %p", i, fs); if (fs) { fsk_detach(fs, DN_DETACH | DN_DELETE_FS); @@ -748,8 +774,10 @@ schk_delete_cb(void *obj, void *arg) #endif fsk_detach_list(&s->fsk_list, arg ? DN_DESTROY : 0); /* no more flowset pointing to us now */ - if (s->sch.flags & DN_HAVE_MASK) + if (s->sch.flags & DN_HAVE_MASK) { dn_ht_scan(s->siht, si_destroy, NULL); + dn_ht_free(s->siht, 0); + } else if (s->siht) si_destroy(s->siht, NULL); if (s->profile) { @@ -776,6 +804,10 @@ delete_schk(int i) struct dn_schk *s; s = dn_ht_find(dn_cfg.schedhash, i, DNHT_REMOVE, NULL); + if (dn_ht_entries(dn_cfg.schedhash) == 0) { + dn_ht_free(dn_cfg.schedhash, 0); + dn_cfg.schedhash = NULL; + } ND("%d %p", i, s); if (!s) return EINVAL; @@ -864,14 +896,16 @@ copy_q(struct copy_args *a, struct dn_fsk *fs, int flags) /* * This routine only copies the initial part of a profile ? XXX + * XXX marta: I think this routine is called to print a summary + * of the pipe configuration and does not need to show the + * profile samples list. 
*/ static int copy_profile(struct copy_args *a, struct dn_profile *p) { int have = a->end - *a->start; /* XXX here we check for max length */ - int profile_len = sizeof(struct dn_profile) - - ED_MAX_SAMPLES_NO*sizeof(int); + int profile_len = sizeof(struct dn_profile); if (p == NULL) return 0; @@ -977,29 +1011,29 @@ copy_data_helper(void *_o, void *_arg) return 0; /* not a pipe */ /* see if the object is within one of our ranges */ - for (;r < lim; r+=2) { + for (;r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; /* Found a valid entry, copy and we are done */ - if (a->flags & DN_C_LINK) { - if (copy_obj(a->start, a->end, + if (a->flags & DN_C_LINK) { + if (copy_obj(a->start, a->end, &s->link, "link", n)) - return DNHT_SCAN_END; - if (copy_profile(a, s->profile)) - return DNHT_SCAN_END; - if (copy_flowset(a, s->fs, 0)) - return DNHT_SCAN_END; - } - if (a->flags & DN_C_SCH) { - if (copy_obj(a->start, a->end, + return DNHT_SCAN_END; + if (copy_profile(a, s->profile)) + return DNHT_SCAN_END; + if (copy_flowset(a, s->fs, 0)) + return DNHT_SCAN_END; + } + if (a->flags & DN_C_SCH) { + if (copy_obj(a->start, a->end, &s->sch, "sched", n)) - return DNHT_SCAN_END; - /* list all attached flowsets */ - if (copy_fsk_list(a, s, 0)) - return DNHT_SCAN_END; - } + return DNHT_SCAN_END; + /* list all attached flowsets */ + if (copy_fsk_list(a, s, 0)) + return DNHT_SCAN_END; + } if (a->flags & DN_C_FLOW) - copy_si(a, s, 0); + copy_si(a, s, 0); break; } } else if (a->type == DN_FS) { @@ -1010,15 +1044,15 @@ copy_data_helper(void *_o, void *_arg) if (n >= DN_MAX_ID) return 0; /* see if the object is within one of our ranges */ - for (;r < lim; r+=2) { + for (;r < lim; r += 2) { if (n < r[0] || n > r[1]) continue; - if (copy_flowset(a, fs, 0)) - return DNHT_SCAN_END; - copy_q(a, fs, 0); + if (copy_flowset(a, fs, 0)) + return DNHT_SCAN_END; + copy_q(a, fs, 0); break; /* we are done */ - } } + } return 0; } @@ -1287,6 +1321,10 @@ config_fs(struct dn_fs *nfs, struct dn_id *arg, int 
locked) } if (!locked) DN_BH_WLOCK(); + if (dn_cfg.fshash == NULL) + dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, + offsetof(struct dn_fsk, fsk_next), + fsk_hash, fsk_match, fsk_new); do { /* exit with break when done */ struct dn_schk *s; int flags = nfs->sched_nr ? DNHT_INSERT : 0; @@ -1379,6 +1417,10 @@ config_sched(struct dn_sch *_nsch, struct dn_id *arg) new_flags = a.sch->flags; } DN_BH_WLOCK(); + if (dn_cfg.schedhash == NULL) + dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, + offsetof(struct dn_schk, schk_next), + schk_hash, schk_match, schk_new); again: /* run twice, for wfq and fifo */ /* * lookup the type. If not supplied, use the previous one @@ -1432,13 +1474,16 @@ again: /* run twice, for wfq and fifo */ if (!pf || pf->link_nr != p.link_nr) { /* no saved value */ s->profile = NULL; /* XXX maybe not needed */ } else { - s->profile = malloc(sizeof(struct dn_profile), + size_t pf_size = sizeof(struct dn_profile) + + s->profile->samples_no * sizeof(int); + + s->profile = malloc(pf_size, M_DUMMYNET, M_NOWAIT | M_ZERO); if (s->profile == NULL) { D("cannot allocate profile"); goto error; //XXX } - bcopy(pf, s->profile, sizeof(*pf)); + bcopy(pf, s->profile, pf_size); } } p.link_nr = 0; @@ -1585,6 +1630,7 @@ config_profile(struct dn_profile *pf, struct dn_id *arg) bcopy(pf, s->profile, pf->oid.len); s->profile->oid.len = olen; } + DN_BH_WUNLOCK(); return err; } @@ -1603,6 +1649,8 @@ dummynet_flush(void) DX(4, "still %d unlinked fs", dn_cfg.fsk_count); dn_ht_free(dn_cfg.fshash, DNHT_REMOVE); fsk_detach_list(&dn_cfg.fsu, DN_DELETE_FS); + + dn_ht_free(dn_cfg.schedhash, DNHT_REMOVE); /* Reinitialize system heap... */ heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); } @@ -1643,15 +1691,17 @@ do_config(void *p, int l) default: D("cmd %d not implemented", o->type); break; + #ifdef EMULATE_SYSCTL /* sysctl emulation. 
* if we recognize the command, jump to the correct * handler and return */ case DN_SYSCTL_SET: - err = kesysctl_emu_set(p,l); + err = kesysctl_emu_set(p, l); return err; #endif + case DN_CMD_CONFIG: /* simply a header */ break; @@ -1720,8 +1770,7 @@ static int compute_space(struct dn_id *cmd, struct copy_args *a) { int x = 0, need = 0; - int profile_size = sizeof(struct dn_profile) - - ED_MAX_SAMPLES_NO*sizeof(int); + int profile_size = sizeof(struct dn_profile); /* NOTE about compute space: * NP = dn_cfg.schk_count @@ -1879,7 +1928,7 @@ dummynet_get(struct sockopt *sopt, void **compat) } need += sizeof(*cmd); cmd->id = need; - if (have >= need) + if (have >= need) /* got space, hold the lock */ break; DN_BH_WUNLOCK(); @@ -1904,6 +1953,8 @@ dummynet_get(struct sockopt *sopt, void **compat) } else { error = sooptcopyout(sopt, cmd, sizeof(*cmd)); } + /* no enough memory, release the lock and give up */ + /* XXX marta: here we hold the lock */ goto done; } ND("have %d:%d sched %d, %d:%d links %d, %d:%d flowsets %d, " @@ -1950,69 +2001,92 @@ done: free(cmd, M_DUMMYNET); if (start) free(start, M_DUMMYNET); + return error; } +/* + * Functions to drain idle objects -- see dummynet_task() for some notes + */ /* Callback called on scheduler instance to delete it if idle */ static int -drain_scheduler_cb(void *_si, void *arg) +drain_scheduler_cb(void *_si, void *_arg) { struct dn_sch_inst *si = _si; + int *arg = _arg; + int empty; + + if ( (*arg++) > dn_cfg.expire_object_examined) + return DNHT_SCAN_END; if ((si->kflags & DN_ACTIVE) || si->dline.mq.head != NULL) return 0; - if (si->sched->fp->flags & DN_MULTIQUEUE) { - if (si->q_count == 0) - return si_destroy(si, NULL); - else - return 0; - } else { /* !DN_MULTIQUEUE */ - if ((si+1)->ni.length == 0) - return si_destroy(si, NULL); + /* + * if the scheduler is multiqueue, q_count also reflects empty + * queues that point to si, so we need to check si->q_count to + * tell whether we can remove the instance. 
+ */ + if (si->ni.length == 0) { + /* si was marked as idle: + * remove it or increment idle_si_wait counter + */ + empty = (si->sched->fp->flags & DN_MULTIQUEUE) ? + (si->q_count == 0) : 1; + if (empty && + (si->idle_time < dn_cfg.curr_time - dn_cfg.object_idle_tick)) + return si_destroy(si, NULL); else - return 0; + dn_cfg.idle_si_wait++; } - return 0; /* unreachable */ + return 0; } /* Callback called on scheduler to check if it has instances */ static int -drain_scheduler_sch_cb(void *_s, void *arg) +drain_scheduler_sch_cb(void *_s, void *_arg) { struct dn_schk *s = _s; + int *arg = _arg; if (s->sch.flags & DN_HAVE_MASK) { dn_ht_scan_bucket(s->siht, &s->drain_bucket, - drain_scheduler_cb, NULL); - s->drain_bucket++; + drain_scheduler_cb, _arg); } else { if (s->siht) { - if (drain_scheduler_cb(s->siht, NULL) == DNHT_SCAN_DEL) + if (drain_scheduler_cb(s->siht, _arg) == DNHT_SCAN_DEL) s->siht = NULL; } } - return 0; + return ( (*arg++) > dn_cfg.expire_object_examined) ? DNHT_SCAN_END : 0; } /* Called every tick, try to delete a 'bucket' of scheduler */ void dn_drain_scheduler(void) { + int arg = 0; + dn_ht_scan_bucket(dn_cfg.schedhash, &dn_cfg.drain_sch, - drain_scheduler_sch_cb, NULL); - dn_cfg.drain_sch++; + drain_scheduler_sch_cb, &arg); } /* Callback called on queue to delete if it is idle */ static int -drain_queue_cb(void *_q, void *arg) +drain_queue_cb(void *_q, void *_arg) { struct dn_queue *q = _q; + int *arg = _arg; + + if ( (*arg++) > dn_cfg.expire_object_examined) + return DNHT_SCAN_END; if (q->ni.length == 0) { - dn_delete_queue(q, DN_DESTROY); - return DNHT_SCAN_DEL; /* queue is deleted */ + if (q->q_time < dn_cfg.curr_time - dn_cfg.object_idle_tick) { + if (dn_delete_queue(q, DN_DESTROY | DN_DEL_SAFE) == 0) + return DNHT_SCAN_DEL; /* queue is deleted */ + } else + dn_cfg.idle_queue_wait++; } return 0; /* queue isn't deleted */ @@ -2020,35 +2094,36 @@ drain_queue_cb(void *_q, void *arg) /* Callback called on flowset used to check if it has queues */ 
static int -drain_queue_fs_cb(void *_fs, void *arg) +drain_queue_fs_cb(void *_fs, void *_arg) { struct dn_fsk *fs = _fs; + int *arg = _arg; if (fs->fs.flags & DN_QHT_HASH) { /* Flowset has a hash table for queues */ dn_ht_scan_bucket(fs->qht, &fs->drain_bucket, - drain_queue_cb, NULL); - fs->drain_bucket++; + drain_queue_cb, _arg); } else { /* No hash table for this flowset, null the pointer * if the queue is deleted */ if (fs->qht) { - if (drain_queue_cb(fs->qht, NULL) == DNHT_SCAN_DEL) + if (drain_queue_cb(fs->qht, _arg) == DNHT_SCAN_DEL) fs->qht = NULL; } } - return 0; + return ( (*arg++) > dn_cfg.expire_object_examined) ? DNHT_SCAN_END : 0; } /* Called every tick, try to delete a 'bucket' of queue */ void dn_drain_queue(void) { + int arg = 0; + /* scan a bucket of flowset */ dn_ht_scan_bucket(dn_cfg.fshash, &dn_cfg.drain_fs, - drain_queue_fs_cb, NULL); - dn_cfg.drain_fs++; + drain_queue_fs_cb, &arg); } /* @@ -2113,14 +2188,10 @@ ip_dn_ctl(struct sockopt *sopt) static void ip_dn_init(void) { - static int init_done = 0; - - if (init_done) + if (dn_cfg.init_done) return; - init_done = 1; - if (bootverbose) - printf("DUMMYNET with IPv6 initialized (100131)\n"); - + printf("DUMMYNET %p with IPv6 initialized (100409)\n", curvnet); + dn_cfg.init_done = 1; /* Set defaults here. MSVC does not accept initializers, * and this is also useful for vimages */ @@ -2136,37 +2207,49 @@ ip_dn_init(void) /* hash tables */ dn_cfg.max_hash_size = 1024; /* max in the hash tables */ - dn_cfg.hash_size = 64; /* default hash size */ - /* create hash tables for schedulers and flowsets. - * In both we search by key and by pointer. - */ - dn_cfg.schedhash = dn_ht_init(NULL, dn_cfg.hash_size, - offsetof(struct dn_schk, schk_next), - schk_hash, schk_match, schk_new); - dn_cfg.fshash = dn_ht_init(NULL, dn_cfg.hash_size, - offsetof(struct dn_fsk, fsk_next), - fsk_hash, fsk_match, fsk_new); + if (dn_cfg.hash_size == 0) /* XXX or <= 0 ? 
*/ + dn_cfg.hash_size = 64; /* default hash size */ + /* hash tables for schedulers and flowsets are created + * when the first scheduler/flowset is inserted. + * This is done to allow to use the right hash_size value. + * When the last object is deleted, the table is destroyed, + * so a new hash_size value can be used. + * XXX rehash is not supported for now + */ + dn_cfg.schedhash = NULL; + dn_cfg.fshash = NULL; /* bucket index to drain object */ dn_cfg.drain_fs = 0; dn_cfg.drain_sch = 0; + if (dn_cfg.expire_object == 0) + dn_cfg.expire_object = 50; + if (dn_cfg.object_idle_tick == 0) + dn_cfg.object_idle_tick = 1000; + if (dn_cfg.expire_object_examined == 0) + dn_cfg.expire_object_examined = 10; + if (dn_cfg.drain_ratio == 0) + dn_cfg.drain_ratio = 1; + + // XXX what if we don't have a tsc ? +#ifdef HAVE_TSC + dn_cfg.cycle_task_new = dn_cfg.cycle_task_old = readTSC(); +#endif heap_init(&dn_cfg.evheap, 16, offsetof(struct dn_id, id)); SLIST_INIT(&dn_cfg.fsu); SLIST_INIT(&dn_cfg.schedlist); DN_LOCK_INIT(); - ip_dn_ctl_ptr = ip_dn_ctl; - ip_dn_io_ptr = dummynet_io; - TASK_INIT(&dn_task, 0, dummynet_task, NULL); + TASK_INIT(&dn_task, 0, dummynet_task, curvnet); dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, taskqueue_thread_enqueue, &dn_tq); taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); callout_init(&dn_timeout, CALLOUT_MPSAFE); - callout_reset(&dn_timeout, 1, dummynet, NULL); + callout_reset_on(&dn_timeout, 1, dummynet, NULL, 0); /* Initialize curr_time adjustment mechanics. 
*/ getmicrouptime(&dn_cfg.prev_t); @@ -2174,13 +2257,16 @@ ip_dn_init(void) #ifdef KLD_MODULE static void -ip_dn_destroy(void) +ip_dn_destroy(int last) { callout_drain(&dn_timeout); DN_BH_WLOCK(); - ip_dn_ctl_ptr = NULL; - ip_dn_io_ptr = NULL; + if (last) { + printf("%s removing last instance\n", __FUNCTION__); + ip_dn_ctl_ptr = NULL; + ip_dn_io_ptr = NULL; + } dummynet_flush(); DN_BH_WUNLOCK(); @@ -2205,13 +2291,15 @@ dummynet_modevent(module_t mod, int type, void *data) return EEXIST ; } ip_dn_init(); + ip_dn_ctl_ptr = ip_dn_ctl; + ip_dn_io_ptr = dummynet_io; return 0; } else if (type == MOD_UNLOAD) { #if !defined(KLD_MODULE) printf("dummynet statically compiled, cannot unload\n"); return EINVAL ; #else - ip_dn_destroy(); + ip_dn_destroy(1 /* last */); return 0; #endif } else @@ -2289,8 +2377,24 @@ static moduledata_t dummynet_mod = { "dummynet", dummynet_modevent, NULL }; -DECLARE_MODULE(dummynet, dummynet_mod, - SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY-1); +#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN +#define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ +DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); MODULE_VERSION(dummynet, 1); + +/* + * Starting up. Done in order after dummynet_modevent() has been called. + * VNET_SYSINIT is also called for each existing vnet and each new vnet. + */ +//VNET_SYSINIT(vnet_dn_init, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_init, NULL); + +/* + * Shutdown handlers up shop. These are done in REVERSE ORDER, but still + * after dummynet_modevent() has been called. Not called on reboot. + * VNET_SYSUNINIT is also called for each exiting vnet as it exits. + * or when the module is unloaded. 
+ */ +//VNET_SYSUNINIT(vnet_dn_uninit, DN_SI_SUB, DN_MODEV_ORD+2, ip_dn_destroy, NULL); + /* end of file */ diff --git a/dummynet2/ip_fw2.c b/dummynet2/ip_fw2.c index b646245..c55bc0f 100644 --- a/dummynet2/ip_fw2.c +++ b/dummynet2/ip_fw2.c @@ -652,8 +652,8 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, { #ifndef __FreeBSD__ return cred_check(insn, proto, oif, - dst_ip, dst_port, src_ip, src_port, - (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); + dst_ip, dst_port, src_ip, src_port, + (struct bsd_ucred *)uc, ugid_lookupp, ((struct mbuf *)inp)->m_skb); #else /* FreeBSD */ struct inpcbinfo *pi; int wildcard; @@ -1649,7 +1649,7 @@ do { \ } case O_LOG: - ipfw_log(f, hlen, args, m, + ipfw_log(f, hlen, args, m, oif, offset, tablearg, ip); match = 1; break; @@ -1976,9 +1976,9 @@ do { \ break; case O_SKIPTO: - f->pcnt++; /* update stats */ - f->bcnt += pktlen; - f->timestamp = time_uptime; + f->pcnt++; /* update stats */ + f->bcnt += pktlen; + f->timestamp = time_uptime; /* If possible use cached f_pos (in f->next_rule), * whose version is written in f->next_rule * (horrible hacks to avoid changing the ABI). @@ -1986,7 +1986,7 @@ do { \ if (cmd->arg1 != IP_FW_TABLEARG && (uintptr_t)f->x_next == chain->id) { f_pos = (uintptr_t)f->next_rule; - } else { + } else { int i = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; /* make sure we do not jump backward */ @@ -2000,26 +2000,27 @@ do { \ f->x_next = (void *)(uintptr_t)chain->id; } - } - /* + } + /* * Skip disabled rules, and re-enter * the inner loop with the correct * f_pos, f, l and cmd. - * Also clear cmdlen and skip_or - */ + * Also clear cmdlen and skip_or + */ for (; f_pos < chain->n_rules - 1 && (V_set_disable & (1 << chain->map[f_pos]->set)); f_pos++) ; - /* prepare to enter the inner loop */ + /* Re-enter the inner loop at the skipto rule. 
*/ f = chain->map[f_pos]; - l = f->cmd_len; - cmd = f->cmd; - match = 1; - cmdlen = 0; - skip_or = 0; - break; + l = f->cmd_len; + cmd = f->cmd; + match = 1; + cmdlen = 0; + skip_or = 0; + continue; + break; /* not reached */ case O_REJECT: /* @@ -2083,6 +2084,8 @@ do { \ set_match(args, f_pos, chain); args->rule.info = (cmd->arg1 == IP_FW_TABLEARG) ? tablearg : cmd->arg1; + if (V_fw_one_pass) + args->rule.info |= IPFW_ONEPASS; retval = (cmd->opcode == O_NETGRAPH) ? IP_FW_NETGRAPH : IP_FW_NGTEE; l = 0; /* exit inner loop */ diff --git a/dummynet2/ip_fw_dynamic.c b/dummynet2/ip_fw_dynamic.c index a601695..d33849d 100644 --- a/dummynet2/ip_fw_dynamic.c +++ b/dummynet2/ip_fw_dynamic.c @@ -894,10 +894,7 @@ struct mbuf * ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, u_int32_t ack, int flags) { -#ifndef __FreeBSD__ - return NULL; -#else - struct mbuf *m; + struct mbuf *m = NULL; /* stupid compiler */ int len, dir; struct ip *h = NULL; /* stupid compiler */ #ifdef INET6 @@ -1033,7 +1030,6 @@ ipfw_send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, } return (m); -#endif /* __FreeBSD__ */ } /* @@ -1132,8 +1128,8 @@ ipfw_tick(void * vnetx) } #endif done: - callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period * hz, - ipfw_tick, vnetx); + callout_reset_on(&V_ipfw_timeout, V_dyn_keepalive_period * hz, + ipfw_tick, vnetx, 0); CURVNET_RESTORE(); } @@ -1174,7 +1170,7 @@ ipfw_dyn_init(void) V_dyn_max = 4096; /* max # of dynamic rules */ callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); - callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); + callout_reset_on(&V_ipfw_timeout, hz, ipfw_tick, curvnet, 0); } void diff --git a/dummynet2/ip_fw_log.c b/dummynet2/ip_fw_log.c index 2babbcb..55b5c26 100644 --- a/dummynet2/ip_fw_log.c +++ b/dummynet2/ip_fw_log.c @@ -24,7 +24,7 @@ */ #include -__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_log.c 200601 2009-12-16 10:48:40Z luigi $"); +__FBSDID("$FreeBSD: head/sys/netinet/ipfw/ip_fw_log.c 
209845 2010-07-09 11:27:33Z glebius $"); /* * Logging support for ipfw @@ -103,6 +103,24 @@ log_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) return EINVAL; } +static int +ipfw_log_output(struct ifnet *ifp, struct mbuf *m, + struct sockaddr *dst, struct route *ro) +{ + if (m != NULL) + m_freem(m); + return EINVAL; +} + +static void +ipfw_log_start(struct ifnet* ifp) +{ + panic("ipfw_log_start() must not be called"); +} + +static const u_char ipfwbroadcastaddr[6] = + { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; + void ipfw_log_bpf(int onoff) { @@ -119,11 +137,12 @@ ipfw_log_bpf(int onoff) ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = (void *)log_dummy; ifp->if_ioctl = log_dummy; - ifp->if_start = (void *)log_dummy; - ifp->if_output = (void *)log_dummy; + ifp->if_start = ipfw_log_start; + ifp->if_output = ipfw_log_output; ifp->if_addrlen = 6; ifp->if_hdrlen = 14; if_attach(ifp); + ifp->if_broadcastaddr = ipfwbroadcastaddr; ifp->if_baudrate = IF_Mbps(10); bpfattach(ifp, DLT_EN10MB, 14); log_if = ifp; @@ -152,22 +171,17 @@ ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, if (V_fw_verbose == 0) { #ifndef WITHOUT_BPF - struct m_hdr mh; if (log_if == NULL || log_if->if_bpf == NULL) return; - /* BPF treats the "mbuf" as read-only */ - mh.mh_next = m; - mh.mh_len = ETHER_HDR_LEN; - if (args->eh) { /* layer2, use orig hdr */ - mh.mh_data = (char *)args->eh; - } else { - /* add fake header. Later we will store - * more info in the header + + if (args->eh) /* layer2, use orig hdr */ + BPF_MTAP2(log_if, args->eh, ETHER_HDR_LEN, m); + else + /* Add fake header. Later we will store + * more info in the header. 
*/ - mh.mh_data = "DDDDDDSSSSSS\x08\x00"; - } - BPF_MTAP(log_if, (struct mbuf *)&mh); + BPF_MTAP2(log_if, "DDDDDDSSSSSS\x08\x00", ETHER_HDR_LEN, m); #endif /* !WITHOUT_BPF */ return; } diff --git a/dummynet2/ip_fw_nat.c b/dummynet2/ip_fw_nat.c index ead46a7..41fe919 100644 --- a/dummynet2/ip_fw_nat.c +++ b/dummynet2/ip_fw_nat.c @@ -55,7 +55,7 @@ __FBSDID("$FreeBSD: user/luigi/ipfw3-head/sys/netinet/ipfw/ip_fw_nat.c 200975 20 static VNET_DEFINE(eventhandler_tag, ifaddr_event_tag); #define V_ifaddr_event_tag VNET(ifaddr_event_tag) -static void +static void ifaddr_change(void *arg __unused, struct ifnet *ifp) { struct cfg_nat *ptr; @@ -69,18 +69,18 @@ ifaddr_change(void *arg __unused, struct ifnet *ifp) /* ...using nic 'ifp->if_xname' as dynamic alias address. */ if (strncmp(ptr->if_name, ifp->if_xname, IF_NAMESIZE) != 0) continue; - if_addr_rlock(ifp); - TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr == NULL) - continue; - if (ifa->ifa_addr->sa_family != AF_INET) - continue; - ptr->ip = ((struct sockaddr_in *) - (ifa->ifa_addr))->sin_addr; - LibAliasSetAddress(ptr->lib, ptr->ip); - } - if_addr_runlock(ifp); + if_addr_rlock(ifp); + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr == NULL) + continue; + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + ptr->ip = ((struct sockaddr_in *) + (ifa->ifa_addr))->sin_addr; + LibAliasSetAddress(ptr->lib, ptr->ip); } + if_addr_runlock(ifp); + } IPFW_WUNLOCK(chain); } @@ -131,9 +131,9 @@ del_redir_spool_cfg(struct cfg_nat *n, struct redir_chain *head) free(r, M_IPFW); break; default: - printf("unknown redirect mode: %u\n", r->mode); + printf("unknown redirect mode: %u\n", r->mode); /* XXX - panic?!?!? 
*/ - break; + break; } } } @@ -166,7 +166,7 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) remotePortCopy = 0; r->alink[i] = LibAliasRedirectPort(ptr->lib, r->laddr, htons(r->lport + i), r->raddr, - htons(remotePortCopy), r->paddr, + htons(remotePortCopy), r->paddr, htons(r->pport + i), r->proto); if (r->alink[i] == NULL) { r->alink[0] = NULL; @@ -180,22 +180,22 @@ add_redir_spool_cfg(char *buf, struct cfg_nat *ptr) break; default: printf("unknown redirect mode: %u\n", r->mode); - break; + break; } /* XXX perhaps return an error instead of panic ? */ if (r->alink[0] == NULL) panic("LibAliasRedirect* returned NULL"); /* LSNAT handling. */ - for (i = 0; i < r->spool_cnt; i++) { - ser_s = (struct cfg_spool *)&buf[off]; + for (i = 0; i < r->spool_cnt; i++) { + ser_s = (struct cfg_spool *)&buf[off]; s = malloc(SOF_REDIR, M_IPFW, M_WAITOK | M_ZERO); - memcpy(s, ser_s, SOF_SPOOL); - LibAliasAddServer(ptr->lib, r->alink[0], - s->addr, htons(s->port)); - off += SOF_SPOOL; - /* Hook spool entry. */ + memcpy(s, ser_s, SOF_SPOOL); + LibAliasAddServer(ptr->lib, r->alink[0], + s->addr, htons(s->port)); + off += SOF_SPOOL; + /* Hook spool entry. */ LIST_INSERT_HEAD(&r->spool_chain, s, _next); - } + } /* And finally hook this redir entry. */ LIST_INSERT_HEAD(&ptr->redir_chain, r, _next); } @@ -220,9 +220,9 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) } ip = mtod(mcl, struct ip *); - /* + /* * XXX - Libalias checksum offload 'duct tape': - * + * * locally generated packets have only pseudo-header checksum * calculated and libalias will break it[1], so mark them for * later fix. 
Moreover there are cases when libalias modifies @@ -252,20 +252,20 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) * it can handle delayed checksum and tso) */ - if (mcl->m_pkthdr.rcvif == NULL && + if (mcl->m_pkthdr.rcvif == NULL && mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) ldt = 1; c = mtod(mcl, char *); if (args->oif == NULL) - retval = LibAliasIn(t->lib, c, + retval = LibAliasIn(t->lib, c, mcl->m_len + M_TRAILINGSPACE(mcl)); else - retval = LibAliasOut(t->lib, c, + retval = LibAliasOut(t->lib, c, mcl->m_len + M_TRAILINGSPACE(mcl)); if (retval == PKT_ALIAS_RESPOND) { - m->m_flags |= M_SKIP_FIREWALL; - retval = PKT_ALIAS_OK; + m->m_flags |= M_SKIP_FIREWALL; + retval = PKT_ALIAS_OK; } if (retval != PKT_ALIAS_OK && retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { @@ -276,17 +276,17 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) } mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); - /* - * XXX - libalias checksum offload - * 'duct tape' (see above) + /* + * XXX - libalias checksum offload + * 'duct tape' (see above) */ - if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && + if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && ip->ip_p == IPPROTO_TCP) { - struct tcphdr *th; + struct tcphdr *th; th = (struct tcphdr *)(ip + 1); - if (th->th_x2) + if (th->th_x2) ldt = 1; } @@ -295,37 +295,35 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) struct udphdr *uh; u_short cksum; - /* XXX check if ip_len can stay in net format */ - cksum = in_pseudo( - ip->ip_src.s_addr, - ip->ip_dst.s_addr, - htons(ip->ip_p + ntohs(ip->ip_len) - (ip->ip_hl << 2)) - ); - + ip->ip_len = ntohs(ip->ip_len); + cksum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(ip->ip_p + ip->ip_len - (ip->ip_hl << 2))); + switch (ip->ip_p) { case IPPROTO_TCP: th = (struct tcphdr *)(ip + 1); - /* - * Maybe it was set in - * libalias... + /* + * Maybe it was set in + * libalias... 
*/ th->th_x2 = 0; th->th_sum = cksum; - mcl->m_pkthdr.csum_data = + mcl->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); break; case IPPROTO_UDP: uh = (struct udphdr *)(ip + 1); uh->uh_sum = cksum; - mcl->m_pkthdr.csum_data = + mcl->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); - break; + break; } /* No hw checksum offloading: do it ourselves */ if ((mcl->m_pkthdr.csum_flags & CSUM_DELAY_DATA) == 0) { in_delayed_cksum(mcl); mcl->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } + ip->ip_len = htons(ip->ip_len); } args->m = mcl; return (IP_FW_NAT); @@ -343,7 +341,7 @@ lookup_nat(struct nat_list *l, int nat_id) return res; } -static int +static int ipfw_nat_cfg(struct sockopt *sopt) { struct cfg_nat *ptr, *ser_n; @@ -355,14 +353,14 @@ ipfw_nat_cfg(struct sockopt *sopt) ser_n = (struct cfg_nat *)buf; /* check valid parameter ser_n->id > 0 ? */ - /* + /* * Find/create nat rule. */ IPFW_WLOCK(chain); ptr = lookup_nat(&chain->nat, ser_n->id); if (ptr == NULL) { /* New rule: allocate and init new instance. */ - ptr = malloc(sizeof(struct cfg_nat), + ptr = malloc(sizeof(struct cfg_nat), M_IPFW, M_NOWAIT | M_ZERO); if (ptr == NULL) { IPFW_WUNLOCK(chain); @@ -384,13 +382,13 @@ ipfw_nat_cfg(struct sockopt *sopt) } IPFW_WUNLOCK(chain); - /* + /* * Basic nat configuration. */ ptr->id = ser_n->id; - /* - * XXX - what if this rule doesn't nat any ip and just - * redirect? + /* + * XXX - what if this rule doesn't nat any ip and just + * redirect? * do we set aliasaddress to 0.0.0.0? */ ptr->ip = ser_n->ip; @@ -400,7 +398,7 @@ ipfw_nat_cfg(struct sockopt *sopt) LibAliasSetAddress(ptr->lib, ptr->ip); memcpy(ptr->if_name, ser_n->if_name, IF_NAMESIZE); - /* + /* * Redir and LSNAT configuration. */ /* Delete old cfgs. 
*/ @@ -420,7 +418,7 @@ ipfw_nat_del(struct sockopt *sopt) struct cfg_nat *ptr; struct ip_fw_chain *chain = &V_layer3_chain; int i; - + sooptcopyin(sopt, &i, sizeof i, sizeof i); /* XXX validate i */ IPFW_WLOCK(chain); @@ -440,7 +438,7 @@ ipfw_nat_del(struct sockopt *sopt) static int ipfw_nat_get_cfg(struct sockopt *sopt) -{ +{ uint8_t *data; struct cfg_nat *n; struct cfg_redir *r; @@ -448,7 +446,7 @@ ipfw_nat_get_cfg(struct sockopt *sopt) int nat_cnt, off; struct ip_fw_chain *chain; int err = ENOSPC; - + chain = &V_layer3_chain; nat_cnt = 0; off = sizeof(nat_cnt); @@ -460,30 +458,30 @@ ipfw_nat_get_cfg(struct sockopt *sopt) nat_cnt++; if (off + SOF_NAT >= NAT_BUF_LEN) goto nospace; - bcopy(n, &data[off], SOF_NAT); - off += SOF_NAT; - LIST_FOREACH(r, &n->redir_chain, _next) { + bcopy(n, &data[off], SOF_NAT); + off += SOF_NAT; + LIST_FOREACH(r, &n->redir_chain, _next) { if (off + SOF_REDIR >= NAT_BUF_LEN) goto nospace; bcopy(r, &data[off], SOF_REDIR); - off += SOF_REDIR; + off += SOF_REDIR; LIST_FOREACH(s, &r->spool_chain, _next) { if (off + SOF_SPOOL >= NAT_BUF_LEN) - goto nospace; + goto nospace; bcopy(s, &data[off], SOF_SPOOL); off += SOF_SPOOL; - } } + } } err = 0; /* all good */ nospace: IPFW_RUNLOCK(chain); if (err == 0) { - bcopy(&nat_cnt, data, sizeof(nat_cnt)); - sooptcopyout(sopt, data, NAT_BUF_LEN); + bcopy(&nat_cnt, data, sizeof(nat_cnt)); + sooptcopyout(sopt, data, NAT_BUF_LEN); } else { - printf("serialized data buffer not big enough:" - "please increase NAT_BUF_LEN\n"); + printf("serialized data buffer not big enough:" + "please increase NAT_BUF_LEN\n"); } free(data, M_IPFW); return (err); @@ -503,16 +501,16 @@ ipfw_nat_get_log(struct sockopt *sopt) /* one pass to count, one to copy the data */ i = 0; LIST_FOREACH(ptr, &chain->nat, _next) { - if (ptr->lib->logDesc == NULL) + if (ptr->lib->logDesc == NULL) continue; i++; } size = i * (LIBALIAS_BUF_SIZE + sizeof(int)); data = malloc(size, M_IPFW, M_NOWAIT | M_ZERO); - if (data == NULL) { + if (data == 
NULL) { IPFW_RUNLOCK(chain); - return (ENOSPC); - } + return (ENOSPC); + } i = 0; LIST_FOREACH(ptr, &chain->nat, _next) { if (ptr->lib->logDesc == NULL) @@ -551,7 +549,7 @@ ipfw_nat_destroy(void) { struct cfg_nat *ptr, *ptr_temp; struct ip_fw_chain *chain; - + chain = &V_layer3_chain; IPFW_WLOCK(chain); LIST_FOREACH_SAFE(ptr, &chain->nat, _next, ptr_temp) { diff --git a/dummynet2/ip_fw_pfil.c b/dummynet2/ip_fw_pfil.c index 52d85a5..a125ef2 100644 --- a/dummynet2/ip_fw_pfil.c +++ b/dummynet2/ip_fw_pfil.c @@ -99,7 +99,7 @@ SYSEND * The pfilter hook to pass packets to ipfw_chk and then to * dummynet, divert, netgraph or other modules. * The packet may be consumed. - */ + */ int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp) @@ -229,8 +229,13 @@ again: if (ipfw == IP_FW_NGTEE) /* ignore errors for NGTEE */ goto again; /* continue with packet */ break; - + case IP_FW_NAT: + /* honor one-pass in case of successful nat */ + if (V_fw_one_pass) + break; /* ret is already 0 */ + goto again; + case IP_FW_REASS: goto again; /* continue with packet */ @@ -264,7 +269,7 @@ ipfw_divert(struct mbuf **m0, int incoming, struct ipfw_rule_ref *rule, /* Cloning needed for tee? */ if (tee == 0) { - clone = *m0; /* use the original mbuf */ + clone = *m0; /* use the original mbuf */ *m0 = NULL; } else { clone = m_dup(*m0, M_DONTWAIT); diff --git a/dummynet2/ipfw2_mod.c b/dummynet2/ipfw2_mod.c index 1ea6e57..7ce046b 100644 --- a/dummynet2/ipfw2_mod.c +++ b/dummynet2/ipfw2_mod.c @@ -24,7 +24,7 @@ */ /* - * $Id: ipfw2_mod.c 5797 2010-03-21 16:31:08Z luigi $ + * $Id: ipfw2_mod.c 10302 2012-01-19 21:49:23Z marta $ * * The main interface to build ipfw+dummynet as a linux module. 
* (and possibly as a windows module as well, though that part @@ -59,7 +59,7 @@ #include /* nf_queue */ #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) #define __read_mostly #endif @@ -73,8 +73,9 @@ #ifdef __linux__ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -#warning --- inet_hashtables not present on 2.4 +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) +/* XXX was < 2.6.0: inet_hashtables.h is introduced in 2.6.14 */ +// #warning --- inet_hashtables not present on 2.4 #include #include #include @@ -227,13 +228,127 @@ ipfw_ctl_h(struct sockopt *s, int cmd, int dir, int len, void __user *user) return -ret; /* errors are < 0 on linux */ } +/* + * Convert an mbuf into an skbuff + * At the moment this only works for ip packets fully contained + * in a single mbuf. We assume that on entry ip_len and ip_off are + * in host format, and the ip checksum is not computed. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* check boundary */ +int dst_output(struct skbuff *s) +{ + return 0; +} + +struct sk_buff * +mbuf2skbuff(struct mbuf* m) +{ + return NULL; +} +#else +struct sk_buff * +mbuf2skbuff(struct mbuf* m) +{ + struct sk_buff *skb; + size_t len = m->m_pkthdr.len; + + /* used to lookup the routing table */ + struct rtable *r; + struct flowi fl; + int ret = 0; /* success for ip_route_output_key() */ + + struct ip *ip = mtod(m, struct ip *); + + /* XXX ip_output has ip_len and ip_off in network format, + * linux expects host format */ + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + + ip->ip_sum = 0; + ip->ip_sum = in_cksum(m, ip->ip_hl<<2); + + /* fill flowi struct, we need just the dst addr, see XXX */ + bzero(&fl, sizeof(fl)); + flow_daddr.daddr = ip->ip_dst.s_addr; + + /* + * ip_route_output_key() should increment + * r->u.dst.__use and call a dst_hold(dst) + * XXX verify how we release the resources. 
+ */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38) /* check boundary */ + r = ip_route_output_key(&init_net, &fl.u.ip4); +#elif LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) /* check boundary */ + ret = ip_route_output_key(&init_net, &r, &fl); +#else + ret = ip_route_output_key(&r, &fl); +#endif + if (ret != 0 || r == NULL ) { + printf("NO ROUTE FOUND\n"); + return NULL; + } + + /* allocate the skbuff and the data */ + skb = alloc_skb(len + sizeof(struct ethhdr), GFP_ATOMIC); + if (skb == NULL) { + printf("%s: can not allocate SKB buffers.\n", __FUNCTION__); + return NULL; + } + + skb->protocol = htons(ETH_P_IP); // XXX 8 or 16 bit ? + /* sk_dst_set XXX take the lock (?) */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) + skb_dst_set(skb, &r->u.dst); +#else + skb_dst_set(skb, &r->dst); +#endif + skb->dev = skb_dst(skb)->dev; + + /* reserve space for ethernet header */ + skb_reserve(skb, sizeof(struct ethhdr)); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) + skb_reset_network_header(skb); // skb->network_header = skb->data - skb->head +#else + skb->nh.raw = skb->data; +#endif + /* set skbuff tail pointers and copy content */ + skb_put(skb, len); + memcpy(skb->data, m->m_data, len); + + return skb; +} +#endif /* keepalives not supported on linux 2.4 */ + +/* + * This function is called to reinject packets to the + * kernel stack within the linux netfilter system + * or to send a new created mbuf. + * In the first case we have a valid sk_buff pointer + * encapsulated within the fake mbuf, so we can call + * the reinject function trough netisr_dispatch. + * In the last case we need to build a sk_buff from scratch, + * before sending out the packet. 
+ */ int ip_output(struct mbuf *m, struct mbuf __unused *opt, struct route __unused *ro, int __unused flags, struct ip_moptions __unused *imo, struct inpcb __unused *inp) { - netisr_dispatch(0, m); - return 0; + if ( m->m_skb != NULL ) { /* reinjected packet, just call dispatch */ + netisr_dispatch(0, m); + } else { + /* self-generated packet, wrap as appropriate and send */ +#ifdef __linux__ + struct sk_buff *skb = mbuf2skbuff(m); + + if (skb != NULL) + dst_output(skb); +#else /* Windows */ +#endif + FREE_PKT(m); + } + return 0; } /* @@ -347,7 +462,7 @@ call_ipfw(unsigned int __unused hooknum, return NF_QUEUE; } -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) /* XXX was 2.6.0 */ #define NF_STOP NF_ACCEPT #endif @@ -360,10 +475,10 @@ call_ipfw(unsigned int __unused hooknum, #define nf_queue_entry nf_info /* for simplicity */ /* also, 2.4 and perhaps something else have different arguments */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* unsure on the exact boundary */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) /* XXX unsure */ /* on 2.4 we use nf_info */ #define QH_ARGS struct sk_buff *skb, struct nf_info *info, void *data -#else /* 2.6.1.. 2.6.24 */ +#else /* 2.6.14. 2.6.24 */ #define QH_ARGS struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data #endif @@ -420,7 +535,7 @@ ipfw2_queue_handler(QH_ARGS) m->m_pkthdr.len = skb->len; /* total packet len */ m->m_pkthdr.rcvif = info->indev; m->queue_entry = info; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) /* XXX was 2.6.0 */ m->m_data = skb->nh.iph; #else m->m_data = skb_network_header(skb); @@ -451,11 +566,11 @@ struct route; struct ip_moptions; struct inpcb; - /* XXX should include prototypes for netisr_dispatch and ip_output */ /* * The reinjection routine after a packet comes out from dummynet. * We must update the skb timestamp so ping reports the right time. 
+ * This routine is also used (with num == -1) as FREE_PKT. XXX */ void netisr_dispatch(int num, struct mbuf *m) @@ -463,9 +578,21 @@ netisr_dispatch(int num, struct mbuf *m) struct nf_queue_entry *info = m->queue_entry; struct sk_buff *skb = m->m_skb; /* always used */ + /* + * This function can be called by the FREE_PKT() + * used when ipfw generate their own mbuf packets + * or by the mbuf2skbuff() function. + */ m_freem(m); - KASSERT((info != NULL), ("%s info null!\n", __FUNCTION__)); + /* XXX check + * info is null in the case of a real mbuf + * (one created by the ipfw code without a + * valid sk_buff pointer + */ + if (info == NULL) + return; + #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) // XXX above 2.6.x ? __net_timestamp(skb); /* update timestamp */ #endif @@ -508,7 +635,7 @@ linux_lookup(const int proto, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, struct sk_buff *skb, int dir, struct bsd_ucred *u) { -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,13) /* XXX was 2.6.0 */ return -1; #else struct sock *sk; @@ -617,7 +744,15 @@ linux_lookup(const int proto, const __be32 saddr, const __be16 sport, * * the unregister function changed arguments between 2.6.22 and 2.6.24 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,14) +struct nf_queue_handler ipfw2_queue_handler_desc = { + .outfn = ipfw2_queue_handler, + .name = "ipfw2 dummynet queue", +}; +#define REG_QH_ARG(fn) &(fn ## _desc) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) /* XXX was 2.6.0 */ static int nf_register_hooks(struct nf_hook_ops *ops, int n) { @@ -638,17 +773,13 @@ nf_unregister_hooks(struct nf_hook_ops *ops, int n) nf_unregister_hook(ops + i); } } +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) /* XXX was 2.6.0 */ #define REG_QH_ARG(fn) fn, NULL /* argument for nf_[un]register_queue_handler */ +#endif #define UNREG_QH_ARG(fn) //fn /* argument for 
nf_[un]register_queue_handler */ #define SET_MOD_OWNER -#else /* linux >= 2.6.0 */ - -struct nf_queue_handler ipfw2_queue_handler_desc = { - .outfn = ipfw2_queue_handler, - .name = "ipfw2 dummynet queue", -}; -#define REG_QH_ARG(fn) &(fn ## _desc) +#else /* linux > 2.6.17 */ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) #define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ diff --git a/dummynet2/missing.h b/dummynet2/missing.h index bf72a95..b48981e 100644 --- a/dummynet2/missing.h +++ b/dummynet2/missing.h @@ -24,7 +24,7 @@ */ /* - * $Id: missing.h 5817 2010-03-23 09:50:56Z svn_panicucci $ + * $Id: missing.h 11275 2012-06-10 17:27:40Z marta $ * * Header for kernel variables and functions that are not available in * userland. @@ -33,7 +33,13 @@ #ifndef _MISSING_H_ #define _MISSING_H_ +/* sysctl.h and module.h are included before cdefs.h + * because of cdefs.h defines __unused */ + +#include +#include #include +#include /* portability features, to be set before the rest: */ #define HAVE_NET_IPLEN /* iplen/ipoff in net format */ @@ -53,6 +59,7 @@ #include /* bsd-compat.c */ #include /* bsd-compat.c */ #include /* local version */ +#define INADDR_TO_IFP(a, b) b = NULL #else /* __linux__ */ @@ -211,7 +218,7 @@ extern struct timeval boottime; /* The time_uptime a FreeBSD variable increased each second */ #ifdef __linux__ -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,37) /* revise boundaries */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,4,37) /* revise boundaries */ #define time_uptime get_seconds() #else /* OpenWRT */ #define time_uptime CURRENT_TIME @@ -343,13 +350,17 @@ struct net_device { int in_cksum(struct mbuf *m, int len); #define divert_cookie(mtag) 0 #define divert_info(mtag) 0 -#define INADDR_TO_IFP(a, b) b = NULL #define pf_find_mtag(a) NULL #define pf_get_mtag(a) NULL #ifndef _WIN32 #define AF_LINK AF_ASH /* ? 
our sys/socket.h */ #endif +/* search local the ip addresses, used for the "me" keyword */ +#include +#define INADDR_TO_IFP(ip, b) \ + b = ip_dev_find((struct net *)&init_net, ip.s_addr) + /* we don't pullup, either success or free and fail */ #define m_pullup(m, x) \ ((m)->m_len >= x ? (m) : (FREE_PKT(m), NULL)) @@ -476,12 +487,13 @@ struct sock *inet_lookup( struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); #endif /* Linux < 2.6 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) && \ + LINUX_VERSION_CODE > KERNEL_VERSION(2,6,16) /* XXX NOT sure, in 2.6.9 give an error */ #define module_param_named(_name, _var, _ty, _perm) \ //module_param(_name, _ty, 0644) #endif -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) typedef unsigned long uintptr_t; #ifdef __i386__ @@ -597,6 +609,9 @@ extern int (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa); #define VNET_PTR(n) (&(n)) #define VNET(n) (n) +VNET_DECLARE(int, ip_defttl); +#define V_ip_defttl VNET(ip_defttl); + int ipfw_check_hook(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, struct inpcb *inp); diff --git a/dummynet2/radix.c b/dummynet2/radix.c index 47aa0b3..4bef996 100644 --- a/dummynet2/radix.c +++ b/dummynet2/radix.c @@ -762,8 +762,10 @@ on2: if (m->rm_flags & RNF_NORMAL) { mmask = m->rm_leaf->rn_mask; if (tt->rn_flags & RNF_NORMAL) { +#if !defined(RADIX_MPATH) log(LOG_ERR, "Non-unique normal route, mask not entered\n"); +#endif return tt; } } else diff --git a/glue.h b/glue.h index 76dd0f9..622ca4b 100644 --- a/glue.h +++ b/glue.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. */ /* - * $Id: glue.h 5822 2010-03-23 10:39:56Z svn_magno $ + * $Id: glue.h 11277 2012-06-10 17:44:15Z marta $ * * glue code to adapt the FreeBSD version to linux and windows, * userland and kernel. 
@@ -38,6 +38,7 @@ #ifndef _GLUE_H #define _GLUE_H + /* * common definitions to allow portability */ @@ -60,11 +61,14 @@ #include #include #include -#include +#ifdef __linux__ +#include /* linux only 20111031 */ +#endif #else /* KERNEL_MODULE, kernel headers */ #ifdef __linux__ + #include #define ifnet net_device /* remap */ @@ -81,7 +85,8 @@ #endif /* on 2.6.22, msg.h requires spinlock_types.h */ -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0) && \ +/* XXX spinlock_type.h was introduced in 2.6.14 */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) && \ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) #include #endif @@ -237,6 +242,7 @@ enum ipfw_msg_type { /* on freebsd sys/socket.h pf specific */ #define NET_RT_IFLIST 3 /* survey interface list */ +#if defined(__linux__) || defined(__CYGWIN32__) /* on freebsd net/if.h XXX used */ struct if_data { /* ... */ @@ -298,6 +304,11 @@ struct clockinfo { int profhz; /* profiling clock frequency */ }; +/* no sin_len in sockaddr, we only remap in userland */ +#define sin_len sin_zero[0] + +#endif /* Linux/Win */ + /* * linux does not have a reentrant version of qsort, * so we the FreeBSD stdlib version. 
@@ -330,8 +341,6 @@ long long int strtonum(const char *nptr, long long minval, int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -/* no sin_len in sockaddr, we only remap in userland */ -#define sin_len sin_zero[0] #else /* KERNEL_MODULE */ @@ -352,13 +361,41 @@ struct route_in6 { #include #endif -/* skb_dst() was introduced from linux 2.6.31 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) // or 2.4.x -#define skb_dst(_dummy) skb->dst +/* skb_dst() and skb_dst_set() were introduced in linux 2.6.31 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) +void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst); +struct dst_entry *skb_dst(const struct sk_buff *skb); +#endif + +/* The struct flowi changed */ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,38) // check boundaries +#define flow_daddr fl.u.ip4 +#else +#define flow_daddr fl.nl_u.ip4_u #endif #endif /* __linux__ */ +/* + * Do not load prio_heap.h header because of conflicting names + * with our heap functions defined in include/netinet/ipfw/dn_heap.h + */ +#define _LINUX_PRIO_HEAP_H +/* + * The following define prevents the ipv6.h header from being loaded. + * Starting from the 2.6.38 kernel the ipv6.h file, which is included + * by include/net/inetpeer.h in turn included by net/route.h + * includes the system tcp.h file while we want to include + * our include/net/tcp.h instead.
+ */ +#ifndef _NET_IPV6_H +#define _NET_IPV6_H +static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2) +{ + memcpy(a1, a2, sizeof(struct in6_addr)); +} +#endif /* _NET_IPV6_H */ + #endif /* KERNEL_MODULE */ /* diff --git a/ipfw/Makefile b/ipfw/Makefile index 439bed9..4800b4a 100644 --- a/ipfw/Makefile +++ b/ipfw/Makefile @@ -1,33 +1,19 @@ # +# $Id: Makefile 11277 2012-06-10 17:44:15Z marta $ +# # GNUMakefile to build the userland part of ipfw on Linux and Windows # # enable extra debugging information # Do not set with = or := so we can inherit from the caller XOSARCH := $(shell uname) OSARCH ?= $(XOSARCH) -$(warning Building userland ipfw for $(VER) $(OSARCH)) +OSARCH := $(shell uname) +OSARCH := $(findstring $(OSARCH),FreeBSD Linux Darwin) +ifeq ($(OSARCH),) + OSARCH := Windows +endif -# utility to figure if gcc has a given option -#################### extract from Kbuild.include -# try-run -# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) -# Exit code chooses option. "$$TMP" is can be used as temporary file and -# is automatically cleaned up. -try-run = $(shell set -e; \ - TMP="$(TMPOUT).$$$$.tmp"; \ - TMPO="$(TMPOUT).$$$$.o"; \ - if ($(1)) >/dev/null 2>&1; \ - then echo "$(2)"; \ - else echo "$(3)"; \ - fi; \ - rm -f "$$TMP" "$$TMPO") - -# cc-option -# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) - -cc-option = $(call try-run,\ - $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2)) -#################### +$(warning Building userland ipfw for $(VER) $(OSARCH)) #TCC=c:/tesi/tcc @@ -42,8 +28,19 @@ ifneq ($(VER),openwrt) ifeq ($(OSARCH),Linux) EXTRA_CFLAGS += -D__BSD_VISIBLE EXTRA_CFLAGS += -Werror - EXTRA_CFLAGS += $(call cc-option, -Wno-unused-but-set-variable) -else # must be Cygwin ? 
+ # Required by GCC 4.6 + EXTRA_CFLAGS += -Wno-unused-but-set-variable +endif +ifeq ($(OSARCH),FreeBSD) + EXTRA_CFLAGS += -D__BSD_VISIBLE + EXTRA_CFLAGS += -Werror +endif +ifeq ($(OSARCH),Darwin) + EXTRA_CFLAGS += -D__BSD_VISIBLE + EXTRA_CFLAGS += -Werror +endif +# must be Cygwin ? +ifeq ($(OSARCH),Windows) ifeq ($(TCC),) EXTRA_CFLAGS += -I/cygdrive/c/WinDDK/7600.16385.0/inc/ddk EXTRA_CFLAGS += -I . @@ -63,6 +60,7 @@ else EFILES += netinet/ip_icmp.h EFILES += sys/cdefs.h sys/wait.h EFILES += sys/ioctl.h sys/socket.h + endif # EXTRA_CFLAGS += -D_WIN32 # see who defines it EXTRA_CFLAGS += -Dsetsockopt=wnd_setsockopt @@ -83,7 +81,7 @@ ifeq ($(TCC),) CFLAGS += -I$(USRDIR)/include LDFLAGS += -L$(USRDIR)/lib else - LDFLAGS += -L. -lws2_32 + LDFLAGS += -L. -L$(TCC)/lib -lws2_32 endif OBJS = ipfw2.o dummynet.o main.o ipv6.o qsort_r.o diff --git a/ipfw/dummynet.c b/ipfw/dummynet.c index 201549a..231f52f 100644 --- a/ipfw/dummynet.c +++ b/ipfw/dummynet.c @@ -10,7 +10,7 @@ * * This software is provided ``AS IS'' without any warranties of any kind. * - * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/dummynet.c 203321 2010-01-31 21:39:25Z luigi $ + * $FreeBSD: head/sbin/ipfw/dummynet.c 206843 2010-04-19 15:11:45Z luigi $ * * dummynet support */ @@ -101,6 +101,18 @@ o_next(struct dn_id **o, int len, int type) return ret; } +/* handle variable length structures by moving back the pointer and fixing the length */ +static void * +o_compact(struct dn_id **o, int len, int real_length, int type) +{ + struct dn_id *ret = *o; + + ret = O_NEXT(*o, -len); + oid_fill(ret, real_length, type, 0); + *o = O_NEXT(ret, real_length); + return ret; +} + #if 0 static int sort_q(void *arg, const void *pa, const void *pb) @@ -146,10 +158,6 @@ print_mask(struct ipfw_flow_id *id) id->proto, id->src_ip, id->src_port, id->dst_ip, id->dst_port); - - printf("BKT Prot ___Source IP/port____ " - "____Dest.
IP/port____ " - "Tot_pkt/bytes Pkt/Byte Drp\n"); } else { char buf[255]; printf("\n mask: %sproto: 0x%02x, flow_id: 0x%08x, ", @@ -159,22 +167,35 @@ print_mask(struct ipfw_flow_id *id) printf("%s/0x%04x -> ", buf, id->src_port); inet_ntop(AF_INET6, &(id->dst_ip6), buf, sizeof(buf)); printf("%s/0x%04x\n", buf, id->dst_port); + } +} +static void +print_header(struct ipfw_flow_id *id) +{ + if (!IS_IP6_FLOW_ID(id)) + printf("BKT Prot ___Source IP/port____ " + "____Dest. IP/port____ " + "Tot_pkt/bytes Pkt/Byte Drp\n"); + else printf("BKT ___Prot___ _flow-id_ " "______________Source IPv6/port_______________ " "_______________Dest. IPv6/port_______________ " "Tot_pkt/bytes Pkt/Byte Drp\n"); - } } static void -list_flow(struct dn_flow *ni) +list_flow(struct dn_flow *ni, int *print) { char buff[255]; struct protoent *pe = NULL; struct in_addr ina; struct ipfw_flow_id *id = &ni->fid; + if (*print) { + print_header(&ni->fid); + *print = 0; + } pe = getprotobynumber(id->proto); /* XXX: Should check for IPv4 flows */ printf("%3u%c", (ni->oid.id) & 0xff, @@ -203,31 +224,10 @@ list_flow(struct dn_flow *ni) inet_ntop(AF_INET6, &(id->dst_ip6), buff, sizeof(buff)), id->dst_port); } - - /* Tcc relies on msvcrt.dll for printf, and - * it does not support ANSI %llu syntax - */ -#ifndef TCC - printf("%4llu %8llu %2u %4u %3u\n", - align_uint64(&ni->tot_pkts), - align_uint64(&ni->tot_bytes), + pr_u64(&ni->tot_pkts, 4); + pr_u64(&ni->tot_bytes, 8); + printf("%2u %4u %3u\n", ni->length, ni->len_bytes, ni->drops); -#else - /* XXX This should be printed correctly, but for some - * weird reason, it is not. 
Making a printf for each - * value is a workaround, until we don't undestand what's wrong - */ - /*printf("%4I64u %8I64u %2u %4u %3u\n", - align_uint64(&ni->tot_pkts), - align_uint64(&ni->tot_bytes), - ni->length, ni->len_bytes, ni->drops);*/ - - printf("%4I64u ",align_uint64(&ni->tot_pkts)); - printf("%8I64u ",align_uint64(&ni->tot_bytes)); - printf("%2u ",ni->length); - printf("%4u ",ni->len_bytes); - printf("%3u\n",ni->drops); -#endif } static void @@ -311,8 +311,9 @@ static void list_pipes(struct dn_id *oid, struct dn_id *end) { char buf[160]; /* pending buffer */ + int toPrint = 1; /* print header */ + buf[0] = '\0'; - for (; oid != end; oid = O_NEXT(oid, oid->len)) { if (oid->len < sizeof(*oid)) errx(1, "invalid oid len %d\n", oid->len); @@ -349,12 +350,12 @@ list_pipes(struct dn_id *oid, struct dn_id *end) s->sched_nr, s->name, s->flags, s->buckets, s->oid.id); if (s->flags & DN_HAVE_MASK) - print_mask(&s->sched_mask); + print_mask(&s->sched_mask); } break; case DN_FLOW: - list_flow((struct dn_flow *)oid); + list_flow((struct dn_flow *)oid, &toPrint); break; case DN_LINK: { @@ -391,7 +392,7 @@ list_pipes(struct dn_id *oid, struct dn_id *end) print_extra_delay_parms((struct dn_profile *)oid); } flush_buf(buf); // XXX does it really go here ? - } + } } /* @@ -599,6 +600,70 @@ compare_points(const void *vp1, const void *vp2) #define ED_EFMT(s) EX_DATAERR,"error in %s at line %d: "#s,filename,lineno +/* + * Interpolate a set of probability-value tuples. + * + * This function takes as input a tuple of values + * and samples the interpolated curve described from the tuples. + * + * The user defined points are stored in the points structure. + * The number of points is stored in points_no. + * The user defined sampling value is stored in samples_no. + * The resulting samples are in the "samples" pointer. + * + * We assume that the last point for the '1' value of the + * probability should be defined.
(XXX add checks for this) + * + * The input data are points and points_no. + * The output data are s (the array of s_no samples) + * and s_no (the number of samples) + * + */ +static void +interpolate_samples(struct point *p, int points_no, + int *samples, int samples_no, const char *filename) +{ + double dy; /* delta on the y axis */ + double y; /* current value of y */ + double x; /* current value of x */ + double m; /* the y slope */ + int i; /* samples index */ + int curr; /* points current index */ + + /* make sure that there are enough points. */ + /* XXX Duplicated should be removed */ + if (points_no < 3) + errx(EX_DATAERR, "%s too few samples, need at least %d", + filename, 3); + + qsort(p, points_no, sizeof(struct point), compare_points); + + dy = 1.0/samples_no; + y = 0; + + for (i=0, curr = 0; i < samples_no; i++, y+=dy) { + /* This statement moves the curr pointer to the next point + * skipping the points with the same x value. We are + * guaranteed to exit from the loop because the + * last possible value of y is strictly less than 1 + * and the last possible value of the y points is 1 */ + while ( y >= p[curr+1].prob ) curr++; + + /* compute the slope of the curve */ + m = (p[curr+1].delay - p[curr].delay) / (p[curr+1].prob - p[curr].prob); + /* compute the x value starting from the current point */ + x = p[curr].delay + (y - p[curr].prob) * m; + samples[i] = x; + } + + /* add the last sample */ + samples[i] = p[curr+1].delay; +} + +/* + * link is the link (old pipe) + * p is the profile + */ static void load_extra_delays(const char *filename, struct dn_profile *p, struct dn_link *link) @@ -606,7 +671,6 @@ load_extra_delays(const char *filename, struct dn_profile *p, char line[ED_MAX_LINE_LEN]; FILE *f; int lineno = 0; - int i; int samples = -1; double loss = -1.0; @@ -620,6 +684,7 @@ load_extra_delays(const char *filename, struct dn_profile *p, p->link_nr = link->link_nr; profile_name[0] = '\0'; + f = fopen(filename, "r"); if (f == NULL)
err(EX_UNAVAILABLE, "fopen: %s", filename); @@ -643,10 +708,9 @@ load_extra_delays(const char *filename, struct dn_profile *p, else arg = s; } - if (name == NULL) /* empty line */ + + if ((name == NULL) || (*name == '#')) /* empty line */ continue; - if (arg == NULL) - errx(ED_EFMT("missing arg for %s"), name); if (!strcasecmp(name, ED_TOK_SAMPLES)) { if (samples > 0) @@ -654,13 +718,14 @@ load_extra_delays(const char *filename, struct dn_profile *p, if (atoi(arg) <=0) errx(ED_EFMT("invalid number of samples")); samples = atoi(arg); - if (samples>ED_MAX_SAMPLES_NO) + if (samples>=ED_MAX_SAMPLES_NO-1) errx(ED_EFMT("too many samples, maximum is %d"), - ED_MAX_SAMPLES_NO); + ED_MAX_SAMPLES_NO-1); do_points = 0; } else if (!strcasecmp(name, ED_TOK_BW)) { char buf[IFNAMSIZ]; read_bandwidth(arg, &link->bandwidth, buf, sizeof(buf)); + p->bandwidth = link->bandwidth; } else if (!strcasecmp(name, ED_TOK_LOSS)) { if (loss != -1.0) errx(ED_EFMT("duplicated token: %s"), name); @@ -716,34 +781,9 @@ load_extra_delays(const char *filename, struct dn_profile *p, loss = 1; } - /* make sure that there are enough points. 
*/ - if (points_no < ED_MIN_SAMPLES_NO) - errx(ED_EFMT("too few samples, need at least %d"), - ED_MIN_SAMPLES_NO); - - qsort(points, points_no, sizeof(struct point), compare_points); - - /* interpolation */ - for (i = 0; isamples, samples, filename); - int ix = y1; - int stop = y2; - - if (x1 == x2) { - for (; ixsamples[ix] = x1; - } else { - double m = (y2-y1)/(x2-x1); - double c = y1 - m*x1; - for (; ixsamples[ix] = (ix - c)/m; - } - } - p->samples_no = samples; + p->samples_no = samples++; p->loss_level = loss * samples; strncpy(p->name, profile_name, sizeof(p->name)); } @@ -781,6 +821,7 @@ ipfw_config_pipe(int ac, char **av) struct ipfw_flow_id *mask = NULL; int lmax; uint32_t _foo = 0, *flags = &_foo , *buckets = &_foo; + size_t max_pf_size = sizeof(struct dn_profile) + ED_MAX_SAMPLES_NO * sizeof(int); /* * allocate space for 1 header, @@ -788,7 +829,8 @@ ipfw_config_pipe(int ac, char **av) */ lmax = sizeof(struct dn_id); /* command header */ lmax += sizeof(struct dn_sch) + sizeof(struct dn_link) + - sizeof(struct dn_fs) + sizeof(struct dn_profile); + sizeof(struct dn_fs); + lmax += max_pf_size; av++; ac--; /* Pipe number */ @@ -1127,12 +1169,21 @@ end_mask: break; case TOK_PROFILE: + { + size_t real_length; + NEED((!pf), "profile already set"); NEED(p, "profile"); - { NEED1("extra delay needs the file name\n"); - pf = o_next(&buf, sizeof(*pf), DN_PROFILE); + + /* load the profile structure using the DN_API */ + pf = o_next(&buf, max_pf_size, DN_PROFILE); load_extra_delays(av[0], pf, p); //XXX can't fail? 
+ + /* compact the dn_id structure */ + real_length = sizeof(struct dn_profile) + + pf->samples_no * sizeof(int); + o_compact(&buf, max_pf_size, real_length, DN_PROFILE); --ac; ++av; } break; @@ -1165,8 +1216,8 @@ end_mask: } if (fs) { /* XXX accept a 0 scheduler to keep the default */ - if (fs->flags & DN_QSIZE_BYTES) { - size_t len; + if (fs->flags & DN_QSIZE_BYTES) { + size_t len; long limit; len = sizeof(limit); @@ -1381,7 +1432,7 @@ dummynet_list(int ac, char *av[], int show_counters) } else { ret = do_cmd(-IP_DUMMYNET3, oid, (uintptr_t)&l); if (ret != 0 || oid->id <= sizeof(*oid)) - goto done; + goto done; buflen = oid->id + max_size; oid->len = sizeof(*oid); /* restore */ } diff --git a/ipfw/ipfw.8 b/ipfw/ipfw.8 index f488723..b1ec24d 100644 --- a/ipfw/ipfw.8 +++ b/ipfw/ipfw.8 @@ -1,7 +1,7 @@ .\" -.\" $FreeBSD: head/sbin/ipfw/ipfw.8 205372 2010-03-20 14:42:16Z gavin $ +.\" $FreeBSD: head/sbin/ipfw/ipfw.8 211936 2010-08-28 16:32:01Z brucec $ .\" -.Dd March 20, 2010 +.Dd July 27, 2010 .Dt IPFW 8 .Os .Sh NAME @@ -859,9 +859,7 @@ accepted or continues with the next rule, depending on sysctl variable. .It Cm ngtee Ar cookie A copy of packet is diverted into netgraph, original -packet is either accepted or continues with the next rule, depending on -.Va net.inet.ip.fw.one_pass -sysctl variable. +packet continues with the next rule. See .Xr ng_ipfw 4 for more information on @@ -1919,7 +1917,7 @@ and .Pp The SCHED_MASK is used to assign flows to one or more scheduler instances, one for each -value of the packet's 5-fuple after applying SCHED_MASK. +value of the packet's 5-tuple after applying SCHED_MASK. As an example, using ``src-ip 0xffffff00'' creates one instance for each /24 destination subnet. .Pp @@ -2100,7 +2098,7 @@ with either delay or probability first, according to the chosen format. The unit for delay is milliseconds. Data points do not need to be sorted. 
-Also, tne number of actual lines can be different +Also, the number of actual lines can be different from the value of the "samples" parameter: .Nm utility will sort and interpolate @@ -2305,7 +2303,7 @@ Information necessary to route link-local packets to an interface is not available after processing by .Nm dummynet so those packets are dropped in the output path. -Care should be taken to insure that link-local packets are not passed to +Care should be taken to ensure that link-local packets are not passed to .Nm dummynet . .Sh CHECKLIST Here are some important points to consider when designing your diff --git a/ipfw/ipfw2.c b/ipfw/ipfw2.c index 6cb826b..bf3a9b1 100644 --- a/ipfw/ipfw2.c +++ b/ipfw/ipfw2.c @@ -17,7 +17,7 @@ * * NEW command line interface for IP firewall facility * - * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipfw2.c 203369 2010-02-02 07:39:56Z luigi $ + * $FreeBSD: head/sbin/ipfw/ipfw2.c 206843 2010-04-19 15:11:45Z luigi $ */ #include @@ -321,22 +321,29 @@ static struct _s_x rule_options[] = { { NULL, 0 } /* terminator */ }; -/* - * The following is used to generate a printable argument for - * 64-bit numbers, irrespective of platform alignment and bit size. - * Because all the printf in this program use %llu as a format, - * we just return an unsigned long long, which is larger than - * we need in certain cases, but saves the hassle of using - * PRIu64 as a format specifier. - * We don't care about inlining, this is not performance critical code. +/* + * Helper routine to print a possibly unaligned uint64_t on + * various platform. If width > 0, print the value with + * the desired width, followed by a space; + * otherwise, return the required width. 
*/ -unsigned long long -align_uint64(const uint64_t *pll) +int +pr_u64(uint64_t *pd, int width) { - uint64_t ret; - - bcopy (pll, &ret, sizeof(ret)); - return ret; +#ifdef TCC +#define U64_FMT "I64" +#else +#define U64_FMT "llu" +#endif + uint64_t u; + unsigned long long d; + + bcopy (pd, &u, sizeof(u)); + d = u; + return (width > 0) ? + printf("%*" U64_FMT " ", width, d) : + snprintf(NULL, 0, "%" U64_FMT, d) ; +#undef U64_FMT } void * @@ -980,18 +987,11 @@ show_ipfw(struct ip_fw *rule, int pcwidth, int bcwidth) } printf("%05u ", rule->rulenum); - if (pcwidth>0 || bcwidth>0) + if (pcwidth > 0 || bcwidth > 0) { + pr_u64(&rule->pcnt, pcwidth); + pr_u64(&rule->bcnt, bcwidth); + } - /* Tcc relies on msvcrt.dll for printf, and - * it does not support ANSI %llu syntax - */ -#ifndef TCC - printf("%*llu %*llu ", pcwidth, align_uint64(&rule->pcnt), - bcwidth, align_uint64(&rule->bcnt)); -#else - printf("%*I64u %*I64u ", pcwidth, align_uint64(&rule->pcnt), - bcwidth, align_uint64(&rule->bcnt)); -#endif if (co.do_time == 2) printf("%10u ", rule->timestamp); else if (co.do_time == 1) { @@ -1592,25 +1592,12 @@ show_dyn_ipfw(ipfw_dyn_rule *d, int pcwidth, int bcwidth) } bcopy(&d->rule, &rulenum, sizeof(rulenum)); printf("%05d", rulenum); - if (pcwidth>0 || bcwidth>0) - - /* Tcc relies on msvcrt.dll for printf, and - * it does not support ANSI %llu syntax - */ -#ifndef TCC - printf(" %*llu %*llu (%ds)", pcwidth, - align_uint64(&d->pcnt), bcwidth, - align_uint64(&d->bcnt), d->expire); -#else - /*printf(" %*I64u %*I64u (%ds)", pcwidth, - align_uint64(&d->pcnt), bcwidth, - align_uint64(&d->bcnt), d->expire);*/ - - //XXX workaround here, for multiple I64 on the same printf - printf(" %*I64u",pcwidth,align_uint64(&d->pcnt)); - printf(" %*I64u",bcwidth,align_uint64(&d->bcnt)); - printf(" (%ds)",d->expire); -#endif + if (pcwidth > 0 || bcwidth > 0) { + printf(" "); + pr_u64(&d->pcnt, pcwidth); + pr_u64(&d->bcnt, bcwidth); + printf("(%ds)", d->expire); + } switch (d->dyn_type) { case 
O_LIMIT_PARENT: printf(" PARENT %d", d->count); @@ -1770,6 +1757,8 @@ ipfw_sysctl_handler(char *av[], int which) } else if (_substrcmp(*av, "firewall") == 0) { sysctlbyname("net.inet.ip.fw.enable", NULL, 0, &which, sizeof(which)); + sysctlbyname("net.inet6.ip6.fw.enable", NULL, 0, + &which, sizeof(which)); } else if (_substrcmp(*av, "one_pass") == 0) { sysctlbyname("net.inet.ip.fw.one_pass", NULL, 0, &which, sizeof(which)); @@ -1865,24 +1854,12 @@ ipfw_list(int ac, char *av[], int show_counters) continue; /* packet counter */ - - /* Tcc relies on msvcrt.dll for printf, and - * it does not support ANSI %llu syntax - */ -#ifndef TCC - width = snprintf(NULL, 0, "%llu", align_uint64(&r->pcnt)); -#else - width = snprintf(NULL, 0, "%I64u", align_uint64(&r->pcnt)); -#endif + width = pr_u64(&r->pcnt, 0); if (width > pcwidth) pcwidth = width; /* byte counter */ -#ifndef TCC - width = snprintf(NULL, 0, "%llu",align_uint64(&r->bcnt)); -#else - width = snprintf(NULL, 0, "%I64u",align_uint64(&r->bcnt)); -#endif + width = pr_u64(&r->bcnt, 0); if (width > bcwidth) bcwidth = width; } @@ -1896,23 +1873,11 @@ ipfw_list(int ac, char *av[], int show_counters) if (set != co.use_set - 1) continue; } - - /* Tcc relies on msvcrt.dll for printf, and - * it does not support ANSI %llu syntax - */ -#ifndef TCC - width = snprintf(NULL, 0, "%llu",align_uint64(&d->pcnt)); -#else - width = snprintf(NULL, 0, "%I64u",align_uint64(&d->pcnt)); -#endif + width = pr_u64(&d->pcnt, 0); if (width > pcwidth) pcwidth = width; -#ifndef TCC - width = snprintf(NULL, 0, "%llu",align_uint64(&d->bcnt)); -#else - width = snprintf(NULL, 0, "%I64u",align_uint64(&d->bcnt)); -#endif + width = pr_u64(&d->bcnt, 0); if (width > bcwidth) bcwidth = width; } diff --git a/ipfw/ipfw2.h b/ipfw/ipfw2.h index f37c8e2..237f815 100644 --- a/ipfw/ipfw2.h +++ b/ipfw/ipfw2.h @@ -17,7 +17,7 @@ * * NEW command line interface for IP firewall facility * - * $FreeBSD: user/luigi/ipfw3-head/sbin/ipfw/ipfw2.h 203280 2010-01-31 12:21:20Z 
luigi $ + * $FreeBSD: head/sbin/ipfw/ipfw2.h 206843 2010-04-19 15:11:45Z luigi $ */ /* @@ -207,7 +207,7 @@ enum tokens { #define NEED(_p, msg) {if (!_p) errx(EX_USAGE, msg);} #define NEED1(msg) {if (!(*av)) errx(EX_USAGE, msg);} -unsigned long long align_uint64(const uint64_t *pll); +int pr_u64(uint64_t *pd, int width); /* memory allocation support */ void *safe_calloc(size_t number, size_t size); diff --git a/ipfw/main.c b/ipfw/main.c index 2b2b6aa..b0e51e1 100644 --- a/ipfw/main.c +++ b/ipfw/main.c @@ -188,7 +188,7 @@ ipfw_main(int oldac, char **oldav) * to make simpler further parsing. */ for (i=0; i /dev/null 2>&1 diff --git a/planetlab/ipfwslice.spec b/planetlab/ipfwslice.spec index 34b217c..9f200b3 100644 --- a/planetlab/ipfwslice.spec +++ b/planetlab/ipfwslice.spec @@ -1,6 +1,4 @@ # -# $Id: ipfwslice.spec 16174 2009-12-15 13:38:15Z marta $ -# # TODO: # restart crond # modprobe ipfw_mod.ko (depmod ?) diff --git a/planetlab/planetlab-tags.mk b/planetlab/planetlab-tags.mk index cfb5655..392465a 100644 --- a/planetlab/planetlab-tags.mk +++ b/planetlab/planetlab-tags.mk @@ -1,4 +1,3 @@ -# $Id: planetlab-tags.mk 4533 2009-12-16 14:39:23Z luigi $ # These are good to build the ipfw modules from svn on kernels 2.6.22 linux-2.6-SVNBRANCH := 22 linux-2.6-SVNPATH := http://svn.planet-lab.org/svn/linux-2.6/tags/linux-2.6-22-39-1