From 31b969263c34f46f398eec33c0b0e95947842cda Mon Sep 17 00:00:00 2001 From: marta Date: Tue, 15 Dec 2009 13:38:15 +0000 Subject: [PATCH] Work on the radix code, added support to compile on OpenWRT, sync the code with FreeBSD-head. Changed the directory structure moving the planetlab files into a separate directory. --- Makefile | 8 +- README | 175 ++++++++- dummynet/Makefile | 17 +- dummynet/hashtable.c | 37 +- dummynet/hashtable.h | 18 +- dummynet/include/net/radix.h | 11 +- dummynet/include/netinet/ip_dummynet.h | 2 +- dummynet/include/netinet/ip_fw.h | 24 +- dummynet/ip_dummynet.c | 221 +++++------ dummynet/ip_fw2.c | 383 ++++++++++---------- dummynet/ip_fw_pfil.c | 50 ++- dummynet/ipfw2_mod.c | 61 ++-- dummynet/missing.h | 37 +- dummynet/new_glue.c | 26 +- dummynet/radix.c | 47 ++- dummynet/test_radix.c | 97 +++++ glue.h | 4 +- ipfw/glue.c | 83 ++++- ipfw/ipfw2.c | 6 +- ipfw.spec => planetlab/ipfwroot.spec | 3 + ipfw-slice.spec => planetlab/ipfwslice.spec | 3 + planetlab/planetlab-tags.mk | 5 + planetlab/planetlab.mk | 26 ++ 23 files changed, 841 insertions(+), 503 deletions(-) create mode 100644 dummynet/test_radix.c rename ipfw.spec => planetlab/ipfwroot.spec (96%) rename ipfw-slice.spec => planetlab/ipfwslice.spec (94%) create mode 100644 planetlab/planetlab-tags.mk create mode 100644 planetlab/planetlab.mk diff --git a/Makefile b/Makefile index 0442810..f863838 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ # We assume that $(USRDIR) contains include/ and lib/ used to build userland. 
DATE ?= $(shell date +%Y%m%d) -SNAPSHOT_NAME=ipfw_linux-$(DATE) +SNAPSHOT_NAME=ipfw_mod-$(DATE) _all: all @@ -22,10 +22,8 @@ all clean distclean: (cd dummynet && $(MAKE) $(@) ) snapshot: - -ln -s `pwd` /tmp/$(SNAPSHOT_NAME) - (cd /tmp; tar cvzhf $(SNAPSHOT_NAME).tgz --exclude .svn \ + (cd ..; tar cvzhf /tmp/$(SNAPSHOT_NAME).tgz --exclude .svn \ --exclude README.openwrt --exclude tags --exclude NOTES \ - $(SNAPSHOT_NAME) ) - -rm /tmp/$(SNAPSHOT_NAME) + ipfw_mod ) install: diff --git a/README b/README index e5b5a4a..7ab66bf 100644 --- a/README +++ b/README @@ -1,5 +1,5 @@ # -# $Id: README 4396 2009-12-09 21:52:46Z luigi $ +# $Id: README 4502 2009-12-15 11:10:33Z marta $ # This directory contains a port of ipfw and dummynet to Linux and OpenWrt @@ -42,6 +42,9 @@ Unless specified otherwise, all the code here is under a BSD license. make VER=2.4 KERNELPATH=... + For 2.4, if KERNELPATH is not specified then we use + KERNELPATH ?= /usr/src/`uname -r`/build + You need to follow the same instruction for the 2.6 kernel, enabling netfilter in the kernel options: @@ -71,30 +74,36 @@ Unless specified otherwise, all the code here is under a BSD license. + Add ipfw2 to the openwrt package, as follows: - - fetch and extract the code e.g. 
+ - copy the code from this directory to the place used for the build: - (cd ..; \ - wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_linux-20090724.tgz;\ - tar xvzf ipfw_linux-20090724.tgz; mv ipfw_linux-20090724 ipfw_mod;) + cp -Rp /path_to_ipfw_mod ../ipfw_mod; - (but you should have done it already) + If you want, you can fetch a newer version from the web + (cd ..; rm -rf ipfw_mod; + wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_mod-latest.tgz;\ + tar xvzf ipfw_mod-latest.tgz) - run the following commands: (mkdir package/ipfw2; - cp ../ipfw_linux/Makefile.openwrt package/ipfw2/Makefile) + cp ../ipfw_mod/Makefile.openwrt package/ipfw2/Makefile) to create the package/ipfw2 directory in the OpenWrt source directory, and copy Makefile.openwrt to package/ipfw2/Makefile: - if necessary, edit package/ipfw2/Makefile and set IPFW_DIR to point to - the directory with the ipfw sources (the directory - which contains this README, dummynet/ ipfw/ and so on); + the directory ipfw_mod, which contains the ipfw sources - run "make menuconfig" and select ipfw2 as a module in - Kernel Modules -> Other modules -> ipfw2 + Kernel Modules -> Other modules -> kmod-ipfw2 - run "make" to build the package, "make V=99" for verbose build. + - to modify the code, assuming you are in directory "kamikaze_8.09.1" + + (cd ../ipfw_mod && vi ...the files you are interested in ) + rm -rf build_dir/linux-brcm-2.4/kmod-ipfw2 + make package/ipfw2/compile V=99 + The resulting package is located in bin/packages/mipsel/kmod-ipfw2*, upload the file and install on the target system, as follows: @@ -104,6 +113,148 @@ Unless specified otherwise, all the code here is under a BSD license. /lib/modules/2.4.35.4/ipfw show # launch the userspace tool rmmod ipfw_mod.o # remove the module -***** PLANETLAB BUILD ***** +***** PLANETLAB BUILD (within a slice) ***** + + Follow the instructions below. 
You can just cut&paste + + # install the various tools if not available + sudo yum -y install subversion rpm-build rpm-devel m4 redhat-rpm-config make gcc + # new build installation requires the gnupg package + sudo yum -y install gnupg + + # create and move to a work directory + mkdir -p test + # extract a planetlab distribution to directory XYZ + (cd test; svn co http://svn.planet-lab.org/svn/build/trunk XYZ) + # copy the planetlab/*mk files here, overriding existing ones + cp planetlab/*mk test/XYZ + # download the specfiles and do some patching. + # Results go into SPEC/ (takes 5 minutes) + (cd test/XYZ; make stage1=true PLDISTRO=planetlab ) + # Building the slice code is fast, the root code takes longer + # as it needs to rebuild the whole kernel + (cd test/XYZ; sudo make ipfwslice ipfwroot) + + The kernel dependency phase is a bit time consuming, but does not + need to be redone if we are changing the ipfw sources only. + To clean up the code do + (cd test/XYZ; sudo make ipfwroot-clean ipfwslice-clean) + then after you have updated the repository again + (cd test/XYZ; sudo make ipfwslice ipfwroot) + +--- other instructions (to be verified) --- + +To build a kernel module for the PlanetLab distribution you need a build system. +For up-to-date and detailed information on how to build a local myplc installation, +a local mirror, or a PlanetLab test system, see [1] + +To create a build system you need to do the following steps: + + 1. install CentOS 5, detailed information in [2] + + 1.A download the image from the main site [3], for example: + + wget http://mi.mirror.garr.it/mirrors/CentOS/5.4/isos/i386/CentOS-5.4-i386-netinstall.iso + + 1.B Add the repository + + cat >> /etc/yum.repos.d/dhozac-vserver.repo < mybuild.log& + + 3. create the build + + 3.A Enter the vserver and create the build + + vserver mybuild enter + cd \ + svn co http://svn.planet-lab.org/svn/build/trunk build + + 4. 
build + + 4.A build[4] + cd /build + + # full cleanup + make distclean + + # the compilation is composed of several steps, + # run "make help" for more information + # the first step of the onelab compilation will download + # the SPEC file from the repository specified in + # onelab-tags.mk + make stage1=true PLDISTRO=onelab + + # to download and build a module, for example ipfw: + make ipfw + + # to do local changes + cd /build/CODEBASE + rm -rf ipfw + # download the ipfw sources and extract them into ./ipfw + # by svn + svn co svn+ssh://onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw_mod ./ipfw + # from web + wget http://info.iet.unipi.it/~luigi/dummynet/ipfw_mod-latest.tgz + tar xvzf ipfw_mod-latest.tgz + + # start the compilation + rm -rf SOURCES/ipfw* + rm -rf BUILD/ipfw-0.1/ + rm -rf SRPMS/ipfw* + rm -rf RPMS/i386/ipfw* + make ipfw + + 5. download and install sources into a node + + 5.A Copy the RPMs to the node and install them: + # exit from the root context + exit + scp /vserver/mybuild/build/RPMS/i386/ipfw-* root@node.iet.unipi.it: + ssh root@node.iet.unipi.it + rpm -e ipfw + rpm -ivh ./ipfw-0-9...TAB + modprobe ipfw_mod + + # the ipfw package should be installed + ipfw show ------------------------------------------------------------------------------ +--- References +[1] https://svn.planet-lab.org/wiki/VserverCentos +[2] http://wiki.linux-vserver.org/Installation_on_CentOS +[3] http://mirror.centos.org/centos/5/isos/ +[4] More information is in the /build/README* files diff --git a/dummynet/Makefile b/dummynet/Makefile index 71b0b80..cac1958 100644 --- a/dummynet/Makefile +++ b/dummynet/Makefile @@ -37,7 +37,7 @@ VER ?= 2.6 obj-m := ipfw_mod.o # generic cflags used on all systems -#ipfw-cflags += -Dradix +#ipfw-cflags += -DIPFW_HASHTABLES ipfw-cflags += -DIPFIREWALL_DEFAULT_TO_ACCEPT -DTRACE # _BSD_SOURCE enables __FAVOR_BSD (udp/tcp bsd structs instead of posix) ipfw-cflags += -D_BSD_SOURCE @@ -52,7 +52,7 @@ $(warning "---- Building dummynet kernel module for 
Version $(VER)") ifeq ($(VER),openwrt) M=. obj-y := ipfw2_mod.o bsd_compat.o \ - in_cksum.o ip_dummynet.o ip_fw2.o ip_fw_pfil.o + in_cksum.o ip_dummynet.o ip_fw2.o ip_fw_pfil.o radix.o O_TARGET := ipfw_mod.o # xcflags-y is a temporary variable where we store build options @@ -134,9 +134,9 @@ ipfw_mod-y = $(IPFW_SRCS:%.c=%.o) # Original ipfw and dummynet sources + FreeBSD stuff, IPFW_SRCS = ip_fw2.c ip_dummynet.c ip_fw_pfil.c in_cksum.c -#IPFW_SRCS += radix.c +IPFW_SRCS += radix.c # Module glue and functions missing in linux -IPFW_SRCS += ipfw2_mod.c bsd_compat.c hashtable.c new_glue.c +IPFW_SRCS += ipfw2_mod.c bsd_compat.c hashtable.c # additional $(CC) flags ccflags-y += $(WARN) @@ -161,7 +161,7 @@ distclean: clean EDIRS= altq arpa machine net netinet netinet6 sys -EFILES += opt_inet6.h opt_ipfw.h opt_ipsec.h +EFILES += opt_inet6.h opt_ipfw.h opt_ipsec.h opt_mpath.h EFILES += opt_mbuf_stress_test.h opt_param.h EFILES += altq/if_altq.h @@ -172,14 +172,14 @@ EFILES += net/vnet.h EFILES += netinet/ether.h netinet/icmp6.h netinet/if_ether.h EFILES += netinet/in.h netinet/in_pcb.h netinet/in_var.h -EFILES += netinet/ip_carp.h netinet/ip_var.h netinet/pim.h +EFILES += netinet/ip_carp.h netinet/ip_var.h netinet/pim.h EFILES += netinet/sctp.h netinet/tcp_timer.h netinet/tcpip.h EFILES += netinet/udp_var.h EFILES += netinet6/ip6_var.h EFILES += sys/_lock.h sys/_rwlock.h sys/_mutex.h sys/jail.h -EFILES += sys/condvar.h sys/eventhandler.h +EFILES += sys/condvar.h sys/eventhandler.h sys/domain.h EFILES += sys/limits.h sys/lock.h sys/mutex.h sys/priv.h EFILES += sys/proc.h sys/rwlock.h sys/socket.h sys/socketvar.h EFILES += sys/sysctl.h sys/time.h sys/ucred.h @@ -192,3 +192,6 @@ include_e: -@(cd $(M)/include_e; mkdir -p $(EDIRS); touch $(EFILES) ) endif # !openwrt + +test_radix: test_radix.o radix.o +test_radix: CFLAGS=-Wall -Werror -O1 diff --git a/dummynet/hashtable.c b/dummynet/hashtable.c index 30c3b4c..3e055f0 100644 --- a/dummynet/hashtable.c +++ b/dummynet/hashtable.c 
@@ -13,13 +13,13 @@ struct new_obj { }; /* Hash table */ -struct new_hash_table { +struct ipfw_ht { int table_size; /* Size of the table (buckets) */ int table_obj; /* number of object in the table */ int obj_size; /* size of object (key + value) */ /* Hash function for this table */ uint32_t (*hash)(const void *key, uint32_t size); - int (*cmp)(const void *obj1, const void *obj2); + int (*cmp)(const void *obj1, const void *obj2, int sz); int hash_arg; /* hash function parameter */ struct malloc_type *mtype; struct new_obj **table_ptr; /* Pointer to the table */ @@ -34,17 +34,15 @@ struct new_hash_table { * * Return value: pointer to the hash table, NULL if error occurs */ -struct new_hash_table * -new_table_init (int size, int obj_size, +struct ipfw_ht * +ipfw_ht_new(int size, int obj_size, uint32_t (hf)(const void *, uint32_t size), - int (compare)(const void *, const void *), + int (compare)(const void *, const void *, int), struct malloc_type *mtype) { - struct new_hash_table *h; + struct ipfw_ht *h; - printf("%s called\n", __FUNCTION__); - - h = malloc(sizeof(struct new_hash_table), mtype, M_NOWAIT | M_ZERO); + h = malloc(sizeof(*h), mtype, M_NOWAIT | M_ZERO); if (h == NULL) return NULL; @@ -64,7 +62,7 @@ new_table_init (int size, int obj_size, } int -new_table_insert_obj (struct new_hash_table *h, const void *obj) +ipfw_ht_insert(struct ipfw_ht *h, const void *obj) { int i; /* array index */ struct new_obj *o, *ot; @@ -73,7 +71,7 @@ new_table_insert_obj (struct new_hash_table *h, const void *obj) /* same key not allowed */ for (ot = h->table_ptr[i]; ot; ot = ot->next) { - if (h->cmp(obj, ot->obj) == 0) + if (h->cmp(obj, ot->obj, h->obj_size) == 0) return 1; /* error */ } /* allocate a single chunk of memory */ @@ -92,7 +90,7 @@ new_table_insert_obj (struct new_hash_table *h, const void *obj) } int -new_table_delete_obj(struct new_hash_table *h, const void *obj) +ipfw_ht_remove(struct ipfw_ht *h, const void *obj) { int i; struct new_obj *obj1, *prev; @@ -100,7 
+98,7 @@ new_table_delete_obj(struct new_hash_table *h, const void *obj) i = h->hash(obj, h->table_size); for (prev = NULL, obj1 = h->table_ptr[i]; obj1; obj1 = obj1->next) { - if (h->cmp(obj, (void *)obj1->obj) != 0) + if (h->cmp(obj, obj1->obj, h->obj_size) != 0) continue; /* Object found, delete */ if (prev != NULL) @@ -115,7 +113,7 @@ new_table_delete_obj(struct new_hash_table *h, const void *obj) } const void * -new_table_extract_obj(struct new_hash_table *h, const void *obj) +ipfw_ht_extract(struct ipfw_ht *h, const void *obj) { struct new_obj *o; int i; @@ -124,14 +122,14 @@ new_table_extract_obj(struct new_hash_table *h, const void *obj) i = h->hash(obj, h->table_size); for (o = h->table_ptr[i]; o; o = o->next) { - if (h->cmp(o->obj, obj) == 0) + if (h->cmp(o->obj, obj, h->obj_size) == 0) return o->obj; } return NULL; } void * -new_table_destroy(struct new_hash_table *h) +ipfw_ht_destroy(struct ipfw_ht *h) { int i; struct new_obj *cur, *next; @@ -152,18 +150,17 @@ new_table_destroy(struct new_hash_table *h) /* returns the number of elements in the table */ int -new_table_get_element(const struct new_hash_table *h) +ipfw_ht_count(const struct ipfw_ht *h) { return h ? 
h->table_obj : 0; } const void * -table_next(struct new_hash_table *h, const void *o) +table_next(struct ipfw_ht *h, const void *o) { int i; struct new_obj *obj; - printf("%s called\n", __FUNCTION__); if (h == NULL || h->table_obj == 0) return NULL; if (o == NULL) { @@ -178,7 +175,7 @@ table_next(struct new_hash_table *h, const void *o) */ i = h->hash(o, h->table_size); for (obj = h->table_ptr[i]; obj; obj = obj->next) { - if (h->cmp(obj->obj, o) == 0) + if (h->cmp(obj->obj, o, h->obj_size) == 0) break; } if (obj && obj->next != NULL) diff --git a/dummynet/hashtable.h b/dummynet/hashtable.h index a20b7b4..4fcba22 100644 --- a/dummynet/hashtable.h +++ b/dummynet/hashtable.h @@ -11,34 +11,34 @@ * objects are the same (XXX we could spare this if we also * pass a key_size and use a bcmp for comparisons) * Not extensible at the moment. - * max_el and max_ratio currently unused. */ struct malloc_type; -struct new_hash_table * new_table_init (int size, int obj_size, +struct ipfw_ht; +struct ipfw_ht* ipfw_ht_new(int size, int obj_size, uint32_t (hash_fn)(const void *, uint32_t size), - int (cmp_fn)(const void*, const void*), + int (cmp_fn)(const void*, const void*, int sz), struct malloc_type *mtype); +void *ipfw_ht_destroy(struct ipfw_ht *h); /* add a new object to the table, return success/failure */ -int new_table_insert_obj (struct new_hash_table *h, const void *obj); +int ipfw_ht_insert(struct ipfw_ht *h, const void *obj); /* * returns a pointer to the matching object or NULL if not found. * No refcounts. 
*/ -const void *new_table_extract_obj(struct new_hash_table *h, const void *key); +const void *ipfw_ht_extract(struct ipfw_ht *h, const void *key); /* remove an object from the table */ -int new_table_delete_obj(struct new_hash_table *h, const void *key); -void *new_table_destroy(struct new_hash_table *h); +int ipfw_ht_remove(struct ipfw_ht *h, const void *key); /* return the number of elements in the table */ -int new_table_get_element(const struct new_hash_table *h); +int ipfw_ht_count(const struct ipfw_ht *h); /* returns the first or next element. Works by hashing the * current object and then finds the next one. * If obj == NULL returns the first object in the table */ -const void *table_next(struct new_hash_table *h, const void *obj); +const void *ipfw_ht_next(struct ipfw_ht *h, const void *obj); #endif diff --git a/dummynet/include/net/radix.h b/dummynet/include/net/radix.h index 5d99a2f..45cb7ca 100644 --- a/dummynet/include/net/radix.h +++ b/dummynet/include/net/radix.h @@ -105,24 +105,15 @@ typedef int walktree_f_t(struct radix_node *, void *); struct radix_node_head { struct radix_node *rnh_treetop; - int rnh_addrsize; /* permit, but not require fixed keys */ - int rnh_pktsize; /* permit, but not require fixed keys */ struct radix_node *(*rnh_addaddr) /* add based on sockaddr */ (void *v, void *mask, struct radix_node_head *head, struct radix_node nodes[]); - struct radix_node *(*rnh_addpkt) /* add based on packet hdr */ - (void *v, void *mask, - struct radix_node_head *head, struct radix_node nodes[]); struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */ (void *v, void *mask, struct radix_node_head *head); - struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */ - (void *v, void *mask, struct radix_node_head *head); struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */ (void *v, struct radix_node_head *head); struct radix_node *(*rnh_lookup) /* locate based on sockaddr */ (void *v, void *mask, struct radix_node_head 
*head); - struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */ - (void *v, struct radix_node_head *head); int (*rnh_walktree) /* traverse tree */ (struct radix_node_head *head, walktree_f_t *f, void *w); int (*rnh_walktree_from) /* traverse tree below a */ @@ -164,7 +155,7 @@ struct radix_node_head { #define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED) #endif /* _KERNEL */ -void rn_init(void); +void rn_init(int); int rn_inithead(void **, int); int rn_refines(void *, void *); struct radix_node diff --git a/dummynet/include/netinet/ip_dummynet.h b/dummynet/include/netinet/ip_dummynet.h index 81ebb33..7391719 100644 --- a/dummynet/include/netinet/ip_dummynet.h +++ b/dummynet/include/netinet/ip_dummynet.h @@ -229,7 +229,7 @@ struct dn_flow_queue { int avg ; /* average queue length est. (scaled) */ int count ; /* arrivals since last RED drop */ int random ; /* random value (scaled) */ - dn_key q_time; /* start of queue idle time */ + dn_key idle_time; /* start of queue idle time */ /* WF2Q+ support */ struct dn_flow_set *fs ; /* parent flow set */ diff --git a/dummynet/include/netinet/ip_fw.h b/dummynet/include/netinet/ip_fw.h index 694983a..743d908 100644 --- a/dummynet/include/netinet/ip_fw.h +++ b/dummynet/include/netinet/ip_fw.h @@ -653,13 +653,6 @@ int ipfw6_unhook(void); void ipfw_nat_destroy(void); #endif -#define IPFW_HAVE_SKIPTO_TABLE - -struct _rulepointer { - struct ip_fw *rule; - uint32_t id; -}; - VNET_DECLARE(int, fw_one_pass); VNET_DECLARE(int, fw_enable); #define V_fw_one_pass VNET(fw_one_pass) @@ -672,6 +665,7 @@ VNET_DECLARE(int, fw6_enable); struct ip_fw_chain { struct ip_fw *rules; /* list of rules */ + struct ip_fw *default_rule; struct ip_fw *reap; /* list of rules to reap */ LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ struct radix_node_head *tables[IPFW_TABLES_MAX]; @@ -681,8 +675,20 @@ struct ip_fw_chain { struct rwlock rwmtx; #endif /* !__linux__ */ uint32_t id; /* ruleset id */ - struct 
_rulepointer skipto_pointers[64*1024]; - struct new_hash_table *global_tables[128]; + /* + * To optimize jumps, we use a table with skipto_entries pointers + * (a power of 2, set with a sysctl depending on available memory). + * Entry i points to the first rule i*64k/n <= n < (i+1)*64k/n. + * On insert/delete we simply update the relevant entry + * with O(1) additional cost. Updates to the sysctl variable + * that controls the table are managed at the next add/delete. + */ + int skipto_shift; /* shifts to compute the index in skipto-ptrs */ + int skipto_size; /* number of entries in the table */ + struct ip_fw **skipto_ptrs; +#ifdef IPFW_HASHTABLES + struct ipfw_ht *hashtab[IPFW_TABLES_MAX]; +#endif }; #ifdef IPFW_INTERNAL diff --git a/dummynet/ip_dummynet.c b/dummynet/ip_dummynet.c index 5b36ecc..0b23881 100644 --- a/dummynet/ip_dummynet.c +++ b/dummynet/ip_dummynet.c @@ -58,7 +58,6 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:1 #include "missing.h" -#include #include #include #include @@ -248,9 +247,19 @@ static void dummynet(void *); static void dummynet_flush(void); static void dummynet_send(struct mbuf *); void dummynet_drain(void); -static void dn_rule_delete(void *); static int dummynet_io(struct mbuf **, int , struct ip_fw_args *); +/* + * Flow queue is idle if: + * 1) it's empty for at least 1 tick + * 2) it has invalid timestamp (WF2Q case) + * 3) parent pipe has no 'exhausted' burst. + */ +#define QUEUE_IS_IDLE(q) ((q)->head == NULL && (q)->S == (q)->F + 1 && \ + curr_time > (q)->idle_time + 1 && \ + ((q)->numbytes + (curr_time - (q)->idle_time - 1) * \ + (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst)) + /* * Heap management functions. * @@ -457,6 +466,31 @@ heap_free(struct dn_heap *h) * --- end of heap management functions --- */ +/* + * Dispose a packet in dummynet. Use an inline functions so if we + * need to free extra state associated to a packet, this is a + * central point to do it. 
+ */ +static __inline void *dn_free_pkt(struct mbuf *m) +{ +#ifdef __linux__ + netisr_dispatch(-1, m); /* -1 drop the packet */ +#else + m_freem(m); +#endif + return NULL; +} + +static __inline void dn_free_pkts(struct mbuf *mnext) +{ + struct mbuf *m; + + while ((m = mnext) != NULL) { + mnext = m->m_nextpkt; + dn_free_pkt(m); + } +} + /* * Return the mbuf tag holding the dummynet state. As an optimization * this is assumed to be the first tag on the list. If this turns out @@ -671,7 +705,7 @@ ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) * queue on error hoping next time we are luckier. */ } else /* RED needs to know when the queue becomes empty. */ - q->q_time = curr_time; + q->idle_time = curr_time; /* * If the delay line was empty call transmit_event() now. @@ -781,23 +815,26 @@ ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) break; } } - if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0 && - p->idle_heap.elements > 0) { + if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0) { + p->idle_time = curr_time; /* * No traffic and no events scheduled. * We can get rid of idle-heap. 
*/ - int i; + if (p->idle_heap.elements > 0) { + int i; - for (i = 0; i < p->idle_heap.elements; i++) { - struct dn_flow_queue *q = p->idle_heap.p[i].object; + for (i = 0; i < p->idle_heap.elements; i++) { + struct dn_flow_queue *q; - q->F = 0; - q->S = q->F + 1; + q = p->idle_heap.p[i].object; + q->F = 0; + q->S = q->F + 1; + } + p->sum = 0; + p->V = 0; + p->idle_heap.elements = 0; } - p->sum = 0; - p->V = 0; - p->idle_heap.elements = 0; } /* * If we are getting clocks from dummynet (not a real interface) and @@ -1008,16 +1045,12 @@ dummynet_send(struct mbuf *m) case DN_TO_DROP: /* drop the packet after some time */ -#ifdef __linux__ - netisr_dispatch(-1, m); /* -1 drop the packet */ -#else - m_freem(m); -#endif + dn_free_pkt(m); break; default: printf("dummynet: bad switch %d!\n", pkt->dn_dir); - m_freem(m); + dn_free_pkt(m); break; } } @@ -1038,7 +1071,7 @@ expire_queues(struct dn_flow_set *fs) fs->last_expired = time_uptime ; for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) - if (q->head != NULL || q->S != q->F+1) { + if (!QUEUE_IS_IDLE(q)) { prev = q ; q = q->next ; } else { /* entry is idle, expire it */ @@ -1079,7 +1112,7 @@ create_queue(struct dn_flow_set *fs, int i) q->hash_slot = i; q->next = fs->rq[i]; q->S = q->F + 1; /* hack - mark timestamp as invalid. */ - q->numbytes = io_fast ? fs->pipe->bandwidth : 0; + q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0); fs->rq[i] = q; fs->rq_elements++; return (q); @@ -1168,7 +1201,7 @@ find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) break ; /* found */ /* No match. Check if we can expire the entry */ - if (pipe_expire && q->head == NULL && q->S == q->F+1 ) { + if (pipe_expire && QUEUE_IS_IDLE(q)) { /* entry is idle and not in any heap, expire it */ struct dn_flow_queue *old_q = q ; @@ -1241,7 +1274,7 @@ red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) * XXX check wraps... 
*/ if (q->avg) { - u_int t = div64(curr_time - q->q_time, + u_int t = div64(curr_time - q->idle_time, fs->lookup_step); q->avg = (t < fs->lookup_depth) ? @@ -1439,9 +1472,32 @@ dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) if (q->head != m) /* Flow was not idle, we are done. */ goto done; - if (q->q_time < (uint32_t)curr_time) - q->numbytes = io_fast ? fs->pipe->bandwidth : 0; - q->q_time = curr_time; + if (is_pipe) { /* Fixed rate queues. */ + if (q->idle_time < curr_time) { + /* Calculate available burst size. */ + q->numbytes += + (curr_time - q->idle_time - 1) * pipe->bandwidth; + if (q->numbytes > pipe->burst) + q->numbytes = pipe->burst; + if (io_fast) + q->numbytes += pipe->bandwidth; + } + } else { /* WF2Q. */ + if (pipe->idle_time < curr_time && + pipe->scheduler_heap.elements == 0 && + pipe->not_eligible_heap.elements == 0) { + /* Calculate available burst size. */ + pipe->numbytes += + (curr_time - pipe->idle_time - 1) * pipe->bandwidth; + if (pipe->numbytes > 0 && pipe->numbytes > pipe->burst) + pipe->numbytes = pipe->burst; + if (io_fast) + pipe->numbytes += pipe->bandwidth; + } + pipe->idle_time = curr_time; + } + /* Necessary for both: fixed rate & WF2Q queues. */ + q->idle_time = curr_time; /* * If we reach this point the flow was previously idle, so we need @@ -1533,33 +1589,10 @@ dropit: if (q) q->drops++; DUMMYNET_UNLOCK(); - /* - * set the tag, if present. dn_tag_get cannot fail - * so we need to check first - */ - if (m_tag_first(m)) { - pkt = dn_tag_get(m); - pkt->dn_dir = DN_TO_DROP; - } - dummynet_send(m); /* drop the packet */ - *m0 = NULL; + *m0 = dn_free_pkt(m); return ((fs && (fs->flags_fs & DN_NOERROR)) ? 
0 : ENOBUFS); } -/* - * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT) - * Doing this would probably save us the initial bzero of dn_pkt - */ -#if defined( __linux__ ) -#define DN_FREE_PKT(_m) do { \ - netisr_dispatch(-1, _m); \ -} while (0) -#else -#define DN_FREE_PKT(_m) do { \ - m_freem(_m); \ -} while (0) -#endif - /* * Dispose all packets and flow_queues on a flow_set. * If all=1, also remove red lookup table and other storage, @@ -1576,13 +1609,7 @@ purge_flow_set(struct dn_flow_set *fs, int all) for (i = 0; i <= fs->rq_size; i++) { for (q = fs->rq[i]; q != NULL; q = qn) { - struct mbuf *m, *mnext; - - mnext = q->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + dn_free_pkts(q->head); qn = q->next; free(q, M_DUMMYNET); } @@ -1610,15 +1637,10 @@ purge_flow_set(struct dn_flow_set *fs, int all) static void purge_pipe(struct dn_pipe *pipe) { - struct mbuf *m, *mnext; purge_flow_set( &(pipe->fs), 1 ); - mnext = pipe->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + dn_free_pkts(pipe->head); heap_free( &(pipe->scheduler_heap) ); heap_free( &(pipe->not_eligible_heap) ); @@ -1661,60 +1683,6 @@ dummynet_flush(void) DUMMYNET_UNLOCK(); } -extern struct ip_fw *ip_fw_default_rule; -static void -dn_rule_delete_fs(struct dn_flow_set *fs, void *r) -{ - int i ; - struct dn_flow_queue *q ; - struct mbuf *m ; - - for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */ - for (q = fs->rq[i] ; q ; q = q->next ) - for (m = q->head ; m ; m = m->m_nextpkt ) { - struct dn_pkt_tag *pkt = dn_tag_get(m) ; - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule ; - } -} - -/* - * When a firewall rule is deleted, scan all queues and remove the pointer - * to the rule from matching packets, making them point to the default rule. - * The pointer is used to reinject packets in case one_pass = 0. 
- */ -void -dn_rule_delete(void *r) -{ - struct dn_pipe *pipe; - struct dn_flow_set *fs; - struct dn_pkt_tag *pkt; - struct mbuf *m; - int i; - - DUMMYNET_LOCK(); - /* - * If the rule references a queue (dn_flow_set), then scan - * the flow set, otherwise scan pipes. Should do either, but doing - * both does not harm. - */ - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(fs, &flowsethash[i], next) - dn_rule_delete_fs(fs, r); - - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(pipe, &pipehash[i], next) { - fs = &(pipe->fs); - dn_rule_delete_fs(fs, r); - for (m = pipe->head ; m ; m = m->m_nextpkt ) { - pkt = dn_tag_get(m); - if (pkt->rule == r) - pkt->rule = ip_fw_default_rule; - } - } - DUMMYNET_UNLOCK(); -} - /* * setup RED parameters */ @@ -1870,11 +1838,14 @@ config_pipe(struct dn_pipe *p) } else /* Flush accumulated credit for all queues. */ for (i = 0; i <= pipe->fs.rq_size; i++) - for (q = pipe->fs.rq[i]; q; q = q->next) - q->numbytes = io_fast ? p->bandwidth : 0; + for (q = pipe->fs.rq[i]; q; q = q->next) { + q->numbytes = p->burst + + (io_fast ? p->bandwidth : 0); + } pipe->bandwidth = p->bandwidth; - pipe->numbytes = 0; /* just in case... */ + pipe->burst = p->burst; + pipe->numbytes = pipe->burst + (io_fast ? 
pipe->bandwidth : 0); bcopy(p->if_name, pipe->if_name, sizeof(p->if_name)); pipe->ifp = NULL; /* reset interface ptr */ pipe->delay = p->delay; @@ -2019,7 +1990,6 @@ dummynet_drain(void) { struct dn_flow_set *fs; struct dn_pipe *pipe; - struct mbuf *m, *mnext; int i; DUMMYNET_LOCK_ASSERT(); @@ -2035,12 +2005,7 @@ dummynet_drain(void) for (i = 0; i < HASHSIZE; i++) { SLIST_FOREACH(pipe, &pipehash[i], next) { purge_flow_set(&(pipe->fs), 0); - - mnext = pipe->head; - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - DN_FREE_PKT(m); - } + dn_free_pkts(pipe->head); pipe->head = pipe->tail = NULL; } } @@ -2345,7 +2310,6 @@ ip_dn_init(void) ip_dn_ctl_ptr = ip_dn_ctl; ip_dn_io_ptr = dummynet_io; - ip_dn_ruledel_ptr = dn_rule_delete; TASK_INIT(&dn_task, 0, dummynet_task, NULL); dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, @@ -2365,7 +2329,6 @@ ip_dn_destroy(void) { ip_dn_ctl_ptr = NULL; ip_dn_io_ptr = NULL; - ip_dn_ruledel_ptr = NULL; DUMMYNET_LOCK(); callout_stop(&dn_timeout); diff --git a/dummynet/ip_fw2.c b/dummynet/ip_fw2.c index 50e8701..4e46566 100644 --- a/dummynet/ip_fw2.c +++ b/dummynet/ip_fw2.c @@ -104,6 +104,10 @@ __FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz #include /* XXX for in_cksum */ +#ifdef IPFW_HASHTABLES +#include "hashtable.h" +#endif + #ifdef MAC #include #endif @@ -135,21 +139,6 @@ static int default_to_accept; #endif static uma_zone_t ipfw_dyn_rule_zone; -struct ip_fw *ip_fw_default_rule; - -/* - * Data structure to cache our ucred related - * information. This structure only gets used if - * the user specified UID/GID based constraints in - * a firewall rule. 
- */ -struct ip_fw_ugid { - gid_t fw_groups[NGROUPS]; - int fw_ngroups; - uid_t fw_uid; - int fw_prid; -}; - /* * list of rules for layer 3 */ @@ -194,12 +183,18 @@ SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose, SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, "Set upper limit of matches of ipfw rules logged"); +static unsigned int dummy_default_rule = IPFW_DEFAULT_RULE; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, - NULL, IPFW_DEFAULT_RULE, + &dummy_default_rule, IPFW_DEFAULT_RULE, "The default/max possible rule number."); +static unsigned int dummy_tables_max = IPFW_TABLES_MAX; SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, - NULL, IPFW_TABLES_MAX, + &dummy_tables_max, IPFW_TABLES_MAX, "The maximum number of tables."); +static unsigned int skipto_entries = 256; +SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, skipto_entries, + CTLFLAG_RW, &skipto_entries, 0, + "Number of entries in the skipto cache"); SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, &default_to_accept, 0, "Make the default rule accept all packets."); @@ -218,9 +213,6 @@ SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, #endif /* SYSCTL_NODE */ -#ifndef IPFW_NEWTABLES_MAX -#define IPFW_NEWTABLES_MAX 256 -#endif /* * Description of dynamic rules. 
* @@ -1894,6 +1886,61 @@ send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip) args->m = NULL; } +static void +set_skipto_table(struct ip_fw_chain *ch) +{ + int i, n, sh; + struct ip_fw *f, **t, **oldt; + + for (sh = 15; sh > 0; sh--) + if (skipto_entries > 1<rules; f; f = f->next) { + n = f->rulenum >> sh ; + while (i <= n) + t[i++] = f; + } + V_layer3_chain.skipto_shift = sh; + V_layer3_chain.skipto_size = skipto_entries; + oldt = V_layer3_chain.skipto_ptrs; + V_layer3_chain.skipto_ptrs = t; + IPFW_RUNLOCK(ch); + if (oldt) { + IPFW_WLOCK(ch); + IPFW_WUNLOCK(ch); + /* now can free oldt */ + free(oldt, M_IPFW_TBL); + } +} +#if 0 +/* + * Map a rule number to a rule pointer, using the skipto table. + * First lookup the slot, then follow the chain until we find a + * non-null entry with rulenum >= num. Return default_rule on error. + */ +static struct ip_fw * +rule2ptr(struct ip_fw_chain *ch, int num) +{ + struct ip_fw *r = NULL; + int ix = (num & 0xffff) >> ch->skipto_shift; + + while (ix < ch->skipto_size && (r = ch->skipto_ptrs[ix]) == NULL) + ix++; + while (r && num < r->rulenum) + r = r->next; + return (r ? 
r : ch->default_rule); +} +#endif /** * * Given an ip_fw *, lookup_next_rule will return a pointer @@ -1910,12 +1957,11 @@ send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip) */ static struct ip_fw * -lookup_next_rule(struct ip_fw *me, u_int32_t tablearg) +lookup_next_rule(struct ip_fw_chain *ch, struct ip_fw *me, uint32_t tablearg) { struct ip_fw *rule = NULL; ipfw_insn *cmd; - u_int16_t rulenum; -printf("%s called\n", __FUNCTION__); + /* look for action, in case it is a skipto */ cmd = ACTION_PTR(me); if (cmd->opcode == O_LOG) @@ -1924,56 +1970,22 @@ printf("%s called\n", __FUNCTION__); cmd += F_LEN(cmd); if (cmd->opcode == O_TAG) cmd += F_LEN(cmd); - if (cmd->opcode == O_SKIPTO ) { - if (tablearg != 0) { - rulenum = (u_int16_t)tablearg; - } else { - rulenum = cmd->arg1; - } + if (cmd->opcode != O_SKIPTO ) { + rule = me->next; + } else { + tablearg = tablearg ? tablearg : cmd->arg1; for (rule = me->next; rule ; rule = rule->next) { - if (rule->rulenum >= rulenum) { + if (rule->rulenum >= tablearg) { break; } } - } - if (rule == NULL) /* failure or not a skipto */ - rule = me->next; - me->next_rule = rule; - return rule; -} - -#ifdef IPFW_HAVE_SKIPTO_TABLE -struct ip_fw *lookup_skipto_table(struct ip_fw_chain *chain, uint16_t num); -struct ip_fw * -lookup_skipto_table(struct ip_fw_chain *chain, uint16_t num) -{ - struct ip_fw *f; - - printf("--%s called\n", __FUNCTION__); - if (1) - return NULL; - if (chain->skipto_pointers[num].id == chain->id) { - printf("-- %s pointer ok, return it\n", __FUNCTION__); - return chain->skipto_pointers[num].rule; +// rule = rule2ptr(ch, tablearg ? 
tablearg : cmd->arg1); } - printf("-- %s search pointer\n", __FUNCTION__); - - for (f = chain->rules; f ; f = f->next) { - if (f->rulenum == num) { - chain->skipto_pointers[num].id = chain->id; - chain->skipto_pointers[num].rule = f; - printf("-- %s found, set and return\n", __FUNCTION__); - return f; - } - } - printf("-- %s NOT found return NULL\n", __FUNCTION__); - - return NULL; + me->next_rule = rule; /* XXX perhaps unnecessary ? */ + return rule; } -#endif /* IPFW_HAVE_SKIPTO_TABLE */ -#ifdef radix static int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen, uint32_t value) @@ -1982,6 +1994,11 @@ add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, struct table_entry *ent; struct radix_node *rn; +#ifdef IPFW_HASHTABLES + if (tbl >= 2*IPFW_TABLES_MAX) + return EINVAL; + return EINVAL; // XXX to be completed +#endif if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; @@ -1989,7 +2006,16 @@ add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, if (ent == NULL) return (ENOMEM); ent->value = value; +#ifdef linux + /* there is no sin_len on linux, and the code assumes the first + * byte in the sockaddr to contain the length in bits. + * So we just dump the number right there + */ + *((uint8_t *)&(ent->addr)) = 8; + *((uint8_t *)&(ent->mask)) = 8; +#else ent->addr.sin_len = ent->mask.sin_len = 8; +#endif ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; IPFW_WLOCK(ch); @@ -2011,10 +2037,21 @@ del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, struct table_entry *ent; struct sockaddr_in sa, mask; +#ifdef IPFW_HASHTABLES + if (tbl >= 2*IPFW_TABLES_MAX) + return EINVAL; + return EINVAL; // XXX to be completed +#endif if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; +#ifdef linux + /* there is no sin_len on linux, see above */ + *((uint8_t *)&sa) = 8; + *((uint8_t *)&mask) = 8; +#else sa.sin_len = mask.sin_len = 8; +#endif mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; IPFW_WLOCK(ch); @@ -2048,6 +2085,11 @@ flush_table(struct ip_fw_chain *ch, uint16_t tbl) IPFW_WLOCK_ASSERT(ch); +#ifdef IPFW_HASHTABLES + if (tbl >= 2*IPFW_TABLES_MAX) + return EINVAL; + return EINVAL; // XXX to be completed +#endif if (tbl >= IPFW_TABLES_MAX) return (EINVAL); rnh = ch->tables[tbl]; @@ -2055,18 +2097,6 @@ flush_table(struct ip_fw_chain *ch, uint16_t tbl) rnh->rnh_walktree(rnh, flush_table_entry, rnh); return (0); } -#else -extern int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, - in_addr_t addr, uint8_t mlen, uint32_t value); -extern int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, - in_addr_t addr, uint8_t mlen); -extern int flush_table(struct ip_fw_chain *ch, uint16_t tbl); -extern int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -extern int dump_table(struct ip_fw_chain *ch, ipfw_table *tbl); -extern int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val); -extern int init_tables(struct ip_fw_chain *ch); -#endif static void flush_tables(struct ip_fw_chain *ch) @@ -2075,11 +2105,14 @@ flush_tables(struct ip_fw_chain *ch) IPFW_WLOCK_ASSERT(ch); - for (tbl = IPFW_TABLES_MAX -1; tbl < IPFW_NEWTABLES_MAX; tbl++) + for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) 
flush_table(ch, tbl); +#ifdef IPFW_HASHTABLES + for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) + ch->hashtab[tbl] = ipfw_ht_destroy(ch->hashtab[tbl]); +#endif } -#ifdef radix static int init_tables(struct ip_fw_chain *ch) { @@ -2094,6 +2127,10 @@ init_tables(struct ip_fw_chain *ch) return (ENOMEM); } } +#ifdef IPFW_HASHTABLES + for (i = 0; i < IPFW_TABLES_MAX; i++) + ch->hashtab[i] = ipfw_ht_destroy(ch->hashtab[i]); +#endif return (0); } @@ -2108,7 +2145,12 @@ lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, if (tbl >= IPFW_TABLES_MAX) return (0); rnh = ch->tables[tbl]; +#ifdef linux + /* there is no sin_len on linux, see above */ + *((uint8_t *)&sa) = 8; +#else sa.sin_len = 8; +#endif sa.sin_addr.s_addr = addr; ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); if (ent != NULL) { @@ -2117,9 +2159,7 @@ lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, } return (0); } -#endif -#ifdef radix static int count_table_entry(struct radix_node *rn, void *arg) { @@ -2175,52 +2215,37 @@ dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) rnh->rnh_walktree(rnh, dump_table_entry, tbl); return (0); } -#endif - -#ifndef linux /* FreeBSD */ -static void -fill_ugid_cache(struct inpcb *inp, struct ip_fw_ugid *ugp) -{ - struct ucred *cr; - - cr = inp->inp_cred; - ugp->fw_prid = jailed(cr) ? 
cr->cr_prison->pr_id : -1; - ugp->fw_uid = cr->cr_uid; - ugp->fw_ngroups = cr->cr_ngroups; - bcopy(cr->cr_groups, ugp->fw_groups, sizeof(ugp->fw_groups)); -} -#endif static int check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, - u_int16_t src_port, struct ip_fw_ugid *ugp, int *ugid_lookupp, + u_int16_t src_port, struct ucred **uc, int *ugid_lookup, struct inpcb *inp) { #ifdef linux int match = 0; struct sk_buff *skb = ((struct mbuf *)inp)->m_skb; + struct bsd_ucred *u = (struct bsd_ucred *)uc; - if (*ugid_lookupp == 0) { /* actively lookup and copy in cache */ - + if (*ugid_lookup == 0) { /* actively lookup and copy in cache */ /* returns null if any element of the chain up to file is null. * if sk != NULL then we also have a reference */ - *ugid_lookupp = linux_lookup(proto, + *ugid_lookup = linux_lookup(proto, src_ip.s_addr, htons(src_port), dst_ip.s_addr, htons(dst_port), - skb, oif ? 1 : 0, ugp); + skb, oif ? 
1 : 0, u); } - if (*ugid_lookupp < 0) + if (*ugid_lookup < 0) return 0; if (insn->o.opcode == O_UID) - match = (ugp->fw_uid == (uid_t)insn->d[0]); + match = (u->uid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_JAIL) - match = (ugp->fw_groups[1] == (uid_t)insn->d[0]); + match = (u->xid == (uid_t)insn->d[0]); else if (insn->o.opcode == O_GID) - match = (ugp->fw_groups[0] == (uid_t)insn->d[0]); + match = (u->gid == (uid_t)insn->d[0]); return match; @@ -2230,7 +2255,6 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, int wildcard; struct inpcb *pcb; int match; - gid_t *gp; /* * Check to see if the UDP or TCP stack supplied us with @@ -2240,7 +2264,7 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, if (inp && *ugid_lookupp == 0) { INP_LOCK_ASSERT(inp); if (inp->inp_socket != NULL) { - fill_ugid_cache(inp, ugp); + *uc = crhold(inp->inp_cred); *ugid_lookupp = 1; } else *ugid_lookupp = -1; @@ -2273,7 +2297,7 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, dst_ip, htons(dst_port), wildcard, NULL); if (pcb != NULL) { - fill_ugid_cache(pcb, ugp); + *uc = crhold(pcb->inp_cred); *ugid_lookupp = 1; } INP_INFO_RUNLOCK(pi); @@ -2289,16 +2313,11 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, } } if (insn->o.opcode == O_UID) - match = (ugp->fw_uid == (uid_t)insn->d[0]); - else if (insn->o.opcode == O_GID) { - for (gp = ugp->fw_groups; - gp < &ugp->fw_groups[ugp->fw_ngroups]; gp++) - if (*gp == (gid_t)insn->d[0]) { - match = 1; - break; - } - } else if (insn->o.opcode == O_JAIL) - match = (ugp->fw_prid == (int)insn->d[0]); + match = ((*uc)->cr_uid == (uid_t)insn->d[0]); + else if (insn->o.opcode == O_GID) + match = groupmember((gid_t)insn->d[0], *uc); + else if (insn->o.opcode == O_JAIL) + match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); return match; #endif } @@ -2375,8 +2394,8 @@ ipfw_chk(struct ip_fw_args *args) * these types of constraints, as well as decrease contention * on pcb related locks. 
*/ - struct ip_fw_ugid fw_ugid_cache; - int ugid_lookup = 0; + struct bsd_ucred ucred_cache; + int ucred_lookup = 0; /* * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG @@ -2735,11 +2754,20 @@ do { \ IPFW_RUNLOCK(chain); return (IP_FW_PASS); } + if (chain->id != args->chain_id) { + for (f = chain->rules; f != NULL; f = f->next) + if (f == args->rule && f->id == args->rule_id) + break; - f = args->rule->next_rule; + if (f != NULL) + f = f->next_rule; + else + f = chain->default_rule; + } else + f = args->rule->next_rule; if (f == NULL) - f = lookup_next_rule(args->rule, 0); + f = lookup_next_rule(chain, args->rule, 0); } else { /* * Find the starting rule. It can be either the first @@ -2753,12 +2781,9 @@ do { \ IPFW_RUNLOCK(chain); return (IP_FW_DENY); /* invalid */ } +// f = rule2ptr(chain, skipto+1); while (f && f->rulenum <= skipto) f = f->next; - if (f == NULL) { /* drop packet */ - IPFW_RUNLOCK(chain); - return (IP_FW_DENY); - } } } /* reset divert rule to avoid confusion later */ @@ -2857,8 +2882,8 @@ do { \ (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, - src_ip, src_port, &fw_ugid_cache, - &ugid_lookup, (struct inpcb *)args->m); + src_ip, src_port, (struct ucred **)&ucred_cache, + &ucred_lookup, (struct inpcb *)args->m); break; case O_RECV: @@ -2964,14 +2989,23 @@ do { \ (ipfw_insn_u32 *)cmd, proto, oif, dst_ip, dst_port, - src_ip, src_port, &fw_ugid_cache, - &ugid_lookup, (struct inpcb *)args->m); + src_ip, src_port, (struct ucred **)&ucred_cache, + &ucred_lookup, (struct inpcb *)args->m); +#ifdef linux + if (v ==4 /* O_UID */) + a = ucred_cache.uid; + else if (v == 5 /* O_GID */) + a = ucred_cache.gid; + else if (v == 6 /* O_JAIL */) + a = ucred_cache.xid; +#else if (v ==4 /* O_UID */) - a = fw_ugid_cache.fw_uid; + a = (*uc)->cr_uid; else if (v == 5 /* O_GID */) - a = fw_ugid_cache.fw_groups[0]; + ; // a = groupmember((gid_t)insn->d[0], *uc); else if (v == 6 /* O_JAIL */) - a = fw_ugid_cache.fw_groups[1]; + a = (*uc)->cr_prison->pr_id; 
+#endif } else break; } @@ -3555,37 +3589,13 @@ do { \ break; } /* handle skipto */ -#ifdef IPFW_HAVE_SKIPTO_TABLE - /* NOTE: lookup_skipto_table can return NULL - * if the rule isn't found, so the - * standard lookup function must be - * called XXX - */ - if (cmd->arg1 == IP_FW_TABLEARG) { - f = lookup_skipto_table(chain, - tablearg); - if (f == NULL) - f = lookup_next_rule(f, tablearg); - } - else { - f = lookup_skipto_table(chain, - cmd->arg1); - if (f == NULL) { - if (f->next_rule == NULL) - lookup_next_rule(f, 0); - f = f->next_rule; - } - } - -#else if (cmd->arg1 == IP_FW_TABLEARG) { - f = lookup_next_rule(f, tablearg); + f = lookup_next_rule(chain, f, tablearg); } else { if (f->next_rule == NULL) - lookup_next_rule(f, 0); + lookup_next_rule(chain, f, 0); f = f->next_rule; } -#endif /* * Skip disabled rules, and * re-enter the inner loop @@ -3809,6 +3819,10 @@ do { \ printf("ipfw: ouch!, skip past end of rules, denying packet\n"); } IPFW_RUNLOCK(chain); +#ifdef __FreeBSD__ + if (ucred_cache != NULL) + crfree(ucred_cache); +#endif return (retval); pullup_failed: @@ -3869,17 +3883,15 @@ add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) goto done; } - /* - * If rulenum is 0, find highest numbered rule before the - * default rule, and add autoinc_step - */ if (V_autoinc_step < 1) V_autoinc_step = 1; else if (V_autoinc_step > 1000) V_autoinc_step = 1000; if (rule->rulenum == 0) { /* - * locate the highest numbered rule before default + * If rulenum is 0, use highest numbered rule before + * the default, adding autoinc_step if room. + * Also set the number in the caller. */ for (f = chain->rules; f; f = f->next) { if (f->rulenum == IPFW_DEFAULT_RULE) @@ -3893,6 +3905,7 @@ add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) /* * Now insert the new rule in the right place in the sorted list. + * XXX TODO also put in the skipto table. 
*/ for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) { if (f->rulenum > rule->rulenum) { /* found the location */ @@ -3945,6 +3958,7 @@ remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, prev->next = n; V_static_count--; V_static_len -= l; + // XXX remove from the skipto table rule->next = chain->reap; chain->reap = rule; @@ -3953,12 +3967,6 @@ remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, } /* - * Hook for cleaning up dummynet when an ipfw rule is deleted. - * Set/cleared when dummynet module is loaded/unloaded. - */ -void (*ip_dn_ruledel_ptr)(void *) = NULL; - -/** * Reclaim storage associated with a list of rules. This is * typically the list created using remove_rule. * A NULL pointer on input is handled correctly. @@ -3970,8 +3978,6 @@ reap_rules(struct ip_fw *head) while ((rule = head) != NULL) { head = head->next; - if (ip_dn_ruledel_ptr) - ip_dn_ruledel_ptr(rule); free(rule, M_IPFW); } } @@ -3988,6 +3994,7 @@ free_chain(struct ip_fw_chain *chain, int kill_default) IPFW_WLOCK_ASSERT(chain); + chain->reap = NULL; flush_rule_ptrs(chain); /* more efficient to do outside the loop */ for (prev = NULL, rule = chain->rules; rule ; ) if (kill_default || rule->set != RESVD_SET) @@ -4115,10 +4122,8 @@ del_entry(struct ip_fw_chain *chain, u_int32_t arg) * avoid a LOR with dummynet. 
*/ rule = chain->reap; - chain->reap = NULL; IPFW_WUNLOCK(chain); - if (rule) - reap_rules(rule); + reap_rules(rule); return 0; } @@ -4531,7 +4536,7 @@ ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) int i; time_t boot_seconds; - boot_seconds = boottime.tv_sec; + boot_seconds = boottime.tv_sec; /* XXX this can take a long time and locking will block packet flow */ IPFW_RLOCK(chain); for (rule = chain->rules; rule ; rule = rule->next) { @@ -4619,7 +4624,6 @@ ipfw_getdynrules(struct ip_fw_chain *chain, void *buf, size_t space) if (last != NULL) /* mark last dynamic rule */ bzero(&last->next, sizeof(last)); } - return (bp - (char *)buf); } @@ -4706,13 +4710,10 @@ ipfw_ctl(struct sockopt *sopt) */ IPFW_WLOCK(&V_layer3_chain); - V_layer3_chain.reap = NULL; free_chain(&V_layer3_chain, 0 /* keep default rule */); rule = V_layer3_chain.reap; - V_layer3_chain.reap = NULL; IPFW_WUNLOCK(&V_layer3_chain); - if (rule != NULL) - reap_rules(rule); + reap_rules(rule); break; case IP_FW_ADD: @@ -4905,14 +4906,6 @@ ipfw_ctl(struct sockopt *sopt) #undef RULE_MAXSIZE } -/** - * dummynet needs a reference to the default rule, because rules can be - * deleted while packets hold a reference to them. When this happens, - * dummynet changes the reference to the default rule (it could well be a - * NULL pointer, but this way we do not need to check for the special - * case, plus here he have info on the default behaviour). - */ -//struct ip_fw *ip_fw_default_rule; /* * This procedure is only used to handle keepalives. It is invoked @@ -5010,7 +5003,7 @@ ipfw_tick(void * vnetx) #endif done: callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period*hz, - ipfw_tick, NULL); + ipfw_tick, vnetx); CURVNET_RESTORE(); } @@ -5132,12 +5125,6 @@ vnet_ipfw_init(const void *unused) if (error) { panic("init_tables"); /* XXX Marko fix this ! 
*/ } - -#ifdef IPFW_HAVE_SKIPTO_TABLE -// for (error = 0; error < 64*1024; error++) -// V_layer3_chain.skipto_pointers[error].id = -1; -#endif /* IPFW_HAVE_SKIPTO_TABLE */ - #ifdef IPFIREWALL_NAT LIST_INIT(&V_layer3_chain.nat); #endif @@ -5167,6 +5154,8 @@ vnet_ipfw_init(const void *unused) IPFW_LOCK_INIT(&V_layer3_chain); callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); + set_skipto_table(&V_layer3_chain); + bzero(&default_rule, sizeof default_rule); default_rule.act_ofs = 0; default_rule.rulenum = IPFW_DEFAULT_RULE; @@ -5184,7 +5173,7 @@ vnet_ipfw_init(const void *unused) return (error); } - ip_fw_default_rule = V_layer3_chain.rules; + V_layer3_chain.default_rule = V_layer3_chain.rules; /* curvnet is NULL in the !VIMAGE case */ callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); diff --git a/dummynet/ip_fw_pfil.c b/dummynet/ip_fw_pfil.c index 3fa643c..368192a 100644 --- a/dummynet/ip_fw_pfil.c +++ b/dummynet/ip_fw_pfil.c @@ -126,10 +126,14 @@ again: args.m = *m0; args.inp = inp; - ipfw = ipfw_chk(&args); - *m0 = args.m; tee = 0; + if (V_fw_one_pass == 0 || args.rule == NULL) { + ipfw = ipfw_chk(&args); + *m0 = args.m; + } else + ipfw = IP_FW_PASS; + KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); @@ -256,10 +260,14 @@ again: args.m = *m0; args.oif = ifp; args.inp = inp; - ipfw = ipfw_chk(&args); - *m0 = args.m; tee = 0; + if (V_fw_one_pass == 0 || args.rule == NULL) { + ipfw = ipfw_chk(&args); + *m0 = args.m; + } else + ipfw = IP_FW_PASS; + KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", __func__)); @@ -504,38 +512,54 @@ ipfw6_unhook(void) int ipfw_chg_hook(SYSCTL_HANDLER_ARGS) { - int enable = *(int *)arg1; + int enable; + int oldenable; int error; + if (arg1 == &VNET_NAME(fw_enable)) { + enable = V_fw_enable; + } +#ifdef INET6 + else if (arg1 == &VNET_NAME(fw6_enable)) { + enable = V_fw6_enable; + } +#endif + else + return (EINVAL); + + oldenable = enable; + error = sysctl_handle_int(oidp, &enable, 0, req); + if (error) 
return (error); enable = (enable) ? 1 : 0; - if (enable == *(int *)arg1) + if (enable == oldenable) return (0); - if (arg1 == &fw_enable) { + if (arg1 == &VNET_NAME(fw_enable)) { if (enable) error = ipfw_hook(); else error = ipfw_unhook(); + if (error) + return (error); + V_fw_enable = enable; } #ifdef INET6 - if (arg1 == &fw6_enable) { + else if (arg1 == &VNET_NAME(fw6_enable)) { if (enable) error = ipfw6_hook(); else error = ipfw6_unhook(); + if (error) + return (error); + V_fw6_enable = enable; } #endif - if (error) - return (error); - - *(int *)arg1 = enable; - return (0); } diff --git a/dummynet/ipfw2_mod.c b/dummynet/ipfw2_mod.c index f47d365..667d487 100644 --- a/dummynet/ipfw2_mod.c +++ b/dummynet/ipfw2_mod.c @@ -71,7 +71,19 @@ #include /* ip_fw_ctl_t, ip_fw_chk_t */ #include /* ip_dn_ctl_t, ip_dn_io_t */ #include /* PFIL_IN, PFIL_OUT */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#warning --- inet_hashtables not present on 2.4 +#include +#include +#include +static inline int inet_iif(const struct sk_buff *skb) +{ + return ((struct rtable *)skb->dst)->rt_iif; +} + +#else #include /* inet_lookup */ +#endif #include /* inet_iif */ /* @@ -103,19 +115,6 @@ struct mod_args { static unsigned int mod_idx; static struct mod_args mods[10]; /* hard limit to 10 modules */ -/* - * Data structure to cache our ucred related - * information. This structure only gets used if - * the user specified UID/GID based constraints in - * a firewall rule. - */ -struct ip_fw_ugid { - gid_t fw_groups[NGROUPS]; - int fw_ngroups; - uid_t fw_uid; - int fw_prid; -}; - /* * my_mod_register should be called automatically as the init * functions in the submodules. 
Unfortunately this compiler/linker @@ -490,12 +489,16 @@ extern struct inet_hashinfo tcp_hashinfo; int linux_lookup(const int proto, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - struct sk_buff *skb, int dir, struct ip_fw_ugid *ugp) + struct sk_buff *skb, int dir, struct bsd_ucred *u) { +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) + return -1; +#else struct sock *sk; int ret = -1; /* default return value */ int st = -1; /* state */ + if (proto != IPPROTO_TCP) /* XXX extend for UDP */ return -1; @@ -540,10 +543,10 @@ linux_lookup(const int proto, const __be32 saddr, const __be16 sport, ret = 1; /* retrying won't make things better */ st = sk->sk_state; #ifdef CONFIG_VSERVER - ugp->fw_groups[1] = sk->sk_xid; - ugp->fw_groups[2] = sk->sk_nid; + u->xid = sk->sk_xid; + u->nid = sk->sk_nid; #else - ugp->fw_groups[1] = ugp->fw_groups[2] = 0; + u->xid = u->nid = 0; #endif /* * Exclude tcp states where sk points to a inet_timewait_sock which @@ -564,15 +567,6 @@ linux_lookup(const int proto, const __be32 saddr, const __be16 sport, #define _CURR_GID f_cred->fsgid #endif -#ifdef CONFIG_VSERVER - ugp->fw_groups[1] = sk->sk_xid; - ugp->fw_groups[2] = sk->sk_nid; -#else - ugp->fw_groups[1] = - ugp->fw_groups[2] = 0; -#endif - ret = 1; - #define GOOD_STATES ( \ (1<sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - ugp->fw_uid = sk->sk_socket->file->_CURR_UID; - ugp->fw_groups[0] = sk->sk_socket->file->_CURR_GID; - } + if (sk->sk_socket && sk->sk_socket->file) { + u->uid = sk->sk_socket->file->_CURR_UID; + u->gid = sk->sk_socket->file->_CURR_GID; + } read_unlock_bh(&sk->sk_callback_lock); } else { - ugp->fw_uid = ugp->fw_groups[0] = 0; + u->uid = u->gid = 0; } if (!skb->sk) /* return the reference that came from the lookup */ sock_put(sk); @@ -595,6 +589,8 @@ linux_lookup(const int proto, const __be32 saddr, const __be16 sport, #undef _CURR_UID #undef _CURR_GID return ret; + +#endif /* LINUX > 2.4 */ } /* @@ -669,6 +665,7 @@ 
static struct nf_hook_ops ipfw_ops[] __read_mostly = { extern moduledata_t *moddesc_ipfw; extern moduledata_t *moddesc_dummynet; +extern void rn_init(void); /* * Module glue - init and exit function. */ @@ -679,6 +676,8 @@ ipfw_module_init(void) printf("%s in-hook %d svn id %s\n", __FUNCTION__, IPFW_HOOK_IN, "$Id$"); + rn_init(); + my_mod_register(moddesc_ipfw, "ipfw", 1); my_mod_register(moddesc_dummynet, "dummynet", 2); init_children(); diff --git a/dummynet/missing.h b/dummynet/missing.h index 1d2f2ec..d18f503 100644 --- a/dummynet/missing.h +++ b/dummynet/missing.h @@ -75,6 +75,7 @@ struct inpcb; #define rw_runlock(_l) spin_unlock_bh(_l) #define rw_wlock(_l) spin_lock_bh(_l) #define rw_wunlock(_l) spin_unlock_bh(_l) +#define rw_init_flags(_l, s, v) #define mtx_assert(a, b) #define mtx_destroy(m) @@ -346,6 +347,17 @@ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); /* defined in session.c */ int priv_check(struct thread *td, int priv); +/* struct ucred is in linux/socket.h and has pid, uid, gid. 
+ * We need a 'bsd_ucred' to store also the extra info + */ + +struct bsd_ucred { + uid_t uid; + gid_t gid; + uint32_t xid; + uint32_t nid; +}; + int securelevel_ge(struct ucred *cr, int level); struct sysctl_oid; @@ -363,6 +375,24 @@ struct sysctl_req; #ifdef _WIN32 #define module_param_named(_name, _var, _ty, _perm) #else + +/* Linux 2.4 is mostly for openwrt */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) +#include /* generic_ffs() used in ip_fw2.c */ +typedef uint32_t __be32; +typedef uint16_t __be16; +struct sock; +struct net; +struct inet_hashinfo; +struct sock *inet_lookup( + struct inet_hashinfo *hashinfo, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, + const int dif); +static int inet_iif(const struct sk_buff *skb); +struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); +#endif /* Linux < 2.6 */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) #define module_param_named(_name, _var, _ty, _perm) \ //module_param(_name, _ty, 0644) @@ -387,7 +417,7 @@ struct sysctl_req; _SYSCTL_BASE(_name, _var, ulong, _mode) #define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \ - // _SYSCTL_BASE(_name, _var, uint, _mode) + _SYSCTL_BASE(_name, _var, uint, _mode) #define SYSCTL_HANDLER_ARGS \ struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req @@ -413,8 +443,6 @@ u_short in_cksum_skip(struct mbuf *m, int len, int skip); #define INP_LOCK_ASSERT(a) #endif -int rn_inithead(void **head, int off); - int jailed(struct ucred *cred); /* @@ -430,11 +458,10 @@ int in_localaddr(struct in_addr in); int fnmatch(const char *pattern, const char *string, int flags); -struct ip_fw_ugid; int linux_lookup(const int proto, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, - struct sk_buff *skb, int dir, struct ip_fw_ugid *ugp); + struct sk_buff *skb, int dir, struct bsd_ucred *u); /* vnet wrappers, in vnet.h and ip_var.h */ int ipfw_init(void); diff --git 
a/dummynet/new_glue.c b/dummynet/new_glue.c index ce0a5a3..5ceef79 100644 --- a/dummynet/new_glue.c +++ b/dummynet/new_glue.c @@ -35,19 +35,17 @@ static uint32_t simple_hash32(const void *key, uint32_t size) { uint32_t ret = *(const uint32_t *)key % size; - printf("%s called\n", __FUNCTION__); - printf("Hash returns %d\n", ret); return ret; } static int -cmp_func32(const void *key1, const void *key2) +cmp_func32(const void *key1, const void *key2, int sz) { int k1 = *(const int *)key1; int k2 = *(const int *)key2; int ret; - printf("(%s) k1=%d, k2=%d\n", __FUNCTION__, k1, k2); + if (k1 < k2) ret = -1; else if (k1 > k2) @@ -55,8 +53,6 @@ cmp_func32(const void *key1, const void *key2) else ret = 0; - printf("compare returns %d\n", ret); - return ret; } @@ -77,11 +73,9 @@ add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, int size = 128; int obj_size = sizeof(struct t_o); - printf("%s called\n", __FUNCTION__); if (i < 0 || i > size-1) /* wrong table number */ return 1; if (ch->global_tables[i] == NULL) { - printf("Creating table n %d\n", tbl); ch->global_tables[i] = new_table_init(size, obj_size, simple_hash32, cmp_func32, M_IPFW_HTBL); } @@ -101,8 +95,6 @@ new_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr) int ret; int nr = tbl - IPFW_TABLES_MAX; - printf("%s called\n", __FUNCTION__); - ret = new_table_delete_obj(ch->global_tables[nr], &addr); return ret; @@ -112,7 +104,6 @@ int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint8_t mlen) { - printf("%s called\n", __FUNCTION__); if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { new_del_table_entry(ch, tbl, addr); return 0; @@ -123,7 +114,6 @@ del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, int new_flush_table(struct ip_fw_chain *ch, uint16_t tbl) { - printf("%s called\n", __FUNCTION__); new_table_destroy(ch->global_tables[tbl - IPFW_TABLES_MAX]); return 0; } @@ -131,7 +121,6 @@ new_flush_table(struct ip_fw_chain *ch, 
uint16_t tbl) int flush_table(struct ip_fw_chain *ch, uint16_t tbl) { - printf("%s called\n", __FUNCTION__); if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) return new_flush_table(ch, tbl); @@ -142,20 +131,17 @@ int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, uint32_t *val) { - printf("%s called\n", __FUNCTION__); if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { struct new_hash_table *h; const struct t_o *obj; h = ch->global_tables[tbl - IPFW_TABLES_MAX]; - printf("Search %d in table number %d\n", addr, tbl); obj = new_table_extract_obj(h, (void *)&addr); if (obj == NULL) return 0; /* no match */ *val = obj->value; - printf("obj->addr=%d,obj->value=%d\n",obj->addr, obj->value); return 1; /* match */ } return 0; @@ -164,7 +150,6 @@ lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, int new_count_table_entry(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { - printf("%s called\n", __FUNCTION__); *cnt = new_table_get_element(ch->global_tables[tbl - IPFW_TABLES_MAX]); return 0; } @@ -172,7 +157,6 @@ new_count_table_entry(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) { - printf("%s called\n", __FUNCTION__); if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { new_count_table_entry(ch, tbl, cnt); return (0); @@ -191,8 +175,6 @@ new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl) int nr = tbl->tbl - IPFW_TABLES_MAX; struct new_hash_table *t = ch->global_tables[nr]; - printf("%s called\n", __FUNCTION__); - i = 0; tbl->cnt = 0; @@ -203,7 +185,6 @@ new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl) obj = table_next(t, obj); if (obj == NULL) break; - printf("Found \n"); ent = &tbl->ent[tbl->cnt]; ent->addr = obj->addr; @@ -211,14 +192,12 @@ new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl) ent->masklen = obj->mask; tbl->cnt++; } - printf("\n"); return 0; } int dump_table(struct ip_fw_chain *ch, 
ipfw_table *tbl) { - printf("%s called\n", __FUNCTION__); if (tbl->tbl >= IPFW_TABLES_MAX && tbl->tbl < IPFW_NEWTABLES_MAX) { new_dump_table_entry(ch, tbl); return (0); @@ -231,7 +210,6 @@ init_tables(struct ip_fw_chain *ch) { int i; - printf("%s called\n", __FUNCTION__); /* Initialize new tables XXXMPD */ for (i = 0; i < IPFW_NEWTABLES_MAX - IPFW_TABLES_MAX; i++) { memset(&ch->global_tables[i], sizeof(struct new_hash_table*), 0); diff --git a/dummynet/radix.c b/dummynet/radix.c index 2a5fcc3..575c47c 100644 --- a/dummynet/radix.c +++ b/dummynet/radix.c @@ -27,35 +27,36 @@ * SUCH DAMAGE. * * @(#)radix.c 8.5 (Berkeley) 5/19/95 - * $FreeBSD: head/sys/net/radix.c 186176 2008-12-16 11:01:36Z kmacy $ + * $FreeBSD: head/sys/net/radix.c 200354 2009-12-10 10:34:30Z luigi $ */ -#include "missing.h" /* * Routines to build and maintain radix trees for routing lookups. */ -#ifndef _RADIX_H_ #include #ifdef _KERNEL +#include +#include "missing.h" #include #include #include #include #include -// #include -#else -#include -#endif #include #include -#endif - -// #include "opt_mpath.h" - +#include "opt_mpath.h" #ifdef RADIX_MPATH #include #endif - +#else /* !_KERNEL */ +#include +#include +#include +#define log(x, arg...) fprintf(stderr, ## arg) +#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) +#define min(a, b) ((a) < (b) ? (a) : (b) ) +#include "include/net/radix.h" +#endif /* !_KERNEL */ static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, void *w); @@ -73,6 +74,8 @@ static struct radix_node_head *mask_rnhead; /* * Work area -- the following point to 3 buffers of size max_keylen, * allocated in this order in a block of memory malloc'ed by rn_init. + * rn_zeros, rn_ones are set in rn_init and used in readonly afterwards. + * addmask_key is used in rn_addmask in rw mode and not thread-safe. 
*/ static char *rn_zeros, *rn_ones, *addmask_key; @@ -136,8 +139,9 @@ static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, * To make the assumption more explicit, we use the LEN() macro to access * this field. It is safe to pass an expression with side effects * to LEN() as the argument is evaluated only once. + * We cast the result to int as this is the dominant usage. */ -#define LEN(x) (*(const u_char *)(x)) +#define LEN(x) ( (int) (*(const u_char *)(x)) ) /* * XXX THIS NEEDS TO BE FIXED @@ -198,7 +202,7 @@ rn_refines(m_arg, n_arg) { register caddr_t m = m_arg, n = n_arg; register caddr_t lim, lim2 = lim = n + LEN(n); - int longer = LEN(n++) - (int)LEN(m++); + int longer = LEN(n++) - LEN(m++); int masks_are_equal = 1; if (longer > 0) @@ -251,10 +255,10 @@ rn_satisfies_leaf(trial, leaf, skip) char *cplim; int length = min(LEN(cp), LEN(cp2)); - if (cp3 == 0) + if (cp3 == NULL) cp3 = rn_ones; else - length = min(length, (int)(*(u_char *)cp3)); + length = min(length, LEN(cp3)); cplim = cp + length; cp3 += skip; cp2 += skip; for (cp += skip; cp < cplim; cp++, cp2++, cp3++) if ((*cp ^ *cp2) & *cp3) @@ -425,7 +429,7 @@ rn_insert(v_arg, head, dupentry, nodes) { caddr_t v = v_arg; struct radix_node *top = head->rnh_treetop; - int head_off = top->rn_offset, vlen = (int)LEN(v); + int head_off = top->rn_offset, vlen = LEN(v); register struct radix_node *t = rn_search(v_arg, top); register caddr_t cp = v + head_off; register int b; @@ -1160,16 +1164,11 @@ rn_inithead(head, off) } void -rn_init() +rn_init(int maxk) { char *cp, *cplim; -#ifdef _KERNEL - struct domain *dom; - for (dom = domains; dom; dom = dom->dom_next) - if (dom->dom_maxrtkey > max_keylen) - max_keylen = dom->dom_maxrtkey; -#endif + max_keylen = maxk; if (max_keylen == 0) { log(LOG_ERR, "rn_init: radix functions require max_keylen be set\n"); diff --git a/dummynet/test_radix.c b/dummynet/test_radix.c new file mode 100644 index 0000000..b0e37d5 --- /dev/null +++ b/dummynet/test_radix.c @@ -0,0 
+1,97 @@ +/* + * Test the radix tree net + */ + +#include +#include +#include +#include +#include /* htonl */ +#include "include/net/radix.h" + +struct d { + uint8_t len[4]; + uint32_t data; +}; + +struct table_entry { + struct radix_node rn[2]; + struct d x, mask; + int value; +}; + +static int +del(struct radix_node *rn, void *arg) +{ + struct radix_node_head * const rnh = arg; + struct table_entry *ent; + + ent = (struct table_entry *) + rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); + fprintf(stderr, "del returns %p\n", ent); + if (0 && ent != NULL) + free(ent); + return (0); +} + +int +list(struct radix_node *rn, void *arg) +{ + struct table_entry *ent = (struct table_entry *)rn; + + fprintf(stderr, "walking on node %d\n", ent->value); + return (0); +} + +static void +print_dt(struct timeval *start, struct timeval *end, int n, const char *msg) +{ + int ds = 0, du, l; + du = end->tv_usec - start->tv_usec; + if (du < 0) { + ds = -1; + du += 1000000; + } + ds += end->tv_sec - start->tv_sec; + if (n <= 1) + n = 1; + l = (ds * 1000000+ du)/n; + fprintf(stderr, "%d tries in %d.%06ds, %dus each\n", + n, ds, du, l); +} + +static void +test1(struct radix_node_head *h, int n) +{ + struct table_entry *p; + struct timeval start, end; + int i; + + p = calloc(n, sizeof(*p)); + if (!p) + return; + for (i=0; i < n; i++) { + p->value = i; + p->x.len[0] = p->mask.len[0] = 8; + p->mask.data = 0xffffffff; + p->x.data = htonl(i); + } + gettimeofday(&start, NULL); + for (i=0; i < n; i++) { + h->rnh_addaddr(&(p->x), &(p->mask), h, (void *)p); + } + gettimeofday(&end, NULL); + print_dt(&start, &end, n, NULL); + h->rnh_walktree(h, del, h); +} + +int +main(int argc, char *argv[]) +{ + struct radix_node_head *h = NULL; + + rn_init(64); // XXX bits or bytes ? + rn_inithead((void **)&h, 32); /* data offset in bits */ + test1(h, 1000000); + return 0; +} diff --git a/glue.h b/glue.h index c05ee4a..1f8aa62 100644 --- a/glue.h +++ b/glue.h @@ -23,7 +23,7 @@ * SUCH DAMAGE. 
*/ /* - * $Id: glue.h 4413 2009-12-10 08:57:02Z luigi $ + * $Id: glue.h 4436 2009-12-10 18:31:49Z luigi $ * * glue code to adapt the FreeBSD version to linux and windows, * userland and kernel. @@ -262,7 +262,7 @@ struct route_in6 { }; #ifdef linux /* linux does not have sin_len in sockaddr */ -#define sin_len __pad[0] +#define sin_len sin_zero[0] #endif /* linux */ /* diff --git a/ipfw/glue.c b/ipfw/glue.c index db90c60..e5876b9 100644 --- a/ipfw/glue.c +++ b/ipfw/glue.c @@ -24,13 +24,14 @@ */ /* - * $Id: glue.c 4051 2009-11-16 11:30:05Z luigi $ + * $Id: glue.c 4469 2009-12-11 20:23:11Z marta $ * * Userland functions missing in linux */ #include #include +#include #ifndef HAVE_NAT /* dummy nat functions */ @@ -91,10 +92,88 @@ strtonum(const char *nptr, long long minval, long long maxval, return strtoll(nptr, (char **)errstr, 0); } +/* + * set or get system information + * XXX lock acquisition/serialize calls + * + * we export this as sys/module/ipfw_mod/parameters/___ + * This function gets and/or sets the value of the sysctl named by + * the name parameter. If the old value is not desired, + * oldp and oldlenp should be set to NULL. + * + * XXX + * I do not know how this works in FreeBSD in the case + * where there is no write permission on the sysctl var. + * We read the value and set return variables in any case + * but return -1 on write failures, regardless of the + * read success. + * + * Since there is no information on types, in the following + * code we assume a length of 4 is an int. + * + * Returns 0 on success, -1 on errors. + */ int sysctlbyname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return -1; + FILE *fp; + char *basename = "/sys/module/ipfw_mod/parameters/"; + char filename[256]; /* full filename */ + char *varp; + int ret = 0; /* return value */ + int d; + + /* debug message */ + if (0) fprintf(stderr, "%s name %s oldp %p oldlenp %p %d newp %p newlen %d\n", __FUNCTION__, name, \ + oldp, oldlenp, oldlenp ? 
*oldlenp : -1 , newp, (int) newlen); + + if (name == NULL) /* XXX set errno */ + return -1; + + /* locate the filename */ + varp = strrchr(name, '.'); + if (varp == NULL) /* XXX set errno */ + return -1; + + snprintf(filename, sizeof(filename), "%s%s", basename, varp+1); + + /* + * XXX we could open the file here, in rw mode + * but need to check if the file has write + * permissions. + */ + + /* check parameters */ + if (oldp && oldlenp) { /* read mode */ + fp = fopen(filename, "r"); + if (fp == NULL) { + fprintf(stderr, "%s fopen error reading filename %s\n", __FUNCTION__, filename); + return -1; + } + if (*oldlenp == 4) { + if (fscanf(fp, "%d", &d) == 1) + memcpy(oldp, &d, *oldlenp); + else + ret = -1; + } + fclose(fp); + } + + if (newp && newlen) { /* write */ + fp = fopen(filename, "w"); + if (fp == NULL) { + fprintf(stderr, "%s fopen error writing filename %s\n", __FUNCTION__, filename); + return -1; + } + if (newlen == 4) { + if (fprintf(fp, "%d", *(int*)newp) < 1) + ret = -1; + } + + fclose(fp); + } + + return ret; } #endif /* __linux__ || _WIN32 */ diff --git a/ipfw/ipfw2.c b/ipfw/ipfw2.c index 571ff39..5d70328 100644 --- a/ipfw/ipfw2.c +++ b/ipfw/ipfw2.c @@ -365,8 +365,8 @@ do_cmd(int optname, void *optval, uintptr_t optlen) if (s < 0) err(EX_UNAVAILABLE, "socket"); - if (optname == IP_FW_GET || optname == IP_FW_DYN_GET || - optname == IP_DUMMYNET_GET || + if (optname == IP_FW_GET || optname == IP_DUMMYNET_GET || + optname == IP_FW_DYN_GET || optname == IP_FW_ADD || optname == IP_FW_TABLE_LIST || optname == IP_FW_TABLE_GETSIZE || optname == IP_FW_NAT_GET_CONFIG || @@ -2777,7 +2777,7 @@ chkarg: * the routing code seems to use it too. 
*/ p->sa.sin_family = AF_INET; - //p->sa.sin_len = sizeof(struct sockaddr_in); + p->sa.sin_len = sizeof(struct sockaddr_in); p->sa.sin_port = 0; /* * locate the address-port separator (':' or ',') diff --git a/ipfw.spec b/planetlab/ipfwroot.spec similarity index 96% rename from ipfw.spec rename to planetlab/ipfwroot.spec index 212efb7..25b418e 100644 --- a/ipfw.spec +++ b/planetlab/ipfwroot.spec @@ -78,6 +78,9 @@ rm -rf $RPM_BUILD_ROOT LOADED=`cat /proc/modules | grep ^ipfw_mod`; if [ -n "$LOADED" ] ; then rmmod ipfw_mod; fi %changelog +* Tue Dec 15 2009 Marta Carbone +- more work on the radix code, added sysctl read/write support + * Sun Nov 29 2009 Thierry Parmentelat - ipfw-0.9-7 - added missing qsort.c - tag 0.9-6 was broken diff --git a/ipfw-slice.spec b/planetlab/ipfwslice.spec similarity index 94% rename from ipfw-slice.spec rename to planetlab/ipfwslice.spec index 9023db8..a4f1521 100644 --- a/ipfw-slice.spec +++ b/planetlab/ipfwslice.spec @@ -56,6 +56,9 @@ rm -rf $RPM_BUILD_ROOT %{_mandir}/man8/ipfw.8* %changelog +* Tue Dec 15 2009 Marta Carbone +- more work on the radix code, added sysctl read/write support + * Sun Nov 29 2009 Thierry Parmentelat - ipfw-0.9-7 - added missing qsort.c - tag 0.9-6 was broken diff --git a/planetlab/planetlab-tags.mk b/planetlab/planetlab-tags.mk new file mode 100644 index 0000000..ee3c12d --- /dev/null +++ b/planetlab/planetlab-tags.mk @@ -0,0 +1,5 @@ +# $Id: planetlab-tags.mk 4496 2009-12-14 12:01:38Z luigi $ +# These are good to build the ipfw modules from svn on kernels 2.6.22 +linux-2.6-SVNBRANCH := 22 +linux-2.6-SVNPATH := http://svn.planet-lab.org/svn/linux-2.6/tags/linux-2.6-22-39-1 +ipfwsrc-SVNPATH := svn+ssh://luigi%40onelab2.iet.unipi.it/home/svn/ports-luigi/dummynet-branches/ipfw_mod diff --git a/planetlab/planetlab.mk b/planetlab/planetlab.mk new file mode 100644 index 0000000..e49bde8 --- /dev/null +++ b/planetlab/planetlab.mk @@ -0,0 +1,26 @@ +# $Id: planetlab.mk 4496 2009-12-14 12:01:38Z luigi $ +# .mk file to 
build a module +kernel-MODULES := linux-2.6 +kernel-SPEC := kernel-2.6.spec +kernel-BUILD-FROM-SRPM := yes +ifeq "$(HOSTARCH)" "i386" +kernel-RPMFLAGS:= --target i686 +else +kernel-RPMFLAGS:= --target $(HOSTARCH) +endif +ALL += kernel + +ipfwroot-MODULES := ipfwsrc +ipfwroot-SPEC := planetlab/ipfwroot.spec +ipfwroot-DEPEND-DEVEL-RPMS := kernel-devel +ipfwroot-SPECVARS = kernel_version=$(kernel.rpm-version) \ + kernel_release=$(kernel.rpm-release) \ + kernel_arch=$(kernel.rpm-arch) +ALL += ipfwroot + +ipfwslice-MODULES := ipfwsrc +ipfwslice-SPEC := planetlab/ipfwslice.spec +ipfwslice-SPECVARS = kernel_version=$(kernel.rpm-version) \ + kernel_release=$(kernel.rpm-release) \ + kernel_arch=$(kernel.rpm-arch) +ALL += ipfwslice -- 2.43.0