From: marta Date: Thu, 7 Jan 2010 13:50:00 +0000 (+0000) Subject: Remove the old dummynet directory and update the new code with the missing files... X-Git-Tag: ipfw-0.9-8~3 X-Git-Url: http://git.onelab.eu/?p=ipfw.git;a=commitdiff_plain;h=10f6855044ac2d3a12f19eddbbbb24b59cbbf1fb Remove the old dummynet directory and update the new code with the missing files. Update dependencied on Makefiles. --- diff --git a/Makefile b/Makefile index 51a00a9..55b8e0b 100644 --- a/Makefile +++ b/Makefile @@ -19,7 +19,6 @@ _all: all all clean distclean: echo target is $(@) (cd ipfw && $(MAKE) $(@) ) - (cd dummynet && $(MAKE) $(@) ) (cd dummynet2 && $(MAKE) $(@) ) snapshot: diff --git a/dummynet/Makefile b/dummynet/Makefile deleted file mode 100644 index 6c6d9f6..0000000 --- a/dummynet/Makefile +++ /dev/null @@ -1,221 +0,0 @@ -# -# $Id$ -# -# gnu Makefile to build linux module for ipfw+dummynet. -# -# The defaults are set to build without modifications on PlanetLab -# and possibly 2.6 versions. - -# Some variables need to have specific names, because they are used -# by the build infrastructure on Linux and OpenWrt. They are: -# -# ccflags-y additional $(CC) flags -# M used by Kbuild, we must set it to `pwd` -# obj-m list of .o modules to build -# $(MOD)-y for each $MOD in obj-m, the list of objects -# obj-y same as above, for openwrt -# O_TARGET the link target, for openwrt -# EXTRA_CFLAGS as the name says... in openwrt -# EXTRA_CFLAGS is used in 2.6.22 module kernel compilation too -# KERNELPATH the path to the kernel sources or headers -# -# Not sure about this (the name might be reserved) -# ipfw-cflags our flags for building the module -# -# Other variables are only private and can be renamed. 
They include: -# -# VER linux version we are building for (2.4 2.6 or openwrt) -#--- - -$(warning including dummynet/Makefile) - -# lets default for 2.6 for planetlab builds -VER ?= 2.6 - -#--- General values for all types of build --- -# obj-m is the target module -obj-m := ipfw_mod.o - -#-- the list of source files. IPFW_SRCS is our own name. -# Original ipfw and dummynet sources + FreeBSD stuff, -IPFW_SRCS = ip_fw2.c ip_dummynet.c ip_fw_pfil.c in_cksum.c -IPFW_SRCS += radix.c -# Module glue and functions missing in linux -IPFW_SRCS += ipfw2_mod.c bsd_compat.c - -# generic cflags used on all systems -#ipfw-cflags += -DIPFW_HASHTABLES -ipfw-cflags += -DIPFIREWALL_DEFAULT_TO_ACCEPT -# _BSD_SOURCE enables __FAVOR_BSD (udp/tcp bsd structs instead of posix) -ipfw-cflags += -D_BSD_SOURCE -ipfw-cflags += -DKERNEL_MODULE # build linux kernel module -# the two header trees for empty and override files -ipfw-cflags += -I $(M)/include_e -ipfw-cflags += -I $(M)/include -ipfw-cflags += -include $(M)/../glue.h # headers -ipfw-cflags += -include $(M)/missing.h # headers - -$(warning "---- Building dummynet kernel module for Version $(VER)") - -# We have three sections for OpenWrt, Linux 2.4 and Linux 2.6 - -ifeq ($(VER),openwrt) - #--- The Makefile section for openwrt --- - # We do not include a dependency on include_e as it is called - # by Makefile.openwrt in Build/Prepare - M=. - obj-y := $(IPFW_SRCS:%.c=%.o) - O_TARGET := $(obj-m) - - # xcflags-y is a temporary variable where we store build options - xcflags-y += -O1 -DLINUX_24 - xcflags-y += -g - - EXTRA_CFLAGS := $(xcflags-y) $(ipfw-cflags) - - # we should not export anything - #export-objs := ipfw2_mod.o --include $(TOPDIR)/Rules.make - -else # !openwrt, below we do linux builds for 2.4 and 2.6 - - # KERNELPATH is where the kernel headers reside. On PlanetLab - # it is set already by the build system. - # We can override it from the command line, or let the system guess. 
- -ifneq ($(shell echo $(VER)|grep '2.4'),) - # Makefile section for the linux 2.4 version - # tested on linux-2.4.35.4, does not work with 2.4.37 - # - # guess the kernel path -- or is it under /lib/modules ? - KERNELPATH ?= /usr/src/`uname -r` - - # We need to figure out the gcc include directory, if not - # set by the user through MYGCC_INCLUDE - # Find compiler version (3rd field in last line returned by gcc -v) - # e.g. gcc version 4.3.2 (Debian 4.3.2-1.1) - MYGCC_VER ?= $(shell $(CC) -v 2>&1 |tail -n 1 | cut -d " " -f 3) - # We don't know the exact directory under /usr/lib/gcc so we guess - MYGCC_INCLUDE ?= $(shell echo /usr/lib/gcc/*/$(MYGCC_VER) | cut -d " " -f 1)/include - $(warning "---- gcc includes guessed to $(MYGCC_INCLUDE)") - - # additional warning - WARN += -Wall -Wundef - WARN += -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing - WARN += -fno-common -Werror-implicit-function-declaration - # WARN += -O2 -fno-stack-protector -m32 -msoft-float -mregparm=3 - # -mregparm=3 gives a printk error - WARN += -m32 -msoft-float # -mregparm=3 - #WARN += -freg-struct-return -mpreferred-stack-boundary=2 - WARN += -Wno-sign-compare - WARN += -Wdeclaration-after-statement - ifneq ($(MYGCC_VER),3.4.6) - WARN += -Wno-pointer-sign - endif - - ccflags-y += -O1 -DLINUX_24 - CFLAGS = -DMODULE -D__KERNEL__ -nostdinc \ - -isystem ${KERNELPATH}/include -isystem $(MYGCC_INCLUDE) \ - ${ccflags-y} - # The Main target -all: mod24 - -else # !2.4 -- - - # This is the Makefile section for Linux 2.6.x including planetlab - -ifeq ($(IPFW_PLANETLAB),1) - $(warning "---- Building for PlanetLab") - ipfw-cflags += -DIPFW_PLANETLAB # PlanetLab compilation -endif - # if not set, use the version from the installed system - KERNELPATH ?= /lib/modules/`uname -r`/build - # Otherwise, if you have kernel sources, try something like this: - #KERNELPATH = /usr/src/linux-2.6.22 - $(warning "---- Building Version 2.6 $(VER) in $(KERNELPATH)") - WARN := -O1 -Wall -Werror -DDEBUG_SPINLOCK 
-DDEBUG_MUTEXES - # The main target - - # Required by kernel <= 2.6.22, ccflags-y is used on newer version - LINUX_VERSION_CODE := $(shell grep LINUX_VERSION_CODE $(KERNELPATH)/include/linux/version.h|cut -d " " -f3) - ifeq ($(shell if [ -z $(LINUX_VERSION_CODE) ] ; then echo "true"; fi),true) - $(warning "---- Perhaps you miss a (cd $(KERNELPATH); make oldconfig; make prepare; make scripts)"); - endif - ifeq ($(shell if [ $(LINUX_VERSION_CODE) -le 132630 ] ; then echo "true"; fi),true) - EXTRA_CFLAGS += $(ccflags-y) - endif - -all: include_e - $(MAKE) -C $(KERNELPATH) V=1 M=`pwd` modules -endif # !2.4 - -#-- back to the common section of code for Linux 2.4 and 2.6 - -# the list of objects used to build the module -ipfw_mod-y = $(IPFW_SRCS:%.c=%.o) - -# additional $(CC) flags -ccflags-y += $(WARN) -ccflags-y += $(ipfw-cflags) -# if we really want debug symbols... -ccflags-y += -g - -mod24: include_e $(obj-m) - -$(obj-m): $(ipfw_mod-y) - $(LD) $(LDFLAGS) -m elf_i386 -r -o $@ $^ - -# M is the current directory, used in recursive builds -# so we allow it to be overridden -M ?= $(shell pwd) -endif # !openwrt - -#--- various common targets -clean: - -rm -f *.o *.ko Module.symvers *.mod.c - -rm -rf include_e - -distclean: clean - -rm -f .*cmd modules.order opt_* - -rm -rf .tmp_versions include_e - -rm -rf .*.o.d - -# support to create empty dirs and files in include_e/ -# EDIRS is the list of directories, EFILES is the list of files. 
- -EDIRS= altq arpa machine net netinet netinet6 sys - -EFILES += opt_inet6.h opt_ipfw.h opt_ipsec.h opt_mpath.h -EFILES += opt_mbuf_stress_test.h opt_param.h - -EFILES += altq/if_altq.h -EFILES += arpa/inet.h -EFILES += machine/in_cksum.h -EFILES += net/ethernet.h net/netisr.h net/pf_mtag.h -EFILES += net/vnet.h - -EFILES += netinet/ether.h netinet/icmp6.h netinet/if_ether.h -EFILES += netinet/in.h netinet/in_pcb.h netinet/in_var.h -EFILES += netinet/in_systm.h -EFILES += netinet/ip_carp.h netinet/ip_var.h netinet/pim.h -EFILES += netinet/sctp.h netinet/tcp_timer.h netinet/tcpip.h -EFILES += netinet/udp_var.h - -EFILES += netinet6/ip6_var.h - -EFILES += sys/_lock.h sys/_rwlock.h sys/_mutex.h sys/jail.h -EFILES += sys/condvar.h sys/eventhandler.h sys/domain.h -EFILES += sys/limits.h sys/lock.h sys/mutex.h sys/priv.h -EFILES += sys/proc.h sys/rwlock.h sys/socket.h sys/socketvar.h -EFILES += sys/sysctl.h sys/time.h sys/ucred.h - -include_e: - echo "running in $M" - -@rm -rf $(M)/include_e opt_* - -@mkdir -p $(M)/include_e - -@(cd $(M)/include_e; mkdir -p $(EDIRS); touch $(EFILES) ) - - -#--- some other targets for testing purposes -test_radix: test_radix.o radix.o -test_radix: CFLAGS=-Wall -Werror -O2 diff --git a/dummynet/bsd_compat.c b/dummynet/bsd_compat.c deleted file mode 100644 index cad3c5d..0000000 --- a/dummynet/bsd_compat.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id: bsd_compat.c 4508 2009-12-15 21:54:14Z luigi $ - * - * kernel variables and functions that are not available in linux. - */ - -#include -#include /* do_div on 2.4 */ -#include /* get_random_bytes on 2.4 */ - -/* - * gettimeofday would be in sys/time.h but it is not - * visible if _KERNEL is defined - */ -int gettimeofday(struct timeval *, struct timezone *); - -int ticks; /* kernel ticks counter */ -int hz = 1000; /* default clock time */ -long tick = 1000; /* XXX is this 100000/hz ? */ -int bootverbose = 0; -time_t time_uptime = 0; -struct timeval boottime; - -int ip_defttl; -int fw_one_pass = 1; -u_long in_ifaddrhmask; /* mask for hash table */ -struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ - -u_int rt_numfibs = RT_NUMFIBS; - -/* - * pfil hook support. - * We make pfil_head_get return a non-null pointer, which is then ignored - * in our 'add-hook' routines. 
- */ -struct pfil_head; -typedef int (pfil_hook_t) - (void *, struct mbuf **, struct ifnet *, int, struct inpcb *); - -struct pfil_head * -pfil_head_get(int proto, u_long flags) -{ - static int dummy; - return (struct pfil_head *)&dummy; -} - -int -pfil_add_hook(pfil_hook_t *func, void *arg, int dir, struct pfil_head *h) -{ - return 0; -} - -int -pfil_remove_hook(pfil_hook_t *func, void *arg, int dir, struct pfil_head *h) -{ - return 0; -} - -/* define empty body for kernel function */ -int -priv_check(struct thread *td, int priv) -{ - return 0; -} - -int -securelevel_ge(struct ucred *cr, int level) -{ - return 0; -} - -int -sysctl_handle_int(SYSCTL_HANDLER_ARGS) -{ - return 0; -} - -int -sysctl_handle_long(SYSCTL_HANDLER_ARGS) -{ - return 0; -} - -void -ether_demux(struct ifnet *ifp, struct mbuf *m) -{ - return; -} - -int -ether_output_frame(struct ifnet *ifp, struct mbuf *m) -{ - return 0; -} - -void -in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum) -{ - return; -} - -void -icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) -{ - return; -} - -u_short -in_cksum_skip(struct mbuf *m, int len, int skip) -{ - return 0; -} - -u_short -in_cksum_hdr(struct ip *ip) -{ - return 0; -} - -struct mbuf * -ip_reass(struct mbuf *clone) -{ - return clone; -} -#ifdef INP_LOCK_ASSERT -#undef INP_LOCK_ASSERT -#define INP_LOCK_ASSERT(a) -#endif - -int -jailed(struct ucred *cred) -{ - return 0; -} - -/* -* Return 1 if an internet address is for a ``local'' host -* (one to which we have a connection). If subnetsarelocal -* is true, this includes other subnets of the local net. -* Otherwise, it includes only the directly-connected (sub)nets. 
-*/ -int -in_localaddr(struct in_addr in) -{ - return 1; -} - -int -sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) -{ - size_t valsize = sopt->sopt_valsize; - - if (len < valsize) - sopt->sopt_valsize = valsize = len; - bcopy(buf, sopt->sopt_val, valsize); - return 0; -} - -/* - * copy data from userland to kernel - */ -int -sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) -{ - size_t valsize = sopt->sopt_valsize; - - if (valsize < minlen) - return EINVAL; - if (valsize > len) - sopt->sopt_valsize = valsize = len; - bcopy(sopt->sopt_val, buf, valsize); - return 0; -} - -void -getmicrouptime(struct timeval *tv) -{ -#ifdef _WIN32 -#else - do_gettimeofday(tv); -#endif -} - - -#include - -char * -inet_ntoa_r(struct in_addr ina, char *buf) -{ -#ifdef _WIN32 -#else - unsigned char *ucp = (unsigned char *)&ina; - - sprintf(buf, "%d.%d.%d.%d", - ucp[0] & 0xff, - ucp[1] & 0xff, - ucp[2] & 0xff, - ucp[3] & 0xff); -#endif - return buf; -} - -char * -inet_ntoa(struct in_addr ina) -{ - static char buf[16]; - return inet_ntoa_r(ina, buf); -} - -int -random(void) -{ -#ifdef _WIN32 - return 0x123456; -#else - int r; - get_random_bytes(&r, sizeof(r)); - return r & 0x7fffffff; -#endif -} - - -/* - * do_div really does a u64 / u32 bit division. - * we save the sign and convert to uint befor calling. - * We are safe just because we always call it with small operands. - */ -int64_t -div64(int64_t a, int64_t b) -{ -#ifdef _WIN32 - int a1 = a, b1 = b; - return a1/b1; -#else - uint64_t ua, ub; - int sign = ((a>0)?1:-1) * ((b>0)?1:-1); - - ua = ((a>0)?a:-a); - ub = ((b>0)?b:-b); - do_div(ua, ub); - return sign*ua; -#endif -} - -/* - * compact version of fnmatch. 
- */ -int -fnmatch(const char *pattern, const char *string, int flags) -{ - char s; - - if (!string || !pattern) - return 1; /* no match */ - while ( (s = *string++) ) { - char p = *pattern++; - if (p == '\0') /* pattern is over, no match */ - return 1; - if (p == '*') /* wildcard, match */ - return 0; - if (p == '.' || p == s) /* char match, continue */ - continue; - return 1; /* no match */ - } - /* end of string, make sure the pattern is over too */ - if (*pattern == '\0' || *pattern == '*') - return 0; - return 1; /* no match */ -} - -#ifdef _WIN32 -/* - * as good as anywhere, place here the missing calls - */ - -void * -my_alloc(int size) -{ - void *_ret = ExAllocatePoolWithTag(0, size, 'wfpi'); - if (_ret) - memset(_ret, 0, size); - return _ret; -} - -void -panic(const char *fmt, ...) -{ - printf("%s", fmt); - for (;;); -} - -#include - -extern int _vsnprintf(char *buf, int buf_size, char * fmt, va_list ap); - -/* - * Windows' _snprintf doesn't terminate buffer with zero if size > buf_size - */ -int -snprintf(char *buf, int buf_size, char *fmt, ...) 
-{ - va_list ap; - va_start(ap, fmt); - if (_vsnprintf(buf, buf_size, fmt, ap) < 0) - buf[buf_size - 1] = '\0'; - va_end(ap); - - return 0; -} -#endif diff --git a/dummynet/hashtable.c b/dummynet/hashtable.c deleted file mode 100644 index 3e055f0..0000000 --- a/dummynet/hashtable.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * XXX Copyright - */ -#include -#include -#include - -#include "hashtable.h" // XXX fix path later - -struct new_obj { - struct new_obj *next; /* Next object in the list */ - char obj[0]; /* actually bigger */ -}; - -/* Hash table */ -struct ipfw_ht { - int table_size; /* Size of the table (buckets) */ - int table_obj; /* number of object in the table */ - int obj_size; /* size of object (key + value) */ - /* Hash function for this table */ - uint32_t (*hash)(const void *key, uint32_t size); - int (*cmp)(const void *obj1, const void *obj2, int sz); - int hash_arg; /* hash function parameter */ - struct malloc_type *mtype; - struct new_obj **table_ptr; /* Pointer to the table */ -}; - -/* - * initialize an hash table - * - size: size of table (number of buckets) - * - obj_size: size of the object to store in the table (key + value) - * - hf: pointer to the hash function for this table - * - compare: function to compare two objects - * - * Return value: pointer to the hash table, NULL if error occurs - */ -struct ipfw_ht * -ipfw_ht_new(int size, int obj_size, - uint32_t (hf)(const void *, uint32_t size), - int (compare)(const void *, const void *, int), - struct malloc_type *mtype) -{ - struct ipfw_ht *h; - - h = malloc(sizeof(*h), mtype, M_NOWAIT | M_ZERO); - if (h == NULL) - return NULL; - - h->table_ptr = malloc(size * sizeof(struct new_obj*), mtype, - M_NOWAIT | M_ZERO); - if (h->table_ptr == NULL) { /* no memory */ - free (h, mtype); - return 0; - } - h->table_size = size; - h->hash = hf; - h->cmp = compare; - h->mtype = mtype; - h->obj_size = obj_size; - - return h; -} - -int -ipfw_ht_insert(struct ipfw_ht *h, const void *obj) -{ - int i; /* 
array index */ - struct new_obj *o, *ot; - - i = h->hash(obj, h->table_size); - - /* same key not allowed */ - for (ot = h->table_ptr[i]; ot; ot = ot->next) { - if (h->cmp(obj, ot->obj, h->obj_size) == 0) - return 1; /* error */ - } - /* allocate a single chunk of memory */ - o = malloc(sizeof(*o) + h->obj_size, h->mtype, M_NOWAIT); - if (o == NULL) - return 1; - bcopy(obj, o->obj, h->obj_size); - - /* put at the head */ - o->next = h->table_ptr[i]; - h->table_ptr[i] = o; - - h->table_obj++; - - return 0; -} - -int -ipfw_ht_remove(struct ipfw_ht *h, const void *obj) -{ - int i; - struct new_obj *obj1, *prev; - - i = h->hash(obj, h->table_size); - - for (prev = NULL, obj1 = h->table_ptr[i]; obj1; obj1 = obj1->next) { - if (h->cmp(obj, obj1->obj, h->obj_size) != 0) - continue; - /* Object found, delete */ - if (prev != NULL) - prev->next = obj1->next; - else - h->table_ptr[i] = obj1->next; - free(obj1, h->mtype); - h->table_obj--; - return 0; - } - return 1; /* Not found */ -} - -const void * -ipfw_ht_extract(struct ipfw_ht *h, const void *obj) -{ - struct new_obj *o; - int i; - if (h == NULL || h->table_obj == 0) - return NULL; - - i = h->hash(obj, h->table_size); - for (o = h->table_ptr[i]; o; o = o->next) { - if (h->cmp(o->obj, obj, h->obj_size) == 0) - return o->obj; - } - return NULL; -} - -void * -ipfw_ht_destroy(struct ipfw_ht *h) -{ - int i; - struct new_obj *cur, *next; - - if (!h || !h->table_ptr) - return NULL; - for (i = 0; i < h->table_size; i++) { - for (cur = h->table_ptr[i]; cur; cur = next) { - next = cur->next; - free(cur, h->mtype); - } - } - free (h->table_ptr, h->mtype); - free (h, h->mtype); - - return NULL; -} - -/* returns the number of elements in the table */ -int -ipfw_ht_count(const struct ipfw_ht *h) -{ - return h ? 
h->table_obj : 0; -} - -const void * -table_next(struct ipfw_ht *h, const void *o) -{ - int i; - struct new_obj *obj; - - if (h == NULL || h->table_obj == 0) - return NULL; - if (o == NULL) { - for (i = 0; i < h->table_size; i++) - if (h->table_ptr[i]) - return h->table_ptr[i]->obj; - return NULL; /* XXX should not happen */ - } - - /* here we can optimize if we can map o to the bucket, - * otherwise locate o and find the next one. - */ - i = h->hash(o, h->table_size); - for (obj = h->table_ptr[i]; obj; obj = obj->next) { - if (h->cmp(obj->obj, o, h->obj_size) == 0) - break; - } - if (obj && obj->next != NULL) - return obj->next->obj; - /* take the first of the next bucket */ - for (i++; i < h->table_size; i++) { - if (h->table_ptr[i]) - return h->table_ptr[i]->obj; - } - return NULL; -} diff --git a/dummynet/hashtable.h b/dummynet/hashtable.h deleted file mode 100644 index 4fcba22..0000000 --- a/dummynet/hashtable.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef __HASHTABLE_H_ -#define __HASHTABLE_H_ - -/* - * new_table_init creates a table with the specified - * number of buckets (size). - * obj_size is the size of individual objects (key+value), - * the first function is the hash function (called with the - * size and the payload pointer) - * the second function is the compare function, to tell if two - * objects are the same (XXX we could spare this if we also - * pass a key_size and use a bcmp for comparisons) - * Not extensible at the moment. - */ -struct malloc_type; -struct ipfw_ht; -struct ipfw_ht* ipfw_ht_new(int size, int obj_size, - uint32_t (hash_fn)(const void *, uint32_t size), - int (cmp_fn)(const void*, const void*, int sz), - struct malloc_type *mtype); -void *ipfw_ht_destroy(struct ipfw_ht *h); - -/* add a new object to the table, return success/failure */ -int ipfw_ht_insert(struct ipfw_ht *h, const void *obj); - -/* - * returns a pointer to the matching object or NULL if not found. - * No refcounts. 
- */ -const void *ipfw_ht_extract(struct ipfw_ht *h, const void *key); - -/* remove an object from the table */ -int ipfw_ht_remove(struct ipfw_ht *h, const void *key); - -/* return the number of elements in the table */ -int ipfw_ht_count(const struct ipfw_ht *h); - -/* returns the first or next element. Works by hashing the - * current object and then finds the next one. - * If obj == NULL returns the first object in the table - */ -const void *ipfw_ht_next(struct ipfw_ht *h, const void *obj); - -#endif diff --git a/dummynet/in_cksum.c b/dummynet/in_cksum.c deleted file mode 100644 index 8972cef..0000000 --- a/dummynet/in_cksum.c +++ /dev/null @@ -1,150 +0,0 @@ -/*- - * Copyright (c) 1988, 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93 - */ - -#include -__FBSDID("$FreeBSD: src/sys/netinet/in_cksum.c,v 1.10 2007/10/07 20:44:22 silby Exp $"); - -#include -#include - -/* - * Checksum routine for Internet Protocol family headers (Portable Version). - * - * This routine is very heavily used in the network - * code and should be modified for each CPU to be as fast as possible. - */ - -#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x) -#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);} - -int -in_cksum(struct mbuf *m, int len) -{ - register u_short *w; - register int sum = 0; - register int mlen = 0; - int byte_swapped = 0; - - union { - char c[2]; - u_short s; - } s_util; - union { - u_short s[2]; - long l; - } l_util; - - for (;m && len; m = m->m_next) { - if (m->m_len == 0) - continue; - w = mtod(m, u_short *); - if (mlen == -1) { - /* - * The first byte of this mbuf is the continuation - * of a word spanning between this mbuf and the - * last mbuf. - * - * s_util.c[0] is already saved when scanning previous - * mbuf. - */ - s_util.c[1] = *(char *)w; - sum += s_util.s; - w = (u_short *)((char *)w + 1); - mlen = m->m_len - 1; - len--; - } else - mlen = m->m_len; - if (len < mlen) - mlen = len; - len -= mlen; - /* - * Force to even boundary. 
- */ -#if defined(CONFIG_X86_64) - if ((1 & (long) w) && (mlen > 0)) { -#else - if ((1 & (int) w) && (mlen > 0)) { -#endif - REDUCE; - sum <<= 8; - s_util.c[0] = *(u_char *)w; - w = (u_short *)((char *)w + 1); - mlen--; - byte_swapped = 1; - } - /* - * Unroll the loop to make overhead from - * branches &c small. - */ - while ((mlen -= 32) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7]; - sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11]; - sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15]; - w += 16; - } - mlen += 32; - while ((mlen -= 8) >= 0) { - sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3]; - w += 4; - } - mlen += 8; - if (mlen == 0 && byte_swapped == 0) - continue; - REDUCE; - while ((mlen -= 2) >= 0) { - sum += *w++; - } - if (byte_swapped) { - REDUCE; - sum <<= 8; - byte_swapped = 0; - if (mlen == -1) { - s_util.c[1] = *(char *)w; - sum += s_util.s; - mlen = 0; - } else - mlen = -1; - } else if (mlen == -1) - s_util.c[0] = *(char *)w; - } - if (len) - printf("cksum: out of data\n"); - if (mlen == -1) { - /* The last mbuf has odd # of bytes. Follow the - standard (the odd byte may be shifted left by 8 bits - or not as determined by endian-ness of the machine) */ - s_util.c[1] = 0; - sum += s_util.s; - } - REDUCE; - return (~sum & 0xffff); -} diff --git a/dummynet/include/netgraph/ng_ipfw.h b/dummynet/include/netgraph/ng_ipfw.h deleted file mode 100644 index 9b28374..0000000 --- a/dummynet/include/netgraph/ng_ipfw.h +++ /dev/null @@ -1,56 +0,0 @@ -/*- - * Copyright 2005, Gleb Smirnoff - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/netgraph/ng_ipfw.h,v 1.2 2006/02/17 09:42:49 glebius Exp $ - */ -#ifndef __NG_IPFW_H -#define __NG_IPFW_H - -#define NG_IPFW_NODE_TYPE "ipfw" -#define NGM_IPFW_COOKIE 1105988990 - -#ifdef _KERNEL - -struct mbuf; -struct ip_fw_args; -typedef int ng_ipfw_input_t(struct mbuf **, int, struct ip_fw_args *, int); -extern ng_ipfw_input_t *ng_ipfw_input_p; -#define NG_IPFW_LOADED (ng_ipfw_input_p != NULL) - -struct ng_ipfw_tag { - struct m_tag mt; /* tag header */ - struct ip_fw *rule; /* matching rule */ - uint32_t rule_id; /* matching rule id */ - uint32_t chain_id; /* ruleset id */ - struct ifnet *ifp; /* interface, for ip_output */ - int dir; -#define NG_IPFW_OUT 0 -#define NG_IPFW_IN 1 -}; - -#define TAGSIZ (sizeof(struct ng_ipfw_tag) - sizeof(struct m_tag)) - -#endif /* _KERNEL */ -#endif /* __NG_IPFW_H */ diff --git a/dummynet/include/netinet/ip_divert.h b/dummynet/include/netinet/ip_divert.h deleted file mode 100644 index 4bb6e42..0000000 --- 
a/dummynet/include/netinet/ip_divert.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _IP_DIVERT_H -#define _IP_DIVERT_H - -struct mbuf; -typedef void ip_divert_packet_t(struct mbuf *, int); - -extern ip_divert_packet_t *ip_divert_ptr; - -struct divert_tag { - u_int32_t info; /* port & flags */ - u_int16_t cookie; /* ipfw rule number */ -}; - -#endif /* !_IP_DIVERT_H */ diff --git a/dummynet/include/netinet/ip_dummynet.h b/dummynet/include/netinet/ip_dummynet.h deleted file mode 100644 index 7391719..0000000 --- a/dummynet/include/netinet/ip_dummynet.h +++ /dev/null @@ -1,398 +0,0 @@ -/*- - * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa - * Portions Copyright (c) 2000 Akamba Corp. - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/netinet/ip_dummynet.h,v 1.40.2.1 2008/04/25 10:26:30 oleg Exp $ - */ - -#ifndef _IP_DUMMYNET_H -#define _IP_DUMMYNET_H - -/* - * Definition of dummynet data structures. In the structures, I decided - * not to use the macros in in the hope of making the code - * easier to port to other architectures. The type of lists and queue we - * use here is pretty simple anyways. - */ - -/* - * We start with a heap, which is used in the scheduler to decide when - * to transmit packets etc. - * - * The key for the heap is used for two different values: - * - * 1. timer ticks- max 10K/second, so 32 bits are enough; - * - * 2. virtual times. These increase in steps of len/x, where len is the - * packet length, and x is either the weight of the flow, or the - * sum of all weights. - * If we limit to max 1000 flows and a max weight of 100, then - * x needs 17 bits. The packet size is 16 bits, so we can easily - * overflow if we do not allow errors. - * So we use a key "dn_key" which is 64 bits. Some macros are used to - * compare key values and handle wraparounds. - * MAX64 returns the largest of two key values. - * MY_M is used as a shift count when doing fixed point arithmetic - * (a better name would be useful...). 
- */ -typedef u_int64_t dn_key ; /* sorting key */ -#define DN_KEY_LT(a,b) ((int64_t)((a)-(b)) < 0) -#define DN_KEY_LEQ(a,b) ((int64_t)((a)-(b)) <= 0) -#define DN_KEY_GT(a,b) ((int64_t)((a)-(b)) > 0) -#define DN_KEY_GEQ(a,b) ((int64_t)((a)-(b)) >= 0) -#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) -#define MY_M 16 /* number of left shift to obtain a larger precision */ - -/* - * XXX With this scaling, max 1000 flows, max weight 100, 1Gbit/s, the - * virtual time wraps every 15 days. - */ - - -/* - * The maximum hash table size for queues. This value must be a power - * of 2. - */ -#define DN_MAX_HASH_SIZE 65536 - -/* - * A heap entry is made of a key and a pointer to the actual - * object stored in the heap. - * The heap is an array of dn_heap_entry entries, dynamically allocated. - * Current size is "size", with "elements" actually in use. - * The heap normally supports only ordered insert and extract from the top. - * If we want to extract an object from the middle of the heap, we - * have to know where the object itself is located in the heap (or we - * need to scan the whole array). To this purpose, an object has a - * field (int) which contains the index of the object itself into the - * heap. When the object is moved, the field must also be updated. - * The offset of the index in the object is stored in the 'offset' - * field in the heap descriptor. The assumption is that this offset - * is non-zero if we want to support extract from the middle. - */ -struct dn_heap_entry { - dn_key key ; /* sorting key. Topmost element is smallest one */ - void *object ; /* object pointer */ -} ; - -struct dn_heap { - int size ; - int elements ; - int offset ; /* XXX if > 0 this is the offset of direct ptr to obj */ - struct dn_heap_entry *p ; /* really an array of "size" entries */ -} ; - -#ifdef _KERNEL -/* - * Packets processed by dummynet have an mbuf tag associated with - * them that carries their dummynet state. 
This is used within - * the dummynet code as well as outside when checking for special - * processing requirements. - */ -struct dn_pkt_tag { - struct ip_fw *rule; /* matching rule */ - uint32_t rule_id; /* matching rule id */ - uint32_t chain_id; /* ruleset id */ - int dn_dir; /* action when packet comes out. */ -#define DN_TO_IP_OUT 1 -#define DN_TO_IP_IN 2 -/* Obsolete: #define DN_TO_BDG_FWD 3 */ -#define DN_TO_ETH_DEMUX 4 -#define DN_TO_ETH_OUT 5 -#define DN_TO_IP6_IN 6 -#define DN_TO_IP6_OUT 7 -#define DN_TO_IFB_FWD 8 - - dn_key output_time; /* when the pkt is due for delivery */ - struct ifnet *ifp; /* interface, for ip_output */ - struct _ip6dn_args ip6opt; /* XXX ipv6 options */ -}; -#endif /* _KERNEL */ - -/* - * Overall structure of dummynet (with WF2Q+): - -In dummynet, packets are selected with the firewall rules, and passed -to two different objects: PIPE or QUEUE. - -A QUEUE is just a queue with configurable size and queue management -policy. It is also associated with a mask (to discriminate among -different flows), a weight (used to give different shares of the -bandwidth to different flows) and a "pipe", which essentially -supplies the transmit clock for all queues associated with that -pipe. - -A PIPE emulates a fixed-bandwidth link, whose bandwidth is -configurable. The "clock" for a pipe can come from either an -internal timer, or from the transmit interrupt of an interface. -A pipe is also associated with one (or more, if masks are used) -queue, where all packets for that pipe are stored. - -The bandwidth available on the pipe is shared by the queues -associated with that pipe (only one in case the packet is sent -to a PIPE) according to the WF2Q+ scheduling algorithm and the -configured weights. - -In general, incoming packets are stored in the appropriate queue, -which is then placed into one of a few heaps managed by a scheduler -to decide when the packet should be extracted. 
-The scheduler (a function called dummynet()) is run at every timer -tick, and grabs queues from the head of the heaps when they are -ready for processing. - -There are three data structures defining a pipe and associated queues: - - + dn_pipe, which contains the main configuration parameters related - to delay and bandwidth; - + dn_flow_set, which contains WF2Q+ configuration, flow - masks, plr and RED configuration; - + dn_flow_queue, which is the per-flow queue (containing the packets) - -Multiple dn_flow_set can be linked to the same pipe, and multiple -dn_flow_queue can be linked to the same dn_flow_set. -All data structures are linked in a linear list which is used for -housekeeping purposes. - -During configuration, we create and initialize the dn_flow_set -and dn_pipe structures (a dn_pipe also contains a dn_flow_set). - -At runtime: packets are sent to the appropriate dn_flow_set (either -WFQ ones, or the one embedded in the dn_pipe for fixed-rate flows), -which in turn dispatches them to the appropriate dn_flow_queue -(created dynamically according to the masks). - -The transmit clock for fixed rate flows (ready_event()) selects the -dn_flow_queue to be used to transmit the next packet. For WF2Q, -wfq_ready_event() extracts a pipe which in turn selects the right -flow using a number of heaps defined in the pipe itself. - - * - */ - -/* - * per flow queue. This contains the flow identifier, the queue - * of packets, counters, and parameters used to support both RED and - * WF2Q+. - * - * A dn_flow_queue is created and initialized whenever a packet for - * a new flow arrives.
- */ -struct dn_flow_queue { - struct dn_flow_queue *next ; - struct ipfw_flow_id id ; - - struct mbuf *head, *tail ; /* queue of packets */ - u_int len ; - u_int len_bytes ; - - /* - * When we emulate MAC overheads, or channel unavailability due - * to other traffic on a shared medium, we augment the packet at - * the head of the queue with an 'extra_bits' field representing - * the additional delay the packet will be subject to: - * extra_bits = bw*unavailable_time. - * With large bandwidth and large delays, extra_bits (and also numbytes) - * can become very large, so better play safe and use 64 bit - */ - uint64_t numbytes ; /* credit for transmission (dynamic queues) */ - int64_t extra_bits; /* extra bits simulating unavailable channel */ - - u_int64_t tot_pkts ; /* statistics counters */ - u_int64_t tot_bytes ; - u_int32_t drops ; - - int hash_slot ; /* debugging/diagnostic */ - - /* RED parameters */ - int avg ; /* average queue length est. (scaled) */ - int count ; /* arrivals since last RED drop */ - int random ; /* random value (scaled) */ - dn_key idle_time; /* start of queue idle time */ - - /* WF2Q+ support */ - struct dn_flow_set *fs ; /* parent flow set */ - int heap_pos ; /* position (index) of struct in heap */ - dn_key sched_time ; /* current time when queue enters ready_heap */ - - dn_key S,F ; /* start time, finish time */ - /* - * Setting F < S means the timestamp is invalid. We only need - * to test this when the queue is empty. - */ -} ; - -/* - * flow_set descriptor. Contains the "template" parameters for the - * queue configuration, and pointers to the hash table of dn_flow_queue's. - * - * The hash table is an array of lists -- we identify the slot by - * hashing the flow-id, then scan the list looking for a match. - * The size of the hash table (buckets) is configurable on a per-queue - * basis.
- * - * A dn_flow_set is created whenever a new queue or pipe is created (in the - * latter case, the structure is located inside the struct dn_pipe). - */ -struct dn_flow_set { - SLIST_ENTRY(dn_flow_set) next; /* linked list in a hash slot */ - - u_short fs_nr ; /* flow_set number */ - u_short flags_fs; -#define DN_HAVE_FLOW_MASK 0x0001 -#define DN_IS_RED 0x0002 -#define DN_IS_GENTLE_RED 0x0004 -#define DN_QSIZE_IS_BYTES 0x0008 /* queue size is measured in bytes */ -#define DN_NOERROR 0x0010 /* do not report ENOBUFS on drops */ -#define DN_HAS_PROFILE 0x0020 /* the pipe has a delay profile. */ -#define DN_IS_PIPE 0x4000 -#define DN_IS_QUEUE 0x8000 - - struct dn_pipe *pipe ; /* pointer to parent pipe */ - u_short parent_nr ; /* parent pipe#, 0 if local to a pipe */ - - int weight ; /* WFQ queue weight */ - int qsize ; /* queue size in slots or bytes */ - int plr ; /* pkt loss rate (2^31-1 means 100%) */ - - struct ipfw_flow_id flow_mask ; - - /* hash table of queues onto this flow_set */ - int rq_size ; /* number of slots */ - int rq_elements ; /* active elements */ - struct dn_flow_queue **rq; /* array of rq_size entries */ - - u_int32_t last_expired ; /* do not expire too frequently */ - int backlogged ; /* #active queues for this flowset */ - - /* RED parameters */ -#define SCALE_RED 16 -#define SCALE(x) ( (x) << SCALE_RED ) -#define SCALE_VAL(x) ( (x) >> SCALE_RED ) -#define SCALE_MUL(x,y) ( ( (x) * (y) ) >> SCALE_RED ) - int w_q ; /* queue weight (scaled) */ - int max_th ; /* maximum threshold for queue (scaled) */ - int min_th ; /* minimum threshold for queue (scaled) */ - int max_p ; /* maximum value for p_b (scaled) */ - u_int c_1 ; /* max_p/(max_th-min_th) (scaled) */ - u_int c_2 ; /* max_p*min_th/(max_th-min_th) (scaled) */ - u_int c_3 ; /* for GRED, (1-max_p)/max_th (scaled) */ - u_int c_4 ; /* for GRED, 1 - 2*max_p (scaled) */ - u_int * w_q_lookup ; /* lookup table for computing (1-w_q)^t */ - u_int lookup_depth ; /* depth of lookup table */ - int 
lookup_step ; /* granularity inside the lookup table */ - int lookup_weight ; /* equal to (1-w_q)^t / (1-w_q)^(t+1) */ - int avg_pkt_size ; /* medium packet size */ - int max_pkt_size ; /* max packet size */ -}; -SLIST_HEAD(dn_flow_set_head, dn_flow_set); - -/* - * Pipe descriptor. Contains global parameters, delay-line queue, - * and the flow_set used for fixed-rate queues. - * - * For WF2Q+ support it also has 3 heaps holding dn_flow_queue: - * not_eligible_heap, for queues whose start time is higher - * than the virtual time. Sorted by start time. - * scheduler_heap, for queues eligible for scheduling. Sorted by - * finish time. - * idle_heap, all flows that are idle and can be removed. We - * do that on each tick so we do not slow down too much - * operations during forwarding. - * - */ -struct dn_pipe { /* a pipe */ - SLIST_ENTRY(dn_pipe) next; /* linked list in a hash slot */ - - int pipe_nr ; /* number */ - int bandwidth; /* really, bytes/tick. */ - int delay ; /* really, ticks */ - - struct mbuf *head, *tail ; /* packets in delay line */ - - /* WF2Q+ */ - struct dn_heap scheduler_heap ; /* top extract - key Finish time*/ - struct dn_heap not_eligible_heap; /* top extract- key Start time */ - struct dn_heap idle_heap ; /* random extract - key Start=Finish time */ - - dn_key V ; /* virtual time */ - int sum; /* sum of weights of all active sessions */ - - /* Same as in dn_flow_queue, numbytes can become large */ - int64_t numbytes; /* bits I can transmit (more or less). */ - uint64_t burst; /* burst size, scaled: bits * hz */ - - dn_key sched_time ; /* time pipe was scheduled in ready_heap */ - dn_key idle_time; /* start of pipe idle time */ - - /* - * When the tx clock come from an interface (if_name[0] != '\0'), its name - * is stored below, whereas the ifp is filled when the rule is configured. 
- */ - char if_name[IFNAMSIZ]; - struct ifnet *ifp ; - int ready ; /* set if ifp != NULL and we got a signal from it */ - - struct dn_flow_set fs ; /* used with fixed-rate flows */ - - /* fields to simulate a delay profile */ - -#define ED_MAX_NAME_LEN 32 - char name[ED_MAX_NAME_LEN]; - int loss_level; - int samples_no; - int *samples; -}; - -/* dn_pipe_max is used to pass pipe configuration from userland onto - * kernel space and back - */ -#define ED_MAX_SAMPLES_NO 1024 -struct dn_pipe_max { - struct dn_pipe pipe; - int samples[ED_MAX_SAMPLES_NO]; -}; - -SLIST_HEAD(dn_pipe_head, dn_pipe); - -#ifdef _KERNEL -typedef void ip_dn_ruledel_t(void *); /* ip_fw.c */ -extern ip_dn_ruledel_t *ip_dn_ruledel_ptr; - -/* - * Return the IPFW rule associated with the dummynet tag; if any. - * Make sure that the dummynet tag is not reused by lower layers. - */ -static __inline struct ip_fw * -ip_dn_claim_rule(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_find(m, PACKET_TAG_DUMMYNET, NULL); - if (mtag != NULL) { - mtag->m_tag_id = PACKET_TAG_NONE; - return (((struct dn_pkt_tag *)(mtag+1))->rule); - } else - return (NULL); -} -#endif -#endif /* _IP_DUMMYNET_H */ diff --git a/dummynet/include/netinet/ip_fw.h b/dummynet/include/netinet/ip_fw.h deleted file mode 100644 index 743d908..0000000 --- a/dummynet/include/netinet/ip_fw.h +++ /dev/null @@ -1,722 +0,0 @@ -/*- - * Copyright (c) 2002-2009 Luigi Rizzo, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD: src/sys/netinet/ip_fw.h,v 1.110.2.6 2008/10/14 08:03:58 rwatson Exp $ - */ - -#ifndef _IPFW2_H -#define _IPFW2_H - -/* - * The default rule number. By the design of ip_fw, the default rule - * is the last one, so its number can also serve as the highest number - * allowed for a rule. The ip_fw code relies on both meanings of this - * constant. - */ -#define IPFW_DEFAULT_RULE 65535 - -/* - * The number of ipfw tables. The maximum allowed table number is the - * (IPFW_TABLES_MAX - 1). - */ -#define IPFW_TABLES_MAX 128 - -/* - * Most commands (queue, pipe, tag, untag, limit...) can have a 16-bit - * argument between 1 and 65534. The value 0 is unused, the value - * 65535 (IP_FW_TABLEARG) is used to represent 'tablearg', i.e. the - * argument can be 1..65534, or 65535 to indicate the use of the 'tablearg' - * result of the most recent table() lookup. - * Note that 16bit is only a historical limit, resulting from - * the use of a 16-bit field for that value. In reality, we can have - * 2^32 pipes, queues, tag values and so on, and use 0 as a tablearg.
- */ -#define IPFW_ARG_MIN 1 -#define IPFW_ARG_MAX 65534 -#define IP_FW_TABLEARG 65535 /* XXX should use 0 */ - -/* - * The kernel representation of ipfw rules is made of a list of - * 'instructions' (for all practical purposes equivalent to BPF - * instructions), which specify which fields of the packet - * (or its metadata) should be analysed. - * - * Each instruction is stored in a structure which begins with - * "ipfw_insn", and can contain extra fields depending on the - * instruction type (listed below). - * Note that the code is written so that individual instructions - * have a size which is a multiple of 32 bits. This means that, if - * such structures contain pointers or other 64-bit entities, - * (there is just one instance now) they may end up unaligned on - * 64-bit architectures, so they must be handled with care. - * - * "enum ipfw_opcodes" are the opcodes supported. We can have up - * to 256 different opcodes. When adding new opcodes, they should - * be appended to the end of the opcode list before O_LAST_OPCODE, - * this will prevent the ABI from being broken, otherwise users - * will have to recompile ipfw(8) when they update the kernel.
- */ - -enum ipfw_opcodes { /* arguments (4 byte each) */ - O_NOP, - - O_IP_SRC, /* u32 = IP */ - O_IP_SRC_MASK, /* ip = IP/mask */ - O_IP_SRC_ME, /* none */ - O_IP_SRC_SET, /* u32=base, arg1=len, bitmap */ - - O_IP_DST, /* u32 = IP */ - O_IP_DST_MASK, /* ip = IP/mask */ - O_IP_DST_ME, /* none */ - O_IP_DST_SET, /* u32=base, arg1=len, bitmap */ - - O_IP_SRCPORT, /* (n)port list:mask 4 byte ea */ - O_IP_DSTPORT, /* (n)port list:mask 4 byte ea */ - O_PROTO, /* arg1=protocol */ - - O_MACADDR2, /* 2 mac addr:mask */ - O_MAC_TYPE, /* same as srcport */ - - O_LAYER2, /* none */ - O_IN, /* none */ - O_FRAG, /* none */ - - O_RECV, /* none */ - O_XMIT, /* none */ - O_VIA, /* none */ - - O_IPOPT, /* arg1 = 2*u8 bitmap */ - O_IPLEN, /* arg1 = len */ - O_IPID, /* arg1 = id */ - - O_IPTOS, /* arg1 = id */ - O_IPPRECEDENCE, /* arg1 = precedence << 5 */ - O_IPTTL, /* arg1 = TTL */ - - O_IPVER, /* arg1 = version */ - O_UID, /* u32 = id */ - O_GID, /* u32 = id */ - O_ESTAB, /* none (tcp established) */ - O_TCPFLAGS, /* arg1 = 2*u8 bitmap */ - O_TCPWIN, /* arg1 = desired win */ - O_TCPSEQ, /* u32 = desired seq. */ - O_TCPACK, /* u32 = desired seq. */ - O_ICMPTYPE, /* u32 = icmp bitmap */ - O_TCPOPTS, /* arg1 = 2*u8 bitmap */ - - O_VERREVPATH, /* none */ - O_VERSRCREACH, /* none */ - - O_PROBE_STATE, /* none */ - O_KEEP_STATE, /* none */ - O_LIMIT, /* ipfw_insn_limit */ - O_LIMIT_PARENT, /* dyn_type, not an opcode. */ - - /* - * These are really 'actions'. - */ - - O_LOG, /* ipfw_insn_log */ - O_PROB, /* u32 = match probability */ - - O_CHECK_STATE, /* none */ - O_ACCEPT, /* none */ - O_DENY, /* none */ - O_REJECT, /* arg1=icmp arg (same as deny) */ - O_COUNT, /* none */ - O_SKIPTO, /* arg1=next rule number */ - O_PIPE, /* arg1=pipe number */ - O_QUEUE, /* arg1=queue number */ - O_DIVERT, /* arg1=port number */ - O_TEE, /* arg1=port number */ - O_FORWARD_IP, /* fwd sockaddr */ - O_FORWARD_MAC, /* fwd mac */ - O_NAT, /* nope */ - O_REASS, /* none */ - - /* - * More opcodes. 
- */ - O_IPSEC, /* has ipsec history */ - O_IP_SRC_LOOKUP, /* arg1=table number, u32=value */ - O_IP_DST_LOOKUP, /* arg1=table number, u32=value */ - O_ANTISPOOF, /* none */ - O_JAIL, /* u32 = id */ - O_ALTQ, /* u32 = altq classif. qid */ - O_DIVERTED, /* arg1=bitmap (1:loop, 2:out) */ - O_TCPDATALEN, /* arg1 = tcp data len */ - O_IP6_SRC, /* address without mask */ - O_IP6_SRC_ME, /* my addresses */ - O_IP6_SRC_MASK, /* address with the mask */ - O_IP6_DST, - O_IP6_DST_ME, - O_IP6_DST_MASK, - O_FLOW6ID, /* for flow id tag in the ipv6 pkt */ - O_ICMP6TYPE, /* icmp6 packet type filtering */ - O_EXT_HDR, /* filtering for ipv6 extension header */ - O_IP6, - - /* - * actions for ng_ipfw - */ - O_NETGRAPH, /* send to ng_ipfw */ - O_NGTEE, /* copy to ng_ipfw */ - - O_IP4, - - O_UNREACH6, /* arg1=icmpv6 code arg (deny) */ - - O_TAG, /* arg1=tag number */ - O_TAGGED, /* arg1=tag number */ - - O_SETFIB, /* arg1=FIB number */ - O_FIB, /* arg1=FIB desired fib number */ - - O_LAST_OPCODE /* not an opcode! */ -}; - -/* - * The extension header are filtered only for presence using a bit - * vector with a flag for each header. - */ -#define EXT_FRAGMENT 0x1 -#define EXT_HOPOPTS 0x2 -#define EXT_ROUTING 0x4 -#define EXT_AH 0x8 -#define EXT_ESP 0x10 -#define EXT_DSTOPTS 0x20 -#define EXT_RTHDR0 0x40 -#define EXT_RTHDR2 0x80 - -/* - * Template for instructions. - * - * ipfw_insn is used for all instructions which require no operands, - * a single 16-bit value (arg1), or a couple of 8-bit values. - * - * For other instructions which require different/larger arguments - * we have derived structures, ipfw_insn_*. - * - * The size of the instruction (in 32-bit words) is in the low - * 6 bits of "len". The 2 remaining bits are used to implement - * NOT and OR on individual instructions. Given a type, you can - * compute the length to be put in "len" using F_INSN_SIZE(t) - * - * F_NOT negates the match result of the instruction. - * - * F_OR is used to build or blocks. 
By default, instructions - * are evaluated as part of a logical AND. An "or" block - * { X or Y or Z } contains F_OR set in all but the last - * instruction of the block. A match will cause the code - * to skip past the last instruction of the block. - * - * NOTA BENE: in a couple of places we assume that - * sizeof(ipfw_insn) == sizeof(u_int32_t) - * this needs to be fixed. - * - */ -typedef struct _ipfw_insn { /* template for instructions */ - u_int8_t opcode; - u_int8_t len; /* number of 32-bit words */ -#define F_NOT 0x80 -#define F_OR 0x40 -#define F_LEN_MASK 0x3f -#define F_LEN(cmd) ((cmd)->len & F_LEN_MASK) - - u_int16_t arg1; -} ipfw_insn; - -/* - * The F_INSN_SIZE(type) computes the size, in 4-byte words, of - * a given type. - */ -#define F_INSN_SIZE(t) ((sizeof (t))/sizeof(u_int32_t)) - -/* - * This is used to store an array of 16-bit entries (ports etc.) - */ -typedef struct _ipfw_insn_u16 { - ipfw_insn o; - u_int16_t ports[2]; /* there may be more */ -} ipfw_insn_u16; - -/* - * This is used to store an array of 32-bit entries - * (uid, single IPv4 addresses etc.) - */ -typedef struct _ipfw_insn_u32 { - ipfw_insn o; - u_int32_t d[1]; /* one or more */ -} ipfw_insn_u32; - -/* - * This is used to store IP addr-mask pairs. - */ -typedef struct _ipfw_insn_ip { - ipfw_insn o; - struct in_addr addr; - struct in_addr mask; -} ipfw_insn_ip; - -/* - * This is used to forward to a given address (ip). - */ -typedef struct _ipfw_insn_sa { - ipfw_insn o; - struct sockaddr_in sa; -} ipfw_insn_sa; - -/* - * This is used for MAC addr-mask pairs. - */ -typedef struct _ipfw_insn_mac { - ipfw_insn o; - u_char addr[12]; /* dst[6] + src[6] */ - u_char mask[12]; /* dst[6] + src[6] */ -} ipfw_insn_mac; - -/* - * This is used for interface match rules (recv xx, xmit xx). - */ -typedef struct _ipfw_insn_if { - ipfw_insn o; - union { - struct in_addr ip; - int glob; - } p; - char name[IFNAMSIZ]; -} ipfw_insn_if; - -/* - * This is used for storing an altq queue id number. 
- */ -typedef struct _ipfw_insn_altq { - ipfw_insn o; - u_int32_t qid; -} ipfw_insn_altq; - -/* - * This is used for limit rules. - */ -typedef struct _ipfw_insn_limit { - ipfw_insn o; - u_int8_t _pad; - u_int8_t limit_mask; /* combination of DYN_* below */ -#define DYN_SRC_ADDR 0x1 -#define DYN_SRC_PORT 0x2 -#define DYN_DST_ADDR 0x4 -#define DYN_DST_PORT 0x8 - - u_int16_t conn_limit; -} ipfw_insn_limit; - -/* - * This is used for log instructions. - */ -typedef struct _ipfw_insn_log { - ipfw_insn o; - u_int32_t max_log; /* how many do we log -- 0 = all */ - u_int32_t log_left; /* how many left to log */ -} ipfw_insn_log; - -/* - * Data structures required by both ipfw(8) and ipfw(4) but not part of the - * management API are protected by IPFW_INTERNAL. - */ -#ifdef IPFW_INTERNAL -/* Server pool support (LSNAT). */ -struct cfg_spool { - LIST_ENTRY(cfg_spool) _next; /* chain of spool instances */ - struct in_addr addr; - u_short port; -}; -#endif - -/* Redirect modes id. */ -#define REDIR_ADDR 0x01 -#define REDIR_PORT 0x02 -#define REDIR_PROTO 0x04 - -#ifdef IPFW_INTERNAL -/* Nat redirect configuration. */ -struct cfg_redir { - LIST_ENTRY(cfg_redir) _next; /* chain of redir instances */ - u_int16_t mode; /* type of redirect mode */ - struct in_addr laddr; /* local ip address */ - struct in_addr paddr; /* public ip address */ - struct in_addr raddr; /* remote ip address */ - u_short lport; /* local port */ - u_short pport; /* public port */ - u_short rport; /* remote port */ - u_short pport_cnt; /* number of public ports */ - u_short rport_cnt; /* number of remote ports */ - int proto; /* protocol: tcp/udp */ - struct alias_link **alink; - /* num of entry in spool chain */ - u_int16_t spool_cnt; - /* chain of spool instances */ - LIST_HEAD(spool_chain, cfg_spool) spool_chain; -}; -#endif - -#define NAT_BUF_LEN 1024 - -#ifdef IPFW_INTERNAL -/* Nat configuration data struct. 
*/ -struct cfg_nat { - /* chain of nat instances */ - LIST_ENTRY(cfg_nat) _next; - int id; /* nat id */ - struct in_addr ip; /* nat ip address */ - char if_name[IF_NAMESIZE]; /* interface name */ - int mode; /* aliasing mode */ - struct libalias *lib; /* libalias instance */ - /* number of entries in redir chain */ - int redir_cnt; - /* chain of redir instances */ - LIST_HEAD(redir_chain, cfg_redir) redir_chain; -}; -#endif - -#define SOF_NAT sizeof(struct cfg_nat) -#define SOF_REDIR sizeof(struct cfg_redir) -#define SOF_SPOOL sizeof(struct cfg_spool) - -/* Nat command. */ -typedef struct _ipfw_insn_nat { - ipfw_insn o; - struct cfg_nat *nat; -} ipfw_insn_nat; - -/* Apply ipv6 mask on ipv6 addr */ -#define APPLY_MASK(addr,mask) \ - (addr)->__u6_addr.__u6_addr32[0] &= (mask)->__u6_addr.__u6_addr32[0]; \ - (addr)->__u6_addr.__u6_addr32[1] &= (mask)->__u6_addr.__u6_addr32[1]; \ - (addr)->__u6_addr.__u6_addr32[2] &= (mask)->__u6_addr.__u6_addr32[2]; \ - (addr)->__u6_addr.__u6_addr32[3] &= (mask)->__u6_addr.__u6_addr32[3]; - -/* Structure for ipv6 */ -typedef struct _ipfw_insn_ip6 { - ipfw_insn o; - struct in6_addr addr6; - struct in6_addr mask6; -} ipfw_insn_ip6; - -/* Used to support icmp6 types */ -typedef struct _ipfw_insn_icmp6 { - ipfw_insn o; - uint32_t d[7]; /* XXX This number is related to the netinet/icmp6.h - * define ICMP6_MAXTYPE - * as follows: n = ICMP6_MAXTYPE/32 + 1 - * Actually is 203 - */ -} ipfw_insn_icmp6; - -/* - * Here we have the structure representing an ipfw rule. - * - * It starts with a general area (with link fields and counters) - * followed by an array of one or more instructions, which the code - * accesses as an array of 32-bit values. - * - * Given a rule pointer r: - * - * r->cmd is the start of the first instruction. - * ACTION_PTR(r) is the start of the first action (things to do - * once a rule matched).
- * - * When assembling instruction, remember the following: - * - * + if a rule has a "keep-state" (or "limit") option, then the - * first instruction (at r->cmd) MUST BE an O_PROBE_STATE - * + if a rule has a "log" option, then the first action - * (at ACTION_PTR(r)) MUST be O_LOG - * + if a rule has an "altq" option, it comes after "log" - * + if a rule has an O_TAG option, it comes after "log" and "altq" - * - * NOTE: we use a simple linked list of rules because we never need - * to delete a rule without scanning the list. We do not use - * queue(3) macros for portability and readability. - */ - -struct ip_fw { - struct ip_fw *next; /* linked list of rules */ - struct ip_fw *next_rule; /* ptr to next [skipto] rule */ - /* 'next_rule' is used to pass up 'set_disable' status */ - - uint16_t act_ofs; /* offset of action in 32-bit units */ - uint16_t cmd_len; /* # of 32-bit words in cmd */ - uint16_t rulenum; /* rule number */ - uint8_t set; /* rule set (0..31) */ -#define RESVD_SET 31 /* set for default and persistent rules */ - uint8_t _pad; /* padding */ - uint32_t id; /* rule id */ - - /* These fields are present in all rules. */ - uint64_t pcnt; /* Packet counter */ - uint64_t bcnt; /* Byte counter */ - uint32_t timestamp; /* tv_sec of last match */ - - ipfw_insn cmd[1]; /* storage for commands */ -}; - -#define ACTION_PTR(rule) \ - (ipfw_insn *)( (u_int32_t *)((rule)->cmd) + ((rule)->act_ofs) ) - -#define RULESIZE(rule) (sizeof(struct ip_fw) + \ - ((struct ip_fw *)(rule))->cmd_len * 4 - 4) - -/* - * This structure is used as a flow mask and a flow id for various - * parts of the code. - */ -struct ipfw_flow_id { - u_int32_t dst_ip; - u_int32_t src_ip; - u_int16_t dst_port; - u_int16_t src_port; - u_int8_t fib; - u_int8_t proto; - u_int8_t flags; /* protocol-specific flags */ - uint8_t addr_type; /* 4 = ipv4, 6 = ipv6, 1=ether ? */ - struct in6_addr dst_ip6; /* could also store MAC addr! 
*/ - struct in6_addr src_ip6; - u_int32_t flow_id6; - u_int32_t frag_id6; -}; - -#define IS_IP6_FLOW_ID(id) ((id)->addr_type == 6) - -/* - * Dynamic ipfw rule. - */ -typedef struct _ipfw_dyn_rule ipfw_dyn_rule; - -struct _ipfw_dyn_rule { - ipfw_dyn_rule *next; /* linked list of rules. */ - struct ip_fw *rule; /* pointer to rule */ - /* 'rule' is used to pass up the rule number (from the parent) */ - - ipfw_dyn_rule *parent; /* pointer to parent rule */ - u_int64_t pcnt; /* packet match counter */ - u_int64_t bcnt; /* byte match counter */ - struct ipfw_flow_id id; /* (masked) flow id */ - u_int32_t expire; /* expire time */ - u_int32_t bucket; /* which bucket in hash table */ - u_int32_t state; /* state of this rule (typically a - * combination of TCP flags) - */ - u_int32_t ack_fwd; /* most recent ACKs in forward */ - u_int32_t ack_rev; /* and reverse directions (used */ - /* to generate keepalives) */ - u_int16_t dyn_type; /* rule type */ - u_int16_t count; /* refcount */ -}; - -/* - * Definitions for IP option names. - */ -#define IP_FW_IPOPT_LSRR 0x01 -#define IP_FW_IPOPT_SSRR 0x02 -#define IP_FW_IPOPT_RR 0x04 -#define IP_FW_IPOPT_TS 0x08 - -/* - * Definitions for TCP option names. - */ -#define IP_FW_TCPOPT_MSS 0x01 -#define IP_FW_TCPOPT_WINDOW 0x02 -#define IP_FW_TCPOPT_SACK 0x04 -#define IP_FW_TCPOPT_TS 0x08 -#define IP_FW_TCPOPT_CC 0x10 - -#define ICMP_REJECT_RST 0x100 /* fake ICMP code (send a TCP RST) */ -#define ICMP6_UNREACH_RST 0x100 /* fake ICMPv6 code (send a TCP RST) */ - -/* - * These are used for lookup tables. 
- */ -typedef struct _ipfw_table_entry { - in_addr_t addr; /* network address */ - u_int32_t value; /* value */ - u_int16_t tbl; /* table number */ - u_int8_t masklen; /* mask length */ -} ipfw_table_entry; - -typedef struct _ipfw_table { - u_int32_t size; /* size of entries in bytes */ - u_int32_t cnt; /* # of entries */ - u_int16_t tbl; /* table number */ - ipfw_table_entry ent[0]; /* entries */ -} ipfw_table; - -/* - * Main firewall chains definitions and global var's definitions. - */ -#ifdef _KERNEL - -#define MTAG_IPFW 1148380143 /* IPFW-tagged cookie */ - -/* Return values from ipfw_chk() */ -enum { - IP_FW_PASS = 0, - IP_FW_DENY, - IP_FW_DIVERT, - IP_FW_TEE, - IP_FW_DUMMYNET, - IP_FW_NETGRAPH, - IP_FW_NGTEE, - IP_FW_NAT, - IP_FW_REASS, -}; - -/* flags for divert mtag */ -#define IP_FW_DIVERT_LOOPBACK_FLAG 0x00080000 -#define IP_FW_DIVERT_OUTPUT_FLAG 0x00100000 - -/* - * Structure for collecting parameters to dummynet for ip6_output forwarding - */ -struct _ip6dn_args { - struct ip6_pktopts *opt_or; - struct route_in6 ro_or; - int flags_or; - struct ip6_moptions *im6o_or; - struct ifnet *origifp_or; - struct ifnet *ifp_or; - struct sockaddr_in6 dst_or; - u_long mtu_or; - struct route_in6 ro_pmtu_or; -}; - -/* - * Arguments for calling ipfw_chk() and dummynet_io(). We put them - * all into a structure because this way it is easier and more - * efficient to pass variables around and extend the interface. 
- */ -struct ip_fw_args { - struct mbuf *m; /* the mbuf chain */ - struct ifnet *oif; /* output interface */ - struct sockaddr_in *next_hop; /* forward address */ - struct ip_fw *rule; /* matching rule */ - uint32_t rule_id; /* matching rule id */ - uint32_t chain_id; /* ruleset id */ - struct ether_header *eh; /* for bridged packets */ - - struct ipfw_flow_id f_id; /* grabbed from IP header */ - uint32_t cookie; /* a cookie depending on rule action */ - struct inpcb *inp; - - struct _ip6dn_args dummypar; /* dummynet->ip6_output */ - struct sockaddr_in hopstore; /* store here if cannot use a pointer */ -}; - -/* - * Function definitions. - */ - -/* Firewall hooks */ -struct sockopt; -struct dn_flow_set; - -int ipfw_check_in(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); -int ipfw_check_out(void *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); - -int ipfw_chk(struct ip_fw_args *); - -int ipfw_hook(void); -int ipfw6_hook(void); -int ipfw_unhook(void); -int ipfw6_unhook(void); -#ifdef NOTYET -void ipfw_nat_destroy(void); -#endif - -VNET_DECLARE(int, fw_one_pass); -VNET_DECLARE(int, fw_enable); -#define V_fw_one_pass VNET(fw_one_pass) -#define V_fw_enable VNET(fw_enable) - -#ifdef INET6 -VNET_DECLARE(int, fw6_enable); -#define V_fw6_enable VNET(fw6_enable) -#endif - -struct ip_fw_chain { - struct ip_fw *rules; /* list of rules */ - struct ip_fw *default_rule; - struct ip_fw *reap; /* list of rules to reap */ - LIST_HEAD(, cfg_nat) nat; /* list of nat entries */ - struct radix_node_head *tables[IPFW_TABLES_MAX]; -#if defined( __linux__ ) || defined( _WIN32 ) - spinlock_t rwmtx; -#else - struct rwlock rwmtx; -#endif /* !__linux__ */ - uint32_t id; /* ruleset id */ - /* - * To optimize jumps, we use a table with skipto_entries pointers - * (a power of 2, set with a sysctl depending on available memory). - * Entry i points to the first rule i*64k/n <= n < (i+1)*64k/n. 
- * On insert/delete we simply update the relevant entry - * with O(1) additional cost. Updates to the sysctl variable - * that controls the table are managed at the next add/delete. - */ - int skipto_shift; /* shifts to compute the index in skipto-ptrs */ - int skipto_size; /* number of entries in the table */ - struct ip_fw **skipto_ptrs; -#ifdef IPFW_HASHTABLES - struct ipfw_ht *hashtab[IPFW_TABLES_MAX]; -#endif -}; - -#ifdef IPFW_INTERNAL - -#define IPFW_LOCK_INIT(_chain) \ - rw_init(&(_chain)->rwmtx, "IPFW static rules") -#define IPFW_LOCK_DESTROY(_chain) rw_destroy(&(_chain)->rwmtx) -#define IPFW_WLOCK_ASSERT(_chain) rw_assert(&(_chain)->rwmtx, RA_WLOCKED) - -#define IPFW_RLOCK(p) rw_rlock(&(p)->rwmtx) -#define IPFW_RUNLOCK(p) rw_runlock(&(p)->rwmtx) -#define IPFW_WLOCK(p) rw_wlock(&(p)->rwmtx) -#define IPFW_WUNLOCK(p) rw_wunlock(&(p)->rwmtx) - -#define LOOKUP_NAT(l, i, p) do { \ - LIST_FOREACH((p), &(l.nat), _next) { \ - if ((p)->id == (i)) { \ - break; \ - } \ - } \ - } while (0) - -typedef int ipfw_nat_t(struct ip_fw_args *, struct cfg_nat *, struct mbuf *); -typedef int ipfw_nat_cfg_t(struct sockopt *); -#endif - -VNET_DECLARE(struct ip_fw_chain, layer3_chain); -#define V_layer3_chain VNET(layer3_chain) - -#endif /* _KERNEL */ -#endif /* _IPFW2_H */ diff --git a/dummynet/ip_dummynet.c b/dummynet/ip_dummynet.c deleted file mode 100644 index 9fd70e2..0000000 --- a/dummynet/ip_dummynet.c +++ /dev/null @@ -1,2378 +0,0 @@ -/*- - * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa - * Portions Copyright (c) 2000 Akamba Corp. - * All rights reserved - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.110.2.4 2008/10/31 12:58:12 oleg Exp $"); - -#define DUMMYNET_DEBUG - -#include "opt_inet6.h" - -/* - * This module implements IP dummynet, a bandwidth limiter/delay emulator - * used in conjunction with the ipfw package. - * Description of the data structures used is in ip_dummynet.h - * Here you mainly find the following blocks of code: - * + variable declarations; - * + heap management functions; - * + scheduler and dummynet functions; - * + configuration and initialization. - * - * NOTA BENE: critical sections are protected by the "dummynet lock". - * - * Most important Changes: - * - * 011004: KLDable - * 010124: Fixed WF2Q behaviour - * 010122: Fixed spl protection. - * 000601: WF2Q support - * 000106: large rewrite, use heaps to handle very many pipes. 
- * 980513: initial release - * - * include files marked with XXX are probably not needed - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ -#include -#include -#include /* ip_len, ip_off */ -#include -#include -#include /* ip_output(), IP_FORWARDING */ - -#include /* various ether_* routines */ - -#include /* for ip6_input, ip6_output prototypes */ -#include - -/* - * We keep a private variable for the simulation time, but we could - * probably use an existing one ("softticks" in sys/kern/kern_timeout.c) - */ -static dn_key curr_time = 0 ; /* current simulation time */ - -static int dn_hash_size = 64 ; /* default hash size */ - -/* statistics on number of queue searches and search steps */ -static long searches, search_steps ; -static int pipe_expire = 1 ; /* expire queue if empty */ -static int dn_max_ratio = 16 ; /* max queues/buckets ratio */ - -static long pipe_slot_limit = 100; /* Foot shooting limit for pipe queues. */ -static long pipe_byte_limit = 1024 * 1024; - -static int red_lookup_depth = 256; /* RED - default lookup table depth */ -static int red_avg_pkt_size = 512; /* RED - default medium packet size */ -static int red_max_pkt_size = 1500; /* RED - default max packet size */ - -static struct timeval prev_t, t; -static long tick_last; /* Last tick duration (usec). */ -static long tick_delta; /* Last vs standard tick diff (usec). */ -static long tick_delta_sum; /* Accumulated tick difference (usec).*/ -static long tick_adjustment; /* Tick adjustments done. */ -static long tick_lost; /* Lost(coalesced) ticks number. */ -/* Adjusted vs non-adjusted curr_time difference (ticks). 
*/ -static long tick_diff; - -static int io_fast; -static unsigned long io_pkt; -static unsigned long io_pkt_fast; -static unsigned long io_pkt_drop; - -/* - * Three heaps contain queues and pipes that the scheduler handles: - * - * ready_heap contains all dn_flow_queue related to fixed-rate pipes. - * - * wfq_ready_heap contains the pipes associated with WF2Q flows - * - * extract_heap contains pipes associated with delay lines. - * - */ - -MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap"); - -static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ; - -static int heap_init(struct dn_heap *h, int size); -static int heap_insert (struct dn_heap *h, dn_key key1, void *p); -static void heap_extract(struct dn_heap *h, void *obj); -static void transmit_event(struct dn_pipe *pipe, struct mbuf **head, - struct mbuf **tail); -static void ready_event(struct dn_flow_queue *q, struct mbuf **head, - struct mbuf **tail); -static void ready_event_wfq(struct dn_pipe *p, struct mbuf **head, - struct mbuf **tail); - -#define HASHSIZE 16 -#define HASH(num) ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f) -static struct dn_pipe_head pipehash[HASHSIZE]; /* all pipes */ -static struct dn_flow_set_head flowsethash[HASHSIZE]; /* all flowsets */ - -static struct callout dn_timeout; - -extern void (*bridge_dn_p)(struct mbuf *, struct ifnet *); - -#ifdef SYSCTL_NODE -SYSCTL_DECL(_net_inet); -SYSCTL_DECL(_net_inet_ip); - -SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size, - CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size"); -#if 0 /* curr_time is 64 bit */ -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, curr_time, - CTLFLAG_RD, &curr_time, 0, "Current tick"); -#endif -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap, - CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap, - CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap"); 
-SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, searches, - CTLFLAG_RD, &searches, 0, "Number of queue searches"); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, search_steps, - CTLFLAG_RD, &search_steps, 0, "Number of queue search steps"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire, - CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len, - CTLFLAG_RW, &dn_max_ratio, 0, - "Max ratio between dynamic queues and buckets"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth, - CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size, - CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size"); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size, - CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size"); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta, - CTLFLAG_RD, &tick_delta, 0, "Last vs standard tick difference (usec)."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_delta_sum, - CTLFLAG_RD, &tick_delta_sum, 0, "Accumulated tick difference (usec)."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_adjustment, - CTLFLAG_RD, &tick_adjustment, 0, "Tick adjustments done."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_diff, - CTLFLAG_RD, &tick_diff, 0, - "Adjusted vs non-adjusted curr_time difference (ticks)."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, tick_lost, - CTLFLAG_RD, &tick_lost, 0, - "Number of ticks coalesced by dummynet taskqueue."); -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, io_fast, - CTLFLAG_RW, &io_fast, 0, "Enable fast dummynet io."); -SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt, - CTLFLAG_RD, &io_pkt, 0, - "Number of packets passed to dummynet."); -SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_fast, - CTLFLAG_RD, &io_pkt_fast, 0, - "Number of packets bypassed dummynet scheduler."); -SYSCTL_ULONG(_net_inet_ip_dummynet, OID_AUTO, io_pkt_drop, - 
CTLFLAG_RD, &io_pkt_drop, 0, - "Number of packets dropped by dummynet."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_slot_limit, - CTLFLAG_RW, &pipe_slot_limit, 0, "Upper limit in slots for pipe queue."); -SYSCTL_LONG(_net_inet_ip_dummynet, OID_AUTO, pipe_byte_limit, - CTLFLAG_RW, &pipe_byte_limit, 0, "Upper limit in bytes for pipe queue."); -#endif - -#ifdef DUMMYNET_DEBUG -int dummynet_debug = 0; -#ifdef SYSCTL_NODE -SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug, - 0, "control debugging printfs"); -#endif -#define DPRINTF(X) if (dummynet_debug) printf X -#else -#define DPRINTF(X) -#endif - -static struct task dn_task; -static struct taskqueue *dn_tq = NULL; -static void dummynet_task(void *, int); - -#if defined( __linux__ ) || defined( _WIN32 ) -static DEFINE_SPINLOCK(dummynet_mtx); -#else -static struct mtx dummynet_mtx; -#endif -#define DUMMYNET_LOCK_INIT() \ - mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF) -#define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx) -#define DUMMYNET_LOCK() mtx_lock(&dummynet_mtx) -#define DUMMYNET_UNLOCK() mtx_unlock(&dummynet_mtx) -#define DUMMYNET_LOCK_ASSERT() mtx_assert(&dummynet_mtx, MA_OWNED) - -static int config_pipe(struct dn_pipe *p); -static int ip_dn_ctl(struct sockopt *sopt); - -static void dummynet(void *); -static void dummynet_flush(void); -static void dummynet_send(struct mbuf *); -void dummynet_drain(void); -static int dummynet_io(struct mbuf **, int , struct ip_fw_args *); - -/* - * Flow queue is idle if: - * 1) it's empty for at least 1 tick - * 2) it has invalid timestamp (WF2Q case) - * 3) parent pipe has no 'exhausted' burst. - */ -#define QUEUE_IS_IDLE(q) ((q)->head == NULL && (q)->S == (q)->F + 1 && \ - curr_time > (q)->idle_time + 1 && \ - ((q)->numbytes + (curr_time - (q)->idle_time - 1) * \ - (q)->fs->pipe->bandwidth >= (q)->fs->pipe->burst)) - -/* - * Heap management functions. - * - * In the heap, first node is element 0. 
Children of i are 2i+1 and 2i+2. - * Some macros help finding parent/children so we can optimize them. - * - * heap_init() is called to expand the heap when needed. - * Increment size in blocks of 16 entries. - * XXX failure to allocate a new element is a pretty bad failure - * as we basically stall a whole queue forever!! - * Returns 1 on error, 0 on success - */ -#define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 ) -#define HEAP_LEFT(x) ( 2*(x) + 1 ) -#define HEAP_IS_LEFT(x) ( (x) & 1 ) -#define HEAP_RIGHT(x) ( 2*(x) + 2 ) -#define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; } -#define HEAP_INCREMENT 15 - -static int -heap_init(struct dn_heap *h, int new_size) -{ - struct dn_heap_entry *p; - - if (h->size >= new_size ) { - printf("dummynet: %s, Bogus call, have %d want %d\n", __func__, - h->size, new_size); - return 0 ; - } - new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ; - p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT); - if (p == NULL) { - printf("dummynet: %s, resize %d failed\n", __func__, new_size ); - return 1 ; /* error */ - } - if (h->size > 0) { - bcopy(h->p, p, h->size * sizeof(*p) ); - free(h->p, M_DUMMYNET); - } - h->p = p ; - h->size = new_size ; - return 0 ; -} - -/* - * Insert element in heap. Normally, p != NULL, we insert p in - * a new position and bubble up. If p == NULL, then the element is - * already in place, and key is the position where to start the - * bubble-up. - * Returns 1 on failure (cannot allocate new heap entry) - * - * If offset > 0 the position (index, int) of the element in the heap is - * also stored in the element itself at the given offset in bytes. - */ -#define SET_OFFSET(heap, node) \ - if (heap->offset > 0) \ - *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ; -/* - * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value. 
- */ -#define RESET_OFFSET(heap, node) \ - if (heap->offset > 0) \ - *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ; -static int -heap_insert(struct dn_heap *h, dn_key key1, void *p) -{ - int son = h->elements ; - - if (p == NULL) /* data already there, set starting point */ - son = key1 ; - else { /* insert new element at the end, possibly resize */ - son = h->elements ; - if (son == h->size) /* need resize... */ - if (heap_init(h, h->elements+1) ) - return 1 ; /* failure... */ - h->p[son].object = p ; - h->p[son].key = key1 ; - h->elements++ ; - } - while (son > 0) { /* bubble up */ - int father = HEAP_FATHER(son) ; - struct dn_heap_entry tmp ; - - if (DN_KEY_LT( h->p[father].key, h->p[son].key ) ) - break ; /* found right position */ - /* son smaller than father, swap and repeat */ - HEAP_SWAP(h->p[son], h->p[father], tmp) ; - SET_OFFSET(h, son); - son = father ; - } - SET_OFFSET(h, son); - return 0 ; -} - -/* - * remove top element from heap, or obj if obj != NULL - */ -static void -heap_extract(struct dn_heap *h, void *obj) -{ - int child, father, max = h->elements - 1 ; - - if (max < 0) { - printf("dummynet: warning, extract from empty heap 0x%p\n", h); - return ; - } - father = 0 ; /* default: move up smallest child */ - if (obj != NULL) { /* extract specific element, index is at offset */ - if (h->offset <= 0) - panic("dummynet: heap_extract from middle not supported on this heap!!!\n"); - father = *((int *)((char *)obj + h->offset)) ; - if (father < 0 || father >= h->elements) { - printf("dummynet: heap_extract, father %d out of bound 0..%d\n", - father, h->elements); - panic("dummynet: heap_extract"); - } - } - RESET_OFFSET(h, father); - child = HEAP_LEFT(father) ; /* left child */ - while (child <= max) { /* valid entry */ - if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) ) - child = child+1 ; /* take right child, otherwise left */ - h->p[father] = h->p[child] ; - SET_OFFSET(h, father); - father = child ; - child = 
HEAP_LEFT(child) ; /* left child for next loop */ - } - h->elements-- ; - if (father != max) { - /* - * Fill hole with last entry and bubble up, reusing the insert code - */ - h->p[father] = h->p[max] ; - heap_insert(h, father, NULL); /* this one cannot fail */ - } -} - -#if 0 -/* - * change object position and update references - * XXX this one is never used! - */ -static void -heap_move(struct dn_heap *h, dn_key new_key, void *object) -{ - int temp; - int i ; - int max = h->elements-1 ; - struct dn_heap_entry buf ; - - if (h->offset <= 0) - panic("cannot move items on this heap"); - - i = *((int *)((char *)object + h->offset)); - if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */ - h->p[i].key = new_key ; - for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ; - i = temp ) { /* bubble up */ - HEAP_SWAP(h->p[i], h->p[temp], buf) ; - SET_OFFSET(h, i); - } - } else { /* must move down */ - h->p[i].key = new_key ; - while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */ - if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key)) - temp++ ; /* select child with min key */ - if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */ - HEAP_SWAP(h->p[i], h->p[temp], buf) ; - SET_OFFSET(h, i); - } else - break ; - i = temp ; - } - } - SET_OFFSET(h, i); -} -#endif /* heap_move, unused */ - -/* - * heapify() will reorganize data inside an array to maintain the - * heap property. It is needed when we delete a bunch of entries. - */ -static void -heapify(struct dn_heap *h) -{ - int i ; - - for (i = 0 ; i < h->elements ; i++ ) - heap_insert(h, i , NULL) ; -} - -/* - * cleanup the heap and free data structure - */ -static void -heap_free(struct dn_heap *h) -{ - if (h->size >0 ) - free(h->p, M_DUMMYNET); - bzero(h, sizeof(*h) ); -} - -/* - * --- end of heap management functions --- - */ - -/* - * Dispose a packet in dummynet. 
Use an inline functions so if we - * need to free extra state associated to a packet, this is a - * central point to do it. - */ -static __inline void *dn_free_pkt(struct mbuf *m) -{ -#ifdef __linux__ - netisr_dispatch(-1, m); /* -1 drop the packet */ -#else - m_freem(m); -#endif - return NULL; -} - -static __inline void dn_free_pkts(struct mbuf *mnext) -{ - struct mbuf *m; - - while ((m = mnext) != NULL) { - mnext = m->m_nextpkt; - dn_free_pkt(m); - } -} - -/* - * Return the mbuf tag holding the dummynet state. As an optimization - * this is assumed to be the first tag on the list. If this turns out - * wrong we'll need to search the list. - */ -static struct dn_pkt_tag * -dn_tag_get(struct mbuf *m) -{ - struct m_tag *mtag = m_tag_first(m); - KASSERT(mtag != NULL && - mtag->m_tag_cookie == MTAG_ABI_COMPAT && - mtag->m_tag_id == PACKET_TAG_DUMMYNET, - ("packet on dummynet queue w/o dummynet tag!")); - return (struct dn_pkt_tag *)(mtag+1); -} - -/* - * Scheduler functions: - * - * transmit_event() is called when the delay-line needs to enter - * the scheduler, either because of existing pkts getting ready, - * or new packets entering the queue. The event handled is the delivery - * time of the packet. - * - * ready_event() does something similar with fixed-rate queues, and the - * event handled is the finish time of the head pkt. - * - * wfq_ready_event() does something similar with WF2Q queues, and the - * event handled is the start time of the head pkt. - * - * In all cases, we make sure that the data structures are consistent - * before passing pkts out, because this might trigger recursive - * invocations of the procedures. 
- */ -static void -transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail) -{ - struct mbuf *m; - struct dn_pkt_tag *pkt; - - DUMMYNET_LOCK_ASSERT(); - - while ((m = pipe->head) != NULL) { - pkt = dn_tag_get(m); - if (!DN_KEY_LEQ(pkt->output_time, curr_time)) - break; - - pipe->head = m->m_nextpkt; - if (*tail != NULL) - (*tail)->m_nextpkt = m; - else - *head = m; - *tail = m; - } - if (*tail != NULL) - (*tail)->m_nextpkt = NULL; - - /* If there are leftover packets, put into the heap for next event. */ - if ((m = pipe->head) != NULL) { - pkt = dn_tag_get(m); - /* - * XXX Should check errors on heap_insert, by draining the - * whole pipe p and hoping in the future we are more successful. - */ - heap_insert(&extract_heap, pkt->output_time, pipe); - } -} - -#ifndef __linux__ -#define div64(a, b) ((int64_t)(a) / (int64_t)(b)) -#endif -#define DN_TO_DROP 0xffff -/* - * Compute how many ticks we have to wait before being able to send - * a packet. This is computed as the "wire time" for the packet - * (length + extra bits), minus the credit available, scaled to ticks. - * Check that the result is not be negative (it could be if we have - * too much leftover credit in q->numbytes). - */ -static inline dn_key -set_ticks(struct mbuf *m, struct dn_flow_queue *q, struct dn_pipe *p) -{ - int64_t ret; - - ret = div64( (m->m_pkthdr.len * 8 + q->extra_bits) * hz - - q->numbytes + p->bandwidth - 1 , p->bandwidth); -#if 0 - printf("%s %d extra_bits %d numb %d ret %d\n", - __FUNCTION__, __LINE__, - (int)(q->extra_bits & 0xffffffff), - (int)(q->numbytes & 0xffffffff), - (int)(ret & 0xffffffff)); -#endif - if (ret < 0) - ret = 0; - return ret; -} - -/* - * Convert the additional MAC overheads/delays into an equivalent - * number of bits for the given data rate. The samples are in milliseconds - * so we need to divide by 1000. 
- */ -static dn_key -compute_extra_bits(struct mbuf *pkt, struct dn_pipe *p) -{ - int index; - dn_key extra_bits; - - if (!p->samples || p->samples_no == 0) - return 0; - index = random() % p->samples_no; - extra_bits = div64((dn_key)p->samples[index] * p->bandwidth, 1000); - if (index >= p->loss_level) { - struct dn_pkt_tag *dt = dn_tag_get(pkt); - if (dt) - dt->dn_dir = DN_TO_DROP; - } - return extra_bits; -} - -static void -free_pipe(struct dn_pipe *p) -{ - if (p->samples) - free(p->samples, M_DUMMYNET); - free(p, M_DUMMYNET); -} - -/* - * extract pkt from queue, compute output time (could be now) - * and put into delay line (p_queue) - */ -static void -move_pkt(struct mbuf *pkt, struct dn_flow_queue *q, struct dn_pipe *p, - int len) -{ - struct dn_pkt_tag *dt = dn_tag_get(pkt); - - q->head = pkt->m_nextpkt ; - q->len-- ; - q->len_bytes -= len ; - - dt->output_time = curr_time + p->delay ; - - if (p->head == NULL) - p->head = pkt; - else - p->tail->m_nextpkt = pkt; - p->tail = pkt; - p->tail->m_nextpkt = NULL; -} - -/* - * ready_event() is invoked every time the queue must enter the - * scheduler, either because the first packet arrives, or because - * a previously scheduled event fired. - * On invokation, drain as many pkts as possible (could be 0) and then - * if there are leftover packets reinsert the pkt in the scheduler. - */ -static void -ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail) -{ - struct mbuf *pkt; - struct dn_pipe *p = q->fs->pipe; - int p_was_empty; - - DUMMYNET_LOCK_ASSERT(); - - if (p == NULL) { - printf("dummynet: ready_event- pipe is gone\n"); - return; - } - p_was_empty = (p->head == NULL); - - /* - * Schedule fixed-rate queues linked to this pipe: - * account for the bw accumulated since last scheduling, then - * drain as many pkts as allowed by q->numbytes and move to - * the delay line (in p) computing output time. 
- * bandwidth==0 (no limit) means we can drain the whole queue, - * setting len_scaled = 0 does the job. - */ - q->numbytes += (curr_time - q->sched_time) * p->bandwidth; - while ((pkt = q->head) != NULL) { - int len = pkt->m_pkthdr.len; - dn_key len_scaled = p->bandwidth ? len*8*hz - + q->extra_bits*hz - : 0; - - if (DN_KEY_GT(len_scaled, q->numbytes)) - break; - q->numbytes -= len_scaled; - move_pkt(pkt, q, p, len); - if (q->head) - q->extra_bits = compute_extra_bits(q->head, p); - } - /* - * If we have more packets queued, schedule next ready event - * (can only occur when bandwidth != 0, otherwise we would have - * flushed the whole queue in the previous loop). - * To this purpose we record the current time and compute how many - * ticks to go for the finish time of the packet. - */ - if ((pkt = q->head) != NULL) { /* this implies bandwidth != 0 */ - dn_key t = set_ticks(pkt, q, p); /* ticks i have to wait */ - - q->sched_time = curr_time; - heap_insert(&ready_heap, curr_time + t, (void *)q); - /* - * XXX Should check errors on heap_insert, and drain the whole - * queue on error hoping next time we are luckier. - */ - } else /* RED needs to know when the queue becomes empty. */ - q->idle_time = curr_time; - - /* - * If the delay line was empty call transmit_event() now. - * Otherwise, the scheduler will take care of it. - */ - if (p_was_empty) - transmit_event(p, head, tail); -} - -/* - * Called when we can transmit packets on WF2Q queues. Take pkts out of - * the queues at their start time, and enqueue into the delay line. - * Packets are drained until p->numbytes < 0. As long as - * len_scaled >= p->numbytes, the packet goes into the delay line - * with a deadline p->delay. For the last packet, if p->numbytes < 0, - * there is an additional delay. 
- */ -static void -ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail) -{ - int p_was_empty = (p->head == NULL); - struct dn_heap *sch = &(p->scheduler_heap); - struct dn_heap *neh = &(p->not_eligible_heap); - int64_t p_numbytes = p->numbytes; - - /* - * p->numbytes is only 32bits in FBSD7, but we might need 64 bits. - * Use a local variable for the computations, and write back the - * results when done, saturating if needed. - * The local variable has no impact on performance and helps - * reducing diffs between the various branches. - */ - - DUMMYNET_LOCK_ASSERT(); - - if (p->if_name[0] == 0) /* tx clock is simulated */ - p_numbytes += (curr_time - p->sched_time) * p->bandwidth; - else { /* - * tx clock is for real, - * the ifq must be empty or this is a NOP. - * XXX not supported in Linux - */ - if (1) // p->ifp && p->ifp->if_snd.ifq_head != NULL) - return; - else { - DPRINTF(("dummynet: pipe %d ready from %s --\n", - p->pipe_nr, p->if_name)); - } - } - - /* - * While we have backlogged traffic AND credit, we need to do - * something on the queue. - */ - while (p_numbytes >= 0 && (sch->elements > 0 || neh->elements > 0)) { - if (sch->elements > 0) { - /* Have some eligible pkts to send out. */ - struct dn_flow_queue *q = sch->p[0].object; - struct mbuf *pkt = q->head; - struct dn_flow_set *fs = q->fs; - uint64_t len = pkt->m_pkthdr.len; - int len_scaled = p->bandwidth ? len * 8 * hz : 0; - - heap_extract(sch, NULL); /* Remove queue from heap. */ - p_numbytes -= len_scaled; - move_pkt(pkt, q, p, len); - - p->V += div64((len << MY_M), p->sum); /* Update V. */ - q->S = q->F; /* Update start time. */ - if (q->len == 0) { - /* Flow not backlogged any more. */ - fs->backlogged--; - heap_insert(&(p->idle_heap), q->F, q); - } else { - /* Still backlogged. */ - - /* - * Update F and position in backlogged queue, - * then put flow in not_eligible_heap - * (we will fix this later). 
- */ - len = (q->head)->m_pkthdr.len; - q->F += div64((len << MY_M), fs->weight); - if (DN_KEY_LEQ(q->S, p->V)) - heap_insert(neh, q->S, q); - else - heap_insert(sch, q->F, q); - } - } - /* - * Now compute V = max(V, min(S_i)). Remember that all elements - * in sch have by definition S_i <= V so if sch is not empty, - * V is surely the max and we must not update it. Conversely, - * if sch is empty we only need to look at neh. - */ - if (sch->elements == 0 && neh->elements > 0) - p->V = MAX64(p->V, neh->p[0].key); - /* Move from neh to sch any packets that have become eligible */ - while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V)) { - struct dn_flow_queue *q = neh->p[0].object; - heap_extract(neh, NULL); - heap_insert(sch, q->F, q); - } - - if (p->if_name[0] != '\0') { /* Tx clock is from a real thing */ - p_numbytes = -1; /* Mark not ready for I/O. */ - break; - } - } - if (sch->elements == 0 && neh->elements == 0 && p_numbytes >= 0) { - p->idle_time = curr_time; - /* - * No traffic and no events scheduled. - * We can get rid of idle-heap. - */ - if (p->idle_heap.elements > 0) { - int i; - - for (i = 0; i < p->idle_heap.elements; i++) { - struct dn_flow_queue *q; - - q = p->idle_heap.p[i].object; - q->F = 0; - q->S = q->F + 1; - } - p->sum = 0; - p->V = 0; - p->idle_heap.elements = 0; - } - } - /* - * If we are getting clocks from dummynet (not a real interface) and - * If we are under credit, schedule the next ready event. - * Also fix the delivery time of the last packet. - */ - if (p->if_name[0]==0 && p_numbytes < 0) { /* This implies bw > 0. */ - dn_key t = 0; /* Number of ticks i have to wait. */ - - if (p->bandwidth > 0) - t = div64(p->bandwidth - 1 - p_numbytes, p->bandwidth); - dn_tag_get(p->tail)->output_time += t; - p->sched_time = curr_time; - heap_insert(&wfq_ready_heap, curr_time + t, (void *)p); - /* - * XXX Should check errors on heap_insert, and drain the whole - * queue on error hoping next time we are luckier. 
- */ - } - - /* Write back p_numbytes (adjust 64->32bit if necessary). */ - p->numbytes = p_numbytes; - - /* - * If the delay line was empty call transmit_event() now. - * Otherwise, the scheduler will take care of it. - */ - if (p_was_empty) - transmit_event(p, head, tail); -} - -/* - * This is called one tick, after previous run. It is used to - * schedule next run. - */ -static void -dummynet(void * __unused unused) -{ - - taskqueue_enqueue(dn_tq, &dn_task); -} - -/* - * The main dummynet processing function. - */ -static void -dummynet_task(void *context, int pending) -{ - struct mbuf *head = NULL, *tail = NULL; - struct dn_pipe *pipe; - struct dn_heap *heaps[3]; - struct dn_heap *h; - void *p; /* generic parameter to handler */ - int i; - - DUMMYNET_LOCK(); - - heaps[0] = &ready_heap; /* fixed-rate queues */ - heaps[1] = &wfq_ready_heap; /* wfq queues */ - heaps[2] = &extract_heap; /* delay line */ - - /* Update number of lost(coalesced) ticks. */ - tick_lost += pending - 1; - - getmicrouptime(&t); - /* Last tick duration (usec). */ - tick_last = (t.tv_sec - prev_t.tv_sec) * 1000000 + - (t.tv_usec - prev_t.tv_usec); - /* Last tick vs standard tick difference (usec). */ - tick_delta = (tick_last * hz - 1000000) / hz; - /* Accumulated tick difference (usec). */ - tick_delta_sum += tick_delta; - - prev_t = t; - - /* - * Adjust curr_time if accumulated tick difference greater than - * 'standard' tick. Since curr_time should be monotonically increasing, - * we do positive adjustment as required and throttle curr_time in - * case of negative adjustment. 
- */ - curr_time++; - if (tick_delta_sum - tick >= 0) { - int diff = tick_delta_sum / tick; - - curr_time += diff; - tick_diff += diff; - tick_delta_sum %= tick; - tick_adjustment++; - } else if (tick_delta_sum + tick <= 0) { - curr_time--; - tick_diff--; - tick_delta_sum += tick; - tick_adjustment++; - } - - for (i = 0; i < 3; i++) { - h = heaps[i]; - while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time)) { - if (h->p[0].key > curr_time) - printf("dummynet: warning, " - "heap %d is %d ticks late\n", - i, (int)(curr_time - h->p[0].key)); - /* store a copy before heap_extract */ - p = h->p[0].object; - /* need to extract before processing */ - heap_extract(h, NULL); - if (i == 0) - ready_event(p, &head, &tail); - else if (i == 1) { - struct dn_pipe *pipe = p; - if (pipe->if_name[0] != '\0') - printf("dummynet: bad ready_event_wfq " - "for pipe %s\n", pipe->if_name); - else - ready_event_wfq(p, &head, &tail); - } else - transmit_event(p, &head, &tail); - } - } - - /* Sweep pipes trying to expire idle flow_queues. */ - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(pipe, &pipehash[i], next) - if (pipe->idle_heap.elements > 0 && - DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V)) { - struct dn_flow_queue *q = - pipe->idle_heap.p[0].object; - - heap_extract(&(pipe->idle_heap), NULL); - /* Mark timestamp as invalid. 
*/ - q->S = q->F + 1; - pipe->sum -= q->fs->weight; - } - - DUMMYNET_UNLOCK(); - - if (head != NULL) - dummynet_send(head); - - callout_reset(&dn_timeout, 1, dummynet, NULL); -} - -static void -dummynet_send(struct mbuf *m) -{ - struct dn_pkt_tag *pkt; - struct mbuf *n; - struct ip *ip; - int dst; - - for (; m != NULL; m = n) { - n = m->m_nextpkt; - m->m_nextpkt = NULL; - if (m_tag_first(m) == NULL) { - pkt = NULL; /* probably unnecessary */ - dst = DN_TO_DROP; - } else { - pkt = dn_tag_get(m); - dst = pkt->dn_dir; - } - - switch (dst) { - case DN_TO_IP_OUT: - ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); - break ; - case DN_TO_IP_IN : - ip = mtod(m, struct ip *); -#ifndef __linux__ /* restore net format for FreeBSD */ - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); -#endif - netisr_dispatch(NETISR_IP, m); - break; -#ifdef INET6 - case DN_TO_IP6_IN: - netisr_dispatch(NETISR_IPV6, m); - break; - - case DN_TO_IP6_OUT: - ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL); - break; -#endif - case DN_TO_IFB_FWD: - if (bridge_dn_p != NULL) - ((*bridge_dn_p)(m, pkt->ifp)); - else - printf("dummynet: if_bridge not loaded\n"); - - break; - case DN_TO_ETH_DEMUX: - /* - * The Ethernet code assumes the Ethernet header is - * contiguous in the first mbuf header. - * Insure this is true. - */ - if (m->m_len < ETHER_HDR_LEN && - (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) { - printf("dummynet/ether: pullup failed, " - "dropping packet\n"); - break; - } - ether_demux(m->m_pkthdr.rcvif, m); - break; - case DN_TO_ETH_OUT: - ether_output_frame(pkt->ifp, m); - break; - - case DN_TO_DROP: - /* drop the packet after some time */ - dn_free_pkt(m); - break; - - default: - printf("dummynet: bad switch %d!\n", pkt->dn_dir); - dn_free_pkt(m); - break; - } - } -} - -/* - * Unconditionally expire empty queues in case of shortage. - * Returns the number of queues freed. 
- */ -static int -expire_queues(struct dn_flow_set *fs) -{ - struct dn_flow_queue *q, *prev ; - int i, initial_elements = fs->rq_elements ; - - if (fs->last_expired == time_uptime) - return 0 ; - fs->last_expired = time_uptime ; - for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */ - for (prev=NULL, q = fs->rq[i] ; q != NULL ; ) - if (!QUEUE_IS_IDLE(q)) { - prev = q ; - q = q->next ; - } else { /* entry is idle, expire it */ - struct dn_flow_queue *old_q = q ; - - if (prev != NULL) - prev->next = q = q->next ; - else - fs->rq[i] = q = q->next ; - fs->rq_elements-- ; - free(old_q, M_DUMMYNET); - } - return initial_elements - fs->rq_elements ; -} - -/* - * If room, create a new queue and put at head of slot i; - * otherwise, create or use the default queue. - */ -static struct dn_flow_queue * -create_queue(struct dn_flow_set *fs, int i) -{ - struct dn_flow_queue *q; - - if (fs->rq_elements > fs->rq_size * dn_max_ratio && - expire_queues(fs) == 0) { - /* No way to get room, use or create overflow queue. */ - i = fs->rq_size; - if (fs->rq[i] != NULL) - return fs->rq[i]; - } - q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO); - if (q == NULL) { - printf("dummynet: sorry, cannot allocate queue for new flow\n"); - return (NULL); - } - q->fs = fs; - q->hash_slot = i; - q->next = fs->rq[i]; - q->S = q->F + 1; /* hack - mark timestamp as invalid. */ - q->numbytes = fs->pipe->burst + (io_fast ? fs->pipe->bandwidth : 0); - fs->rq[i] = q; - fs->rq_elements++; - return (q); -} - -/* - * Given a flow_set and a pkt in last_pkt, find a matching queue - * after appropriate masking. The queue is moved to front - * so that further searches take less time. 
- */ -static struct dn_flow_queue * -find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id) -{ - int i = 0 ; /* we need i and q for new allocations */ - struct dn_flow_queue *q, *prev; - int is_v6 = IS_IP6_FLOW_ID(id); - - if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) ) - q = fs->rq[0] ; - else { - /* first, do the masking, then hash */ - id->dst_port &= fs->flow_mask.dst_port ; - id->src_port &= fs->flow_mask.src_port ; - id->proto &= fs->flow_mask.proto ; - id->flags = 0 ; /* we don't care about this one */ - if (is_v6) { - APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6); - APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6); - id->flow_id6 &= fs->flow_mask.flow_id6; - - i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^ - ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^ - ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^ - ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^ - - ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^ - ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^ - ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^ - ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^ - - ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^ - - ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^ - ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^ - - (id->dst_port << 1) ^ (id->src_port) ^ - (id->proto ) ^ - (id->flow_id6); - } else { - id->dst_ip &= fs->flow_mask.dst_ip ; - id->src_ip &= fs->flow_mask.src_ip ; - - i = ( (id->dst_ip) & 0xffff ) ^ - ( (id->dst_ip >> 15) & 0xffff ) ^ - ( (id->src_ip << 1) & 0xffff ) ^ - ( (id->src_ip >> 16 ) & 0xffff ) ^ - (id->dst_port << 1) ^ (id->src_port) ^ - (id->proto ); - } - i = i % 
fs->rq_size ; - /* finally, scan the current list for a match */ - searches++ ; - for (prev=NULL, q = fs->rq[i] ; q ; ) { - search_steps++; - if (is_v6 && - IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) && - IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) && - id->dst_port == q->id.dst_port && - id->src_port == q->id.src_port && - id->proto == q->id.proto && - id->flags == q->id.flags && - id->flow_id6 == q->id.flow_id6) - break ; /* found */ - - if (!is_v6 && id->dst_ip == q->id.dst_ip && - id->src_ip == q->id.src_ip && - id->dst_port == q->id.dst_port && - id->src_port == q->id.src_port && - id->proto == q->id.proto && - id->flags == q->id.flags) - break ; /* found */ - - /* No match. Check if we can expire the entry */ - if (pipe_expire && QUEUE_IS_IDLE(q)) { - /* entry is idle and not in any heap, expire it */ - struct dn_flow_queue *old_q = q ; - - if (prev != NULL) - prev->next = q = q->next ; - else - fs->rq[i] = q = q->next ; - fs->rq_elements-- ; - free(old_q, M_DUMMYNET); - continue ; - } - prev = q ; - q = q->next ; - } - if (q && prev != NULL) { /* found and not in front */ - prev->next = q->next ; - q->next = fs->rq[i] ; - fs->rq[i] = q ; - } - } - if (q == NULL) { /* no match, need to allocate a new entry */ - q = create_queue(fs, i); - if (q != NULL) - q->id = *id ; - } - return q ; -} - -static int -red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len) -{ - /* - * RED algorithm - * - * RED calculates the average queue size (avg) using a low-pass filter - * with an exponential weighted (w_q) moving average: - * avg <- (1-w_q) * avg + w_q * q_size - * where q_size is the queue length (measured in bytes or * packets). - * - * If q_size == 0, we compute the idle time for the link, and set - * avg = (1 - w_q)^(idle/s) - * where s is the time needed for transmitting a medium-sized packet. - * - * Now, if avg < min_th the packet is enqueued. - * If avg > max_th the packet is dropped. 
Otherwise, the packet is - * dropped with probability P function of avg. - */ - - int64_t p_b = 0; - - /* Queue in bytes or packets? */ - u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? - q->len_bytes : q->len; - - DPRINTF(("\ndummynet: %d q: %2u ", (int)curr_time, q_size)); - - /* Average queue size estimation. */ - if (q_size != 0) { - /* Queue is not empty, avg <- avg + (q_size - avg) * w_q */ - int diff = SCALE(q_size) - q->avg; - int64_t v = SCALE_MUL((int64_t)diff, (int64_t)fs->w_q); - - q->avg += (int)v; - } else { - /* - * Queue is empty, find for how long the queue has been - * empty and use a lookup table for computing - * (1 - * w_q)^(idle_time/s) where s is the time to send a - * (small) packet. - * XXX check wraps... - */ - if (q->avg) { - u_int t = div64(curr_time - q->idle_time, - fs->lookup_step); - - q->avg = (t < fs->lookup_depth) ? - SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0; - } - } - DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg))); - - /* Should i drop? */ - if (q->avg < fs->min_th) { - q->count = -1; - return (0); /* accept packet */ - } - if (q->avg >= fs->max_th) { /* average queue >= max threshold */ - if (fs->flags_fs & DN_IS_GENTLE_RED) { - /* - * According to Gentle-RED, if avg is greater than - * max_th the packet is dropped with a probability - * p_b = c_3 * avg - c_4 - * where c_3 = (1 - max_p) / max_th - * c_4 = 1 - 2 * max_p - */ - p_b = SCALE_MUL((int64_t)fs->c_3, (int64_t)q->avg) - - fs->c_4; - } else { - q->count = -1; - DPRINTF(("dummynet: - drop")); - return (1); - } - } else if (q->avg > fs->min_th) { - /* - * We compute p_b using the linear dropping function - * p_b = c_1 * avg - c_2 - * where c_1 = max_p / (max_th - min_th) - * c_2 = max_p * min_th / (max_th - min_th) - */ - p_b = SCALE_MUL((int64_t)fs->c_1, (int64_t)q->avg) - fs->c_2; - } - - if (fs->flags_fs & DN_QSIZE_IS_BYTES) - p_b = div64(p_b * len, fs->max_pkt_size); - if (++q->count == 0) - q->random = random() & 0xffff; - else { - /* - * q->count counts 
packets arrived since last drop, so a greater - * value of q->count means a greater packet drop probability. - */ - if (SCALE_MUL(p_b, SCALE((int64_t)q->count)) > q->random) { - q->count = 0; - DPRINTF(("dummynet: - red drop")); - /* After a drop we calculate a new random value. */ - q->random = random() & 0xffff; - return (1); /* drop */ - } - } - /* End of RED algorithm. */ - - return (0); /* accept */ -} - -static __inline struct dn_flow_set * -locate_flowset(int fs_nr) -{ - struct dn_flow_set *fs; - - SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next) - if (fs->fs_nr == fs_nr) - return (fs); - - return (NULL); -} - -static __inline struct dn_pipe * -locate_pipe(int pipe_nr) -{ - struct dn_pipe *pipe; - - SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next) - if (pipe->pipe_nr == pipe_nr) - return (pipe); - - return (NULL); -} - -/* - * dummynet hook for packets. Below 'pipe' is a pipe or a queue - * depending on whether WF2Q or fixed bw is used. - * - * pipe_nr pipe or queue the packet is destined for. - * dir where shall we send the packet after dummynet. - * m the mbuf with the packet - * ifp the 'ifp' parameter from the caller. 
- * NULL in ip_input, destination interface in ip_output, - * rule matching rule, in case of multiple passes - */ -static int -dummynet_io(struct mbuf **m0, int dir, struct ip_fw_args *fwa) -{ - struct mbuf *m = *m0, *head = NULL, *tail = NULL; - struct dn_pkt_tag *pkt; - struct m_tag *mtag; - struct dn_flow_set *fs = NULL; - struct dn_pipe *pipe; - uint64_t len = m->m_pkthdr.len; - struct dn_flow_queue *q = NULL; - int is_pipe; - ipfw_insn *cmd = ACTION_PTR(fwa->rule); - - KASSERT(m->m_nextpkt == NULL, - ("dummynet_io: mbuf queue passed to dummynet")); - - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); - if (cmd->opcode == O_ALTQ) - cmd += F_LEN(cmd); - if (cmd->opcode == O_TAG) - cmd += F_LEN(cmd); - is_pipe = (cmd->opcode == O_PIPE); - - DUMMYNET_LOCK(); - io_pkt++; - /* - * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule. - * - * XXXGL: probably the pipe->fs and fs->pipe logic here - * below can be simplified. - */ - if (is_pipe) { - pipe = locate_pipe(fwa->cookie); - if (pipe != NULL) - fs = &(pipe->fs); - } else - fs = locate_flowset(fwa->cookie); - - if (fs == NULL) - goto dropit; /* This queue/pipe does not exist! */ - pipe = fs->pipe; - if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */ - pipe = locate_pipe(fs->parent_nr); - if (pipe != NULL) - fs->pipe = pipe; - else { - printf("dummynet: no pipe %d for queue %d, drop pkt\n", - fs->parent_nr, fs->fs_nr); - goto dropit; - } - } - q = find_queue(fs, &(fwa->f_id)); - if (q == NULL) - goto dropit; /* Cannot allocate queue. */ - - /* Update statistics, then check reasons to drop pkt. */ - q->tot_bytes += len; - q->tot_pkts++; - if (fs->plr && random() < fs->plr) - goto dropit; /* Random pkt drop. */ - if (fs->flags_fs & DN_QSIZE_IS_BYTES) { - if (q->len_bytes > fs->qsize) - goto dropit; /* Queue size overflow. */ - } else { - if (q->len >= fs->qsize) - goto dropit; /* Queue count overflow. 
*/ - } - if (fs->flags_fs & DN_IS_RED && red_drops(fs, q, len)) - goto dropit; - - /* XXX expensive to zero, see if we can remove it. */ - mtag = m_tag_get(PACKET_TAG_DUMMYNET, - sizeof(struct dn_pkt_tag), M_NOWAIT | M_ZERO); - if (mtag == NULL) - goto dropit; /* Cannot allocate packet header. */ - m_tag_prepend(m, mtag); /* Attach to mbuf chain. */ - - pkt = (struct dn_pkt_tag *)(mtag + 1); - /* - * Ok, i can handle the pkt now... - * Build and enqueue packet + parameters. - */ - pkt->rule = fwa->rule; - pkt->rule_id = fwa->rule_id; - pkt->chain_id = fwa->chain_id; - pkt->dn_dir = dir; - - pkt->ifp = fwa->oif; - - if (q->head == NULL) - q->head = m; - else - q->tail->m_nextpkt = m; - q->tail = m; - q->len++; - q->len_bytes += len; - - if (q->head != m) /* Flow was not idle, we are done. */ - goto done; - - if (is_pipe) { /* Fixed rate queues. */ - if (q->idle_time < curr_time) { - /* Calculate available burst size. */ - q->numbytes += - (curr_time - q->idle_time - 1) * pipe->bandwidth; - if (q->numbytes > pipe->burst) - q->numbytes = pipe->burst; - if (io_fast) - q->numbytes += pipe->bandwidth; - } - } else { /* WF2Q. */ - if (pipe->idle_time < curr_time && - pipe->scheduler_heap.elements == 0 && - pipe->not_eligible_heap.elements == 0) { - /* Calculate available burst size. */ - pipe->numbytes += - (curr_time - pipe->idle_time - 1) * pipe->bandwidth; - if (pipe->numbytes > 0 && pipe->numbytes > pipe->burst) - pipe->numbytes = pipe->burst; - if (io_fast) - pipe->numbytes += pipe->bandwidth; - } - pipe->idle_time = curr_time; - } - /* Necessary for both: fixed rate & WF2Q queues. */ - q->idle_time = curr_time; - - /* - * If we reach this point the flow was previously idle, so we need - * to schedule it. This involves different actions for fixed-rate or - * WF2Q queues. - */ - if (is_pipe) { - /* Fixed-rate queue: just insert into the ready_heap. 
*/ - dn_key t = 0; - - if (pipe->bandwidth) { - q->extra_bits = compute_extra_bits(m, pipe); - t = set_ticks(m, q, pipe); - } - q->sched_time = curr_time; - if (t == 0) /* Must process it now. */ - ready_event(q, &head, &tail); - else - heap_insert(&ready_heap, curr_time + t , q); - } else { - /* - * WF2Q. First, compute start time S: if the flow was - * idle (S = F + 1) set S to the virtual time V for the - * controlling pipe, and update the sum of weights for the pipe; - * otherwise, remove flow from idle_heap and set S to max(F,V). - * Second, compute finish time F = S + len / weight. - * Third, if pipe was idle, update V = max(S, V). - * Fourth, count one more backlogged flow. - */ - if (DN_KEY_GT(q->S, q->F)) { /* Means timestamps are invalid. */ - q->S = pipe->V; - pipe->sum += fs->weight; /* Add weight of new queue. */ - } else { - heap_extract(&(pipe->idle_heap), q); - q->S = MAX64(q->F, pipe->V); - } - q->F = q->S + div64(len << MY_M, fs->weight); - - if (pipe->not_eligible_heap.elements == 0 && - pipe->scheduler_heap.elements == 0) - pipe->V = MAX64(q->S, pipe->V); - fs->backlogged++; - /* - * Look at eligibility. A flow is not eligibile if S>V (when - * this happens, it means that there is some other flow already - * scheduled for the same pipe, so the scheduler_heap cannot be - * empty). If the flow is not eligible we just store it in the - * not_eligible_heap. Otherwise, we store in the scheduler_heap - * and possibly invoke ready_event_wfq() right now if there is - * leftover credit. - * Note that for all flows in scheduler_heap (SCH), S_i <= V, - * and for all flows in not_eligible_heap (NEH), S_i > V. - * So when we need to compute max(V, min(S_i)) forall i in - * SCH+NEH, we only need to look into NEH. - */ - if (DN_KEY_GT(q->S, pipe->V)) { /* Not eligible. */ - if (pipe->scheduler_heap.elements == 0) - printf("dummynet: ++ ouch! 
not eligible but empty scheduler!\n"); - heap_insert(&(pipe->not_eligible_heap), q->S, q); - } else { - heap_insert(&(pipe->scheduler_heap), q->F, q); - if (pipe->numbytes >= 0) { /* Pipe is idle. */ - if (pipe->scheduler_heap.elements != 1) - printf("dummynet: OUCH! pipe should have been idle!\n"); - DPRINTF(("dummynet: waking up pipe %d at %d\n", - pipe->pipe_nr, (int)(q->F >> MY_M))); - pipe->sched_time = curr_time; - ready_event_wfq(pipe, &head, &tail); - } - } - } -done: - if (head == m && dir != DN_TO_IFB_FWD && dir != DN_TO_ETH_DEMUX && - dir != DN_TO_ETH_OUT) { /* Fast io. */ - io_pkt_fast++; - if (m->m_nextpkt != NULL) - printf("dummynet: fast io: pkt chain detected!\n"); - head = m->m_nextpkt = NULL; - } else - *m0 = NULL; /* Normal io. */ - - DUMMYNET_UNLOCK(); - if (head != NULL) - dummynet_send(head); - return (0); - -dropit: - io_pkt_drop++; - if (q) - q->drops++; - DUMMYNET_UNLOCK(); - *m0 = dn_free_pkt(m); - return ((fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS); -} - -/* - * Dispose all packets and flow_queues on a flow_set. - * If all=1, also remove red lookup table and other storage, - * including the descriptor itself. - * For the one in dn_pipe MUST also cleanup ready_heap... - */ -static void -purge_flow_set(struct dn_flow_set *fs, int all) -{ - struct dn_flow_queue *q, *qn; - int i; - - DUMMYNET_LOCK_ASSERT(); - - for (i = 0; i <= fs->rq_size; i++) { - for (q = fs->rq[i]; q != NULL; q = qn) { - dn_free_pkts(q->head); - qn = q->next; - free(q, M_DUMMYNET); - } - fs->rq[i] = NULL; - } - - fs->rq_elements = 0; - if (all) { - /* RED - free lookup table. */ - if (fs->w_q_lookup != NULL) - free(fs->w_q_lookup, M_DUMMYNET); - if (fs->rq != NULL) - free(fs->rq, M_DUMMYNET); - /* If this fs is not part of a pipe, free it. */ - if (fs->pipe == NULL || fs != &(fs->pipe->fs)) - free(fs, M_DUMMYNET); - } -} - -/* - * Dispose all packets queued on a pipe (not a flow_set). 
- * Also free all resources associated to a pipe, which is about - * to be deleted. - */ -static void -purge_pipe(struct dn_pipe *pipe) -{ - - purge_flow_set( &(pipe->fs), 1 ); - - dn_free_pkts(pipe->head); - - heap_free( &(pipe->scheduler_heap) ); - heap_free( &(pipe->not_eligible_heap) ); - heap_free( &(pipe->idle_heap) ); -} - -/* - * Delete all pipes and heaps returning memory. Must also - * remove references from all ipfw rules to all pipes. - */ -static void -dummynet_flush(void) -{ - struct dn_pipe *pipe, *pipe1; - struct dn_flow_set *fs, *fs1; - int i; - - DUMMYNET_LOCK(); - /* Free heaps so we don't have unwanted events. */ - heap_free(&ready_heap); - heap_free(&wfq_ready_heap); - heap_free(&extract_heap); - - /* - * Now purge all queued pkts and delete all pipes. - * - * XXXGL: can we merge the for(;;) cycles into one or not? - */ - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) { - SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next); - purge_flow_set(fs, 1); - } - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) { - SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next); - purge_pipe(pipe); - free_pipe(pipe); - } - DUMMYNET_UNLOCK(); -} - -/* - * setup RED parameters - */ -static int -config_red(struct dn_flow_set *p, struct dn_flow_set *x) -{ - int i; - - x->w_q = p->w_q; - x->min_th = SCALE(p->min_th); - x->max_th = SCALE(p->max_th); - x->max_p = p->max_p; - - x->c_1 = p->max_p / (p->max_th - p->min_th); - x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th)); - - if (x->flags_fs & DN_IS_GENTLE_RED) { - x->c_3 = (SCALE(1) - p->max_p) / p->max_th; - x->c_4 = SCALE(1) - 2 * p->max_p; - } - - /* If the lookup table already exist, free and create it again. 
*/ - if (x->w_q_lookup) { - free(x->w_q_lookup, M_DUMMYNET); - x->w_q_lookup = NULL; - } - if (red_lookup_depth == 0) { - printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth" - "must be > 0\n"); - free(x, M_DUMMYNET); - return (EINVAL); - } - x->lookup_depth = red_lookup_depth; - x->w_q_lookup = (u_int *)malloc(x->lookup_depth * sizeof(int), - M_DUMMYNET, M_NOWAIT); - if (x->w_q_lookup == NULL) { - printf("dummynet: sorry, cannot allocate red lookup table\n"); - free(x, M_DUMMYNET); - return(ENOSPC); - } - - /* Fill the lookup table with (1 - w_q)^x */ - x->lookup_step = p->lookup_step; - x->lookup_weight = p->lookup_weight; - x->w_q_lookup[0] = SCALE(1) - x->w_q; - - for (i = 1; i < x->lookup_depth; i++) - x->w_q_lookup[i] = - SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight); - - if (red_avg_pkt_size < 1) - red_avg_pkt_size = 512; - x->avg_pkt_size = red_avg_pkt_size; - if (red_max_pkt_size < 1) - red_max_pkt_size = 1500; - x->max_pkt_size = red_max_pkt_size; - return (0); -} - -static int -alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs) -{ - if (x->flags_fs & DN_HAVE_FLOW_MASK) { /* allocate some slots */ - int l = pfs->rq_size; - - if (l == 0) - l = dn_hash_size; - if (l < 4) - l = 4; - else if (l > DN_MAX_HASH_SIZE) - l = DN_MAX_HASH_SIZE; - x->rq_size = l; - } else /* one is enough for null mask */ - x->rq_size = 1; - x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *), - M_DUMMYNET, M_NOWAIT | M_ZERO); - if (x->rq == NULL) { - printf("dummynet: sorry, cannot allocate queue\n"); - return (ENOMEM); - } - x->rq_elements = 0; - return 0 ; -} - -static void -set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src) -{ - x->flags_fs = src->flags_fs; - x->qsize = src->qsize; - x->plr = src->plr; - x->flow_mask = src->flow_mask; - if (x->flags_fs & DN_QSIZE_IS_BYTES) { - if (x->qsize > pipe_byte_limit) - x->qsize = 1024 * 1024; - } else { - if (x->qsize == 0) - x->qsize = 50; - if (x->qsize > pipe_slot_limit) - x->qsize = 50; - } - 
/* Configuring RED. */ - if (x->flags_fs & DN_IS_RED) - config_red(src, x); /* XXX should check errors */ -} - -/* - * Setup pipe or queue parameters. - */ -static int -config_pipe(struct dn_pipe *p) -{ - struct dn_flow_set *pfs = &(p->fs); - struct dn_flow_queue *q; - int i, error; - - /* - * The config program passes parameters as follows: - * bw = bits/second (0 means no limits), - * delay = ms, must be translated into ticks. - * qsize = slots/bytes - */ - p->delay = (p->delay * hz) / 1000; - /* Scale burst size: bytes -> bits * hz */ - p->burst *= 8 * hz; - /* We need either a pipe number or a flow_set number. */ - if (p->pipe_nr == 0 && pfs->fs_nr == 0) - return (EINVAL); - if (p->pipe_nr != 0 && pfs->fs_nr != 0) - return (EINVAL); - if (p->pipe_nr != 0) { /* this is a pipe */ - struct dn_pipe *pipe; - - DUMMYNET_LOCK(); - pipe = locate_pipe(p->pipe_nr); /* locate pipe */ - - if (pipe == NULL) { /* new pipe */ - pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET, - M_NOWAIT | M_ZERO); - if (pipe == NULL) { - DUMMYNET_UNLOCK(); - printf("dummynet: no memory for new pipe\n"); - return (ENOMEM); - } - pipe->pipe_nr = p->pipe_nr; - pipe->fs.pipe = pipe; - /* - * idle_heap is the only one from which - * we extract from the middle. - */ - pipe->idle_heap.size = pipe->idle_heap.elements = 0; - pipe->idle_heap.offset = - offsetof(struct dn_flow_queue, heap_pos); - } else - /* Flush accumulated credit for all queues. */ - for (i = 0; i <= pipe->fs.rq_size; i++) - for (q = pipe->fs.rq[i]; q; q = q->next) { - q->numbytes = p->burst + - (io_fast ? p->bandwidth : 0); - } - - pipe->bandwidth = p->bandwidth; - pipe->burst = p->burst; - pipe->numbytes = pipe->burst + (io_fast ? pipe->bandwidth : 0); - bcopy(p->if_name, pipe->if_name, sizeof(p->if_name)); - pipe->ifp = NULL; /* reset interface ptr */ - pipe->delay = p->delay; - set_fs_parms(&(pipe->fs), pfs); - - /* Handle changes in the delay profile. 
*/ - if (p->samples_no > 0) { - if (pipe->samples_no != p->samples_no) { - if (pipe->samples != NULL) - free(pipe->samples, M_DUMMYNET); - pipe->samples = - malloc(p->samples_no*sizeof(dn_key), - M_DUMMYNET, M_NOWAIT | M_ZERO); - if (pipe->samples == NULL) { - DUMMYNET_UNLOCK(); - printf("dummynet: no memory " - "for new samples\n"); - return (ENOMEM); - } - pipe->samples_no = p->samples_no; - } - - strncpy(pipe->name,p->name,sizeof(pipe->name)); - pipe->loss_level = p->loss_level; - for (i = 0; isamples_no; ++i) - pipe->samples[i] = p->samples[i]; - } else if (pipe->samples != NULL) { - free(pipe->samples, M_DUMMYNET); - pipe->samples = NULL; - pipe->samples_no = 0; - } - - if (pipe->fs.rq == NULL) { /* a new pipe */ - error = alloc_hash(&(pipe->fs), pfs); - if (error) { - DUMMYNET_UNLOCK(); - free_pipe(pipe); - return (error); - } - SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)], - pipe, next); - } - DUMMYNET_UNLOCK(); - } else { /* config queue */ - struct dn_flow_set *fs; - - DUMMYNET_LOCK(); - fs = locate_flowset(pfs->fs_nr); /* locate flow_set */ - - if (fs == NULL) { /* new */ - if (pfs->parent_nr == 0) { /* need link to a pipe */ - DUMMYNET_UNLOCK(); - return (EINVAL); - } - fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET, - M_NOWAIT | M_ZERO); - if (fs == NULL) { - DUMMYNET_UNLOCK(); - printf( - "dummynet: no memory for new flow_set\n"); - return (ENOMEM); - } - fs->fs_nr = pfs->fs_nr; - fs->parent_nr = pfs->parent_nr; - fs->weight = pfs->weight; - if (fs->weight == 0) - fs->weight = 1; - else if (fs->weight > 100) - fs->weight = 100; - } else { - /* - * Change parent pipe not allowed; - * must delete and recreate. 
- */ - if (pfs->parent_nr != 0 && - fs->parent_nr != pfs->parent_nr) { - DUMMYNET_UNLOCK(); - return (EINVAL); - } - } - - set_fs_parms(fs, pfs); - - if (fs->rq == NULL) { /* a new flow_set */ - error = alloc_hash(fs, pfs); - if (error) { - DUMMYNET_UNLOCK(); - free(fs, M_DUMMYNET); - return (error); - } - SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)], - fs, next); - } - DUMMYNET_UNLOCK(); - } - return (0); -} - -/* - * Helper function to remove from a heap queues which are linked to - * a flow_set about to be deleted. - */ -static void -fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs) -{ - int i = 0, found = 0 ; - for (; i < h->elements ;) - if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) { - h->elements-- ; - h->p[i] = h->p[h->elements] ; - found++ ; - } else - i++ ; - if (found) - heapify(h); -} - -/* - * helper function to remove a pipe from a heap (can be there at most once) - */ -static void -pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p) -{ - if (h->elements > 0) { - int i = 0 ; - for (i=0; i < h->elements ; i++ ) { - if (h->p[i].object == p) { /* found it */ - h->elements-- ; - h->p[i] = h->p[h->elements] ; - heapify(h); - break ; - } - } - } -} - -/* - * drain all queues. Called in case of severe mbuf shortage. - */ -void -dummynet_drain(void) -{ - struct dn_flow_set *fs; - struct dn_pipe *pipe; - int i; - - DUMMYNET_LOCK_ASSERT(); - - heap_free(&ready_heap); - heap_free(&wfq_ready_heap); - heap_free(&extract_heap); - /* remove all references to this pipe from flow_sets */ - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(fs, &flowsethash[i], next) - purge_flow_set(fs, 0); - - for (i = 0; i < HASHSIZE; i++) { - SLIST_FOREACH(pipe, &pipehash[i], next) { - purge_flow_set(&(pipe->fs), 0); - dn_free_pkts(pipe->head); - pipe->head = pipe->tail = NULL; - } - } -} - -/* - * Fully delete a pipe or a queue, cleaning up associated info. 
- */ -static int -delete_pipe(struct dn_pipe *p) -{ - - if (p->pipe_nr == 0 && p->fs.fs_nr == 0) - return EINVAL ; - if (p->pipe_nr != 0 && p->fs.fs_nr != 0) - return EINVAL ; - if (p->pipe_nr != 0) { /* this is an old-style pipe */ - struct dn_pipe *pipe; - struct dn_flow_set *fs; - int i; - - DUMMYNET_LOCK(); - pipe = locate_pipe(p->pipe_nr); /* locate pipe */ - - if (pipe == NULL) { - DUMMYNET_UNLOCK(); - return (ENOENT); /* not found */ - } - - /* Unlink from list of pipes. */ - SLIST_REMOVE(&pipehash[HASH(pipe->pipe_nr)], pipe, dn_pipe, next); - - /* Remove all references to this pipe from flow_sets. */ - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(fs, &flowsethash[i], next) - if (fs->pipe == pipe) { - printf("dummynet: ++ ref to pipe %d from fs %d\n", - p->pipe_nr, fs->fs_nr); - fs->pipe = NULL ; - purge_flow_set(fs, 0); - } - fs_remove_from_heap(&ready_heap, &(pipe->fs)); - purge_pipe(pipe); /* remove all data associated to this pipe */ - /* remove reference to here from extract_heap and wfq_ready_heap */ - pipe_remove_from_heap(&extract_heap, pipe); - pipe_remove_from_heap(&wfq_ready_heap, pipe); - DUMMYNET_UNLOCK(); - - free_pipe(pipe); - } else { /* this is a WF2Q queue (dn_flow_set) */ - struct dn_flow_set *fs; - - DUMMYNET_LOCK(); - fs = locate_flowset(p->fs.fs_nr); /* locate set */ - - if (fs == NULL) { - DUMMYNET_UNLOCK(); - return (ENOENT); /* not found */ - } - - /* Unlink from list of flowsets. */ - SLIST_REMOVE( &flowsethash[HASH(fs->fs_nr)], fs, dn_flow_set, next); - - if (fs->pipe != NULL) { - /* Update total weight on parent pipe and cleanup parent heaps. */ - fs->pipe->sum -= fs->weight * fs->backlogged ; - fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs); - fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs); -#if 1 /* XXX should i remove from idle_heap as well ? 
*/ - fs_remove_from_heap(&(fs->pipe->idle_heap), fs); -#endif - } - purge_flow_set(fs, 1); - DUMMYNET_UNLOCK(); - } - return 0 ; -} - -/* - * helper function used to copy data from kernel in DUMMYNET_GET - */ -static char * -dn_copy_set(struct dn_flow_set *set, char *bp) -{ - int i, copied = 0 ; - struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp; - - DUMMYNET_LOCK_ASSERT(); - - for (i = 0 ; i <= set->rq_size ; i++) - for (q = set->rq[i] ; q ; q = q->next, qp++ ) { - if (q->hash_slot != i) - printf("dummynet: ++ at %d: wrong slot (have %d, " - "should be %d)\n", copied, q->hash_slot, i); - if (q->fs != set) - printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n", - i, q->fs, set); - copied++ ; - bcopy(q, qp, sizeof( *q ) ); - /* cleanup pointers */ - qp->next = NULL ; - qp->head = qp->tail = NULL ; - qp->fs = NULL ; - } - if (copied != set->rq_elements) - printf("dummynet: ++ wrong count, have %d should be %d\n", - copied, set->rq_elements); - return (char *)qp ; -} - -static size_t -dn_calc_size(void) -{ - struct dn_flow_set *fs; - struct dn_pipe *pipe; - size_t size = 0; - int i; - - DUMMYNET_LOCK_ASSERT(); - /* - * Compute size of data structures: list of pipes and flow_sets. - */ - for (i = 0; i < HASHSIZE; i++) { - SLIST_FOREACH(pipe, &pipehash[i], next) - size += sizeof(*pipe) + - pipe->fs.rq_elements * sizeof(struct dn_flow_queue); - SLIST_FOREACH(fs, &flowsethash[i], next) - size += sizeof (*fs) + - fs->rq_elements * sizeof(struct dn_flow_queue); - } - return size; -} - -static int -dummynet_get(struct sockopt *sopt) -{ - char *buf, *bp ; /* bp is the "copy-pointer" */ - size_t size ; - struct dn_flow_set *fs; - struct dn_pipe *pipe; - int error=0, i ; - - /* XXX lock held too long */ - DUMMYNET_LOCK(); - /* - * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we - * cannot use this flag while holding a mutex. 
- */ - for (i = 0; i < 10; i++) { - size = dn_calc_size(); - DUMMYNET_UNLOCK(); - buf = malloc(size, M_TEMP, M_WAITOK); - DUMMYNET_LOCK(); - if (size == dn_calc_size()) - break; - free(buf, M_TEMP); - buf = NULL; - } - if (buf == NULL) { - DUMMYNET_UNLOCK(); - return ENOBUFS ; - } - bp = buf; - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(pipe, &pipehash[i], next) { - struct dn_pipe *pipe_bp = (struct dn_pipe *)bp; - - /* - * Copy pipe descriptor into *bp, convert delay back to ms, - * then copy the flow_set descriptor(s) one at a time. - * After each flow_set, copy the queue descriptor it owns. - */ - bcopy(pipe, bp, sizeof(*pipe)); - pipe_bp->delay = (pipe_bp->delay * 1000) / hz; - pipe_bp->burst = div64(pipe_bp->burst, 8 * hz); - /* - * XXX the following is a hack based on ->next being the - * first field in dn_pipe and dn_flow_set. The correct - * solution would be to move the dn_flow_set to the beginning - * of struct dn_pipe. - */ - pipe_bp->next.sle_next = (struct dn_pipe *)DN_IS_PIPE; - /* Clean pointers. 
*/ - pipe_bp->head = pipe_bp->tail = NULL; - pipe_bp->fs.next.sle_next = NULL; - pipe_bp->fs.pipe = NULL; - pipe_bp->fs.rq = NULL; - pipe_bp->samples = NULL; - - bp += sizeof(*pipe) ; - bp = dn_copy_set(&(pipe->fs), bp); - } - - for (i = 0; i < HASHSIZE; i++) - SLIST_FOREACH(fs, &flowsethash[i], next) { - struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp; - - bcopy(fs, bp, sizeof(*fs)); - /* XXX same hack as above */ - fs_bp->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE; - fs_bp->pipe = NULL; - fs_bp->rq = NULL; - bp += sizeof(*fs); - bp = dn_copy_set(fs, bp); - } - - DUMMYNET_UNLOCK(); - - error = sooptcopyout(sopt, buf, size); - free(buf, M_TEMP); - return error ; -} - -/* - * Handler for the various dummynet socket options (get, flush, config, del) - */ -static int -ip_dn_ctl(struct sockopt *sopt) -{ - int error; - struct dn_pipe *p = NULL; - - error = priv_check(sopt->sopt_td, PRIV_NETINET_DUMMYNET); - if (error) - return (error); - - /* Disallow sets in really-really secure mode. 
*/ - if (sopt->sopt_dir == SOPT_SET) { -#if __FreeBSD_version >= 500034 - error = securelevel_ge(sopt->sopt_td->td_ucred, 3); - if (error) - return (error); -#else - if (securelevel >= 3) - return (EPERM); -#endif - } - - switch (sopt->sopt_name) { - default : - printf("dummynet: -- unknown option %d", sopt->sopt_name); - error = EINVAL ; - break ; - - case IP_DUMMYNET_GET : - error = dummynet_get(sopt); - break ; - - case IP_DUMMYNET_FLUSH : - dummynet_flush() ; - break ; - - case IP_DUMMYNET_CONFIGURE : - p = malloc(sizeof(struct dn_pipe_max), M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, p, sizeof(struct dn_pipe_max), sizeof *p); - if (error) - break ; - if (p->samples_no > 0) - p->samples = &( ((struct dn_pipe_max*) p)->samples[0] ); - - error = config_pipe(p); - break ; - - case IP_DUMMYNET_DEL : /* remove a pipe or queue */ - p = malloc(sizeof(struct dn_pipe), M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, p, sizeof (struct dn_pipe), sizeof *p); - if (error) - break ; - - error = delete_pipe(p); - break ; - } - - if (p != NULL) - free(p, M_TEMP); - - return error ; -} - -static void -ip_dn_init(void) -{ - int i; - - if (bootverbose) - printf("DUMMYNET with IPv6 initialized (040826)\n"); - - DUMMYNET_LOCK_INIT(); - - for (i = 0; i < HASHSIZE; i++) { - SLIST_INIT(&pipehash[i]); - SLIST_INIT(&flowsethash[i]); - } - ready_heap.size = ready_heap.elements = 0; - ready_heap.offset = 0; - - wfq_ready_heap.size = wfq_ready_heap.elements = 0; - wfq_ready_heap.offset = 0; - - extract_heap.size = extract_heap.elements = 0; - extract_heap.offset = 0; - - ip_dn_ctl_ptr = ip_dn_ctl; - ip_dn_io_ptr = dummynet_io; - - TASK_INIT(&dn_task, 0, dummynet_task, NULL); - dn_tq = taskqueue_create_fast("dummynet", M_NOWAIT, - taskqueue_thread_enqueue, &dn_tq); - taskqueue_start_threads(&dn_tq, 1, PI_NET, "dummynet"); - - callout_init(&dn_timeout, CALLOUT_MPSAFE); - callout_reset(&dn_timeout, 1, dummynet, NULL); - - /* Initialize curr_time adjustment mechanics. 
*/ - getmicrouptime(&prev_t); -} - -#ifdef KLD_MODULE -static void -ip_dn_destroy(void) -{ - ip_dn_ctl_ptr = NULL; - ip_dn_io_ptr = NULL; - - DUMMYNET_LOCK(); - callout_stop(&dn_timeout); - DUMMYNET_UNLOCK(); - taskqueue_drain(dn_tq, &dn_task); - taskqueue_free(dn_tq); - - dummynet_flush(); - - DUMMYNET_LOCK_DESTROY(); -} -#endif /* KLD_MODULE */ - -static int -dummynet_modevent(module_t mod, int type, void *data) -{ - - switch (type) { - case MOD_LOAD: - if (ip_dn_io_ptr) { - printf("DUMMYNET already loaded\n"); - return EEXIST ; - } - ip_dn_init(); - break; - - case MOD_UNLOAD: -#if !defined(KLD_MODULE) - printf("dummynet statically compiled, cannot unload\n"); - return EINVAL ; -#else - ip_dn_destroy(); -#endif - break ; - default: - return EOPNOTSUPP; - break ; - } - return 0 ; -} - -static moduledata_t dummynet_mod = { - "dummynet", - dummynet_modevent, - NULL -}; -DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); -MODULE_DEPEND(dummynet, ipfw, 2, 2, 2); -MODULE_VERSION(dummynet, 1); diff --git a/dummynet/ip_fw2.c b/dummynet/ip_fw2.c deleted file mode 100644 index 21d1b41..0000000 --- a/dummynet/ip_fw2.c +++ /dev/null @@ -1,5114 +0,0 @@ -/*- - * Copyright (c) 2002 Luigi Rizzo, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD: src/sys/netinet/ip_fw2.c,v 1.175.2.13 2008/10/30 16:29:04 bz Exp $"); - -#define DEB(x) -#define DDB(x) x - -/* - * Implement IP packet firewall (new version) - */ - -#if !defined(KLD_MODULE) -#include "opt_ipfw.h" -#include "opt_ipdivert.h" -#include "opt_ipdn.h" -#include "opt_inet.h" -#ifndef INET -#error IPFIREWALL requires INET. -#endif /* INET */ -#endif -#include "opt_inet6.h" -#include "opt_ipsec.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* for ETHERTYPE_IP */ -#include -#include -#include -#include -#include - -#define IPFW_INTERNAL /* Access to protected data structures in ip_fw.h. 
*/ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#ifdef INET6 -#include -#include -#endif - -#include /* XXX for in_cksum */ - -#ifdef MAC -#include -#endif - -static VNET_DEFINE(int, ipfw_vnet_ready) = 0; -#define V_ipfw_vnet_ready VNET(ipfw_vnet_ready) -/* - * set_disable contains one bit per set value (0..31). - * If the bit is set, all rules with the corresponding set - * are disabled. Set RESVD_SET(31) is reserved for the default rule - * and rules that are not deleted by the flush command, - * and CANNOT be disabled. - * Rules in set RESVD_SET can only be deleted explicitly. - */ -static VNET_DEFINE(u_int32_t, set_disable); -static VNET_DEFINE(int, fw_verbose); -static VNET_DEFINE(struct callout, ipfw_timeout); -static VNET_DEFINE(int, verbose_limit); - -#define V_set_disable VNET(set_disable) -#define V_fw_verbose VNET(fw_verbose) -#define V_ipfw_timeout VNET(ipfw_timeout) -#define V_verbose_limit VNET(verbose_limit) - -#ifdef IPFIREWALL_DEFAULT_TO_ACCEPT -static int default_to_accept = 1; -#else -static int default_to_accept; -#endif -static uma_zone_t ipfw_dyn_rule_zone; - -/* - * list of rules for layer 3 - */ -VNET_DEFINE(struct ip_fw_chain, layer3_chain); - -MALLOC_DEFINE(M_IPFW, "IpFw/IpAcct", "IpFw/IpAcct chain's"); -MALLOC_DEFINE(M_IPFW_TBL, "ipfw_tbl", "IpFw tables"); -#define IPFW_NAT_LOADED (ipfw_nat_ptr != NULL) -ipfw_nat_t *ipfw_nat_ptr = NULL; -ipfw_nat_cfg_t *ipfw_nat_cfg_ptr; -ipfw_nat_cfg_t *ipfw_nat_del_ptr; -ipfw_nat_cfg_t *ipfw_nat_get_cfg_ptr; -ipfw_nat_cfg_t *ipfw_nat_get_log_ptr; - -struct table_entry { - struct radix_node rn[2]; - struct sockaddr_in addr, mask; - u_int32_t value; -}; - -static VNET_DEFINE(int, autoinc_step); -#define V_autoinc_step VNET(autoinc_step) -static VNET_DEFINE(int, fw_deny_unknown_exthdrs); -#define V_fw_deny_unknown_exthdrs VNET(fw_deny_unknown_exthdrs) - -extern int 
ipfw_chg_hook(SYSCTL_HANDLER_ARGS); - -#ifdef SYSCTL_NODE -SYSCTL_NODE(_net_inet_ip, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_VNET_PROC(_net_inet_ip_fw, OID_AUTO, enable, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_enable), 0, - ipfw_chg_hook, "I", "Enable ipfw"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, autoinc_step, - CTLFLAG_RW, &VNET_NAME(autoinc_step), 0, - "Rule number auto-increment step"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, one_pass, - CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_one_pass), 0, - "Only do a single pass through ipfw when using dummynet(4)"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose, - CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw_verbose), 0, - "Log matches to ipfw rules"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, verbose_limit, - CTLFLAG_RW, &VNET_NAME(verbose_limit), 0, - "Set upper limit of matches of ipfw rules logged"); -unsigned int dummy_default_rule = IPFW_DEFAULT_RULE; -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, default_rule, CTLFLAG_RD, - &dummy_default_rule, IPFW_DEFAULT_RULE, - "The default/max possible rule number."); -unsigned int dummy_tables_max = IPFW_TABLES_MAX; -SYSCTL_UINT(_net_inet_ip_fw, OID_AUTO, tables_max, CTLFLAG_RD, - &dummy_tables_max, IPFW_TABLES_MAX, - "The maximum number of tables."); -SYSCTL_INT(_net_inet_ip_fw, OID_AUTO, default_to_accept, CTLFLAG_RDTUN, - &default_to_accept, 0, - "Make the default rule accept all packets."); -TUNABLE_INT("net.inet.ip.fw.default_to_accept", &default_to_accept); - -#ifdef INET6 -SYSCTL_DECL(_net_inet6_ip6); -SYSCTL_NODE(_net_inet6_ip6, OID_AUTO, fw, CTLFLAG_RW, 0, "Firewall"); -SYSCTL_VNET_PROC(_net_inet6_ip6_fw, OID_AUTO, enable, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_SECURE3, &VNET_NAME(fw6_enable), 0, - ipfw_chg_hook, "I", "Enable ipfw+6"); -SYSCTL_VNET_INT(_net_inet6_ip6_fw, OID_AUTO, deny_unknown_exthdrs, - CTLFLAG_RW | CTLFLAG_SECURE, &VNET_NAME(fw_deny_unknown_exthdrs), 0, - "Deny packets with unknown IPv6 Extension Headers"); 
-#endif /* INET6 */ - -#endif /* SYSCTL_NODE */ - -/* - * Description of dynamic rules. - * - * Dynamic rules are stored in lists accessed through a hash table - * (ipfw_dyn_v) whose size is curr_dyn_buckets. This value can - * be modified through the sysctl variable dyn_buckets which is - * updated when the table becomes empty. - * - * XXX currently there is only one list, ipfw_dyn. - * - * When a packet is received, its address fields are first masked - * with the mask defined for the rule, then hashed, then matched - * against the entries in the corresponding list. - * Dynamic rules can be used for different purposes: - * + stateful rules; - * + enforcing limits on the number of sessions; - * + in-kernel NAT (not implemented yet) - * - * The lifetime of dynamic rules is regulated by dyn_*_lifetime, - * measured in seconds and depending on the flags. - * - * The total number of dynamic rules is stored in dyn_count. - * The max number of dynamic rules is dyn_max. When we reach - * the maximum number of rules we do not create anymore. This is - * done to avoid consuming too much memory, but also too much - * time when searching on each packet (ideally, we should try instead - * to put a limit on the length of the list on each bucket...). - * - * Each dynamic rule holds a pointer to the parent ipfw rule so - * we know what action to perform. Dynamic rules are removed when - * the parent rule is deleted. XXX we should make them survive. - * - * There are some limitations with dynamic rules -- we do not - * obey the 'randomized match', and we do not do multiple - * passes through the firewall. XXX check the latter!!! 
- */ -static VNET_DEFINE(ipfw_dyn_rule **, ipfw_dyn_v); -static VNET_DEFINE(u_int32_t, dyn_buckets); -static VNET_DEFINE(u_int32_t, curr_dyn_buckets); - -#define V_ipfw_dyn_v VNET(ipfw_dyn_v) -#define V_dyn_buckets VNET(dyn_buckets) -#define V_curr_dyn_buckets VNET(curr_dyn_buckets) - -#if defined( __linux__ ) || defined( _WIN32 ) -DEFINE_SPINLOCK(ipfw_dyn_mtx); -#else -static struct mtx ipfw_dyn_mtx; /* mutex guarding dynamic rules */ -#endif /* !__linux__ */ -#define IPFW_DYN_LOCK_INIT() \ - mtx_init(&ipfw_dyn_mtx, "IPFW dynamic rules", NULL, MTX_DEF) -#define IPFW_DYN_LOCK_DESTROY() mtx_destroy(&ipfw_dyn_mtx) -#define IPFW_DYN_LOCK() mtx_lock(&ipfw_dyn_mtx) -#define IPFW_DYN_UNLOCK() mtx_unlock(&ipfw_dyn_mtx) -#define IPFW_DYN_LOCK_ASSERT() mtx_assert(&ipfw_dyn_mtx, MA_OWNED) - -static struct mbuf *send_pkt(struct mbuf *, struct ipfw_flow_id *, - u_int32_t, u_int32_t, int); - - -/* - * Timeouts for various events in handing dynamic rules. - */ -static VNET_DEFINE(u_int32_t, dyn_ack_lifetime); -static VNET_DEFINE(u_int32_t, dyn_syn_lifetime); -static VNET_DEFINE(u_int32_t, dyn_fin_lifetime); -static VNET_DEFINE(u_int32_t, dyn_rst_lifetime); -static VNET_DEFINE(u_int32_t, dyn_udp_lifetime); -static VNET_DEFINE(u_int32_t, dyn_short_lifetime); - -#define V_dyn_ack_lifetime VNET(dyn_ack_lifetime) -#define V_dyn_syn_lifetime VNET(dyn_syn_lifetime) -#define V_dyn_fin_lifetime VNET(dyn_fin_lifetime) -#define V_dyn_rst_lifetime VNET(dyn_rst_lifetime) -#define V_dyn_udp_lifetime VNET(dyn_udp_lifetime) -#define V_dyn_short_lifetime VNET(dyn_short_lifetime) - -/* - * Keepalives are sent if dyn_keepalive is set. They are sent every - * dyn_keepalive_period seconds, in the last dyn_keepalive_interval - * seconds of lifetime of a rule. - * dyn_rst_lifetime and dyn_fin_lifetime should be strictly lower - * than dyn_keepalive_period. 
- */ - -static VNET_DEFINE(u_int32_t, dyn_keepalive_interval); -static VNET_DEFINE(u_int32_t, dyn_keepalive_period); -static VNET_DEFINE(u_int32_t, dyn_keepalive); - -#define V_dyn_keepalive_interval VNET(dyn_keepalive_interval) -#define V_dyn_keepalive_period VNET(dyn_keepalive_period) -#define V_dyn_keepalive VNET(dyn_keepalive) - -static VNET_DEFINE(u_int32_t, static_count); /* # of static rules */ -static VNET_DEFINE(u_int32_t, static_len); /* bytes of static rules */ -static VNET_DEFINE(u_int32_t, dyn_count); /* # of dynamic rules */ -static VNET_DEFINE(u_int32_t, dyn_max); /* max # of dynamic rules */ - -#define V_static_count VNET(static_count) -#define V_static_len VNET(static_len) -#define V_dyn_count VNET(dyn_count) -#define V_dyn_max VNET(dyn_max) - -#ifdef SYSCTL_NODE -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_buckets, - CTLFLAG_RW, &VNET_NAME(dyn_buckets), 0, - "Number of dyn. buckets"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, curr_dyn_buckets, - CTLFLAG_RD, &VNET_NAME(curr_dyn_buckets), 0, - "Current Number of dyn. buckets"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_count, - CTLFLAG_RD, &VNET_NAME(dyn_count), 0, - "Number of dyn. rules"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_max, - CTLFLAG_RW, &VNET_NAME(dyn_max), 0, - "Max number of dyn. rules"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, static_count, - CTLFLAG_RD, &VNET_NAME(static_count), 0, - "Number of static rules"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_ack_lifetime, - CTLFLAG_RW, &VNET_NAME(dyn_ack_lifetime), 0, - "Lifetime of dyn. rules for acks"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_syn_lifetime, - CTLFLAG_RW, &VNET_NAME(dyn_syn_lifetime), 0, - "Lifetime of dyn. rules for syn"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_fin_lifetime, - CTLFLAG_RW, &VNET_NAME(dyn_fin_lifetime), 0, - "Lifetime of dyn. 
rules for fin"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_rst_lifetime, - CTLFLAG_RW, &VNET_NAME(dyn_rst_lifetime), 0, - "Lifetime of dyn. rules for rst"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_udp_lifetime, - CTLFLAG_RW, &VNET_NAME(dyn_udp_lifetime), 0, - "Lifetime of dyn. rules for UDP"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_short_lifetime, - CTLFLAG_RW, &VNET_NAME(dyn_short_lifetime), 0, - "Lifetime of dyn. rules for other situations"); -SYSCTL_VNET_INT(_net_inet_ip_fw, OID_AUTO, dyn_keepalive, - CTLFLAG_RW, &VNET_NAME(dyn_keepalive), 0, - "Enable keepalives for dyn. rules"); -#endif /* SYSCTL_NODE */ - -/* - * L3HDR maps an ipv4 pointer into a layer3 header pointer of type T - * Other macros just cast void * into the appropriate type - */ -#define L3HDR(T, ip) ((T *)((u_int32_t *)(ip) + (ip)->ip_hl)) -#define TCP(p) ((struct tcphdr *)(p)) -#define SCTP(p) ((struct sctphdr *)(p)) -#define UDP(p) ((struct udphdr *)(p)) -#define ICMP(p) ((struct icmphdr *)(p)) -#define ICMP6(p) ((struct icmp6_hdr *)(p)) - -static __inline int -icmptype_match(struct icmphdr *icmp, ipfw_insn_u32 *cmd) -{ - int type = icmp->icmp_type; - - return (type <= ICMP_MAXTYPE && (cmd->d[0] & (1<icmp_type; - - return (type <= ICMP_MAXTYPE && (TT & (1<arg1 or cmd->d[0]. - * - * We scan options and store the bits we find set. We succeed if - * - * (want_set & ~bits) == 0 && (want_clear & ~bits) == want_clear - * - * The code is sometimes optimized not to store additional variables. 
- */ - -static int -flags_match(ipfw_insn *cmd, u_int8_t bits) -{ - u_char want_clear; - bits = ~bits; - - if ( ((cmd->arg1 & 0xff) & bits) != 0) - return 0; /* some bits we want set were clear */ - want_clear = (cmd->arg1 >> 8) & 0xff; - if ( (want_clear & bits) != want_clear) - return 0; /* some bits we want clear were set */ - return 1; -} - -static int -ipopts_match(struct ip *ip, ipfw_insn *cmd) -{ - int optlen, bits = 0; - u_char *cp = (u_char *)(ip + 1); - int x = (ip->ip_hl << 2) - sizeof (struct ip); - - for (; x > 0; x -= optlen, cp += optlen) { - int opt = cp[IPOPT_OPTVAL]; - - if (opt == IPOPT_EOL) - break; - if (opt == IPOPT_NOP) - optlen = 1; - else { - optlen = cp[IPOPT_OLEN]; - if (optlen <= 0 || optlen > x) - return 0; /* invalid or truncated */ - } - switch (opt) { - - default: - break; - - case IPOPT_LSRR: - bits |= IP_FW_IPOPT_LSRR; - break; - - case IPOPT_SSRR: - bits |= IP_FW_IPOPT_SSRR; - break; - - case IPOPT_RR: - bits |= IP_FW_IPOPT_RR; - break; - - case IPOPT_TS: - bits |= IP_FW_IPOPT_TS; - break; - } - } - return (flags_match(cmd, bits)); -} - -static int -tcpopts_match(struct tcphdr *tcp, ipfw_insn *cmd) -{ - int optlen, bits = 0; - u_char *cp = (u_char *)(tcp + 1); - int x = (tcp->th_off << 2) - sizeof(struct tcphdr); - - for (; x > 0; x -= optlen, cp += optlen) { - int opt = cp[0]; - if (opt == TCPOPT_EOL) - break; - if (opt == TCPOPT_NOP) - optlen = 1; - else { - optlen = cp[1]; - if (optlen <= 0) - break; - } - - switch (opt) { - - default: - break; - - case TCPOPT_MAXSEG: - bits |= IP_FW_TCPOPT_MSS; - break; - - case TCPOPT_WINDOW: - bits |= IP_FW_TCPOPT_WINDOW; - break; - - case TCPOPT_SACK_PERMITTED: - case TCPOPT_SACK: - bits |= IP_FW_TCPOPT_SACK; - break; - - case TCPOPT_TIMESTAMP: - bits |= IP_FW_TCPOPT_TS; - break; - - } - } - return (flags_match(cmd, bits)); -} - -static int -iface_match(struct ifnet *ifp, ipfw_insn_if *cmd) -{ - if (ifp == NULL) /* no iface with this packet, match fails */ - return 0; - /* Check by name or 
by IP address */ - if (cmd->name[0] != '\0') { /* match by name */ - /* Check name */ - if (cmd->p.glob) { - if (fnmatch(cmd->name, ifp->if_xname, 0) == 0) - return(1); - } else { - if (strncmp(ifp->if_xname, cmd->name, IFNAMSIZ) == 0) - return(1); - } - } else { -#if !defined( __linux__ ) && !defined( _WIN32 ) - struct ifaddr *ia; - - if_addr_rlock(ifp); - TAILQ_FOREACH(ia, &ifp->if_addrhead, ifa_link) { - if (ia->ifa_addr->sa_family != AF_INET) - continue; - if (cmd->p.ip.s_addr == ((struct sockaddr_in *) - (ia->ifa_addr))->sin_addr.s_addr) { - if_addr_runlock(ifp); - return(1); /* match */ - } - } - if_addr_runlock(ifp); -#endif - } - return(0); /* no match, fail ... */ -} - -#if !defined( __linux__ ) && !defined( _WIN32 ) -/* - * The verify_path function checks if a route to the src exists and - * if it is reachable via ifp (when provided). - * - * The 'verrevpath' option checks that the interface that an IP packet - * arrives on is the same interface that traffic destined for the - * packet's source address would be routed out of. The 'versrcreach' - * option just checks that the source address is reachable via any route - * (except default) in the routing table. These two are a measure to block - * forged packets. This is also commonly known as "anti-spoofing" or Unicast - * Reverse Path Forwarding (Unicast RFP) in Cisco-ese. The name of the knobs - * is purposely reminiscent of the Cisco IOS command, - * - * ip verify unicast reverse-path - * ip verify unicast source reachable-via any - * - * which implements the same functionality. But note that syntax is - * misleading. The check may be performed on all IP packets whether unicast, - * multicast, or broadcast. 
- */ -static int -verify_path(struct in_addr src, struct ifnet *ifp, u_int fib) -{ - struct route ro; - struct sockaddr_in *dst; - - bzero(&ro, sizeof(ro)); - - dst = (struct sockaddr_in *)&(ro.ro_dst); - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = src; - in_rtalloc_ign(&ro, 0, fib); - - if (ro.ro_rt == NULL) - return 0; - - /* - * If ifp is provided, check for equality with rtentry. - * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, - * in order to pass packets injected back by if_simloop(): - * if useloopback == 1 routing entry (via lo0) for our own address - * may exist, so we need to handle routing assymetry. - */ - if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { - RTFREE(ro.ro_rt); - return 0; - } - - /* if no ifp provided, check if rtentry is not default route */ - if (ifp == NULL && - satosin(rt_key(ro.ro_rt))->sin_addr.s_addr == INADDR_ANY) { - RTFREE(ro.ro_rt); - return 0; - } - - /* or if this is a blackhole/reject route */ - if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - RTFREE(ro.ro_rt); - return 0; - } - - /* found valid route */ - RTFREE(ro.ro_rt); - return 1; -} -#endif - -#ifdef INET6 -/* - * ipv6 specific rules here... 
- */ -static __inline int -icmp6type_match (int type, ipfw_insn_u32 *cmd) -{ - return (type <= ICMP6_MAXTYPE && (cmd->d[type/32] & (1<<(type%32)) ) ); -} - -static int -flow6id_match( int curr_flow, ipfw_insn_u32 *cmd ) -{ - int i; - for (i=0; i <= cmd->o.arg1; ++i ) - if (curr_flow == cmd->d[i] ) - return 1; - return 0; -} - -/* support for IP6_*_ME opcodes */ -static int -search_ip6_addr_net (struct in6_addr * ip6_addr) -{ - struct ifnet *mdc; - struct ifaddr *mdc2; - struct in6_ifaddr *fdm; - struct in6_addr copia; - - TAILQ_FOREACH(mdc, &V_ifnet, if_link) { - if_addr_rlock(mdc); - TAILQ_FOREACH(mdc2, &mdc->if_addrhead, ifa_link) { - if (mdc2->ifa_addr->sa_family == AF_INET6) { - fdm = (struct in6_ifaddr *)mdc2; - copia = fdm->ia_addr.sin6_addr; - /* need for leaving scope_id in the sock_addr */ - in6_clearscope(&copia); - if (IN6_ARE_ADDR_EQUAL(ip6_addr, &copia)) { - if_addr_runlock(mdc); - return 1; - } - } - } - if_addr_runlock(mdc); - } - return 0; -} - -static int -verify_path6(struct in6_addr *src, struct ifnet *ifp) -{ - struct route_in6 ro; - struct sockaddr_in6 *dst; - - bzero(&ro, sizeof(ro)); - - dst = (struct sockaddr_in6 * )&(ro.ro_dst); - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(*dst); - dst->sin6_addr = *src; - /* XXX MRT 0 for ipv6 at this time */ - rtalloc_ign((struct route *)&ro, 0); - - if (ro.ro_rt == NULL) - return 0; - - /* - * if ifp is provided, check for equality with rtentry - * We should use rt->rt_ifa->ifa_ifp, instead of rt->rt_ifp, - * to support the case of sending packets to an address of our own. 
- * (where the former interface is the first argument of if_simloop() - * (=ifp), the latter is lo0) - */ - if (ifp != NULL && ro.ro_rt->rt_ifa->ifa_ifp != ifp) { - RTFREE(ro.ro_rt); - return 0; - } - - /* if no ifp provided, check if rtentry is not default route */ - if (ifp == NULL && - IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(ro.ro_rt))->sin6_addr)) { - RTFREE(ro.ro_rt); - return 0; - } - - /* or if this is a blackhole/reject route */ - if (ifp == NULL && ro.ro_rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { - RTFREE(ro.ro_rt); - return 0; - } - - /* found valid route */ - RTFREE(ro.ro_rt); - return 1; - -} -static __inline int -hash_packet6(struct ipfw_flow_id *id) -{ - u_int32_t i; - i = (id->dst_ip6.__u6_addr.__u6_addr32[2]) ^ - (id->dst_ip6.__u6_addr.__u6_addr32[3]) ^ - (id->src_ip6.__u6_addr.__u6_addr32[2]) ^ - (id->src_ip6.__u6_addr.__u6_addr32[3]) ^ - (id->dst_port) ^ (id->src_port); - return i; -} - -static int -is_icmp6_query(int icmp6_type) -{ - if ((icmp6_type <= ICMP6_MAXTYPE) && - (icmp6_type == ICMP6_ECHO_REQUEST || - icmp6_type == ICMP6_MEMBERSHIP_QUERY || - icmp6_type == ICMP6_WRUREQUEST || - icmp6_type == ICMP6_FQDN_QUERY || - icmp6_type == ICMP6_NI_QUERY)) - return (1); - - return (0); -} - -static void -send_reject6(struct ip_fw_args *args, int code, u_int hlen, struct ip6_hdr *ip6) -{ - struct mbuf *m; - - m = args->m; - if (code == ICMP6_UNREACH_RST && args->f_id.proto == IPPROTO_TCP) { - struct tcphdr *tcp; - tcp = (struct tcphdr *)((char *)ip6 + hlen); - - if ((tcp->th_flags & TH_RST) == 0) { - struct mbuf *m0; - m0 = send_pkt(args->m, &(args->f_id), - ntohl(tcp->th_seq), ntohl(tcp->th_ack), - tcp->th_flags | TH_RST); - if (m0 != NULL) - ip6_output(m0, NULL, NULL, 0, NULL, NULL, - NULL); - } - m_freem(m); - } else if (code != ICMP6_UNREACH_RST) { /* Send an ICMPv6 unreach. */ -#if 0 - /* - * Unlike above, the mbufs need to line up with the ip6 hdr, - * as the contents are read. We need to m_adj() the - * needed amount. 
- * The mbuf will however be thrown away so we can adjust it. - * Remember we did an m_pullup on it already so we - * can make some assumptions about contiguousness. - */ - if (args->L3offset) - m_adj(m, args->L3offset); -#endif - icmp6_error(m, ICMP6_DST_UNREACH, code, 0); - } else - m_freem(m); - - args->m = NULL; -} - -#endif /* INET6 */ - -/* counter for ipfw_log(NULL...) */ -static VNET_DEFINE(u_int64_t, norule_counter); -#define V_norule_counter VNET(norule_counter) - -#define SNPARGS(buf, len) buf + len, sizeof(buf) > len ? sizeof(buf) - len : 0 -#define SNP(buf) buf, sizeof(buf) - -/* - * We enter here when we have a rule with O_LOG. - * XXX this function alone takes about 2Kbytes of code! - */ -static void -ipfw_log(struct ip_fw *f, u_int hlen, struct ip_fw_args *args, - struct mbuf *m, struct ifnet *oif, u_short offset, uint32_t tablearg, - struct ip *ip) -{ - struct ether_header *eh = args->eh; - char *action; - int limit_reached = 0; - char action2[40], proto[128], fragment[32]; - - fragment[0] = '\0'; - proto[0] = '\0'; - - if (f == NULL) { /* bogus pkt */ - if (V_verbose_limit != 0 && V_norule_counter >= V_verbose_limit) - return; - V_norule_counter++; - if (V_norule_counter == V_verbose_limit) - limit_reached = V_verbose_limit; - action = "Refuse"; - } else { /* O_LOG is the first action, find the real one */ - ipfw_insn *cmd = ACTION_PTR(f); - ipfw_insn_log *l = (ipfw_insn_log *)cmd; - - if (l->max_log != 0 && l->log_left == 0) - return; - l->log_left--; - if (l->log_left == 0) - limit_reached = l->max_log; - cmd += F_LEN(cmd); /* point to first action */ - if (cmd->opcode == O_ALTQ) { - ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; - - snprintf(SNPARGS(action2, 0), "Altq %d", - altq->qid); - cmd += F_LEN(cmd); - } - if (cmd->opcode == O_PROB) - cmd += F_LEN(cmd); - - if (cmd->opcode == O_TAG) - cmd += F_LEN(cmd); - - action = action2; - switch (cmd->opcode) { - case O_DENY: - action = "Deny"; - break; - - case O_REJECT: - if 
(cmd->arg1==ICMP_REJECT_RST) - action = "Reset"; - else if (cmd->arg1==ICMP_UNREACH_HOST) - action = "Reject"; - else - snprintf(SNPARGS(action2, 0), "Unreach %d", - cmd->arg1); - break; - - case O_UNREACH6: - if (cmd->arg1==ICMP6_UNREACH_RST) - action = "Reset"; - else - snprintf(SNPARGS(action2, 0), "Unreach %d", - cmd->arg1); - break; - - case O_ACCEPT: - action = "Accept"; - break; - case O_COUNT: - action = "Count"; - break; - case O_DIVERT: - snprintf(SNPARGS(action2, 0), "Divert %d", - cmd->arg1); - break; - case O_TEE: - snprintf(SNPARGS(action2, 0), "Tee %d", - cmd->arg1); - break; - case O_SETFIB: - snprintf(SNPARGS(action2, 0), "SetFib %d", - cmd->arg1); - break; - case O_SKIPTO: - snprintf(SNPARGS(action2, 0), "SkipTo %d", - cmd->arg1); - break; - case O_PIPE: - snprintf(SNPARGS(action2, 0), "Pipe %d", - cmd->arg1); - break; - case O_QUEUE: - snprintf(SNPARGS(action2, 0), "Queue %d", - cmd->arg1); - break; - case O_FORWARD_IP: { - ipfw_insn_sa *sa = (ipfw_insn_sa *)cmd; - int len; - struct in_addr dummyaddr; - if (sa->sa.sin_addr.s_addr == INADDR_ANY) - dummyaddr.s_addr = htonl(tablearg); - else - dummyaddr.s_addr = sa->sa.sin_addr.s_addr; - - len = snprintf(SNPARGS(action2, 0), "Forward to %s", - inet_ntoa(dummyaddr)); - - if (sa->sa.sin_port) - snprintf(SNPARGS(action2, len), ":%d", - sa->sa.sin_port); - } - break; - case O_NETGRAPH: - snprintf(SNPARGS(action2, 0), "Netgraph %d", - cmd->arg1); - break; - case O_NGTEE: - snprintf(SNPARGS(action2, 0), "Ngtee %d", - cmd->arg1); - break; - case O_NAT: - action = "Nat"; - break; - case O_REASS: - action = "Reass"; - break; - default: - action = "UNKNOWN"; - break; - } - } - - if (hlen == 0) { /* non-ip */ - snprintf(SNPARGS(proto, 0), "MAC"); - - } else { - int len; -#ifdef INET6 - char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; -#else - char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; -#endif - struct icmphdr *icmp; - struct tcphdr *tcp; - struct udphdr *udp; -#ifdef INET6 - struct ip6_hdr *ip6 
= NULL; - struct icmp6_hdr *icmp6; -#endif - src[0] = '\0'; - dst[0] = '\0'; -#ifdef INET6 - if (IS_IP6_FLOW_ID(&(args->f_id))) { - char ip6buf[INET6_ADDRSTRLEN]; - snprintf(src, sizeof(src), "[%s]", - ip6_sprintf(ip6buf, &args->f_id.src_ip6)); - snprintf(dst, sizeof(dst), "[%s]", - ip6_sprintf(ip6buf, &args->f_id.dst_ip6)); - - ip6 = (struct ip6_hdr *)ip; - tcp = (struct tcphdr *)(((char *)ip) + hlen); - udp = (struct udphdr *)(((char *)ip) + hlen); - } else -#endif - { - tcp = L3HDR(struct tcphdr, ip); - udp = L3HDR(struct udphdr, ip); - - inet_ntoa_r(ip->ip_src, src); - inet_ntoa_r(ip->ip_dst, dst); - } - - switch (args->f_id.proto) { - case IPPROTO_TCP: - len = snprintf(SNPARGS(proto, 0), "TCP %s", src); - if (offset == 0) - snprintf(SNPARGS(proto, len), ":%d %s:%d", - ntohs(tcp->th_sport), - dst, - ntohs(tcp->th_dport)); - else - snprintf(SNPARGS(proto, len), " %s", dst); - break; - - case IPPROTO_UDP: - len = snprintf(SNPARGS(proto, 0), "UDP %s", src); - if (offset == 0) - snprintf(SNPARGS(proto, len), ":%d %s:%d", - ntohs(udp->uh_sport), - dst, - ntohs(udp->uh_dport)); - else - snprintf(SNPARGS(proto, len), " %s", dst); - break; - - case IPPROTO_ICMP: - icmp = L3HDR(struct icmphdr, ip); - if (offset == 0) - len = snprintf(SNPARGS(proto, 0), - "ICMP:%u.%u ", - icmp->icmp_type, icmp->icmp_code); - else - len = snprintf(SNPARGS(proto, 0), "ICMP "); - len += snprintf(SNPARGS(proto, len), "%s", src); - snprintf(SNPARGS(proto, len), " %s", dst); - break; -#ifdef INET6 - case IPPROTO_ICMPV6: - icmp6 = (struct icmp6_hdr *)(((char *)ip) + hlen); - if (offset == 0) - len = snprintf(SNPARGS(proto, 0), - "ICMPv6:%u.%u ", - icmp6->icmp6_type, icmp6->icmp6_code); - else - len = snprintf(SNPARGS(proto, 0), "ICMPv6 "); - len += snprintf(SNPARGS(proto, len), "%s", src); - snprintf(SNPARGS(proto, len), " %s", dst); - break; -#endif - default: - len = snprintf(SNPARGS(proto, 0), "P:%d %s", - args->f_id.proto, src); - snprintf(SNPARGS(proto, len), " %s", dst); - break; - } - 
-#ifdef INET6 - if (IS_IP6_FLOW_ID(&(args->f_id))) { - if (offset & (IP6F_OFF_MASK | IP6F_MORE_FRAG)) - snprintf(SNPARGS(fragment, 0), - " (frag %08x:%d@%d%s)", - args->f_id.frag_id6, - ntohs(ip6->ip6_plen) - hlen, - ntohs(offset & IP6F_OFF_MASK) << 3, - (offset & IP6F_MORE_FRAG) ? "+" : ""); - } else -#endif - { - int ip_off, ip_len; - if (1 || eh != NULL) { /* layer 2 packets are as on the wire */ - ip_off = ntohs(ip->ip_off); - ip_len = ntohs(ip->ip_len); - } else { - ip_off = ip->ip_off; - ip_len = ip->ip_len; - } - if (ip_off & (IP_MF | IP_OFFMASK)) - snprintf(SNPARGS(fragment, 0), - " (frag %d:%d@%d%s)", - ntohs(ip->ip_id), ip_len - (ip->ip_hl << 2), - offset << 3, - (ip_off & IP_MF) ? "+" : ""); - } - } - if (oif || m->m_pkthdr.rcvif) - log(LOG_SECURITY | LOG_INFO, - "ipfw: %d %s %s %s via %s%s\n", - f ? f->rulenum : -1, - action, proto, oif ? "out" : "in", - oif ? oif->if_xname : m->m_pkthdr.rcvif->if_xname, - fragment); - else - log(LOG_SECURITY | LOG_INFO, - "ipfw: %d %s %s [no if info]%s\n", - f ? f->rulenum : -1, - action, proto, fragment); - if (limit_reached) - log(LOG_SECURITY | LOG_NOTICE, - "ipfw: limit %d reached on entry %d\n", - limit_reached, f ? f->rulenum : -1); -} - -/* - * IMPORTANT: the hash function for dynamic rules must be commutative - * in source and destination (ip,port), because rules are bidirectional - * and we want to find both in the same bucket. 
- */ -static __inline int -hash_packet(struct ipfw_flow_id *id) -{ - u_int32_t i; - -#ifdef INET6 - if (IS_IP6_FLOW_ID(id)) - i = hash_packet6(id); - else -#endif /* INET6 */ - i = (id->dst_ip) ^ (id->src_ip) ^ (id->dst_port) ^ (id->src_port); - i &= (V_curr_dyn_buckets - 1); - return i; -} - -static __inline void -unlink_dyn_rule_print(struct ipfw_flow_id *id) -{ - struct in_addr da; -#ifdef INET6 - char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; -#else - char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; -#endif - -#ifdef INET6 - if (IS_IP6_FLOW_ID(id)) { - ip6_sprintf(src, &id->src_ip6); - ip6_sprintf(dst, &id->dst_ip6); - } else -#endif - { - da.s_addr = htonl(id->src_ip); - inet_ntoa_r(da, src); - da.s_addr = htonl(id->dst_ip); - inet_ntoa_r(da, dst); - } - printf("ipfw: unlink entry %s %d -> %s %d, %d left\n", - src, id->src_port, dst, id->dst_port, V_dyn_count - 1); -} - -/** - * unlink a dynamic rule from a chain. prev is a pointer to - * the previous one, q is a pointer to the rule to delete, - * head is a pointer to the head of the queue. - * Modifies q and potentially also head. - */ -#define UNLINK_DYN_RULE(prev, head, q) { \ - ipfw_dyn_rule *old_q = q; \ - \ - /* remove a refcount to the parent */ \ - if (q->dyn_type == O_LIMIT) \ - q->parent->count--; \ - DEB(unlink_dyn_rule_print(&q->id);) \ - if (prev != NULL) \ - prev->next = q = q->next; \ - else \ - head = q = q->next; \ - V_dyn_count--; \ - uma_zfree(ipfw_dyn_rule_zone, old_q); } - -#define TIME_LEQ(a,b) ((int)((a)-(b)) <= 0) - -/** - * Remove dynamic rules pointing to "rule", or all of them if rule == NULL. - * - * If keep_me == NULL, rules are deleted even if not expired, - * otherwise only expired rules are removed. - * - * The value of the second parameter is also used to point to identify - * a rule we absolutely do not want to remove (e.g. because we are - * holding a reference to it -- this is the case with O_LIMIT_PARENT - * rules). 
The pointer is only used for comparison, so any non-null - * value will do. - */ -static void -remove_dyn_rule(struct ip_fw *rule, ipfw_dyn_rule *keep_me) -{ - static u_int32_t last_remove = 0; - -#define FORCE (keep_me == NULL) - - ipfw_dyn_rule *prev, *q; - int i, pass = 0, max_pass = 0; - - IPFW_DYN_LOCK_ASSERT(); - - if (V_ipfw_dyn_v == NULL || V_dyn_count == 0) - return; - /* do not expire more than once per second, it is useless */ - if (!FORCE && last_remove == time_uptime) - return; - last_remove = time_uptime; - - /* - * because O_LIMIT refer to parent rules, during the first pass only - * remove child and mark any pending LIMIT_PARENT, and remove - * them in a second pass. - */ -next_pass: - for (i = 0 ; i < V_curr_dyn_buckets ; i++) { - for (prev=NULL, q = V_ipfw_dyn_v[i] ; q ; ) { - /* - * Logic can become complex here, so we split tests. - */ - if (q == keep_me) - goto next; - if (rule != NULL && rule != q->rule) - goto next; /* not the one we are looking for */ - if (q->dyn_type == O_LIMIT_PARENT) { - /* - * handle parent in the second pass, - * record we need one. - */ - max_pass = 1; - if (pass == 0) - goto next; - if (FORCE && q->count != 0 ) { - /* XXX should not happen! */ - printf("ipfw: OUCH! cannot remove rule," - " count %d\n", q->count); - } - } else { - if (!FORCE && - !TIME_LEQ( q->expire, time_uptime )) - goto next; - } - if (q->dyn_type != O_LIMIT_PARENT || !q->count) { - UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); - continue; - } -next: - prev=q; - q=q->next; - } - } - if (pass++ < max_pass) - goto next_pass; -} - - -/** - * lookup a dynamic rule. - */ -static ipfw_dyn_rule * -lookup_dyn_rule_locked(struct ipfw_flow_id *pkt, int *match_direction, - struct tcphdr *tcp) -{ - /* - * stateful ipfw extensions. 
- * Lookup into dynamic session queue - */ -#define MATCH_REVERSE 0 -#define MATCH_FORWARD 1 -#define MATCH_NONE 2 -#define MATCH_UNKNOWN 3 - int i, dir = MATCH_NONE; - ipfw_dyn_rule *prev, *q=NULL; - - IPFW_DYN_LOCK_ASSERT(); - - if (V_ipfw_dyn_v == NULL) - goto done; /* not found */ - i = hash_packet( pkt ); - for (prev=NULL, q = V_ipfw_dyn_v[i] ; q != NULL ; ) { - if (q->dyn_type == O_LIMIT_PARENT && q->count) - goto next; - if (TIME_LEQ( q->expire, time_uptime)) { /* expire entry */ - UNLINK_DYN_RULE(prev, V_ipfw_dyn_v[i], q); - continue; - } - if (pkt->proto == q->id.proto && - q->dyn_type != O_LIMIT_PARENT) { - if (IS_IP6_FLOW_ID(pkt)) { - if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), - &(q->id.src_ip6)) && - IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), - &(q->id.dst_ip6)) && - pkt->src_port == q->id.src_port && - pkt->dst_port == q->id.dst_port ) { - dir = MATCH_FORWARD; - break; - } - if (IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), - &(q->id.dst_ip6)) && - IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), - &(q->id.src_ip6)) && - pkt->src_port == q->id.dst_port && - pkt->dst_port == q->id.src_port ) { - dir = MATCH_REVERSE; - break; - } - } else { - if (pkt->src_ip == q->id.src_ip && - pkt->dst_ip == q->id.dst_ip && - pkt->src_port == q->id.src_port && - pkt->dst_port == q->id.dst_port ) { - dir = MATCH_FORWARD; - break; - } - if (pkt->src_ip == q->id.dst_ip && - pkt->dst_ip == q->id.src_ip && - pkt->src_port == q->id.dst_port && - pkt->dst_port == q->id.src_port ) { - dir = MATCH_REVERSE; - break; - } - } - } -next: - prev = q; - q = q->next; - } - if (q == NULL) - goto done; /* q = NULL, not found */ - - if ( prev != NULL) { /* found and not in front */ - prev->next = q->next; - q->next = V_ipfw_dyn_v[i]; - V_ipfw_dyn_v[i] = q; - } - if (pkt->proto == IPPROTO_TCP) { /* update state according to flags */ - u_char flags = pkt->flags & (TH_FIN|TH_SYN|TH_RST); - -#define BOTH_SYN (TH_SYN | (TH_SYN << 8)) -#define BOTH_FIN (TH_FIN | (TH_FIN << 8)) - q->state |= (dir == MATCH_FORWARD ) ? 
flags : (flags << 8); - switch (q->state) { - case TH_SYN: /* opening */ - q->expire = time_uptime + V_dyn_syn_lifetime; - break; - - case BOTH_SYN: /* move to established */ - case BOTH_SYN | TH_FIN : /* one side tries to close */ - case BOTH_SYN | (TH_FIN << 8) : - if (tcp) { -#define _SEQ_GE(a,b) ((int)(a) - (int)(b) >= 0) - u_int32_t ack = ntohl(tcp->th_ack); - if (dir == MATCH_FORWARD) { - if (q->ack_fwd == 0 || _SEQ_GE(ack, q->ack_fwd)) - q->ack_fwd = ack; - else { /* ignore out-of-sequence */ - break; - } - } else { - if (q->ack_rev == 0 || _SEQ_GE(ack, q->ack_rev)) - q->ack_rev = ack; - else { /* ignore out-of-sequence */ - break; - } - } - } - q->expire = time_uptime + V_dyn_ack_lifetime; - break; - - case BOTH_SYN | BOTH_FIN: /* both sides closed */ - if (V_dyn_fin_lifetime >= V_dyn_keepalive_period) - V_dyn_fin_lifetime = V_dyn_keepalive_period - 1; - q->expire = time_uptime + V_dyn_fin_lifetime; - break; - - default: -#if 0 - /* - * reset or some invalid combination, but can also - * occur if we use keep-state the wrong way. 
- */ - if ( (q->state & ((TH_RST << 8)|TH_RST)) == 0) - printf("invalid state: 0x%x\n", q->state); -#endif - if (V_dyn_rst_lifetime >= V_dyn_keepalive_period) - V_dyn_rst_lifetime = V_dyn_keepalive_period - 1; - q->expire = time_uptime + V_dyn_rst_lifetime; - break; - } - } else if (pkt->proto == IPPROTO_UDP) { - q->expire = time_uptime + V_dyn_udp_lifetime; - } else { - /* other protocols */ - q->expire = time_uptime + V_dyn_short_lifetime; - } -done: - if (match_direction) - *match_direction = dir; - return q; -} - -static ipfw_dyn_rule * -lookup_dyn_rule(struct ipfw_flow_id *pkt, int *match_direction, - struct tcphdr *tcp) -{ - ipfw_dyn_rule *q; - - IPFW_DYN_LOCK(); - q = lookup_dyn_rule_locked(pkt, match_direction, tcp); - if (q == NULL) - IPFW_DYN_UNLOCK(); - /* NB: return table locked when q is not NULL */ - return q; -} - -static void -realloc_dynamic_table(void) -{ - IPFW_DYN_LOCK_ASSERT(); - - /* - * Try reallocation, make sure we have a power of 2 and do - * not allow more than 64k entries. In case of overflow, - * default to 1024. - */ - - if (V_dyn_buckets > 65536) - V_dyn_buckets = 1024; - if ((V_dyn_buckets & (V_dyn_buckets-1)) != 0) { /* not a power of 2 */ - V_dyn_buckets = V_curr_dyn_buckets; /* reset */ - return; - } - V_curr_dyn_buckets = V_dyn_buckets; - if (V_ipfw_dyn_v != NULL) - free(V_ipfw_dyn_v, M_IPFW); - for (;;) { - V_ipfw_dyn_v = malloc(V_curr_dyn_buckets * sizeof(ipfw_dyn_rule *), - M_IPFW, M_NOWAIT | M_ZERO); - if (V_ipfw_dyn_v != NULL || V_curr_dyn_buckets <= 2) - break; - V_curr_dyn_buckets /= 2; - } -} - -/** - * Install state of type 'type' for a dynamic session. - * The hash table contains two type of rules: - * - regular rules (O_KEEP_STATE) - * - rules for sessions with limited number of sess per user - * (O_LIMIT). When they are created, the parent is - * increased by 1, and decreased on delete. In this case, - * the third parameter is the parent rule and not the chain. - * - "parent" rules for the above (O_LIMIT_PARENT). 
- */ -static ipfw_dyn_rule * -add_dyn_rule(struct ipfw_flow_id *id, u_int8_t dyn_type, struct ip_fw *rule) -{ - ipfw_dyn_rule *r; - int i; - - IPFW_DYN_LOCK_ASSERT(); - - if (V_ipfw_dyn_v == NULL || - (V_dyn_count == 0 && V_dyn_buckets != V_curr_dyn_buckets)) { - realloc_dynamic_table(); - if (V_ipfw_dyn_v == NULL) - return NULL; /* failed ! */ - } - i = hash_packet(id); - - r = uma_zalloc(ipfw_dyn_rule_zone, M_NOWAIT | M_ZERO); - if (r == NULL) { - printf ("ipfw: sorry cannot allocate state\n"); - return NULL; - } - - /* increase refcount on parent, and set pointer */ - if (dyn_type == O_LIMIT) { - ipfw_dyn_rule *parent = (ipfw_dyn_rule *)rule; - if ( parent->dyn_type != O_LIMIT_PARENT) - panic("invalid parent"); - parent->count++; - r->parent = parent; - rule = parent->rule; - } - - r->id = *id; - r->expire = time_uptime + V_dyn_syn_lifetime; - r->rule = rule; - r->dyn_type = dyn_type; - r->pcnt = r->bcnt = 0; - r->count = 0; - - r->bucket = i; - r->next = V_ipfw_dyn_v[i]; - V_ipfw_dyn_v[i] = r; - V_dyn_count++; - DEB({ - struct in_addr da; -#ifdef INET6 - char src[INET6_ADDRSTRLEN]; - char dst[INET6_ADDRSTRLEN]; -#else - char src[INET_ADDRSTRLEN]; - char dst[INET_ADDRSTRLEN]; -#endif - -#ifdef INET6 - if (IS_IP6_FLOW_ID(&(r->id))) { - ip6_sprintf(src, &r->id.src_ip6); - ip6_sprintf(dst, &r->id.dst_ip6); - } else -#endif - { - da.s_addr = htonl(r->id.src_ip); - inet_ntoa_r(da, src); - da.s_addr = htonl(r->id.dst_ip); - inet_ntoa_r(da, dst); - } - printf("ipfw: add dyn entry ty %d %s %d -> %s %d, total %d\n", - dyn_type, src, r->id.src_port, dst, r->id.dst_port, - V_dyn_count); - }) - return r; -} - -/** - * lookup dynamic parent rule using pkt and rule as search keys. - * If the lookup fails, then install one. 
- */ -static ipfw_dyn_rule * -lookup_dyn_parent(struct ipfw_flow_id *pkt, struct ip_fw *rule) -{ - ipfw_dyn_rule *q; - int i; - - IPFW_DYN_LOCK_ASSERT(); - - if (V_ipfw_dyn_v) { - int is_v6 = IS_IP6_FLOW_ID(pkt); - i = hash_packet( pkt ); - for (q = V_ipfw_dyn_v[i] ; q != NULL ; q=q->next) - if (q->dyn_type == O_LIMIT_PARENT && - rule== q->rule && - pkt->proto == q->id.proto && - pkt->src_port == q->id.src_port && - pkt->dst_port == q->id.dst_port && - ( - (is_v6 && - IN6_ARE_ADDR_EQUAL(&(pkt->src_ip6), - &(q->id.src_ip6)) && - IN6_ARE_ADDR_EQUAL(&(pkt->dst_ip6), - &(q->id.dst_ip6))) || - (!is_v6 && - pkt->src_ip == q->id.src_ip && - pkt->dst_ip == q->id.dst_ip) - ) - ) { - q->expire = time_uptime + V_dyn_short_lifetime; - DEB(printf("ipfw: lookup_dyn_parent found 0x%p\n",q);) - return q; - } - } - return add_dyn_rule(pkt, O_LIMIT_PARENT, rule); -} - -/** - * Install dynamic state for rule type cmd->o.opcode - * - * Returns 1 (failure) if state is not installed because of errors or because - * session limitations are enforced. 
- */ -static int -install_state(struct ip_fw *rule, ipfw_insn_limit *cmd, - struct ip_fw_args *args, uint32_t tablearg) -{ - static int last_log; - ipfw_dyn_rule *q; - struct in_addr da; -#ifdef INET6 - char src[INET6_ADDRSTRLEN + 2], dst[INET6_ADDRSTRLEN + 2]; -#else - char src[INET_ADDRSTRLEN], dst[INET_ADDRSTRLEN]; -#endif - - src[0] = '\0'; - dst[0] = '\0'; - - IPFW_DYN_LOCK(); - - DEB( -#ifdef INET6 - if (IS_IP6_FLOW_ID(&(args->f_id))) { - ip6_sprintf(src, &args->f_id.src_ip6); - ip6_sprintf(dst, &args->f_id.dst_ip6); - } else -#endif - { - da.s_addr = htonl(args->f_id.src_ip); - inet_ntoa_r(da, src); - da.s_addr = htonl(args->f_id.dst_ip); - inet_ntoa_r(da, dst); - } - printf("ipfw: %s: type %d %s %u -> %s %u\n", - __func__, cmd->o.opcode, src, args->f_id.src_port, - dst, args->f_id.dst_port); - src[0] = '\0'; - dst[0] = '\0'; - ) - - q = lookup_dyn_rule_locked(&args->f_id, NULL, NULL); - - if (q != NULL) { /* should never occur */ - if (last_log != time_uptime) { - last_log = time_uptime; - printf("ipfw: %s: entry already present, done\n", - __func__); - } - IPFW_DYN_UNLOCK(); - return (0); - } - - if (V_dyn_count >= V_dyn_max) - /* Run out of slots, try to remove any expired rule. */ - remove_dyn_rule(NULL, (ipfw_dyn_rule *)1); - - if (V_dyn_count >= V_dyn_max) { - if (last_log != time_uptime) { - last_log = time_uptime; - printf("ipfw: %s: Too many dynamic rules\n", __func__); - } - IPFW_DYN_UNLOCK(); - return (1); /* cannot install, notify caller */ - } - - switch (cmd->o.opcode) { - case O_KEEP_STATE: /* bidir rule */ - add_dyn_rule(&args->f_id, O_KEEP_STATE, rule); - break; - - case O_LIMIT: { /* limit number of sessions */ - struct ipfw_flow_id id; - ipfw_dyn_rule *parent; - uint32_t conn_limit; - uint16_t limit_mask = cmd->limit_mask; - - conn_limit = (cmd->conn_limit == IP_FW_TABLEARG) ? 
- tablearg : cmd->conn_limit; - - DEB( - if (cmd->conn_limit == IP_FW_TABLEARG) - printf("ipfw: %s: O_LIMIT rule, conn_limit: %u " - "(tablearg)\n", __func__, conn_limit); - else - printf("ipfw: %s: O_LIMIT rule, conn_limit: %u\n", - __func__, conn_limit); - ) - - id.dst_ip = id.src_ip = id.dst_port = id.src_port = 0; - id.proto = args->f_id.proto; - id.addr_type = args->f_id.addr_type; - id.fib = M_GETFIB(args->m); - - if (IS_IP6_FLOW_ID (&(args->f_id))) { - if (limit_mask & DYN_SRC_ADDR) - id.src_ip6 = args->f_id.src_ip6; - if (limit_mask & DYN_DST_ADDR) - id.dst_ip6 = args->f_id.dst_ip6; - } else { - if (limit_mask & DYN_SRC_ADDR) - id.src_ip = args->f_id.src_ip; - if (limit_mask & DYN_DST_ADDR) - id.dst_ip = args->f_id.dst_ip; - } - if (limit_mask & DYN_SRC_PORT) - id.src_port = args->f_id.src_port; - if (limit_mask & DYN_DST_PORT) - id.dst_port = args->f_id.dst_port; - if ((parent = lookup_dyn_parent(&id, rule)) == NULL) { - printf("ipfw: %s: add parent failed\n", __func__); - IPFW_DYN_UNLOCK(); - return (1); - } - - if (parent->count >= conn_limit) { - /* See if we can remove some expired rule. */ - remove_dyn_rule(rule, parent); - if (parent->count >= conn_limit) { - if (V_fw_verbose && last_log != time_uptime) { - last_log = time_uptime; -#ifdef INET6 - /* - * XXX IPv6 flows are not - * supported yet. 
- */ - if (IS_IP6_FLOW_ID(&(args->f_id))) { - char ip6buf[INET6_ADDRSTRLEN]; - snprintf(src, sizeof(src), - "[%s]", ip6_sprintf(ip6buf, - &args->f_id.src_ip6)); - snprintf(dst, sizeof(dst), - "[%s]", ip6_sprintf(ip6buf, - &args->f_id.dst_ip6)); - } else -#endif - { - da.s_addr = - htonl(args->f_id.src_ip); - inet_ntoa_r(da, src); - da.s_addr = - htonl(args->f_id.dst_ip); - inet_ntoa_r(da, dst); - } - log(LOG_SECURITY | LOG_DEBUG, - "ipfw: %d %s %s:%u -> %s:%u, %s\n", - parent->rule->rulenum, - "drop session", - src, (args->f_id.src_port), - dst, (args->f_id.dst_port), - "too many entries"); - } - IPFW_DYN_UNLOCK(); - return (1); - } - } - add_dyn_rule(&args->f_id, O_LIMIT, (struct ip_fw *)parent); - break; - } - default: - printf("ipfw: %s: unknown dynamic rule type %u\n", - __func__, cmd->o.opcode); - IPFW_DYN_UNLOCK(); - return (1); - } - - /* XXX just set lifetime */ - lookup_dyn_rule_locked(&args->f_id, NULL, NULL); - - IPFW_DYN_UNLOCK(); - return (0); -} - -/* - * Generate a TCP packet, containing either a RST or a keepalive. - * When flags & TH_RST, we are sending a RST packet, because of a - * "reset" action matched the packet. - * Otherwise we are sending a keepalive, and flags & TH_ - * The 'replyto' mbuf is the mbuf being replied to, if any, and is required - * so that MAC can label the reply appropriately. 
- */ -static struct mbuf * -send_pkt(struct mbuf *replyto, struct ipfw_flow_id *id, u_int32_t seq, - u_int32_t ack, int flags) -{ -#if defined( __linux__ ) || defined( _WIN32 ) - return NULL; -#else - struct mbuf *m; - int len, dir; - struct ip *h = NULL; /* stupid compiler */ -#ifdef INET6 - struct ip6_hdr *h6 = NULL; -#endif - struct tcphdr *th = NULL; - - MGETHDR(m, M_DONTWAIT, MT_DATA); - if (m == NULL) - return (NULL); - - M_SETFIB(m, id->fib); -#ifdef MAC - if (replyto != NULL) - mac_netinet_firewall_reply(replyto, m); - else - mac_netinet_firewall_send(m); -#else - (void)replyto; /* don't warn about unused arg */ -#endif - - switch (id->addr_type) { - case 4: - len = sizeof(struct ip) + sizeof(struct tcphdr); - break; -#ifdef INET6 - case 6: - len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - break; -#endif - default: - /* XXX: log me?!? */ - m_freem(m); - return (NULL); - } - dir = ((flags & (TH_SYN | TH_RST)) == TH_SYN); - - m->m_data += max_linkhdr; - m->m_flags |= M_SKIP_FIREWALL; - m->m_pkthdr.len = m->m_len = len; - m->m_pkthdr.rcvif = NULL; - bzero(m->m_data, len); - - switch (id->addr_type) { - case 4: - h = mtod(m, struct ip *); - - /* prepare for checksum */ - h->ip_p = IPPROTO_TCP; - h->ip_len = htons(sizeof(struct tcphdr)); - if (dir) { - h->ip_src.s_addr = htonl(id->src_ip); - h->ip_dst.s_addr = htonl(id->dst_ip); - } else { - h->ip_src.s_addr = htonl(id->dst_ip); - h->ip_dst.s_addr = htonl(id->src_ip); - } - - th = (struct tcphdr *)(h + 1); - break; -#ifdef INET6 - case 6: - h6 = mtod(m, struct ip6_hdr *); - - /* prepare for checksum */ - h6->ip6_nxt = IPPROTO_TCP; - h6->ip6_plen = htons(sizeof(struct tcphdr)); - if (dir) { - h6->ip6_src = id->src_ip6; - h6->ip6_dst = id->dst_ip6; - } else { - h6->ip6_src = id->dst_ip6; - h6->ip6_dst = id->src_ip6; - } - - th = (struct tcphdr *)(h6 + 1); - break; -#endif - } - - if (dir) { - th->th_sport = htons(id->src_port); - th->th_dport = htons(id->dst_port); - } else { - th->th_sport = 
htons(id->dst_port); - th->th_dport = htons(id->src_port); - } - th->th_off = sizeof(struct tcphdr) >> 2; - - if (flags & TH_RST) { - if (flags & TH_ACK) { - th->th_seq = htonl(ack); - // XXX th->th_ack = htonl(0); - th->th_flags = TH_RST; - } else { - if (flags & TH_SYN) - seq++; - // XXX th->th_seq = htonl(0); - th->th_ack = htonl(seq); - th->th_flags = TH_RST | TH_ACK; - } - } else { - /* - * Keepalive - use caller provided sequence numbers - */ - th->th_seq = htonl(seq); - th->th_ack = htonl(ack); - th->th_flags = TH_ACK; - } - - switch (id->addr_type) { - case 4: - th->th_sum = in_cksum(m, len); - - /* finish the ip header */ - h->ip_v = 4; - h->ip_hl = sizeof(*h) >> 2; - h->ip_tos = IPTOS_LOWDELAY; - h->ip_off = 0; - h->ip_len = len; - h->ip_ttl = V_ip_defttl; - h->ip_sum = 0; - break; -#ifdef INET6 - case 6: - th->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*h6), - sizeof(struct tcphdr)); - - /* finish the ip6 header */ - h6->ip6_vfc |= IPV6_VERSION; - h6->ip6_hlim = IPV6_DEFHLIM; - break; -#endif - } - - return (m); -#endif /* !__linux__ */ -} - -/* - * sends a reject message, consuming the mbuf passed as an argument. - */ -static void -send_reject(struct ip_fw_args *args, int code, int ip_len, struct ip *ip) -{ - -#if 0 - /* XXX When ip is not guaranteed to be at mtod() we will - * need to account for this */ - * The mbuf will however be thrown away so we can adjust it. - * Remember we did an m_pullup on it already so we - * can make some assumptions about contiguousness. - */ - if (args->L3offset) - m_adj(m, args->L3offset); -#endif - if (code != ICMP_REJECT_RST) { /* Send an ICMP unreach */ - /* We need the IP header in host order for icmp_error(). 
*/ -#if !defined( __linux__ ) && !defined( _WIN32 ) - if (args->eh != NULL) { - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - } -#endif - icmp_error(args->m, ICMP_UNREACH, code, 0L, 0); - } else if (args->f_id.proto == IPPROTO_TCP) { - struct tcphdr *const tcp = - L3HDR(struct tcphdr, mtod(args->m, struct ip *)); - if ( (tcp->th_flags & TH_RST) == 0) { - struct mbuf *m; - m = send_pkt(args->m, &(args->f_id), - ntohl(tcp->th_seq), ntohl(tcp->th_ack), - tcp->th_flags | TH_RST); - if (m != NULL) - ip_output(m, NULL, NULL, 0, NULL, NULL); - } - m_freem(args->m); - } else - m_freem(args->m); - args->m = NULL; -} - -/** - * - * Given an ip_fw *, lookup_next_rule will return a pointer - * to the next rule, which can be either the jump - * target (for skipto instructions) or the next one in the list (in - * all other cases including a missing jump target). - * The result is also written in the "next_rule" field of the rule. - * Backward jumps are not allowed, so start looking from the next - * rule... - * - * This never returns NULL -- in case we do not have an exact match, - * the next rule is returned. When the ruleset is changed, - * pointers are flushed so we are always correct. 
- */ - -static struct ip_fw * -lookup_next_rule(struct ip_fw *me, u_int32_t tablearg) -{ - struct ip_fw *rule = NULL; - ipfw_insn *cmd; - u_int16_t rulenum; - - /* look for action, in case it is a skipto */ - cmd = ACTION_PTR(me); - if (cmd->opcode == O_LOG) - cmd += F_LEN(cmd); - if (cmd->opcode == O_ALTQ) - cmd += F_LEN(cmd); - if (cmd->opcode == O_TAG) - cmd += F_LEN(cmd); - if (cmd->opcode == O_SKIPTO ) { - if (tablearg != 0) { - rulenum = (u_int16_t)tablearg; - } else { - rulenum = cmd->arg1; - } - for (rule = me->next; rule ; rule = rule->next) { - if (rule->rulenum >= rulenum) { - break; - } - } - } - if (rule == NULL) /* failure or not a skipto */ - rule = me->next; - me->next_rule = rule; - return rule; -} - -static int -add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen, uint32_t value) -{ - struct radix_node_head *rnh; - struct table_entry *ent; - struct radix_node *rn; - - if (tbl >= IPFW_TABLES_MAX) - return (EINVAL); - rnh = ch->tables[tbl]; - ent = malloc(sizeof(*ent), M_IPFW_TBL, M_NOWAIT | M_ZERO); - if (ent == NULL) - return (ENOMEM); - ent->value = value; -#ifdef linux - /* there is no sin_len on linux, and the code assumes the first - * byte in the sockaddr to contain the length in bits. - * So we just dump the number right there - */ - *((uint8_t *)&(ent->addr)) = 8; - *((uint8_t *)&(ent->mask)) = 8; -#else - ent->addr.sin_len = ent->mask.sin_len = 8; -#endif - ent->mask.sin_addr.s_addr = htonl(mlen ? 
~((1 << (32 - mlen)) - 1) : 0); - ent->addr.sin_addr.s_addr = addr & ent->mask.sin_addr.s_addr; - IPFW_WLOCK(ch); - rn = rnh->rnh_addaddr(&ent->addr, &ent->mask, rnh, (void *)ent); - if (rn == NULL) { - IPFW_WUNLOCK(ch); - free(ent, M_IPFW_TBL); - return (EEXIST); - } - IPFW_WUNLOCK(ch); - return (0); -} - -static int -del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen) -{ - struct radix_node_head *rnh; - struct table_entry *ent; - struct sockaddr_in sa, mask; - - if (tbl >= IPFW_TABLES_MAX) - return (EINVAL); - rnh = ch->tables[tbl]; -#ifdef linux - /* there is no sin_len on linux, see above */ - *((uint8_t *)&sa) = 8; - *((uint8_t *)&mask) = 8; -#else - sa.sin_len = mask.sin_len = 8; -#endif - mask.sin_addr.s_addr = htonl(mlen ? ~((1 << (32 - mlen)) - 1) : 0); - sa.sin_addr.s_addr = addr & mask.sin_addr.s_addr; - IPFW_WLOCK(ch); - ent = (struct table_entry *)rnh->rnh_deladdr(&sa, &mask, rnh); - if (ent == NULL) { - IPFW_WUNLOCK(ch); - return (ESRCH); - } - IPFW_WUNLOCK(ch); - free(ent, M_IPFW_TBL); - return (0); -} - -static int -flush_table_entry(struct radix_node *rn, void *arg) -{ - struct radix_node_head * const rnh = arg; - struct table_entry *ent; - - ent = (struct table_entry *) - rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); - if (ent != NULL) - free(ent, M_IPFW_TBL); - return (0); -} - -static int -flush_table(struct ip_fw_chain *ch, uint16_t tbl) -{ - struct radix_node_head *rnh; - - IPFW_WLOCK_ASSERT(ch); - - if (tbl >= IPFW_TABLES_MAX) - return (EINVAL); - rnh = ch->tables[tbl]; - KASSERT(rnh != NULL, ("NULL IPFW table")); - rnh->rnh_walktree(rnh, flush_table_entry, rnh); - return (0); -} - -static void -flush_tables(struct ip_fw_chain *ch) -{ - uint16_t tbl; - - IPFW_WLOCK_ASSERT(ch); - - for (tbl = 0; tbl < IPFW_TABLES_MAX; tbl++) - flush_table(ch, tbl); -} - -static int -init_tables(struct ip_fw_chain *ch) -{ - int i; - uint16_t j; - - for (i = 0; i < IPFW_TABLES_MAX; i++) { - if (!rn_inithead((void 
**)&ch->tables[i], 32)) { - for (j = 0; j < i; j++) { - (void) flush_table(ch, j); - } - return (ENOMEM); - } - } - return (0); -} - -static int -lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val) -{ - struct radix_node_head *rnh; - struct table_entry *ent; - struct sockaddr_in sa; - - if (tbl >= IPFW_TABLES_MAX) - return (0); - rnh = ch->tables[tbl]; -#ifdef linux - /* there is no sin_len on linux, see above */ - *((uint8_t *)&sa) = 8; -#else - sa.sin_len = 8; -#endif - sa.sin_addr.s_addr = addr; - ent = (struct table_entry *)(rnh->rnh_lookup(&sa, NULL, rnh)); - if (ent != NULL) { - *val = ent->value; - return (1); - } - return (0); -} - -static int -count_table_entry(struct radix_node *rn, void *arg) -{ - u_int32_t * const cnt = arg; - - (*cnt)++; - return (0); -} - -static int -count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) -{ - struct radix_node_head *rnh; - - if (tbl >= IPFW_TABLES_MAX) - return (EINVAL); - rnh = ch->tables[tbl]; - *cnt = 0; - rnh->rnh_walktree(rnh, count_table_entry, cnt); - return (0); -} - -static int -dump_table_entry(struct radix_node *rn, void *arg) -{ - struct table_entry * const n = (struct table_entry *)rn; - ipfw_table * const tbl = arg; - ipfw_table_entry *ent; - - if (tbl->cnt == tbl->size) - return (1); - ent = &tbl->ent[tbl->cnt]; - ent->tbl = tbl->tbl; - if (in_nullhost(n->mask.sin_addr)) - ent->masklen = 0; - else - ent->masklen = 33 - ffs(ntohl(n->mask.sin_addr.s_addr)); - ent->addr = n->addr.sin_addr.s_addr; - ent->value = n->value; - tbl->cnt++; - return (0); -} - -static int -dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) -{ - struct radix_node_head *rnh; - - if (tbl->tbl >= IPFW_TABLES_MAX) - return (EINVAL); - rnh = ch->tables[tbl->tbl]; - tbl->cnt = 0; - rnh->rnh_walktree(rnh, dump_table_entry, tbl); - return (0); -} - -static int -check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, - struct in_addr dst_ip, u_int16_t dst_port, struct in_addr src_ip, - 
u_int16_t src_port, struct ucred **uc, int *ugid_lookup, - struct inpcb *inp) -{ -#ifdef linux - int match = 0; - struct sk_buff *skb = ((struct mbuf *)inp)->m_skb; - struct bsd_ucred *u = (struct bsd_ucred *)uc; - - if (*ugid_lookup == 0) { /* actively lookup and copy in cache */ - /* returns null if any element of the chain up to file is null. - * if sk != NULL then we also have a reference - */ - *ugid_lookup = linux_lookup(proto, - src_ip.s_addr, htons(src_port), - dst_ip.s_addr, htons(dst_port), - skb, oif ? 1 : 0, u); - - } - if (*ugid_lookup < 0) - return 0; - - if (insn->o.opcode == O_UID) - match = (u->uid == (uid_t)insn->d[0]); - else if (insn->o.opcode == O_JAIL) - match = (u->xid == (uid_t)insn->d[0]); - else if (insn->o.opcode == O_GID) - match = (u->gid == (uid_t)insn->d[0]); - - return match; - -#else /* FreeBSD */ - - struct inpcbinfo *pi; - int wildcard; - struct inpcb *pcb; - int match; - - /* - * Check to see if the UDP or TCP stack supplied us with - * the PCB. If so, rather then holding a lock and looking - * up the PCB, we can use the one that was supplied. - */ - if (inp && *ugid_lookupp == 0) { - INP_LOCK_ASSERT(inp); - if (inp->inp_socket != NULL) { - *uc = crhold(inp->inp_cred); - *ugid_lookupp = 1; - } else - *ugid_lookupp = -1; - } - /* - * If we have already been here and the packet has no - * PCB entry associated with it, then we can safely - * assume that this is a no match. - */ - if (*ugid_lookupp == -1) - return (0); - if (proto == IPPROTO_TCP) { - wildcard = 0; - pi = &V_tcbinfo; - } else if (proto == IPPROTO_UDP) { - wildcard = INPLOOKUP_WILDCARD; - pi = &V_udbinfo; - } else - return 0; - match = 0; - if (*ugid_lookupp == 0) { - INP_INFO_RLOCK(pi); - pcb = (oif) ? 
- in_pcblookup_hash(pi, - dst_ip, htons(dst_port), - src_ip, htons(src_port), - wildcard, oif) : - in_pcblookup_hash(pi, - src_ip, htons(src_port), - dst_ip, htons(dst_port), - wildcard, NULL); - if (pcb != NULL) { - *uc = crhold(pcb->inp_cred); - *ugid_lookupp = 1; - } - INP_INFO_RUNLOCK(pi); - if (*ugid_lookupp == 0) { - /* - * If the lookup did not yield any results, there - * is no sense in coming back and trying again. So - * we can set lookup to -1 and ensure that we wont - * bother the pcb system again. - */ - *ugid_lookupp = -1; - return (0); - } - } - if (insn->o.opcode == O_UID) - match = ((*uc)->cr_uid == (uid_t)insn->d[0]); - else if (insn->o.opcode == O_GID) - match = groupmember((gid_t)insn->d[0], *uc); - else if (insn->o.opcode == O_JAIL) - match = ((*uc)->cr_prison->pr_id == (int)insn->d[0]); - return match; -#endif -} - -/* - * The main check routine for the firewall. - * - * All arguments are in args so we can modify them and return them - * back to the caller. - * - * Parameters: - * - * args->m (in/out) The packet; we set to NULL when/if we nuke it. - * Starts with the IP header. - * args->eh (in) Mac header if present, or NULL for layer3 packet. - * args->L3offset Number of bytes bypassed if we came from L2. - * e.g. often sizeof(eh) ** NOTYET ** - * args->oif Outgoing interface, or NULL if packet is incoming. - * The incoming interface is in the mbuf. (in) - * args->divert_rule (in/out) - * Skip up to the first rule past this rule number; - * upon return, non-zero port number for divert or tee. - * - * args->rule Pointer to the last matching rule (in/out) - * args->next_hop Socket we are forwarding to (out). 
- * args->f_id Addresses grabbed from the packet (out) - * args->cookie a cookie depending on rule action - * - * Return value: - * - * IP_FW_PASS the packet must be accepted - * IP_FW_DENY the packet must be dropped - * IP_FW_DIVERT divert packet, port in m_tag - * IP_FW_TEE tee packet, port in m_tag - * IP_FW_DUMMYNET to dummynet, pipe in args->cookie - * IP_FW_NETGRAPH into netgraph, cookie args->cookie - * - */ -int -ipfw_chk(struct ip_fw_args *args) -{ - - /* - * Local variables holding state during the processing of a packet: - * - * IMPORTANT NOTE: to speed up the processing of rules, there - * are some assumption on the values of the variables, which - * are documented here. Should you change them, please check - * the implementation of the various instructions to make sure - * that they still work. - * - * args->eh The MAC header. It is non-null for a layer2 - * packet, it is NULL for a layer-3 packet. - * **notyet** - * args->L3offset Offset in the packet to the L3 (IP or equiv.) header. - * - * m | args->m Pointer to the mbuf, as received from the caller. - * It may change if ipfw_chk() does an m_pullup, or if it - * consumes the packet because it calls send_reject(). - * XXX This has to change, so that ipfw_chk() never modifies - * or consumes the buffer. - * ip is the beginning of the ip(4 or 6) header. - * Calculated by adding the L3offset to the start of data. - * (Until we start using L3offset, the packet is - * supposed to start with the ip header). - */ - struct mbuf *m = args->m; - struct ip *ip = mtod(m, struct ip *); - - /* - * For rules which contain uid/gid or jail constraints, cache - * a copy of the users credentials after the pcb lookup has been - * executed. This will speed up the processing of rules with - * these types of constraints, as well as decrease contention - * on pcb related locks. 
- */ - struct bsd_ucred ucred_cache; - int ucred_lookup = 0; - - /* - * divinput_flags If non-zero, set to the IP_FW_DIVERT_*_FLAG - * associated with a packet input on a divert socket. This - * will allow to distinguish traffic and its direction when - * it originates from a divert socket. - */ - u_int divinput_flags = 0; - - /* - * oif | args->oif If NULL, ipfw_chk has been called on the - * inbound path (ether_input, ip_input). - * If non-NULL, ipfw_chk has been called on the outbound path - * (ether_output, ip_output). - */ - struct ifnet *oif = args->oif; - - struct ip_fw *f = NULL; /* matching rule */ - int retval = 0; - - /* - * hlen The length of the IP header. - */ - u_int hlen = 0; /* hlen >0 means we have an IP pkt */ - - /* - * offset The offset of a fragment. offset != 0 means that - * we have a fragment at this offset of an IPv4 packet. - * offset == 0 means that (if this is an IPv4 packet) - * this is the first or only fragment. - * For IPv6 offset == 0 means there is no Fragment Header. - * If offset != 0 for IPv6 always use correct mask to - * get the correct offset because we add IP6F_MORE_FRAG - * to be able to dectect the first fragment which would - * otherwise have offset = 0. - */ - u_short offset = 0; - - /* - * Local copies of addresses. They are only valid if we have - * an IP packet. - * - * proto The protocol. Set to 0 for non-ip packets, - * or to the protocol read from the packet otherwise. - * proto != 0 means that we have an IPv4 packet. - * - * src_port, dst_port port numbers, in HOST format. Only - * valid for TCP and UDP packets. - * - * src_ip, dst_ip ip addresses, in NETWORK format. - * Only valid for IPv4 packets. 
- */ - u_int8_t proto; - u_int16_t src_port = 0, dst_port = 0; /* NOTE: host format */ - struct in_addr src_ip, dst_ip; /* NOTE: network format */ - u_int16_t ip_len=0; - int pktlen; - u_int16_t etype = 0; /* Host order stored ether type */ - - /* - * dyn_dir = MATCH_UNKNOWN when rules unchecked, - * MATCH_NONE when checked and not matched (q = NULL), - * MATCH_FORWARD or MATCH_REVERSE otherwise (q != NULL) - */ - int dyn_dir = MATCH_UNKNOWN; - ipfw_dyn_rule *q = NULL; - struct ip_fw_chain *chain = &V_layer3_chain; - struct m_tag *mtag; - - /* - * We store in ulp a pointer to the upper layer protocol header. - * In the ipv4 case this is easy to determine from the header, - * but for ipv6 we might have some additional headers in the middle. - * ulp is NULL if not found. - */ - void *ulp = NULL; /* upper layer protocol pointer. */ - /* XXX ipv6 variables */ - int is_ipv6 = 0; - u_int16_t ext_hd = 0; /* bits vector for extension header filtering */ - /* end of ipv6 variables */ - int is_ipv4 = 0; - - int done = 0; /* flag to exit the outer loop */ - - if (m->m_flags & M_SKIP_FIREWALL || (! V_ipfw_vnet_ready)) - return (IP_FW_PASS); /* accept */ - - dst_ip.s_addr = 0; /* make sure it is initialized */ - src_ip.s_addr = 0; /* make sure it is initialized */ - pktlen = m->m_pkthdr.len; - args->f_id.fib = M_GETFIB(m); /* note mbuf not altered) */ - proto = args->f_id.proto = 0; /* mark f_id invalid */ - /* XXX 0 is a valid proto: IP/IPv6 Hop-by-Hop Option */ - -/* - * PULLUP_TO(len, p, T) makes sure that len + sizeof(T) is contiguous, - * then it sets p to point at the offset "len" in the mbuf. WARNING: the - * pointer might become stale after other pullups (but we never use it - * this way). 
- */ -#define PULLUP_TO(_len, p, T) \ -do { \ - int x = (_len) + sizeof(T); \ - if ((m)->m_len < x) { \ - goto pullup_failed; \ - } \ - p = (mtod(m, char *) + (_len)); \ -} while (0) - - /* - * if we have an ether header, - */ - if (args->eh) - etype = ntohs(args->eh->ether_type); - - /* Identify IP packets and fill up variables. */ - if (pktlen >= sizeof(struct ip6_hdr) && - (args->eh == NULL || etype == ETHERTYPE_IPV6) && ip->ip_v == 6) { - struct ip6_hdr *ip6 = (struct ip6_hdr *)ip; - is_ipv6 = 1; - args->f_id.addr_type = 6; - hlen = sizeof(struct ip6_hdr); - proto = ip6->ip6_nxt; - - /* Search extension headers to find upper layer protocols */ - while (ulp == NULL) { - switch (proto) { - case IPPROTO_ICMPV6: - PULLUP_TO(hlen, ulp, struct icmp6_hdr); - args->f_id.flags = ICMP6(ulp)->icmp6_type; - break; - - case IPPROTO_TCP: - PULLUP_TO(hlen, ulp, struct tcphdr); - dst_port = TCP(ulp)->th_dport; - src_port = TCP(ulp)->th_sport; - args->f_id.flags = TCP(ulp)->th_flags; - break; - - case IPPROTO_SCTP: - PULLUP_TO(hlen, ulp, struct sctphdr); - src_port = SCTP(ulp)->src_port; - dst_port = SCTP(ulp)->dest_port; - break; - - case IPPROTO_UDP: - PULLUP_TO(hlen, ulp, struct udphdr); - dst_port = UDP(ulp)->uh_dport; - src_port = UDP(ulp)->uh_sport; - break; - - case IPPROTO_HOPOPTS: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_hbh); - ext_hd |= EXT_HOPOPTS; - hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; - proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; - ulp = NULL; - break; - - case IPPROTO_ROUTING: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_rthdr); - switch (((struct ip6_rthdr *)ulp)->ip6r_type) { - case 0: - ext_hd |= EXT_RTHDR0; - break; - case 2: - ext_hd |= EXT_RTHDR2; - break; - default: - printf("IPFW2: IPV6 - Unknown Routing " - "Header type(%d)\n", - ((struct ip6_rthdr *)ulp)->ip6r_type); - if (V_fw_deny_unknown_exthdrs) - return (IP_FW_DENY); - break; - } - ext_hd |= EXT_ROUTING; - hlen += (((struct ip6_rthdr *)ulp)->ip6r_len + 1) << 3; - proto = 
((struct ip6_rthdr *)ulp)->ip6r_nxt; - ulp = NULL; - break; - - case IPPROTO_FRAGMENT: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_frag); - ext_hd |= EXT_FRAGMENT; - hlen += sizeof (struct ip6_frag); - proto = ((struct ip6_frag *)ulp)->ip6f_nxt; - offset = ((struct ip6_frag *)ulp)->ip6f_offlg & - IP6F_OFF_MASK; - /* Add IP6F_MORE_FRAG for offset of first - * fragment to be != 0. */ - offset |= ((struct ip6_frag *)ulp)->ip6f_offlg & - IP6F_MORE_FRAG; - if (offset == 0) { - printf("IPFW2: IPV6 - Invalid Fragment " - "Header\n"); - if (V_fw_deny_unknown_exthdrs) - return (IP_FW_DENY); - break; - } - args->f_id.frag_id6 = - ntohl(((struct ip6_frag *)ulp)->ip6f_ident); - ulp = NULL; - break; - - case IPPROTO_DSTOPTS: /* RFC 2460 */ - PULLUP_TO(hlen, ulp, struct ip6_hbh); - ext_hd |= EXT_DSTOPTS; - hlen += (((struct ip6_hbh *)ulp)->ip6h_len + 1) << 3; - proto = ((struct ip6_hbh *)ulp)->ip6h_nxt; - ulp = NULL; - break; - - case IPPROTO_AH: /* RFC 2402 */ - PULLUP_TO(hlen, ulp, struct ip6_ext); - ext_hd |= EXT_AH; - hlen += (((struct ip6_ext *)ulp)->ip6e_len + 2) << 2; - proto = ((struct ip6_ext *)ulp)->ip6e_nxt; - ulp = NULL; - break; - - case IPPROTO_ESP: /* RFC 2406 */ - PULLUP_TO(hlen, ulp, uint32_t); /* SPI, Seq# */ - /* Anything past Seq# is variable length and - * data past this ext. header is encrypted. */ - ext_hd |= EXT_ESP; - break; - - case IPPROTO_NONE: /* RFC 2460 */ - /* - * Packet ends here, and IPv6 header has - * already been pulled up. If ip6e_len!=0 - * then octets must be ignored. - */ - ulp = ip; /* non-NULL to get out of loop. */ - break; - - case IPPROTO_OSPFIGP: - /* XXX OSPF header check? */ - PULLUP_TO(hlen, ulp, struct ip6_ext); - break; - - case IPPROTO_PIM: - /* XXX PIM header check? 
*/ - PULLUP_TO(hlen, ulp, struct pim); - break; - - case IPPROTO_CARP: - PULLUP_TO(hlen, ulp, struct carp_header); - if (((struct carp_header *)ulp)->carp_version != - CARP_VERSION) - return (IP_FW_DENY); - if (((struct carp_header *)ulp)->carp_type != - CARP_ADVERTISEMENT) - return (IP_FW_DENY); - break; - - case IPPROTO_IPV6: /* RFC 2893 */ - PULLUP_TO(hlen, ulp, struct ip6_hdr); - break; - - case IPPROTO_IPV4: /* RFC 2893 */ - PULLUP_TO(hlen, ulp, struct ip); - break; - - default: - printf("IPFW2: IPV6 - Unknown Extension " - "Header(%d), ext_hd=%x\n", proto, ext_hd); - if (V_fw_deny_unknown_exthdrs) - return (IP_FW_DENY); - PULLUP_TO(hlen, ulp, struct ip6_ext); - break; - } /*switch */ - } - ip = mtod(m, struct ip *); - ip6 = (struct ip6_hdr *)ip; - args->f_id.src_ip6 = ip6->ip6_src; - args->f_id.dst_ip6 = ip6->ip6_dst; - args->f_id.src_ip = 0; - args->f_id.dst_ip = 0; - args->f_id.flow_id6 = ntohl(ip6->ip6_flow); - } else if (pktlen >= sizeof(struct ip) && - (args->eh == NULL || etype == ETHERTYPE_IP) && ip->ip_v == 4) { - is_ipv4 = 1; - hlen = ip->ip_hl << 2; - args->f_id.addr_type = 4; - - /* - * Collect parameters into local variables for faster matching. - */ - proto = ip->ip_p; - src_ip = ip->ip_src; - dst_ip = ip->ip_dst; - - if (1 || args->eh != NULL) { /* layer 2 packets are as on the wire */ - offset = ntohs(ip->ip_off) & IP_OFFMASK; - ip_len = ntohs(ip->ip_len); - } else { - offset = ip->ip_off & IP_OFFMASK; - ip_len = ip->ip_len; - } - pktlen = ip_len < pktlen ? 
ip_len : pktlen; - - if (offset == 0) { - switch (proto) { - case IPPROTO_TCP: - PULLUP_TO(hlen, ulp, struct tcphdr); - dst_port = TCP(ulp)->th_dport; - src_port = TCP(ulp)->th_sport; - args->f_id.flags = TCP(ulp)->th_flags; - break; - - case IPPROTO_UDP: - PULLUP_TO(hlen, ulp, struct udphdr); - dst_port = UDP(ulp)->uh_dport; - src_port = UDP(ulp)->uh_sport; - break; - - case IPPROTO_ICMP: - PULLUP_TO(hlen, ulp, struct icmphdr); - args->f_id.flags = ICMP(ulp)->icmp_type; - break; - - default: - break; - } - } - - ip = mtod(m, struct ip *); - args->f_id.src_ip = ntohl(src_ip.s_addr); - args->f_id.dst_ip = ntohl(dst_ip.s_addr); - } -#undef PULLUP_TO - if (proto) { /* we may have port numbers, store them */ - args->f_id.proto = proto; - args->f_id.src_port = src_port = ntohs(src_port); - args->f_id.dst_port = dst_port = ntohs(dst_port); - } - - IPFW_RLOCK(chain); - if (! V_ipfw_vnet_ready) { /* shutting down, leave NOW. */ - IPFW_RUNLOCK(chain); - return (IP_FW_PASS); /* accept */ - } - mtag = m_tag_find(m, PACKET_TAG_DIVERT, NULL); - if (args->rule) { - /* - * Packet has already been tagged. Look for the next rule - * to restart processing. Make sure that args->rule still - * exists and not changed. - * If fw_one_pass != 0 then just accept it. - * XXX should not happen here, but optimized out in - * the caller. - */ - if (V_fw_one_pass) { - IPFW_RUNLOCK(chain); - return (IP_FW_PASS); - } - if (chain->id != args->chain_id) { - for (f = chain->rules; f != NULL; f = f->next) - if (f == args->rule && f->id == args->rule_id) - break; - - if (f != NULL) - f = f->next_rule; - else - f = chain->default_rule; - } else - f = args->rule->next_rule; - - if (f == NULL) - f = lookup_next_rule(args->rule, 0); - } else { - /* - * Find the starting rule. It can be either the first - * one, or the one after divert_rule if asked so. - */ - int skipto = mtag ? 
divert_cookie(mtag) : 0; - - f = chain->rules; - if (args->eh == NULL && skipto != 0) { - if (skipto >= IPFW_DEFAULT_RULE) { - IPFW_RUNLOCK(chain); - return (IP_FW_DENY); /* invalid */ - } -// f = rule2ptr(chain, skipto+1); - while (f && f->rulenum <= skipto) - f = f->next; - } - } - /* reset divert rule to avoid confusion later */ - if (mtag) { - divinput_flags = divert_info(mtag) & - (IP_FW_DIVERT_OUTPUT_FLAG | IP_FW_DIVERT_LOOPBACK_FLAG); - m_tag_delete(m, mtag); - } - - /* - * Now scan the rules, and parse microinstructions for each rule. - * We have two nested loops and an inner switch. Sometimes we - * need to break out of one or both loops, or re-enter one of - * the loops with updated variables. Loop variables are: - * - * f (outer loop) points to the current rule. - * On output it points to the matching rule. - * done (outer loop) is used as a flag to break the loop. - * l (inner loop) residual length of current rule. - * cmd points to the current microinstruction. - * - * We break the inner loop by setting l=0 and possibly - * cmdlen=0 if we don't want to advance cmd. - * We break the outer loop by setting done=1 - * We can restart the inner loop by setting l>0 and f, cmd - * as needed. - */ - for (; f; f = f->next) { - ipfw_insn *cmd; - uint32_t tablearg = 0; - int l, cmdlen, skip_or; /* skip rest of OR block */ - -/* again: */ - if (V_set_disable & (1 << f->set) ) - continue; - - skip_or = 0; - for (l = f->cmd_len, cmd = f->cmd ; l > 0 ; - l -= cmdlen, cmd += cmdlen) { - int match; - - /* - * check_body is a jump target used when we find a - * CHECK_STATE, and need to jump to the body of - * the target rule. - */ - -/* check_body: */ - cmdlen = F_LEN(cmd); - /* - * An OR block (insn_1 || .. || insn_n) has the - * F_OR bit set in all but the last instruction. - * The first match will set "skip_or", and cause - * the following instructions to be skipped until - * past the one with the F_OR bit clear. 
- */ - if (skip_or) { /* skip this instruction */ - if ((cmd->len & F_OR) == 0) - skip_or = 0; /* next one is good */ - continue; - } - match = 0; /* set to 1 if we succeed */ - - switch (cmd->opcode) { - /* - * The first set of opcodes compares the packet's - * fields with some pattern, setting 'match' if a - * match is found. At the end of the loop there is - * logic to deal with F_NOT and F_OR flags associated - * with the opcode. - */ - case O_NOP: - match = 1; - break; - - case O_FORWARD_MAC: - printf("ipfw: opcode %d unimplemented\n", - cmd->opcode); - break; - - case O_GID: - case O_UID: - case O_JAIL: - /* - * We only check offset == 0 && proto != 0, - * as this ensures that we have a - * packet with the ports info. - */ - if (offset!=0) - break; - if (is_ipv6) /* XXX to be fixed later */ - break; - if (proto == IPPROTO_TCP || - proto == IPPROTO_UDP) - match = check_uidgid( - (ipfw_insn_u32 *)cmd, - proto, oif, - dst_ip, dst_port, - src_ip, src_port, (struct ucred **)&ucred_cache, - &ucred_lookup, (struct inpcb *)args->m); - break; - - case O_RECV: - match = iface_match(m->m_pkthdr.rcvif, - (ipfw_insn_if *)cmd); - break; - - case O_XMIT: - match = iface_match(oif, (ipfw_insn_if *)cmd); - break; - - case O_VIA: - match = iface_match(oif ? 
oif : - m->m_pkthdr.rcvif, (ipfw_insn_if *)cmd); - break; - - case O_MACADDR2: - if (args->eh != NULL) { /* have MAC header */ - u_int32_t *want = (u_int32_t *) - ((ipfw_insn_mac *)cmd)->addr; - u_int32_t *mask = (u_int32_t *) - ((ipfw_insn_mac *)cmd)->mask; - u_int32_t *hdr = (u_int32_t *)args->eh; - - match = - ( want[0] == (hdr[0] & mask[0]) && - want[1] == (hdr[1] & mask[1]) && - want[2] == (hdr[2] & mask[2]) ); - } - break; - - case O_MAC_TYPE: - if (args->eh != NULL) { - u_int16_t *p = - ((ipfw_insn_u16 *)cmd)->ports; - int i; - - for (i = cmdlen - 1; !match && i>0; - i--, p += 2) - match = (etype >= p[0] && - etype <= p[1]); - } - break; - - case O_FRAG: - match = (offset != 0); - break; - - case O_IN: /* "out" is "not in" */ - match = (oif == NULL); - break; - - case O_LAYER2: - match = (args->eh != NULL); - break; - - case O_DIVERTED: - match = (cmd->arg1 & 1 && divinput_flags & - IP_FW_DIVERT_LOOPBACK_FLAG) || - (cmd->arg1 & 2 && divinput_flags & - IP_FW_DIVERT_OUTPUT_FLAG); - break; - - case O_PROTO: - /* - * We do not allow an arg of 0 so the - * check of "proto" only suffices. - */ - match = (proto == cmd->arg1); - break; - - case O_IP_SRC: - match = is_ipv4 && - (((ipfw_insn_ip *)cmd)->addr.s_addr == - src_ip.s_addr); - break; - - case O_IP_SRC_LOOKUP: - case O_IP_DST_LOOKUP: - if (is_ipv4) { - uint32_t a = - (cmd->opcode == O_IP_DST_LOOKUP) ? 
- dst_ip.s_addr : src_ip.s_addr; - uint32_t v = 0; - - if (cmdlen > F_INSN_SIZE(ipfw_insn_u32)) { - v = ((ipfw_insn_u32 *)cmd)->d[1]; - if (v == 0) - a = dst_ip.s_addr; - else if (v == 1) - a = src_ip.s_addr; - else if (offset != 0) - break; - else if (proto != IPPROTO_TCP && - proto != IPPROTO_UDP) - break; - else if (v == 2) - a = dst_port; - else if (v == 3) - a = src_port; - else if (v == 4 || v == 5) { - check_uidgid( - (ipfw_insn_u32 *)cmd, - proto, oif, - dst_ip, dst_port, - src_ip, src_port, (struct ucred **)&ucred_cache, - &ucred_lookup, (struct inpcb *)args->m); -#ifdef linux - if (v ==4 /* O_UID */) - a = ucred_cache.uid; - else if (v == 5 /* O_JAIL */) - a = ucred_cache.xid; -#else - if (v ==4 /* O_UID */) - a = (*uc)->cr_uid; - else if (v == 5 /* O_JAIL */) - a = (*uc)->cr_prison->pr_id; -#endif - } else - break; - } - match = lookup_table(chain, cmd->arg1, a, - &v); - if (!match) - break; - if (cmdlen == F_INSN_SIZE(ipfw_insn_u32)) - match = - ((ipfw_insn_u32 *)cmd)->d[0] == v; - else - tablearg = v; - } - break; - - case O_IP_SRC_MASK: - case O_IP_DST_MASK: - if (is_ipv4) { - uint32_t a = - (cmd->opcode == O_IP_DST_MASK) ? - dst_ip.s_addr : src_ip.s_addr; - uint32_t *p = ((ipfw_insn_u32 *)cmd)->d; - int i = cmdlen-1; - - for (; !match && i>0; i-= 2, p+= 2) - match = (p[0] == (a & p[1])); - } - break; - - case O_IP_SRC_ME: - if (is_ipv4) { - struct ifnet *tif; - - INADDR_TO_IFP(src_ip, tif); - match = (tif != NULL); - } - break; - - case O_IP_DST_SET: - case O_IP_SRC_SET: - if (is_ipv4) { - u_int32_t *d = (u_int32_t *)(cmd+1); - u_int32_t addr = - cmd->opcode == O_IP_DST_SET ? 
- args->f_id.dst_ip : - args->f_id.src_ip; - - if (addr < d[0]) - break; - addr -= d[0]; /* subtract base */ - match = (addr < cmd->arg1) && - ( d[ 1 + (addr>>5)] & - (1<<(addr & 0x1f)) ); - } - break; - - case O_IP_DST: - match = is_ipv4 && - (((ipfw_insn_ip *)cmd)->addr.s_addr == - dst_ip.s_addr); - break; - - case O_IP_DST_ME: - if (is_ipv4) { - struct ifnet *tif; - - INADDR_TO_IFP(dst_ip, tif); - match = (tif != NULL); - } - break; - - case O_IP_SRCPORT: - case O_IP_DSTPORT: - /* - * offset == 0 && proto != 0 is enough - * to guarantee that we have a - * packet with port info. - */ - if ((proto==IPPROTO_UDP || proto==IPPROTO_TCP) - && offset == 0) { - u_int16_t x = - (cmd->opcode == O_IP_SRCPORT) ? - src_port : dst_port ; - u_int16_t *p = - ((ipfw_insn_u16 *)cmd)->ports; - int i; - - for (i = cmdlen - 1; !match && i>0; - i--, p += 2) - match = (x>=p[0] && x<=p[1]); - } - break; - - case O_ICMPTYPE: - match = (offset == 0 && proto==IPPROTO_ICMP && - icmptype_match(ICMP(ulp), (ipfw_insn_u32 *)cmd) ); - break; - -#ifdef INET6 - case O_ICMP6TYPE: - match = is_ipv6 && offset == 0 && - proto==IPPROTO_ICMPV6 && - icmp6type_match( - ICMP6(ulp)->icmp6_type, - (ipfw_insn_u32 *)cmd); - break; -#endif /* INET6 */ - - case O_IPOPT: - match = (is_ipv4 && - ipopts_match(ip, cmd) ); - break; - - case O_IPVER: - match = (is_ipv4 && - cmd->arg1 == ip->ip_v); - break; - - case O_IPID: - case O_IPLEN: - case O_IPTTL: - if (is_ipv4) { /* only for IP packets */ - uint16_t x; - uint16_t *p; - int i; - - if (cmd->opcode == O_IPLEN) - x = ip_len; - else if (cmd->opcode == O_IPTTL) - x = ip->ip_ttl; - else /* must be IPID */ - x = ntohs(ip->ip_id); - if (cmdlen == 1) { - match = (cmd->arg1 == x); - break; - } - /* otherwise we have ranges */ - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for (; !match && i>0; i--, p += 2) - match = (x >= p[0] && x <= p[1]); - } - break; - - case O_IPPRECEDENCE: - match = (is_ipv4 && - (cmd->arg1 == (ip->ip_tos & 0xe0)) ); - break; - - case 
O_IPTOS: - match = (is_ipv4 && - flags_match(cmd, ip->ip_tos)); - break; - - case O_TCPDATALEN: - if (proto == IPPROTO_TCP && offset == 0) { - struct tcphdr *tcp; - uint16_t x; - uint16_t *p; - int i; - - tcp = TCP(ulp); - x = ip_len - - ((ip->ip_hl + tcp->th_off) << 2); - if (cmdlen == 1) { - match = (cmd->arg1 == x); - break; - } - /* otherwise we have ranges */ - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for (; !match && i>0; i--, p += 2) - match = (x >= p[0] && x <= p[1]); - } - break; - - case O_TCPFLAGS: - match = (proto == IPPROTO_TCP && offset == 0 && - flags_match(cmd, TCP(ulp)->th_flags)); - break; - - case O_TCPOPTS: - match = (proto == IPPROTO_TCP && offset == 0 && - tcpopts_match(TCP(ulp), cmd)); - break; - - case O_TCPSEQ: - match = (proto == IPPROTO_TCP && offset == 0 && - ((ipfw_insn_u32 *)cmd)->d[0] == - TCP(ulp)->th_seq); - break; - - case O_TCPACK: - match = (proto == IPPROTO_TCP && offset == 0 && - ((ipfw_insn_u32 *)cmd)->d[0] == - TCP(ulp)->th_ack); - break; - - case O_TCPWIN: - match = (proto == IPPROTO_TCP && offset == 0 && - cmd->arg1 == TCP(ulp)->th_win); - break; - - case O_ESTAB: - /* reject packets which have SYN only */ - /* XXX should i also check for TH_ACK ? */ - match = (proto == IPPROTO_TCP && offset == 0 && - (TCP(ulp)->th_flags & - (TH_RST | TH_ACK | TH_SYN)) != TH_SYN); - break; - - case O_ALTQ: { - struct pf_mtag *at; - ipfw_insn_altq *altq = (ipfw_insn_altq *)cmd; - - match = 1; - at = pf_find_mtag(m); - if (at != NULL && at->qid != 0) - break; - at = pf_get_mtag(m); - if (at == NULL) { - /* - * Let the packet fall back to the - * default ALTQ. 
- */ - break; - } - at->qid = altq->qid; - if (is_ipv4) - at->af = AF_INET; - else - at->af = AF_LINK; - at->hdr = ip; - break; - } - - case O_LOG: - if (V_fw_verbose) - ipfw_log(f, hlen, args, m, - oif, offset, tablearg, ip); - match = 1; - break; - - case O_PROB: - match = (random()<((ipfw_insn_u32 *)cmd)->d[0]); - break; - -#if 0 - case O_VERREVPATH: - /* Outgoing packets automatically pass/match */ - match = ((oif != NULL) || - (m->m_pkthdr.rcvif == NULL) || - ( -#ifdef INET6 - is_ipv6 ? - verify_path6(&(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : -#endif - verify_path(src_ip, m->m_pkthdr.rcvif, - args->f_id.fib))); - break; - - case O_VERSRCREACH: - /* Outgoing packets automatically pass/match */ - match = (hlen > 0 && ((oif != NULL) || -#ifdef INET6 - is_ipv6 ? - verify_path6(&(args->f_id.src_ip6), - NULL) : -#endif - verify_path(src_ip, NULL, args->f_id.fib))); - break; - - case O_ANTISPOOF: - /* Outgoing packets automatically pass/match */ - if (oif == NULL && hlen > 0 && - ( (is_ipv4 && in_localaddr(src_ip)) -#ifdef INET6 - || (is_ipv6 && - in6_localaddr(&(args->f_id.src_ip6))) -#endif - )) - match = -#ifdef INET6 - is_ipv6 ? 
verify_path6( - &(args->f_id.src_ip6), - m->m_pkthdr.rcvif) : -#endif - verify_path(src_ip, - m->m_pkthdr.rcvif, - args->f_id.fib); - else - match = 1; - break; -#endif - - case O_IPSEC: -#ifdef IPSEC - match = (m_tag_find(m, - PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL); -#endif - /* otherwise no match */ - break; - -#ifdef INET6 - case O_IP6_SRC: - match = is_ipv6 && - IN6_ARE_ADDR_EQUAL(&args->f_id.src_ip6, - &((ipfw_insn_ip6 *)cmd)->addr6); - break; - - case O_IP6_DST: - match = is_ipv6 && - IN6_ARE_ADDR_EQUAL(&args->f_id.dst_ip6, - &((ipfw_insn_ip6 *)cmd)->addr6); - break; - case O_IP6_SRC_MASK: - case O_IP6_DST_MASK: - if (is_ipv6) { - int i = cmdlen - 1; - struct in6_addr p; - struct in6_addr *d = - &((ipfw_insn_ip6 *)cmd)->addr6; - - for (; !match && i > 0; d += 2, - i -= F_INSN_SIZE(struct in6_addr) - * 2) { - p = (cmd->opcode == - O_IP6_SRC_MASK) ? - args->f_id.src_ip6: - args->f_id.dst_ip6; - APPLY_MASK(&p, &d[1]); - match = - IN6_ARE_ADDR_EQUAL(&d[0], - &p); - } - } - break; - - case O_IP6_SRC_ME: - match= is_ipv6 && search_ip6_addr_net(&args->f_id.src_ip6); - break; - - case O_IP6_DST_ME: - match= is_ipv6 && search_ip6_addr_net(&args->f_id.dst_ip6); - break; - - case O_FLOW6ID: - match = is_ipv6 && - flow6id_match(args->f_id.flow_id6, - (ipfw_insn_u32 *) cmd); - break; - - case O_EXT_HDR: - match = is_ipv6 && - (ext_hd & ((ipfw_insn *) cmd)->arg1); - break; - - case O_IP6: - match = is_ipv6; - break; -#endif - - case O_IP4: - match = is_ipv4; - break; - -#if 0 - case O_TAG: { - uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? - tablearg : cmd->arg1; - - /* Packet is already tagged with this tag? */ - mtag = m_tag_locate(m, MTAG_IPFW, tag, NULL); - - /* We have `untag' action when F_NOT flag is - * present. And we must remove this mtag from - * mbuf and reset `match' to zero (`match' will - * be inversed later). - * Otherwise we should allocate new mtag and - * push it into mbuf. 
- */ - if (cmd->len & F_NOT) { /* `untag' action */ - if (mtag != NULL) - m_tag_delete(m, mtag); - } else if (mtag == NULL) { - if ((mtag = m_tag_alloc(MTAG_IPFW, - tag, 0, M_NOWAIT)) != NULL) - m_tag_prepend(m, mtag); - } - match = (cmd->len & F_NOT) ? 0: 1; - break; - } - - case O_FIB: /* try match the specified fib */ - if (args->f_id.fib == cmd->arg1) - match = 1; - break; - - case O_TAGGED: { - uint32_t tag = (cmd->arg1 == IP_FW_TABLEARG) ? - tablearg : cmd->arg1; - - if (cmdlen == 1) { - match = m_tag_locate(m, MTAG_IPFW, - tag, NULL) != NULL; - break; - } - - /* we have ranges */ - for (mtag = m_tag_first(m); - mtag != NULL && !match; - mtag = m_tag_next(m, mtag)) { - uint16_t *p; - int i; - - if (mtag->m_tag_cookie != MTAG_IPFW) - continue; - - p = ((ipfw_insn_u16 *)cmd)->ports; - i = cmdlen - 1; - for(; !match && i > 0; i--, p += 2) - match = - mtag->m_tag_id >= p[0] && - mtag->m_tag_id <= p[1]; - } - break; - } -#endif - - /* - * The second set of opcodes represents 'actions', - * i.e. the terminal part of a rule once the packet - * matches all previous patterns. - * Typically there is only one action for each rule, - * and the opcode is stored at the end of the rule - * (but there are exceptions -- see below). - * - * In general, here we set retval and terminate the - * outer loop (would be a 'break 3' in some language, - * but we need to set l=0, done=1) - * - * Exceptions: - * O_COUNT and O_SKIPTO actions: - * instead of terminating, we jump to the next rule - * (setting l=0), or to the SKIPTO target (by - * setting f, cmd and l as needed), respectively. - * - * O_TAG, O_LOG and O_ALTQ action parameters: - * perform some action and set match = 1; - * - * O_LIMIT and O_KEEP_STATE: these opcodes are - * not real 'actions', and are stored right - * before the 'action' part of the rule. 
- * These opcodes try to install an entry in the - * state tables; if successful, we continue with - * the next opcode (match=1; break;), otherwise - * the packet must be dropped (set retval, - * break loops with l=0, done=1) - * - * O_PROBE_STATE and O_CHECK_STATE: these opcodes - * cause a lookup of the state table, and a jump - * to the 'action' part of the parent rule - * if an entry is found, or - * (CHECK_STATE only) a jump to the next rule if - * the entry is not found. - * The result of the lookup is cached so that - * further instances of these opcodes become NOPs. - * The jump to the next rule is done by setting - * l=0, cmdlen=0. - */ - case O_LIMIT: - case O_KEEP_STATE: - if (install_state(f, - (ipfw_insn_limit *)cmd, args, tablearg)) { - /* error or limit violation */ - retval = IP_FW_DENY; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - } - match = 1; - break; - - case O_PROBE_STATE: - case O_CHECK_STATE: - /* - * dynamic rules are checked at the first - * keep-state or check-state occurrence, - * with the result being stored in dyn_dir. - * The compiler introduces a PROBE_STATE - * instruction for us when we have a - * KEEP_STATE (because PROBE_STATE needs - * to be run first). - */ - if (dyn_dir == MATCH_UNKNOWN && - (q = lookup_dyn_rule(&args->f_id, - &dyn_dir, proto == IPPROTO_TCP ? - TCP(ulp) : NULL)) - != NULL) { - /* - * Found dynamic entry, update stats - * and jump to the 'action' part of - * the parent rule by setting - * f, cmd, l and clearing cmdlen. - */ - q->pcnt++; - q->bcnt += pktlen; - f = q->rule; - cmd = ACTION_PTR(f); - l = f->cmd_len - f->act_ofs; - IPFW_DYN_UNLOCK(); - cmdlen = 0; - match = 1; - break; - } - /* - * Dynamic entry not found. If CHECK_STATE, - * skip to next rule, if PROBE_STATE just - * ignore and continue with next opcode. 
- */ - if (cmd->opcode == O_CHECK_STATE) - l = 0; /* exit inner loop */ - match = 1; - break; - - case O_ACCEPT: - retval = 0; /* accept */ - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_PIPE: - case O_QUEUE: - args->rule = f; /* report matching rule */ - args->rule_id = f->id; - args->chain_id = chain->id; - if (cmd->arg1 == IP_FW_TABLEARG) - args->cookie = tablearg; - else - args->cookie = cmd->arg1; - retval = IP_FW_DUMMYNET; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - -#if 0 - case O_DIVERT: - case O_TEE: - if (args->eh) /* not on layer 2 */ - break; - /* otherwise this is terminal */ - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - mtag = m_tag_get(PACKET_TAG_DIVERT, - sizeof(struct divert_tag), - M_NOWAIT); - if (mtag == NULL) { - retval = IP_FW_DENY; - } else { - struct divert_tag *dt; - dt = (struct divert_tag *)(mtag+1); - dt->cookie = f->rulenum; - if (cmd->arg1 == IP_FW_TABLEARG) - dt->info = tablearg; - else - dt->info = cmd->arg1; - m_tag_prepend(m, mtag); - retval = (cmd->opcode == O_DIVERT) ? - IP_FW_DIVERT : IP_FW_TEE; - } - break; -#endif - - case O_COUNT: - case O_SKIPTO: - f->pcnt++; /* update stats */ - f->bcnt += pktlen; - f->timestamp = time_uptime; - if (cmd->opcode == O_COUNT) { - l = 0; /* exit inner loop */ - break; - } - /* handle skipto */ - if (cmd->arg1 == IP_FW_TABLEARG) { - f = lookup_next_rule(f, tablearg); - } else { // XXX ? - if (f->next_rule == NULL) - lookup_next_rule(f, 0); - f = f->next_rule; - } - /* - * Skip disabled rules, and - * re-enter the inner loop - * with the correct f, l and cmd. 
- * Also clear cmdlen and skip_or - */ - while (f && (V_set_disable & (1 << f->set))) - f = f->next; - if (f) { /* found a valid rule */ - l = f->cmd_len; - cmd = f->cmd; - } else { - l = 0; /* exit inner loop */ - } - match = 1; - cmdlen = 0; - skip_or = 0; - break; - - case O_REJECT: - /* - * Drop the packet and send a reject notice - * if the packet is not ICMP (or is an ICMP - * query), and it is not multicast/broadcast. - */ - if (hlen > 0 && is_ipv4 && offset == 0 && - (proto != IPPROTO_ICMP || - is_icmp_query(ICMP(ulp))) && - !(m->m_flags & (M_BCAST|M_MCAST)) && - !IN_MULTICAST(ntohl(dst_ip.s_addr))) { - send_reject(args, cmd->arg1, ip_len, ip); - m = args->m; - } - /* FALLTHROUGH */ -#ifdef INET6 - case O_UNREACH6: - if (hlen > 0 && is_ipv6 && - ((offset & IP6F_OFF_MASK) == 0) && - (proto != IPPROTO_ICMPV6 || - (is_icmp6_query(args->f_id.flags) == 1)) && - !(m->m_flags & (M_BCAST|M_MCAST)) && - !IN6_IS_ADDR_MULTICAST(&args->f_id.dst_ip6)) { - send_reject6( - args, cmd->arg1, hlen, - (struct ip6_hdr *)ip); - m = args->m; - } - /* FALLTHROUGH */ -#endif - case O_DENY: - retval = IP_FW_DENY; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_FORWARD_IP: - if (args->eh) /* not valid on layer2 pkts */ - break; - if (!q || dyn_dir == MATCH_FORWARD) { - struct sockaddr_in *sa; - sa = &(((ipfw_insn_sa *)cmd)->sa); - if (sa->sin_addr.s_addr == INADDR_ANY) { - bcopy(sa, &args->hopstore, - sizeof(*sa)); - args->hopstore.sin_addr.s_addr = - htonl(tablearg); - args->next_hop = &args->hopstore; - } else { - args->next_hop = sa; - } - } - retval = IP_FW_PASS; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_NETGRAPH: - case O_NGTEE: - args->rule = f; /* report matching rule */ - args->rule_id = f->id; - args->chain_id = chain->id; - if (cmd->arg1 == IP_FW_TABLEARG) - args->cookie = tablearg; - else - args->cookie = cmd->arg1; - retval = (cmd->opcode == O_NETGRAPH) ? 
- IP_FW_NETGRAPH : IP_FW_NGTEE; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - -#if 0 - case O_SETFIB: - f->pcnt++; /* update stats */ - f->bcnt += pktlen; - f->timestamp = time_uptime; - M_SETFIB(m, cmd->arg1); - args->f_id.fib = cmd->arg1; - l = 0; /* exit inner loop */ - break; - - case O_NAT: - if (!IPFW_NAT_LOADED) { - retval = IP_FW_DENY; - } else { - struct cfg_nat *t; - int nat_id; - - args->rule = f; /* Report matching rule. */ - args->rule_id = f->id; - args->chain_id = chain->id; - t = ((ipfw_insn_nat *)cmd)->nat; - if (t == NULL) { - nat_id = (cmd->arg1 == IP_FW_TABLEARG) ? - tablearg : cmd->arg1; - LOOKUP_NAT(V_layer3_chain, nat_id, t); - if (t == NULL) { - retval = IP_FW_DENY; - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - } - if (cmd->arg1 != IP_FW_TABLEARG) - ((ipfw_insn_nat *)cmd)->nat = t; - } - retval = ipfw_nat_ptr(args, t, m); - } - l = 0; /* exit inner loop */ - done = 1; /* exit outer loop */ - break; - - case O_REASS: { - int ip_off; - - f->pcnt++; - f->bcnt += pktlen; - l = 0; /* in any case exit inner loop */ - - ip_off = (args->eh != NULL) ? - ntohs(ip->ip_off) : ip->ip_off; - /* if not fragmented, go to next rule */ - if ((ip_off & (IP_MF | IP_OFFMASK)) == 0) - break; - /* - * ip_reass() expects len & off in host - * byte order: fix them in case we come - * from layer2. - */ - if (args->eh != NULL) { - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - } - - args->m = m = ip_reass(m); - - /* - * IP header checksum fixup after - * reassembly and leave header - * in network byte order. 
- */ - if (m == NULL) { /* fragment got swallowed */ - retval = IP_FW_DENY; - } else { /* good, packet complete */ - int hlen; - - ip = mtod(m, struct ip *); - hlen = ip->ip_hl << 2; - /* revert len & off for layer2 pkts */ - if (args->eh != NULL) - ip->ip_len = htons(ip->ip_len); - ip->ip_sum = 0; - if (hlen == sizeof(struct ip)) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(m, hlen); - retval = IP_FW_REASS; - args->rule = f; - args->rule_id = f->id; - args->chain_id = chain->id; - } - done = 1; /* exit outer loop */ - break; - } -#endif - - default: - break; // XXX we disabled some - panic("-- unknown opcode %d\n", cmd->opcode); - } /* end of switch() on opcodes */ - /* - * if we get here with l=0, then match is irrelevant. - */ - - if (cmd->len & F_NOT) - match = !match; - - if (match) { - if (cmd->len & F_OR) - skip_or = 1; - } else { - if (!(cmd->len & F_OR)) /* not an OR block, */ - break; /* try next rule */ - } - - } /* end of inner loop, scan opcodes */ - - if (done) - break; - -/* next_rule:;*/ /* try next rule */ - - } /* end of outer for, scan rules */ - - if (done) { - /* Update statistics */ - f->pcnt++; - f->bcnt += pktlen; - f->timestamp = time_uptime; - } else { - retval = IP_FW_DENY; - printf("ipfw: ouch!, skip past end of rules, denying packet\n"); - } - IPFW_RUNLOCK(chain); -#ifdef __FreeBSD__ - if (ucred_cache != NULL) - crfree(ucred_cache); -#endif - return (retval); - -pullup_failed: - if (V_fw_verbose) - printf("ipfw: pullup failed\n"); - return (IP_FW_DENY); -} - -/* - * When a rule is added/deleted, clear the next_rule pointers in all rules. - * These will be reconstructed on the fly as packets are matched. - */ -static void -flush_rule_ptrs(struct ip_fw_chain *chain) -{ - struct ip_fw *rule; - - IPFW_WLOCK_ASSERT(chain); - - chain->id++; - - for (rule = chain->rules; rule; rule = rule->next) - rule->next_rule = NULL; -} - -/* - * Add a new rule to the list. 
Copy the rule into a malloc'ed area, then - * possibly create a rule number and add the rule to the list. - * Update the rule_number in the input struct so the caller knows it as well. - */ -static int -add_rule(struct ip_fw_chain *chain, struct ip_fw *input_rule) -{ - struct ip_fw *rule, *f, *prev; - int l = RULESIZE(input_rule); - - if (chain->rules == NULL && input_rule->rulenum != IPFW_DEFAULT_RULE) - return (EINVAL); - - rule = malloc(l, M_IPFW, M_NOWAIT | M_ZERO); - if (rule == NULL) - return (ENOSPC); - - bcopy(input_rule, rule, l); - - rule->next = NULL; - rule->next_rule = NULL; - - rule->pcnt = 0; - rule->bcnt = 0; - rule->timestamp = 0; - - IPFW_WLOCK(chain); - - if (chain->rules == NULL) { /* default rule */ - chain->rules = rule; - rule->id = ++chain->id; - goto done; - } - - /* - * If rulenum is 0, find highest numbered rule before the - * default rule, and add autoinc_step - */ - if (V_autoinc_step < 1) - V_autoinc_step = 1; - else if (V_autoinc_step > 1000) - V_autoinc_step = 1000; - if (rule->rulenum == 0) { - /* - * locate the highest numbered rule before default - */ - for (f = chain->rules; f; f = f->next) { - if (f->rulenum == IPFW_DEFAULT_RULE) - break; - rule->rulenum = f->rulenum; - } - if (rule->rulenum < IPFW_DEFAULT_RULE - V_autoinc_step) - rule->rulenum += V_autoinc_step; - input_rule->rulenum = rule->rulenum; - } - - /* - * Now insert the new rule in the right place in the sorted list. 
- */ - for (prev = NULL, f = chain->rules; f; prev = f, f = f->next) { - if (f->rulenum > rule->rulenum) { /* found the location */ - if (prev) { - rule->next = f; - prev->next = rule; - } else { /* head insert */ - rule->next = chain->rules; - chain->rules = rule; - } - break; - } - } - flush_rule_ptrs(chain); - /* chain->id incremented inside flush_rule_ptrs() */ - rule->id = chain->id; -done: - V_static_count++; - V_static_len += l; - IPFW_WUNLOCK(chain); - DEB(printf("ipfw: installed rule %d, static count now %d\n", - rule->rulenum, V_static_count);) - return (0); -} - -/** - * Remove a static rule (including derived * dynamic rules) - * and place it on the ``reap list'' for later reclamation. - * The caller is in charge of clearing rule pointers to avoid - * dangling pointers. - * @return a pointer to the next entry. - * Arguments are not checked, so they better be correct. - */ -static struct ip_fw * -remove_rule(struct ip_fw_chain *chain, struct ip_fw *rule, - struct ip_fw *prev) -{ - struct ip_fw *n; - int l = RULESIZE(rule); - - IPFW_WLOCK_ASSERT(chain); - - n = rule->next; - IPFW_DYN_LOCK(); - remove_dyn_rule(rule, NULL /* force removal */); - IPFW_DYN_UNLOCK(); - if (prev == NULL) - chain->rules = n; - else - prev->next = n; - V_static_count--; - V_static_len -= l; - - rule->next = chain->reap; - chain->reap = rule; - - return n; -} - -/* - * Reclaim storage associated with a list of rules. This is - * typically the list created using remove_rule. - * A NULL pointer on input is handled correctly. - */ -static void -reap_rules(struct ip_fw *head) -{ - struct ip_fw *rule; - - while ((rule = head) != NULL) { - head = head->next; - free(rule, M_IPFW); - } -} - -/* - * Remove all rules from a chain (except rules in set RESVD_SET - * unless kill_default = 1). The caller is responsible for - * reclaiming storage for the rules left in chain->reap. 
- */ -static void -free_chain(struct ip_fw_chain *chain, int kill_default) -{ - struct ip_fw *prev, *rule; - - IPFW_WLOCK_ASSERT(chain); - - chain->reap = NULL; - flush_rule_ptrs(chain); /* more efficient to do outside the loop */ - for (prev = NULL, rule = chain->rules; rule ; ) - if (kill_default || rule->set != RESVD_SET) - rule = remove_rule(chain, rule, prev); - else { - prev = rule; - rule = rule->next; - } -} - -/** - * Remove all rules with given number, and also do set manipulation. - * Assumes chain != NULL && *chain != NULL. - * - * The argument is an u_int32_t. The low 16 bit are the rule or set number, - * the next 8 bits are the new set, the top 8 bits are the command: - * - * 0 delete rules with given number - * 1 delete rules with given set number - * 2 move rules with given number to new set - * 3 move rules with given set number to new set - * 4 swap sets with given numbers - * 5 delete rules with given number and with given set number - */ -static int -del_entry(struct ip_fw_chain *chain, u_int32_t arg) -{ - struct ip_fw *prev = NULL, *rule; - u_int16_t rulenum; /* rule or old_set */ - u_int8_t cmd, new_set; - - rulenum = arg & 0xffff; - cmd = (arg >> 24) & 0xff; - new_set = (arg >> 16) & 0xff; - - if (cmd > 5 || new_set > RESVD_SET) - return EINVAL; - if (cmd == 0 || cmd == 2 || cmd == 5) { - if (rulenum >= IPFW_DEFAULT_RULE) - return EINVAL; - } else { - if (rulenum > RESVD_SET) /* old_set */ - return EINVAL; - } - - IPFW_WLOCK(chain); - rule = chain->rules; /* common starting point */ - chain->reap = NULL; /* prepare for deletions */ - switch (cmd) { - case 0: /* delete rules with given number */ - /* - * locate first rule to delete - */ - for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) - ; - if (rule->rulenum != rulenum) { - IPFW_WUNLOCK(chain); - return EINVAL; - } - - /* - * flush pointers outside the loop, then delete all matching - * rules. prev remains the same throughout the cycle. 
- */ - flush_rule_ptrs(chain); - while (rule->rulenum == rulenum) - rule = remove_rule(chain, rule, prev); - break; - - case 1: /* delete all rules with given set number */ - flush_rule_ptrs(chain); - while (rule->rulenum < IPFW_DEFAULT_RULE) { - if (rule->set == rulenum) - rule = remove_rule(chain, rule, prev); - else { - prev = rule; - rule = rule->next; - } - } - break; - - case 2: /* move rules with given number to new set */ - for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) - if (rule->rulenum == rulenum) - rule->set = new_set; - break; - - case 3: /* move rules with given set number to new set */ - for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) - if (rule->set == rulenum) - rule->set = new_set; - break; - - case 4: /* swap two sets */ - for (; rule->rulenum < IPFW_DEFAULT_RULE; rule = rule->next) - if (rule->set == rulenum) - rule->set = new_set; - else if (rule->set == new_set) - rule->set = rulenum; - break; - - case 5: /* delete rules with given number and with given set number. - * rulenum - given rule number; - * new_set - given set number. - */ - for (; rule->rulenum < rulenum; prev = rule, rule = rule->next) - ; - if (rule->rulenum != rulenum) { - IPFW_WUNLOCK(chain); - return (EINVAL); - } - flush_rule_ptrs(chain); - while (rule->rulenum == rulenum) { - if (rule->set == new_set) - rule = remove_rule(chain, rule, prev); - else { - prev = rule; - rule = rule->next; - } - } - } - /* - * Look for rules to reclaim. We grab the list before - * releasing the lock then reclaim them w/o the lock to - * avoid a LOR with dummynet. - */ - rule = chain->reap; - IPFW_WUNLOCK(chain); - reap_rules(rule); - return 0; -} - -/* - * Clear counters for a specific rule. - * The enclosing "table" is assumed locked. 
- */ -static void -clear_counters(struct ip_fw *rule, int log_only) -{ - ipfw_insn_log *l = (ipfw_insn_log *)ACTION_PTR(rule); - - if (log_only == 0) { - rule->bcnt = rule->pcnt = 0; - rule->timestamp = 0; - } - if (l->o.opcode == O_LOG) - l->log_left = l->max_log; -} - -/** - * Reset some or all counters on firewall rules. - * The argument `arg' is an u_int32_t. The low 16 bit are the rule number, - * the next 8 bits are the set number, the top 8 bits are the command: - * 0 work with rules from all set's; - * 1 work with rules only from specified set. - * Specified rule number is zero if we want to clear all entries. - * log_only is 1 if we only want to reset logs, zero otherwise. - */ -static int -zero_entry(struct ip_fw_chain *chain, u_int32_t arg, int log_only) -{ - struct ip_fw *rule; - char *msg; - - uint16_t rulenum = arg & 0xffff; - uint8_t set = (arg >> 16) & 0xff; - uint8_t cmd = (arg >> 24) & 0xff; - - if (cmd > 1) - return (EINVAL); - if (cmd == 1 && set > RESVD_SET) - return (EINVAL); - - IPFW_WLOCK(chain); - if (rulenum == 0) { - V_norule_counter = 0; - for (rule = chain->rules; rule; rule = rule->next) { - /* Skip rules from another set. */ - if (cmd == 1 && rule->set != set) - continue; - clear_counters(rule, log_only); - } - msg = log_only ? "All logging counts reset" : - "Accounting cleared"; - } else { - int cleared = 0; - /* - * We can have multiple rules with the same number, so we - * need to clear them all. - */ - for (rule = chain->rules; rule; rule = rule->next) - if (rule->rulenum == rulenum) { - while (rule && rule->rulenum == rulenum) { - if (cmd == 0 || rule->set == set) - clear_counters(rule, log_only); - rule = rule->next; - } - cleared = 1; - break; - } - if (!cleared) { /* we did not find any matching rules */ - IPFW_WUNLOCK(chain); - return (EINVAL); - } - msg = log_only ? 
"logging count reset" : "cleared"; - } - IPFW_WUNLOCK(chain); - - if (V_fw_verbose) { -#define lev LOG_SECURITY | LOG_NOTICE - - if (rulenum) - log(lev, "ipfw: Entry %d %s.\n", rulenum, msg); - else - log(lev, "ipfw: %s.\n", msg); - } - return (0); -} - -/* - * Check validity of the structure before insert. - * Fortunately rules are simple, so this mostly need to check rule sizes. - */ -static int -check_ipfw_struct(struct ip_fw *rule, int size) -{ - int l, cmdlen = 0; - int have_action=0; - ipfw_insn *cmd; - - if (size < sizeof(*rule)) { - printf("ipfw: rule too short\n"); - return (EINVAL); - } - /* first, check for valid size */ - l = RULESIZE(rule); - if (l != size) { - printf("ipfw: size mismatch (have %d want %d)\n", size, l); - return (EINVAL); - } - if (rule->act_ofs >= rule->cmd_len) { - printf("ipfw: bogus action offset (%u > %u)\n", - rule->act_ofs, rule->cmd_len - 1); - return (EINVAL); - } - /* - * Now go for the individual checks. Very simple ones, basically only - * instruction sizes. 
- */ - for (l = rule->cmd_len, cmd = rule->cmd ; - l > 0 ; l -= cmdlen, cmd += cmdlen) { - cmdlen = F_LEN(cmd); - if (cmdlen > l) { - printf("ipfw: opcode %d size truncated\n", - cmd->opcode); - return EINVAL; - } - DEB(printf("ipfw: opcode %d\n", cmd->opcode);) - switch (cmd->opcode) { - case O_PROBE_STATE: - case O_KEEP_STATE: - case O_PROTO: - case O_IP_SRC_ME: - case O_IP_DST_ME: - case O_LAYER2: - case O_IN: - case O_FRAG: - case O_DIVERTED: - case O_IPOPT: - case O_IPTOS: - case O_IPPRECEDENCE: - case O_IPVER: - case O_TCPWIN: - case O_TCPFLAGS: - case O_TCPOPTS: - case O_ESTAB: - case O_VERREVPATH: - case O_VERSRCREACH: - case O_ANTISPOOF: - case O_IPSEC: -#ifdef INET6 - case O_IP6_SRC_ME: - case O_IP6_DST_ME: - case O_EXT_HDR: - case O_IP6: -#endif - case O_IP4: - case O_TAG: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - break; - - case O_FIB: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - if (cmd->arg1 >= rt_numfibs) { - printf("ipfw: invalid fib number %d\n", - cmd->arg1); - return EINVAL; - } - break; - - case O_SETFIB: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - if (cmd->arg1 >= rt_numfibs) { - printf("ipfw: invalid fib number %d\n", - cmd->arg1); - return EINVAL; - } - goto check_action; - - case O_UID: - case O_GID: - case O_JAIL: - case O_IP_SRC: - case O_IP_DST: - case O_TCPSEQ: - case O_TCPACK: - case O_PROB: - case O_ICMPTYPE: - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32)) - goto bad_size; - break; - - case O_LIMIT: - if (cmdlen != F_INSN_SIZE(ipfw_insn_limit)) - goto bad_size; - break; - - case O_LOG: - if (cmdlen != F_INSN_SIZE(ipfw_insn_log)) - goto bad_size; - - ((ipfw_insn_log *)cmd)->log_left = - ((ipfw_insn_log *)cmd)->max_log; - - break; - - case O_IP_SRC_MASK: - case O_IP_DST_MASK: - /* only odd command lengths */ - if ( !(cmdlen & 1) || cmdlen > 31) - goto bad_size; - break; - - case O_IP_SRC_SET: - case O_IP_DST_SET: - if (cmd->arg1 == 0 || cmd->arg1 > 256) { - printf("ipfw: invalid set size %d\n", - 
cmd->arg1); - return EINVAL; - } - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + - (cmd->arg1+31)/32 ) - goto bad_size; - break; - - case O_IP_SRC_LOOKUP: - case O_IP_DST_LOOKUP: - if (cmd->arg1 >= IPFW_TABLES_MAX) { - printf("ipfw: invalid table number %d\n", - cmd->arg1); - return (EINVAL); - } - if (cmdlen != F_INSN_SIZE(ipfw_insn) && - cmdlen != F_INSN_SIZE(ipfw_insn_u32) + 1 && - cmdlen != F_INSN_SIZE(ipfw_insn_u32)) - goto bad_size; - break; - - case O_MACADDR2: - if (cmdlen != F_INSN_SIZE(ipfw_insn_mac)) - goto bad_size; - break; - - case O_NOP: - case O_IPID: - case O_IPTTL: - case O_IPLEN: - case O_TCPDATALEN: - case O_TAGGED: - if (cmdlen < 1 || cmdlen > 31) - goto bad_size; - break; - - case O_MAC_TYPE: - case O_IP_SRCPORT: - case O_IP_DSTPORT: /* XXX artificial limit, 30 port pairs */ - if (cmdlen < 2 || cmdlen > 31) - goto bad_size; - break; - - case O_RECV: - case O_XMIT: - case O_VIA: - if (cmdlen != F_INSN_SIZE(ipfw_insn_if)) - goto bad_size; - break; - - case O_ALTQ: - if (cmdlen != F_INSN_SIZE(ipfw_insn_altq)) - goto bad_size; - break; - - case O_PIPE: - case O_QUEUE: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; - goto check_action; - - case O_FORWARD_IP: -#ifdef IPFIREWALL_FORWARD - if (cmdlen != F_INSN_SIZE(ipfw_insn_sa)) - goto bad_size; - goto check_action; -#else - return EINVAL; -#endif - - case O_DIVERT: - case O_TEE: - if (ip_divert_ptr == NULL) - return EINVAL; - else - goto check_size; - case O_NETGRAPH: - case O_NGTEE: - if (!NG_IPFW_LOADED) - return EINVAL; - else - goto check_size; - case O_NAT: - if (!IPFW_NAT_LOADED) - return EINVAL; - if (cmdlen != F_INSN_SIZE(ipfw_insn_nat)) - goto bad_size; - goto check_action; - case O_FORWARD_MAC: /* XXX not implemented yet */ - case O_CHECK_STATE: - case O_COUNT: - case O_ACCEPT: - case O_DENY: - case O_REJECT: -#ifdef INET6 - case O_UNREACH6: -#endif - case O_SKIPTO: - case O_REASS: -check_size: - if (cmdlen != F_INSN_SIZE(ipfw_insn)) - goto bad_size; -check_action: - if 
(have_action) { - printf("ipfw: opcode %d, multiple actions" - " not allowed\n", - cmd->opcode); - return EINVAL; - } - have_action = 1; - if (l != cmdlen) { - printf("ipfw: opcode %d, action must be" - " last opcode\n", - cmd->opcode); - return EINVAL; - } - break; -#ifdef INET6 - case O_IP6_SRC: - case O_IP6_DST: - if (cmdlen != F_INSN_SIZE(struct in6_addr) + - F_INSN_SIZE(ipfw_insn)) - goto bad_size; - break; - - case O_FLOW6ID: - if (cmdlen != F_INSN_SIZE(ipfw_insn_u32) + - ((ipfw_insn_u32 *)cmd)->o.arg1) - goto bad_size; - break; - - case O_IP6_SRC_MASK: - case O_IP6_DST_MASK: - if ( !(cmdlen & 1) || cmdlen > 127) - goto bad_size; - break; - case O_ICMP6TYPE: - if( cmdlen != F_INSN_SIZE( ipfw_insn_icmp6 ) ) - goto bad_size; - break; -#endif - - default: - switch (cmd->opcode) { -#ifndef INET6 - case O_IP6_SRC_ME: - case O_IP6_DST_ME: - case O_EXT_HDR: - case O_IP6: - case O_UNREACH6: - case O_IP6_SRC: - case O_IP6_DST: - case O_FLOW6ID: - case O_IP6_SRC_MASK: - case O_IP6_DST_MASK: - case O_ICMP6TYPE: - printf("ipfw: no IPv6 support in kernel\n"); - return EPROTONOSUPPORT; -#endif - default: - printf("ipfw: opcode %d, unknown opcode\n", - cmd->opcode); - return EINVAL; - } - } - } - if (have_action == 0) { - printf("ipfw: missing action\n"); - return EINVAL; - } - return 0; - -bad_size: - printf("ipfw: opcode %d size %d wrong\n", - cmd->opcode, cmdlen); - return EINVAL; -} - -/* - * Copy the static rules to the supplied buffer - * and return the amount of space actually used. - */ -static size_t -ipfw_getrules(struct ip_fw_chain *chain, void *buf, size_t space) -{ - char *bp = buf; - char *ep = bp + space; - struct ip_fw *rule; - int i; - time_t boot_seconds; - - boot_seconds = boottime.tv_sec; - /* XXX this can take a long time and locking will block packet flow */ - IPFW_RLOCK(chain); - for (rule = chain->rules; rule ; rule = rule->next) { - /* - * Verify the entry fits in the buffer in case the - * rules changed between calculating buffer space and - * now. 
This would be better done using a generation - * number but should suffice for now. - */ - i = RULESIZE(rule); - if (bp + i <= ep) { - bcopy(rule, bp, i); - /* - * XXX HACK. Store the disable mask in the "next" - * pointer in a wild attempt to keep the ABI the same. - * Why do we do this on EVERY rule? - */ - bcopy(&V_set_disable, - &(((struct ip_fw *)bp)->next_rule), - sizeof(V_set_disable)); - if (((struct ip_fw *)bp)->timestamp) - ((struct ip_fw *)bp)->timestamp += boot_seconds; - bp += i; - } - } - IPFW_RUNLOCK(chain); - return (bp - (char *)buf); -} - -/* - * Copy the dynamic rules to the supplied buffer - * and return the amount of space actually used. - * XXX marta if we allocate X and rules grows - * we check for size limit while copying rules into the buffer - */ -static size_t -ipfw_getdynrules(struct ip_fw_chain *chain, void *buf, size_t space) -{ - char *bp = buf; - char *ep = bp + space; - int i; - time_t boot_seconds; - - printf("dynrules requested\n"); - boot_seconds = boottime.tv_sec; - - if (V_ipfw_dyn_v) { - ipfw_dyn_rule *p, *last = NULL; - - IPFW_DYN_LOCK(); - for (i = 0 ; i < V_curr_dyn_buckets; i++) - for (p = V_ipfw_dyn_v[i] ; p != NULL; p = p->next) { - if (bp + sizeof *p <= ep) { - ipfw_dyn_rule *dst = - (ipfw_dyn_rule *)bp; - bcopy(p, dst, sizeof *p); - bcopy(&(p->rule->rulenum), &(dst->rule), - sizeof(p->rule->rulenum)); - /* - * store set number into high word of - * dst->rule pointer. - */ - bcopy(&(p->rule->set), - (char *)&dst->rule + - sizeof(p->rule->rulenum), - sizeof(p->rule->set)); - /* - * store a non-null value in "next". - * The userland code will interpret a - * NULL here as a marker - * for the last dynamic rule. - */ - bcopy(&dst, &dst->next, sizeof(dst)); - last = dst; - dst->expire = - TIME_LEQ(dst->expire, time_uptime) ? 
- 0 : dst->expire - time_uptime ; - bp += sizeof(ipfw_dyn_rule); - } else { - p = NULL; /* break the loop */ - i = V_curr_dyn_buckets; - } - } - IPFW_DYN_UNLOCK(); - if (last != NULL) /* mark last dynamic rule */ - bzero(&last->next, sizeof(last)); - } - return (bp - (char *)buf); -} - - -/** - * {set|get}sockopt parser. - */ -static int -ipfw_ctl(struct sockopt *sopt) -{ -#define RULE_MAXSIZE (256*sizeof(u_int32_t)) - int error; - size_t size; - struct ip_fw *buf, *rule; - u_int32_t rulenum[2]; - - error = priv_check(sopt->sopt_td, PRIV_NETINET_IPFW); - if (error) - return (error); - - /* - * Disallow modifications in really-really secure mode, but still allow - * the logging counters to be reset. - */ - if (sopt->sopt_name == IP_FW_ADD || - (sopt->sopt_dir == SOPT_SET && sopt->sopt_name != IP_FW_RESETLOG)) { - error = securelevel_ge(sopt->sopt_td->td_ucred, 3); - if (error) - return (error); - } - - error = 0; - - switch (sopt->sopt_name) { - case IP_FW_GET: - /* - * pass up a copy of the current static rules. - * The last static rule has number IPFW_DEFAULT_RULE. - * - * Note that the calculated size is used to bound the - * amount of data returned to the user. The rule set may - * change between calculating the size and returning the - * data in which case we'll just return what fits. - */ - size = V_static_len; /* size of static rules */ - - /* - * XXX todo: if the user passes a short length just to know - * how much room is needed, do not bother filling up the - * buffer, just jump to the sooptcopyout. - */ - buf = malloc(size, M_TEMP, M_WAITOK); - error = sooptcopyout(sopt, buf, - ipfw_getrules(&V_layer3_chain, buf, size)); - free(buf, M_TEMP); - break; - - case IP_FW_DYN_GET: - /* - * pass up a copy of the current dynamic rules. - * The last dynamic rule has NULL in the "next" field. - */ - /* if (!V_ipfw_dyn_v) XXX check for empty set ? */ - size = (V_dyn_count * sizeof(ipfw_dyn_rule)); /* size of dyn. 
rules */ - - buf = malloc(size, M_TEMP, M_WAITOK); - error = sooptcopyout(sopt, buf, - ipfw_getdynrules(&V_layer3_chain, buf, size)); - free(buf, M_TEMP); - break; - - case IP_FW_FLUSH: - /* - * Normally we cannot release the lock on each iteration. - * We could do it here only because we start from the head all - * the times so there is no risk of missing some entries. - * On the other hand, the risk is that we end up with - * a very inconsistent ruleset, so better keep the lock - * around the whole cycle. - * - * XXX this code can be improved by resetting the head of - * the list to point to the default rule, and then freeing - * the old list without the need for a lock. - */ - - IPFW_WLOCK(&V_layer3_chain); - free_chain(&V_layer3_chain, 0 /* keep default rule */); - rule = V_layer3_chain.reap; - IPFW_WUNLOCK(&V_layer3_chain); - reap_rules(rule); - break; - - case IP_FW_ADD: - rule = malloc(RULE_MAXSIZE, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, rule, RULE_MAXSIZE, - sizeof(struct ip_fw) ); - if (error == 0) - error = check_ipfw_struct(rule, sopt->sopt_valsize); - if (error == 0) { - error = add_rule(&V_layer3_chain, rule); - size = RULESIZE(rule); - if (!error && sopt->sopt_dir == SOPT_GET) - error = sooptcopyout(sopt, rule, size); - } - free(rule, M_TEMP); - break; - - case IP_FW_DEL: - /* - * IP_FW_DEL is used for deleting single rules or sets, - * and (ab)used to atomically manipulate sets. Argument size - * is used to distinguish between the two: - * sizeof(u_int32_t) - * delete single rule or set of rules, - * or reassign rules (or sets) to a different set. - * 2*sizeof(u_int32_t) - * atomic disable/enable sets. - * first u_int32_t contains sets to be disabled, - * second u_int32_t contains sets to be enabled. 
- */ - error = sooptcopyin(sopt, rulenum, - 2*sizeof(u_int32_t), sizeof(u_int32_t)); - if (error) - break; - size = sopt->sopt_valsize; - if (size == sizeof(u_int32_t)) /* delete or reassign */ - error = del_entry(&V_layer3_chain, rulenum[0]); - else if (size == 2*sizeof(u_int32_t)) /* set enable/disable */ - V_set_disable = - (V_set_disable | rulenum[0]) & ~rulenum[1] & - ~(1<<RESVD_SET); /* set RESVD_SET always enabled */ - else - error = EINVAL; - break; - - case IP_FW_ZERO: - case IP_FW_RESETLOG: /* argument is an u_int_32, the rule number */ - rulenum[0] = 0; - if (sopt->sopt_val != 0) { - error = sooptcopyin(sopt, rulenum, - sizeof(u_int32_t), sizeof(u_int32_t)); - if (error) - break; - } - error = zero_entry(&V_layer3_chain, rulenum[0], - sopt->sopt_name == IP_FW_RESETLOG); - break; - - case IP_FW_TABLE_ADD: - { - ipfw_table_entry ent; - - error = sooptcopyin(sopt, &ent, - sizeof(ent), sizeof(ent)); - if (error) - break; - error = add_table_entry(&V_layer3_chain, ent.tbl, - ent.addr, ent.masklen, ent.value); - } - break; - - case IP_FW_TABLE_DEL: - { - ipfw_table_entry ent; - - error = sooptcopyin(sopt, &ent, - sizeof(ent), sizeof(ent)); - if (error) - break; - error = del_table_entry(&V_layer3_chain, ent.tbl, - ent.addr, ent.masklen); - } - break; - - case IP_FW_TABLE_FLUSH: - { - u_int16_t tbl; - - error = sooptcopyin(sopt, &tbl, - sizeof(tbl), sizeof(tbl)); - if (error) - break; - IPFW_WLOCK(&V_layer3_chain); - error = flush_table(&V_layer3_chain, tbl); - IPFW_WUNLOCK(&V_layer3_chain); - } - break; - - case IP_FW_TABLE_GETSIZE: - { - u_int32_t tbl, cnt; - - if ((error = sooptcopyin(sopt, &tbl, sizeof(tbl), - sizeof(tbl)))) - break; - IPFW_RLOCK(&V_layer3_chain); - error = count_table(&V_layer3_chain, tbl, &cnt); - IPFW_RUNLOCK(&V_layer3_chain); - if (error) - break; - error = sooptcopyout(sopt, &cnt, sizeof(cnt)); - } - break; - - case IP_FW_TABLE_LIST: - { - ipfw_table *tbl; - - if (sopt->sopt_valsize < sizeof(*tbl)) { - error = EINVAL; - break; - } - size = sopt->sopt_valsize; - tbl = malloc(size, M_TEMP, M_WAITOK); - error = sooptcopyin(sopt, tbl, size, sizeof(*tbl)); - if (error) { - free(tbl, M_TEMP); - break; - } - tbl->size = (size - sizeof(*tbl))
/ - sizeof(ipfw_table_entry); - IPFW_RLOCK(&V_layer3_chain); - error = dump_table(&V_layer3_chain, tbl); - IPFW_RUNLOCK(&V_layer3_chain); - if (error) { - free(tbl, M_TEMP); - break; - } - error = sooptcopyout(sopt, tbl, size); - free(tbl, M_TEMP); - } - break; - - case IP_FW_NAT_CFG: - if (IPFW_NAT_LOADED) - error = ipfw_nat_cfg_ptr(sopt); - else { - printf("IP_FW_NAT_CFG: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - case IP_FW_NAT_DEL: - if (IPFW_NAT_LOADED) - error = ipfw_nat_del_ptr(sopt); - else { - printf("IP_FW_NAT_DEL: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - case IP_FW_NAT_GET_CONFIG: - if (IPFW_NAT_LOADED) - error = ipfw_nat_get_cfg_ptr(sopt); - else { - printf("IP_FW_NAT_GET_CFG: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - case IP_FW_NAT_GET_LOG: - if (IPFW_NAT_LOADED) - error = ipfw_nat_get_log_ptr(sopt); - else { - printf("IP_FW_NAT_GET_LOG: %s\n", - "ipfw_nat not present, please load it"); - error = EINVAL; - } - break; - - default: - printf("ipfw: ipfw_ctl invalid option %d\n", sopt->sopt_name); - error = EINVAL; - } - - return (error); -#undef RULE_MAXSIZE -} - - -/* - * This procedure is only used to handle keepalives. It is invoked - * every dyn_keepalive_period - */ -static void -ipfw_tick(void * vnetx) -{ - struct mbuf *m0, *m, *mnext, **mtailp; -#ifdef INET6 - struct mbuf *m6, **m6_tailp; -#endif - int i; - ipfw_dyn_rule *q; -#ifdef VIMAGE - struct vnet *vp = vnetx; -#endif - - CURVNET_SET(vp); - if (V_dyn_keepalive == 0 || V_ipfw_dyn_v == NULL || V_dyn_count == 0) - goto done; - - /* - * We make a chain of packets to go out here -- not deferring - * until after we drop the IPFW dynamic rule lock would result - * in a lock order reversal with the normal packet input -> ipfw - * call stack. 
- */ - m0 = NULL; - mtailp = &m0; -#ifdef INET6 - m6 = NULL; - m6_tailp = &m6; -#endif - IPFW_DYN_LOCK(); - for (i = 0 ; i < V_curr_dyn_buckets ; i++) { - for (q = V_ipfw_dyn_v[i] ; q ; q = q->next ) { - if (q->dyn_type == O_LIMIT_PARENT) - continue; - if (q->id.proto != IPPROTO_TCP) - continue; - if ( (q->state & BOTH_SYN) != BOTH_SYN) - continue; - if (TIME_LEQ( time_uptime+V_dyn_keepalive_interval, - q->expire)) - continue; /* too early */ - if (TIME_LEQ(q->expire, time_uptime)) - continue; /* too late, rule expired */ - - m = send_pkt(NULL, &(q->id), q->ack_rev - 1, - q->ack_fwd, TH_SYN); - mnext = send_pkt(NULL, &(q->id), q->ack_fwd - 1, - q->ack_rev, 0); - - switch (q->id.addr_type) { - case 4: - if (m != NULL) { - *mtailp = m; - mtailp = &(*mtailp)->m_nextpkt; - } - if (mnext != NULL) { - *mtailp = mnext; - mtailp = &(*mtailp)->m_nextpkt; - } - break; -#ifdef INET6 - case 6: - if (m != NULL) { - *m6_tailp = m; - m6_tailp = &(*m6_tailp)->m_nextpkt; - } - if (mnext != NULL) { - *m6_tailp = mnext; - m6_tailp = &(*m6_tailp)->m_nextpkt; - } - break; -#endif - } - - m = mnext = NULL; - } - } - IPFW_DYN_UNLOCK(); - for (m = mnext = m0; m != NULL; m = mnext) { - mnext = m->m_nextpkt; - m->m_nextpkt = NULL; - ip_output(m, NULL, NULL, 0, NULL, NULL); - } -#ifdef INET6 - for (m = mnext = m6; m != NULL; m = mnext) { - mnext = m->m_nextpkt; - m->m_nextpkt = NULL; - ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); - } -#endif -done: - callout_reset(&V_ipfw_timeout, V_dyn_keepalive_period*hz, - ipfw_tick, vnetx); - CURVNET_RESTORE(); -} - -static int vnet_ipfw_init(const void *); - -int -ipfw_init(void) -{ - int error = 0; - - ipfw_dyn_rule_zone = uma_zcreate("IPFW dynamic rule", - sizeof(ipfw_dyn_rule), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - - IPFW_DYN_LOCK_INIT(); - error = vnet_ipfw_init(NULL); - if (error) { - IPFW_DYN_LOCK_DESTROY(); - IPFW_LOCK_DESTROY(&V_layer3_chain); - uma_zdestroy(ipfw_dyn_rule_zone); - return (error); - } - - /* - * Only print out this 
stuff the first time around, - * when called from the sysinit code. - */ - printf("ipfw2 " -#ifdef INET6 - "(+ipv6) " -#endif - "initialized, divert %s, nat %s, " - "rule-based forwarding " -#ifdef IPFIREWALL_FORWARD - "enabled, " -#else - "disabled, " -#endif - "default to %s, logging ", -#ifdef IPDIVERT - "enabled", -#else - "loadable", -#endif -#ifdef IPFIREWALL_NAT - "enabled", -#else - "loadable", -#endif - default_to_accept ? "accept" : "deny"); - - /* - * Note: V_xxx variables can be accessed here but the vnet specific - * initializer may not have been called yet for the VIMAGE case. - * Tuneables will have been processed. We will print out values for - * the default vnet. - * XXX This should all be rationalized AFTER 8.0 - */ - if (V_fw_verbose == 0) - printf("disabled\n"); - else if (V_verbose_limit == 0) - printf("unlimited\n"); - else - printf("limited to %d packets/entry by default\n", - V_verbose_limit); - - return (error); -} - -void -ipfw_destroy(void) -{ - struct ip_fw *reap; - - ip_fw_chk_ptr = NULL; - ip_fw_ctl_ptr = NULL; - callout_drain(&ipfw_timeout); - IPFW_WLOCK(&V_layer3_chain); - flush_tables(&V_layer3_chain); - V_layer3_chain.reap = NULL; - free_chain(&V_layer3_chain, 1 /* kill default rule */); - reap = V_layer3_chain.reap, V_layer3_chain.reap = NULL; - IPFW_WUNLOCK(&V_layer3_chain); - if (reap != NULL) - reap_rules(reap); - uma_zdestroy(ipfw_dyn_rule_zone); - IPFW_DYN_LOCK_DESTROY(); - if (V_ipfw_dyn_v != NULL) - free(V_ipfw_dyn_v, M_IPFW); - IPFW_LOCK_DESTROY(&V_layer3_chain); - - printf("IP firewall unloaded\n"); -} - -/**************** - * Stuff that must be initialized for every instance - * (including the first of course). 
- */ -static int -vnet_ipfw_init(const void *unused) -{ - int error; - struct ip_fw default_rule; - - /* First set up some values that are compile time options */ -#ifdef IPFIREWALL_VERBOSE - V_fw_verbose = 1; -#endif -#ifdef IPFIREWALL_VERBOSE_LIMIT - V_verbose_limit = IPFIREWALL_VERBOSE_LIMIT; -#endif - - error = init_tables(&V_layer3_chain); - if (error) { - panic("init_tables"); /* XXX Marko fix this ! */ - } -#ifdef IPFIREWALL_NAT - LIST_INIT(&V_layer3_chain.nat); -#endif - - V_autoinc_step = 100; /* bounded to 1..1000 in add_rule() */ - - V_ipfw_dyn_v = NULL; - V_dyn_buckets = 256; /* must be power of 2 */ - V_curr_dyn_buckets = 256; /* must be power of 2 */ - - V_dyn_ack_lifetime = 300; - V_dyn_syn_lifetime = 20; - V_dyn_fin_lifetime = 1; - V_dyn_rst_lifetime = 1; - V_dyn_udp_lifetime = 10; - V_dyn_short_lifetime = 5; - - V_dyn_keepalive_interval = 20; - V_dyn_keepalive_period = 5; - V_dyn_keepalive = 1; /* do send keepalives */ - - V_dyn_max = 4096; /* max # of dynamic rules */ - - V_fw_deny_unknown_exthdrs = 1; - - V_layer3_chain.rules = NULL; - IPFW_LOCK_INIT(&V_layer3_chain); - callout_init(&V_ipfw_timeout, CALLOUT_MPSAFE); - - bzero(&default_rule, sizeof default_rule); - default_rule.act_ofs = 0; - default_rule.rulenum = IPFW_DEFAULT_RULE; - default_rule.cmd_len = 1; - default_rule.set = RESVD_SET; - default_rule.cmd[0].len = 1; - default_rule.cmd[0].opcode = default_to_accept ? 
O_ACCEPT : O_DENY; - error = add_rule(&V_layer3_chain, &default_rule); - - if (error != 0) { - printf("ipfw2: error %u initializing default rule " - "(support disabled)\n", error); - IPFW_LOCK_DESTROY(&V_layer3_chain); - printf("leaving ipfw_iattach (1) with error %d\n", error); - return (error); - } - - V_layer3_chain.default_rule = V_layer3_chain.rules; - - /* curvnet is NULL in the !VIMAGE case */ - callout_reset(&V_ipfw_timeout, hz, ipfw_tick, curvnet); - - /* First set up some values that are compile time options */ - V_ipfw_vnet_ready = 1; /* Open for business */ - - /* - * Hook the sockopt handler, and the layer2 (V_ip_fw_chk_ptr) - * and pfil hooks for ipv4 and ipv6. Even if the latter two fail - * we still keep the module alive because the sockopt and - * layer2 paths are still useful. - * ipfw[6]_hook return 0 on success, ENOENT on failure, - * so we can ignore the exact return value and just set a flag. - * - * Note that V_fw[6]_enable are manipulated by a SYSCTL_PROC so - * changes in the underlying (per-vnet) variables trigger - * immediate hook()/unhook() calls. - * In layer2 we have the same behaviour, except that V_ether_ipfw - * is checked on each packet because there are no pfil hooks. - */ - V_ip_fw_ctl_ptr = ipfw_ctl; - V_ip_fw_chk_ptr = ipfw_chk; -#ifndef linux - if (V_fw_enable && ipfw_hook() != 0) { - error = ENOENT; /* see ip_fw_pfil.c::ipfw_hook() */ - printf("ipfw_hook() error\n"); - } -#ifdef INET6 - if (V_fw6_enable && ipfw6_hook() != 0) { - error = ENOENT; - printf("ipfw6_hook() error\n"); - } -#endif -#endif /* !linux */ - return (error); -} diff --git a/dummynet/ip_fw_pfil.c b/dummynet/ip_fw_pfil.c deleted file mode 100644 index b3fcba6..0000000 --- a/dummynet/ip_fw_pfil.c +++ /dev/null @@ -1,615 +0,0 @@ -/*- - * Copyright (c) 2004 Andre Oppermann, Internet Business Solutions AG - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/netinet/ip_fw_pfil.c,v 1.25.2.2 2008/04/25 10:26:30 oleg Exp $"); - -#if !defined(KLD_MODULE) -#include "opt_ipfw.h" -#include "opt_ipdn.h" -#include "opt_inet.h" -#ifndef INET -#error IPFIREWALL requires INET.
-#endif /* INET */ -#endif /* KLD_MODULE */ -#include "opt_inet6.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -VNET_DEFINE(int, fw_enable) = 1; -#ifdef INET6 -VNET_DEFINE(int, fw6_enable) = 1; -#endif - -int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); - -/* Divert hooks. */ -ip_divert_packet_t *ip_divert_ptr = NULL; - -/* ng_ipfw hooks. */ -ng_ipfw_input_t *ng_ipfw_input_p = NULL; - -/* Forward declarations. */ -static int ipfw_divert(struct mbuf **, int, int); -#define DIV_DIR_IN 1 -#define DIV_DIR_OUT 0 - -int -ipfw_check_in(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) -{ - struct ip_fw_args args; - struct ng_ipfw_tag *ng_tag; - struct m_tag *dn_tag; - int ipfw = 0; - int divert; - int tee; -#ifdef IPFIREWALL_FORWARD - struct m_tag *fwd_tag; -#endif - - KASSERT(dir == PFIL_IN, ("ipfw_check_in wrong direction!")); - - bzero(&args, sizeof(args)); - - ng_tag = (struct ng_ipfw_tag *)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, - NULL); - if (ng_tag != NULL) { - KASSERT(ng_tag->dir == NG_IPFW_IN, - ("ng_ipfw tag with wrong direction")); - args.rule = ng_tag->rule; - args.rule_id = ng_tag->rule_id; - args.chain_id = ng_tag->chain_id; - m_tag_delete(*m0, (struct m_tag *)ng_tag); - } - -again: - dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); - if (dn_tag != NULL){ - struct dn_pkt_tag *dt; - - dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; - args.rule_id = dt->rule_id; - args.chain_id = dt->chain_id; - - m_tag_delete(*m0, dn_tag); - } - - args.m = *m0; - args.inp = inp; - tee = 0; - - if (V_fw_one_pass == 0 || args.rule == NULL) { - ipfw = ipfw_chk(&args); - *m0 = args.m; - } else - ipfw = IP_FW_PASS; - - KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", - __func__)); - - switch (ipfw) { - case IP_FW_PASS: 
- if (args.next_hop == NULL) - goto pass; - -#ifdef IPFIREWALL_FORWARD - fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, - sizeof(struct sockaddr_in), M_NOWAIT); - if (fwd_tag == NULL) - goto drop; - bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in)); - m_tag_prepend(*m0, fwd_tag); - - if (in_localip(args.next_hop->sin_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - goto pass; -#endif - break; /* not reached */ - - case IP_FW_DENY: - goto drop; - break; /* not reached */ - - case IP_FW_DUMMYNET: - if (ip_dn_io_ptr == NULL) - goto drop; - if (mtod(*m0, struct ip *)->ip_v == 4) - ip_dn_io_ptr(m0, DN_TO_IP_IN, &args); - else if (mtod(*m0, struct ip *)->ip_v == 6) - ip_dn_io_ptr(m0, DN_TO_IP6_IN, &args); - if (*m0 != NULL) - goto again; - return 0; /* packet consumed */ - - case IP_FW_TEE: - tee = 1; - /* fall through */ - - case IP_FW_DIVERT: - divert = ipfw_divert(m0, DIV_DIR_IN, tee); - if (divert) { - *m0 = NULL; - return 0; /* packet consumed */ - } else { - args.rule = NULL; - goto again; /* continue with packet */ - } - - case IP_FW_NGTEE: - if (!NG_IPFW_LOADED) - goto drop; - (void)ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 1); - goto again; /* continue with packet */ - - case IP_FW_NETGRAPH: - if (!NG_IPFW_LOADED) - goto drop; - return ng_ipfw_input_p(m0, NG_IPFW_IN, &args, 0); - - case IP_FW_NAT: - goto again; /* continue with packet */ - - case IP_FW_REASS: - goto again; - - default: - KASSERT(0, ("%s: unknown retval", __func__)); - } - -drop: - if (*m0) - m_freem(*m0); - *m0 = NULL; - return (EACCES); -pass: - return 0; /* not filtered */ -} - -int -ipfw_check_out(void *arg, struct mbuf **m0, struct ifnet *ifp, int dir, - struct inpcb *inp) -{ - struct ip_fw_args args; - struct ng_ipfw_tag *ng_tag; - struct m_tag *dn_tag; - int ipfw = 0; - int divert; - int tee; -#ifdef IPFIREWALL_FORWARD - struct m_tag *fwd_tag; -#endif - - KASSERT(dir == PFIL_OUT, ("ipfw_check_out wrong direction!")); - - bzero(&args, sizeof(args)); - - ng_tag = (struct ng_ipfw_tag 
*)m_tag_locate(*m0, NGM_IPFW_COOKIE, 0, - NULL); - if (ng_tag != NULL) { - KASSERT(ng_tag->dir == NG_IPFW_OUT, - ("ng_ipfw tag with wrong direction")); - args.rule = ng_tag->rule; - args.rule_id = ng_tag->rule_id; - args.chain_id = ng_tag->chain_id; - m_tag_delete(*m0, (struct m_tag *)ng_tag); - } - -again: - dn_tag = m_tag_find(*m0, PACKET_TAG_DUMMYNET, NULL); - if (dn_tag != NULL) { - struct dn_pkt_tag *dt; - - dt = (struct dn_pkt_tag *)(dn_tag+1); - args.rule = dt->rule; - args.rule_id = dt->rule_id; - args.chain_id = dt->chain_id; - - m_tag_delete(*m0, dn_tag); - } - - args.m = *m0; - args.oif = ifp; - args.inp = inp; - tee = 0; - - if (V_fw_one_pass == 0 || args.rule == NULL) { - ipfw = ipfw_chk(&args); - *m0 = args.m; - } else - ipfw = IP_FW_PASS; - - KASSERT(*m0 != NULL || ipfw == IP_FW_DENY, ("%s: m0 is NULL", - __func__)); - - switch (ipfw) { - case IP_FW_PASS: - if (args.next_hop == NULL) - goto pass; -#ifdef IPFIREWALL_FORWARD - /* Overwrite existing tag. */ - fwd_tag = m_tag_find(*m0, PACKET_TAG_IPFORWARD, NULL); - if (fwd_tag == NULL) { - fwd_tag = m_tag_get(PACKET_TAG_IPFORWARD, - sizeof(struct sockaddr_in), M_NOWAIT); - if (fwd_tag == NULL) - goto drop; - } else - m_tag_unlink(*m0, fwd_tag); - bcopy(args.next_hop, (fwd_tag+1), sizeof(struct sockaddr_in)); - m_tag_prepend(*m0, fwd_tag); - - if (in_localip(args.next_hop->sin_addr)) - (*m0)->m_flags |= M_FASTFWD_OURS; - goto pass; -#endif - break; /* not reached */ - - case IP_FW_DENY: - goto drop; - break; /* not reached */ - - case IP_FW_DUMMYNET: - if (ip_dn_io_ptr == NULL) - break; - if (mtod(*m0, struct ip *)->ip_v == 4) - ip_dn_io_ptr(m0, DN_TO_IP_OUT, &args); - else if (mtod(*m0, struct ip *)->ip_v == 6) - ip_dn_io_ptr(m0, DN_TO_IP6_OUT, &args); - if (*m0 != NULL) - goto again; - return 0; /* packet consumed */ - - break; - - case IP_FW_TEE: - tee = 1; - /* fall through */ - - case IP_FW_DIVERT: - divert = ipfw_divert(m0, DIV_DIR_OUT, tee); - if (divert) { - *m0 = NULL; - return 0; /* packet 
consumed */ - } else { - args.rule = NULL; - goto again; /* continue with packet */ - } - - case IP_FW_NGTEE: - if (!NG_IPFW_LOADED) - goto drop; - (void)ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 1); - goto again; /* continue with packet */ - - case IP_FW_NETGRAPH: - if (!NG_IPFW_LOADED) - goto drop; - return ng_ipfw_input_p(m0, NG_IPFW_OUT, &args, 0); - - case IP_FW_NAT: - goto again; /* continue with packet */ - - case IP_FW_REASS: - goto again; - - default: - KASSERT(0, ("%s: unknown retval", __func__)); - } - -drop: - if (*m0) - m_freem(*m0); - *m0 = NULL; - return (EACCES); -pass: - return 0; /* not filtered */ -} - -static int -ipfw_divert(struct mbuf **m, int incoming, int tee) -{ - /* - * ipfw_chk() has already tagged the packet with the divert tag. - * If tee is set, copy packet and return original. - * If not tee, consume packet and send it to divert socket. - */ - struct mbuf *clone, *reass; - struct ip *ip; - int hlen; - - reass = NULL; - - /* Is divert module loaded? */ - if (ip_divert_ptr == NULL) - goto nodivert; - - /* Cloning needed for tee? */ - if (tee) - clone = m_dup(*m, M_DONTWAIT); - else - clone = *m; - - /* In case m_dup was unable to allocate mbufs. */ - if (clone == NULL) - goto teeout; - - /* - * Divert listeners can only handle non-fragmented packets. - * However when tee is set we will *not* de-fragment the packets; - * Doing do would put the reassembly into double-jeopardy. On top - * of that someone doing a tee will probably want to get the packet - * in its original form. - */ - ip = mtod(clone, struct ip *); - if (!tee && ip->ip_off & (IP_MF | IP_OFFMASK)) { - - /* Reassemble packet. */ - reass = ip_reass(clone); - - /* - * IP header checksum fixup after reassembly and leave header - * in network byte order. 
- */ - if (reass != NULL) { - ip = mtod(reass, struct ip *); - hlen = ip->ip_hl << 2; - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); - ip->ip_sum = 0; - if (hlen == sizeof(struct ip)) - ip->ip_sum = in_cksum_hdr(ip); - else - ip->ip_sum = in_cksum(reass, hlen); - clone = reass; - } else - clone = NULL; - } else { - /* Convert header to network byte order. */ - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); - } - - /* Do the dirty job... */ - if (clone && ip_divert_ptr != NULL) - ip_divert_ptr(clone, incoming); - -teeout: - /* - * For tee we leave the divert tag attached to original packet. - * It will then continue rule evaluation after the tee rule. - */ - if (tee) - return 0; - - /* Packet diverted and consumed */ - return 1; - -nodivert: - m_freem(*m); - return 1; -} - -int -ipfw_hook(void) -{ - struct pfil_head *pfh_inet; - - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return ENOENT; - - (void)pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - (void)pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); - - return 0; -} - -int -ipfw_unhook(void) -{ - struct pfil_head *pfh_inet; - - pfh_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); - if (pfh_inet == NULL) - return ENOENT; - - (void)pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet); - (void)pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet); - - return 0; -} - -#ifdef INET6 -int -ipfw6_hook(void) -{ - struct pfil_head *pfh_inet6; - - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) - return ENOENT; - - (void)pfil_add_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet6); - (void)pfil_add_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet6); - - return 0; -} - -int -ipfw6_unhook(void) -{ - struct pfil_head *pfh_inet6; - - pfh_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); - if (pfh_inet6 == NULL) - return ENOENT; - 
- (void)pfil_remove_hook(ipfw_check_in, NULL, PFIL_IN | PFIL_WAITOK, - pfh_inet6); - (void)pfil_remove_hook(ipfw_check_out, NULL, PFIL_OUT | PFIL_WAITOK, - pfh_inet6); - - return 0; -} -#endif /* INET6 */ - -int -ipfw_chg_hook(SYSCTL_HANDLER_ARGS) -{ - int enable; - int oldenable; - int error; - - if (arg1 == &VNET_NAME(fw_enable)) { - enable = V_fw_enable; - } -#ifdef INET6 - else if (arg1 == &VNET_NAME(fw6_enable)) { - enable = V_fw6_enable; - } -#endif - else - return (EINVAL); - - oldenable = enable; - - error = sysctl_handle_int(oidp, &enable, 0, req); - - if (error) - return (error); - - enable = (enable) ? 1 : 0; - - if (enable == oldenable) - return (0); - - if (arg1 == &VNET_NAME(fw_enable)) { - if (enable) - error = ipfw_hook(); - else - error = ipfw_unhook(); - if (error) - return (error); - V_fw_enable = enable; - } -#ifdef INET6 - else if (arg1 == &VNET_NAME(fw6_enable)) { - if (enable) - error = ipfw6_hook(); - else - error = ipfw6_unhook(); - if (error) - return (error); - V_fw6_enable = enable; - } -#endif - - return (0); -} - -static int -ipfw_modevent(module_t mod, int type, void *unused) -{ - int err = 0; - - switch (type) { - case MOD_LOAD: - if ((err = ipfw_init()) != 0) { - printf("ipfw_init() error\n"); - break; - } - if ((err = ipfw_hook()) != 0) { - printf("ipfw_hook() error\n"); - break; - } -#ifdef INET6 - if ((err = ipfw6_hook()) != 0) { - printf("ipfw_hook() error\n"); - break; - } -#endif - break; - - case MOD_UNLOAD: - if ((err = ipfw_unhook()) > 0) - break; -#ifdef INET6 - if ((err = ipfw6_unhook()) > 0) - break; -#endif - ipfw_destroy(); - break; - - default: - return EOPNOTSUPP; - break; - } - return err; -} - -static moduledata_t ipfwmod = { - "ipfw", - ipfw_modevent, - 0 -}; -DECLARE_MODULE(ipfw, ipfwmod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY - 256); -MODULE_VERSION(ipfw, 2); diff --git a/dummynet/ipfw2_mod.c b/dummynet/ipfw2_mod.c deleted file mode 100644 index 4b7edd1..0000000 --- a/dummynet/ipfw2_mod.c +++ /dev/null @@ 
-1,742 +0,0 @@ -/* - * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * - * The main interface to build ipfw+dummynet as a linux module. - * (and possibly as a windows module as well, though that part - * is not complete yet). - * - * The control interface uses the sockopt mechanism - * on a socket(AF_INET, SOCK_RAW, IPPROTO_RAW). - * - * The data interface uses the netfilter interface, at the moment - * hooked to the PRE_ROUTING and POST_ROUTING hooks. - * Unfortunately the netfilter interface is a moving target, - * so we need a set of macros to adapt to the various cases. 
- * - * In the netfilter hook we just mark packet as 'QUEUE' and then - * let the queue handler to do the whole work (filtering and - * possibly emulation). - * As we receive packets, we wrap them with an mbuf descriptor - * so the existing ipfw+dummynet code runs unmodified. - */ - -#include -#include /* sizeof struct mbuf */ -#include /* NGROUPS */ - -#ifdef __linux__ -#include -#include -#include -#include /* NF_IP_PRI_FILTER */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) -#include /* nf_queue */ -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -#define __read_mostly -#endif - -#endif /* !__linux__ */ - -#include /* in_addr */ -#include /* ip_fw_ctl_t, ip_fw_chk_t */ -#include /* ip_dn_ctl_t, ip_dn_io_t */ -#include /* PFIL_IN, PFIL_OUT */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -#warning --- inet_hashtables not present on 2.4 -#include -#include -#include -static inline int inet_iif(const struct sk_buff *skb) -{ - return ((struct rtable *)skb->dst)->rt_iif; -} - -#else -#include /* inet_lookup */ -#endif -#include /* inet_iif */ - -/* - * Here we allocate some global variables used in the firewall. - */ -//ip_dn_ctl_t *ip_dn_ctl_ptr; -int (*ip_dn_ctl_ptr)(struct sockopt *); - -ip_fw_ctl_t *ip_fw_ctl_ptr; - -int (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa); -ip_fw_chk_t *ip_fw_chk_ptr; - -void (*bridge_dn_p)(struct mbuf *, struct ifnet *); - -/*--- - * Glue code to implement the registration of children with the parent. - * Each child should call my_mod_register() when linking, so that - * module_init() and module_exit() can call init_children() and - * fini_children() to provide the necessary initialization. - */ -#include -struct mod_args { - struct moduledata *mod; - const char *name; - int order; -}; - -static unsigned int mod_idx; -static struct mod_args mods[10]; /* hard limit to 10 modules */ - -/* - * my_mod_register should be called automatically as the init - * functions in the submodules. 
Unfortunately this compiler/linker - * trick is not supported yet so we call it manually. - */ -int -my_mod_register(struct moduledata *mod, const char *name, int order) -{ - struct mod_args m = { mod, name, order }; - - printf("%s %s called\n", __FUNCTION__, name); - if (mod_idx < sizeof(mods) / sizeof(mods[0])) - mods[mod_idx++] = m; - return 0; -} - -static void -init_children(void) -{ - unsigned int i; - - /* Call the functions registered at init time. */ - printf("%s mod_idx value %d\n", __FUNCTION__, mod_idx); - for (i = 0; i < mod_idx; i++) { - printf("+++ start module %d %s %s at %p order 0x%x\n", - i, mods[i].name, mods[i].mod->name, - mods[i].mod, mods[i].order); - mods[i].mod->evhand(NULL, MOD_LOAD, mods[i].mod->priv); - } -} - -static void -fini_children(void) -{ - int i; - - /* Call the functions registered at init time. */ - for (i = mod_idx - 1; i >= 0; i--) { - printf("+++ end module %d %s %s at %p order 0x%x\n", - i, mods[i].name, mods[i].mod->name, - mods[i].mod, mods[i].order); - mods[i].mod->evhand(NULL, MOD_UNLOAD, mods[i].mod->priv); - } -} -/*--- end of module binding helper functions ---*/ - -/*--- - * Control hooks: - * ipfw_ctl_h() is a wrapper for linux to FreeBSD sockopt call convention. - * then call the ipfw handler in order to manage requests. - * In turn this is called by the linux set/get handlers. 
- */ -static int -ipfw_ctl_h(struct sockopt *s, int cmd, int dir, int len, void __user *user) -{ - struct thread t; - int ret = EINVAL; - - memset(s, 0, sizeof(s)); - s->sopt_name = cmd; - s->sopt_dir = dir; - s->sopt_valsize = len; - s->sopt_val = user; - - /* sopt_td is not used but it is referenced */ - memset(&t, 0, sizeof(t)); - s->sopt_td = &t; - - // printf("%s called with cmd %d len %d\n", __FUNCTION__, cmd, len); - - if (cmd < IP_DUMMYNET_CONFIGURE && ip_fw_ctl_ptr) - ret = ip_fw_ctl_ptr(s); - else if (cmd >= IP_DUMMYNET_CONFIGURE && ip_dn_ctl_ptr) - ret = ip_dn_ctl_ptr(s); - - return -ret; /* errors are < 0 on linux */ -} - -#ifdef _WIN32 - -void -netisr_dispatch(int __unused num, struct mbuf *m) -{ -} - -int -ip_output(struct mbuf *m, struct mbuf __unused *opt, - struct route __unused *ro, int __unused flags, - struct ip_moptions __unused *imo, struct inpcb __unused *inp) -{ - netisr_dispatch(0, m); - return 0; -} - -#else /* this is the linux glue */ -/* - * setsockopt hook has no return value other than the error code. - */ -static int -do_ipfw_set_ctl(struct sock __unused *sk, int cmd, - void __user *user, unsigned int len) -{ - struct sockopt s; /* pass arguments */ - - return ipfw_ctl_h(&s, cmd, SOPT_SET, len, user); -} - -/* - * getsockopt can can return a block of data in response. 
- */ -static int -do_ipfw_get_ctl(struct sock __unused *sk, - int cmd, void __user *user, int *len) -{ - struct sockopt s; /* pass arguments */ - int ret = ipfw_ctl_h(&s, cmd, SOPT_GET, *len, user); - - *len = s.sopt_valsize; /* return lenght back to the caller */ - return ret; -} - -/* - * declare our [get|set]sockopt hooks - */ -static struct nf_sockopt_ops ipfw_sockopts = { - .pf = PF_INET, - .set_optmin = _IPFW_SOCKOPT_BASE, - .set_optmax = _IPFW_SOCKOPT_END, - .set = do_ipfw_set_ctl, - .get_optmin = _IPFW_SOCKOPT_BASE, - .get_optmax = _IPFW_SOCKOPT_END, - .get = do_ipfw_get_ctl, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24) - .owner = THIS_MODULE, -#endif -}; - -/*---- - * We need a number of macros to adapt to the various APIs in - * different linux versions. Among them: - * - * - the hook names change between macros (NF_IP*) and enum NF_INET_* - * - * - the second argument to the netfilter hook is - * struct sk_buff ** in kernels <= 2.6.22 - * struct sk_buff * in kernels > 2.6.22 - * - * - NF_STOP is not defined before 2.6 so we remap it to NF_ACCEPT - * - * - the packet descriptor passed to the queue handler is - * struct nf_info in kernels <= 2.6.24 - * struct nf_queue_entry in kernels <= 2.6.24 - * - * - the arguments to the queue handler also change; - */ - -/* - * declare hook to grab packets from the netfilter interface. - * The NF_* names change in different versions of linux, in some - * cases they are #defines, in others they are enum, so we - * need to adapt. - */ -#ifndef NF_IP_PRE_ROUTING -#define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING -#endif -#ifndef NF_IP_POST_ROUTING -#define NF_IP_POST_ROUTING NF_INET_POST_ROUTING -#endif - -/* - * ipfw hooks into the POST_ROUTING and the PRE_ROUTING chains. - * PlanetLab sets skb_tag to the slice id in the LOCAL_INPUT and - * POST_ROUTING chains, so if we want to use that information we - * need to hook the LOCAL_INPUT chain instead of the PRE_ROUTING. 
- * However at the moment the skb_tag info is not reliable so - * we stay with the standard hooks. - */ -#if 0 // defined(IPFW_PLANETLAB) -#define IPFW_HOOK_IN NF_IP_LOCAL_IN -#else -#define IPFW_HOOK_IN NF_IP_PRE_ROUTING -#endif - -/* - * The main netfilter hook. - * To make life simple, we queue everything and then do all the - * decision in the queue handler. - * - * XXX note that in 2.4 and up to 2.6.22 the skbuf is passed as sk_buff** - * so we have an #ifdef to set the proper argument type. - */ -static unsigned int -call_ipfw(unsigned int __unused hooknum, -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) // in 2.6.22 we have ** - struct sk_buff __unused **skb, -#else - struct sk_buff __unused *skb, -#endif - const struct net_device __unused *in, - const struct net_device __unused *out, - int __unused (*okfn)(struct sk_buff *)) -{ - return NF_QUEUE; -} - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -#define NF_STOP NF_ACCEPT -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - -/* - * nf_queue_entry is a recent addition, in previous versions - * of the code the struct is called nf_info. - */ -#define nf_queue_entry nf_info /* for simplicity */ - -/* also, 2.4 and perhaps something else have different arguments */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) /* unsure on the exact boundary */ -/* on 2.4 we use nf_info */ -#define QH_ARGS struct sk_buff *skb, struct nf_info *info, void *data -#else /* 2.6.1.. 
2.6.24 */ -#define QH_ARGS struct sk_buff *skb, struct nf_info *info, unsigned int qnum, void *data -#endif - -#define DEFINE_SKB /* nothing, already an argument */ -#define REINJECT(_inf, _verd) nf_reinject(skb, _inf, _verd) - -#else /* 2.6.25 and above */ - -#define QH_ARGS struct nf_queue_entry *info, unsigned int queuenum -#define DEFINE_SKB struct sk_buff *skb = info->skb; -#define REINJECT(_inf, _verd) nf_reinject(_inf, _verd) -#endif - -/* - * used by dummynet when dropping packets - * XXX use dummynet_send() - */ -void -reinject_drop(struct mbuf* m) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) /* unsure on the exact boundary */ - struct sk_buff *skb = (struct sk_buff *)m; -#endif - REINJECT(m->queue_entry, NF_DROP); -} - -/* - * The real call to the firewall. nf_queue_entry points to the skbuf, - * and eventually we need to return both through nf_reinject(). - */ -static int -ipfw2_queue_handler(QH_ARGS) -{ - DEFINE_SKB /* no semicolon here, goes in the macro */ - int ret = 0; /* return value */ - struct mbuf *m; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - if (skb->nh.iph == NULL) { - printf("null dp, len %d reinject now\n", skb->len); - REINJECT(info, NF_ACCEPT); - return 0; - } -#endif - m = malloc(sizeof(*m), 0, 0); - if (m == NULL) { - printf("malloc fail, len %d reinject now\n", skb->len); - REINJECT(info, NF_ACCEPT); - return 0; - } - - m->m_skb = skb; - m->m_len = skb->len; /* len in this skbuf */ - m->m_pkthdr.len = skb->len; /* total packet len */ - m->m_pkthdr.rcvif = info->indev; - m->queue_entry = info; -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) - m->m_data = skb->nh.iph; -#else - m->m_data = skb_network_header(skb); -#endif - - /* XXX add the interface */ - if (info->hook == IPFW_HOOK_IN) { - ret = ipfw_check_in(NULL, &m, info->indev, PFIL_IN, NULL); - } else { - ret = ipfw_check_out(NULL, &m, info->outdev, PFIL_OUT, NULL); - } - - if (m != NULL) { /* Accept. 
reinject and free the mbuf */ - REINJECT(info, NF_ACCEPT); - m_freem(m); - } else if (ret == 0) { - /* dummynet has kept the packet, will reinject later. */ - } else { - /* - * Packet dropped by ipfw or dummynet, reinject as NF_DROP - * mbuf already released by ipfw itself - */ - REINJECT(info, NF_DROP); - } - return 0; -} - -struct route; -struct ip_moptions; -struct inpcb; - - -/* XXX should include prototypes for netisr_dispatch and ip_output */ -/* - * The reinjection routine after a packet comes out from dummynet. - * We must update the skb timestamp so ping reports the right time. - */ -void -netisr_dispatch(int num, struct mbuf *m) -{ - struct nf_queue_entry *info = m->queue_entry; - struct sk_buff *skb = m->m_skb; /* always used */ - - m_freem(m); - - KASSERT((info != NULL), ("%s info null!\n", __FUNCTION__)); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,22) // XXX above 2.6.x ? - __net_timestamp(skb); /* update timestamp */ -#endif - - /* XXX to obey one-pass, possibly call the queue handler here */ - REINJECT(info, ((num == -1)?NF_DROP:NF_STOP)); /* accept but no more firewall */ -} - -int -ip_output(struct mbuf *m, struct mbuf __unused *opt, - struct route __unused *ro, int __unused flags, - struct ip_moptions __unused *imo, struct inpcb __unused *inp) -{ - netisr_dispatch(0, m); - return 0; -} - -/* - * socket lookup function for linux. - * This code is used to associate uid, gid, jail/xid to packets, - * and store the info in a cache *ugp where they can be accessed quickly. - * The function returns 1 if the info is found, -1 otherwise. - * - * We do this only on selected protocols: TCP, ... 
- * - * The chain is the following - * sk_buff* sock* socket* file* - * skb -> sk ->sk_socket->file ->f_owner ->pid - * skb -> sk ->sk_socket->file ->f_uid (direct) - * skb -> sk ->sk_socket->file ->f_cred->fsuid (2.6.29+) - * - * Related headers: - * linux/skbuff.h struct skbuff - * net/sock.h struct sock - * linux/net.h struct socket - * linux/fs.h struct file - * - * With vserver we may have sk->sk_xid and sk->sk_nid that - * which we store in fw_groups[1] (matches O_JAIL) and fw_groups[2] - * (no matches yet) - * - * Note- for locally generated, outgoing packets we should not need - * need a lookup because the sk_buff already points to the socket where - * the info is. - */ -extern struct inet_hashinfo tcp_hashinfo; -int -linux_lookup(const int proto, const __be32 saddr, const __be16 sport, - const __be32 daddr, const __be16 dport, - struct sk_buff *skb, int dir, struct bsd_ucred *u) -{ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,0) - return -1; -#else - struct sock *sk; - int ret = -1; /* default return value */ - int st = -1; /* state */ - - - if (proto != IPPROTO_TCP) /* XXX extend for UDP */ - return -1; - - if ((dir ? (void *)skb_dst(skb) : (void *)skb->dev) == NULL) { - panic(" -- this should not happen\n"); - return -1; - } - - if (skb->sk) { - sk = skb->sk; - } else { - /* - * Try a lookup. On a match, sk has a refcount that we must - * release on exit (we know it because skb->sk = NULL). - * - * inet_lookup above 2.6.24 has an additional 'net' parameter - * so we use a macro to conditionally supply it. - * swap dst and src depending on the direction. - */ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) -#define _OPT_NET_ARG -#else -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) -/* there is no dev_net() on 2.6.25 */ -#define _OPT_NET_ARG (skb->dev->nd_net), -#else /* 2.6.26 and above */ -#define _OPT_NET_ARG dev_net(skb->dev), -#endif -#endif - sk = (dir) ? 
/* dir != 0 on output */ - inet_lookup(_OPT_NET_ARG &tcp_hashinfo, - daddr, dport, saddr, sport, // match outgoing - inet_iif(skb)) : - inet_lookup(_OPT_NET_ARG &tcp_hashinfo, - saddr, sport, daddr, dport, // match incoming - skb->dev->ifindex); -#undef _OPT_NET_ARG - - if (sk == NULL) /* no match, nothing to be done */ - return -1; - } - ret = 1; /* retrying won't make things better */ - st = sk->sk_state; -#ifdef CONFIG_VSERVER - u->xid = sk->sk_xid; - u->nid = sk->sk_nid; -#else - u->xid = u->nid = 0; -#endif - /* - * Exclude tcp states where sk points to a inet_timewait_sock which - * has no sk_socket field (surely TCP_TIME_WAIT, perhaps more). - * To be safe, use a whitelist and not a blacklist. - * Before dereferencing sk_socket grab a lock on sk_callback_lock. - * - * Once again we need conditional code because the UID and GID - * location changes between kernels. - */ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,28) -/* use the current's real uid/gid */ -#define _CURR_UID f_uid -#define _CURR_GID f_gid -#else /* 2.6.29 and above */ -/* use the current's file access real uid/gid */ -#define _CURR_UID f_cred->fsuid -#define _CURR_GID f_cred->fsgid -#endif - -#define GOOD_STATES ( \ - (1<sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - u->uid = sk->sk_socket->file->_CURR_UID; - u->gid = sk->sk_socket->file->_CURR_GID; - } - read_unlock_bh(&sk->sk_callback_lock); - } else { - u->uid = u->gid = 0; - } - if (!skb->sk) /* return the reference that came from the lookup */ - sock_put(sk); -#undef GOOD_STATES -#undef _CURR_UID -#undef _CURR_GID - return ret; - -#endif /* LINUX > 2.4 */ -} - -/* - * Now prepare to hook the various functions. 
- * Linux 2.4 has a different API so we need some adaptation - * for register and unregister hooks - * - * the unregister function changed arguments between 2.6.22 and 2.6.24 - */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -static int -nf_register_hooks(struct nf_hook_ops *ops, int n) -{ - int i, ret = 0; - for (i = 0; i < n; i++) { - ret = nf_register_hook(ops + i); - if (ret < 0) - break; - } - return ret; -} - -static void -nf_unregister_hooks(struct nf_hook_ops *ops, int n) -{ - int i; - for (i = 0; i < n; i++) { - nf_unregister_hook(ops + i); - } -} -#define REG_QH_ARG(fn) fn, NULL /* argument for nf_[un]register_queue_handler */ -#define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ -#define SET_MOD_OWNER - -#else /* linux >= 2.6.0 */ - -struct nf_queue_handler ipfw2_queue_handler_desc = { - .outfn = ipfw2_queue_handler, - .name = "ipfw2 dummynet queue", -}; -#define REG_QH_ARG(fn) &(fn ## _desc) - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) -#define UNREG_QH_ARG(fn) //fn /* argument for nf_[un]register_queue_handler */ -#else -#define UNREG_QH_ARG(fn) , &(fn ## _desc) -#endif /* 2.6.0 < LINUX > 2.6.24 */ - -#define SET_MOD_OWNER .owner = THIS_MODULE, - -#endif /* !LINUX < 2.6.0 */ - -static struct nf_hook_ops ipfw_ops[] __read_mostly = { - { - .hook = call_ipfw, - .pf = PF_INET, - .hooknum = IPFW_HOOK_IN, - .priority = NF_IP_PRI_FILTER, - SET_MOD_OWNER - }, - { - .hook = call_ipfw, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_FILTER, - SET_MOD_OWNER - }, -}; -#endif /* !__linux__ */ - -/* descriptors for the children */ -extern moduledata_t *moddesc_ipfw; -extern moduledata_t *moddesc_dummynet; - -extern void rn_init(void); -/* - * Module glue - init and exit function. 
- */ -static int __init -ipfw_module_init(void) -{ - int ret = 0; - - printf("%s in-hook %d svn id %s\n", __FUNCTION__, IPFW_HOOK_IN, "$Id$"); - - rn_init(); - - my_mod_register(moddesc_ipfw, "ipfw", 1); - my_mod_register(moddesc_dummynet, "dummynet", 2); - init_children(); - -#ifdef _WIN32 - return ret; - -#else /* linux hook */ - /* sockopt register, in order to talk with user space */ - ret = nf_register_sockopt(&ipfw_sockopts); - if (ret < 0) { - printf("error %d in nf_register_sockopt\n", ret); - goto clean_modules; - } - - /* queue handler registration, in order to get network - * packet under a private queue */ - ret = nf_register_queue_handler(PF_INET, REG_QH_ARG(ipfw2_queue_handler) ); - if (ret < 0) /* queue busy */ - goto unregister_sockopt; - - ret = nf_register_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); - if (ret < 0) - goto unregister_sockopt; - - printf("%s loaded\n", __FUNCTION__); - return 0; - - -/* handle errors on load */ -unregister_sockopt: - nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); - nf_unregister_sockopt(&ipfw_sockopts); - -clean_modules: - fini_children(); - printf("%s error\n", __FUNCTION__); - - return ret; -#endif /* linux */ -} - -/* module shutdown */ -static void __exit -ipfw_module_exit(void) -{ -#ifdef _WIN32 -#else /* linux hook */ - nf_unregister_hooks(ipfw_ops, ARRAY_SIZE(ipfw_ops)); - /* maybe drain the queue before unregistering ? */ - nf_unregister_queue_handler(PF_INET UNREG_QH_ARG(ipfw2_queue_handler) ); - nf_unregister_sockopt(&ipfw_sockopts); -#endif /* linux */ - - fini_children(); - - printf("%s unloaded\n", __FUNCTION__); -} - -#ifdef __linux__ -module_init(ipfw_module_init) -module_exit(ipfw_module_exit) -MODULE_LICENSE("Dual BSD/GPL"); /* the code here is all BSD. 
*/ -#endif diff --git a/dummynet/missing.h b/dummynet/missing.h deleted file mode 100644 index 5b04dce..0000000 --- a/dummynet/missing.h +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Copyright (C) 2009 Luigi Rizzo, Marta Carbone, Universita` di Pisa - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -/* - * $Id$ - * - * Header for kernel variables and functions that are not available in - * userland. 
- */ - -#ifndef _MISSING_H_ -#define _MISSING_H_ - -#include - -#ifdef _WIN32 - -#ifndef DEFINE_SPINLOCK -#define DEFINE_SPINLOCK(x) FAST_MUTEX x -#endif -/* spinlock --> Guarded Mutex KGUARDED_MUTEX */ -/* http://www.reactos.org/wiki/index.php/Guarded_Mutex */ -#define spin_lock_init(_l) -#define spin_lock_bh(_l) -#define spin_unlock_bh(_l) - -#include /* bsd-compat.c */ -#include /* bsd-compat.c */ -#include /* local version */ - -#else /* __linux__ */ - -#define MALLOC_DECLARE(x) /* nothing */ -#include /* do_gettimeofday */ -#include /* local version */ -struct inpcb; - -/* - * Kernel locking support. - * FreeBSD uses mtx in dummynet.c and struct rwlock ip_fw2.c - * - * In linux we use spinlock_bh to implement both. - * For 'struct rwlock' we need an #ifdef to change it to spinlock_t - */ - -#ifndef DEFINE_SPINLOCK /* this is for linux 2.4 */ -#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED -#endif - -#endif /* __linux__ */ - -#define rw_assert(a, b) -#define rw_destroy(_l) -#define rw_init(_l, msg) spin_lock_init(_l) -#define rw_rlock(_l) spin_lock_bh(_l) -#define rw_runlock(_l) spin_unlock_bh(_l) -#define rw_wlock(_l) spin_lock_bh(_l) -#define rw_wunlock(_l) spin_unlock_bh(_l) -#define rw_init_flags(_l, s, v) - -#define mtx_assert(a, b) -#define mtx_destroy(m) -#define mtx_init(m, a,b,c) spin_lock_init(m) -#define mtx_lock(_l) spin_lock_bh(_l) -#define mtx_unlock(_l) spin_unlock_bh(_l) - -/* end of locking support */ - -/* in netinet/in.h */ -#define in_nullhost(x) ((x).s_addr == INADDR_ANY) - -/* bzero not present on linux, but this should go in glue.h */ -#define bzero(s, n) memset(s, 0, n) -#define bcmp(p1, p2, n) memcmp(p1, p2, n) - -/* ethernet stuff */ -#define ETHERTYPE_IP 0x0800 /* IP protocol */ -#define ETHER_ADDR_LEN 6 /* length of an Ethernet address */ -struct ether_header { - u_char ether_dhost[ETHER_ADDR_LEN]; - u_char ether_shost[ETHER_ADDR_LEN]; - u_short ether_type; -}; - -#define ETHER_ADDR_LEN 6 /* length of an Ethernet 
address */ -#define ETHER_TYPE_LEN 2 /* length of the Ethernet type field */ -#define ETHER_HDR_LEN (ETHER_ADDR_LEN*2+ETHER_TYPE_LEN) - -/* ip_dummynet.c */ -#define __FreeBSD_version 500035 - -#ifdef __linux__ -struct moduledata; -int my_mod_register(struct moduledata *mod, const char *name, int order); - -/* define some macro for ip_dummynet */ - -struct malloc_type { -}; - -#define MALLOC_DEFINE(type, shortdesc, longdesc) \ - struct malloc_type type[1]; void *md_dummy_ ## type = type - -#define CTASSERT(x) - -/* log... does not use the first argument */ -#define LOG_ERR 0x100 -#define LOG_INFO 0x200 -#define log(_level, fmt, arg...) do { \ - int __unused x=_level;printk(KERN_ERR fmt, ##arg); } while (0) - -/* - * gettimeofday would be in sys/time.h but it is not - * visible if _KERNEL is defined - */ -int gettimeofday(struct timeval *, struct timezone *); - -#else /* _WIN32 */ -#define MALLOC_DEFINE(a,b,c) -#endif /* _WIN32 */ - -extern int hz; -extern long tick; /* exists in 2.4 but not in 2.6 */ -extern int bootverbose; -extern time_t time_uptime; -extern struct timeval boottime; - -extern int max_linkhdr; -extern int ip_defttl; -extern u_long in_ifaddrhmask; /* mask for hash table */ -extern struct in_ifaddrhashhead *in_ifaddrhashtbl; /* inet addr hash table */ - -/*-------------------------------------------------*/ - -/* define, includes and functions missing in linux */ -/* include and define */ -#include /* inet_ntoa */ - -struct mbuf; - -/* used by ip_dummynet.c */ -void reinject_drop(struct mbuf* m); - -#include /* error define */ -#include /* IFNAMESIZ */ - -/* - * some network structure can be defined in the bsd way - * by using the _FAVOR_BSD definition. This is not true - * for icmp structure. - * XXX struct icmp contains bsd names in - * /usr/include/netinet/ip_icmp.h - */ -#ifdef __linux__ -#define icmp_code code -#define icmp_type type - -/* linux in6_addr has no member __u6_addr - * replace the whole structure ? 
- */ -#define __u6_addr in6_u -#define __u6_addr32 u6_addr32 -#endif /* __linux__ */ - -/* defined in linux/sctp.h with no bsd definition */ -struct sctphdr { - uint16_t src_port; /* source port */ - uint16_t dest_port; /* destination port */ - uint32_t v_tag; /* verification tag of packet */ - uint32_t checksum; /* Adler32 C-Sum */ - /* chunks follow... */ -}; - -/* missing definition */ -#define TH_FIN 0x01 -#define TH_SYN 0x02 -#define TH_RST 0x04 -#define TH_ACK 0x10 - -#define RTF_CLONING 0x100 /* generate new routes on use */ - -#define IPPROTO_OSPFIGP 89 /* OSPFIGP */ -#define IPPROTO_CARP 112 /* CARP */ -#ifndef _WIN32 -#define IPPROTO_IPV4 IPPROTO_IPIP /* for compatibility */ -#endif - -#define CARP_VERSION 2 -#define CARP_ADVERTISEMENT 0x01 - -#define PRIV_NETINET_IPFW 491 /* Administer IPFW firewall. */ - -#define IP_FORWARDING 0x1 /* most of ip header exists */ - -#define NETISR_IP 2 /* same as AF_INET */ - -#define PRIV_NETINET_DUMMYNET 494 /* Administer DUMMYNET. */ - -extern int securelevel; - -struct carp_header { -#if BYTE_ORDER == LITTLE_ENDIAN - u_int8_t carp_type:4, - carp_version:4; -#endif -#if BYTE_ORDER == BIG_ENDIAN - u_int8_t carp_version:4, - carp_type:4; -#endif -}; - -struct pim { - int dummy; /* windows compiler does not like empty definition */ -}; - -struct route { - struct rtentry *ro_rt; - struct sockaddr ro_dst; -}; - -struct ifaltq { - void *ifq_head; -}; - -/* - * ifnet->if_snd is used in ip_dummynet.c to take the transmission - * clock. 
- */ -#if defined( __linux__) -#define if_xname name -#define if_snd XXX -#elif defined( _WIN32 ) -/* used in ip_dummynet.c */ -struct ifnet { - char if_xname[IFNAMSIZ]; /* external name (name + unit) */ -// struct ifaltq if_snd; /* output queue (includes altq) */ -}; - -struct net_device { - char if_xname[IFNAMSIZ]; /* external name (name + unit) */ -}; -#endif - -/* involves mbufs */ -int in_cksum(struct mbuf *m, int len); -#define divert_cookie(mtag) 0 -#define divert_info(mtag) 0 -#define INADDR_TO_IFP(a, b) b = NULL -#define pf_find_mtag(a) NULL -#define pf_get_mtag(a) NULL -/* we don't pullup, fail */ -#define m_pullup(m, x) \ - ((m)->m_len >= x ? (m) : (netisr_dispatch(-1, m), NULL)) - -#ifndef _WIN32 -#define AF_LINK AF_ASH /* ? our sys/socket.h */ -#endif - -struct pf_mtag { - void *hdr; /* saved hdr pos in mbuf, for ECN */ - sa_family_t af; /* for ECN */ - u_int32_t qid; /* queue id */ -}; - -#if 0 // ndef radix -/* radix stuff in radix.h and radix.c */ -struct radix_node { - caddr_t rn_key; /* object of search */ - caddr_t rn_mask; /* netmask, if present */ -}; -#endif /* !radix */ - -/* missing kernel functions */ -char *inet_ntoa(struct in_addr ina); -int random(void); - -/* - * Return the risult of a/b - * - * this is used in linux kernel space, - * since the 64bit division needs to - * be done using a macro - */ -int64_t -div64(int64_t a, int64_t b); - -char * -inet_ntoa_r(struct in_addr ina, char *buf); - -/* from bsd sys/queue.h */ -#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = TAILQ_FIRST((head)); \ - (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \ - (var) = (tvar)) - -#define SLIST_FOREACH_SAFE(var, head, field, tvar) \ - for ((var) = SLIST_FIRST((head)); \ - (var) && ((tvar) = SLIST_NEXT((var), field), 1); \ - (var) = (tvar)) - -/* depending of linux version */ -#ifndef ETHERTYPE_IPV6 -#define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */ -#endif - -/*-------------------------------------------------*/ -#define 
RT_NUMFIBS 1 -extern u_int rt_numfibs; - -/* involves kernel locking function */ -#ifdef RTFREE -#undef RTFREE -#define RTFREE(a) fprintf(stderr, "RTFREE: commented out locks\n"); -#endif - -void getmicrouptime(struct timeval *tv); - -/* from sys/netinet/ip_output.c */ -struct ip_moptions; -struct route; -struct ip; - -struct mbuf *ip_reass(struct mbuf *); -u_short in_cksum_hdr(struct ip *); -int ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, - struct ip_moptions *imo, struct inpcb *inp); - -/* from net/netisr.c */ -void netisr_dispatch(int num, struct mbuf *m); - -/* definition moved in missing.c */ -int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len); - -int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen); - -/* defined in session.c */ -int priv_check(struct thread *td, int priv); - -/* struct ucred is in linux/socket.h and has pid, uid, gid. - * We need a 'bsd_ucred' to store also the extra info - */ - -struct bsd_ucred { - uid_t uid; - gid_t gid; - uint32_t xid; - uint32_t nid; -}; - -int securelevel_ge(struct ucred *cr, int level); - -struct sysctl_oid; -struct sysctl_req; - -/* - * sysctl are mapped into /sys/module/ipfw_mod parameters - */ -#define CTLFLAG_RD 1 -#define CTLFLAG_RDTUN 1 -#define CTLFLAG_RW 2 -#define CTLFLAG_SECURE3 0 // unsupported -#define CTLFLAG_VNET 0 /* unsupported */ - -#ifdef _WIN32 -#define module_param_named(_name, _var, _ty, _perm) -#else - -/* Linux 2.4 is mostly for openwrt */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) -#include /* generic_ffs() used in ip_fw2.c */ -typedef uint32_t __be32; -typedef uint16_t __be16; -struct sock; -struct net; -struct inet_hashinfo; -struct sock *inet_lookup( - struct inet_hashinfo *hashinfo, - const __be32 saddr, const __be16 sport, - const __be32 daddr, const __be16 dport, - const int dif); -struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); -#endif /* Linux < 2.6 */ - -#if LINUX_VERSION_CODE < 
KERNEL_VERSION(2,6,17) -#define module_param_named(_name, _var, _ty, _perm) \ - //module_param(_name, _ty, 0644) -#endif -#endif /* __linux__ */ - -#define SYSCTL_DECL(_1) -#define SYSCTL_OID(_1, _2, _3, _4, _5, _6, _7, _8) -#define SYSCTL_NODE(_1, _2, _3, _4, _5, _6) -#define _SYSCTL_BASE(_name, _var, _ty, _perm) \ - module_param_named(_name, *(_var), _ty, \ - ( (_perm) == CTLFLAG_RD) ? 0444: 0644 ) -#define SYSCTL_PROC(_base, _oid, _name, _mode, _var, _val, _desc, _a, _b) - -#define SYSCTL_INT(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, int, _mode) - -#define SYSCTL_LONG(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, long, _mode) - -#define SYSCTL_ULONG(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, ulong, _mode) - -#define SYSCTL_UINT(_base, _oid, _name, _mode, _var, _val, _desc) \ - _SYSCTL_BASE(_name, _var, uint, _mode) - -#define SYSCTL_HANDLER_ARGS \ - struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req -int sysctl_handle_int(SYSCTL_HANDLER_ARGS); -int sysctl_handle_long(SYSCTL_HANDLER_ARGS); - -#define TUNABLE_INT(_name, _ptr) - -void ether_demux(struct ifnet *ifp, struct mbuf *m); - -int ether_output_frame(struct ifnet *ifp, struct mbuf *m); - -void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); - -void icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu); - -void rtfree(struct rtentry *rt); - -u_short in_cksum_skip(struct mbuf *m, int len, int skip); - -#ifdef INP_LOCK_ASSERT -#undef INP_LOCK_ASSERT -#define INP_LOCK_ASSERT(a) -#endif - -int jailed(struct ucred *cred); - -/* -* Return 1 if an internet address is for a ``local'' host -* (one to which we have a connection). If subnetsarelocal -* is true, this includes other subnets of the local net. -* Otherwise, it includes only the directly-connected (sub)nets. 
-*/ -int in_localaddr(struct in_addr in); - -/* the prototype is already in the headers */ -//int ipfw_chg_hook(SYSCTL_HANDLER_ARGS); - -int fnmatch(const char *pattern, const char *string, int flags); - -int -linux_lookup(const int proto, const __be32 saddr, const __be16 sport, - const __be32 daddr, const __be16 dport, - struct sk_buff *skb, int dir, struct bsd_ucred *u); - -/* vnet wrappers, in vnet.h and ip_var.h */ -int ipfw_init(void); -void ipfw_destroy(void); -struct ip_fw_args; -extern int (*ip_dn_io_ptr)(struct mbuf **m, int dir, struct ip_fw_args *fwa); - -#define curvnet NULL -#define CURVNET_SET(_v) -#define CURVNET_RESTORE() -#define VNET_ASSERT(condition) - -#define VNET_NAME(n) n -#define VNET_DECLARE(t, n) extern t n -#define VNET_DEFINE(t, n) t n -#define _VNET_PTR(b, n) &VNET_NAME(n) -/* - * Virtualized global variable accessor macros. - */ -#define VNET_VNET_PTR(vnet, n) (&(n)) -#define VNET_VNET(vnet, n) (n) - -#define VNET_PTR(n) (&(n)) -#define VNET(n) (n) - -extern int (*ip_dn_ctl_ptr)(struct sockopt *); -typedef int ip_fw_ctl_t(struct sockopt *); -extern ip_fw_ctl_t *ip_fw_ctl_ptr; - -/* For kernel ipfw_ether and ipfw_bridge. 
*/ -struct ip_fw_args; -typedef int ip_fw_chk_t(struct ip_fw_args *args); -extern ip_fw_chk_t *ip_fw_chk_ptr; - -#define V_ip_fw_chk_ptr VNET(ip_fw_chk_ptr) -#define V_ip_fw_ctl_ptr VNET(ip_fw_ctl_ptr) -#define V_tcbinfo VNET(tcbinfo) -#define V_udbinfo VNET(udbinfo) - -#define SYSCTL_VNET_PROC SYSCTL_PROC -#define SYSCTL_VNET_INT SYSCTL_INT - -int my_mod_register(struct moduledata *mod, const char *name, int order); - -#endif /* !_MISSING_H_ */ diff --git a/dummynet/new_glue.c b/dummynet/new_glue.c deleted file mode 100644 index 5ceef79..0000000 --- a/dummynet/new_glue.c +++ /dev/null @@ -1,219 +0,0 @@ -#include "missing.h" - -#define IPFW_INTERNAL -#include - -#include "hashtable.h" -#define IPFW_NEWTABLES_MAX 256 - -struct t_o { - /* Object stored in the hash table */ - uint32_t addr; - uint32_t value; - uint8_t mask; -}; - -MALLOC_DEFINE(M_IPFW_HTBL, "ipfw_tbl", "IpFw tables"); - -int add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen, uint32_t value); -int new_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr); -int del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen); -int new_flush_table(struct ip_fw_chain *ch, uint16_t tbl); -int flush_table(struct ip_fw_chain *ch, uint16_t tbl); -int lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val); -int new_count_table_entry(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -int count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt); -int new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl); -int dump_table(struct ip_fw_chain *ch, ipfw_table *tbl); -int init_tables(struct ip_fw_chain *ch); - -/* hash and compare functions for 32-bit entries */ -static uint32_t -simple_hash32(const void *key, uint32_t size) -{ - uint32_t ret = *(const uint32_t *)key % size; - - return ret; -} - -static int -cmp_func32(const void *key1, const void *key2, int sz) -{ - int k1 = *(const int *)key1; - 
int k2 = *(const int *)key2; - int ret; - - if (k1 < k2) - ret = -1; - else if (k1 > k2) - ret = 1; - else - ret = 0; - - return ret; -} - -int -add_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen, uint32_t value) -{ - /* TODO: - * - Search the correct hash table (tbl - IPFW_TABLES_MAX) - * - Search if the entry already exists - * - Insert the new entry in the table - * - Possibly reallocate the table if it is too small - */ - - struct t_o obj; - int ret; - int i = tbl - IPFW_TABLES_MAX; - int size = 128; - int obj_size = sizeof(struct t_o); - - if (i < 0 || i > size-1) /* wrong table number */ - return 1; - if (ch->global_tables[i] == NULL) { - ch->global_tables[i] = new_table_init(size, obj_size, - simple_hash32, cmp_func32, M_IPFW_HTBL); - } - - obj.addr = addr; - obj.value = value; - obj.mask = mlen; - - /* Insert the object in the table */ - ret = new_table_insert_obj(ch->global_tables[i], &obj); - return ret; -} - -int -new_del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr) -{ - int ret; - int nr = tbl - IPFW_TABLES_MAX; - - ret = new_table_delete_obj(ch->global_tables[nr], &addr); - - return ret; -} - -int -del_table_entry(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint8_t mlen) -{ - if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { - new_del_table_entry(ch, tbl, addr); - return 0; - } - return (EINVAL); -} - -int -new_flush_table(struct ip_fw_chain *ch, uint16_t tbl) -{ - new_table_destroy(ch->global_tables[tbl - IPFW_TABLES_MAX]); - return 0; -} - -int -flush_table(struct ip_fw_chain *ch, uint16_t tbl) -{ - if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) - return new_flush_table(ch, tbl); - - return (EINVAL); -} - -int -lookup_table(struct ip_fw_chain *ch, uint16_t tbl, in_addr_t addr, - uint32_t *val) -{ - if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { - struct new_hash_table *h; - const struct t_o *obj; - - h = ch->global_tables[tbl - IPFW_TABLES_MAX]; - - obj = 
new_table_extract_obj(h, (void *)&addr); - if (obj == NULL) - return 0; /* no match */ - - *val = obj->value; - return 1; /* match */ - } - return 0; -} - -int -new_count_table_entry(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) -{ - *cnt = new_table_get_element(ch->global_tables[tbl - IPFW_TABLES_MAX]); - return 0; -} - -int -count_table(struct ip_fw_chain *ch, uint32_t tbl, uint32_t *cnt) -{ - if (tbl >= IPFW_TABLES_MAX && tbl < IPFW_NEWTABLES_MAX) { - new_count_table_entry(ch, tbl, cnt); - return (0); - } - return (EINVAL); -} - -int -new_dump_table_entry(struct ip_fw_chain *ch, ipfw_table *tbl) -{ - /* fill the tbl with all entryes */ - ipfw_table_entry *ent; - const struct t_o *obj; - int i; - int n_el; - int nr = tbl->tbl - IPFW_TABLES_MAX; - struct new_hash_table *t = ch->global_tables[nr]; - - i = 0; - tbl->cnt = 0; - - /* XXX determine tbl->size */ - n_el = new_table_get_element(t); - obj = NULL; - for (; n_el > 0; n_el--) { - obj = table_next(t, obj); - if (obj == NULL) - break; - ent = &tbl->ent[tbl->cnt]; - - ent->addr = obj->addr; - ent->value = obj->value; - ent->masklen = obj->mask; - tbl->cnt++; - } - return 0; -} - -int -dump_table(struct ip_fw_chain *ch, ipfw_table *tbl) -{ - if (tbl->tbl >= IPFW_TABLES_MAX && tbl->tbl < IPFW_NEWTABLES_MAX) { - new_dump_table_entry(ch, tbl); - return (0); - } - return (EINVAL); -} - -int -init_tables(struct ip_fw_chain *ch) -{ - - int i; - /* Initialize new tables XXXMPD */ - for (i = 0; i < IPFW_NEWTABLES_MAX - IPFW_TABLES_MAX; i++) { - memset(&ch->global_tables[i], sizeof(struct new_hash_table*), 0); - } - - return (0); -} diff --git a/dummynet/radix.c b/dummynet/radix.c deleted file mode 100644 index 639a561..0000000 --- a/dummynet/radix.c +++ /dev/null @@ -1,1186 +0,0 @@ -/*- - * Copyright (c) 1988, 1989, 1993 - * The Regents of the University of California. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)radix.c 8.5 (Berkeley) 5/19/95 - * $FreeBSD: head/sys/net/radix.c 200354 2009-12-10 10:34:30Z luigi $ - */ - -/* - * Routines to build and maintain radix trees for routing lookups. - */ -#include -#ifdef _KERNEL -#include -#include -#include -#include -#include -#include -#include -#include -#include "opt_mpath.h" -#ifdef RADIX_MPATH -#include -#endif -#else /* !_KERNEL */ -#include -#include -#include -#define log(x, arg...) 
fprintf(stderr, ## arg) -#define panic(x) fprintf(stderr, "PANIC: %s", x), exit(1) -#define min(a, b) ((a) < (b) ? (a) : (b) ) -#include "include/net/radix.h" -#endif /* !_KERNEL */ - -static int rn_walktree_from(struct radix_node_head *h, void *a, void *m, - walktree_f_t *f, void *w); -static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); -static struct radix_node - *rn_insert(void *, struct radix_node_head *, int *, - struct radix_node [2]), - *rn_newpair(void *, int, struct radix_node[2]), - *rn_search(void *, struct radix_node *), - *rn_search_m(void *, struct radix_node *, void *); - -static int max_keylen; -static struct radix_mask *rn_mkfreelist; -static struct radix_node_head *mask_rnhead; -/* - * Work area -- the following point to 3 buffers of size max_keylen, - * allocated in this order in a block of memory malloc'ed by rn_init. - * rn_zeros, rn_ones are set in rn_init and used in readonly afterwards. - * addmask_key is used in rn_addmask in rw mode and not thread-safe. - */ -static char *rn_zeros, *rn_ones, *addmask_key; - -#define MKGet(m) { \ - if (rn_mkfreelist) { \ - m = rn_mkfreelist; \ - rn_mkfreelist = (m)->rm_mklist; \ - } else \ - R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); } - -#define MKFree(m) { (m)->rm_mklist = rn_mkfreelist; rn_mkfreelist = (m);} - -#define rn_masktop (mask_rnhead->rnh_treetop) - -static int rn_lexobetter(void *m_arg, void *n_arg); -static struct radix_mask * - rn_new_radix_mask(struct radix_node *tt, - struct radix_mask *next); -static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, - int skip); - -/* - * The data structure for the keys is a radix tree with one way - * branching removed. The index rn_bit at an internal node n represents a bit - * position to be tested. The tree is arranged so that all descendants - * of a node n have keys whose bits all agree up to position rn_bit - 1. - * (We say the index of n is rn_bit.) 
- * - * There is at least one descendant which has a one bit at position rn_bit, - * and at least one with a zero there. - * - * A route is determined by a pair of key and mask. We require that the - * bit-wise logical and of the key and mask to be the key. - * We define the index of a route to associated with the mask to be - * the first bit number in the mask where 0 occurs (with bit number 0 - * representing the highest order bit). - * - * We say a mask is normal if every bit is 0, past the index of the mask. - * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, - * and m is a normal mask, then the route applies to every descendant of n. - * If the index(m) < rn_bit, this implies the trailing last few bits of k - * before bit b are all 0, (and hence consequently true of every descendant - * of n), so the route applies to all descendants of the node as well. - * - * Similar logic shows that a non-normal mask m such that - * index(m) <= index(n) could potentially apply to many children of n. - * Thus, for each non-host route, we attach its mask to a list at an internal - * node as high in the tree as we can go. - * - * The present version of the code makes use of normal routes in short- - * circuiting an explict mask and compare operation when testing whether - * a key satisfies a normal route, and also in remembering the unique leaf - * that governs a subtree. - */ - -/* - * Most of the functions in this code assume that the key/mask arguments - * are sockaddr-like structures, where the first byte is an u_char - * indicating the size of the entire structure. - * - * To make the assumption more explicit, we use the LEN() macro to access - * this field. It is safe to pass an expression with side effects - * to LEN() as the argument is evaluated only once. - * We cast the result to int as this is the dominant usage. 
- */ -#define LEN(x) ( (int) (*(const u_char *)(x)) ) - -/* - * XXX THIS NEEDS TO BE FIXED - * In the code, pointers to keys and masks are passed as either - * 'void *' (because callers use to pass pointers of various kinds), or - * 'caddr_t' (which is fine for pointer arithmetics, but not very - * clean when you dereference it to access data). Furthermore, caddr_t - * is really 'char *', while the natural type to operate on keys and - * masks would be 'u_char'. This mismatch require a lot of casts and - * intermediate variables to adapt types that clutter the code. - */ - -/* - * Search a node in the tree matching the key. - */ -static struct radix_node * -rn_search(v_arg, head) - void *v_arg; - struct radix_node *head; -{ - register struct radix_node *x; - register caddr_t v; - - for (x = head, v = v_arg; x->rn_bit >= 0;) { - if (x->rn_bmask & v[x->rn_offset]) - x = x->rn_right; - else - x = x->rn_left; - } - return (x); -} - -/* - * Same as above, but with an additional mask. - * XXX note this function is used only once. 
- */ -static struct radix_node * -rn_search_m(v_arg, head, m_arg) - struct radix_node *head; - void *v_arg, *m_arg; -{ - register struct radix_node *x; - register caddr_t v = v_arg, m = m_arg; - - for (x = head; x->rn_bit >= 0;) { - if ((x->rn_bmask & m[x->rn_offset]) && - (x->rn_bmask & v[x->rn_offset])) - x = x->rn_right; - else - x = x->rn_left; - } - return x; -} - -int -rn_refines(m_arg, n_arg) - void *m_arg, *n_arg; -{ - register caddr_t m = m_arg, n = n_arg; - register caddr_t lim, lim2 = lim = n + LEN(n); - int longer = LEN(n++) - LEN(m++); - int masks_are_equal = 1; - - if (longer > 0) - lim -= longer; - while (n < lim) { - if (*n & ~(*m)) - return 0; - if (*n++ != *m++) - masks_are_equal = 0; - } - while (n < lim2) - if (*n++) - return 0; - if (masks_are_equal && (longer < 0)) - for (lim2 = m - longer; m < lim2; ) - if (*m++) - return 1; - return (!masks_are_equal); -} - -struct radix_node * -rn_lookup(v_arg, m_arg, head) - void *v_arg, *m_arg; - struct radix_node_head *head; -{ - register struct radix_node *x; - caddr_t netmask = 0; - - if (m_arg) { - x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset); - if (x == 0) - return (0); - netmask = x->rn_key; - } - x = rn_match(v_arg, head); - if (x && netmask) { - while (x && x->rn_mask != netmask) - x = x->rn_dupedkey; - } - return x; -} - -static int -rn_satisfies_leaf(trial, leaf, skip) - char *trial; - register struct radix_node *leaf; - int skip; -{ - register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; - char *cplim; - int length = min(LEN(cp), LEN(cp2)); - - if (cp3 == NULL) - cp3 = rn_ones; - else - length = min(length, LEN(cp3)); - cplim = cp + length; cp3 += skip; cp2 += skip; - for (cp += skip; cp < cplim; cp++, cp2++, cp3++) - if ((*cp ^ *cp2) & *cp3) - return 0; - return 1; -} - -struct radix_node * -rn_match(v_arg, head) - void *v_arg; - struct radix_node_head *head; -{ - caddr_t v = v_arg; - register struct radix_node *t = head->rnh_treetop, *x; - register caddr_t cp = v, 
cp2; - caddr_t cplim; - struct radix_node *saved_t, *top = t; - int off = t->rn_offset, vlen = LEN(cp), matched_off; - register int test, b, rn_bit; - - /* - * Open code rn_search(v, top) to avoid overhead of extra - * subroutine call. - */ - for (; t->rn_bit >= 0; ) { - if (t->rn_bmask & cp[t->rn_offset]) - t = t->rn_right; - else - t = t->rn_left; - } - /* - * See if we match exactly as a host destination - * or at least learn how many bits match, for normal mask finesse. - * - * It doesn't hurt us to limit how many bytes to check - * to the length of the mask, since if it matches we had a genuine - * match and the leaf we have is the most specific one anyway; - * if it didn't match with a shorter length it would fail - * with a long one. This wins big for class B&C netmasks which - * are probably the most common case... - */ - if (t->rn_mask) - vlen = *(u_char *)t->rn_mask; - cp += off; cp2 = t->rn_key + off; cplim = v + vlen; - for (; cp < cplim; cp++, cp2++) - if (*cp != *cp2) - goto on1; - /* - * This extra grot is in case we are explicitly asked - * to look up the default. Ugh! - * - * Never return the root node itself, it seems to cause a - * lot of confusion. - */ - if (t->rn_flags & RNF_ROOT) - t = t->rn_dupedkey; - return t; -on1: - test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ - for (b = 7; (test >>= 1) > 0;) - b--; - matched_off = cp - v; - b += matched_off << 3; - rn_bit = -1 - b; - /* - * If there is a host route in a duped-key chain, it will be first. - */ - if ((saved_t = t)->rn_mask == 0) - t = t->rn_dupedkey; - for (; t; t = t->rn_dupedkey) - /* - * Even if we don't match exactly as a host, - * we may match if the leaf we wound up at is - * a route to a net. 
- */ - if (t->rn_flags & RNF_NORMAL) { - if (rn_bit <= t->rn_bit) - return t; - } else if (rn_satisfies_leaf(v, t, matched_off)) - return t; - t = saved_t; - /* start searching up the tree */ - do { - register struct radix_mask *m; - t = t->rn_parent; - m = t->rn_mklist; - /* - * If non-contiguous masks ever become important - * we can restore the masking and open coding of - * the search and satisfaction test and put the - * calculation of "off" back before the "do". - */ - while (m) { - if (m->rm_flags & RNF_NORMAL) { - if (rn_bit <= m->rm_bit) - return (m->rm_leaf); - } else { - off = min(t->rn_offset, matched_off); - x = rn_search_m(v, t, m->rm_mask); - while (x && x->rn_mask != m->rm_mask) - x = x->rn_dupedkey; - if (x && rn_satisfies_leaf(v, x, off)) - return x; - } - m = m->rm_mklist; - } - } while (t != top); - return 0; -} - -#ifdef RN_DEBUG -int rn_nodenum; -struct radix_node *rn_clist; -int rn_saveinfo; -int rn_debug = 1; -#endif - -/* - * Whenever we add a new leaf to the tree, we also add a parent node, - * so we allocate them as an array of two elements: the first one must be - * the leaf (see RNTORT() in route.c), the second one is the parent. - * This routine initializes the relevant fields of the nodes, so that - * the leaf is the left child of the parent node, and both nodes have - * (almost) all fields filled as appropriate. - * (XXX some fields are left unset, see the '#if 0' section). - * The function returns a pointer to the parent node. - */ - -static struct radix_node * -rn_newpair(v, b, nodes) - void *v; - int b; - struct radix_node nodes[2]; -{ - register struct radix_node *tt = nodes, *t = tt + 1; - t->rn_bit = b; - t->rn_bmask = 0x80 >> (b & 7); - t->rn_left = tt; - t->rn_offset = b >> 3; - -#if 0 /* XXX perhaps we should fill these fields as well. 
*/ - t->rn_parent = t->rn_right = NULL; - - tt->rn_mask = NULL; - tt->rn_dupedkey = NULL; - tt->rn_bmask = 0; -#endif - tt->rn_bit = -1; - tt->rn_key = (caddr_t)v; - tt->rn_parent = t; - tt->rn_flags = t->rn_flags = RNF_ACTIVE; - tt->rn_mklist = t->rn_mklist = 0; -#ifdef RN_DEBUG - tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; - tt->rn_twin = t; - tt->rn_ybro = rn_clist; - rn_clist = tt; -#endif - return t; -} - -static struct radix_node * -rn_insert(v_arg, head, dupentry, nodes) - void *v_arg; - struct radix_node_head *head; - int *dupentry; - struct radix_node nodes[2]; -{ - caddr_t v = v_arg; - struct radix_node *top = head->rnh_treetop; - int head_off = top->rn_offset, vlen = LEN(v); - register struct radix_node *t = rn_search(v_arg, top); - register caddr_t cp = v + head_off; - register int b; - struct radix_node *tt; - /* - * Find first bit at which v and t->rn_key differ - */ - { - register caddr_t cp2 = t->rn_key + head_off; - register int cmp_res; - caddr_t cplim = v + vlen; - - while (cp < cplim) - if (*cp2++ != *cp++) - goto on1; - *dupentry = 1; - return t; -on1: - *dupentry = 0; - cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; - for (b = (cp - v) << 3; cmp_res; b--) - cmp_res >>= 1; - } - { - register struct radix_node *p, *x = top; - cp = v; - do { - p = x; - if (cp[x->rn_offset] & x->rn_bmask) - x = x->rn_right; - else - x = x->rn_left; - } while (b > (unsigned) x->rn_bit); - /* x->rn_bit < b && x->rn_bit >= 0 */ -#ifdef RN_DEBUG - if (rn_debug) - log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p); -#endif - t = rn_newpair(v_arg, b, nodes); - tt = t->rn_left; - if ((cp[p->rn_offset] & p->rn_bmask) == 0) - p->rn_left = t; - else - p->rn_right = t; - x->rn_parent = t; - t->rn_parent = p; /* frees x, p as temp vars below */ - if ((cp[t->rn_offset] & t->rn_bmask) == 0) { - t->rn_right = x; - } else { - t->rn_right = tt; - t->rn_left = x; - } -#ifdef RN_DEBUG - if (rn_debug) - log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p); -#endif - } - return 
(tt); -} - -struct radix_node * -rn_addmask(n_arg, search, skip) - int search, skip; - void *n_arg; -{ - caddr_t netmask = (caddr_t)n_arg; - register struct radix_node *x; - register caddr_t cp, cplim; - register int b = 0, mlen, j; - int maskduplicated, m0, isnormal; - struct radix_node *saved_x; - static int last_zeroed = 0; - - if ((mlen = LEN(netmask)) > max_keylen) - mlen = max_keylen; - if (skip == 0) - skip = 1; - if (mlen <= skip) - return (mask_rnhead->rnh_nodes); - if (skip > 1) - bcopy(rn_ones + 1, addmask_key + 1, skip - 1); - if ((m0 = mlen) > skip) - bcopy(netmask + skip, addmask_key + skip, mlen - skip); - /* - * Trim trailing zeroes. - */ - for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) - cp--; - mlen = cp - addmask_key; - if (mlen <= skip) { - if (m0 >= last_zeroed) - last_zeroed = mlen; - return (mask_rnhead->rnh_nodes); - } - if (m0 < last_zeroed) - bzero(addmask_key + m0, last_zeroed - m0); - *addmask_key = last_zeroed = mlen; - x = rn_search(addmask_key, rn_masktop); - if (bcmp(addmask_key, x->rn_key, mlen) != 0) - x = 0; - if (x || search) - return (x); - R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); - if ((saved_x = x) == 0) - return (0); - netmask = cp = (caddr_t)(x + 2); - bcopy(addmask_key, cp, mlen); - x = rn_insert(cp, mask_rnhead, &maskduplicated, x); - if (maskduplicated) { - log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); - Free(saved_x); - return (x); - } - /* - * Calculate index of mask, and check for normalcy. - * First find the first byte with a 0 bit, then if there are - * more bits left (remember we already trimmed the trailing 0's), - * the pattern must be one of those in normal_chars[], or we have - * a non-contiguous mask. 
- */ - cplim = netmask + mlen; - isnormal = 1; - for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) - cp++; - if (cp != cplim) { - static char normal_chars[] = { - 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; - - for (j = 0x80; (j & *cp) != 0; j >>= 1) - b++; - if (*cp != normal_chars[b] || cp != (cplim - 1)) - isnormal = 0; - } - b += (cp - netmask) << 3; - x->rn_bit = -1 - b; - if (isnormal) - x->rn_flags |= RNF_NORMAL; - return (x); -} - -static int /* XXX: arbitrary ordering for non-contiguous masks */ -rn_lexobetter(m_arg, n_arg) - void *m_arg, *n_arg; -{ - register u_char *mp = m_arg, *np = n_arg, *lim; - - if (LEN(mp) > LEN(np)) - return 1; /* not really, but need to check longer one first */ - if (LEN(mp) == LEN(np)) - for (lim = mp + LEN(mp); mp < lim;) - if (*mp++ > *np++) - return 1; - return 0; -} - -static struct radix_mask * -rn_new_radix_mask(tt, next) - register struct radix_node *tt; - register struct radix_mask *next; -{ - register struct radix_mask *m; - - MKGet(m); - if (m == 0) { - log(LOG_ERR, "Mask for route not entered\n"); - return (0); - } - bzero(m, sizeof *m); - m->rm_bit = tt->rn_bit; - m->rm_flags = tt->rn_flags; - if (tt->rn_flags & RNF_NORMAL) - m->rm_leaf = tt; - else - m->rm_mask = tt->rn_mask; - m->rm_mklist = next; - tt->rn_mklist = m; - return m; -} - -struct radix_node * -rn_addroute(v_arg, n_arg, head, treenodes) - void *v_arg, *n_arg; - struct radix_node_head *head; - struct radix_node treenodes[2]; -{ - caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; - register struct radix_node *t, *x = 0, *tt; - struct radix_node *saved_tt, *top = head->rnh_treetop; - short b = 0, b_leaf = 0; - int keyduplicated; - caddr_t mmask; - struct radix_mask *m, **mp; - - /* - * In dealing with non-contiguous masks, there may be - * many different routes which have the same mask. 
- * We will find it useful to have a unique pointer to - * the mask to speed avoiding duplicate references at - * nodes and possibly save time in calculating indices. - */ - if (netmask) { - if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) - return (0); - b_leaf = x->rn_bit; - b = -1 - x->rn_bit; - netmask = x->rn_key; - } - /* - * Deal with duplicated keys: attach node to previous instance - */ - saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); - if (keyduplicated) { - for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { -#ifdef RADIX_MPATH - /* permit multipath, if enabled for the family */ - if (rn_mpath_capable(head) && netmask == tt->rn_mask) { - /* - * go down to the end of multipaths, so that - * new entry goes into the end of rn_dupedkey - * chain. - */ - do { - t = tt; - tt = tt->rn_dupedkey; - } while (tt && t->rn_mask == tt->rn_mask); - break; - } -#endif - if (tt->rn_mask == netmask) - return (0); - if (netmask == 0 || - (tt->rn_mask && - ((b_leaf < tt->rn_bit) /* index(netmask) > node */ - || rn_refines(netmask, tt->rn_mask) - || rn_lexobetter(netmask, tt->rn_mask)))) - break; - } - /* - * If the mask is not duplicated, we wouldn't - * find it among possible duplicate key entries - * anyway, so the above test doesn't hurt. - * - * We sort the masks for a duplicated key the same way as - * in a masklist -- most specific to least specific. - * This may require the unfortunate nuisance of relocating - * the head of the list. - * - * We also reverse, or doubly link the list through the - * parent pointer. 
- */ - if (tt == saved_tt) { - struct radix_node *xx = x; - /* link in at head of list */ - (tt = treenodes)->rn_dupedkey = t; - tt->rn_flags = t->rn_flags; - tt->rn_parent = x = t->rn_parent; - t->rn_parent = tt; /* parent */ - if (x->rn_left == t) - x->rn_left = tt; - else - x->rn_right = tt; - saved_tt = tt; x = xx; - } else { - (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; - t->rn_dupedkey = tt; - tt->rn_parent = t; /* parent */ - if (tt->rn_dupedkey) /* parent */ - tt->rn_dupedkey->rn_parent = tt; /* parent */ - } -#ifdef RN_DEBUG - t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; - tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; -#endif - tt->rn_key = (caddr_t) v; - tt->rn_bit = -1; - tt->rn_flags = RNF_ACTIVE; - } - /* - * Put mask in tree. - */ - if (netmask) { - tt->rn_mask = netmask; - tt->rn_bit = x->rn_bit; - tt->rn_flags |= x->rn_flags & RNF_NORMAL; - } - t = saved_tt->rn_parent; - if (keyduplicated) - goto on2; - b_leaf = -1 - t->rn_bit; - if (t->rn_right == saved_tt) - x = t->rn_left; - else - x = t->rn_right; - /* Promote general routes from below */ - if (x->rn_bit < 0) { - for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) - if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) { - *mp = m = rn_new_radix_mask(x, 0); - if (m) - mp = &m->rm_mklist; - } - } else if (x->rn_mklist) { - /* - * Skip over masks whose index is > that of new node - */ - for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) - if (m->rm_bit >= b_leaf) - break; - t->rn_mklist = m; *mp = 0; - } -on2: - /* Add new route to highest possible ancestor's list */ - if ((netmask == 0) || (b > t->rn_bit )) - return tt; /* can't lift at all */ - b_leaf = tt->rn_bit; - do { - x = t; - t = t->rn_parent; - } while (b <= t->rn_bit && x != top); - /* - * Search through routes associated with node to - * insert new route according to index. - * Need same criteria as when sorting dupedkeys to avoid - * double loop on deletion. 
- */ - for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) { - if (m->rm_bit < b_leaf) - continue; - if (m->rm_bit > b_leaf) - break; - if (m->rm_flags & RNF_NORMAL) { - mmask = m->rm_leaf->rn_mask; - if (tt->rn_flags & RNF_NORMAL) { - log(LOG_ERR, - "Non-unique normal route, mask not entered\n"); - return tt; - } - } else - mmask = m->rm_mask; - if (mmask == netmask) { - m->rm_refs++; - tt->rn_mklist = m; - return tt; - } - if (rn_refines(netmask, mmask) - || rn_lexobetter(netmask, mmask)) - break; - } - *mp = rn_new_radix_mask(tt, *mp); - return tt; -} - -struct radix_node * -rn_delete(v_arg, netmask_arg, head) - void *v_arg, *netmask_arg; - struct radix_node_head *head; -{ - register struct radix_node *t, *p, *x, *tt; - struct radix_mask *m, *saved_m, **mp; - struct radix_node *dupedkey, *saved_tt, *top; - caddr_t v, netmask; - int b, head_off, vlen; - - v = v_arg; - netmask = netmask_arg; - x = head->rnh_treetop; - tt = rn_search(v, x); - head_off = x->rn_offset; - vlen = LEN(v); - saved_tt = tt; - top = x; - if (tt == 0 || - bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) - return (0); - /* - * Delete our route from mask lists. 
- */ - if (netmask) { - if ((x = rn_addmask(netmask, 1, head_off)) == 0) - return (0); - netmask = x->rn_key; - while (tt->rn_mask != netmask) - if ((tt = tt->rn_dupedkey) == 0) - return (0); - } - if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) - goto on1; - if (tt->rn_flags & RNF_NORMAL) { - if (m->rm_leaf != tt || m->rm_refs > 0) { - log(LOG_ERR, "rn_delete: inconsistent annotation\n"); - return 0; /* dangling ref could cause disaster */ - } - } else { - if (m->rm_mask != tt->rn_mask) { - log(LOG_ERR, "rn_delete: inconsistent annotation\n"); - goto on1; - } - if (--m->rm_refs >= 0) - goto on1; - } - b = -1 - tt->rn_bit; - t = saved_tt->rn_parent; - if (b > t->rn_bit) - goto on1; /* Wasn't lifted at all */ - do { - x = t; - t = t->rn_parent; - } while (b <= t->rn_bit && x != top); - for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) - if (m == saved_m) { - *mp = m->rm_mklist; - MKFree(m); - break; - } - if (m == 0) { - log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); - if (tt->rn_flags & RNF_NORMAL) - return (0); /* Dangling ref to us */ - } -on1: - /* - * Eliminate us from tree - */ - if (tt->rn_flags & RNF_ROOT) - return (0); -#ifdef RN_DEBUG - /* Get us out of the creation list */ - for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {} - if (t) t->rn_ybro = tt->rn_ybro; -#endif - t = tt->rn_parent; - dupedkey = saved_tt->rn_dupedkey; - if (dupedkey) { - /* - * Here, tt is the deletion target and - * saved_tt is the head of the dupekey chain. 
- */ - if (tt == saved_tt) { - /* remove from head of chain */ - x = dupedkey; x->rn_parent = t; - if (t->rn_left == tt) - t->rn_left = x; - else - t->rn_right = x; - } else { - /* find node in front of tt on the chain */ - for (x = p = saved_tt; p && p->rn_dupedkey != tt;) - p = p->rn_dupedkey; - if (p) { - p->rn_dupedkey = tt->rn_dupedkey; - if (tt->rn_dupedkey) /* parent */ - tt->rn_dupedkey->rn_parent = p; - /* parent */ - } else log(LOG_ERR, "rn_delete: couldn't find us\n"); - } - t = tt + 1; - if (t->rn_flags & RNF_ACTIVE) { -#ifndef RN_DEBUG - *++x = *t; - p = t->rn_parent; -#else - b = t->rn_info; - *++x = *t; - t->rn_info = b; - p = t->rn_parent; -#endif - if (p->rn_left == t) - p->rn_left = x; - else - p->rn_right = x; - x->rn_left->rn_parent = x; - x->rn_right->rn_parent = x; - } - goto out; - } - if (t->rn_left == tt) - x = t->rn_right; - else - x = t->rn_left; - p = t->rn_parent; - if (p->rn_right == t) - p->rn_right = x; - else - p->rn_left = x; - x->rn_parent = p; - /* - * Demote routes attached to us. - */ - if (t->rn_mklist) { - if (x->rn_bit >= 0) { - for (mp = &x->rn_mklist; (m = *mp);) - mp = &m->rm_mklist; - *mp = t->rn_mklist; - } else { - /* If there are any key,mask pairs in a sibling - duped-key chain, some subset will appear sorted - in the same order attached to our mklist */ - for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) - if (m == x->rn_mklist) { - struct radix_mask *mm = m->rm_mklist; - x->rn_mklist = 0; - if (--(m->rm_refs) < 0) - MKFree(m); - m = mm; - } - if (m) - log(LOG_ERR, - "rn_delete: Orphaned Mask %p at %p\n", - (void *)m, (void *)x); - } - } - /* - * We may be holding an active internal node in the tree. 
- */ - x = tt + 1; - if (t != x) { -#ifndef RN_DEBUG - *t = *x; -#else - b = t->rn_info; - *t = *x; - t->rn_info = b; -#endif - t->rn_left->rn_parent = t; - t->rn_right->rn_parent = t; - p = x->rn_parent; - if (p->rn_left == x) - p->rn_left = t; - else - p->rn_right = t; - } -out: - tt->rn_flags &= ~RNF_ACTIVE; - tt[1].rn_flags &= ~RNF_ACTIVE; - return (tt); -} - -/* - * This is the same as rn_walktree() except for the parameters and the - * exit. - */ -static int -rn_walktree_from(h, a, m, f, w) - struct radix_node_head *h; - void *a, *m; - walktree_f_t *f; - void *w; -{ - int error; - struct radix_node *base, *next; - u_char *xa = (u_char *)a; - u_char *xm = (u_char *)m; - register struct radix_node *rn, *last = 0 /* shut up gcc */; - int stopping = 0; - int lastb; - - /* - * rn_search_m is sort-of-open-coded here. We cannot use the - * function because we need to keep track of the last node seen. - */ - /* printf("about to search\n"); */ - for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) { - last = rn; - /* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n", - rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */ - if (!(rn->rn_bmask & xm[rn->rn_offset])) { - break; - } - if (rn->rn_bmask & xa[rn->rn_offset]) { - rn = rn->rn_right; - } else { - rn = rn->rn_left; - } - } - /* printf("done searching\n"); */ - - /* - * Two cases: either we stepped off the end of our mask, - * in which case last == rn, or we reached a leaf, in which - * case we want to start from the last node we looked at. - * Either way, last is the node we want to start from. - */ - rn = last; - lastb = rn->rn_bit; - - /* printf("rn %p, lastb %d\n", rn, lastb);*/ - - /* - * This gets complicated because we may delete the node - * while applying the function f to it, so we need to calculate - * the successor node in advance. 
- */ - while (rn->rn_bit >= 0) - rn = rn->rn_left; - - while (!stopping) { - /* printf("node %p (%d)\n", rn, rn->rn_bit); */ - base = rn; - /* If at right child go back up, otherwise, go right */ - while (rn->rn_parent->rn_right == rn - && !(rn->rn_flags & RNF_ROOT)) { - rn = rn->rn_parent; - - /* if went up beyond last, stop */ - if (rn->rn_bit <= lastb) { - stopping = 1; - /* printf("up too far\n"); */ - /* - * XXX we should jump to the 'Process leaves' - * part, because the values of 'rn' and 'next' - * we compute will not be used. Not a big deal - * because this loop will terminate, but it is - * inefficient and hard to understand! - */ - } - } - - /* - * At the top of the tree, no need to traverse the right - * half, prevent the traversal of the entire tree in the - * case of default route. - */ - if (rn->rn_parent->rn_flags & RNF_ROOT) - stopping = 1; - - /* Find the next *leaf* since next node might vanish, too */ - for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) - rn = rn->rn_left; - next = rn; - /* Process leaves */ - while ((rn = base) != 0) { - base = rn->rn_dupedkey; - /* printf("leaf %p\n", rn); */ - if (!(rn->rn_flags & RNF_ROOT) - && (error = (*f)(rn, w))) - return (error); - } - rn = next; - - if (rn->rn_flags & RNF_ROOT) { - /* printf("root, stopping"); */ - stopping = 1; - } - - } - return 0; -} - -static int -rn_walktree(h, f, w) - struct radix_node_head *h; - walktree_f_t *f; - void *w; -{ - int error; - struct radix_node *base, *next; - register struct radix_node *rn = h->rnh_treetop; - /* - * This gets complicated because we may delete the node - * while applying the function f to it, so we need to calculate - * the successor node in advance. 
- */ - - /* First time through node, go left */ - while (rn->rn_bit >= 0) - rn = rn->rn_left; - for (;;) { - base = rn; - /* If at right child go back up, otherwise, go right */ - while (rn->rn_parent->rn_right == rn - && (rn->rn_flags & RNF_ROOT) == 0) - rn = rn->rn_parent; - /* Find the next *leaf* since next node might vanish, too */ - for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) - rn = rn->rn_left; - next = rn; - /* Process leaves */ - while ((rn = base)) { - base = rn->rn_dupedkey; - if (!(rn->rn_flags & RNF_ROOT) - && (error = (*f)(rn, w))) - return (error); - } - rn = next; - if (rn->rn_flags & RNF_ROOT) - return (0); - } - /* NOTREACHED */ -} - -/* - * Allocate and initialize an empty tree. This has 3 nodes, which are - * part of the radix_node_head (in the order <left,root,right>) and are - * marked RNF_ROOT so they cannot be freed. - * The leaves have all-zero and all-one keys, with significant - * bits starting at 'off'. - * Return 1 on success, 0 on error. - */ -int -rn_inithead(head, off) - void **head; - int off; -{ - register struct radix_node_head *rnh; - register struct radix_node *t, *tt, *ttt; - if (*head) - return (1); - R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh)); - if (rnh == 0) - return (0); -#ifdef _KERNEL - RADIX_NODE_HEAD_LOCK_INIT(rnh); -#endif - *head = rnh; - t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); - ttt = rnh->rnh_nodes + 2; - t->rn_right = ttt; - t->rn_parent = t; - tt = t->rn_left; /* ...
which in turn is rnh->rnh_nodes */ - tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; - tt->rn_bit = -1 - off; - *ttt = *tt; - ttt->rn_key = rn_ones; - rnh->rnh_addaddr = rn_addroute; - rnh->rnh_deladdr = rn_delete; - rnh->rnh_matchaddr = rn_match; - rnh->rnh_lookup = rn_lookup; - rnh->rnh_walktree = rn_walktree; - rnh->rnh_walktree_from = rn_walktree_from; - rnh->rnh_treetop = t; - return (1); -} - -void -rn_init(int maxk) -{ - char *cp, *cplim; - - max_keylen = maxk; - if (max_keylen == 0) { - log(LOG_ERR, - "rn_init: radix functions require max_keylen be set\n"); - return; - } - R_Malloc(rn_zeros, char *, 3 * max_keylen); - if (rn_zeros == NULL) - panic("rn_init"); - bzero(rn_zeros, 3 * max_keylen); - rn_ones = cp = rn_zeros + max_keylen; - addmask_key = cplim = rn_ones + max_keylen; - while (cp < cplim) - *cp++ = -1; - if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0) - panic("rn_init 2"); -} diff --git a/dummynet/test_radix.c b/dummynet/test_radix.c deleted file mode 100644 index b0e37d5..0000000 --- a/dummynet/test_radix.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Test the radix tree net - */ - -#include -#include -#include -#include -#include /* htonl */ -#include "include/net/radix.h" - -struct d { - uint8_t len[4]; - uint32_t data; -}; - -struct table_entry { - struct radix_node rn[2]; - struct d x, mask; - int value; -}; - -static int -del(struct radix_node *rn, void *arg) -{ - struct radix_node_head * const rnh = arg; - struct table_entry *ent; - - ent = (struct table_entry *) - rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh); - fprintf(stderr, "del returns %p\n", ent); - if (0 && ent != NULL) - free(ent); - return (0); -} - -int -list(struct radix_node *rn, void *arg) -{ - struct table_entry *ent = (struct table_entry *)rn; - - fprintf(stderr, "walking on node %d\n", ent->value); - return (0); -} - -static void -print_dt(struct timeval *start, struct timeval *end, int n, const char *msg) -{ - int ds = 0, du, l; - du = end->tv_usec - start->tv_usec; 
- if (du < 0) { - ds = -1; - du += 1000000; - } - ds += end->tv_sec - start->tv_sec; - if (n <= 1) - n = 1; - l = (ds * 1000000+ du)/n; - fprintf(stderr, "%d tries in %d.%06ds, %dus each\n", - n, ds, du, l); -} - -static void -test1(struct radix_node_head *h, int n) -{ - struct table_entry *p; - struct timeval start, end; - int i; - - p = calloc(n, sizeof(*p)); - if (!p) - return; - for (i=0; i < n; i++) { - p->value = i; - p->x.len[0] = p->mask.len[0] = 8; - p->mask.data = 0xffffffff; - p->x.data = htonl(i); - } - gettimeofday(&start, NULL); - for (i=0; i < n; i++) { - h->rnh_addaddr(&(p->x), &(p->mask), h, (void *)p); - } - gettimeofday(&end, NULL); - print_dt(&start, &end, n, NULL); - h->rnh_walktree(h, del, h); -} - -int -main(int argc, char *argv[]) -{ - struct radix_node_head *h = NULL; - - rn_init(64); // XXX bits or bytes ? - rn_inithead((void **)&h, 32); /* data offset in bits */ - test1(h, 1000000); - return 0; -} diff --git a/dummynet2/Makefile b/dummynet2/Makefile index 2fe1d7b..e51ccb2 100644 --- a/dummynet2/Makefile +++ b/dummynet2/Makefile @@ -53,8 +53,6 @@ ipfw-cflags += -DKERNEL_MODULE # build linux kernel module # the two header trees for empty and override files ipfw-cflags += -I $(M)/include_e ipfw-cflags += -I $(M)/include -# XXX eventually ../dummynet/include will go away -ipfw-cflags += -I $(M)/../dummynet/include ipfw-cflags += -include $(M)/../glue.h # headers ipfw-cflags += -include $(M)/missing.h # headers diff --git a/dummynet/include/net/if.h b/dummynet2/include/net/if.h similarity index 100% rename from dummynet/include/net/if.h rename to dummynet2/include/net/if.h diff --git a/dummynet/include/net/pfil.h b/dummynet2/include/net/pfil.h similarity index 100% rename from dummynet/include/net/pfil.h rename to dummynet2/include/net/pfil.h diff --git a/dummynet/include/net/radix.h b/dummynet2/include/net/radix.h similarity index 100% rename from dummynet/include/net/radix.h rename to dummynet2/include/net/radix.h diff --git 
a/dummynet/include/netinet/ip.h b/dummynet2/include/netinet/ip.h similarity index 100% rename from dummynet/include/netinet/ip.h rename to dummynet2/include/netinet/ip.h diff --git a/dummynet/include/netinet/ip6.h b/dummynet2/include/netinet/ip6.h similarity index 100% rename from dummynet/include/netinet/ip6.h rename to dummynet2/include/netinet/ip6.h diff --git a/dummynet/include/netinet/ip_icmp.h b/dummynet2/include/netinet/ip_icmp.h similarity index 100% rename from dummynet/include/netinet/ip_icmp.h rename to dummynet2/include/netinet/ip_icmp.h diff --git a/dummynet/include/netinet/tcp.h b/dummynet2/include/netinet/tcp.h similarity index 100% rename from dummynet/include/netinet/tcp.h rename to dummynet2/include/netinet/tcp.h diff --git a/dummynet/include/netinet/tcp_var.h b/dummynet2/include/netinet/tcp_var.h similarity index 100% rename from dummynet/include/netinet/tcp_var.h rename to dummynet2/include/netinet/tcp_var.h diff --git a/dummynet/include/netinet/udp.h b/dummynet2/include/netinet/udp.h similarity index 100% rename from dummynet/include/netinet/udp.h rename to dummynet2/include/netinet/udp.h diff --git a/dummynet/include/sys/cdefs.h b/dummynet2/include/sys/cdefs.h similarity index 100% rename from dummynet/include/sys/cdefs.h rename to dummynet2/include/sys/cdefs.h diff --git a/dummynet/include/sys/kernel.h b/dummynet2/include/sys/kernel.h similarity index 100% rename from dummynet/include/sys/kernel.h rename to dummynet2/include/sys/kernel.h diff --git a/dummynet/include/sys/malloc.h b/dummynet2/include/sys/malloc.h similarity index 100% rename from dummynet/include/sys/malloc.h rename to dummynet2/include/sys/malloc.h diff --git a/dummynet/include/sys/mbuf.h b/dummynet2/include/sys/mbuf.h similarity index 100% rename from dummynet/include/sys/mbuf.h rename to dummynet2/include/sys/mbuf.h diff --git a/dummynet/include/sys/module.h b/dummynet2/include/sys/module.h similarity index 100% rename from dummynet/include/sys/module.h rename to 
dummynet2/include/sys/module.h diff --git a/dummynet/include/sys/param.h b/dummynet2/include/sys/param.h similarity index 100% rename from dummynet/include/sys/param.h rename to dummynet2/include/sys/param.h diff --git a/dummynet/include/sys/queue.h b/dummynet2/include/sys/queue.h similarity index 100% rename from dummynet/include/sys/queue.h rename to dummynet2/include/sys/queue.h diff --git a/dummynet/include/sys/syslog.h b/dummynet2/include/sys/syslog.h similarity index 100% rename from dummynet/include/sys/syslog.h rename to dummynet2/include/sys/syslog.h diff --git a/dummynet/include/sys/systm.h b/dummynet2/include/sys/systm.h similarity index 100% rename from dummynet/include/sys/systm.h rename to dummynet2/include/sys/systm.h diff --git a/dummynet/include/sys/taskqueue.h b/dummynet2/include/sys/taskqueue.h similarity index 100% rename from dummynet/include/sys/taskqueue.h rename to dummynet2/include/sys/taskqueue.h diff --git a/ipfw/Makefile b/ipfw/Makefile index 7b4a272..9d1efa3 100644 --- a/ipfw/Makefile +++ b/ipfw/Makefile @@ -62,7 +62,7 @@ include/netinet: -@mkdir -p include/netinet -(cd include/netinet; \ for i in ip_fw.h ip_dummynet.h tcp.h; do \ - ln -s ../../../dummynet/include/netinet/$$i; done; ) + ln -s ../../../dummynet2/include/netinet/$$i; done; ) clean distclean: -rm -f $(OBJS) ipfw diff --git a/ipfw/include/sys/sockio.h b/ipfw/include/sys/sockio.h deleted file mode 100644 index e69de29..0000000