This commit was generated by cvs2svn to compensate for changes in r2587,
author Marc Fiuczynski <mef@cs.princeton.edu>
Fri, 19 Jan 2007 18:46:40 +0000 (18:46 +0000)
committer Marc Fiuczynski <mef@cs.princeton.edu>
Fri, 19 Jan 2007 18:46:40 +0000 (18:46 +0000)
which included commits to RCS files with non-trunk default branches.

96 files changed:
ChangeLog
Makefile
README.decnet
doc/actions/actions-general [new file with mode: 0644]
doc/actions/dummy-README [new file with mode: 0644]
doc/actions/mirred-usage
etc/iproute2/ematch_map [new file with mode: 0644]
etc/iproute2/rt_dsfield
etc/iproute2/rt_protos
etc/iproute2/rt_realms
etc/iproute2/rt_scopes
etc/iproute2/rt_tables
examples/README.cbq [new file with mode: 0644]
examples/cbq.init-v0.7.3 [new file with mode: 0644]
include/SNAPSHOT.h
include/iptables.h
include/iptables_common.h
include/linux/inet_diag.h [new file with mode: 0644]
include/linux/ip_mp_alg.h [new file with mode: 0644]
include/linux/netfilter_ipv4/ip_tables.h
include/linux/netlink.h
include/linux/pkt_cls.h
include/linux/pkt_sched.h
include/linux/rtnetlink.h
include/linux/socket.h [new file with mode: 0644]
include/linux/tc_act/tc_defact.h [new file with mode: 0644]
include/linux/tc_ematch/tc_em_cmp.h [new file with mode: 0644]
include/linux/tc_ematch/tc_em_meta.h [new file with mode: 0644]
include/linux/tc_ematch/tc_em_nbyte.h [new file with mode: 0644]
include/linux/tcp.h
include/linux/xfrm.h
include/ll_map.h
include/net/tcp_states.h [new file with mode: 0644]
include/rt_names.h
include/utils.h
ip/Makefile
ip/ip.c
ip/ip_common.h
ip/ipaddress.c
ip/iplink.c
ip/ipmaddr.c
ip/ipmonitor.c
ip/ipmroute.c
ip/ipneigh.c
ip/ipntable.c [new file with mode: 0644]
ip/iproute.c
ip/iprule.c
ip/ipxfrm.c
ip/xfrm.h
ip/xfrm_monitor.c [new file with mode: 0644]
ip/xfrm_policy.c
ip/xfrm_state.c
lib/libnetlink.c
lib/ll_addr.c
lib/ll_map.c
lib/rt_names.c
lib/utils.c
man/man8/ip.8
man/man8/tc-pfifo.8 [new file with mode: 0644]
misc/Makefile
misc/arpd.c
misc/ifstat.c
misc/lnstat.c
misc/lnstat_util.c
misc/nstat.c
misc/rtacct.c
misc/ss.c
netem/Makefile
netem/normal.c
netem/paretonormal.c
tc/Makefile
tc/em_cmp.c [new file with mode: 0644]
tc/em_meta.c [new file with mode: 0644]
tc/em_nbyte.c [new file with mode: 0644]
tc/em_u32.c [new file with mode: 0644]
tc/emp_ematch.l [new file with mode: 0644]
tc/emp_ematch.y [new file with mode: 0644]
tc/f_basic.c [new file with mode: 0644]
tc/f_u32.c
tc/m_ematch.c [new file with mode: 0644]
tc/m_ematch.h [new file with mode: 0644]
tc/m_ipt.c
tc/m_mirred.c
tc/m_pedit.c
tc/q_cbq.c
tc/q_dsmark.c
tc/q_netem.c
tc/tc.c
tc/tc_class.c
tc/tc_qdisc.c
testsuite/Makefile
testsuite/iproute2/Makefile [new file with mode: 0644]
testsuite/lib/generic.sh [new file with mode: 0644]
testsuite/tests/cbq.t [new file with mode: 0644]
testsuite/tests/cls-testbed.t [new file with mode: 0644]
testsuite/tests/dsmark.t [new file with mode: 0644]

index 53bd530..3590a64 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,234 @@
+2006-03-21  Stephen Hemminger  <shemminger@freekitty.pdx.osdl.net>
+
+       * Back out the 2.4 utsname patch
+
+2006-03-21  James Lentini <jlentini@netapp.com>
+       
+       * Increase size of hw address allowed for ip neigh to allow
+         for IB.
+
+2006-03-14  Russell Stuart <russell-lartc@stuart.id.au>
+       
+       * Fix missing memset in tc sample
+       * Fixes for tc hash samples
+       * Add sample divisor
+
+2006-03-10  Alpt <alpt@freaknet.org>
+
+       * Add more rt_proto values
+
+2006-03-10 Dale Sedivec <darkness@caliginous.net>
+       
+       * Warn when using "handle" instead of "classid" with "tc class"
+
+2006-03-10  Jean Tourrilhes <jt@hpl.hp.com>
+
+       * Fix endless loop in netlink error handling
+
+
+2006-03-10  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Change default lnstat count to 1
+       * Update to 2.6.16 headers
+       * Add fake version of include/linux/socket.h to fix warnings
+
+2006-01-12  Patrick McHardy <kaber@trash.net>
+
+       * Handle DCCP in ipxfrm.c to allow using port numbers in the selector.
+
+2006-01-10  Masahide NAKAMURA <nakam@linux-ipv6.org>
+
+       * Add ip link ntable
+
+2006-01-10  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Update headers to sanitized kernel 2.6.15
+       * Fix ipv6 priority option in u32 
+       
+2006-01-03 Alpt <alpt@freaknet.org>
+
+       * Ip man page addition
+
+2006-01-03  Jamal Hadi Salim <hadi@znyx.com>
+
+       * Documentation for ifb
+
+2005-12-09  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Add corrupt feature to netem
+
+2005-12-02  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Backout ambiguous ip command matches
+
+2005-11-22  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Handle ambiguous ip command matches
+
+2005-11-22  Patrick McHardy <kaber@trash.net>
+
+       * Add back ip command aliases
+
+2005-11-07  Masahide NAKAMURA <nakam@linux-ipv6.org>
+
+       * Updating for 2.6.14
+       - Show UPD{SA,POLICY} message information from kernel instead of error
+       - Add length check of deleting message from kernel
+       - Use macro for struct xfrm_user{sa,policy}_id
+
+       * Minor fix:
+       - Add fflush at the end of normal dump
+
+2005-11-01  Jamal Hadi Salim <hadi@znyx.com>
+
+       * Fix handling of XFRM monitor and state
+
+2005-11-01  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Update to 2.6.14 sanitized headers
+
+2005-10-24  Patrick McHardy <kaber@trash.net>
+
+       * Fix ip command shortcuts
+
+2005-10-12  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Add more CBQ examples from Fedora Core
+       * Fix buffer overrun in iproute because of bits vs. bytes confusion
+       
+2005-10-12  Jamal Hadi Salim <hadi@znyx.com>
+
+       * Fix ip rule flush, need to reopen rtnl
+
+2005-10-07  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Reenable ip mroute
+
+2005-10-07  Mike Frysinger <vapier@gentoo.org>
+
+       * Handle pfifo_fast that has no qopt without segfaulting
+
+2005-10-05  Mads Martin Joergensen <mmj@suse.de>
+
+       * Trivial netem ccopts
+
+2005-10-04  Jerome Borsboom <j.borsboom@erasmusmc.nl>
+
+       * Fix regression in ip addr (libnetlink) handling
+
+2005-09-21  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Fix uninitialized memory and leaks with valgrind
+         Reported by Redhat
+
+2005-09-01   Mike Frysinger <vapier@gentoo.org>
+
+       * Fix build issues with netem tables (parallel make and HOSTCC)
+       
+2005-09-01  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Integrate support for DCCP into 'ss' (from acme)
+       * Add -batch option to ip.
+       * Update to 2.6.14 headers
+
+2005-09-01  Eric Dumazet <dada1@cosmosbay.com>
+       
+       * Fix lnstat : First column should not be summed
+
+2005-08-16  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Limit ip route flush to 10 rounds.
+       * Cleanup ip rule flush error message
+       
+2005-08-08  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Update to 2.6.13+ kernel headers
+       * Fix array overrun in paretonormal
+       * Fix ematch to not include dropped fields from skb.
+       
+2005-07-14  Thomas Graf <tgraf@suug.ch>
+
+       * Make ematch bison/lex build with common flex
+       
+2005-07-10  Stephen Hemminger  <shemminger@osdl.org>
+       
+       * Fix Gcc 4.0 build warnings signed/unsigned
+
+2005-06-23  Jamal Hadi Salim <hadi@znyx.com>
+
+       * Fix for options process with ipt
+
+2005-06-23  Thomas Graf <tgraf@suug.ch>
+       
+       * Add extended matches (nbyte, cmp, u32, meta)
+       * Add basic classifier
+       * Fix clean/distclean makefile targets
+       * update local header file copies
+       * IPv4 multipath algorithm selection support
+       * cscope Makefile target
+       * Fix off-by-one while generating argument vector
+         in batched mode.
+       * Assume stdin if no argument is given to -batch
+
+2005-06-22  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Update include files to 2.6.12
+       * Add ss support for TCP_CONG
+
+2005-06-13  Steven Whitehouse <steve@chygwyn.com>
+
+       * Decnet doc's update
+
+2005-06-07  Stephen Hemminger  <shemminger@osdl.org>
+
+       * Fix 'ip link' map to handle case where device gets autoloaded
+         by using if_nametoindex as fallback
+       * Device indices are unsigned not int.
+
+2005-06-07   Masahide NAKAMURA <nakam@linux-ipv6.org>
+       
+       * [ip] show timestamp when using '-t' option.
+       * [ip] remove duplicated code for expired message of xfrm.
+       * [ip] add "deleteall" command for xfrm;
+         "flush" uses kernel's flush interface and
+         "deleteall" uses legacy iproute2's flush feature like
+          getting-and-deleting-for-each.
+
+2005-03-30  Stephen Hemminger  <shemminger@osdl.org>
+
+       * include/linux/netfilter_ipv4/ip_tables.h dont include compiler.h
+         because it isn't needed and not on all systems
+       * Update rtnetlink.h and pkt_cls.h to be stripped versions
+         of headers from 2.6.12-rc1
+
+2005-03-30  Jamal Hadi Salim <hadi@znyx.com>
+
+       * Proper version of iptables headers (from 1.3.1)
+       * Set revision file in m_ipt
+       * Fix action_util naming in mirred
+       * don't call ll_init_map in mirred
+
+2005-03-19  Thomas Graf <tgraf@suug.ch>
+
+       * Warn about wildcard deletions and provide IFA_ADDRESS upon
+         deletions to enforce prefix length validation for IPv4.
+       * Fix netlink message alignment when the last routing attribute added
+         has a data length not aligned to RTA_ALIGNTO.
+       
+2005-03-30  Masahide NAKAMURA <nakam@linux-ipv6.org>
+       
+       * ipv6 xfrm allocspi and monitor support.
+       
+2005-03-29  Stephen Hemminger  <shemminger@osdl.org>
+
+       * switch to stack for netem tables
+
+2005-03-18  Stephen Hemminger  <shemminger@osdl.org>
+
+       * add -force option to batch mode
+       * handle midline comments in batch mode
+       * sum per cpu fields in lnstat correctly
+
 2005-03-14  Stephen Hemminger  <shemminger@osdl.org>
 
        * cleanup batch mode, allow continuation, comments etc.
        * need to call getline() with null for first usage
        * don't overwrite const arg
 
-2005-02-07  Stephen Hemminger  <shemminger@linux.site>
+2005-02-07  Stephen Hemminger  <shemminger@osdl.org>
 
        * Add experimental distribution
 
index 1d11462..ac58cd9 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,6 @@ SBINDIR=/usr/sbin
 CONFDIR=/etc/iproute2
 DOCDIR=/usr/share/doc/iproute2
 MANDIR=/usr/share/man
-KERNEL_INCLUDE=/usr/include
 
 # Path to db_185.h include
 DBM_INCLUDE:=/usr/include
@@ -24,6 +23,7 @@ CC = gcc
 HOSTCC = gcc
 CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall
 CFLAGS = $(CCOPTS) -I../include $(DEFINES)
+YACCFLAGS = -d -t -v
 
 LDLIBS += -L../lib -lnetlink -lutil
 
@@ -36,7 +36,7 @@ all: Config
        do $(MAKE) $(MFLAGS) -C $$i; done
 
 Config:
-       ./configure $(KERNEL_INCLUDE)
+       sh configure $(KERNEL_INCLUDE)
 
 install: all
        install -m 0755 -d $(DESTDIR)$(SBINDIR)
@@ -51,18 +51,22 @@ install: all
        install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR)
        install -m 0755 -d $(DESTDIR)$(MANDIR)/man8
        install -m 0644 $(shell find man/man8 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man8
-       ln -sf $(MANDIR)/man8/tc-pbfifo.8  $(DESTDIR)$(MANDIR)/man8/tc-bfifo.8
-       ln -sf $(MANDIR)/man8/tc-pbfifo.8  $(DESTDIR)$(MANDIR)/man8/tc-pfifo.8
+       ln -sf tc-pbfifo.8  $(DESTDIR)$(MANDIR)/man8/tc-bfifo.8
+       ln -sf tc-pbfifo.8  $(DESTDIR)$(MANDIR)/man8/tc-pfifo.8
        install -m 0755 -d $(DESTDIR)$(MANDIR)/man3
        install -m 0644 $(shell find man/man3 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man3
 
 clean:
+       rm -f cscope.*
        @for i in $(SUBDIRS) doc; \
        do $(MAKE) $(MFLAGS) -C $$i clean; done
 
 clobber: clean
        rm -f Config
 
-distclean: clean clobber
+distclean: clobber
+
+cscope:
+       cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc
 
 .EXPORT_ALL_VARIABLES:
index 4d7453a..4300f90 100644 (file)
@@ -1,41 +1,33 @@
 
 Here are a few quick points about DECnet support...
 
+ o iproute2 is the tool of choice for configuring the DECnet support for
+   Linux. For many features, it is the only tool which can be used to
+   configure them.
+
  o No name resolution is available as yet, all addresses must be
    entered numerically.
 
- o The neighbour cache may well list every entry as having the address
-   0.170. This is due to a problem that I need to sort out kernel side.
-   It is harmless (but don't try and use neigh add yet) just look in
-   /proc/net/decnet_neigh to see the real addresses for now.
+ o Remember to set the hardware address of the interface using: 
+
+   ip link set ethX address xx:xx:xx:xx:xx:xx
+      (where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node
+       address)
 
- o The rtnetlink support in the kernel is rather exprimental, expect a
-   few odd things to happen for the next few DECnet kernel releases.
+   if your Ethernet card won't listen to more than one unicast
+   mac address at once. If the Linux DECnet stack doesn't talk to
+   any other DECnet nodes, then check this with tcpdump and if its
+   a problem, change the mac address (but do this _before_ starting
+   any other network protocol on the interface)
 
  o Whilst you can use ip addr add to add more than one DECnet address to an
    interface, don't expect addresses which are not the same as the
-   kernels node address to work properly. i.e. You will break the DECnet
-   protocol if you do add anything other than the automatically generated
-   interface addresses to ethernet cards. This option is there for future
-   link layer support, where the device will have to be configed for
-   DECnet explicitly.
-
- o The DECnet support is currently self contained. You do not need the
-   libdnet library to use it. In fact until I've sent the dnet_pton and
-   dnet_ntop functions to Patrick to add, you can't use libdnet.
-
- o If you are not using the very latest 2.3.xx series kernels, don't
-   try and list DECnet routes if you've got IPv6 compiled into the
-   kernel. It will oops.
-
- o My main reason for writing the DECnet support for iproute2 was to
-   check out the DECnet routing code, so the route get and
-   route show cache commands are likely to be the most debugged out of
-   all of them.
-
- o If you find bugs in the DECnet support, please send them to me in the
-   first instance, and then I'll send Alexey a patch to fix it. IPv4/6
-   bugs should be sent to Alexey as before.
-
-Steve Whitehouse <SteveW@ACM.org>
+   kernels node address to work properly with 2.4 kernels. This should
+   be fine with 2.6 kernels as the routing code has been extensively
+   modified and improved.
+
+ o The DECnet support is currently self contained. It does not depend on
+   the libdnet library.
+
+Steve Whitehouse <steve@chygwyn.com>
 
diff --git a/doc/actions/actions-general b/doc/actions/actions-general
new file mode 100644 (file)
index 0000000..bb2295d
--- /dev/null
@@ -0,0 +1,254 @@
+
+This document is slightly dated but should give you an idea of how things
+work.
+
+What is it?
+-----------
+
+An extension to the filtering/classification architecture of Linux Traffic
+Control. 
+Up to 2.6.8 the only action that could be "attached" to a filter was policing. 
+i.e you could say something like:
+
+-----
+tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \
+127.0.0.1/32 flowid 1:1 police mtu 4000 rate 1500kbit burst 90k
+-----
+
+which implies "if a packet is seen on the ingress of the lo device with
+a source IP address of 127.0.0.1/32 we give it a classification id  of 1:1 and
+we execute a policing action which rate limits its bandwidth utilization 
+to 1.5Mbps".
+
+The new extensions allow for more than just policing actions to be added.
+They are also fully backward compatible. If you have a kernel that doesnt
+understand them, then the effect is null i.e if you have a newer tc
+but older kernel, the actions are not installed. Likewise if you
+have a newer kernel but older tc, obviously the tc will use current
+syntax which will work fine. Of course to get the required effect you need
+both newer tc and kernel. If you are reading this you have the
+right tc ;->
+
+A side effect is that we can now get stateless firewalling to work with tc. 
+Essentially this is now an alternative to iptables.
+I wont go into details of my dislike for iptables at times, but 
+scalability is one of the main issues; however, if you need stateful
+classification - use netfilter (for now).
+
+This stuff works on both ingress and egress qdiscs.
+
+Features
+--------
+
+1) new additional syntax and actions enabled. Note old syntax is still valid.
+
+Essentially this is still the same syntax as tc with a new construct
+"action". The syntax is of the form:
+tc filter add <DEVICE> parent 1:0 protocol ip prio 10 <Filter description>
+flowid 1:1 action <ACTION description>*
+
+You can have as many actions as you want (within sensible reasoning).
+
+In the past the only real action was the policer; i.e you could do something
+along the lines of:
+tc filter add dev lo parent ffff: protocol ip prio 10 u32 \
+match ip src 127.0.0.1/32 flowid 1:1 \
+police mtu 4000 rate 1500kbit burst 90k
+
+Although you can still use the same syntax, now you can say:
+
+tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \
+match ip src 127.0.0.1/32 flowid 1:1 \
+action police mtu 4000 rate 1500kbit burst 90k
+
+" generic Actions" (gact) at the moment are: 
+{ drop, pass, reclassify, continue}
+(If you have others not listed here, give me a reason and we will add them)
++drop says to drop the packet
++pass says to accept it
++reclassify requests for reclassification of the packet
++continue requests for next lookup to match
+
+2)In order to take advantage of some of the targets written by the
+iptables people, a classifier can have a packet being massaged by an
+iptable target. I have only tested with mangler targets up to now.
+(in fact anything that is not in the mangling table is disabled right now)
+
+In terms of hooks:
+*ingress is mapped to pre-routing hook
+*egress is mapped to post-routing hook
+I dont see much value in the other hooks, if you see it and email me good
+reasons, the addition is trivial.
+
+Example syntax for iptables targets usage becomes:
+tc filter add ..... u32 <u32 syntax> action ipt -j <iptables target syntax>
+
+example:
+tc filter add dev lo parent ffff: protocol ip prio 8 u32 \
+match ip dst 127.0.0.8/32 flowid 1:12 \
+action ipt -j mark --set-mark 2
+
+3) A feature i call pipe
+The motivation is derived from Unix pipe mechanism but applied to packets.
+Essentially take a matching packet and pass it through 
+action1 | action2 | action3 etc.
+You could do something similar to this with the tc policer and the "continue"
+operator but this rather restricts it to just the policer and requires 
+multiple rules (and lookups, hence quite inefficient); 
+
+as an example -- and please note that this is just an example _not_ The 
+Word Youve Been Waiting For (yes i have had problems giving examples
+which ended becoming dogma in documents and people modifying them a little
+to look clever); 
+
+i selected the metering rates to be small so that i can show better how 
+things work.
+The script below does the following: 
+- an incoming packet from 10.0.0.21 is first given a firewall mark of 1. 
+
+- It is then metered to make sure it does not exceed its allocated rate of 
+1Kbps. If it doesnt exceed rate, this is where we terminate action execution.
+
+- If it does exceed its rate, its "color" changes to a mark of 2 and it is 
+then passed through a second meter.
+
+-The second meter is shared across all flows on that device [i am surprised 
+that this seems to be not a well known feature of the policer; Bert was telling 
+me that someone was writing a qdisc just to do sharing across multiple devices;
+it must be the summer heat again; weve had someone doing that every year around
+summer  -- the key to sharing is to use a operator "index" in your policer 
+rules (example "index 20"). All your rules have to use the same index to 
+share.]
+-If the second meter is exceeded the color of the flow changes further to 3.
+
+-We then pass the packet to another meter which is shared across all devices
+in the system. If this meter is exceeded we drop the packet.
+
+Note the mark can be used further up the system to do things like policy 
+or more interesting things on the egress.
+
+------------------ cut here -------------------------------
+#
+# Add an ingress qdisc on eth0
+tc qdisc add dev eth0 ingress
+#
+#if you see an incoming packet from 10.0.0.21
+tc filter add dev eth0 parent ffff: protocol ip prio 1 \
+u32 match ip src 10.0.0.21/32 flowid 1:15 \
+#
+# first give it a mark of 1
+action ipt -j mark --set-mark 1 index 2 \
+#
+# then pass it through a policer which allows 1kbps; if the flow
+# doesnt exceed that rate, this is where we stop, if it exceeds we
+# pipe the packet to the next action
+action police rate 1kbit burst 9k pipe \
+#
+# which marks the packet fwmark as 2 and pipes
+action ipt -j mark --set-mark 2 \
+#
+# next attempt to borrow b/width from a meter
+# used across all flows incoming on eth0("index 30")
+# and if that is exceeded we pipe to the next action
+action police index 30 mtu 5000 rate 1kbit burst 10k pipe \
+# mark it as fwmark 3 if exceeded
+action ipt -j mark --set-mark 3 \
+# and then attempt to borrow from a meter used by all devices in the
+# system. Should this be exceeded, drop the packet on the floor.
+action police index 20 mtu 5000 rate 1kbit burst 90k drop
+--------------------------------- 
+
+Now lets see the actions installed with 
+"tc filter show parent ffff: dev eth0"
+
+-------- output -----------
+jroot# tc filter show parent ffff: dev eth0
+filter protocol ip pref 1 u32 
+filter protocol ip pref 1 u32 fh 800: ht divisor 1 
+filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 
+
+   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x1  index 2
+
+   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb 
+
+   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x2  index 1
+
+   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b 
+
+   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x3  index 3
+
+   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b 
+
+  match 0a000015/ffffffff at 12
+-------------------------------
+
+Note the ordering of the actions is based on the order in which we entered
+them. In the future i will add explicit priorities.
+
+Now lets run a ping -f from 10.0.0.21 to this host; stop the ping after
+you see a few lines of dots
+
+----
+[root@jzny hadi]# ping -f  10.0.0.22
+PING 10.0.0.22 (10.0.0.22): 56 data bytes
+....................................................................................................................................................................................................................................................................................................................................................................................................................................................
+--- 10.0.0.22 ping statistics ---
+2248 packets transmitted, 1811 packets received, 19% packet loss
+round-trip min/avg/max = 0.7/9.3/20.1 ms
+-----------------------------
+
+Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0"
+
+--------------
+jroot# tc -s filter show parent ffff: dev eth0
+filter protocol ip pref 1 u32 
+filter protocol ip pref 1 u32 fh 800: ht divisor 1 
+filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
+5 
+
+   action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x1  index 2
+         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0) 
+
+   action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb 
+         Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122) 
+
+   action order 3: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x2  index 1
+         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0) 
+
+   action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b 
+         Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945) 
+
+   action order 5: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x3  index 3
+         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0) 
+
+   action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b 
+         Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437) 
+
+  match 0a000015/ffffffff at 12
+-------------------------------
+
+Neat, eh?
+
+
+Wanna write an action module?
+------------------------------
+Its easy. Either look at the code or send me email. I will document at
+some point; will also accept documentation.
+
+TODO
+----
+
+Lotsa goodies/features coming. Requests also being accepted.
+At the moment the focus has been on getting the architecture in place.
+Expect new things in the spurious time i have to work on this
+(particularly around end of year when i typically get time off
+from work).
+
diff --git a/doc/actions/dummy-README b/doc/actions/dummy-README
new file mode 100644 (file)
index 0000000..3ef9f21
--- /dev/null
@@ -0,0 +1,155 @@
+
+Advantage over current IMQ; cleaner in particular in SMP;
+with a _lot_ less code.
+Old Dummy device functionality is preserved while new one only
+kicks in if you use actions.
+
+IMQ USES
+--------
+As far as i know the reasons listed below are why people use IMQ. 
+It would be nice to know of anything else that i missed.
+
+1) qdiscs/policies that are per device as opposed to system wide.
+IMQ allows for sharing.
+
+2) Allows for queueing incoming traffic for shaping instead of
+dropping. I am not aware of any study that shows policing is 
+worse than shaping in achieving the end goal of rate control.
+I would be interested if anyone is experimenting.
+
+3) Very interesting use: if you are serving p2p you may wanna give 
+preference to your own locally originated traffic (when responses come back)
+vs someone using your system to do bittorrent. So QoSing based on state
+comes in as the solution. What people did to achieve this was stick
+the IMQ somewhere prelocal hook.
+I think this is a pretty neat feature to have in Linux in general.
+(i.e not just for IMQ).
+But i wont go back to putting netfilter hooks in the device to satisfy
+this.  I also dont think its worth it hacking dummy some more to be 
+aware of say L3 info and play ip rule tricks to achieve this.
+--> Instead the plan is to have a conntrack related action. This action will
+selectively either query/create conntrack state on incoming packets. 
+Packets could then be redirected to dummy based on what happens -> eg 
+on incoming packets; if we find they are of known state we could send to 
+a different queue than one which didnt have existing state. This
+all however is dependent on whatever rules the admin enters.
+
+At the moment this function does not exist yet. I have decided instead
+of sitting on the patch to release it and then if theres pressure i will
+add this feature.
+
+What you can do with dummy currently with actions
+--------------------------------------------------
+
+Lets say you are policing packets from alias 192.168.200.200/32
+you dont want those to exceed 100kbps going out.
+
+tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+match ip src 192.168.200.200/32 flowid 1:2 \
+action police rate 100kbit burst 90k drop
+
+If you run tcpdump on eth0 you will see all packets going out
+with src 192.168.200.200/32 dropped or not
+Extend the rule a little to see only the ones that made it out:
+
+tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \
+match ip src 192.168.200.200/32 flowid 1:2 \
+action police rate 10kbit burst 90k drop \
+action mirred egress mirror dev dummy0 
+
+Now fire tcpdump on dummy0 to see only those packets ..
+tcpdump -n -i dummy0 -x -e -t 
+
+Essentially a good debugging/logging interface.
+
+If you replace mirror with redirect, those packets will be
+blackholed and will never make it out. This redirect behavior
+changes with new patch (but not the mirror). 
+
+What you can do with the patch to provide functionality
+that most people use IMQ for below:
+
+--------
+export TC="/sbin/tc"
+
+$TC qdisc add dev dummy0 root handle 1: prio 
+$TC qdisc add dev dummy0 parent 1:1 handle 10: sfq
+$TC qdisc add dev dummy0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000
+$TC qdisc add dev dummy0 parent 1:3 handle 30: sfq                                
+$TC filter add dev dummy0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1
+$TC filter add dev dummy0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2
+
+ifconfig dummy0 up
+
+$TC qdisc add dev eth0 ingress
+
+# redirect all IP packets arriving in eth0 to dummy0 
+# use mark 1 --> puts them onto class 1:1
+$TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \
+match u32 0 0 flowid 1:1 \
+action ipt -j MARK --set-mark 1 \
+action mirred egress redirect dev dummy0
+
+--------
+
+
+Run A Little test:
+
+from another machine ping so that you have packets going into the box:
+-----
+[root@jzny action-tests]# ping 10.22
+PING 10.22 (10.0.0.22): 56 data bytes
+64 bytes from 10.0.0.22: icmp_seq=0 ttl=64 time=2.8 ms
+64 bytes from 10.0.0.22: icmp_seq=1 ttl=64 time=0.6 ms
+64 bytes from 10.0.0.22: icmp_seq=2 ttl=64 time=0.6 ms
+
+--- 10.22 ping statistics ---
+3 packets transmitted, 3 packets received, 0% packet loss
+round-trip min/avg/max = 0.6/1.3/2.8 ms
+[root@jzny action-tests]# 
+-----
+Now look at some stats:
+
+---
+[root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0
+filter protocol ip pref 10 u32 
+filter protocol ip pref 10 u32 fh 800: ht divisor 1 
+filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 
+  match 00000000/00000000 at 0
+        action order 1: tablename: mangle  hook: NF_IP_PRE_ROUTING 
+        target MARK set 0x1  
+        index 1 ref 1 bind 1 installed 4195sec  used 27sec 
+         Sent 252 bytes 3 pkts (dropped 0, overlimits 0) 
+
+        action order 2: mirred (Egress Redirect to device dummy0) stolen
+        index 1 ref 1 bind 1 installed 165 sec used 27 sec
+         Sent 252 bytes 3 pkts (dropped 0, overlimits 0) 
+
+[root@jmandrake]:~# $TC -s qdisc
+qdisc sfq 30: dev dummy0 limit 128p quantum 1514b 
+ Sent 0 bytes 0 pkts (dropped 0, overlimits 0) 
+qdisc tbf 20: dev dummy0 rate 20Kbit burst 1575b lat 2147.5s 
+ Sent 210 bytes 3 pkts (dropped 0, overlimits 0) 
+qdisc sfq 10: dev dummy0 limit 128p quantum 1514b 
+ Sent 294 bytes 3 pkts (dropped 0, overlimits 0) 
+qdisc prio 1: dev dummy0 bands 3 priomap  1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1
+ Sent 504 bytes 6 pkts (dropped 0, overlimits 0) 
+qdisc ingress ffff: dev eth0 ---------------- 
+ Sent 308 bytes 5 pkts (dropped 0, overlimits 0) 
+
+[root@jmandrake]:~# ifconfig dummy0
+dummy0    Link encap:Ethernet  HWaddr 00:00:00:00:00:00  
+          inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link
+          UP BROADCAST RUNNING NOARP  MTU:1500  Metric:1
+          RX packets:6 errors:0 dropped:3 overruns:0 frame:0
+          TX packets:3 errors:0 dropped:0 overruns:0 carrier:0
+          collisions:0 txqueuelen:32 
+          RX bytes:504 (504.0 b)  TX bytes:252 (252.0 b)
+-----
+
+Dummy continues to behave like it always did.
+You send it any packet not originating from the actions it will drop them.
+[In this case the three dropped packets were ipv6 ndisc].
+
+cheers,
+jamal
index 3e135a0..aa942e5 100644 (file)
@@ -66,6 +66,6 @@ action mirred egress mirror dev eth1
 ---
 
 A more interesting example is when you mirror flows to a dummy device
-so you could tcpdump them (dummy by defaults drops all devices it sees).
+so you could tcpdump them (dummy by defaults drops all packets it sees).
 This is a very useful debug feature.
 
diff --git a/etc/iproute2/ematch_map b/etc/iproute2/ematch_map
new file mode 100644 (file)
index 0000000..7c6a281
--- /dev/null
@@ -0,0 +1,5 @@
+# lookup table for ematch kinds
+1      cmp
+2      nbyte
+3      u32
+4      meta
index 2b36e49..110061a 100644 (file)
@@ -1,15 +1,13 @@
-#0x10  lowdelay
-#0x08  throughput
-#0x04  reliability
-
+0x10   lowdelay
+0x08   throughput
+0x04   reliability
 # This value overlap with ECT, do not use it!
-#0x02  mincost
-
+0x02   mincost
 # These values seems do not want to die, Cisco likes them by a strange reason.
-#0x20  priority
-#0x40  immediate
-#0x60  flash
-#0x80  flash-override
-#0xa0  critical
-#0xc0  internet
-#0xe0  network
+0x20   priority
+0x40   immediate
+0x60   flash
+0x80   flash-override
+0xa0   critical
+0xc0   internet
+0xe0   network
index 2569edf..5304770 100644 (file)
@@ -1,26 +1,29 @@
 #
 # Reserved protocols.
 #
-#0     unspec
-#1     redirect
-#2     kernel
-#3     boot
-#4     static
-#8     gated
-#9     ra
-#10    mrt
-#11    zebra
-#12    bird
+0      unspec
+1      redirect
+2      kernel
+3      boot
+4      static
+8      gated
+9      ra
+10     mrt
+11     zebra
+12     bird
+13     dnrouted
+14     xorp
+15     ntk
 
 #
 #      Used by me for gated
 #
-#254   gated/aggr
-#253   gated/bgp
-#252   gated/ospf
-#251   gated/ospfase
-#250   gated/rip
-#249   gated/static
-#248   gated/conn
-#247   gated/inet
-#246   gated/default
+254    gated/aggr
+253    gated/bgp
+252    gated/ospf
+251    gated/ospfase
+250    gated/rip
+249    gated/static
+248    gated/conn
+247    gated/inet
+246    gated/default
index 332179d..eedd76d 100644 (file)
@@ -1,7 +1,7 @@
 #
 # reserved values
 #
-#0     cosmos
+0      cosmos
 #
 # local
 #
index 36fbc01..8514bc1 100644 (file)
@@ -1,12 +1,11 @@
 #
 # reserved values
 #
-#0     global
-#255   nowhere
-#254   host
-#253   link
-
+0      global
+255    nowhere
+254    host
+253    link
 #
 # pseudo-reserved
 #
-#200   site
+200    site
index 558716b..541abfd 100644 (file)
@@ -1,10 +1,10 @@
 #
 # reserved values
 #
-#255   local
-#254   main
-#253   default
-#0     unspec
+255    local
+254    main
+253    default
+0      unspec
 #
 # local
 #
diff --git a/examples/README.cbq b/examples/README.cbq
new file mode 100644 (file)
index 0000000..38c1089
--- /dev/null
@@ -0,0 +1,122 @@
+# CHANGES
+# -------
+# v0.3a2- fixed bug in "if" operator. Thanks kad@dgtu.donetsk.ua.
+# v0.3a-  added TIME parameter. Example:
+#         TIME=00:00-19:00;64Kbit/6Kbit
+#         So, between 00:00 and 19:00 RATE will be 64Kbit.
+#         Just start "cbq.init timecheck" periodically from cron (every 10
+#         minutes for example).
+#         !!! Anyway you MUST start "cbq.init start" for CBQ initialize.
+# v0.2 -  Some cosmetic changes. Now it is more compatible with
+#         old bash versions. Thanks to Stanislav V. Voronyi
+#         <stas@cnti.uanet.kharkov.ua>.
+# v0.1 -  First public release
+# 
+# README
+# ------
+# 
+# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
+# Don't ask me "why" and "how" :)
+# 
+# This is an example of using CBQ (Class Based Queueing) and policy-based
+# filter for building smart ethernet shapers. All CBQ parameters are
+# correct only for ETHERNET (eth0,1,2..) linux interfaces. It works for
+# ARCNET too (just set bandwidth parameter to 2Mbit). It was tested
+# on 2.1.125-2.1.129 linux kernels (KSI linux, Nostromo version) and 
+# ip-route utility by A.Kuznetsov (iproute2-ss981101 version). 
+# You can download ip-route from ftp://ftp.inr.ac.ru/ip-routing or
+# get iproute2*.rpm (compiled with glibc) from ftp.ksi-linux.com.
+# 
+# 
+# HOW IT WORKS
+# 
+# Each shaper must be described by config file in $CBQ_PATH
+# (/etc/sysconfig/cbq/) directory - one config file for each CBQ shaper.
+# 
+# Some words about config file name:
+# Each shaper has its own ID - a two-byte HEX number. In fact, the ID is
+# the CBQ class ID.
+# So, filename looks like:
+# 
+# cbq-1280.My_first_shaper
+# ^^^ ^^^  ^^^^^^^^^^^^^
+#  |  |            |______ Shaper name - any word
+#  |  |___________________ ID (0000-FFFF), let ID looks like shaper's rate
+#  |______________________ Filename must begin from "cbq-" 
+# 
+# 
+# Config file describes shaper parameters and source[destination] 
+# address[port].
+# For example let's prepare /etc/sysconfig/cbq/cbq-1280.My_first_shaper:
+# 
+# ----------8<---------------------
+# DEVICE=eth0,10Mbit,1Mbit
+# RATE=128Kbit
+# WEIGHT=10Kbit
+# PRIO=5
+# RULE=192.168.1.0/24
+# ----------8<---------------------
+# 
+# This is minimal configuration, where:
+# DEVICE:  eth0   - device where we do control our traffic
+#          10Mbit - REAL ethernet card bandwidth
+#          1Mbit  - "weight" of :1 class (parent for all shapers for eth0),
+#                   as a rule of thumb weight=bandwidth/10.
+#          100Mbit adapter's example: DEVICE=eth0,100Mbit,10Mbit
+#          *** If you want to build more than one shaper per device it's
+#              enough to describe bandwidth and weight once  - cbq.init
+#              is smart :) You can put only 'DEVICE=eth0' into cbq-* 
+#              config file for eth0.
+# 
+# RATE:    Shaper's speed - Kbit,Mbit or bps (bytes per second)
+# 
+# WEIGHT:  "weight" of shaper (CBQ class). Like for DEVICE - approx. RATE/10
+# 
+# PRIO:    shaper's priority from 1 to 8 where 1 is the highest one.
+#          I do always use "5" for all my shapers.
+# 
+# RULE:    [source addr][:source port],[dest addr][:dest port]
+#          Some examples:
+# RULE=10.1.1.0/24:80         - all traffic for network 10.1.1.0 to port 80
+#                               will be shaped.
+# RULE=10.2.2.5               - shaper works only for IP address 10.2.2.5   
+# RULE=:25,10.2.2.128/25:5000 - all traffic from any address and port 25 to
+#                               address 10.2.2.128 - 10.2.2.255 and port 5000
+#                               will be shaped.
+# RULE=10.5.5.5:80,           - shaper active only for traffic from port 80 of
+#                               address 10.5.5.5
+# Multiple RULE fields per one config file are allowed. For example:
+# RULE=10.1.1.2:80
+# RULE=10.1.1.2:25
+# RULE=10.1.1.2:110
+# 
+# *** ATTENTION!!!
+# All shapers do work only for outgoing traffic!
+# So, if you want to build a bidirectional shaper you must set it up for
+# both ethernet cards. For example, let's build a shaper for our linux box like:
+# 
+#                     ---------             192.168.1.1
+# BACKBONE -----eth0-|  linux  |-eth1------*[our client]
+#                     ---------
+# 
+# Let all traffic from backbone to client be shaped at 28Kbit and
+# traffic from client to backbone - at 128Kbit. We need two config files:
+# 
+# ---8<-----/etc/sysconfig/cbq/cbq-28.client-out----
+# DEVICE=eth1,10Mbit,1Mbit
+# RATE=28Kbit
+# WEIGHT=2Kbit
+# PRIO=5
+# RULE=192.168.1.1
+# ---8<---------------------------------------------
+# 
+# ---8<-----/etc/sysconfig/cbq/cbq-128.client-in----
+# DEVICE=eth0,10Mbit,1Mbit
+# RATE=128Kbit
+# WEIGHT=10Kbit
+# PRIO=5
+# RULE=192.168.1.1,
+# ---8<---------------------------------------------
+#                 ^pay attention to "," - this is source address!
+# 
+# Enjoy.
diff --git a/examples/cbq.init-v0.7.3 b/examples/cbq.init-v0.7.3
new file mode 100644 (file)
index 0000000..888aba4
--- /dev/null
@@ -0,0 +1,984 @@
+#!/bin/bash
+#
+#    cbq.init v0.7.3
+#    Copyright (C) 1999  Pavel Golubev <pg@ksi-linux.com>
+#    Copyright (C) 2001-2004  Lubomir Bulej <pallas@kadan.cz>
+#
+#    chkconfig:   2345 11 89
+#    description: sets up CBQ-based traffic control
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program; if not, write to the Free Software
+#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+#    To get the latest version, check on Freshmeat for actual location:
+#
+#              http://freshmeat.net/projects/cbq.init
+#
+#
+# VERSION HISTORY
+# ---------------
+# v0.7.3- Deepak Singhal <singhal at users.sourceforge.net>
+#        - fix timecheck to not ignore regular TIME rules after
+#          encountering a TIME rule that spans over midnight
+#      - Nathan Shafer <nicodemus at users.sourceforge.net>
+#        - allow symlinks to class files
+#      - Seth J. Blank <antifreeze at users.sourceforge.net>
+#        - replace hardcoded ip/tc location with variables
+#      - Mark Davis <mark.davis at gmx.de>
+#        - allow setting of PRIO_{MARK,RULE,REALM} in class file
+#      - Fernando Sanch <toptnc at users.sourceforge.net>
+#        - allow underscores in interface names
+# v0.7.2- Paulo Sedrez
+#        - fix time2abs to allow hours with leading zero in TIME rules
+#      - Svetlin Simeonov <zvero at yahoo.com>
+#        - fix cbq_device_list to allow VLAN interfaces
+#      - Mark Davis <mark.davis at gmx.de>
+#        - ignore *~ backup files when looking for classes
+#      - Mike Boyer <boyer at administrative.com>
+#        - fix to allow arguments to be passed to "restart" command
+# v0.7.1- Lubomir Bulej <pallas at kadan.cz>
+#        - default value for PERTURB
+#        - fixed small bug in RULE parser to correctly parse rules with
+#          identical source and destination fields
+#        - faster initial scanning of DEVICE fields
+# v0.7 - Lubomir Bulej <pallas at kadan.cz>
+#        - lots of various cleanups and reorganizations; the parsing is now
+#          some 40% faster, but the class ID must be in range 0x0002-0xffff
+#          (again). Because of the number of internal changes and the above
+#          class ID restriction, I bumped the version to 0.7 to indicate
+#          something might have got broken :)
+#        - changed PRIO_{U32,FW,ROUTE} to PRIO_{RULE,MARK,REALM}
+#          for consistency with filter keywords
+#        - exposed "compile" command
+#      - Catalin Petrescu <taz at dntis.ro>
+#        - support for port masks in RULE (u32) filter
+#      - Jordan Vrtanoski <obeliks at mt.net.mk>
+#        - support for week days in TIME rules
+# v0.6.4- Lubomir Bulej <pallas at kadan.cz>
+#        - added PRIO_* variables to allow easy control of filter priorities
+#        - added caching to speed up CBQ start, the cache is invalidated
+#          whenever any of the configuration files changes
+#        - updated the readme section + some cosmetic fixes
+# v0.6.3- Lubomir Bulej <pallas at kadan.cz>
+#        - removed setup of (unnecessary) class 1:1 - all classes
+#          now use qdisc's default class 1:0 as their parent
+#        - minor fix in the timecheck branch - classes
+#          without leaf qdisc were not updated
+#        - minor fix to avoid timecheck failure when run
+#          at time with minutes equal to 08 or 09
+#        - respect CBQ_PATH setting in environment
+#        - made PRIO=5 default, rendering it optional in configs
+#        - added support for route filter, see notes about REALM keyword
+#        - added support for fw filter, see notes about MARK keyword
+#        - added filter display to "list" and "stats" commands
+#        - readme section update + various cosmetic fixes
+# v0.6.2- Catalin Petrescu <taz at dntis.ro>
+#        - added tunnels interface handling
+# v0.6.1- Pavel Golubev <pg at ksi-linux.com>
+#        - added sch_prio module loading
+#          (thanks johan at iglo.virtual.or.id for reminding)
+#        - resolved errors resulting from stricter syntax checking in bash2
+#      - Lubomir Bulej <pallas at kadan.cz>
+#        - various cosmetic fixes
+# v0.6 - Lubomir Bulej <pallas at kadan.cz>
+#        - attempt to limit number of spawned processes by utilizing
+#          more of sed power (use sed instead of grep+cut)
+#        - simplified TIME parser, using bash builtins
+#        - added initial support for SFQ as leaf qdisc
+#        - reworked the documentation part a little
+#        - incorporated pending patches and ideas submitted by
+#          following people for versions 0.3 into version 0.6
+#      - Miguel Freitas <miguel at cetuc.puc-rio.br>
+#        - in case of overlapping TIME parameters, the last match is taken
+#      - Juanjo Ciarlante <jjo at mendoza.gov.ar>
+#        - chkconfig tags, list + stats startup parameters
+#        - optional tc & ip command logging (into /var/run/cbq-*)
+#      - Rafal Maszkowski <rzm at icm.edu.pl>
+#        - PEAK parameter for setting TBF's burst peak rate
+#        - fix for many config files (use find instead of ls)
+# v0.5.1- Lubomir Bulej <pallas at kadan.cz>
+#        - fixed little but serious bug in RULE parser
+# v0.5 - Lubomir Bulej <pallas at kadan.cz>
+#        - added options PARENT, LEAF, ISOLATED and BOUNDED. This allows
+#          (with some attention to config file ordering) for creating
+#          hierarchical structures of shapers with classes able (or unable)
+#          to borrow bandwidth from their parents.
+#        - class ID check allows hexadecimal numbers
+#        - rewritten & simplified RULE parser
+#        - cosmetic changes to improve readability
+#        - reorganization to avoid duplicate code (timecheck etc.)
+#        - timecheck doesn't check classes without TIME fields anymore
+# v0.4  - Lubomir Bulej <pallas at kadan.cz>
+#        - small bugfix in RULE parsing code
+#        - simplified configuration parsing code
+#        - several small cosmetic changes
+#        - TIME parameter can be now specified more than once allowing you to
+#          differentiate RATE throughout the whole day. Time overlapping is
+#          not checked, first match is taken. Midnight wrap (eg. 20:00-6:00)
+#          is allowed and taken care of.
+# v0.3a4- fixed small bug in IF operator. Thanks to
+#        Rafal Maszkowski <rzm at icm.edu.pl>
+# v0.3a3- fixed grep bug when using more than 10 eth devices. Thanks to David
+#        Trcka <trcka at poda.cz>.
+# v0.3a2- fixed bug in "if" operator. Thanks kad at dgtu.donetsk.ua.
+# v0.3a - added TIME parameter. Example: TIME=00:00-19:00;64Kbit/6Kbit
+#        So, between 00:00 and 19:00 the RATE will be 64Kbit.
+#        Just start "cbq.init timecheck" periodically from cron
+#        (every 10 minutes for example). DON'T FORGET though, to run
+#        "cbq.init start" for CBQ to initialize.
+# v0.2  - Some cosmetic changes. Now it is more compatible with old bash
+#        version. Thanks to Stanislav V. Voronyi <stas at cnti.uanet.kharkov.ua>.
+# v0.1  - First public release
+#
+#
+# README
+# ------
+#
+# First of all - this is just a SIMPLE EXAMPLE of CBQ power.
+# Don't ask me "why" and "how" :)
+#
+# This script is meant to simplify setup and management of relatively simple
+# CBQ-based traffic control on Linux. Access to advanced networking features
+# of Linux kernel is provided by "ip" and "tc" utilities from A. Kuznetsov's
+# iproute2 package, available at ftp://ftp.inr.ac.ru/ip-routing. Because the
+# utilities serve primarily to translate user wishes to RTNETLINK commands,
+# their interface is rather spartan, intolerant and requires quite a lot of
+# typing. And typing is what this script attempts to reduce :)
+#
+# The advanced networking stuff in Linux is pretty flexible and this script
+# aims to bring some of its features to the not-so-hard-core Linux users. Of
+# course, there is a tradeoff between simplicity and flexibility and you may
+# realize that the flexibility suffered too much for your needs -- time to
+# face "ip" and "tc" interface.
+#
+# To speed up the "start" command, simple caching was introduced in version
+# 0.6.4. The caching works so that the sequence of "tc" commands for given
+# configuration is stored in a file (/var/cache/cbq.init by default) which
+# is used next time the "start" command is run to avoid repeated parsing of
+# configuration files. This cache is invalidated whenever any of the CBQ
+# configuration files changes. If you want to run "cbq.init start" without
+# caching, run it as "cbq.init start nocache". If you want to force cache
+# invalidation, run it as "cbq.init start invalidate". Caching is disabled
+# if you have logging enabled (ie. CBQ_DEBUG is not empty).
+#
+# If you only want cbq.init to translate your configuration to "tc" commands,
+# use "compile" command which will output "tc" commands required to build
+# your configuration. Bear in mind that "compile" does not check if the "tc"
+# commands were successful - this is done (in certain places) only when the
+# "start nocache" command is used, which is also useful when creating the
+# configuration to check whether it is completely valid.
+#
+# All CBQ parameters are valid for Ethernet interfaces only. The script was
+# tested on various Linux kernel versions from series 2.1 to 2.4 and several
+# distributions with KSI Linux (Nostromo version) as the premier one.
+#
+#
+# HOW DOES IT WORK?
+# -----------------
+#
+# Every traffic class must be described by a file in the $CBQ_PATH directory
+# (/etc/sysconfig/cbq by default) - one file per class.
+#
+# The config file names must obey mandatory format: cbq-<clsid>.<name> where
+# <clsid> is two-byte hexadecimal number in range <0002-FFFF> (which in fact
+# is a CBQ class ID) and <name> is the name of the class -- anything to help
+# you distinguish the configuration files. For small amount of classes it is
+# often possible (and convenient) to let <clsid> resemble bandwidth of the
+# class.
+#
+# Example of valid config name:
+#      cbq-1280.My_first_shaper
+#
+#
+# The configuration file may contain the following parameters:
+#
+### Device parameters
+#
+# DEVICE=<ifname>,<bandwidth>[,<weight>]       mandatory
+# DEVICE=eth0,10Mbit,1Mbit
+#
+#      <ifname> is the name of the interface you want to control
+#              traffic on, e.g. eth0
+#      <bandwidth> is the physical bandwidth of the device, e.g. for
+#              ethernet 10Mbit or 100Mbit, for arcnet 2Mbit
+#      <weight> is tuning parameter that should be proportional to
+#              <bandwidth>. As a rule of thumb: <weight> = <bandwidth> / 10
+#
+# When you have more classes on one interface, it is enough to specify
+# <bandwidth> [and <weight>] only once, therefore in other files you only
+# need to set DEVICE=<ifname>.
+#
+### Class parameters
+#
+# RATE=<speed>                                 mandatory
+# RATE=5Mbit
+#
+#      Bandwidth allocated to the class. Traffic going through the class is
+#      shaped to conform to specified rate. You can use Kbit, Mbit or bps,
+#      Kbps and Mbps as suffixes. If you don't specify any unit, bits/sec
+#      are used. Also note that "bps" means "bytes per second", not bits.
+#
+# WEIGHT=<speed>                               mandatory
+# WEIGHT=500Kbit
+#
+#      Tuning parameter that should be proportional to RATE. As a rule
+#      of thumb, use WEIGHT ~= RATE / 10.
+#
+# PRIO=<1-8>                                   optional, default 5
+# PRIO=5
+#
+#      Priority of class traffic. The higher the number, the lesser
+#      the priority. Priority of 5 is just fine.
+#
+# PARENT=<clsid>                               optional, default not set
+# PARENT=1280
+#
+#      Specifies ID of the parent class to which you want this class to be
+#      attached. You might want to use LEAF=none for the parent class as
+#      mentioned below. By using this parameter and carefully ordering the
+#      configuration files, it is possible to create simple hierarchical
+#      structures of CBQ classes. The ordering is important so that parent
+#      classes are constructed prior to their children.
+#
+# LEAF=none|tbf|sfq                            optional, default "tbf"
+#
+#      Tells the script to attach specified leaf queueing discipline to CBQ
+#      class. By default, TBF is used. Note that attaching TBF to CBQ class
+#      shapes the traffic to conform to TBF parameters and prevents the class
+#      from borrowing bandwidth from its parent even if you have BOUNDED set
+#      to "no". To allow the class to borrow bandwidth (provided it is not
+#      bounded), you must set LEAF to "none" or "sfq".
+#
+#      If you want to ensure (approximately) fair sharing of bandwidth among
+#      several hosts in the same class, you might want to specify LEAF=sfq to
+#      attach SFQ as leaf queueing discipline to that class.
+#
+# BOUNDED=yes|no                               optional, default "yes"
+#
+#      If set to "yes", the class is not allowed to borrow bandwidth from
+#      its parent class in overlimit situation. If set to "no", the class
+#      will be allowed to borrow bandwidth from its parent.
+#
+# Note:        Don't forget to set LEAF to "none" or "sfq", otherwise the class will
+#      have TBF attached to itself and will not be able to borrow unused
+#      bandwidth from its parent.
+#
+# ISOLATED=yes|no                              optional, default "no"
+#
+#      If set to "yes", the class will not lend unused bandwidth to
+#      its children.
+#
+### TBF qdisc parameters
+#
+# BUFFER=<bytes>[/<bytes>]                     optional, default "10Kb/8"
+#
+#      This parameter controls the depth of the token bucket. In other
+#      words it represents the maximal burst size the class can send.
+#      The optional part of parameter is used to determine the length
+#      of intervals in packet sizes, for which the transmission times
+#      are kept.
+#
+# LIMIT=<bytes>                                        optional, default "15Kb"
+#
+#      This parameter determines the maximal length of backlog. If
+#      the queue contains more data than specified by LIMIT, the
+#      newly arriving packets are dropped. The length of backlog
+#      determines queue latency in case of congestion.
+#
+# PEAK=<speed>                                 optional, default not set
+#
+#      Maximal peak rate for short-term burst traffic. This allows you
+#      to control the absolute peak rate the class can send at, because
+#      single TBF that allows 256Kbit/s would of course allow rate of
+#      512Kbit for half a second or 1Mbit for a quarter of second.
+#
+# MTU=<bytes>                                          optional, default "1500"
+#
+#      Maximum number of bytes that can be sent at once over the
+#      physical medium. This parameter is required when you specify
+#      PEAK parameter. It defaults to MTU of ethernet - for other
+#      media types you might want to change it.
+#
+# Note: Setting TBF as leaf qdisc will effectively prevent the class from
+#      borrowing bandwidth from the ancestor class, because even if the
+#      class allows more traffic to pass through, it is then shaped to
+#      conform to TBF.
+#
+### SFQ qdisc parameters
+#
+# The SFQ queueing discipline is a cheap way for sharing class bandwidth
+# among several hosts. As it is stochastic, the fairness is approximate but
+# it will do the job in most cases. If you want real fairness, you should
+# probably use WRR (weighted round robin) or WFQ queueing disciplines. Note
+# that SFQ does not do any traffic shaping - the shaping is done by the CBQ
+# class the SFQ is attached to.
+#
+# QUANTUM=<bytes>                              optional, default not set
+#
+#      This parameter should not be set lower than link MTU, for ethernet
+#      it is 1500b, or (with MAC header) 1514b which is the value used
+#      in Alexey Kuznetsov's examples.
+#
+# PERTURB=<seconds>                            optional, default "10"
+#
+#      Period of hash function perturbation. If unset, hash reconfiguration
+#      will never take place which is what you probably don't want. The
+#      default value of 10 seconds is probably a good one.
+#
+### Filter parameters
+#
+# RULE=[[saddr[/prefix]][:port[/mask]],][daddr[/prefix]][:port[/mask]]
+#
+#      These parameters make up "u32" filter rules that select traffic for
+#      each of the classes. You can use multiple RULE fields per config.
+#
+#      The optional port mask should only be used by advanced users who
+#      understand how the u32 filter works.
+#
+# Some examples:
+#
+#      RULE=10.1.1.0/24:80
+#              selects traffic going to port 80 in network 10.1.1.0
+#
+#      RULE=10.2.2.5
+#              selects traffic going to any port on single host 10.2.2.5
+#
+#      RULE=10.2.2.5:20/0xfffe
+#              selects traffic going to ports 20 and 21 on host 10.2.2.5
+#
+#      RULE=:25,10.2.2.128/26:5000
+#              selects traffic going from anywhere on port 25 to
+#              port 5000 in network 10.2.2.128
+#
+#      RULE=10.5.5.5:80,
+#              selects traffic going from port 80 of single host 10.5.5.5
+#
+#
+#
+# REALM=[srealm,][drealm]
+#
+#      These parameters make up "route" filter rules that classify traffic
+#      according to packet source/destination realms. For information about
+#      realms, see Alexey Kuznetsov's IP Command Reference. This script
+#      does not define any realms, it just builds "tc filter" commands
+#      for you if you need to classify traffic this way.
+#
+#      Realm is either a decimal number or a string referencing entry in
+#      /etc/iproute2/rt_realms (usually).
+#
+# Some examples:
+#
+#      REALM=russia,internet
+#              selects traffic going from realm "russia" to realm "internet"
+#
+#      REALM=freenet,
+#              selects traffic going from realm "freenet"
+#
+#      REALM=10
+#              selects traffic going to realm 10
+#
+#
+#
+# MARK=<mark>
+#
+#      These parameters make up "fw" filter rules that select traffic for
+#      each of the classes according to firewall "mark". Mark is a decimal
+#      number packets are tagged with if firewall rules say so. You can
+#      use multiple MARK fields per config.
+#
+#
+# Note: Rules for different filter types can be combined. Attention must be
+#      paid to the priority of filter rules, which can be set below using
+#      PRIO_{RULE,MARK,REALM} variables.
+#
+### Time ranging parameters
+#
+# TIME=[<dow>,<dow>, ...,<dow>/]<from>-<till>;<rate>/<weight>[/<peak>]
+# TIME=0,1,2,5/18:00-06:00;256Kbit/25Kbit
+# TIME=60123/18:00-06:00;256Kbit/25Kbit
+# TIME=18:00-06:00;256Kbit/25Kbit
+#
+#      This parameter allows you to differentiate the class bandwidth
+#      throughout the day. You can specify multiple TIME parameters, if
+#      the times overlap, last match is taken. The fields <rate>, <weight>
+#      and <peak> correspond to parameters RATE, WEIGHT and PEAK (which
+#      is optional and applies to TBF leaf qdisc only).
+#
+#      You can also specify days of week when the TIME rule applies. <dow>
+#      is numeric, 0 corresponds to sunday, 1 corresponds to monday, etc.
+#
+###
+#
+# Sample configuration file: cbq-1280.My_first_shaper
+#
+# --------------------------------------------------------------------------
+# DEVICE=eth0,10Mbit,1Mbit
+# RATE=128Kbit
+# WEIGHT=10Kbit
+# PRIO=5
+# RULE=192.168.1.0/24
+# --------------------------------------------------------------------------
+#
+# The configuration says that we will control traffic on 10Mbit ethernet
+# device eth0 and the traffic going to network 192.168.1.0 will be
+# processed with priority 5 and shaped to rate of 128Kbit.
+#
+# Note that you can control outgoing traffic only. If you want to control
+# traffic in both directions, you must set up CBQ for both interfaces.
+#
+# Consider the following example:
+#
+#                    +---------+      192.168.1.1
+# BACKBONE -----eth0-|  linux  |-eth1------*-[client]
+#                    +---------+
+#
+# Imagine you want to shape traffic from backbone to the client to 28Kbit
+# and traffic in the opposite direction to 128Kbit. You need to setup CBQ
+# on both eth0 and eth1 interfaces, thus you need two config files:
+#
+# cbq-028.backbone-client
+# --------------------------------------------------------------------------
+# DEVICE=eth1,10Mbit,1Mbit
+# RATE=28Kbit
+# WEIGHT=2Kbit
+# PRIO=5
+# RULE=192.168.1.1
+# --------------------------------------------------------------------------
+#
+# cbq-128.client-backbone
+# --------------------------------------------------------------------------
+# DEVICE=eth0,10Mbit,1Mbit
+# RATE=128Kbit
+# WEIGHT=10Kbit
+# PRIO=5
+# RULE=192.168.1.1,
+# --------------------------------------------------------------------------
+#
+# Pay attention to comma "," in the RULE field - it denotes source address!
+#
+# Enjoy.
+#
+#############################################################################
+
+### Run every command started by this script in the C locale
+export LC_ALL=C
+
+### Command locations
+TC=/sbin/tc
+IP=/sbin/ip
+MP=/sbin/modprobe
+
+### Default filter priorities (must be different)
+### A PRIO_RULE, PRIO_MARK or PRIO_REALM value that is already set and
+### non-empty at this point replaces the built-in 100/200/300.
+PRIO_RULE_DEFAULT=${PRIO_RULE:-100}
+PRIO_MARK_DEFAULT=${PRIO_MARK:-200}
+PRIO_REALM_DEFAULT=${PRIO_REALM:-300}
+
+### Default CBQ_PATH & CBQ_CACHE settings
+### (used only when the variables are unset or empty)
+CBQ_PATH=${CBQ_PATH:-/etc/sysconfig/cbq}
+CBQ_CACHE=${CBQ_CACHE:-/var/cache/cbq.init}
+
+### Uncomment to enable logfile for debugging
+### ($1 is the first argument given to the script)
+#CBQ_DEBUG="/var/run/cbq-$1"
+
+### Modules to probe for. Uncomment the last CBQ_PROBE
+### line if you have QoS support compiled into kernel
+### (it resets the list to empty).
+CBQ_PROBE="sch_cbq sch_tbf sch_sfq sch_prio"
+CBQ_PROBE="$CBQ_PROBE cls_fw cls_u32 cls_route"
+#CBQ_PROBE=""
+
+### Keywords required for qdisc & class configuration,
+### accumulated as one "|"-separated alternation
+CBQ_WORDS="DEVICE|RATE|WEIGHT|PRIO|PARENT|LEAF|BOUNDED|ISOLATED"
+CBQ_WORDS="$CBQ_WORDS|PRIO_MARK|PRIO_RULE|PRIO_REALM|BUFFER"
+CBQ_WORDS="$CBQ_WORDS|LIMIT|PEAK|MTU|QUANTUM|PERTURB"
+
+### Source AVPKT if it exists; AVPKT falls back to 3000 when it is
+### still unset or empty afterwards.
+### NOTE(review): the avpkt path is hard-coded and does not follow
+### $CBQ_PATH - confirm this is intended.
+[ -r /etc/sysconfig/cbq/avpkt ] && . /etc/sysconfig/cbq/avpkt
+AVPKT=${AVPKT:-3000}
+
+
+#############################################################################
+############################# SUPPORT FUNCTIONS #############################
+#############################################################################
+
+### Get list of network devices
+cbq_device_list () {
+       ip link show| sed -n "/^[0-9]/ \
+               { s/^[0-9]\+: \([a-z0-9._]\+\)[:@].*/\1/; p; }"
+} # cbq_device_list
+
+
+### Remove root class from device $1
+cbq_device_off () {
+       tc qdisc del dev $1 root 2> /dev/null
+} # cbq_device_off
+
+
+### Remove CBQ from all devices
+cbq_off () {
+       for dev in `cbq_device_list`; do
+               cbq_device_off $dev
+       done
+} # cbq_off
+
+
+### Prefixed message
+cbq_message () {
+       echo -e "**CBQ: $@"
+} # cbq_message
+
+### Failure message
+cbq_failure () {
+       cbq_message "$@"
+       exit 1
+} # cbq_failure
+
+### Failure w/ cbq-off
+cbq_fail_off () {
+       cbq_message "$@"
+       cbq_off
+       exit 1
+} # cbq_fail_off
+
+
+### Convert time to absolute value
+cbq_time2abs () {
+       local min=${1##*:}; min=${min##0}
+       local hrs=${1%%:*}; hrs=${hrs##0}
+       echo $[hrs*60 + min]
+} # cbq_time2abs
+
+
+### Display CBQ setup
+cbq_show () {
+       for dev in `cbq_device_list`; do
+               [ `tc qdisc show dev $dev| wc -l` -eq 0 ] && continue
+               echo -e "### $dev: queueing disciplines\n"
+               tc $1 qdisc show dev $dev; echo
+
+               [ `tc class show dev $dev| wc -l` -eq 0 ] && continue
+               echo -e "### $dev: traffic classes\n"
+               tc $1 class show dev $dev; echo
+
+               [ `tc filter show dev $dev| wc -l` -eq 0 ] && continue
+               echo -e "### $dev: filtering rules\n"
+               tc $1 filter show dev $dev; echo
+       done
+} # cbq_show
+
+
+### Check configuration and load DEVICES, DEVFIELDS and CLASSLIST from $1
+cbq_init () {
+       ### Get a list of configured classes
+       CLASSLIST=`find $1 \( -type f -or -type l \) -name 'cbq-*' \
+               -not -name '*~' -maxdepth 1 -printf "%f\n"| sort`
+       [ -z "$CLASSLIST" ] &&
+               cbq_failure "no configuration files found in $1!"
+
+       ### Gather all DEVICE fields from $1/cbq-*
+       DEVFIELDS=`find $1 \( -type f -or -type l \) -name 'cbq-*' \
+                 -not -name '*~' -maxdepth 1| xargs sed -n 's/#.*//; \
+                 s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \
+                 { s/.*=//; p; }'| sort -u`
+       [ -z "$DEVFIELDS" ] &&
+               cbq_failure "no DEVICE field found in $1/cbq-*!"
+
+       ### Check for different DEVICE fields for the same device
+       DEVICES=`echo "$DEVFIELDS"| sed 's/,.*//'| sort -u`
+       [ `echo "$DEVICES"| wc -l` -ne `echo "$DEVFIELDS"| wc -l` ] &&
+               cbq_failure "different DEVICE fields for single device!\n$DEVFIELDS"
+} # cbq_init
+
+
+### Load class configuration from $1/$2
+cbq_load_class () {
+       CLASS=`echo $2| sed 's/^cbq-0*//; s/^\([0-9a-fA-F]\+\).*/\1/'`
+       CFILE=`sed -n 's/#.*//; s/[[:space:]]//g; /^[[:alnum:]_]\+=[[:alnum:].,:;/*@-_]\+$/ p' $1/$2`
+
+       ### Check class number
+       IDVAL=`/usr/bin/printf "%d" 0x$CLASS 2> /dev/null`
+       [ $? -ne 0 -o $IDVAL -lt 2 -o $IDVAL -gt 65535 ] &&
+               cbq_fail_off "class ID of $2 must be in range <0002-FFFF>!"
+
+       ### Set defaults & load class
+       RATE=""; WEIGHT=""; PARENT=""; PRIO=5
+       LEAF=tbf; BOUNDED=yes; ISOLATED=no
+       BUFFER=10Kb/8; LIMIT=15Kb; MTU=1500
+       PEAK=""; PERTURB=10; QUANTUM=""
+
+       PRIO_RULE=$PRIO_RULE_DEFAULT
+       PRIO_MARK=$PRIO_MARK_DEFAULT
+       PRIO_REALM=$PRIO_REALM_DEFAULT
+
+       eval `echo "$CFILE"| grep -E "^($CBQ_WORDS)="`
+
+       ### Require RATE/WEIGHT
+       [ -z "$RATE" -o -z "$WEIGHT" ] &&
+               cbq_fail_off "missing RATE or WEIGHT in $2!"
+
+       ### Class device
+       DEVICE=${DEVICE%%,*}
+       [ -z "$DEVICE" ] && cbq_fail_off "missing DEVICE field in $2!"
+
+       BANDWIDTH=`echo "$DEVFIELDS"| sed -n "/^$DEVICE,/ \
+                 { s/[^,]*,\([^,]*\).*/\1/; p; q; }"`
+
+       ### Convert to "tc" options
+       PEAK=${PEAK:+peakrate $PEAK}
+       PERTURB=${PERTURB:+perturb $PERTURB}
+       QUANTUM=${QUANTUM:+quantum $QUANTUM}
+
+       [ "$BOUNDED" = "no" ] && BOUNDED="" || BOUNDED="bounded"
+       [ "$ISOLATED" = "yes" ] && ISOLATED="isolated" || ISOLATED=""
+} # cbq_load_class
+
+
+#############################################################################
+#################################### INIT ###################################
+#############################################################################
+
+### Check for presence of ip-route2 in usual place
+[ -x $TC -a -x $IP ] ||
+       cbq_failure "ip-route2 utilities not installed or executable!"
+
+
+### ip/tc wrappers
+if [ "$1" = "compile" ]; then
+       ### no module probing
+       CBQ_PROBE=""
+
+       ip () {
+               $IP "$@"
+       } # ip
+
+       ### echo-only version of "tc" command
+       tc () {
+               echo "$TC $@"
+       } # tc
+
+elif [ -n "$CBQ_DEBUG" ]; then
+       echo -e "# `date`" > $CBQ_DEBUG
+
+       ### Logging version of "ip" command
+       ip () {
+               echo -e "\n# ip $@" >> $CBQ_DEBUG
+               $IP "$@" 2>&1 | tee -a $CBQ_DEBUG
+       } # ip
+
+       ### Logging version of "tc" command
+       tc () {
+               echo -e "\n# tc $@" >> $CBQ_DEBUG
+               $TC "$@" 2>&1 | tee -a $CBQ_DEBUG
+       } # tc
+else
+       ### Default wrappers
+       
+       ip () {
+               $IP "$@"
+       } # ip
+       
+       tc () {
+               $TC "$@"
+       } # tc
+fi # ip/tc wrappers
+
+
+case "$1" in
+
+#############################################################################
+############################### START/COMPILE ###############################
+#############################################################################
+
+start|compile)
+
+### Probe QoS modules (start only)
+for module in $CBQ_PROBE; do
+       $MP $module || cbq_failure "failed to load module $module"
+done
+
+### If we are in compile/nocache/logging mode, don't bother with cache
+### Otherwise the cached "tc" commands are (re)built when needed and executed
+### in place of the rest of this script.
+if [ "$1" != "compile" -a "$2" != "nocache" -a -z "$CBQ_DEBUG" ]; then
+       VALID=1
+
+       ### validate the cache
+       ### (stale when asked to invalidate, when missing, or when anything in
+       ### $CBQ_PATH is newer than the cache file)
+       [ "$2" = "invalidate" -o ! -f $CBQ_CACHE ] && VALID=0
+       if [ $VALID -eq 1 ]; then
+               [ `find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE| \
+                 wc -l` -gt 0 ] && VALID=0
+       fi
+
+       ### compile the config if the cache is invalid
+       if [ $VALID -ne 1 ]; then
+               if ! $0 compile > $CBQ_CACHE; then
+                       ### Drop the partial output: it is newer than the
+                       ### configuration and would otherwise be taken for a
+                       ### valid cache and executed on the next start
+                       rm -f $CBQ_CACHE
+                       cbq_fail_off "failed to compile CBQ configuration!"
+               fi
+       fi
+
+       ### run the cached commands
+       exec /bin/sh $CBQ_CACHE 2> /dev/null
+fi
+
+### Load DEVICES, DEVFIELDS and CLASSLIST
+cbq_init $CBQ_PATH
+
+
+### Setup root qdisc on all configured devices
+for dev in $DEVICES; do
+       ### Retrieve device bandwidth and, optionally, weight
+       DEVTEMP=`echo "$DEVFIELDS"| sed -n "/^$dev,/ { s/$dev,//; p; q; }"`
+       DEVBWDT=${DEVTEMP%%,*}; DEVWGHT=${DEVTEMP##*,}
+       [ "$DEVBWDT" = "$DEVWGHT" ] && DEVWGHT=""
+
+       ### Device bandwidth is required
+       if [ -z "$DEVBWDT" ]; then
+               cbq_message "could not determine bandwidth for device $dev!"
+               cbq_failure "please set up the DEVICE fields properly!"
+       fi
+
+       ### Check if the device is there
+       ip link show $dev &> /dev/null ||
+               cbq_fail_off "device $dev not found!"
+
+       ### Remove old root qdisc from device
+       cbq_device_off $dev
+
+
+       ### Setup root qdisc + class for device
+       tc qdisc add dev $dev root handle 1 cbq \
+       bandwidth $DEVBWDT avpkt $AVPKT cell 8
+
+       ### Set weight of the root class if set
+       [ -n "$DEVWGHT" ] &&
+               tc class change dev $dev root cbq weight $DEVWGHT allot 1514
+
+       [ "$1" = "compile" ] && echo
+done # dev
+
+
+### Setup traffic classes
+for classfile in $CLASSLIST; do
+       cbq_load_class $CBQ_PATH $classfile
+
+       ### Create the class
+       tc class add dev $DEVICE parent 1:$PARENT classid 1:$CLASS cbq \
+       bandwidth $BANDWIDTH rate $RATE weight $WEIGHT prio $PRIO \
+       allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED ||
+               cbq_fail_off "failed to add class $CLASS with parent $PARENT on $DEVICE!"
+
+       ### Create leaf qdisc if set
+       if [ "$LEAF" = "tbf" ]; then
+               tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS tbf \
+               rate $RATE buffer $BUFFER limit $LIMIT mtu $MTU $PEAK
+       elif [ "$LEAF" = "sfq" ]; then
+               tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS sfq \
+               $PERTURB $QUANTUM
+       fi
+
+
+       ### Create fw filter for MARK fields
+       for mark in `echo "$CFILE"| sed -n '/^MARK/ { s/.*=//; p; }'`; do
+               ### Attach fw filter to root class
+               tc filter add dev $DEVICE parent 1:0 protocol ip \
+               prio $PRIO_MARK handle $mark fw classid 1:$CLASS
+       done ### mark
+
+       ### Create route filter for REALM fields
+       for realm in `echo "$CFILE"| sed -n '/^REALM/ { s/.*=//; p; }'`; do
+               ### Split realm into source & destination realms
+               SREALM=${realm%%,*}; DREALM=${realm##*,}
+               [ "$SREALM" = "$DREALM" ] && SREALM=""
+
+               ### Convert asterisks to empty strings
+               SREALM=${SREALM#\*}; DREALM=${DREALM#\*}
+
+               ### Attach route filter to the root class
+               tc filter add dev $DEVICE parent 1:0 protocol ip \
+               prio $PRIO_REALM route ${SREALM:+from $SREALM} \
+               ${DREALM:+to $DREALM} classid 1:$CLASS
+       done ### realm
+
+       ### Create u32 filter for RULE fields
+       for rule in `echo "$CFILE"| sed -n '/^RULE/ { s/.*=//; p; }'`; do
+               ### Split rule into source & destination
+               SRC=${rule%%,*}; DST=${rule##*,}
+               [ "$SRC" = "$rule" ] && SRC=""
+
+
+               ### Split destination into address, port & mask fields
+               DADDR=${DST%%:*}; DTEMP=${DST##*:}
+               [ "$DADDR" = "$DST" ] && DTEMP=""
+
+               DPORT=${DTEMP%%/*}; DMASK=${DTEMP##*/}
+               [ "$DPORT" = "$DTEMP" ] && DMASK="0xffff"
+
+
+               ### Split up source (if specified)
+               SADDR=""; SPORT=""
+               if [ -n "$SRC" ]; then
+                       SADDR=${SRC%%:*}; STEMP=${SRC##*:}
+                       [ "$SADDR" = "$SRC" ] && STEMP=""
+
+                       SPORT=${STEMP%%/*}; SMASK=${STEMP##*/}
+                       [ "$SPORT" = "$STEMP" ] && SMASK="0xffff"
+               fi
+
+
+               ### Convert asterisks to empty strings
+               SADDR=${SADDR#\*}; DADDR=${DADDR#\*}
+
+               ### Compose u32 filter rules
+               u32_s="${SPORT:+match ip sport $SPORT $SMASK}"
+               u32_s="${SADDR:+match ip src $SADDR} $u32_s"
+               u32_d="${DPORT:+match ip dport $DPORT $DMASK}"
+               u32_d="${DADDR:+match ip dst $DADDR} $u32_d"
+
+               ### Uncomment the following if you want to see parsed rules
+               #echo "$rule: $u32_s $u32_d"
+
+               ### Attach u32 filter to the appropriate class
+               tc filter add dev $DEVICE parent 1:0 protocol ip \
+               prio $PRIO_RULE u32 $u32_s $u32_d classid 1:$CLASS
+       done ### rule
+
+       [ "$1" = "compile" ] && echo
+done ### classfile
+;;
+
+
+#############################################################################
+################################# TIME CHECK ################################
+#############################################################################
+
+timecheck)
+
+### Get time + weekday
+TIME_TMP=`date +%w/%k:%M`
+TIME_DOW=${TIME_TMP%%/*}
+TIME_NOW=${TIME_TMP##*/}
+
+### Load DEVICES, DEVFIELDS and CLASSLIST
+cbq_init $CBQ_PATH
+
+### Run through all classes
+for classfile in $CLASSLIST; do
+       ### Gather all TIME rules from class config
+       TIMESET=`sed -n 's/#.*//; s/[[:space:]]//g; /^TIME/ { s/.*=//; p; }' \
+               $CBQ_PATH/$classfile`
+       [ -z "$TIMESET" ] && continue
+
+       MATCH=0; CHANGE=0
+       for timerule in $TIMESET; do
+               TIME_ABS=`cbq_time2abs $TIME_NOW`
+               
+               ### Split TIME rule to pieces
+               TIMESPEC=${timerule%%;*}; PARAMS=${timerule##*;}
+               WEEKDAYS=${TIMESPEC%%/*}; INTERVAL=${TIMESPEC##*/}
+               BEG_TIME=${INTERVAL%%-*}; END_TIME=${INTERVAL##*-}
+
+               ### Check the day-of-week (if present)
+               [ "$WEEKDAYS" != "$INTERVAL" -a \
+                 -n "${WEEKDAYS##*$TIME_DOW*}" ] && continue
+
+               ### Compute interval boundaries
+               BEG_ABS=`cbq_time2abs $BEG_TIME`
+               END_ABS=`cbq_time2abs $END_TIME`
+
+               ### Midnight wrap fixup
+               if [ $BEG_ABS -gt $END_ABS ]; then
+                       [ $TIME_ABS -le $END_ABS ] &&
+                               TIME_ABS=$[TIME_ABS + 24*60]
+
+                       END_ABS=$[END_ABS + 24*60]
+               fi
+
+               ### If the time matches, remember params and set MATCH flag
+               if [ $TIME_ABS -ge $BEG_ABS -a $TIME_ABS -lt $END_ABS ]; then
+                       TMP_RATE=${PARAMS%%/*}; PARAMS=${PARAMS#*/}
+                       TMP_WGHT=${PARAMS%%/*}; TMP_PEAK=${PARAMS##*/}
+
+                       [ "$TMP_PEAK" = "$TMP_WGHT" ] && TMP_PEAK=""
+                       TMP_PEAK=${TMP_PEAK:+peakrate $TMP_PEAK}
+
+                       MATCH=1
+               fi
+       done ### timerule
+
+
+       cbq_load_class $CBQ_PATH $classfile
+
+       ### Get current RATE of CBQ class
+       RATE_NOW=`tc class show dev $DEVICE| sed -n \
+                "/cbq 1:$CLASS / { s/.*rate //; s/ .*//; p; q; }"`
+       [ -z "$RATE_NOW" ] && continue
+
+       ### Time interval matched
+       if [ $MATCH -ne 0 ]; then
+
+               ### Check if there is any change in class RATE
+               if [ "$RATE_NOW" != "$TMP_RATE" ]; then
+                       NEW_RATE="$TMP_RATE"
+                       NEW_WGHT="$TMP_WGHT"
+                       NEW_PEAK="$TMP_PEAK"
+                       CHANGE=1
+               fi
+
+       ### Match not found, reset to default RATE if necessary
+       elif [ "$RATE_NOW" != "$RATE" ]; then
+               NEW_WGHT="$WEIGHT"
+               NEW_RATE="$RATE"
+               NEW_PEAK="$PEAK"
+               CHANGE=1
+       fi
+
+       ### If there are no changes, go for next class
+       [ $CHANGE -eq 0 ] && continue
+
+       ### Replace CBQ class
+       tc class replace dev $DEVICE classid 1:$CLASS cbq \
+       bandwidth $BANDWIDTH rate $NEW_RATE weight $NEW_WGHT prio $PRIO \
+       allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED
+
+       ### Replace leaf qdisc (if any)
+       if [ "$LEAF" = "tbf" ]; then
+               tc qdisc replace dev $DEVICE handle $CLASS tbf \
+               rate $NEW_RATE buffer $BUFFER limit $LIMIT mtu $MTU $NEW_PEAK
+       fi
+
+       cbq_message "$TIME_NOW: class $CLASS on $DEVICE changed rate ($RATE_NOW -> $NEW_RATE)"
+done ### class file
+;;
+
+
+#############################################################################
+################################## THE REST #################################
+#############################################################################
+
+stop)
+       cbq_off ;;
+
+list)
+       cbq_show ;;
+
+stats)
+       cbq_show -s ;;
+
+restart)
+       ### re-run this script: stop first, then start with whatever
+       ### arguments followed "restart"
+       shift
+       $0 stop; $0 start "$@" ;;
+
+*)
+       echo "Usage: $(basename $0) {start|compile|stop|restart|timecheck|list|stats}"
+esac
index 8375d76..9438c0f 100644 (file)
@@ -1 +1 @@
-static char SNAPSHOT[] = "050314";
+static char SNAPSHOT[] = "060323";
index 5aca69a..25f36ae 100644 (file)
@@ -4,10 +4,26 @@
 #include "iptables_common.h"
 #include "libiptc/libiptc.h"
 
+#ifndef IPT_LIB_DIR
+#define IPT_LIB_DIR "/usr/local/lib/iptables"
+#endif
+
 #ifndef IPPROTO_SCTP
 #define IPPROTO_SCTP 132
 #endif
 
+#ifndef IPT_SO_GET_REVISION_MATCH /* Old kernel source. */
+#define IPT_SO_GET_REVISION_MATCH      (IPT_BASE_CTL + 2)
+#define IPT_SO_GET_REVISION_TARGET     (IPT_BASE_CTL + 3)
+
+struct ipt_get_revision
+{
+       char name[IPT_FUNCTION_MAXNAMELEN-1];
+
+       u_int8_t revision;
+};
+#endif /* IPT_SO_GET_REVISION_MATCH   Old kernel source */
+
 struct iptables_rule_match
 {
        struct iptables_rule_match *next;
@@ -22,6 +38,9 @@ struct iptables_match
 
        ipt_chainlabel name;
 
+       /* Revision of match (0 by default). */
+       u_int8_t revision;
+
        const char *version;
 
        /* Size of match data. */
@@ -72,6 +91,9 @@ struct iptables_target
 
        ipt_chainlabel name;
 
+       /* Revision of target (0 by default). */
+       u_int8_t revision;
+
        const char *version;
 
        /* Size of target data. */
index e3b99aa..ed5b9c0 100644 (file)
@@ -26,6 +26,7 @@ extern int iptables_insmod(const char *modname, const char *modprobe);
 void exit_error(enum exittype, char *, ...)__attribute__((noreturn,
                                                          format(printf,2,3)));
 extern const char *program_name, *program_version;
+extern char *lib_dir;
 
 #ifdef NO_SHARED_LIBS
 # ifdef _INIT
diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
new file mode 100644 (file)
index 0000000..85d456d
--- /dev/null
@@ -0,0 +1,122 @@
+#ifndef _INET_DIAG_H_
+#define _INET_DIAG_H_ 1
+
+/* Just some random number */
+#define TCPDIAG_GETSOCK 18
+#define DCCPDIAG_GETSOCK 19
+
+#define INET_DIAG_GETSOCK_MAX 24
+
+/* Socket identity */
+struct inet_diag_sockid {
+       __u16   idiag_sport;
+       __u16   idiag_dport;
+       __u32   idiag_src[4];
+       __u32   idiag_dst[4];
+       __u32   idiag_if;
+       __u32   idiag_cookie[2];
+#define INET_DIAG_NOCOOKIE (~0U)
+};
+
+/* Request structure */
+
+struct inet_diag_req {
+       __u8    idiag_family;           /* Family of addresses. */
+       __u8    idiag_src_len;
+       __u8    idiag_dst_len;
+       __u8    idiag_ext;              /* Query extended information */
+
+       struct inet_diag_sockid id;
+
+       __u32   idiag_states;           /* States to dump */
+       __u32   idiag_dbs;              /* Tables to dump (NI) */
+};
+
+enum {
+       INET_DIAG_REQ_NONE,
+       INET_DIAG_REQ_BYTECODE,
+};
+
+#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE
+
+/* Bytecode is a sequence of 4-byte commands followed by variable arguments.
+ * All the commands identified by "code" are conditional jumps forward:
+ * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
+ * the length of the command and its arguments.
+ */
+struct inet_diag_bc_op {
+       unsigned char   code;   /* command; presumably an INET_DIAG_BC_* value */
+       unsigned char   yes;    /* forward offset taken on "yes" */
+       unsigned short  no;     /* forward offset taken on "no" */
+};
+
+enum {
+       INET_DIAG_BC_NOP,
+       INET_DIAG_BC_JMP,
+       INET_DIAG_BC_S_GE,
+       INET_DIAG_BC_S_LE,
+       INET_DIAG_BC_D_GE,
+       INET_DIAG_BC_D_LE,
+       INET_DIAG_BC_AUTO,
+       INET_DIAG_BC_S_COND,
+       INET_DIAG_BC_D_COND,
+};
+
+struct inet_diag_hostcond {
+       __u8    family;
+       __u8    prefix_len;
+       int     port;
+       __u32   addr[0];
+};
+
+/* Base info structure. It contains socket identity (addrs/ports/cookie)
+ * and, alas, the information shown by netstat. */
+struct inet_diag_msg {
+       __u8    idiag_family;
+       __u8    idiag_state;
+       __u8    idiag_timer;
+       __u8    idiag_retrans;
+
+       struct inet_diag_sockid id;
+
+       __u32   idiag_expires;
+       __u32   idiag_rqueue;
+       __u32   idiag_wqueue;
+       __u32   idiag_uid;
+       __u32   idiag_inode;
+};
+
+/* Extensions */
+
+enum {
+       INET_DIAG_NONE,
+       INET_DIAG_MEMINFO,
+       INET_DIAG_INFO,
+       INET_DIAG_VEGASINFO,
+       INET_DIAG_CONG,
+};
+
+#define INET_DIAG_MAX INET_DIAG_CONG
+
+
+/* INET_DIAG_MEMINFO */
+
+struct inet_diag_meminfo {
+       __u32   idiag_rmem;
+       __u32   idiag_wmem;
+       __u32   idiag_fmem;
+       __u32   idiag_tmem;
+};
+
+/* INET_DIAG_VEGASINFO */
+
+struct tcpvegas_info {
+       __u32   tcpv_enabled;
+       __u32   tcpv_rttcnt;
+       __u32   tcpv_rtt;
+       __u32   tcpv_minrtt;
+};
+
+
+#endif /* _INET_DIAG_H_ */
diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h
new file mode 100644 (file)
index 0000000..e234e20
--- /dev/null
@@ -0,0 +1,22 @@
+/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
+ *
+ * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+ * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+ */
+
+#ifndef _LINUX_IP_MP_ALG_H
+#define _LINUX_IP_MP_ALG_H
+
+/* Identifiers of the multipath algorithms. __IP_MP_ALG_MAX is not an
+ * algorithm itself: it only terminates the list so that IP_MP_ALG_MAX
+ * can be derived from it. */
+enum ip_mp_alg {
+       IP_MP_ALG_NONE,
+       IP_MP_ALG_RR,
+       IP_MP_ALG_DRR,
+       IP_MP_ALG_RANDOM,
+       IP_MP_ALG_WRANDOM,
+       __IP_MP_ALG_MAX
+};
+
+/* Highest valid enum ip_mp_alg value */
+#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
+
+#endif /* _LINUX_IP_MP_ALG_H */
+
index 7346ead..17d8eff 100644 (file)
@@ -15,7 +15,6 @@
 #ifndef _IPTABLES_H
 #define _IPTABLES_H
 
-#include <linux/compiler.h>
 #include <linux/netfilter_ipv4.h>
 
 #define IPT_FUNCTION_MAXNAMELEN 30
index 13828e5..24a38ae 100644 (file)
@@ -5,20 +5,22 @@
 #include <linux/types.h>
 
 #define NETLINK_ROUTE          0       /* Routing/device hook                          */
-#define NETLINK_SKIP           1       /* Reserved for ENskip                          */
+#define NETLINK_W1             1       /* 1-wire subsystem                             */
 #define NETLINK_USERSOCK       2       /* Reserved for user mode socket protocols      */
 #define NETLINK_FIREWALL       3       /* Firewalling hook                             */
-#define NETLINK_TCPDIAG                4       /* TCP socket monitoring                        */
+#define NETLINK_INET_DIAG      4       /* INET socket monitoring                       */
 #define NETLINK_NFLOG          5       /* netfilter/iptables ULOG */
 #define NETLINK_XFRM           6       /* ipsec */
 #define NETLINK_SELINUX                7       /* SELinux event notifications */
-#define NETLINK_ARPD           8
+#define NETLINK_ISCSI          8       /* Open-iSCSI */
 #define NETLINK_AUDIT          9       /* auditing */
-#define NETLINK_ROUTE6         11      /* af_inet6 route comm channel */
+#define NETLINK_FIB_LOOKUP     10      
+#define NETLINK_CONNECTOR      11
+#define NETLINK_NETFILTER      12      /* netfilter subsystem */
 #define NETLINK_IP6_FW         13
 #define NETLINK_DNRTMSG                14      /* DECnet routing messages */
 #define NETLINK_KOBJECT_UEVENT 15      /* Kernel messages to userspace */
-#define NETLINK_TAPBASE                16      /* 16 to 31 are ethertap */
+#define NETLINK_GENERIC                16
 
 #define MAX_LINKS 32           
 
@@ -69,7 +71,8 @@ struct nlmsghdr
 
 #define NLMSG_ALIGNTO  4
 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
-#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_HDRLEN    ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN))
 #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
 #define NLMSG_DATA(nlh)  ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
 #define NLMSG_NEXT(nlh,len)     ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
@@ -84,12 +87,23 @@ struct nlmsghdr
 #define NLMSG_DONE             0x3     /* End of a dump        */
 #define NLMSG_OVERRUN          0x4     /* Data lost            */
 
+#define NLMSG_MIN_TYPE         0x10    /* < 0x10: reserved control messages */
+
 struct nlmsgerr
 {
        int             error;
        struct nlmsghdr msg;
 };
 
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP        2
+#define NETLINK_PKTINFO                3
+
+struct nl_pktinfo
+{
+       __u32   group;
+};
+
 #define NET_MAJOR 36           /* Major 36 is reserved for networking                                          */
 
 enum {
@@ -97,5 +111,24 @@ enum {
        NETLINK_CONNECTED,
 };
 
+/*
+ *  <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * |        Header       | Pad |     Payload       | Pad |
+ * |   (struct nlattr)   | ing |                   | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ *  <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr
+{
+       __u16           nla_len;
+       __u16           nla_type;
+};
+
+#define NLA_ALIGNTO            4
+#define NLA_ALIGN(len)         (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN             ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
 
 #endif /* __LINUX_NETLINK_H */
index 741d15b..bd2c5a2 100644 (file)
@@ -80,6 +80,7 @@ enum
        TCA_ACT_KIND,
        TCA_ACT_OPTIONS,
        TCA_ACT_INDEX,
+       TCA_ACT_STATS,
        __TCA_ACT_MAX
 };
 
@@ -275,6 +276,7 @@ struct tc_rsvp_pinfo
        __u8    protocol;
        __u8    tunnelid;
        __u8    tunnelhdr;
+       __u8    pad;
 };
 
 /* ROUTE filter */
@@ -407,6 +409,7 @@ enum
        TCF_EM_NBYTE,
        TCF_EM_U32,
        TCF_EM_META,
+       TCF_EM_TEXT,
        __TCF_EM_MAX
 };
 
index 73d84c0..d10f353 100644 (file)
@@ -93,6 +93,7 @@ struct tc_fifo_qopt
 /* PRIO section */
 
 #define TCQ_PRIO_BANDS 16
+#define TCQ_MIN_PRIO_BANDS 2
 
 struct tc_prio_qopt
 {
@@ -169,6 +170,7 @@ struct tc_red_qopt
        unsigned char   Scell_log;      /* cell size for idle damping */
        unsigned char   flags;
 #define TC_RED_ECN     1
+#define TC_RED_HARDDROP        2
 };
 
 struct tc_red_xstats
@@ -194,36 +196,34 @@ enum
 
 #define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
 
-#define TCA_SET_OFF TCA_GRED_PARMS
 struct tc_gred_qopt
 {
-       __u32           limit;          /* HARD maximal queue length (bytes)    
-*/
-       __u32           qth_min;        /* Min average length threshold (bytes) 
-*/
-       __u32           qth_max;        /* Max average length threshold (bytes) 
-*/
-       __u32           DP;             /* upto 2^32 DPs */
-       __u32           backlog;        
-       __u32           qave;   
-       __u32           forced; 
-       __u32           early;  
-       __u32           other;  
-       __u32           pdrop;  
-
-       unsigned char   Wlog;           /* log(W)               */
-       unsigned char   Plog;           /* log(P_max/(qth_max-qth_min)) */
-       unsigned char   Scell_log;      /* cell size for idle damping */
-       __u8            prio;           /* prio of this VQ */
-       __u32   packets;
-       __u32   bytesin;
+       __u32           limit;        /* HARD maximal queue length (bytes)    */
+       __u32           qth_min;      /* Min average length threshold (bytes) */
+       __u32           qth_max;      /* Max average length threshold (bytes) */
+       __u32           DP;           /* upto 2^32 DPs */
+       __u32           backlog;
+       __u32           qave;
+       __u32           forced;
+       __u32           early;
+       __u32           other;
+       __u32           pdrop;
+       __u8            Wlog;         /* log(W)               */
+       __u8            Plog;         /* log(P_max/(qth_max-qth_min)) */
+       __u8            Scell_log;    /* cell size for idle damping */
+       __u8            prio;         /* prio of this VQ */
+       __u32           packets;
+       __u32           bytesin;
 };
+
 /* gred setup */
 struct tc_gred_sopt
 {
-       __u32           DPs;
-       __u32           def_DP;
-       __u8            grio;
+       __u32           DPs;
+       __u32           def_DP;
+       __u8            grio;
+       __u8            flags;
+       __u16           pad1;
 };
 
 /* HTB section */
@@ -351,6 +351,7 @@ struct tc_cbq_ovl
 #define        TC_CBQ_OVL_DROP         3
 #define        TC_CBQ_OVL_RCLASSIC     4
        unsigned char   priority2;
+       __u16           pad;
        __u32           penalty;
 };
 
@@ -427,6 +428,8 @@ enum
        TCA_NETEM_UNSPEC,
        TCA_NETEM_CORR,
        TCA_NETEM_DELAY_DIST,
+       TCA_NETEM_REORDER,
+       TCA_NETEM_CORRUPT,
        __TCA_NETEM_MAX,
 };
 
@@ -437,7 +440,7 @@ struct tc_netem_qopt
        __u32   latency;        /* added delay (us) */
        __u32   limit;          /* fifo limit (packets) */
        __u32   loss;           /* random packet loss (0=none ~0=100%) */
-       __u32   gap;            /* re-ordering gap (0 for delay all) */
+       __u32   gap;            /* re-ordering gap (0 for none) */
        __u32   duplicate;      /* random packet dup  (0=none ~0=100%) */
        __u32   jitter;         /* random jitter in latency (us) */
 };
@@ -449,6 +452,18 @@ struct tc_netem_corr
        __u32   dup_corr;       /* duplicate correlation  */
 };
 
+struct tc_netem_reorder
+{
+       __u32   probability;
+       __u32   correlation;
+};
+
+struct tc_netem_corrupt
+{
+       __u32   probability;
+       __u32   correlation;
+};
+
 #define NETEM_DIST_SCALE       8192
 
 #endif
index 1facfe9..7504618 100644 (file)
@@ -89,10 +89,21 @@ enum {
        RTM_GETANYCAST  = 62,
 #define RTM_GETANYCAST RTM_GETANYCAST
 
-       RTM_MAX,
-#define RTM_MAX                RTM_MAX
+       RTM_NEWNEIGHTBL = 64,
+#define RTM_NEWNEIGHTBL        RTM_NEWNEIGHTBL
+       RTM_GETNEIGHTBL = 66,
+#define RTM_GETNEIGHTBL        RTM_GETNEIGHTBL
+       RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL        RTM_SETNEIGHTBL
+
+       __RTM_MAX,
+#define RTM_MAX                (((__RTM_MAX + 3) & ~3) - 1)
 };
 
+#define RTM_NR_MSGTYPES        (RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES        (RTM_NR_MSGTYPES >> 2)
+#define RTM_FAM(cmd)   (((cmd) - RTM_BASE) >> 2)
+
 /* 
    Generic structure for encapsulation of optional route information.
    It is reminiscent of sockaddr, but with sa_family replaced
@@ -188,6 +199,7 @@ enum
 #define RTPROT_BIRD    12      /* BIRD */
 #define RTPROT_DNROUTED        13      /* DECnet routing daemon */
 #define RTPROT_XORP    14      /* XORP */
+#define RTPROT_NTK     15      /* Netsukuku */
 
 /* rtm_scope
 
@@ -250,6 +262,7 @@ enum rtattr_type_t
        RTA_FLOW,
        RTA_CACHEINFO,
        RTA_SESSION,
+       RTA_MP_ALGO,
        __RTA_MAX
 };
 
@@ -346,10 +359,13 @@ enum
 #define RTAX_FEATURE_ECN       0x00000001
 #define RTAX_FEATURE_SACK      0x00000002
 #define RTAX_FEATURE_TIMESTAMP 0x00000004
+#define RTAX_FEATURE_ALLFRAG   0x00000008
 
 struct rta_session
 {
        __u8    proto;
+       __u8    pad1;
+       __u16   pad2;
 
        union {
                struct {
@@ -446,6 +462,7 @@ enum
        NDA_DST,
        NDA_LLADDR,
        NDA_CACHEINFO,
+       NDA_PROBES,
        __NDA_MAX
 };
 
@@ -486,6 +503,106 @@ struct nda_cacheinfo
        __u32           ndm_refcnt;
 };
 
+
+/*****************************************************************
+ *             Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all non-device-specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats
+{
+       __u64           ndts_allocs;
+       __u64           ndts_destroys;
+       __u64           ndts_hash_grows;
+       __u64           ndts_res_failed;
+       __u64           ndts_lookups;
+       __u64           ndts_hits;
+       __u64           ndts_rcv_probes_mcast;
+       __u64           ndts_rcv_probes_ucast;
+       __u64           ndts_periodic_gc_runs;
+       __u64           ndts_forced_gc_runs;
+};
+
+enum {
+       NDTPA_UNSPEC,
+       NDTPA_IFINDEX,                  /* u32, unchangeable */
+       NDTPA_REFCNT,                   /* u32, read-only */
+       NDTPA_REACHABLE_TIME,           /* u64, read-only, msecs */
+       NDTPA_BASE_REACHABLE_TIME,      /* u64, msecs */
+       NDTPA_RETRANS_TIME,             /* u64, msecs */
+       NDTPA_GC_STALETIME,             /* u64, msecs */
+       NDTPA_DELAY_PROBE_TIME,         /* u64, msecs */
+       NDTPA_QUEUE_LEN,                /* u32 */
+       NDTPA_APP_PROBES,               /* u32 */
+       NDTPA_UCAST_PROBES,             /* u32 */
+       NDTPA_MCAST_PROBES,             /* u32 */
+       NDTPA_ANYCAST_DELAY,            /* u64, msecs */
+       NDTPA_PROXY_DELAY,              /* u64, msecs */
+       NDTPA_PROXY_QLEN,               /* u32 */
+       NDTPA_LOCKTIME,                 /* u64, msecs */
+       __NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg
+{
+       __u8            ndtm_family;
+       __u8            ndtm_pad1;
+       __u16           ndtm_pad2;
+};
+
+struct ndt_config
+{
+       __u16           ndtc_key_len;
+       __u16           ndtc_entry_size;
+       __u32           ndtc_entries;
+       __u32           ndtc_last_flush;        /* delta to now in msecs */
+       __u32           ndtc_last_rand;         /* delta to now in msecs */
+       __u32           ndtc_hash_rnd;
+       __u32           ndtc_hash_mask;
+       __u32           ndtc_hash_chain_gc;
+       __u32           ndtc_proxy_qlen;
+};
+
+enum {
+       NDTA_UNSPEC,
+       NDTA_NAME,                      /* char *, unchangeable */
+       NDTA_THRESH1,                   /* u32 */
+       NDTA_THRESH2,                   /* u32 */
+       NDTA_THRESH3,                   /* u32 */
+       NDTA_CONFIG,                    /* struct ndt_config, read-only */
+       NDTA_PARMS,                     /* nested TLV NDTPA_* */
+       NDTA_STATS,                     /* struct ndt_stats, read-only */
+       NDTA_GC_INTERVAL,               /* u64, msecs */
+       __NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \
+                    NLMSG_ALIGN(sizeof(struct ndtmsg))))
+#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg))
+
+
 /****
  *             General form of address family dependent message.
  ****/
@@ -521,10 +638,13 @@ struct ifinfomsg
 struct prefixmsg
 {
        unsigned char   prefix_family;
+       unsigned char   prefix_pad1;
+       unsigned short  prefix_pad2;
        int             prefix_ifindex;
        unsigned char   prefix_type;
        unsigned char   prefix_len;
        unsigned char   prefix_flags;
+       unsigned char   prefix_pad3;
 };
 
 enum 
@@ -699,7 +819,6 @@ enum
        TCA_RATE,
        TCA_FCNT,
        TCA_STATS2,
-       TCA_ACT_STATS,
        __TCA_MAX
 };
 
@@ -708,9 +827,7 @@ enum
 #define TCA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
 #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
 
-
-/* RTnetlink multicast groups */
-
+/* RTnetlink multicast groups - backwards compatibility for userspace */
 #define RTMGRP_LINK            1
 #define RTMGRP_NOTIFY          2
 #define RTMGRP_NEIGH           4
@@ -730,6 +847,46 @@ enum
 
 #define RTMGRP_IPV6_PREFIX     0x20000
 
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+       RTNLGRP_NONE,
+#define RTNLGRP_NONE           RTNLGRP_NONE
+       RTNLGRP_LINK,
+#define RTNLGRP_LINK           RTNLGRP_LINK
+       RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY         RTNLGRP_NOTIFY
+       RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH          RTNLGRP_NEIGH
+       RTNLGRP_TC,
+#define RTNLGRP_TC             RTNLGRP_TC
+       RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR    RTNLGRP_IPV4_IFADDR
+       RTNLGRP_IPV4_MROUTE,
+#define        RTNLGRP_IPV4_MROUTE     RTNLGRP_IPV4_MROUTE
+       RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE     RTNLGRP_IPV4_ROUTE
+       RTNLGRP_NOP1,
+       RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR    RTNLGRP_IPV6_IFADDR
+       RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE    RTNLGRP_IPV6_MROUTE
+       RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE     RTNLGRP_IPV6_ROUTE
+       RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO    RTNLGRP_IPV6_IFINFO
+       RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR  RTNLGRP_DECnet_IFADDR
+       RTNLGRP_NOP2,
+       RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE   RTNLGRP_DECnet_ROUTE
+       RTNLGRP_NOP3,
+       RTNLGRP_NOP4,
+       RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX    RTNLGRP_IPV6_PREFIX
+       __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX    (__RTNLGRP_MAX - 1)
+
 /* TC action piece */
 struct tcamsg
 {
diff --git a/include/linux/socket.h b/include/linux/socket.h
new file mode 100644 (file)
index 0000000..dc979c0
--- /dev/null
@@ -0,0 +1 @@
+#include <sys/socket.h>
diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h
new file mode 100644 (file)
index 0000000..964f473
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef __LINUX_TC_DEF_H
+#define __LINUX_TC_DEF_H
+
+#include <linux/pkt_cls.h>
+
+struct tc_defact
+{
+       tc_gen;
+};
+                                                                                
+enum
+{
+       TCA_DEF_UNSPEC,
+       TCA_DEF_TM,
+       TCA_DEF_PARMS,
+       TCA_DEF_DATA,
+       __TCA_DEF_MAX
+};
+#define TCA_DEF_MAX (__TCA_DEF_MAX - 1)
+
+#endif
diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/linux/tc_ematch/tc_em_cmp.h
new file mode 100644 (file)
index 0000000..c7f4d43
--- /dev/null
@@ -0,0 +1,26 @@
+#ifndef __LINUX_TC_EM_CMP_H
+#define __LINUX_TC_EM_CMP_H
+
+#include <linux/pkt_cls.h>
+
+struct tcf_em_cmp
+{
+       __u32           val;
+       __u32           mask;
+       __u16           off;
+       __u8            align:4;
+       __u8            flags:4;
+       __u8            layer:4;
+       __u8            opnd:4;
+};
+
+enum
+{
+       TCF_EM_ALIGN_U8  = 1,
+       TCF_EM_ALIGN_U16 = 2,
+       TCF_EM_ALIGN_U32 = 4
+};
+
+#define TCF_EM_CMP_TRANS       1
+
+#endif
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h
new file mode 100644 (file)
index 0000000..e21937c
--- /dev/null
@@ -0,0 +1,94 @@
+#ifndef __LINUX_TC_EM_META_H
+#define __LINUX_TC_EM_META_H
+
+#include <linux/pkt_cls.h>
+
+enum
+{
+       TCA_EM_META_UNSPEC,
+       TCA_EM_META_HDR,
+       TCA_EM_META_LVALUE,
+       TCA_EM_META_RVALUE,
+       __TCA_EM_META_MAX
+};
+#define TCA_EM_META_MAX (__TCA_EM_META_MAX - 1)
+
+struct tcf_meta_val
+{
+       __u16                   kind;
+       __u8                    shift;
+       __u8                    op;
+};
+
+#define TCF_META_TYPE_MASK     (0xf << 12)
+#define TCF_META_TYPE(kind)    (((kind) & TCF_META_TYPE_MASK) >> 12)
+#define TCF_META_ID_MASK       0x7ff
+#define TCF_META_ID(kind)      ((kind) & TCF_META_ID_MASK)
+
+enum
+{
+       TCF_META_TYPE_VAR,
+       TCF_META_TYPE_INT,
+       __TCF_META_TYPE_MAX
+};
+#define TCF_META_TYPE_MAX (__TCF_META_TYPE_MAX - 1)
+
+enum
+{
+       TCF_META_ID_VALUE,
+       TCF_META_ID_RANDOM,
+       TCF_META_ID_LOADAVG_0,
+       TCF_META_ID_LOADAVG_1,
+       TCF_META_ID_LOADAVG_2,
+       TCF_META_ID_DEV,
+       TCF_META_ID_PRIORITY,
+       TCF_META_ID_PROTOCOL,
+       TCF_META_ID_PKTTYPE,
+       TCF_META_ID_PKTLEN,
+       TCF_META_ID_DATALEN,
+       TCF_META_ID_MACLEN,
+       TCF_META_ID_NFMARK,
+       TCF_META_ID_TCINDEX,
+       TCF_META_ID_RTCLASSID,
+       TCF_META_ID_RTIIF,
+       TCF_META_ID_SK_FAMILY,
+       TCF_META_ID_SK_STATE,
+       TCF_META_ID_SK_REUSE,
+       TCF_META_ID_SK_BOUND_IF,
+       TCF_META_ID_SK_REFCNT,
+       TCF_META_ID_SK_SHUTDOWN,
+       TCF_META_ID_SK_PROTO,
+       TCF_META_ID_SK_TYPE,
+       TCF_META_ID_SK_RCVBUF,
+       TCF_META_ID_SK_RMEM_ALLOC,
+       TCF_META_ID_SK_WMEM_ALLOC,
+       TCF_META_ID_SK_OMEM_ALLOC,
+       TCF_META_ID_SK_WMEM_QUEUED,
+       TCF_META_ID_SK_RCV_QLEN,
+       TCF_META_ID_SK_SND_QLEN,
+       TCF_META_ID_SK_ERR_QLEN,
+       TCF_META_ID_SK_FORWARD_ALLOCS,
+       TCF_META_ID_SK_SNDBUF,
+       TCF_META_ID_SK_ALLOCS,
+       TCF_META_ID_SK_ROUTE_CAPS,
+       TCF_META_ID_SK_HASH,
+       TCF_META_ID_SK_LINGERTIME,
+       TCF_META_ID_SK_ACK_BACKLOG,
+       TCF_META_ID_SK_MAX_ACK_BACKLOG,
+       TCF_META_ID_SK_PRIO,
+       TCF_META_ID_SK_RCVLOWAT,
+       TCF_META_ID_SK_RCVTIMEO,
+       TCF_META_ID_SK_SNDTIMEO,
+       TCF_META_ID_SK_SENDMSG_OFF,
+       TCF_META_ID_SK_WRITE_PENDING,
+       __TCF_META_ID_MAX
+};
+#define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1)
+
+struct tcf_meta_hdr
+{
+       struct tcf_meta_val     left;
+       struct tcf_meta_val     right;
+};
+
+#endif
diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/linux/tc_ematch/tc_em_nbyte.h
new file mode 100644 (file)
index 0000000..f19d1f5
--- /dev/null
@@ -0,0 +1,13 @@
+#ifndef __LINUX_TC_EM_NBYTE_H
+#define __LINUX_TC_EM_NBYTE_H
+
+#include <linux/pkt_cls.h>
+
+struct tcf_em_nbyte
+{
+       __u16           off;
+       __u16           len:12;
+       __u8            layer:4;
+};
+
+#endif
index 9703d6b..b4d74eb 100644 (file)
@@ -55,40 +55,6 @@ struct tcphdr {
        __u16   urg_ptr;
 };
 
-
-enum {
-  TCP_ESTABLISHED = 1,
-  TCP_SYN_SENT,
-  TCP_SYN_RECV,
-  TCP_FIN_WAIT1,
-  TCP_FIN_WAIT2,
-  TCP_TIME_WAIT,
-  TCP_CLOSE,
-  TCP_CLOSE_WAIT,
-  TCP_LAST_ACK,
-  TCP_LISTEN,
-  TCP_CLOSING,  /* now a valid state */
-
-  TCP_MAX_STATES /* Leave at the end! */
-};
-
-#define TCP_STATE_MASK 0xF
-#define TCP_ACTION_FIN (1 << 7)
-
-enum {
-  TCPF_ESTABLISHED = (1 << 1),
-  TCPF_SYN_SENT  = (1 << 2),
-  TCPF_SYN_RECV  = (1 << 3),
-  TCPF_FIN_WAIT1 = (1 << 4),
-  TCPF_FIN_WAIT2 = (1 << 5),
-  TCPF_TIME_WAIT = (1 << 6),
-  TCPF_CLOSE     = (1 << 7),
-  TCPF_CLOSE_WAIT = (1 << 8),
-  TCPF_LAST_ACK  = (1 << 9),
-  TCPF_LISTEN    = (1 << 10),
-  TCPF_CLOSING   = (1 << 11) 
-};
-
 /*
  *     The union cast uses a gcc extension to avoid aliasing problems
  *  (union is compatible to any of its members)
@@ -127,6 +93,7 @@ enum {
 #define TCP_WINDOW_CLAMP       10      /* Bound advertised window */
 #define TCP_INFO               11      /* Information about this connection. */
 #define TCP_QUICKACK           12      /* Block/reenable quick acks */
+#define TCP_CONGESTION         13      /* Congestion control algorithm */
 
 #define TCPI_OPT_TIMESTAMPS    1
 #define TCPI_OPT_SACK          2
index f0df02a..f2bbf4b 100644 (file)
@@ -27,6 +27,22 @@ struct xfrm_id
        __u8            proto;
 };
 
+struct xfrm_sec_ctx {
+       __u8    ctx_doi;
+       __u8    ctx_alg;
+       __u16   ctx_len;
+       __u32   ctx_sid;
+       char    ctx_str[0];
+};
+
+/* Security Context Domains of Interpretation */
+#define XFRM_SC_DOI_RESERVED 0
+#define XFRM_SC_DOI_LSM 1
+
+/* Security Context Algorithms */
+#define XFRM_SC_ALG_RESERVED 0
+#define XFRM_SC_ALG_SELINUX 1
+
 /* Selector, used as selector both on policy rules (SPD) and SAs. */
 
 struct xfrm_selector
@@ -140,7 +156,22 @@ enum {
        XFRM_MSG_FLUSHPOLICY,
 #define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY
 
-       XFRM_MSG_MAX
+       __XFRM_MSG_MAX
+};
+#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1)
+
+#define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)
+
+/*
+ * Generic LSM security context for communicating to user space
+ * NOTE: Same format as sadb_x_sec_ctx
+ */
+struct xfrm_user_sec_ctx {
+       __u16                   len;
+       __u16                   exttype;
+       __u8                    ctx_alg;  /* LSMs: e.g., selinux == 1 */
+       __u8                    ctx_doi;
+       __u16                   ctx_len;
 };
 
 struct xfrm_user_tmpl {
@@ -171,6 +202,9 @@ enum xfrm_attr_type_t {
        XFRMA_ALG_COMP,         /* struct xfrm_algo */
        XFRMA_ENCAP,            /* struct xfrm_algo + struct xfrm_encap_tmpl */
        XFRMA_TMPL,             /* 1 or more struct xfrm_user_tmpl */
+       XFRMA_SA,
+       XFRMA_POLICY,
+       XFRMA_SEC_CTX,          /* struct xfrm_sec_ctx */
        __XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
@@ -191,6 +225,7 @@ struct xfrm_usersa_info {
        __u8                            flags;
 #define XFRM_STATE_NOECN       1
 #define XFRM_STATE_DECAP_DSCP  2
+#define XFRM_STATE_NOPMTUDISC  4
 };
 
 struct xfrm_usersa_id {
@@ -252,7 +287,25 @@ struct xfrm_usersa_flush {
        __u8                            proto;
 };
 
+/* backwards compatibility for userspace */
 #define XFRMGRP_ACQUIRE                1
 #define XFRMGRP_EXPIRE         2
+#define XFRMGRP_SA             4
+#define XFRMGRP_POLICY         8
+
+enum xfrm_nlgroups {
+       XFRMNLGRP_NONE,
+#define XFRMNLGRP_NONE         XFRMNLGRP_NONE
+       XFRMNLGRP_ACQUIRE,
+#define XFRMNLGRP_ACQUIRE      XFRMNLGRP_ACQUIRE
+       XFRMNLGRP_EXPIRE,
+#define XFRMNLGRP_EXPIRE       XFRMNLGRP_EXPIRE
+       XFRMNLGRP_SA,
+#define XFRMNLGRP_SA           XFRMNLGRP_SA
+       XFRMNLGRP_POLICY,
+#define XFRMNLGRP_POLICY       XFRMNLGRP_POLICY
+       __XFRMNLGRP_MAX
+};
+#define XFRMNLGRP_MAX  (__XFRMNLGRP_MAX - 1)
 
 #endif /* _LINUX_XFRM_H */
index 3bff5e9..d085813 100644 (file)
@@ -4,10 +4,10 @@
 extern int ll_remember_index(const struct sockaddr_nl *who, 
                             struct nlmsghdr *n, void *arg);
 extern int ll_init_map(struct rtnl_handle *rth);
-extern int ll_name_to_index(const char *name);
-extern const char *ll_index_to_name(int idx);
-extern const char *ll_idx_n2a(int idx, char *buf);
-extern int ll_index_to_type(int idx);
-extern unsigned ll_index_to_flags(int idx);
+extern unsigned ll_name_to_index(const char *name);
+extern const char *ll_index_to_name(unsigned idx);
+extern const char *ll_idx_n2a(unsigned idx, char *buf);
+extern int ll_index_to_type(unsigned idx);
+extern unsigned ll_index_to_flags(unsigned idx);
 
 #endif /* __LL_MAP_H__ */
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
new file mode 100644 (file)
index 0000000..b0b6459
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * INET                An implementation of the TCP/IP protocol suite for the LINUX
+ *             operating system.  INET is implemented using the  BSD Socket
+ *             interface as the means of communication with the user level.
+ *
+ *             Definitions for the TCP protocol sk_state field.
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_TCP_STATES_H
+#define _LINUX_TCP_STATES_H
+
+enum {
+       TCP_ESTABLISHED = 1,
+       TCP_SYN_SENT,
+       TCP_SYN_RECV,
+       TCP_FIN_WAIT1,
+       TCP_FIN_WAIT2,
+       TCP_TIME_WAIT,
+       TCP_CLOSE,
+       TCP_CLOSE_WAIT,
+       TCP_LAST_ACK,
+       TCP_LISTEN,
+       TCP_CLOSING,    /* Now a valid state */
+
+       TCP_MAX_STATES  /* Leave at the end! */
+};
+
+#define TCP_STATE_MASK 0xF
+
+#define TCP_ACTION_FIN (1 << 7)
+
+enum {
+       TCPF_ESTABLISHED = (1 << 1),
+       TCPF_SYN_SENT    = (1 << 2),
+       TCPF_SYN_RECV    = (1 << 3),
+       TCPF_FIN_WAIT1   = (1 << 4),
+       TCPF_FIN_WAIT2   = (1 << 5),
+       TCPF_TIME_WAIT   = (1 << 6),
+       TCPF_CLOSE       = (1 << 7),
+       TCPF_CLOSE_WAIT  = (1 << 8),
+       TCPF_LAST_ACK    = (1 << 9),
+       TCPF_LISTEN      = (1 << 10),
+       TCPF_CLOSING     = (1 << 11) 
+};
+
+#endif /* _LINUX_TCP_STATES_H */
index 249231e..2d9ef10 100644 (file)
@@ -21,7 +21,7 @@ int inet_proto_a2n(char *buf);
 const char * ll_type_n2a(int type, char *buf, int len);
 
 const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen);
-int ll_addr_a2n(unsigned char *lladdr, int len, char *arg);
+int ll_addr_a2n(char *lladdr, int len, char *arg);
 
 const char * ll_proto_n2a(unsigned short id, char *buf, int len);
 int ll_proto_a2n(unsigned short *id, char *buf);
index 906e394..0f1d1f6 100644 (file)
@@ -14,6 +14,7 @@ extern int show_details;
 extern int show_raw;
 extern int resolve_hosts;
 extern int oneline;
+extern int timestamp;
 extern char * _SL_;
 
 #ifndef IPPROTO_ESP
@@ -43,9 +44,12 @@ typedef struct
        __u8 family;
        __u8 bytelen;
        __s16 bitlen;
+       __u32 flags;
        __u32 data[4];
 } inet_prefix;
 
+#define PREFIXLEN_SPECIFIED 1
+
 #define DN_MAXADDL 20
 #ifndef AF_DECnet
 #define AF_DECnet 12
@@ -82,8 +86,8 @@ extern int get_s16(__s16 *val, const char *arg, int base);
 extern int get_u8(__u8 *val, const char *arg, int base);
 extern int get_s8(__s8 *val, const char *arg, int base);
 
-extern __u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen);
-extern __u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen);
+extern char* hexstring_n2a(const __u8 *str, int len, char *buf, int blen);
+extern __u8* hexstring_a2n(const char *str, __u8 *buf, int blen);
 
 extern const char *format_host(int af, int len, const void *addr, 
                               char *buf, int buflen);
@@ -123,4 +127,12 @@ static __inline__ int get_user_hz(void)
        return __iproute2_user_hz_internal;
 }
 
+int print_timestamp(FILE *fp);
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+extern int cmdlineno;
+extern size_t getcmdline(char **line, size_t *len, FILE *in);
+extern int makeargs(char *line, char *argv[], int maxargs);
+
 #endif /* __UTILS_H__ */
index bcc419b..3383c72 100644 (file)
@@ -1,7 +1,7 @@
 IPOBJ=ip.o ipaddress.o iproute.o iprule.o \
-    rtm_map.o iptunnel.o ipneigh.o iplink.o \
+    rtm_map.o iptunnel.o ipneigh.o ipntable.o iplink.o \
     ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o \
-    ipxfrm.o xfrm_state.o xfrm_policy.o
+    ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o
 
 RTMONOBJ=rtmon.o
 
diff --git a/ip/ip.c b/ip/ip.c
index 6358ec4..c29d2f3 100644 (file)
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -22,6 +22,7 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
 #include <string.h>
+#include <errno.h>
 
 #include "SNAPSHOT.h"
 #include "utils.h"
@@ -31,7 +32,11 @@ int preferred_family = AF_UNSPEC;
 int show_stats = 0;
 int resolve_hosts = 0;
 int oneline = 0;
+int timestamp = 0;
 char * _SL_ = NULL;
+char *batch_file = NULL;
+int force = 0;
+struct rtnl_handle rth;
 
 static void usage(void) __attribute__((noreturn));
 
@@ -39,13 +44,98 @@ static void usage(void)
 {
        fprintf(stderr,
 "Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n"
-"where  OBJECT := { link | addr | route | rule | neigh | tunnel |\n"
+"       ip [ -force ] [-batch filename\n"
+"where  OBJECT := { link | addr | route | rule | neigh | ntable | tunnel |\n"
 "                   maddr | mroute | monitor | xfrm }\n"
 "       OPTIONS := { -V[ersion] | -s[tatistics] | -r[esolve] |\n"
-"                    -f[amily] { inet | inet6 | ipx | dnet | link } | -o[neline] }\n");
+"                    -f[amily] { inet | inet6 | ipx | dnet | link } |\n"
+"                    -o[neline] | -t[imestamp] }\n");
        exit(-1);
 }
 
+static int do_help(int argc, char **argv)
+{
+       usage();
+}
+
+static const struct cmd {
+       const char *cmd;
+       int (*func)(int argc, char **argv);
+} cmds[] = {
+       { "address",    do_ipaddr },
+       { "maddress",   do_multiaddr },
+       { "route",      do_iproute },
+       { "rule",       do_iprule },
+       { "neighbor",   do_ipneigh },
+       { "neighbour",  do_ipneigh },
+       { "ntable",     do_ipntable },
+       { "ntbl",       do_ipntable },
+       { "link",       do_iplink },
+       { "tunnel",     do_iptunnel },
+       { "tunl",       do_iptunnel },
+       { "monitor",    do_ipmonitor },
+       { "xfrm",       do_xfrm },
+       { "mroute",     do_multiroute },
+       { "help",       do_help },
+       { 0 }
+};
+
+static int do_cmd(const char *argv0, int argc, char **argv)
+{
+       const struct cmd *c;
+
+       for (c = cmds; c->cmd; ++c) {
+               if (matches(argv0, c->cmd) == 0)
+                       return c->func(argc-1, argv+1);
+       }
+
+       fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv0);
+       return -1;
+}
+
+/* Run each line of batch file NAME ("-" means stdin) through do_cmd().
+ * Returns 0 on success, -1 if setup fails, 1 if a command failed
+ * (the first failure ends the run unless -force was given). */
+static int batch(const char *name)
+{
+       char *line = NULL;
+       size_t len = 0;
+       int ret = 0;
+
+       if (name && strcmp(name, "-") != 0) {
+               if (freopen(name, "r", stdin) == NULL) {
+                       fprintf(stderr, "Cannot open file \"%s\" for reading: %s\n",
+                               name, strerror(errno));
+                       return -1;
+               }
+       }
+
+       if (rtnl_open(&rth, 0) < 0) {
+               fprintf(stderr, "Cannot open rtnetlink\n");
+               return -1;
+       }
+
+       cmdlineno = 0;  /* presumably advanced by getcmdline() - confirm in lib/utils.c */
+       while (getcmdline(&line, &len, stdin) != -1) {
+               char *largv[100];
+               int largc = makeargs(line, largv, 100);
+
+               if (largc == 0)
+                       continue;       /* blank line */
+
+               if (do_cmd(largv[0], largc, largv)) {
+                       fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno);
+                       ret = 1;
+                       if (!force)
+                               break;
+               }
+       }
+       free(line);     /* free(NULL) is a no-op */
+
+       rtnl_close(&rth);
+       return ret;
+}
+
 int main(int argc, char **argv)
 {
        char *basename;
@@ -102,6 +192,8 @@ int main(int argc, char **argv)
                        ++resolve_hosts;
                } else if (matches(opt, "-oneline") == 0) {
                        ++oneline;
+               } else if (matches(opt, "-timestamp") == 0) {
+                       ++timestamp;
 #if 0
                } else if (matches(opt, "-numeric") == 0) {
                        rtnl_names_numeric++;
@@ -109,6 +201,14 @@ int main(int argc, char **argv)
                } else if (matches(opt, "-Version") == 0) {
                        printf("ip utility, iproute2-ss%s\n", SNAPSHOT);
                        exit(0);
+               } else if (matches(opt, "-force") == 0) {
+                       ++force;
+               } else if (matches(opt, "-batch") == 0) {
+                       argc--;
+                       argv++;
+                       if (argc <= 1)
+                               usage();
+                       batch_file = argv[1];
                } else if (matches(opt, "-help") == 0) {
                        usage();
                } else {
@@ -120,52 +220,18 @@ int main(int argc, char **argv)
 
        _SL_ = oneline ? "\\" : "\n" ;
 
-       if (strcmp(basename, "ipaddr") == 0)
-               return do_ipaddr(argc-1, argv+1);
-       if (strcmp(basename, "ipmaddr") == 0)
-               return do_multiaddr(argc-1, argv+1);
-       if (strcmp(basename, "iproute") == 0)
-               return do_iproute(argc-1, argv+1);
-       if (strcmp(basename, "iprule") == 0)
-               return do_iprule(argc-1, argv+1);
-       if (strcmp(basename, "ipneigh") == 0)
-               return do_ipneigh(argc-1, argv+1);
-       if (strcmp(basename, "iplink") == 0)
-               return do_iplink(argc-1, argv+1);
-       if (strcmp(basename, "iptunnel") == 0)
-               return do_iptunnel(argc-1, argv+1);
-       if (strcmp(basename, "ipmonitor") == 0)
-               return do_ipmonitor(argc-1, argv+1);
-       if (strcmp(basename, "ipxfrm") == 0)
-               return do_xfrm(argc-1, argv+1);
-
-       if (argc > 1) {
-               if (matches(argv[1], "address") == 0)
-                       return do_ipaddr(argc-2, argv+2);
-               if (matches(argv[1], "maddress") == 0)
-                       return do_multiaddr(argc-2, argv+2);
-               if (matches(argv[1], "route") == 0)
-                       return do_iproute(argc-2, argv+2);
-               if (matches(argv[1], "rule") == 0)
-                       return do_iprule(argc-2, argv+2);
-               if (matches(argv[1], "mroute") == 0)
-                       return do_multiroute(argc-2, argv+2);
-               if (matches(argv[1], "neighbor") == 0 ||
-                   matches(argv[1], "neighbour") == 0)
-                       return do_ipneigh(argc-2, argv+2);
-               if (matches(argv[1], "link") == 0)
-                       return do_iplink(argc-2, argv+2);
-               if (matches(argv[1], "tunnel") == 0 ||
-                   strcmp(argv[1], "tunl") == 0)
-                       return do_iptunnel(argc-2, argv+2);
-               if (matches(argv[1], "monitor") == 0)
-                       return do_ipmonitor(argc-2, argv+2);
-               if (matches(argv[1], "xfrm") == 0)
-                       return do_xfrm(argc-2, argv+2);
-               if (matches(argv[1], "help") == 0)
-                       usage();
-               fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv[1]);
-               exit(-1);
-       }
+       if (batch_file) 
+               return batch(batch_file);
+               
+       if (rtnl_open(&rth, 0) < 0)
+               exit(1);
+
+       if (strlen(basename) > 2) 
+               return do_cmd(basename+2, argc, argv);
+
+       if (argc > 1) 
+               return do_cmd(argv[1], argc-1, argv+1);
+
+       rtnl_close(&rth);
        usage();
 }
index 688d384..1fe4a69 100644 (file)
@@ -6,6 +6,8 @@ extern int print_addrinfo(const struct sockaddr_nl *who,
                          void *arg);
 extern int print_neigh(const struct sockaddr_nl *who,
                       struct nlmsghdr *n, void *arg);
+extern int print_ntable(const struct sockaddr_nl *who,
+                       struct nlmsghdr *n, void *arg);
 extern int ipaddr_list(int argc, char **argv);
 extern int ipaddr_list_link(int argc, char **argv);
 extern int iproute_monitor(int argc, char **argv);
@@ -13,6 +15,7 @@ extern void iplink_usage(void) __attribute__((noreturn));
 extern void iproute_reset_filter(void);
 extern void ipaddr_reset_filter(int);
 extern void ipneigh_reset_filter(void);
+extern void ipntable_reset_filter(void);
 extern int print_route(const struct sockaddr_nl *who, 
                       struct nlmsghdr *n, void *arg);
 extern int print_prefix(const struct sockaddr_nl *who,
@@ -21,9 +24,12 @@ extern int do_ipaddr(int argc, char **argv);
 extern int do_iproute(int argc, char **argv);
 extern int do_iprule(int argc, char **argv);
 extern int do_ipneigh(int argc, char **argv);
+extern int do_ipntable(int argc, char **argv);
 extern int do_iptunnel(int argc, char **argv);
 extern int do_iplink(int argc, char **argv);
 extern int do_ipmonitor(int argc, char **argv);
 extern int do_multiaddr(int argc, char **argv);
 extern int do_multiroute(int argc, char **argv);
 extern int do_xfrm(int argc, char **argv);
+
+extern struct rtnl_handle rth;
index 92f0089..cb164c0 100644 (file)
@@ -49,7 +49,6 @@ static struct
        char *flushb;
        int flushp;
        int flushe;
-       struct rtnl_handle *rth;
 } filter;
 
 static int do_link;
@@ -269,7 +268,7 @@ int print_linkinfo(const struct sockaddr_nl *who,
 
 static int flush_update(void)
 {
-       if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
+       if (rtnl_send(&rth, filter.flushb, filter.flushp) < 0) {
                perror("Failed to send flush request\n");
                return -1;
        }
@@ -345,7 +344,7 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
                memcpy(fn, n, n->nlmsg_len);
                fn->nlmsg_type = RTM_DELADDR;
                fn->nlmsg_flags = NLM_F_REQUEST;
-               fn->nlmsg_seq = ++filter.rth->seq;
+               fn->nlmsg_seq = ++rth.seq;
                filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
                filter.flushed++;
                if (show_stats < 2)
@@ -495,8 +494,7 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush)
 {
        struct nlmsg_list *linfo = NULL;
        struct nlmsg_list *ainfo = NULL;
-       struct nlmsg_list *l;
-       struct rtnl_handle rth;
+       struct nlmsg_list *l, *n;
        char *filter_dev = NULL;
        int no_link = 0;
 
@@ -524,7 +522,7 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush)
                        if (filter.family == AF_UNSPEC)
                                filter.family = filter.pfx.family;
                } else if (strcmp(*argv, "scope") == 0) {
-                       int scope = 0;
+                       unsigned scope = 0;
                        NEXT_ARG();
                        filter.scopemask = -1;
                        if (rtnl_rtscope_a2n(&scope, *argv)) {
@@ -570,9 +568,6 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush)
                argv++; argc--;
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETLINK) < 0) {
                perror("Cannot send dump request");
                exit(1);
@@ -598,7 +593,6 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush)
                filter.flushb = flushb;
                filter.flushp = 0;
                filter.flushe = sizeof(flushb);
-               filter.rth = &rth;
 
                for (;;) {
                        if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) {
@@ -620,7 +614,8 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush)
                        }
                        round++;
                        if (flush_update() < 0)
-                               exit(1);
+                               return 1;
+
                        if (show_stats) {
                                printf("\n*** Round %d, deleting %d addresses ***\n", round, filter.flushed);
                                fflush(stdout);
@@ -700,16 +695,18 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush)
                }
        }
 
-       for (l=linfo; l; l = l->next) {
+       for (l=linfo; l; l = n) {
+               n = l->next;
                if (no_link || print_linkinfo(NULL, &l->h, stdout) == 0) {
                        struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
                        if (filter.family != AF_PACKET)
                                print_selected_addrinfo(ifi->ifi_index, ainfo, stdout);
                }
                fflush(stdout);
+               free(l);
        }
 
-       exit(0);
+       return 0;
 }
 
 int ipaddr_list_link(int argc, char **argv)
@@ -736,7 +733,6 @@ int default_scope(inet_prefix *lcl)
 
 int ipaddr_modify(int cmd, int argc, char **argv)
 {
-       struct rtnl_handle rth;
        struct {
                struct nlmsghdr         n;
                struct ifaddrmsg        ifa;
@@ -744,6 +740,7 @@ int ipaddr_modify(int cmd, int argc, char **argv)
        } req;
        char  *d = NULL;
        char  *l = NULL;
+       char  *lcl_arg = NULL;
        inet_prefix lcl;
        inet_prefix peer;
        int local_len = 0;
@@ -800,7 +797,7 @@ int ipaddr_modify(int cmd, int argc, char **argv)
                        addattr_l(&req.n, sizeof(req), IFA_ANYCAST, &addr.data, addr.bytelen);
                        any_len = addr.bytelen;
                } else if (strcmp(*argv, "scope") == 0) {
-                       int scope = 0;
+                       unsigned scope = 0;
                        NEXT_ARG();
                        if (rtnl_rtscope_a2n(&scope, *argv))
                                invarg(*argv, "invalid scope value.");
@@ -821,6 +818,7 @@ int ipaddr_modify(int cmd, int argc, char **argv)
                                usage();
                        if (local_len)
                                duparg2("local", *argv);
+                       lcl_arg = *argv;
                        get_prefix(&lcl, *argv, req.ifa.ifa_family);
                        if (req.ifa.ifa_family == AF_UNSPEC)
                                req.ifa.ifa_family = lcl.family;
@@ -838,9 +836,17 @@ int ipaddr_modify(int cmd, int argc, char **argv)
                exit(1);
        }
 
-       if (peer_len == 0 && local_len && cmd != RTM_DELADDR) {
-               peer = lcl;
-               addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &lcl.data, lcl.bytelen);
+       if (peer_len == 0 && local_len) {
+               if (cmd == RTM_DELADDR && lcl.family == AF_INET && !(lcl.flags & PREFIXLEN_SPECIFIED)) {
+                       fprintf(stderr,
+                           "Warning: Executing wildcard deletion to stay compatible with old scripts.\n" \
+                           "         Explicitly specify the prefix length (%s/%d) to avoid this warning.\n" \
+                           "         This special behaviour is likely to disappear in further releases,\n" \
+                           "         fix your scripts!\n", lcl_arg, local_len*8);
+               } else {
+                       peer = lcl;
+                       addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &lcl.data, lcl.bytelen);
+               }
        }
        if (req.ifa.ifa_prefixlen == 0)
                req.ifa.ifa_prefixlen = lcl.bitlen;
@@ -867,9 +873,6 @@ int ipaddr_modify(int cmd, int argc, char **argv)
        if (!scoped && cmd != RTM_DELADDR)
                req.ifa.ifa_scope = default_scope(&lcl);
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        ll_init_map(&rth);
 
        if ((req.ifa.ifa_index = ll_name_to_index(d)) == 0) {
@@ -880,7 +883,7 @@ int ipaddr_modify(int cmd, int argc, char **argv)
        if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
                exit(2);
 
-       exit(0);
+       return 0;
 }
 
 int do_ipaddr(int argc, char **argv)
index 520280e..ffc9f06 100644 (file)
@@ -178,7 +178,7 @@ static int get_address(const char *dev, int *htype)
 {
        struct ifreq ifr;
        struct sockaddr_ll me;
-       int alen;
+       socklen_t alen;
        int s;
 
        s = socket(PF_PACKET, SOCK_DGRAM, 0);
@@ -216,7 +216,8 @@ static int get_address(const char *dev, int *htype)
        return me.sll_halen;
 }
 
-static int parse_address(const char *dev, int hatype, int halen, char *lla, struct ifreq *ifr)
+static int parse_address(const char *dev, int hatype, int halen, 
+               char *lla, struct ifreq *ifr)
 {
        int alen;
 
index 1cdab0b..e6bd625 100644 (file)
@@ -298,7 +298,8 @@ int multiaddr_modify(int cmd, int argc, char **argv)
                                usage();
                        if (ifr.ifr_hwaddr.sa_data[0])
                                duparg("address", *argv);
-                       if (ll_addr_a2n(ifr.ifr_hwaddr.sa_data, 14, *argv) < 0) {
+                       if (ll_addr_a2n(ifr.ifr_hwaddr.sa_data, 
+                                       14, *argv) < 0) {
                                fprintf(stderr, "Error: \"%s\" is not a legal ll address.\n", *argv);
                                exit(1);
                        }
index cdaeb6f..50b6327 100644 (file)
@@ -38,6 +38,9 @@ int accept_msg(const struct sockaddr_nl *who,
 {
        FILE *fp = (FILE*)arg;
 
+       if (timestamp)
+               print_timestamp(fp);
+
        if (n->nlmsg_type == RTM_NEWROUTE || n->nlmsg_type == RTM_DELROUTE) {
                print_route(who, n, arg);
                return 0;
@@ -85,7 +88,6 @@ int accept_msg(const struct sockaddr_nl *who,
 
 int do_ipmonitor(int argc, char **argv)
 {
-       struct rtnl_handle rth;
        char *file = NULL;
        unsigned groups = ~RTMGRP_TC;
        int llink=0;
@@ -93,6 +95,7 @@ int do_ipmonitor(int argc, char **argv)
        int lroute=0;
        int lprefix=0;
 
+       rtnl_close(&rth);
        ipaddr_reset_filter(1);
        iproute_reset_filter();
        ipneigh_reset_filter();
@@ -150,16 +153,15 @@ int do_ipmonitor(int argc, char **argv)
                        perror("Cannot fopen");
                        exit(-1);
                }
-               return rtnl_from_file(fp, accept_msg, (void*)stdout);
+               return rtnl_from_file(fp, accept_msg, stdout);
        }
 
        if (rtnl_open(&rth, groups) < 0)
                exit(1);
-
        ll_init_map(&rth);
 
-       if (rtnl_listen(&rth, accept_msg, (void*)stdout) < 0)
+       if (rtnl_listen(&rth, accept_msg, stdout) < 0)
                exit(2);
 
-       exit(0);
+       return 0;
 }
index b24caee..951a54f 100644 (file)
@@ -42,7 +42,7 @@ static void usage(void)
        exit(-1);
 }
 
-char *viftable[32];
+static char *viftable[32];
 
 struct rtfilter
 {
@@ -50,7 +50,7 @@ struct rtfilter
        inet_prefix msrc;
 } filter;
 
-void read_viftable(void)
+static void read_viftable(void)
 {
        char buf[256];
        FILE *fp = fopen("/proc/net/ip_mr_vif", "r");
@@ -75,7 +75,7 @@ void read_viftable(void)
        fclose(fp);
 }
 
-void read_mroute_list(FILE *ofp)
+static void read_mroute_list(FILE *ofp)
 {
        char buf[256];
        FILE *fp = fopen("/proc/net/ip_mr_cache", "r");
index e8ab291..249ee68 100644 (file)
@@ -31,6 +31,7 @@
 #include "ip_common.h"
 
 #define NUD_VALID      (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
+#define MAX_ROUNDS     10
 
 static struct
 {
@@ -43,7 +44,6 @@ static struct
        char *flushb;
        int flushp;
        int flushe;
-       struct rtnl_handle *rth;
 } filter;
 
 static void usage(void) __attribute__((noreturn));
@@ -88,7 +88,7 @@ int nud_state_a2n(unsigned *state, char *arg)
 
 static int flush_update(void)
 {
-       if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
+       if (rtnl_send(&rth, filter.flushb, filter.flushp) < 0) {
                perror("Failed to send flush request\n");
                return -1;
        }
@@ -99,7 +99,6 @@ static int flush_update(void)
 
 static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
 {
-       struct rtnl_handle rth;
        struct {
                struct nlmsghdr         n;
                struct ndmsg            ndm;
@@ -166,16 +165,13 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
        addattr_l(&req.n, sizeof(req), NDA_DST, &dst.data, dst.bytelen);
 
        if (lla && strcmp(lla, "null")) {
-               __u8 llabuf[16];
+               char llabuf[20];
                int l;
 
                l = ll_addr_a2n(llabuf, sizeof(llabuf), lla);
                addattr_l(&req.n, sizeof(req), NDA_LLADDR, llabuf, l);
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        ll_init_map(&rth);
 
        if ((req.ndm.ndm_ifindex = ll_name_to_index(d)) == 0) {
@@ -186,7 +182,7 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
        if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
                exit(2);
 
-       exit(0);
+       return 0;
 }
 
 
@@ -250,7 +246,7 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                memcpy(fn, n, n->nlmsg_len);
                fn->nlmsg_type = RTM_DELNEIGH;
                fn->nlmsg_flags = NLM_F_REQUEST;
-               fn->nlmsg_seq = ++filter.rth->seq;
+               fn->nlmsg_seq = ++rth.seq;
                filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
                filter.flushed++;
                if (show_stats < 2)
@@ -325,7 +321,6 @@ void ipneigh_reset_filter()
 int do_show_or_flush(int argc, char **argv, int flush)
 {
        char *filter_dev = NULL;
-       struct rtnl_handle rth;
        int state_given = 0;
 
        ipneigh_reset_filter();
@@ -380,9 +375,6 @@ int do_show_or_flush(int argc, char **argv, int flush)
                argc--; argv++;
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        ll_init_map(&rth);
 
        if (filter_dev) {
@@ -399,10 +391,9 @@ int do_show_or_flush(int argc, char **argv, int flush)
                filter.flushb = flushb;
                filter.flushp = 0;
                filter.flushe = sizeof(flushb);
-               filter.rth = &rth;
                filter.state &= ~NUD_FAILED;
 
-               for (;;) {
+               while (round < MAX_ROUNDS) {
                        if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) {
                                perror("Cannot send dump request");
                                exit(1);
@@ -428,6 +419,9 @@ int do_show_or_flush(int argc, char **argv, int flush)
                                fflush(stdout);
                        }
                }
+               printf("*** Flush not complete bailing out after %d rounds\n",
+                       MAX_ROUNDS);
+               return 1;
        }
 
        if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) {
diff --git a/ip/ipntable.c b/ip/ipntable.c
new file mode 100644 (file)
index 0000000..5655d93
--- /dev/null
@@ -0,0 +1,657 @@
+/*
+ * Copyright (C)2006 USAGI/WIDE Project
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * based on ipneigh.c
+ */
+/*
+ * Authors:
+ *     Masahide NAKAMURA @USAGI
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "utils.h"
+#include "ip_common.h"
+
+/*
+ * Selection criteria consulted by print_ntable() for each dumped table.
+ * Cleared to all-zero by ipntable_reset_filter().
+ */
+static struct
+{
+       int family;     /* set from preferred_family by ipntable_show() */
+        int index;     /* interface index to match; 0 means no device filter */
+#define NONE_DEV       (-1)    /* index value selecting parms without NDTPA_IFINDEX */
+       char name[1024];        /* table name to match; empty string means no name filter */
+} filter;
+
+static void usage(void) __attribute__((noreturn));
+
+/*
+ * Print the "ip ntable" synopsis to stderr and terminate the process
+ * with exit status -1.  This function never returns.
+ */
+static void usage(void)
+{
+       fprintf(stderr,
+               "Usage: ip ntable change name NAME [ dev DEV ]\n"
+               "          [ thresh1 VAL ] [ thresh2 VAL ] [ thresh3 VAL ] [ gc_int MSEC ]\n"
+               "          [ PARMS ]\n"
+               "Usage: ip ntable show [ dev DEV ] [ name NAME ]\n"
+
+               "PARMS := [ base_reachable MSEC ] [ retrans MSEC ] [ gc_stale MSEC ]\n"
+               "         [ delay_probe MSEC ] [ queue LEN ]\n"
+               /* keyword spelled exactly as ipntable_modify() parses it */
+               "         [ app_probes VAL ] [ ucast_probes VAL ] [ mcast_probes VAL ]\n"
+               "         [ anycast_delay MSEC ] [ proxy_delay MSEC ] [ proxy_queue LEN ]\n"
+               "         [ locktime MSEC ]\n"
+               );
+
+       exit(-1);
+}
+
+/*
+ * Build a neighbour-table request of type `cmd` from the command-line
+ * keywords in argv and send it with rtnl_talk() on the global rth.
+ *
+ * Table-level attributes (name, thresh1..3, gc_int) are appended directly
+ * to the request.  Per-parameter-set values (dev, base_reachable, ...)
+ * are first collected in parms_buf and, if any were given, nested into
+ * the request as a single NDTA_PARMS attribute.
+ *
+ * cmd:   value stored in nlmsg_type
+ * flags: extra bits OR-ed with NLM_F_REQUEST into nlmsg_flags
+ * argc/argv: remaining command-line words after the sub-command
+ *
+ * Returns 0 on success and -1 when the "dev" argument names an unknown
+ * device.  Exits with status -1 when no changeable attribute was given
+ * and with status 2 when rtnl_talk() fails.
+ * NOTE(review): invarg()/missarg()/duparg() are assumed not to return;
+ * they are defined outside this file.
+ */
+static int ipntable_modify(int cmd, int flags, int argc, char **argv)
+{
+       struct {
+               struct nlmsghdr         n;
+               struct ndtmsg           ndtm;
+               char                    buf[1024];
+       } req;
+       char *namep = NULL;
+       char *threshsp = NULL;
+       char *gc_intp = NULL;
+       char parms_buf[1024];
+       /* parms_buf is used as one rtattr whose payload grows as values are added */
+       struct rtattr *parms_rta = (struct rtattr *)parms_buf;
+       int parms_change = 0;
+
+       memset(&req, 0, sizeof(req));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
+       req.n.nlmsg_flags = NLM_F_REQUEST|flags;
+       req.n.nlmsg_type = cmd;
+
+       req.ndtm.ndtm_family = preferred_family;
+       req.ndtm.ndtm_pad1 = 0;
+       req.ndtm.ndtm_pad2 = 0;
+
+       memset(&parms_buf, 0, sizeof(parms_buf));
+
+       parms_rta->rta_type = NDTA_PARMS;
+       parms_rta->rta_len = RTA_LENGTH(0);
+
+       while (argc > 0) {
+               if (strcmp(*argv, "name") == 0) {
+                       int len;
+
+                       NEXT_ARG();
+                       if (namep)
+                               duparg("NAME", *argv);
+
+                       namep = *argv;
+                       /* the terminating NUL is sent as part of the attribute */
+                       len = strlen(namep) + 1;
+                       addattr_l(&req.n, sizeof(req), NDTA_NAME, namep, len);
+               } else if (strcmp(*argv, "thresh1") == 0) {
+                       __u32 thresh1;
+
+                       NEXT_ARG();
+                       threshsp = *argv;
+
+                       if (get_u32(&thresh1, *argv, 0))
+                               invarg("\"thresh1\" value is invalid", *argv);
+
+                       addattr32(&req.n, sizeof(req), NDTA_THRESH1, thresh1);
+               } else if (strcmp(*argv, "thresh2") == 0) {
+                       __u32 thresh2;
+
+                       NEXT_ARG();
+                       threshsp = *argv;
+
+                       if (get_u32(&thresh2, *argv, 0))
+                               invarg("\"thresh2\" value is invalid", *argv);
+
+                       addattr32(&req.n, sizeof(req), NDTA_THRESH2, thresh2);
+               } else if (strcmp(*argv, "thresh3") == 0) {
+                       __u32 thresh3;
+
+                       NEXT_ARG();
+                       threshsp = *argv;
+
+                       if (get_u32(&thresh3, *argv, 0))
+                               invarg("\"thresh3\" value is invalid", *argv);
+
+                       addattr32(&req.n, sizeof(req), NDTA_THRESH3, thresh3);
+               } else if (strcmp(*argv, "gc_int") == 0) {
+                       __u64 gc_int;
+
+                       NEXT_ARG();
+                       gc_intp = *argv;
+
+                       if (get_u64(&gc_int, *argv, 0))
+                               invarg("\"gc_int\" value is invalid", *argv);
+
+                       addattr_l(&req.n, sizeof(req), NDTA_GC_INTERVAL,
+                                 &gc_int, sizeof(gc_int));
+               } else if (strcmp(*argv, "dev") == 0) {
+                       __u32 ifindex;
+
+                       NEXT_ARG();
+                       ifindex = ll_name_to_index(*argv);
+                       if (ifindex == 0) {
+                               fprintf(stderr, "Cannot find device \"%s\"\n", *argv);
+                               return -1;
+                       }
+
+                       /*
+                        * "dev" does not set parms_change: on its own it
+                        * does not satisfy the "something to change" check.
+                        */
+                       rta_addattr32(parms_rta, sizeof(parms_buf),
+                                     NDTPA_IFINDEX, ifindex);
+               } else if (strcmp(*argv, "base_reachable") == 0) {
+                       __u64 breachable;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&breachable, *argv, 0))
+                               invarg("\"base_reachable\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_BASE_REACHABLE_TIME,
+                                     &breachable, sizeof(breachable));
+                       parms_change = 1;
+               } else if (strcmp(*argv, "retrans") == 0) {
+                       __u64 retrans;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&retrans, *argv, 0))
+                               invarg("\"retrans\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_RETRANS_TIME,
+                                     &retrans, sizeof(retrans));
+                       parms_change = 1;
+               } else if (strcmp(*argv, "gc_stale") == 0) {
+                       __u64 gc_stale;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&gc_stale, *argv, 0))
+                               invarg("\"gc_stale\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_GC_STALETIME,
+                                     &gc_stale, sizeof(gc_stale));
+                       parms_change = 1;
+               } else if (strcmp(*argv, "delay_probe") == 0) {
+                       __u64 delay_probe;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&delay_probe, *argv, 0))
+                               invarg("\"delay_probe\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_DELAY_PROBE_TIME,
+                                     &delay_probe, sizeof(delay_probe));
+                       parms_change = 1;
+               } else if (strcmp(*argv, "queue") == 0) {
+                       __u32 queue;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&queue, *argv, 0))
+                               invarg("\"queue\" value is invalid", *argv);
+
+                       rta_addattr32(parms_rta, sizeof(parms_buf),
+                                     NDTPA_QUEUE_LEN, queue);
+                       parms_change = 1;
+               } else if (strcmp(*argv, "app_probes") == 0) {
+                       __u32 aprobe;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&aprobe, *argv, 0))
+                               invarg("\"app_probes\" value is invalid", *argv);
+
+                       rta_addattr32(parms_rta, sizeof(parms_buf),
+                                     NDTPA_APP_PROBES, aprobe);
+                       parms_change = 1;
+               } else if (strcmp(*argv, "ucast_probes") == 0) {
+                       __u32 uprobe;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&uprobe, *argv, 0))
+                               invarg("\"ucast_probes\" value is invalid", *argv);
+
+                       rta_addattr32(parms_rta, sizeof(parms_buf),
+                                     NDTPA_UCAST_PROBES, uprobe);
+                       parms_change = 1;
+               } else if (strcmp(*argv, "mcast_probes") == 0) {
+                       __u32 mprobe;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&mprobe, *argv, 0))
+                               invarg("\"mcast_probes\" value is invalid", *argv);
+
+                       rta_addattr32(parms_rta, sizeof(parms_buf),
+                                     NDTPA_MCAST_PROBES, mprobe);
+                       parms_change = 1;
+               } else if (strcmp(*argv, "anycast_delay") == 0) {
+                       __u64 anycast_delay;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&anycast_delay, *argv, 0))
+                               invarg("\"anycast_delay\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_ANYCAST_DELAY,
+                                     &anycast_delay, sizeof(anycast_delay));
+                       parms_change = 1;
+               } else if (strcmp(*argv, "proxy_delay") == 0) {
+                       __u64 proxy_delay;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&proxy_delay, *argv, 0))
+                               invarg("\"proxy_delay\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_PROXY_DELAY,
+                                     &proxy_delay, sizeof(proxy_delay));
+                       parms_change = 1;
+               } else if (strcmp(*argv, "proxy_queue") == 0) {
+                       __u32 pqueue;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&pqueue, *argv, 0))
+                               invarg("\"proxy_queue\" value is invalid", *argv);
+
+                       rta_addattr32(parms_rta, sizeof(parms_buf),
+                                     NDTPA_PROXY_QLEN, pqueue);
+                       parms_change = 1;
+               } else if (strcmp(*argv, "locktime") == 0) {
+                       __u64 locktime;
+
+                       NEXT_ARG();
+
+                       if (get_u64(&locktime, *argv, 0))
+                               invarg("\"locktime\" value is invalid", *argv);
+
+                       rta_addattr_l(parms_rta, sizeof(parms_buf),
+                                     NDTPA_LOCKTIME,
+                                     &locktime, sizeof(locktime));
+                       parms_change = 1;
+               } else {
+                       invarg("unknown", *argv);
+               }
+
+               argc--; argv++;
+       }
+
+       if (!namep)
+               missarg("NAME");
+       /* a request that names a table but changes nothing is refused */
+       if (!threshsp && !gc_intp && !parms_change) {
+               fprintf(stderr, "Not enough information: changeable attributes required.\n");
+               exit(-1);
+       }
+
+       /* nest the collected parameters only if at least one was added */
+       if (parms_rta->rta_len > RTA_LENGTH(0)) {
+               addattr_l(&req.n, sizeof(req), NDTA_PARMS, RTA_DATA(parms_rta),
+                         RTA_PAYLOAD(parms_rta));
+       }
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
+               exit(2);
+
+       return 0;
+}
+
+/*
+ * Format the local wall-clock time lying (msec / 1000) seconds before
+ * "now" as "%Y-%m-%d %T".
+ *
+ * Returns a pointer to a function-local static buffer, so each call
+ * overwrites the previous result.  The buffer holds "(error)" when msec
+ * is 0, when gettimeofday() fails, or when localtime() returns NULL.
+ */
+static const char *ntable_strtime_delta(__u32 msec)
+{
+       static char str[32];
+       struct timeval tv;
+       struct tm *when = NULL;
+       time_t stamp;
+
+       if (msec != 0) {
+               memset(&tv, 0, sizeof(tv));
+
+               if (gettimeofday(&tv, NULL) < 0) {
+                       perror("gettimeofday");
+               } else {
+                       stamp = tv.tv_sec - (msec / 1000);
+                       when = localtime(&stamp);
+               }
+       }
+
+       /* when stays NULL on every failure path above */
+       if (when)
+               strftime(str, sizeof(str), "%Y-%m-%d %T", when);
+       else
+               strcpy(str, "(error)");
+
+       return str;
+}
+
+/*
+ * Dump callback: pretty-print one RTM_NEWNEIGHTBL message.
+ *
+ * who: sender address (not used here)
+ * n:   the netlink message to decode
+ * arg: the FILE * to print to
+ *
+ * Messages are silently skipped (return 0) when they do not match
+ * preferred_family or the file-scope filter (name / device index).
+ * Config and stats sections are printed only when show_stats is set.
+ *
+ * Returns 0 when the message was printed or skipped, -1 when the
+ * message is too short to hold a struct ndtmsg.
+ */
+int print_ntable(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct ndtmsg *ndtm = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr *tb[NDTA_MAX+1];
+       /* tpb is filled only when NDTA_PARMS is present; all uses are guarded by that test */
+       struct rtattr *tpb[NDTPA_MAX+1];
+       int ret;
+
+       if (n->nlmsg_type != RTM_NEWNEIGHTBL) {
+               fprintf(stderr, "Not NEIGHTBL: %08x %08x %08x\n",
+                       n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+               return 0;
+       }
+       len -= NLMSG_LENGTH(sizeof(*ndtm));
+       if (len < 0) {
+               fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+               return -1;
+       }
+
+       if (preferred_family && preferred_family != ndtm->ndtm_family)
+               return 0;
+
+       /* len is the attribute payload length validated above */
+       parse_rtattr(tb, NDTA_MAX, NDTA_RTA(ndtm), len);
+
+       if (tb[NDTA_NAME]) {
+               char *name = RTA_DATA(tb[NDTA_NAME]);
+
+               if (strlen(filter.name) > 0 && strcmp(filter.name, name))
+                       return 0;
+       }
+       if (tb[NDTA_PARMS]) {
+               parse_rtattr(tpb, NDTPA_MAX, RTA_DATA(tb[NDTA_PARMS]),
+                            RTA_PAYLOAD(tb[NDTA_PARMS]));
+
+               if (tpb[NDTPA_IFINDEX]) {
+                       __u32 ifindex = *(__u32 *)RTA_DATA(tpb[NDTPA_IFINDEX]);
+
+                       if (filter.index && filter.index != ifindex)
+                               return 0;
+               } else {
+                       /* parms without a device pass only "no filter" or "dev none" */
+                       if (filter.index && filter.index != NONE_DEV)
+                               return 0;
+               }
+       }
+
+       if (ndtm->ndtm_family == AF_INET)
+               fprintf(fp, "inet ");
+       else if (ndtm->ndtm_family == AF_INET6)
+               fprintf(fp, "inet6 ");
+       else if (ndtm->ndtm_family == AF_DECnet)
+               fprintf(fp, "dnet ");
+       else
+               fprintf(fp, "(%d) ", ndtm->ndtm_family);
+
+       if (tb[NDTA_NAME]) {
+               char *name = RTA_DATA(tb[NDTA_NAME]);
+               fprintf(fp, "%s ", name);
+       }
+
+       fprintf(fp, "%s", _SL_);
+
+       /* ret: whether the threshold / gc_int line has anything to show */
+       ret = (tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || tb[NDTA_THRESH3] ||
+              tb[NDTA_GC_INTERVAL]);
+       if (ret)
+               fprintf(fp, "    ");
+
+       if (tb[NDTA_THRESH1]) {
+               __u32 thresh1 = *(__u32 *)RTA_DATA(tb[NDTA_THRESH1]);
+               fprintf(fp, "thresh1 %u ", thresh1);
+       }
+       if (tb[NDTA_THRESH2]) {
+               __u32 thresh2 = *(__u32 *)RTA_DATA(tb[NDTA_THRESH2]);
+               fprintf(fp, "thresh2 %u ", thresh2);
+       }
+       if (tb[NDTA_THRESH3]) {
+               __u32 thresh3 = *(__u32 *)RTA_DATA(tb[NDTA_THRESH3]);
+               fprintf(fp, "thresh3 %u ", thresh3);
+       }
+       /*
+        * __u64 is not "unsigned long long" on every ABI, so each 64-bit
+        * value below is cast explicitly to match the %llu conversion.
+        */
+       if (tb[NDTA_GC_INTERVAL]) {
+               __u64 gc_int = *(__u64 *)RTA_DATA(tb[NDTA_GC_INTERVAL]);
+               fprintf(fp, "gc_int %llu ", (unsigned long long)gc_int);
+       }
+
+       if (ret)
+               fprintf(fp, "%s", _SL_);
+
+       if (tb[NDTA_CONFIG] && show_stats) {
+               struct ndt_config *ndtc = RTA_DATA(tb[NDTA_CONFIG]);
+
+               fprintf(fp, "    ");
+               fprintf(fp, "config ");
+
+               fprintf(fp, "key_len %u ", ndtc->ndtc_key_len);
+               fprintf(fp, "entry_size %u ", ndtc->ndtc_entry_size);
+               fprintf(fp, "entries %u ", ndtc->ndtc_entries);
+
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "        ");
+
+               /*
+                * ntable_strtime_delta() returns a static buffer, so the
+                * two timestamps must be printed by separate calls.
+                */
+               fprintf(fp, "last_flush %s ",
+                       ntable_strtime_delta(ndtc->ndtc_last_flush));
+               fprintf(fp, "last_rand %s ",
+                       ntable_strtime_delta(ndtc->ndtc_last_rand));
+
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "        ");
+
+               fprintf(fp, "hash_rnd %u ", ndtc->ndtc_hash_rnd);
+               fprintf(fp, "hash_mask %08x ", ndtc->ndtc_hash_mask);
+
+               fprintf(fp, "hash_chain_gc %u ", ndtc->ndtc_hash_chain_gc);
+               fprintf(fp, "proxy_qlen %u ", ndtc->ndtc_proxy_qlen);
+
+               fprintf(fp, "%s", _SL_);
+       }
+
+       if (tb[NDTA_PARMS]) {
+               if (tpb[NDTPA_IFINDEX]) {
+                       __u32 ifindex = *(__u32 *)RTA_DATA(tpb[NDTPA_IFINDEX]);
+
+                       fprintf(fp, "    ");
+                       fprintf(fp, "dev %s ", ll_index_to_name(ifindex));
+                       fprintf(fp, "%s", _SL_);
+               }
+
+               fprintf(fp, "    ");
+
+               if (tpb[NDTPA_REFCNT]) {
+                       __u32 refcnt = *(__u32 *)RTA_DATA(tpb[NDTPA_REFCNT]);
+                       fprintf(fp, "refcnt %u ", refcnt);
+               }
+               if (tpb[NDTPA_REACHABLE_TIME]) {
+                       __u64 reachable = *(__u64 *)RTA_DATA(tpb[NDTPA_REACHABLE_TIME]);
+                       fprintf(fp, "reachable %llu ", (unsigned long long)reachable);
+               }
+               if (tpb[NDTPA_BASE_REACHABLE_TIME]) {
+                       __u64 breachable = *(__u64 *)RTA_DATA(tpb[NDTPA_BASE_REACHABLE_TIME]);
+                       fprintf(fp, "base_reachable %llu ", (unsigned long long)breachable);
+               }
+               if (tpb[NDTPA_RETRANS_TIME]) {
+                       __u64 retrans = *(__u64 *)RTA_DATA(tpb[NDTPA_RETRANS_TIME]);
+                       fprintf(fp, "retrans %llu ", (unsigned long long)retrans);
+               }
+
+               fprintf(fp, "%s", _SL_);
+
+               fprintf(fp, "    ");
+
+               if (tpb[NDTPA_GC_STALETIME]) {
+                       __u64 gc_stale = *(__u64 *)RTA_DATA(tpb[NDTPA_GC_STALETIME]);
+                       fprintf(fp, "gc_stale %llu ", (unsigned long long)gc_stale);
+               }
+               if (tpb[NDTPA_DELAY_PROBE_TIME]) {
+                       __u64 delay_probe = *(__u64 *)RTA_DATA(tpb[NDTPA_DELAY_PROBE_TIME]);
+                       fprintf(fp, "delay_probe %llu ", (unsigned long long)delay_probe);
+               }
+               if (tpb[NDTPA_QUEUE_LEN]) {
+                       __u32 queue = *(__u32 *)RTA_DATA(tpb[NDTPA_QUEUE_LEN]);
+                       fprintf(fp, "queue %u ", queue);
+               }
+
+               fprintf(fp, "%s", _SL_);
+
+               fprintf(fp, "    ");
+
+               if (tpb[NDTPA_APP_PROBES]) {
+                       __u32 aprobe = *(__u32 *)RTA_DATA(tpb[NDTPA_APP_PROBES]);
+                       fprintf(fp, "app_probes %u ", aprobe);
+               }
+               if (tpb[NDTPA_UCAST_PROBES]) {
+                       __u32 uprobe = *(__u32 *)RTA_DATA(tpb[NDTPA_UCAST_PROBES]);
+                       fprintf(fp, "ucast_probes %u ", uprobe);
+               }
+               if (tpb[NDTPA_MCAST_PROBES]) {
+                       __u32 mprobe = *(__u32 *)RTA_DATA(tpb[NDTPA_MCAST_PROBES]);
+                       fprintf(fp, "mcast_probes %u ", mprobe);
+               }
+
+               fprintf(fp, "%s", _SL_);
+
+               fprintf(fp, "    ");
+
+               if (tpb[NDTPA_ANYCAST_DELAY]) {
+                       __u64 anycast_delay = *(__u64 *)RTA_DATA(tpb[NDTPA_ANYCAST_DELAY]);
+                       fprintf(fp, "anycast_delay %llu ", (unsigned long long)anycast_delay);
+               }
+               if (tpb[NDTPA_PROXY_DELAY]) {
+                       __u64 proxy_delay = *(__u64 *)RTA_DATA(tpb[NDTPA_PROXY_DELAY]);
+                       fprintf(fp, "proxy_delay %llu ", (unsigned long long)proxy_delay);
+               }
+               if (tpb[NDTPA_PROXY_QLEN]) {
+                       __u32 pqueue = *(__u32 *)RTA_DATA(tpb[NDTPA_PROXY_QLEN]);
+                       fprintf(fp, "proxy_queue %u ", pqueue);
+               }
+               if (tpb[NDTPA_LOCKTIME]) {
+                       __u64 locktime = *(__u64 *)RTA_DATA(tpb[NDTPA_LOCKTIME]);
+                       fprintf(fp, "locktime %llu ", (unsigned long long)locktime);
+               }
+
+               fprintf(fp, "%s", _SL_);
+       }
+
+       if (tb[NDTA_STATS] && show_stats) {
+               struct ndt_stats *ndts = RTA_DATA(tb[NDTA_STATS]);
+
+               fprintf(fp, "    ");
+               fprintf(fp, "stats ");
+
+               fprintf(fp, "allocs %llu ", (unsigned long long)ndts->ndts_allocs);
+               fprintf(fp, "destroys %llu ", (unsigned long long)ndts->ndts_destroys);
+               fprintf(fp, "hash_grows %llu ", (unsigned long long)ndts->ndts_hash_grows);
+
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "        ");
+
+               fprintf(fp, "res_failed %llu ", (unsigned long long)ndts->ndts_res_failed);
+               fprintf(fp, "lookups %llu ", (unsigned long long)ndts->ndts_lookups);
+               fprintf(fp, "hits %llu ", (unsigned long long)ndts->ndts_hits);
+
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "        ");
+
+               fprintf(fp, "rcv_probes_mcast %llu ",
+                       (unsigned long long)ndts->ndts_rcv_probes_mcast);
+               fprintf(fp, "rcv_probes_ucast %llu ",
+                       (unsigned long long)ndts->ndts_rcv_probes_ucast);
+
+               fprintf(fp, "%s", _SL_);
+               fprintf(fp, "        ");
+
+               fprintf(fp, "periodic_gc_runs %llu ",
+                       (unsigned long long)ndts->ndts_periodic_gc_runs);
+               fprintf(fp, "forced_gc_runs %llu ",
+                       (unsigned long long)ndts->ndts_forced_gc_runs);
+
+               fprintf(fp, "%s", _SL_);
+       }
+
+       fprintf(fp, "\n");
+
+       fflush(fp);
+       return 0;
+}
+
+/* Return the file-scope filter to its all-zero state. */
+void ipntable_reset_filter(void)
+{
+       memset(&filter, 0, sizeof filter);
+}
+
+/*
+ * Handle "ip ntable show": parse the optional "dev DEV" / "name NAME"
+ * selectors into the file-scope filter, request a neighbour-table dump
+ * on the global rth and print every matching table with print_ntable().
+ *
+ * "dev none" selects entries that carry no device index (NONE_DEV).
+ *
+ * Returns 0 on success.  Exits with status 1 when the dump request
+ * cannot be sent or the dump terminates with an error.
+ */
+static int ipntable_show(int argc, char **argv)
+{
+       ipntable_reset_filter();
+
+       filter.family = preferred_family;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "dev") == 0) {
+                       NEXT_ARG();
+
+                       if (strcmp("none", *argv) == 0)
+                               filter.index = NONE_DEV;
+                       else if ((filter.index = ll_name_to_index(*argv)) == 0)
+                               invarg("\"DEV\" is invalid", *argv);
+               } else if (strcmp(*argv, "name") == 0) {
+                       NEXT_ARG();
+
+                       /*
+                        * strncpy() does not terminate the destination when
+                        * the source fills it; print_ntable() runs strlen()
+                        * and strcmp() on filter.name, so copy at most
+                        * size-1 bytes and terminate explicitly.
+                        */
+                       strncpy(filter.name, *argv, sizeof(filter.name) - 1);
+                       filter.name[sizeof(filter.name) - 1] = '\0';
+               } else
+                       invarg("unknown", *argv);
+
+               argc--; argv++;
+       }
+
+       if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETNEIGHTBL) < 0) {
+               perror("Cannot send dump request");
+               exit(1);
+       }
+
+       if (rtnl_dump_filter(&rth, print_ntable, stdout, NULL, NULL) < 0) {
+               fprintf(stderr, "Dump terminated\n");
+               exit(1);
+       }
+
+       return 0;
+}
+
+/*
+ * Entry point for "ip ntable": initialise the link map on the global
+ * rth, then dispatch on the first word.
+ *
+ *   change | chg        -> ipntable_modify(RTM_SETNEIGHTBL, NLM_F_REPLACE, ...)
+ *   show | lst | list   -> ipntable_show(...)
+ *   help                -> usage()
+ *   (no arguments)      -> ipntable_show(0, NULL)
+ *
+ * Returns the handler's result; an unrecognised word is reported on
+ * stderr and the process exits with status -1.
+ */
+int do_ipntable(int argc, char **argv)
+{
+       char *verb;
+
+       ll_init_map(&rth);
+
+       /* bare "ip ntable" behaves like "ip ntable show" */
+       if (argc <= 0)
+               return ipntable_show(0, NULL);
+
+       verb = *argv;
+
+       if (!matches(verb, "change") || !matches(verb, "chg"))
+               return ipntable_modify(RTM_SETNEIGHTBL, NLM_F_REPLACE,
+                                      argc - 1, argv + 1);
+
+       if (!matches(verb, "show") ||
+           !matches(verb, "lst") ||
+           !matches(verb, "list"))
+               return ipntable_show(argc - 1, argv + 1);
+
+       if (!matches(verb, "help"))
+               usage();
+
+       fprintf(stderr, "Command \"%s\" is unknown, try \"ip ntable help\".\n", verb);
+       exit(-1);
+}
index 1e23e49..a43c09e 100644 (file)
@@ -28,6 +28,7 @@
 #include <netinet/ip.h>
 #include <arpa/inet.h>
 #include <linux/in_route.h>
+#include <linux/ip_mp_alg.h>
 
 #include "rt_names.h"
 #include "utils.h"
@@ -53,6 +54,7 @@ static void usage(void)
        fprintf(stderr, "NODE_SPEC := [ TYPE ] PREFIX [ tos TOS ]\n");
        fprintf(stderr, "             [ table TABLE_ID ] [ proto RTPROTO ]\n");
        fprintf(stderr, "             [ scope SCOPE ] [ metric METRIC ]\n");
+       fprintf(stderr, "             [ mpath MP_ALGO ]\n");
        fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
        fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
        fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
@@ -64,6 +66,7 @@ static void usage(void)
        fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
        fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n");
        fprintf(stderr, "FLAGS := [ equalize ]\n");
+       fprintf(stderr, "MP_ALGO := { rr | drr | random | wrandom }\n");
        fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
        fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
        exit(-1);
@@ -77,7 +80,6 @@ static struct
        char *flushb;
        int flushp;
        int flushe;
-       struct rtnl_handle *rth;
        int protocol, protocolmask;
        int scope, scopemask;
        int type, typemask;
@@ -93,9 +95,17 @@ static struct
        inet_prefix msrc;
 } filter;
 
+/*
+ * Multipath algorithm names, indexed by IP_MP_ALG_* value.  The table has
+ * IP_MP_ALG_MAX+1 slots, so IP_MP_ALG_MAX itself is a valid index.
+ */
+static char *mp_alg_names[IP_MP_ALG_MAX+1] = {
+       [IP_MP_ALG_NONE] = "none",
+       [IP_MP_ALG_RR] = "rr",
+       [IP_MP_ALG_DRR] = "drr",
+       [IP_MP_ALG_RANDOM] = "random",
+       [IP_MP_ALG_WRANDOM] = "wrandom"
+};
+
 static int flush_update(void)
 {
-       if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
+       if (rtnl_send(&rth, filter.flushb, filter.flushp) < 0) {
                perror("Failed to send flush request\n");
                return -1;
        }
@@ -206,13 +216,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                memset(&via, 0, sizeof(via));
                via.family = r->rtm_family;
                if (tb[RTA_GATEWAY])
-                       memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len);
+                       memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8);
        }
        if (filter.rprefsrc.bitlen>0) {
                memset(&prefsrc, 0, sizeof(prefsrc));
                prefsrc.family = r->rtm_family;
                if (tb[RTA_PREFSRC])
-                       memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len);
+                       memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len/8);
        }
 
        if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen))
@@ -270,7 +280,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                memcpy(fn, n, n->nlmsg_len);
                fn->nlmsg_type = RTM_DELROUTE;
                fn->nlmsg_flags = NLM_F_REQUEST;
-               fn->nlmsg_seq = ++filter.rth->seq;
+               fn->nlmsg_seq = ++rth.seq;
                filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
                filter.flushed++;
                if (show_stats < 2)
@@ -324,6 +334,15 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                SPRINT_BUF(b1);
                fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
        }
+
+       /* Show the multipath algorithm unless it is IP_MP_ALG_NONE. */
+       if (tb[RTA_MP_ALGO]) {
+               __u32 mp_alg = *(__u32*) RTA_DATA(tb[RTA_MP_ALGO]);
+               if (mp_alg > IP_MP_ALG_NONE) {
+                       /*
+                        * mp_alg_names[] has IP_MP_ALG_MAX+1 entries, so
+                        * IP_MP_ALG_MAX is still a valid index; only larger
+                        * values (or an unnamed slot) are "unknown".
+                        */
+                       fprintf(fp, "mpath %s ",
+                           (mp_alg <= IP_MP_ALG_MAX && mp_alg_names[mp_alg]) ?
+                           mp_alg_names[mp_alg] : "unknown");
+               }
+       }
+
+
        if (tb[RTA_GATEWAY] && filter.rvia.bitlen != host_len) {
                fprintf(fp, "via %s ", 
                        format_host(r->rtm_family,
@@ -616,9 +635,6 @@ int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv)
                }
                memset(rtnh, 0, sizeof(*rtnh));
                rtnh->rtnh_len = sizeof(*rtnh);
-               rtnh->rtnh_ifindex = 0;
-               rtnh->rtnh_flags = 0;
-               rtnh->rtnh_hops = 0;
                rta->rta_len += rtnh->rtnh_len;
                parse_one_nh(rta, rtnh, &argc, &argv);
                rtnh = RTNH_NEXT(rtnh);
@@ -632,7 +648,6 @@ int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv)
 
 int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
 {
-       struct rtnl_handle rth;
        struct {
                struct nlmsghdr         n;
                struct rtmsg            r;
@@ -709,7 +724,7 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                                invarg("\"metric\" value is invalid\n", *argv);
                        addattr32(&req.n, sizeof(req), RTA_PRIORITY, metric);
                } else if (strcmp(*argv, "scope") == 0) {
-                       int scope = 0;
+                       __u32 scope = 0;
                        NEXT_ARG();
                        if (rtnl_rtscope_a2n(&scope, *argv))
                                invarg("invalid \"scope\" value\n", *argv);
@@ -814,14 +829,14 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                        nhs_ok = 1;
                        break;
                } else if (matches(*argv, "protocol") == 0) {
-                       int prot;
+                       __u32 prot;
                        NEXT_ARG();
                        if (rtnl_rtprot_a2n(&prot, *argv))
                                invarg("\"protocol\" value is invalid\n", *argv);
                        req.r.rtm_protocol = prot;
                        proto_ok =1;
                } else if (matches(*argv, "table") == 0) {
-                       int tid;
+                       __u32 tid;
                        NEXT_ARG();
                        if (rtnl_rttable_a2n(&tid, *argv))
                                invarg("\"table\" value is invalid\n", *argv);
@@ -831,6 +846,18 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                           strcmp(*argv, "oif") == 0) {
                        NEXT_ARG();
                        d = *argv;
+               } else if (strcmp(*argv, "mpath") == 0 ||
+                          strcmp(*argv, "mp") == 0) {
+                       int i;
+                       __u32 mp_alg = IP_MP_ALG_NONE;
+
+                       NEXT_ARG();
+                       for (i = 1; i < ARRAY_SIZE(mp_alg_names); i++)
+                               if (strcmp(*argv, mp_alg_names[i]) == 0)
+                                       mp_alg = i;
+                       if (mp_alg == IP_MP_ALG_NONE)
+                               invarg("\"mpath\" value is invalid\n", *argv);
+                       addattr_l(&req.n, sizeof(req), RTA_MP_ALGO, &mp_alg, sizeof(mp_alg));
                } else {
                        int type;
                        inet_prefix dst;
@@ -860,9 +887,6 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                argc--; argv++;
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        if (d || nhs_ok)  {
                int idx;
 
@@ -969,7 +993,6 @@ static int iproute_flush_cache(void)
 static int iproute_list_or_flush(int argc, char **argv, int flush)
 {
        int do_ipv6 = preferred_family;
-       struct rtnl_handle rth;
        char *id = NULL;
        char *od = NULL;
 
@@ -983,7 +1006,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
 
        while (argc > 0) {
                if (matches(*argv, "table") == 0) {
-                       int tid;
+                       __u32 tid;
                        NEXT_ARG();
                        if (rtnl_rttable_a2n(&tid, *argv)) {
                                if (strcmp(*argv, "all") == 0) {
@@ -1009,7 +1032,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
                        filter.tos = tos;
                        filter.tosmask = -1;
                } else if (matches(*argv, "protocol") == 0) {
-                       int prot = 0;
+                       __u32 prot = 0;
                        NEXT_ARG();
                        filter.protocolmask = -1;
                        if (rtnl_rtprot_a2n(&prot, *argv)) {
@@ -1020,7 +1043,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
                        }
                        filter.protocol = prot;
                } else if (matches(*argv, "scope") == 0) {
-                       int scope = 0;
+                       __u32 scope = 0;
                        NEXT_ARG();
                        filter.scopemask = -1;
                        if (rtnl_rtscope_a2n(&scope, *argv)) {
@@ -1103,9 +1126,6 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
        if (do_ipv6 == AF_UNSPEC && filter.tb)
                do_ipv6 = AF_INET;
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        ll_init_map(&rth);
 
        if (id || od)  {
@@ -1147,7 +1167,6 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
                filter.flushb = flushb;
                filter.flushp = 0;
                filter.flushe = sizeof(flushb);
-               filter.rth = &rth;
 
                for (;;) {
                        if (rtnl_wilddump_request(&rth, do_ipv6, RTM_GETROUTE) < 0) {
@@ -1208,7 +1227,6 @@ static int iproute_list_or_flush(int argc, char **argv, int flush)
 
 int iproute_get(int argc, char **argv)
 {
-       struct rtnl_handle rth;
        struct {
                struct nlmsghdr         n;
                struct rtmsg            r;
@@ -1288,9 +1306,6 @@ int iproute_get(int argc, char **argv)
                exit(1);
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               exit(1);
-
        ll_init_map(&rth);
 
        if (idev || odev)  {
index 764edc8..ccf699f 100644 (file)
@@ -28,6 +28,8 @@
 #include "rt_names.h"
 #include "utils.h"
 
+extern struct rtnl_handle rth;
+
 static void usage(void) __attribute__((noreturn));
 
 static void usage(void)
@@ -161,7 +163,6 @@ static int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n,
 
 static int iprule_list(int argc, char **argv)
 {
-       struct rtnl_handle rth;
        int af = preferred_family;
 
        if (af == AF_UNSPEC)
@@ -172,9 +173,6 @@ static int iprule_list(int argc, char **argv)
                return -1;
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               return 1;
-
        if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
                perror("Cannot send dump request");
                return 1;
@@ -192,7 +190,6 @@ static int iprule_list(int argc, char **argv)
 static int iprule_modify(int cmd, int argc, char **argv)
 {
        int table_ok = 0;
-       struct rtnl_handle rth;
        struct {
                struct nlmsghdr         n;
                struct rtmsg            r;
@@ -256,7 +253,7 @@ static int iprule_modify(int cmd, int argc, char **argv)
                        addattr32(&req.n, sizeof(req), RTA_FLOW, realm);
                } else if (matches(*argv, "table") == 0 ||
                           strcmp(*argv, "lookup") == 0) {
-                       int tid;
+                       __u32 tid;
                        NEXT_ARG();
                        if (rtnl_rttable_a2n(&tid, *argv))
                                invarg("invalid table ID\n", *argv);
@@ -294,9 +291,6 @@ static int iprule_modify(int cmd, int argc, char **argv)
        if (!table_ok && cmd == RTM_NEWRULE)
                req.r.rtm_table = RT_TABLE_MAIN;
 
-       if (rtnl_open(&rth, 0) < 0)
-               return 1;
-
        if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
                return 2;
 
@@ -306,7 +300,7 @@ static int iprule_modify(int cmd, int argc, char **argv)
 
 static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
-       struct rtnl_handle rth;
+       struct rtnl_handle rth2;
        struct rtmsg *r = NLMSG_DATA(n);
        int len = n->nlmsg_len;
        struct rtattr * tb[RTA_MAX+1];
@@ -321,11 +315,13 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *a
                n->nlmsg_type = RTM_DELRULE;
                n->nlmsg_flags = NLM_F_REQUEST;
 
-               if (rtnl_open(&rth, 0) < 0)
+               if (rtnl_open(&rth2, 0) < 0)
                        return -1;
 
-               if (rtnl_talk(&rth, n, 0, 0, NULL, NULL, NULL) < 0)
+               if (rtnl_talk(&rth2, n, 0, 0, NULL, NULL, NULL) < 0)
                        return -2;
+
+               rtnl_close(&rth2);
        }
 
        return 0;
@@ -333,20 +329,16 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *a
 
 static int iprule_flush(int argc, char **argv)
 {
-       struct rtnl_handle rth;
        int af = preferred_family;
 
        if (af == AF_UNSPEC)
                af = AF_INET;
 
        if (argc > 0) {
-               fprintf(stderr, "\"ip rule flush\" need not any arguments.\n");
+               fprintf(stderr, "\"ip rule flush\" does not allow arguments\n");
                return -1;
        }
 
-       if (rtnl_open(&rth, 0) < 0)
-               return 1;
-
        if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
                perror("Cannot send dump request");
                return 1;
index fc0f0d9..8baaabd 100644 (file)
 #include "utils.h"
 #include "xfrm.h"
 
+#define STRBUF_SIZE    (128)
+/*
+ * Append str to the char array buf, truncating as needed so that buf stays
+ * NUL-terminated.  buf must be a real array (not a pointer) because its
+ * capacity is taken with sizeof().  There is deliberately no semicolon
+ * after while(0): the caller supplies it, so the macro behaves as a single
+ * statement even in an unbraced if/else.
+ */
+#define STRBUF_CAT(buf, str) \
+       do { \
+               int rest = sizeof(buf) - 1 - strlen(buf); \
+               if (rest > 0) { \
+                       int len = strlen(str); \
+                       if (len > rest) \
+                               len = rest; \
+                       strncat(buf, str, len); \
+                       buf[sizeof(buf) - 1] = '\0'; \
+               } \
+       } while(0)
+
 struct xfrm_filter filter;
 
 static void usage(void) __attribute__((noreturn));
@@ -48,7 +61,7 @@ static void usage(void)
 {
        fprintf(stderr, 
                "Usage: ip xfrm XFRM_OBJECT { COMMAND | help }\n"
-               "where  XFRM_OBJECT := { state | policy }\n");
+               "where  XFRM_OBJECT := { state | policy | monitor }\n");
        exit(-1);
 }
 
@@ -227,12 +240,12 @@ const char *strxf_proto(__u8 proto)
 
 void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id,
                        __u8 mode, __u32 reqid, __u16 family, int force_spi,
-                       FILE *fp, const char *prefix)
+                       FILE *fp, const char *prefix, const char *title)
 {
        char abuf[256];
 
-       if (prefix)
-               fprintf(fp, prefix);
+       if (title)
+               fprintf(fp, title);
 
        memset(abuf, '\0', sizeof(abuf));
        fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr),
@@ -248,7 +261,6 @@ void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id,
 
        fprintf(fp, "proto %s ", strxf_xfrmproto(id->proto));
 
-
        if (show_stats > 0 || force_spi || id->spi) {
                __u32 spi = ntohl(id->spi);
                fprintf(fp, "spi 0x%08x", spi);
@@ -433,6 +445,7 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family,
        case IPPROTO_TCP:
        case IPPROTO_UDP:
        case IPPROTO_SCTP:
+       case IPPROTO_DCCP:
        default: /* XXX */
                if (sel->sport_mask)
                        fprintf(fp, "sport %u ", ntohs(sel->sport));
@@ -519,9 +532,8 @@ static void xfrm_tmpl_print(struct xfrm_user_tmpl *tmpls, int len,
                if (prefix)
                        fprintf(fp, prefix);
 
-               fprintf(fp, "tmpl");
                xfrm_id_info_print(&tmpl->saddr, &tmpl->id, tmpl->mode,
-                                  tmpl->reqid, family, 0, fp, prefix);
+                                  tmpl->reqid, family, 0, fp, prefix, "tmpl ");
 
                if (show_stats > 0 || tmpl->optional) {
                        if (prefix)
@@ -630,6 +642,125 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
        }
 }
 
+/* Return non-zero when every byte of the selector *s is zero. */
+static int xfrm_selector_iszero(struct xfrm_selector *s)
+{
+       struct xfrm_selector zero;
+       int differs;
+
+       /* Build an all-zero reference object and compare byte for byte. */
+       memset(&zero, 0, sizeof(zero));
+       differs = memcmp(&zero, s, sizeof(zero));
+
+       return differs == 0;
+}
+
+/*
+ * Print one SA (struct xfrm_usersa_info) and its attributes to fp.
+ *
+ * xsinfo: the state to print.
+ * tb:     attribute table handed on to xfrm_xfrma_print().
+ * prefix: optional (may be NULL) string; prefix followed by a tab is
+ *         written before the replay-window line and passed as the prefix
+ *         to the attribute, selector, lifetime and stats printers.
+ * title:  passed through to xfrm_id_info_print().
+ *
+ * The sequence number, lifetime and statistics are shown only when
+ * show_stats > 0; the selector is shown only when it is not all-zero.
+ */
+void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo,
+                           struct rtattr *tb[], FILE *fp, const char *prefix,
+                           const char *title)
+{
+       char buf[STRBUF_SIZE];
+
+       memset(buf, '\0', sizeof(buf));
+
+       xfrm_id_info_print(&xsinfo->saddr, &xsinfo->id, xsinfo->mode,
+                          xsinfo->reqid, xsinfo->family, 1, fp, prefix,
+                          title);
+
+       if (prefix)
+               STRBUF_CAT(buf, prefix);
+       STRBUF_CAT(buf, "\t");
+
+       /* buf is data, not a format string: never hand it to fprintf as one. */
+       fputs(buf, fp);
+       fprintf(fp, "replay-window %u ", xsinfo->replay_window);
+       if (show_stats > 0)
+               /* Hex digits, to agree with the "0x" prefix. */
+               fprintf(fp, "seq 0x%08x ", xsinfo->seq);
+       if (show_stats > 0 || xsinfo->flags) {
+               __u8 flags = xsinfo->flags;
+
+               fprintf(fp, "flag ");
+               XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_NOECN, "noecn");
+               XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_DECAP_DSCP, "decap-dscp");
+               /* Whatever is still set in flags here is printed in hex. */
+               if (flags)
+                       fprintf(fp, "%x", flags);
+               if (show_stats > 0)
+                       fprintf(fp, " (0x%s)", strxf_mask8(flags));
+       }
+       fprintf(fp, "%s", _SL_);
+
+       xfrm_xfrma_print(tb, xsinfo->family, fp, buf);
+
+       if (!xfrm_selector_iszero(&xsinfo->sel)) {
+               char sbuf[STRBUF_SIZE];
+
+               /* Selector line prefix: the common prefix plus "sel ". */
+               memcpy(sbuf, buf, sizeof(sbuf));
+               STRBUF_CAT(sbuf, "sel ");
+
+               xfrm_selector_print(&xsinfo->sel, xsinfo->family, fp, sbuf);
+       }
+
+       if (show_stats > 0) {
+               xfrm_lifetime_print(&xsinfo->lft, &xsinfo->curlft, fp, buf);
+               xfrm_stats_print(&xsinfo->stats, fp, buf);
+       }
+}
+
+/*
+ * Print one policy (struct xfrm_userpolicy_info) and its attributes to fp.
+ *
+ * xpinfo: the policy to print.
+ * tb:     attribute table handed on to xfrm_xfrma_print().
+ * prefix: optional (may be NULL) string; prefix followed by a tab is
+ *         written before the "dir" line and passed as the prefix to the
+ *         lifetime and attribute printers.
+ * title:  passed to xfrm_selector_print() as the prefix of the selector.
+ *
+ * "action allow", share, flag and the lifetime are shown only when
+ * show_stats > 0; index is shown when show_stats is non-zero.
+ */
+void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo,
+                           struct rtattr *tb[], FILE *fp, const char *prefix,
+                           const char *title)
+{
+       char buf[STRBUF_SIZE];
+
+       memset(buf, '\0', sizeof(buf));
+
+       xfrm_selector_print(&xpinfo->sel, preferred_family, fp, title);
+
+       if (prefix)
+               STRBUF_CAT(buf, prefix);
+       STRBUF_CAT(buf, "\t");
+
+       /* buf is data, not a format string: never hand it to fprintf as one. */
+       fputs(buf, fp);
+       fprintf(fp, "dir ");
+       switch (xpinfo->dir) {
+       case XFRM_POLICY_IN:
+               fprintf(fp, "in");
+               break;
+       case XFRM_POLICY_OUT:
+               fprintf(fp, "out");
+               break;
+       case XFRM_POLICY_FWD:
+               fprintf(fp, "fwd");
+               break;
+       default:
+               /* Unrecognised direction: show the raw number. */
+               fprintf(fp, "%u", xpinfo->dir);
+               break;
+       }
+       fprintf(fp, " ");
+
+       switch (xpinfo->action) {
+       case XFRM_POLICY_ALLOW:
+               /* "allow" is only spelled out in verbose mode. */
+               if (show_stats > 0)
+                       fprintf(fp, "action allow ");
+               break;
+       case XFRM_POLICY_BLOCK:
+               fprintf(fp, "action block ");
+               break;
+       default:
+               fprintf(fp, "action %u ", xpinfo->action);
+               break;
+       }
+
+       if (show_stats)
+               fprintf(fp, "index %u ", xpinfo->index);
+       fprintf(fp, "priority %u ", xpinfo->priority);
+       if (show_stats > 0) {
+               fprintf(fp, "share %s ", strxf_share(xpinfo->share));
+               fprintf(fp, "flag 0x%s", strxf_mask8(xpinfo->flags));
+       }
+       fprintf(fp, "%s", _SL_);
+
+       if (show_stats > 0)
+               xfrm_lifetime_print(&xpinfo->lft, &xpinfo->curlft, fp, buf);
+
+       xfrm_xfrma_print(tb, xpinfo->sel.family, fp, buf);
+}
+
 int xfrm_id_parse(xfrm_address_t *saddr, struct xfrm_id *id, __u16 *family,
                  int loose, int *argcp, char ***argvp)
 {
@@ -869,6 +1000,7 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel,
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                case IPPROTO_SCTP:
+               case IPPROTO_DCCP:
                        break;
                default:
                        fprintf(stderr, "\"sport\" and \"dport\" are invalid with proto=%s\n", strxf_proto(sel->proto));
@@ -1038,10 +1170,12 @@ int do_xfrm(int argc, char **argv)
                usage();
 
        if (matches(*argv, "state") == 0 ||
-           matches(*argv, "sa") == 0) {
+           matches(*argv, "sa") == 0)
                return do_xfrm_state(argc-1, argv+1);
-       else if (matches(*argv, "policy") == 0)
+       else if (matches(*argv, "policy") == 0)
                return do_xfrm_policy(argc-1, argv+1);
+       else if (matches(*argv, "monitor") == 0)
+               return do_xfrm_monitor(argc-1, argv+1);
        else if (matches(*argv, "help") == 0) {
                usage();
                fprintf(stderr, "xfrm Object \"%s\" is unknown.\n", *argv);
index fa551b1..4833b36 100644 (file)
--- a/ip/xfrm.h
+++ b/ip/xfrm.h
@@ -32,6 +32,9 @@
 #ifndef IPPROTO_SCTP
 # define IPPROTO_SCTP  132
 #endif
+#ifndef IPPROTO_DCCP
+# define IPPROTO_DCCP  33
+#endif
 
 #define XFRMS_RTA(x)  ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_usersa_info))))
 #define XFRMS_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_usersa_info))
 #define XFRMP_RTA(x)  ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_info))))
 #define XFRMP_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_userpoilcy_info))
 
+#define XFRMSID_RTA(x)  ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_usersa_id))))
+#define XFRMSID_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_usersa_id))
+
+#define XFRMPID_RTA(x)  ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id))))
+#define XFRMPID_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_userpolicy_id))
+
+#define XFRMACQ_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_user_acquire))))
+#define XFRMEXP_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_user_expire))))
+#define XFRMPEXP_RTA(x)        ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire))))
+
 #define XFRM_FLAG_PRINT(fp, flags, f, s) \
        do { \
                if (flags & f) { \
@@ -84,8 +97,13 @@ struct xfrm_filter {
 
 extern struct xfrm_filter filter;
 
+int xfrm_state_print(const struct sockaddr_nl *who, struct nlmsghdr *n,
+                    void *arg);
+int xfrm_policy_print(const struct sockaddr_nl *who, struct nlmsghdr *n,
+                     void *arg);
 int do_xfrm_state(int argc, char **argv);
 int do_xfrm_policy(int argc, char **argv);
+int do_xfrm_monitor(int argc, char **argv);
 
 int xfrm_addr_match(xfrm_address_t *x1, xfrm_address_t *x2, int bits);
 int xfrm_xfrmproto_getbyname(char *name);
@@ -98,7 +116,7 @@ const char *strxf_share(__u8 share);
 const char *strxf_proto(__u8 proto);
 void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id,
                        __u8 mode, __u32 reqid, __u16 family, int force_spi,
-                       FILE *fp, const char *prefix);
+                       FILE *fp, const char *prefix, const char *title);
 void xfrm_stats_print(struct xfrm_stats *s, FILE *fp, const char *prefix);
 void xfrm_lifetime_print(struct xfrm_lifetime_cfg *cfg,
                         struct xfrm_lifetime_cur *cur,
@@ -107,6 +125,12 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family,
                         FILE *fp, const char *prefix);
 void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
                      FILE *fp, const char *prefix);
+void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo,
+                           struct rtattr *tb[], FILE *fp, const char *prefix,
+                          const char *title);
+void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo,
+                           struct rtattr *tb[], FILE *fp, const char *prefix,
+                           const char *title);
 int xfrm_id_parse(xfrm_address_t *saddr, struct xfrm_id *id, __u16 *family,
                  int loose, int *argcp, char ***argvp);
 int xfrm_mode_parse(__u8 *mode, int *argcp, char ***argvp);
diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c
new file mode 100644 (file)
index 0000000..153621f
--- /dev/null
@@ -0,0 +1,218 @@
+/* $USAGI: $ */
+
+/*
+ * Copyright (C)2005 USAGI/WIDE Project
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+/*
+ * based on ipmonitor.c
+ */
+/*
+ * Authors:
+ *     Masahide NAKAMURA @USAGI
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/xfrm.h>
+#include "utils.h"
+#include "xfrm.h"
+#include "ip_common.h"
+
+static void usage(void) __attribute__((noreturn));
+
+/* Write the "ip xfrm monitor" synopsis to stderr and exit with status -1. */
+static void usage(void)
+{
+       fputs("Usage: ip xfrm monitor [ all | LISTofOBJECTS ]\n", stderr);
+       exit(-1);
+}
+
+/*
+ * Print one XFRM_MSG_ACQUIRE netlink message to the FILE passed in arg.
+ *
+ * Returns 0 after printing, and also 0 (with a note on stderr) when n is
+ * not an acquire message; returns -1 when nlmsg_len is too small to hold
+ * a struct xfrm_user_acquire.
+ */
+static int xfrm_acquire_print(const struct sockaddr_nl *who,
+                             struct nlmsghdr *n, void *arg)
+{
+       FILE *fp = (FILE*)arg;
+       struct xfrm_user_acquire *xacq = NLMSG_DATA(n);
+       int len = n->nlmsg_len;
+       struct rtattr * tb[XFRMA_MAX+1];
+       __u16 family;
+
+       if (n->nlmsg_type != XFRM_MSG_ACQUIRE) {
+               fprintf(stderr, "Not an acquire: %08x %08x %08x\n",
+                       n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+               return 0;
+       }
+
+       /* What is left after the fixed header is the attribute area. */
+       len -= NLMSG_LENGTH(sizeof(*xacq));
+       if (len < 0) {
+               fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+               return -1;
+       }
+
+       parse_rtattr(tb, XFRMA_MAX, XFRMACQ_RTA(xacq), len);
+
+       /*
+        * Family: the acquire selector first, then the policy selector,
+        * then the command-line preference.
+        */
+       family = xacq->sel.family;
+       if (family == AF_UNSPEC)
+               family = xacq->policy.sel.family;
+       if (family == AF_UNSPEC)
+               family = preferred_family;
+
+       fprintf(fp, "acquire ");
+
+       fprintf(fp, "proto %s ", strxf_xfrmproto(xacq->id.proto));
+       if (show_stats > 0 || xacq->id.spi) {
+               __u32 spi = ntohl(xacq->id.spi);
+               fprintf(fp, "spi 0x%08x", spi);
+               if (show_stats > 0)
+                       fprintf(fp, "(%u)", spi);
+               fprintf(fp, " ");
+       }
+       fprintf(fp, "%s", _SL_);
+
+       xfrm_selector_print(&xacq->sel, family, fp, "  sel ");
+
+       xfrm_policy_info_print(&xacq->policy, tb, fp, "    ", "  policy ");
+
+       if (show_stats > 0) {
+               /* Hex digits, to agree with the "0x" prefix. */
+               fprintf(fp, "  seq 0x%08x ", xacq->seq);
+               fprintf(fp, "%s-mask %s ",
+                       strxf_algotype(XFRMA_ALG_CRYPT),
+                       strxf_mask32(xacq->ealgos));
+               fprintf(fp, "%s-mask %s ",
+                       strxf_algotype(XFRMA_ALG_AUTH),
+                       strxf_mask32(xacq->aalgos));
+               fprintf(fp, "%s-mask %s",
+                       strxf_algotype(XFRMA_ALG_COMP),
+                       strxf_mask32(xacq->calgos));
+       }
+       fprintf(fp, "%s", _SL_);
+
+       if (oneline)
+               fprintf(fp, "\n");
+       fflush(fp);
+
+       return 0;
+}
+
+/*
+ * Dispatch one received netlink message to the matching printer.
+ * arg is the output FILE.  Always returns 0; the printers' own return
+ * values are not propagated.
+ */
+static int xfrm_accept_msg(const struct sockaddr_nl *who,
+                          struct nlmsghdr *n, void *arg)
+{
+       FILE *out = (FILE*)arg;
+
+       if (timestamp)
+               print_timestamp(out);
+
+       switch (n->nlmsg_type) {
+       case XFRM_MSG_NEWSA:
+       case XFRM_MSG_DELSA:
+       case XFRM_MSG_UPDSA:
+       case XFRM_MSG_EXPIRE:
+               xfrm_state_print(who, n, arg);
+               break;
+
+       case XFRM_MSG_NEWPOLICY:
+       case XFRM_MSG_DELPOLICY:
+       case XFRM_MSG_UPDPOLICY:
+       case XFRM_MSG_POLEXPIRE:
+               xfrm_policy_print(who, n, arg);
+               break;
+
+       case XFRM_MSG_ACQUIRE:
+               xfrm_acquire_print(who, n, arg);
+               break;
+
+       case XFRM_MSG_FLUSHSA:
+               /* XXX: Todo: show proto in xfrm_usersa_flush */
+               fprintf(out, "Flushed state\n");
+               break;
+
+       case XFRM_MSG_FLUSHPOLICY:
+               fprintf(out, "Flushed policy\n");
+               break;
+
+       case NLMSG_ERROR:
+       case NLMSG_NOOP:
+       case NLMSG_DONE:
+               /* Netlink control messages are silently ignored. */
+               break;
+
+       default:
+               fprintf(out, "Unknown message: %08d 0x%08x 0x%08x\n",
+                       n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+               break;
+       }
+
+       return 0;
+}
+
+/*
+ * "ip xfrm monitor": print XFRM netlink events as they arrive.
+ *
+ * Arguments: any of "acquire", "expire", "SA", "policy" restrict the
+ * subscribed groups to the ones named; "all" (or no object at all)
+ * subscribes to every group; "file FILE" replays messages from FILE
+ * instead of listening; "help" prints the usage.
+ *
+ * Returns the result of rtnl_from_file() in file mode, otherwise 0 once
+ * rtnl_listen() returns successfully; exits on errors.
+ */
+int do_xfrm_monitor(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       char *file = NULL;
+       unsigned groups = ~((unsigned)0); /* XXX */
+       int lacquire=0;
+       int lexpire=0;
+       int lpolicy=0;
+       int lsa=0;
+       int lall=0;
+
+       while (argc > 0) {
+               if (matches(*argv, "file") == 0) {
+                       NEXT_ARG();
+                       file = *argv;
+               } else if (matches(*argv, "acquire") == 0) {
+                       lacquire=1;
+                       groups = 0;
+               } else if (matches(*argv, "expire") == 0) {
+                       lexpire=1;
+                       groups = 0;
+               } else if (matches(*argv, "SA") == 0) {
+                       lsa=1;
+                       groups = 0;
+               } else if (matches(*argv, "policy") == 0) {
+                       lpolicy=1;
+                       groups = 0;
+               } else if (matches(*argv, "all") == 0) {
+                       /* Advertised by usage(); applied after the loop. */
+                       lall=1;
+               } else if (matches(*argv, "help") == 0) {
+                       usage();
+               } else {
+                       fprintf(stderr, "Argument \"%s\" is unknown, try \"ip xfrm monitor help\".\n", *argv);
+                       exit(-1);
+               }
+               argc--; argv++;
+       }
+
+       if (lacquire)
+               groups |= XFRMGRP_ACQUIRE;
+       if (lexpire)
+               groups |= XFRMGRP_EXPIRE;
+       if (lsa)
+               groups |= XFRMGRP_SA;
+       if (lpolicy)
+               groups |= XFRMGRP_POLICY;
+       /* "all" wins regardless of where it appeared on the command line. */
+       if (lall)
+               groups = ~((unsigned)0);
+
+       if (file) {
+               FILE *fp;
+               int err;
+
+               fp = fopen(file, "r");
+               if (fp == NULL) {
+                       perror("Cannot fopen");
+                       exit(-1);
+               }
+               err = rtnl_from_file(fp, xfrm_accept_msg, (void*)stdout);
+               /* Do not leak the stream once the replay is finished. */
+               fclose(fp);
+               return err;
+       }
+
+       //ll_init_map(&rth);
+
+       if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0)
+               exit(1);
+
+       if (rtnl_listen(&rth, xfrm_accept_msg, (void*)stdout) < 0)
+               exit(2);
+
+       return 0;
+}
index c1331a4..433b513 100644 (file)
@@ -35,8 +35,8 @@
 #include "xfrm.h"
 #include "ip_common.h"
 
-//#define NLMSG_FLUSH_BUF_SIZE (4096-512)
-#define NLMSG_FLUSH_BUF_SIZE 8192
+//#define NLMSG_DELETEALL_BUF_SIZE (4096-512)
+#define NLMSG_DELETEALL_BUF_SIZE 8192
 
 /*
  * Receiving buffer defines:
@@ -56,8 +56,9 @@ static void usage(void)
        fprintf(stderr, "Usage: ip xfrm policy { add | update } dir DIR SELECTOR [ index INDEX ] \n");
        fprintf(stderr, "        [ action ACTION ] [ priority PRIORITY ] [ LIMIT-LIST ] [ TMPL-LIST ]\n");
        fprintf(stderr, "Usage: ip xfrm policy { delete | get } dir DIR [ SELECTOR | index INDEX ]\n");
-       fprintf(stderr, "Usage: ip xfrm policy { flush | list } [ dir DIR ] [ SELECTOR ]\n");
+       fprintf(stderr, "Usage: ip xfrm policy { deleteall | list } [ dir DIR ] [ SELECTOR ]\n");
        fprintf(stderr, "        [ index INDEX ] [ action ACTION ] [ priority PRIORITY ]\n");
+       fprintf(stderr, "Usage: ip xfrm policy flush\n");
        fprintf(stderr, "DIR := [ in | out | fwd ]\n");
 
        fprintf(stderr, "SELECTOR := src ADDR[/PLEN] dst ADDR[/PLEN] [ UPSPEC ] [ dev DEV ]\n");
@@ -331,84 +332,87 @@ static int xfrm_policy_filter_match(struct xfrm_userpolicy_info *xpinfo)
        return 1;
 }
 
-static int xfrm_policy_print(const struct sockaddr_nl *who, 
-                            struct nlmsghdr *n, void *arg)
+int xfrm_policy_print(const struct sockaddr_nl *who, struct nlmsghdr *n,
+                     void *arg)
 {
+       struct rtattr * tb[XFRMA_MAX+1];
+       struct rtattr * rta;
+       struct xfrm_userpolicy_info *xpinfo = NULL;
+       struct xfrm_user_polexpire *xpexp = NULL;
+       struct xfrm_userpolicy_id *xpid = NULL;
        FILE *fp = (FILE*)arg;
-       struct xfrm_userpolicy_info *xpinfo = NLMSG_DATA(n);
        int len = n->nlmsg_len;
-       struct rtattr * tb[XFRMA_MAX+1];
 
        if (n->nlmsg_type != XFRM_MSG_NEWPOLICY &&
-           n->nlmsg_type != XFRM_MSG_DELPOLICY) {
+           n->nlmsg_type != XFRM_MSG_DELPOLICY &&
+           n->nlmsg_type != XFRM_MSG_UPDPOLICY &&
+           n->nlmsg_type != XFRM_MSG_POLEXPIRE) {
                fprintf(stderr, "Not a policy: %08x %08x %08x\n",
                        n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
                return 0;
        }
 
-       len -= NLMSG_LENGTH(sizeof(*xpinfo));
+       if (n->nlmsg_type == XFRM_MSG_DELPOLICY)  {
+               xpid = NLMSG_DATA(n);
+               len -= NLMSG_LENGTH(sizeof(*xpid));
+       } else if (n->nlmsg_type == XFRM_MSG_POLEXPIRE) {
+               xpexp = NLMSG_DATA(n);
+               xpinfo = &xpexp->pol;
+               len -= NLMSG_LENGTH(sizeof(*xpexp));
+       } else {
+               xpexp = NULL;
+               xpinfo = NLMSG_DATA(n);
+               len -= NLMSG_LENGTH(sizeof(*xpinfo));
+       }
+
        if (len < 0) {
                fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
                return -1;
        }
 
-       if (!xfrm_policy_filter_match(xpinfo))
+       if (xpinfo && !xfrm_policy_filter_match(xpinfo))
                return 0;
 
-       parse_rtattr(tb, XFRMA_MAX, XFRMP_RTA(xpinfo), len);
-
        if (n->nlmsg_type == XFRM_MSG_DELPOLICY)
                fprintf(fp, "Deleted ");
+       else if (n->nlmsg_type == XFRM_MSG_UPDPOLICY)
+               fprintf(fp, "Updated ");
+       else if (n->nlmsg_type == XFRM_MSG_POLEXPIRE)
+               fprintf(fp, "Expired ");
 
-       xfrm_selector_print(&xpinfo->sel, preferred_family, fp, NULL);
-
-       fprintf(fp, "\t");
-       fprintf(fp, "dir ");
-       switch (xpinfo->dir) {
-       case XFRM_POLICY_IN:
-               fprintf(fp, "in");
-               break;
-       case XFRM_POLICY_OUT:
-               fprintf(fp, "out");
-               break;
-       case XFRM_POLICY_FWD:
-               fprintf(fp, "fwd");
-               break;
-       default:
-               fprintf(fp, "%u", xpinfo->dir);
-               break;
-       }
-       fprintf(fp, " ");
-
-       switch (xpinfo->action) {
-       case XFRM_POLICY_ALLOW:
-               if (show_stats > 0)
-                       fprintf(fp, "action allow ");
-               break;
-       case XFRM_POLICY_BLOCK:
-               fprintf(fp, "action block ");
-               break;
-       default:
-               fprintf(fp, "action %u ", xpinfo->action);
-               break;
-       }
+       if (n->nlmsg_type == XFRM_MSG_DELPOLICY)
+               rta = XFRMPID_RTA(xpid);
+       else if (n->nlmsg_type == XFRM_MSG_POLEXPIRE)
+               rta = XFRMPEXP_RTA(xpexp);
+       else
+               rta = XFRMP_RTA(xpinfo);
+
+       parse_rtattr(tb, XFRMA_MAX, rta, len);
 
-       if (show_stats)
-               fprintf(fp, "index %u ", xpinfo->index);
-       fprintf(fp, "priority %u ", xpinfo->priority);
-       if (show_stats > 0) {
-               fprintf(fp, "share %s ", strxf_share(xpinfo->share));
-               fprintf(fp, "flag 0x%s", strxf_mask8(xpinfo->flags));
+       if (n->nlmsg_type == XFRM_MSG_DELPOLICY) {
+               //xfrm_policy_id_print();
+               if (!tb[XFRMA_POLICY]) {
+                       fprintf(stderr, "Buggy XFRM_MSG_DELPOLICY: no XFRMA_POLICY\n");
+                       return -1;
+               }
+               if (RTA_PAYLOAD(tb[XFRMA_POLICY]) < sizeof(*xpinfo)) {
+                       fprintf(stderr, "Buggy XFRM_MSG_DELPOLICY: too short XFRMA_POLICY len\n");
+                       return -1;
+               }
+               xpinfo = (struct xfrm_userpolicy_info *)RTA_DATA(tb[XFRMA_POLICY]);
        }
-       fprintf(fp, "%s", _SL_);
 
-       if (show_stats > 0)
-               xfrm_lifetime_print(&xpinfo->lft, &xpinfo->curlft, fp, "\t");
+       xfrm_policy_info_print(xpinfo, tb, fp, NULL, NULL);
 
-       xfrm_xfrma_print(tb, xpinfo->sel.family, fp, "\t");
+       if (n->nlmsg_type == XFRM_MSG_POLEXPIRE) {
+               fprintf(fp, "\t");
+               fprintf(fp, "hard %u", xpexp->hard);
+               fprintf(fp, "%s", _SL_);
+       }
 
        if (oneline)
                fprintf(fp, "\n");
+       fflush(fp);
 
        return 0;
 }
@@ -541,7 +545,7 @@ static int xfrm_policy_keep(const struct sockaddr_nl *who,
                return 0;
 
        if (xb->offset > xb->size) {
-               fprintf(stderr, "Flush buffer overflow\n");
+               fprintf(stderr, "Policy buffer overflow\n");
                return -1;
        }
 
@@ -562,7 +566,7 @@ static int xfrm_policy_keep(const struct sockaddr_nl *who,
        return 0;
 }
 
-static int xfrm_policy_list_or_flush(int argc, char **argv, int flush)
+static int xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall)
 {
        char *selp = NULL;
        struct rtnl_handle rth;
@@ -620,9 +624,9 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush)
        if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
                exit(1);
 
-       if (flush) {
+       if (deleteall) {
                struct xfrm_buffer xb;
-               char buf[NLMSG_FLUSH_BUF_SIZE];
+               char buf[NLMSG_DELETEALL_BUF_SIZE];
                int i;
 
                xb.buf = buf;
@@ -634,7 +638,7 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush)
                        xb.nlmsg_count = 0;
 
                        if (show_stats > 1)
-                               fprintf(stderr, "Flush round = %d\n", i);
+                               fprintf(stderr, "Delete-all round = %d\n", i);
 
                        if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETPOLICY) < 0) {
                                perror("Cannot send dump request");
@@ -642,21 +646,21 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush)
                        }
 
                        if (rtnl_dump_filter(&rth, xfrm_policy_keep, &xb, NULL, NULL) < 0) {
-                               fprintf(stderr, "Flush terminated\n");
+                               fprintf(stderr, "Delete-all terminated\n");
                                exit(1);
                        }
                        if (xb.nlmsg_count == 0) {
                                if (show_stats > 1)
-                                       fprintf(stderr, "Flush completed\n");
+                                       fprintf(stderr, "Delete-all completed\n");
                                break;
                        }
 
                        if (rtnl_send(&rth, xb.buf, xb.offset) < 0) {
-                               perror("Failed to send flush request\n");
+                               perror("Failed to send delete-all request");
                                exit(1);
                        }
                        if (show_stats > 1)
-                               fprintf(stderr, "Flushed nlmsg count = %d\n", xb.nlmsg_count);
+                               fprintf(stderr, "Delete-all nlmsg count = %d\n", xb.nlmsg_count);
 
                        xb.offset = 0;
                        xb.nlmsg_count = 0;
@@ -678,7 +682,7 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush)
        exit(0);
 }
 
-static int xfrm_policy_flush_all(void)
+static int xfrm_policy_flush(void)
 {
        struct rtnl_handle rth;
        struct {
@@ -695,7 +699,7 @@ static int xfrm_policy_flush_all(void)
                exit(1);
 
        if (show_stats > 1)
-               fprintf(stderr, "Flush all\n");
+               fprintf(stderr, "Flush policy\n");
 
        if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
                exit(2);
@@ -708,7 +712,7 @@ static int xfrm_policy_flush_all(void)
 int do_xfrm_policy(int argc, char **argv)
 {
        if (argc < 1)
-               return xfrm_policy_list_or_flush(0, NULL, 0);
+               return xfrm_policy_list_or_deleteall(0, NULL, 0);
 
        if (matches(*argv, "add") == 0)
                return xfrm_policy_modify(XFRM_MSG_NEWPOLICY, 0,
@@ -716,19 +720,17 @@ int do_xfrm_policy(int argc, char **argv)
        if (matches(*argv, "update") == 0)
                return xfrm_policy_modify(XFRM_MSG_UPDPOLICY, 0,
                                          argc-1, argv+1);
-       if (matches(*argv, "delete") == 0 || matches(*argv, "del") == 0)
+       if (matches(*argv, "delete") == 0)
                return xfrm_policy_delete(argc-1, argv+1);
+       if (matches(*argv, "deleteall") == 0 || matches(*argv, "delall") == 0)
+               return xfrm_policy_list_or_deleteall(argc-1, argv+1, 1);
        if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0
            || matches(*argv, "lst") == 0)
-               return xfrm_policy_list_or_flush(argc-1, argv+1, 0);
+               return xfrm_policy_list_or_deleteall(argc-1, argv+1, 0);
        if (matches(*argv, "get") == 0)
                return xfrm_policy_get(argc-1, argv+1);
-       if (matches(*argv, "flush") == 0) {
-               if (argc-1 < 1)
-                       return xfrm_policy_flush_all();
-               else
-                       return xfrm_policy_list_or_flush(argc-1, argv+1, 1);
-       }
+       if (matches(*argv, "flush") == 0)
+               return xfrm_policy_flush();
        if (matches(*argv, "help") == 0)
                usage();
        fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm policy help\".\n", *argv);
index b5b6214..3eefaff 100644 (file)
@@ -34,8 +34,8 @@
 #include "xfrm.h"
 #include "ip_common.h"
 
-//#define NLMSG_FLUSH_BUF_SIZE (4096-512)
-#define NLMSG_FLUSH_BUF_SIZE 8192
+//#define NLMSG_DELETEALL_BUF_SIZE (4096-512)
+#define NLMSG_DELETEALL_BUF_SIZE 8192
 
 /*
  * Receiving buffer defines:
@@ -56,11 +56,14 @@ static void usage(void) __attribute__((noreturn));
 static void usage(void)
 {
        fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ ALGO-LIST ] [ mode MODE ]\n");
-       fprintf(stderr, "        [ reqid REQID ] [ replay-window SIZE ] [ flag FLAG-LIST ]\n");
+       fprintf(stderr, "        [ reqid REQID ] [ seq SEQ ] [ replay-window SIZE ] [ flag FLAG-LIST ]\n");
        fprintf(stderr, "        [ encap ENCAP ] [ sel SELECTOR ] [ LIMIT-LIST ]\n");
+       fprintf(stderr, "Usage: ip xfrm state allocspi ID [ mode MODE ] [ reqid REQID ] [ seq SEQ ]\n");
+       fprintf(stderr, "        [ min SPI max SPI ]\n");
        fprintf(stderr, "Usage: ip xfrm state { delete | get } ID\n");
-       fprintf(stderr, "Usage: ip xfrm state { flush | list } [ ID ] [ mode MODE ] [ reqid REQID ]\n");
+       fprintf(stderr, "Usage: ip xfrm state { deleteall | list } [ ID ] [ mode MODE ] [ reqid REQID ]\n");
        fprintf(stderr, "        [ flag FLAG_LIST ]\n");
+       fprintf(stderr, "Usage: ip xfrm state flush [ proto XFRM_PROTO ]\n");
 
        fprintf(stderr, "ID := [ src ADDR ] [ dst ADDR ] [ proto XFRM_PROTO ] [ spi SPI ]\n");
        //fprintf(stderr, "XFRM_PROTO := [ esp | ah | comp ]\n");
@@ -137,7 +140,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type,
 
                for (i = - (plen % 2), j = 0; j < len; i += 2, j++) {
                        char vbuf[3];
-                       char val;
+                       __u8 val;
 
                        vbuf[0] = i >= 0 ? p[i] : '0';
                        vbuf[1] = p[i + 1];
@@ -163,6 +166,22 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type,
        return 0;
 }
 
+static int xfrm_seq_parse(__u32 *seq, int *argcp, char ***argvp)
+{
+       int argc = *argcp;
+       char **argv = *argvp;
+
+       if (get_u32(seq, *argv, 0))
+               invarg("\"SEQ\" is invalid", *argv);
+
+       *seq = htonl(*seq);
+
+       *argcp = argc;
+       *argvp = argv;
+
+       return 0;
+}
+
 static int xfrm_state_flag_parse(__u8 *flags, int *argcp, char ***argvp)
 {
        int argc = *argcp;
@@ -232,6 +251,9 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
                } else if (strcmp(*argv, "reqid") == 0) {
                        NEXT_ARG();
                        xfrm_reqid_parse(&req.xsinfo.reqid, &argc, &argv);
+               } else if (strcmp(*argv, "seq") == 0) {
+                       NEXT_ARG();
+                       xfrm_seq_parse(&req.xsinfo.seq, &argc, &argv);
                } else if (strcmp(*argv, "replay-window") == 0) {
                        NEXT_ARG();
                        if (get_u8(&req.xsinfo.replay_window, *argv, 0))
@@ -372,6 +394,136 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
        return 0;
 }
 
+static int xfrm_state_allocspi(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr         n;
+               struct xfrm_userspi_info xspi;
+               char                    buf[RTA_BUF_SIZE];
+       } req;
+       char *idp = NULL;
+       char *minp = NULL;
+       char *maxp = NULL;
+       char res_buf[NLMSG_BUF_SIZE];
+       struct nlmsghdr *res_n = (struct nlmsghdr *)res_buf;
+
+       memset(res_buf, 0, sizeof(res_buf));
+
+       memset(&req, 0, sizeof(req));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xspi));
+       req.n.nlmsg_flags = NLM_F_REQUEST;
+       req.n.nlmsg_type = XFRM_MSG_ALLOCSPI;
+       req.xspi.info.family = preferred_family;
+
+#if 0
+       req.xsinfo.lft.soft_byte_limit = XFRM_INF;
+       req.xsinfo.lft.hard_byte_limit = XFRM_INF;
+       req.xsinfo.lft.soft_packet_limit = XFRM_INF;
+       req.xsinfo.lft.hard_packet_limit = XFRM_INF;
+#endif
+
+       while (argc > 0) {
+               if (strcmp(*argv, "mode") == 0) {
+                       NEXT_ARG();
+                       xfrm_mode_parse(&req.xspi.info.mode, &argc, &argv);
+               } else if (strcmp(*argv, "reqid") == 0) {
+                       NEXT_ARG();
+                       xfrm_reqid_parse(&req.xspi.info.reqid, &argc, &argv);
+               } else if (strcmp(*argv, "seq") == 0) {
+                       NEXT_ARG();
+                       xfrm_seq_parse(&req.xspi.info.seq, &argc, &argv);
+               } else if (strcmp(*argv, "min") == 0) {
+                       if (minp)
+                               duparg("min", *argv);
+                       minp = *argv;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&req.xspi.min, *argv, 0))
+                               invarg("\"min\" value is invalid", *argv);
+               } else if (strcmp(*argv, "max") == 0) {
+                       if (maxp)
+                               duparg("max", *argv);
+                       maxp = *argv;
+
+                       NEXT_ARG();
+
+                       if (get_u32(&req.xspi.max, *argv, 0))
+                               invarg("\"max\" value is invalid", *argv);
+               } else {
+                       /* try to assume ID */
+                       if (idp)
+                               invarg("unknown", *argv);
+                       idp = *argv;
+
+                       /* ID */
+                       xfrm_id_parse(&req.xspi.info.saddr, &req.xspi.info.id,
+                                     &req.xspi.info.family, 0, &argc, &argv);
+                       if (req.xspi.info.id.spi) {
+                               fprintf(stderr, "\"SPI\" must be zero\n");
+                               exit(1);
+                       }
+                       if (preferred_family == AF_UNSPEC)
+                               preferred_family = req.xspi.info.family;
+               }
+               argc--; argv++;
+       }
+
+       if (!idp) {
+               fprintf(stderr, "Not enough information: \"ID\" is required\n");
+               exit(1);
+       }
+
+       if (minp) {
+               if (!maxp) {
+                       fprintf(stderr, "\"max\" is missing\n");
+                       exit(1);
+               }
+               if (req.xspi.min > req.xspi.max) {
+                       fprintf(stderr, "\"min\" value is larger than \"max\" one\n");
+                       exit(1);
+               }
+       } else {
+               if (maxp) {
+                       fprintf(stderr, "\"min\" is missing\n");
+                       exit(1);
+               }
+
+               /* XXX: Default value defined in PF_KEY;
+                * See kernel's net/key/af_key.c(pfkey_getspi).
+                */
+               req.xspi.min = 0x100;
+               req.xspi.max = 0x0fffffff;
+
+               /* XXX: IPCOMP spi is 16-bits;
+                * See kernel's net/xfrm/xfrm_user(verify_userspi_info).
+                */
+               if (req.xspi.info.id.proto == IPPROTO_COMP)
+                       req.xspi.max = 0xffff;
+       }
+
+       if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
+               exit(1);
+
+       if (req.xspi.info.family == AF_UNSPEC)
+               req.xspi.info.family = AF_INET;
+
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, res_n, NULL, NULL) < 0)
+               exit(2);
+
+       if (xfrm_state_print(NULL, res_n, (void*)stdout) < 0) {
+               fprintf(stderr, "An error :-)\n");
+               exit(1);
+       }
+
+       rtnl_close(&rth);
+
+       return 0;
+}
+
 static int xfrm_state_filter_match(struct xfrm_usersa_info *xsinfo)
 {
        if (!filter.use)
@@ -400,77 +552,89 @@ static int xfrm_state_filter_match(struct xfrm_usersa_info *xsinfo)
        return 1;
 }
 
-static int xfrm_selector_iszero(struct xfrm_selector *s)
-{
-       struct xfrm_selector s0;
-
-       memset(&s0, 0, sizeof(s0));
-
-       return (memcmp(&s0, s, sizeof(s0)) == 0);
-}
-
-static int xfrm_state_print(const struct sockaddr_nl *who,
-                           struct nlmsghdr *n,
-                           void *arg)
+int xfrm_state_print(const struct sockaddr_nl *who, struct nlmsghdr *n,
+                    void *arg)
 {
        FILE *fp = (FILE*)arg;
-       struct xfrm_usersa_info *xsinfo = NLMSG_DATA(n);
-       int len = n->nlmsg_len;
        struct rtattr * tb[XFRMA_MAX+1];
+       struct rtattr * rta;
+       struct xfrm_usersa_info *xsinfo = NULL;
+       struct xfrm_user_expire *xexp = NULL;
+       struct xfrm_usersa_id   *xsid = NULL;
+       int len = n->nlmsg_len;
 
        if (n->nlmsg_type != XFRM_MSG_NEWSA &&
-           n->nlmsg_type != XFRM_MSG_DELSA) {
+           n->nlmsg_type != XFRM_MSG_DELSA &&
+           n->nlmsg_type != XFRM_MSG_UPDSA &&
+           n->nlmsg_type != XFRM_MSG_EXPIRE) {
                fprintf(stderr, "Not a state: %08x %08x %08x\n",
                        n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
                return 0;
        }
 
-       len -= NLMSG_LENGTH(sizeof(*xsinfo));
+       if (n->nlmsg_type == XFRM_MSG_DELSA) {
+               /* Don't blame me for this .. Herbert made me do it */
+               xsid = NLMSG_DATA(n);
+               len -= NLMSG_LENGTH(sizeof(*xsid));
+       } else if (n->nlmsg_type == XFRM_MSG_EXPIRE) {
+               xexp = NLMSG_DATA(n);
+               xsinfo = &xexp->state;
+               len -= NLMSG_LENGTH(sizeof(*xexp));
+       } else {
+               xexp = NULL;
+               xsinfo = NLMSG_DATA(n);
+               len -= NLMSG_LENGTH(sizeof(*xsinfo));
+       }
+
        if (len < 0) {
                fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
                return -1;
        }
 
-       if (!xfrm_state_filter_match(xsinfo))
+       if (xsinfo && !xfrm_state_filter_match(xsinfo))
                return 0;
 
-       parse_rtattr(tb, XFRMA_MAX, XFRMS_RTA(xsinfo), len);
-
        if (n->nlmsg_type == XFRM_MSG_DELSA)
                fprintf(fp, "Deleted ");
+       else if (n->nlmsg_type == XFRM_MSG_UPDSA)
+               fprintf(fp, "Updated ");
+       else if (n->nlmsg_type == XFRM_MSG_EXPIRE)
+               fprintf(fp, "Expired ");
 
-       xfrm_id_info_print(&xsinfo->saddr, &xsinfo->id, xsinfo->mode,
-                          xsinfo->reqid, xsinfo->family, 1, fp, NULL);
-
-       fprintf(fp, "\t");
-       fprintf(fp, "replay-window %u ", xsinfo->replay_window);
-       if (show_stats > 0)
-               fprintf(fp, "seq 0x%08u ", xsinfo->seq);
-       if (show_stats > 0 || xsinfo->flags) {
-               __u8 flags = xsinfo->flags;
-
-               fprintf(fp, "flag ");
-               XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_NOECN, "noecn");
-               XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_DECAP_DSCP, "decap-dscp");
-               if (flags)
-                       fprintf(fp, "%x", flags);
-               if (show_stats > 0)
-                       fprintf(fp, " (0x%s)", strxf_mask8(flags));
-       }
-       fprintf(fp, "%s", _SL_);
+       if (n->nlmsg_type == XFRM_MSG_DELSA)
+               rta = XFRMSID_RTA(xsid);
+       else if (n->nlmsg_type == XFRM_MSG_EXPIRE)
+               rta = XFRMEXP_RTA(xexp);
+       else 
+               rta = XFRMS_RTA(xsinfo);
 
-       xfrm_xfrma_print(tb, xsinfo->family, fp, "\t");
+       parse_rtattr(tb, XFRMA_MAX, rta, len);
 
-       if (!xfrm_selector_iszero(&xsinfo->sel))
-               xfrm_selector_print(&xsinfo->sel, xsinfo->family, fp, "\tsel ");
+       if (n->nlmsg_type == XFRM_MSG_DELSA) {
+               //xfrm_policy_id_print();
 
-       if (show_stats > 0) {
-               xfrm_lifetime_print(&xsinfo->lft, &xsinfo->curlft, fp, "\t");
-               xfrm_stats_print(&xsinfo->stats, fp, "\t");
+               if (!tb[XFRMA_SA]) {
+                       fprintf(stderr, "Buggy XFRM_MSG_DELSA: no XFRMA_SA\n");
+                       return -1;
+               }
+               if (RTA_PAYLOAD(tb[XFRMA_SA]) < sizeof(*xsinfo)) {
+                       fprintf(stderr, "Buggy XFRM_MSG_DELSA: too short XFRMA_SA len\n");
+                       return -1;
+               }
+               xsinfo = (struct xfrm_usersa_info *)RTA_DATA(tb[XFRMA_SA]);
+       }
+
+       xfrm_state_info_print(xsinfo, tb, fp, NULL, NULL);
+
+       if (n->nlmsg_type == XFRM_MSG_EXPIRE) {
+               fprintf(fp, "\t");
+               fprintf(fp, "hard %u", xexp->hard);
+               fprintf(fp, "%s", _SL_);
        }
 
        if (oneline)
                fprintf(fp, "\n");
+       fflush(fp);
 
        return 0;
 }
@@ -576,7 +740,7 @@ static int xfrm_state_keep(const struct sockaddr_nl *who,
                return 0;
 
        if (xb->offset > xb->size) {
-               fprintf(stderr, "Flush buffer overflow\n");
+               fprintf(stderr, "State buffer overflow\n");
                return -1;
        }
 
@@ -598,7 +762,7 @@ static int xfrm_state_keep(const struct sockaddr_nl *who,
        return 0;
 }
 
-static int xfrm_state_list_or_flush(int argc, char **argv, int flush)
+static int xfrm_state_list_or_deleteall(int argc, char **argv, int deleteall)
 {
        char *idp = NULL;
        struct rtnl_handle rth;
@@ -643,9 +807,9 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush)
        if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
                exit(1);
 
-       if (flush) {
+       if (deleteall) {
                struct xfrm_buffer xb;
-               char buf[NLMSG_FLUSH_BUF_SIZE];
+               char buf[NLMSG_DELETEALL_BUF_SIZE];
                int i;
 
                xb.buf = buf;
@@ -657,7 +821,7 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush)
                        xb.nlmsg_count = 0;
 
                        if (show_stats > 1)
-                               fprintf(stderr, "Flush round = %d\n", i);
+                               fprintf(stderr, "Delete-all round = %d\n", i);
 
                        if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETSA) < 0) {
                                perror("Cannot send dump request");
@@ -665,21 +829,21 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush)
                        }
 
                        if (rtnl_dump_filter(&rth, xfrm_state_keep, &xb, NULL, NULL) < 0) {
-                               fprintf(stderr, "Flush terminated\n");
+                               fprintf(stderr, "Delete-all terminated\n");
                                exit(1);
                        }
                        if (xb.nlmsg_count == 0) {
                                if (show_stats > 1)
-                                       fprintf(stderr, "Flush completed\n");
+                                       fprintf(stderr, "Delete-all completed\n");
                                break;
                        }
 
                        if (rtnl_send(&rth, xb.buf, xb.offset) < 0) {
-                               perror("Failed to send flush request\n");
+                               perror("Failed to send delete-all request");
                                exit(1);
                        }
                        if (show_stats > 1)
-                               fprintf(stderr, "Flushed nlmsg count = %d\n", xb.nlmsg_count);
+                               fprintf(stderr, "Delete-all nlmsg count = %d\n", xb.nlmsg_count);
 
                        xb.offset = 0;
                        xb.nlmsg_count = 0;
@@ -702,13 +866,14 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush)
        exit(0);
 }
 
-static int xfrm_state_flush_all(void)
+static int xfrm_state_flush(int argc, char **argv)
 {
        struct rtnl_handle rth;
        struct {
                struct nlmsghdr                 n;
                struct xfrm_usersa_flush        xsf;
        } req;
+       char *protop = NULL;
 
        memset(&req, 0, sizeof(req));
 
@@ -717,11 +882,34 @@ static int xfrm_state_flush_all(void)
        req.n.nlmsg_type = XFRM_MSG_FLUSHSA;
        req.xsf.proto = IPSEC_PROTO_ANY;
 
+       while (argc > 0) {
+               if (strcmp(*argv, "proto") == 0) {
+                       int ret;
+
+                       if (protop)
+                               duparg("proto", *argv);
+                       protop = *argv;
+
+                       NEXT_ARG();
+
+                       ret = xfrm_xfrmproto_getbyname(*argv);
+                       if (ret < 0)
+                               invarg("\"XFRM_PROTO\" is invalid", *argv);
+
+                       req.xsf.proto = (__u8)ret;
+               } else
+                       invarg("unknown", *argv);
+
+               argc--; argv++;
+       }
+
        if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
                exit(1);
 
        if (show_stats > 1)
-               fprintf(stderr, "Flush all\n");
+               fprintf(stderr, "Flush state proto=%s\n",
+                       (req.xsf.proto == IPSEC_PROTO_ANY) ? "any" :
+                       strxf_xfrmproto(req.xsf.proto));
 
        if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
                exit(2);
@@ -734,7 +922,7 @@ static int xfrm_state_flush_all(void)
 int do_xfrm_state(int argc, char **argv)
 {
        if (argc < 1)
-               return xfrm_state_list_or_flush(0, NULL, 0);
+               return xfrm_state_list_or_deleteall(0, NULL, 0);
 
        if (matches(*argv, "add") == 0)
                return xfrm_state_modify(XFRM_MSG_NEWSA, 0,
@@ -742,19 +930,19 @@ int do_xfrm_state(int argc, char **argv)
        if (matches(*argv, "update") == 0)
                return xfrm_state_modify(XFRM_MSG_UPDSA, 0,
                                         argc-1, argv+1);
-       if (matches(*argv, "delete") == 0 || matches(*argv, "del") == 0)
+       if (matches(*argv, "allocspi") == 0)
+               return xfrm_state_allocspi(argc-1, argv+1);
+       if (matches(*argv, "delete") == 0)
                return xfrm_state_get_or_delete(argc-1, argv+1, 1);
+       if (matches(*argv, "deleteall") == 0 || matches(*argv, "delall") == 0)
+               return xfrm_state_list_or_deleteall(argc-1, argv+1, 1);
        if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0
            || matches(*argv, "lst") == 0)
-               return xfrm_state_list_or_flush(argc-1, argv+1, 0);
+               return xfrm_state_list_or_deleteall(argc-1, argv+1, 0);
        if (matches(*argv, "get") == 0)
                return xfrm_state_get_or_delete(argc-1, argv+1, 0);
-       if (matches(*argv, "flush") == 0) {
-               if (argc-1 < 1)
-                       return xfrm_state_flush_all();
-               else
-                       return xfrm_state_list_or_flush(argc-1, argv+1, 1);
-       }
+       if (matches(*argv, "flush") == 0)
+               return xfrm_state_flush(argc-1, argv+1);
        if (matches(*argv, "help") == 0)
                usage();
        fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm state help\".\n", *argv);
index 4cd2b2a..67951fe 100644 (file)
@@ -30,9 +30,10 @@ void rtnl_close(struct rtnl_handle *rth)
        close(rth->fd);
 }
 
-int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions, int protocol)
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions,
+                     int protocol)
 {
-       int addr_len;
+       socklen_t addr_len;
        int sndbuf = 32768;
        int rcvbuf = 32768;
 
@@ -95,6 +96,7 @@ int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type)
        memset(&nladdr, 0, sizeof(nladdr));
        nladdr.nl_family = AF_NETLINK;
 
+       memset(&req, 0, sizeof(req));
        req.nlh.nlmsg_len = sizeof(req);
        req.nlh.nlmsg_type = type;
        req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
@@ -102,7 +104,8 @@ int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type)
        req.nlh.nlmsg_seq = rth->dump = ++rth->seq;
        req.g.rtgen_family = family;
 
-       return sendto(rth->fd, (void*)&req, sizeof(req), 0, (struct sockaddr*)&nladdr, sizeof(nladdr));
+       return sendto(rth->fd, (void*)&req, sizeof(req), 0,
+                     (struct sockaddr*)&nladdr, sizeof(nladdr));
 }
 
 int rtnl_send(struct rtnl_handle *rth, const char *buf, int len)
@@ -119,12 +122,15 @@ int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len)
 {
        struct nlmsghdr nlh;
        struct sockaddr_nl nladdr;
-       struct iovec iov[2] = { { &nlh, sizeof(nlh) }, { req, len } };
+       struct iovec iov[2] = {
+               { .iov_base = &nlh, .iov_len = sizeof(nlh) },
+               { .iov_base = req, .iov_len = len }
+       };
        struct msghdr msg = {
-               (void*)&nladdr, sizeof(nladdr),
-               iov,    2,
-               NULL,   0,
-               0
+               .msg_name = &nladdr,
+               .msg_namelen =  sizeof(nladdr),
+               .msg_iov = iov,
+               .msg_iovlen = 2,
        };
 
        memset(&nladdr, 0, sizeof(nladdr));
@@ -145,21 +151,22 @@ int rtnl_dump_filter(struct rtnl_handle *rth,
                     rtnl_filter_t junk,
                     void *arg2)
 {
-       char    buf[16384];
        struct sockaddr_nl nladdr;
-       struct iovec iov = { buf, sizeof(buf) };
+       struct iovec iov;
+       struct msghdr msg = {
+               .msg_name = &nladdr,
+               .msg_namelen = sizeof(nladdr),
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
+       };
+       char buf[16384];
 
+       iov.iov_base = buf;
        while (1) {
                int status;
                struct nlmsghdr *h;
 
-               struct msghdr msg = {
-                       (void*)&nladdr, sizeof(nladdr),
-                       &iov,   1,
-                       NULL,   0,
-                       0
-               };
-
+               iov.iov_len = sizeof(buf);
                status = recvmsg(rth->fd, &msg, 0);
 
                if (status < 0) {
@@ -168,14 +175,11 @@ int rtnl_dump_filter(struct rtnl_handle *rth,
                        perror("OVERRUN");
                        continue;
                }
+
                if (status == 0) {
                        fprintf(stderr, "EOF on netlink\n");
                        return -1;
                }
-               if (msg.msg_namelen != sizeof(nladdr)) {
-                       fprintf(stderr, "sender address length == %d\n", msg.msg_namelen);
-                       exit(1);
-               }
 
                h = (struct nlmsghdr*)buf;
                while (NLMSG_OK(h, status)) {
@@ -231,14 +235,17 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
        unsigned seq;
        struct nlmsghdr *h;
        struct sockaddr_nl nladdr;
-       struct iovec iov = { (void*)n, n->nlmsg_len };
-       char   buf[16384];
+       struct iovec iov = {
+               .iov_base = (void*) n,
+               .iov_len = n->nlmsg_len
+       };
        struct msghdr msg = {
-               (void*)&nladdr, sizeof(nladdr),
-               &iov,   1,
-               NULL,   0,
-               0
+               .msg_name = &nladdr,
+               .msg_namelen = sizeof(nladdr),
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
        };
+       char   buf[16384];
 
        memset(&nladdr, 0, sizeof(nladdr));
        nladdr.nl_family = AF_NETLINK;
@@ -301,6 +308,9 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
                                        if (err < 0)
                                                return err;
                                }
+                               /* Don't forget to skip that message. */
+                               status -= NLMSG_ALIGN(len);
+                               h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
                                continue;
                        }
 
@@ -340,7 +350,7 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
        }
 }
 
-int rtnl_listen(struct rtnl_handle *rtnl, 
+int rtnl_listen(struct rtnl_handle *rtnl,
                rtnl_filter_t handler,
                void *jarg)
 {
@@ -348,22 +358,20 @@ int rtnl_listen(struct rtnl_handle *rtnl,
        struct nlmsghdr *h;
        struct sockaddr_nl nladdr;
        struct iovec iov;
-       char   buf[8192];
        struct msghdr msg = {
-               (void*)&nladdr, sizeof(nladdr),
-               &iov,   1,
-               NULL,   0,
-               0
+               .msg_name = &nladdr,
+               .msg_namelen = sizeof(nladdr),
+               .msg_iov = &iov,
+               .msg_iovlen = 1,
        };
+       char   buf[8192];
 
        memset(&nladdr, 0, sizeof(nladdr));
        nladdr.nl_family = AF_NETLINK;
        nladdr.nl_pid = 0;
        nladdr.nl_groups = 0;
 
-
        iov.iov_base = buf;
-
        while (1) {
                iov.iov_len = sizeof(buf);
                status = recvmsg(rtnl->fd, &msg, 0);
@@ -485,13 +493,13 @@ int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
        return 0;
 }
 
-int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, 
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
              int alen)
 {
        int len = RTA_LENGTH(alen);
        struct rtattr *rta;
 
-       if (NLMSG_ALIGN(n->nlmsg_len) + len > maxlen) {
+       if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
                fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n",maxlen);
                return -1;
        }
@@ -499,7 +507,7 @@ int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
        rta->rta_type = type;
        rta->rta_len = len;
        memcpy(RTA_DATA(rta), data, alen);
-       n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + len;
+       n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
        return 0;
 }
 
@@ -533,13 +541,13 @@ int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data)
        return 0;
 }
 
-int rta_addattr_l(struct rtattr *rta, int maxlen, int type, 
+int rta_addattr_l(struct rtattr *rta, int maxlen, int type,
                  const void *data, int alen)
 {
        struct rtattr *subrta;
        int len = RTA_LENGTH(alen);
 
-       if (RTA_ALIGN(rta->rta_len) + len > maxlen) {
+       if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len) > maxlen) {
                fprintf(stderr,"rta_addattr_l: Error! max allowed bound %d exceeded\n",maxlen);
                return -1;
        }
@@ -547,7 +555,7 @@ int rta_addattr_l(struct rtattr *rta, int maxlen, int type,
        subrta->rta_type = type;
        subrta->rta_len = len;
        memcpy(RTA_DATA(subrta), data, alen);
-       rta->rta_len = NLMSG_ALIGN(rta->rta_len) + len;
+       rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(len);
        return 0;
 }
 
index ea3d660..581487d 100644 (file)
@@ -53,7 +53,8 @@ const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int
        return buf;
 }
 
-int ll_addr_a2n(unsigned char *lladdr, int len, char *arg)
+/*NB: lladdr is char * (rather than u8 *) because sa_data is char * (1003.1g) */
+int ll_addr_a2n(char *lladdr, int len, char *arg)
 {
        if (strchr(arg, '.')) {
                inet_prefix pfx;
index 89c0d20..1acbf8f 100644 (file)
@@ -17,6 +17,7 @@
 #include <fcntl.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include <net/if.h>
 #include <string.h>
 
 #include "libnetlink.h"
@@ -25,7 +26,7 @@
 struct idxmap
 {
        struct idxmap * next;
-       int             index;
+       unsigned        index;
        int             type;
        int             alen;
        unsigned        flags;
@@ -86,7 +87,7 @@ int ll_remember_index(const struct sockaddr_nl *who,
        return 0;
 }
 
-const char *ll_idx_n2a(int idx, char *buf)
+const char *ll_idx_n2a(unsigned idx, char *buf)
 {
        struct idxmap *im;
 
@@ -100,14 +101,14 @@ const char *ll_idx_n2a(int idx, char *buf)
 }
 
 
-const char *ll_index_to_name(int idx)
+const char *ll_index_to_name(unsigned idx)
 {
        static char nbuf[16];
 
        return ll_idx_n2a(idx, nbuf);
 }
 
-int ll_index_to_type(int idx)
+int ll_index_to_type(unsigned idx)
 {
        struct idxmap *im;
 
@@ -119,7 +120,7 @@ int ll_index_to_type(int idx)
        return -1;
 }
 
-unsigned ll_index_to_flags(int idx)
+unsigned ll_index_to_flags(unsigned idx)
 {
        struct idxmap *im;
 
@@ -132,7 +133,7 @@ unsigned ll_index_to_flags(int idx)
        return 0;
 }
 
-int ll_name_to_index(const char *name)
+unsigned ll_name_to_index(const char *name)
 {
        static char ncache[16];
        static int icache;
@@ -152,7 +153,8 @@ int ll_name_to_index(const char *name)
                        }
                }
        }
-       return 0;
+
+       return if_nametoindex(name);
 }
 
 int ll_init_map(struct rtnl_handle *rth)
index 03df086..05046c2 100644 (file)
@@ -71,9 +71,8 @@ static char * rtnl_rtprot_tab[256] = {
        [RTPROT_ZEBRA] ="zebra",
        [RTPROT_BIRD] = "bird",
        [RTPROT_DNROUTED] = "dnrouted",
-#ifdef RTPROT_XORP
        [RTPROT_XORP] = "xorp",
-#endif
+       [RTPROT_NTK] = "ntk",
 };
 
 
index 73ce865..4bdda71 100644 (file)
@@ -27,6 +27,9 @@
 #include <resolv.h>
 #include <asm/types.h>
 #include <linux/pkt_sched.h>
+#include <time.h>
+#include <sys/time.h>
+
 
 #include "utils.h"
 
@@ -237,10 +240,11 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family)
                                dst->bitlen = 32;
                }
                if (slash) {
-                       if (get_integer(&plen, slash+1, 0) || plen > dst->bitlen) {
+                       if (get_unsigned(&plen, slash+1, 0) || plen > dst->bitlen) {
                                err = -1;
                                goto done;
                        }
+                       dst->flags |= PREFIXLEN_SPECIFIED;
                        dst->bitlen = plen;
                }
        }
@@ -500,9 +504,9 @@ const char *format_host(int af, int len, const void *addr,
 }
 
 
-__u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen)
+char *hexstring_n2a(const __u8 *str, int len, char *buf, int blen)
 {
-       __u8 *ptr = buf;
+       char *ptr = buf;
        int i;
 
        for (i=0; i<len; i++) {
@@ -519,7 +523,7 @@ __u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen)
        return buf;
 }
 
-__u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen)
+__u8* hexstring_a2n(const char *str, __u8 *buf, int blen)
 {
        int cnt = 0;
 
@@ -556,3 +560,81 @@ __u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen)
                memset(buf+cnt, 0, blen-cnt);
        return buf;
 }
+
+int print_timestamp(FILE *fp)
+{
+       struct timeval tv;
+       char *tstr;
+
+       memset(&tv, 0, sizeof(tv));
+       gettimeofday(&tv, NULL);
+
+       tstr = asctime(localtime(&tv.tv_sec));
+       tstr[strlen(tstr)-1] = 0;
+       fprintf(fp, "Timestamp: %s %lu usec\n", tstr, tv.tv_usec);
+       return 0;
+}
+
+int cmdlineno;
+
+/* Like glibc getline but handle continuation lines and comments */
+size_t getcmdline(char **linep, size_t *lenp, FILE *in)
+{
+       size_t cc;
+       char *cp;
+               
+       if ((cc = getline(linep, lenp, in)) < 0)
+               return cc;      /* eof or error */
+       ++cmdlineno;
+
+       cp = strchr(*linep, '#');
+       if (cp) 
+               *cp = '\0';
+       
+       while ((cp = strstr(*linep, "\\\n")) != NULL) {
+               char *line1 = NULL;
+               size_t len1 = 0;
+               size_t cc1;
+
+               if ((cc1 = getline(&line1, &len1, in)) < 0) {
+                       fprintf(stderr, "Missing continuation line\n");
+                       return cc1;
+               }
+
+               ++cmdlineno;
+               *cp = 0;
+
+               cp = strchr(line1, '#');
+               if (cp) 
+                       *cp = '\0';
+
+               *linep = realloc(*linep, strlen(*linep) + strlen(line1) + 1);
+               if (!*linep) {
+                       fprintf(stderr, "Out of memory\n");
+                       return -1;
+               }
+               cc += cc1 - 2;
+               strcat(*linep, line1);
+               free(line1);
+       }
+       return cc;
+}
+
+/* split command line into argument vector */
+int makeargs(char *line, char *argv[], int maxargs)
+{
+       static const char ws[] = " \t\r\n";
+       char *cp;
+       int argc = 0;
+
+       for (cp = strtok(line, ws); cp; cp = strtok(NULL, ws)) {
+               if (argc >= (maxargs - 1)) {
+                       fprintf(stderr, "Too many arguments to command\n");
+                       exit(1);
+               }
+               argv[argc++] = cp;
+       }
+       argv[argc] = NULL;
+
+       return argc;
+}
index cca6d1c..12da6d5 100644 (file)
@@ -211,7 +211,7 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
 
 .ti -8
 .B  ip rule
-.RB " [ " list " | " add " | " del " ]"
+.RB " [ " list " | " add " | " del " | " flush " ]"
 .I  SELECTOR ACTION
 
 .ti -8
@@ -1574,6 +1574,9 @@ immediately.  It is assumed that after a script finishes a batch of
 updates, it flushes the routing cache with
 .BR "ip route flush cache" .
 
+.SS ip rule flush - also dumps all the deleted rules.
+This command has no arguments.
+
 .SS ip rule show - list rules
 This command has no arguments.
 
diff --git a/man/man8/tc-pfifo.8 b/man/man8/tc-pfifo.8
new file mode 100644 (file)
index 0000000..e69de29
index 2ddf950..bda37e5 100644 (file)
@@ -28,8 +28,8 @@ lnstat: $(LNSTATOBJ)
 
 install: all
        install -m 0755 -s $(TARGETS) $(DESTDIR)$(SBINDIR)
-       ln -sf $(SBINDIR)/lnstat $(DESTDIR)$(SBINDIR)/rtstat
-       ln -sf $(SBINDIR)/lnstat $(DESTDIR)$(SBINDIR)/ctstat
+       ln -sf lnstat $(DESTDIR)$(SBINDIR)/rtstat
+       ln -sf lnstat $(DESTDIR)$(SBINDIR)/ctstat
 
 clean:
        rm -f *.o $(TARGETS) ssfilter.c
index 85b2a1c..4fd226e 100644 (file)
@@ -180,7 +180,7 @@ int send_probe(int ifindex, __u32 addr)
 {
        struct ifreq ifr;
        struct sockaddr_in dst;
-       int len;
+       socklen_t len;
        unsigned char buf[256];
        struct arphdr *ah = (struct arphdr*)buf;
        unsigned char *p = (unsigned char *)(ah+1);
@@ -228,8 +228,7 @@ int send_probe(int ifindex, __u32 addr)
        memcpy(p, &addr, 4);
        p+=4;
 
-       len = sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr*)&sll, sizeof(sll));
-       if (len < 0)
+       if (sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr*)&sll, sizeof(sll)) < 0)
                return -1;
        stats.probes_sent++;
        return 0;
@@ -480,13 +479,14 @@ void get_arp_pkt(void)
 {
        unsigned char buf[1024];
        struct sockaddr_ll sll;
-       int sll_len = sizeof(sll);
+       socklen_t sll_len = sizeof(sll);
        struct arphdr *a = (struct arphdr*)buf;
        struct dbkey key;
        DBT dbkey, dbdat;
        int n;
 
-       n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, (struct sockaddr*)&sll, &sll_len);
+       n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, 
+                    (struct sockaddr*)&sll, &sll_len);
        if (n < 0) {
                if (errno != EINTR && errno != EAGAIN)
                        syslog(LOG_ERR, "recvfrom: %m");
@@ -708,6 +708,7 @@ int main(int argc, char **argv)
                                fprintf(stderr, "Invalid IP address: \"%s\"\n", ipbuf);
                                goto do_abort;
                        }
+
                        dbdat.data = hexstring_a2n(macbuf, b1, 6);
                        if (dbdat.data == NULL)
                                goto do_abort;
@@ -730,7 +731,7 @@ int main(int argc, char **argv)
                        struct dbkey *key = dbkey.data; 
                        if (handle_if(key->iface)) {
                                if (!IS_NEG(dbdat.data)) {
-                                       __u8 b1[18];
+                                       char b1[18];
                                        printf("%-8d %-15s %s\n",
                                               key->iface,
                                               inet_ntoa(*(struct in_addr*)&key->addr),
index 1379a81..4b87994 100644 (file)
@@ -484,7 +484,7 @@ void update_db(int interval)
 
 void server_loop(int fd)
 {
-       struct timeval snaptime;
+       struct timeval snaptime = { 0 };
        struct pollfd p;
        p.fd = fd;
        p.events = p.revents = POLLIN;
@@ -498,6 +498,7 @@ void server_loop(int fd)
                int status;
                int tdiff;
                struct timeval now;
+
                gettimeofday(&now, NULL);
                tdiff = T_DIFF(now, snaptime);
                if (tdiff >= scan_interval) {
@@ -505,6 +506,7 @@ void server_loop(int fd)
                        snaptime = now;
                        tdiff = 0;
                }
+
                if (poll(&p, 1, tdiff + scan_interval) > 0
                    && (p.revents&POLLIN)) {
                        int clnt = accept(fd, NULL, NULL);
@@ -535,7 +537,8 @@ void server_loop(int fd)
 int verify_forging(int fd)
 {
        struct ucred cred;
-       int olen = sizeof(cred);
+       socklen_t olen = sizeof(cred);
+
        if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
            olen < sizeof(cred))
                return -1;
index 03e6f3f..460540e 100644 (file)
@@ -218,7 +218,7 @@ int main(int argc, char **argv)
                MODE_NORMAL,
        } mode = MODE_NORMAL;
 
-       unsigned long count = 0;
+       unsigned long count = 1;
        static struct field_params fp;
        int num_req_files = 0;
        char *req_files[LNSTAT_MAX_FILES];
@@ -242,9 +242,10 @@ int main(int argc, char **argv)
 
        while ((c = getopt_long(argc, argv,"Vc:df:h?i:k:s:w:", 
                                opts, NULL)) != -1) {
+               int i, len = 0;
+               char *tmp, *tok;
+
                switch (c) {
-                       int i, len = 0;
-                       char *tmp, *tok;
                        case 'c':
                                count = strtoul(optarg, NULL, 0);
                                break;
index 6ff3779..59c5e96 100644 (file)
@@ -52,8 +52,13 @@ static int scan_lines(struct lnstat_file *lf, int i)
                fgets(buf, sizeof(buf)-1, lf->fp); 
                gettimeofday(&lf->last_read, NULL);
 
-               for (j = 0; j < lf->num_fields; j++)
-                       lf->fields[j].values[i] = strtoul(ptr, &ptr, 16);
+               for (j = 0; j < lf->num_fields; j++) {
+                       unsigned long f = strtoul(ptr, &ptr, 16);
+                       if (j == 0) 
+                               lf->fields[j].values[i] = f;
+                       else
+                               lf->fields[j].values[i] += f;
+               }
        }
        return num_lines;
 }
index f2887ec..fc4b03b 100644 (file)
@@ -369,7 +369,7 @@ void update_db(int interval)
 
 void server_loop(int fd)
 {
-       struct timeval snaptime;
+       struct timeval snaptime = { 0 };
        struct pollfd p;
        p.fd = fd;
        p.events = p.revents = POLLIN;
@@ -422,7 +422,8 @@ void server_loop(int fd)
 int verify_forging(int fd)
 {
        struct ucred cred;
-       int olen = sizeof(cred);
+       socklen_t olen = sizeof(cred);
+
        if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
            olen < sizeof(cred))
                return -1;
index 5c6748b..0d8ff7b 100644 (file)
@@ -68,7 +68,7 @@ struct rtacct_data
 
        unsigned long long      val[256*4];
        double                  rate[256*4];
-       __u8                    signature[128];
+       char                    signature[128];
 };
 
 struct rtacct_data kern_db_static;
@@ -363,13 +363,15 @@ void pad_kern_table(struct rtacct_data *dat, __u32 *ival)
 
 void server_loop(int fd)
 {
-       struct timeval snaptime;
+       struct timeval snaptime = { 0 };
        struct pollfd p;
        p.fd = fd;
        p.events = p.revents = POLLIN;
 
-       sprintf(kern_db->signature, "%d.%lu sampling_interval=%d time_const=%d",
-               getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);
+       sprintf(kern_db->signature, 
+               "%u.%lu sampling_interval=%d time_const=%d",
+               (unsigned) getpid(), (unsigned long)random(), 
+               scan_interval/1000, time_constant/1000);
 
        pad_kern_table(kern_db, read_kern_table(kern_db->ival));
 
@@ -411,7 +413,8 @@ void server_loop(int fd)
 int verify_forging(int fd)
 {
        struct ucred cred;
-       int olen = sizeof(cred);
+       socklen_t olen = sizeof(cred);
+
        if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
            olen < sizeof(cred))
                return -1;
index 668a5bf..f902560 100644 (file)
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -33,8 +33,9 @@
 #include "libnetlink.h"
 #include "SNAPSHOT.h"
 
+#include <linux/inet_diag.h>
 #include <linux/tcp.h>
-#include <linux/tcp_diag.h>
+#include <net/tcp_states.h>
 
 int resolve_hosts = 0;
 int resolve_services = 1;
@@ -60,6 +61,7 @@ static const char *dg_proto = NULL;
 enum
 {
        TCP_DB,
+       DCCP_DB,
        UDP_DB,
        RAW_DB,
        UNIX_DG_DB,
@@ -730,7 +732,7 @@ int run_ssfilter(struct ssfilter *f, struct tcpstat *s)
 static void ssfilter_patch(char *a, int len, int reloc)
 {
        while (len > 0) {
-               struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)a;
+               struct inet_diag_bc_op *op = (struct inet_diag_bc_op*)a;
                if (op->no == len+4)
                        op->no += reloc;
                len -= op->yes;
@@ -746,7 +748,7 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
                case SSF_S_AUTO:
        {
                if (!(*bytecode=malloc(4))) abort();
-               ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_AUTO, 4, 8 };
+               ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_AUTO, 4, 8 };
                return 8;
        }
                case SSF_DCOND:
@@ -755,11 +757,11 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
                struct aafilter *a = (void*)f->pred;
                struct aafilter *b;
                char *ptr;
-               int  code = (f->type == SSF_DCOND ? TCPDIAG_BC_D_COND : TCPDIAG_BC_S_COND);
+               int  code = (f->type == SSF_DCOND ? INET_DIAG_BC_D_COND : INET_DIAG_BC_S_COND);
                int len = 0;
 
                for (b=a; b; b=b->next) {
-                       len += 4 + sizeof(struct tcpdiag_hostcond);
+                       len += 4 + sizeof(struct inet_diag_hostcond);
                        if (a->addr.family == AF_INET6)
                                len += 16;
                        else
@@ -770,20 +772,20 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
                if (!(ptr = malloc(len))) abort();
                *bytecode = ptr;
                for (b=a; b; b=b->next) {
-                       struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op *)ptr;
+                       struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)ptr;
                        int alen = (a->addr.family == AF_INET6 ? 16 : 4);
-                       int oplen = alen + 4 + sizeof(struct tcpdiag_hostcond);
-                       struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(ptr+4);
+                       int oplen = alen + 4 + sizeof(struct inet_diag_hostcond);
+                       struct inet_diag_hostcond *cond = (struct inet_diag_hostcond*)(ptr+4);
 
-                       *op = (struct tcpdiag_bc_op){ code, oplen, oplen+4 };
+                       *op = (struct inet_diag_bc_op){ code, oplen, oplen+4 };
                        cond->family = a->addr.family;
                        cond->port = a->port;
                        cond->prefix_len = a->addr.bitlen;
                        memcpy(cond->addr, a->addr.data, alen);
                        ptr += oplen;
                        if (b->next) {
-                               op = (struct tcpdiag_bc_op *)ptr;
-                               *op = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, len - (ptr-*bytecode)};
+                               op = (struct inet_diag_bc_op *)ptr;
+                               *op = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, len - (ptr-*bytecode)};
                                ptr += 4;
                        }
                }
@@ -793,32 +795,32 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
        {
                struct aafilter *x = (void*)f->pred;
                if (!(*bytecode=malloc(8))) abort();
-               ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_D_GE, 8, 12 };
-               ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port };
+               ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_GE, 8, 12 };
+               ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
                return 8;
        }
                case SSF_D_LE:
        {
                struct aafilter *x = (void*)f->pred;
                if (!(*bytecode=malloc(8))) abort();
-               ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_D_LE, 8, 12 };
-               ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port };
+               ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_LE, 8, 12 };
+               ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
                return 8;
        }
                case SSF_S_GE:
        {
                struct aafilter *x = (void*)f->pred;
                if (!(*bytecode=malloc(8))) abort();
-               ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_S_GE, 8, 12 };
-               ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port };
+               ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_GE, 8, 12 };
+               ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
                return 8;
        }
                case SSF_S_LE:
        {
                struct aafilter *x = (void*)f->pred;
                if (!(*bytecode=malloc(8))) abort();
-               ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_S_LE, 8, 12 };
-               ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port };
+               ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_LE, 8, 12 };
+               ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port };
                return 8;
        }
 
@@ -844,7 +846,7 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
                memcpy(a, a1, l1);
                memcpy(a+l1+4, a2, l2);
                free(a1); free(a2);
-               *(struct tcpdiag_bc_op*)(a+l1) = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, l2+4 };
+               *(struct inet_diag_bc_op*)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, l2+4 };
                *bytecode = a;
                return l1+l2+4;
        }
@@ -855,7 +857,7 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode)
                if (!(a = malloc(l1+4))) abort();
                memcpy(a, a1, l1);
                free(a1);
-               *(struct tcpdiag_bc_op*)(a+l1) = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, 8 };
+               *(struct inet_diag_bc_op*)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, 8 };
                *bytecode = a;
                return l1+4;
        }
@@ -1299,36 +1301,36 @@ static char *sprint_bw(char *buf, double bw)
        return buf;
 }
 
-static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r)
+static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r)
 {
-       struct rtattr * tb[TCPDIAG_MAX+1];
+       struct rtattr * tb[INET_DIAG_MAX+1];
        char b1[64];
        double rtt = 0;
 
-       parse_rtattr(tb, TCPDIAG_MAX, (struct rtattr*)(r+1),
+       parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr*)(r+1),
                     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
 
-       if (tb[TCPDIAG_MEMINFO]) {
-               const struct tcpdiag_meminfo *minfo
-                       = RTA_DATA(tb[TCPDIAG_MEMINFO]);
+       if (tb[INET_DIAG_MEMINFO]) {
+               const struct inet_diag_meminfo *minfo
+                       = RTA_DATA(tb[INET_DIAG_MEMINFO]);
                printf(" mem:(r%u,w%u,f%u,t%u)",
-                      minfo->tcpdiag_rmem,
-                      minfo->tcpdiag_wmem,
-                      minfo->tcpdiag_fmem,
-                      minfo->tcpdiag_tmem);
+                      minfo->idiag_rmem,
+                      minfo->idiag_wmem,
+                      minfo->idiag_fmem,
+                      minfo->idiag_tmem);
        }
 
-       if (tb[TCPDIAG_INFO]) {
+       if (tb[INET_DIAG_INFO]) {
                struct tcp_info *info;
-               int len = RTA_PAYLOAD(tb[TCPDIAG_INFO]);
+               int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
 
                /* workaround for older kernels with less fields */
                if (len < sizeof(*info)) {
                        info = alloca(sizeof(*info));
                        memset(info, 0, sizeof(*info));
-                       memcpy(info, RTA_DATA(tb[TCPDIAG_INFO]), len);
+                       memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
                } else
-                       info = RTA_DATA(tb[TCPDIAG_INFO]);
+                       info = RTA_DATA(tb[INET_DIAG_INFO]);
 
                if (show_options) {
                        if (info->tcpi_options & TCPI_OPT_TIMESTAMPS)
@@ -1338,6 +1340,10 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r)
                        if (info->tcpi_options & TCPI_OPT_ECN)
                                printf(" ecn");
                }
+
+               if (tb[INET_DIAG_CONG])
+                       printf("%s", (char *) RTA_DATA(tb[INET_DIAG_CONG]));
+
                if (info->tcpi_options & TCPI_OPT_WSCALE) 
                        printf(" wscale:%d,%d", info->tcpi_snd_wscale,
                               info->tcpi_rcv_wscale);
@@ -1352,18 +1358,15 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r)
                        printf(" cwnd:%d", info->tcpi_snd_cwnd);
                if (info->tcpi_snd_ssthresh < 0xFFFF)
                        printf(" ssthresh:%d", info->tcpi_snd_ssthresh);
-               
+
                rtt = (double) info->tcpi_rtt;
-               if (tb[TCPDIAG_VEGASINFO]) {
+               if (tb[INET_DIAG_VEGASINFO]) {
                        const struct tcpvegas_info *vinfo
-                               = RTA_DATA(tb[TCPDIAG_VEGASINFO]);
+                               = RTA_DATA(tb[INET_DIAG_VEGASINFO]);
 
-                       if (vinfo->tcpv_enabled)
-                               printf(" vegas");
-
-                       if (vinfo->tcpv_rtt && 
-                           vinfo->tcpv_rtt != 0x7fffffff)
-                           rtt =  vinfo->tcpv_rtt;
+                       if (vinfo->tcpv_enabled && 
+                           vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff)
+                               rtt =  vinfo->tcpv_rtt;
                }
 
                if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) {
@@ -1383,20 +1386,20 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r)
 
 int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f)
 {
-       struct tcpdiagmsg *r = NLMSG_DATA(nlh);
+       struct inet_diag_msg *r = NLMSG_DATA(nlh);
        struct tcpstat s;
 
-       s.state = r->tcpdiag_state;
-       s.local.family = s.remote.family = r->tcpdiag_family;
-       s.lport = ntohs(r->id.tcpdiag_sport);
-       s.rport = ntohs(r->id.tcpdiag_dport);
+       s.state = r->idiag_state;
+       s.local.family = s.remote.family = r->idiag_family;
+       s.lport = ntohs(r->id.idiag_sport);
+       s.rport = ntohs(r->id.idiag_dport);
        if (s.local.family == AF_INET) {
                s.local.bytelen = s.remote.bytelen = 4;
        } else {
                s.local.bytelen = s.remote.bytelen = 16;
        }
-       memcpy(s.local.data, r->id.tcpdiag_src, s.local.bytelen);
-       memcpy(s.remote.data, r->id.tcpdiag_dst, s.local.bytelen);
+       memcpy(s.local.data, r->id.idiag_src, s.local.bytelen);
+       memcpy(s.remote.data, r->id.idiag_dst, s.local.bytelen);
 
        if (f && f->f && run_ssfilter(f->f, &s) == 0)
                return 0;
@@ -1406,33 +1409,33 @@ int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f)
        if (state_width)
                printf("%-*s ", state_width, sstate_name[s.state]);
 
-       printf("%-6d %-6d ", r->tcpdiag_rqueue, r->tcpdiag_wqueue);
+       printf("%-6d %-6d ", r->idiag_rqueue, r->idiag_wqueue);
 
        formatted_print(&s.local, s.lport);
        formatted_print(&s.remote, s.rport);
 
        if (show_options) {
-               if (r->tcpdiag_timer) {
-                       if (r->tcpdiag_timer > 4)
-                               r->tcpdiag_timer = 5;
+               if (r->idiag_timer) {
+                       if (r->idiag_timer > 4)
+                               r->idiag_timer = 5;
                        printf(" timer:(%s,%s,%d)",
-                              tmr_name[r->tcpdiag_timer],
-                              print_ms_timer(r->tcpdiag_expires),
-                              r->tcpdiag_retrans);
+                              tmr_name[r->idiag_timer],
+                              print_ms_timer(r->idiag_expires),
+                              r->idiag_retrans);
                }
        }
        if (show_users) {
                char ubuf[4096];
-               if (find_users(r->tcpdiag_inode, ubuf, sizeof(ubuf)) > 0)
+               if (find_users(r->idiag_inode, ubuf, sizeof(ubuf)) > 0)
                        printf(" users:(%s)", ubuf);
        }
        if (show_details) {
-               if (r->tcpdiag_uid)
-                       printf(" uid:%u", (unsigned)r->tcpdiag_uid);
-               printf(" ino:%u", (unsigned)r->tcpdiag_inode);
-               printf(" sk:%08x", r->id.tcpdiag_cookie[0]);
-               if (r->id.tcpdiag_cookie[1] != 0)
-                       printf("%08x", r->id.tcpdiag_cookie[1]);
+               if (r->idiag_uid)
+                       printf(" uid:%u", (unsigned)r->idiag_uid);
+               printf(" ino:%u", (unsigned)r->idiag_inode);
+               printf(" sk:%08x", r->id.idiag_cookie[0]);
+               if (r->id.idiag_cookie[1] != 0)
+                       printf("%08x", r->id.idiag_cookie[1]);
        }
        if (show_mem || show_tcpinfo) {
                printf("\n\t");
@@ -1442,16 +1445,15 @@ int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f)
        printf("\n");
 
        return 0;
-
 }
 
-int tcp_show_netlink(struct filter *f, FILE *dump_fp)
+int tcp_show_netlink(struct filter *f, FILE *dump_fp, int socktype)
 {
        int fd;
        struct sockaddr_nl nladdr;
        struct {
                struct nlmsghdr nlh;
-               struct tcpdiagreq r;
+               struct inet_diag_req r;
        } req;
        char    *bc = NULL;
        int     bclen;
@@ -1460,32 +1462,36 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp)
        char    buf[8192];
        struct iovec iov[3];
 
-       if ((fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_TCPDIAG)) < 0)
+       if ((fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_INET_DIAG)) < 0)
                return -1;
 
        memset(&nladdr, 0, sizeof(nladdr));
        nladdr.nl_family = AF_NETLINK;
 
        req.nlh.nlmsg_len = sizeof(req);
-       req.nlh.nlmsg_type = TCPDIAG_GETSOCK;
+       req.nlh.nlmsg_type = socktype;
        req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
        req.nlh.nlmsg_pid = 0;
        req.nlh.nlmsg_seq = 123456;
        memset(&req.r, 0, sizeof(req.r));
-       req.r.tcpdiag_family = AF_INET;
-       req.r.tcpdiag_states = f->states;
+       req.r.idiag_family = AF_INET;
+       req.r.idiag_states = f->states;
        if (show_mem)
-               req.r.tcpdiag_ext |= (1<<(TCPDIAG_MEMINFO-1)); 
+               req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1)); 
 
        if (show_tcpinfo) {
-               req.r.tcpdiag_ext |= (1<<(TCPDIAG_INFO-1));
-               req.r.tcpdiag_ext |= (1<<(TCPDIAG_VEGASINFO-1));
+               req.r.idiag_ext |= (1<<(INET_DIAG_INFO-1));
+               req.r.idiag_ext |= (1<<(INET_DIAG_VEGASINFO-1));
+               req.r.idiag_ext |= (1<<(INET_DIAG_CONG-1));
        }
 
-       iov[0] = (struct iovec){ &req, sizeof(req) };
+       iov[0] = (struct iovec){ 
+               .iov_base = &req, 
+               .iov_len = sizeof(req) 
+       };
        if (f->f) {
                bclen = ssfilter_bytecompile(f->f, &bc);
-               rta.rta_type = TCPDIAG_REQ_BYTECODE;
+               rta.rta_type = INET_DIAG_REQ_BYTECODE;
                rta.rta_len = RTA_LENGTH(bclen);
                iov[1] = (struct iovec){ &rta, sizeof(rta) };
                iov[2] = (struct iovec){ bc, bclen };
@@ -1493,17 +1499,19 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp)
        }
 
        msg = (struct msghdr) {
-               (void*)&nladdr, sizeof(nladdr),
-               iov,    f->f ? 3 : 1,
-               NULL,   0,
-               0
+               .msg_name = (void*)&nladdr, 
+               .msg_namelen = sizeof(nladdr),
+               .msg_iov = iov, 
+               .msg_iovlen = f->f ? 3 : 1,
        };
 
        if (sendmsg(fd, &msg, 0) < 0)
                return -1;
 
-
-       iov[0] = (struct iovec){ buf, sizeof(buf) };
+       iov[0] = (struct iovec){ 
+               .iov_base = buf, 
+               .iov_len = sizeof(buf) 
+       };
 
        while (1) {
                int status;
@@ -1629,7 +1637,7 @@ int tcp_show_netlink_file(struct filter *f)
        }
 }
 
-int tcp_show(struct filter *f)
+int tcp_show(struct filter *f, int socktype)
 {
        int fd = -1;
        char *buf = NULL;
@@ -1641,7 +1649,7 @@ int tcp_show(struct filter *f)
                return tcp_show_netlink_file(f);
 
        if (!getenv("PROC_NET_TCP") && !getenv("PROC_ROOT")
-           && tcp_show_netlink(f, NULL) == 0)
+           && tcp_show_netlink(f, NULL, socktype) == 0)
                return 0;
 
        /* Sigh... We have to parse /proc/net/tcp... */
@@ -2383,6 +2391,7 @@ static void usage(void)
 "   -0, --packet       display PACKET sockets\n"
 "   -t, --tcp          display only TCP sockets\n"
 "   -u, --udp          display only UDP sockets\n"
+"   -d, --dccp         display only DCCP sockets\n"
 "   -w, --raw          display only RAW sockets\n"
 "   -x, --unix         display only Unix domain sockets\n"
 "   -f, --family=FAMILY display sockets of type FAMILY\n"
@@ -2432,6 +2441,7 @@ static const struct option long_opts[] = {
        { "memory", 0, 0, 'm' },
        { "info", 0, 0, 'i' },
        { "processes", 0, 0, 'p' },
+       { "dccp", 0, 0, 'd' },
        { "tcp", 0, 0, 't' },
        { "udp", 0, 0, 'u' },
        { "raw", 0, 0, 'w' },
@@ -2466,7 +2476,7 @@ int main(int argc, char *argv[])
 
        current_filter.states = default_filter.states;
 
-       while ((ch = getopt_long(argc, argv, "haletuwxnro460spf:miA:D:F:vV",
+       while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spf:miA:D:F:vV",
                                 long_opts, NULL)) != EOF) {
                switch(ch) {
                case 'n':
@@ -2491,6 +2501,10 @@ int main(int argc, char *argv[])
                case 'p':
                        show_users++;
                        break;
+               case 'd':
+                       current_filter.dbs |= (1<<DCCP_DB);
+                       do_default = 0;
+                       break;
                case 't':
                        current_filter.dbs |= (1<<TCP_DB);
                        do_default = 0;
@@ -2555,9 +2569,11 @@ int main(int argc, char *argv[])
                                if (strcmp(p, "all") == 0) {
                                        current_filter.dbs = ALL_DB;
                                } else if (strcmp(p, "inet") == 0) {
-                                       current_filter.dbs |= (1<<TCP_DB)|(1<<UDP_DB)|(1<<RAW_DB);
+                                       current_filter.dbs |= (1<<TCP_DB)|(1<<DCCP_DB)|(1<<UDP_DB)|(1<<RAW_DB);
                                } else if (strcmp(p, "udp") == 0) {
                                        current_filter.dbs |= (1<<UDP_DB);
+                               } else if (strcmp(p, "dccp") == 0) {
+                                       current_filter.dbs |= (1<<DCCP_DB);
                                } else if (strcmp(p, "tcp") == 0) {
                                        current_filter.dbs |= (1<<TCP_DB);
                                } else if (strcmp(p, "raw") == 0) {
@@ -2680,7 +2696,7 @@ int main(int argc, char *argv[])
        }
 
        if (resolve_services && resolve_hosts &&
-           (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB))))
+           (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB))))
                init_service_resolver();
 
        /* Now parse filter... */
@@ -2729,7 +2745,7 @@ int main(int argc, char *argv[])
                                exit(-1);
                        }
                }
-               tcp_show_netlink(&current_filter, dump_fp);
+               tcp_show_netlink(&current_filter, dump_fp, TCPDIAG_GETSOCK);
                fflush(dump_fp);
                exit(0);
        }
@@ -2798,6 +2814,8 @@ int main(int argc, char *argv[])
        if (current_filter.dbs & (1<<UDP_DB))
                udp_show(&current_filter);
        if (current_filter.dbs & (1<<TCP_DB))
-               tcp_show(&current_filter);
+               tcp_show(&current_filter, TCPDIAG_GETSOCK);
+       if (current_filter.dbs & (1<<DCCP_DB))
+               tcp_show(&current_filter, DCCPDIAG_GETSOCK);
        return 0;
 }
index 881ed9f..59c7e08 100644 (file)
@@ -1,16 +1,19 @@
 DISTGEN = maketable normal pareto paretonormal
 DISTDATA = normal.dist pareto.dist paretonormal.dist experimental.dist
 
+HOSTCC ?= $(CC)
 LDLIBS += -lm 
 
-%.dist: %.c
-       $(HOSTCC) $(CFLAGS) -o $* $< -lm
-       ./$* >$@
+all: $(DISTGEN) $(DISTDATA)
 
-%.dist: %.dat
-       ./maketable $< >$@
+$(DISTGEN):
+       $(HOSTCC) $(CCOPTS) -I../include -o $@ $@.c -lm
 
-all: $(DISTGEN) $(DISTDATA)
+%.dist: %
+       ./$* > $@
+
+experimental.dist: maketable experimental.dat
+       ./maketable experimental.dat > experimental.dist
 
 install: all
        mkdir -p $(DESTDIR)/usr/lib/tc
@@ -20,8 +23,3 @@ install: all
 
 clean:
        rm -f $(DISTDATA) $(DISTGEN)
-
-maketable: maketable.c
-       $(HOSTCC) $(CFLAGS) -o $@ $< -lm
-
-
index e6683db..dbdebb1 100644 (file)
@@ -20,21 +20,16 @@ normal(double x, double mu, double sigma)
        return .5 + .5*erf((x-mu)/(sqrt(2.0)*sigma));
 }
 
+
 int
 main(int argc, char **argv)
 {
-       double x, *table;
        int i, n;
-
-       table = calloc(sizeof(double), TABLESIZE+1);
-       if (!table) {
-               fprintf(stderr, "Not enough memory\n");
-               return 1;
-       }
-
+       double x;
+       double table[TABLESIZE+1];
 
        for (x = -10.0; x < 10.05; x += .00005) {
-               i = (int)rint(TABLESIZE*normal(x, 0.0, 1.0));
+               i = rint(TABLESIZE * normal(x, 0.0, 1.0));
                table[i] = x;
        }
 
@@ -51,6 +46,6 @@ main(int argc, char **argv)
                        n = 0;
                }
        }
-       free(table);
+
        return 0;
 }
index c793df6..ed75f28 100644 (file)
@@ -29,7 +29,6 @@ normal(double x, double mu, double sigma)
        return .5 + .5*erf((x-mu)/(sqrt(2.0)*sigma));
 }
 
-
 static const double a=3.0;
 
 static int
@@ -50,18 +49,12 @@ paretovalue(int i)
 int
 main(int argc, char **argv)
 {
-       double x;
-       double *table;
        int i,n;
-
-       table = calloc(TABLESIZE+1, sizeof(double));
-       if (!table) {
-               fprintf(stderr, "Out of memory!\n");
-               exit(1);
-       }
+       double x;
+       double table[TABLESIZE+1];
 
        for (x = -10.0; x < 10.05; x += .00005) {
-               i = (int)rint(TABLESIZE*normal(x, 0.0, 1.0));
+               i = rint(TABLESIZE*normal(x, 0.0, 1.0));
                table[i] = x;
        }
        printf(
@@ -84,7 +77,6 @@ main(int argc, char **argv)
                        n = 0;
                }
        }
-       free(table);
 
        return 0;
 }
index 06546f9..9d618ff 100644 (file)
@@ -1,5 +1,6 @@
 TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \
-       m_police.o m_estimator.o m_action.o
+       m_police.o m_estimator.o m_action.o m_ematch.o \
+       emp_ematch.yacc.o emp_ematch.lex.o
 
 include ../Config
 
@@ -14,6 +15,7 @@ TCMODULES += f_rsvp.o
 TCMODULES += f_u32.o
 TCMODULES += f_route.o
 TCMODULES += f_fw.o
+TCMODULES += f_basic.o
 TCMODULES += q_dsmark.o
 TCMODULES += q_gred.o
 TCMODULES += f_tcindex.o
@@ -28,6 +30,10 @@ TCMODULES += p_ip.o
 TCMODULES += p_icmp.o
 TCMODULES += p_tcp.o
 TCMODULES += p_udp.o
+TCMODULES += em_nbyte.o
+TCMODULES += em_cmp.o
+TCMODULES += em_u32.o
+TCMODULES += em_meta.o
 
 TCOBJ += $(TCMODULES)
 
@@ -48,6 +54,9 @@ LDLIBS += -L. -ltc -lm -ldl
 
 LDFLAGS += -Wl,-export-dynamic
 
+YACC := bison
+LEX := flex
+
 %.so: %.c
        $(CC) $(CFLAGS) -shared -fpic $< -o $@
 
@@ -67,8 +76,14 @@ install: all
        done
 
 clean:
-       rm -f $(TCOBJ) $(TCLIB) libtc.a tc *.so
+       rm -f $(TCOBJ) $(TCLIB) libtc.a tc *.so emp_ematch.yacc.h; \
+       rm -f emp_ematch.yacc.output
 
 q_atm.so: q_atm.c
        $(CC) $(CFLAGS) -shared -fpic -o q_atm.so q_atm.c -latm
 
+%.yacc.c: %.y
+       $(YACC) $(YACCFLAGS) -o $@ $<
+
+%.lex.c: %.l
+       $(LEX) $(LEXFLAGS) -o$@ $<
diff --git a/tc/em_cmp.c b/tc/em_cmp.c
new file mode 100644 (file)
index 0000000..c636c53
--- /dev/null
@@ -0,0 +1,188 @@
+/*
+ * em_cmp.c            Simple comparison Ematch
+ *
+ *             This program is free software; you can distribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <errno.h>
+
+#include "m_ematch.h"
+#include <linux/tc_ematch/tc_em_cmp.h>
+
+extern struct ematch_util cmp_ematch_util;
+
+static void cmp_print_usage(FILE *fd)
+{
+       fprintf(fd,
+           "Usage: cmp(ALIGN at OFFSET [ ATTRS ] { eq | lt | gt } VALUE)\n" \
+           "where: ALIGN  := { u8 | u16 | u32 }\n" \
+           "       ATTRS  := [ layer LAYER ] [ mask MASK ] [ trans ]\n" \
+           "       LAYER  := { link | header | next-header | 0..%d }\n" \
+           "\n" \
+           "Example: cmp(u16 at 3 layer 2 mask 0xff00 gt 20)\n",
+           TCF_LAYER_MAX);
+}
+
+/*
+ * Parse "ALIGN at OFFSET [layer L] [mask M] [trans] {eq|lt|gt} VALUE" into a
+ * struct tcf_em_cmp, then pass hdr and that struct to addraw_l() for message n.
+ * Returns 0 on success, otherwise whatever em_parse_error() returns.
+ */
+static int cmp_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+                         struct bstr *args)
+{
+       struct bstr *a;
+       int align, opnd = 0;
+       unsigned long offset = 0, layer = TCF_LAYER_NETWORK, mask = 0, value = 0;
+       int offset_present = 0, value_present = 0;
+       struct tcf_em_cmp cmp;
+
+       memset(&cmp, 0, sizeof(cmp));
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+       em_parse_error(EINVAL, args, CARG, &cmp_ematch_util, FMT ,##ARGS)
+
+       if (args == NULL)
+               return PARSE_ERR(args, "cmp: missing arguments");
+
+       if (!bstrcmp(args, "u8"))
+               align = TCF_EM_ALIGN_U8;
+       else if (!bstrcmp(args, "u16"))
+               align = TCF_EM_ALIGN_U16;
+       else if (!bstrcmp(args, "u32"))
+               align = TCF_EM_ALIGN_U32;
+       else
+               return PARSE_ERR(args, "cmp: invalid alignment");
+       /* The remaining tokens are keyword driven and may come in any order. */
+       for (a = bstr_next(args); a; a = bstr_next(a)) {
+               if (!bstrcmp(a, "at")) {
+                       if (a->next == NULL)
+                               return PARSE_ERR(a, "cmp: missing argument");
+                       a = bstr_next(a);
+                       offset = bstrtoul(a);
+                       if (offset == ULONG_MAX)
+                               return PARSE_ERR(a, "cmp: invalid offset, " \
+                                   "must be numeric");
+
+                       offset_present = 1;
+               } else if (!bstrcmp(a, "layer")) {
+                       if (a->next == NULL)
+                               return PARSE_ERR(a, "cmp: missing argument");
+                       a = bstr_next(a);
+                       layer = parse_layer(a);
+                       if (layer == INT_MAX) {
+                               layer = bstrtoul(a);
+                               if (layer == ULONG_MAX)
+                                       return PARSE_ERR(a, "cmp: invalid " \
+                                           "layer");
+                       }
+
+                       if (layer > TCF_LAYER_MAX)
+                               return PARSE_ERR(a, "cmp: illegal layer, " \
+                                   "must be in 0..%d", TCF_LAYER_MAX);
+               } else if (!bstrcmp(a, "mask")) {
+                       if (a->next == NULL)
+                               return PARSE_ERR(a, "cmp: missing argument");
+                       a = bstr_next(a);
+                       mask = bstrtoul(a);
+                       if (mask == ULONG_MAX)
+                               return PARSE_ERR(a, "cmp: invalid mask");
+               } else if (!bstrcmp(a, "trans")) {
+                       cmp.flags |= TCF_EM_CMP_TRANS;
+               } else if (!bstrcmp(a, "eq") || !bstrcmp(a, "gt") ||
+                   !bstrcmp(a, "lt")) {
+                       if (!bstrcmp(a, "eq"))
+                               opnd = TCF_EM_OPND_EQ;
+                       else if (!bstrcmp(a, "gt"))
+                               opnd = TCF_EM_OPND_GT;
+                       else if (!bstrcmp(a, "lt"))
+                               opnd = TCF_EM_OPND_LT;
+
+                       if (a->next == NULL)
+                               return PARSE_ERR(a, "cmp: missing argument");
+                       a = bstr_next(a);
+                       value = bstrtoul(a);
+                       if (value == ULONG_MAX)
+                               return PARSE_ERR(a, "cmp: invalid value");
+
+                       value_present = 1;
+               } else
+                       return PARSE_ERR(a, "cmp: unknown parameter");
+       }
+
+       if (offset_present == 0 || value_present == 0)
+               return PARSE_ERR(a, "cmp: offset and value required");
+       /* Narrow the parsed values to the field widths used by struct tcf_em_cmp. */
+       cmp.val = (__u32) value;
+       cmp.mask = (__u32) mask;
+       cmp.off = (__u16) offset;
+       cmp.align = (__u8) align;
+       cmp.layer = (__u8) layer;
+       cmp.opnd = (__u8) opnd;
+
+       addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+       addraw_l(n, MAX_MSG, &cmp, sizeof(cmp));
+
+#undef PARSE_ERR
+       return 0;
+}
+
+/*
+ * Print the tcf_em_cmp carried in data to fd using the cmp() argument syntax.
+ * Returns 0 on success, or -1 if data_len is smaller than the structure.
+ */
+static int cmp_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+                         int data_len)
+{
+       struct tcf_em_cmp *cmp = data;
+
+       if (data_len < sizeof(*cmp)) {
+               fprintf(stderr, "CMP header size mismatch\n");
+               return -1;
+       }
+
+       if (cmp->align == TCF_EM_ALIGN_U8)
+               fprintf(fd, "u8 ");
+       else if (cmp->align == TCF_EM_ALIGN_U16)
+               fprintf(fd, "u16 ");
+       else if (cmp->align == TCF_EM_ALIGN_U32)
+               fprintf(fd, "u32 ");
+
+       fprintf(fd, "at %d layer %d ", cmp->off, cmp->layer);
+       if (cmp->mask)
+               fprintf(fd, "mask 0x%x ", cmp->mask);
+       if (cmp->flags & TCF_EM_CMP_TRANS)
+               fprintf(fd, "trans ");
+       if (cmp->opnd == TCF_EM_OPND_EQ)
+               fprintf(fd, "eq ");
+       else if (cmp->opnd == TCF_EM_OPND_LT)
+               fprintf(fd, "lt ");
+       else if (cmp->opnd == TCF_EM_OPND_GT)
+               fprintf(fd, "gt ");
+       /* val is unsigned (the parser stores it through a __u32 cast): use %u */
+       fprintf(fd, "%u", cmp->val);
+       return 0;
+}
+
+struct ematch_util cmp_ematch_util = {
+       .kind = "cmp",
+       .kind_num = TCF_EM_CMP,
+       .parse_eopt = cmp_parse_eopt,
+       .print_eopt = cmp_print_eopt,
+       .print_usage = cmp_print_usage
+};
diff --git a/tc/em_meta.c b/tc/em_meta.c
new file mode 100644 (file)
index 0000000..bd1e151
--- /dev/null
@@ -0,0 +1,550 @@
+/*
+ * em_meta.c           Metadata Ematch
+ *
+ *             This program is free software; you can distribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <errno.h>
+
+#include "m_ematch.h"
+#include <linux/tc_ematch/tc_em_meta.h>
+
+extern struct ematch_util meta_ematch_util;
+
+/* Write the usage text for the meta() ematch to fd. */
+static void meta_print_usage(FILE *fd)
+{
+       fprintf(fd, "Usage: meta(OBJECT { eq | lt | gt } OBJECT)\n" \
+           "where: OBJECT  := { META_ID | VALUE }\n" \
+           "       META_ID := id [ shift SHIFT ] [ mask MASK ]\n" \
+           "\n" \
+           "Example: meta(nfmark gt 24)\n" \
+           "         meta(indev shift 1 eq \"ppp\")\n" \
+           "         meta(tcindex mask 0xf0 eq 0xf0)\n" \
+           "         meta(dev eq indev)\n" \
+           "\n" \
+           "For a list of meta identifiers, use meta(list).\n");
+}
+
+struct meta_entry {
+       int             id;
+       char *          kind;
+       char *          mask;
+       char *          desc;
+} meta_table[] = {
+#define TCF_META_ID_SECTION 0
+#define __A(id, name, mask, desc) { TCF_META_ID_##id, name, mask, desc }
+       __A(SECTION,            "Generic", "", ""),
+       __A(RANDOM,             "random",       "i",
+                               "Random value (32 bit)"),
+       __A(LOADAVG_0,          "loadavg_1",    "i",
+                               "Load average in last minute"),
+       __A(LOADAVG_1,          "loadavg_5",    "i",
+                               "Load average in last 5 minutes"),
+       __A(LOADAVG_2,          "loadavg_15",   "i",
+                               "Load average in last 15 minutes"),
+
+       __A(SECTION,            "Interfaces", "", ""),
+       __A(DEV,                "dev",          "iv",
+                               "Device the packet is on"),
+       __A(SECTION,            "Packet attributes", "", ""),
+       __A(PRIORITY,           "priority",     "i",
+                               "Priority of packet"),
+       __A(PROTOCOL,           "protocol",     "i",
+                               "Link layer protocol"),
+       __A(PKTTYPE,            "pkt_type",     "i",
+                               "Packet type (uni|multi|broad|...)cast"),
+       __A(PKTLEN,             "pkt_len",      "i",
+                               "Length of packet"),
+       __A(DATALEN,            "data_len",     "i",
+                               "Length of data in packet"),
+       __A(MACLEN,             "mac_len",      "i",
+                               "Length of link layer header"),
+
+       __A(SECTION,            "Netfilter", "", ""),
+       __A(NFMARK,             "nf_mark",      "i",
+                               "Netfilter mark"),
+       __A(NFMARK,             "fwmark",       "i",
+                               "Alias for nf_mark"),
+
+       __A(SECTION,            "Traffic Control", "", ""),
+       __A(TCINDEX,            "tc_index",     "i",    "TC Index"),
+       __A(SECTION,            "Routing", "", ""),
+       __A(RTCLASSID,          "rt_classid",   "i",
+                               "Routing ClassID (cls_route)"),
+       __A(RTIIF,              "rt_iif",       "i",
+                               "Incoming interface index"),
+
+       __A(SECTION,            "Sockets", "", ""),
+       __A(SK_FAMILY,          "sk_family",    "i",    "Address family"),
+       __A(SK_STATE,           "sk_state",     "i",    "State"),
+       __A(SK_REUSE,           "sk_reuse",     "i",    "Reuse Flag"),
+       __A(SK_BOUND_IF,        "sk_bind_if",   "iv",   "Bound interface"),
+       __A(SK_REFCNT,          "sk_refcnt",    "i",    "Reference counter"),
+       __A(SK_SHUTDOWN,        "sk_shutdown",  "i",    "Shutdown mask"),
+       __A(SK_PROTO,           "sk_proto",     "i",    "Protocol"),
+       __A(SK_TYPE,            "sk_type",      "i",    "Type"),
+       __A(SK_RCVBUF,          "sk_rcvbuf",    "i",    "Receive buffer size"),
+       __A(SK_RMEM_ALLOC,      "sk_rmem",      "i",    "RMEM"),
+       __A(SK_WMEM_ALLOC,      "sk_wmem",      "i",    "WMEM"),
+       __A(SK_OMEM_ALLOC,      "sk_omem",      "i",    "OMEM"),
+       __A(SK_WMEM_QUEUED,     "sk_wmem_queue","i",    "WMEM queue"),
+       __A(SK_SND_QLEN,        "sk_snd_queue", "i",    "Send queue length"),
+       __A(SK_RCV_QLEN,        "sk_rcv_queue", "i",    "Receive queue length"),
+       __A(SK_ERR_QLEN,        "sk_err_queue", "i",    "Error queue length"),
+       __A(SK_FORWARD_ALLOCS,  "sk_fwd_alloc", "i",    "Forward allocations"),
+       __A(SK_SNDBUF,          "sk_sndbuf",    "i",    "Send buffer size"),
+#undef __A
+};
+
+static inline int map_type(char k)
+{
+       switch (k) {
+               case 'i': return TCF_META_TYPE_INT;
+               case 'v': return TCF_META_TYPE_VAR;
+       }
+
+       fprintf(stderr, "BUG: Unknown map character '%c'\n", k);
+       return INT_MAX;
+}
+
+static struct meta_entry * lookup_meta_entry(struct bstr *kind)
+{
+       int i;
+
+       for (i = 0; i < (sizeof(meta_table)/sizeof(meta_table[0])); i++)
+               if (!bstrcmp(kind, meta_table[i].kind) &&
+                   meta_table[i].id != 0)
+                       return &meta_table[i];
+       
+       return NULL;
+}
+
+static struct meta_entry * lookup_meta_entry_byid(int id)
+{
+       int i;
+
+       for (i = 0; i < (sizeof(meta_table)/sizeof(meta_table[0])); i++)
+               if (meta_table[i].id == id)
+                       return &meta_table[i];
+       
+       return NULL;
+}
+
+static inline void dump_value(struct nlmsghdr *n, int tlv, unsigned long val,
+                             struct tcf_meta_val *hdr)
+{
+       __u32 t;
+
+       switch (TCF_META_TYPE(hdr->kind)) {
+               case TCF_META_TYPE_INT:
+                       t = val;
+                       addattr_l(n, MAX_MSG, tlv, &t, sizeof(t));
+                       break;
+
+               case TCF_META_TYPE_VAR:
+                       if (TCF_META_ID(hdr->kind) == TCF_META_ID_VALUE) {
+                               struct bstr *a = (struct bstr *) val;
+                               addattr_l(n, MAX_MSG, tlv, a->data, a->len);
+                       }
+                       break;
+       }
+}
+
+/* Return 1 if the meta id of 'what' supports the value type of 'needed'. */
+static inline int is_compatible(struct tcf_meta_val *what,
+                               struct tcf_meta_val *needed)
+{
+       char *p;
+       struct meta_entry *entry;
+
+       entry = lookup_meta_entry_byid(TCF_META_ID(what->kind));
+       if (entry == NULL)
+               return 0;
+       /* Stop at the NUL terminator; testing the pointer itself never ends. */
+       for (p = entry->mask; *p; p++)
+               if (map_type(*p) == TCF_META_TYPE(needed->kind))
+                       return 1;
+
+       return 0;
+}
+
+static void list_meta_ids(FILE *fd)
+{
+       int i;
+
+       fprintf(fd,
+           "--------------------------------------------------------\n" \
+           "  ID               Type       Description\n" \
+           "--------------------------------------------------------");
+
+       for (i = 0; i < (sizeof(meta_table)/sizeof(meta_table[0])); i++) {
+               if (meta_table[i].id == TCF_META_ID_SECTION) {
+                       fprintf(fd, "\n%s:\n", meta_table[i].kind);
+               } else {
+                       char *p = meta_table[i].mask;
+                       char buf[64] = {0};
+
+                       fprintf(fd, "  %-16s ", meta_table[i].kind);
+
+                       while (*p) {
+                               int type = map_type(*p);
+
+                               switch (type) {
+                                       case TCF_META_TYPE_INT:
+                                               strcat(buf, "INT");
+                                               break;
+
+                                       case TCF_META_TYPE_VAR:
+                                               strcat(buf, "VAR");
+                                               break;
+                               }
+
+                               if (*(++p))
+                                       strcat(buf, ",");
+                       }
+
+                       fprintf(fd, "%-10s %s\n", buf, meta_table[i].desc);
+               }
+       }
+
+       fprintf(fd,
+           "--------------------------------------------------------\n");
+}
+
+#undef TCF_META_ID_SECTION
+
+#define PARSE_FAILURE ((void *) (-1))
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+       em_parse_error(EINVAL, args, CARG, &meta_ematch_util, FMT ,##ARGS)
+
+static inline int can_adopt(struct tcf_meta_val *val)
+{
+       return !!TCF_META_ID(val->kind);
+}
+
+static inline int overwrite_type(struct tcf_meta_val *src,
+                                struct tcf_meta_val *dst)
+{
+       return (TCF_META_TYPE(dst->kind) << 12) | TCF_META_ID(src->kind);
+}
+       
+
+static inline struct bstr *
+parse_object(struct bstr *args, struct bstr *arg, struct tcf_meta_val *obj,
+            unsigned long *dst, struct tcf_meta_val *left)
+{
+       struct meta_entry *entry;
+       unsigned long num;
+       struct bstr *a;
+
+       if (arg->quoted) {
+               obj->kind = TCF_META_TYPE_VAR << 12;
+               obj->kind |= TCF_META_ID_VALUE;
+               *dst = (unsigned long) arg;
+               return bstr_next(arg);
+       }
+
+       num = bstrtoul(arg);
+       if (num != LONG_MAX) {
+               obj->kind = TCF_META_TYPE_INT << 12;
+               obj->kind |= TCF_META_ID_VALUE;
+               *dst = (unsigned long) num;
+               return bstr_next(arg);
+       }
+
+       entry = lookup_meta_entry(arg);
+
+       if (entry == NULL) {
+               PARSE_ERR(arg, "meta: unknown meta id\n");
+               return PARSE_FAILURE;
+       }
+
+       obj->kind = entry->id | (map_type(entry->mask[0]) << 12);
+
+       if (left) {
+               struct tcf_meta_val *right = obj;
+               
+               if (TCF_META_TYPE(right->kind) == TCF_META_TYPE(left->kind))
+                       goto compatible;
+
+               if (can_adopt(left) && !can_adopt(right)) {
+                       if (is_compatible(left, right))
+                               left->kind = overwrite_type(left, right);
+                       else
+                               goto not_compatible;
+               } else if (can_adopt(right) && !can_adopt(left)) {
+                       if (is_compatible(right, left))
+                               right->kind = overwrite_type(right, left);
+                       else
+                               goto not_compatible;
+               } else if (can_adopt(left) && can_adopt(right)) {
+                       if (is_compatible(left, right))
+                               left->kind = overwrite_type(left, right);
+                       else if (is_compatible(right, left))
+                               right->kind = overwrite_type(right, left);
+                       else
+                               goto not_compatible;
+               } else 
+                       goto not_compatible;
+       }
+
+compatible:
+
+       a = bstr_next(arg);
+
+       while(a) {
+               if (!bstrcmp(a, "shift")) {
+                       unsigned long shift;
+
+                       if (a->next == NULL) {
+                               PARSE_ERR(a, "meta: missing argument");
+                               return PARSE_FAILURE;
+                       }
+                       a = bstr_next(a);
+                       
+                       shift = bstrtoul(a);
+                       if (shift == LONG_MAX) {
+                               PARSE_ERR(a, "meta: invalid shift, must " \
+                                   "be numeric");
+                               return PARSE_FAILURE;
+                       }
+
+                       obj->shift = (__u8) shift;
+                       a = bstr_next(a);
+               } else if (!bstrcmp(a, "mask")) {
+                       unsigned long mask;
+
+                       if (a->next == NULL) {
+                               PARSE_ERR(a, "meta: missing argument");
+                               return PARSE_FAILURE;
+                       }
+                       a = bstr_next(a);
+                       
+                       mask = bstrtoul(a);
+                       if (mask == LONG_MAX) {
+                               PARSE_ERR(a, "meta: invalid mask, must be " \
+                                   "numeric");
+                               return PARSE_FAILURE;
+                       }
+                       *dst = (unsigned long) mask;
+                       a = bstr_next(a);
+               } else
+                       break;
+       }
+
+       return a;
+
+not_compatible:
+       PARSE_ERR(arg, "lvalue and rvalue are not compatible.");
+       return PARSE_FAILURE;
+}
+
+/*
+ * Parse the arguments of a meta ematch of the form
+ * "LEFT { eq | gt | lt } RIGHT" and append the result to netlink message n.
+ *
+ * The single argument "list" prints the known meta ids to stderr instead.
+ * Returns 0 on success; otherwise -1 or the value produced by PARSE_ERR.
+ */
+static int meta_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+                          struct bstr *args)
+{
+       int opnd;
+       struct bstr *a;
+       struct tcf_meta_hdr meta_hdr;
+       unsigned long lvalue = 0, rvalue = 0;
+
+       memset(&meta_hdr, 0, sizeof(meta_hdr));
+
+       if (args == NULL)
+               return PARSE_ERR(args, "meta: missing arguments");
+
+       /* "list" prints the known meta ids to stderr, then returns -1 */
+       if (!bstrcmp(args, "list")) {
+               list_meta_ids(stderr);
+               return -1;
+       }
+
+       /* Left operand: no other object exists yet to check its type against */
+       a = parse_object(args, args, &meta_hdr.left, &lvalue, NULL);
+       if (a == PARSE_FAILURE)
+               return -1;
+       else if (a == NULL)
+               return PARSE_ERR(args, "meta: missing operand");
+
+       if (!bstrcmp(a, "eq"))
+               opnd = TCF_EM_OPND_EQ;
+       else if (!bstrcmp(a, "gt"))
+               opnd = TCF_EM_OPND_GT;
+       else if (!bstrcmp(a, "lt"))
+               opnd = TCF_EM_OPND_LT;
+       else
+               return PARSE_ERR(a, "meta: invalid operand");
+
+       /* The comparison operator is stored in the left object */
+       meta_hdr.left.op = (__u8) opnd;
+
+       if (a->next == NULL)
+               return PARSE_ERR(args, "meta: missing rvalue");
+       a = bstr_next(a);
+
+       /* Right operand: the left object is passed along for the type check */
+       a = parse_object(args, a, &meta_hdr.right, &rvalue, &meta_hdr.left);
+       if (a == PARSE_FAILURE)
+               return -1;
+       else if (a != NULL)
+               /* anything left over after the right operand is an error */
+               return PARSE_ERR(a, "meta: unexpected trailer");
+       
+
+       /* Emit the ematch header first, then the meta header attribute */
+       addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+
+       addattr_l(n, MAX_MSG, TCA_EM_META_HDR, &meta_hdr, sizeof(meta_hdr));
+
+       /* A value attribute is only emitted when the parsed value is non-zero */
+       if (lvalue)
+               dump_value(n, TCA_EM_META_LVALUE, lvalue, &meta_hdr.left);
+
+       if (rvalue)
+               dump_value(n, TCA_EM_META_RVALUE, rvalue, &meta_hdr.right);
+
+       return 0;
+}
+#undef PARSE_ERR
+
+/*
+ * Print len bytes of str to fd.
+ *
+ * When every byte is printable the bytes are written as they are. Otherwise
+ * each byte is written as two hex digits followed by a space, and after that
+ * a double-quoted rendering in which unprintable bytes appear as '.'.
+ */
+static inline void print_binary(FILE *fd, unsigned char *str, int len)
+{
+       int pos;
+       int printable = 1;
+
+       /* stop scanning at the first unprintable byte */
+       for (pos = 0; printable && pos < len; pos++)
+               printable = isprint(str[pos]) != 0;
+
+       if (printable) {
+               for (pos = 0; pos < len; pos++)
+                       fputc(str[pos], fd);
+               return;
+       }
+
+       for (pos = 0; pos < len; pos++)
+               fprintf(fd, "%02x ", str[pos]);
+
+       fputc('"', fd);
+       for (pos = 0; pos < len; pos++)
+               fputc(isprint(str[pos]) ? str[pos] : '.', fd);
+       fputc('"', fd);
+}
+
+/*
+ * Print the literal value carried in the value TLV rta to fd.
+ *
+ * TCF_META_TYPE_INT payloads are printed as an unsigned decimal number,
+ * TCF_META_TYPE_VAR payloads through print_binary(). Any other type prints
+ * nothing.
+ *
+ * Returns 0 on success, -1 when rta is missing or an int payload is shorter
+ * than a __u32.
+ */
+static inline int print_value(FILE *fd, int type, struct rtattr *rta)
+{
+       if (rta == NULL) {
+               fprintf(stderr, "Missing value TLV\n");
+               return -1;
+       }
+
+       switch(type) {
+               case TCF_META_TYPE_INT:
+                       if (RTA_PAYLOAD(rta) < sizeof(__u32)) {
+                               fprintf(stderr, "meta int type value TLV " \
+                                   "size mismatch.\n");
+                               return -1;
+                       }
+                       /* the payload is read as __u32; %d would show large
+                        * values as negative numbers */
+                       fprintf(fd, "%u", *(__u32 *) RTA_DATA(rta));
+                       break;
+
+               case TCF_META_TYPE_VAR:
+                       print_binary(fd, RTA_DATA(rta), RTA_PAYLOAD(rta));
+                       break;
+       }
+
+       return 0;
+}
+
+/*
+ * Print one side of a meta comparison to fd.
+ *
+ * An object whose id is TCF_META_ID_VALUE is a literal and is handed to
+ * print_value(). Any other object is printed by name (or as an unknown id),
+ * followed by its shift if non-zero and, for int objects that carry a TLV,
+ * its mask.
+ *
+ * Returns 0 on success and -1 when the mask TLV is too short.
+ */
+static int print_object(FILE *fd, struct tcf_meta_val *obj, struct rtattr *rta)
+{
+       struct meta_entry *entry;
+       int type = TCF_META_TYPE(obj->kind);
+       int id = TCF_META_ID(obj->kind);
+
+       if (id == TCF_META_ID_VALUE)
+               return print_value(fd, type, rta);
+
+       entry = lookup_meta_entry_byid(id);
+       if (entry != NULL)
+               fprintf(fd, "%s", entry->kind);
+       else
+               fprintf(fd, "[unknown meta id %d]", id);
+
+       if (obj->shift)
+               fprintf(fd, " shift %d", obj->shift);
+
+       /* only int objects with a TLV attached have a mask to print */
+       if (type != TCF_META_TYPE_INT || rta == NULL)
+               return 0;
+
+       if (RTA_PAYLOAD(rta) < sizeof(__u32)) {
+               fprintf(stderr, "meta int type mask TLV size mismatch\n");
+               return -1;
+       }
+
+       fprintf(fd, " mask 0x%08x", *(__u32*) RTA_DATA(rta));
+
+       return 0;
+}
+
+
+/*
+ * Print a meta ematch as "LEFT { eq | lt | gt } RIGHT" to fd.
+ *
+ * data/data_len are parsed as a list of attributes. Returns -1 when parsing
+ * fails, the meta header attribute is missing or too short, or one of the
+ * objects cannot be printed; otherwise the result of printing the right
+ * object.
+ */
+static int meta_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+                          int data_len)
+{
+       struct rtattr *tb[TCA_EM_META_MAX+1];
+       struct tcf_meta_hdr *meta_hdr;
+
+       if (parse_rtattr(tb, TCA_EM_META_MAX, data, data_len) < 0)
+               return -1;
+
+       if (tb[TCA_EM_META_HDR] == NULL) {
+               fprintf(stderr, "Missing meta header\n");
+               return -1;
+       }
+
+       if (RTA_PAYLOAD(tb[TCA_EM_META_HDR]) < sizeof(*meta_hdr)) {
+               fprintf(stderr, "Meta header size mismatch\n");
+               return -1;
+       }
+
+       meta_hdr = RTA_DATA(tb[TCA_EM_META_HDR]);
+
+       if (print_object(fd, &meta_hdr->left, tb[TCA_EM_META_LVALUE]) < 0)
+               return -1;
+
+       /* the operator is read from the left object; other values print nothing */
+       switch (meta_hdr->left.op) {
+               case TCF_EM_OPND_EQ:
+                       fprintf(fd, " eq ");
+                       break;
+               case TCF_EM_OPND_LT:
+                       fprintf(fd, " lt ");
+                       break;
+               case TCF_EM_OPND_GT:
+                       fprintf(fd, " gt ");
+                       break;
+       }
+
+       return print_object(fd, &meta_hdr->right, tb[TCA_EM_META_RVALUE]);
+}
+
+/* Descriptor tying the "meta" ematch kind to its parse, print and usage callbacks. */
+struct ematch_util meta_ematch_util = {
+       .kind = "meta",
+       .kind_num = TCF_EM_META,
+       .parse_eopt = meta_parse_eopt,
+       .print_eopt = meta_print_eopt,
+       .print_usage = meta_print_usage
+};
diff --git a/tc/em_nbyte.c b/tc/em_nbyte.c
new file mode 100644 (file)
index 0000000..e0ed5ba
--- /dev/null
@@ -0,0 +1,144 @@
+/*
+ * em_nbyte.c          N-Byte Ematch
+ *
+ *             This program is free software; you can distribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <errno.h>
+
+#include "m_ematch.h"
+#include <linux/tc_ematch/tc_em_nbyte.h>
+
+extern struct ematch_util nbyte_ematch_util;
+
+/* Print the usage text of the nbyte ematch to fd; %d is filled with TCF_LAYER_MAX. */
+static void nbyte_print_usage(FILE *fd)
+{
+       fprintf(fd,
+           "Usage: nbyte(NEEDLE at OFFSET [layer LAYER])\n" \
+           "where: NEEDLE := { string | \"c-escape-sequence\" }\n" \
+           "       OFFSET := int\n" \
+           "       LAYER  := { link | header | next-header | 0..%d }\n" \
+           "\n" \
+           "Example: nbyte(\"ababa\" at 12 layer 1)\n",
+           TCF_LAYER_MAX);
+}
+
+/*
+ * Parse "NEEDLE at OFFSET [layer LAYER]" and append the ematch header, the
+ * nbyte header and the needle bytes to netlink message n.
+ *
+ * Returns 0 on success or the value produced by PARSE_ERR on a parse error.
+ */
+static int nbyte_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+                           struct bstr *args)
+{
+       struct bstr *a;
+       /* the first argument is the needle itself */
+       struct bstr *needle = args;
+       unsigned long offset = 0, layer = TCF_LAYER_NETWORK;
+       int offset_present = 0;
+       struct tcf_em_nbyte nb;
+
+       memset(&nb, 0, sizeof(nb));
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+       em_parse_error(EINVAL, args, CARG, &nbyte_ematch_util, FMT ,##ARGS)
+
+       if (args == NULL)
+               return PARSE_ERR(args, "nbyte: missing arguments");
+
+       if (needle->len <= 0)
+               return PARSE_ERR(args, "nbyte: needle length is 0");
+
+       /* the remaining arguments are "at OFFSET" and "layer LAYER", in any order */
+       for (a = bstr_next(args); a; a = bstr_next(a)) {
+               if (!bstrcmp(a, "at")) {
+                       if (a->next == NULL)
+                               return PARSE_ERR(a, "nbyte: missing argument");
+                       a = bstr_next(a);
+
+                       offset = bstrtoul(a);
+                       if (offset == ULONG_MAX)
+                               return PARSE_ERR(a, "nbyte: invalid offset, " \
+                                   "must be numeric");
+
+                       offset_present = 1;
+               } else if (!bstrcmp(a, "layer")) {
+                       if (a->next == NULL)
+                               return PARSE_ERR(a, "nbyte: missing argument");
+                       a = bstr_next(a);
+
+                       /* try parse_layer() first; INT_MAX from it means fall
+                        * back to reading the argument as a number */
+                       layer = parse_layer(a);
+                       if (layer == INT_MAX) {
+                               layer = bstrtoul(a);
+                               if (layer == ULONG_MAX)
+                                       return PARSE_ERR(a, "nbyte: invalid " \
+                                           "layer");
+                       }
+
+                       if (layer > TCF_LAYER_MAX)
+                               return PARSE_ERR(a, "nbyte: illegal layer, " \
+                                   "must be in 0..%d", TCF_LAYER_MAX);
+               } else
+                       return PARSE_ERR(a, "nbyte: unknown parameter");
+       }
+
+       /* NOTE(review): a is NULL here because the loop ran to completion;
+        * confirm that em_parse_error() accepts a NULL CARG */
+       if (offset_present == 0)
+               return PARSE_ERR(a, "nbyte: offset required");
+       
+       /* NOTE(review): offset is only checked against ULONG_MAX above, so the
+        * __u16 cast silently truncates larger values -- confirm intended */
+       nb.len = needle->len;
+       nb.layer = (__u8) layer;
+       nb.off = (__u16) offset;
+
+       /* message layout: ematch header, nbyte header, needle bytes */
+       addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+       addraw_l(n, MAX_MSG, &nb, sizeof(nb));
+       addraw_l(n, MAX_MSG, needle->data, needle->len);
+
+#undef PARSE_ERR
+       return 0;
+}
+
+/*
+ * Print an nbyte ematch to fd: the needle as hex bytes, then as a quoted
+ * string with unprintable bytes shown as '.', followed by offset and layer.
+ *
+ * Returns 0 on success, -1 when data is shorter than the nbyte header or
+ * than the header plus the needle length it announces.
+ */
+static int nbyte_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+                           int data_len)
+{
+       struct tcf_em_nbyte *nb = data;
+       __u8 *needle;
+       int pos;
+
+       if (data_len < sizeof(*nb)) {
+               fprintf(stderr, "NByte header size mismatch\n");
+               return -1;
+       }
+
+       if (data_len < sizeof(*nb) + nb->len) {
+               fprintf(stderr, "NByte payload size mismatch\n");
+               return -1;
+       }
+
+       /* the needle bytes start right after the header */
+       needle = (__u8 *) data + sizeof(*nb);
+
+       for (pos = 0; pos < nb->len; pos++)
+               fprintf(fd, "%02x ", needle[pos]);
+
+       fputc('"', fd);
+       for (pos = 0; pos < nb->len; pos++)
+               fputc(isprint(needle[pos]) ? needle[pos] : '.', fd);
+       fprintf(fd, "\" at %d layer %d", nb->off, nb->layer);
+
+       return 0;
+}
+
+/* Descriptor tying the "nbyte" ematch kind to its parse, print and usage callbacks. */
+struct ematch_util nbyte_ematch_util = {
+       .kind = "nbyte",
+       .kind_num = TCF_EM_NBYTE,
+       .parse_eopt = nbyte_parse_eopt,
+       .print_eopt = nbyte_print_eopt,
+       .print_usage = nbyte_print_usage
+};
diff --git a/tc/em_u32.c b/tc/em_u32.c
new file mode 100644 (file)
index 0000000..b8857f1
--- /dev/null
@@ -0,0 +1,178 @@
+/*
+ * em_u32.c            U32 Ematch
+ *
+ *             This program is free software; you can distribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <errno.h>
+
+#include "m_ematch.h"
+
+extern struct ematch_util u32_ematch_util;
+
+/* Print the usage text of the u32 ematch to fd. */
+static void u32_print_usage(FILE *fd)
+{
+       fprintf(fd,
+           "Usage: u32(ALIGN VALUE MASK at [ nexthdr+ ] OFFSET)\n" \
+           "where: ALIGN  := { u8 | u16 | u32 }\n" \
+           "\n" \
+           "Example: u32(u16 0x1122 0xffff at nexthdr+4)\n");
+}
+
+/*
+ * Parse "ALIGN VALUE MASK at [ nexthdr+ ] OFFSET" and append the ematch
+ * header followed by the resulting tc_u32_key to netlink message n.
+ *
+ * ALIGN is u8, u16 or u32. For u8 and u16 the key and mask are shifted to
+ * their position inside the 32-bit word and the offset is rounded down to
+ * that word. The offset may be prefixed with "nexthdr+", written either as
+ * one token ("nexthdr+4") or as two ("nexthdr+ 4"); both set offmask.
+ *
+ * Returns 0 on success or the value produced by PARSE_ERR on a parse error.
+ */
+static int u32_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr,
+                         struct bstr *args)
+{
+       struct bstr *a;
+       int align, nh_len;
+       unsigned long key, mask, offmask = 0, offset;
+       struct tc_u32_key u_key;
+
+       memset(&u_key, 0, sizeof(u_key));
+
+#define PARSE_ERR(CARG, FMT, ARGS...) \
+       em_parse_error(EINVAL, args, CARG, &u32_ematch_util, FMT ,##ARGS)
+
+       if (args == NULL)
+               return PARSE_ERR(args, "u32: missing arguments");
+
+       if (!bstrcmp(args, "u8"))
+               align = 1;
+       else if (!bstrcmp(args, "u16"))
+               align = 2;
+       else if (!bstrcmp(args, "u32"))
+               align = 4;
+       else
+               return PARSE_ERR(args, "u32: invalid alignment");
+
+       a = bstr_next(args);
+       if (a == NULL)
+               return PARSE_ERR(a, "u32: missing key");
+
+       key = bstrtoul(a);
+       if (key == ULONG_MAX)
+               return PARSE_ERR(a, "u32: invalid key, must be numeric");
+
+       a = bstr_next(a);
+       if (a == NULL)
+               return PARSE_ERR(a, "u32: missing mask");
+
+       mask = bstrtoul(a);
+       if (mask == ULONG_MAX)
+               return PARSE_ERR(a, "u32: invalid mask, must be numeric");
+
+       a = bstr_next(a);
+       if (a == NULL || bstrcmp(a, "at") != 0)
+               return PARSE_ERR(a, "u32: missing \"at\"");
+
+       a = bstr_next(a);
+       if (a == NULL)
+               return PARSE_ERR(a, "u32: missing offset");
+
+       nh_len = strlen("nexthdr+");
+       if (a->len > nh_len && !memcmp(a->data, "nexthdr+", nh_len)) {
+               char buf[a->len - nh_len + 1];
+
+               offmask = -1;
+               memcpy(buf, a->data + nh_len, a->len - nh_len);
+               /* memcpy copies only the digits; terminate the string
+                * before strtoul() reads it */
+               buf[a->len - nh_len] = '\0';
+               offset = strtoul(buf, NULL, 0);
+       } else if (!bstrcmp(a, "nexthdr+")) {
+               a = bstr_next(a);
+               if (a == NULL)
+                       return PARSE_ERR(a, "u32: missing offset");
+               /* "nexthdr+" given as its own token is still relative to
+                * the next header */
+               offmask = -1;
+               offset = bstrtoul(a);
+       } else
+               offset = bstrtoul(a);
+
+       if (offset == ULONG_MAX)
+               return PARSE_ERR(a, "u32: invalid offset");
+
+       if (a->next)
+               return PARSE_ERR(a->next, "u32: unexpected trailer");
+
+       switch (align) {
+               case 1:
+                       if (key > 0xFF)
+                               return PARSE_ERR(a, "Illegal key (>0xFF)");
+                       if (mask > 0xFF)
+                               return PARSE_ERR(a, "Illegal mask (>0xFF)");
+
+                       /* move the byte to its place in the 32-bit word */
+                       key <<= 24 - ((offset & 3) * 8);
+                       mask <<= 24 - ((offset & 3) * 8);
+                       offset &= ~3;
+                       break;
+
+               case 2:
+                       if (key > 0xFFFF)
+                               return PARSE_ERR(a, "Illegal key (>0xFFFF)");
+                       if (mask > 0xFFFF)
+                               return PARSE_ERR(a, "Illegal mask (>0xFFFF)");
+
+                       /* a halfword at the start of the word is the upper
+                        * 16 bits */
+                       if ((offset & 3) == 0) {
+                               key <<= 16;
+                               mask <<= 16;
+                       }
+                       offset &= ~3;
+                       break;
+       }
+
+       key = htonl(key);
+       mask = htonl(mask);
+
+       if (offset % 4)
+               return PARSE_ERR(a, "u32: invalid offset alignment, " \
+                   "must be aligned to 4.");
+
+       key &= mask;
+
+       u_key.mask = mask;
+       u_key.val = key;
+       u_key.off = offset;
+       u_key.offmask = offmask;
+
+       addraw_l(n, MAX_MSG, hdr, sizeof(*hdr));
+       addraw_l(n, MAX_MSG, &u_key, sizeof(u_key));
+
+#undef PARSE_ERR
+       return 0;
+}
+
+/*
+ * Print a u32 ematch to fd as "VALUE/MASK at [nexthdr+]OFFSET", with value
+ * and mask converted from network byte order and shown as 8 hex digits.
+ *
+ * Returns 0 on success, -1 when data is shorter than a tc_u32_key.
+ */
+static int u32_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data,
+                         int data_len)
+{
+       struct tc_u32_key *k = data;
+       unsigned int val, mask;
+       const char *prefix;
+
+       if (data_len < sizeof(*k)) {
+               fprintf(stderr, "U32 header size mismatch\n");
+               return -1;
+       }
+
+       val = (unsigned int) ntohl(k->val);
+       mask = (unsigned int) ntohl(k->mask);
+       /* a non-zero offmask marks the offset as relative to the next header */
+       prefix = k->offmask ? "nexthdr+" : "";
+
+       fprintf(fd, "%08x/%08x at %s%d", val, mask, prefix, k->off);
+
+       return 0;
+}
+
+/* Descriptor tying the "u32" ematch kind to its parse, print and usage callbacks. */
+struct ematch_util u32_ematch_util = {
+       .kind = "u32",
+       .kind_num = TCF_EM_U32,
+       .parse_eopt = u32_parse_eopt,
+       .print_eopt = u32_print_eopt,
+       .print_usage = u32_print_usage
+};
diff --git a/tc/emp_ematch.l b/tc/emp_ematch.l
new file mode 100644 (file)
index 0000000..09d535d
--- /dev/null
@@ -0,0 +1,145 @@
+%{
+ #include "emp_ematch.yacc.h"
+ #include "m_ematch.h"
+
+ extern int ematch_argc;
+ extern char **ematch_argv;
+
+ #define yylval ematch_lval
+
+ #define NEXT_EM_ARG() do { ematch_argc--; ematch_argv++; } while(0);
+
+ #define YY_INPUT(buf, result, max_size)                               \
+ {                                                                     \
+ next:                                                                 \
+       if (ematch_argc <= 0)                                           \
+               result = YY_NULL;                                       \
+       else if (**ematch_argv == '\0') {                               \
+               NEXT_EM_ARG();                                          \
+               goto next;                                              \
+       } else {                                                        \
+               if (max_size <= strlen(*ematch_argv) + 1) {             \
+                       fprintf(stderr, "match argument too long.\n");  \
+                       result = YY_NULL;                               \
+               } else {                                                \
+                       strcpy(buf, *ematch_argv);                      \
+                       result = strlen(*ematch_argv) + 1;              \
+                       buf[result-1] = ' ';                            \
+                       buf[result] = '\0';                             \
+                       NEXT_EM_ARG();                                  \
+               }                                                       \
+       }                                                               \
+ }
+
+ static void __attribute__ ((unused)) yyunput (int c,char *buf_ptr  );
+ static void __attribute__ ((unused)) yy_push_state (int  new_state );
+ static void __attribute__ ((unused)) yy_pop_state  (void);
+ static int  __attribute__ ((unused)) yy_top_state (void );
+
+ static char *strbuf;
+ static unsigned int strbuf_size;
+ static unsigned int strbuf_index;
+
+ /*
+  * Grow strbuf by 512 bytes.
+  *
+  * The callers loop until the buffer is large enough and cannot handle a
+  * failure, so running out of memory terminates the program instead of
+  * leaking the old buffer and writing through a NULL pointer.
+  */
+ static void strbuf_enlarge(void)
+ {
+       char *grown = realloc(strbuf, strbuf_size + 512);
+
+       if (grown == NULL) {
+               fprintf(stderr, "error: out of memory while parsing string\n");
+               exit(1);
+       }
+
+       strbuf = grown;
+       strbuf_size += 512;
+ }
+
+ /* Append the character c to strbuf, enlarging the buffer first when it is full. */
+ static void strbuf_append_char(char c)
+ {
+       while (strbuf_index >= strbuf_size)
+               strbuf_enlarge();
+       strbuf[strbuf_index++] = c;
+ }
+
+ /*
+  * Append the NUL-terminated string s (without its terminator) to strbuf.
+  *
+  * The buffer is enlarged until the whole string fits; checking only the
+  * current index would let a long string run past the end of the buffer.
+  */
+ static void strbuf_append_charp(char *s)
+ {
+       size_t len = strlen(s);
+
+       while (strbuf_index + len > strbuf_size)
+               strbuf_enlarge();
+       memcpy(strbuf + strbuf_index, s, len);
+       strbuf_index += len;
+ }
+
+%}
+
+%x str
+
+%option 8bit stack warn noyywrap prefix="ematch_"
+%%
+[ \t\r\n]+
+
+\"                                     {
+                                               if (strbuf == NULL) {
+                                                       strbuf_size = 512;
+                                                       strbuf = calloc(1, strbuf_size);
+                                                       if (strbuf == NULL)
+                                                               return ERROR;
+                                               }
+                                               strbuf_index = 0;
+                                               
+                                               BEGIN(str);
+                                       }
+
+<str>\"                                        {
+                                               BEGIN(INITIAL);
+                                               yylval.b = bstr_new(strbuf, strbuf_index);
+                                               yylval.b->quoted = 1;
+                                               return ATTRIBUTE;
+                                       }
+
+<str>\\[0-7]{1,3}                      { /* octal escape sequence */
+                                               int res;
+                                               
+                                               sscanf(yytext + 1, "%o", &res);
+                                               if (res > 0xFF) {
+                                                       fprintf(stderr, "error: octal escape sequence" \
+                                                       " out of range\n");
+                                                       return ERROR;
+                                               }
+                                               strbuf_append_char((unsigned char) res);
+                                       }
+
+<str>\\[0-9]+                          { /* catch wrong octal escape seq. */
+                                               fprintf(stderr, "error: invalid octal escape sequence\n");
+                                               return ERROR;
+                                       }
+
+<str>\\x[0-9a-fA-F]{1,2}               {
+                                               int res;
+                                               
+                                               sscanf(yytext + 2, "%x", &res);
+                                               
+                                               if (res > 0xFF) {
+                                                       fprintf(stderr, "error: hexadecimal escape " \
+                                                       "sequence out of range\n");
+                                                       return ERROR;
+                                               }
+                                               strbuf_append_char((unsigned char) res);
+                                       }
+
+<str>\\n                               strbuf_append_char('\n');
+<str>\\r                               strbuf_append_char('\r');
+<str>\\t                               strbuf_append_char('\t');
+<str>\\v                               strbuf_append_char('\v');
+<str>\\b                               strbuf_append_char('\b');
+<str>\\f                               strbuf_append_char('\f');
+<str>\\a                               strbuf_append_char('\a');
+
+<str>\\(.|\n)                          strbuf_append_char(yytext[1]);
+<str>[^\\\n\"]+                                strbuf_append_charp(yytext);
+
+[aA][nN][dD]                           return AND;
+[oO][rR]                               return OR;
+[nN][oO][tT]                           return NOT;
+"("                                    |
+")"                                    {
+                                               return yylval.i = *yytext;
+                                       }
+[^ \t\r\n()]+                          {
+                                               yylval.b = bstr_alloc(yytext);
+                                               if (yylval.b == NULL)
+                                                       return ERROR;
+                                               return ATTRIBUTE;
+                                       }
+%%
diff --git a/tc/emp_ematch.y b/tc/emp_ematch.y
new file mode 100644 (file)
index 0000000..e8d1671
--- /dev/null
@@ -0,0 +1,101 @@
+%{
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <malloc.h>
+ #include <string.h>
+ #include "m_ematch.h"
+%}
+
+%locations
+%token-table
+%error-verbose
+%name-prefix="ematch_"
+
+%union {
+       unsigned int i;
+       struct bstr *b;
+       struct ematch *e;
+}
+
+%{
+ extern int ematch_lex(void);
+ extern void yyerror(char *s);
+ extern struct ematch *ematch_root;
+ extern char *ematch_err;
+%}
+
+%token <i> ERROR
+%token <b> ATTRIBUTE
+%token <i> AND OR NOT
+%type <i> invert relation
+%type <e> match expr
+%type <b> args
+%right AND OR
+%start input
+%%
+input:
+       /* empty */
+       | expr
+               { ematch_root = $1; }
+       | expr error
+               {
+                       ematch_root = $1;
+                       YYACCEPT;
+               }
+       ;
+
+expr:
+       match
+               { $$ = $1; }
+       | match relation expr
+               {
+                       $1->relation = $2;
+                       $1->next = $3;
+                       $$ = $1;
+               }
+       ;
+
+match:
+       invert ATTRIBUTE '(' args ')'
+               {
+                       $2->next = $4;
+                       $$ = new_ematch($2, $1);
+                       if ($$ == NULL)
+                               YYABORT;
+               }
+       | invert '(' expr ')'
+               {
+                       $$ = new_ematch(NULL, $1);
+                       if ($$ == NULL)
+                               YYABORT;
+                       $$->child = $3;
+               }
+       ;
+
+args:
+       ATTRIBUTE
+               { $$ = $1; }
+       | ATTRIBUTE args
+               { $1->next = $2; }
+       ;
+
+relation:
+       AND
+               { $$ = TCF_EM_REL_AND; }
+       | OR
+               { $$ = TCF_EM_REL_OR; }
+       ;
+
+invert:
+       /* empty */
+               { $$ = 0; }
+       | NOT
+               { $$ = 1; }
+       ;
+%%
+
+ /* Store a strdup()ed copy of the message s in ematch_err. */
+ void yyerror(char *s)
+ {
+        ematch_err = strdup(s);
+ }
+
diff --git a/tc/f_basic.c b/tc/f_basic.c
new file mode 100644 (file)
index 0000000..264f358
--- /dev/null
@@ -0,0 +1,146 @@
+/*
+ * f_basic.c           Basic Classifier
+ *
+ *             This program is free software; you can redistribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Thomas Graf <tgraf@suug.ch>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <linux/if.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "m_ematch.h"
+
+/* Write the usage text of the basic classifier to stderr. */
+static void explain(void)
+{
+       fputs("Usage: ... basic [ match EMATCH_TREE ] [ police POLICE_SPEC ]\n"
+             "                 [ action ACTION_SPEC ] [ classid CLASSID ]\n"
+             "\n"
+             "Where: SELECTOR := SAMPLE SAMPLE ...\n"
+             "       FILTERID := X:Y:Z\n",
+             stderr);
+}
+
+/*
+ * Parse the command line options of the basic classifier and append them to
+ * netlink message n as a nested TCA_OPTIONS attribute.
+ *
+ * handle, when given, must be a complete number in any base strtol() accepts
+ * with base 0; it is stored in the tcmsg handle. Recognised options are
+ * "match", "classid"/"flowid", "action", "police" and "help".
+ *
+ * Returns 0 on success (also when argc is 0) and -1 on any error or after
+ * printing the help text.
+ */
+static int basic_parse_opt(struct filter_util *qu, char *handle,
+                          int argc, char **argv, struct nlmsghdr *n)
+{
+       struct tcmsg *t = NLMSG_DATA(n);
+       struct rtattr *tail;
+       long h = 0;
+
+       if (argc == 0)
+               return 0;
+
+       if (handle) {
+               char *end;
+
+               h = strtol(handle, &end, 0);
+               /* strtol() returns 0 for text it cannot convert, so the end
+                * pointer has to be checked to reject non-numeric input */
+               if (end == handle || *end != '\0' ||
+                   h == LONG_MIN || h == LONG_MAX) {
+                       fprintf(stderr, "Illegal handle \"%s\", must be numeric.\n",
+                           handle);
+                       return -1;
+               }
+       }
+
+       t->tcm_handle = h;
+
+       /* remember where the nested attribute starts so that its length can
+        * be fixed up once all options have been added */
+       tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len));
+       addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0);
+
+       while (argc > 0) {
+               if (matches(*argv, "match") == 0) {
+                       NEXT_ARG();
+                       if (parse_ematch(&argc, &argv, TCA_BASIC_EMATCHES, n)) {
+                               fprintf(stderr, "Illegal \"ematch\"\n");
+                               return -1;
+                       }
+                       /* the sub-parser was handed argc/argv, so skip the
+                        * increment at the bottom of the loop */
+                       continue;
+               } else if (matches(*argv, "classid") == 0 ||
+                          strcmp(*argv, "flowid") == 0) {
+                       unsigned classid;
+                       NEXT_ARG();
+                       if (get_tc_classid(&classid, *argv)) {
+                               fprintf(stderr, "Illegal \"classid\"\n");
+                               return -1;
+                       }
+                       addattr_l(n, MAX_MSG, TCA_BASIC_CLASSID, &classid, 4);
+               } else if (matches(*argv, "action") == 0) {
+                       NEXT_ARG();
+                       if (parse_action(&argc, &argv, TCA_BASIC_ACT, n)) {
+                               fprintf(stderr, "Illegal \"action\"\n");
+                               return -1;
+                       }
+                       continue;
+
+               } else if (matches(*argv, "police") == 0) {
+                       NEXT_ARG();
+                       if (parse_police(&argc, &argv, TCA_BASIC_POLICE, n)) {
+                               fprintf(stderr, "Illegal \"police\"\n");
+                               return -1;
+                       }
+                       continue;
+               } else if (strcmp(*argv, "help") == 0) {
+                       explain();
+                       return -1;
+               } else {
+                       fprintf(stderr, "What is \"%s\"?\n", *argv);
+                       explain();
+                       return -1;
+               }
+               argc--; argv++;
+       }
+
+       tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail;
+       return 0;
+}
+
+/*
+ * Print the options of a basic classifier to f.
+ *
+ * opt is parsed as a nested attribute; the handle (when non-zero), the class
+ * id, the ematch tree, the police settings and the actions are printed in
+ * that order for each part that is present. Always returns 0.
+ */
+static int basic_print_opt(struct filter_util *qu, FILE *f,
+                          struct rtattr *opt, __u32 handle)
+{
+       struct rtattr *tb[TCA_BASIC_MAX+1];
+
+       if (opt == NULL)
+               return 0;
+
+       /* NOTE(review): the result of parse_rtattr_nested() is not checked */
+       parse_rtattr_nested(tb, TCA_BASIC_MAX, opt);
+
+       if (handle)
+               fprintf(f, "handle 0x%x ", handle);
+
+       if (tb[TCA_BASIC_CLASSID]) {
+               SPRINT_BUF(b1);
+               fprintf(f, "flowid %s ",
+                       sprint_tc_classid(*(__u32*)RTA_DATA(tb[TCA_BASIC_CLASSID]), b1));
+       }
+
+       if (tb[TCA_BASIC_EMATCHES])
+               print_ematch(f, tb[TCA_BASIC_EMATCHES]);
+
+       if (tb[TCA_BASIC_POLICE]) {
+               fprintf(f, "\n");
+               tc_print_police(f, tb[TCA_BASIC_POLICE]);
+       }
+
+       if (tb[TCA_BASIC_ACT]) {
+               tc_print_action(f, tb[TCA_BASIC_ACT]);
+       }
+
+       return 0;
+}
+
+/* Descriptor tying the "basic" filter id to its option parse and print callbacks. */
+struct filter_util basic_filter_util = {
+       .id = "basic",
+       .parse_fopt = basic_parse_opt,
+       .print_fopt = basic_print_opt,
+};
index 50dc4df..9d527fc 100644 (file)
@@ -34,7 +34,7 @@ static void explain(void)
        fprintf(stderr, "or         u32 divisor DIVISOR\n");
        fprintf(stderr, "\n");
        fprintf(stderr, "Where: SELECTOR := SAMPLE SAMPLE ...\n");
-       fprintf(stderr, "       SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} | mark } SAMPLE_ARGS\n");
+       fprintf(stderr, "       SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} | mark } SAMPLE_ARGS [divisor DIVISOR]\n");
        fprintf(stderr, "       FILTERID := X:Y:Z\n");
 }
 
@@ -495,7 +495,7 @@ static int parse_ip6(int *argc_p, char ***argv_p, struct tc_u32_sel *sel)
        }
        if (strcmp(*argv, "priority") == 0) {
                NEXT_ARG();
-               res = parse_u8(&argc, &argv, sel, 0, 0);
+               res = parse_u8(&argc, &argv, sel, 4, 0);
                goto done;
        }
        if (strcmp(*argv, "protocol") == 0) {
@@ -833,8 +833,9 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char **
                } else if (matches(*argv, "divisor") == 0) {
                        unsigned divisor;
                        NEXT_ARG();
-                       if (get_unsigned(&divisor, *argv, 0) || divisor == 0 ||
-                           divisor > 0x100) {
+                       if (get_unsigned(&divisor, *argv, 0) || 
+                           divisor == 0 ||
+                           divisor > 0x100 || ((divisor - 1) & divisor)) {
                                fprintf(stderr, "Illegal \"divisor\"\n");
                                return -1;
                        }
@@ -874,10 +875,13 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char **
                                htid = (handle&0xFFFFF000);
                } else if (strcmp(*argv, "sample") == 0) {
                        __u32 hash;
+                       unsigned divisor = 0x100;
+
                        struct {
                                struct tc_u32_sel sel;
                                struct tc_u32_key keys[4];
                        } sel2;
+                       memset(&sel2, 0, sizeof(sel2));
                        NEXT_ARG();
                        if (parse_selector(&argc, &argv, &sel2.sel, n)) {
                                fprintf(stderr, "Illegal \"sample\"\n");
@@ -887,10 +891,19 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char **
                                fprintf(stderr, "\"sample\" must contain exactly ONE key.\n");
                                return -1;
                        }
+                       if (*argv != 0 && strcmp(*argv, "divisor") == 0) {
+                               NEXT_ARG();
+                               if (get_unsigned(&divisor, *argv, 0) || divisor == 0 ||
+                                   divisor > 0x100 || ((divisor - 1) & divisor)) {
+                                       fprintf(stderr, "Illegal sample \"divisor\"\n");
+                                       return -1;
+                               }
+                               NEXT_ARG();
+                       }
                        hash = sel2.sel.keys[0].val&sel2.sel.keys[0].mask;
                        hash ^= hash>>16;
                        hash ^= hash>>8;
-                       htid = ((hash<<12)&0xFF000)|(htid&0xFFF00000);
+                       htid = ((hash%divisor)<<12)|(htid&0xFFF00000);
                        sample_ok = 1;
                        continue;
                } else if (strcmp(*argv, "indev") == 0) {
diff --git a/tc/m_ematch.c b/tc/m_ematch.c
new file mode 100644 (file)
index 0000000..44c621b
--- /dev/null
@@ -0,0 +1,493 @@
+/*
+ * m_ematch.c          Extended Matches
+ *
+ *             This program is free software; you can distribute it and/or
+ *             modify it under the terms of the GNU General Public License
+ *             as published by the Free Software Foundation; either version
+ *             2 of the License, or (at your option) any later version.
+ *
+ * Authors:    Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include "utils.h"
+#include "tc_util.h"
+#include "m_ematch.h"
+
+#define EMATCH_MAP "/etc/iproute2/ematch_map"
+
+static struct ematch_util *ematch_list;
+
+/* export to bison parser */
+int ematch_argc;
+char **ematch_argv;
+char *ematch_err = NULL;
+struct ematch *ematch_root;
+
+static int begin_argc;
+static char **begin_argv;
+
+/*
+ * Tell the user (on stderr) that ematch "kind" has no entry in
+ * EMATCH_MAP and print a suggested map line built from num and kind.
+ */
+static inline void map_warning(int num, char *kind)
+{
+       fprintf(stderr,
+           "Error: Unable to find ematch \"%s\" in %s\n" \
+           "Please assign a unique ID to the ematch kind the suggested " \
+           "entry is:\n" \
+           "\t%d\t%s\n", 
+           kind, EMATCH_MAP, num, kind);
+}
+
+/*
+ * lookup_map - translate a numeric ematch ID into its kind name.
+ *
+ * @num:  ID to look for
+ * @dst:  receives the name (may be NULL to only test for existence)
+ * @len:  size of dst in bytes; the name is truncated to len - 1 bytes
+ * @file: path of the map file, lines of the form "<id> <name>"
+ *
+ * Blank lines and lines whose first non-blank character is '#' are
+ * skipped.  Returns 0 on success, -errno if the file cannot be opened,
+ * -EINVAL on a malformed line and -ENOENT if the ID is not listed.
+ */
+static int lookup_map(__u16 num, char *dst, int len, const char *file)
+{
+       int err = -EINVAL;
+       char buf[512];
+       FILE *fd = fopen(file, "r");
+
+       if (fd == NULL)
+               return -errno;
+
+       while (fgets(buf, sizeof(buf), fd)) {
+               char namebuf[512], *p = buf;
+               int id;
+
+               while (*p == ' ' || *p == '\t')
+                       p++;
+               if (*p == '#' || *p == '\n' || *p == 0)
+                       continue;
+
+               if (sscanf(p, "%d %s", &id, namebuf) != 2) {
+                       fprintf(stderr, "ematch map %s corrupted at %s\n",
+                           file, p);
+                       goto out;
+               }
+
+               if (id == num) {
+                       if (dst && len > 0) {
+                               /* strncpy() does not terminate on
+                                * truncation, so do it explicitly. */
+                               strncpy(dst, namebuf, len - 1);
+                               dst[len - 1] = '\0';
+                       }
+                       err = 0;
+                       goto out;
+               }
+       }
+
+       err = -ENOENT;
+out:
+       fclose(fd);
+       return err;
+}
+
+/*
+ * lookup_map_id - translate an ematch kind name into its numeric ID.
+ *
+ * @kind: name to look for (compared case-insensitively)
+ * @dst:  receives the ID; set to 0 when the name is not listed.
+ *        May be NULL to only test for existence.
+ * @file: path of the map file, lines of the form "<id> <name>"
+ *
+ * Blank lines and lines whose first non-blank character is '#' are
+ * skipped.  Returns 0 on success, -errno if the file cannot be opened,
+ * -EINVAL on a malformed line and -ENOENT if the name is not listed.
+ */
+static int lookup_map_id(char *kind, int *dst, const char *file)
+{
+       int err = -EINVAL;
+       char buf[512];
+       FILE *fd = fopen(file, "r");
+
+       if (fd == NULL)
+               return -errno;
+
+       while (fgets(buf, sizeof(buf), fd)) {
+               char namebuf[512], *p = buf;
+               int id;
+
+               while (*p == ' ' || *p == '\t')
+                       p++;
+               if (*p == '#' || *p == '\n' || *p == 0)
+                       continue;
+
+               if (sscanf(p, "%d %s", &id, namebuf) != 2) {
+                       fprintf(stderr, "ematch map %s corrupted at %s\n",
+                           file, p);
+                       goto out;
+               }
+
+               if (!strcasecmp(namebuf, kind)) {
+                       if (dst)
+                               *dst = id;
+                       err = 0;
+                       goto out;
+               }
+       }
+
+       err = -ENOENT;
+       /* dst is optional on the success path, so guard it here too. */
+       if (dst)
+               *dst = 0;
+out:
+       fclose(fd);
+       return err;
+}
+
+/*
+ * Resolve an ematch kind name to its ematch_util descriptor.
+ *
+ * Descriptors already on ematch_list are returned directly.  Otherwise
+ * the symbol "<kind>_ematch_util" is looked up in "em_<kind>.so" or,
+ * if that library cannot be opened, in the handle obtained from
+ * dlopen(NULL), which is cached across calls.  A newly found
+ * descriptor is pushed onto ematch_list.  Returns NULL when nothing
+ * can be resolved.
+ */
+static struct ematch_util *get_ematch_kind(char *kind)
+{
+       static void *body;
+       struct ematch_util *util;
+       char name[256];
+       void *handle;
+
+       /* Fast path: this kind was resolved by an earlier call. */
+       for (util = ematch_list; util != NULL; util = util->next)
+               if (!strcmp(util->kind, kind))
+                       return util;
+
+       snprintf(name, sizeof(name), "em_%s.so", kind);
+       handle = dlopen(name, RTLD_LAZY);
+       if (!handle) {
+               /* No dedicated module: fall back to the cached handle. */
+               if (!body)
+                       body = dlopen(NULL, RTLD_LAZY);
+               if (!body)
+                       return NULL;
+               handle = body;
+       }
+
+       snprintf(name, sizeof(name), "%s_ematch_util", kind);
+       util = dlsym(handle, name);
+       if (!util)
+               return NULL;
+
+       util->next = ematch_list;
+       ematch_list = util;
+       return util;
+}
+
+/*
+ * Resolve a numeric ematch kind to its ematch_util descriptor by first
+ * translating the number to a name through EMATCH_MAP.  Returns NULL
+ * when the number is not in the map or the kind cannot be resolved.
+ */
+static struct ematch_util *get_ematch_kind_num(__u16 kind)
+{
+       char name[32];
+
+       if (lookup_map(kind, name, sizeof(name), EMATCH_MAP) < 0)
+               return NULL;
+
+       return get_ematch_kind(name);
+}
+
+/*
+ * parse_tree - append every node of a flattened ematch list to n.
+ *
+ * Each node becomes one attribute whose type is its 1-based position
+ * in the list.  A container node (t->child set) carries the ematch
+ * header followed by the 32-bit child reference; a leaf node is
+ * handed to the parse_eopt() callback of its kind, the kind name being
+ * the first argument of the node.
+ *
+ * Returns 0 on success, -1 on a missing argument list, an unknown
+ * kind or a failing parse_eopt(), or the negative error returned by
+ * lookup_map_id().
+ */
+static int parse_tree(struct nlmsghdr *n, struct ematch *tree)
+{
+       int index = 1;
+       struct ematch *t;
+
+       for (t = tree; t; t = t->next) {
+               /* Remember where this attribute starts so that its
+                * length can be fixed up once the payload is known. */
+               struct rtattr *tail = NLMSG_TAIL(n);
+               struct tcf_ematch_hdr hdr = {
+                       .flags = t->relation
+               };
+
+               if (t->inverted)
+                       hdr.flags |= TCF_EM_INVERT;
+
+               addattr_l(n, MAX_MSG, index++, NULL, 0);
+
+               if (t->child) {
+                       __u32 r = t->child_ref;
+                       addraw_l(n, MAX_MSG, &hdr, sizeof(hdr));
+                       addraw_l(n, MAX_MSG, &r, sizeof(r));
+               } else {
+                       int num = 0, err;
+                       char buf[64];
+                       struct ematch_util *e;
+
+                       if (t->args == NULL)
+                               return -1;
+
+                       /* strncpy() does not terminate on truncation and
+                        * buf is uninitialised: terminate explicitly. */
+                       strncpy(buf, (char*) t->args->data, sizeof(buf)-1);
+                       buf[sizeof(buf)-1] = '\0';
+                       e = get_ematch_kind(buf);
+                       if (e == NULL) {
+                               fprintf(stderr, "Unknown ematch \"%s\"\n",
+                                   buf);
+                               return -1;
+                       }
+
+                       err = lookup_map_id(buf, &num, EMATCH_MAP);
+                       if (err < 0) {
+                               if (err == -ENOENT)
+                                       map_warning(e->kind_num, buf);
+                               return err;
+                       }
+
+                       hdr.kind = num;
+                       if (e->parse_eopt(n, &hdr, t->args->next) < 0)
+                               return -1;
+               }
+
+               tail->rta_len = (void*) NLMSG_TAIL(n) - (void*) tail;
+       }
+
+       return 0;
+}
+
+/*
+ * flatten_tree - turn the nested ematch tree into one linear list.
+ *
+ * Walks the sequence starting at tree until a node with relation == 0
+ * is reached.  For every node that has a child sequence, that sequence
+ * is appended to the end of the list starting at head and flattened
+ * recursively.  Afterwards every node reachable from head gets its
+ * 0-based position stored in ->index, and every container node gets
+ * the index of its first child stored in ->child_ref.
+ *
+ * Returns the number of nodes visited at this level plus those counted
+ * by the recursive calls.
+ */
+static int flatten_tree(struct ematch *head, struct ematch *tree)
+{
+       int i, count = 0;
+       struct ematch *t;
+
+       for (;;) {
+               count++;
+
+               if (tree->child) {
+                       /* find the current end of the flat list */
+                       for (t = head; t->next; t = t->next);
+                       t->next = tree->child;
+                       count += flatten_tree(head, tree->child);
+               }
+
+               /* relation == 0 marks the last node of a sequence */
+               if (tree->relation == 0)
+                       break;
+
+               /* NOTE(review): assumes a node with a non-zero relation
+                * always has a successor - confirm against the parser. */
+               tree = tree->next;
+       }
+
+       /* renumber the whole list; repeated by every recursion level */
+       for (i = 0, t = head; t; t = t->next, i++)
+               t->index = i;
+
+       for (t = head; t; t = t->next)
+               if (t->child)
+                       t->child_ref = t->child->index;
+
+       return count;
+}
+
+/*
+ * em_parse_error - report an ematch parse error on stderr.
+ *
+ * @err:  positive errno value; the function returns -err
+ * @args: argument list of the failing ematch, or NULL
+ * @carg: the argument to highlight with >>...<<, or NULL
+ * @e:    descriptor of the failing ematch, or NULL for a generic error
+ * @fmt:  printf-style message followed by its arguments
+ *
+ * Prints the message (followed by ematch_err if set), the command line
+ * words consumed so far with the last one highlighted, the argument
+ * list if one was given and finally either the usage of the ematch or
+ * the generic expression grammar.
+ *
+ * The file-level begin_argv/begin_argc cursor is advanced while the
+ * consumed words are printed.
+ */
+int em_parse_error(int err, struct bstr *args, struct bstr *carg,
+                  struct ematch_util *e, char *fmt, ...)
+{
+       va_list a;
+
+       va_start(a, fmt);
+       vfprintf(stderr, fmt, a);
+       va_end(a);
+
+       if (ematch_err)
+               fprintf(stderr, ": %s\n... ", ematch_err);
+       else
+               fprintf(stderr, "\n... ");
+
+       while (ematch_argc < begin_argc) {
+               if (ematch_argc == (begin_argc - 1))
+                       fprintf(stderr, ">>%s<< ", *begin_argv);
+               else
+                       fprintf(stderr, "%s ", *begin_argv);
+               begin_argv++;
+               begin_argc--;
+       }
+
+       fprintf(stderr, "...\n");
+
+       if (args) {
+               /* e is optional (see below); do not dereference NULL. */
+               fprintf(stderr, "... %s(", e ? e->kind : "?");
+               while (args) {
+                       fprintf(stderr, "%s", args == carg ? ">>" : "");
+                       bstr_print(stderr, args, 1);
+                       fprintf(stderr, "%s%s", args == carg ? "<<" : "",
+                           args->next ? " " : "");
+                       args = args->next;
+               }
+               fprintf(stderr, ")...\n");
+
+       }
+
+       if (e == NULL) {
+               fprintf(stderr,
+                   "Usage: EXPR\n" \
+                   "where: EXPR  := TERM [ { and | or } EXPR ]\n" \
+                   "       TERM  := [ not ] { MATCH | '(' EXPR ')' }\n" \
+                   "       MATCH := module '(' ARGS ')'\n" \
+                   "       ARGS := ARG1 ARG2 ...\n" \
+                   "\n" \
+                   "Example: a(x y) and not (b(x) or c(x y z))\n");
+       } else
+               e->print_usage(stderr);
+
+       return -err;
+}
+
+static inline void free_ematch_err(void)
+{
+       if (ematch_err) {
+               free(ematch_err);
+               ematch_err = NULL;
+       }
+}
+
+extern int ematch_parse(void);
+
+/*
+ * parse_ematch - parse an ematch expression from the command line and
+ * append it to the netlink message n as nested attribute tca_id.
+ *
+ * @argc_p/@argv_p: in: position of the first expression word;
+ *                  out (on success only): position after the expression
+ * @tca_id:         attribute type of the enclosing container
+ * @n:              netlink message being built
+ *
+ * Returns 0 on success, the value of em_parse_error() when
+ * ematch_parse() fails and -1 when parse_tree() fails.
+ */
+int parse_ematch(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n)
+{
+       /* begin_* keep the start position for error reporting */
+       begin_argc = ematch_argc = *argc_p;
+       begin_argv = ematch_argv = *argv_p;
+
+       if (ematch_parse()) {
+               int err = em_parse_error(EINVAL, NULL, NULL, NULL,
+                   "Parse error");
+               free_ematch_err();
+               return err;
+       }
+
+       free_ematch_err();
+
+       /* undo look ahead by parser */
+       ematch_argc++;
+       ematch_argv--;
+
+       if (ematch_root) {
+               struct rtattr *tail, *tail_list;
+
+               struct tcf_ematch_tree_hdr hdr = {
+                       .nmatches = flatten_tree(ematch_root, ematch_root),
+                       .progid = TCF_EM_PROG_TC
+               };
+
+               /* outer container: tca_id { TREE_HDR, TREE_LIST { ... } } */
+               tail = NLMSG_TAIL(n);
+               addattr_l(n, MAX_MSG, tca_id, NULL, 0);
+               addattr_l(n, MAX_MSG, TCA_EMATCH_TREE_HDR, &hdr, sizeof(hdr));
+
+               tail_list = NLMSG_TAIL(n);
+               addattr_l(n, MAX_MSG, TCA_EMATCH_TREE_LIST, NULL, 0);
+
+               if (parse_tree(n, ematch_root) < 0)
+                       return -1;
+
+               /* both containers were added empty; fix up their lengths
+                * now that the nested payload has been appended */
+               tail_list->rta_len = (void*) NLMSG_TAIL(n) - (void*) tail_list;
+               tail->rta_len = (void*) NLMSG_TAIL(n) - (void*) tail;
+       }
+
+       *argc_p = ematch_argc;
+       *argv_p = ematch_argv;
+
+       return 0;
+}
+
+/*
+ * print_ematch_seq - print one sequence of ematches and its children.
+ *
+ * @fd:     output stream
+ * @tb:     attribute table of the ematch list, indexed by 1-based
+ *          position
+ * @start:  index in tb of the first ematch of the sequence
+ * @prefix: current nesting depth, used for indentation
+ *
+ * Consecutive entries are printed until one whose relation bits are
+ * neither AND nor OR.  An entry with kind 0 is a container whose
+ * payload is the 0-based reference to its first child; it is printed
+ * recursively in parentheses.
+ *
+ * Returns 0 on success, -1 on a missing or truncated entry or when a
+ * print_eopt() callback fails.
+ *
+ * NOTE(review): the size of tb is not known here, so neither i nor
+ * ref + 1 is range-checked - confirm the caller sizes tb for every
+ * index that can occur.
+ */
+static int print_ematch_seq(FILE *fd, struct rtattr **tb, int start,
+                           int prefix)
+{
+       int n, i = start;
+       struct tcf_ematch_hdr *hdr;
+       int dlen;
+       void *data;
+
+       for (;;) {
+               if (tb[i] == NULL)
+                       return -1;
+
+               /* payload = ematch header followed by kind-specific data */
+               dlen = RTA_PAYLOAD(tb[i]) - sizeof(*hdr);
+               data = (void *) RTA_DATA(tb[i]) + sizeof(*hdr);
+
+               if (dlen < 0)
+                       return -1;
+
+               hdr = RTA_DATA(tb[i]);
+
+               if (hdr->flags & TCF_EM_INVERT)
+                       fprintf(fd, "NOT ");
+
+               if (hdr->kind == 0) {
+                       __u32 ref;
+
+                       if (dlen < sizeof(__u32))
+                               return -1;
+
+                       /* ref is 0-based, tb is indexed from 1 */
+                       ref = *(__u32 *) data;
+                       fprintf(fd, "(\n");
+                       for (n = 0; n <= prefix; n++)
+                               fprintf(fd, "  ");
+                       if (print_ematch_seq(fd, tb, ref + 1, prefix + 1) < 0)
+                               return -1;
+                       for (n = 0; n < prefix; n++)
+                               fprintf(fd, "  ");
+                       fprintf(fd, ") ");
+
+               } else {
+                       struct ematch_util *e;
+
+                       e = get_ematch_kind_num(hdr->kind);
+                       if (e == NULL)
+                               fprintf(fd, "[unknown ematch %d]\n",
+                                   hdr->kind);
+                       else {
+                               fprintf(fd, "%s(", e->kind);
+                               if (e->print_eopt(fd, hdr, data, dlen) < 0)
+                                       return -1;
+                               fprintf(fd, ")\n");
+                       }
+                       /* indent the AND/OR that follows on the next line */
+                       if (hdr->flags & TCF_EM_REL_MASK)
+                               for (n = 0; n < prefix; n++)
+                                       fprintf(fd, "  ");
+               }
+
+               switch (hdr->flags & TCF_EM_REL_MASK) {
+                       case TCF_EM_REL_AND:
+                               fprintf(fd, "AND ");
+                               break;
+
+                       case TCF_EM_REL_OR:
+                               fprintf(fd, "OR ");
+                               break;
+
+                       default:
+                               /* no relation: end of this sequence */
+                               return 0;
+               }
+
+               i++;
+       }
+       
+       return 0;
+}
+
+/*
+ * print_ematch_list - print the ematch list attribute of a tree.
+ *
+ * @fd:  output stream
+ * @hdr: tree header; hdr->nmatches is the number of list entries
+ * @rta: the nested list attribute
+ *
+ * Returns 0 on success (including an empty tree, for which nothing is
+ * printed) and -1 on allocation, parse or print failure.
+ */
+static int print_ematch_list(FILE *fd, struct tcf_ematch_tree_hdr *hdr,
+                            struct rtattr *rta)
+{
+       int err = -1;
+       struct rtattr **tb;
+
+       /*
+        * With no matches the table would only have slot 0, while
+        * print_ematch_seq() starts reading at slot 1.
+        */
+       if (hdr->nmatches == 0)
+               return 0;
+
+       /* entries are indexed 1..nmatches, slot 0 stays unused */
+       tb = malloc((hdr->nmatches + 1) * sizeof(struct rtattr *));
+       if (tb == NULL)
+               return -1;
+
+       if (parse_rtattr_nested(tb, hdr->nmatches, rta) < 0)
+               goto errout;
+
+       fprintf(fd, "\n  ");
+       if (print_ematch_seq(fd, tb, 1, 1) < 0)
+               goto errout;
+
+       err = 0;
+errout:
+       free(tb);
+       return err;
+}
+
+/*
+ * Print the ematch tree contained in the nested attribute rta.
+ *
+ * The attribute must carry both a tree header of at least
+ * sizeof(struct tcf_ematch_tree_hdr) bytes and a tree list; a
+ * diagnostic goes to stderr otherwise.  Returns -1 on any of these
+ * errors, else the result of print_ematch_list().
+ */
+int print_ematch(FILE *fd, const struct rtattr *rta)
+{
+       struct rtattr *attrs[TCA_EMATCH_TREE_MAX+1];
+       struct rtattr *hdr_attr, *list_attr;
+       struct tcf_ematch_tree_hdr *hdr;
+
+       if (parse_rtattr_nested(attrs, TCA_EMATCH_TREE_MAX, rta) < 0)
+               return -1;
+
+       hdr_attr = attrs[TCA_EMATCH_TREE_HDR];
+       list_attr = attrs[TCA_EMATCH_TREE_LIST];
+
+       if (!hdr_attr) {
+               fprintf(stderr, "Missing ematch tree header\n");
+               return -1;
+       }
+
+       if (!list_attr) {
+               fprintf(stderr, "Missing ematch tree list\n");
+               return -1;
+       }
+
+       if (RTA_PAYLOAD(hdr_attr) < sizeof(*hdr)) {
+               fprintf(stderr, "Ematch tree header size mismatch\n");
+               return -1;
+       }
+
+       hdr = RTA_DATA(hdr_attr);
+
+       return print_ematch_list(fd, hdr, list_attr);
+}
diff --git a/tc/m_ematch.h b/tc/m_ematch.h
new file mode 100644 (file)
index 0000000..ed98446
--- /dev/null
@@ -0,0 +1,179 @@
+#ifndef __TC_EMATCH_H_
+#define __TC_EMATCH_H_
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+#define EMATCHKINDSIZ 16
+
+/*
+ * Length-counted string, chained into a singly linked argument list.
+ */
+struct bstr
+{
+       char    *data;          /* the bytes of the string */
+       unsigned int    len;    /* number of bytes in data */
+       int             quoted; /* presumably set for quoted input - TODO confirm, not used in this header */
+       struct bstr     *next;  /* next argument, NULL at the end */
+};
+
+/*
+ * Create a bstr holding a private copy of the C string text.
+ * Returns NULL if either allocation fails.
+ */
+static inline struct bstr * bstr_alloc(const char *text)
+{
+       struct bstr *str;
+       char *copy;
+
+       str = calloc(1, sizeof(*str));
+       if (!str)
+               return NULL;
+
+       copy = strdup(text);
+       if (!copy) {
+               free(str);
+               return NULL;
+       }
+
+       str->data = copy;
+       str->len = strlen(text);
+       return str;
+}
+
+/*
+ * Create a bstr that refers to the caller's buffer: data is stored as
+ * given, it is not copied.  Returns NULL if the allocation fails.
+ */
+static inline struct bstr * bstr_new(char *data, unsigned int len)
+{
+       struct bstr *str = calloc(1, sizeof(*str));
+
+       if (str != NULL) {
+               str->data = data;
+               str->len = len;
+       }
+
+       return str;
+}
+
+/*
+ * Compare a bstr with a C string.  Strings of different length compare
+ * by length difference; strings of equal length compare like strncmp().
+ * Returns 0 only for an exact match.
+ */
+static inline int bstrcmp(struct bstr *b, const char *text)
+{
+       int text_len = strlen(text);
+       int diff = b->len - text_len;
+
+       if (diff != 0)
+               return diff;
+
+       return strncmp(b->data, text, text_len);
+}
+
+/*
+ * Convert a bstr to an unsigned long; the base is auto-detected
+ * (decimal, 0x hexadecimal, leading-0 octal).
+ *
+ * Returns LONG_MAX if no digits could be converted or the value is out
+ * of range.  The LONG_MAX sentinel is kept for compatibility with
+ * existing callers even though the parse itself is unsigned, so values
+ * above LONG_MAX are now accepted instead of being clamped to the
+ * error sentinel.
+ */
+static inline unsigned long bstrtoul(struct bstr *b)
+{
+       char *inv = NULL;
+       unsigned long l;
+       char buf[b->len+1];
+
+       /* b->data is length-counted: make a terminated copy */
+       memcpy(buf, b->data, b->len);
+       buf[b->len] = '\0';
+
+       /* strtoul() reports overflow as ULONG_MAX, matching the test */
+       l = strtoul(buf, &inv, 0);
+       if (l == ULONG_MAX || inv == buf)
+               return LONG_MAX;
+
+       return l;
+}
+
+/*
+ * Print the bytes of a bstr to fd.
+ *
+ * With ascii != 0 only the text is printed, unprintable bytes shown
+ * as '.'.  Otherwise the bytes are printed as two-digit hex followed
+ * by the same text in double quotes.
+ */
+static inline void bstr_print(FILE *fd, struct bstr *b, int ascii)
+{
+       unsigned int i;
+       /*
+        * Use unsigned bytes: isprint() is undefined for negative
+        * values, and "%02x" would print a sign-extended char as
+        * ffffffXX.
+        */
+       const unsigned char *s = (const unsigned char *) b->data;
+
+       if (ascii)
+               for (i = 0; i < b->len; i++)
+                   fprintf(fd, "%c", isprint(s[i]) ? s[i] : '.');
+       else {
+               for (i = 0; i < b->len; i++)
+                   fprintf(fd, "%02x", s[i]);
+               fprintf(fd, "\"");
+               for (i = 0; i < b->len; i++)
+                   fprintf(fd, "%c", isprint(s[i]) ? s[i] : '.');
+               fprintf(fd, "\"");
+       }
+}
+
+/* Return the argument following b in its list (NULL at the end). */
+static inline struct bstr *bstr_next(struct bstr *b)
+{
+       return b->next;
+}
+
+/*
+ * One node of a parsed ematch expression: either a single match with
+ * its argument list, or a container for a nested sequence (child set).
+ */
+struct ematch
+{
+       struct bstr     *args;          /* argument list of a single match */
+       int             index;          /* position in the flattened list */
+       int             inverted;       /* non-zero: printed with "NOT" */
+       int             relation;       /* TCF_EM_REL_AND / TCF_EM_REL_OR link to the next node */
+       int             child_ref;      /* index of the first child node */
+       struct ematch   *child;         /* nested sequence, NULL for a single match */
+       struct ematch   *next;          /* next node in the sequence */
+};
+
+/*
+ * Allocate a zeroed ematch node with the given argument list and
+ * inversion flag.  Returns NULL if the allocation fails.
+ */
+static inline struct ematch * new_ematch(struct bstr *args, int inverted)
+{
+       struct ematch *node = calloc(1, sizeof(*node));
+
+       if (node != NULL) {
+               node->args = args;
+               node->inverted = inverted;
+       }
+
+       return node;
+}
+
+/*
+ * Dump an ematch expression to stdout: "NOT " for inverted nodes,
+ * nested sequences in parentheses, the arguments of single matches
+ * separated by blanks, and " AND " / " OR " between related nodes.
+ */
+static inline void print_ematch_tree(struct ematch *tree)
+{
+       struct ematch *node = tree;
+
+       while (node != NULL) {
+               if (node->inverted)
+                       printf("NOT ");
+
+               if (node->child == NULL) {
+                       struct bstr *arg = node->args;
+
+                       while (arg != NULL) {
+                               const char *sep = arg->next ? " " : "";
+
+                               printf("%s%s", arg->data, sep);
+                               arg = arg->next;
+                       }
+               } else {
+                       printf("(");
+                       print_ematch_tree(node->child);
+                       printf(")");
+               }
+
+               if (node->relation == TCF_EM_REL_AND)
+                       printf(" AND ");
+               else if (node->relation == TCF_EM_REL_OR)
+                       printf(" OR ");
+
+               node = node->next;
+       }
+}
+
+/*
+ * Descriptor of one ematch kind: its name, a numeric ID and the
+ * callbacks used to parse, print and document its options.
+ */
+struct ematch_util
+{
+       char                    kind[EMATCHKINDSIZ];    /* kind name */
+       int                     kind_num;               /* numeric ID of the kind */
+       /* parse the arguments of one match into the netlink message */
+       int     (*parse_eopt)(struct nlmsghdr *,struct tcf_ematch_hdr *,
+                             struct bstr *);
+       /* print one match from its header, payload and payload length */
+       int     (*print_eopt)(FILE *, struct tcf_ematch_hdr *, void *, int);
+       /* print the usage text of this kind */
+       void    (*print_usage)(FILE *);
+       struct ematch_util      *next;                  /* next known kind */
+};
+
+/*
+ * Map a layer name to its TCF_LAYER_* constant by its first character:
+ * 'l' = link, 'n' = network, 't' = transport.  Returns INT_MAX for
+ * anything else.
+ */
+static inline int parse_layer(struct bstr *b)
+{
+       switch (b->data[0]) {
+       case 'l':
+               return TCF_LAYER_LINK;
+       case 'n':
+               return TCF_LAYER_NETWORK;
+       case 't':
+               return TCF_LAYER_TRANSPORT;
+       default:
+               return INT_MAX;
+       }
+}
+
+extern int em_parse_error(int err, struct bstr *args, struct bstr *carg,
+                  struct ematch_util *, char *fmt, ...);
+extern int print_ematch(FILE *, const struct rtattr *);
+extern int parse_ematch(int *, char ***, int, struct nlmsghdr *);
+
+#endif
index 518e4a3..ca39555 100644 (file)
@@ -69,6 +69,7 @@ static struct option original_opts[] = {
 };
 
 static struct iptables_target *t_list = NULL;
+static struct option *opts = original_opts;
 static unsigned int global_option_offset = 0;
 #define OPTION_OFFSET 256
 
@@ -169,18 +170,13 @@ int string_to_number(const char *s, unsigned int min, unsigned int max,
        return result;
 }
 
-static struct option *
-copy_options(struct option *oldopts)
+/*
+ * Free a merged option table and reset the file-level option state.
+ *
+ * The parameter must not be called "opts": it would shadow the
+ * file-level "opts" pointer, the reset below would only change the
+ * local copy, and the global would keep pointing at freed memory.
+ */
+static void free_opts(struct option *local_opts)
 {
-       struct option *merge;
-       unsigned int num_old;
-       for (num_old = 0; oldopts[num_old].name; num_old++) ;
-       merge = malloc(sizeof (struct option) * (num_old + 1));
-       if (NULL == merge)
-               return NULL;
-       memcpy(merge, oldopts, num_old * sizeof (struct option));
-       memset(merge + num_old, 0, sizeof (struct option));
-       return merge;
+       if (local_opts != original_opts) {
+               free(local_opts);
+               opts = original_opts;
+               global_option_offset = 0;
+       }
 }
 
 static struct option *
@@ -337,6 +333,17 @@ struct in_addr *dotted_to_addr(const char *dotted)
        return &addr;
 }
 
+/*
+ * Store the target revision in the last byte of the name buffer and
+ * terminate the name one byte earlier.
+ */
+static void set_revision(char *name, u_int8_t revision)
+{
+       /* Old kernel sources don't have ".revision" field,
+       *  but we stole a byte from name. */
+       name[IPT_FUNCTION_MAXNAMELEN - 2] = '\0';
+       name[IPT_FUNCTION_MAXNAMELEN - 1] = revision;
+}
+
+/* 
+ * we may need to check for version mismatch
+*/
 int
 build_st(struct iptables_target *target, struct ipt_entry_target *t)
 {
@@ -350,8 +357,11 @@ build_st(struct iptables_target *target, struct ipt_entry_target *t)
 
                if (NULL == t) {
                        target->t = fw_calloc(1, size);
-                       target->init(target->t, &nfcache);
                        target->t->u.target_size = size;
+
+                       if (target->init != NULL)
+                               target->init(target->t, &nfcache);
+                       set_revision(target->t->u.user.name, target->revision);
                } else {
                        target->t = t;
                }
@@ -371,7 +381,6 @@ static int parse_ipt(struct action_util *a,int *argc_p,
        int c;
        int rargc = *argc_p;
        char **argv = *argv_p;
-       struct option *opts;
        int argc = 0, iargc = 0;
        char k[16];
        int res = -1;
@@ -395,11 +404,6 @@ static int parse_ipt(struct action_util *a,int *argc_p,
                return -1;
        }
 
-       opts = copy_options(original_opts);
-
-       if (NULL == opts)
-               return -1;
-
        while (1) {
                c = getopt_long(argc, argv, "j:", opts, NULL);
                if (c == -1)
@@ -426,23 +430,14 @@ static int parse_ipt(struct action_util *a,int *argc_p,
                default:
                        memset(&fw, 0, sizeof (fw));
                        if (m) {
-                               unsigned int fake_flags = 0;
                                m->parse(c - m->option_offset, argv, 0,
-                                        &fake_flags, NULL, &m->t);
+                                        &m->tflags, NULL, &m->t);
                        } else {
                                fprintf(stderr," failed to find target %s\n\n", optarg);
                                return -1;
 
                        }
                        ok++;
-
-                       /*m->final_check(m->t); -- Is this necessary?
-                       ** useful when theres depencies
-                       ** eg ipt_TCPMSS.c has have the TCP match loaded
-                       ** before this can be used;
-                       **  also seems the ECN target needs it 
-                       */
-
                        break;
 
                }
@@ -452,6 +447,7 @@ static int parse_ipt(struct action_util *a,int *argc_p,
                if (matches(argv[optind], "index") == 0) {
                        if (get_u32(&index, argv[optind + 1], 10)) {
                                fprintf(stderr, "Illegal \"index\"\n");
+                               free_opts(opts);
                                return -1;
                        }
                        iok++;
@@ -465,6 +461,10 @@ static int parse_ipt(struct action_util *a,int *argc_p,
                return -1;
        }
 
+       /* check that we passed the correct parameters to the target */
+       if (m)
+               m->final_check(m->tflags);
+
        {
                struct tcmsg *t = NLMSG_DATA(n);
                if (t->tcm_parent != TC_H_ROOT
@@ -505,6 +505,7 @@ static int parse_ipt(struct action_util *a,int *argc_p,
        *argv_p = argv;
        
        optind = 1;
+       free_opts(opts);
 
        return 0;
 
@@ -515,16 +516,10 @@ print_ipt(struct action_util *au,FILE * f, struct rtattr *arg)
 {
        struct rtattr *tb[TCA_IPT_MAX + 1];
        struct ipt_entry_target *t = NULL;
-       struct option *opts;
 
        if (arg == NULL)
                return -1;
 
-       opts = copy_options(original_opts);
-
-       if (NULL == opts)
-               return -1;
-
        parse_rtattr_nested(tb, TCA_IPT_MAX, arg);
 
        if (tb[TCA_IPT_TABLE] == NULL) {
@@ -587,6 +582,7 @@ print_ipt(struct action_util *au,FILE * f, struct rtattr *arg)
                fprintf(f, " \n");
 
        }
+       free_opts(opts);
 
        return 0;
 }
index 6ade2a8..cbfea84 100644 (file)
@@ -263,7 +263,10 @@ print_mirred(struct action_util *au,FILE * f, struct rtattr *arg)
        }
        p = RTA_DATA(tb[TCA_MIRRED_PARMS]);
 
+       /*
        ll_init_map(&rth);
+       */
+
 
        if ((dev = ll_index_to_name(p->ifindex)) == 0) {
                fprintf(stderr, "Cannot find device %d\n", p->ifindex);
@@ -285,7 +288,7 @@ print_mirred(struct action_util *au,FILE * f, struct rtattr *arg)
        return 0;
 }
 
-struct action_util mirred_util_util = {
+struct action_util mirred_action_util = {
        .id = "mirred",
        .parse_aopt = parse_mirred,
        .print_aopt = print_mirred,
index 5031c62..acfa581 100644 (file)
@@ -238,9 +238,11 @@ parse_val(int *argc_p, char ***argv_p, __u32 * val, int type)
                return -1;
 
        if (TINT == type)
-               return get_integer(val, *argv, 0);
+               return get_integer((int *) val, *argv, 0);
+
        if (TU32 == type)
                return get_u32(val, *argv, 0);
+
        if (TIPV4 == type) {
                inet_prefix addr;
                if (get_prefix_1(&addr, *argv, AF_INET)) {
index 40c0228..a456eda 100644 (file)
@@ -70,7 +70,7 @@ static int cbq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl
                        }
                } else if (strcmp(*argv, "ewma") == 0) {
                        NEXT_ARG();
-                       if (get_unsigned(&ewma_log, *argv, 0)) {
+                       if (get_integer(&ewma_log, *argv, 0)) {
                                explain1("ewma");
                                return -1;
                        }
@@ -236,7 +236,7 @@ static int cbq_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str
                        lss.change |= TCF_CBQ_LSS_FLAGS;
                } else if (strcmp(*argv, "ewma") == 0) {
                        NEXT_ARG();
-                       if (get_u32(&ewma_log, *argv, 0)) {
+                       if (get_integer(&ewma_log, *argv, 0)) {
                                explain1("ewma");
                                return -1;
                        }
index 384e749..cdb5bf2 100644 (file)
@@ -136,11 +136,9 @@ static int dsmark_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
 {
        struct rtattr *tb[TCA_DSMARK_MAX+1];
 
-       if (opt == NULL)
-               return 0;
-
-       parse_rtattr_nested(tb, TCA_DSMARK_MAX, opt);
-
+       if (!opt) return 0;
+       memset(tb, 0, sizeof(tb));
+       parse_rtattr(tb, TCA_DSMARK_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
        if (tb[TCA_DSMARK_MASK]) {
                if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK]))
                        fprintf(stderr,"dsmark: empty mask\n");
index f696cc3..757edca 100644 (file)
@@ -29,11 +29,12 @@ static void explain(void)
 {
        fprintf(stderr, 
 "Usage: ... netem [ limit PACKETS ] \n" \
-"                [ delay TIME [ JITTER [CORRELATION]]]\n" \
+"                 [ delay TIME [ JITTER [CORRELATION]]]\n" \
+"                 [ distribution {uniform|normal|pareto|paretonormal} ]\n" \
 "                 [ drop PERCENT [CORRELATION]] \n" \
+"                 [ corrupt PERCENT [CORRELATION]] \n" \
 "                 [ duplicate PERCENT [CORRELATION]]\n" \
-"                [ distribution {uniform|normal|pareto|paretonormal} ]\n" \
-"                 [ gap PACKETS ]\n");
+"                 [ reorder PRECENT [CORRELATION] [ gap DISTANCE ]]\n");
 }
 
 static void explain1(const char *arg)
@@ -127,11 +128,15 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
        struct rtattr *tail;
        struct tc_netem_qopt opt;
        struct tc_netem_corr cor;
-       __s16 dist_data[MAXDIST];
+       struct tc_netem_reorder reorder;
+       struct tc_netem_corrupt corrupt;
+       __s16 *dist_data = NULL;
 
        memset(&opt, 0, sizeof(opt));
        opt.limit = 1000;
        memset(&cor, 0, sizeof(cor));
+       memset(&reorder, 0, sizeof(reorder));
+       memset(&corrupt, 0, sizeof(corrupt));
 
        while (argc > 0) {
                if (matches(*argv, "limit") == 0) {
@@ -178,6 +183,32 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
                                        return -1;
                                }
                        }
+               } else if (matches(*argv, "reorder") == 0) {
+                       NEXT_ARG();
+                       if (get_percent(&reorder.probability, *argv)) {
+                               explain1("reorder");
+                               return -1;
+                       }
+                       if (NEXT_IS_NUMBER()) {
+                               NEXT_ARG();
+                               if (get_percent(&reorder.correlation, *argv)) {
+                                       explain1("reorder");
+                                       return -1;
+                               }
+                       }
+               } else if (matches(*argv, "corrupt") == 0) {
+                       NEXT_ARG();
+                       if (get_percent(&corrupt.probability, *argv)) {
+                               explain1("corrupt");
+                               return -1;
+                       }
+                       if (NEXT_IS_NUMBER()) {
+                               NEXT_ARG();
+                               if (get_percent(&corrupt.correlation, *argv)) {
+                                       explain1("corrupt");
+                                       return -1;
+                               }
+                       }
                } else if (matches(*argv, "gap") == 0) {
                        NEXT_ARG();
                        if (get_u32(&opt.gap, *argv, 0)) {
@@ -199,6 +230,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
                        }
                } else if (matches(*argv, "distribution") == 0) {
                        NEXT_ARG();
+                       dist_data = alloca(MAXDIST);
                        dist_size = get_distribution(*argv, dist_data);
                        if (dist_size < 0)
                                return -1;
@@ -215,12 +247,44 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
 
        tail = NLMSG_TAIL(n);
 
-       addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
-       addattr_l(n, 1024, TCA_NETEM_CORR, &cor, sizeof(cor));
+       if (reorder.probability) {
+               if (opt.latency == 0) {
+                       fprintf(stderr, "reordering not possible without specifying some delay\n");
+               }
+               if (opt.gap == 0)
+                       opt.gap = 1;
+       } else if (opt.gap > 0) {
+               fprintf(stderr, "gap specified without reorder probability\n");
+               explain();
+               return -1;
+       }
+
+       if (dist_data && (opt.latency == 0 || opt.jitter == 0)) {
+               fprintf(stderr, "distribution specified but no latency and jitter values\n");
+               explain();
+               return -1;
+       }
+
+       if (addattr_l(n, TCA_BUF_MAX, TCA_OPTIONS, &opt, sizeof(opt)) < 0)
+               return -1;
+
+       if (cor.delay_corr || cor.loss_corr || cor.dup_corr) {
+               if (addattr_l(n, TCA_BUF_MAX, TCA_NETEM_CORR, &cor, sizeof(cor)) < 0)
+                       return -1;
+       }
+
+       if (addattr_l(n, TCA_BUF_MAX, TCA_NETEM_REORDER, &reorder, sizeof(reorder)) < 0)
+               return -1;
+
+       if (corrupt.probability) {
+               if (addattr_l(n, TCA_BUF_MAX, TCA_NETEM_CORRUPT, &corrupt, sizeof(corrupt)) < 0)
+                       return -1;
+       }
 
-       if (dist_size > 0) {
-               addattr_l(n, 32768, TCA_NETEM_DELAY_DIST,
-                         dist_data, dist_size*sizeof(dist_data[0]));
+       if (dist_data) {
+               if (addattr_l(n, 32768, TCA_NETEM_DELAY_DIST,
+                             dist_data, dist_size*sizeof(dist_data[0])) < 0)
+                       return -1;
        }
        tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
        return 0;
@@ -229,6 +293,8 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv,
 static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
 {
        const struct tc_netem_corr *cor = NULL;
+       const struct tc_netem_reorder *reorder = NULL;
+       const struct tc_netem_corrupt *corrupt = NULL;
        struct tc_netem_qopt qopt;
        int len = RTA_PAYLOAD(opt) - sizeof(qopt);
        SPRINT_BUF(b1);
@@ -252,6 +318,16 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
                                return -1;
                        cor = RTA_DATA(tb[TCA_NETEM_CORR]);
                }
+               if (tb[TCA_NETEM_REORDER]) {
+                       if (RTA_PAYLOAD(tb[TCA_NETEM_REORDER]) < sizeof(*reorder))
+                               return -1;
+                       reorder = RTA_DATA(tb[TCA_NETEM_REORDER]);
+               }
+               if (tb[TCA_NETEM_CORRUPT]) {
+                       if (RTA_PAYLOAD(tb[TCA_NETEM_CORRUPT]) < sizeof(*corrupt))
+                               return -1;
+                       corrupt = RTA_DATA(tb[TCA_NETEM_CORRUPT]);
+               }
        }
 
        fprintf(f, "limit %d", qopt.limit);
@@ -278,6 +354,22 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
                if (cor && cor->dup_corr)
                        fprintf(f, " %s", sprint_percent(cor->dup_corr, b1));
        }
+                       
+       if (reorder && reorder->probability) {
+               fprintf(f, " reorder %s", 
+                       sprint_percent(reorder->probability, b1));
+               if (reorder->correlation)
+                       fprintf(f, " %s", 
+                               sprint_percent(reorder->correlation, b1));
+       }
+
+       if (corrupt && corrupt->probability) {
+               fprintf(f, " corrupt %s", 
+                       sprint_percent(corrupt->probability, b1));
+               if (corrupt->correlation)
+                       fprintf(f, " %s", 
+                               sprint_percent(corrupt->correlation, b1));
+       }
 
        if (qopt.gap)
                fprintf(f, " gap %lu", (unsigned long)qopt.gap);
diff --git a/tc/tc.c b/tc/tc.c
index dd6ac97..fa36ee0 100644 (file)
--- a/tc/tc.c
+++ b/tc/tc.c
@@ -35,9 +35,10 @@ int show_details = 0;
 int show_raw = 0;
 int resolve_hosts = 0;
 int use_iec = 0;
+int force = 0;
 struct rtnl_handle rth;
 
-static void *BODY;     /* cached handle dlopen(NULL) */
+static void *BODY = NULL;      /* cached handle dlopen(NULL) */
 static struct qdisc_util * qdisc_list;
 static struct filter_util * filter_list;
 
@@ -179,8 +180,9 @@ noexist:
 static void usage(void)
 {
        fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n"
+                       "       tc [-force] -batch file\n"
                        "where  OBJECT := { qdisc | class | filter | action }\n"
-                       "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -b[atch] file }\n");
+                       "       OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -b[atch] [file] }\n");
 }
 
 static int do_cmd(int argc, char **argv)
@@ -207,34 +209,13 @@ static int do_cmd(int argc, char **argv)
        return -1;
 }
 
-static int makeargs(char *line, char *argv[], int maxargs)
-{
-       static const char ws[] = " \t\r\n";
-       char *cp;
-       int argc = 0;
-
-       for (cp = strtok(line, ws); cp; cp = strtok(NULL, ws)) {
-               if (argc >= maxargs) {
-                       fprintf(stderr, "Too many arguments to command\n");
-                       exit(1);
-               }
-               argv[argc++] = cp;
-       }
-       argv[argc] = NULL;
-
-       return argc;
-}
-
 static int batch(const char *name)
 {
        char *line = NULL;
        size_t len = 0;
-       ssize_t cc;
-       int lineno = 0;
-       char *largv[100];
-       int largc, ret = 0;
+       int ret = 0;
 
-       if (strcmp(name, "-") != 0) {
+       if (name && strcmp(name, "-") != 0) {
                if (freopen(name, "r", stdin) == NULL) {
                        fprintf(stderr, "Cannot open file \"%s\" for reading: %s=n",
                                name, strerror(errno));
@@ -249,44 +230,24 @@ static int batch(const char *name)
                return -1;
        }
 
-       while ((cc = getline(&line, &len, stdin)) != -1) {
-               ++lineno;
-
-               /* ignore blank lines and comments */
-               if (*line == '\n' || *line == '#')
-                       continue;
-
-               /* handle continuation lines */
-               while (cc >= 2 && strcmp(line+cc-2, "\\\n") == 0) {
-                       char *line1 = NULL;
-                       ssize_t len1 = 0;
-                       int cc1;
-                       cc1 = getline(&line1, &len1, stdin);
-
-                       if (cc1 < 0) {
-                               fprintf(stderr, "Missing continuation line\n");
-                               return -1;
-                       }
-                       ++lineno;
-                       line = realloc(line, cc + cc1);
-                       if (!line) {
-                               fprintf(stderr, "Out of memory\n");
-                               return -1;
-                       }
-
-                       strcpy(line+cc-2, line1);
-                       cc += cc1 - 2;
-                       free(line1);
-               }
+       cmdlineno = 0;
+       while (getcmdline(&line, &len, stdin) != -1) {
+               char *largv[100];
+               int largc;
 
                largc = makeargs(line, largv, 100);
-
-               ret = do_cmd(largc, largv);
-               if (ret) {
-                       fprintf(stderr, "Command failed %s:%d\n", name, lineno);
-                       break;
+               if (largc == 0)
+                       continue;       /* blank line */
+
+               if (do_cmd(largc, largv)) {
+                       fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno);
+                       ret = 1;
+                       if (!force)
+                               break;
                }
        }
+       if (line)
+               free(line);
 
        rtnl_close(&rth);
        return ret;
@@ -296,6 +257,8 @@ static int batch(const char *name)
 int main(int argc, char **argv)
 {
        int ret;
+       int do_batching = 0;
+       char *batchfile = NULL;
 
        while (argc > 1) {
                if (argv[1][0] != '-')
@@ -315,13 +278,13 @@ int main(int argc, char **argv)
                } else if (matches(argv[1], "-help") == 0) {
                        usage();
                        return 0;
+               } else if (matches(argv[1], "-force") == 0) {
+                       ++force;
                } else  if (matches(argv[1], "-batch") == 0) {
-                       if (argc < 3) {
-                               fprintf(stderr, "Wrong number of arguments in batch mode\n");
-                               return -1;
-                       }
-
-                       return batch(argv[2]);
+                       do_batching = 1;
+                       if (argc > 2)
+                               batchfile = argv[2];
+                       argc--; argv++;
                } else {
                        fprintf(stderr, "Option \"%s\" is unknown, try \"tc -help\".\n", argv[1]);
                        return -1;
@@ -329,6 +292,9 @@ int main(int argc, char **argv)
                argc--; argv++;
        }
 
+       if (do_batching)
+               return batch(batchfile);
+
        if (argc <= 1) {
                usage();
                return 0;
index c4b27eb..894caa1 100644 (file)
@@ -76,7 +76,10 @@ int tc_class_modify(int cmd, unsigned flags, int argc, char **argv)
                        if (get_tc_classid(&handle, *argv))
                                invarg(*argv, "invalid class ID");
                        req.t.tcm_handle = handle;
-               } else if (strcmp(*argv, "root") == 0) {
+               } else if (strcmp(*argv, "handle") == 0) {
+                       fprintf(stderr, "Error: try \"classid\" instead of \"handle\"\n");
+                       return -1;
+               } else if (strcmp(*argv, "root") == 0) {
                        if (req.t.tcm_parent) {
                                fprintf(stderr, "Error: \"root\" is duplicate parent ID.\n");
                                return -1;
index 7802d52..e9174ab 100644 (file)
@@ -126,6 +126,10 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv)
                addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est));
 
        if (q) {
+               if (!q->parse_qopt) {
+                       fprintf(stderr, "qdisc '%s' does not support option parsing\n", k);
+                       return -1;
+               }
                if (q->parse_qopt(q, argc, argv, &req.n))
                        return 1;
        } else {
index 5661cea..2a4e0ba 100644 (file)
@@ -1,11 +1,18 @@
-TESTS := $(patsubst tests/%,%,$(wildcard tests/*))
+## -- Config --
+DEV := lo
+PREFIX := sudo
+## -- End Config --
+
+TESTS := $(patsubst tests/%,%,$(wildcard tests/*.t))
 IPVERS := $(filter-out iproute2/Makefile,$(wildcard iproute2/*))
+KENV := $(shell cat /proc/config.gz | gunzip | grep ^CONFIG)
 
-DEV := eth0
+.PHONY: compile listtests alltests configure $(TESTS)
 
-.PHONY: compile listtests alltests $(TESTS)
+configure:
+       echo "Entering iproute2" && cd iproute2 && $(MAKE) configure && cd ..;
 
-compile:
+compile: configure
        echo "Entering iproute2" && cd iproute2 && $(MAKE) && cd ..;
 
 listtests:
@@ -18,16 +25,21 @@ alltests: $(TESTS)
 clean:
        @rm -rf results/*
 
+distclean: clean
+       echo "Entering iproute2" && cd iproute2 && $(MAKE) distclean && cd ..;
+
 $(TESTS):
        @for i in $(IPVERS); do \
-               echo -n "Running $@ with $$i on `uname -r`: "; \
-               logger "TESTMARK: $@"; \
                o=`echo $$i | sed -e 's/iproute2\///'`; \
-               TC="$$i/tc/tc" IP="$$i/ip/ip" DEV="$(DEV)" sudo tests/$@ > results/$@.$$o.out 2> results/$@.$$o.err; \
-               dmesg > results/$@.$$o.dmesg; \
-               if [ -z "`cat results/$@.$$o.err`" ]; then \
-                       echo "PASS"; \
-               else \
+               echo -n "Running $@ [$$o/`uname -r`]: "; \
+               TC="$$i/tc/tc" IP="$$i/ip/ip" DEV="$(DEV)" IPVER="$@" SNAME="$$i" \
+               ERRF="results/$@.$$o.err" $(KENV) $(PREFIX) tests/$@ > results/$@.$$o.out; \
+               if [ "$$?" = "127" ]; then \
+                       echo "SKIPPED"; \
+               elif [ -e "results/$@.$$o.err" ]; then \
                        echo "FAILED"; \
-               fi \
+               else \
+                       echo "PASS"; \
+               fi; \
+               dmesg > results/$@.$$o.dmesg; \
        done
diff --git a/testsuite/iproute2/Makefile b/testsuite/iproute2/Makefile
new file mode 100644 (file)
index 0000000..ba128aa
--- /dev/null
@@ -0,0 +1,33 @@
+SUBDIRS := $(filter-out Makefile,$(wildcard *))
+.PHONY: all configure clean distclean show link $(SUBDIRS)
+
+all: configure # build every sub-tree; each is entered in a subshell so a failing sub-make cannot strand the loop in the wrong directory
+       @for dir in $(SUBDIRS); do \
+               echo "Entering $$dir" && (cd $$dir && $(MAKE)); \
+       done
+
+link: # create the iproute2-this symlink pointing two levels up, unless it already exists
+       @if [ ! -L iproute2-this ]; then \
+               ln -s ../.. iproute2-this; \
+       fi
+
+configure: link # run ./configure in every sub-tree that ships one
+       @for dir in $(SUBDIRS); do \
+               echo "Entering $$dir" && (cd $$dir && if [ -f configure ]; then ./configure; fi); \
+       done
+
+clean: link # run "make clean" in every sub-tree
+       @for dir in $(SUBDIRS); do \
+               echo "Entering $$dir" && (cd $$dir && $(MAKE) clean); \
+       done
+
+distclean: clean # run "make distclean" in every sub-tree
+       @for dir in $(SUBDIRS); do \
+               echo "Entering $$dir" && (cd $$dir && $(MAKE) distclean); \
+       done
+
+show: link # print the list of sub-trees found when this Makefile was parsed
+       @echo "$(SUBDIRS)"
+
+$(SUBDIRS): # build a single sub-tree by name
+       cd $@ && $(MAKE)
diff --git a/testsuite/lib/generic.sh b/testsuite/lib/generic.sh
new file mode 100644 (file)
index 0000000..cc48947
--- /dev/null
@@ -0,0 +1,88 @@
+
+export DEST="127.0.0.1"
+
+ts_log()       # Print all arguments on stdout via echo.
+{
+       echo "$@"
+}
+
+ts_err()       # Append a message (all arguments) to the file named by $ERRF.
+{
+       ts_log "$@" | tee >> $ERRF      # tee has no file operand: its stdout is what gets appended to $ERRF
+}
+
+ts_cat()       # Pass all arguments to cat, writing the named files to stdout.
+{
+       cat "$@"
+}
+
+ts_err_cat()   # Append the contents of the named files to the file named by $ERRF.
+{
+       ts_cat "$@" | tee >> $ERRF      # tee has no file operand: its stdout is what gets appended to $ERRF
+}
+
+ts_tc()        # Run "$TC <args>" and report the outcome. $1 = script name, $2 = step description, remaining args go to $TC.
+{
+       SCRIPT=$1; shift        # not declared local: SCRIPT, DESC, TMP_ERR and TMP_OUT are left set in the caller's shell
+       DESC=$1; shift
+       TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX` || exit       # receives the command's stderr
+       TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit       # receives the command's stdout
+
+       $TC $@ 2> $TMP_ERR > $TMP_OUT   # $@ is unquoted, so arguments are re-split on whitespace
+
+       if [ -s $TMP_ERR ]; then        # failure is judged by non-empty stderr; the exit status is not examined
+               ts_err "${SCRIPT}: ${DESC} failed:"
+               ts_err "command: $TC $@"
+               ts_err "stderr output:"
+               ts_err_cat $TMP_ERR
+               if [ -s $TMP_OUT ]; then
+                       ts_err "stdout output:"
+                       ts_err_cat $TMP_OUT
+               fi
+       elif [ -s $TMP_OUT ]; then      # success with output: echo it to stdout
+               echo "${SCRIPT}: ${DESC} succeeded with output:"
+               cat $TMP_OUT
+       else
+               echo "${SCRIPT}: ${DESC} succeeded"
+       fi
+
+       rm $TMP_ERR $TMP_OUT    # remove both temporary capture files
+}
+
+ts_ip()        # Run "$IP <args>" and report the outcome. $1 = script name, $2 = step description, remaining args go to $IP.
+{
+       SCRIPT=$1; shift        # not declared local: SCRIPT, DESC, TMP_ERR and TMP_OUT are left set in the caller's shell
+       DESC=$1; shift
+       TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX` || exit       # receives the command's stderr
+       TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit       # receives the command's stdout
+
+       $IP $@ 2> $TMP_ERR > $TMP_OUT   # $@ is unquoted, so arguments are re-split on whitespace
+
+       if [ -s $TMP_ERR ]; then        # failure is judged by non-empty stderr; the exit status is not examined
+               ts_err "${SCRIPT}: ${DESC} failed:"
+               ts_err "command: $IP $@"
+               ts_err "stderr output:"
+               ts_err_cat $TMP_ERR
+               if [ -s $TMP_OUT ]; then
+                       ts_err "stdout output:"
+                       ts_err_cat $TMP_OUT
+               fi
+       elif [ -s $TMP_OUT ]; then      # success with output: echo it to stdout
+               echo "${SCRIPT}: ${DESC} succeeded with output:"
+               cat $TMP_OUT
+       else
+               echo "${SCRIPT}: ${DESC} succeeded"
+       fi
+
+       rm $TMP_ERR $TMP_OUT    # remove both temporary capture files
+}
+
+ts_qdisc_available()   # NOTE: inverted result. Returns 0 when $TC answers "Unknown qdisc" for the qdisc named in $1, 1 otherwise.
+{
+       HELPOUT=`$TC qdisc add $1 help 2>&1`    # combined stdout and stderr of the help request
+       if [ "`echo $HELPOUT | grep \"^Unknown qdisc\"`" ]; then
+               return 0;       # help output starts with "Unknown qdisc"
+       else
+               return 1;       # any other output
+       fi
+}
diff --git a/testsuite/tests/cbq.t b/testsuite/tests/cbq.t
new file mode 100644 (file)
index 0000000..bff814b
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/sh
+$TC qdisc del dev $DEV root >/dev/null 2>&1    # remove any existing root qdisc; output and errors are discarded
+$TC qdisc add dev $DEV root handle 10:0 cbq bandwidth 100Mbit avpkt 1400 mpu 64        # install a cbq root qdisc
+$TC class add dev $DEV parent 10:0  classid 10:12   cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 maxburst 1 avpkt  500 bounded # add one bounded class under it
+$TC qdisc list dev $DEV        # listing with the cbq tree in place
+$TC qdisc del dev $DEV root
+$TC qdisc list dev $DEV        # listing again after the deletion
+$TC qdisc add dev $DEV root handle 10:0 cbq bandwidth 100Mbit avpkt 1400 mpu 64        # second add/delete cycle with the same parameters
+$TC class add dev $DEV parent 10:0  classid 10:12   cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 maxburst 1 avpkt  500 bounded
+$TC qdisc del dev $DEV root
diff --git a/testsuite/tests/cls-testbed.t b/testsuite/tests/cls-testbed.t
new file mode 100644 (file)
index 0000000..efae2a5
--- /dev/null
@@ -0,0 +1,68 @@
+#!/bin/bash
+# vim: ft=sh
+
+source lib/generic.sh
+
+QDISCS="cbq htb dsmark"
+
+for q in ${QDISCS}; do
+       ts_log "Preparing classifier testbed with qdisc $q"
+
+       for c in tests/cls/*.t; do
+
+               case "$q" in
+               cbq)
+                       ts_tc "cls-testbed" "cbq root qdisc creation" \
+                               qdisc add dev $DEV root handle 10:0 \
+                               cbq bandwidth 100Mbit avpkt 1400 mpu 64
+                       ts_tc "cls-testbed" "cbq root class creation" \
+                               class add dev $DEV parent 10:0  classid 10:12 \
+                               cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 \
+                               maxburst 1 avpkt  500 bounded
+                       ;;
+               htb)
+                       ts_qdisc_available "htb"
+                       if [ $? -eq 0 ]; then
+                               ts_log "cls-testbed: HTB is unsupported by $TC, skipping"
+                               continue;
+                       fi
+                       ts_tc "cls-testbed" "htb root qdisc creation" \
+                               qdisc add dev $DEV root handle 10:0 htb
+                       ts_tc "cls-testbed" "htb root class creation" \
+                               class add dev $DEV parent 10:0 classid 10:12 \
+                               htb rate 100Mbit quantum 1514
+                       ;;
+               dsmark)
+                       ts_qdisc_available "dsmark"
+                       if [ $? -eq 0 ]; then
+                               ts_log "cls-testbed: dsmark is unsupported by $TC, skipping"
+                               continue;
+                       fi
+                       ts_tc "cls-testbed" "dsmark root qdisc creation" \
+                               qdisc add dev $DEV root handle 20:0 \
+                               dsmark indices 64 default_index 1 set_tc_index
+                       ts_tc "cls-testbed" "dsmark class creation" \
+                               class change dev $DEV parent 20:0 classid 20:12 \
+                               dsmark mask 0xff value 2
+                       ts_tc "cls-testbed" "prio inner qdisc creation" \
+                               qdisc add dev $DEV parent 20:0 handle 10:0 prio
+                       ;;
+               *)
+                       ts_err "cls-testbed: no testbed configuration found for qdisc $q"
+                       continue
+                       ;;
+               esac
+
+               ts_tc "cls-testbed" "tree listing" qdisc list dev eth0
+               ts_tc "cls-testbed" "tree class listing" class list dev eth0
+               ts_log "cls-testbed: starting classifier test $c"
+               $c 
+
+               case "$q" in
+               *)
+                       ts_tc "cls-testbed" "generic qdisc tree deletion" \
+                               qdisc del dev $DEV root
+                       ;;
+               esac
+       done
+done
diff --git a/testsuite/tests/dsmark.t b/testsuite/tests/dsmark.t
new file mode 100644 (file)
index 0000000..6934165
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+# vim: ft=sh
+
+source lib/generic.sh
+
+ts_qdisc_available "dsmark"
+if [ $? -eq 0 ]; then
+       ts_log "dsmark: Unsupported by $TC, skipping"
+       exit 127
+fi
+
+ts_tc "dsmark" "dsmark root qdisc creation" \
+       qdisc add dev $DEV root handle 10:0 \
+       dsmark indices 64 default_index 1 set_tc_index
+
+ts_tc "dsmark" "dsmark class 1 creation" \
+       class change dev $DEV parent 10:0 classid 10:12 \
+       dsmark mask 0xff value 2
+
+ts_tc "dsmark" "dsmark class 2 creation" \
+       class change dev $DEV parent 10:0 classid 10:13 \
+       dsmark mask 0xfc value 4
+
+ts_tc "dsmark" "dsmark dump qdisc" \
+       qdisc list dev $DEV
+
+ts_tc "dsmark" "dsmark dump class" \
+       class list dev $DEV parent 10:0
+
+ts_tc "dsmark" "generic qdisc tree deletion" \
+       qdisc del dev $DEV root