From b4a5a91c5a4ca186690479ddc0fff26644c98c93 Mon Sep 17 00:00:00 2001 From: Marc Fiuczynski Date: Fri, 19 Jan 2007 18:46:40 +0000 Subject: [PATCH] This commit was generated by cvs2svn to compensate for changes in r2587, which included commits to RCS files with non-trunk default branches. --- ChangeLog | 233 +++++- Makefile | 14 +- README.decnet | 52 +- doc/actions/actions-general | 254 ++++++ doc/actions/dummy-README | 155 ++++ doc/actions/mirred-usage | 2 +- etc/iproute2/ematch_map | 5 + etc/iproute2/rt_dsfield | 24 +- etc/iproute2/rt_protos | 41 +- etc/iproute2/rt_realms | 2 +- etc/iproute2/rt_scopes | 11 +- etc/iproute2/rt_tables | 8 +- examples/README.cbq | 122 +++ examples/cbq.init-v0.7.3 | 984 +++++++++++++++++++++++ include/SNAPSHOT.h | 2 +- include/iptables.h | 22 + include/iptables_common.h | 1 + include/linux/inet_diag.h | 122 +++ include/linux/ip_mp_alg.h | 22 + include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netlink.h | 45 +- include/linux/pkt_cls.h | 3 + include/linux/pkt_sched.h | 65 +- include/linux/rtnetlink.h | 169 +++- include/linux/socket.h | 1 + include/linux/tc_act/tc_defact.h | 21 + include/linux/tc_ematch/tc_em_cmp.h | 26 + include/linux/tc_ematch/tc_em_meta.h | 94 +++ include/linux/tc_ematch/tc_em_nbyte.h | 13 + include/linux/tcp.h | 35 +- include/linux/xfrm.h | 55 +- include/ll_map.h | 10 +- include/net/tcp_states.h | 50 ++ include/rt_names.h | 2 +- include/utils.h | 16 +- ip/Makefile | 4 +- ip/ip.c | 164 ++-- ip/ip_common.h | 6 + ip/ipaddress.c | 47 +- ip/iplink.c | 5 +- ip/ipmaddr.c | 3 +- ip/ipmonitor.c | 12 +- ip/ipmroute.c | 6 +- ip/ipneigh.c | 24 +- ip/ipntable.c | 657 +++++++++++++++ ip/iproute.c | 69 +- ip/iprule.c | 26 +- ip/ipxfrm.c | 152 +++- ip/xfrm.h | 26 +- ip/xfrm_monitor.c | 218 +++++ ip/xfrm_policy.c | 150 ++-- ip/xfrm_state.c | 326 ++++++-- lib/libnetlink.c | 90 ++- lib/ll_addr.c | 3 +- lib/ll_map.c | 16 +- lib/rt_names.c | 3 +- lib/utils.c | 90 ++- man/man8/ip.8 | 5 +- man/man8/tc-pfifo.8 | 0 misc/Makefile | 
4 +- misc/arpd.c | 13 +- misc/ifstat.c | 7 +- misc/lnstat.c | 7 +- misc/lnstat_util.c | 9 +- misc/nstat.c | 5 +- misc/rtacct.c | 13 +- misc/ss.c | 198 ++--- netem/Makefile | 20 +- netem/normal.c | 15 +- netem/paretonormal.c | 14 +- tc/Makefile | 19 +- tc/em_cmp.c | 188 +++++ tc/em_meta.c | 550 +++++++++++++ tc/em_nbyte.c | 144 ++++ tc/em_u32.c | 178 ++++ tc/emp_ematch.l | 145 ++++ tc/emp_ematch.y | 101 +++ tc/f_basic.c | 146 ++++ tc/f_u32.c | 23 +- tc/m_ematch.c | 493 ++++++++++++ tc/m_ematch.h | 179 +++++ tc/m_ipt.c | 64 +- tc/m_mirred.c | 5 +- tc/m_pedit.c | 4 +- tc/q_cbq.c | 4 +- tc/q_dsmark.c | 8 +- tc/q_netem.c | 110 ++- tc/tc.c | 96 +-- tc/tc_class.c | 5 +- tc/tc_qdisc.c | 4 + testsuite/Makefile | 36 +- testsuite/iproute2/Makefile | 33 + testsuite/lib/generic.sh | 88 ++ testsuite/tests/cbq.t | 10 + testsuite/tests/cls-testbed.t | 68 ++ testsuite/tests/dsmark.t | 31 + 96 files changed, 7026 insertions(+), 795 deletions(-) create mode 100644 doc/actions/actions-general create mode 100644 doc/actions/dummy-README create mode 100644 etc/iproute2/ematch_map create mode 100644 examples/README.cbq create mode 100644 examples/cbq.init-v0.7.3 create mode 100644 include/linux/inet_diag.h create mode 100644 include/linux/ip_mp_alg.h create mode 100644 include/linux/socket.h create mode 100644 include/linux/tc_act/tc_defact.h create mode 100644 include/linux/tc_ematch/tc_em_cmp.h create mode 100644 include/linux/tc_ematch/tc_em_meta.h create mode 100644 include/linux/tc_ematch/tc_em_nbyte.h create mode 100644 include/net/tcp_states.h create mode 100644 ip/ipntable.c create mode 100644 ip/xfrm_monitor.c create mode 100644 man/man8/tc-pfifo.8 create mode 100644 tc/em_cmp.c create mode 100644 tc/em_meta.c create mode 100644 tc/em_nbyte.c create mode 100644 tc/em_u32.c create mode 100644 tc/emp_ematch.l create mode 100644 tc/emp_ematch.y create mode 100644 tc/f_basic.c create mode 100644 tc/m_ematch.c create mode 100644 tc/m_ematch.h create mode 100644 
testsuite/iproute2/Makefile create mode 100644 testsuite/lib/generic.sh create mode 100644 testsuite/tests/cbq.t create mode 100644 testsuite/tests/cls-testbed.t create mode 100644 testsuite/tests/dsmark.t diff --git a/ChangeLog b/ChangeLog index 53bd530..3590a64 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,234 @@ +2006-03-21 Stephen Hemminger + + * Back out the 2.4 utsname patch + +2006-03-21 James Lentini + + * Increase size of hw address allowed for ip neigh to allow + for IB. + +2006-03-14 Russell Stuart + + * Fix missing memset in tc sample + * Fixes for tc hash samples + * Add sample divisor + +2006-03-10 Alpt + + * Add more rt_proto values + +2006-03-10 Dale Sedivec + + * Warn when using "handle" instead of "classid" with "tc class" + +2006-03-10 Jean Tourrilhes + + * Fix endless loop in netlink error handling + + +2006-03-10 Stephen Hemminger + + * Change default lnstat count to 1 + * Update to 2.6.16 headers + * Add fake version of include/linux/socket.h to fix warnings + +2006-01-12 Patrick McHardy + + * Handle DCCP in ipxfrm.c to allow using port numbers in the selector. 
+ +2006-01-10 Masahide NAKAMURA + + * Add ip link ntable + +2006-01-10 Stephen Hemminger + + * Update headers to sanitized kernel 2.6.15 + * Fix ipv6 priority option in u32 + +2006-01-03 Alpt + + * Ip man page addition + +2006-01-03 Jamal Hadi Salim + + * Documentation for ifb + +2005-12-09 Stephen Hemminger + + * Add corrupt feature to netem + +2005-12-02 Stephen Hemminger + + * Backout ambiguous ip command matches + +2005-11-22 Stephen Hemminger + + * Handle ambiguous ip command matches + +2005-11-22 Patrick McHardy + + * Add back ip command aliases + +2005-11-07 Masahide NAKAMURA + + * Updating for 2.6.14 + - Show UPD{SA,POLICY} message information from kernel instead of error + - Add length check of deleting message from kernel + - Use macro for struct xfrm_user{sa,policy}_id + + * Minor fix: + - Add fflush at the end of normal dump + +2005-11-01 Jamal Hadi Salim + + * Fix handling of XFRM monitor and state + +2005-11-01 Stephen Hemminger + + * Fix ip command shortcuts + +2005-10-12 Stephen Hemminger + + * Add more CBQ examples from Fedora Core + * Fix buffer overrun in iproute because of bits vs. bytes confusion + +2005-10-12 Jamal Hadi Salim + + * Fix ip rule flush, need to reopen rtnl + +2005-10-07 Stephen Hemminger + + * Reenable ip mroute + +2005-10-07 Mike Frysinger + + * Handle pfifo_fast that has no qopt without segfaulting + +2005-10-05 Mads Martin Joergensen + + * Trivial netem ccopts + +2005-10-04 Jerome Borsboom + + * Fix regression in ip addr (libnetlink) handling + +2005-09-21 Stephen Hemminger + + * Fix uninitialized memory and leaks with valgrind + Reported by Redhat + +2005-09-01 Mike Frysinger + + * Fix build issues with netem tables (parallel make and HOSTCC) + +2005-09-01 Stephen Hemminger + + * Integrate support for DCCP into 'ss' (from acme) + * Add -batch option to ip. 
+ * Update to 2.6.14 headers + +2005-09-01 Eric Dumazet + + * Fix lnstat : First column should not be summed + +2005-08-16 Stephen Hemminger + + * Limit ip route flush to 10 rounds. + * Cleanup ip rule flush error message + +2005-08-08 Stephen Hemminger + + * Update to 2.6.13+ kernel headers + * Fix array overrun in paretonormal + * Fix ematch to not include dropped fields from skb. + +2005-07-14 Thomas Graf + + * Make ematch bison/lex build with common flex + +2005-07-10 Stephen Hemminger + + * Fix Gcc 4.0 build warnings signed/unsigned + +2005-06-23 Jamal Hadi Salim + + * Fix for options process with ipt + +2005-06-23 Thomas Graf + + * Add extended matches (nbyte, cmp, u32, meta) + * Add basic classifier + * Fix clean/distclean makefile targets + * update local header file copies + * IPv4 multipath algorithm selection support + * cscope Makefile target + * Fix off-by-one while generating argument vector + in batched mode. + * Assume stdin if no argument is given to -batch + +2005-06-22 Stephen Hemminger + + * Update include files to 2.6.12 + * Add ss support for TCP_CONG + +2005-06-13 Steven Whitehouse + + * Decnet doc's update + +2005-06-07 Stephen Hemminger + + * Fix 'ip link' map to handle case where device gets autoloaded + by using if_nametoindex as fallback + * Device indices are unsigned not int. + +2005-06-07 Masahide NAKAMURA + + * [ip] show timestamp when using '-t' option. + * [ip] remove duplicated code for expired message of xfrm. + * [ip] add "deleteall" command for xfrm; + "flush" uses kernel's flush interface and + "deleteall" uses legacy iproute2's flush feature like + getting-and-deleting-for-each. 
+ +2005-03-30 Stephen Hemminger + + * include/linux/netfilter_ipv4/ip_tables.h don't include compiler.h + because it isn't needed and not on all systems + * Update rtnetlink.h and pkt_cls.h to be stripped versions + of headers from 2.6.12-rc1 + +2005-03-30 Jamal Hadi Salim + + * Proper version of iptables headers (from 1.3.1) + * Set revision file in m_ipt + * Fix action_util naming in mirred + * don't call ll_init_map in mirred + +2005-03-19 Thomas Graf + + * Warn about wildcard deletions and provide IFA_ADDRESS upon + deletions to enforce prefix length validation for IPv4. + * Fix netlink message alignment when the last routing attribute added + has a data length not aligned to RTA_ALIGNTO. + +2005-03-30 Masahide NAKAMURA + + * ipv6 xfrm allocspi and monitor support. + +2005-03-29 Stephen Hemminger + + * switch to stack for netem tables + +2005-03-18 Stephen Hemminger + + * add -force option to batch mode + * handle midline comments in batch mode + * sum per cpu fields in lnstat correctly + 2005-03-14 Stephen Hemminger * cleanup batch mode, allow continuation, comments etc. 
@@ -71,7 +302,7 @@ * need to call getline() with null for first usage * don't overwrite const arg -2005-02-07 Stephen Hemminger +2005-02-07 Stephen Hemminger * Add experimental distribution diff --git a/Makefile b/Makefile index 1d11462..ac58cd9 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,6 @@ SBINDIR=/usr/sbin CONFDIR=/etc/iproute2 DOCDIR=/usr/share/doc/iproute2 MANDIR=/usr/share/man -KERNEL_INCLUDE=/usr/include # Path to db_185.h include DBM_INCLUDE:=/usr/include @@ -24,6 +23,7 @@ CC = gcc HOSTCC = gcc CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall CFLAGS = $(CCOPTS) -I../include $(DEFINES) +YACCFLAGS = -d -t -v LDLIBS += -L../lib -lnetlink -lutil @@ -36,7 +36,7 @@ all: Config do $(MAKE) $(MFLAGS) -C $$i; done Config: - ./configure $(KERNEL_INCLUDE) + sh configure $(KERNEL_INCLUDE) install: all install -m 0755 -d $(DESTDIR)$(SBINDIR) @@ -51,18 +51,22 @@ install: all install -m 0644 $(shell find etc/iproute2 -maxdepth 1 -type f) $(DESTDIR)$(CONFDIR) install -m 0755 -d $(DESTDIR)$(MANDIR)/man8 install -m 0644 $(shell find man/man8 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man8 - ln -sf $(MANDIR)/man8/tc-pbfifo.8 $(DESTDIR)$(MANDIR)/man8/tc-bfifo.8 - ln -sf $(MANDIR)/man8/tc-pbfifo.8 $(DESTDIR)$(MANDIR)/man8/tc-pfifo.8 + ln -sf tc-pbfifo.8 $(DESTDIR)$(MANDIR)/man8/tc-bfifo.8 + ln -sf tc-pbfifo.8 $(DESTDIR)$(MANDIR)/man8/tc-pfifo.8 install -m 0755 -d $(DESTDIR)$(MANDIR)/man3 install -m 0644 $(shell find man/man3 -maxdepth 1 -type f) $(DESTDIR)$(MANDIR)/man3 clean: + rm -f cscope.* @for i in $(SUBDIRS) doc; \ do $(MAKE) $(MFLAGS) -C $$i clean; done clobber: clean rm -f Config -distclean: clean clobber +distclean: clobber + +cscope: + cscope -b -q -R -Iinclude -sip -slib -smisc -snetem -stc .EXPORT_ALL_VARIABLES: diff --git a/README.decnet b/README.decnet index 4d7453a..4300f90 100644 --- a/README.decnet +++ b/README.decnet @@ -1,41 +1,33 @@ Here are a few quick points about DECnet support... 
+ o iproute2 is the tool of choice for configuring the DECnet support for + Linux. For many features, it is the only tool which can be used to + configure them. + o No name resolution is available as yet, all addresses must be entered numerically. - o The neighbour cache may well list every entry as having the address - 0.170. This is due to a problem that I need to sort out kernel side. - It is harmless (but don't try and use neigh add yet) just look in - /proc/net/decnet_neigh to see the real addresses for now. + o Remember to set the hardware address of the interface using: + + ip link set ethX address xx:xx:xx:xx:xx:xx + (where xx:xx:xx:xx:xx:xx is the MAC address for your DECnet node + address) - o The rtnetlink support in the kernel is rather exprimental, expect a - few odd things to happen for the next few DECnet kernel releases. + if your Ethernet card won't listen to more than one unicast + mac address at once. If the Linux DECnet stack doesn't talk to + any other DECnet nodes, then check this with tcpdump and if its + a problem, change the mac address (but do this _before_ starting + any other network protocol on the interface) o Whilst you can use ip addr add to add more than one DECnet address to an interface, don't expect addresses which are not the same as the - kernels node address to work properly. i.e. You will break the DECnet - protocol if you do add anything other than the automatically generated - interface addresses to ethernet cards. This option is there for future - link layer support, where the device will have to be configed for - DECnet explicitly. - - o The DECnet support is currently self contained. You do not need the - libdnet library to use it. In fact until I've sent the dnet_pton and - dnet_ntop functions to Patrick to add, you can't use libdnet. - - o If you are not using the very latest 2.3.xx series kernels, don't - try and list DECnet routes if you've got IPv6 compiled into the - kernel. It will oops. 
- - o My main reason for writing the DECnet support for iproute2 was to - check out the DECnet routing code, so the route get and - route show cache commands are likely to be the most debugged out of - all of them. - - o If you find bugs in the DECnet support, please send them to me in the - first instance, and then I'll send Alexey a patch to fix it. IPv4/6 - bugs should be sent to Alexey as before. - -Steve Whitehouse + kernels node address to work properly with 2.4 kernels. This should + be fine with 2.6 kernels as the routing code has been extensively + modified and improved. + + o The DECnet support is currently self contained. It does not depend on + the libdnet library. + +Steve Whitehouse diff --git a/doc/actions/actions-general b/doc/actions/actions-general new file mode 100644 index 0000000..bb2295d --- /dev/null +++ b/doc/actions/actions-general @@ -0,0 +1,254 @@ + +This documented is slightly dated but should give you idea of how things +work. + +What is it? +----------- + +An extension to the filtering/classification architecture of Linux Traffic +Control. +Up to 2.6.8 the only action that could be "attached" to a filter was policing. +i.e you could say something like: + +----- +tc filter add dev lo parent ffff: protocol ip prio 10 u32 match ip src \ +127.0.0.1/32 flowid 1:1 police mtu 4000 rate 1500kbit burst 90k +----- + +which implies "if a packet is seen on the ingress of the lo device with +a source IP address of 127.0.0.1/32 we give it a classification id of 1:1 and +we execute a policing action which rate limits its bandwidth utilization +to 1.5Mbps". + +The new extensions allow for more than just policing actions to be added. +They are also fully backward compatible. If you have a kernel that doesnt +understand them, then the effect is null i.e if you have a newer tc +but older kernel, the actions are not installed. Likewise if you +have a newer kernel but older tc, obviously the tc will use current +syntax which will work fine. 
Of course to get the required effect you need +both newer tc and kernel. If you are reading this you have the +right tc ;-> + +A side effect is that we can now get stateless firewalling to work with tc. +Essentially this is now an alternative to iptables. +I wont go into details of my dislike for iptables at times, but +scalability is one of the main issues; however, if you need stateful +classification - use netfilter (for now). + +This stuff works on both ingress and egress qdiscs. + +Features +-------- + +1) new additional syntax and actions enabled. Note old syntax is still valid. + +Essentially this is still the same syntax as tc with a new construct +"action". The syntax is of the form: +tc filter add parent 1:0 protocol ip prio 10 +flowid 1:1 action * + +You can have as many actions as you want (within sensible reasoning). + +In the past the only real action was the policer; i.e you could do something +along the lines of: +tc filter add dev lo parent ffff: protocol ip prio 10 u32 \ +match ip src 127.0.0.1/32 flowid 1:1 \ +police mtu 4000 rate 1500kbit burst 90k + +Although you can still use the same syntax, now you can say: + +tc filter add dev lo parent 1:0 protocol ip prio 10 u32 \ +match ip src 127.0.0.1/32 flowid 1:1 \ +action police mtu 4000 rate 1500kbit burst 90k + +" generic Actions" (gact) at the moment are: +{ drop, pass, reclassify, continue} +(If you have others, no listed here give me a reason and we will add them) ++drop says to drop the packet ++pass says to accept it ++reclassify requests for reclassification of the packet ++continue requests for next lookup to match + +2)In order to take advantage of some of the targets written by the +iptables people, a classifier can have a packet being massaged by an +iptable target. I have only tested with mangler targets up to now. 
+(in fact anything that is not in the mangling table is disabled right now) + +In terms of hooks: +*ingress is mapped to pre-routing hook +*egress is mapped to post-routing hook +I dont see much value in the other hooks, if you see it and email me good +reasons, the addition is trivial. + +Example syntax for iptables targets usage becomes: +tc filter add ..... u32 action ipt -j + +example: +tc filter add dev lo parent ffff: protocol ip prio 8 u32 \ +match ip dst 127.0.0.8/32 flowid 1:12 \ +action ipt -j mark --set-mark 2 + +3) A feature i call pipe +The motivation is derived from Unix pipe mechanism but applied to packets. +Essentially take a matching packet and pass it through +action1 | action2 | action3 etc. +You could do something similar to this with the tc policer and the "continue" +operator but this rather restricts it to just the policer and requires +multiple rules (and lookups, hence quite inefficient); + +as an example -- and please note that this is just an example _not_ The +Word Youve Been Waiting For (yes i have had problems giving examples +which ended up becoming dogma in documents and people modifying them a little +to look clever); + +i selected the metering rates to be small so that i can show better how +things work. + +The script below does the following: +- an incoming packet from 10.0.0.21 is first given a firewall mark of 1. + +- It is then metered to make sure it does not exceed its allocated rate of +1Kbps. If it doesnt exceed rate, this is where we terminate action execution. + +- If it does exceed its rate, its "color" changes to a mark of 2 and it is +then passed through a second meter. 
+ +-The second meter is shared across all flows on that device [i am surprised +that this seems to be not a well known feature of the policer; Bert was telling +me that someone was writing a qdisc just to do sharing across multiple devices; +it must be the summer heat again; weve had someone doing that every year around +summer -- the key to sharing is to use an operator "index" in your policer +rules (example "index 20"). All your rules have to use the same index to +share.] + +-If the second meter is exceeded the color of the flow changes further to 3. + +-We then pass the packet to another meter which is shared across all devices +in the system. If this meter is exceeded we drop the packet. + +Note the mark can be used further up the system to do things like policy +or more interesting things on the egress. + +------------------ cut here ------------------------------- +# +# Add an ingress qdisc on eth0 +tc qdisc add dev eth0 ingress +# +#if you see an incoming packet from 10.0.0.21 +tc filter add dev eth0 parent ffff: protocol ip prio 1 \ +u32 match ip src 10.0.0.21/32 flowid 1:15 \ +# +# first give it a mark of 1 +action ipt -j mark --set-mark 1 index 2 \ +# +# then pass it through a policer which allows 1kbps; if the flow +# doesnt exceed that rate, this is where we stop, if it exceeds we +# pipe the packet to the next action +action police rate 1kbit burst 9k pipe \ +# +# which marks the packet fwmark as 2 and pipes +action ipt -j mark --set-mark 2 \ +# +# next attempt to borrow b/width from a meter +# used across all flows incoming on eth0("index 30") +# and if that is exceeded we pipe to the next action +action police index 30 mtu 5000 rate 1kbit burst 10k pipe \ +# mark it as fwmark 3 if exceeded +action ipt -j mark --set-mark 3 \ +# and then attempt to borrow from a meter used by all devices in the +# system. Should this be exceeded, drop the packet on the floor. 
+action police index 20 mtu 5000 rate 1kbit burst 90k drop +--------------------------------- + +Now lets see the actions installed with +"tc filter show parent ffff: dev eth0" + +-------- output ----------- +jroot# tc filter show parent ffff: dev eth0 +filter protocol ip pref 1 u32 +filter protocol ip pref 1 u32 fh 800: ht divisor 1 +filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:15 + + action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x1 index 2 + + action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb + + action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x2 index 1 + + action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b + + action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x3 index 3 + + action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b + + match 0a000015/ffffffff at 12 +------------------------------- + +Note the ordering of the actions is based on the order in which we entered +them. In the future i will add explicit priorities. + +Now lets run a ping -f from 10.0.0.21 to this host; stop the ping after +you see a few lines of dots + +---- +[root@jzny hadi]# ping -f 10.0.0.22 +PING 10.0.0.22 (10.0.0.22): 56 data bytes +.................................................................................................................................................................................................................................................................................................................................................................................................................................................... 
+--- 10.0.0.22 ping statistics --- +2248 packets transmitted, 1811 packets received, 19% packet loss +round-trip min/avg/max = 0.7/9.3/20.1 ms +----------------------------- + +Now lets take a look at the stats with "tc -s filter show parent ffff: dev eth0" + +-------------- +jroot# tc -s filter show parent ffff: dev eth0 +filter protocol ip pref 1 u32 +filter protocol ip pref 1 u32 fh 800: ht divisor 1 +filter protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 +5 + + action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x1 index 2 + Sent 188832 bytes 2248 pkts (dropped 0, overlimits 0) + + action order 2: police 1 action pipe rate 1Kbit burst 9Kb mtu 2Kb + Sent 188832 bytes 2248 pkts (dropped 0, overlimits 2122) + + action order 3: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x2 index 1 + Sent 178248 bytes 2122 pkts (dropped 0, overlimits 0) + + action order 4: police 30 action pipe rate 1Kbit burst 10Kb mtu 5000b + Sent 178248 bytes 2122 pkts (dropped 0, overlimits 1945) + + action order 5: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x3 index 3 + Sent 163380 bytes 1945 pkts (dropped 0, overlimits 0) + + action order 6: police 20 action drop rate 1Kbit burst 90Kb mtu 5000b + Sent 163380 bytes 1945 pkts (dropped 0, overlimits 437) + + match 0a000015/ffffffff at 12 +------------------------------- + +Neat, eh? + + +Wanna write an action module? +------------------------------ +Its easy. Either look at the code or send me email. I will document at +some point; will also accept documentation. + +TODO +---- + +Lotsa goodies/features coming. Requests also being accepted. +At the moment the focus has been on getting the architecture in place. +Expect new things in the spurious time i have to work on this +(particularly around end of year when i have typically get time off +from work). 
+ diff --git a/doc/actions/dummy-README b/doc/actions/dummy-README new file mode 100644 index 0000000..3ef9f21 --- /dev/null +++ b/doc/actions/dummy-README @@ -0,0 +1,155 @@ + +Advantage over current IMQ; cleaner in particular in SMP; +with a _lot_ less code. +Old Dummy device functionality is preserved while new one only +kicks in if you use actions. + +IMQ USES +-------- +As far as i know the reasons listed below are why people use IMQ. +It would be nice to know of anything else that i missed. + +1) qdiscs/policies that are per device as opposed to system wide. +IMQ allows for sharing. + +2) Allows for queueing incoming traffic for shaping instead of +dropping. I am not aware of any study that shows policing is +worse than shaping in achieving the end goal of rate control. +I would be interested if anyone is experimenting. + +3) Very interesting use: if you are serving p2p you may wanna give +preference to your own locally originated traffic (when responses come back) +vs someone using your system to do bittorrent. So QoSing based on state +comes in as the solution. What people did to achieve this was stick +the IMQ somewhere prelocal hook. +I think this is a pretty neat feature to have in Linux in general. +(i.e not just for IMQ). +But i wont go back to putting netfilter hooks in the device to satisfy +this. I also dont think its worth it hacking dummy some more to be +aware of say L3 info and play ip rule tricks to achieve this. +--> Instead the plan is to have a conntrack related action. This action will +selectively either query/create conntrack state on incoming packets. +Packets could then be redirected to dummy based on what happens -> eg +on incoming packets; if we find they are of known state we could send to +a different queue than one which didnt have existing state. This +all however is dependent on whatever rules the admin enters. + +At the moment this function does not exist yet. 
I have decided instead +of sitting on the patch to release it and then if theres pressure i will +add this feature. + +What you can do with dummy currently with actions +-------------------------------------------------- + +Lets say you are policing packets from alias 192.168.200.200/32 +you dont want those to exceed 100kbps going out. + +tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \ +match ip src 192.168.200.200/32 flowid 1:2 \ +action police rate 100kbit burst 90k drop + +If you run tcpdump on eth0 you will see all packets going out +with src 192.168.200.200/32 dropped or not +Extend the rule a little to see only the ones that made it out: + +tc filter add dev eth0 parent 1: protocol ip prio 10 u32 \ +match ip src 192.168.200.200/32 flowid 1:2 \ +action police rate 10kbit burst 90k drop \ +action mirred egress mirror dev dummy0 + +Now fire tcpdump on dummy0 to see only those packets .. +tcpdump -n -i dummy0 -x -e -t + +Essentially a good debugging/logging interface. + +If you replace mirror with redirect, those packets will be +blackholed and will never make it out. This redirect behavior +changes with new patch (but not the mirror). 
+ +What you can do with the patch to provide functionality +that most people use IMQ for below: + +-------- +export TC="/sbin/tc" + +$TC qdisc add dev dummy0 root handle 1: prio +$TC qdisc add dev dummy0 parent 1:1 handle 10: sfq +$TC qdisc add dev dummy0 parent 1:2 handle 20: tbf rate 20kbit buffer 1600 limit 3000 +$TC qdisc add dev dummy0 parent 1:3 handle 30: sfq +$TC filter add dev dummy0 protocol ip pref 1 parent 1: handle 1 fw classid 1:1 +$TC filter add dev dummy0 protocol ip pref 2 parent 1: handle 2 fw classid 1:2 + +ifconfig dummy0 up + +$TC qdisc add dev eth0 ingress + +# redirect all IP packets arriving in eth0 to dummy0 +# use mark 1 --> puts them onto class 1:1 +$TC filter add dev eth0 parent ffff: protocol ip prio 10 u32 \ +match u32 0 0 flowid 1:1 \ +action ipt -j MARK --set-mark 1 \ +action mirred egress redirect dev dummy0 + +-------- + + +Run A Little test: + +from another machine ping so that you have packets going into the box: +----- +[root@jzny action-tests]# ping 10.22 +PING 10.22 (10.0.0.22): 56 data bytes +64 bytes from 10.0.0.22: icmp_seq=0 ttl=64 time=2.8 ms +64 bytes from 10.0.0.22: icmp_seq=1 ttl=64 time=0.6 ms +64 bytes from 10.0.0.22: icmp_seq=2 ttl=64 time=0.6 ms + +--- 10.22 ping statistics --- +3 packets transmitted, 3 packets received, 0% packet loss +round-trip min/avg/max = 0.6/1.3/2.8 ms +[root@jzny action-tests]# +----- +Now look at some stats: + +--- +[root@jmandrake]:~# $TC -s filter show parent ffff: dev eth0 +filter protocol ip pref 10 u32 +filter protocol ip pref 10 u32 fh 800: ht divisor 1 +filter protocol ip pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 + match 00000000/00000000 at 0 + action order 1: tablename: mangle hook: NF_IP_PRE_ROUTING + target MARK set 0x1 + index 1 ref 1 bind 1 installed 4195sec used 27sec + Sent 252 bytes 3 pkts (dropped 0, overlimits 0) + + action order 2: mirred (Egress Redirect to device dummy0) stolen + index 1 ref 1 bind 1 installed 165 sec used 27 sec + Sent 252 bytes 3 
pkts (dropped 0, overlimits 0) + +[root@jmandrake]:~# $TC -s qdisc +qdisc sfq 30: dev dummy0 limit 128p quantum 1514b + Sent 0 bytes 0 pkts (dropped 0, overlimits 0) +qdisc tbf 20: dev dummy0 rate 20Kbit burst 1575b lat 2147.5s + Sent 210 bytes 3 pkts (dropped 0, overlimits 0) +qdisc sfq 10: dev dummy0 limit 128p quantum 1514b + Sent 294 bytes 3 pkts (dropped 0, overlimits 0) +qdisc prio 1: dev dummy0 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 + Sent 504 bytes 6 pkts (dropped 0, overlimits 0) +qdisc ingress ffff: dev eth0 ---------------- + Sent 308 bytes 5 pkts (dropped 0, overlimits 0) + +[root@jmandrake]:~# ifconfig dummy0 +dummy0 Link encap:Ethernet HWaddr 00:00:00:00:00:00 + inet6 addr: fe80::200:ff:fe00:0/64 Scope:Link + UP BROADCAST RUNNING NOARP MTU:1500 Metric:1 + RX packets:6 errors:0 dropped:3 overruns:0 frame:0 + TX packets:3 errors:0 dropped:0 overruns:0 carrier:0 + collisions:0 txqueuelen:32 + RX bytes:504 (504.0 b) TX bytes:252 (252.0 b) +----- + +Dummy continues to behave like it always did. +You send it any packet not originating from the actions it will drop them. +[In this case the three dropped packets were ipv6 ndisc]. + +cheers, +jamal diff --git a/doc/actions/mirred-usage b/doc/actions/mirred-usage index 3e135a0..aa942e5 100644 --- a/doc/actions/mirred-usage +++ b/doc/actions/mirred-usage @@ -66,6 +66,6 @@ action mirred egress mirror dev eth1 --- A more interesting example is when you mirror flows to a dummy device -so you could tcpdump them (dummy by defaults drops all devices it sees). +so you could tcpdump them (dummy by defaults drops all packets it sees). This is a very useful debug feature. 
diff --git a/etc/iproute2/ematch_map b/etc/iproute2/ematch_map new file mode 100644 index 0000000..7c6a281 --- /dev/null +++ b/etc/iproute2/ematch_map @@ -0,0 +1,5 @@ +# lookup table for ematch kinds +1 cmp +2 nbyte +3 u32 +4 meta diff --git a/etc/iproute2/rt_dsfield b/etc/iproute2/rt_dsfield index 2b36e49..110061a 100644 --- a/etc/iproute2/rt_dsfield +++ b/etc/iproute2/rt_dsfield @@ -1,15 +1,13 @@ -#0x10 lowdelay -#0x08 throughput -#0x04 reliability - +0x10 lowdelay +0x08 throughput +0x04 reliability # This value overlap with ECT, do not use it! -#0x02 mincost - +0x02 mincost # These values seems do not want to die, Cisco likes them by a strange reason. -#0x20 priority -#0x40 immediate -#0x60 flash -#0x80 flash-override -#0xa0 critical -#0xc0 internet -#0xe0 network +0x20 priority +0x40 immediate +0x60 flash +0x80 flash-override +0xa0 critical +0xc0 internet +0xe0 network diff --git a/etc/iproute2/rt_protos b/etc/iproute2/rt_protos index 2569edf..5304770 100644 --- a/etc/iproute2/rt_protos +++ b/etc/iproute2/rt_protos @@ -1,26 +1,29 @@ # # Reserved protocols. 
# -#0 unspec -#1 redirect -#2 kernel -#3 boot -#4 static -#8 gated -#9 ra -#10 mrt -#11 zebra -#12 bird +0 unspec +1 redirect +2 kernel +3 boot +4 static +8 gated +9 ra +10 mrt +11 zebra +12 bird +13 dnrouted +14 xorp +15 ntk # # Used by me for gated # -#254 gated/aggr -#253 gated/bgp -#252 gated/ospf -#251 gated/ospfase -#250 gated/rip -#249 gated/static -#248 gated/conn -#247 gated/inet -#246 gated/default +254 gated/aggr +253 gated/bgp +252 gated/ospf +251 gated/ospfase +250 gated/rip +249 gated/static +248 gated/conn +247 gated/inet +246 gated/default diff --git a/etc/iproute2/rt_realms b/etc/iproute2/rt_realms index 332179d..eedd76d 100644 --- a/etc/iproute2/rt_realms +++ b/etc/iproute2/rt_realms @@ -1,7 +1,7 @@ # # reserved values # -#0 cosmos +0 cosmos # # local # diff --git a/etc/iproute2/rt_scopes b/etc/iproute2/rt_scopes index 36fbc01..8514bc1 100644 --- a/etc/iproute2/rt_scopes +++ b/etc/iproute2/rt_scopes @@ -1,12 +1,11 @@ # # reserved values # -#0 global -#255 nowhere -#254 host -#253 link - +0 global +255 nowhere +254 host +253 link # # pseudo-reserved # -#200 site +200 site diff --git a/etc/iproute2/rt_tables b/etc/iproute2/rt_tables index 558716b..541abfd 100644 --- a/etc/iproute2/rt_tables +++ b/etc/iproute2/rt_tables @@ -1,10 +1,10 @@ # # reserved values # -#255 local -#254 main -#253 default -#0 unspec +255 local +254 main +253 default +0 unspec # # local # diff --git a/examples/README.cbq b/examples/README.cbq new file mode 100644 index 0000000..38c1089 --- /dev/null +++ b/examples/README.cbq @@ -0,0 +1,122 @@ +# CHANGES +# ------- +# v0.3a2- fixed bug in "if" operator. Thanks kad@dgtu.donetsk.ua. +# v0.3a- added TIME parameter. Example: +# TIME=00:00-19:00;64Kbit/6Kbit +# So, between 00:00 and 19:00 RATE will be 64Kbit. +# Just start "cbq.init timecheck" periodically from cron (every 10 +# minutes for example). +# !!! Anyway you MUST start "cbq.init start" for CBQ initialize. +# v0.2 - Some cosmetique changes. 
Now it is more compatible with +# old bash version. Thanks to Stanislav V. Voronyi +# . +# v0.1 - First public release +# +# README +# ------ +# +# First of all - this is just a SIMPLE EXAMPLE of CBQ power. +# Don't ask me "why" and "how" :) +# +# This is an example of using CBQ (Class Based Queueing) and policy-based +# filter for building smart ethernet shapers. All CBQ parameters are +# correct only for ETHERNET (eth0,1,2..) linux interfaces. It works for +# ARCNET too (just set bandwidth parameter to 2Mbit). It was tested +# on 2.1.125-2.1.129 linux kernels (KSI linux, Nostromo version) and +# ip-route utility by A.Kuznetsov (iproute2-ss981101 version). +# You can download ip-route from ftp://ftp.inr.ac.ru/ip-routing or +# get iproute2*.rpm (compiled with glibc) from ftp.ksi-linux.com. +# +# +# HOW IT WORKS +# +# Each shaper must be described by config file in $CBQ_PATH +# (/etc/sysconfig/cbq/) directory - one config file for each CBQ shaper. +# +# Some words about config file name: +# Each shaper has its personal ID - two byte HEX number. Really ID is +# CBQ class. +# So, filename looks like: +# +# cbq-1280.My_first_shaper +# ^^^ ^^^ ^^^^^^^^^^^^^ +# | | |______ Shaper name - any word +# | |___________________ ID (0000-FFFF), let ID looks like shaper's rate +# |______________________ Filename must begin from "cbq-" +# +# +# Config file describes shaper parameters and source[destination] +# address[port]. +# For example let's prepare /etc/sysconfig/cbq/cbq-1280.My_first_shaper: +# +# ----------8<--------------------- +# DEVICE=eth0,10Mbit,1Mbit +# RATE=128Kbit +# WEIGHT=10Kbit +# PRIO=5 +# RULE=192.168.1.0/24 +# ----------8<--------------------- +# +# This is minimal configuration, where: +# DEVICE: eth0 - device where we do control our traffic +# 10Mbit - REAL ethernet card bandwidth +# 1Mbit - "weight" of :1 class (parent for all shapers for eth0), +# as a rule of thumb weight=bandwidth/10.
+# 100Mbit adapter's example: DEVICE=eth0,100Mbit,10Mbit +# *** If you want to build more than one shaper per device it's +# enough to describe bandwidth and weight once - cbq.init +# is smart :) You can put only 'DEVICE=eth0' into cbq-* +# config file for eth0. +# +# RATE: Shaper's speed - Kbit,Mbit or bps (bytes per second) +# +# WEIGHT: "weight" of shaper (CBQ class). Like for DEVICE - approx. RATE/10 +# +# PRIO: shaper's priority from 1 to 8 where 1 is the highest one. +# I do always use "5" for all my shapers. +# +# RULE: [source addr][:source port],[dest addr][:dest port] +# Some examples: +# RULE=10.1.1.0/24:80 - all traffic for network 10.1.1.0 to port 80 +# will be shaped. +# RULE=10.2.2.5 - shaper works only for IP address 10.2.2.5 +# RULE=:25,10.2.2.128/25:5000 - all traffic from any address and port 25 to +# address 10.2.2.128 - 10.2.2.255 and port 5000 +# will be shaped. +# RULE=10.5.5.5:80, - shaper active only for traffic from port 80 of +# address 10.5.5.5 +# Multiple RULE fields per one config file are allowed. For example: +# RULE=10.1.1.2:80 +# RULE=10.1.1.2:25 +# RULE=10.1.1.2:110 +# +# *** ATTENTION!!! +# All shapers do work only for outgoing traffic! +# So, if you want to build bidirectional shaper you must set it up for +# both ethernet card. For example let's build shaper for our linux box like: +# +# --------- 192.168.1.1 +# BACKBONE -----eth0-| linux |-eth1------*[our client] +# --------- +# +# Let all traffic from backbone to client will be shaped at 28Kbit and +# traffic from client to backbone - at 128Kbit. 
We need two config files: +# +# ---8<-----/etc/sysconfig/cbq/cbq-28.client-out---- +# DEVICE=eth1,10Mbit,1Mbit +# RATE=28Kbit +# WEIGHT=2Kbit +# PRIO=5 +# RULE=192.168.1.1 +# ---8<--------------------------------------------- +# +# ---8<-----/etc/sysconfig/cbq/cbq-128.client-in---- +# DEVICE=eth0,10Mbit,1Mbit +# RATE=128Kbit +# WEIGHT=10Kbit +# PRIO=5 +# RULE=192.168.1.1, +# ---8<--------------------------------------------- +# ^pay attention to "," - this is source address! +# +# Enjoy. diff --git a/examples/cbq.init-v0.7.3 b/examples/cbq.init-v0.7.3 new file mode 100644 index 0000000..888aba4 --- /dev/null +++ b/examples/cbq.init-v0.7.3 @@ -0,0 +1,984 @@ +#!/bin/bash +# +# cbq.init v0.7.3 +# Copyright (C) 1999 Pavel Golubev +# Copyright (C) 2001-2004 Lubomir Bulej +# +# chkconfig: 2345 11 89 +# description: sets up CBQ-based traffic control +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# To get the latest version, check on Freshmeat for actual location: +# +# http://freshmeat.net/projects/cbq.init +# +# +# VERSION HISTORY +# --------------- +# v0.7.3- Deepak Singhal +# - fix timecheck to not ignore regular TIME rules after +# encountering a TIME rule that spans over midnight +# - Nathan Shafer +# - allow symlinks to class files +# - Seth J. 
Blank +# - replace hardcoded ip/tc location with variables +# - Mark Davis +# - allow setting of PRIO_{MARK,RULE,REALM} in class file +# - Fernando Sanch +# - allow underscores in interface names +# v0.7.2- Paulo Sedrez +# - fix time2abs to allow hours with leading zero in TIME rules +# - Svetlin Simeonov +# - fix cbq_device_list to allow VLAN interfaces +# - Mark Davis +# - ignore *~ backup files when looking for classes +# - Mike Boyer +# - fix to allow arguments to be passed to "restart" command +# v0.7.1- Lubomir Bulej +# - default value for PERTURB +# - fixed small bug in RULE parser to correctly parse rules with +# identical source and destination fields +# - faster initial scanning of DEVICE fields +# v0.7 - Lubomir Bulej +# - lots of various cleanups and reorganizations; the parsing is now +# some 40% faster, but the class ID must be in range 0x0002-0xffff +# (again). Because of the number of internal changes and the above +# class ID restriction, I bumped the version to 0.7 to indicate +# something might have got broken :) +# - changed PRIO_{U32,FW,ROUTE} to PRIO_{RULE,MARK,REALM} +# for consistency with filter keywords +# - exposed "compile" command +# - Catalin Petrescu +# - support for port masks in RULE (u32) filter +# - Jordan Vrtanoski +# - support for week days in TIME rules +# v0.6.4- Lubomir Bulej +# - added PRIO_* variables to allow easy control of filter priorities +# - added caching to speed up CBQ start, the cache is invalidated +# whenever any of the configuration files changes +# - updated the readme section + some cosmetic fixes +# v0.6.3- Lubomir Bulej +# - removed setup of (unnecessary) class 1:1 - all classes +# now use qdisc's default class 1:0 as their parent +# - minor fix in the timecheck branch - classes +# without leaf qdisc were not updated +# - minor fix to avoid timecheck failure when run +# at time with minutes equal to 08 or 09 +# - respect CBQ_PATH setting in environment +# - made PRIO=5 default, rendering it optional in 
configs +# - added support for route filter, see notes about REALM keyword +# - added support for fw filter, see notes about MARK keyword +# - added filter display to "list" and "stats" commands +# - readme section update + various cosmetic fixes +# v0.6.2- Catalin Petrescu +# - added tunnels interface handling +# v0.6.1- Pavel Golubev +# - added sch_prio module loading +# (thanks johan at iglo.virtual.or.id for reminding) +# - resolved errors resulting from stricter syntax checking in bash2 +# - Lubomir Bulej +# - various cosmetic fixes +# v0.6 - Lubomir Bulej +# - attempt to limit number of spawned processes by utilizing +# more of sed power (use sed instead of grep+cut) +# - simplified TIME parser, using bash builtins +# - added initial support for SFQ as leaf qdisc +# - reworked the documentation part a little +# - incorporated pending patches and ideas submitted by +# following people for versions 0.3 into version 0.6 +# - Miguel Freitas +# - in case of overlapping TIME parameters, the last match is taken +# - Juanjo Ciarlante +# - chkconfig tags, list + stats startup parameters +# - optional tc & ip command logging (into /var/run/cbq-*) +# - Rafal Maszkowski +# - PEAK parameter for setting TBF's burst peak rate +# - fix for many config files (use find instead of ls) +# v0.5.1- Lubomir Bulej +# - fixed little but serious bug in RULE parser +# v0.5 - Lubomir Bulej +# - added options PARENT, LEAF, ISOLATED and BOUNDED. This allows +# (with some attention to config file ordering) for creating +# hierarchical structures of shapers with classes able (or unable) +# to borrow bandwidth from their parents. +# - class ID check allows hexadecimal numbers +# - rewritten & simplified RULE parser +# - cosmetic changes to improve readability +# - reorganization to avoid duplicate code (timecheck etc.) 
+# - timecheck doesn't check classes without TIME fields anymore +# v0.4 - Lubomir Bulej +# - small bugfix in RULE parsing code +# - simplified configuration parsing code +# - several small cosmetic changes +# - TIME parameter can be now specified more than once allowing you to +# differentiate RATE throughout the whole day. Time overlapping is +# not checked, first match is taken. Midnight wrap (eg. 20:00-6:00) +# is allowed and taken care of. +# v0.3a4- fixed small bug in IF operator. Thanks to +# Rafal Maszkowski +# v0.3a3- fixed grep bug when using more than 10 eth devices. Thanks to David +# Trcka . +# v0.3a2- fixed bug in "if" operator. Thanks kad at dgtu.donetsk.ua. +# v0.3a - added TIME parameter. Example: TIME=00:00-19:00;64Kbit/6Kbit +# So, between 00:00 and 19:00 the RATE will be 64Kbit. +# Just start "cbq.init timecheck" periodically from cron +# (every 10 minutes for example). DON'T FORGET though, to run +# "cbq.init start" for CBQ to initialize. +# v0.2 - Some cosmetic changes. Now it is more compatible with old bash +# version. Thanks to Stanislav V. Voronyi . +# v0.1 - First public release +# +# +# README +# ------ +# +# First of all - this is just a SIMPLE EXAMPLE of CBQ power. +# Don't ask me "why" and "how" :) +# +# This script is meant to simplify setup and management of relatively simple +# CBQ-based traffic control on Linux. Access to advanced networking features +# of Linux kernel is provided by "ip" and "tc" utilities from A. Kuznetsov's +# iproute2 package, available at ftp://ftp.inr.ac.ru/ip-routing. Because the +# utilities serve primarily to translate user wishes to RTNETLINK commands, +# their interface is rather spartan, intolerant and requires quite a lot of +# typing. And typing is what this script attempts to reduce :) +# +# The advanced networking stuff in Linux is pretty flexible and this script +# aims to bring some of its features to the not-so-hard-core Linux users. 
Of +# course, there is a tradeoff between simplicity and flexibility and you may +# realize that the flexibility suffered too much for your needs -- time to +# face "ip" and "tc" interface. +# +# To speed up the "start" command, simple caching was introduced in version +# 0.6.4. The caching works so that the sequence of "tc" commands for given +# configuration is stored in a file (/var/cache/cbq.init by default) which +# is used next time the "start" command is run to avoid repeated parsing of +# configuration files. This cache is invalidated whenever any of the CBQ +# configuration files changes. If you want to run "cbq.init start" without +# caching, run it as "cbq.init start nocache". If you want to force cache +# invalidation, run it as "cbq.init start invalidate". Caching is disabled +# if you have logging enabled (ie. CBQ_DEBUG is not empty). +# +# If you only want cbq.init to translate your configuration to "tc" commands, +# use "compile" command which will output "tc" commands required to build +# your configuration. Bear in mind that "compile" does not check if the "tc" +# commands were successful - this is done (in certain places) only when the +# "start nocache" command is used, which is also useful when creating the +# configuration to check whether it is completely valid. +# +# All CBQ parameters are valid for Ethernet interfaces only. The script was +# tested on various Linux kernel versions from series 2.1 to 2.4 and several +# distributions with KSI Linux (Nostromo version) as the premier one. +# +# +# HOW DOES IT WORK? +# ----------------- +# +# Every traffic class must be described by a file in the $CBQ_PATH directory +# (/etc/sysconfig/cbq by default) - one file per class. +# +# The config file names must obey mandatory format: cbq-<clsid>.<name> where +# <clsid> is two-byte hexadecimal number in range <0002-FFFF> (which in fact +# is a CBQ class ID) and <name> is the name of the class -- anything to help +# you distinguish the configuration files.
For small amount of classes it is +# often possible (and convenient) to let <clsid> resemble bandwidth of the +# class. +# +# Example of valid config name: +# cbq-1280.My_first_shaper +# +# +# The configuration file may contain the following parameters: +# +### Device parameters +# +# DEVICE=<ifname>,<bandwidth>[,<weight>] mandatory +# DEVICE=eth0,10Mbit,1Mbit +# +# <ifname> is the name of the interface you want to control +# traffic on, e.g. eth0 +# <bandwidth> is the physical bandwidth of the device, e.g. for +# ethernet 10Mbit or 100Mbit, for arcnet 2Mbit +# <weight> is tuning parameter that should be proportional to +# <bandwidth>. As a rule of thumb: <weight> = <bandwidth> / 10 +# +# When you have more classes on one interface, it is enough to specify +# <bandwidth> [and <weight>] only once, therefore in other files you only +# need to set DEVICE=<ifname>. +# +### Class parameters +# +# RATE=<speed> mandatory +# RATE=5Mbit +# +# Bandwidth allocated to the class. Traffic going through the class is +# shaped to conform to specified rate. You can use Kbit, Mbit or bps, +# Kbps and Mbps as suffixes. If you don't specify any unit, bits/sec +# are used. Also note that "bps" means "bytes per second", not bits. +# +# WEIGHT=<speed> mandatory +# WEIGHT=500Kbit +# +# Tuning parameter that should be proportional to RATE. As a rule +# of thumb, use WEIGHT ~= RATE / 10. +# +# PRIO=<1-8> optional, default 5 +# PRIO=5 +# +# Priority of class traffic. The higher the number, the lesser +# the priority. Priority of 5 is just fine. +# +# PARENT=<clsid> optional, default not set +# PARENT=1280 +# +# Specifies ID of the parent class to which you want this class be +# attached. You might want to use LEAF=none for the parent class as +# mentioned below. By using this parameter and carefully ordering the +# configuration files, it is possible to create simple hierarchical +# structures of CBQ classes. The ordering is important so that parent +# classes are constructed prior to their children. +# +# LEAF=none|tbf|sfq optional, default "tbf" +# +# Tells the script to attach specified leaf queueing discipline to CBQ +# class.
By default, TBF is used. Note that attaching TBF to CBQ class +# shapes the traffic to conform to TBF parameters and prevents the class +# from borrowing bandwidth from its parent even if you have BOUNDED set +# to "no". To allow the class to borrow bandwidth (provided it is not +# bounded), you must set LEAF to "none" or "sfq". +# +# If you want to ensure (approximately) fair sharing of bandwidth among +# several hosts in the same class, you might want to specify LEAF=sfq to +# attach SFQ as leaf queueing discipline to that class. +# +# BOUNDED=yes|no optional, default "yes" +# +# If set to "yes", the class is not allowed to borrow bandwidth from +# its parent class in overlimit situation. If set to "no", the class +# will be allowed to borrow bandwidth from its parent. +# +# Note: Don't forget to set LEAF to "none" or "sfq", otherwise the class will +# have TBF attached to itself and will not be able to borrow unused +# bandwidth from its parent. +# +# ISOLATED=yes|no optional, default "no" +# +# If set to "yes", the class will not lend unused bandwidth to +# its children. +# +### TBF qdisc parameters +# +# BUFFER=<bytes>[/<bytes>] optional, default "10Kb/8" +# +# This parameter controls the depth of the token bucket. In other +# words it represents the maximal burst size the class can send. +# The optional part of parameter is used to determine the length +# of intervals in packet sizes, for which the transmission times +# are kept. +# +# LIMIT=<bytes> optional, default "15Kb" +# +# This parameter determines the maximal length of backlog. If +# the queue contains more data than specified by LIMIT, the +# newly arriving packets are dropped. The length of backlog +# determines queue latency in case of congestion. +# +# PEAK=<speed> optional, default not set +# +# Maximal peak rate for short-term burst traffic.
This allows you +# to control the absolute peak rate the class can send at, because +# single TBF that allows 256Kbit/s would of course allow rate of +# 512Kbit for half a second or 1Mbit for a quarter of second. +# +# MTU= optional, default "1500" +# +# Maximum number of bytes that can be sent at once over the +# physical medium. This parameter is required when you specify +# PEAK parameter. It defaults to MTU of ethernet - for other +# media types you might want to change it. +# +# Note: Setting TBF as leaf qdisc will effectively prevent the class from +# borrowing bandwidth from the ancestor class, because even if the +# class allows more traffic to pass through, it is then shaped to +# conform to TBF. +# +### SFQ qdisc parameters +# +# The SFQ queueing discipline is a cheap way for sharing class bandwidth +# among several hosts. As it is stochastic, the fairness is approximate but +# it will do the job in most cases. If you want real fairness, you should +# probably use WRR (weighted round robin) or WFQ queueing disciplines. Note +# that SFQ does not do any traffic shaping - the shaping is done by the CBQ +# class the SFQ is attached to. +# +# QUANTUM= optional, default not set +# +# This parameter should not be set lower than link MTU, for ethernet +# it is 1500b, or (with MAC header) 1514b which is the value used +# in Alexey Kuznetsov's examples. +# +# PERTURB= optional, default "10" +# +# Period of hash function perturbation. If unset, hash reconfiguration +# will never take place which is what you probably don't want. The +# default value of 10 seconds is probably a good one. +# +### Filter parameters +# +# RULE=[[saddr[/prefix]][:port[/mask]],][daddr[/prefix]][:port[/mask]] +# +# These parameters make up "u32" filter rules that select traffic for +# each of the classes. You can use multiple RULE fields per config. +# +# The optional port mask should only be used by advanced users who +# understand how the u32 filter works. 
+# +# Some examples: +# +# RULE=10.1.1.0/24:80 +# selects traffic going to port 80 in network 10.1.1.0 +# +# RULE=10.2.2.5 +# selects traffic going to any port on single host 10.2.2.5 +# +# RULE=10.2.2.5:20/0xfffe +# selects traffic going to ports 20 and 21 on host 10.2.2.5 +# +# RULE=:25,10.2.2.128/26:5000 +# selects traffic going from anywhere on port 25 to +# port 5000 in network 10.2.2.128 +# +# RULE=10.5.5.5:80, +# selects traffic going from port 80 of single host 10.5.5.5 +# +# +# +# REALM=[srealm,][drealm] +# +# These parameters make up "route" filter rules that classify traffic +# according to packet source/destination realms. For information about +# realms, see Alexey Kuznetsov's IP Command Reference. This script +# does not define any realms, it just builds "tc filter" commands +# for you if you need to classify traffic this way. +# +# Realm is either a decimal number or a string referencing entry in +# /etc/iproute2/rt_realms (usually). +# +# Some examples: +# +# REALM=russia,internet +# selects traffic going from realm "russia" to realm "internet" +# +# REALM=freenet, +# selects traffic going from realm "freenet" +# +# REALM=10 +# selects traffic going to realm 10 +# +# +# +# MARK=<mark> +# +# These parameters make up "fw" filter rules that select traffic for +# each of the classes according to firewall "mark". Mark is a decimal +# number packets are tagged with if firewall rules say so. You can +# use multiple MARK fields per config. +# +# +# Note: Rules for different filter types can be combined. Attention must be +# paid to the priority of filter rules, which can be set below using +# PRIO_{RULE,MARK,REALM} variables. +# +### Time ranging parameters +# +# TIME=[<dow>,<dow>, ...,<dow>/]<from>-<till>;<rate>/<weight>[/<peak>] +# TIME=0,1,2,5/18:00-06:00;256Kbit/25Kbit +# TIME=60123/18:00-06:00;256Kbit/25Kbit +# TIME=18:00-06:00;256Kbit/25Kbit +# +# This parameter allows you to differentiate the class bandwidth +# throughout the day.
You can specify multiple TIME parameters, if +# the times overlap, last match is taken. The fields <rate>, <weight> +# and <peak> correspond to parameters RATE, WEIGHT and PEAK (which +# is optional and applies to TBF leaf qdisc only). +# +# You can also specify days of week when the TIME rule applies. <dow> +# is numeric, 0 corresponds to sunday, 1 corresponds to monday, etc. +# +### +# +# Sample configuration file: cbq-1280.My_first_shaper +# +# -------------------------------------------------------------------------- +# DEVICE=eth0,10Mbit,1Mbit +# RATE=128Kbit +# WEIGHT=10Kbit +# PRIO=5 +# RULE=192.168.1.0/24 +# -------------------------------------------------------------------------- +# +# The configuration says that we will control traffic on 10Mbit ethernet +# device eth0 and the traffic going to network 192.168.1.0 will be +# processed with priority 5 and shaped to rate of 128Kbit. +# +# Note that you can control outgoing traffic only. If you want to control +# traffic in both directions, you must set up CBQ for both interfaces. +# +# Consider the following example: +# +# +---------+ 192.168.1.1 +# BACKBONE -----eth0-| linux |-eth1------*-[client] +# +---------+ +# +# Imagine you want to shape traffic from backbone to the client to 28Kbit +# and traffic in the opposite direction to 128Kbit.
You need to setup CBQ +# on both eth0 and eth1 interfaces, thus you need two config files: +# +# cbq-028.backbone-client +# -------------------------------------------------------------------------- +# DEVICE=eth1,10Mbit,1Mbit +# RATE=28Kbit +# WEIGHT=2Kbit +# PRIO=5 +# RULE=192.168.1.1 +# -------------------------------------------------------------------------- +# +# cbq-128.client-backbone +# -------------------------------------------------------------------------- +# DEVICE=eth0,10Mbit,1Mbit +# RATE=128Kbit +# WEIGHT=10Kbit +# PRIO=5 +# RULE=192.168.1.1, +# -------------------------------------------------------------------------- +# +# Pay attention to comma "," in the RULE field - it denotes source address! +# +# Enjoy. +# +############################################################################# + +export LC_ALL=C + +### Command locations +TC=/sbin/tc +IP=/sbin/ip +MP=/sbin/modprobe + +### Default filter priorities (must be different) +PRIO_RULE_DEFAULT=${PRIO_RULE:-100} +PRIO_MARK_DEFAULT=${PRIO_MARK:-200} +PRIO_REALM_DEFAULT=${PRIO_REALM:-300} + +### Default CBQ_PATH & CBQ_CACHE settings +CBQ_PATH=${CBQ_PATH:-/etc/sysconfig/cbq} +CBQ_CACHE=${CBQ_CACHE:-/var/cache/cbq.init} + +### Uncomment to enable logfile for debugging +#CBQ_DEBUG="/var/run/cbq-$1" + +### Modules to probe for. Uncomment the last CBQ_PROBE +### line if you have QoS support compiled into kernel +CBQ_PROBE="sch_cbq sch_tbf sch_sfq sch_prio" +CBQ_PROBE="$CBQ_PROBE cls_fw cls_u32 cls_route" +#CBQ_PROBE="" + +### Keywords required for qdisc & class configuration +CBQ_WORDS="DEVICE|RATE|WEIGHT|PRIO|PARENT|LEAF|BOUNDED|ISOLATED" +CBQ_WORDS="$CBQ_WORDS|PRIO_MARK|PRIO_RULE|PRIO_REALM|BUFFER" +CBQ_WORDS="$CBQ_WORDS|LIMIT|PEAK|MTU|QUANTUM|PERTURB" + +### Source AVPKT if it exists +[ -r /etc/sysconfig/cbq/avpkt ] && . 
/etc/sysconfig/cbq/avpkt +AVPKT=${AVPKT:-3000} + + +############################################################################# +############################# SUPPORT FUNCTIONS ############################# +############################################################################# + +### Get list of network devices +cbq_device_list () { + ip link show| sed -n "/^[0-9]/ \ + { s/^[0-9]\+: \([a-z0-9._]\+\)[:@].*/\1/; p; }" +} # cbq_device_list + + +### Remove root class from device $1 +cbq_device_off () { + tc qdisc del dev $1 root 2> /dev/null +} # cbq_device_off + + +### Remove CBQ from all devices +cbq_off () { + for dev in `cbq_device_list`; do + cbq_device_off $dev + done +} # cbq_off + + +### Prefixed message +cbq_message () { + echo -e "**CBQ: $@" +} # cbq_message + +### Failure message +cbq_failure () { + cbq_message "$@" + exit 1 +} # cbq_failure + +### Failure w/ cbq-off +cbq_fail_off () { + cbq_message "$@" + cbq_off + exit 1 +} # cbq_fail_off + + +### Convert time to absolute value +cbq_time2abs () { + local min=${1##*:}; min=${min##0} + local hrs=${1%%:*}; hrs=${hrs##0} + echo $[hrs*60 + min] +} # cbq_time2abs + + +### Display CBQ setup +cbq_show () { + for dev in `cbq_device_list`; do + [ `tc qdisc show dev $dev| wc -l` -eq 0 ] && continue + echo -e "### $dev: queueing disciplines\n" + tc $1 qdisc show dev $dev; echo + + [ `tc class show dev $dev| wc -l` -eq 0 ] && continue + echo -e "### $dev: traffic classes\n" + tc $1 class show dev $dev; echo + + [ `tc filter show dev $dev| wc -l` -eq 0 ] && continue + echo -e "### $dev: filtering rules\n" + tc $1 filter show dev $dev; echo + done +} # cbq_show + + +### Check configuration and load DEVICES, DEVFIELDS and CLASSLIST from $1 +cbq_init () { + ### Get a list of configured classes + CLASSLIST=`find $1 \( -type f -or -type l \) -name 'cbq-*' \ + -not -name '*~' -maxdepth 1 -printf "%f\n"| sort` + [ -z "$CLASSLIST" ] && + cbq_failure "no configuration files found in $1!" 
+ + ### Gather all DEVICE fields from $1/cbq-* + DEVFIELDS=`find $1 \( -type f -or -type l \) -name 'cbq-*' \ + -not -name '*~' -maxdepth 1| xargs sed -n 's/#.*//; \ + s/[[:space:]]//g; /^DEVICE=[^,]*,[^,]*\(,[^,]*\)\?/ \ + { s/.*=//; p; }'| sort -u` + [ -z "$DEVFIELDS" ] && + cbq_failure "no DEVICE field found in $1/cbq-*!" + + ### Check for different DEVICE fields for the same device + DEVICES=`echo "$DEVFIELDS"| sed 's/,.*//'| sort -u` + [ `echo "$DEVICES"| wc -l` -ne `echo "$DEVFIELDS"| wc -l` ] && + cbq_failure "different DEVICE fields for single device!\n$DEVFIELDS" +} # cbq_init + + +### Load class configuration from $1/$2 +cbq_load_class () { + CLASS=`echo $2| sed 's/^cbq-0*//; s/^\([0-9a-fA-F]\+\).*/\1/'` + CFILE=`sed -n 's/#.*//; s/[[:space:]]//g; /^[[:alnum:]_]\+=[[:alnum:].,:;/*@-_]\+$/ p' $1/$2` + + ### Check class number + IDVAL=`/usr/bin/printf "%d" 0x$CLASS 2> /dev/null` + [ $? -ne 0 -o $IDVAL -lt 2 -o $IDVAL -gt 65535 ] && + cbq_fail_off "class ID of $2 must be in range <0002-FFFF>!" + + ### Set defaults & load class + RATE=""; WEIGHT=""; PARENT=""; PRIO=5 + LEAF=tbf; BOUNDED=yes; ISOLATED=no + BUFFER=10Kb/8; LIMIT=15Kb; MTU=1500 + PEAK=""; PERTURB=10; QUANTUM="" + + PRIO_RULE=$PRIO_RULE_DEFAULT + PRIO_MARK=$PRIO_MARK_DEFAULT + PRIO_REALM=$PRIO_REALM_DEFAULT + + eval `echo "$CFILE"| grep -E "^($CBQ_WORDS)="` + + ### Require RATE/WEIGHT + [ -z "$RATE" -o -z "$WEIGHT" ] && + cbq_fail_off "missing RATE or WEIGHT in $2!" + + ### Class device + DEVICE=${DEVICE%%,*} + [ -z "$DEVICE" ] && cbq_fail_off "missing DEVICE field in $2!" 
+ + BANDWIDTH=`echo "$DEVFIELDS"| sed -n "/^$DEVICE,/ \ + { s/[^,]*,\([^,]*\).*/\1/; p; q; }"` + + ### Convert to "tc" options + PEAK=${PEAK:+peakrate $PEAK} + PERTURB=${PERTURB:+perturb $PERTURB} + QUANTUM=${QUANTUM:+quantum $QUANTUM} + + [ "$BOUNDED" = "no" ] && BOUNDED="" || BOUNDED="bounded" + [ "$ISOLATED" = "yes" ] && ISOLATED="isolated" || ISOLATED="" +} # cbq_load_class + + +############################################################################# +#################################### INIT ################################### +############################################################################# + +### Check for presence of ip-route2 in usual place +[ -x $TC -a -x $IP ] || + cbq_failure "ip-route2 utilities not installed or executable!" + + +### ip/tc wrappers +if [ "$1" = "compile" ]; then + ### no module probing + CBQ_PROBE="" + + ip () { + $IP "$@" + } # ip + + ### echo-only version of "tc" command + tc () { + echo "$TC $@" + } # tc + +elif [ -n "$CBQ_DEBUG" ]; then + echo -e "# `date`" > $CBQ_DEBUG + + ### Logging version of "ip" command + ip () { + echo -e "\n# ip $@" >> $CBQ_DEBUG + $IP "$@" 2>&1 | tee -a $CBQ_DEBUG + } # ip + + ### Logging version of "tc" command + tc () { + echo -e "\n# tc $@" >> $CBQ_DEBUG + $TC "$@" 2>&1 | tee -a $CBQ_DEBUG + } # tc +else + ### Default wrappers + + ip () { + $IP "$@" + } # ip + + tc () { + $TC "$@" + } # tc +fi # ip/tc wrappers + + +case "$1" in + +############################################################################# +############################### START/COMPILE ############################### +############################################################################# + +start|compile) + +### Probe QoS modules (start only) +for module in $CBQ_PROBE; do + $MP $module || cbq_failure "failed to load module $module" +done + +### If we are in compile/nocache/logging mode, don't bother with cache +if [ "$1" != "compile" -a "$2" != "nocache" -a -z "$CBQ_DEBUG" ]; then + VALID=1 + + ### validate the 
cache + [ "$2" = "invalidate" -o ! -f $CBQ_CACHE ] && VALID=0 + if [ $VALID -eq 1 ]; then + [ `find $CBQ_PATH -maxdepth 1 -newer $CBQ_CACHE| \ + wc -l` -gt 0 ] && VALID=0 + fi + + ### compile the config if the cache is invalid + if [ $VALID -ne 1 ]; then + $0 compile > $CBQ_CACHE || + cbq_fail_off "failed to compile CBQ configuration!" + fi + + ### run the cached commands + exec /bin/sh $CBQ_CACHE 2> /dev/null +fi + +### Load DEVICES, DEVFIELDS and CLASSLIST +cbq_init $CBQ_PATH + + +### Setup root qdisc on all configured devices +for dev in $DEVICES; do + ### Retrieve device bandwidth and, optionally, weight + DEVTEMP=`echo "$DEVFIELDS"| sed -n "/^$dev,/ { s/$dev,//; p; q; }"` + DEVBWDT=${DEVTEMP%%,*}; DEVWGHT=${DEVTEMP##*,} + [ "$DEVBWDT" = "$DEVWGHT" ] && DEVWGHT="" + + ### Device bandwidth is required + if [ -z "$DEVBWDT" ]; then + cbq_message "could not determine bandwidth for device $dev!" + cbq_failure "please set up the DEVICE fields properly!" + fi + + ### Check if the device is there + ip link show $dev &> /dev/null || + cbq_fail_off "device $dev not found!" + + ### Remove old root qdisc from device + cbq_device_off $dev + + + ### Setup root qdisc + class for device + tc qdisc add dev $dev root handle 1 cbq \ + bandwidth $DEVBWDT avpkt $AVPKT cell 8 + + ### Set weight of the root class if set + [ -n "$DEVWGHT" ] && + tc class change dev $dev root cbq weight $DEVWGHT allot 1514 + + [ "$1" = "compile" ] && echo +done # dev + + +### Setup traffic classes +for classfile in $CLASSLIST; do + cbq_load_class $CBQ_PATH $classfile + + ### Create the class + tc class add dev $DEVICE parent 1:$PARENT classid 1:$CLASS cbq \ + bandwidth $BANDWIDTH rate $RATE weight $WEIGHT prio $PRIO \ + allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED || + cbq_fail_off "failed to add class $CLASS with parent $PARENT on $DEVICE!" 
+ + ### Create leaf qdisc if set + if [ "$LEAF" = "tbf" ]; then + tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS tbf \ + rate $RATE buffer $BUFFER limit $LIMIT mtu $MTU $PEAK + elif [ "$LEAF" = "sfq" ]; then + tc qdisc add dev $DEVICE parent 1:$CLASS handle $CLASS sfq \ + $PERTURB $QUANTUM + fi + + + ### Create fw filter for MARK fields + for mark in `echo "$CFILE"| sed -n '/^MARK/ { s/.*=//; p; }'`; do + ### Attach fw filter to root class + tc filter add dev $DEVICE parent 1:0 protocol ip \ + prio $PRIO_MARK handle $mark fw classid 1:$CLASS + done ### mark + + ### Create route filter for REALM fields + for realm in `echo "$CFILE"| sed -n '/^REALM/ { s/.*=//; p; }'`; do + ### Split realm into source & destination realms + SREALM=${realm%%,*}; DREALM=${realm##*,} + [ "$SREALM" = "$DREALM" ] && SREALM="" + + ### Convert asterisks to empty strings + SREALM=${SREALM#\*}; DREALM=${DREALM#\*} + + ### Attach route filter to the root class + tc filter add dev $DEVICE parent 1:0 protocol ip \ + prio $PRIO_REALM route ${SREALM:+from $SREALM} \ + ${DREALM:+to $DREALM} classid 1:$CLASS + done ### realm + + ### Create u32 filter for RULE fields + for rule in `echo "$CFILE"| sed -n '/^RULE/ { s/.*=//; p; }'`; do + ### Split rule into source & destination + SRC=${rule%%,*}; DST=${rule##*,} + [ "$SRC" = "$rule" ] && SRC="" + + + ### Split destination into address, port & mask fields + DADDR=${DST%%:*}; DTEMP=${DST##*:} + [ "$DADDR" = "$DST" ] && DTEMP="" + + DPORT=${DTEMP%%/*}; DMASK=${DTEMP##*/} + [ "$DPORT" = "$DTEMP" ] && DMASK="0xffff" + + + ### Split up source (if specified) + SADDR=""; SPORT="" + if [ -n "$SRC" ]; then + SADDR=${SRC%%:*}; STEMP=${SRC##*:} + [ "$SADDR" = "$SRC" ] && STEMP="" + + SPORT=${STEMP%%/*}; SMASK=${STEMP##*/} + [ "$SPORT" = "$STEMP" ] && SMASK="0xffff" + fi + + + ### Convert asterisks to empty strings + SADDR=${SADDR#\*}; DADDR=${DADDR#\*} + + ### Compose u32 filter rules + u32_s="${SPORT:+match ip sport $SPORT $SMASK}" + u32_s="${SADDR:+match 
ip src $SADDR} $u32_s" + u32_d="${DPORT:+match ip dport $DPORT $DMASK}" + u32_d="${DADDR:+match ip dst $DADDR} $u32_d" + + ### Uncomment the following if you want to see parsed rules + #echo "$rule: $u32_s $u32_d" + + ### Attach u32 filter to the appropriate class + tc filter add dev $DEVICE parent 1:0 protocol ip \ + prio $PRIO_RULE u32 $u32_s $u32_d classid 1:$CLASS + done ### rule + + [ "$1" = "compile" ] && echo +done ### classfile +;; + + +############################################################################# +################################# TIME CHECK ################################ +############################################################################# + +timecheck) + +### Get time + weekday +TIME_TMP=`date +%w/%k:%M` +TIME_DOW=${TIME_TMP%%/*} +TIME_NOW=${TIME_TMP##*/} + +### Load DEVICES, DEVFIELDS and CLASSLIST +cbq_init $CBQ_PATH + +### Run through all classes +for classfile in $CLASSLIST; do + ### Gather all TIME rules from class config + TIMESET=`sed -n 's/#.*//; s/[[:space:]]//g; /^TIME/ { s/.*=//; p; }' \ + $CBQ_PATH/$classfile` + [ -z "$TIMESET" ] && continue + + MATCH=0; CHANGE=0 + for timerule in $TIMESET; do + TIME_ABS=`cbq_time2abs $TIME_NOW` + + ### Split TIME rule to pieces + TIMESPEC=${timerule%%;*}; PARAMS=${timerule##*;} + WEEKDAYS=${TIMESPEC%%/*}; INTERVAL=${TIMESPEC##*/} + BEG_TIME=${INTERVAL%%-*}; END_TIME=${INTERVAL##*-} + + ### Check the day-of-week (if present) + [ "$WEEKDAYS" != "$INTERVAL" -a \ + -n "${WEEKDAYS##*$TIME_DOW*}" ] && continue + + ### Compute interval boundaries + BEG_ABS=`cbq_time2abs $BEG_TIME` + END_ABS=`cbq_time2abs $END_TIME` + + ### Midnight wrap fixup + if [ $BEG_ABS -gt $END_ABS ]; then + [ $TIME_ABS -le $END_ABS ] && + TIME_ABS=$[TIME_ABS + 24*60] + + END_ABS=$[END_ABS + 24*60] + fi + + ### If the time matches, remember params and set MATCH flag + if [ $TIME_ABS -ge $BEG_ABS -a $TIME_ABS -lt $END_ABS ]; then + TMP_RATE=${PARAMS%%/*}; PARAMS=${PARAMS#*/} + TMP_WGHT=${PARAMS%%/*}; 
TMP_PEAK=${PARAMS##*/} + + [ "$TMP_PEAK" = "$TMP_WGHT" ] && TMP_PEAK="" + TMP_PEAK=${TMP_PEAK:+peakrate $TMP_PEAK} + + MATCH=1 + fi + done ### timerule + + + cbq_load_class $CBQ_PATH $classfile + + ### Get current RATE of CBQ class + RATE_NOW=`tc class show dev $DEVICE| sed -n \ + "/cbq 1:$CLASS / { s/.*rate //; s/ .*//; p; q; }"` + [ -z "$RATE_NOW" ] && continue + + ### Time interval matched + if [ $MATCH -ne 0 ]; then + + ### Check if there is any change in class RATE + if [ "$RATE_NOW" != "$TMP_RATE" ]; then + NEW_RATE="$TMP_RATE" + NEW_WGHT="$TMP_WGHT" + NEW_PEAK="$TMP_PEAK" + CHANGE=1 + fi + + ### Match not found, reset to default RATE if necessary + elif [ "$RATE_NOW" != "$RATE" ]; then + NEW_WGHT="$WEIGHT" + NEW_RATE="$RATE" + NEW_PEAK="$PEAK" + CHANGE=1 + fi + + ### If there are no changes, go for next class + [ $CHANGE -eq 0 ] && continue + + ### Replace CBQ class + tc class replace dev $DEVICE classid 1:$CLASS cbq \ + bandwidth $BANDWIDTH rate $NEW_RATE weight $NEW_WGHT prio $PRIO \ + allot 1514 cell 8 maxburst 20 avpkt $AVPKT $BOUNDED $ISOLATED + + ### Replace leaf qdisc (if any) + if [ "$LEAF" = "tbf" ]; then + tc qdisc replace dev $DEVICE handle $CLASS tbf \ + rate $NEW_RATE buffer $BUFFER limit $LIMIT mtu $MTU $NEW_PEAK + fi + + cbq_message "$TIME_NOW: class $CLASS on $DEVICE changed rate ($RATE_NOW -> $NEW_RATE)" +done ### class file +;; + + +############################################################################# +################################## THE REST ################################# +############################################################################# + +stop) + cbq_off + ;; + +list) + cbq_show + ;; + +stats) + cbq_show -s + ;; + +restart) + shift + $0 stop + $0 start "$@" + ;; + +*) + echo "Usage: `basename $0` {start|compile|stop|restart|timecheck|list|stats}" +esac diff --git a/include/SNAPSHOT.h b/include/SNAPSHOT.h index 8375d76..9438c0f 100644 --- a/include/SNAPSHOT.h +++ b/include/SNAPSHOT.h @@ -1 +1 @@ -static char 
SNAPSHOT[] = "050314"; +static char SNAPSHOT[] = "060323"; diff --git a/include/iptables.h b/include/iptables.h index 5aca69a..25f36ae 100644 --- a/include/iptables.h +++ b/include/iptables.h @@ -4,10 +4,26 @@ #include "iptables_common.h" #include "libiptc/libiptc.h" +#ifndef IPT_LIB_DIR +#define IPT_LIB_DIR "/usr/local/lib/iptables" +#endif + #ifndef IPPROTO_SCTP #define IPPROTO_SCTP 132 #endif +#ifndef IPT_SO_GET_REVISION_MATCH /* Old kernel source. */ +#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) +#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) + +struct ipt_get_revision +{ + char name[IPT_FUNCTION_MAXNAMELEN-1]; + + u_int8_t revision; +}; +#endif /* IPT_SO_GET_REVISION_MATCH Old kernel source */ + struct iptables_rule_match { struct iptables_rule_match *next; @@ -22,6 +38,9 @@ struct iptables_match ipt_chainlabel name; + /* Revision of match (0 by default). */ + u_int8_t revision; + const char *version; /* Size of match data. */ @@ -72,6 +91,9 @@ struct iptables_target ipt_chainlabel name; + /* Revision of target (0 by default). */ + u_int8_t revision; + const char *version; /* Size of target data. 
*/ diff --git a/include/iptables_common.h b/include/iptables_common.h index e3b99aa..ed5b9c0 100644 --- a/include/iptables_common.h +++ b/include/iptables_common.h @@ -26,6 +26,7 @@ extern int iptables_insmod(const char *modname, const char *modprobe); void exit_error(enum exittype, char *, ...)__attribute__((noreturn, format(printf,2,3))); extern const char *program_name, *program_version; +extern char *lib_dir; #ifdef NO_SHARED_LIBS # ifdef _INIT diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h new file mode 100644 index 0000000..85d456d --- /dev/null +++ b/include/linux/inet_diag.h @@ -0,0 +1,122 @@ +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ 1 + +/* Just some random number */ +#define TCPDIAG_GETSOCK 18 +#define DCCPDIAG_GETSOCK 19 + +#define INET_DIAG_GETSOCK_MAX 24 + +/* Socket identity */ +struct inet_diag_sockid { + __u16 idiag_sport; + __u16 idiag_dport; + __u32 idiag_src[4]; + __u32 idiag_dst[4]; + __u32 idiag_if; + __u32 idiag_cookie[2]; +#define INET_DIAG_NOCOOKIE (~0U) +}; + +/* Request structure */ + +struct inet_diag_req { + __u8 idiag_family; /* Family of addresses. */ + __u8 idiag_src_len; + __u8 idiag_dst_len; + __u8 idiag_ext; /* Query extended information */ + + struct inet_diag_sockid id; + + __u32 idiag_states; /* States to dump */ + __u32 idiag_dbs; /* Tables to dump (NI) */ +}; + +enum { + INET_DIAG_REQ_NONE, + INET_DIAG_REQ_BYTECODE, +}; + +#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE + +/* Bytecode is sequence of 4 byte commands followed by variable arguments. + * All the commands identified by "code" are conditional jumps forward: + * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be + * length of the command and its arguments. 
+ */ + +struct inet_diag_bc_op { + unsigned char code; + unsigned char yes; + unsigned short no; +}; + +enum { + INET_DIAG_BC_NOP, + INET_DIAG_BC_JMP, + INET_DIAG_BC_S_GE, + INET_DIAG_BC_S_LE, + INET_DIAG_BC_D_GE, + INET_DIAG_BC_D_LE, + INET_DIAG_BC_AUTO, + INET_DIAG_BC_S_COND, + INET_DIAG_BC_D_COND, +}; + +struct inet_diag_hostcond { + __u8 family; + __u8 prefix_len; + int port; + __u32 addr[0]; +}; + +/* Base info structure. It contains socket identity (addrs/ports/cookie) + * and, alas, the information shown by netstat. */ +struct inet_diag_msg { + __u8 idiag_family; + __u8 idiag_state; + __u8 idiag_timer; + __u8 idiag_retrans; + + struct inet_diag_sockid id; + + __u32 idiag_expires; + __u32 idiag_rqueue; + __u32 idiag_wqueue; + __u32 idiag_uid; + __u32 idiag_inode; +}; + +/* Extensions */ + +enum { + INET_DIAG_NONE, + INET_DIAG_MEMINFO, + INET_DIAG_INFO, + INET_DIAG_VEGASINFO, + INET_DIAG_CONG, +}; + +#define INET_DIAG_MAX INET_DIAG_CONG + + +/* INET_DIAG_MEM */ + +struct inet_diag_meminfo { + __u32 idiag_rmem; + __u32 idiag_wmem; + __u32 idiag_fmem; + __u32 idiag_tmem; +}; + +/* INET_DIAG_VEGASINFO */ + +struct tcpvegas_info { + __u32 tcpv_enabled; + __u32 tcpv_rttcnt; + __u32 tcpv_rtt; + __u32 tcpv_minrtt; +}; + + +#endif /* _INET_DIAG_H_ */ diff --git a/include/linux/ip_mp_alg.h b/include/linux/ip_mp_alg.h new file mode 100644 index 0000000..e234e20 --- /dev/null +++ b/include/linux/ip_mp_alg.h @@ -0,0 +1,22 @@ +/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values. + * + * Copyright (C) 2004, 2005 Einar Lueck + * Copyright (C) 2005 David S. 
Miller + */ + +#ifndef _LINUX_IP_MP_ALG_H +#define _LINUX_IP_MP_ALG_H + +enum ip_mp_alg { + IP_MP_ALG_NONE, + IP_MP_ALG_RR, + IP_MP_ALG_DRR, + IP_MP_ALG_RANDOM, + IP_MP_ALG_WRANDOM, + __IP_MP_ALG_MAX +}; + +#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1) + +#endif /* _LINUX_IP_MP_ALG_H */ + diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 7346ead..17d8eff 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -15,7 +15,6 @@ #ifndef _IPTABLES_H #define _IPTABLES_H -#include #include #define IPT_FUNCTION_MAXNAMELEN 30 diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 13828e5..24a38ae 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -5,20 +5,22 @@ #include #define NETLINK_ROUTE 0 /* Routing/device hook */ -#define NETLINK_SKIP 1 /* Reserved for ENskip */ +#define NETLINK_W1 1 /* 1-wire subsystem */ #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ #define NETLINK_FIREWALL 3 /* Firewalling hook */ -#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ +#define NETLINK_INET_DIAG 4 /* INET socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ #define NETLINK_SELINUX 7 /* SELinux event notifications */ -#define NETLINK_ARPD 8 +#define NETLINK_ISCSI 8 /* Open-iSCSI */ #define NETLINK_AUDIT 9 /* auditing */ -#define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ +#define NETLINK_FIB_LOOKUP 10 +#define NETLINK_CONNECTOR 11 +#define NETLINK_NETFILTER 12 /* netfilter subsystem */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ -#define NETLINK_TAPBASE 16 /* 16 to 31 are ethertap */ +#define NETLINK_GENERIC 16 #define MAX_LINKS 32 @@ -69,7 +71,8 @@ struct nlmsghdr #define NLMSG_ALIGNTO 4 #define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & 
~(NLMSG_ALIGNTO-1) ) -#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr))) +#define NLMSG_LENGTH(len) ((len)+NLMSG_ALIGN(NLMSG_HDRLEN)) #define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len)) #define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0))) #define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \ @@ -84,12 +87,23 @@ struct nlmsghdr #define NLMSG_DONE 0x3 /* End of a dump */ #define NLMSG_OVERRUN 0x4 /* Data lost */ +#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */ + struct nlmsgerr { int error; struct nlmsghdr msg; }; +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 + +struct nl_pktinfo +{ + __u32 group; +}; + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ enum { @@ -97,5 +111,24 @@ enum { NETLINK_CONNECTED, }; +/* + * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)--> + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * | Header | Pad | Payload | Pad | + * | (struct nlattr) | ing | | ing | + * +---------------------+- - -+- - - - - - - - - -+- - -+ + * <-------------- nlattr->nla_len --------------> + */ + +struct nlattr +{ + __u16 nla_len; + __u16 nla_type; +}; + +#define NLA_ALIGNTO 4 +#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1)) +#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr))) + #endif /* __LINUX_NETLINK_H */ diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h index 741d15b..bd2c5a2 100644 --- a/include/linux/pkt_cls.h +++ b/include/linux/pkt_cls.h @@ -80,6 +80,7 @@ enum TCA_ACT_KIND, TCA_ACT_OPTIONS, TCA_ACT_INDEX, + TCA_ACT_STATS, __TCA_ACT_MAX }; @@ -275,6 +276,7 @@ struct tc_rsvp_pinfo __u8 protocol; __u8 tunnelid; __u8 tunnelhdr; + __u8 pad; }; /* ROUTE filter */ @@ -407,6 +409,7 @@ enum TCF_EM_NBYTE, TCF_EM_U32, TCF_EM_META, + TCF_EM_TEXT, __TCF_EM_MAX }; diff --git a/include/linux/pkt_sched.h 
b/include/linux/pkt_sched.h index 73d84c0..d10f353 100644 --- a/include/linux/pkt_sched.h +++ b/include/linux/pkt_sched.h @@ -93,6 +93,7 @@ struct tc_fifo_qopt /* PRIO section */ #define TCQ_PRIO_BANDS 16 +#define TCQ_MIN_PRIO_BANDS 2 struct tc_prio_qopt { @@ -169,6 +170,7 @@ struct tc_red_qopt unsigned char Scell_log; /* cell size for idle damping */ unsigned char flags; #define TC_RED_ECN 1 +#define TC_RED_HARDDROP 2 }; struct tc_red_xstats @@ -194,36 +196,34 @@ enum #define TCA_GRED_MAX (__TCA_GRED_MAX - 1) -#define TCA_SET_OFF TCA_GRED_PARMS struct tc_gred_qopt { - __u32 limit; /* HARD maximal queue length (bytes) -*/ - __u32 qth_min; /* Min average length threshold (bytes) -*/ - __u32 qth_max; /* Max average length threshold (bytes) -*/ - __u32 DP; /* upto 2^32 DPs */ - __u32 backlog; - __u32 qave; - __u32 forced; - __u32 early; - __u32 other; - __u32 pdrop; - - unsigned char Wlog; /* log(W) */ - unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ - unsigned char Scell_log; /* cell size for idle damping */ - __u8 prio; /* prio of this VQ */ - __u32 packets; - __u32 bytesin; + __u32 limit; /* HARD maximal queue length (bytes) */ + __u32 qth_min; /* Min average length threshold (bytes) */ + __u32 qth_max; /* Max average length threshold (bytes) */ + __u32 DP; /* upto 2^32 DPs */ + __u32 backlog; + __u32 qave; + __u32 forced; + __u32 early; + __u32 other; + __u32 pdrop; + __u8 Wlog; /* log(W) */ + __u8 Plog; /* log(P_max/(qth_max-qth_min)) */ + __u8 Scell_log; /* cell size for idle damping */ + __u8 prio; /* prio of this VQ */ + __u32 packets; + __u32 bytesin; }; + /* gred setup */ struct tc_gred_sopt { - __u32 DPs; - __u32 def_DP; - __u8 grio; + __u32 DPs; + __u32 def_DP; + __u8 grio; + __u8 flags; + __u16 pad1; }; /* HTB section */ @@ -351,6 +351,7 @@ struct tc_cbq_ovl #define TC_CBQ_OVL_DROP 3 #define TC_CBQ_OVL_RCLASSIC 4 unsigned char priority2; + __u16 pad; __u32 penalty; }; @@ -427,6 +428,8 @@ enum TCA_NETEM_UNSPEC, TCA_NETEM_CORR, TCA_NETEM_DELAY_DIST, 
+ TCA_NETEM_REORDER, + TCA_NETEM_CORRUPT, __TCA_NETEM_MAX, }; @@ -437,7 +440,7 @@ struct tc_netem_qopt __u32 latency; /* added delay (us) */ __u32 limit; /* fifo limit (packets) */ __u32 loss; /* random packet loss (0=none ~0=100%) */ - __u32 gap; /* re-ordering gap (0 for delay all) */ + __u32 gap; /* re-ordering gap (0 for none) */ __u32 duplicate; /* random packet dup (0=none ~0=100%) */ __u32 jitter; /* random jitter in latency (us) */ }; @@ -449,6 +452,18 @@ struct tc_netem_corr __u32 dup_corr; /* duplicate correlation */ }; +struct tc_netem_reorder +{ + __u32 probability; + __u32 correlation; +}; + +struct tc_netem_corrupt +{ + __u32 probability; + __u32 correlation; +}; + #define NETEM_DIST_SCALE 8192 #endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 1facfe9..7504618 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -89,10 +89,21 @@ enum { RTM_GETANYCAST = 62, #define RTM_GETANYCAST RTM_GETANYCAST - RTM_MAX, -#define RTM_MAX RTM_MAX + RTM_NEWNEIGHTBL = 64, +#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL + RTM_GETNEIGHTBL = 66, +#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL + RTM_SETNEIGHTBL, +#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL + + __RTM_MAX, +#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; +#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE) +#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2) +#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2) + /* Generic structure for encapsulation of optional route information. 
It is reminiscent of sockaddr, but with sa_family replaced @@ -188,6 +199,7 @@ enum #define RTPROT_BIRD 12 /* BIRD */ #define RTPROT_DNROUTED 13 /* DECnet routing daemon */ #define RTPROT_XORP 14 /* XORP */ +#define RTPROT_NTK 15 /* Netsukuku */ /* rtm_scope @@ -250,6 +262,7 @@ enum rtattr_type_t RTA_FLOW, RTA_CACHEINFO, RTA_SESSION, + RTA_MP_ALGO, __RTA_MAX }; @@ -346,10 +359,13 @@ enum #define RTAX_FEATURE_ECN 0x00000001 #define RTAX_FEATURE_SACK 0x00000002 #define RTAX_FEATURE_TIMESTAMP 0x00000004 +#define RTAX_FEATURE_ALLFRAG 0x00000008 struct rta_session { __u8 proto; + __u8 pad1; + __u16 pad2; union { struct { @@ -446,6 +462,7 @@ enum NDA_DST, NDA_LLADDR, NDA_CACHEINFO, + NDA_PROBES, __NDA_MAX }; @@ -486,6 +503,106 @@ struct nda_cacheinfo __u32 ndm_refcnt; }; + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. 
Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. + ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + +#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \ + NLMSG_ALIGN(sizeof(struct ndtmsg)))) +#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) + + /**** * General 
form of address family dependent message. ****/ @@ -521,10 +638,13 @@ struct ifinfomsg struct prefixmsg { unsigned char prefix_family; + unsigned char prefix_pad1; + unsigned short prefix_pad2; int prefix_ifindex; unsigned char prefix_type; unsigned char prefix_len; unsigned char prefix_flags; + unsigned char prefix_pad3; }; enum @@ -699,7 +819,6 @@ enum TCA_RATE, TCA_FCNT, TCA_STATS2, - TCA_ACT_STATS, __TCA_MAX }; @@ -708,9 +827,7 @@ enum #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) - -/* RTnetlink multicast groups */ - +/* RTnetlink multicast groups - backwards compatibility for userspace */ #define RTMGRP_LINK 1 #define RTMGRP_NOTIFY 2 #define RTMGRP_NEIGH 4 @@ -730,6 +847,46 @@ enum #define RTMGRP_IPV6_PREFIX 0x20000 +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_NOP1, + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_NOP2, + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_NOP3, + RTNLGRP_NOP4, + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) 
+ /* TC action piece */ struct tcamsg { diff --git a/include/linux/socket.h b/include/linux/socket.h new file mode 100644 index 0000000..dc979c0 --- /dev/null +++ b/include/linux/socket.h @@ -0,0 +1 @@ +#include diff --git a/include/linux/tc_act/tc_defact.h b/include/linux/tc_act/tc_defact.h new file mode 100644 index 0000000..964f473 --- /dev/null +++ b/include/linux/tc_act/tc_defact.h @@ -0,0 +1,21 @@ +#ifndef __LINUX_TC_DEF_H +#define __LINUX_TC_DEF_H + +#include + +struct tc_defact +{ + tc_gen; +}; + +enum +{ + TCA_DEF_UNSPEC, + TCA_DEF_TM, + TCA_DEF_PARMS, + TCA_DEF_DATA, + __TCA_DEF_MAX +}; +#define TCA_DEF_MAX (__TCA_DEF_MAX - 1) + +#endif diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/linux/tc_ematch/tc_em_cmp.h new file mode 100644 index 0000000..c7f4d43 --- /dev/null +++ b/include/linux/tc_ematch/tc_em_cmp.h @@ -0,0 +1,26 @@ +#ifndef __LINUX_TC_EM_CMP_H +#define __LINUX_TC_EM_CMP_H + +#include + +struct tcf_em_cmp +{ + __u32 val; + __u32 mask; + __u16 off; + __u8 align:4; + __u8 flags:4; + __u8 layer:4; + __u8 opnd:4; +}; + +enum +{ + TCF_EM_ALIGN_U8 = 1, + TCF_EM_ALIGN_U16 = 2, + TCF_EM_ALIGN_U32 = 4 +}; + +#define TCF_EM_CMP_TRANS 1 + +#endif diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h new file mode 100644 index 0000000..e21937c --- /dev/null +++ b/include/linux/tc_ematch/tc_em_meta.h @@ -0,0 +1,94 @@ +#ifndef __LINUX_TC_EM_META_H +#define __LINUX_TC_EM_META_H + +#include + +enum +{ + TCA_EM_META_UNSPEC, + TCA_EM_META_HDR, + TCA_EM_META_LVALUE, + TCA_EM_META_RVALUE, + __TCA_EM_META_MAX +}; +#define TCA_EM_META_MAX (__TCA_EM_META_MAX - 1) + +struct tcf_meta_val +{ + __u16 kind; + __u8 shift; + __u8 op; +}; + +#define TCF_META_TYPE_MASK (0xf << 12) +#define TCF_META_TYPE(kind) (((kind) & TCF_META_TYPE_MASK) >> 12) +#define TCF_META_ID_MASK 0x7ff +#define TCF_META_ID(kind) ((kind) & TCF_META_ID_MASK) + +enum +{ + TCF_META_TYPE_VAR, + TCF_META_TYPE_INT, + __TCF_META_TYPE_MAX +}; +#define 
TCF_META_TYPE_MAX (__TCF_META_TYPE_MAX - 1) + +enum +{ + TCF_META_ID_VALUE, + TCF_META_ID_RANDOM, + TCF_META_ID_LOADAVG_0, + TCF_META_ID_LOADAVG_1, + TCF_META_ID_LOADAVG_2, + TCF_META_ID_DEV, + TCF_META_ID_PRIORITY, + TCF_META_ID_PROTOCOL, + TCF_META_ID_PKTTYPE, + TCF_META_ID_PKTLEN, + TCF_META_ID_DATALEN, + TCF_META_ID_MACLEN, + TCF_META_ID_NFMARK, + TCF_META_ID_TCINDEX, + TCF_META_ID_RTCLASSID, + TCF_META_ID_RTIIF, + TCF_META_ID_SK_FAMILY, + TCF_META_ID_SK_STATE, + TCF_META_ID_SK_REUSE, + TCF_META_ID_SK_BOUND_IF, + TCF_META_ID_SK_REFCNT, + TCF_META_ID_SK_SHUTDOWN, + TCF_META_ID_SK_PROTO, + TCF_META_ID_SK_TYPE, + TCF_META_ID_SK_RCVBUF, + TCF_META_ID_SK_RMEM_ALLOC, + TCF_META_ID_SK_WMEM_ALLOC, + TCF_META_ID_SK_OMEM_ALLOC, + TCF_META_ID_SK_WMEM_QUEUED, + TCF_META_ID_SK_RCV_QLEN, + TCF_META_ID_SK_SND_QLEN, + TCF_META_ID_SK_ERR_QLEN, + TCF_META_ID_SK_FORWARD_ALLOCS, + TCF_META_ID_SK_SNDBUF, + TCF_META_ID_SK_ALLOCS, + TCF_META_ID_SK_ROUTE_CAPS, + TCF_META_ID_SK_HASH, + TCF_META_ID_SK_LINGERTIME, + TCF_META_ID_SK_ACK_BACKLOG, + TCF_META_ID_SK_MAX_ACK_BACKLOG, + TCF_META_ID_SK_PRIO, + TCF_META_ID_SK_RCVLOWAT, + TCF_META_ID_SK_RCVTIMEO, + TCF_META_ID_SK_SNDTIMEO, + TCF_META_ID_SK_SENDMSG_OFF, + TCF_META_ID_SK_WRITE_PENDING, + __TCF_META_ID_MAX +}; +#define TCF_META_ID_MAX (__TCF_META_ID_MAX - 1) + +struct tcf_meta_hdr +{ + struct tcf_meta_val left; + struct tcf_meta_val right; +}; + +#endif diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/linux/tc_ematch/tc_em_nbyte.h new file mode 100644 index 0000000..f19d1f5 --- /dev/null +++ b/include/linux/tc_ematch/tc_em_nbyte.h @@ -0,0 +1,13 @@ +#ifndef __LINUX_TC_EM_NBYTE_H +#define __LINUX_TC_EM_NBYTE_H + +#include + +struct tcf_em_nbyte +{ + __u16 off; + __u16 len:12; + __u8 layer:4; +}; + +#endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9703d6b..b4d74eb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,40 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; - -enum { - TCP_ESTABLISHED = 1, 
- TCP_SYN_SENT, - TCP_SYN_RECV, - TCP_FIN_WAIT1, - TCP_FIN_WAIT2, - TCP_TIME_WAIT, - TCP_CLOSE, - TCP_CLOSE_WAIT, - TCP_LAST_ACK, - TCP_LISTEN, - TCP_CLOSING, /* now a valid state */ - - TCP_MAX_STATES /* Leave at the end! */ -}; - -#define TCP_STATE_MASK 0xF -#define TCP_ACTION_FIN (1 << 7) - -enum { - TCPF_ESTABLISHED = (1 << 1), - TCPF_SYN_SENT = (1 << 2), - TCPF_SYN_RECV = (1 << 3), - TCPF_FIN_WAIT1 = (1 << 4), - TCPF_FIN_WAIT2 = (1 << 5), - TCPF_TIME_WAIT = (1 << 6), - TCPF_CLOSE = (1 << 7), - TCPF_CLOSE_WAIT = (1 << 8), - TCPF_LAST_ACK = (1 << 9), - TCPF_LISTEN = (1 << 10), - TCPF_CLOSING = (1 << 11) -}; - /* * The union cast uses a gcc extension to avoid aliasing problems * (union is compatible to any of its members) @@ -127,6 +93,7 @@ enum { #define TCP_WINDOW_CLAMP 10 /* Bound advertised window */ #define TCP_INFO 11 /* Information about this connection. */ #define TCP_QUICKACK 12 /* Block/reenable quick acks */ +#define TCP_CONGESTION 13 /* Congestion control algorithm */ #define TCPI_OPT_TIMESTAMPS 1 #define TCPI_OPT_SACK 2 diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0df02a..f2bbf4b 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -27,6 +27,22 @@ struct xfrm_id __u8 proto; }; +struct xfrm_sec_ctx { + __u8 ctx_doi; + __u8 ctx_alg; + __u16 ctx_len; + __u32 ctx_sid; + char ctx_str[0]; +}; + +/* Security Context Domains of Interpretation */ +#define XFRM_SC_DOI_RESERVED 0 +#define XFRM_SC_DOI_LSM 1 + +/* Security Context Algorithms */ +#define XFRM_SC_ALG_RESERVED 0 +#define XFRM_SC_ALG_SELINUX 1 + /* Selector, used as selector both on policy rules (SPD) and SAs. 
*/ struct xfrm_selector @@ -140,7 +156,22 @@ enum { XFRM_MSG_FLUSHPOLICY, #define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY - XFRM_MSG_MAX + __XFRM_MSG_MAX +}; +#define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) + +#define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE) + +/* + * Generic LSM security context for comunicating to user space + * NOTE: Same format as sadb_x_sec_ctx + */ +struct xfrm_user_sec_ctx { + __u16 len; + __u16 exttype; + __u8 ctx_alg; /* LSMs: e.g., selinux == 1 */ + __u8 ctx_doi; + __u16 ctx_len; }; struct xfrm_user_tmpl { @@ -171,6 +202,9 @@ enum xfrm_attr_type_t { XFRMA_ALG_COMP, /* struct xfrm_algo */ XFRMA_ENCAP, /* struct xfrm_algo + struct xfrm_encap_tmpl */ XFRMA_TMPL, /* 1 or more struct xfrm_user_tmpl */ + XFRMA_SA, + XFRMA_POLICY, + XFRMA_SEC_CTX, /* struct xfrm_sec_ctx */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -191,6 +225,7 @@ struct xfrm_usersa_info { __u8 flags; #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 +#define XFRM_STATE_NOPMTUDISC 4 }; struct xfrm_usersa_id { @@ -252,7 +287,25 @@ struct xfrm_usersa_flush { __u8 proto; }; +/* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 +#define XFRMGRP_SA 4 +#define XFRMGRP_POLICY 8 + +enum xfrm_nlgroups { + XFRMNLGRP_NONE, +#define XFRMNLGRP_NONE XFRMNLGRP_NONE + XFRMNLGRP_ACQUIRE, +#define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE + XFRMNLGRP_EXPIRE, +#define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE + XFRMNLGRP_SA, +#define XFRMNLGRP_SA XFRMNLGRP_SA + XFRMNLGRP_POLICY, +#define XFRMNLGRP_POLICY XFRMNLGRP_POLICY + __XFRMNLGRP_MAX +}; +#define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) #endif /* _LINUX_XFRM_H */ diff --git a/include/ll_map.h b/include/ll_map.h index 3bff5e9..d085813 100644 --- a/include/ll_map.h +++ b/include/ll_map.h @@ -4,10 +4,10 @@ extern int ll_remember_index(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); extern int ll_init_map(struct rtnl_handle *rth); -extern int ll_name_to_index(const char *name); 
-extern const char *ll_index_to_name(int idx); -extern const char *ll_idx_n2a(int idx, char *buf); -extern int ll_index_to_type(int idx); -extern unsigned ll_index_to_flags(int idx); +extern unsigned ll_name_to_index(const char *name); +extern const char *ll_index_to_name(unsigned idx); +extern const char *ll_idx_n2a(unsigned idx, char *buf); +extern int ll_index_to_type(unsigned idx); +extern unsigned ll_index_to_flags(unsigned idx); #endif /* __LL_MAP_H__ */ diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 0000000..b0b6459 --- /dev/null +++ b/include/net/tcp_states.h @@ -0,0 +1,50 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol sk_state field. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_STATES_H +#define _LINUX_TCP_STATES_H + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + + TCP_MAX_STATES /* Leave at the end! 
*/ +}; + +#define TCP_STATE_MASK 0xF + +#define TCP_ACTION_FIN (1 << 7) + +enum { + TCPF_ESTABLISHED = (1 << 1), + TCPF_SYN_SENT = (1 << 2), + TCPF_SYN_RECV = (1 << 3), + TCPF_FIN_WAIT1 = (1 << 4), + TCPF_FIN_WAIT2 = (1 << 5), + TCPF_TIME_WAIT = (1 << 6), + TCPF_CLOSE = (1 << 7), + TCPF_CLOSE_WAIT = (1 << 8), + TCPF_LAST_ACK = (1 << 9), + TCPF_LISTEN = (1 << 10), + TCPF_CLOSING = (1 << 11) +}; + +#endif /* _LINUX_TCP_STATES_H */ diff --git a/include/rt_names.h b/include/rt_names.h index 249231e..2d9ef10 100644 --- a/include/rt_names.h +++ b/include/rt_names.h @@ -21,7 +21,7 @@ int inet_proto_a2n(char *buf); const char * ll_type_n2a(int type, char *buf, int len); const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen); -int ll_addr_a2n(unsigned char *lladdr, int len, char *arg); +int ll_addr_a2n(char *lladdr, int len, char *arg); const char * ll_proto_n2a(unsigned short id, char *buf, int len); int ll_proto_a2n(unsigned short *id, char *buf); diff --git a/include/utils.h b/include/utils.h index 906e394..0f1d1f6 100644 --- a/include/utils.h +++ b/include/utils.h @@ -14,6 +14,7 @@ extern int show_details; extern int show_raw; extern int resolve_hosts; extern int oneline; +extern int timestamp; extern char * _SL_; #ifndef IPPROTO_ESP @@ -43,9 +44,12 @@ typedef struct __u8 family; __u8 bytelen; __s16 bitlen; + __u32 flags; __u32 data[4]; } inet_prefix; +#define PREFIXLEN_SPECIFIED 1 + #define DN_MAXADDL 20 #ifndef AF_DECnet #define AF_DECnet 12 @@ -82,8 +86,8 @@ extern int get_s16(__s16 *val, const char *arg, int base); extern int get_u8(__u8 *val, const char *arg, int base); extern int get_s8(__s8 *val, const char *arg, int base); -extern __u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen); -extern __u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen); +extern char* hexstring_n2a(const __u8 *str, int len, char *buf, int blen); +extern __u8* hexstring_a2n(const char *str, __u8 *buf, int blen); extern const char 
*format_host(int af, int len, const void *addr, char *buf, int buflen); @@ -123,4 +127,12 @@ static __inline__ int get_user_hz(void) return __iproute2_user_hz_internal; } +int print_timestamp(FILE *fp); + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + +extern int cmdlineno; +extern size_t getcmdline(char **line, size_t *len, FILE *in); +extern int makeargs(char *line, char *argv[], int maxargs); + #endif /* __UTILS_H__ */ diff --git a/ip/Makefile b/ip/Makefile index bcc419b..3383c72 100644 --- a/ip/Makefile +++ b/ip/Makefile @@ -1,7 +1,7 @@ IPOBJ=ip.o ipaddress.o iproute.o iprule.o \ - rtm_map.o iptunnel.o ipneigh.o iplink.o \ + rtm_map.o iptunnel.o ipneigh.o ipntable.o iplink.o \ ipmaddr.o ipmonitor.o ipmroute.o ipprefix.o \ - ipxfrm.o xfrm_state.o xfrm_policy.o + ipxfrm.o xfrm_state.o xfrm_policy.o xfrm_monitor.o RTMONOBJ=rtmon.o diff --git a/ip/ip.c b/ip/ip.c index 6358ec4..c29d2f3 100644 --- a/ip/ip.c +++ b/ip/ip.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "SNAPSHOT.h" #include "utils.h" @@ -31,7 +32,11 @@ int preferred_family = AF_UNSPEC; int show_stats = 0; int resolve_hosts = 0; int oneline = 0; +int timestamp = 0; char * _SL_ = NULL; +char *batch_file = NULL; +int force = 0; +struct rtnl_handle rth; static void usage(void) __attribute__((noreturn)); @@ -39,13 +44,98 @@ static void usage(void) { fprintf(stderr, "Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n" -"where OBJECT := { link | addr | route | rule | neigh | tunnel |\n" +" ip [ -force ] [-batch filename\n" +"where OBJECT := { link | addr | route | rule | neigh | ntable | tunnel |\n" " maddr | mroute | monitor | xfrm }\n" " OPTIONS := { -V[ersion] | -s[tatistics] | -r[esolve] |\n" -" -f[amily] { inet | inet6 | ipx | dnet | link } | -o[neline] }\n"); +" -f[amily] { inet | inet6 | ipx | dnet | link } |\n" +" -o[neline] | -t[imestamp] }\n"); exit(-1); } +static int do_help(int argc, char **argv) +{ + usage(); +} + +static const struct cmd { + const char *cmd; + int 
(*func)(int argc, char **argv); +} cmds[] = { + { "address", do_ipaddr }, + { "maddress", do_multiaddr }, + { "route", do_iproute }, + { "rule", do_iprule }, + { "neighbor", do_ipneigh }, + { "neighbour", do_ipneigh }, + { "ntable", do_ipntable }, + { "ntbl", do_ipntable }, + { "link", do_iplink }, + { "tunnel", do_iptunnel }, + { "tunl", do_iptunnel }, + { "monitor", do_ipmonitor }, + { "xfrm", do_xfrm }, + { "mroute", do_multiroute }, + { "help", do_help }, + { 0 } +}; + +static int do_cmd(const char *argv0, int argc, char **argv) +{ + const struct cmd *c; + + for (c = cmds; c->cmd; ++c) { + if (matches(argv0, c->cmd) == 0) + return c->func(argc-1, argv+1); + } + + fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv0); + return -1; +} + +static int batch(const char *name) +{ + char *line = NULL; + size_t len = 0; + int ret = 0; + int lineno = 0; + + if (name && strcmp(name, "-") != 0) { + if (freopen(name, "r", stdin) == NULL) { + fprintf(stderr, "Cannot open file \"%s\" for reading: %s=n", + name, strerror(errno)); + return -1; + } + } + + if (rtnl_open(&rth, 0) < 0) { + fprintf(stderr, "Cannot open rtnetlink\n"); + return -1; + } + + while (getcmdline(&line, &len, stdin) != -1) { + char *largv[100]; + int largc; + + largc = makeargs(line, largv, 100); + if (largc == 0) + continue; /* blank line */ + + if (do_cmd(largv[0], largc, largv)) { + fprintf(stderr, "Command failed %s:%d\n", name, lineno); + ret = 1; + if (!force) + break; + } + } + if (line) + free(line); + + rtnl_close(&rth); + return ret; +} + + int main(int argc, char **argv) { char *basename; @@ -102,6 +192,8 @@ int main(int argc, char **argv) ++resolve_hosts; } else if (matches(opt, "-oneline") == 0) { ++oneline; + } else if (matches(opt, "-timestamp") == 0) { + ++timestamp; #if 0 } else if (matches(opt, "-numeric") == 0) { rtnl_names_numeric++; @@ -109,6 +201,14 @@ int main(int argc, char **argv) } else if (matches(opt, "-Version") == 0) { printf("ip utility, iproute2-ss%s\n", 
SNAPSHOT); exit(0); + } else if (matches(opt, "-force") == 0) { + ++force; + } else if (matches(opt, "-batch") == 0) { + argc--; + argv++; + if (argc <= 1) + usage(); + batch_file = argv[1]; } else if (matches(opt, "-help") == 0) { usage(); } else { @@ -120,52 +220,18 @@ int main(int argc, char **argv) _SL_ = oneline ? "\\" : "\n" ; - if (strcmp(basename, "ipaddr") == 0) - return do_ipaddr(argc-1, argv+1); - if (strcmp(basename, "ipmaddr") == 0) - return do_multiaddr(argc-1, argv+1); - if (strcmp(basename, "iproute") == 0) - return do_iproute(argc-1, argv+1); - if (strcmp(basename, "iprule") == 0) - return do_iprule(argc-1, argv+1); - if (strcmp(basename, "ipneigh") == 0) - return do_ipneigh(argc-1, argv+1); - if (strcmp(basename, "iplink") == 0) - return do_iplink(argc-1, argv+1); - if (strcmp(basename, "iptunnel") == 0) - return do_iptunnel(argc-1, argv+1); - if (strcmp(basename, "ipmonitor") == 0) - return do_ipmonitor(argc-1, argv+1); - if (strcmp(basename, "ipxfrm") == 0) - return do_xfrm(argc-1, argv+1); - - if (argc > 1) { - if (matches(argv[1], "address") == 0) - return do_ipaddr(argc-2, argv+2); - if (matches(argv[1], "maddress") == 0) - return do_multiaddr(argc-2, argv+2); - if (matches(argv[1], "route") == 0) - return do_iproute(argc-2, argv+2); - if (matches(argv[1], "rule") == 0) - return do_iprule(argc-2, argv+2); - if (matches(argv[1], "mroute") == 0) - return do_multiroute(argc-2, argv+2); - if (matches(argv[1], "neighbor") == 0 || - matches(argv[1], "neighbour") == 0) - return do_ipneigh(argc-2, argv+2); - if (matches(argv[1], "link") == 0) - return do_iplink(argc-2, argv+2); - if (matches(argv[1], "tunnel") == 0 || - strcmp(argv[1], "tunl") == 0) - return do_iptunnel(argc-2, argv+2); - if (matches(argv[1], "monitor") == 0) - return do_ipmonitor(argc-2, argv+2); - if (matches(argv[1], "xfrm") == 0) - return do_xfrm(argc-2, argv+2); - if (matches(argv[1], "help") == 0) - usage(); - fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", 
argv[1]); - exit(-1); - } + if (batch_file) + return batch(batch_file); + + if (rtnl_open(&rth, 0) < 0) + exit(1); + + if (strlen(basename) > 2) + return do_cmd(basename+2, argc, argv); + + if (argc > 1) + return do_cmd(argv[1], argc-1, argv+1); + + rtnl_close(&rth); usage(); } diff --git a/ip/ip_common.h b/ip/ip_common.h index 688d384..1fe4a69 100644 --- a/ip/ip_common.h +++ b/ip/ip_common.h @@ -6,6 +6,8 @@ extern int print_addrinfo(const struct sockaddr_nl *who, void *arg); extern int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); +extern int print_ntable(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg); extern int ipaddr_list(int argc, char **argv); extern int ipaddr_list_link(int argc, char **argv); extern int iproute_monitor(int argc, char **argv); @@ -13,6 +15,7 @@ extern void iplink_usage(void) __attribute__((noreturn)); extern void iproute_reset_filter(void); extern void ipaddr_reset_filter(int); extern void ipneigh_reset_filter(void); +extern void ipntable_reset_filter(void); extern int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg); extern int print_prefix(const struct sockaddr_nl *who, @@ -21,9 +24,12 @@ extern int do_ipaddr(int argc, char **argv); extern int do_iproute(int argc, char **argv); extern int do_iprule(int argc, char **argv); extern int do_ipneigh(int argc, char **argv); +extern int do_ipntable(int argc, char **argv); extern int do_iptunnel(int argc, char **argv); extern int do_iplink(int argc, char **argv); extern int do_ipmonitor(int argc, char **argv); extern int do_multiaddr(int argc, char **argv); extern int do_multiroute(int argc, char **argv); extern int do_xfrm(int argc, char **argv); + +extern struct rtnl_handle rth; diff --git a/ip/ipaddress.c b/ip/ipaddress.c index 92f0089..cb164c0 100644 --- a/ip/ipaddress.c +++ b/ip/ipaddress.c @@ -49,7 +49,6 @@ static struct char *flushb; int flushp; int flushe; - struct rtnl_handle *rth; } filter; static int do_link; @@ 
-269,7 +268,7 @@ int print_linkinfo(const struct sockaddr_nl *who, static int flush_update(void) { - if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) { + if (rtnl_send(&rth, filter.flushb, filter.flushp) < 0) { perror("Failed to send flush request\n"); return -1; } @@ -345,7 +344,7 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n, memcpy(fn, n, n->nlmsg_len); fn->nlmsg_type = RTM_DELADDR; fn->nlmsg_flags = NLM_F_REQUEST; - fn->nlmsg_seq = ++filter.rth->seq; + fn->nlmsg_seq = ++rth.seq; filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb; filter.flushed++; if (show_stats < 2) @@ -495,8 +494,7 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush) { struct nlmsg_list *linfo = NULL; struct nlmsg_list *ainfo = NULL; - struct nlmsg_list *l; - struct rtnl_handle rth; + struct nlmsg_list *l, *n; char *filter_dev = NULL; int no_link = 0; @@ -524,7 +522,7 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush) if (filter.family == AF_UNSPEC) filter.family = filter.pfx.family; } else if (strcmp(*argv, "scope") == 0) { - int scope = 0; + unsigned scope = 0; NEXT_ARG(); filter.scopemask = -1; if (rtnl_rtscope_a2n(&scope, *argv)) { @@ -570,9 +568,6 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush) argv++; argc--; } - if (rtnl_open(&rth, 0) < 0) - exit(1); - if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETLINK) < 0) { perror("Cannot send dump request"); exit(1); @@ -598,7 +593,6 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush) filter.flushb = flushb; filter.flushp = 0; filter.flushe = sizeof(flushb); - filter.rth = &rth; for (;;) { if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) { @@ -620,7 +614,8 @@ int ipaddr_list_or_flush(int argc, char **argv, int flush) } round++; if (flush_update() < 0) - exit(1); + return 1; + if (show_stats) { printf("\n*** Round %d, deleting %d addresses ***\n", round, filter.flushed); fflush(stdout); @@ -700,16 +695,18 @@ int 
ipaddr_list_or_flush(int argc, char **argv, int flush) } } - for (l=linfo; l; l = l->next) { + for (l=linfo; l; l = n) { + n = l->next; if (no_link || print_linkinfo(NULL, &l->h, stdout) == 0) { struct ifinfomsg *ifi = NLMSG_DATA(&l->h); if (filter.family != AF_PACKET) print_selected_addrinfo(ifi->ifi_index, ainfo, stdout); } fflush(stdout); + free(l); } - exit(0); + return 0; } int ipaddr_list_link(int argc, char **argv) @@ -736,7 +733,6 @@ int default_scope(inet_prefix *lcl) int ipaddr_modify(int cmd, int argc, char **argv) { - struct rtnl_handle rth; struct { struct nlmsghdr n; struct ifaddrmsg ifa; @@ -744,6 +740,7 @@ int ipaddr_modify(int cmd, int argc, char **argv) } req; char *d = NULL; char *l = NULL; + char *lcl_arg = NULL; inet_prefix lcl; inet_prefix peer; int local_len = 0; @@ -800,7 +797,7 @@ int ipaddr_modify(int cmd, int argc, char **argv) addattr_l(&req.n, sizeof(req), IFA_ANYCAST, &addr.data, addr.bytelen); any_len = addr.bytelen; } else if (strcmp(*argv, "scope") == 0) { - int scope = 0; + unsigned scope = 0; NEXT_ARG(); if (rtnl_rtscope_a2n(&scope, *argv)) invarg(*argv, "invalid scope value."); @@ -821,6 +818,7 @@ int ipaddr_modify(int cmd, int argc, char **argv) usage(); if (local_len) duparg2("local", *argv); + lcl_arg = *argv; get_prefix(&lcl, *argv, req.ifa.ifa_family); if (req.ifa.ifa_family == AF_UNSPEC) req.ifa.ifa_family = lcl.family; @@ -838,9 +836,17 @@ int ipaddr_modify(int cmd, int argc, char **argv) exit(1); } - if (peer_len == 0 && local_len && cmd != RTM_DELADDR) { - peer = lcl; - addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &lcl.data, lcl.bytelen); + if (peer_len == 0 && local_len) { + if (cmd == RTM_DELADDR && lcl.family == AF_INET && !(lcl.flags & PREFIXLEN_SPECIFIED)) { + fprintf(stderr, + "Warning: Executing wildcard deletion to stay compatible with old scripts.\n" \ + " Explicitly specify the prefix length (%s/%d) to avoid this warning.\n" \ + " This special behaviour is likely to disappear in further releases,\n" \ + " fix 
your scripts!\n", lcl_arg, local_len*8); + } else { + peer = lcl; + addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &lcl.data, lcl.bytelen); + } } if (req.ifa.ifa_prefixlen == 0) req.ifa.ifa_prefixlen = lcl.bitlen; @@ -867,9 +873,6 @@ int ipaddr_modify(int cmd, int argc, char **argv) if (!scoped && cmd != RTM_DELADDR) req.ifa.ifa_scope = default_scope(&lcl); - if (rtnl_open(&rth, 0) < 0) - exit(1); - ll_init_map(&rth); if ((req.ifa.ifa_index = ll_name_to_index(d)) == 0) { @@ -880,7 +883,7 @@ int ipaddr_modify(int cmd, int argc, char **argv) if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0) exit(2); - exit(0); + return 0; } int do_ipaddr(int argc, char **argv) diff --git a/ip/iplink.c b/ip/iplink.c index 520280e..ffc9f06 100644 --- a/ip/iplink.c +++ b/ip/iplink.c @@ -178,7 +178,7 @@ static int get_address(const char *dev, int *htype) { struct ifreq ifr; struct sockaddr_ll me; - int alen; + socklen_t alen; int s; s = socket(PF_PACKET, SOCK_DGRAM, 0); @@ -216,7 +216,8 @@ static int get_address(const char *dev, int *htype) return me.sll_halen; } -static int parse_address(const char *dev, int hatype, int halen, char *lla, struct ifreq *ifr) +static int parse_address(const char *dev, int hatype, int halen, + char *lla, struct ifreq *ifr) { int alen; diff --git a/ip/ipmaddr.c b/ip/ipmaddr.c index 1cdab0b..e6bd625 100644 --- a/ip/ipmaddr.c +++ b/ip/ipmaddr.c @@ -298,7 +298,8 @@ int multiaddr_modify(int cmd, int argc, char **argv) usage(); if (ifr.ifr_hwaddr.sa_data[0]) duparg("address", *argv); - if (ll_addr_a2n(ifr.ifr_hwaddr.sa_data, 14, *argv) < 0) { + if (ll_addr_a2n(ifr.ifr_hwaddr.sa_data, + 14, *argv) < 0) { fprintf(stderr, "Error: \"%s\" is not a legal ll address.\n", *argv); exit(1); } diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c index cdaeb6f..50b6327 100644 --- a/ip/ipmonitor.c +++ b/ip/ipmonitor.c @@ -38,6 +38,9 @@ int accept_msg(const struct sockaddr_nl *who, { FILE *fp = (FILE*)arg; + if (timestamp) + print_timestamp(fp); + if (n->nlmsg_type == RTM_NEWROUTE 
|| n->nlmsg_type == RTM_DELROUTE) { print_route(who, n, arg); return 0; @@ -85,7 +88,6 @@ int accept_msg(const struct sockaddr_nl *who, int do_ipmonitor(int argc, char **argv) { - struct rtnl_handle rth; char *file = NULL; unsigned groups = ~RTMGRP_TC; int llink=0; @@ -93,6 +95,7 @@ int do_ipmonitor(int argc, char **argv) int lroute=0; int lprefix=0; + rtnl_close(&rth); ipaddr_reset_filter(1); iproute_reset_filter(); ipneigh_reset_filter(); @@ -150,16 +153,15 @@ int do_ipmonitor(int argc, char **argv) perror("Cannot fopen"); exit(-1); } - return rtnl_from_file(fp, accept_msg, (void*)stdout); + return rtnl_from_file(fp, accept_msg, stdout); } if (rtnl_open(&rth, groups) < 0) exit(1); - ll_init_map(&rth); - if (rtnl_listen(&rth, accept_msg, (void*)stdout) < 0) + if (rtnl_listen(&rth, accept_msg, stdout) < 0) exit(2); - exit(0); + return 0; } diff --git a/ip/ipmroute.c b/ip/ipmroute.c index b24caee..951a54f 100644 --- a/ip/ipmroute.c +++ b/ip/ipmroute.c @@ -42,7 +42,7 @@ static void usage(void) exit(-1); } -char *viftable[32]; +static char *viftable[32]; struct rtfilter { @@ -50,7 +50,7 @@ struct rtfilter inet_prefix msrc; } filter; -void read_viftable(void) +static void read_viftable(void) { char buf[256]; FILE *fp = fopen("/proc/net/ip_mr_vif", "r"); @@ -75,7 +75,7 @@ void read_viftable(void) fclose(fp); } -void read_mroute_list(FILE *ofp) +static void read_mroute_list(FILE *ofp) { char buf[256]; FILE *fp = fopen("/proc/net/ip_mr_cache", "r"); diff --git a/ip/ipneigh.c b/ip/ipneigh.c index e8ab291..249ee68 100644 --- a/ip/ipneigh.c +++ b/ip/ipneigh.c @@ -31,6 +31,7 @@ #include "ip_common.h" #define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY) +#define MAX_ROUNDS 10 static struct { @@ -43,7 +44,6 @@ static struct char *flushb; int flushp; int flushe; - struct rtnl_handle *rth; } filter; static void usage(void) __attribute__((noreturn)); @@ -88,7 +88,7 @@ int nud_state_a2n(unsigned *state, char *arg) static int flush_update(void) { - 
if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) { + if (rtnl_send(&rth, filter.flushb, filter.flushp) < 0) { perror("Failed to send flush request\n"); return -1; } @@ -99,7 +99,6 @@ static int flush_update(void) static int ipneigh_modify(int cmd, int flags, int argc, char **argv) { - struct rtnl_handle rth; struct { struct nlmsghdr n; struct ndmsg ndm; @@ -166,16 +165,13 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv) addattr_l(&req.n, sizeof(req), NDA_DST, &dst.data, dst.bytelen); if (lla && strcmp(lla, "null")) { - __u8 llabuf[16]; + char llabuf[20]; int l; l = ll_addr_a2n(llabuf, sizeof(llabuf), lla); addattr_l(&req.n, sizeof(req), NDA_LLADDR, llabuf, l); } - if (rtnl_open(&rth, 0) < 0) - exit(1); - ll_init_map(&rth); if ((req.ndm.ndm_ifindex = ll_name_to_index(d)) == 0) { @@ -186,7 +182,7 @@ static int ipneigh_modify(int cmd, int flags, int argc, char **argv) if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0) exit(2); - exit(0); + return 0; } @@ -250,7 +246,7 @@ int print_neigh(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) memcpy(fn, n, n->nlmsg_len); fn->nlmsg_type = RTM_DELNEIGH; fn->nlmsg_flags = NLM_F_REQUEST; - fn->nlmsg_seq = ++filter.rth->seq; + fn->nlmsg_seq = ++rth.seq; filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb; filter.flushed++; if (show_stats < 2) @@ -325,7 +321,6 @@ void ipneigh_reset_filter() int do_show_or_flush(int argc, char **argv, int flush) { char *filter_dev = NULL; - struct rtnl_handle rth; int state_given = 0; ipneigh_reset_filter(); @@ -380,9 +375,6 @@ int do_show_or_flush(int argc, char **argv, int flush) argc--; argv++; } - if (rtnl_open(&rth, 0) < 0) - exit(1); - ll_init_map(&rth); if (filter_dev) { @@ -399,10 +391,9 @@ int do_show_or_flush(int argc, char **argv, int flush) filter.flushb = flushb; filter.flushp = 0; filter.flushe = sizeof(flushb); - filter.rth = &rth; filter.state &= ~NUD_FAILED; - for (;;) { + while (round < MAX_ROUNDS) { if 
(rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) { perror("Cannot send dump request"); exit(1); @@ -428,6 +419,9 @@ int do_show_or_flush(int argc, char **argv, int flush) fflush(stdout); } } + printf("*** Flush not complete bailing out after %d rounds\n", + MAX_ROUNDS); + return 1; } if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) { diff --git a/ip/ipntable.c b/ip/ipntable.c new file mode 100644 index 0000000..5655d93 --- /dev/null +++ b/ip/ipntable.c @@ -0,0 +1,657 @@ +/* + * Copyright (C)2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * based on ipneigh.c + */ +/* + * Authors: + * Masahide NAKAMURA @USAGI + */ + +#include +#include +#include +#include +#include + +#include "utils.h" +#include "ip_common.h" + +static struct +{ + int family; + int index; +#define NONE_DEV (-1) + char name[1024]; +} filter; + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, + "Usage: ip ntable change name NAME [ dev DEV ]\n" + " [ thresh1 VAL ] [ thresh2 VAL ] [ thresh3 VAL ] [ gc_int MSEC ]\n" + " [ PARMS ]\n" + "Usage: ip ntable show [ dev DEV ] [ name NAME ]\n" + + "PARMS := [ base_reachable MSEC ] [ retrans MSEC ] [ gc_stale MSEC ]\n" + " [ delay_probe MSEC ] [ queue LEN ]\n" + " [ app_probs VAL ] [ ucast_probes VAL ] [ mcast_probes VAL ]\n" + " [ anycast_delay MSEC ] [ proxy_delay MSEC ] [ proxy_queue LEN ]\n" + " [ locktime MSEC ]\n" + ); + + exit(-1); +} + +static int ipntable_modify(int cmd, int flags, int argc, char **argv) +{ + struct { + struct nlmsghdr n; + struct ndtmsg ndtm; + char buf[1024]; + } req; + char *namep = NULL; + char *threshsp = NULL; + char *gc_intp = NULL; + char parms_buf[1024]; + struct rtattr *parms_rta = (struct rtattr *)parms_buf; + int parms_change = 0; + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg)); + req.n.nlmsg_flags = NLM_F_REQUEST|flags; + req.n.nlmsg_type = cmd; + + req.ndtm.ndtm_family = preferred_family; + req.ndtm.ndtm_pad1 = 0; + req.ndtm.ndtm_pad2 = 0; + + memset(&parms_buf, 0, sizeof(parms_buf)); + + parms_rta->rta_type = NDTA_PARMS; + parms_rta->rta_len = RTA_LENGTH(0); + + while (argc > 0) { + if (strcmp(*argv, "name") == 0) { + int len; + + NEXT_ARG(); + if (namep) + duparg("NAME", *argv); + + namep = *argv; + len = strlen(namep) + 1; + 
addattr_l(&req.n, sizeof(req), NDTA_NAME, namep, len); + } else if (strcmp(*argv, "thresh1") == 0) { + __u32 thresh1; + + NEXT_ARG(); + threshsp = *argv; + + if (get_u32(&thresh1, *argv, 0)) + invarg("\"thresh1\" value is invalid", *argv); + + addattr32(&req.n, sizeof(req), NDTA_THRESH1, thresh1); + } else if (strcmp(*argv, "thresh2") == 0) { + __u32 thresh2; + + NEXT_ARG(); + threshsp = *argv; + + if (get_u32(&thresh2, *argv, 0)) + invarg("\"thresh2\" value is invalid", *argv); + + addattr32(&req.n, sizeof(req), NDTA_THRESH2, thresh2); + } else if (strcmp(*argv, "thresh3") == 0) { + __u32 thresh3; + + NEXT_ARG(); + threshsp = *argv; + + if (get_u32(&thresh3, *argv, 0)) + invarg("\"thresh3\" value is invalid", *argv); + + addattr32(&req.n, sizeof(req), NDTA_THRESH3, thresh3); + } else if (strcmp(*argv, "gc_int") == 0) { + __u64 gc_int; + + NEXT_ARG(); + gc_intp = *argv; + + if (get_u64(&gc_int, *argv, 0)) + invarg("\"gc_int\" value is invalid", *argv); + + addattr_l(&req.n, sizeof(req), NDTA_GC_INTERVAL, + &gc_int, sizeof(gc_int)); + } else if (strcmp(*argv, "dev") == 0) { + __u32 ifindex; + + NEXT_ARG(); + ifindex = ll_name_to_index(*argv); + if (ifindex == 0) { + fprintf(stderr, "Cannot find device \"%s\"\n", *argv); + return -1; + } + + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_IFINDEX, ifindex); + } else if (strcmp(*argv, "base_reachable") == 0) { + __u64 breachable; + + NEXT_ARG(); + + if (get_u64(&breachable, *argv, 0)) + invarg("\"base_reachable\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_BASE_REACHABLE_TIME, + &breachable, sizeof(breachable)); + parms_change = 1; + } else if (strcmp(*argv, "retrans") == 0) { + __u64 retrans; + + NEXT_ARG(); + + if (get_u64(&retrans, *argv, 0)) + invarg("\"retrans\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_RETRANS_TIME, + &retrans, sizeof(retrans)); + parms_change = 1; + } else if (strcmp(*argv, "gc_stale") == 0) { + __u64 gc_stale; 
+ + NEXT_ARG(); + + if (get_u64(&gc_stale, *argv, 0)) + invarg("\"gc_stale\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_GC_STALETIME, + &gc_stale, sizeof(gc_stale)); + parms_change = 1; + } else if (strcmp(*argv, "delay_probe") == 0) { + __u64 delay_probe; + + NEXT_ARG(); + + if (get_u64(&delay_probe, *argv, 0)) + invarg("\"delay_probe\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_DELAY_PROBE_TIME, + &delay_probe, sizeof(delay_probe)); + parms_change = 1; + } else if (strcmp(*argv, "queue") == 0) { + __u32 queue; + + NEXT_ARG(); + + if (get_u32(&queue, *argv, 0)) + invarg("\"queue\" value is invalid", *argv); + + if (!parms_rta) + parms_rta = (struct rtattr *)&parms_buf; + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_QUEUE_LEN, queue); + parms_change = 1; + } else if (strcmp(*argv, "app_probes") == 0) { + __u32 aprobe; + + NEXT_ARG(); + + if (get_u32(&aprobe, *argv, 0)) + invarg("\"app_probes\" value is invalid", *argv); + + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_APP_PROBES, aprobe); + parms_change = 1; + } else if (strcmp(*argv, "ucast_probes") == 0) { + __u32 uprobe; + + NEXT_ARG(); + + if (get_u32(&uprobe, *argv, 0)) + invarg("\"ucast_probes\" value is invalid", *argv); + + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_UCAST_PROBES, uprobe); + parms_change = 1; + } else if (strcmp(*argv, "mcast_probes") == 0) { + __u32 mprobe; + + NEXT_ARG(); + + if (get_u32(&mprobe, *argv, 0)) + invarg("\"mcast_probes\" value is invalid", *argv); + + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_MCAST_PROBES, mprobe); + parms_change = 1; + } else if (strcmp(*argv, "anycast_delay") == 0) { + __u64 anycast_delay; + + NEXT_ARG(); + + if (get_u64(&anycast_delay, *argv, 0)) + invarg("\"anycast_delay\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_ANYCAST_DELAY, + &anycast_delay, sizeof(anycast_delay)); + parms_change = 1; + } else if 
(strcmp(*argv, "proxy_delay") == 0) { + __u64 proxy_delay; + + NEXT_ARG(); + + if (get_u64(&proxy_delay, *argv, 0)) + invarg("\"proxy_delay\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_PROXY_DELAY, + &proxy_delay, sizeof(proxy_delay)); + parms_change = 1; + } else if (strcmp(*argv, "proxy_queue") == 0) { + __u32 pqueue; + + NEXT_ARG(); + + if (get_u32(&pqueue, *argv, 0)) + invarg("\"proxy_queue\" value is invalid", *argv); + + rta_addattr32(parms_rta, sizeof(parms_buf), + NDTPA_PROXY_QLEN, pqueue); + parms_change = 1; + } else if (strcmp(*argv, "locktime") == 0) { + __u64 locktime; + + NEXT_ARG(); + + if (get_u64(&locktime, *argv, 0)) + invarg("\"locktime\" value is invalid", *argv); + + rta_addattr_l(parms_rta, sizeof(parms_buf), + NDTPA_LOCKTIME, + &locktime, sizeof(locktime)); + parms_change = 1; + } else { + invarg("unknown", *argv); + } + + argc--; argv++; + } + + if (!namep) + missarg("NAME"); + if (!threshsp && !gc_intp && !parms_change) { + fprintf(stderr, "Not enough information: changable attributes required.\n"); + exit(-1); + } + + if (parms_rta->rta_len > RTA_LENGTH(0)) { + addattr_l(&req.n, sizeof(req), NDTA_PARMS, RTA_DATA(parms_rta), + RTA_PAYLOAD(parms_rta)); + } + + if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0) + exit(2); + + return 0; +} + +static const char *ntable_strtime_delta(__u32 msec) +{ + static char str[32]; + struct timeval now; + time_t t; + struct tm *tp; + + if (msec == 0) + goto error; + + memset(&now, 0, sizeof(now)); + + if (gettimeofday(&now, NULL) < 0) { + perror("gettimeofday"); + goto error; + } + + t = now.tv_sec - (msec / 1000); + tp = localtime(&t); + if (!tp) + goto error; + + strftime(str, sizeof(str), "%Y-%m-%d %T", tp); + + return str; + error: + strcpy(str, "(error)"); + return str; +} + +int print_ntable(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) +{ + FILE *fp = (FILE*)arg; + struct ndtmsg *ndtm = NLMSG_DATA(n); + int len = n->nlmsg_len; + struct 
rtattr *tb[NDTA_MAX+1]; + struct rtattr *tpb[NDTPA_MAX+1]; + int ret; + + if (n->nlmsg_type != RTM_NEWNEIGHTBL) { + fprintf(stderr, "Not NEIGHTBL: %08x %08x %08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + return 0; + } + len -= NLMSG_LENGTH(sizeof(*ndtm)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + if (preferred_family && preferred_family != ndtm->ndtm_family) + return 0; + + parse_rtattr(tb, NDTA_MAX, NDTA_RTA(ndtm), + n->nlmsg_len - NLMSG_LENGTH(sizeof(*ndtm))); + + if (tb[NDTA_NAME]) { + char *name = RTA_DATA(tb[NDTA_NAME]); + + if (strlen(filter.name) > 0 && strcmp(filter.name, name)) + return 0; + } + if (tb[NDTA_PARMS]) { + parse_rtattr(tpb, NDTPA_MAX, RTA_DATA(tb[NDTA_PARMS]), + RTA_PAYLOAD(tb[NDTA_PARMS])); + + if (tpb[NDTPA_IFINDEX]) { + __u32 ifindex = *(__u32 *)RTA_DATA(tpb[NDTPA_IFINDEX]); + + if (filter.index && filter.index != ifindex) + return 0; + } else { + if (filter.index && filter.index != NONE_DEV) + return 0; + } + } + + if (ndtm->ndtm_family == AF_INET) + fprintf(fp, "inet "); + else if (ndtm->ndtm_family == AF_INET6) + fprintf(fp, "inet6 "); + else if (ndtm->ndtm_family == AF_DECnet) + fprintf(fp, "dnet "); + else + fprintf(fp, "(%d) ", ndtm->ndtm_family); + + if (tb[NDTA_NAME]) { + char *name = RTA_DATA(tb[NDTA_NAME]); + fprintf(fp, "%s ", name); + } + + fprintf(fp, "%s", _SL_); + + ret = (tb[NDTA_THRESH1] || tb[NDTA_THRESH2] || tb[NDTA_THRESH3] || + tb[NDTA_GC_INTERVAL]); + if (ret) + fprintf(fp, " "); + + if (tb[NDTA_THRESH1]) { + __u32 thresh1 = *(__u32 *)RTA_DATA(tb[NDTA_THRESH1]); + fprintf(fp, "thresh1 %u ", thresh1); + } + if (tb[NDTA_THRESH2]) { + __u32 thresh2 = *(__u32 *)RTA_DATA(tb[NDTA_THRESH2]); + fprintf(fp, "thresh2 %u ", thresh2); + } + if (tb[NDTA_THRESH3]) { + __u32 thresh3 = *(__u32 *)RTA_DATA(tb[NDTA_THRESH3]); + fprintf(fp, "thresh3 %u ", thresh3); + } + if (tb[NDTA_GC_INTERVAL]) { + __u64 gc_int = *(__u64 *)RTA_DATA(tb[NDTA_GC_INTERVAL]); + fprintf(fp, "gc_int 
%llu ", gc_int); + } + + if (ret) + fprintf(fp, "%s", _SL_); + + if (tb[NDTA_CONFIG] && show_stats) { + struct ndt_config *ndtc = RTA_DATA(tb[NDTA_CONFIG]); + + fprintf(fp, " "); + fprintf(fp, "config "); + + fprintf(fp, "key_len %u ", ndtc->ndtc_key_len); + fprintf(fp, "entry_size %u ", ndtc->ndtc_entry_size); + fprintf(fp, "entries %u ", ndtc->ndtc_entries); + + fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + fprintf(fp, "last_flush %s ", + ntable_strtime_delta(ndtc->ndtc_last_flush)); + fprintf(fp, "last_rand %s ", + ntable_strtime_delta(ndtc->ndtc_last_rand)); + + fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + fprintf(fp, "hash_rnd %u ", ndtc->ndtc_hash_rnd); + fprintf(fp, "hash_mask %08x ", ndtc->ndtc_hash_mask); + + fprintf(fp, "hash_chain_gc %u ", ndtc->ndtc_hash_chain_gc); + fprintf(fp, "proxy_qlen %u ", ndtc->ndtc_proxy_qlen); + + fprintf(fp, "%s", _SL_); + } + + if (tb[NDTA_PARMS]) { + if (tpb[NDTPA_IFINDEX]) { + __u32 ifindex = *(__u32 *)RTA_DATA(tpb[NDTPA_IFINDEX]); + + fprintf(fp, " "); + fprintf(fp, "dev %s ", ll_index_to_name(ifindex)); + fprintf(fp, "%s", _SL_); + } + + fprintf(fp, " "); + + if (tpb[NDTPA_REFCNT]) { + __u32 refcnt = *(__u32 *)RTA_DATA(tpb[NDTPA_REFCNT]); + fprintf(fp, "refcnt %u ", refcnt); + } + if (tpb[NDTPA_REACHABLE_TIME]) { + __u64 reachable = *(__u64 *)RTA_DATA(tpb[NDTPA_REACHABLE_TIME]); + fprintf(fp, "reachable %llu ", reachable); + } + if (tpb[NDTPA_BASE_REACHABLE_TIME]) { + __u64 breachable = *(__u64 *)RTA_DATA(tpb[NDTPA_BASE_REACHABLE_TIME]); + fprintf(fp, "base_reachable %llu ", breachable); + } + if (tpb[NDTPA_RETRANS_TIME]) { + __u64 retrans = *(__u64 *)RTA_DATA(tpb[NDTPA_RETRANS_TIME]); + fprintf(fp, "retrans %llu ", retrans); + } + + fprintf(fp, "%s", _SL_); + + fprintf(fp, " "); + + if (tpb[NDTPA_GC_STALETIME]) { + __u64 gc_stale = *(__u64 *)RTA_DATA(tpb[NDTPA_GC_STALETIME]); + fprintf(fp, "gc_stale %llu ", gc_stale); + } + if (tpb[NDTPA_DELAY_PROBE_TIME]) { + __u64 delay_probe = *(__u64 
*)RTA_DATA(tpb[NDTPA_DELAY_PROBE_TIME]); + fprintf(fp, "delay_probe %llu ", delay_probe); + } + if (tpb[NDTPA_QUEUE_LEN]) { + __u32 queue = *(__u32 *)RTA_DATA(tpb[NDTPA_QUEUE_LEN]); + fprintf(fp, "queue %u ", queue); + } + + fprintf(fp, "%s", _SL_); + + fprintf(fp, " "); + + if (tpb[NDTPA_APP_PROBES]) { + __u32 aprobe = *(__u32 *)RTA_DATA(tpb[NDTPA_APP_PROBES]); + fprintf(fp, "app_probes %u ", aprobe); + } + if (tpb[NDTPA_UCAST_PROBES]) { + __u32 uprobe = *(__u32 *)RTA_DATA(tpb[NDTPA_UCAST_PROBES]); + fprintf(fp, "ucast_probes %u ", uprobe); + } + if (tpb[NDTPA_MCAST_PROBES]) { + __u32 mprobe = *(__u32 *)RTA_DATA(tpb[NDTPA_MCAST_PROBES]); + fprintf(fp, "mcast_probes %u ", mprobe); + } + + fprintf(fp, "%s", _SL_); + + fprintf(fp, " "); + + if (tpb[NDTPA_ANYCAST_DELAY]) { + __u64 anycast_delay = *(__u64 *)RTA_DATA(tpb[NDTPA_ANYCAST_DELAY]); + fprintf(fp, "anycast_delay %llu ", anycast_delay); + } + if (tpb[NDTPA_PROXY_DELAY]) { + __u64 proxy_delay = *(__u64 *)RTA_DATA(tpb[NDTPA_PROXY_DELAY]); + fprintf(fp, "proxy_delay %llu ", proxy_delay); + } + if (tpb[NDTPA_PROXY_QLEN]) { + __u32 pqueue = *(__u32 *)RTA_DATA(tpb[NDTPA_PROXY_QLEN]); + fprintf(fp, "proxy_queue %u ", pqueue); + } + if (tpb[NDTPA_LOCKTIME]) { + __u64 locktime = *(__u64 *)RTA_DATA(tpb[NDTPA_LOCKTIME]); + fprintf(fp, "locktime %llu ", locktime); + } + + fprintf(fp, "%s", _SL_); + } + + if (tb[NDTA_STATS] && show_stats) { + struct ndt_stats *ndts = RTA_DATA(tb[NDTA_STATS]); + + fprintf(fp, " "); + fprintf(fp, "stats "); + + fprintf(fp, "allocs %llu ", ndts->ndts_allocs); + fprintf(fp, "destroys %llu ", ndts->ndts_destroys); + fprintf(fp, "hash_grows %llu ", ndts->ndts_hash_grows); + + fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + fprintf(fp, "res_failed %llu ", ndts->ndts_res_failed); + fprintf(fp, "lookups %llu ", ndts->ndts_lookups); + fprintf(fp, "hits %llu ", ndts->ndts_hits); + + fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + fprintf(fp, "rcv_probes_mcast %llu ", ndts->ndts_rcv_probes_mcast); + 
fprintf(fp, "rcv_probes_ucast %llu ", ndts->ndts_rcv_probes_ucast); + + fprintf(fp, "%s", _SL_); + fprintf(fp, " "); + + fprintf(fp, "periodic_gc_runs %llu ", ndts->ndts_periodic_gc_runs); + fprintf(fp, "forced_gc_runs %llu ", ndts->ndts_forced_gc_runs); + + fprintf(fp, "%s", _SL_); + } + + fprintf(fp, "\n"); + + fflush(fp); + return 0; +} + +void ipntable_reset_filter(void) +{ + memset(&filter, 0, sizeof(filter)); +} + +static int ipntable_show(int argc, char **argv) +{ + ipntable_reset_filter(); + + filter.family = preferred_family; + + while (argc > 0) { + if (strcmp(*argv, "dev") == 0) { + NEXT_ARG(); + + if (strcmp("none", *argv) == 0) + filter.index = NONE_DEV; + else if ((filter.index = ll_name_to_index(*argv)) == 0) + invarg("\"DEV\" is invalid", *argv); + } else if (strcmp(*argv, "name") == 0) { + NEXT_ARG(); + + strncpy(filter.name, *argv, sizeof(filter.name) - 1); /* filter was zeroed by ipntable_reset_filter(), so the last byte stays NUL */ + } else + invarg("unknown", *argv); + + argc--; argv++; + } + + if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETNEIGHTBL) < 0) { + perror("Cannot send dump request"); + exit(1); + } + + if (rtnl_dump_filter(&rth, print_ntable, stdout, NULL, NULL) < 0) { + fprintf(stderr, "Dump terminated\n"); + exit(1); + } + + return 0; +} + +int do_ipntable(int argc, char **argv) +{ + ll_init_map(&rth); + + if (argc > 0) { + if (matches(*argv, "change") == 0 || + matches(*argv, "chg") == 0) + return ipntable_modify(RTM_SETNEIGHTBL, + NLM_F_REPLACE, + argc-1, argv+1); + if (matches(*argv, "show") == 0 || + matches(*argv, "lst") == 0 || + matches(*argv, "list") == 0) + return ipntable_show(argc-1, argv+1); + if (matches(*argv, "help") == 0) + usage(); + } else + return ipntable_show(0, NULL); + + fprintf(stderr, "Command \"%s\" is unknown, try \"ip ntable help\".\n", *argv); + exit(-1); +} diff --git a/ip/iproute.c b/ip/iproute.c index 1e23e49..a43c09e 100644 --- a/ip/iproute.c +++ b/ip/iproute.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "rt_names.h" #include "utils.h" @@ -53,6 +54,7 
@@ static void usage(void) fprintf(stderr, "NODE_SPEC := [ TYPE ] PREFIX [ tos TOS ]\n"); fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n"); fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n"); + fprintf(stderr, " [ mpath MP_ALGO ]\n"); fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n"); fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n"); fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n"); @@ -64,6 +66,7 @@ static void usage(void) fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n"); fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n"); fprintf(stderr, "FLAGS := [ equalize ]\n"); + fprintf(stderr, "MP_ALGO := { rr | drr | random | wrandom }\n"); fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n"); fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n"); exit(-1); @@ -77,7 +80,6 @@ static struct char *flushb; int flushp; int flushe; - struct rtnl_handle *rth; int protocol, protocolmask; int scope, scopemask; int type, typemask; @@ -93,9 +95,17 @@ static struct inet_prefix msrc; } filter; +static char *mp_alg_names[IP_MP_ALG_MAX+1] = { + [IP_MP_ALG_NONE] = "none", + [IP_MP_ALG_RR] = "rr", + [IP_MP_ALG_DRR] = "drr", + [IP_MP_ALG_RANDOM] = "random", + [IP_MP_ALG_WRANDOM] = "wrandom" +}; + static int flush_update(void) { - if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) { + if (rtnl_send(&rth, filter.flushb, filter.flushp) < 0) { perror("Failed to send flush request\n"); return -1; } @@ -206,13 +216,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) memset(&via, 0, sizeof(via)); via.family = r->rtm_family; if (tb[RTA_GATEWAY]) - memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len); + memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8); } if (filter.rprefsrc.bitlen>0) { memset(&prefsrc, 0, sizeof(prefsrc)); prefsrc.family = r->rtm_family; if (tb[RTA_PREFSRC]) - memcpy(&prefsrc.data, 
RTA_DATA(tb[RTA_PREFSRC]), host_len); + memcpy(&prefsrc.data, RTA_DATA(tb[RTA_PREFSRC]), host_len/8); } if (filter.rdst.family && inet_addr_match(&dst, &filter.rdst, filter.rdst.bitlen)) @@ -270,7 +280,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) memcpy(fn, n, n->nlmsg_len); fn->nlmsg_type = RTM_DELROUTE; fn->nlmsg_flags = NLM_F_REQUEST; - fn->nlmsg_seq = ++filter.rth->seq; + fn->nlmsg_seq = ++rth.seq; filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb; filter.flushed++; if (show_stats < 2) @@ -324,6 +334,15 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) SPRINT_BUF(b1); fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1))); } + + if (tb[RTA_MP_ALGO]) { + __u32 mp_alg = *(__u32*) RTA_DATA(tb[RTA_MP_ALGO]); + if (mp_alg > IP_MP_ALG_NONE) { + fprintf(fp, "mpath %s ", + mp_alg <= IP_MP_ALG_MAX ? mp_alg_names[mp_alg] : "unknown"); /* mp_alg_names has IP_MP_ALG_MAX+1 entries, so index IP_MP_ALG_MAX is valid */ + } + } + if (tb[RTA_GATEWAY] && filter.rvia.bitlen != host_len) { fprintf(fp, "via %s ", format_host(r->rtm_family, @@ -616,9 +635,6 @@ int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv) } memset(rtnh, 0, sizeof(*rtnh)); rtnh->rtnh_len = sizeof(*rtnh); - rtnh->rtnh_ifindex = 0; - rtnh->rtnh_flags = 0; - rtnh->rtnh_hops = 0; rta->rta_len += rtnh->rtnh_len; parse_one_nh(rta, rtnh, &argc, &argv); rtnh = RTNH_NEXT(rtnh); @@ -632,7 +648,6 @@ int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv) int iproute_modify(int cmd, unsigned flags, int argc, char **argv) { - struct rtnl_handle rth; struct { struct nlmsghdr n; struct rtmsg r; @@ -709,7 +724,7 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv) invarg("\"metric\" value is invalid\n", *argv); addattr32(&req.n, sizeof(req), RTA_PRIORITY, metric); } else if (strcmp(*argv, "scope") == 0) { - int scope = 0; + __u32 scope = 0; NEXT_ARG(); if (rtnl_rtscope_a2n(&scope, *argv)) invarg("invalid \"scope\" value\n", *argv); @@ -814,14 
+829,14 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv) nhs_ok = 1; break; } else if (matches(*argv, "protocol") == 0) { - int prot; + __u32 prot; NEXT_ARG(); if (rtnl_rtprot_a2n(&prot, *argv)) invarg("\"protocol\" value is invalid\n", *argv); req.r.rtm_protocol = prot; proto_ok =1; } else if (matches(*argv, "table") == 0) { - int tid; + __u32 tid; NEXT_ARG(); if (rtnl_rttable_a2n(&tid, *argv)) invarg("\"table\" value is invalid\n", *argv); @@ -831,6 +846,18 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv) strcmp(*argv, "oif") == 0) { NEXT_ARG(); d = *argv; + } else if (strcmp(*argv, "mpath") == 0 || + strcmp(*argv, "mp") == 0) { + int i; + __u32 mp_alg = IP_MP_ALG_NONE; + + NEXT_ARG(); + for (i = 1; i < ARRAY_SIZE(mp_alg_names); i++) + if (strcmp(*argv, mp_alg_names[i]) == 0) + mp_alg = i; + if (mp_alg == IP_MP_ALG_NONE) + invarg("\"mpath\" value is invalid\n", *argv); + addattr_l(&req.n, sizeof(req), RTA_MP_ALGO, &mp_alg, sizeof(mp_alg)); } else { int type; inet_prefix dst; @@ -860,9 +887,6 @@ int iproute_modify(int cmd, unsigned flags, int argc, char **argv) argc--; argv++; } - if (rtnl_open(&rth, 0) < 0) - exit(1); - if (d || nhs_ok) { int idx; @@ -969,7 +993,6 @@ static int iproute_flush_cache(void) static int iproute_list_or_flush(int argc, char **argv, int flush) { int do_ipv6 = preferred_family; - struct rtnl_handle rth; char *id = NULL; char *od = NULL; @@ -983,7 +1006,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush) while (argc > 0) { if (matches(*argv, "table") == 0) { - int tid; + __u32 tid; NEXT_ARG(); if (rtnl_rttable_a2n(&tid, *argv)) { if (strcmp(*argv, "all") == 0) { @@ -1009,7 +1032,7 @@ static int iproute_list_or_flush(int argc, char **argv, int flush) filter.tos = tos; filter.tosmask = -1; } else if (matches(*argv, "protocol") == 0) { - int prot = 0; + __u32 prot = 0; NEXT_ARG(); filter.protocolmask = -1; if (rtnl_rtprot_a2n(&prot, *argv)) { @@ -1020,7 +1043,7 @@ static int 
iproute_list_or_flush(int argc, char **argv, int flush) } filter.protocol = prot; } else if (matches(*argv, "scope") == 0) { - int scope = 0; + __u32 scope = 0; NEXT_ARG(); filter.scopemask = -1; if (rtnl_rtscope_a2n(&scope, *argv)) { @@ -1103,9 +1126,6 @@ static int iproute_list_or_flush(int argc, char **argv, int flush) if (do_ipv6 == AF_UNSPEC && filter.tb) do_ipv6 = AF_INET; - if (rtnl_open(&rth, 0) < 0) - exit(1); - ll_init_map(&rth); if (id || od) { @@ -1147,7 +1167,6 @@ static int iproute_list_or_flush(int argc, char **argv, int flush) filter.flushb = flushb; filter.flushp = 0; filter.flushe = sizeof(flushb); - filter.rth = &rth; for (;;) { if (rtnl_wilddump_request(&rth, do_ipv6, RTM_GETROUTE) < 0) { @@ -1208,7 +1227,6 @@ static int iproute_list_or_flush(int argc, char **argv, int flush) int iproute_get(int argc, char **argv) { - struct rtnl_handle rth; struct { struct nlmsghdr n; struct rtmsg r; @@ -1288,9 +1306,6 @@ int iproute_get(int argc, char **argv) exit(1); } - if (rtnl_open(&rth, 0) < 0) - exit(1); - ll_init_map(&rth); if (idev || odev) { diff --git a/ip/iprule.c b/ip/iprule.c index 764edc8..ccf699f 100644 --- a/ip/iprule.c +++ b/ip/iprule.c @@ -28,6 +28,8 @@ #include "rt_names.h" #include "utils.h" +extern struct rtnl_handle rth; + static void usage(void) __attribute__((noreturn)); static void usage(void) @@ -161,7 +163,6 @@ static int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, static int iprule_list(int argc, char **argv) { - struct rtnl_handle rth; int af = preferred_family; if (af == AF_UNSPEC) @@ -172,9 +173,6 @@ static int iprule_list(int argc, char **argv) return -1; } - if (rtnl_open(&rth, 0) < 0) - return 1; - if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) { perror("Cannot send dump request"); return 1; @@ -192,7 +190,6 @@ static int iprule_list(int argc, char **argv) static int iprule_modify(int cmd, int argc, char **argv) { int table_ok = 0; - struct rtnl_handle rth; struct { struct nlmsghdr n; struct rtmsg r; 
@@ -256,7 +253,7 @@ static int iprule_modify(int cmd, int argc, char **argv) addattr32(&req.n, sizeof(req), RTA_FLOW, realm); } else if (matches(*argv, "table") == 0 || strcmp(*argv, "lookup") == 0) { - int tid; + __u32 tid; NEXT_ARG(); if (rtnl_rttable_a2n(&tid, *argv)) invarg("invalid table ID\n", *argv); @@ -294,9 +291,6 @@ static int iprule_modify(int cmd, int argc, char **argv) if (!table_ok && cmd == RTM_NEWRULE) req.r.rtm_table = RT_TABLE_MAIN; - if (rtnl_open(&rth, 0) < 0) - return 1; - if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0) return 2; @@ -306,7 +300,7 @@ static int iprule_modify(int cmd, int argc, char **argv) static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg) { - struct rtnl_handle rth; + struct rtnl_handle rth2; struct rtmsg *r = NLMSG_DATA(n); int len = n->nlmsg_len; struct rtattr * tb[RTA_MAX+1]; @@ -321,11 +315,13 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *a n->nlmsg_type = RTM_DELRULE; n->nlmsg_flags = NLM_F_REQUEST; - if (rtnl_open(&rth, 0) < 0) + if (rtnl_open(&rth2, 0) < 0) return -1; - if (rtnl_talk(&rth, n, 0, 0, NULL, NULL, NULL) < 0) + if (rtnl_talk(&rth2, n, 0, 0, NULL, NULL, NULL) < 0) return -2; + + rtnl_close(&rth2); } return 0; @@ -333,20 +329,16 @@ static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *a static int iprule_flush(int argc, char **argv) { - struct rtnl_handle rth; int af = preferred_family; if (af == AF_UNSPEC) af = AF_INET; if (argc > 0) { - fprintf(stderr, "\"ip rule flush\" need not any arguments.\n"); + fprintf(stderr, "\"ip rule flush\" does not allow arguments\n"); return -1; } - if (rtnl_open(&rth, 0) < 0) - return 1; - if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) { perror("Cannot send dump request"); return 1; diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c index fc0f0d9..8baaabd 100644 --- a/ip/ipxfrm.c +++ b/ip/ipxfrm.c @@ -40,6 +40,19 @@ #include "utils.h" #include "xfrm.h" +#define STRBUF_SIZE (128) 
+#define STRBUF_CAT(buf, str) \ + do { \ + int rest = sizeof(buf) - 1 - strlen(buf); \ + if (rest > 0) { \ + int len = strlen(str); \ + if (len > rest) \ + len = rest; \ + strncat(buf, str, len); \ + buf[sizeof(buf) - 1] = '\0'; \ + } \ + } while(0) /* no trailing ';': the caller supplies it, keeping the macro safe inside if/else */ + struct xfrm_filter filter; static void usage(void) __attribute__((noreturn)); @@ -48,7 +61,7 @@ static void usage(void) { fprintf(stderr, "Usage: ip xfrm XFRM_OBJECT { COMMAND | help }\n" - "where XFRM_OBJECT := { state | policy }\n"); + "where XFRM_OBJECT := { state | policy | monitor }\n"); exit(-1); } @@ -227,12 +240,12 @@ const char *strxf_proto(__u8 proto) void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id, __u8 mode, __u32 reqid, __u16 family, int force_spi, - FILE *fp, const char *prefix) + FILE *fp, const char *prefix, const char *title) { char abuf[256]; - if (prefix) - fprintf(fp, prefix); + if (title) + fprintf(fp, "%s", title); /* title is data, never a format string */ memset(abuf, '\0', sizeof(abuf)); fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr), @@ -248,7 +261,6 @@ void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id, fprintf(fp, "proto %s ", strxf_xfrmproto(id->proto)); - if (show_stats > 0 || force_spi || id->spi) { __u32 spi = ntohl(id->spi); fprintf(fp, "spi 0x%08x", spi); @@ -433,6 +445,7 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family, case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: + case IPPROTO_DCCP: default: /* XXX */ if (sel->sport_mask) fprintf(fp, "sport %u ", ntohs(sel->sport)); @@ -519,9 +532,8 @@ static void xfrm_tmpl_print(struct xfrm_user_tmpl *tmpls, int len, if (prefix) fprintf(fp, prefix); - fprintf(fp, "tmpl"); xfrm_id_info_print(&tmpl->saddr, &tmpl->id, tmpl->mode, - tmpl->reqid, family, 0, fp, prefix); + tmpl->reqid, family, 0, fp, prefix, "tmpl "); if (show_stats > 0 || tmpl->optional) { if (prefix) @@ -630,6 +642,125 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, } } +static int xfrm_selector_iszero(struct xfrm_selector *s) +{ + struct 
xfrm_selector s0; + + memset(&s0, 0, sizeof(s0)); + + return (memcmp(&s0, s, sizeof(s0)) == 0); +} + +void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, + struct rtattr *tb[], FILE *fp, const char *prefix, + const char *title) +{ + char buf[STRBUF_SIZE]; + + memset(buf, '\0', sizeof(buf)); + + xfrm_id_info_print(&xsinfo->saddr, &xsinfo->id, xsinfo->mode, + xsinfo->reqid, xsinfo->family, 1, fp, prefix, + title); + + if (prefix) + STRBUF_CAT(buf, prefix); + STRBUF_CAT(buf, "\t"); + + fprintf(fp, "%s", buf); /* buf holds the caller's prefix: print it as data, not as a format */ + fprintf(fp, "replay-window %u ", xsinfo->replay_window); + if (show_stats > 0) + fprintf(fp, "seq 0x%08x ", xsinfo->seq); /* hex digits to match the 0x prefix */ + if (show_stats > 0 || xsinfo->flags) { + __u8 flags = xsinfo->flags; + + fprintf(fp, "flag "); + XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_NOECN, "noecn"); + XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_DECAP_DSCP, "decap-dscp"); + if (flags) + fprintf(fp, "%x", flags); + if (show_stats > 0) + fprintf(fp, " (0x%s)", strxf_mask8(flags)); + } + fprintf(fp, "%s", _SL_); + + xfrm_xfrma_print(tb, xsinfo->family, fp, buf); + + if (!xfrm_selector_iszero(&xsinfo->sel)) { + char sbuf[STRBUF_SIZE]; + + memcpy(sbuf, buf, sizeof(sbuf)); + STRBUF_CAT(sbuf, "sel "); + + xfrm_selector_print(&xsinfo->sel, xsinfo->family, fp, sbuf); + } + + if (show_stats > 0) { + xfrm_lifetime_print(&xsinfo->lft, &xsinfo->curlft, fp, buf); + xfrm_stats_print(&xsinfo->stats, fp, buf); + } +} + +void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo, + struct rtattr *tb[], FILE *fp, const char *prefix, + const char *title) +{ + char buf[STRBUF_SIZE]; + + memset(buf, '\0', sizeof(buf)); + + xfrm_selector_print(&xpinfo->sel, preferred_family, fp, title); + + if (prefix) + STRBUF_CAT(buf, prefix); + STRBUF_CAT(buf, "\t"); + + fprintf(fp, "%s", buf); /* buf holds the caller's prefix: print it as data, not as a format */ + fprintf(fp, "dir "); + switch (xpinfo->dir) { + case XFRM_POLICY_IN: + fprintf(fp, "in"); + break; + case XFRM_POLICY_OUT: + fprintf(fp, "out"); + break; + case XFRM_POLICY_FWD: + fprintf(fp, "fwd"); + break; + default: + fprintf(fp, 
"%u", xpinfo->dir); + break; + } + fprintf(fp, " "); + + switch (xpinfo->action) { + case XFRM_POLICY_ALLOW: + if (show_stats > 0) + fprintf(fp, "action allow "); + break; + case XFRM_POLICY_BLOCK: + fprintf(fp, "action block "); + break; + default: + fprintf(fp, "action %u ", xpinfo->action); + break; + } + + if (show_stats) + fprintf(fp, "index %u ", xpinfo->index); + fprintf(fp, "priority %u ", xpinfo->priority); + if (show_stats > 0) { + fprintf(fp, "share %s ", strxf_share(xpinfo->share)); + fprintf(fp, "flag 0x%s", strxf_mask8(xpinfo->flags)); + } + fprintf(fp, "%s", _SL_); + + if (show_stats > 0) + xfrm_lifetime_print(&xpinfo->lft, &xpinfo->curlft, fp, buf); + + xfrm_xfrma_print(tb, xpinfo->sel.family, fp, buf); +} + int xfrm_id_parse(xfrm_address_t *saddr, struct xfrm_id *id, __u16 *family, int loose, int *argcp, char ***argvp) { @@ -869,6 +1000,7 @@ static int xfrm_selector_upspec_parse(struct xfrm_selector *sel, case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: + case IPPROTO_DCCP: break; default: fprintf(stderr, "\"sport\" and \"dport\" are invalid with proto=%s\n", strxf_proto(sel->proto)); @@ -1038,10 +1170,12 @@ int do_xfrm(int argc, char **argv) usage(); if (matches(*argv, "state") == 0 || - matches(*argv, "sa") == 0) { + matches(*argv, "sa") == 0) return do_xfrm_state(argc-1, argv+1); - } else if (matches(*argv, "policy") == 0) + else if (matches(*argv, "policy") == 0) return do_xfrm_policy(argc-1, argv+1); + else if (matches(*argv, "monitor") == 0) + return do_xfrm_monitor(argc-1, argv+1); else if (matches(*argv, "help") == 0) { usage(); fprintf(stderr, "xfrm Object \"%s\" is unknown.\n", *argv); diff --git a/ip/xfrm.h b/ip/xfrm.h index fa551b1..4833b36 100644 --- a/ip/xfrm.h +++ b/ip/xfrm.h @@ -32,6 +32,9 @@ #ifndef IPPROTO_SCTP # define IPPROTO_SCTP 132 #endif +#ifndef IPPROTO_DCCP +# define IPPROTO_DCCP 33 +#endif #define XFRMS_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_usersa_info)))) #define XFRMS_PAYLOAD(n) 
NLMSG_PAYLOAD(n,sizeof(struct xfrm_usersa_info)) @@ -39,6 +42,16 @@ #define XFRMP_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_info)))) #define XFRMP_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_userpoilcy_info)) +#define XFRMSID_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_usersa_id)))) +#define XFRMSID_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_usersa_id)) + +#define XFRMPID_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)))) +#define XFRMPID_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct xfrm_userpolicy_id)) + +#define XFRMACQ_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)))) +#define XFRMEXP_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_user_expire)))) +#define XFRMPEXP_RTA(x) ((struct rtattr*)(((char*)(x)) + NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)))) + #define XFRM_FLAG_PRINT(fp, flags, f, s) \ do { \ if (flags & f) { \ @@ -84,8 +97,13 @@ struct xfrm_filter { extern struct xfrm_filter filter; +int xfrm_state_print(const struct sockaddr_nl *who, struct nlmsghdr *n, + void *arg); +int xfrm_policy_print(const struct sockaddr_nl *who, struct nlmsghdr *n, + void *arg); int do_xfrm_state(int argc, char **argv); int do_xfrm_policy(int argc, char **argv); +int do_xfrm_monitor(int argc, char **argv); int xfrm_addr_match(xfrm_address_t *x1, xfrm_address_t *x2, int bits); int xfrm_xfrmproto_getbyname(char *name); @@ -98,7 +116,7 @@ const char *strxf_share(__u8 share); const char *strxf_proto(__u8 proto); void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id, __u8 mode, __u32 reqid, __u16 family, int force_spi, - FILE *fp, const char *prefix); + FILE *fp, const char *prefix, const char *title); void xfrm_stats_print(struct xfrm_stats *s, FILE *fp, const char *prefix); void xfrm_lifetime_print(struct xfrm_lifetime_cfg *cfg, struct xfrm_lifetime_cur *cur, @@ -107,6 +125,12 @@ void 
xfrm_selector_print(struct xfrm_selector *sel, __u16 family, FILE *fp, const char *prefix); void xfrm_xfrma_print(struct rtattr *tb[], __u16 family, FILE *fp, const char *prefix); +void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo, + struct rtattr *tb[], FILE *fp, const char *prefix, + const char *title); +void xfrm_policy_info_print(struct xfrm_userpolicy_info *xpinfo, + struct rtattr *tb[], FILE *fp, const char *prefix, + const char *title); int xfrm_id_parse(xfrm_address_t *saddr, struct xfrm_id *id, __u16 *family, int loose, int *argcp, char ***argvp); int xfrm_mode_parse(__u8 *mode, int *argcp, char ***argvp); diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c new file mode 100644 index 0000000..153621f --- /dev/null +++ b/ip/xfrm_monitor.c @@ -0,0 +1,218 @@ +/* $USAGI: $ */ + +/* + * Copyright (C)2005 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * based on ipmonitor.c + */ +/* + * Authors: + * Masahide NAKAMURA @USAGI + */ + +#include +#include +#include +#include +#include "utils.h" +#include "xfrm.h" +#include "ip_common.h" + +static void usage(void) __attribute__((noreturn)); + +static void usage(void) +{ + fprintf(stderr, "Usage: ip xfrm monitor [ all | LISTofOBJECTS ]\n"); + exit(-1); +} + +static int xfrm_acquire_print(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) +{ + FILE *fp = (FILE*)arg; + struct xfrm_user_acquire *xacq = NLMSG_DATA(n); + int len = n->nlmsg_len; + struct rtattr * tb[XFRMA_MAX+1]; + __u16 family; + + if (n->nlmsg_type != XFRM_MSG_ACQUIRE) { + fprintf(stderr, "Not an acquire: %08x %08x %08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + return 0; + } + + len -= NLMSG_LENGTH(sizeof(*xacq)); + if (len < 0) { + fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); + return -1; + } + + parse_rtattr(tb, XFRMA_MAX, XFRMACQ_RTA(xacq), len); + + family = xacq->sel.family; + if (family == AF_UNSPEC) + family = xacq->policy.sel.family; + if (family == AF_UNSPEC) + family = preferred_family; + + fprintf(fp, "acquire "); + + fprintf(fp, "proto %s ", strxf_xfrmproto(xacq->id.proto)); + if (show_stats > 0 || xacq->id.spi) { + __u32 spi = ntohl(xacq->id.spi); + fprintf(fp, "spi 0x%08x", spi); + if (show_stats > 0) + fprintf(fp, "(%u)", spi); + fprintf(fp, " "); + } + fprintf(fp, "%s", _SL_); + + xfrm_selector_print(&xacq->sel, family, fp, " sel "); + + xfrm_policy_info_print(&xacq->policy, tb, fp, " ", " policy "); + + if (show_stats > 0) + fprintf(fp, " seq 0x%08x ", xacq->seq); /* hex digits to match the 0x prefix */ + if (show_stats > 0) { + fprintf(fp, "%s-mask %s ", + strxf_algotype(XFRMA_ALG_CRYPT), + strxf_mask32(xacq->ealgos)); + fprintf(fp, "%s-mask %s ", + 
strxf_algotype(XFRMA_ALG_AUTH), + strxf_mask32(xacq->aalgos)); + fprintf(fp, "%s-mask %s", + strxf_algotype(XFRMA_ALG_COMP), + strxf_mask32(xacq->calgos)); + } + fprintf(fp, "%s", _SL_); + + if (oneline) + fprintf(fp, "\n"); + fflush(fp); + + return 0; +} + +static int xfrm_accept_msg(const struct sockaddr_nl *who, + struct nlmsghdr *n, void *arg) +{ + FILE *fp = (FILE*)arg; + + if (timestamp) + print_timestamp(fp); + + if (n->nlmsg_type == XFRM_MSG_NEWSA || + n->nlmsg_type == XFRM_MSG_DELSA || + n->nlmsg_type == XFRM_MSG_UPDSA || + n->nlmsg_type == XFRM_MSG_EXPIRE) { + xfrm_state_print(who, n, arg); + return 0; + } + if (n->nlmsg_type == XFRM_MSG_NEWPOLICY || + n->nlmsg_type == XFRM_MSG_DELPOLICY || + n->nlmsg_type == XFRM_MSG_UPDPOLICY || + n->nlmsg_type == XFRM_MSG_POLEXPIRE) { + xfrm_policy_print(who, n, arg); + return 0; + } + + if (n->nlmsg_type == XFRM_MSG_ACQUIRE) { + xfrm_acquire_print(who, n, arg); + return 0; + } + if (n->nlmsg_type == XFRM_MSG_FLUSHSA) { + /* XXX: Todo: show proto in xfrm_usersa_flush */ + fprintf(fp, "Flushed state\n"); + return 0; + } + if (n->nlmsg_type == XFRM_MSG_FLUSHPOLICY) { + fprintf(fp, "Flushed policy\n"); + return 0; + } + if (n->nlmsg_type != NLMSG_ERROR && n->nlmsg_type != NLMSG_NOOP && + n->nlmsg_type != NLMSG_DONE) { + fprintf(fp, "Unknown message: %08d 0x%08x 0x%08x\n", + n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); + } + return 0; +} + +int do_xfrm_monitor(int argc, char **argv) +{ + struct rtnl_handle rth; + char *file = NULL; + unsigned groups = ~((unsigned)0); /* XXX */ + int lacquire=0; + int lexpire=0; + int lpolicy=0; + int lsa=0; + + while (argc > 0) { + if (matches(*argv, "file") == 0) { + NEXT_ARG(); + file = *argv; + } else if (matches(*argv, "acquire") == 0) { + lacquire=1; + groups = 0; + } else if (matches(*argv, "expire") == 0) { + lexpire=1; + groups = 0; + } else if (matches(*argv, "SA") == 0) { + lsa=1; + groups = 0; + } else if (matches(*argv, "policy") == 0) { + lpolicy=1; + groups = 0; + } else if 
(matches(*argv, "help") == 0) { + usage(); + } else { + fprintf(stderr, "Argument \"%s\" is unknown, try \"ip xfrm monitor help\".\n", *argv); + exit(-1); + } + argc--; argv++; + } + + if (lacquire) + groups |= XFRMGRP_ACQUIRE; + if (lexpire) + groups |= XFRMGRP_EXPIRE; + if (lsa) + groups |= XFRMGRP_SA; + if (lpolicy) + groups |= XFRMGRP_POLICY; + + if (file) { + FILE *fp; + fp = fopen(file, "r"); + if (fp == NULL) { + perror("Cannot fopen"); + exit(-1); + } + return rtnl_from_file(fp, xfrm_accept_msg, (void*)stdout); + } + + //ll_init_map(&rth); + + if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0) + exit(1); + + if (rtnl_listen(&rth, xfrm_accept_msg, (void*)stdout) < 0) + exit(2); + + return 0; +} diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c index c1331a4..433b513 100644 --- a/ip/xfrm_policy.c +++ b/ip/xfrm_policy.c @@ -35,8 +35,8 @@ #include "xfrm.h" #include "ip_common.h" -//#define NLMSG_FLUSH_BUF_SIZE (4096-512) -#define NLMSG_FLUSH_BUF_SIZE 8192 +//#define NLMSG_DELETEALL_BUF_SIZE (4096-512) +#define NLMSG_DELETEALL_BUF_SIZE 8192 /* * Receiving buffer defines: @@ -56,8 +56,9 @@ static void usage(void) fprintf(stderr, "Usage: ip xfrm policy { add | update } dir DIR SELECTOR [ index INDEX ] \n"); fprintf(stderr, " [ action ACTION ] [ priority PRIORITY ] [ LIMIT-LIST ] [ TMPL-LIST ]\n"); fprintf(stderr, "Usage: ip xfrm policy { delete | get } dir DIR [ SELECTOR | index INDEX ]\n"); - fprintf(stderr, "Usage: ip xfrm policy { flush | list } [ dir DIR ] [ SELECTOR ]\n"); + fprintf(stderr, "Usage: ip xfrm policy { deleteall | list } [ dir DIR ] [ SELECTOR ]\n"); fprintf(stderr, " [ index INDEX ] [ action ACTION ] [ priority PRIORITY ]\n"); + fprintf(stderr, "Usage: ip xfrm policy flush\n"); fprintf(stderr, "DIR := [ in | out | fwd ]\n"); fprintf(stderr, "SELECTOR := src ADDR[/PLEN] dst ADDR[/PLEN] [ UPSPEC ] [ dev DEV ]\n"); @@ -331,84 +332,87 @@ static int xfrm_policy_filter_match(struct xfrm_userpolicy_info *xpinfo) return 1; } -static int 
xfrm_policy_print(const struct sockaddr_nl *who, - struct nlmsghdr *n, void *arg) +int xfrm_policy_print(const struct sockaddr_nl *who, struct nlmsghdr *n, + void *arg) { + struct rtattr * tb[XFRMA_MAX+1]; + struct rtattr * rta; + struct xfrm_userpolicy_info *xpinfo = NULL; + struct xfrm_user_polexpire *xpexp = NULL; + struct xfrm_userpolicy_id *xpid = NULL; FILE *fp = (FILE*)arg; - struct xfrm_userpolicy_info *xpinfo = NLMSG_DATA(n); int len = n->nlmsg_len; - struct rtattr * tb[XFRMA_MAX+1]; if (n->nlmsg_type != XFRM_MSG_NEWPOLICY && - n->nlmsg_type != XFRM_MSG_DELPOLICY) { + n->nlmsg_type != XFRM_MSG_DELPOLICY && + n->nlmsg_type != XFRM_MSG_UPDPOLICY && + n->nlmsg_type != XFRM_MSG_POLEXPIRE) { fprintf(stderr, "Not a policy: %08x %08x %08x\n", n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); return 0; } - len -= NLMSG_LENGTH(sizeof(*xpinfo)); + if (n->nlmsg_type == XFRM_MSG_DELPOLICY) { + xpid = NLMSG_DATA(n); + len -= NLMSG_LENGTH(sizeof(*xpid)); + } else if (n->nlmsg_type == XFRM_MSG_POLEXPIRE) { + xpexp = NLMSG_DATA(n); + xpinfo = &xpexp->pol; + len -= NLMSG_LENGTH(sizeof(*xpexp)); + } else { + xpexp = NULL; + xpinfo = NLMSG_DATA(n); + len -= NLMSG_LENGTH(sizeof(*xpinfo)); + } + if (len < 0) { fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); return -1; } - if (!xfrm_policy_filter_match(xpinfo)) + if (xpinfo && !xfrm_policy_filter_match(xpinfo)) return 0; - parse_rtattr(tb, XFRMA_MAX, XFRMP_RTA(xpinfo), len); - if (n->nlmsg_type == XFRM_MSG_DELPOLICY) fprintf(fp, "Deleted "); + else if (n->nlmsg_type == XFRM_MSG_UPDPOLICY) + fprintf(fp, "Updated "); + else if (n->nlmsg_type == XFRM_MSG_POLEXPIRE) + fprintf(fp, "Expired "); - xfrm_selector_print(&xpinfo->sel, preferred_family, fp, NULL); - - fprintf(fp, "\t"); - fprintf(fp, "dir "); - switch (xpinfo->dir) { - case XFRM_POLICY_IN: - fprintf(fp, "in"); - break; - case XFRM_POLICY_OUT: - fprintf(fp, "out"); - break; - case XFRM_POLICY_FWD: - fprintf(fp, "fwd"); - break; - default: - fprintf(fp, "%u", xpinfo->dir); - 
break; - } - fprintf(fp, " "); - - switch (xpinfo->action) { - case XFRM_POLICY_ALLOW: - if (show_stats > 0) - fprintf(fp, "action allow "); - break; - case XFRM_POLICY_BLOCK: - fprintf(fp, "action block "); - break; - default: - fprintf(fp, "action %u ", xpinfo->action); - break; - } + if (n->nlmsg_type == XFRM_MSG_DELPOLICY) + rta = XFRMPID_RTA(xpid); + else if (n->nlmsg_type == XFRM_MSG_POLEXPIRE) + rta = XFRMPEXP_RTA(xpexp); + else + rta = XFRMP_RTA(xpinfo); + + parse_rtattr(tb, XFRMA_MAX, rta, len); - if (show_stats) - fprintf(fp, "index %u ", xpinfo->index); - fprintf(fp, "priority %u ", xpinfo->priority); - if (show_stats > 0) { - fprintf(fp, "share %s ", strxf_share(xpinfo->share)); - fprintf(fp, "flag 0x%s", strxf_mask8(xpinfo->flags)); + if (n->nlmsg_type == XFRM_MSG_DELPOLICY) { + //xfrm_policy_id_print(); + if (!tb[XFRMA_POLICY]) { + fprintf(stderr, "Buggy XFRM_MSG_DELPOLICY: no XFRMA_POLICY\n"); + return -1; + } + if (RTA_PAYLOAD(tb[XFRMA_POLICY]) < sizeof(*xpinfo)) { + fprintf(stderr, "Buggy XFRM_MSG_DELPOLICY: too short XFRMA_POLICY len\n"); + return -1; + } + xpinfo = (struct xfrm_userpolicy_info *)RTA_DATA(tb[XFRMA_POLICY]); } - fprintf(fp, "%s", _SL_); - if (show_stats > 0) - xfrm_lifetime_print(&xpinfo->lft, &xpinfo->curlft, fp, "\t"); + xfrm_policy_info_print(xpinfo, tb, fp, NULL, NULL); - xfrm_xfrma_print(tb, xpinfo->sel.family, fp, "\t"); + if (n->nlmsg_type == XFRM_MSG_POLEXPIRE) { + fprintf(fp, "\t"); + fprintf(fp, "hard %u", xpexp->hard); + fprintf(fp, "%s", _SL_); + } if (oneline) fprintf(fp, "\n"); + fflush(fp); return 0; } @@ -541,7 +545,7 @@ static int xfrm_policy_keep(const struct sockaddr_nl *who, return 0; if (xb->offset > xb->size) { - fprintf(stderr, "Flush buffer overflow\n"); + fprintf(stderr, "Policy buffer overflow\n"); return -1; } @@ -562,7 +566,7 @@ static int xfrm_policy_keep(const struct sockaddr_nl *who, return 0; } -static int xfrm_policy_list_or_flush(int argc, char **argv, int flush) +static int 
xfrm_policy_list_or_deleteall(int argc, char **argv, int deleteall) { char *selp = NULL; struct rtnl_handle rth; @@ -620,9 +624,9 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush) if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) exit(1); - if (flush) { + if (deleteall) { struct xfrm_buffer xb; - char buf[NLMSG_FLUSH_BUF_SIZE]; + char buf[NLMSG_DELETEALL_BUF_SIZE]; int i; xb.buf = buf; @@ -634,7 +638,7 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush) xb.nlmsg_count = 0; if (show_stats > 1) - fprintf(stderr, "Flush round = %d\n", i); + fprintf(stderr, "Delete-all round = %d\n", i); if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETPOLICY) < 0) { perror("Cannot send dump request"); @@ -642,21 +646,21 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush) } if (rtnl_dump_filter(&rth, xfrm_policy_keep, &xb, NULL, NULL) < 0) { - fprintf(stderr, "Flush terminated\n"); + fprintf(stderr, "Delete-all terminated\n"); exit(1); } if (xb.nlmsg_count == 0) { if (show_stats > 1) - fprintf(stderr, "Flush completed\n"); + fprintf(stderr, "Delete-all completed\n"); break; } if (rtnl_send(&rth, xb.buf, xb.offset) < 0) { - perror("Failed to send flush request\n"); + perror("Failed to send delete-all request\n"); exit(1); } if (show_stats > 1) - fprintf(stderr, "Flushed nlmsg count = %d\n", xb.nlmsg_count); + fprintf(stderr, "Delete-all nlmsg count = %d\n", xb.nlmsg_count); xb.offset = 0; xb.nlmsg_count = 0; @@ -678,7 +682,7 @@ static int xfrm_policy_list_or_flush(int argc, char **argv, int flush) exit(0); } -static int xfrm_policy_flush_all(void) +static int xfrm_policy_flush(void) { struct rtnl_handle rth; struct { @@ -695,7 +699,7 @@ static int xfrm_policy_flush_all(void) exit(1); if (show_stats > 1) - fprintf(stderr, "Flush all\n"); + fprintf(stderr, "Flush policy\n"); if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0) exit(2); @@ -708,7 +712,7 @@ static int xfrm_policy_flush_all(void) int 
do_xfrm_policy(int argc, char **argv) { if (argc < 1) - return xfrm_policy_list_or_flush(0, NULL, 0); + return xfrm_policy_list_or_deleteall(0, NULL, 0); if (matches(*argv, "add") == 0) return xfrm_policy_modify(XFRM_MSG_NEWPOLICY, 0, @@ -716,19 +720,17 @@ int do_xfrm_policy(int argc, char **argv) if (matches(*argv, "update") == 0) return xfrm_policy_modify(XFRM_MSG_UPDPOLICY, 0, argc-1, argv+1); - if (matches(*argv, "delete") == 0 || matches(*argv, "del") == 0) + if (matches(*argv, "delete") == 0) return xfrm_policy_delete(argc-1, argv+1); + if (matches(*argv, "deleteall") == 0 || matches(*argv, "delall") == 0) + return xfrm_policy_list_or_deleteall(argc-1, argv+1, 1); if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0 || matches(*argv, "lst") == 0) - return xfrm_policy_list_or_flush(argc-1, argv+1, 0); + return xfrm_policy_list_or_deleteall(argc-1, argv+1, 0); if (matches(*argv, "get") == 0) return xfrm_policy_get(argc-1, argv+1); - if (matches(*argv, "flush") == 0) { - if (argc-1 < 1) - return xfrm_policy_flush_all(); - else - return xfrm_policy_list_or_flush(argc-1, argv+1, 1); - } + if (matches(*argv, "flush") == 0) + return xfrm_policy_flush(); if (matches(*argv, "help") == 0) usage(); fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm policy help\".\n", *argv); diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c index b5b6214..3eefaff 100644 --- a/ip/xfrm_state.c +++ b/ip/xfrm_state.c @@ -34,8 +34,8 @@ #include "xfrm.h" #include "ip_common.h" -//#define NLMSG_FLUSH_BUF_SIZE (4096-512) -#define NLMSG_FLUSH_BUF_SIZE 8192 +//#define NLMSG_DELETEALL_BUF_SIZE (4096-512) +#define NLMSG_DELETEALL_BUF_SIZE 8192 /* * Receiving buffer defines: @@ -56,11 +56,14 @@ static void usage(void) __attribute__((noreturn)); static void usage(void) { fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ ALGO-LIST ] [ mode MODE ]\n"); - fprintf(stderr, " [ reqid REQID ] [ replay-window SIZE ] [ flag FLAG-LIST ]\n"); + fprintf(stderr, " [ reqid REQID ] [ seq SEQ 
] [ replay-window SIZE ] [ flag FLAG-LIST ]\n"); fprintf(stderr, " [ encap ENCAP ] [ sel SELECTOR ] [ LIMIT-LIST ]\n"); + fprintf(stderr, "Usage: ip xfrm state allocspi ID [ mode MODE ] [ reqid REQID ] [ seq SEQ ]\n"); + fprintf(stderr, " [ min SPI max SPI ]\n"); fprintf(stderr, "Usage: ip xfrm state { delete | get } ID\n"); - fprintf(stderr, "Usage: ip xfrm state { flush | list } [ ID ] [ mode MODE ] [ reqid REQID ]\n"); + fprintf(stderr, "Usage: ip xfrm state { deleteall | list } [ ID ] [ mode MODE ] [ reqid REQID ]\n"); fprintf(stderr, " [ flag FLAG_LIST ]\n"); + fprintf(stderr, "Usage: ip xfrm state flush [ proto XFRM_PROTO ]\n"); fprintf(stderr, "ID := [ src ADDR ] [ dst ADDR ] [ proto XFRM_PROTO ] [ spi SPI ]\n"); //fprintf(stderr, "XFRM_PROTO := [ esp | ah | comp ]\n"); @@ -137,7 +140,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type, for (i = - (plen % 2), j = 0; j < len; i += 2, j++) { char vbuf[3]; - char val; + __u8 val; vbuf[0] = i >= 0 ? p[i] : '0'; vbuf[1] = p[i + 1]; @@ -163,6 +166,22 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type, return 0; } +static int xfrm_seq_parse(__u32 *seq, int *argcp, char ***argvp) +{ + int argc = *argcp; + char **argv = *argvp; + + if (get_u32(seq, *argv, 0)) + invarg("\"SEQ\" is invalid", *argv); + + *seq = htonl(*seq); + + *argcp = argc; + *argvp = argv; + + return 0; +} + static int xfrm_state_flag_parse(__u8 *flags, int *argcp, char ***argvp) { int argc = *argcp; @@ -232,6 +251,9 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv) } else if (strcmp(*argv, "reqid") == 0) { NEXT_ARG(); xfrm_reqid_parse(&req.xsinfo.reqid, &argc, &argv); + } else if (strcmp(*argv, "seq") == 0) { + NEXT_ARG(); + xfrm_seq_parse(&req.xsinfo.seq, &argc, &argv); } else if (strcmp(*argv, "replay-window") == 0) { NEXT_ARG(); if (get_u8(&req.xsinfo.replay_window, *argv, 0)) @@ -372,6 +394,136 @@ static int xfrm_state_modify(int cmd, unsigned flags, int 
argc, char **argv) return 0; } +static int xfrm_state_allocspi(int argc, char **argv) +{ + struct rtnl_handle rth; + struct { + struct nlmsghdr n; + struct xfrm_userspi_info xspi; + char buf[RTA_BUF_SIZE]; + } req; + char *idp = NULL; + char *minp = NULL; + char *maxp = NULL; + char res_buf[NLMSG_BUF_SIZE]; + struct nlmsghdr *res_n = (struct nlmsghdr *)res_buf; + + memset(res_buf, 0, sizeof(res_buf)); + + memset(&req, 0, sizeof(req)); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xspi)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = XFRM_MSG_ALLOCSPI; + req.xspi.info.family = preferred_family; + +#if 0 + req.xsinfo.lft.soft_byte_limit = XFRM_INF; + req.xsinfo.lft.hard_byte_limit = XFRM_INF; + req.xsinfo.lft.soft_packet_limit = XFRM_INF; + req.xsinfo.lft.hard_packet_limit = XFRM_INF; +#endif + + while (argc > 0) { + if (strcmp(*argv, "mode") == 0) { + NEXT_ARG(); + xfrm_mode_parse(&req.xspi.info.mode, &argc, &argv); + } else if (strcmp(*argv, "reqid") == 0) { + NEXT_ARG(); + xfrm_reqid_parse(&req.xspi.info.reqid, &argc, &argv); + } else if (strcmp(*argv, "seq") == 0) { + NEXT_ARG(); + xfrm_seq_parse(&req.xspi.info.seq, &argc, &argv); + } else if (strcmp(*argv, "min") == 0) { + if (minp) + duparg("min", *argv); + minp = *argv; + + NEXT_ARG(); + + if (get_u32(&req.xspi.min, *argv, 0)) + invarg("\"min\" value is invalid", *argv); + } else if (strcmp(*argv, "max") == 0) { + if (maxp) + duparg("max", *argv); + maxp = *argv; + + NEXT_ARG(); + + if (get_u32(&req.xspi.max, *argv, 0)) + invarg("\"max\" value is invalid", *argv); + } else { + /* try to assume ID */ + if (idp) + invarg("unknown", *argv); + idp = *argv; + + /* ID */ + xfrm_id_parse(&req.xspi.info.saddr, &req.xspi.info.id, + &req.xspi.info.family, 0, &argc, &argv); + if (req.xspi.info.id.spi) { + fprintf(stderr, "\"SPI\" must be zero\n"); + exit(1); + } + if (preferred_family == AF_UNSPEC) + preferred_family = req.xspi.info.family; + } + argc--; argv++; + } + + if (!idp) { + fprintf(stderr, "Not 
enough information: \"ID\" is required\n"); + exit(1); + } + + if (minp) { + if (!maxp) { + fprintf(stderr, "\"max\" is missing\n"); + exit(1); + } + if (req.xspi.min > req.xspi.max) { + fprintf(stderr, "\"min\" valie is larger than \"max\" one\n"); + exit(1); + } + } else { + if (maxp) { + fprintf(stderr, "\"min\" is missing\n"); + exit(1); + } + + /* XXX: Default value defined in PF_KEY; + * See kernel's net/key/af_key.c(pfkey_getspi). + */ + req.xspi.min = 0x100; + req.xspi.max = 0x0fffffff; + + /* XXX: IPCOMP spi is 16-bits; + * See kernel's net/xfrm/xfrm_user(verify_userspi_info). + */ + if (req.xspi.info.id.proto == IPPROTO_COMP) + req.xspi.max = 0xffff; + } + + if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) + exit(1); + + if (req.xspi.info.family == AF_UNSPEC) + req.xspi.info.family = AF_INET; + + + if (rtnl_talk(&rth, &req.n, 0, 0, res_n, NULL, NULL) < 0) + exit(2); + + if (xfrm_state_print(NULL, res_n, (void*)stdout) < 0) { + fprintf(stderr, "An error :-)\n"); + exit(1); + } + + rtnl_close(&rth); + + return 0; +} + static int xfrm_state_filter_match(struct xfrm_usersa_info *xsinfo) { if (!filter.use) @@ -400,77 +552,89 @@ static int xfrm_state_filter_match(struct xfrm_usersa_info *xsinfo) return 1; } -static int xfrm_selector_iszero(struct xfrm_selector *s) -{ - struct xfrm_selector s0; - - memset(&s0, 0, sizeof(s0)); - - return (memcmp(&s0, s, sizeof(s0)) == 0); -} - -static int xfrm_state_print(const struct sockaddr_nl *who, - struct nlmsghdr *n, - void *arg) +int xfrm_state_print(const struct sockaddr_nl *who, struct nlmsghdr *n, + void *arg) { FILE *fp = (FILE*)arg; - struct xfrm_usersa_info *xsinfo = NLMSG_DATA(n); - int len = n->nlmsg_len; struct rtattr * tb[XFRMA_MAX+1]; + struct rtattr * rta; + struct xfrm_usersa_info *xsinfo = NULL; + struct xfrm_user_expire *xexp = NULL; + struct xfrm_usersa_id *xsid = NULL; + int len = n->nlmsg_len; if (n->nlmsg_type != XFRM_MSG_NEWSA && - n->nlmsg_type != XFRM_MSG_DELSA) { + n->nlmsg_type != XFRM_MSG_DELSA 
&& + n->nlmsg_type != XFRM_MSG_UPDSA && + n->nlmsg_type != XFRM_MSG_EXPIRE) { fprintf(stderr, "Not a state: %08x %08x %08x\n", n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags); return 0; } - len -= NLMSG_LENGTH(sizeof(*xsinfo)); + if (n->nlmsg_type == XFRM_MSG_DELSA) { + /* Dont blame me for this .. Herbert made me do it */ + xsid = NLMSG_DATA(n); + len -= NLMSG_LENGTH(sizeof(*xsid)); + } else if (n->nlmsg_type == XFRM_MSG_EXPIRE) { + xexp = NLMSG_DATA(n); + xsinfo = &xexp->state; + len -= NLMSG_LENGTH(sizeof(*xexp)); + } else { + xexp = NULL; + xsinfo = NLMSG_DATA(n); + len -= NLMSG_LENGTH(sizeof(*xsinfo)); + } + if (len < 0) { fprintf(stderr, "BUG: wrong nlmsg len %d\n", len); return -1; } - if (!xfrm_state_filter_match(xsinfo)) + if (xsinfo && !xfrm_state_filter_match(xsinfo)) return 0; - parse_rtattr(tb, XFRMA_MAX, XFRMS_RTA(xsinfo), len); - if (n->nlmsg_type == XFRM_MSG_DELSA) fprintf(fp, "Deleted "); + else if (n->nlmsg_type == XFRM_MSG_UPDSA) + fprintf(fp, "Updated "); + else if (n->nlmsg_type == XFRM_MSG_EXPIRE) + fprintf(fp, "Expired "); - xfrm_id_info_print(&xsinfo->saddr, &xsinfo->id, xsinfo->mode, - xsinfo->reqid, xsinfo->family, 1, fp, NULL); - - fprintf(fp, "\t"); - fprintf(fp, "replay-window %u ", xsinfo->replay_window); - if (show_stats > 0) - fprintf(fp, "seq 0x%08u ", xsinfo->seq); - if (show_stats > 0 || xsinfo->flags) { - __u8 flags = xsinfo->flags; - - fprintf(fp, "flag "); - XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_NOECN, "noecn"); - XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_DECAP_DSCP, "decap-dscp"); - if (flags) - fprintf(fp, "%x", flags); - if (show_stats > 0) - fprintf(fp, " (0x%s)", strxf_mask8(flags)); - } - fprintf(fp, "%s", _SL_); + if (n->nlmsg_type == XFRM_MSG_DELSA) + rta = XFRMSID_RTA(xsid); + else if (n->nlmsg_type == XFRM_MSG_EXPIRE) + rta = XFRMEXP_RTA(xexp); + else + rta = XFRMS_RTA(xsinfo); - xfrm_xfrma_print(tb, xsinfo->family, fp, "\t"); + parse_rtattr(tb, XFRMA_MAX, rta, len); - if (!xfrm_selector_iszero(&xsinfo->sel)) - 
xfrm_selector_print(&xsinfo->sel, xsinfo->family, fp, "\tsel "); + if (n->nlmsg_type == XFRM_MSG_DELSA) { + //xfrm_policy_id_print(); - if (show_stats > 0) { - xfrm_lifetime_print(&xsinfo->lft, &xsinfo->curlft, fp, "\t"); - xfrm_stats_print(&xsinfo->stats, fp, "\t"); + if (!tb[XFRMA_SA]) { + fprintf(stderr, "Buggy XFRM_MSG_DELSA: no XFRMA_SA\n"); + return -1; + } + if (RTA_PAYLOAD(tb[XFRMA_SA]) < sizeof(*xsinfo)) { + fprintf(stderr, "Buggy XFRM_MSG_DELPOLICY: too short XFRMA_POLICY len\n"); + return -1; + } + xsinfo = (struct xfrm_usersa_info *)RTA_DATA(tb[XFRMA_SA]); + } + + xfrm_state_info_print(xsinfo, tb, fp, NULL, NULL); + + if (n->nlmsg_type == XFRM_MSG_EXPIRE) { + fprintf(fp, "\t"); + fprintf(fp, "hard %u", xexp->hard); + fprintf(fp, "%s", _SL_); } if (oneline) fprintf(fp, "\n"); + fflush(fp); return 0; } @@ -576,7 +740,7 @@ static int xfrm_state_keep(const struct sockaddr_nl *who, return 0; if (xb->offset > xb->size) { - fprintf(stderr, "Flush buffer overflow\n"); + fprintf(stderr, "State buffer overflow\n"); return -1; } @@ -598,7 +762,7 @@ static int xfrm_state_keep(const struct sockaddr_nl *who, return 0; } -static int xfrm_state_list_or_flush(int argc, char **argv, int flush) +static int xfrm_state_list_or_deleteall(int argc, char **argv, int deleteall) { char *idp = NULL; struct rtnl_handle rth; @@ -643,9 +807,9 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush) if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) exit(1); - if (flush) { + if (deleteall) { struct xfrm_buffer xb; - char buf[NLMSG_FLUSH_BUF_SIZE]; + char buf[NLMSG_DELETEALL_BUF_SIZE]; int i; xb.buf = buf; @@ -657,7 +821,7 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush) xb.nlmsg_count = 0; if (show_stats > 1) - fprintf(stderr, "Flush round = %d\n", i); + fprintf(stderr, "Delete-all round = %d\n", i); if (rtnl_wilddump_request(&rth, preferred_family, XFRM_MSG_GETSA) < 0) { perror("Cannot send dump request"); @@ -665,21 +829,21 @@ static int 
xfrm_state_list_or_flush(int argc, char **argv, int flush) } if (rtnl_dump_filter(&rth, xfrm_state_keep, &xb, NULL, NULL) < 0) { - fprintf(stderr, "Flush terminated\n"); + fprintf(stderr, "Delete-all terminated\n"); exit(1); } if (xb.nlmsg_count == 0) { if (show_stats > 1) - fprintf(stderr, "Flush completed\n"); + fprintf(stderr, "Delete-all completed\n"); break; } if (rtnl_send(&rth, xb.buf, xb.offset) < 0) { - perror("Failed to send flush request\n"); + perror("Failed to send delete-all request\n"); exit(1); } if (show_stats > 1) - fprintf(stderr, "Flushed nlmsg count = %d\n", xb.nlmsg_count); + fprintf(stderr, "Delete-all nlmsg count = %d\n", xb.nlmsg_count); xb.offset = 0; xb.nlmsg_count = 0; @@ -702,13 +866,14 @@ static int xfrm_state_list_or_flush(int argc, char **argv, int flush) exit(0); } -static int xfrm_state_flush_all(void) +static int xfrm_state_flush(int argc, char **argv) { struct rtnl_handle rth; struct { struct nlmsghdr n; struct xfrm_usersa_flush xsf; } req; + char *protop = NULL; memset(&req, 0, sizeof(req)); @@ -717,11 +882,34 @@ static int xfrm_state_flush_all(void) req.n.nlmsg_type = XFRM_MSG_FLUSHSA; req.xsf.proto = IPSEC_PROTO_ANY; + while (argc > 0) { + if (strcmp(*argv, "proto") == 0) { + int ret; + + if (protop) + duparg("proto", *argv); + protop = *argv; + + NEXT_ARG(); + + ret = xfrm_xfrmproto_getbyname(*argv); + if (ret < 0) + invarg("\"XFRM_PROTO\" is invalid", *argv); + + req.xsf.proto = (__u8)ret; + } else + invarg("unknown", *argv); + + argc--; argv++; + } + if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0) exit(1); if (show_stats > 1) - fprintf(stderr, "Flush all\n"); + fprintf(stderr, "Flush state proto=%s\n", + (req.xsf.proto == IPSEC_PROTO_ANY) ? 
"any" : + strxf_xfrmproto(req.xsf.proto)); if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0) exit(2); @@ -734,7 +922,7 @@ static int xfrm_state_flush_all(void) int do_xfrm_state(int argc, char **argv) { if (argc < 1) - return xfrm_state_list_or_flush(0, NULL, 0); + return xfrm_state_list_or_deleteall(0, NULL, 0); if (matches(*argv, "add") == 0) return xfrm_state_modify(XFRM_MSG_NEWSA, 0, @@ -742,19 +930,19 @@ int do_xfrm_state(int argc, char **argv) if (matches(*argv, "update") == 0) return xfrm_state_modify(XFRM_MSG_UPDSA, 0, argc-1, argv+1); - if (matches(*argv, "delete") == 0 || matches(*argv, "del") == 0) + if (matches(*argv, "allocspi") == 0) + return xfrm_state_allocspi(argc-1, argv+1); + if (matches(*argv, "delete") == 0) return xfrm_state_get_or_delete(argc-1, argv+1, 1); + if (matches(*argv, "deleteall") == 0 || matches(*argv, "delall") == 0) + return xfrm_state_list_or_deleteall(argc-1, argv+1, 1); if (matches(*argv, "list") == 0 || matches(*argv, "show") == 0 || matches(*argv, "lst") == 0) - return xfrm_state_list_or_flush(argc-1, argv+1, 0); + return xfrm_state_list_or_deleteall(argc-1, argv+1, 0); if (matches(*argv, "get") == 0) return xfrm_state_get_or_delete(argc-1, argv+1, 0); - if (matches(*argv, "flush") == 0) { - if (argc-1 < 1) - return xfrm_state_flush_all(); - else - return xfrm_state_list_or_flush(argc-1, argv+1, 1); - } + if (matches(*argv, "flush") == 0) + return xfrm_state_flush(argc-1, argv+1); if (matches(*argv, "help") == 0) usage(); fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm state help\".\n", *argv); diff --git a/lib/libnetlink.c b/lib/libnetlink.c index 4cd2b2a..67951fe 100644 --- a/lib/libnetlink.c +++ b/lib/libnetlink.c @@ -30,9 +30,10 @@ void rtnl_close(struct rtnl_handle *rth) close(rth->fd); } -int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions, int protocol) +int rtnl_open_byproto(struct rtnl_handle *rth, unsigned subscriptions, + int protocol) { - int addr_len; + socklen_t addr_len; int 
sndbuf = 32768; int rcvbuf = 32768; @@ -95,6 +96,7 @@ int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type) memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; + memset(&req, 0, sizeof(req)); req.nlh.nlmsg_len = sizeof(req); req.nlh.nlmsg_type = type; req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST; @@ -102,7 +104,8 @@ int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type) req.nlh.nlmsg_seq = rth->dump = ++rth->seq; req.g.rtgen_family = family; - return sendto(rth->fd, (void*)&req, sizeof(req), 0, (struct sockaddr*)&nladdr, sizeof(nladdr)); + return sendto(rth->fd, (void*)&req, sizeof(req), 0, + (struct sockaddr*)&nladdr, sizeof(nladdr)); } int rtnl_send(struct rtnl_handle *rth, const char *buf, int len) @@ -119,12 +122,15 @@ int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len) { struct nlmsghdr nlh; struct sockaddr_nl nladdr; - struct iovec iov[2] = { { &nlh, sizeof(nlh) }, { req, len } }; + struct iovec iov[2] = { + { .iov_base = &nlh, .iov_len = sizeof(nlh) }, + { .iov_base = req, .iov_len = len } + }; struct msghdr msg = { - (void*)&nladdr, sizeof(nladdr), - iov, 2, - NULL, 0, - 0 + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = 2, }; memset(&nladdr, 0, sizeof(nladdr)); @@ -145,21 +151,22 @@ int rtnl_dump_filter(struct rtnl_handle *rth, rtnl_filter_t junk, void *arg2) { - char buf[16384]; struct sockaddr_nl nladdr; - struct iovec iov = { buf, sizeof(buf) }; + struct iovec iov; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + char buf[16384]; + iov.iov_base = buf; while (1) { int status; struct nlmsghdr *h; - struct msghdr msg = { - (void*)&nladdr, sizeof(nladdr), - &iov, 1, - NULL, 0, - 0 - }; - + iov.iov_len = sizeof(buf); status = recvmsg(rth->fd, &msg, 0); if (status < 0) { @@ -168,14 +175,11 @@ int rtnl_dump_filter(struct rtnl_handle *rth, 
perror("OVERRUN"); continue; } + if (status == 0) { fprintf(stderr, "EOF on netlink\n"); return -1; } - if (msg.msg_namelen != sizeof(nladdr)) { - fprintf(stderr, "sender address length == %d\n", msg.msg_namelen); - exit(1); - } h = (struct nlmsghdr*)buf; while (NLMSG_OK(h, status)) { @@ -231,14 +235,17 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, unsigned seq; struct nlmsghdr *h; struct sockaddr_nl nladdr; - struct iovec iov = { (void*)n, n->nlmsg_len }; - char buf[16384]; + struct iovec iov = { + .iov_base = (void*) n, + .iov_len = n->nlmsg_len + }; struct msghdr msg = { - (void*)&nladdr, sizeof(nladdr), - &iov, 1, - NULL, 0, - 0 + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, }; + char buf[16384]; memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; @@ -301,6 +308,9 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, if (err < 0) return err; } + /* Don't forget to skip that message. 
*/ + status -= NLMSG_ALIGN(len); + h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len)); continue; } @@ -340,7 +350,7 @@ int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer, } } -int rtnl_listen(struct rtnl_handle *rtnl, +int rtnl_listen(struct rtnl_handle *rtnl, rtnl_filter_t handler, void *jarg) { @@ -348,22 +358,20 @@ int rtnl_listen(struct rtnl_handle *rtnl, struct nlmsghdr *h; struct sockaddr_nl nladdr; struct iovec iov; - char buf[8192]; struct msghdr msg = { - (void*)&nladdr, sizeof(nladdr), - &iov, 1, - NULL, 0, - 0 + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, }; + char buf[8192]; memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; nladdr.nl_pid = 0; nladdr.nl_groups = 0; - iov.iov_base = buf; - while (1) { iov.iov_len = sizeof(buf); status = recvmsg(rtnl->fd, &msg, 0); @@ -485,13 +493,13 @@ int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data) return 0; } -int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, +int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen) { int len = RTA_LENGTH(alen); struct rtattr *rta; - if (NLMSG_ALIGN(n->nlmsg_len) + len > maxlen) { + if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) { fprintf(stderr, "addattr_l ERROR: message exceeded bound of %d\n",maxlen); return -1; } @@ -499,7 +507,7 @@ int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, rta->rta_type = type; rta->rta_len = len; memcpy(RTA_DATA(rta), data, alen); - n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + len; + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); return 0; } @@ -533,13 +541,13 @@ int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data) return 0; } -int rta_addattr_l(struct rtattr *rta, int maxlen, int type, +int rta_addattr_l(struct rtattr *rta, int maxlen, int type, const void *data, int alen) { struct rtattr *subrta; int len = RTA_LENGTH(alen); - if 
(RTA_ALIGN(rta->rta_len) + len > maxlen) { + if (RTA_ALIGN(rta->rta_len) + RTA_ALIGN(len) > maxlen) { fprintf(stderr,"rta_addattr_l: Error! max allowed bound %d exceeded\n",maxlen); return -1; } @@ -547,7 +555,7 @@ int rta_addattr_l(struct rtattr *rta, int maxlen, int type, subrta->rta_type = type; subrta->rta_len = len; memcpy(RTA_DATA(subrta), data, alen); - rta->rta_len = NLMSG_ALIGN(rta->rta_len) + len; + rta->rta_len = NLMSG_ALIGN(rta->rta_len) + RTA_ALIGN(len); return 0; } diff --git a/lib/ll_addr.c b/lib/ll_addr.c index ea3d660..581487d 100644 --- a/lib/ll_addr.c +++ b/lib/ll_addr.c @@ -53,7 +53,8 @@ const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int return buf; } -int ll_addr_a2n(unsigned char *lladdr, int len, char *arg) +/*NB: lladdr is char * (rather than u8 *) because sa_data is char * (1003.1g) */ +int ll_addr_a2n(char *lladdr, int len, char *arg) { if (strchr(arg, '.')) { inet_prefix pfx; diff --git a/lib/ll_map.c b/lib/ll_map.c index 89c0d20..1acbf8f 100644 --- a/lib/ll_map.c +++ b/lib/ll_map.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "libnetlink.h" @@ -25,7 +26,7 @@ struct idxmap { struct idxmap * next; - int index; + unsigned index; int type; int alen; unsigned flags; @@ -86,7 +87,7 @@ int ll_remember_index(const struct sockaddr_nl *who, return 0; } -const char *ll_idx_n2a(int idx, char *buf) +const char *ll_idx_n2a(unsigned idx, char *buf) { struct idxmap *im; @@ -100,14 +101,14 @@ const char *ll_idx_n2a(int idx, char *buf) } -const char *ll_index_to_name(int idx) +const char *ll_index_to_name(unsigned idx) { static char nbuf[16]; return ll_idx_n2a(idx, nbuf); } -int ll_index_to_type(int idx) +int ll_index_to_type(unsigned idx) { struct idxmap *im; @@ -119,7 +120,7 @@ int ll_index_to_type(int idx) return -1; } -unsigned ll_index_to_flags(int idx) +unsigned ll_index_to_flags(unsigned idx) { struct idxmap *im; @@ -132,7 +133,7 @@ unsigned ll_index_to_flags(int idx) return 0; } -int 
ll_name_to_index(const char *name) +unsigned ll_name_to_index(const char *name) { static char ncache[16]; static int icache; @@ -152,7 +153,8 @@ int ll_name_to_index(const char *name) } } } - return 0; + + return if_nametoindex(name); } int ll_init_map(struct rtnl_handle *rth) diff --git a/lib/rt_names.c b/lib/rt_names.c index 03df086..05046c2 100644 --- a/lib/rt_names.c +++ b/lib/rt_names.c @@ -71,9 +71,8 @@ static char * rtnl_rtprot_tab[256] = { [RTPROT_ZEBRA] ="zebra", [RTPROT_BIRD] = "bird", [RTPROT_DNROUTED] = "dnrouted", -#ifdef RTPROT_XORP [RTPROT_XORP] = "xorp", -#endif + [RTPROT_NTK] = "ntk", }; diff --git a/lib/utils.c b/lib/utils.c index 73ce865..4bdda71 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -27,6 +27,9 @@ #include #include #include +#include +#include + #include "utils.h" @@ -237,10 +240,11 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family) dst->bitlen = 32; } if (slash) { - if (get_integer(&plen, slash+1, 0) || plen > dst->bitlen) { + if (get_unsigned(&plen, slash+1, 0) || plen > dst->bitlen) { err = -1; goto done; } + dst->flags |= PREFIXLEN_SPECIFIED; dst->bitlen = plen; } } @@ -500,9 +504,9 @@ const char *format_host(int af, int len, const void *addr, } -__u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen) +char *hexstring_n2a(const __u8 *str, int len, char *buf, int blen) { - __u8 *ptr = buf; + char *ptr = buf; int i; for (i=0; i= (maxargs - 1)) { + fprintf(stderr, "Too many arguments to command\n"); + exit(1); + } + argv[argc++] = cp; + } + argv[argc] = NULL; + + return argc; +} diff --git a/man/man8/ip.8 b/man/man8/ip.8 index cca6d1c..12da6d5 100644 --- a/man/man8/ip.8 +++ b/man/man8/ip.8 @@ -211,7 +211,7 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" .ti -8 .B ip rule -.RB " [ " list " | " add " | " del " ]" +.RB " [ " list " | " add " | " del " | " flush " ]" .I SELECTOR ACTION .ti -8 @@ -1574,6 +1574,9 @@ immediately. 
It is assumed that after a script finishes a batch of updates, it flushes the routing cache with .BR "ip route flush cache" . +.SS ip rule flush - also dumps all the deleted rules. +This command has no arguments. + .SS ip rule show - list rules This command has no arguments. diff --git a/man/man8/tc-pfifo.8 b/man/man8/tc-pfifo.8 new file mode 100644 index 0000000..e69de29 diff --git a/misc/Makefile b/misc/Makefile index 2ddf950..bda37e5 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -28,8 +28,8 @@ lnstat: $(LNSTATOBJ) install: all install -m 0755 -s $(TARGETS) $(DESTDIR)$(SBINDIR) - ln -sf $(SBINDIR)/lnstat $(DESTDIR)$(SBINDIR)/rtstat - ln -sf $(SBINDIR)/lnstat $(DESTDIR)$(SBINDIR)/ctstat + ln -sf lnstat $(DESTDIR)$(SBINDIR)/rtstat + ln -sf lnstat $(DESTDIR)$(SBINDIR)/ctstat clean: rm -f *.o $(TARGETS) ssfilter.c diff --git a/misc/arpd.c b/misc/arpd.c index 85b2a1c..4fd226e 100644 --- a/misc/arpd.c +++ b/misc/arpd.c @@ -180,7 +180,7 @@ int send_probe(int ifindex, __u32 addr) { struct ifreq ifr; struct sockaddr_in dst; - int len; + socklen_t len; unsigned char buf[256]; struct arphdr *ah = (struct arphdr*)buf; unsigned char *p = (unsigned char *)(ah+1); @@ -228,8 +228,7 @@ int send_probe(int ifindex, __u32 addr) memcpy(p, &addr, 4); p+=4; - len = sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr*)&sll, sizeof(sll)); - if (len < 0) + if (sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr*)&sll, sizeof(sll)) < 0) return -1; stats.probes_sent++; return 0; @@ -480,13 +479,14 @@ void get_arp_pkt(void) { unsigned char buf[1024]; struct sockaddr_ll sll; - int sll_len = sizeof(sll); + socklen_t sll_len = sizeof(sll); struct arphdr *a = (struct arphdr*)buf; struct dbkey key; DBT dbkey, dbdat; int n; - n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, (struct sockaddr*)&sll, &sll_len); + n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, + (struct sockaddr*)&sll, &sll_len); if (n < 0) { if (errno != EINTR && errno != EAGAIN) syslog(LOG_ERR, "recvfrom: %m"); 
@@ -708,6 +708,7 @@ int main(int argc, char **argv) fprintf(stderr, "Invalid IP address: \"%s\"\n", ipbuf); goto do_abort; } + dbdat.data = hexstring_a2n(macbuf, b1, 6); if (dbdat.data == NULL) goto do_abort; @@ -730,7 +731,7 @@ int main(int argc, char **argv) struct dbkey *key = dbkey.data; if (handle_if(key->iface)) { if (!IS_NEG(dbdat.data)) { - __u8 b1[18]; + char b1[18]; printf("%-8d %-15s %s\n", key->iface, inet_ntoa(*(struct in_addr*)&key->addr), diff --git a/misc/ifstat.c b/misc/ifstat.c index 1379a81..4b87994 100644 --- a/misc/ifstat.c +++ b/misc/ifstat.c @@ -484,7 +484,7 @@ void update_db(int interval) void server_loop(int fd) { - struct timeval snaptime; + struct timeval snaptime = { 0 }; struct pollfd p; p.fd = fd; p.events = p.revents = POLLIN; @@ -498,6 +498,7 @@ void server_loop(int fd) int status; int tdiff; struct timeval now; + gettimeofday(&now, NULL); tdiff = T_DIFF(now, snaptime); if (tdiff >= scan_interval) { @@ -505,6 +506,7 @@ void server_loop(int fd) snaptime = now; tdiff = 0; } + if (poll(&p, 1, tdiff + scan_interval) > 0 && (p.revents&POLLIN)) { int clnt = accept(fd, NULL, NULL); @@ -535,7 +537,8 @@ void server_loop(int fd) int verify_forging(int fd) { struct ucred cred; - int olen = sizeof(cred); + socklen_t olen = sizeof(cred); + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) || olen < sizeof(cred)) return -1; diff --git a/misc/lnstat.c b/misc/lnstat.c index 03e6f3f..460540e 100644 --- a/misc/lnstat.c +++ b/misc/lnstat.c @@ -218,7 +218,7 @@ int main(int argc, char **argv) MODE_NORMAL, } mode = MODE_NORMAL; - unsigned long count = 0; + unsigned long count = 1; static struct field_params fp; int num_req_files = 0; char *req_files[LNSTAT_MAX_FILES]; @@ -242,9 +242,10 @@ int main(int argc, char **argv) while ((c = getopt_long(argc, argv,"Vc:df:h?i:k:s:w:", opts, NULL)) != -1) { + int i, len = 0; + char *tmp, *tok; + switch (c) { - int i, len = 0; - char *tmp, *tok; case 'c': count = strtoul(optarg, NULL, 0); break; diff 
--git a/misc/lnstat_util.c b/misc/lnstat_util.c index 6ff3779..59c5e96 100644 --- a/misc/lnstat_util.c +++ b/misc/lnstat_util.c @@ -52,8 +52,13 @@ static int scan_lines(struct lnstat_file *lf, int i) fgets(buf, sizeof(buf)-1, lf->fp); gettimeofday(&lf->last_read, NULL); - for (j = 0; j < lf->num_fields; j++) - lf->fields[j].values[i] = strtoul(ptr, &ptr, 16); + for (j = 0; j < lf->num_fields; j++) { + unsigned long f = strtoul(ptr, &ptr, 16); + if (j == 0) + lf->fields[j].values[i] = f; + else + lf->fields[j].values[i] += f; + } } return num_lines; } diff --git a/misc/nstat.c b/misc/nstat.c index f2887ec..fc4b03b 100644 --- a/misc/nstat.c +++ b/misc/nstat.c @@ -369,7 +369,7 @@ void update_db(int interval) void server_loop(int fd) { - struct timeval snaptime; + struct timeval snaptime = { 0 }; struct pollfd p; p.fd = fd; p.events = p.revents = POLLIN; @@ -422,7 +422,8 @@ void server_loop(int fd) int verify_forging(int fd) { struct ucred cred; - int olen = sizeof(cred); + socklen_t olen = sizeof(cred); + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) || olen < sizeof(cred)) return -1; diff --git a/misc/rtacct.c b/misc/rtacct.c index 5c6748b..0d8ff7b 100644 --- a/misc/rtacct.c +++ b/misc/rtacct.c @@ -68,7 +68,7 @@ struct rtacct_data unsigned long long val[256*4]; double rate[256*4]; - __u8 signature[128]; + char signature[128]; }; struct rtacct_data kern_db_static; @@ -363,13 +363,15 @@ void pad_kern_table(struct rtacct_data *dat, __u32 *ival) void server_loop(int fd) { - struct timeval snaptime; + struct timeval snaptime = { 0 }; struct pollfd p; p.fd = fd; p.events = p.revents = POLLIN; - sprintf(kern_db->signature, "%d.%lu sampling_interval=%d time_const=%d", - getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000); + sprintf(kern_db->signature, + "%u.%lu sampling_interval=%d time_const=%d", + (unsigned) getpid(), (unsigned long)random(), + scan_interval/1000, time_constant/1000); pad_kern_table(kern_db, 
read_kern_table(kern_db->ival)); @@ -411,7 +413,8 @@ void server_loop(int fd) int verify_forging(int fd) { struct ucred cred; - int olen = sizeof(cred); + socklen_t olen = sizeof(cred); + if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) || olen < sizeof(cred)) return -1; diff --git a/misc/ss.c b/misc/ss.c index 668a5bf..f902560 100644 --- a/misc/ss.c +++ b/misc/ss.c @@ -33,8 +33,9 @@ #include "libnetlink.h" #include "SNAPSHOT.h" +#include #include -#include +#include int resolve_hosts = 0; int resolve_services = 1; @@ -60,6 +61,7 @@ static const char *dg_proto = NULL; enum { TCP_DB, + DCCP_DB, UDP_DB, RAW_DB, UNIX_DG_DB, @@ -730,7 +732,7 @@ int run_ssfilter(struct ssfilter *f, struct tcpstat *s) static void ssfilter_patch(char *a, int len, int reloc) { while (len > 0) { - struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)a; + struct inet_diag_bc_op *op = (struct inet_diag_bc_op*)a; if (op->no == len+4) op->no += reloc; len -= op->yes; @@ -746,7 +748,7 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) case SSF_S_AUTO: { if (!(*bytecode=malloc(4))) abort(); - ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_AUTO, 4, 8 }; + ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_AUTO, 4, 8 }; return 8; } case SSF_DCOND: @@ -755,11 +757,11 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) struct aafilter *a = (void*)f->pred; struct aafilter *b; char *ptr; - int code = (f->type == SSF_DCOND ? TCPDIAG_BC_D_COND : TCPDIAG_BC_S_COND); + int code = (f->type == SSF_DCOND ? 
INET_DIAG_BC_D_COND : INET_DIAG_BC_S_COND); int len = 0; for (b=a; b; b=b->next) { - len += 4 + sizeof(struct tcpdiag_hostcond); + len += 4 + sizeof(struct inet_diag_hostcond); if (a->addr.family == AF_INET6) len += 16; else @@ -770,20 +772,20 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) if (!(ptr = malloc(len))) abort(); *bytecode = ptr; for (b=a; b; b=b->next) { - struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op *)ptr; + struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)ptr; int alen = (a->addr.family == AF_INET6 ? 16 : 4); - int oplen = alen + 4 + sizeof(struct tcpdiag_hostcond); - struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(ptr+4); + int oplen = alen + 4 + sizeof(struct inet_diag_hostcond); + struct inet_diag_hostcond *cond = (struct inet_diag_hostcond*)(ptr+4); - *op = (struct tcpdiag_bc_op){ code, oplen, oplen+4 }; + *op = (struct inet_diag_bc_op){ code, oplen, oplen+4 }; cond->family = a->addr.family; cond->port = a->port; cond->prefix_len = a->addr.bitlen; memcpy(cond->addr, a->addr.data, alen); ptr += oplen; if (b->next) { - op = (struct tcpdiag_bc_op *)ptr; - *op = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, len - (ptr-*bytecode)}; + op = (struct inet_diag_bc_op *)ptr; + *op = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, len - (ptr-*bytecode)}; ptr += 4; } } @@ -793,32 +795,32 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) { struct aafilter *x = (void*)f->pred; if (!(*bytecode=malloc(8))) abort(); - ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_D_GE, 8, 12 }; - ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port }; + ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_GE, 8, 12 }; + ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; return 8; } case SSF_D_LE: { struct aafilter *x = (void*)f->pred; if (!(*bytecode=malloc(8))) abort(); - ((struct 
tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_D_LE, 8, 12 }; - ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port }; + ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_D_LE, 8, 12 }; + ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; return 8; } case SSF_S_GE: { struct aafilter *x = (void*)f->pred; if (!(*bytecode=malloc(8))) abort(); - ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_S_GE, 8, 12 }; - ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port }; + ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_GE, 8, 12 }; + ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; return 8; } case SSF_S_LE: { struct aafilter *x = (void*)f->pred; if (!(*bytecode=malloc(8))) abort(); - ((struct tcpdiag_bc_op*)*bytecode)[0] = (struct tcpdiag_bc_op){ TCPDIAG_BC_S_LE, 8, 12 }; - ((struct tcpdiag_bc_op*)*bytecode)[1] = (struct tcpdiag_bc_op){ 0, 0, x->port }; + ((struct inet_diag_bc_op*)*bytecode)[0] = (struct inet_diag_bc_op){ INET_DIAG_BC_S_LE, 8, 12 }; + ((struct inet_diag_bc_op*)*bytecode)[1] = (struct inet_diag_bc_op){ 0, 0, x->port }; return 8; } @@ -844,7 +846,7 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) memcpy(a, a1, l1); memcpy(a+l1+4, a2, l2); free(a1); free(a2); - *(struct tcpdiag_bc_op*)(a+l1) = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, l2+4 }; + *(struct inet_diag_bc_op*)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, l2+4 }; *bytecode = a; return l1+l2+4; } @@ -855,7 +857,7 @@ static int ssfilter_bytecompile(struct ssfilter *f, char **bytecode) if (!(a = malloc(l1+4))) abort(); memcpy(a, a1, l1); free(a1); - *(struct tcpdiag_bc_op*)(a+l1) = (struct tcpdiag_bc_op){ TCPDIAG_BC_JMP, 4, 8 }; + *(struct inet_diag_bc_op*)(a+l1) = (struct inet_diag_bc_op){ INET_DIAG_BC_JMP, 4, 8 }; *bytecode = a; return 
l1+4; } @@ -1299,36 +1301,36 @@ static char *sprint_bw(char *buf, double bw) return buf; } -static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r) +static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r) { - struct rtattr * tb[TCPDIAG_MAX+1]; + struct rtattr * tb[INET_DIAG_MAX+1]; char b1[64]; double rtt = 0; - parse_rtattr(tb, TCPDIAG_MAX, (struct rtattr*)(r+1), + parse_rtattr(tb, INET_DIAG_MAX, (struct rtattr*)(r+1), nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r))); - if (tb[TCPDIAG_MEMINFO]) { - const struct tcpdiag_meminfo *minfo - = RTA_DATA(tb[TCPDIAG_MEMINFO]); + if (tb[INET_DIAG_MEMINFO]) { + const struct inet_diag_meminfo *minfo + = RTA_DATA(tb[INET_DIAG_MEMINFO]); printf(" mem:(r%u,w%u,f%u,t%u)", - minfo->tcpdiag_rmem, - minfo->tcpdiag_wmem, - minfo->tcpdiag_fmem, - minfo->tcpdiag_tmem); + minfo->idiag_rmem, + minfo->idiag_wmem, + minfo->idiag_fmem, + minfo->idiag_tmem); } - if (tb[TCPDIAG_INFO]) { + if (tb[INET_DIAG_INFO]) { struct tcp_info *info; - int len = RTA_PAYLOAD(tb[TCPDIAG_INFO]); + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); /* workaround for older kernels with less fields */ if (len < sizeof(*info)) { info = alloca(sizeof(*info)); memset(info, 0, sizeof(*info)); - memcpy(info, RTA_DATA(tb[TCPDIAG_INFO]), len); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); } else - info = RTA_DATA(tb[TCPDIAG_INFO]); + info = RTA_DATA(tb[INET_DIAG_INFO]); if (show_options) { if (info->tcpi_options & TCPI_OPT_TIMESTAMPS) @@ -1338,6 +1340,10 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r) if (info->tcpi_options & TCPI_OPT_ECN) printf(" ecn"); } + + if (tb[INET_DIAG_CONG]) + printf("%s", (char *) RTA_DATA(tb[INET_DIAG_CONG])); + if (info->tcpi_options & TCPI_OPT_WSCALE) printf(" wscale:%d,%d", info->tcpi_snd_wscale, info->tcpi_rcv_wscale); @@ -1352,18 +1358,15 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r) printf(" cwnd:%d", info->tcpi_snd_cwnd); if 
(info->tcpi_snd_ssthresh < 0xFFFF) printf(" ssthresh:%d", info->tcpi_snd_ssthresh); - + rtt = (double) info->tcpi_rtt; - if (tb[TCPDIAG_VEGASINFO]) { + if (tb[INET_DIAG_VEGASINFO]) { const struct tcpvegas_info *vinfo - = RTA_DATA(tb[TCPDIAG_VEGASINFO]); + = RTA_DATA(tb[INET_DIAG_VEGASINFO]); - if (vinfo->tcpv_enabled) - printf(" vegas"); - - if (vinfo->tcpv_rtt && - vinfo->tcpv_rtt != 0x7fffffff) - rtt = vinfo->tcpv_rtt; + if (vinfo->tcpv_enabled && + vinfo->tcpv_rtt && vinfo->tcpv_rtt != 0x7fffffff) + rtt = vinfo->tcpv_rtt; } if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) { @@ -1383,20 +1386,20 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct tcpdiagmsg *r) int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f) { - struct tcpdiagmsg *r = NLMSG_DATA(nlh); + struct inet_diag_msg *r = NLMSG_DATA(nlh); struct tcpstat s; - s.state = r->tcpdiag_state; - s.local.family = s.remote.family = r->tcpdiag_family; - s.lport = ntohs(r->id.tcpdiag_sport); - s.rport = ntohs(r->id.tcpdiag_dport); + s.state = r->idiag_state; + s.local.family = s.remote.family = r->idiag_family; + s.lport = ntohs(r->id.idiag_sport); + s.rport = ntohs(r->id.idiag_dport); if (s.local.family == AF_INET) { s.local.bytelen = s.remote.bytelen = 4; } else { s.local.bytelen = s.remote.bytelen = 16; } - memcpy(s.local.data, r->id.tcpdiag_src, s.local.bytelen); - memcpy(s.remote.data, r->id.tcpdiag_dst, s.local.bytelen); + memcpy(s.local.data, r->id.idiag_src, s.local.bytelen); + memcpy(s.remote.data, r->id.idiag_dst, s.local.bytelen); if (f && f->f && run_ssfilter(f->f, &s) == 0) return 0; @@ -1406,33 +1409,33 @@ int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f) if (state_width) printf("%-*s ", state_width, sstate_name[s.state]); - printf("%-6d %-6d ", r->tcpdiag_rqueue, r->tcpdiag_wqueue); + printf("%-6d %-6d ", r->idiag_rqueue, r->idiag_wqueue); formatted_print(&s.local, s.lport); formatted_print(&s.remote, s.rport); if (show_options) { - if (r->tcpdiag_timer) { - if 
(r->tcpdiag_timer > 4) - r->tcpdiag_timer = 5; + if (r->idiag_timer) { + if (r->idiag_timer > 4) + r->idiag_timer = 5; printf(" timer:(%s,%s,%d)", - tmr_name[r->tcpdiag_timer], - print_ms_timer(r->tcpdiag_expires), - r->tcpdiag_retrans); + tmr_name[r->idiag_timer], + print_ms_timer(r->idiag_expires), + r->idiag_retrans); } } if (show_users) { char ubuf[4096]; - if (find_users(r->tcpdiag_inode, ubuf, sizeof(ubuf)) > 0) + if (find_users(r->idiag_inode, ubuf, sizeof(ubuf)) > 0) printf(" users:(%s)", ubuf); } if (show_details) { - if (r->tcpdiag_uid) - printf(" uid:%u", (unsigned)r->tcpdiag_uid); - printf(" ino:%u", (unsigned)r->tcpdiag_inode); - printf(" sk:%08x", r->id.tcpdiag_cookie[0]); - if (r->id.tcpdiag_cookie[1] != 0) - printf("%08x", r->id.tcpdiag_cookie[1]); + if (r->idiag_uid) + printf(" uid:%u", (unsigned)r->idiag_uid); + printf(" ino:%u", (unsigned)r->idiag_inode); + printf(" sk:%08x", r->id.idiag_cookie[0]); + if (r->id.idiag_cookie[1] != 0) + printf("%08x", r->id.idiag_cookie[1]); } if (show_mem || show_tcpinfo) { printf("\n\t"); @@ -1442,16 +1445,15 @@ int tcp_show_sock(struct nlmsghdr *nlh, struct filter *f) printf("\n"); return 0; - } -int tcp_show_netlink(struct filter *f, FILE *dump_fp) +int tcp_show_netlink(struct filter *f, FILE *dump_fp, int socktype) { int fd; struct sockaddr_nl nladdr; struct { struct nlmsghdr nlh; - struct tcpdiagreq r; + struct inet_diag_req r; } req; char *bc = NULL; int bclen; @@ -1460,32 +1462,36 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp) char buf[8192]; struct iovec iov[3]; - if ((fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_TCPDIAG)) < 0) + if ((fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_INET_DIAG)) < 0) return -1; memset(&nladdr, 0, sizeof(nladdr)); nladdr.nl_family = AF_NETLINK; req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = TCPDIAG_GETSOCK; + req.nlh.nlmsg_type = socktype; req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST; req.nlh.nlmsg_pid = 0; req.nlh.nlmsg_seq = 123456; memset(&req.r, 0, 
sizeof(req.r)); - req.r.tcpdiag_family = AF_INET; - req.r.tcpdiag_states = f->states; + req.r.idiag_family = AF_INET; + req.r.idiag_states = f->states; if (show_mem) - req.r.tcpdiag_ext |= (1<<(TCPDIAG_MEMINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_MEMINFO-1)); if (show_tcpinfo) { - req.r.tcpdiag_ext |= (1<<(TCPDIAG_INFO-1)); - req.r.tcpdiag_ext |= (1<<(TCPDIAG_VEGASINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_INFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_VEGASINFO-1)); + req.r.idiag_ext |= (1<<(INET_DIAG_CONG-1)); } - iov[0] = (struct iovec){ &req, sizeof(req) }; + iov[0] = (struct iovec){ + .iov_base = &req, + .iov_len = sizeof(req) + }; if (f->f) { bclen = ssfilter_bytecompile(f->f, &bc); - rta.rta_type = TCPDIAG_REQ_BYTECODE; + rta.rta_type = INET_DIAG_REQ_BYTECODE; rta.rta_len = RTA_LENGTH(bclen); iov[1] = (struct iovec){ &rta, sizeof(rta) }; iov[2] = (struct iovec){ bc, bclen }; @@ -1493,17 +1499,19 @@ int tcp_show_netlink(struct filter *f, FILE *dump_fp) } msg = (struct msghdr) { - (void*)&nladdr, sizeof(nladdr), - iov, f->f ? 3 : 1, - NULL, 0, - 0 + .msg_name = (void*)&nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = f->f ? 3 : 1, }; if (sendmsg(fd, &msg, 0) < 0) return -1; - - iov[0] = (struct iovec){ buf, sizeof(buf) }; + iov[0] = (struct iovec){ + .iov_base = buf, + .iov_len = sizeof(buf) + }; while (1) { int status; @@ -1629,7 +1637,7 @@ int tcp_show_netlink_file(struct filter *f) } } -int tcp_show(struct filter *f) +int tcp_show(struct filter *f, int socktype) { int fd = -1; char *buf = NULL; @@ -1641,7 +1649,7 @@ int tcp_show(struct filter *f) return tcp_show_netlink_file(f); if (!getenv("PROC_NET_TCP") && !getenv("PROC_ROOT") - && tcp_show_netlink(f, NULL) == 0) + && tcp_show_netlink(f, NULL, socktype) == 0) return 0; /* Sigh... We have to parse /proc/net/tcp... 
*/ @@ -2383,6 +2391,7 @@ static void usage(void) " -0, --packet display PACKET sockets\n" " -t, --tcp display only TCP sockets\n" " -u, --udp display only UDP sockets\n" +" -d, --dccp display only DCCP sockets\n" " -w, --raw display only RAW sockets\n" " -x, --unix display only Unix domain sockets\n" " -f, --family=FAMILY display sockets of type FAMILY\n" @@ -2432,6 +2441,7 @@ static const struct option long_opts[] = { { "memory", 0, 0, 'm' }, { "info", 0, 0, 'i' }, { "processes", 0, 0, 'p' }, + { "dccp", 0, 0, 'd' }, { "tcp", 0, 0, 't' }, { "udp", 0, 0, 'u' }, { "raw", 0, 0, 'w' }, @@ -2466,7 +2476,7 @@ int main(int argc, char *argv[]) current_filter.states = default_filter.states; - while ((ch = getopt_long(argc, argv, "haletuwxnro460spf:miA:D:F:vV", + while ((ch = getopt_long(argc, argv, "dhaletuwxnro460spf:miA:D:F:vV", long_opts, NULL)) != EOF) { switch(ch) { case 'n': @@ -2491,6 +2501,10 @@ int main(int argc, char *argv[]) case 'p': show_users++; break; + case 'd': + current_filter.dbs |= (1<$@ +all: $(DISTGEN) $(DISTDATA) -%.dist: %.dat - ./maketable $< >$@ +$(DISTGEN): + $(HOSTCC) $(CCOPTS) -I../include -o $@ $@.c -lm -all: $(DISTGEN) $(DISTDATA) +%.dist: % + ./$* > $@ + +experimental.dist: maketable experimental.dat + ./maketable experimental.dat > experimental.dist install: all mkdir -p $(DESTDIR)/usr/lib/tc @@ -20,8 +23,3 @@ install: all clean: rm -f $(DISTDATA) $(DISTGEN) - -maketable: maketable.c - $(HOSTCC) $(CFLAGS) -o $@ $< -lm - - diff --git a/netem/normal.c b/netem/normal.c index e6683db..dbdebb1 100644 --- a/netem/normal.c +++ b/netem/normal.c @@ -20,21 +20,16 @@ normal(double x, double mu, double sigma) return .5 + .5*erf((x-mu)/(sqrt(2.0)*sigma)); } + int main(int argc, char **argv) { - double x, *table; int i, n; - - table = calloc(sizeof(double), TABLESIZE+1); - if (!table) { - fprintf(stderr, "Not enough memory\n"); - return 1; - } - + double x; + double table[TABLESIZE+1]; for (x = -10.0; x < 10.05; x += .00005) { - i = 
(int)rint(TABLESIZE*normal(x, 0.0, 1.0)); + i = rint(TABLESIZE * normal(x, 0.0, 1.0)); table[i] = x; } @@ -51,6 +46,6 @@ main(int argc, char **argv) n = 0; } } - free(table); + return 0; } diff --git a/netem/paretonormal.c b/netem/paretonormal.c index c793df6..ed75f28 100644 --- a/netem/paretonormal.c +++ b/netem/paretonormal.c @@ -29,7 +29,6 @@ normal(double x, double mu, double sigma) return .5 + .5*erf((x-mu)/(sqrt(2.0)*sigma)); } - static const double a=3.0; static int @@ -50,18 +49,12 @@ paretovalue(int i) int main(int argc, char **argv) { - double x; - double *table; int i,n; - - table = calloc(TABLESIZE+1, sizeof(double)); - if (!table) { - fprintf(stderr, "Out of memory!\n"); - exit(1); - } + double x; + double table[TABLESIZE+1]; for (x = -10.0; x < 10.05; x += .00005) { - i = (int)rint(TABLESIZE*normal(x, 0.0, 1.0)); + i = rint(TABLESIZE*normal(x, 0.0, 1.0)); table[i] = x; } printf( @@ -84,7 +77,6 @@ main(int argc, char **argv) n = 0; } } - free(table); return 0; } diff --git a/tc/Makefile b/tc/Makefile index 06546f9..9d618ff 100644 --- a/tc/Makefile +++ b/tc/Makefile @@ -1,5 +1,6 @@ TCOBJ= tc.o tc_qdisc.o tc_class.o tc_filter.o tc_util.o \ - m_police.o m_estimator.o m_action.o + m_police.o m_estimator.o m_action.o m_ematch.o \ + emp_ematch.yacc.o emp_ematch.lex.o include ../Config @@ -14,6 +15,7 @@ TCMODULES += f_rsvp.o TCMODULES += f_u32.o TCMODULES += f_route.o TCMODULES += f_fw.o +TCMODULES += f_basic.o TCMODULES += q_dsmark.o TCMODULES += q_gred.o TCMODULES += f_tcindex.o @@ -28,6 +30,10 @@ TCMODULES += p_ip.o TCMODULES += p_icmp.o TCMODULES += p_tcp.o TCMODULES += p_udp.o +TCMODULES += em_nbyte.o +TCMODULES += em_cmp.o +TCMODULES += em_u32.o +TCMODULES += em_meta.o TCOBJ += $(TCMODULES) @@ -48,6 +54,9 @@ LDLIBS += -L. 
-ltc -lm -ldl LDFLAGS += -Wl,-export-dynamic +YACC := bison +LEX := flex + %.so: %.c $(CC) $(CFLAGS) -shared -fpic $< -o $@ @@ -67,8 +76,14 @@ install: all done clean: - rm -f $(TCOBJ) $(TCLIB) libtc.a tc *.so + rm -f $(TCOBJ) $(TCLIB) libtc.a tc *.so emp_ematch.yacc.h; \ + rm -f emp_ematch.yacc.output q_atm.so: q_atm.c $(CC) $(CFLAGS) -shared -fpic -o q_atm.so q_atm.c -latm +%.yacc.c: %.y + $(YACC) $(YACCFLAGS) -o $@ $< + +%.lex.c: %.l + $(LEX) $(LEXFLAGS) -o$@ $< diff --git a/tc/em_cmp.c b/tc/em_cmp.c new file mode 100644 index 0000000..c636c53 --- /dev/null +++ b/tc/em_cmp.c @@ -0,0 +1,188 @@ +/* + * em_cmp.c Simple comparison Ematch + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "m_ematch.h" +#include + +extern struct ematch_util cmp_ematch_util; + +static void cmp_print_usage(FILE *fd) +{ + fprintf(fd, + "Usage: cmp(ALIGN at OFFSET [ ATTRS ] { eq | lt | gt } VALUE)\n" \ + "where: ALIGN := { u8 | u16 | u32 }\n" \ + " ATTRS := [ layer LAYER ] [ mask MASK ] [ trans ]\n" \ + " LAYER := { link | header | next-header | 0..%d }\n" \ + "\n" \ + "Example: cmp(u16 at 3 layer 2 mask 0xff00 gt 20)\n", + TCF_LAYER_MAX); +} + +static int cmp_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr, + struct bstr *args) +{ + struct bstr *a; + int align, opnd = 0; + unsigned long offset = 0, layer = TCF_LAYER_NETWORK, mask = 0, value = 0; + int offset_present = 0, value_present = 0; + struct tcf_em_cmp cmp; + + memset(&cmp, 0, sizeof(cmp)); + +#define PARSE_ERR(CARG, FMT, ARGS...)
\ + em_parse_error(EINVAL, args, CARG, &cmp_ematch_util, FMT ,##ARGS) + + if (args == NULL) + return PARSE_ERR(args, "cmp: missing arguments"); + + if (!bstrcmp(args, "u8")) + align = TCF_EM_ALIGN_U8; + else if (!bstrcmp(args, "u16")) + align = TCF_EM_ALIGN_U16; + else if (!bstrcmp(args, "u32")) + align = TCF_EM_ALIGN_U32; + else + return PARSE_ERR(args, "cmp: invalid alignment"); + + for (a = bstr_next(args); a; a = bstr_next(a)) { + if (!bstrcmp(a, "at")) { + if (a->next == NULL) + return PARSE_ERR(a, "cmp: missing argument"); + a = bstr_next(a); + + offset = bstrtoul(a); + if (offset == ULONG_MAX) + return PARSE_ERR(a, "cmp: invalid offset, " \ + "must be numeric"); + + offset_present = 1; + } else if (!bstrcmp(a, "layer")) { + if (a->next == NULL) + return PARSE_ERR(a, "cmp: missing argument"); + a = bstr_next(a); + + layer = parse_layer(a); + if (layer == INT_MAX) { + layer = bstrtoul(a); + if (layer == ULONG_MAX) + return PARSE_ERR(a, "cmp: invalid " \ + "layer"); + } + + if (layer > TCF_LAYER_MAX) + return PARSE_ERR(a, "cmp: illegal layer, " \ + "must be in 0..%d", TCF_LAYER_MAX); + } else if (!bstrcmp(a, "mask")) { + if (a->next == NULL) + return PARSE_ERR(a, "cmp: missing argument"); + a = bstr_next(a); + + mask = bstrtoul(a); + if (mask == ULONG_MAX) + return PARSE_ERR(a, "cmp: invalid mask"); + } else if (!bstrcmp(a, "trans")) { + cmp.flags |= TCF_EM_CMP_TRANS; + } else if (!bstrcmp(a, "eq") || !bstrcmp(a, "gt") || + !bstrcmp(a, "lt")) { + + if (!bstrcmp(a, "eq")) + opnd = TCF_EM_OPND_EQ; + else if (!bstrcmp(a, "gt")) + opnd = TCF_EM_OPND_GT; + else if (!bstrcmp(a, "lt")) + opnd = TCF_EM_OPND_LT; + + if (a->next == NULL) + return PARSE_ERR(a, "cmp: missing argument"); + a = bstr_next(a); + + value = bstrtoul(a); + if (value == ULONG_MAX) + return PARSE_ERR(a, "cmp: invalid value"); + + value_present = 1; + } else + return PARSE_ERR(a, "cmp: unknown parameter"); + } + + if (offset_present == 0 || value_present == 0) + return PARSE_ERR(a, "cmp: offset
and value required"); + + cmp.val = (__u32) value; + cmp.mask = (__u32) mask; + cmp.off = (__u16) offset; + cmp.align = (__u8) align; + cmp.layer = (__u8) layer; + cmp.opnd = (__u8) opnd; + + addraw_l(n, MAX_MSG, hdr, sizeof(*hdr)); + addraw_l(n, MAX_MSG, &cmp, sizeof(cmp)); + +#undef PARSE_ERR + return 0; +} + +static int cmp_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data, + int data_len) +{ + struct tcf_em_cmp *cmp = data; + + if (data_len < sizeof(*cmp)) { + fprintf(stderr, "CMP header size mismatch\n"); + return -1; + } + + if (cmp->align == TCF_EM_ALIGN_U8) + fprintf(fd, "u8 "); + else if (cmp->align == TCF_EM_ALIGN_U16) + fprintf(fd, "u16 "); + else if (cmp->align == TCF_EM_ALIGN_U32) + fprintf(fd, "u32 "); + + fprintf(fd, "at %d layer %d ", cmp->off, cmp->layer); + + if (cmp->mask) + fprintf(fd, "mask 0x%x ", cmp->mask); + + if (cmp->flags & TCF_EM_CMP_TRANS) + fprintf(fd, "trans "); + + if (cmp->opnd == TCF_EM_OPND_EQ) + fprintf(fd, "eq "); + else if (cmp->opnd == TCF_EM_OPND_LT) + fprintf(fd, "lt "); + else if (cmp->opnd == TCF_EM_OPND_GT) + fprintf(fd, "gt "); + + fprintf(fd, "%d", cmp->val); + + return 0; +} + +struct ematch_util cmp_ematch_util = { + .kind = "cmp", + .kind_num = TCF_EM_CMP, + .parse_eopt = cmp_parse_eopt, + .print_eopt = cmp_print_eopt, + .print_usage = cmp_print_usage +}; diff --git a/tc/em_meta.c b/tc/em_meta.c new file mode 100644 index 0000000..bd1e151 --- /dev/null +++ b/tc/em_meta.c @@ -0,0 +1,550 @@ +/* + * em_meta.c Metadata Ematch + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version.
+ * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "m_ematch.h" +#include + +extern struct ematch_util meta_ematch_util; + +static void meta_print_usage(FILE *fd) +{ + fprintf(fd, + "Usage: meta(OBJECT { eq | lt | gt } OBJECT)\n" \ + "where: OBJECT := { META_ID | VALUE }\n" \ + " META_ID := id [ shift SHIFT ] [ mask MASK ]\n" \ + "\n" \ + "Example: meta(nfmark gt 24)\n" \ + " meta(indev shift 1 eq \"ppp\")\n" \ + " meta(tcindex mask 0xf0 eq 0xf0)\n" \ + " meta(dev eq indev)\n" \ + "\n" \ + "For a list of meta identifiers, use meta(list).\n"); +} + +struct meta_entry { + int id; + char * kind; + char * mask; + char * desc; +} meta_table[] = { +#define TCF_META_ID_SECTION 0 +#define __A(id, name, mask, desc) { TCF_META_ID_##id, name, mask, desc } + __A(SECTION, "Generic", "", ""), + __A(RANDOM, "random", "i", + "Random value (32 bit)"), + __A(LOADAVG_0, "loadavg_1", "i", + "Load average in last minute"), + __A(LOADAVG_1, "loadavg_5", "i", + "Load average in last 5 minutes"), + __A(LOADAVG_2, "loadavg_15", "i", + "Load average in last 15 minutes"), + + __A(SECTION, "Interfaces", "", ""), + __A(DEV, "dev", "iv", + "Device the packet is on"), + __A(SECTION, "Packet attributes", "", ""), + __A(PRIORITY, "priority", "i", + "Priority of packet"), + __A(PROTOCOL, "protocol", "i", + "Link layer protocol"), + __A(PKTTYPE, "pkt_type", "i", + "Packet type (uni|multi|broad|...)cast"), + __A(PKTLEN, "pkt_len", "i", + "Length of packet"), + __A(DATALEN, "data_len", "i", + "Length of data in packet"), + __A(MACLEN, "mac_len", "i", + "Length of link layer header"), + + __A(SECTION, "Netfilter", "", ""), + __A(NFMARK, "nf_mark", "i", + "Netfilter mark"), + __A(NFMARK, "fwmark", "i", + "Alias for nf_mark"), + + __A(SECTION, "Traffic Control", "", ""), + __A(TCINDEX, "tc_index", "i", "TC Index"), + __A(SECTION, "Routing", "", ""), + __A(RTCLASSID, "rt_classid", "i", + "Routing
ClassID (cls_route)"), + __A(RTIIF, "rt_iif", "i", + "Incoming interface index"), + + __A(SECTION, "Sockets", "", ""), + __A(SK_FAMILY, "sk_family", "i", "Address family"), + __A(SK_STATE, "sk_state", "i", "State"), + __A(SK_REUSE, "sk_reuse", "i", "Reuse Flag"), + __A(SK_BOUND_IF, "sk_bind_if", "iv", "Bound interface"), + __A(SK_REFCNT, "sk_refcnt", "i", "Reference counter"), + __A(SK_SHUTDOWN, "sk_shutdown", "i", "Shutdown mask"), + __A(SK_PROTO, "sk_proto", "i", "Protocol"), + __A(SK_TYPE, "sk_type", "i", "Type"), + __A(SK_RCVBUF, "sk_rcvbuf", "i", "Receive buffer size"), + __A(SK_RMEM_ALLOC, "sk_rmem", "i", "RMEM"), + __A(SK_WMEM_ALLOC, "sk_wmem", "i", "WMEM"), + __A(SK_OMEM_ALLOC, "sk_omem", "i", "OMEM"), + __A(SK_WMEM_QUEUED, "sk_wmem_queue","i", "WMEM queue"), + __A(SK_SND_QLEN, "sk_snd_queue", "i", "Send queue length"), + __A(SK_RCV_QLEN, "sk_rcv_queue", "i", "Receive queue length"), + __A(SK_ERR_QLEN, "sk_err_queue", "i", "Error queue length"), + __A(SK_FORWARD_ALLOCS, "sk_fwd_alloc", "i", "Forward allocations"), + __A(SK_SNDBUF, "sk_sndbuf", "i", "Send buffer size"), +#undef __A +}; + +static inline int map_type(char k) +{ + switch (k) { + case 'i': return TCF_META_TYPE_INT; + case 'v': return TCF_META_TYPE_VAR; + } + + fprintf(stderr, "BUG: Unknown map character '%c'\n", k); + return INT_MAX; +} + +static struct meta_entry * lookup_meta_entry(struct bstr *kind) +{ + int i; + + for (i = 0; i < (sizeof(meta_table)/sizeof(meta_table[0])); i++) + if (!bstrcmp(kind, meta_table[i].kind) && + meta_table[i].id != 0) + return &meta_table[i]; + + return NULL; +} + +static struct meta_entry * lookup_meta_entry_byid(int id) +{ + int i; + + for (i = 0; i < (sizeof(meta_table)/sizeof(meta_table[0])); i++) + if (meta_table[i].id == id) + return &meta_table[i]; + + return NULL; +} + +static inline void dump_value(struct nlmsghdr *n, int tlv, unsigned long val, + struct tcf_meta_val *hdr) +{ + __u32 t; + + switch (TCF_META_TYPE(hdr->kind)) { + case TCF_META_TYPE_INT: + t 
= val; + addattr_l(n, MAX_MSG, tlv, &t, sizeof(t)); + break; + + case TCF_META_TYPE_VAR: + if (TCF_META_ID(hdr->kind) == TCF_META_ID_VALUE) { + struct bstr *a = (struct bstr *) val; + addattr_l(n, MAX_MSG, tlv, a->data, a->len); + } + break; + } +} + +static inline int is_compatible(struct tcf_meta_val *what, + struct tcf_meta_val *needed) +{ + char *p; + struct meta_entry *entry; + + entry = lookup_meta_entry_byid(TCF_META_ID(what->kind)); + + if (entry == NULL) + return 0; + + for (p = entry->mask; *p; p++) + if (map_type(*p) == TCF_META_TYPE(needed->kind)) + return 1; + + return 0; +} + +static void list_meta_ids(FILE *fd) +{ + int i; + + fprintf(fd, + "--------------------------------------------------------\n" \ + " ID Type Description\n" \ + "--------------------------------------------------------"); + + for (i = 0; i < (sizeof(meta_table)/sizeof(meta_table[0])); i++) { + if (meta_table[i].id == TCF_META_ID_SECTION) { + fprintf(fd, "\n%s:\n", meta_table[i].kind); + } else { + char *p = meta_table[i].mask; + char buf[64] = {0}; + + fprintf(fd, " %-16s ", meta_table[i].kind); + + while (*p) { + int type = map_type(*p); + + switch (type) { + case TCF_META_TYPE_INT: + strcat(buf, "INT"); + break; + + case TCF_META_TYPE_VAR: + strcat(buf, "VAR"); + break; + } + + if (*(++p)) + strcat(buf, ","); + } + + fprintf(fd, "%-10s %s\n", buf, meta_table[i].desc); + } + } + + fprintf(fd, + "--------------------------------------------------------\n"); +} + +#undef TCF_META_ID_SECTION + +#define PARSE_FAILURE ((void *) (-1)) + +#define PARSE_ERR(CARG, FMT, ARGS...)
\ + em_parse_error(EINVAL, args, CARG, &meta_ematch_util, FMT ,##ARGS) + +static inline int can_adopt(struct tcf_meta_val *val) +{ + return !!TCF_META_ID(val->kind); +} + +static inline int overwrite_type(struct tcf_meta_val *src, + struct tcf_meta_val *dst) +{ + return (TCF_META_TYPE(dst->kind) << 12) | TCF_META_ID(src->kind); +} + + +static inline struct bstr * +parse_object(struct bstr *args, struct bstr *arg, struct tcf_meta_val *obj, + unsigned long *dst, struct tcf_meta_val *left) +{ + struct meta_entry *entry; + unsigned long num; + struct bstr *a; + + if (arg->quoted) { + obj->kind = TCF_META_TYPE_VAR << 12; + obj->kind |= TCF_META_ID_VALUE; + *dst = (unsigned long) arg; + return bstr_next(arg); + } + + num = bstrtoul(arg); + if (num != LONG_MAX) { + obj->kind = TCF_META_TYPE_INT << 12; + obj->kind |= TCF_META_ID_VALUE; + *dst = (unsigned long) num; + return bstr_next(arg); + } + + entry = lookup_meta_entry(arg); + + if (entry == NULL) { + PARSE_ERR(arg, "meta: unknown meta id\n"); + return PARSE_FAILURE; + } + + obj->kind = entry->id | (map_type(entry->mask[0]) << 12); + + if (left) { + struct tcf_meta_val *right = obj; + + if (TCF_META_TYPE(right->kind) == TCF_META_TYPE(left->kind)) + goto compatible; + + if (can_adopt(left) && !can_adopt(right)) { + if (is_compatible(left, right)) + left->kind = overwrite_type(left, right); + else + goto not_compatible; + } else if (can_adopt(right) && !can_adopt(left)) { + if (is_compatible(right, left)) + right->kind = overwrite_type(right, left); + else + goto not_compatible; + } else if (can_adopt(left) && can_adopt(right)) { + if (is_compatible(left, right)) + left->kind = overwrite_type(left, right); + else if (is_compatible(right, left)) + right->kind = overwrite_type(right, left); + else + goto not_compatible; + } else + goto not_compatible; + } + +compatible: + + a = bstr_next(arg); + + while(a) { + if (!bstrcmp(a, "shift")) { + unsigned long shift; + + if (a->next == NULL) { + PARSE_ERR(a, "meta: missing 
argument"); + return PARSE_FAILURE; + } + a = bstr_next(a); + + shift = bstrtoul(a); + if (shift == LONG_MAX) { + PARSE_ERR(a, "meta: invalid shift, must " \ + "be numeric"); + return PARSE_FAILURE; + } + + obj->shift = (__u8) shift; + a = bstr_next(a); + } else if (!bstrcmp(a, "mask")) { + unsigned long mask; + + if (a->next == NULL) { + PARSE_ERR(a, "meta: missing argument"); + return PARSE_FAILURE; + } + a = bstr_next(a); + + mask = bstrtoul(a); + if (mask == LONG_MAX) { + PARSE_ERR(a, "meta: invalid mask, must be " \ + "numeric"); + return PARSE_FAILURE; + } + *dst = (unsigned long) mask; + a = bstr_next(a); + } else + break; + } + + return a; + +not_compatible: + PARSE_ERR(arg, "lvalue and rvalue are not compatible."); + return PARSE_FAILURE; +} + +static int meta_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr, + struct bstr *args) +{ + int opnd; + struct bstr *a; + struct tcf_meta_hdr meta_hdr; + unsigned long lvalue = 0, rvalue = 0; + + memset(&meta_hdr, 0, sizeof(meta_hdr)); + + if (args == NULL) + return PARSE_ERR(args, "meta: missing arguments"); + + if (!bstrcmp(args, "list")) { + list_meta_ids(stderr); + return -1; + } + + a = parse_object(args, args, &meta_hdr.left, &lvalue, NULL); + if (a == PARSE_FAILURE) + return -1; + else if (a == NULL) + return PARSE_ERR(args, "meta: missing operand"); + + if (!bstrcmp(a, "eq")) + opnd = TCF_EM_OPND_EQ; + else if (!bstrcmp(a, "gt")) + opnd = TCF_EM_OPND_GT; + else if (!bstrcmp(a, "lt")) + opnd = TCF_EM_OPND_LT; + else + return PARSE_ERR(a, "meta: invalid operand"); + + meta_hdr.left.op = (__u8) opnd; + + if (a->next == NULL) + return PARSE_ERR(args, "meta: missing rvalue"); + a = bstr_next(a); + + a = parse_object(args, a, &meta_hdr.right, &rvalue, &meta_hdr.left); + if (a == PARSE_FAILURE) + return -1; + else if (a != NULL) + return PARSE_ERR(a, "meta: unexpected trailer"); + + + addraw_l(n, MAX_MSG, hdr, sizeof(*hdr)); + + addattr_l(n, MAX_MSG, TCA_EM_META_HDR, &meta_hdr, sizeof(meta_hdr)); + + if 
(lvalue) + dump_value(n, TCA_EM_META_LVALUE, lvalue, &meta_hdr.left); + + if (rvalue) + dump_value(n, TCA_EM_META_RVALUE, rvalue, &meta_hdr.right); + + return 0; +} +#undef PARSE_ERR + +static inline void print_binary(FILE *fd, unsigned char *str, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (!isprint(str[i])) + goto binary; + + for (i = 0; i < len; i++) + fprintf(fd, "%c", str[i]); + return; + +binary: + for (i = 0; i < len; i++) + fprintf(fd, "%02x ", str[i]); + + fprintf(fd, "\""); + for (i = 0; i < len; i++) + fprintf(fd, "%c", isprint(str[i]) ? str[i] : '.'); + fprintf(fd, "\""); +} + +static inline int print_value(FILE *fd, int type, struct rtattr *rta) +{ + if (rta == NULL) { + fprintf(stderr, "Missing value TLV\n"); + return -1; + } + + switch(type) { + case TCF_META_TYPE_INT: + if (RTA_PAYLOAD(rta) < sizeof(__u32)) { + fprintf(stderr, "meta int type value TLV " \ + "size mismatch.\n"); + return -1; + } + fprintf(fd, "%d", *(__u32 *) RTA_DATA(rta)); + break; + + case TCF_META_TYPE_VAR: + print_binary(fd, RTA_DATA(rta), RTA_PAYLOAD(rta)); + break; + } + + return 0; +} + +static int print_object(FILE *fd, struct tcf_meta_val *obj, struct rtattr *rta) +{ + int id = TCF_META_ID(obj->kind); + int type = TCF_META_TYPE(obj->kind); + struct meta_entry *entry; + + if (id == TCF_META_ID_VALUE) + return print_value(fd, type, rta); + + entry = lookup_meta_entry_byid(id); + + if (entry == NULL) + fprintf(fd, "[unknown meta id %d]", id); + else + fprintf(fd, "%s", entry->kind); + + if (obj->shift) + fprintf(fd, " shift %d", obj->shift); + + switch (type) { + case TCF_META_TYPE_INT: + if (rta) { + if (RTA_PAYLOAD(rta) < sizeof(__u32)) + goto size_mismatch; + + fprintf(fd, " mask 0x%08x", + *(__u32*) RTA_DATA(rta)); + } + break; + } + + return 0; + +size_mismatch: + fprintf(stderr, "meta int type mask TLV size mismatch\n"); + return -1; +} + + +static int meta_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data, + int data_len) +{ + struct rtattr 
*tb[TCA_EM_META_MAX+1]; + struct tcf_meta_hdr *meta_hdr; + + if (parse_rtattr(tb, TCA_EM_META_MAX, data, data_len) < 0) + return -1; + + if (tb[TCA_EM_META_HDR] == NULL) { + fprintf(stderr, "Missing meta header\n"); + return -1; + } + + if (RTA_PAYLOAD(tb[TCA_EM_META_HDR]) < sizeof(*meta_hdr)) { + fprintf(stderr, "Meta header size mismatch\n"); + return -1; + } + + meta_hdr = RTA_DATA(tb[TCA_EM_META_HDR]); + + if (print_object(fd, &meta_hdr->left, tb[TCA_EM_META_LVALUE]) < 0) + return -1; + + switch (meta_hdr->left.op) { + case TCF_EM_OPND_EQ: + fprintf(fd, " eq "); + break; + case TCF_EM_OPND_LT: + fprintf(fd, " lt "); + break; + case TCF_EM_OPND_GT: + fprintf(fd, " gt "); + break; + } + + return print_object(fd, &meta_hdr->right, tb[TCA_EM_META_RVALUE]); +} + +struct ematch_util meta_ematch_util = { + .kind = "meta", + .kind_num = TCF_EM_META, + .parse_eopt = meta_parse_eopt, + .print_eopt = meta_print_eopt, + .print_usage = meta_print_usage +}; diff --git a/tc/em_nbyte.c b/tc/em_nbyte.c new file mode 100644 index 0000000..e0ed5ba --- /dev/null +++ b/tc/em_nbyte.c @@ -0,0 +1,144 @@ +/* + * em_nbyte.c N-Byte Ematch + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "m_ematch.h" +#include + +extern struct ematch_util nbyte_ematch_util; + +static void nbyte_print_usage(FILE *fd) +{ + fprintf(fd, + "Usage: nbyte(NEEDLE at OFFSET [layer LAYER])\n" \ + "where: NEEDLE := { string | \"c-escape-sequence\" }\n" \ + " OFFSET := int\n" \ + " LAYER := { link | header | next-header | 0..%d }\n" \ + "\n" \ + "Example: nbyte(\"ababa\" at 12 layer 1)\n", + TCF_LAYER_MAX); +} + +static int nbyte_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr, + struct bstr *args) +{ + struct bstr *a; + struct bstr *needle = args; + unsigned long offset = 0, layer = TCF_LAYER_NETWORK; + int offset_present = 0; + struct tcf_em_nbyte nb; + + memset(&nb, 0, sizeof(nb)); + +#define PARSE_ERR(CARG, FMT, ARGS...) \ + em_parse_error(EINVAL, args, CARG, &nbyte_ematch_util, FMT ,##ARGS) + + if (args == NULL) + return PARSE_ERR(args, "nbyte: missing arguments"); + + if (needle->len <= 0) + return PARSE_ERR(args, "nbyte: needle length is 0"); + + for (a = bstr_next(args); a; a = bstr_next(a)) { + if (!bstrcmp(a, "at")) { + if (a->next == NULL) + return PARSE_ERR(a, "nbyte: missing argument"); + a = bstr_next(a); + + offset = bstrtoul(a); + if (offset == ULONG_MAX) + return PARSE_ERR(a, "nbyte: invalid offset, " \ + "must be numeric"); + + offset_present = 1; + } else if (!bstrcmp(a, "layer")) { + if (a->next == NULL) + return PARSE_ERR(a, "nbyte: missing argument"); + a = bstr_next(a); + + layer = parse_layer(a); + if (layer == INT_MAX) { + layer = bstrtoul(a); + if (layer == ULONG_MAX) + return PARSE_ERR(a, "nbyte: invalid " \ + "layer"); + } + + if (layer > TCF_LAYER_MAX) + return PARSE_ERR(a, "nbyte: illegal layer, " \ + "must be in 0..%d", TCF_LAYER_MAX); + } else + return PARSE_ERR(a, "nbyte: unknown parameter"); + } + + if (offset_present == 0) + return PARSE_ERR(a, "nbyte: offset required"); + + 
nb.len = needle->len; + nb.layer = (__u8) layer; + nb.off = (__u16) offset; + + addraw_l(n, MAX_MSG, hdr, sizeof(*hdr)); + addraw_l(n, MAX_MSG, &nb, sizeof(nb)); + addraw_l(n, MAX_MSG, needle->data, needle->len); + +#undef PARSE_ERR + return 0; +} + +static int nbyte_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data, + int data_len) +{ + int i; + struct tcf_em_nbyte *nb = data; + __u8 *needle; + + if (data_len < sizeof(*nb)) { + fprintf(stderr, "NByte header size mismatch\n"); + return -1; + } + + if (data_len < sizeof(*nb) + nb->len) { + fprintf(stderr, "NByte payload size mismatch\n"); + return -1; + } + + needle = data + sizeof(*nb); + + for (i = 0; i < nb->len; i++) + fprintf(fd, "%02x ", needle[i]); + + fprintf(fd, "\""); + for (i = 0; i < nb->len; i++) + fprintf(fd, "%c", isprint(needle[i]) ? needle[i] : '.'); + fprintf(fd, "\" at %d layer %d", nb->off, nb->layer); + + return 0; +} + +struct ematch_util nbyte_ematch_util = { + .kind = "nbyte", + .kind_num = TCF_EM_NBYTE, + .parse_eopt = nbyte_parse_eopt, + .print_eopt = nbyte_print_eopt, + .print_usage = nbyte_print_usage +}; diff --git a/tc/em_u32.c b/tc/em_u32.c new file mode 100644 index 0000000..b8857f1 --- /dev/null +++ b/tc/em_u32.c @@ -0,0 +1,178 @@ +/* + * em_u32.c U32 Ematch + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "m_ematch.h" + +extern struct ematch_util u32_ematch_util; + +static void u32_print_usage(FILE *fd) +{ + fprintf(fd, + "Usage: u32(ALIGN VALUE MASK at [ nexthdr+ ] OFFSET)\n" \ + "where: ALIGN := { u8 | u16 | u32 }\n" \ + "\n" \ + "Example: u32(u16 0x1122 0xffff at nexthdr+4)\n"); +} + +static int u32_parse_eopt(struct nlmsghdr *n, struct tcf_ematch_hdr *hdr, + struct bstr *args) +{ + struct bstr *a; + int align, nh_len; + unsigned long key, mask, offmask = 0, offset; + struct tc_u32_key u_key; + + memset(&u_key, 0, sizeof(u_key)); + +#define PARSE_ERR(CARG, FMT, ARGS...) \ + em_parse_error(EINVAL, args, CARG, &u32_ematch_util, FMT ,##ARGS) + + if (args == NULL) + return PARSE_ERR(args, "u32: missing arguments"); + + if (!bstrcmp(args, "u8")) + align = 1; + else if (!bstrcmp(args, "u16")) + align = 2; + else if (!bstrcmp(args, "u32")) + align = 4; + else + return PARSE_ERR(args, "u32: invalid alignment"); + + a = bstr_next(args); + if (a == NULL) + return PARSE_ERR(a, "u32: missing key"); + + key = bstrtoul(a); + if (key == ULONG_MAX) + return PARSE_ERR(a, "u32: invalid key, must be numeric"); + + a = bstr_next(a); + if (a == NULL) + return PARSE_ERR(a, "u32: missing mask"); + + mask = bstrtoul(a); + if (mask == ULONG_MAX) + return PARSE_ERR(a, "u32: invalid mask, must be numeric"); + + a = bstr_next(a); + if (a == NULL || bstrcmp(a, "at") != 0) + return PARSE_ERR(a, "u32: missing \"at\""); + + a = bstr_next(a); + if (a == NULL) + return PARSE_ERR(a, "u32: missing offset"); + + nh_len = strlen("nexthdr+"); + if (a->len > nh_len && !memcmp(a->data, "nexthdr+", nh_len)) { + char buf[a->len - nh_len + 1]; + offmask = -1; + memcpy(buf, a->data + nh_len, a->len - nh_len); + offset = strtoul(buf, NULL, 0); + } else if (!bstrcmp(a, "nexthdr+")) { + a = bstr_next(a); + if (a == NULL) + return PARSE_ERR(a, "u32: 
missing offset"); + offset = bstrtoul(a); + } else + offset = bstrtoul(a); + + if (offset == ULONG_MAX) + return PARSE_ERR(a, "u32: invalid offset"); + + if (a->next) + return PARSE_ERR(a->next, "u32: unexpected trailer"); + + switch (align) { + case 1: + if (key > 0xFF) + return PARSE_ERR(a, "Illegal key (>0xFF)"); + if (mask > 0xFF) + return PARSE_ERR(a, "Illegal mask (>0xFF)"); + + key <<= 24 - ((offset & 3) * 8); + mask <<= 24 - ((offset & 3) * 8); + offset &= ~3; + break; + + case 2: + if (key > 0xFFFF) + return PARSE_ERR(a, "Illegal key (>0xFFFF)"); + if (mask > 0xFFFF) + return PARSE_ERR(a, "Illegal mask (>0xFFFF)"); + + if ((offset & 3) == 0) { + key <<= 16; + mask <<= 16; + } + offset &= ~3; + break; + } + + key = htonl(key); + mask = htonl(mask); + + if (offset % 4) + return PARSE_ERR(a, "u32: invalid offset alignment, " \ + "must be aligned to 4."); + + key &= mask; + + u_key.mask = mask; + u_key.val = key; + u_key.off = offset; + u_key.offmask = offmask; + + addraw_l(n, MAX_MSG, hdr, sizeof(*hdr)); + addraw_l(n, MAX_MSG, &u_key, sizeof(u_key)); + +#undef PARSE_ERR + return 0; +} + +static int u32_print_eopt(FILE *fd, struct tcf_ematch_hdr *hdr, void *data, + int data_len) +{ + struct tc_u32_key *u_key = data; + + if (data_len < sizeof(*u_key)) { + fprintf(stderr, "U32 header size mismatch\n"); + return -1; + } + + fprintf(fd, "%08x/%08x at %s%d", + (unsigned int) ntohl(u_key->val), + (unsigned int) ntohl(u_key->mask), + u_key->offmask ? 
"nexthdr+" : "", + u_key->off); + + return 0; +} + +struct ematch_util u32_ematch_util = { + .kind = "u32", + .kind_num = TCF_EM_U32, + .parse_eopt = u32_parse_eopt, + .print_eopt = u32_print_eopt, + .print_usage = u32_print_usage +}; diff --git a/tc/emp_ematch.l b/tc/emp_ematch.l new file mode 100644 index 0000000..09d535d --- /dev/null +++ b/tc/emp_ematch.l @@ -0,0 +1,145 @@ +%{ + #include "emp_ematch.yacc.h" + #include "m_ematch.h" + + extern int ematch_argc; + extern char **ematch_argv; + + #define yylval ematch_lval + + #define NEXT_EM_ARG() do { ematch_argc--; ematch_argv++; } while(0); + + #define YY_INPUT(buf, result, max_size) \ + { \ + next: \ + if (ematch_argc <= 0) \ + result = YY_NULL; \ + else if (**ematch_argv == '\0') { \ + NEXT_EM_ARG(); \ + goto next; \ + } else { \ + if (max_size <= strlen(*ematch_argv) + 1) { \ + fprintf(stderr, "match argument too long.\n"); \ + result = YY_NULL; \ + } else { \ + strcpy(buf, *ematch_argv); \ + result = strlen(*ematch_argv) + 1; \ + buf[result-1] = ' '; \ + buf[result] = '\0'; \ + NEXT_EM_ARG(); \ + } \ + } \ + } + + static void __attribute__ ((unused)) yyunput (int c,char *buf_ptr ); + static void __attribute__ ((unused)) yy_push_state (int new_state ); + static void __attribute__ ((unused)) yy_pop_state (void); + static int __attribute__ ((unused)) yy_top_state (void ); + + static char *strbuf; + static unsigned int strbuf_size; + static unsigned int strbuf_index; + + static void strbuf_enlarge(void) + { + strbuf_size += 512; + strbuf = realloc(strbuf, strbuf_size); + } + + static void strbuf_append_char(char c) + { + while (strbuf_index >= strbuf_size) + strbuf_enlarge(); + strbuf[strbuf_index++] = c; + } + + static void strbuf_append_charp(char *s) + { + while (strbuf_index >= strbuf_size) + strbuf_enlarge(); + memcpy(strbuf + strbuf_index, s, strlen(s)); + strbuf_index += strlen(s); + } + +%} + +%x str + +%option 8bit stack warn noyywrap prefix="ematch_" +%% +[ \t\r\n]+ + +\" { + if (strbuf == NULL) { + 
strbuf_size = 512; + strbuf = calloc(1, strbuf_size); + if (strbuf == NULL) + return ERROR; + } + strbuf_index = 0; + + BEGIN(str); + } + +\" { + BEGIN(INITIAL); + yylval.b = bstr_new(strbuf, strbuf_index); + yylval.b->quoted = 1; + return ATTRIBUTE; + } + +\\[0-7]{1,3} { /* octal escape sequence */ + int res; + + sscanf(yytext + 1, "%o", &res); + if (res > 0xFF) { + fprintf(stderr, "error: octal escape sequence" \ + " out of range\n"); + return ERROR; + } + strbuf_append_char((unsigned char) res); + } + +\\[0-9]+ { /* catch wrong octal escape seq. */ + fprintf(stderr, "error: invalid octale escape sequence\n"); + return ERROR; + } + +\\x[0-9a-fA-F]{1,2} { + int res; + + sscanf(yytext + 2, "%x", &res); + + if (res > 0xFF) { + fprintf(stderr, "error: hexadecimal escape " \ + "sequence out of range\n"); + return ERROR; + } + strbuf_append_char((unsigned char) res); + } + +\\n strbuf_append_char('\n'); +\\r strbuf_append_char('\r'); +\\t strbuf_append_char('\t'); +\\v strbuf_append_char('\v'); +\\b strbuf_append_char('\b'); +\\f strbuf_append_char('\f'); +\\a strbuf_append_char('\a'); + +\\(.|\n) strbuf_append_char(yytext[1]); +[^\\\n\"]+ strbuf_append_charp(yytext); + +[aA][nN][dD] return AND; +[oO][rR] return OR; +[nN][oO][tT] return NOT; +"(" | +")" { + return yylval.i = *yytext; + } +[^ \t\r\n()]+ { + yylval.b = bstr_alloc(yytext); + if (yylval.b == NULL) + return ERROR; + return ATTRIBUTE; + } +%% diff --git a/tc/emp_ematch.y b/tc/emp_ematch.y new file mode 100644 index 0000000..e8d1671 --- /dev/null +++ b/tc/emp_ematch.y @@ -0,0 +1,101 @@ +%{ + #include + #include + #include + #include + #include "m_ematch.h" +%} + +%locations +%token-table +%error-verbose +%name-prefix="ematch_" + +%union { + unsigned int i; + struct bstr *b; + struct ematch *e; +} + +%{ + extern int ematch_lex(void); + extern void yyerror(char *s); + extern struct ematch *ematch_root; + extern char *ematch_err; +%} + +%token ERROR +%token ATTRIBUTE +%token AND OR NOT +%type invert relation 
+%type match expr +%type args +%right AND OR +%start input +%% +input: + /* empty */ + | expr + { ematch_root = $1; } + | expr error + { + ematch_root = $1; + YYACCEPT; + } + ; + +expr: + match + { $$ = $1; } + | match relation expr + { + $1->relation = $2; + $1->next = $3; + $$ = $1; + } + ; + +match: + invert ATTRIBUTE '(' args ')' + { + $2->next = $4; + $$ = new_ematch($2, $1); + if ($$ == NULL) + YYABORT; + } + | invert '(' expr ')' + { + $$ = new_ematch(NULL, $1); + if ($$ == NULL) + YYABORT; + $$->child = $3; + } + ; + +args: + ATTRIBUTE + { $$ = $1; } + | ATTRIBUTE args + { $1->next = $2; } + ; + +relation: + AND + { $$ = TCF_EM_REL_AND; } + | OR + { $$ = TCF_EM_REL_OR; } + ; + +invert: + /* empty */ + { $$ = 0; } + | NOT + { $$ = 1; } + ; +%% + + void yyerror(char *s) + { + ematch_err = strdup(s); + } + diff --git a/tc/f_basic.c b/tc/f_basic.c new file mode 100644 index 0000000..264f358 --- /dev/null +++ b/tc/f_basic.c @@ -0,0 +1,146 @@ +/* + * f_basic.c Basic Classifier + * + * This program is free software; you can u32istribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Thomas Graf + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" +#include "m_ematch.h" + +static void explain(void) +{ + fprintf(stderr, "Usage: ... 
basic [ match EMATCH_TREE ] [ police POLICE_SPEC ]\n"); + fprintf(stderr, " [ action ACTION_SPEC ] [ classid CLASSID ]\n"); + fprintf(stderr, "\n"); + fprintf(stderr, "Where: SELECTOR := SAMPLE SAMPLE ...\n"); + fprintf(stderr, " FILTERID := X:Y:Z\n"); +} + +static int basic_parse_opt(struct filter_util *qu, char *handle, + int argc, char **argv, struct nlmsghdr *n) +{ + struct tcmsg *t = NLMSG_DATA(n); + struct rtattr *tail; + long h = 0; + + if (argc == 0) + return 0; + + if (handle) { + h = strtol(handle, NULL, 0); + if (h == LONG_MIN || h == LONG_MAX) { + fprintf(stderr, "Illegal handle \"%s\", must be numeric.\n", + handle); + return -1; + } + } + + t->tcm_handle = h; + + tail = (struct rtattr*)(((void*)n)+NLMSG_ALIGN(n->nlmsg_len)); + addattr_l(n, MAX_MSG, TCA_OPTIONS, NULL, 0); + + while (argc > 0) { + if (matches(*argv, "match") == 0) { + NEXT_ARG(); + if (parse_ematch(&argc, &argv, TCA_BASIC_EMATCHES, n)) { + fprintf(stderr, "Illegal \"ematch\"\n"); + return -1; + } + continue; + } else if (matches(*argv, "classid") == 0 || + strcmp(*argv, "flowid") == 0) { + unsigned handle; + NEXT_ARG(); + if (get_tc_classid(&handle, *argv)) { + fprintf(stderr, "Illegal \"classid\"\n"); + return -1; + } + addattr_l(n, MAX_MSG, TCA_BASIC_CLASSID, &handle, 4); + } else if (matches(*argv, "action") == 0) { + NEXT_ARG(); + if (parse_action(&argc, &argv, TCA_BASIC_ACT, n)) { + fprintf(stderr, "Illegal \"action\"\n"); + return -1; + } + continue; + + } else if (matches(*argv, "police") == 0) { + NEXT_ARG(); + if (parse_police(&argc, &argv, TCA_BASIC_POLICE, n)) { + fprintf(stderr, "Illegal \"police\"\n"); + return -1; + } + continue; + } else if (strcmp(*argv, "help") == 0) { + explain(); + return -1; + } else { + fprintf(stderr, "What is \"%s\"?\n", *argv); + explain(); + return -1; + } + argc--; argv++; + } + + tail->rta_len = (((void*)n)+n->nlmsg_len) - (void*)tail; + return 0; +} + +static int basic_print_opt(struct filter_util *qu, FILE *f, + struct rtattr *opt, __u32 
handle) +{ + struct rtattr *tb[TCA_BASIC_MAX+1]; + + if (opt == NULL) + return 0; + + parse_rtattr_nested(tb, TCA_BASIC_MAX, opt); + + if (handle) + fprintf(f, "handle 0x%x ", handle); + + if (tb[TCA_BASIC_CLASSID]) { + SPRINT_BUF(b1); + fprintf(f, "flowid %s ", + sprint_tc_classid(*(__u32*)RTA_DATA(tb[TCA_BASIC_CLASSID]), b1)); + } + + if (tb[TCA_BASIC_EMATCHES]) + print_ematch(f, tb[TCA_BASIC_EMATCHES]); + + if (tb[TCA_BASIC_POLICE]) { + fprintf(f, "\n"); + tc_print_police(f, tb[TCA_BASIC_POLICE]); + } + + if (tb[TCA_BASIC_ACT]) { + tc_print_action(f, tb[TCA_BASIC_ACT]); + } + + return 0; +} + +struct filter_util basic_filter_util = { + .id = "basic", + .parse_fopt = basic_parse_opt, + .print_fopt = basic_print_opt, +}; diff --git a/tc/f_u32.c b/tc/f_u32.c index 50dc4df..9d527fc 100644 --- a/tc/f_u32.c +++ b/tc/f_u32.c @@ -34,7 +34,7 @@ static void explain(void) fprintf(stderr, "or u32 divisor DIVISOR\n"); fprintf(stderr, "\n"); fprintf(stderr, "Where: SELECTOR := SAMPLE SAMPLE ...\n"); - fprintf(stderr, " SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} | mark } SAMPLE_ARGS\n"); + fprintf(stderr, " SAMPLE := { ip | ip6 | udp | tcp | icmp | u{32|16|8} | mark } SAMPLE_ARGS [divisor DIVISOR]\n"); fprintf(stderr, " FILTERID := X:Y:Z\n"); } @@ -495,7 +495,7 @@ static int parse_ip6(int *argc_p, char ***argv_p, struct tc_u32_sel *sel) } if (strcmp(*argv, "priority") == 0) { NEXT_ARG(); - res = parse_u8(&argc, &argv, sel, 0, 0); + res = parse_u8(&argc, &argv, sel, 4, 0); goto done; } if (strcmp(*argv, "protocol") == 0) { @@ -833,8 +833,9 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char ** } else if (matches(*argv, "divisor") == 0) { unsigned divisor; NEXT_ARG(); - if (get_unsigned(&divisor, *argv, 0) || divisor == 0 || - divisor > 0x100) { + if (get_unsigned(&divisor, *argv, 0) || + divisor == 0 || + divisor > 0x100 || ((divisor - 1) & divisor)) { fprintf(stderr, "Illegal \"divisor\"\n"); return -1; } @@ -874,10 +875,13 @@ static int 
u32_parse_opt(struct filter_util *qu, char *handle, int argc, char ** htid = (handle&0xFFFFF000); } else if (strcmp(*argv, "sample") == 0) { __u32 hash; + unsigned divisor = 0x100; + struct { struct tc_u32_sel sel; struct tc_u32_key keys[4]; } sel2; + memset(&sel2, 0, sizeof(sel2)); NEXT_ARG(); if (parse_selector(&argc, &argv, &sel2.sel, n)) { fprintf(stderr, "Illegal \"sample\"\n"); @@ -887,10 +891,19 @@ static int u32_parse_opt(struct filter_util *qu, char *handle, int argc, char ** fprintf(stderr, "\"sample\" must contain exactly ONE key.\n"); return -1; } + if (*argv != 0 && strcmp(*argv, "divisor") == 0) { + NEXT_ARG(); + if (get_unsigned(&divisor, *argv, 0) || divisor == 0 || + divisor > 0x100 || ((divisor - 1) & divisor)) { + fprintf(stderr, "Illegal sample \"divisor\"\n"); + return -1; + } + NEXT_ARG(); + } hash = sel2.sel.keys[0].val&sel2.sel.keys[0].mask; hash ^= hash>>16; hash ^= hash>>8; - htid = ((hash<<12)&0xFF000)|(htid&0xFFF00000); + htid = ((hash%divisor)<<12)|(htid&0xFFF00000); sample_ok = 1; continue; } else if (strcmp(*argv, "indev") == 0) { diff --git a/tc/m_ematch.c b/tc/m_ematch.c new file mode 100644 index 0000000..44c621b --- /dev/null +++ b/tc/m_ematch.c @@ -0,0 +1,493 @@ +/* + * m_ematch.c Extended Matches + * + * This program is free software; you can distribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" +#include "m_ematch.h" + +#define EMATCH_MAP "/etc/iproute2/ematch_map" + +static struct ematch_util *ematch_list; + +/* export to bison parser */ +int ematch_argc; +char **ematch_argv; +char *ematch_err = NULL; +struct ematch *ematch_root; + +static int begin_argc; +static char **begin_argv; + +static inline void map_warning(int num, char *kind) +{ + fprintf(stderr, + "Error: Unable to find ematch \"%s\" in %s\n" \ + "Please assign a unique ID to the ematch kind the suggested " \ + "entry is:\n" \ + "\t%d\t%s\n", + kind, EMATCH_MAP, num, kind); +} + +static int lookup_map(__u16 num, char *dst, int len, const char *file) +{ + int err = -EINVAL; + char buf[512]; + FILE *fd = fopen(file, "r"); + + if (fd == NULL) + return -errno; + + while (fgets(buf, sizeof(buf), fd)) { + char namebuf[512], *p = buf; + int id; + + while (*p == ' ' || *p == '\t') + p++; + if (*p == '#' || *p == '\n' || *p == 0) + continue; + + if (sscanf(p, "%d %s", &id, namebuf) != 2) { + fprintf(stderr, "ematch map %s corrupted at %s\n", + file, p); + goto out; + } + + if (id == num) { + if (dst) + strncpy(dst, namebuf, len - 1); + err = 0; + goto out; + } + } + + err = -ENOENT; +out: + fclose(fd); + return err; +} + +static int lookup_map_id(char *kind, int *dst, const char *file) +{ + int err = -EINVAL; + char buf[512]; + FILE *fd = fopen(file, "r"); + + if (fd == NULL) + return -errno; + + while (fgets(buf, sizeof(buf), fd)) { + char namebuf[512], *p = buf; + int id; + + while (*p == ' ' || *p == '\t') + p++; + if (*p == '#' || *p == '\n' || *p == 0) + continue; + + if (sscanf(p, "%d %s", &id, namebuf) != 2) { + fprintf(stderr, "ematch map %s corrupted at %s\n", + file, p); + goto out; + } + + if (!strcasecmp(namebuf, kind)) { + if (dst) + *dst = id; + err = 0; + goto out; + } + } + + err = -ENOENT; 
+ *dst = 0; +out: + fclose(fd); + return err; +} + +static struct ematch_util *get_ematch_kind(char *kind) +{ + static void *body; + void *dlh; + char buf[256]; + struct ematch_util *e; + + for (e = ematch_list; e; e = e->next) { + if (strcmp(e->kind, kind) == 0) + return e; + } + + snprintf(buf, sizeof(buf), "em_%s.so", kind); + dlh = dlopen(buf, RTLD_LAZY); + if (dlh == NULL) { + dlh = body; + if (dlh == NULL) { + dlh = body = dlopen(NULL, RTLD_LAZY); + if (dlh == NULL) + return NULL; + } + } + + snprintf(buf, sizeof(buf), "%s_ematch_util", kind); + e = dlsym(dlh, buf); + if (e == NULL) + return NULL; + + e->next = ematch_list; + ematch_list = e; + + return e; +} + +static struct ematch_util *get_ematch_kind_num(__u16 kind) +{ + char name[32]; + + if (lookup_map(kind, name, sizeof(name), EMATCH_MAP) < 0) + return NULL; + + return get_ematch_kind(name); + + return NULL; +} + +static int parse_tree(struct nlmsghdr *n, struct ematch *tree) +{ + int index = 1; + struct ematch *t; + + for (t = tree; t; t = t->next) { + struct rtattr *tail = NLMSG_TAIL(n); + struct tcf_ematch_hdr hdr = { + .flags = t->relation + }; + + if (t->inverted) + hdr.flags |= TCF_EM_INVERT; + + addattr_l(n, MAX_MSG, index++, NULL, 0); + + if (t->child) { + __u32 r = t->child_ref; + addraw_l(n, MAX_MSG, &hdr, sizeof(hdr)); + addraw_l(n, MAX_MSG, &r, sizeof(r)); + } else { + int num = 0, err; + char buf[64]; + struct ematch_util *e; + + if (t->args == NULL) + return -1; + + strncpy(buf, (char*) t->args->data, sizeof(buf)-1); + e = get_ematch_kind(buf); + if (e == NULL) { + fprintf(stderr, "Unknown ematch \"%s\"\n", + buf); + return -1; + } + + err = lookup_map_id(buf, &num, EMATCH_MAP); + if (err < 0) { + if (err == -ENOENT) + map_warning(e->kind_num, buf); + return err; + } + + hdr.kind = num; + if (e->parse_eopt(n, &hdr, t->args->next) < 0) + return -1; + } + + tail->rta_len = (void*) NLMSG_TAIL(n) - (void*) tail; + } + + return 0; +} + +static int flatten_tree(struct ematch *head, struct 
ematch *tree) +{ + int i, count = 0; + struct ematch *t; + + for (;;) { + count++; + + if (tree->child) { + for (t = head; t->next; t = t->next); + t->next = tree->child; + count += flatten_tree(head, tree->child); + } + + if (tree->relation == 0) + break; + + tree = tree->next; + } + + for (i = 0, t = head; t; t = t->next, i++) + t->index = i; + + for (t = head; t; t = t->next) + if (t->child) + t->child_ref = t->child->index; + + return count; +} + +int em_parse_error(int err, struct bstr *args, struct bstr *carg, + struct ematch_util *e, char *fmt, ...) +{ + va_list a; + + va_start(a, fmt); + vfprintf(stderr, fmt, a); + va_end(a); + + if (ematch_err) + fprintf(stderr, ": %s\n... ", ematch_err); + else + fprintf(stderr, "\n... "); + + while (ematch_argc < begin_argc) { + if (ematch_argc == (begin_argc - 1)) + fprintf(stderr, ">>%s<< ", *begin_argv); + else + fprintf(stderr, "%s ", *begin_argv); + begin_argv++; + begin_argc--; + } + + fprintf(stderr, "...\n"); + + if (args) { + fprintf(stderr, "... %s(", e->kind); + while (args) { + fprintf(stderr, "%s", args == carg ? ">>" : ""); + bstr_print(stderr, args, 1); + fprintf(stderr, "%s%s", args == carg ? "<<" : "", + args->next ? 
" " : ""); + args = args->next; + } + fprintf(stderr, ")...\n"); + + } + + if (e == NULL) { + fprintf(stderr, + "Usage: EXPR\n" \ + "where: EXPR := TERM [ { and | or } EXPR ]\n" \ + " TERM := [ not ] { MATCH | '(' EXPR ')' }\n" \ + " MATCH := module '(' ARGS ')'\n" \ + " ARGS := ARG1 ARG2 ...\n" \ + "\n" \ + "Example: a(x y) and not (b(x) or c(x y z))\n"); + } else + e->print_usage(stderr); + + return -err; +} + +static inline void free_ematch_err(void) +{ + if (ematch_err) { + free(ematch_err); + ematch_err = NULL; + } +} + +extern int ematch_parse(void); + +int parse_ematch(int *argc_p, char ***argv_p, int tca_id, struct nlmsghdr *n) +{ + begin_argc = ematch_argc = *argc_p; + begin_argv = ematch_argv = *argv_p; + + if (ematch_parse()) { + int err = em_parse_error(EINVAL, NULL, NULL, NULL, + "Parse error"); + free_ematch_err(); + return err; + } + + free_ematch_err(); + + /* undo look ahead by parser */ + ematch_argc++; + ematch_argv--; + + if (ematch_root) { + struct rtattr *tail, *tail_list; + + struct tcf_ematch_tree_hdr hdr = { + .nmatches = flatten_tree(ematch_root, ematch_root), + .progid = TCF_EM_PROG_TC + }; + + tail = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, tca_id, NULL, 0); + addattr_l(n, MAX_MSG, TCA_EMATCH_TREE_HDR, &hdr, sizeof(hdr)); + + tail_list = NLMSG_TAIL(n); + addattr_l(n, MAX_MSG, TCA_EMATCH_TREE_LIST, NULL, 0); + + if (parse_tree(n, ematch_root) < 0) + return -1; + + tail_list->rta_len = (void*) NLMSG_TAIL(n) - (void*) tail_list; + tail->rta_len = (void*) NLMSG_TAIL(n) - (void*) tail; + } + + *argc_p = ematch_argc; + *argv_p = ematch_argv; + + return 0; +} + +static int print_ematch_seq(FILE *fd, struct rtattr **tb, int start, + int prefix) +{ + int n, i = start; + struct tcf_ematch_hdr *hdr; + int dlen; + void *data; + + for (;;) { + if (tb[i] == NULL) + return -1; + + dlen = RTA_PAYLOAD(tb[i]) - sizeof(*hdr); + data = (void *) RTA_DATA(tb[i]) + sizeof(*hdr); + + if (dlen < 0) + return -1; + + hdr = RTA_DATA(tb[i]); + + if (hdr->flags & 
TCF_EM_INVERT) + fprintf(fd, "NOT "); + + if (hdr->kind == 0) { + __u32 ref; + + if (dlen < sizeof(__u32)) + return -1; + + ref = *(__u32 *) data; + fprintf(fd, "(\n"); + for (n = 0; n <= prefix; n++) + fprintf(fd, " "); + if (print_ematch_seq(fd, tb, ref + 1, prefix + 1) < 0) + return -1; + for (n = 0; n < prefix; n++) + fprintf(fd, " "); + fprintf(fd, ") "); + + } else { + struct ematch_util *e; + + e = get_ematch_kind_num(hdr->kind); + if (e == NULL) + fprintf(fd, "[unknown ematch %d]\n", + hdr->kind); + else { + fprintf(fd, "%s(", e->kind); + if (e->print_eopt(fd, hdr, data, dlen) < 0) + return -1; + fprintf(fd, ")\n"); + } + if (hdr->flags & TCF_EM_REL_MASK) + for (n = 0; n < prefix; n++) + fprintf(fd, " "); + } + + switch (hdr->flags & TCF_EM_REL_MASK) { + case TCF_EM_REL_AND: + fprintf(fd, "AND "); + break; + + case TCF_EM_REL_OR: + fprintf(fd, "OR "); + break; + + default: + return 0; + } + + i++; + } + + return 0; +} + +static int print_ematch_list(FILE *fd, struct tcf_ematch_tree_hdr *hdr, + struct rtattr *rta) +{ + int err = -1; + struct rtattr **tb; + + tb = malloc((hdr->nmatches + 1) * sizeof(struct rtattr *)); + if (tb == NULL) + return -1; + + if (parse_rtattr_nested(tb, hdr->nmatches, rta) < 0) + goto errout; + + fprintf(fd, "\n "); + if (print_ematch_seq(fd, tb, 1, 1) < 0) + goto errout; + + err = 0; +errout: + free(tb); + return err; +} + +int print_ematch(FILE *fd, const struct rtattr *rta) +{ + struct rtattr *tb[TCA_EMATCH_TREE_MAX+1]; + struct tcf_ematch_tree_hdr *hdr; + + if (parse_rtattr_nested(tb, TCA_EMATCH_TREE_MAX, rta) < 0) + return -1; + + if (tb[TCA_EMATCH_TREE_HDR] == NULL) { + fprintf(stderr, "Missing ematch tree header\n"); + return -1; + } + + if (tb[TCA_EMATCH_TREE_LIST] == NULL) { + fprintf(stderr, "Missing ematch tree list\n"); + return -1; + } + + if (RTA_PAYLOAD(tb[TCA_EMATCH_TREE_HDR]) < sizeof(*hdr)) { + fprintf(stderr, "Ematch tree header size mismatch\n"); + return -1; + } + + hdr = RTA_DATA(tb[TCA_EMATCH_TREE_HDR]); + + 
return print_ematch_list(fd, hdr, tb[TCA_EMATCH_TREE_LIST]); +} diff --git a/tc/m_ematch.h b/tc/m_ematch.h new file mode 100644 index 0000000..ed98446 --- /dev/null +++ b/tc/m_ematch.h @@ -0,0 +1,179 @@ +#ifndef __TC_EMATCH_H_ +#define __TC_EMATCH_H_ + +#include +#include +#include + +#include "utils.h" +#include "tc_util.h" + +#define EMATCHKINDSIZ 16 + +struct bstr +{ + char *data; + unsigned int len; + int quoted; + struct bstr *next; +}; + +static inline struct bstr * bstr_alloc(const char *text) +{ + struct bstr *b = calloc(1, sizeof(*b)); + + if (b == NULL) + return NULL; + + b->data = strdup(text); + if (b->data == NULL) { + free(b); + return NULL; + } + + b->len = strlen(text); + + return b; +} + +static inline struct bstr * bstr_new(char *data, unsigned int len) +{ + struct bstr *b = calloc(1, sizeof(*b)); + + if (b == NULL) + return NULL; + + b->data = data; + b->len = len; + + return b; +} + +static inline int bstrcmp(struct bstr *b, const char *text) +{ + int len = strlen(text); + int d = b->len - len; + + if (d == 0) + return strncmp(b->data, text, len); + + return d; +} + +static inline unsigned long bstrtoul(struct bstr *b) +{ + char *inv = NULL; + unsigned long l; + char buf[b->len+1]; + + memcpy(buf, b->data, b->len); + buf[b->len] = '\0'; + + l = strtol(buf, &inv, 0); + if (l == ULONG_MAX || inv == buf) + return LONG_MAX; + + return l; +} + +static inline void bstr_print(FILE *fd, struct bstr *b, int ascii) +{ + int i; + char *s = b->data; + + if (ascii) + for (i = 0; i < b->len; i++) + fprintf(fd, "%c", isprint(s[i]) ? s[i] : '.'); + else { + for (i = 0; i < b->len; i++) + fprintf(fd, "%02x", s[i]); + fprintf(fd, "\""); + for (i = 0; i < b->len; i++) + fprintf(fd, "%c", isprint(s[i]) ? 
s[i] : '.'); + fprintf(fd, "\""); + } +} + +static inline struct bstr *bstr_next(struct bstr *b) +{ + return b->next; +} + +struct ematch +{ + struct bstr *args; + int index; + int inverted; + int relation; + int child_ref; + struct ematch *child; + struct ematch *next; +}; + +static inline struct ematch * new_ematch(struct bstr *args, int inverted) +{ + struct ematch *e = calloc(1, sizeof(*e)); + + if (e == NULL) + return NULL; + + e->args = args; + e->inverted = inverted; + + return e; +} + +static inline void print_ematch_tree(struct ematch *tree) +{ + struct ematch *t; + + for (t = tree; t; t = t->next) { + if (t->inverted) + printf("NOT "); + + if (t->child) { + printf("("); + print_ematch_tree(t->child); + printf(")"); + } else { + struct bstr *b; + for (b = t->args; b; b = b->next) + printf("%s%s", b->data, b->next ? " " : ""); + } + + if (t->relation == TCF_EM_REL_AND) + printf(" AND "); + else if (t->relation == TCF_EM_REL_OR) + printf(" OR "); + } +} + +struct ematch_util +{ + char kind[EMATCHKINDSIZ]; + int kind_num; + int (*parse_eopt)(struct nlmsghdr *,struct tcf_ematch_hdr *, + struct bstr *); + int (*print_eopt)(FILE *, struct tcf_ematch_hdr *, void *, int); + void (*print_usage)(FILE *); + struct ematch_util *next; +}; + +static inline int parse_layer(struct bstr *b) +{ + if (*((char *) b->data) == 'l') + return TCF_LAYER_LINK; + else if (*((char *) b->data) == 'n') + return TCF_LAYER_NETWORK; + else if (*((char *) b->data) == 't') + return TCF_LAYER_TRANSPORT; + else + return INT_MAX; +} + +extern int em_parse_error(int err, struct bstr *args, struct bstr *carg, + struct ematch_util *, char *fmt, ...); +extern int print_ematch(FILE *, const struct rtattr *); +extern int parse_ematch(int *, char ***, int, struct nlmsghdr *); + +#endif diff --git a/tc/m_ipt.c b/tc/m_ipt.c index 518e4a3..ca39555 100644 --- a/tc/m_ipt.c +++ b/tc/m_ipt.c @@ -69,6 +69,7 @@ static struct option original_opts[] = { }; static struct iptables_target *t_list = NULL; +static 
struct option *opts = original_opts; static unsigned int global_option_offset = 0; #define OPTION_OFFSET 256 @@ -169,18 +170,13 @@ int string_to_number(const char *s, unsigned int min, unsigned int max, return result; } -static struct option * -copy_options(struct option *oldopts) +static void free_opts(struct option *opts) { - struct option *merge; - unsigned int num_old; - for (num_old = 0; oldopts[num_old].name; num_old++) ; - merge = malloc(sizeof (struct option) * (num_old + 1)); - if (NULL == merge) - return NULL; - memcpy(merge, oldopts, num_old * sizeof (struct option)); - memset(merge + num_old, 0, sizeof (struct option)); - return merge; + if (opts != original_opts) { + free(opts); + opts = original_opts; + global_option_offset = 0; + } } static struct option * @@ -337,6 +333,17 @@ struct in_addr *dotted_to_addr(const char *dotted) return &addr; } +static void set_revision(char *name, u_int8_t revision) +{ + /* Old kernel sources don't have ".revision" field, + * but we stole a byte from name. 
*/ + name[IPT_FUNCTION_MAXNAMELEN - 2] = '\0'; + name[IPT_FUNCTION_MAXNAMELEN - 1] = revision; +} + +/* + * we may need to check for version mismatch +*/ int build_st(struct iptables_target *target, struct ipt_entry_target *t) { @@ -350,8 +357,11 @@ build_st(struct iptables_target *target, struct ipt_entry_target *t) if (NULL == t) { target->t = fw_calloc(1, size); - target->init(target->t, &nfcache); target->t->u.target_size = size; + + if (target->init != NULL) + target->init(target->t, &nfcache); + set_revision(target->t->u.user.name, target->revision); } else { target->t = t; } @@ -371,7 +381,6 @@ static int parse_ipt(struct action_util *a,int *argc_p, int c; int rargc = *argc_p; char **argv = *argv_p; - struct option *opts; int argc = 0, iargc = 0; char k[16]; int res = -1; @@ -395,11 +404,6 @@ static int parse_ipt(struct action_util *a,int *argc_p, return -1; } - opts = copy_options(original_opts); - - if (NULL == opts) - return -1; - while (1) { c = getopt_long(argc, argv, "j:", opts, NULL); if (c == -1) @@ -426,23 +430,14 @@ static int parse_ipt(struct action_util *a,int *argc_p, default: memset(&fw, 0, sizeof (fw)); if (m) { - unsigned int fake_flags = 0; m->parse(c - m->option_offset, argv, 0, - &fake_flags, NULL, &m->t); + &m->tflags, NULL, &m->t); } else { fprintf(stderr," failed to find target %s\n\n", optarg); return -1; } ok++; - - /*m->final_check(m->t); -- Is this necessary? 
- ** useful when theres depencies - ** eg ipt_TCPMSS.c has have the TCP match loaded - ** before this can be used; - ** also seems the ECN target needs it - */ - break; } @@ -452,6 +447,7 @@ static int parse_ipt(struct action_util *a,int *argc_p, if (matches(argv[optind], "index") == 0) { if (get_u32(&index, argv[optind + 1], 10)) { fprintf(stderr, "Illegal \"index\"\n"); + free_opts(opts); return -1; } iok++; @@ -465,6 +461,10 @@ static int parse_ipt(struct action_util *a,int *argc_p, return -1; } + /* check that we passed the correct parameters to the target */ + if (m) + m->final_check(m->tflags); + { struct tcmsg *t = NLMSG_DATA(n); if (t->tcm_parent != TC_H_ROOT @@ -505,6 +505,7 @@ static int parse_ipt(struct action_util *a,int *argc_p, *argv_p = argv; optind = 1; + free_opts(opts); return 0; @@ -515,16 +516,10 @@ print_ipt(struct action_util *au,FILE * f, struct rtattr *arg) { struct rtattr *tb[TCA_IPT_MAX + 1]; struct ipt_entry_target *t = NULL; - struct option *opts; if (arg == NULL) return -1; - opts = copy_options(original_opts); - - if (NULL == opts) - return -1; - parse_rtattr_nested(tb, TCA_IPT_MAX, arg); if (tb[TCA_IPT_TABLE] == NULL) { @@ -587,6 +582,7 @@ print_ipt(struct action_util *au,FILE * f, struct rtattr *arg) fprintf(f, " \n"); } + free_opts(opts); return 0; } diff --git a/tc/m_mirred.c b/tc/m_mirred.c index 6ade2a8..cbfea84 100644 --- a/tc/m_mirred.c +++ b/tc/m_mirred.c @@ -263,7 +263,10 @@ print_mirred(struct action_util *au,FILE * f, struct rtattr *arg) } p = RTA_DATA(tb[TCA_MIRRED_PARMS]); + /* ll_init_map(&rth); + */ + if ((dev = ll_index_to_name(p->ifindex)) == 0) { fprintf(stderr, "Cannot find device %d\n", p->ifindex); @@ -285,7 +288,7 @@ print_mirred(struct action_util *au,FILE * f, struct rtattr *arg) return 0; } -struct action_util mirred_util_util = { +struct action_util mirred_action_util = { .id = "mirred", .parse_aopt = parse_mirred, .print_aopt = print_mirred, diff --git a/tc/m_pedit.c b/tc/m_pedit.c index 5031c62..acfa581 
100644 --- a/tc/m_pedit.c +++ b/tc/m_pedit.c @@ -238,9 +238,11 @@ parse_val(int *argc_p, char ***argv_p, __u32 * val, int type) return -1; if (TINT == type) - return get_integer(val, *argv, 0); + return get_integer((int *) val, *argv, 0); + if (TU32 == type) return get_u32(val, *argv, 0); + if (TIPV4 == type) { inet_prefix addr; if (get_prefix_1(&addr, *argv, AF_INET)) { diff --git a/tc/q_cbq.c b/tc/q_cbq.c index 40c0228..a456eda 100644 --- a/tc/q_cbq.c +++ b/tc/q_cbq.c @@ -70,7 +70,7 @@ static int cbq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nl } } else if (strcmp(*argv, "ewma") == 0) { NEXT_ARG(); - if (get_unsigned(&ewma_log, *argv, 0)) { + if (get_integer(&ewma_log, *argv, 0)) { explain1("ewma"); return -1; } @@ -236,7 +236,7 @@ static int cbq_parse_class_opt(struct qdisc_util *qu, int argc, char **argv, str lss.change |= TCF_CBQ_LSS_FLAGS; } else if (strcmp(*argv, "ewma") == 0) { NEXT_ARG(); - if (get_u32(&ewma_log, *argv, 0)) { + if (get_integer(&ewma_log, *argv, 0)) { explain1("ewma"); return -1; } diff --git a/tc/q_dsmark.c b/tc/q_dsmark.c index 384e749..cdb5bf2 100644 --- a/tc/q_dsmark.c +++ b/tc/q_dsmark.c @@ -136,11 +136,9 @@ static int dsmark_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { struct rtattr *tb[TCA_DSMARK_MAX+1]; - if (opt == NULL) - return 0; - - parse_rtattr_nested(tb, TCA_DSMARK_MAX, opt); - + if (!opt) return 0; + memset(tb, 0, sizeof(tb)); + parse_rtattr(tb, TCA_DSMARK_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt)); if (tb[TCA_DSMARK_MASK]) { if (!RTA_PAYLOAD(tb[TCA_DSMARK_MASK])) fprintf(stderr,"dsmark: empty mask\n"); diff --git a/tc/q_netem.c b/tc/q_netem.c index f696cc3..757edca 100644 --- a/tc/q_netem.c +++ b/tc/q_netem.c @@ -29,11 +29,12 @@ static void explain(void) { fprintf(stderr, "Usage: ... 
netem [ limit PACKETS ] \n" \ -" [ delay TIME [ JITTER [CORRELATION]]]\n" \ +" [ delay TIME [ JITTER [CORRELATION]]]\n" \ +" [ distribution {uniform|normal|pareto|paretonormal} ]\n" \ " [ drop PERCENT [CORRELATION]] \n" \ +" [ corrupt PERCENT [CORRELATION]] \n" \ " [ duplicate PERCENT [CORRELATION]]\n" \ -" [ distribution {uniform|normal|pareto|paretonormal} ]\n" \ -" [ gap PACKETS ]\n"); +" [ reorder PRECENT [CORRELATION] [ gap DISTANCE ]]\n"); } static void explain1(const char *arg) @@ -127,11 +128,15 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct rtattr *tail; struct tc_netem_qopt opt; struct tc_netem_corr cor; - __s16 dist_data[MAXDIST]; + struct tc_netem_reorder reorder; + struct tc_netem_corrupt corrupt; + __s16 *dist_data = NULL; memset(&opt, 0, sizeof(opt)); opt.limit = 1000; memset(&cor, 0, sizeof(cor)); + memset(&reorder, 0, sizeof(reorder)); + memset(&corrupt, 0, sizeof(corrupt)); while (argc > 0) { if (matches(*argv, "limit") == 0) { @@ -178,6 +183,32 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, return -1; } } + } else if (matches(*argv, "reorder") == 0) { + NEXT_ARG(); + if (get_percent(&reorder.probability, *argv)) { + explain1("reorder"); + return -1; + } + if (NEXT_IS_NUMBER()) { + NEXT_ARG(); + if (get_percent(&reorder.correlation, *argv)) { + explain1("reorder"); + return -1; + } + } + } else if (matches(*argv, "corrupt") == 0) { + NEXT_ARG(); + if (get_percent(&corrupt.probability, *argv)) { + explain1("corrupt"); + return -1; + } + if (NEXT_IS_NUMBER()) { + NEXT_ARG(); + if (get_percent(&corrupt.correlation, *argv)) { + explain1("corrupt"); + return -1; + } + } } else if (matches(*argv, "gap") == 0) { NEXT_ARG(); if (get_u32(&opt.gap, *argv, 0)) { @@ -199,6 +230,7 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, } } else if (matches(*argv, "distribution") == 0) { NEXT_ARG(); + dist_data = alloca(MAXDIST); dist_size = get_distribution(*argv, dist_data); 
if (dist_size < 0) return -1; @@ -215,12 +247,44 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, tail = NLMSG_TAIL(n); - addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)); - addattr_l(n, 1024, TCA_NETEM_CORR, &cor, sizeof(cor)); + if (reorder.probability) { + if (opt.latency == 0) { + fprintf(stderr, "reordering not possible without specifying some delay\n"); + } + if (opt.gap == 0) + opt.gap = 1; + } else if (opt.gap > 0) { + fprintf(stderr, "gap specified without reorder probability\n"); + explain(); + return -1; + } + + if (dist_data && (opt.latency == 0 || opt.jitter == 0)) { + fprintf(stderr, "distribution specified but no latency and jitter values\n"); + explain(); + return -1; + } + + if (addattr_l(n, TCA_BUF_MAX, TCA_OPTIONS, &opt, sizeof(opt)) < 0) + return -1; + + if (cor.delay_corr || cor.loss_corr || cor.dup_corr) { + if (addattr_l(n, TCA_BUF_MAX, TCA_NETEM_CORR, &cor, sizeof(cor)) < 0) + return -1; + } + + if (addattr_l(n, TCA_BUF_MAX, TCA_NETEM_REORDER, &reorder, sizeof(reorder)) < 0) + return -1; + + if (corrupt.probability) { + if (addattr_l(n, TCA_BUF_MAX, TCA_NETEM_CORRUPT, &corrupt, sizeof(corrupt)) < 0) + return -1; + } - if (dist_size > 0) { - addattr_l(n, 32768, TCA_NETEM_DELAY_DIST, - dist_data, dist_size*sizeof(dist_data[0])); + if (dist_data) { + if (addattr_l(n, 32768, TCA_NETEM_DELAY_DIST, + dist_data, dist_size*sizeof(dist_data[0])) < 0) + return -1; } tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail; return 0; @@ -229,6 +293,8 @@ static int netem_parse_opt(struct qdisc_util *qu, int argc, char **argv, static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) { const struct tc_netem_corr *cor = NULL; + const struct tc_netem_reorder *reorder = NULL; + const struct tc_netem_corrupt *corrupt = NULL; struct tc_netem_qopt qopt; int len = RTA_PAYLOAD(opt) - sizeof(qopt); SPRINT_BUF(b1); @@ -252,6 +318,16 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) 
return -1; cor = RTA_DATA(tb[TCA_NETEM_CORR]); } + if (tb[TCA_NETEM_REORDER]) { + if (RTA_PAYLOAD(tb[TCA_NETEM_REORDER]) < sizeof(*reorder)) + return -1; + reorder = RTA_DATA(tb[TCA_NETEM_REORDER]); + } + if (tb[TCA_NETEM_CORRUPT]) { + if (RTA_PAYLOAD(tb[TCA_NETEM_CORRUPT]) < sizeof(*corrupt)) + return -1; + corrupt = RTA_DATA(tb[TCA_NETEM_CORRUPT]); + } } fprintf(f, "limit %d", qopt.limit); @@ -278,6 +354,22 @@ static int netem_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt) if (cor && cor->dup_corr) fprintf(f, " %s", sprint_percent(cor->dup_corr, b1)); } + + if (reorder && reorder->probability) { + fprintf(f, " reorder %s", + sprint_percent(reorder->probability, b1)); + if (reorder->correlation) + fprintf(f, " %s", + sprint_percent(reorder->correlation, b1)); + } + + if (corrupt && corrupt->probability) { + fprintf(f, " corrupt %s", + sprint_percent(corrupt->probability, b1)); + if (corrupt->correlation) + fprintf(f, " %s", + sprint_percent(corrupt->correlation, b1)); + } if (qopt.gap) fprintf(f, " gap %lu", (unsigned long)qopt.gap); diff --git a/tc/tc.c b/tc/tc.c index dd6ac97..fa36ee0 100644 --- a/tc/tc.c +++ b/tc/tc.c @@ -35,9 +35,10 @@ int show_details = 0; int show_raw = 0; int resolve_hosts = 0; int use_iec = 0; +int force = 0; struct rtnl_handle rth; -static void *BODY; /* cached handle dlopen(NULL) */ +static void *BODY = NULL; /* cached handle dlopen(NULL) */ static struct qdisc_util * qdisc_list; static struct filter_util * filter_list; @@ -179,8 +180,9 @@ noexist: static void usage(void) { fprintf(stderr, "Usage: tc [ OPTIONS ] OBJECT { COMMAND | help }\n" + " tc [-force] -batch file\n" "where OBJECT := { qdisc | class | filter | action }\n" - " OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -b[atch] file }\n"); + " OPTIONS := { -s[tatistics] | -d[etails] | -r[aw] | -b[atch] [file] }\n"); } static int do_cmd(int argc, char **argv) @@ -207,34 +209,13 @@ static int do_cmd(int argc, char **argv) return -1; } -static int makeargs(char 
*line, char *argv[], int maxargs) -{ - static const char ws[] = " \t\r\n"; - char *cp; - int argc = 0; - - for (cp = strtok(line, ws); cp; cp = strtok(NULL, ws)) { - if (argc >= maxargs) { - fprintf(stderr, "Too many arguments to command\n"); - exit(1); - } - argv[argc++] = cp; - } - argv[argc] = NULL; - - return argc; -} - static int batch(const char *name) { char *line = NULL; size_t len = 0; - ssize_t cc; - int lineno = 0; - char *largv[100]; - int largc, ret = 0; + int ret = 0; - if (strcmp(name, "-") != 0) { + if (name && strcmp(name, "-") != 0) { if (freopen(name, "r", stdin) == NULL) { fprintf(stderr, "Cannot open file \"%s\" for reading: %s=n", name, strerror(errno)); @@ -249,44 +230,24 @@ static int batch(const char *name) return -1; } - while ((cc = getline(&line, &len, stdin)) != -1) { - ++lineno; - - /* ignore blank lines and comments */ - if (*line == '\n' || *line == '#') - continue; - - /* handle continuation lines */ - while (cc >= 2 && strcmp(line+cc-2, "\\\n") == 0) { - char *line1 = NULL; - ssize_t len1 = 0; - int cc1; - cc1 = getline(&line1, &len1, stdin); - - if (cc1 < 0) { - fprintf(stderr, "Missing continuation line\n"); - return -1; - } - ++lineno; - line = realloc(line, cc + cc1); - if (!line) { - fprintf(stderr, "Out of memory\n"); - return -1; - } - - strcpy(line+cc-2, line1); - cc += cc1 - 2; - free(line1); - } + cmdlineno = 0; + while (getcmdline(&line, &len, stdin) != -1) { + char *largv[100]; + int largc; largc = makeargs(line, largv, 100); - - ret = do_cmd(largc, largv); - if (ret) { - fprintf(stderr, "Command failed %s:%d\n", name, lineno); - break; + if (largc == 0) + continue; /* blank line */ + + if (do_cmd(largc, largv)) { + fprintf(stderr, "Command failed %s:%d\n", name, cmdlineno); + ret = 1; + if (!force) + break; } } + if (line) + free(line); rtnl_close(&rth); return ret; @@ -296,6 +257,8 @@ static int batch(const char *name) int main(int argc, char **argv) { int ret; + int do_batching = 0; + char *batchfile = NULL; while 
(argc > 1) { if (argv[1][0] != '-') @@ -315,13 +278,13 @@ int main(int argc, char **argv) } else if (matches(argv[1], "-help") == 0) { usage(); return 0; + } else if (matches(argv[1], "-force") == 0) { + ++force; } else if (matches(argv[1], "-batch") == 0) { - if (argc < 3) { - fprintf(stderr, "Wrong number of arguments in batch mode\n"); - return -1; - } - - return batch(argv[2]); + do_batching = 1; + if (argc > 2) + batchfile = argv[2]; + argc--; argv++; } else { fprintf(stderr, "Option \"%s\" is unknown, try \"tc -help\".\n", argv[1]); return -1; @@ -329,6 +292,9 @@ int main(int argc, char **argv) argc--; argv++; } + if (do_batching) + return batch(batchfile); + if (argc <= 1) { usage(); return 0; diff --git a/tc/tc_class.c b/tc/tc_class.c index c4b27eb..894caa1 100644 --- a/tc/tc_class.c +++ b/tc/tc_class.c @@ -76,7 +76,10 @@ int tc_class_modify(int cmd, unsigned flags, int argc, char **argv) if (get_tc_classid(&handle, *argv)) invarg(*argv, "invalid class ID"); req.t.tcm_handle = handle; - } else if (strcmp(*argv, "root") == 0) { + } else if (strcmp(*argv, "handle") == 0) { + fprintf(stderr, "Error: try \"classid\" instead of \"handle\"\n"); + return -1; + } else if (strcmp(*argv, "root") == 0) { if (req.t.tcm_parent) { fprintf(stderr, "Error: \"root\" is duplicate parent ID.\n"); return -1; diff --git a/tc/tc_qdisc.c b/tc/tc_qdisc.c index 7802d52..e9174ab 100644 --- a/tc/tc_qdisc.c +++ b/tc/tc_qdisc.c @@ -126,6 +126,10 @@ int tc_qdisc_modify(int cmd, unsigned flags, int argc, char **argv) addattr_l(&req.n, sizeof(req), TCA_RATE, &est, sizeof(est)); if (q) { + if (!q->parse_qopt) { + fprintf(stderr, "qdisc '%s' does not support option parsing\n", k); + return -1; + } if (q->parse_qopt(q, argc, argv, &req.n)) return 1; } else { diff --git a/testsuite/Makefile b/testsuite/Makefile index 5661cea..2a4e0ba 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -1,11 +1,18 @@ -TESTS := $(patsubst tests/%,%,$(wildcard tests/*)) +## -- Config -- +DEV := lo 
+PREFIX := sudo +## -- End Config -- + +TESTS := $(patsubst tests/%,%,$(wildcard tests/*.t)) IPVERS := $(filter-out iproute2/Makefile,$(wildcard iproute2/*)) +KENV := $(shell cat /proc/config.gz | gunzip | grep ^CONFIG) -DEV := eth0 +.PHONY: compile listtests alltests configure $(TESTS) -.PHONY: compile listtests alltests $(TESTS) +configure: + echo "Entering iproute2" && cd iproute2 && $(MAKE) configure && cd ..; -compile: +compile: configure echo "Entering iproute2" && cd iproute2 && $(MAKE) && cd ..; listtests: @@ -18,16 +25,21 @@ alltests: $(TESTS) clean: @rm -rf results/* +distclean: clean + echo "Entering iproute2" && cd iproute2 && $(MAKE) distclean && cd ..; + $(TESTS): @for i in $(IPVERS); do \ - echo -n "Running $@ with $$i on `uname -r`: "; \ - logger "TESTMARK: $@"; \ o=`echo $$i | sed -e 's/iproute2\///'`; \ - TC="$$i/tc/tc" IP="$$i/ip/ip" DEV="$(DEV)" sudo tests/$@ > results/$@.$$o.out 2> results/$@.$$o.err; \ - dmesg > results/$@.$$o.dmesg; \ - if [ -z "`cat results/$@.$$o.err`" ]; then \ - echo "PASS"; \ - else \ + echo -n "Running $@ [$$o/`uname -r`]: "; \ + TC="$$i/tc/tc" IP="$$i/ip/ip" DEV="$(DEV)" IPVER="$@" SNAME="$$i" \ + ERRF="results/$@.$$o.err" $(KENV) $(PREFIX) tests/$@ > results/$@.$$o.out; \ + if [ "$$?" = "127" ]; then \ + echo "SKIPPED"; \ + elif [ -e "results/$@.$$o.err" ]; then \ echo "FAILED"; \ - fi \ + else \ + echo "PASS"; \ + fi; \ + dmesg > results/$@.$$o.dmesg; \ done diff --git a/testsuite/iproute2/Makefile b/testsuite/iproute2/Makefile new file mode 100644 index 0000000..ba128aa --- /dev/null +++ b/testsuite/iproute2/Makefile @@ -0,0 +1,33 @@ +SUBDIRS := $(filter-out Makefile,$(wildcard *)) +.PHONY: all configure clean distclean show $(SUBDIRS) + +all: configure + @for dir in $(SUBDIRS); do \ + echo "Entering $$dir" && cd $$dir && $(MAKE) && cd ..; \ + done + +link: + @if [ ! -L iproute2-this ]; then \ + ln -s ../.. 
iproute2-this; \ + fi + +configure: link + @for dir in $(SUBDIRS); do \ + echo "Entering $$dir" && cd $$dir && if [ -f configure ]; then ./configure; fi && cd ..; \ + done + +clean: link + @for dir in $(SUBDIRS); do \ + echo "Entering $$dir" && cd $$dir && $(MAKE) clean && cd ..; \ + done + +distclean: clean + @for dir in $(SUBDIRS); do \ + echo "Entering $$dir" && cd $$dir && $(MAKE) distclean && cd ..; \ + done + +show: link + @echo "$(SUBDIRS)" + +$(SUBDIRS): + cd $@ && $(MAKE) diff --git a/testsuite/lib/generic.sh b/testsuite/lib/generic.sh new file mode 100644 index 0000000..cc48947 --- /dev/null +++ b/testsuite/lib/generic.sh @@ -0,0 +1,88 @@ + +export DEST="127.0.0.1" + +ts_log() +{ + echo "$@" +} + +ts_err() +{ + ts_log "$@" | tee >> $ERRF +} + +ts_cat() +{ + cat "$@" +} + +ts_err_cat() +{ + ts_cat "$@" | tee >> $ERRF +} + +ts_tc() +{ + SCRIPT=$1; shift + DESC=$1; shift + TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX` || exit + TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit + + $TC $@ 2> $TMP_ERR > $TMP_OUT + + if [ -s $TMP_ERR ]; then + ts_err "${SCRIPT}: ${DESC} failed:" + ts_err "command: $TC $@" + ts_err "stderr output:" + ts_err_cat $TMP_ERR + if [ -s $TMP_OUT ]; then + ts_err "stdout output:" + ts_err_cat $TMP_OUT + fi + elif [ -s $TMP_OUT ]; then + echo "${SCRIPT}: ${DESC} succeeded with output:" + cat $TMP_OUT + else + echo "${SCRIPT}: ${DESC} succeeded" + fi + + rm $TMP_ERR $TMP_OUT +} + +ts_ip() +{ + SCRIPT=$1; shift + DESC=$1; shift + TMP_ERR=`mktemp /tmp/tc_testsuite.XXXXXX` || exit + TMP_OUT=`mktemp /tmp/tc_testsuite.XXXXXX` || exit + + $IP $@ 2> $TMP_ERR > $TMP_OUT + + if [ -s $TMP_ERR ]; then + ts_err "${SCRIPT}: ${DESC} failed:" + ts_err "command: $IP $@" + ts_err "stderr output:" + ts_err_cat $TMP_ERR + if [ -s $TMP_OUT ]; then + ts_err "stdout output:" + ts_err_cat $TMP_OUT + fi + elif [ -s $TMP_OUT ]; then + echo "${SCRIPT}: ${DESC} succeeded with output:" + cat $TMP_OUT + else + echo "${SCRIPT}: ${DESC} succeeded" + fi + + rm $TMP_ERR 
$TMP_OUT +} + +ts_qdisc_available() +{ + HELPOUT=`$TC qdisc add $1 help 2>&1` + if [ "`echo $HELPOUT | grep \"^Unknown qdisc\"`" ]; then + return 0; + else + return 1; + fi +} diff --git a/testsuite/tests/cbq.t b/testsuite/tests/cbq.t new file mode 100644 index 0000000..bff814b --- /dev/null +++ b/testsuite/tests/cbq.t @@ -0,0 +1,10 @@ +#!/bin/sh +$TC qdisc del dev $DEV root >/dev/null 2>&1 +$TC qdisc add dev $DEV root handle 10:0 cbq bandwidth 100Mbit avpkt 1400 mpu 64 +$TC class add dev $DEV parent 10:0 classid 10:12 cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 maxburst 1 avpkt 500 bounded +$TC qdisc list dev $DEV +$TC qdisc del dev $DEV root +$TC qdisc list dev $DEV +$TC qdisc add dev $DEV root handle 10:0 cbq bandwidth 100Mbit avpkt 1400 mpu 64 +$TC class add dev $DEV parent 10:0 classid 10:12 cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 maxburst 1 avpkt 500 bounded +$TC qdisc del dev $DEV root diff --git a/testsuite/tests/cls-testbed.t b/testsuite/tests/cls-testbed.t new file mode 100644 index 0000000..efae2a5 --- /dev/null +++ b/testsuite/tests/cls-testbed.t @@ -0,0 +1,68 @@ +#!/bin/bash +# vim: ft=sh + +source lib/generic.sh + +QDISCS="cbq htb dsmark" + +for q in ${QDISCS}; do + ts_log "Preparing classifier testbed with qdisc $q" + + for c in tests/cls/*.t; do + + case "$q" in + cbq) + ts_tc "cls-testbed" "cbq root qdisc creation" \ + qdisc add dev $DEV root handle 10:0 \ + cbq bandwidth 100Mbit avpkt 1400 mpu 64 + ts_tc "cls-testbed" "cbq root class creation" \ + class add dev $DEV parent 10:0 classid 10:12 \ + cbq bandwidth 100mbit rate 100mbit allot 1514 prio 3 \ + maxburst 1 avpkt 500 bounded + ;; + htb) + ts_qdisc_available "htb" + if [ $? 
-eq 0 ]; then + ts_log "cls-testbed: HTB is unsupported by $TC, skipping" + continue; + fi + ts_tc "cls-testbed" "htb root qdisc creation" \ + qdisc add dev $DEV root handle 10:0 htb + ts_tc "cls-testbed" "htb root class creation" \ + class add dev $DEV parent 10:0 classid 10:12 \ + htb rate 100Mbit quantum 1514 + ;; + dsmark) + ts_qdisc_available "dsmark" + if [ $? -eq 0 ]; then + ts_log "cls-testbed: dsmark is unsupported by $TC, skipping" + continue; + fi + ts_tc "cls-testbed" "dsmark root qdisc creation" \ + qdisc add dev $DEV root handle 20:0 \ + dsmark indices 64 default_index 1 set_tc_index + ts_tc "cls-testbed" "dsmark class creation" \ + class change dev $DEV parent 20:0 classid 20:12 \ + dsmark mask 0xff value 2 + ts_tc "cls-testbed" "prio inner qdisc creation" \ + qdisc add dev $DEV parent 20:0 handle 10:0 prio + ;; + *) + ts_err "cls-testbed: no testbed configuration found for qdisc $q" + continue + ;; + esac + + ts_tc "cls-testbed" "tree listing" qdisc list dev eth0 + ts_tc "cls-testbed" "tree class listing" class list dev eth0 + ts_log "cls-testbed: starting classifier test $c" + $c + + case "$q" in + *) + ts_tc "cls-testbed" "generic qdisc tree deletion" \ + qdisc del dev $DEV root + ;; + esac + done +done diff --git a/testsuite/tests/dsmark.t b/testsuite/tests/dsmark.t new file mode 100644 index 0000000..6934165 --- /dev/null +++ b/testsuite/tests/dsmark.t @@ -0,0 +1,31 @@ +#!/bin/bash +# vim: ft=sh + +source lib/generic.sh + +ts_qdisc_available "dsmark" +if [ $? 
-eq 0 ]; then + ts_log "dsmark: Unsupported by $TC, skipping" + exit 127 +fi + +ts_tc "dsmark" "dsmark root qdisc creation" \ + qdisc add dev $DEV root handle 10:0 \ + dsmark indices 64 default_index 1 set_tc_index + +ts_tc "dsmark" "dsmark class 1 creation" \ + class change dev $DEV parent 10:0 classid 10:12 \ + dsmark mask 0xff value 2 + +ts_tc "dsmark" "dsmark class 2 creation" \ + class change dev $DEV parent 10:0 classid 10:13 \ + dsmark mask 0xfc value 4 + +ts_tc "dsmark" "dsmark dump qdisc" \ + qdisc list dev $DEV + +ts_tc "dsmark" "dsmark dump class" \ + class list dev $DEV parent 10:0 + +ts_tc "dsmark" "generic qdisc tree deletion" \ + qdisc del dev $DEV root -- 2.43.0