- fc4 additions

author Mark Huang <mlhuang@cs.princeton.edu>

Wed, 22 Feb 2006 21:38:27 +0000 (21:38 +0000)

committer Mark Huang <mlhuang@cs.princeton.edu>

Wed, 22 Feb 2006 21:38:27 +0000 (21:38 +0000)
author Mark Huang <mlhuang@cs.princeton.edu>
Wed, 22 Feb 2006 21:38:27 +0000 (21:38 +0000)
committer Mark Huang <mlhuang@cs.princeton.edu>
Wed, 22 Feb 2006 21:38:27 +0000 (21:38 +0000)
diff --git a/ip.8 b/ip.8

new file mode 100644 (file)

index 0000000..50e4419
--- /dev/null
+++ b/ip.8
@@ -0,0 +1,1809 @@
+.TH IP 8 "17 January 2002" "iproute2" "Linux"
+.SH NAME
+ip \- show / manipulate routing, devices, policy routing and tunnels
+.SH SYNOPSIS
+
+.ad l
+.in +8
+.ti -8
+.B ip
+.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | "
+.BR help " }"
+.sp
+
+.ti -8
+.IR OBJECT " := { "
+.BR link " | " addr " | " route " | " rule " | " neigh " | " tunnel " | "\
+maddr " | "  mroute " | " monitor " }"
+.sp
+
+.ti -8
+.IR OPTIONS " := { " 
+\fB\-V\fR[\fIersion\fR] |
+\fB\-s\fR[\fItatistics\fR] |
+\fB\-r\fR[\fIesolve\fR] |
+\fB\-f\fR[\fIamily\fR] {
+.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | "
+\fB\-o\fR[\fIneline\fR] }
+
+.ti -8
+.BI "ip link set " DEVICE
+.RB "{ " up " | " down " | " arp " { " on " | " off " } |"
+.br
+.BR promisc " { " on " | " off " } |"
+.br
+.BR allmulti " { " on " | " off " } |"
+.br
+.BR dynamic " { " on " | " off " } |"
+.br
+.BR multicast " { " on " | " off " } |"
+.br
+.B  txqueuelen
+.IR PACKETS " |"
+.br
+.B  name
+.IR NEWNAME " |"
+.br
+.B  address
+.IR LLADDR " |"
+.B  broadcast 
+.IR LLADDR " |"
+.br
+.B  mtu
+.IR MTU " }"
+
+.ti -8
+.B ip link show
+.RI "[ " DEVICE " ]"
+
+.ti -8
+.BR "ip addr" " { " add " | " del " } " 
+.IB IFADDR " dev " STRING
+
+.ti -8
+.BR "ip addr" " { " show " | " flush " } [ " dev
+.IR STRING " ] [ "
+.B  scope
+.IR SCOPE-ID " ] [ "
+.B  to 
+.IR PREFIX " ] [ " FLAG-LIST " ] [ "
+.B  label
+.IR PATTERN " ]"
+
+.ti -8
+.IR IFADDR " := " PREFIX " | " ADDR
+.B  peer
+.IR PREFIX " [ "
+.B  broadcast
+.IR ADDR " ] [ "
+.B  anycast
+.IR ADDR " ] [ "
+.B  label
+.IR STRING " ] [ "
+.B  scope
+.IR SCOPE-ID " ]"
+
+.ti -8
+.IR SCOPE-ID " := "
+.RB "[ " host " | " link " | " global " | "
+.IR NUMBER " ]"
+
+.ti -8
+.IR FLAG-LIST " := [ "  FLAG-LIST " ] " FLAG
+
+.ti -8
+.IR FLAG " := "
+.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | "\
+tentative " | " deprecated " ]"
+
+.ti -8
+.BR "ip route" " { "
+.BR list " | " flush " } "
+.I  SELECTOR
+
+.ti -8
+.B  ip route get 
+.IR ADDRESS " [ "
+.BI from " ADDRESS " iif " STRING"
+.RB " ] [ " oif 
+.IR STRING " ] [ "
+.B  tos
+.IR TOS " ]"
+
+.ti -8
+.BR "ip route" " { " add " | " del " | " change " | " append " | "\
+replace " | " monitor " } "
+.I  ROUTE
+
+.ti -8
+.IR SELECTOR " := "
+.RB "[ " root
+.IR PREFIX " ] [ "
+.B  match
+.IR PREFIX " ] [ "
+.B  exact
+.IR PREFIX " ] [ "
+.B  table
+.IR TABLE_ID " ] [ "
+.B  proto
+.IR RTPROTO " ] [ "
+.B  type
+.IR TYPE " ] [ "
+.B  scope
+.IR SCOPE " ]"
+
+.ti -8
+.IR ROUTE " := " NODE_SPEC " [ " INFO_SPEC " ]"
+
+.ti -8
+.IR NODE_SPEC " := [ " TYPE " ] " PREFIX " ["
+.B  tos
+.IR TOS " ] [ "
+.B  table
+.IR TABLE_ID " ] [ "
+.B  proto
+.IR RTPROTO " ] [ "
+.B  scope
+.IR SCOPE " ] [ "
+.B  metric
+.IR METRIC " ]"
+
+.ti -8
+.IR INFO_SPEC " := " "NH OPTIONS FLAGS" " ["
+.B  nexthop
+.IR NH " ] ..."
+
+.ti -8
+.IR NH " := [ "
+.B  via
+.IR ADDRESS " ] [ "
+.B  dev
+.IR STRING " ] [ "
+.B  weight
+.IR NUMBER " ] " NHFLAGS
+
+.ti -8
+.IR OPTIONS " := " FLAGS " [ "
+.B  mtu
+.IR NUMBER " ] [ "
+.B  advmss
+.IR NUMBER " ] [ "
+.B  rtt
+.IR NUMBER " ] [ "
+.B  rttvar
+.IR NUMBER " ] [ "
+.B  window
+.IR NUMBER " ] [ "
+.B  cwnd
+.IR NUMBER " ] [ "
+.B  ssthresh
+.IR NUMBER " ] [ "
+.B  realms
+.IR REALM " ]"
+
+.ti -8
+.IR TYPE " := [ "
+.BR unicast " | " local " | " broadcast " | " multicast " | "\
+throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
+
+.ti -8
+.IR TABLE_ID " := [ "
+.BR local " | " main " | " default " | " all " |"
+.IR NUMBER " ]"
+
+.ti -8
+.IR SCOPE " := [ "
+.BR host " | " link " | " global " |"
+.IR NUMBER " ]"
+
+.ti -8
+.IR FLAGS " := [ "
+.BR equalize " ]"
+
+.ti -8
+.IR NHFLAGS " := [ "
+.BR onlink " | " pervasive " ]"
+
+.ti -8
+.IR RTPROTO " := [ "
+.BR kernel " | " boot " | " static " |"
+.IR NUMBER " ]"
+
+.ti -8
+.B  ip rule
+.RB " [ " list " | " add " | " del " ]"
+.I  SELECTOR ACTION
+
+.ti -8
+.IR SELECTOR " := [ "
+.B  from
+.IR PREFIX " ] [ "
+.B  to
+.IR PREFIX " ] [ "
+.B  tos
+.IR TOS " ] [ "
+.B  fwmark
+.IR FWMARK " ] [ "
+.B  dev
+.IR STRING " ] [ "
+.B  pref
+.IR NUMBER " ]"
+
+.ti -8
+.IR ACTION " := [ "
+.B  table
+.IR TABLE_ID " ] [ "
+.B  nat
+.IR ADDRESS " ] [ "
+.BR prohibit " | " reject " | " unreachable " ] [ " realms
+.RI "[" SRCREALM "/]" DSTREALM " ]"
+
+.ti -8
+.IR TABLE_ID " := [ "
+.BR local " | " main " | " default " |"
+.IR NUMBER " ]"
+
+.ti -8
+.BR "ip neigh" " { " add " | " del " | " change " | " replace " } { "
+.IR ADDR " [ "
+.B  lladdr
+.IR LLADDR " ] [ "
+.BR nud " { " permanent " | " noarp " | " stale " | " reachable " } ] | " proxy
+.IR ADDR " } [ "
+.B  dev
+.IR DEV " ]"
+
+.ti -8
+.BR "ip neigh" " { " show " | " flush " } [ " to
+.IR PREFIX " ] [ "
+.B  dev
+.IR DEV " ] [ "
+.B  nud
+.IR STATE " ]"
+
+.ti -8
+.BR "ip tunnel" " { " add " | " change " | " del " | " show " }"
+.RI "[ " NAME " ]"
+.br
+.RB "[ " mode " { " ipip " | " gre " | " sit " } ]"
+.br
+.RB "[ " remote
+.IR ADDR " ] [ "
+.B  local
+.IR ADDR " ]"
+.br
+.RB "[ [" i "|" o "]" seq " ] [ [" i "|" o "]" key
+.IR KEY " ] [ "
+.RB "[" i "|" o "]" csum " ] ]"
+.br
+.RB "[ " ttl
+.IR TTL " ] [ "
+.B  tos
+.IR TOS " ] [ "
+.RB "[" no "]" pmtudisc " ]"
+.br
+.RB "[ " dev
+.IR PHYS_DEV " ]"
+
+.ti -8
+.IR ADDR " := { " IP_ADDRESS " |"
+.BR any " }"
+
+.ti -8
+.IR TOS " := { " NUMBER " |"
+.BR inherit " }"
+
+.ti -8
+.IR TTL " := { " 1 ".." 255 " | "
+.BR inherit " }"
+
+.ti -8
+.IR KEY " := { " DOTTED_QUAD " | " NUMBER " }"
+
+.ti -8
+.BR "ip maddr" " [ " add " | " del " ]"
+.IB MULTIADDR " dev " STRING
+
+.ti -8
+.BR "ip maddr show" " [ " dev
+.IR STRING " ]"
+
+.ti -8
+.BR "ip mroute show" " ["
+.IR PREFIX " ] [ "
+.B  from
+.IR PREFIX " ] [ "
+.B  iif
+.IR DEVICE " ]"
+
+.ti -8
+.BR "ip monitor" " [ " all " |"
+.IR LISTofOBJECTS " ]"
+.in -8
+.ad b
+
+.SH OPTIONS
+
+.TP
+.BR "\-V" , " \-Version"
+print the version of the
+.B ip
+utility and exit.
+
+.TP
+.BR "\-s" , " \-stats", " \-statistics"
+output more information.  If the option
+appears twice or more, the amount of information increases.
+As a rule, the information is statistics or some time values.
+
+.TP
+.BR "\-f" , " \-family"
+followed by protocol family identifier:
+.BR "inet" , " inet6"
+or
+.BR link ,
+enforce the protocol family to use.  If the option is not present,
+the protocol family is guessed from other arguments.  If the rest 
+of the command line does not give enough information to guess the
+family,
+.B ip
+falls back to the default one, usually
+.B inet
+or
+.BR "any" .
+.B link
+is a special family identifier meaning that no networking protocol
+is involved.
+
+.TP
+.B \-4
+shortcut for
+.BR "\-family inet" .
+
+.TP
+.B \-6
+shortcut for
+.BR "\-family inet6" .
+
+.TP
+.B \-0
+shortcut for
+.BR "\-family link" .
+
+.TP
+.BR "\-o" , " \-oneline"
+output each record on a single line, replacing line feeds
+with the
+.B '\e'
+character. This is convenient when you want to count records 
+with
+.BR wc (1)
+or to
+.BR grep (1)
+the output.
+
+.TP
+.BR "\-r" , " \-resolve"
+use the system's name resolver to print DNS names instead of
+host addresses.
+
+.SH IP - COMMAND SYNTAX
+
+.SS
+.I OBJECT
+
+.TP
+.B link
+- network device.
+
+.TP
+.B address
+- protocol (IP or IPv6) address on a device.
+.TP
+.B neighbour
+- ARP or NDISC cache entry.
+
+.TP
+.B route
+- routing table entry.
+
+.TP
+.B rule
+- rule in routing policy database.
+
+.TP
+.B maddress
+- multicast address.
+
+.TP
+.B mroute
+- multicast routing cache entry.
+
+.TP
+.B tunnel
+- tunnel over IP.
+
+.PP
+The names of all objects may be written in full or
+abbreviated form, f.e.
+.B address
+is abbreviated as
+.B addr
+or just
+.BR a .
+
+.SS
+.I COMMAND
+
+Specifies the action to perform on the object.
+The set of possible actions depends on the object type.
+As a rule, it is possible to
+.BR "add" , " delete"
+and
+.B show
+(or
+.BR list )
+objects, but some objects do not allow all of these operations
+or have some additional commands.  The
+.B help
+command is available for all objects.  It prints
+out a list of available commands and argument syntax conventions.
+.sp
+If no command is given, some default command is assumed.
+Usually it is
+.B list
+or, if the objects of this class cannot be listed,
+.BR "help" .
+
+.SH ip link - network device configuration
+
+.B link
+is a network device and the corresponding commands
+display and change the state of devices.
+
+.SS ip link set - change device attributes
+
+.TP
+.BI dev " NAME " (default)
+.I NAME
+specifies network device to operate on.
+
+.TP
+.BR up " and " down
+change the state of the device to
+.B UP
+or
+.BR "DOWN" .
+
+.TP
+.BR "arp on " or " arp off"
+change the
+.B NOARP
+flag on the device.
+
+.TP
+.BR "multicast on " or " multicast off"
+change the
+.B MULTICAST
+flag on the device.
+
+.TP
+.BR "dynamic on " or " dynamic off"
+change the
+.B DYNAMIC
+flag on the device.
+
+.TP
+.BI name " NAME"
+change the name of the device.  This operation is not
+recommended if the device is running or has some addresses
+already configured.
+
+.TP
+.BI txqueuelen " NUMBER"
+.TP 
+.BI txqlen " NUMBER"
+change the transmit queue length of the device.
+
+.TP
+.BI mtu " NUMBER"
+change the 
+.I MTU
+of the device.
+
+.TP
+.BI address " LLADDRESS"
+change the station address of the interface.
+
+.TP
+.BI broadcast " LLADDRESS"
+.TP
+.BI brd " LLADDRESS"
+.TP
+.BI peer " LLADDRESS"
+change the link layer broadcast address or the peer address when
+the interface is
+.IR "POINTOPOINT" .
+
+.PP
+.B Warning:
+If multiple parameter changes are requested,
+.B ip
+aborts immediately after any of the changes has failed.
+This is the only case when
+.B ip
+can move the system to an unpredictable state.  The solution
+is to avoid changing several parameters with one
+.B ip link set
+call.
+
+.SS  ip link show - display device attributes
+
+.TP
+.BI dev " NAME " (default)
+.I NAME
+specifies the network device to show.
+If this argument is omitted all devices are listed.
+
+.TP
+.B up
+only display running interfaces.
+
+.SH ip address - protocol address management.
+
+The
+.B address
+is a protocol (IP or IPv6) address attached
+to a network device.  Each device must have at least one address
+to use the corresponding protocol.  It is possible to have several
+different addresses attached to one device.  These addresses are not
+discriminated, so that the term
+.B alias
+is not quite appropriate for them and we do not use it in this document.
+.sp
+The
+.B ip addr
+command displays addresses and their properties, adds new addresses
+and deletes old ones.
+
+.SS ip address add - add new protocol address.
+
+.TP
+.BI dev " NAME"
+the name of the device to add the address to.
+
+.TP
+.BI local " ADDRESS " (default)
+the address of the interface. The format of the address depends
+on the protocol. It is a dotted quad for IP and a sequence of
+hexadecimal halfwords separated by colons for IPv6.  The
+.I ADDRESS
+may be followed by a slash and a decimal number which encodes
+the network prefix length.
+
+.TP
+.BI peer " ADDRESS"
+the address of the remote endpoint for pointopoint interfaces.
+Again, the
+.I ADDRESS
+may be followed by a slash and a decimal number, encoding the network
+prefix length.  If a peer address is specified, the local address
+cannot have a prefix length.  The network prefix is associated
+with the peer rather than with the local address.
+
+.TP
+.BI broadcast " ADDRESS"
+the broadcast address on the interface.
+.sp
+It is possible to use the special symbols
+.B '+'
+and
+.B '-'
+instead of the broadcast address.  In this case, the broadcast address
+is derived by setting/resetting the host bits of the interface prefix.
+
+.TP
+.BI label " NAME"
+Each address may be tagged with a label string.
+In order to preserve compatibility with Linux-2.0 net aliases,
+this string must coincide with the name of the device or must be prefixed
+with the device name followed by colon.
+
+.TP
+.BI scope " SCOPE_VALUE"
+the scope of the area where this address is valid.
+The available scopes are listed in file
+.BR "/etc/iproute2/rt_scopes" .
+Predefined scope values are:
+
+.in +8
+.B global
+- the address is globally valid.
+.sp
+.B site
+- (IPv6 only) the address is site local, i.e. it is
+valid inside this site.
+.sp
+.B link
+- the address is link local, i.e. it is valid only on this device.
+.sp
+.B host
+- the address is valid only inside this host.
+.in -8
+
+.SS ip address delete - delete protocol address
+.B Arguments:
+coincide with the arguments of
+.BR "ip addr add" .
+The device name is a required argument.  The rest are optional.
+If no arguments are given, the first address is deleted.
+
+.SS ip address show - look at protocol addresses
+
+.TP
+.BI dev " NAME " (default)
+name of device.
+
+.TP
+.BI scope " SCOPE_VAL"
+only list addresses with this scope.
+
+.TP
+.BI to " PREFIX"
+only list addresses matching this prefix.
+
+.TP
+.BI label " PATTERN"
+only list addresses with labels matching the
+.IR "PATTERN" .
+.I PATTERN
+is a usual shell style pattern.
+
+.TP
+.BR dynamic " and " permanent
+(IPv6 only) only list addresses installed due to stateless
+address configuration or only list permanent (not dynamic)
+addresses.
+
+.TP
+.B tentative
+(IPv6 only) only list addresses which did not pass duplicate
+address detection.
+
+.TP
+.B deprecated
+(IPv6 only) only list deprecated addresses.
+
+.TP
+.BR primary " and " secondary
+only list primary (or secondary) addresses.
+
+.SS ip address flush - flush protocol addresses
+This command flushes the protocol addresses selected by some criteria.
+
+.PP
+This command has the same arguments as
+.BR show .
+The difference is that it does not run when no arguments are given.
+
+.PP
+.B Warning:
+This command (and other
+.B flush
+commands described below) is pretty dangerous.  If you make a mistake,
+it will not forgive it, but will cruelly purge all the addresses.
+
+.PP
+With the
+.B \-statistics
+option, the command becomes verbose. It prints out the number of deleted
+addresses and the number of rounds made to flush the address list.  If
+this option is given twice,
+.B ip addr flush
+also dumps all the deleted addresses in the format described in the
+previous subsection.
+
+.SH ip neighbour - neighbour/arp tables management.
+
+.B neighbour
+objects establish bindings between protocol addresses and
+link layer addresses for hosts sharing the same link.
+Neighbour entries are organized into tables. The IPv4 neighbour table
+is known by another name - the ARP table.
+
+.P
+The corresponding commands display neighbour bindings
+and their properties, add new neighbour entries and delete old ones.
+
+.SS ip neighbour add - add a new neighbour entry
+.SS ip neighbour change - change an existing entry
+.SS ip neighbour replace - add a new entry or change an existing one
+
+These commands create new neighbour records or update existing ones.
+
+.TP
+.BI to " ADDRESS " (default)
+the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
+
+.TP
+.BI dev " NAME"
+the interface to which this neighbour is attached.
+
+.TP
+.BI lladdr " LLADDRESS"
+the link layer address of the neighbour.
+.I LLADDRESS
+can also be
+.BR "null" .
+
+.TP
+.BI nud " NUD_STATE"
+the state of the neighbour entry.
+.B nud
+is an abbreviation for 'Neighbour Unreachability Detection'.
+The state can take one of the following values:
+
+.in +8
+.B permanent
+- the neighbour entry is valid forever and can only
+be removed administratively.
+.sp
+
+.B noarp
+- the neighbour entry is valid. No attempts to validate
+this entry will be made but it can be removed when its lifetime expires.
+.sp
+
+.B reachable
+- the neighbour entry is valid until the reachability
+timeout expires.
+.sp
+
+.B stale
+- the neighbour entry is valid but suspicious.
+This option to
+.B ip neigh
+does not change the neighbour state if it was valid and the address
+is not changed by this command.
+.in -8
+
+.SS ip neighbour delete - delete a neighbour entry
+This command invalidates a neighbour entry.
+
+.PP
+The arguments are the same as with
+.BR "ip neigh add" ,
+except that
+.B lladdr
+and
+.B nud
+are ignored.
+
+.PP
+.B Warning:
+Attempts to delete or manually change a
+.B noarp
+entry created by the kernel may result in unpredictable behaviour.
+Particularly, the kernel may try to resolve this address even
+on a
+.B NOARP
+interface or if the address is multicast or broadcast.
+
+.SS ip neighbour show - list neighbour entries
+
+This command displays neighbour tables.
+
+.TP
+.BI to " ADDRESS " (default)
+the prefix selecting the neighbours to list.
+
+.TP
+.BI dev " NAME"
+only list the neighbours attached to this device.
+
+.TP
+.B unused
+only list neighbours which are not currently in use.
+
+.TP
+.BI nud " NUD_STATE"
+only list neighbour entries in this state.
+.I NUD_STATE
+takes values listed above or the special value
+.B all
+which means all states.  This option may occur more than once.
+If this option is absent,
+.B ip
+lists all entries except for
+.B none
+and
+.BR "noarp" .
+
+.SS ip neighbour flush - flush neighbour entries
+This command flushes neighbour tables, selecting
+entries to flush by some criteria.
+
+.PP
+This command has the same arguments as
+.BR show .
+The differences are that it does not run when no arguments are given,
+and that the default neighbour states to be flushed do not include
+.B permanent
+and
+.BR "noarp" .
+
+.PP
+With the
+.B \-statistics
+option, the command becomes verbose.  It prints out the number of
+deleted neighbours and the number of rounds made to flush the
+neighbour table.  If the option is given
+twice,
+.B ip neigh flush
+also dumps all the deleted neighbours.
+
+.SH ip route - routing table management
+Manipulate route entries in the kernel routing tables, which keep
+information about paths to other networked nodes.
+.sp
+.B Route types:
+
+.in +8
+.B unicast
+- the route entry describes real paths to the destinations covered
+by the route prefix.
+
+.sp
+.B unreachable
+- these destinations are unreachable.  Packets are discarded and the
+ICMP message
+.I host unreachable
+is generated.
+The local senders get an
+.I EHOSTUNREACH
+error.
+
+.sp
+.B blackhole
+- these destinations are unreachable.  Packets are discarded silently.
+The local senders get an
+.I EINVAL
+error.
+
+.sp
+.B prohibit
+- these destinations are unreachable.  Packets are discarded and the
+ICMP message
+.I communication administratively prohibited
+is generated.  The local senders get an
+.I EACCES
+error.
+
+.sp
+.B local
+- the destinations are assigned to this host.  The packets are looped
+back and delivered locally.
+
+.sp
+.B broadcast
+- the destinations are broadcast addresses.  The packets are sent as
+link broadcasts.
+
+.sp
+.B throw
+- a special control route used together with policy rules. If such a
+route is selected, lookup in this table is terminated pretending that
+no route was found.  Without policy routing it is equivalent to the
+absence of the route in the routing table.  The packets are dropped
+and the ICMP message
+.I net unreachable
+is generated.  The local senders get an
+.I ENETUNREACH
+error.
+
+.sp
+.B nat
+- a special NAT route.  Destinations covered by the prefix
+are considered to be dummy (or external) addresses which require translation
+to real (or internal) ones before forwarding.  The addresses to translate to
+are selected with the attribute
+.BR "via" .
+
+.sp
+.B anycast
+.RI "- " "not implemented"
+the destinations are
+.I anycast
+addresses assigned to this host.  They are mainly equivalent
+to
+.B local
+with one difference: such addresses are invalid when used
+as the source address of any packet.
+
+.sp
+.B multicast
+- a special type used for multicast routing.  It is not present in
+normal routing tables.
+.in -8
+
+.P
+.B Route tables:
+Linux-2.x can pack routes into several routing
+tables identified by a number in the range from 1 to 255 or by
+name from the file
+.BR /etc/iproute2/rt_tables .
+By default all normal routes are inserted into the
+.B main
+table (ID 254) and the kernel only uses this table when calculating routes.
+
+.sp
+Actually, one other table always exists, which is invisible but
+even more important.  It is the
+.B local
+table (ID 255).  This table
+consists of routes for local and broadcast addresses.  The kernel maintains
+this table automatically and the administrator usually need not modify it
+or even look at it.
+
+The multiple routing tables enter the game when
+.I policy routing
+is used.
+
+.SS ip route add - add new route
+.SS ip route change - change route
+.SS ip route replace - change or add new one
+
+.TP
+.BI to " TYPE PREFIX " (default)
+the destination prefix of the route.  If
+.I TYPE
+is omitted,
+.B ip
+assumes type
+.BR "unicast" .
+Other values of
+.I TYPE
+are listed above.
+.I PREFIX
+is an IP or IPv6 address optionally followed by a slash and the
+prefix length.  If the length of the prefix is missing,
+.B ip
+assumes a full-length host route.  There is also a special
+.I PREFIX
+.B default
+- which is equivalent to IP
+.B 0/0
+or to IPv6
+.BR "::/0" .
+
+.TP
+.BI tos " TOS"
+.TP
+.BI dsfield " TOS"
+the Type Of Service (TOS) key.  This key has no associated mask and
+the longest match is understood as: First, compare the TOS
+of the route and of the packet.  If they are not equal, then the packet
+may still match a route with a zero TOS.
+.I TOS
+is either an 8 bit hexadecimal number or an identifier
+from
+.BR "/etc/iproute2/rt_dsfield" .
+
+.TP
+.BI metric " NUMBER"
+.TP
+.BI preference " NUMBER"
+the preference value of the route.
+.I NUMBER
+is an arbitrary 32bit number.
+
+.TP
+.BI table " TABLEID"
+the table to add this route to.
+.I TABLEID
+may be a number or a string from the file
+.BR "/etc/iproute2/rt_tables" .
+If this parameter is omitted,
+.B ip
+assumes the
+.B main
+table, with the exception of
+.BR local ", " broadcast " and " nat
+routes, which are put into the
+.B local
+table by default.
+
+.TP
+.BI dev " NAME"
+the output device name.
+
+.TP
+.BI via " ADDRESS"
+the address of the nexthop router.  Actually, the sense of this field
+depends on the route type.  For normal
+.B unicast
+routes it is either the true next hop router or, if it is a direct
+route installed in BSD compatibility mode, it can be a local address
+of the interface.  For NAT routes it is the first address of the block
+of translated IP destinations.
+
+.TP
+.BI src " ADDRESS"
+the source address to prefer when sending to the destinations
+covered by the route prefix.
+
+.TP
+.BI realm " REALMID"
+the realm to which this route is assigned.
+.I REALMID
+may be a number or a string from the file
+.BR "/etc/iproute2/rt_realms" .
+
+.TP
+.BI mtu " MTU"
+.TP
+.BI "mtu lock" " MTU"
+the MTU along the path to the destination.  If the modifier
+.B lock
+is not used, the MTU may be updated by the kernel due to
+Path MTU Discovery.  If the modifier
+.B lock
+is used, no path MTU discovery will be tried, all packets
+will be sent without the DF bit in IPv4 case or fragmented
+to MTU for IPv6.
+
+.TP
+.BI window " NUMBER"
+the maximal window for TCP to advertise to these destinations,
+measured in bytes.  It limits maximal data bursts that our TCP
+peers are allowed to send to us.
+
+.TP
+.BI rtt " NUMBER"
+the initial RTT ('Round Trip Time') estimate.
+
+.TP
+.BI rttvar " NUMBER " "(2.3.15+ only)"
+the initial RTT variance estimate.
+
+.TP
+.BI ssthresh " NUMBER " "(2.3.15+ only)"
+an estimate for the initial slow start threshold.
+
+.TP
+.BI cwnd " NUMBER " "(2.3.15+ only)"
+the clamp for congestion window.  It is ignored if the
+.B lock
+flag is not used.
+
+.TP
+.BI advmss " NUMBER " "(2.3.15+ only)"
+the MSS ('Maximal Segment Size') to advertise to these
+destinations when establishing TCP connections.  If it is not given,
+Linux uses a default value calculated from the first hop device MTU.
+(If the path to these destinations is asymmetric, this guess may be wrong.)
+
+.TP
+.BI reordering " NUMBER " "(2.3.15+ only)"
+Maximal reordering on the path to this destination.
+If it is not given, Linux uses the value selected with
+.B sysctl
+variable
+.BR "net/ipv4/tcp_reordering" .
+
+.TP
+.BI nexthop " NEXTHOP"
+the nexthop of a multipath route.
+.I NEXTHOP
+is a complex value with its own syntax similar to the top level
+argument lists:
+
+.in +8
+.BI via " ADDRESS"
+- is the nexthop router.
+.sp
+
+.BI dev " NAME"
+- is the output device.
+.sp
+
+.BI weight " NUMBER"
+- is a weight for this element of a multipath
+route reflecting its relative bandwidth or quality.
+.in -8
+
+.TP
+.BI scope " SCOPE_VAL"
+the scope of the destinations covered by the route prefix.
+.I SCOPE_VAL
+may be a number or a string from the file
+.BR "/etc/iproute2/rt_scopes" .
+If this parameter is omitted,
+.B ip
+assumes scope
+.B global
+for all gatewayed
+.B unicast
+routes, scope
+.B link
+for direct
+.BR unicast " and " broadcast
+routes and scope
+.BR host " for " local
+routes.
+
+.TP
+.BI protocol " RTPROTO"
+the routing protocol identifier of this route.
+.I RTPROTO
+may be a number or a string from the file
+.BR "/etc/iproute2/rt_protos" .
+If the routing protocol ID is not given,
+.B ip
+assumes protocol
+.B boot
+(i.e. it assumes the route was added by someone who doesn't
+understand what they are doing).  Several protocol values have
+a fixed interpretation.
+Namely:
+
+.in +8
+.B redirect
+- the route was installed due to an ICMP redirect.
+.sp
+
+.B kernel
+- the route was installed by the kernel during autoconfiguration.
+.sp
+
+.B boot
+- the route was installed during the bootup sequence.
+If a routing daemon starts, it will purge all of them.
+.sp
+
+.B static
+- the route was installed by the administrator
+to override dynamic routing. Routing daemon will respect them
+and, probably, even advertise them to its peers.
+.sp
+
+.B ra
+- the route was installed by Router Discovery protocol.
+.in -8
+
+.sp
+The rest of the values are not reserved and the administrator is free
+to assign (or not to assign) protocol tags.
+
+.TP
+.B onlink
+pretend that the nexthop is directly attached to this link,
+even if it does not match any interface prefix.
+
+.TP
+.B equalize
+allow packet by packet randomization on multipath routes.
+Without this modifier, the route will be frozen to one selected
+nexthop, so that load splitting will only occur on a per-flow basis.
+.B equalize
+only works if the kernel is patched.
+
+.SS ip route delete - delete route
+
+.B ip route del
+has the same arguments as
+.BR "ip route add" ,
+but their semantics are a bit different.
+
+Key values
+.RB "(" to ", " tos ", " preference " and " table ")"
+select the route to delete.  If optional attributes are present,
+.B ip
+verifies that they coincide with the attributes of the route to delete.
+If no route with the given key and attributes was found,
+.B ip route del
+fails.
+
+.SS ip route show - list routes
+the command displays the contents of the routing tables or the route(s)
+selected by some criteria.
+
+.TP
+.BI to " SELECTOR " (default)
+only select routes from the given range of destinations.
+.I SELECTOR
+consists of an optional modifier
+.RB "(" root ", " match " or " exact ")"
+and a prefix.
+.BI root " PREFIX"
+selects routes with prefixes not shorter than
+.IR PREFIX "."
+F.e.
+.BI root " 0/0"
+selects the entire routing table.
+.BI match " PREFIX"
+selects routes with prefixes not longer than
+.IR PREFIX "."
+F.e.
+.BI match " 10.0/16"
+selects
+.IR 10.0/16 ","
+.IR 10/8 " and " 0/0 ,
+but it does not select
+.IR 10.1/16 " and " 10.0.0/24 .
+And
+.BI exact " PREFIX"
+(or just
+.IR PREFIX ")"
+selects routes with this exact prefix. If none of these options
+is present,
+.B ip
+assumes
+.BI root " 0/0"
+i.e. it lists the entire table.
+
+.TP
+.BI tos " TOS"
+.TP
+.BI dsfield " TOS"
+only select routes with the given TOS.
+
+.TP
+.BI table " TABLEID"
+show the routes from this table(s).  The default setting is to show
+.BR "table main" .
+.I TABLEID
+may either be the ID of a real table or one of the special values:
+.sp
+.in +8
+.B all
+- list all of the tables.
+.sp
+.B cache
+- dump the routing cache.
+.in -8
+
+.TP
+.B cloned
+.TP
+.B cached
+list cloned routes i.e. routes which were dynamically forked from
+other routes because some route attribute (f.e. MTU) was updated.
+Actually, it is equivalent to
+.BR "table cache" "."
+
+.TP
+.BI from " SELECTOR"
+the same syntax as for
+.BR to ","
+but it binds the source address range rather than destinations.
+Note that the
+.B from
+option only works with cloned routes.
+
+.TP
+.BI protocol " RTPROTO"
+only list routes of this protocol.
+
+.TP
+.BI scope " SCOPE_VAL"
+only list routes with this scope.
+
+.TP
+.BI type " TYPE"
+only list routes of this type.
+
+.TP
+.BI dev " NAME"
+only list routes going via this device.
+
+.TP
+.BI via " PREFIX"
+only list routes going via the nexthop routers selected by
+.IR PREFIX "."
+
+.TP
+.BI src " PREFIX"
+only list routes with preferred source addresses selected
+by
+.IR PREFIX "."
+
+.TP
+.BI realm " REALMID"
+.TP
+.BI realms " FROMREALM/TOREALM"
+only list routes with these realms.
+
+.SS ip route flush - flush routing tables
+this command flushes routes selected by some criteria.
+
+.sp
+The arguments have the same syntax and semantics as the arguments of
+.BR "ip route show" ,
+but routing tables are not listed but purged.  The only difference is
+the default action:
+.B show
+dumps all the IP main routing table but
+.B flush
+prints the helper page.
+
+.sp
+With the
+.B \-statistics
+option, the command becomes verbose. It prints out the number of
+deleted routes and the number of rounds made to flush the routing
+table. If the option is given
+twice,
+.B ip route flush
+also dumps all the deleted routes in the format described in the
+previous subsection.
+
+.SS ip route get - get a single route
+this command gets a single route to a destination and prints its
+contents exactly as the kernel sees it.
+
+.TP
+.BI to " ADDRESS " (default)
+the destination address.
+
+.TP
+.BI from " ADDRESS"
+the source address.
+
+.TP
+.BI tos " TOS"
+.TP
+.BI dsfield " TOS"
+the Type Of Service.
+
+.TP
+.BI iif " NAME"
+the device from which this packet is expected to arrive.
+
+.TP
+.BI oif " NAME"
+force the output device on which this packet will be routed.
+
+.TP
+.B connected
+if no source address 
+.RB "(option " from ")"
+was given, relookup the route with the source set to the preferred
+address received from the first lookup.
+If policy routing is used, it may be a different route.
+
+.P
+Note that this operation is not equivalent to
+.BR "ip route show" .
+.B show
+shows existing routes.
+.B get
+resolves them and creates new clones if necessary.  Essentially,
+.B get
+is equivalent to sending a packet along this path.
+If the
+.B iif
+argument is not given, the kernel creates a route
+to output packets towards the requested destination.
+This is equivalent to pinging the destination
+with a subsequent
+.BR "ip route ls cache" ,
+however, no packets are actually sent.  With the
+.B iif
+argument, the kernel pretends that a packet arrived from this interface
+and searches for a path to forward the packet.
+
+.SH ip rule - routing policy database management
+
+.BR "Rule" s
+in the routing policy database control the route selection algorithm.
+
+.P
+Classic routing algorithms used in the Internet make routing decisions
+based only on the destination address of packets (and in theory,
+but not in practice, on the TOS field).
+
+.P
+In some circumstances we want to route packets differently depending not only
+on destination addresses, but also on other packet fields: source address,
+IP protocol, transport protocol ports or even packet payload.
+This task is called 'policy routing'.
+
+.P
+To solve this task, the conventional destination based routing table, ordered
+according to the longest match rule, is replaced with a 'routing policy
+database' (or RPDB), which selects routes by executing some set of rules.
+
+.P
+Each policy routing rule consists of a
+.B selector
+and an
+.B action predicate.
+The RPDB is scanned in the order of increasing priority. The selector
+of each rule is applied to {source address, destination address, incoming
+interface, tos, fwmark} and, if the selector matches the packet,
+the action is performed.  The action predicate may return with success.
+In this case, it will either give a route or failure indication
+and the RPDB lookup is terminated. Otherwise, the RPDB program
+continues on the next rule.
+
+.P
+Semantically, the natural action is to select the nexthop and the output device.
+
+.P
+At startup time the kernel configures the default RPDB consisting of three
+rules:
+
+.TP
+1.
+Priority: 0, Selector: match anything, Action: lookup routing
+table
+.B local
+(ID 255).
+The
+.B local
+table is a special routing table containing
+high priority control routes for local and broadcast addresses.
+.sp
+Rule 0 is special. It cannot be deleted or overridden.
+
+.TP
+2.
+Priority: 32766, Selector: match anything, Action: lookup routing
+table
+.B main
+(ID 254).
+The
+.B main
+table is the normal routing table containing all non-policy
+routes. This rule may be deleted and/or overridden with other
+ones by the administrator.
+
+.TP
+3.
+Priority: 32767, Selector: match anything, Action: lookup routing
+table
+.B default
+(ID 253).
+The
+.B default
+table is empty.  It is reserved for some post-processing if no previous
+default rules selected the packet.
+This rule may also be deleted.
+
+.P
+Each RPDB entry has additional
+attributes.  For example, each rule has a pointer to some routing
+table.  NAT and masquerading rules have an attribute to select new IP
+address to translate/masquerade.  Besides that, rules have some
+optional attributes, which routes have, namely
+.BR "realms" .
+These values do not override those contained in the routing tables.  They
+are only used if the route did not select any attributes.
+
+.sp
+The RPDB may contain rules of the following types:
+
+.in +8
+.B unicast
+- the rule prescribes to return the route found
+in the routing table referenced by the rule.
+
+.B blackhole
+- the rule prescribes to silently drop the packet.
+
+.B unreachable
+- the rule prescribes to generate a 'Network is unreachable' error.
+
+.B prohibit
+- the rule prescribes to generate 'Communication is administratively
+prohibited' error.
+
+.B nat
+- the rule prescribes to translate the source address
+of the IP packet into some other value.
+.in -8
+
+.SS ip rule add - insert a new rule
+.SS ip rule delete - delete a rule
+
+.TP
+.BI type " TYPE " (default)
+the type of this rule.  The list of valid types was given in the previous
+subsection.
+
+.TP
+.BI from " PREFIX"
+select the source prefix to match.
+
+.TP
+.BI to " PREFIX"
+select the destination prefix to match.
+
+.TP
+.BI iif " NAME"
+select the incoming device to match.  If the interface is loopback,
+the rule only matches packets originating from this host.  This means
+that you may create separate routing tables for forwarded and local
+packets and, hence, completely segregate them.
+
+.TP
+.BI tos " TOS"
+.TP
+.BI dsfield " TOS"
+select the TOS value to match.
+
+.TP
+.BI fwmark " MARK"
+select the
+.B fwmark
+value to match.
+
+.TP
+.BI priority " PREFERENCE"
+the priority of this rule.  Each rule should have an explicitly
+set
+.I unique
+priority value.
+
+.TP
+.BI table " TABLEID"
+the routing table identifier to lookup if the rule selector matches.
+
+.TP
+.BI realms " FROM/TO"
+Realms to select if the rule matched and the routing table lookup
+succeeded.  Realm 
+.I TO
+is only used if the route did not select any realm.
+
+.TP
+.BI nat " ADDRESS"
+The base of the IP address block to translate (for source addresses).
+The 
+.I ADDRESS
+may be either the start of the block of NAT addresses (selected by NAT
+routes) or a local host address (or even zero).
+In the last case the router does not translate the packets, but
+masquerades them to this address.
+
+.B Warning:
+Changes to the RPDB made with these commands do not become active
+immediately.  It is assumed that after a script finishes a batch of
+updates, it flushes the routing cache with
+.BR "ip route flush cache" .
+
+.SS ip rule show - list rules
+This command has no arguments.
+
+.SH ip maddress - multicast addresses management
+
+.B maddress
+objects are multicast addresses.
+
+.SS ip maddress show - list multicast addresses
+
+.TP
+.BI dev " NAME " (default)
+the device name.
+
+.SS ip maddress add - add a multicast address
+.SS ip maddress delete - delete a multicast address
+These commands attach/detach a static link layer multicast address
+to listen on the interface.
+Note that it is impossible to join protocol multicast groups
+statically.  This command only manages link layer addresses.
+
+.TP
+.BI address " LLADDRESS " (default)
+the link layer multicast address.
+
+.TP
+.BI dev " NAME"
+the device to join/leave this multicast address.
+
+.SH ip mroute - multicast routing cache management
+.B mroute
+objects are multicast routing cache entries created by a user level
+mrouting daemon (e.g.
+.B pimd
+or
+.B mrouted
+).
+
+Due to the limitations of the current interface to the multicast routing
+engine, it is impossible to change
+.B mroute
+objects administratively, so we may only display them.  This limitation
+will be removed in the future.
+
+.SS ip mroute show - list mroute cache entries
+
+.TP
+.BI to " PREFIX " (default)
+the prefix selecting the destination multicast addresses to list.
+
+.TP
+.BI iif " NAME"
+the interface on which multicast packets are received.
+
+.TP
+.BI from " PREFIX"
+the prefix selecting the IP source addresses of the multicast route.
+
+.SH ip tunnel - tunnel configuration
+.B tunnel
+objects are tunnels, encapsulating packets in IPv4 packets and then
+sending them over the IP infrastructure.
+
+.SS ip tunnel add - add a new tunnel
+.SS ip tunnel change - change an existing tunnel
+.SS ip tunnel delete - destroy a tunnel
+
+.TP
+.BI name " NAME " (default)
+select the tunnel device name.
+
+.TP
+.BI mode " MODE"
+set the tunnel mode.  Three modes are currently available:
+.BR ipip ", " sit " and " gre "."
+
+.TP
+.BI remote " ADDRESS"
+set the remote endpoint of the tunnel.
+
+.TP
+.BI local " ADDRESS"
+set the fixed local address for tunneled packets.
+It must be an address on another interface of this host.
+
+.TP
+.BI ttl " N"
+set a fixed TTL 
+.I N
+on tunneled packets.
+.I N
+is a number in the range 1--255. 0 is a special value
+meaning that packets inherit the TTL value. 
+The default value is:
+.BR "inherit" .
+
+.TP
+.BI tos " T"
+.TP
+.BI dsfield " T"
+set a fixed TOS
+.I T
+on tunneled packets.
+The default value is:
+.BR "inherit" .
+
+.TP
+.BI dev " NAME" 
+bind the tunnel to the device
+.I NAME
+so that tunneled packets will only be routed via this device and will
+not be able to escape to another device when the route to endpoint
+changes.
+
+.TP
+.B nopmtudisc
+disable Path MTU Discovery on this tunnel.
+It is enabled by default.  Note that a fixed TTL is incompatible
+with this option: tunnelling with a fixed TTL always performs Path MTU
+Discovery.
+
+.TP
+.BI key " K"
+.TP
+.BI ikey " K"
+.TP
+.BI okey " K"
+.RB ( " only GRE tunnels " )
+use keyed GRE with key
+.IR K ". " K
+is either a number or an IP address-like dotted quad.
+The
+.B key
+parameter sets the key to use in both directions.
+The
+.BR ikey " and " okey
+parameters set different keys for input and output.
+   
+.TP
+.BR csum ", " icsum ", " ocsum
+.RB ( " only GRE tunnels " )
+generate/require checksums for tunneled packets.
+The 
+.B ocsum
+flag calculates checksums for outgoing packets.
+The
+.B icsum
+flag requires that all input packets have the correct
+checksum.  The
+.B csum
+flag is equivalent to the combination
+.BR "icsum ocsum" .
+
+.TP
+.BR seq ", " iseq ", " oseq
+.RB ( " only GRE tunnels " )
+serialize packets.
+The
+.B oseq
+flag enables sequencing of outgoing packets.
+The
+.B iseq
+flag requires that all input packets are serialized.
+The
+.B  seq
+flag is equivalent to the combination 
+.BR "iseq oseq" .
+.B It doesn't work. Don't use it.
+
+.SS ip tunnel show - list tunnels
+This command has no arguments.
+
+.SH ip monitor and rtmon - state monitoring
+
+The
+.B ip
+utility can monitor the state of devices, addresses
+and routes continuously.  This option has a slightly different format.
+Namely, the
+.B monitor
+command is the first in the command line and then the object list follows:
+
+.BR "ip monitor" " [ " all " |"
+.IR OBJECT-LIST " ]"
+
+.I OBJECT-LIST
+is the list of object types that we want to monitor.
+It may contain
+.BR link ", " address " and " route "."
+If no
+.B file
+argument is given,
+.B ip
+opens RTNETLINK, listens on it and dumps state changes in the format
+described in previous sections.
+
+.P
+If a file name is given, it does not listen on RTNETLINK,
+but opens the file containing RTNETLINK messages saved in binary format
+and dumps them.  Such a history file can be generated with the
+.B rtmon
+utility.  This utility has a command line syntax similar to
+.BR "ip monitor" .
+Ideally,
+.B rtmon
+should be started before the first network configuration command
+is issued. For example, if you insert:
+.sp
+.in +8
+rtmon file /var/log/rtmon.log
+.in -8
+.sp
+in a startup script, you will be able to view the full history
+later.
+
+.P
+Certainly, it is possible to start
+.B rtmon
+at any time.
+It prepends the history with the state snapshot dumped at the moment
+of starting.
+
+.SH HISTORY
+
+.B ip
+was written by Alexey N. Kuznetsov and added in Linux 2.2.
+.SH SEE ALSO
+.BR tc (8)
+.br
+.RB "IP Command reference " ip-cref.ps
+.br
+.RB "IP tunnels " ip-cref.ps
+
+.SH AUTHOR
+
+Manpage maintained by Michail Litvak <mci@owl.openwall.com>
diff --git a/tc-cbq-details.8 b/tc-cbq-details.8

new file mode 100644 (file)

index 0000000..e47da62
--- /dev/null
+++ b/tc-cbq-details.8
@@ -0,0 +1,425 @@
+.TH CBQ 8 "8 December 2001" "iproute2" "Linux"
+.SH NAME
+CBQ \- Class Based Queueing
+.SH SYNOPSIS
+.B tc qdisc ... dev
+dev
+.B  ( parent
+classid 
+.B | root) [ handle 
+major: 
+.B ] cbq avpkt
+bytes
+.B bandwidth
+rate
+.B [ cell 
+bytes
+.B ] [ ewma
+log
+.B ] [ mpu
+bytes
+.B ] 
+
+.B tc class ... dev
+dev
+.B parent 
+major:[minor]
+.B [ classid 
+major:minor
+.B ] cbq allot
+bytes
+.B [ bandwidth 
+rate 
+.B ] [ rate 
+rate
+.B ] prio
+priority
+.B [ weight
+weight
+.B ] [ minburst 
+packets
+.B ] [ maxburst 
+packets 
+.B ] [ ewma 
+log
+.B ] [ cell
+bytes
+.B ] avpkt
+bytes
+.B [ mpu
+bytes 
+.B ] [ bounded isolated ] [ split
+handle
+.B & defmap
+defmap
+.B ] [ estimator 
+interval timeconstant
+.B ]
+
+.SH DESCRIPTION
+Class Based Queueing is a classful qdisc that implements a rich
+linksharing hierarchy of classes.  It contains shaping elements as
+well as prioritizing capabilities.  Shaping is performed using link
+idle time calculations based on the timing of dequeue events and 
+underlying link bandwidth.
+
+.SH SHAPING ALGORITHM
+Shaping is done using link idle time calculations, and actions taken if
+these calculations deviate from set limits.
+
+When shaping a 10mbit/s connection to 1mbit/s, the link will
+be idle 90% of the time. If it isn't, it needs to be throttled so that it
+IS idle 90% of the time.
+
+From the kernel's perspective, this is hard to measure, so CBQ instead 
+derives the idle time from the number of microseconds (in fact, jiffies) 
+that elapse between  requests from the device driver for more data. Combined 
+with the  knowledge of packet sizes, this is used to approximate how full or 
+empty the link is.
+
+This is rather circumspect and doesn't always arrive at proper
+results. For example, what is the actual link speed of an interface
+that is not really able to transmit the full 100mbit/s of data,
+perhaps because of a badly implemented driver? A PCMCIA network card
+will also never achieve 100mbit/s because of the way the bus is
+designed - again, how do we calculate the idle time?
+
+The physical link bandwidth may be ill defined in case of not-quite-real 
+network devices like PPP over Ethernet or PPTP over TCP/IP. The effective 
+bandwidth in that case is probably determined by the efficiency of pipes 
+to userspace - which is not defined.
+
+During operations, the effective idletime is measured using an
+exponential weighted moving average (EWMA), which considers recent
+packets to be exponentially more important than past ones. The Unix
+loadaverage is calculated in the same way.
+
+The calculated idle time is subtracted from the EWMA measured one,
+the resulting number is called 'avgidle'. A perfectly loaded link has
+an avgidle of zero: packets arrive exactly at the calculated
+interval.
+
+An overloaded link has a negative avgidle and if it gets too negative,
+CBQ throttles and is then 'overlimit'.
+
+Conversely, an idle link might amass a huge avgidle, which would then
+allow infinite bandwidths after a few hours of silence. To prevent
+this, avgidle is capped at 
+.B maxidle.
+
+If overlimit, in theory, the CBQ could throttle itself for exactly the
+amount of time that was calculated to pass between packets, and then
+pass one packet, and throttle again. Due to timer resolution constraints,
+this may not be feasible, see the 
+.B minburst
+parameter below.
+
+.SH CLASSIFICATION
+Within the one CBQ instance many classes may exist. Each of these classes
+contains another qdisc, by default 
+.BR tc-pfifo (8).
+
+When enqueueing a packet, CBQ starts at the root and uses various methods to 
+determine which class should receive the data. If a verdict is reached, this
+process is repeated for the recipient class which might have further
+means of classifying traffic to its children, if any.
+
+CBQ has the following methods available to classify a packet to any child 
+classes.
+.TP
+(i)
+.B skb->priority class encoding.
+Can be set from userspace by an application with the 
+.B SO_PRIORITY
+setsockopt.
+The 
+.B skb->priority class encoding
+only applies if the skb->priority holds a major:minor handle of an existing 
+class within  this qdisc.
+.TP
+(ii)
+tc filters attached to the class.
+.TP
+(iii)
+The defmap of a class, as set with the 
+.B split & defmap
+parameters. The defmap may contain instructions for each possible Linux packet
+priority.
+
+.P
+Each class also has a 
+.B level.
+Leaf nodes, attached to the bottom of the class hierarchy, have a level of 0.
+.SH CLASSIFICATION ALGORITHM
+
+Classification is a loop, which terminates when a leaf class is found. At any 
+point the loop may jump to the fallback algorithm.
+
+The loop consists of the following steps:
+.TP 
+(i)
+If the packet is generated locally and has a valid classid encoded within its
+.B skb->priority,
+choose it and terminate.
+
+.TP
+(ii)
+Consult the tc filters, if any, attached to this child. If these return
+a class which is not a leaf class, restart loop from the class returned.
+If it is a leaf, choose it and terminate.
+.TP
+(iii)
+If the tc filters did not return a class, but did return a classid, 
+try to find a class with that id within this qdisc. 
+Check if the found class is of a lower
+.B level
+than the current class. If so, and the returned class is not a leaf node,
+restart the loop at the found class. If it is a leaf node, terminate.
+If we found an upward reference to a higher level, enter the fallback 
+algorithm.
+.TP
+(iv)
+If the tc filters did not return a class, nor a valid reference to one,
+consider the minor number of the reference to be the priority. Retrieve
+a class from the defmap of this class for the priority. If this did not
+contain a class, consult the defmap of this class for the 
+.B BEST_EFFORT
+class. If this is an upward reference, or no 
+.B BEST_EFFORT 
+class was defined,
+enter the fallback algorithm. If a valid class was found, and it is not a
+leaf node, restart the loop at this class. If it is a leaf, choose it and 
+terminate. If
+neither the priority distilled from the classid, nor the 
+.B BEST_EFFORT 
+priority yielded a class, enter the fallback algorithm.
+.P
+The fallback algorithm resides outside of the loop and is as follows.
+.TP
+(i)
+Consult the defmap of the class at which the jump to fallback occurred. If
+the defmap contains a class for the 
+.B
+priority
+of the class (which is related to the TOS field), choose this class and 
+terminate. 
+.TP
+(ii)
+Consult the map for a class for the
+.B BEST_EFFORT
+priority. If found, choose it, and terminate.
+.TP
+(iii)
+Choose the class at which break out to the fallback algorithm occurred. Terminate.
+.P
+The packet is enqueued to the class which was chosen when either algorithm 
+terminated. It is therefore possible for a packet to be enqueued *not* at a
+leaf node, but in the middle of the hierarchy.
+
+.SH LINK SHARING ALGORITHM
+When dequeuing for sending to the network device, CBQ decides which of its 
+classes will be allowed to send. It does so with a Weighted Round Robin process
+in which each class with packets gets a chance to send in turn. The WRR process
+starts by asking the highest priority classes (lowest numerically - 
+highest semantically) for packets, and will continue to do so until they
+have no more data to offer, in which case the process repeats for lower 
+priorities.
+
+.B CERTAINTY ENDS HERE, ANK PLEASE HELP
+
+Each class is not allowed to send at length though - they can only dequeue a
+configurable amount of data during each round. 
+
+If a class is about to go overlimit, and it is not
+.BR bounded ,
+it will try to borrow avgidle from siblings that are not
+.B isolated. 
+This process is repeated from the bottom upwards. If a class is unable
+to borrow enough avgidle to send a packet, it is throttled and not asked
+for a packet for enough time for the avgidle to increase above zero.
+
+.B I REALLY NEED HELP FIGURING THIS OUT. REST OF DOCUMENT IS PRETTY CERTAIN
+.B AGAIN.
+
+.SH QDISC
+The root qdisc of a CBQ class tree has the following parameters:
+
+.TP 
+parent major:minor | root
+This mandatory parameter determines the place of the CBQ instance, either at the
+.B root
+of an interface or within an existing class.
+.TP
+handle major:
+Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
+of a major number, followed by a colon. Optional.
+.TP
+avpkt bytes
+For calculations, the average packet size must be known. It is silently capped
+at a minimum of 2/3 of the interface MTU. Mandatory.
+.TP
+bandwidth rate
+To determine the idle time, CBQ must know the bandwidth of your underlying 
+physical interface, or parent qdisc. This is a vital parameter, more about it
+later. Mandatory.
+.TP
+cell
+The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
+.TP 
+mpu
+A zero sized packet may still take time to transmit. This value is the lower
+cap for packet transmission time calculations - packets smaller than this value
+are still deemed to have this size. Defaults to zero.
+.TP
+ewma log
+When CBQ needs to measure the average idle time, it does so using an 
+Exponentially Weighted Moving Average which smoothes out measurements into
+a moving average. The EWMA LOG determines how much smoothing occurs. Defaults 
+to 5. Lower values imply greater sensitivity. Must be between 0 and 31.
+.P
+A CBQ qdisc does not shape of its own accord. It only needs to know certain
+parameters about the underlying link. Actual shaping is done in classes.
+
+.SH CLASSES
+Classes have a host of parameters to configure their operation.
+
+.TP 
+parent major:minor
+Place of this class within the hierarchy. If attached directly to a qdisc 
+and not to another class, minor can be omitted. Mandatory.
+.TP 
+classid major:minor
+Like qdiscs, classes can be named. The major number must be equal to the
+major number of the qdisc to which it belongs. Optional, but needed if this 
+class is going to have children.
+.TP 
+weight weight
+When dequeuing to the interface, classes are tried for traffic in a 
+round-robin fashion. Classes with a higher configured rate will generally
+have more traffic to offer during each round, so it makes sense to allow
+it to dequeue more traffic. All weights under a class are normalized, so
+only the ratios matter. Defaults to the configured rate, unless the priority 
+of this class is maximal, in which case it is set to 1.
+.TP 
+allot bytes
+Allot specifies how many bytes a qdisc can dequeue
+during each round of the process. This parameter is weighted using the 
+renormalized class weight described above.
+
+.TP 
+prio priority
+In the round-robin process, classes with the lowest priority field are tried 
+for packets first. Mandatory.
+
+.TP 
+rate rate
+Maximum rate this class and all its children combined can send at. Mandatory.
+
+.TP
+bandwidth rate
+This is different from the bandwidth specified when creating a CBQ qdisc. Only
+used to determine maxidle and offtime, which are only calculated when
+specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
+
+.TP 
+maxburst
+This number of packets is used to calculate maxidle so that when
+avgidle is at maxidle, this number of average packets can be burst
+before avgidle drops to 0. Set it higher to be more tolerant of
+bursts. You can't set maxidle directly, only via this parameter.
+
+.TP
+minburst 
+As mentioned before, CBQ needs to throttle in case of
+overlimit. The ideal solution is to do so for exactly the calculated
+idle time, and pass 1 packet. However, Unix kernels generally have a
+hard time scheduling events shorter than 10ms, so it is better to
+throttle for a longer period, and then pass minburst packets in one
+go, and then sleep minburst times longer.
+
+The time to wait is called the offtime. Higher values of minburst lead
+to more accurate shaping in the long term, but to bigger bursts at
+millisecond timescales.
+
+.TP
+minidle
+If avgidle is below 0, we are overlimit and need to wait until
+avgidle will be big enough to send one packet. To prevent a sudden
+burst from shutting down the link for a prolonged period of time,
+avgidle is reset to minidle if it gets too low.
+
+Minidle is specified in negative microseconds, so 10 means that
+avgidle is capped at -10us.
+
+.TP
+bounded 
+Signifies that this class will not borrow bandwidth from its siblings.
+.TP 
+isolated
+Means that this class will not lend bandwidth to its siblings.
+
+.TP 
+split major:minor & defmap bitmap[/bitmap]
+If consulting filters attached to a class did not give a verdict, 
+CBQ can also classify based on the packet's priority. There are 16
+priorities available, numbered from 0 to 15. 
+
+The defmap specifies which priorities this class wants to receive, 
+specified as a bitmap. The Least Significant Bit corresponds to priority 
+zero. The 
+.B split
+parameter tells CBQ at which class the decision must be made, which should
+be a (grand)parent of the class you are adding.
+
+As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
+configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
+
+The complementary configuration would then
+be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
+which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
+.TP
+estimator interval timeconstant
+CBQ can measure how much bandwidth each class is using, which tc filters
+can use to classify packets with. In order to determine the bandwidth
+it uses a very simple estimator that measures once every
+.B interval
+microseconds how much traffic has passed. This again is an EWMA, for which
+the time constant can be specified, also in microseconds. The 
+.B time constant
+corresponds to the sluggishness of the measurement or, conversely, to the 
+sensitivity of the average to short bursts. Higher values mean less
+sensitivity. 
+
+
+
+.SH SOURCES
+.TP
+o
+Sally Floyd and Van Jacobson, "Link-sharing and Resource
+Management Models for Packet Networks",
+IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
+
+.TP 
+o
+Sally Floyd, "Notes on CBQ and Guarantee Service", 1995
+
+.TP
+o
+Sally Floyd, "Notes on Class-Based Queueing: Setting
+Parameters", 1996
+
+.TP 
+o
+Sally Floyd and Michael Speer, "Experimental Results
+for Class-Based Queueing", 1998, not published.
+
+
+
+.SH SEE ALSO
+.BR tc (8)
+
+.SH AUTHOR
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
+bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-cbq.8 b/tc-cbq.8

new file mode 100644 (file)

index 0000000..79fb93b
--- /dev/null
+++ b/tc-cbq.8
@@ -0,0 +1,353 @@
+.TH CBQ 8 "16 December 2001" "iproute2" "Linux"
+.SH NAME
+CBQ \- Class Based Queueing
+.SH SYNOPSIS
+.B tc qdisc ... dev
+dev
+.B  ( parent
+classid 
+.B | root) [ handle 
+major: 
+.B ] cbq [ allot 
+bytes
+.B ] avpkt
+bytes
+.B bandwidth
+rate
+.B [ cell 
+bytes
+.B ] [ ewma
+log
+.B ] [ mpu
+bytes
+.B ] 
+
+.B tc class ... dev
+dev
+.B parent 
+major:[minor]
+.B [ classid 
+major:minor
+.B ] cbq allot
+bytes
+.B [ bandwidth 
+rate 
+.B ] [ rate 
+rate
+.B ] prio
+priority
+.B [ weight
+weight
+.B ] [ minburst 
+packets
+.B ] [ maxburst 
+packets 
+.B ] [ ewma 
+log
+.B ] [ cell
+bytes
+.B ] avpkt
+bytes
+.B [ mpu
+bytes 
+.B ] [ bounded isolated ] [ split
+handle
+.B & defmap
+defmap
+.B ] [ estimator 
+interval timeconstant
+.B ]
+
+.SH DESCRIPTION
+Class Based Queueing is a classful qdisc that implements a rich
+linksharing hierarchy of classes.  It contains shaping elements as
+well as prioritizing capabilities.  Shaping is performed using link
+idle time calculations based on the timing of dequeue events and 
+underlying link bandwidth.
+
+.SH SHAPING ALGORITHM
+When shaping a 10mbit/s connection to 1mbit/s, the link will
+be idle 90% of the time. If it isn't, it needs to be throttled so that it
+IS idle 90% of the time.
+
+During operations, the effective idletime is measured using an
+exponential weighted moving average (EWMA), which considers recent
+packets to be exponentially more important than past ones. The Unix
+loadaverage is calculated in the same way.
+
+The calculated idle time is subtracted from the EWMA measured one,
+the resulting number is called 'avgidle'. A perfectly loaded link has
+an avgidle of zero: packets arrive exactly at the calculated
+interval.
+
+An overloaded link has a negative avgidle and if it gets too negative,
+CBQ throttles and is then 'overlimit'.
+
+Conversely, an idle link might amass a huge avgidle, which would then
+allow infinite bandwidths after a few hours of silence. To prevent
+this, avgidle is capped at 
+.B maxidle.
+
+If overlimit, in theory, the CBQ could throttle itself for exactly the
+amount of time that was calculated to pass between packets, and then
+pass one packet, and throttle again. Due to timer resolution constraints,
+this may not be feasible, see the 
+.B minburst
+parameter below.
+
+.SH CLASSIFICATION
+Within the one CBQ instance many classes may exist. Each of these classes
+contains another qdisc, by default 
+.BR tc-pfifo (8).
+
+When enqueueing a packet, CBQ starts at the root and uses various methods to 
+determine which class should receive the data. 
+
+In the absence of uncommon configuration options, the process is rather easy. 
+At each node we look for an instruction, and then go to the class the 
+instruction refers us to. If the class found is a barren leaf-node (without 
+children), we enqueue the packet there. If it is not yet a leaf node, we do 
+the whole thing over again starting from that node. 
+
+The following actions are performed, in order at each node we visit, until one 
+sends us to another node, or terminates the process.
+.TP
+(i)
+Consult filters attached to the class. If sent to a leafnode, we are done. 
+Otherwise, restart.
+.TP
+(ii)
+Consult the defmap for the priority assigned to this packet, which depends 
+on the TOS bits. If the referral is a leaf node, we are done; otherwise restart.
+.TP
+(iii)
+Ask the defmap for instructions for the 'best effort' priority. If the
+answer is a leaf node, we are done; otherwise restart.
+.TP
+(iv)
+If none of the above returned with an instruction, enqueue at this node.
+.P
+This algorithm makes sure that a packet always ends up somewhere, even while
+you are busy building your configuration. 
+
+For more details, see
+.BR tc-cbq-details (8).
+
+.SH LINK SHARING ALGORITHM
+When dequeuing for sending to the network device, CBQ decides which of its 
+classes will be allowed to send. It does so with a Weighted Round Robin process
+in which each class with packets gets a chance to send in turn. The WRR process
+starts by asking the highest priority classes (lowest numerically - 
+highest semantically) for packets, and will continue to do so until they
+have no more data to offer, in which case the process repeats for lower 
+priorities.
+
+Classes by default borrow bandwidth from their siblings. A class can be 
+prevented from doing so by declaring it 'bounded'. A class can also indicate 
+its unwillingness to lend out bandwidth by being 'isolated'.
+
+.SH QDISC
+The root of a CBQ qdisc class tree has the following parameters:
+
+.TP 
+parent major:minor | root
+This mandatory parameter determines the place of the CBQ instance, either at the
+.B root
+of an interface or within an existing class.
+.TP
+handle major:
+Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
+of a major number, followed by a colon. Optional, but very useful if classes
+will be generated within this qdisc.
+.TP 
+allot bytes
+This allotment is the 'chunkiness' of link sharing and is used for determining packet
+transmission time tables. The qdisc allot differs slightly from the class allot discussed
+below. Optional. Defaults to a reasonable value, related to avpkt.
+.TP
+avpkt bytes
+The average size of a packet is needed for calculating maxidle, and is also used
+for making sure 'allot' has a safe value. Mandatory.
+.TP
+bandwidth rate
+To determine the idle time, CBQ must know the bandwidth of your underlying 
+physical interface, or parent qdisc. This is a vital parameter, more about it
+later. Mandatory.
+.TP
+cell
+The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
+.TP 
+mpu
+A zero sized packet may still take time to transmit. This value is the lower
+cap for packet transmission time calculations - packets smaller than this value
+are still deemed to have this size. Defaults to zero.
+.TP
+ewma log
+When CBQ needs to measure the average idle time, it does so using an 
+Exponentially Weighted Moving Average which smoothes out measurements into
+a moving average. The EWMA LOG determines how much smoothing occurs. Lower 
+values imply greater sensitivity. Must be between 0 and 31. Defaults 
+to 5.
+.P
+A CBQ qdisc does not shape of its own accord. It only needs to know certain
+parameters about the underlying link. Actual shaping is done in classes.
+
+.SH CLASSES
+Classes have a host of parameters to configure their operation.
+
+.TP 
+parent major:minor
+Place of this class within the hierarchy. If attached directly to a qdisc 
+and not to another class, minor can be omitted. Mandatory.
+.TP 
+classid major:minor
+Like qdiscs, classes can be named. The major number must be equal to the
+major number of the qdisc to which it belongs. Optional, but needed if this 
+class is going to have children.
+.TP 
+weight weight
+When dequeuing to the interface, classes are tried for traffic in a 
+round-robin fashion. Classes with a higher configured rate will generally
+have more traffic to offer during each round, so it makes sense to allow
+it to dequeue more traffic. All weights under a class are normalized, so
+only the ratios matter. Defaults to the configured rate, unless the priority 
+of this class is maximal, in which case it is set to 1.
+.TP 
+allot bytes
+Allot specifies how many bytes a qdisc can dequeue
+during each round of the process. This parameter is weighted using the 
+renormalized class weight described above. Silently capped at a minimum of
+3/2 avpkt. Mandatory.
+
+.TP 
+prio priority
+In the round-robin process, classes with the lowest priority field are tried 
+for packets first. Mandatory.
+
+.TP 
+avpkt
+See the QDISC section.
+
+.TP 
+rate rate
+Maximum rate this class and all its children combined can send at. Mandatory.
+
+.TP
+bandwidth rate
+This is different from the bandwidth specified when creating a CBQ qdisc! Only
+used to determine maxidle and offtime, which are only calculated when
+specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
+
+.TP 
+maxburst
+This number of packets is used to calculate maxidle so that when
+avgidle is at maxidle, this number of average packets can be burst
+before avgidle drops to 0. Set it higher to be more tolerant of
+bursts. You can't set maxidle directly, only via this parameter.
+
+.TP
+minburst 
+As mentioned before, CBQ needs to throttle in case of
+overlimit. The ideal solution is to do so for exactly the calculated
+idle time, and pass 1 packet. However, Unix kernels generally have a
+hard time scheduling events shorter than 10ms, so it is better to
+throttle for a longer period, and then pass minburst packets in one
+go, and then sleep minburst times longer.
+
+The time to wait is called the offtime. Higher values of minburst lead
+to more accurate shaping in the long term, but to bigger bursts at
+millisecond timescales. Optional.
+
+.TP
+minidle
+If avgidle is below 0, we are overlimits and need to wait until
+avgidle will be big enough to send one packet. To prevent a sudden
+burst from shutting down the link for a prolonged period of time,
+avgidle is reset to minidle if it gets too low.
+
+Minidle is specified in negative microseconds, so 10 means that
+avgidle is capped at -10us. Optional.
+
+.TP
+bounded 
+Signifies that this class will not borrow bandwidth from its siblings.
+.TP 
+isolated
+Means that this class will not lend bandwidth to its siblings.
+
+.TP 
+split major:minor & defmap bitmap[/bitmap]
+If consulting filters attached to a class did not give a verdict, 
+CBQ can also classify based on the packet's priority. There are 16
+priorities available, numbered from 0 to 15. 
+
+The defmap specifies which priorities this class wants to receive, 
+specified as a bitmap. The Least Significant Bit corresponds to priority 
+zero. The 
+.B split
+parameter tells CBQ at which class the decision must be made, which should
+be a (grand)parent of the class you are adding.
+
+As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
+configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
+
+The complementary configuration would then
+be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
+which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
+.TP
+estimator interval timeconstant
+CBQ can measure how much bandwidth each class is using, which tc filters
+can use to classify packets with. In order to determine the bandwidth
+it uses a very simple estimator that measures once every
+.B interval
+microseconds how much traffic has passed. This again is an EWMA, for which
+the time constant can be specified, also in microseconds. The 
+.B time constant
+corresponds to the sluggishness of the measurement or, conversely, to the 
+sensitivity of the average to short bursts. Higher values mean less
+sensitivity. 
+
+.SH BUGS
+The actual bandwidth of the underlying link may not be known, for example 
+in the case of PPPoE or PPTP connections which in fact may send over a
+pipe, instead of over a physical device. CBQ is quite resilient to major
+errors in the configured bandwidth, probably at the cost of coarser shaping.
+
+Default kernels rely on coarse timing information for making decisions. These 
+may make shaping precise in the long term, but inaccurate on second long scales.
+
+See 
+.BR tc-cbq-details (8)
+for hints on how to improve this.
+
+.SH SOURCES
+.TP
+o
+Sally Floyd and Van Jacobson, "Link-sharing and Resource
+Management Models for Packet Networks",
+IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
+
+.TP 
+o
+Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
+
+.TP
+o
+Sally Floyd, "Notes on Class-Based Queueing: Setting
+Parameters", 1996
+
+.TP 
+o
+Sally Floyd and Michael Speer, "Experimental Results
+for Class-Based Queueing", 1998, not published.
+
+
+
+.SH SEE ALSO
+.BR tc (8)
+
+.SH AUTHOR
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
+bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-htb.8 b/tc-htb.8

new file mode 100644 (file)

index 0000000..f61b818
--- /dev/null
+++ b/tc-htb.8
@@ -0,0 +1,150 @@
+.TH HTB 8 "10 January 2002" "iproute2" "Linux"
+.SH NAME
+HTB \- Hierarchy Token Bucket
+.SH SYNOPSIS
+.B tc qdisc ... dev
+dev
+.B  ( parent
+classid 
+.B | root) [ handle 
+major: 
+.B ] htb [ default 
+minor-id
+.B ] 
+
+.B tc class ... dev
+dev
+.B parent 
+major:[minor]
+.B [ classid 
+major:minor
+.B ] htb rate
+rate
+.B [ ceil
+rate 
+.B ] burst 
+bytes
+.B [ cburst
+bytes
+.B ] [ prio
+priority
+.B ] 
+
+.SH DESCRIPTION
+HTB is meant as a more understandable and intuitive replacement for
+the CBQ qdisc in Linux. Both CBQ and HTB help you to control the use
+of the outbound bandwidth on a given link. Both allow you to use one
+physical link to simulate several slower links and to send different
+kinds of traffic on different simulated links. In both cases, you have
+to specify how to divide the physical link into simulated links and
+how to decide which simulated link to use for a given packet to be sent. 
+
+Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm 
+which does not depend on interface characteristics and so does not need to
+know the underlying bandwidth of the outgoing interface.
+
+.SH SHAPING ALGORITHM
+Shaping works as documented in
+.BR tc-tbf (8).
+
+.SH CLASSIFICATION
+Within one HTB instance many classes may exist. Each of these classes
+contains another qdisc, by default 
+.BR tc-pfifo (8).
+
+When enqueueing a packet, HTB starts at the root and uses various methods to 
+determine which class should receive the data. 
+
+In the absence of uncommon configuration options, the process is rather easy. 
+At each node we look for an instruction, and then go to the class the 
+instruction refers us to. If the class found is a barren leaf-node (without 
+children), we enqueue the packet there. If it is not yet a leaf node, we do 
+the whole thing over again starting from that node. 
+
+The following actions are performed, in order at each node we visit, until one 
+sends us to another node, or terminates the process.
+.TP
+(i)
+Consult filters attached to the class. If sent to a leafnode, we are done. 
+Otherwise, restart.
+.TP
+(ii)
+If none of the above returned with an instruction, enqueue at this node.
+.P
+This algorithm makes sure that a packet always ends up somewhere, even while
+you are busy building your configuration. 
+
+.SH LINK SHARING ALGORITHM
+FIXME
+
+.SH QDISC
+The root of a HTB qdisc class tree has the following parameters:
+
+.TP 
+parent major:minor | root
+This mandatory parameter determines the place of the HTB instance, either at the
+.B root
+of an interface or within an existing class.
+.TP
+handle major:
+Like all other qdiscs, the HTB can be assigned a handle. Should consist only
+of a major number, followed by a colon. Optional, but very useful if classes
+will be generated within this qdisc.
+.TP 
+default minor-id
+Unclassified traffic gets sent to the class with this minor-id.
+
+.SH CLASSES
+Classes have a host of parameters to configure their operation.
+
+.TP 
+parent major:minor
+Place of this class within the hierarchy. If attached directly to a qdisc 
+and not to another class, minor can be omitted. Mandatory.
+.TP 
+classid major:minor
+Like qdiscs, classes can be named. The major number must be equal to the
+major number of the qdisc to which it belongs. Optional, but needed if this 
+class is going to have children.
+.TP 
+prio priority
+In the round-robin process, classes with the lowest priority field are tried 
+for packets first. Mandatory.
+
+.TP 
+rate rate
+Maximum rate this class and all its children are guaranteed. Mandatory.
+
+.TP
+ceil rate
+Maximum rate at which a class can send, if its parent has bandwidth to spare. 
+Defaults to the configured rate, which implies no borrowing
+
+.TP 
+burst bytes
+Amount of bytes that can be burst at 
+.B ceil
+speed, in excess of the configured
+.B rate. 
+Should be at least as high as the highest burst of all children.
+
+.TP 
+cburst bytes
+Amount of bytes that can be burst at 'infinite' speed, in other words, as fast
+as the interface can transmit them. For perfect evening out, should be equal to at most one average
+packet. Should be at least as high as the highest cburst of all children.
+
+.SH NOTES
+Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel, 
+there are 100 timer events per second, so the maximum rate is the rate at which 'burst' bytes are sent each timer tick.
+From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte
+burst as 100*12kb*8 equals 10mbit.
+
+.SH SEE ALSO
+.BR tc (8)
+.P
+HTB website: http://luxik.cdi.cz/~devik/qos/htb/
+.SH AUTHOR
+Martin Devera <devik@cdi.cz>. This manpage maintained by bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-pbfifo.8 b/tc-pbfifo.8

new file mode 100644 (file)

index 0000000..8dda4bb
--- /dev/null
+++ b/tc-pbfifo.8
@@ -0,0 +1,72 @@
+.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux"
+.SH NAME
+pfifo \- Packet limited First In, First Out queue
+.P
+bfifo \- Byte limited First In, First Out queue
+
+.SH SYNOPSIS
+.B tc qdisc ... add pfifo
+.B [ limit 
+packets
+.B ]
+.P
+.B tc qdisc ... add bfifo
+.B [ limit 
+bytes
+.B ]
+
+.SH DESCRIPTION
+The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the
+simplest queues possible and therefore have no overhead. 
+.B pfifo
+constrains the queue size as measured in packets. 
+.B bfifo
+does so as measured in bytes.
+
+Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer 
+pfifo or bfifo over the default.
+
+.SH ALGORITHM
+A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of
+a list. When a packet needs to be sent out to the network, it is taken from the head of the list. 
+
+If the list is too long, no further packets are allowed on. This is called 'tail drop'.
+
+.SH PARAMETERS
+.TP 
+limit
+Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults 
+to the interface txqueuelen, as specified with 
+.BR ifconfig (8)
+or
+.BR ip (8).
+
+For bfifo, it defaults to the txqueuelen multiplied by the interface MTU.
+
+.SH OUTPUT
+The output of 
+.B tc -s qdisc ls
+contains the limit, either in packets or in bytes, and the number of bytes 
+and packets actually sent. An unsent and dropped packet only appears between braces 
+and is not counted as 'Sent'.
+
+In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets. 
+No packets were dropped, and as the pfifo queue does not slow down packets, there were also no
+overlimits:
+.P
+.nf
+# tc -s qdisc ls dev eth0 
+qdisc pfifo 8001: dev eth0 limit 100p
+ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0) 
+.fi
+
+If a backlog occurs, this is displayed as well.
+.SH SEE ALSO
+.BR tc (8)
+
+.SH AUTHORS
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
+
+This manpage maintained by bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-pfifo_fast.8 b/tc-pfifo_fast.8

new file mode 100644 (file)

index 0000000..43ab166
--- /dev/null
+++ b/tc-pfifo_fast.8
@@ -0,0 +1,59 @@
+.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux"
+.SH NAME
+pfifo_fast \- three-band first in, first out queue
+
+.SH DESCRIPTION
+pfifo_fast is the default qdisc of each interface.
+
+Whenever an interface is created, the pfifo_fast qdisc is automatically used
+as a queue. If another qdisc is attached, it preempts the default
+pfifo_fast, which automatically returns to function when an existing qdisc
+is detached.
+
+In this sense this qdisc is magic, and unlike other qdiscs.
+
+.SH ALGORITHM
+The algorithm is very similar to that of the classful 
+.BR tc-prio (8)
+qdisc. 
+.B pfifo_fast
+is like three
+.BR tc-pfifo (8)
+queues side by side, where packets can be enqueued in any of the three bands
+based on their Type of Service bits or assigned priority. 
+
+Not all three bands are dequeued simultaneously - as long as lower bands
+have traffic, higher bands are never dequeued. This can be used to
+prioritize interactive traffic or penalize 'lowest cost' traffic.
+
+Each band can be txqueuelen packets long, as configured with
+.BR ifconfig (8)
+or 
+.BR ip (8).
+Additional packets coming in are not enqueued but are instead dropped.
+
+See
+.BR tc-prio (8)
+for complete details on how TOS bits are translated into bands.
+.SH PARAMETERS
+.TP 
+txqueuelen
+The length of the three bands depends on the interface txqueuelen, as
+specified with
+.BR ifconfig (8)
+or
+.BR ip (8).
+
+.SH BUGS
+Does not maintain statistics and does not show up in tc qdisc ls. This is because
+it is the automatic default in the absence of a configured qdisc. 
+
+.SH SEE ALSO
+.BR tc (8)
+
+.SH AUTHORS
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
+
+This manpage maintained by bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-prio.8 b/tc-prio.8

new file mode 100644 (file)

index 0000000..e942e62
--- /dev/null
+++ b/tc-prio.8
@@ -0,0 +1,187 @@
+.TH PRIO 8 "16 December 2001" "iproute2" "Linux"
+.SH NAME
+PRIO \- Priority qdisc
+.SH SYNOPSIS
+.B tc qdisc ... dev
+dev
+.B  ( parent
+classid 
+.B | root) [ handle 
+major: 
+.B ] prio [ bands 
+bands
+.B ] [ priomap
+band,band,band... 
+.B ] [ estimator 
+interval timeconstant
+.B ]
+
+.SH DESCRIPTION
+The PRIO qdisc is a simple classful queueing discipline that contains
+an arbitrary number of classes of differing priority. The classes are
+dequeued in numerical descending order of priority. PRIO is a scheduler 
+and never delays packets - it is a work-conserving qdisc, though the qdiscs
+contained in the classes may not be.
+
+Very useful for lowering latency when there is no need for slowing down
+traffic.
+
+.SH ALGORITHM
+On creation with 'tc qdisc add', a fixed number of bands is created. Each
+band is a class, although it is not possible to add classes with 'tc qdisc
+add', the number of bands to be created must instead be specified on the
+commandline attaching PRIO to its root.
+
+When dequeueing, band 0 is tried first and only if it did not deliver a
+packet does PRIO try band 1, and so onwards. Maximum reliability packets
+should therefore go to band 0, minimum delay to band 1 and the rest to band
+2.
+
+As the PRIO qdisc itself will have minor number 0, band 0 is actually
+major:1, band 1 is major:2, etc. For major, substitute the major number
+assigned to the qdisc on 'tc qdisc add' with the
+.B handle
+parameter.
+
+.SH CLASSIFICATION
+Three methods are available to PRIO to determine in which band a packet will
+be enqueued.
+.TP
+From userspace
+A process with sufficient privileges can encode the destination class
+directly with SO_PRIORITY, see
+.BR socket (7).
+.TP 
+with a tc filter
+A tc filter attached to the root qdisc can point traffic directly to a class
+.TP 
+with the priomap
+Based on the packet priority, which in turn is derived from the Type of
+Service assigned to the packet.
+.P
+Only the priomap is specific to this qdisc. 
+.SH QDISC PARAMETERS
+.TP
+bands
+Number of bands. If changed from the default of 3,
+.B priomap
+must be updated as well.
+.TP 
+priomap
+The priomap maps the priority of
+a packet to a class. The priority can either be set directly from userspace,
+or be derived from the Type of Service of the packet.
+
+Determines how packet priorities, as assigned by the kernel, map to
+bands. Mapping occurs based on the TOS octet of the packet, which looks like
+this:
+
+.nf
+0   1   2   3   4   5   6   7
++---+---+---+---+---+---+---+---+
+|           |               |   |
+|PRECEDENCE |      TOS      |MBZ|
+|           |               |   |
++---+---+---+---+---+---+---+---+
+.fi
+
+The four TOS bits (the 'TOS field') are defined as:
+
+.nf
+Binary Decimal   Meaning
+-----------------------------------------
+1000   8         Minimize delay (md)
+0100   4         Maximize throughput (mt)
+0010   2         Maximize reliability (mr)
+0001   1         Minimize monetary cost (mmc)
+0000   0         Normal Service
+.fi
+
+As there is 1 bit to the right of these four bits, the actual value of the
+TOS field is double the value of the TOS bits. Tcpdump -v -v shows you the
+value of the entire TOS field, not just the four bits. It is the value you
+see in the first column of this table:
+
+.nf
+TOS     Bits  Means                    Linux Priority    Band
+------------------------------------------------------------
+0x0     0     Normal Service           0 Best Effort     1
+0x2     1     Minimize Monetary Cost   1 Filler          2
+0x4     2     Maximize Reliability     0 Best Effort     1
+0x6     3     mmc+mr                   0 Best Effort     1
+0x8     4     Maximize Throughput      2 Bulk            2
+0xa     5     mmc+mt                   2 Bulk            2
+0xc     6     mr+mt                    2 Bulk            2
+0xe     7     mmc+mr+mt                2 Bulk            2
+0x10    8     Minimize Delay           6 Interactive     0
+0x12    9     mmc+md                   6 Interactive     0
+0x14    10    mr+md                    6 Interactive     0
+0x16    11    mmc+mr+md                6 Interactive     0
+0x18    12    mt+md                    4 Int. Bulk       1
+0x1a    13    mmc+mt+md                4 Int. Bulk       1
+0x1c    14    mr+mt+md                 4 Int. Bulk       1
+0x1e    15    mmc+mr+mt+md             4 Int. Bulk       1
+.fi
+
+The second column contains the value of the relevant
+four TOS bits, followed by their translated meaning. For example, 15 stands
+for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum
+Throughput AND Minimum Delay. 
+
+The fourth column lists the way the Linux kernel interprets the TOS bits, by
+showing to which Priority they are mapped.
+
+The last column shows the result of the default priomap. On the commandline,
+the default priomap looks like this:
+
+    1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
+
+This means that priority 4, for example, gets mapped to band number 1.
+The priomap also allows you to list higher priorities (> 7) which do not
+correspond to TOS mappings, but which are set by other means.
+
+This table from RFC 1349 (read it for more details) explains how
+applications might very well set their TOS bits:
+
+.nf
+TELNET                   1000           (minimize delay)
+FTP
+        Control          1000           (minimize delay)
+        Data             0100           (maximize throughput)
+
+TFTP                     1000           (minimize delay)
+
+SMTP 
+        Command phase    1000           (minimize delay)
+        DATA phase       0100           (maximize throughput)
+
+Domain Name Service
+        UDP Query        1000           (minimize delay)
+        TCP Query        0000
+        Zone Transfer    0100           (maximize throughput)
+
+NNTP                     0001           (minimize monetary cost)
+
+ICMP
+        Errors           0000
+        Requests         0000 (mostly)
+        Responses        <same as request> (mostly)
+.fi
+
+
+.SH CLASSES
+PRIO classes cannot be configured further - they are automatically created
+when the PRIO qdisc is attached. Each class however can contain yet a
+further qdisc.
+
+.SH BUGS
+Large amounts of traffic in the lower bands can cause starvation of higher
+bands. Can be prevented by attaching a shaper (for example, 
+.BR tc-tbf (8))
+to these bands to make sure they cannot dominate the link.
+
+.SH AUTHORS
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>,  J Hadi Salim
+<hadi@cyberus.ca>. This manpage maintained by bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-red.8 b/tc-red.8

new file mode 100644 (file)

index 0000000..d02b411
--- /dev/null
+++ b/tc-red.8
@@ -0,0 +1,131 @@
+.TH RED 8 "13 December 2001" "iproute2" "Linux"
+.SH NAME
+red \- Random Early Detection 
+.SH SYNOPSIS
+.B tc qdisc ... red
+.B limit 
+bytes
+.B min 
+bytes 
+.B max 
+bytes 
+.B avpkt
+bytes
+.B burst 
+packets
+.B [ ecn ] [ bandwidth
+rate
+.B ] probability
+chance
+
+.SH DESCRIPTION
+Random Early Detection is a classless qdisc which manages its queue size
+smartly. Regular queues simply drop packets from the tail when they are
+full, which may not be the optimal behaviour. RED also performs tail drop,
+but does so in a more gradual way.
+
+Once the queue hits a certain average length, packets enqueued have a
+configurable chance of being marked (which may mean dropped). This chance
+increases linearly up to a point called the
+.B max
+average queue length, although the queue might get bigger.
+
+This has a host of benefits over simple taildrop, while not being processor
+intensive. It prevents synchronous retransmits after a burst in traffic,
+which cause further retransmits, etc.
+
+The goal is to have a small queue size, which is good for interactivity
+while not disturbing TCP/IP traffic with too many sudden drops after a burst
+of traffic.
+
+Depending on whether ECN is configured, marking either means dropping or
+purely marking a packet as overlimit.
+.SH ALGORITHM
+The average queue size is used for determining the marking
+probability. This is calculated using an Exponential Weighted Moving
+Average, which can be more or less sensitive to bursts.
+
+When the average queue size is below 
+.B min
+bytes, no packet will ever be marked. When it exceeds 
+.B min, 
+the probability of doing so climbs linearly up
+to 
+.B probability, 
+until the average queue size hits
+.B max
+bytes. Because 
+.B probability 
+is normally not set to 100%, the queue size might
+conceivably rise above 
+.B max
+bytes, so the 
+.B limit
+parameter is provided to set a hard maximum for the size of the queue.
+
+.SH PARAMETERS
+.TP 
+min
+Average queue size at which marking becomes a possibility.
+.TP 
+max
+At this average queue size, the marking probability is maximal. Should be at
+least twice
+.B min
+to prevent synchronous retransmits, higher for low 
+.B min.
+.TP 
+probability
+Maximum probability for marking, specified as a floating point
+number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%,
+respectively).
+.TP 
+limit
+Hard limit on the real (not average) queue size in bytes. Further packets
+are dropped. Should be set higher than max+burst. It is advised to set this
+a few times higher than 
+.B max.
+.TP
+burst
+Used for determining how fast the average queue size is influenced by the
+real queue size. Larger values make the calculation more sluggish, allowing
+longer bursts of traffic before marking starts. Real life experiments
+support the following guideline: (min+min+max)/(3*avpkt).
+.TP 
+avpkt
+Specified in bytes. Used with burst to determine the time constant for
+average queue size calculations. 1000 is a good value.
+.TP
+bandwidth
+This rate is used for calculating the average queue size after some
+idle time. Should be set to the bandwidth of your interface. Does not mean
+that RED will shape for you! Optional.
+.TP
+ecn
+As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion
+Notification allows RED to notify remote hosts that their rate exceeds the
+amount of bandwidth available. Non-ECN capable hosts can only be notified by
+dropping a packet.  If this parameter is specified, packets which indicate
+that their hosts honor ECN will only be marked and not dropped, unless the
+queue size hits
+.B limit
+bytes. Needs a tc binary with RED support compiled in. Recommended.
+
+.SH SEE ALSO
+.BR tc (8)
+
+.SH SOURCES
+.TP 
+o
+Floyd, S., and Jacobson, V., Random Early Detection gateways for
+Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html
+.TP 
+o
+Some changes to the algorithm by Alexey N. Kuznetsov.
+
+.SH AUTHORS
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>,  Alexey Makarenko
+<makar@phoenix.kharkov.ua>, J Hadi Salim <hadi@nortelnetworks.com>.  
+This manpage maintained by bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-sfq.8 b/tc-sfq.8

new file mode 100644 (file)

index 0000000..337c795
--- /dev/null
+++ b/tc-sfq.8
@@ -0,0 +1,107 @@
+.TH TC 8 "8 December 2001" "iproute2" "Linux"
+.SH NAME
+sfq \- Stochastic Fairness Queueing
+.SH SYNOPSIS
+.B tc qdisc ... perturb
+seconds
+.B quantum
+bytes
+
+.SH DESCRIPTION
+
+Stochastic Fairness Queueing is a classless queueing discipline available for
+traffic control with the 
+.BR tc (8)
+command.
+
+SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'. 
+The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing
+any single flow from drowning out the rest.
+
+This may in fact have some effect in mitigating a Denial of Service attempt.
+
+SFQ is work-conserving and therefore always delivers a packet if it has one available.
+.SH ALGORITHM
+On enqueueing, each packet is assigned to a hash bucket, based on
+.TP
+(i)
+Source address
+.TP
+(ii)
+Destination address
+.TP
+(iii)
+Source port
+.P
+If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP. 
+Packets with other protocols are hashed based on the 32-bit representation of their
+destination and the socket they belong to. A flow corresponds mostly to a TCP/IP 
+connection.
+
+Each of these buckets should represent a unique flow. Because multiple flows may
+get hashed to the same bucket, the hashing algorithm is perturbed at configurable 
+intervals so that the unfairness lasts only for a short while. Perturbation may 
+however cause some inadvertent packet reordering to occur.
+
+When dequeuing, each hashbucket with data is queried in a round robin fashion.
+
+The compile time maximum length of the SFQ is 128 packets, which can be spread over
+at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed
+on the fullest bucket, thus maintaining fairness.
+
+.SH PARAMETERS
+.TP 
+perturb
+Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that 
+no perturbation occurs. Do not set too low for each perturbation may cause some packet
+reordering. Advised value: 10
+.TP 
+quantum
+Amount of bytes a flow is allowed to dequeue during a round of the round robin process.
+Defaults to the MTU of the interface which is also the advised value and the minimum value.
+
+.SH EXAMPLE & USAGE
+
+To attach to device ppp0:
+.P
+# tc qdisc add dev ppp0 root sfq perturb 10
+.P
+Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful 
+if it owns the queue.
+This is the case when the link speed equals the actually available bandwidth. This holds 
+for regular phone modems, ISDN connections and direct non-switched ethernet links. 
+.P
+Most often, cable modems and DSL devices do not fall into this category. The same holds 
+for when connected to a switch  and trying to send data to a congested segment also 
+connected to the switch.
+.P
+In this case, the effective queue does not reside within Linux and is therefore not 
+available for scheduling.
+.P
+Embed SFQ in a classful qdisc to make sure it owns the queue.
+
+.SH SOURCE
+.TP 
+o
+Paul E. McKenney "Stochastic Fairness Queuing",
+IEEE INFOCOM'90 Proceedings, San Francisco, 1990.
+
+.TP
+o
+Paul E. McKenney "Stochastic Fairness Queuing",
+"Interworking: Research and Experience", v.2, 1991, p.113-131.
+
+.TP 
+o
+See also:
+M. Shreedhar and George Varghese "Efficient Fair
+Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
+
+.SH SEE ALSO
+.BR tc (8)
+
+.SH AUTHOR
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
+bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc-tbf.8 b/tc-tbf.8

new file mode 100644 (file)

index 0000000..3abb238
--- /dev/null
+++ b/tc-tbf.8
@@ -0,0 +1,138 @@
+.TH TC 8 "13 December 2001" "iproute2" "Linux"
+.SH NAME
+tbf \- Token Bucket Filter
+.SH SYNOPSIS
+.B tc qdisc ... tbf rate
+rate
+.B burst
+bytes/cell
+.B ( latency 
+ms 
+.B | limit
+bytes
+.B ) [ mpu 
+bytes
+.B [ peakrate
+rate
+.B mtu
+bytes/cell
+.B ] ]
+.P
+burst is also known as buffer and maxburst. mtu is also known as minburst.
+.SH DESCRIPTION
+
+The Token Bucket Filter is a classless queueing discipline available for
+traffic control with the 
+.BR tc (8)
+command.
+
+TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle
+itself, although packets are available, to ensure that the configured rate is not exceeded. 
+On all platforms except for Alpha,
+it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness, 
+sending out  data exactly at the configured rates. 
+
+Much higher rates are possible but at the cost of losing the minimal burstiness. In that
+case, data is on average dequeued at the configured rate but may be sent much faster at millisecond 
+timescales. Because of further queues living in network adaptors, this is often not a problem.
+
+Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten
+times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on 
+average 1000 bytes.
+
+.SH ALGORITHM
+As the name implies, traffic is filtered based on the expenditure of 
+.B tokens.
+Tokens roughly correspond to bytes, with the additional constraint that each packet consumes
+some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies
+the link for some time.
+
+On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst 
+in one go. Tokens arrive at a steady rate, until the bucket is full.
+
+If no tokens are available, packets are queued, up to a configured limit. The TBF now 
+calculates the token deficit, and throttles until the first packet in the queue can be sent.
+
+If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured 
+to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF
+with a very small bucket, so that it doesn't burst.
+
+To achieve perfection, the second bucket may contain only a single packet, which leads to 
+the earlier mentioned 1mbit/s limit. 
+
+This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends
+on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100 
+packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s.
+
+.SH PARAMETERS
+See 
+.BR tc (8)
+for how to specify the units of these values.
+.TP
+limit or latency
+Limit is the number of bytes that can be queued waiting for tokens to become
+available. You can also specify this the other way around by setting the
+latency parameter, which specifies the maximum amount of time a packet can
+sit in the TBF. The latter calculation takes into account the size of the
+bucket, the rate and possibly the peakrate (if set). These two parameters
+are mutually exclusive. 
+.TP
+burst
+Also known as buffer or maxburst.
+Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously. 
+In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer 
+if you want to reach your configured rate!
+
+If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket.
+The minimum buffer size can be calculated by dividing the rate by HZ.
+
+Token usage calculations are performed using a table which by default has a resolution of 8 packets. 
+This resolution can be changed by specifying the 
+.B cell
+size with the burst. For example, to specify a 6000 byte buffer with a 16
+byte cell size, set a burst of 6000/16. You will probably never have to set
+this. Must be an integral power of 2.
+.TP
+mpu
+A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit 
+determines the minimal token usage (specified in bytes) for a packet. Defaults to zero.
+.TP
+rate
+The speed knob. See remarks above about limits! See 
+.BR tc (8)
+for units.
+.PP
+Furthermore, if a peakrate is desired, the following parameters are available:
+
+.TP
+peakrate
+Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does 
+not need to be set, it is only necessary if perfect millisecond timescale shaping is required.
+
+.TP
+mtu/minburst
+Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface.
+If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst
+allows around 3mbit/s of peakrate, given 1000 byte packets.
+
+Like the regular burstsize you can also specify a 
+.B cell
+size.
+.SH EXAMPLE & USAGE
+
+To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s,
+a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes
+at most 70ms of latency, with perfect peakrate behaviour, issue:
+.P
+# tc qdisc add dev eth0 root tbf rate 0.5mbit \\
+  burst 5kb latency 70ms peakrate 1mbit       \\
+  minburst 1540
+
+.SH SEE ALSO
+.BR tc (8)
+
+.SH AUTHOR
+Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
+bert hubert <ahu@ds9a.nl>
+
+
diff --git a/tc.8 b/tc.8

new file mode 100644 (file)

index 0000000..b9b8039
--- /dev/null
+++ b/tc.8
@@ -0,0 +1,348 @@
+.TH TC 8 "16 December 2001" "iproute2" "Linux"
+.SH NAME
+tc \- show / manipulate traffic control settings
+.SH SYNOPSIS
+.B tc qdisc [ add | change | replace | link ] dev 
+DEV 
+.B 
+[ parent 
+qdisc-id 
+.B | root ] 
+.B [ handle 
+qdisc-id ] qdisc
+[ qdisc specific parameters ]
+.P
+
+.B tc class [ add | change | replace ] dev
+DEV
+.B parent 
+qdisc-id 
+.B [ classid 
+class-id ] qdisc
+[ qdisc specific parameters ]
+.P
+
+.B tc filter [ add | change | replace ] dev
+DEV
+.B  [ parent
+qdisc-id
+.B | root ] protocol
+protocol
+.B prio
+priority filtertype
+[ filtertype specific parameters ]
+.B flowid
+flow-id
+
+.B tc [-s | -d ] qdisc show [ dev 
+DEV 
+.B  ]
+.P
+.B tc [-s | -d ] class show dev 
+DEV 
+.P
+.B tc filter show dev 
+DEV 
+
+.SH DESCRIPTION
+.B Tc
+is used to configure Traffic Control in the Linux kernel. Traffic Control consists 
+of the following:
+
+.TP 
+SHAPING
+When traffic is shaped, its rate of transmission is under control. Shaping may 
+be more than lowering the available bandwidth - it is also used to smooth out 
+bursts in traffic for better network behaviour. Shaping occurs on egress.
+
+.TP 
+SCHEDULING
+By scheduling the transmission of packets it is possible to improve interactivity
+for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering
+is also called prioritizing, and happens only on egress.
+
+.TP
+POLICING
+Where shaping deals with transmission of traffic, policing pertains to traffic
+arriving. Policing thus occurs on ingress.
+
+.TP
+DROPPING
+Traffic exceeding a set bandwidth may also be dropped forthwith, both on 
+ingress and on egress.
+
+.P
+Processing of traffic is controlled by three kinds of objects: qdiscs, 
+classes and filters. 
+
+.SH QDISCS
+.B qdisc 
+is short for 'queueing discipline' and it is elementary to 
+understanding traffic control. Whenever the kernel needs to send a 
+packet to an interface, it is 
+.B enqueued
+to the qdisc configured for that interface. Immediately afterwards, the kernel
+tries to get as many packets as possible from the qdisc, for giving them
+to the network adaptor driver.
+
+A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure 
+First In, First Out queue. It does however store traffic when the network interface
+can't handle it momentarily.
+
+.SH CLASSES
+Some qdiscs can contain classes, which contain further qdiscs - traffic may 
+then be enqueued in any of the inner qdiscs, which are within the
+.B classes.
+When the kernel tries to dequeue a packet from such a 
+.B classful qdisc
+it can come from any of the classes. A qdisc may for example prioritize 
+certain kinds of traffic by trying to dequeue from certain classes
+before others.
+
+.SH FILTERS
+A
+.B filter
+is used by a classful qdisc to determine in which class a packet will
+be enqueued. Whenever traffic arrives at a class with subclasses, it needs
+to be classified. Various methods may be employed to do so, one of which
+is the use of filters. All filters attached to the class are called, until one of 
+them returns with a verdict. If no verdict was made, other criteria may be 
+available. This differs per qdisc.
+
+It is important to notice that filters reside 
+.B within
+qdiscs - they are not masters of what happens.
+
+.SH CLASSLESS QDISCS
+The classless qdiscs are:
+.TP 
+[p|b]fifo
+Simplest usable qdisc, pure First In, First Out behaviour. Limited in 
+packets or in bytes.
+.TP
+pfifo_fast
+Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band
+queue which honors Type of Service flags, as well as the priority that may be 
+assigned to a packet.
+.TP
+red
+Random Early Detection simulates physical congestion by randomly dropping
+packets when nearing configured bandwidth allocation. Well suited to very
+large bandwidth applications.
+.TP 
+sfq
+Stochastic Fairness Queueing reorders queued traffic so each 'session'
+gets to send a packet in turn.
+.TP
+tbf
+The Token Bucket Filter is suited for slowing traffic down to a precisely
+configured rate. Scales well to large bandwidths. 
+.SH CONFIGURING CLASSLESS QDISCS
+In the absence of classful qdiscs, classless qdiscs can only be attached at 
+the root of a device. Full syntax:
+.P
+.B tc qdisc add dev 
+DEV 
+.B root 
+QDISC QDISC-PARAMETERS
+
+To remove, issue
+.P
+.B tc qdisc del dev
+DEV
+.B root
+
+The  
+.B pfifo_fast
+qdisc is the automatic default in the absence of a configured qdisc.
+
+.SH CLASSFUL QDISCS
+The classful qdiscs are:
+.TP
+CBQ
+Class Based Queueing implements a rich linksharing hierarchy of classes. 
+It contains shaping elements as well as prioritizing capabilities. Shaping is
+performed using link idle time calculations based on average packet size and
+underlying link bandwidth. The latter may be ill-defined for some interfaces.
+.TP
+HTB
+The Hierarchy Token Bucket implements a rich linksharing hierarchy of 
+classes with an emphasis on conforming to existing practices. HTB facilitates
+guaranteeing bandwidth to classes, while also allowing specification of upper
+limits to inter-class sharing. It contains shaping elements, based on TBF, and
+can prioritize classes.        
+.TP 
+PRIO
+The PRIO qdisc is a non-shaping container for a configurable number of 
+classes which are dequeued in order. This allows for easy prioritization 
+of traffic, where lower classes are only able to send if higher ones have 
+no packets available. To facilitate configuration, Type Of Service bits are 
+honored by default.
+.SH THEORY OF OPERATION
+Classes form a tree, where each class has a single parent. 
+A class may have multiple children. Some qdiscs allow for runtime addition
+of classes (CBQ, HTB) while others (PRIO) are created with a static number of 
+children.
+
+Qdiscs which allow dynamic addition of classes can have zero or more 
+subclasses to which traffic may be enqueued. 
+
+Furthermore, each class contains a
+.B leaf qdisc
+which by default has 
+.B pfifo 
+behaviour though another qdisc can be attached in place. This qdisc may again 
+contain classes, but each class can have only one leaf qdisc. 
+
+When a packet enters a classful qdisc it can be 
+.B classified
+to one of the classes within. Three criteria are available, although not all 
+qdiscs will use all three:
+.TP 
+tc filters
+If tc filters are attached to a class, they are consulted first 
+for relevant instructions. Filters can match on all fields of a packet header, 
+as well as on the firewall mark applied by ipchains or iptables. See 
+.BR tc-filters (8).
+.TP
+Type of Service
+Some qdiscs have built in rules for classifying packets based on the TOS field.
+.TP
+skb->priority
+Userspace programs can encode a class-id in the 'skb->priority' field using 
+the SO_PRIORITY option.
+.P
+Each node within the tree can have its own filters but higher level filters
+may also point directly to lower classes.
+
+If classification did not succeed, packets are enqueued to the leaf qdisc 
+attached to that class. Check qdisc specific manpages for details, however.
+
+.SH NAMING
+All qdiscs, classes and filters have IDs, which can either be specified
+or be automatically assigned. 
+
+IDs consist of a major number and a minor number, separated by a colon.
+
+.TP 
+QDISCS
+A qdisc, which potentially can have children, 
+gets assigned a major number, called a 'handle', leaving the minor 
+number namespace available for classes. The handle is expressed as '10:'. 
+It is customary to explicitly assign a handle to qdiscs expected to have 
+children.
+
+.TP 
+CLASSES
+Classes residing under a qdisc share their qdisc major number, but each has
+a separate minor number called a 'classid' that has no relation to their 
+parent classes, only to their parent qdisc. The same naming custom as for 
+qdiscs applies.
+
+.TP 
+FILTERS
+Filters have a three part ID, which is only needed when using a hashed
+filter hierarchy, for which see
+.BR tc-filters (8).
+.SH UNITS
+All parameters accept a floating point number, possibly followed by a unit.
+.P
+Bandwidths or rates can be specified in:
+.TP 
+kbps
+Kilobytes per second
+.TP
+mbps
+Megabytes per second
+.TP
+kbit
+Kilobits per second
+.TP
+mbit
+Megabits per second
+.TP
+bps or a bare number
+Bytes per second
+.P
+Amounts of data can be specified in:
+.TP
+kb or k
+Kilobytes
+.TP
+mb or m
+Megabytes
+.TP
+mbit
+Megabits
+.TP
+kbit
+Kilobits
+.TP
+b or a bare number
+Bytes.
+.P
+Lengths of time can be specified in:
+.TP
+s, sec or secs
+Whole seconds
+.TP
+ms, msec or msecs
+Milliseconds
+.TP
+us, usec, usecs or a bare number
+Microseconds.
+
+.SH TC COMMANDS
+The following commands are available for qdiscs, classes and filters:
+.TP
+add
+Add a qdisc, class or filter to a node. For all entities, a 
+.B parent
+must be passed, either by passing its ID or by attaching directly to the root of a device. 
+When creating a qdisc or a filter, it can be named with the
+.B handle
+parameter. A class is named with the
+.B classid
+parameter.
+
+.TP
+delete
+A qdisc can be removed by specifying its handle, which may also be 'root'. All subclasses and their leaf qdiscs 
+are automatically deleted, as well as any filters attached to them.
+
+.TP
+change
+Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception
+that the handle cannot be changed and neither can the parent. In other words, 
+.B
+change 
+cannot move a node.
+
+.TP
+replace
+Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet
+it is created.
+
+.TP
+link
+Only available for qdiscs and performs a replace where the node 
+must exist already.
+
+
+.SH HISTORY
+.B tc
+was written by Alexey N. Kuznetsov and added in Linux 2.2.
+.SH SEE ALSO
+.BR tc-cbq (8),
+.BR tc-htb (8),
+.BR tc-sfq (8),
+.BR tc-red (8),
+.BR tc-tbf (8),
+.BR tc-pfifo (8),
+.BR tc-bfifo (8),
+.BR tc-pfifo_fast (8),
+.BR tc-filters (8)
+
+.SH AUTHOR
+Manpage maintained by bert hubert (ahu@ds9a.nl)
+
author	Mark Huang <mlhuang@cs.princeton.edu>
	Wed, 22 Feb 2006 21:38:27 +0000 (21:38 +0000)
committer	Mark Huang <mlhuang@cs.princeton.edu>
	Wed, 22 Feb 2006 21:38:27 +0000 (21:38 +0000)
ip.8	[new file with mode: 0644]	patch \| blob
tc-cbq-details.8	[new file with mode: 0644]	patch \| blob
tc-cbq.8	[new file with mode: 0644]	patch \| blob
tc-htb.8	[new file with mode: 0644]	patch \| blob
tc-pbfifo.8	[new file with mode: 0644]	patch \| blob
tc-pfifo_fast.8	[new file with mode: 0644]	patch \| blob
tc-prio.8	[new file with mode: 0644]	patch \| blob
tc-red.8	[new file with mode: 0644]	patch \| blob
tc-sfq.8	[new file with mode: 0644]	patch \| blob
tc-tbf.8	[new file with mode: 0644]	patch \| blob
tc.8	[new file with mode: 0644]	patch \| blob