-#!/bin/env python2 -u
-
-# Based on code written by: Andy Bavier, acb@cs.princeton.edu
+#!/usr/bin/python
#
-# Bandwidth limit script to run on PlanetLab nodes. The intent is to use
-# the Hierarchical Token Bucket queueing discipline of 'tc' to (1) cap
-# the output bandwidth of the node at a specified rate (e.g., 5Mbps) and
-# (2) to allow all vservers to fairly share this rate. For instance,
-# if there are N vservers, then each should get at least 5/N Mbps of
-# bandwidth.
+# Bandwidth limit module for PlanetLab nodes. The intent is to use the
+# Hierarchical Token Bucket (HTB) queueing discipline (qdisc) to allow
+# slices to fairly share access to available node bandwidth. We
+# currently define three classes of "available node bandwidth":
+#
+# 1. Available hardware bandwidth (bwmax): The maximum rate of the
+# hardware.
+#
+# 2. Available capped bandwidth (bwcap): The maximum rate allowed to
+# non-exempt destinations. By default, equal to bwmax, but may be
+# lowered by PIs.
+#
+# 3. Available uncapped ("exempt") bandwidth: The difference between
+# bwmax and what is currently being used of bwcap, or the maximum rate
+# allowed to destinations exempt from caps (e.g., Internet2).
+#
+# All three classes of bandwidth are fairly shared according to the
+# notion of "shares". For instance, if the node is capped at 5 Mbps,
+# there are N slices, and each slice has 1 share, then each slice
+# should get at least 5/N Mbps of bandwidth. How HTB is implemented
+# makes this statement a little too simplistic. What it really means
+# is that during any single time period, only a certain number of
+# bytes can be sent onto the wire. Each slice is guaranteed that at
+# least some small number of its bytes will be sent. Whatever is left
+# over from the budget, is split in proportion to the number of shares
+# each slice has.
+#
+# Even if the node is not capped at a particular limit (bwcap ==
+# bwmax), this module enforces fair share access to bwmax. Also, if
+# the node is capped at a particular limit, rules may optionally be
+# defined that classify certain packets into the "exempt" class. This
+# class receives whatever bandwidth is leftover between bwcap and
+# bwmax; slices fairly share this bandwidth as well.
+#
+# The root context is exempt from sharing and can send as much as it
+# needs to.
#
# Some relevant URLs:
-# http://lartc.org/howto for how to use tc
-# http://luxik.cdi.cz/~devik/qos/htb/ for info on htb
+#
+# 1. http://lartc.org/howto for how to use tc
+# 2. http://luxik.cdi.cz/~devik/qos/htb/ for info on HTB
+#
+# Andy Bavier <acb@cs.princeton.edu>
+# Mark Huang <mlhuang@cs.princeton.edu>
+# Copyright (C) 2006 The Trustees of Princeton University
+#
+# $Id: bwlimit.py,v 1.14 2006/12/13 21:48:14 faiyaza Exp $
+#
+
+import sys, os, re, getopt
+from sets import Set
+import pwd
+
+
+# Where the tc binary lives
+TC = "/sbin/tc"
+
+# Default interface
+dev = "eth0"
+
+# Verbosity level
+verbose = 0
+
+# bwmin should be small enough that it can be considered negligibly
+# slow compared to the hardware. 8 bits/second appears to be the
+# smallest value supported by tc.
+bwmin = 8
+
+# bwmax should be large enough that it can be considered at least as
+# fast as the hardware.
+bwmax = 1000*1000*1000
+
+# quantum is the maximum number of bytes that can be borrowed by a
+# share (or slice, if each slice gets 1 share) in one time period
+# (with HZ=1000, 1 ms). If multiple slices are competing for bandwidth
+# above their guarantees, and each is attempting to borrow up to the
+# node bandwidth cap, quantums control how the excess bandwidth is
+# distributed. Slices with 2 shares will borrow twice the amount in
+# one time period as slices with 1 share, so averaged over time, they
+# will get twice as much of the excess bandwidth. The value should be
+# as small as possible and at least 1 MTU. By default, it would be
+# calculated as bwmin/10, but since we use such small a value for
+# bwmin, it's better to just set it to a value safely above 1 Ethernet
+# MTU.
+quantum = 1600
+
+# cburst is the maximum number of bytes that can be burst onto the
+# wire in one time period (with HZ=1000, 1 ms). If multiple slices
+# have data queued for transmission, cbursts control how long each
+# slice can have the wire for. If not specified, it is set to the
+# smallest possible value that would enable the slice's "ceil" rate
+# (usually the node bandwidth cap), to be reached if a slice was able
+# to borrow enough bandwidth to do so. For now, it's unclear how or if
+# to relate this to the notion of shares, so just let tc set the
+# default.
+cburst = None
+
+# There is another parameter that controls how bandwidth is allocated
+# between slices on nodes that is outside the scope of HTB. We enforce
+# a 16 GByte/day total limit on each slice, which works out to about
+# 1.5mbit. If a slice exceeds this byte limit before the day finishes,
+# it is capped at (i.e., its "ceil" rate is set to) the smaller of the
+# node bandwidth cap or 1.5mbit. pl_mom is in charge of enforcing this
+# rule and executes this script to override "ceil".
+
+# We support multiple bandwidth limits, by reserving the top nibble of
+# the minor classid to be the "subclassid". Theoretically, we could
+# support up to 15 subclasses, but for now, we only define two: the
+# "default" subclass 1:10 that is capped at the node bandwidth cap (in
+# this example, 5mbit) and the "exempt" subclass 1:20 that is capped
+# at bwmax (i.e., not capped). The 1:1 parent class exists only to
+# make the borrowing model work. All bandwidth above minimum
+# guarantees is fairly shared (in this example, slice 2 is guaranteed
+# at least 1mbit in addition to fair access to the rest), subject to
+# the restrictions of the class hierarchy: namely, that the total
+# bandwidth to non-exempt destinations should not exceed the node
+# bandwidth cap.
+#
+# 1:
+# |
+# 1:1 (1gbit)
+# ______________|_____________
+# | |
+# 1:10 (8bit, 5mbit) 1:20 (8bit, 1gbit)
+# | |
+# 1:1000 (8bit, 5mbit), 1:2000 (8bit, 1gbit),
+# 1:1001 (8bit, 5mbit), 1:2001 (8bit, 1gbit),
+# 1:1002 (1mbit, 5mbit), 1:2002 (1mbit, 1gbit),
+# ... ...
+# 1:1FFF (8bit, 5mbit) 1:2FFF (8bit, 1gbit)
+#
+default_minor = 0x1000
+exempt_minor = 0x2000
+
+# root_xid is for the root context. The root context is exempt from
+# fair sharing in both the default and exempt subclasses. The root
+# context gets 5 shares by default.
+root_xid = 0x0000
+root_share = 5
+
+# default_xid is for unclassifiable packets. Packets should not be
+# classified here very often. They can be if a slice's HTB classes are
+# deleted before its processes are. Each slice gets 1 share by
+# default.
+default_xid = 0x0FFF
+default_share = 1
+
+# See tc_util.c and http://physics.nist.gov/cuu/Units/binary.html. Be
+# warned that older versions of tc interpret "kbps", "mbps", "mbit",
+# and "kbit" to mean (in this system) "kibps", "mibps", "mibit", and
+# "kibit" and that if an older version is installed, all rates will
+# be off by a small fraction.
+suffixes = {
+ "": 1,
+ "bit": 1,
+ "kibit": 1024,
+ "kbit": 1000,
+ "mibit": 1024*1024,
+ "mbit": 1000000,
+ "gibit": 1024*1024*1024,
+ "gbit": 1000000000,
+ "tibit": 1024*1024*1024*1024,
+ "tbit": 1000000000000,
+ "bps": 8,
+ "kibps": 8*1024,
+ "kbps": 8000,
+ "mibps": 8*1024*1024,
+ "mbps": 8000000,
+ "gibps": 8*1024*1024*1024,
+ "gbps": 8000000000,
+ "tibps": 8*1024*1024*1024*1024,
+ "tbps": 8000000000000
+}
+
+
+def get_tc_rate(s):
+ """
+ Parses an integer or a tc rate string (e.g., 1.5mbit) into bits/second
+ """
+
+ if type(s) == int:
+ return s
+ m = re.match(r"([0-9.]+)(\D*)", s)
+ if m is None:
+ return -1
+ suffix = m.group(2).lower()
+ if suffixes.has_key(suffix):
+ return int(float(m.group(1)) * suffixes[suffix])
+ else:
+ return -1
-import sys, os, re, string
-# Global variables
-TC="/sbin/tc" # Where the modified tc program lives
-OPS = ["change","add"] # Sequence of TC ops we'll try
+def format_tc_rate(rate):
+ """
+ Formats a bits/second rate into a tc rate string
+ """
-# Support to run system commands
-import runcmd
-def run(cmd):
+ if rate >= 1000000000 and (rate % 1000000000) == 0:
+ return "%.0fgbit" % (rate / 1000000000.)
+ elif rate >= 1000000 and (rate % 1000000) == 0:
+ return "%.0fmbit" % (rate / 1000000.)
+ elif rate >= 1000:
+ return "%.0fkbit" % (rate / 1000.)
+ else:
+ return "%.0fbit" % rate
+
+
+# Parse /etc/planetlab/bwcap (or equivalent)
+def read_bwcap(bwcap_file):
+ bwcap = bwmax
try:
- runcmd.run(cmd)
- ret = True
- except runcmd.Error, ex:
- ret = False
+ fp = open(bwcap_file, "r")
+ line = fp.readline().strip()
+ if line:
+ bwcap = get_tc_rate(line)
+ except:
+ pass
+ if bwcap == -1:
+ bwcap = bwmax
+ return bwcap
- return ret
-def get_defaults(cap_file="/etc/planetlab/bwcap", default_cap="10mbit"):
- # The maximum output bandwidth, read in from cap_file (if it
- # exists). If cap_file does not exist, use default_cap for
- # bandwidth cap. See also the 'cburst' parameter below.
- cap=default_cap
+def get_bwcap(dev = dev):
+ """
+ Get the current (live) value of the node bandwidth cap
+ """
+
+ state = tc("-d class show dev %s" % dev)
+ base_re = re.compile(r"class htb 1:10 parent 1:1 .*ceil ([^ ]+) .*")
+ base_classes = filter(None, map(base_re.match, state))
+ if not base_classes:
+ return -1
+ if len(base_classes) > 1:
+ raise Exception, "unable to get current bwcap"
+ return get_tc_rate(base_classes[0].group(1))
+
+
+def get_slice(xid):
+ """
+ Get slice name ("princeton_mlh") from slice xid (500)
+ """
+
+ if xid == root_xid:
+ return "root"
+ if xid == default_xid:
+ return "default"
+ try:
+ return pwd.getpwuid(xid).pw_name
+ except KeyError:
+ pass
+
+ return None
+
+def get_xid(slice):
+ """
+ Get slice xid ("princeton_mlh") from slice name ("500" or "princeton_mlh")
+ """
+
+ if slice == "root":
+ return root_xid
+ if slice == "default":
+ return default_xid
try:
- os.stat(cap_file)
- fp = open(cap_file)
- lines = fp.readlines()
- fp.close()
try:
- cap=string.strip(lines[0])
- except ValueError, ex:
+ return int(slice)
+ except ValueError:
pass
- except OSError, ex:
+ return pwd.getpwnam(slice).pw_uid
+ except KeyError:
pass
- # How many bytes a single token bucket is allowed to send at once.
- # Small values (i.e., 3080 = two maximum-sized Ethernet packets)
- # provide better fine-grained fairness. At high rates (e.g.,
- # cap=100mbit) this needs to be raised to allow full throughput.
- cburst=30800
-
- # The 'share' and 'quantum' parameters both influence the actual throughput
- # seen by a particular vserver:
-
- # 'share' is the rate at which tokens fill the bucket, and so is
- # the minimum bandwidth given to the task. I think this just
- # needs to be set to some small value that is the same for all
- # vservers. With the current value and a 5mbit cap, we can
- # support 5000 vservers (5mbit/1kbit = 5000). With values lower
- # than 10kbit, the HTB output (from tc -s -d class dev eth0) looks
- # strange... this needs to be looked into further.
- share="1kbit"
-
- # 'quantum' influences how excess bandwidth (i.e., above the
- # 'share') is distributed to vservers. Apparently, vservers can
- # send additional packets in proportion to their quantums (and not
- # their shares, as one might expect). See:
- # http://luxik.cdi.cz/~devik/qos/htb/manual/userg.htm#sharing
- # The above link states that 'quantum' is automatically
- # calculated for shares above 120kbit. Otherwise it should be
- # set to a small value but at least one MTU, so I set it to one
- # MTU. All vservers are assigned the same quantum and so they
- # should share equally.
- quantum=1540
-
- return cap, cburst, share, quantum
-
-
-def init(eth):
- global TC, OPS
-
- cap, cburst, share, quantum = get_defaults()
- if cap == "-1": return
-
- # Install HTB on $ETH. Specifies that all packets not matching a
- # filter rule go to class with handle 9999. If we don't supply a
- # default class, it sounds like non-matching packets can be sent
- # at an unlimited rate.
- for op in OPS:
- cmd = "%s qdisc %s dev %s root handle 1: htb default 9999" % (TC,op,eth)
- if run(cmd): break
-
- # Add a root class with bwcap capped rate
- for op in OPS:
- cmd = "%s class %s dev %s parent 1: classid 1:1 htb rate %s quantum %d" % \
- (TC, op, eth, cap, quantum)
- if run(cmd): break
-
- # Set up the default class. Packets will fail to match a filter rule
- # and end up here if they are sent by a process with UID < 500.
- for op in OPS:
- cmd = "%s class %s dev %s parent 1:1 classid 1:9999 htb rate %s ceil %s quantum %d cburst %d" % \
- (TC, op, eth, share, cap, quantum, cburst)
- if run(cmd): break
-
- # The next command appears to throttle back processes that are
- # sending faster than the token bucket can support, rather than
- # just dropping their packets.
- for op in OPS:
- cmd = "%s qdisc %s dev %s parent 1:9999 handle 9999 pfifo" % \
- (TC, op, eth)
- if run(cmd): break
-
-def on(xid, eth, share, minrate, maxrate = None):
- global TC, OPS
-
- default_cap, default_cburst, default_share, default_quantum = get_defaults()
- if maxrate == None:
- maxrate = default_cap
- quantum = share * default_quantum
-
- # Set up the per-vserver token bucket
- for op in OPS:
- cmd = "%s class %s dev %s parent 1:1 classid 1:%d htb rate %s ceil %s quantum %d cburst %d" % \
- (TC, op, eth, xid, minrate, maxrate, quantum, default_cburst)
- if run(cmd): break
-
- # The next command appears to throttle back processes that are
- # sending faster than the token bucket can support, rather than
- # just dropping their packets.
- for op in OPS:
- cmd = "%s qdisc %s dev %s parent 1:%d handle %d pfifo" % \
- (TC, op, eth, xid, xid)
- if run(cmd): break
-
- # Matches packets sent by a vserver to the appropriate token bucket.
- # The raw socket module marks each packet with its vserver id.
- # See: http://lartc.org/howto/lartc.qdisc.filters.html for more
- # info on the filter command.
- cmd = "%s filter del dev %s protocol ip prio %d" % (TC, eth, xid)
- run(cmd)
- cmd = "%s filter add dev %s prio %d parent 1:0 protocol ip handle %d fw flowid 1:%d" % \
- (TC, eth, xid, xid, xid)
- run(cmd)
-
-def off(xid, eth):
- cmd = "%s filter del dev %s protocol ip prio %d" % (TC, eth, xid)
- run(cmd)
-
- cmd = "%s qdisc del dev %s parent 1:%d" % (TC, eth, xid)
- run(cmd)
-
- cmd = "%s class del dev %s classid 1:%d" % (TC, eth, xid)
- run(cmd)
+ return None
+
+def run(cmd, input = None):
+ """
+ Shortcut for running a shell command
+ """
+
+ try:
+ if verbose:
+ sys.stderr.write("Executing: " + cmd + "\n")
+ if input is None:
+ fileobj = os.popen(cmd, "r")
+ output = fileobj.readlines()
+ else:
+ fileobj = os.popen(cmd, "w")
+ fileobj.write(input)
+ output = None
+ if fileobj.close() is None:
+ return output
+ except Exception, e:
+ pass
+ return None
+
+
+def tc(cmd):
+ """
+ Shortcut for running a tc command
+ """
+
+ return run(TC + " " + cmd)
+
+
+def init(dev = dev, bwcap = bwmax):
+ """
+ (Re)initialize the bandwidth limits on this node
+ """
+
+ # Load the module used to manage exempt classes
+ run("/sbin/modprobe ip_set_iphash")
+
+ # Save current settings
+ paramslist = get(None, dev)
+
+ # Delete root qdisc 1: if it exists. This will also automatically
+ # delete any child classes.
+ for line in tc("qdisc show dev %s" % dev):
+ # Search for the root qdisc 1:
+ m = re.match(r"qdisc htb 1:", line)
+ if m is not None:
+ tc("qdisc del dev %s root handle 1:" % dev)
+ break
+
+ # Initialize HTB. The "default" clause specifies that if a packet
+ # fails classification, it should go into the class with handle
+ # 1FFF.
+ tc("qdisc add dev %s root handle 1: htb default %x" % \
+ (dev, default_minor | default_xid))
+
+ # Set up a parent class from which all subclasses borrow.
+ tc("class add dev %s parent 1: classid 1:1 htb rate %dbit" % \
+ (dev, bwmax))
+
+ # Set up a subclass that represents the node bandwidth cap. We
+ # allow each slice to borrow up to this rate, so it is also
+ # usually the "ceil" rate for each slice.
+ tc("class add dev %s parent 1:1 classid 1:10 htb rate %dbit ceil %dbit" % \
+ (dev, bwmin, bwcap))
+
+ # Set up a subclass that represents "exemption" from the node
+ # bandwidth cap. Once the node bandwidth cap is reached, bandwidth
+ # to exempt destinations can still be fairly shared up to bwmax.
+ tc("class add dev %s parent 1:1 classid 1:20 htb rate %dbit ceil %dbit" % \
+ (dev, bwmin, bwmax))
+ # Set up the root class (and tell VNET what it is). Packets sent
+ # by root end up here and are capped at the node bandwidth
+ # cap.
+ on(root_xid, dev, share = root_share)
+ file("/proc/sys/vnet/root_class", "w").write("%d" % ((1 << 16) | default_minor | root_xid))
+
+ # Set up the default class. Packets that fail classification end
+ # up here.
+ on(default_xid, dev, share = default_share)
+
+ # Restore old settings
+ for (xid, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ bytes, exemptbytes) in paramslist:
+ if xid not in (root_xid, default_xid):
+ on(xid, dev, share, minrate, maxrate, minexemptrate, maxexemptrate)
+
+
+def get(xid = None, dev = dev):
+ """
+ Get the bandwidth limits and current byte totals for a
+ particular slice xid as a tuple (xid, share, minrate, maxrate,
+ minexemptrate, maxexemptrate, bytes, exemptbytes), or all classes
+ as a list of such tuples.
+ """
+
+ if xid is None:
+ ret = []
+ else:
+ ret = None
+
+ rates = {}
+ rate = None
+
+ # ...
+ # class htb 1:1000 parent 1:10 leaf 1000: prio 0 quantum 8000 rate 8bit ceil 10000Kbit ...
+ # Sent 6851486 bytes 49244 pkt (dropped 0, overlimits 0 requeues 0)
+ # ...
+ # class htb 1:2000 parent 1:20 leaf 2000: prio 0 quantum 8000 rate 8bit ceil 1000Mbit ...
+ # Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
+ # ...
+ for line in tc("-s -d class show dev %s" % dev):
+ # Rate parameter line
+ params = re.match(r"class htb 1:([0-9a-f]+) parent 1:(10|20)", line)
+ # Statistics line
+ stats = re.match(r".* Sent ([0-9]+) bytes", line)
+ # Another class
+ ignore = re.match(r"class htb", line)
+
+ if params is not None:
+ # Which class
+ if params.group(2) == "10":
+ min = 'min'
+ max = 'max'
+ bytes = 'bytes'
+ else:
+ min = 'minexempt'
+ max = 'maxexempt'
+ bytes = 'exemptbytes'
+
+ # Slice ID
+ id = int(params.group(1), 16) & 0x0FFF;
+
+ if rates.has_key(id):
+ rate = rates[id]
+ else:
+ rate = {'id': id}
+
+ # Parse share
+ rate['share'] = 1
+ m = re.search(r"quantum (\d+)", line)
+ if m is not None:
+ rate['share'] = int(m.group(1)) / quantum
+
+ # Parse minrate
+ rate[min] = bwmin
+ m = re.search(r"rate (\w+)", line)
+ if m is not None:
+ rate[min] = get_tc_rate(m.group(1))
+
+ # Parse maxrate
+ rate[max] = bwmax
+ m = re.search(r"ceil (\w+)", line)
+ if m is not None:
+ rate[max] = get_tc_rate(m.group(1))
+
+ # Which statistics to parse
+ rate['stats'] = bytes
+
+ rates[id] = rate
+
+ elif stats is not None:
+ if rate is not None:
+ rate[rate['stats']] = int(stats.group(1))
+
+ elif ignore is not None:
+ rate = None
+
+ # Keep parsing until we get everything
+ if rate is not None and \
+ rate.has_key('min') and rate.has_key('minexempt') and \
+ rate.has_key('max') and rate.has_key('maxexempt') and \
+ rate.has_key('bytes') and rate.has_key('exemptbytes'):
+ params = (rate['id'], rate['share'],
+ rate['min'], rate['max'],
+ rate['minexempt'], rate['maxexempt'],
+ rate['bytes'], rate['exemptbytes'])
+ if xid is None:
+ # Return a list of parameters
+ ret.append(params)
+ rate = None
+ elif xid == rate['id']:
+ # Return the parameters for this class
+ ret = params
+ break
+
+ return ret
+
+
+def on(xid, dev = dev, share = None, minrate = None, maxrate = None, minexemptrate = None, maxexemptrate = None):
+ """
+ Apply specified bandwidth limit to the specified slice xid
+ """
+
+ # Get defaults from current state if available
+ cap = get(xid, dev)
+ if cap is not None:
+ if share is None:
+ share = cap[1]
+ if minrate is None:
+ minrate = cap[2]
+ if maxrate is None:
+ maxrate = cap[3]
+ if minexemptrate is None:
+ minexemptrate = cap[4]
+ if maxexemptrate is None:
+ maxexemptrate = cap[5]
+
+ # Figure out what the current node bandwidth cap is
+ bwcap = get_bwcap()
+
+ # Set defaults
+ if share is None:
+ share = default_share
+ if minrate is None:
+ minrate = bwmin
+ else:
+ minrate = get_tc_rate(minrate)
+ if maxrate is None:
+ maxrate = bwcap
+ else:
+ maxrate = get_tc_rate(maxrate)
+ if minexemptrate is None:
+ minexemptrate = minrate
+ else:
+ minexemptrate = get_tc_rate(minexemptrate)
+ if maxexemptrate is None:
+ maxexemptrate = bwmax
+ else:
+ maxexemptrate = get_tc_rate(maxexemptrate)
+
+ # Sanity checks
+ if maxrate < bwmin:
+ maxrate = bwmin
+ if maxrate > bwcap:
+ maxrate = bwcap
+ if minrate < bwmin:
+ minrate = bwmin
+ if minrate > maxrate:
+ minrate = maxrate
+ if maxexemptrate < bwmin:
+ maxexemptrate = bwmin
+ if maxexemptrate > bwmax:
+ maxexemptrate = bwmax
+ if minexemptrate < bwmin:
+ minexemptrate = bwmin
+ if minexemptrate > maxexemptrate:
+ minexemptrate = maxexemptrate
+
+ # Set up subclasses for the slice
+ tc("class replace dev %s parent 1:10 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \
+ (dev, default_minor | xid, minrate, maxrate, share * quantum))
+
+ tc("class replace dev %s parent 1:20 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \
+ (dev, exempt_minor | xid, minexemptrate, maxexemptrate, share * quantum))
+
+ # Attach a FIFO to each subclass, which helps to throttle back
+ # processes that are sending faster than the token buckets can
+ # support.
+ tc("qdisc replace dev %s parent 1:%x handle %x pfifo" % \
+ (dev, default_minor | xid, default_minor | xid))
+
+ tc("qdisc replace dev %s parent 1:%x handle %x pfifo" % \
+ (dev, exempt_minor | xid, exempt_minor | xid))
+
+
+def set(xid, share = None, minrate = None, maxrate = None, minexemptrate = None, maxexemptrate = None):
+ on(xid = xid, share = share,
+ minrate = minrate, maxrate = maxrate,
+ minexemptrate = minexemptrate, maxexemptrate = maxexemptrate)
+
+
+# Remove class associated with specified slice xid. If further packets
+# are seen from this slice, they will be classified into the default
+# class 1:1FFF.
+def off(xid, dev = dev):
+ """
+ Remove class associated with specified slice xid. If further
+ packets are seen from this slice, they will be classified into the
+ default class 1:1FFF.
+ """
+
+ cap = get(xid, dev)
+ if cap is not None:
+ tc("class del dev %s classid 1:%x" % (dev, default_minor | xid))
+ tc("class del dev %s classid 1:%x" % (dev, exempt_minor | xid))
+
+
+def exempt_init(group_name, node_ips):
+ """
+ Initialize the list of destinations exempt from the node bandwidth
+ (burst) cap.
+ """
+
+ # Clean up
+ iptables = "/sbin/iptables -t vnet %s POSTROUTING"
+ run(iptables % "-F")
+ run("/sbin/ipset -X " + group_name)
+
+ # Create a hashed IP set of all of these destinations
+ lines = ["-N %s iphash" % group_name]
+ add_cmd = "-A %s " % group_name
+ lines += [(add_cmd + ip) for ip in node_ips]
+ lines += ["COMMIT"]
+ restore = "\n".join(lines) + "\n"
+ run("/sbin/ipset -R", restore)
+
+ # Add rule to match on destination IP set
+ run((iptables + " -m set --set %s dst -j CLASSIFY --set-class 1:%x") %
+ ("-A", group_name, exempt_minor))
+
+
+def usage():
+ bwcap_description = format_tc_rate(get_bwcap())
+
+ print """
+Usage:
+
+%s [OPTION]... [COMMAND] [ARGUMENT]...
+
+Options:
+ -d device Network interface (default: %s)
+ -r rate Node bandwidth cap (default: %s)
+ -q quantum Share multiplier (default: %d bytes)
+ -n Print rates in numeric bits per second
+ -v Enable verbose debug messages
+ -h This message
+
+Commands:
+ init
+ (Re)initialize all bandwidth parameters
+ on slice [share|-] [minrate|-] [maxrate|-] [minexemptrate|-] [maxexemptrate|-]
+ Set bandwidth parameter(s) for the specified slice
+ off slice
+ Remove all bandwidth parameters for the specified slice
+ get
+ Get all bandwidth parameters for all slices
+ get slice
+ Get bandwidth parameters for the specified slice
+""" % (sys.argv[0], dev, bwcap_description, quantum)
+ sys.exit(1)
+
+def main():
+ global dev, quantum, verbose
+
+ # Defaults
+ numeric = False
+ bwcap = get_bwcap()
+
+ (opts, argv) = getopt.getopt(sys.argv[1:], "d:nr:q:vh")
+ for (opt, optval) in opts:
+ if opt == '-d':
+ dev = optval
+ elif opt == '-n':
+ numeric = True
+ elif opt == '-r':
+ bwcap = get_tc_rate(optval)
+ elif opt == '-q':
+ quantum = int(optval)
+ elif opt == '-v':
+ verbose += 1
+ elif opt == '-h':
+ usage()
+
+ if len(argv):
+ if argv[0] == "init" or (argv[0] == "on" and len(argv) == 1):
+ # (Re)initialize
+ init(dev, get_tc_rate(bwcap))
+
+ elif argv[0] == "get" or argv[0] == "show":
+ # Show
+ if len(argv) >= 2:
+ # Show a particular slice
+ xid = get_xid(argv[1])
+ if xid is None:
+ sys.stderr.write("Error: Invalid slice name or context '%s'\n" % argv[1])
+ usage()
+ params = get(xid, dev)
+ if params is None:
+ paramslist = []
+ else:
+ paramslist = [params]
+ else:
+ # Show all slices
+ paramslist = get(None, dev)
+
+ for (xid, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ bytes, exemptbytes) in paramslist:
+ slice = get_slice(xid)
+ if slice is None:
+ # Orphaned (not associated with a slice) class
+ slice = "%d?" % xid
+ if numeric:
+ print "%s %d %d %d %d %d %d %d" % \
+ (slice, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ bytes, exemptbytes)
+ else:
+ print "%s %d %s %s %s %s %d %d" % \
+ (slice, share,
+ format_tc_rate(minrate), format_tc_rate(maxrate),
+ format_tc_rate(minexemptrate), format_tc_rate(maxexemptrate),
+ bytes, exemptbytes)
+
+ elif len(argv) >= 2:
+ # slice, ...
+ xid = get_xid(argv[1])
+ if xid is None:
+ sys.stderr.write("Error: Invalid slice name or context '%s'\n" % argv[1])
+ usage()
+
+ if argv[0] == "on" or argv[0] == "add" or argv[0] == "replace" or argv[0] == "set":
+ # Enable cap
+ args = []
+ if len(argv) >= 3:
+ # ... share, minrate, maxrate, minexemptrate, maxexemptrate
+ casts = [int, get_tc_rate, get_tc_rate, get_tc_rate, get_tc_rate]
+ for i, arg in enumerate(argv[2:]):
+ if i >= len(casts):
+ break
+ if arg == "-":
+ args.append(None)
+ else:
+ args.append(casts[i](arg))
+ on(xid, dev, *args)
+
+ elif argv[0] == "off" or argv[0] == "del":
+ # Disable cap
+ off(xid, dev)
+
+ else:
+ usage()
+
+ else:
+ usage()
+
+
+if __name__ == '__main__':
+ main()