X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=python%2Fbwlimit.py;h=8221e7b91878afaf72680544bfa692a47727a62b;hb=05b5a9ba6c637e5b7ff30a0ea67a9ebb32e8a595;hp=1d2d8cfc381285ed5c359305654cad2011a8b5d3;hpb=3ff044841d967dc6aebb73e43de445951310044d;p=util-vserver.git diff --git a/python/bwlimit.py b/python/bwlimit.py index 1d2d8cf..8221e7b 100644 --- a/python/bwlimit.py +++ b/python/bwlimit.py @@ -46,10 +46,12 @@ # Mark Huang # Copyright (C) 2006 The Trustees of Princeton University # -# $Id: bwlimit.py,v 1.5 2006/02/27 01:58:09 mlhuang Exp $ +# $Id: bwlimit.py,v 1.15 2007/02/07 04:21:11 mlhuang Exp $ # import sys, os, re, getopt +from sets import Set +import pwd # Where the tc binary lives @@ -58,10 +60,6 @@ TC = "/sbin/tc" # Default interface dev = "eth0" -# For backward compatibility, if bwcap is not specified, attempt to -# get it from here. -bwcap_file = "/etc/planetlab/bwcap" - # Verbosity level verbose = 0 @@ -113,37 +111,41 @@ cburst = None # "default" subclass 1:10 that is capped at the node bandwidth cap (in # this example, 5mbit) and the "exempt" subclass 1:20 that is capped # at bwmax (i.e., not capped). The 1:1 parent class exists only to -# make the borrowing model work. All bandwidth is fairly shared, -# subject to the restrictions of the class hierarchy: namely, that the -# total bandwidth to non-exempt destinations should not exceed the -# node bandwidth cap. The root slice has a higher priority (0) than -# the others (1) and can thus request all of the bandwidth of that -# subclass. +# make the borrowing model work. All bandwidth above minimum +# guarantees is fairly shared (in this example, slice 2 is guaranteed +# at least 1mbit in addition to fair access to the rest), subject to +# the restrictions of the class hierarchy: namely, that the total +# bandwidth to non-exempt destinations should not exceed the node +# bandwidth cap. # -# 1: -# | -# 1:1 (1gbit) -# ______________|______________ -# | | -# 1:10 (8bit, 5mbit) 1:20 (8bit, 1gbit) -# | | -# 1:1000 (8bit, 5mbit, 0), 1:2000 (8bit, 1gbit, 0), -# 1:1001 (8bit, 5mbit, 1), 1:2001 (8bit, 1gbit, 1), -# 1:1002 (8bit, 5mbit, 1), 1:2002 (8bit, 1gbit, 1), -# ... ... -# 1:1FFF (8bit, 5mbit, 1) 1:2FFF (8bit, 1gbit, 1) +# 1: +# | +# 1:1 (1gbit) +# ______________|_____________ +# | | +# 1:10 (8bit, 5mbit) 1:20 (8bit, 1gbit) +# | | +# 1:1000 (8bit, 5mbit), 1:2000 (8bit, 1gbit), +# 1:1001 (8bit, 5mbit), 1:2001 (8bit, 1gbit), +# 1:1002 (1mbit, 5mbit), 1:2002 (1mbit, 1gbit), +# ... ... +# 1:1FFF (8bit, 5mbit) 1:2FFF (8bit, 1gbit) # default_minor = 0x1000 exempt_minor = 0x2000 # root_xid is for the root context. The root context is exempt from -# fair sharing in both the default and exempt subclasses.. +# fair sharing in both the default and exempt subclasses. The root +# context gets 5 shares by default. root_xid = 0x0000 +root_share = 5 # default_xid is for unclassifiable packets. Packets should not be # classified here very often. They can be if a slice's HTB classes are -# deleted before its processes are. +# deleted before its processes are. Each slice gets 1 share by +# default. default_xid = 0x0FFF +default_share = 1 # See tc_util.c and http://physics.nist.gov/cuu/Units/binary.html. Be # warned that older versions of tc interpret "kbps", "mbps", "mbit", @@ -173,8 +175,11 @@ suffixes = { } -# Parses an integer or a tc rate string (e.g., 1.5mbit) into bits/second def get_tc_rate(s): + """ + Parses an integer or a tc rate string (e.g., 1.5mbit) into bits/second + """ + if type(s) == int: return s m = re.match(r"([0-9.]+)(\D*)", s) @@ -187,9 +192,14 @@ def get_tc_rate(s): return -1 -# Prints a tc rate string def format_tc_rate(rate): - if rate >= 1000000: + """ + Formats a bits/second rate into a tc rate string + """ + + if rate >= 1000000000 and (rate % 1000000000) == 0: + return "%.0fgbit" % (rate / 1000000000.) + elif rate >= 1000000 and (rate % 1000000) == 0: return "%.0fmbit" % (rate / 1000000.) elif rate >= 1000: return "%.0fkbit" % (rate / 1000.) @@ -197,9 +207,8 @@ def format_tc_rate(rate): return "%.0fbit" % rate -# Parse /etc/planetlab/bwcap. XXX Should get this from the API -# instead. -def get_bwcap(): +# Parse /etc/planetlab/bwcap (or equivalent) +def read_bwcap(bwcap_file): bwcap = bwmax try: fp = open(bwcap_file, "r") @@ -213,42 +222,72 @@ def get_bwcap(): return bwcap -# Get slice xid (500) from slice name ("500" or "princeton_mlh") or -# slice name ("princeton_mlh") from slice xid (500). -def get_slice(xid_or_name): - labels = ['account', 'password', 'uid', 'gid', 'gecos', 'directory', 'shell'] - - for line in file("/etc/passwd"): - # Comment - if line.strip() == '' or line[0] in '#': - continue - # princeton_mlh:x:... - fields = line.strip().split(':') - if len(fields) < len(labels): - continue - # {'account': 'princeton_mlh', 'password': 'x', ...} - pw = dict(zip(labels, fields)) - if xid_or_name == root_xid: - return "root" - if xid_or_name == default_xid: - return "default" - elif xid_or_name == int(pw['uid']): - # Convert xid into name - return pw['account'] - elif pw['uid'] == xid_or_name or pw['account'] == xid_or_name: - # Convert name into xid - return int(pw['uid']) +def get_bwcap(dev = dev): + """ + Get the current (live) value of the node bandwidth cap + """ + + state = tc("-d class show dev %s" % dev) + base_re = re.compile(r"class htb 1:10 parent 1:1 .*ceil ([^ ]+) .*") + base_classes = filter(None, map(base_re.match, state)) + if not base_classes: + return -1 + if len(base_classes) > 1: + raise Exception, "unable to get current bwcap" + return get_tc_rate(base_classes[0].group(1)) + + +def get_slice(xid): + """ + Get slice name ("princeton_mlh") from slice xid (500) + """ + + if xid == root_xid: + return "root" + if xid == default_xid: + return "default" + try: + return pwd.getpwuid(xid).pw_name + except KeyError: + pass return None +def get_xid(slice): + """ + Get slice xid ("princeton_mlh") from slice name ("500" or "princeton_mlh") + """ + + if slice == "root": + return root_xid + if slice == "default": + return default_xid + try: + try: + return int(slice) + except ValueError: + pass + return pwd.getpwnam(slice).pw_uid + except KeyError: + pass + + return None + +def run(cmd, input = None): + """ + Shortcut for running a shell command + """ -# Shortcut for running a tc command -def tc(cmd): try: if verbose: - sys.stderr.write("Executing: " + TC + " " + cmd + "\n") - fileobj = os.popen(TC + " " + cmd, "r") - output = fileobj.readlines() + sys.stderr.write("Executing: " + cmd + "\n") + if input is None: + fileobj = os.popen(cmd, "r") + output = fileobj.readlines() + else: + fileobj = os.popen(cmd, "w") + fileobj.write(input) + output = None if fileobj.close() is None: return output except Exception, e: @@ -256,18 +295,24 @@ def tc(cmd): return None -# (Re)initialize the bandwidth limits on this node -def init(dev = dev, bwcap = None): - if bwcap is None: - # For backward compatibility, if bwcap is not specified, - # attempt to get it from /etc/planetlab/bwcap. - bwcap = get_bwcap() - else: - # Allow bwcap to be specified as a tc rate string - bwcap = get_tc_rate(bwcap) +def tc(cmd): + """ + Shortcut for running a tc command + """ + + return run(TC + " " + cmd) + + +def init(dev = dev, bwcap = bwmax): + """ + (Re)initialize the bandwidth limits on this node + """ - # Save current state (if any) - caps = get(dev = dev) + # Load the module used to manage exempt classes + run("/sbin/modprobe ip_set_iphash") + + # Save current settings + paramslist = get(None, dev) # Delete root qdisc 1: if it exists. This will also automatically # delete any child classes. @@ -303,73 +348,131 @@ def init(dev = dev, bwcap = None): # Set up the root class (and tell VNET what it is). Packets sent # by root end up here and are capped at the node bandwidth # cap. - on(root_xid, dev, prio = 0) - file("/proc/sys/vnet/root_class", "w").write("%d" % ((1 << 16) | default_minor | root_xid)) + #on(root_xid, dev, share = root_share) + #try: + # file("/proc/sys/vnet/root_class", "w").write("%d" % ((1 << 16) | default_minor | root_xid)) + #except: + # pass # Set up the default class. Packets that fail classification end # up here. - on(default_xid, dev) + on(default_xid, dev, share = default_share) - # Reapply bandwidth caps. If the node bandwidth cap is now lower - # than it was before, "ceil" for each class will be lowered. If - # the node bandwidth cap is now higher than it was before, "ceil" - # for each class should be reapplied. - for (xid, share, minrate, maxrate) in caps: - if xid != root_xid and xid != default_xid: - on(xid, dev, share = share, minrate = minrate, maxrate = maxrate) + # Restore old settings + for (xid, share, + minrate, maxrate, + minexemptrate, maxexemptrate, + bytes, exemptbytes) in paramslist: + if xid not in (root_xid, default_xid): + on(xid, dev, share, minrate, maxrate, minexemptrate, maxexemptrate) -# Get the bandwidth limits for a particular slice xid as a tuple (xid, -# share, minrate, maxrate), or all classes as a list of tuples. def get(xid = None, dev = dev): + """ + Get the bandwidth limits and current byte totals for a + particular slice xid as a tuple (xid, share, minrate, maxrate, + minexemptrate, maxexemptrate, bytes, exemptbytes), or all classes + as a list of such tuples. + """ + if xid is None: ret = [] else: ret = None - # class htb 1:1002 parent 1:10 leaf 81b3: prio 1 rate 8bit ceil 5000Kbit burst 1600b cburst 4Kb - for line in tc("-d class show dev %s" % dev): - # Search for child classes of 1:10 - m = re.match(r"class htb 1:([0-9a-f]+) parent 1:10", line) - if m is None: - continue - - # If we are looking for a particular class - classid = int(m.group(1), 16) & default_xid - if xid is not None and xid != classid: - continue - - # Parse share - share = 1 - m = re.search(r"quantum (\d+)", line) - if m is not None: - share = int(m.group(1)) / quantum - - # Parse minrate - minrate = bwmin - m = re.search(r"rate (\w+)", line) - if m is not None: - minrate = get_tc_rate(m.group(1)) + rates = {} + rate = None + + # ... + # class htb 1:1000 parent 1:10 leaf 1000: prio 0 quantum 8000 rate 8bit ceil 10000Kbit ... + # Sent 6851486 bytes 49244 pkt (dropped 0, overlimits 0 requeues 0) + # ... + # class htb 1:2000 parent 1:20 leaf 2000: prio 0 quantum 8000 rate 8bit ceil 1000Mbit ... + # Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + # ... + for line in tc("-s -d class show dev %s" % dev): + # Rate parameter line + params = re.match(r"class htb 1:([0-9a-f]+) parent 1:(10|20)", line) + # Statistics line + stats = re.match(r".* Sent ([0-9]+) bytes", line) + # Another class + ignore = re.match(r"class htb", line) + + if params is not None: + # Which class + if params.group(2) == "10": + min = 'min' + max = 'max' + bytes = 'bytes' + else: + min = 'minexempt' + max = 'maxexempt' + bytes = 'exemptbytes' - # Parse maxrate - maxrate = bwmax - m = re.search(r"ceil (\w+)", line) - if m is not None: - maxrate = get_tc_rate(m.group(1)) + # Slice ID + id = int(params.group(1), 16) & 0x0FFF; - if xid is None: - # Return a list of parameters - ret.append((classid, share, minrate, maxrate)) - else: - # Return the parameters for this class - ret = (classid, share, minrate, maxrate) - break + if rates.has_key(id): + rate = rates[id] + else: + rate = {'id': id} + + # Parse share + rate['share'] = 1 + m = re.search(r"quantum (\d+)", line) + if m is not None: + rate['share'] = int(m.group(1)) / quantum + + # Parse minrate + rate[min] = bwmin + m = re.search(r"rate (\w+)", line) + if m is not None: + rate[min] = get_tc_rate(m.group(1)) + + # Parse maxrate + rate[max] = bwmax + m = re.search(r"ceil (\w+)", line) + if m is not None: + rate[max] = get_tc_rate(m.group(1)) + + # Which statistics to parse + rate['stats'] = bytes + + rates[id] = rate + + elif stats is not None: + if rate is not None: + rate[rate['stats']] = int(stats.group(1)) + + elif ignore is not None: + rate = None + + # Keep parsing until we get everything + if rate is not None and \ + rate.has_key('min') and rate.has_key('minexempt') and \ + rate.has_key('max') and rate.has_key('maxexempt') and \ + rate.has_key('bytes') and rate.has_key('exemptbytes'): + params = (rate['id'], rate['share'], + rate['min'], rate['max'], + rate['minexempt'], rate['maxexempt'], + rate['bytes'], rate['exemptbytes']) + if xid is None: + # Return a list of parameters + ret.append(params) + rate = None + elif xid == rate['id']: + # Return the parameters for this class + ret = params + break return ret -# Apply specified bandwidth limit to the specified slice xid -def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): +def on(xid, dev = dev, share = None, minrate = None, maxrate = None, minexemptrate = None, maxexemptrate = None): + """ + Apply specified bandwidth limit to the specified slice xid + """ + # Get defaults from current state if available cap = get(xid, dev) if cap is not None: @@ -379,19 +482,17 @@ def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): minrate = cap[2] if maxrate is None: maxrate = cap[3] + if minexemptrate is None: + minexemptrate = cap[4] + if maxexemptrate is None: + maxexemptrate = cap[5] # Figure out what the current node bandwidth cap is - bwcap = bwmax - for line in tc("-d class show dev %s" % dev): - # Search for 1:10 - m = re.match(r"class htb 1:10.*ceil (\w+)", line) - if m is not None: - bwcap = get_tc_rate(m.group(1)) - break + bwcap = get_bwcap() # Set defaults if share is None: - share = 1 + share = default_share if minrate is None: minrate = bwmin else: @@ -400,20 +501,39 @@ def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): maxrate = bwcap else: maxrate = get_tc_rate(maxrate) + if minexemptrate is None: + minexemptrate = minrate + else: + minexemptrate = get_tc_rate(minexemptrate) + if maxexemptrate is None: + maxexemptrate = bwmax + else: + maxexemptrate = get_tc_rate(maxexemptrate) # Sanity checks + if maxrate < bwmin: + maxrate = bwmin if maxrate > bwcap: maxrate = bwcap - + if minrate < bwmin: + minrate = bwmin if minrate > maxrate: minrate = maxrate + if maxexemptrate < bwmin: + maxexemptrate = bwmin + if maxexemptrate > bwmax: + maxexemptrate = bwmax + if minexemptrate < bwmin: + minexemptrate = bwmin + if minexemptrate > maxexemptrate: + minexemptrate = maxexemptrate # Set up subclasses for the slice - tc("class replace dev %s parent 1:10 classid 1:%x htb rate %dbit ceil %dbit quantum %d prio %d" % \ - (dev, default_minor | xid, minrate, maxrate, share * quantum, prio)) + tc("class replace dev %s parent 1:10 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \ + (dev, default_minor | xid, minrate, maxrate, share * quantum)) - tc("class replace dev %s parent 1:20 classid 1:%x htb rate %dbit ceil %dbit quantum %d prio %d" % \ - (dev, exempt_minor | xid, minrate, bwmax, share * quantum, prio)) + tc("class replace dev %s parent 1:20 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \ + (dev, exempt_minor | xid, minexemptrate, maxexemptrate, share * quantum)) # Attach a FIFO to each subclass, which helps to throttle back # processes that are sending faster than the token buckets can @@ -425,16 +545,54 @@ def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): (dev, exempt_minor | xid, exempt_minor | xid)) +def set(xid, share = None, minrate = None, maxrate = None, minexemptrate = None, maxexemptrate = None): + on(xid = xid, share = share, + minrate = minrate, maxrate = maxrate, + minexemptrate = minexemptrate, maxexemptrate = maxexemptrate) + + # Remove class associated with specified slice xid. If further packets # are seen from this slice, they will be classified into the default # class 1:1FFF. def off(xid, dev = dev): - tc("class del dev %s classid 1:%x" % (dev, default_minor | xid)) - tc("class del dev %s classid 1:%x" % (dev, exempt_minor | xid)) + """ + Remove class associated with specified slice xid. If further + packets are seen from this slice, they will be classified into the + default class 1:1FFF. + """ + + cap = get(xid, dev) + if cap is not None: + tc("class del dev %s classid 1:%x" % (dev, default_minor | xid)) + tc("class del dev %s classid 1:%x" % (dev, exempt_minor | xid)) + + +def exempt_init(group_name, node_ips): + """ + Initialize the list of destinations exempt from the node bandwidth + (burst) cap. + """ + + # Clean up + iptables = "/sbin/iptables -t MANGLE %s POSTROUTING" + run(iptables % "-F") + run("/sbin/ipset -X " + group_name) + + # Create a hashed IP set of all of these destinations + lines = ["-N %s iphash" % group_name] + add_cmd = "-A %s " % group_name + lines += [(add_cmd + ip) for ip in node_ips] + lines += ["COMMIT"] + restore = "\n".join(lines) + "\n" + run("/sbin/ipset -R", restore) + + # Add rule to match on destination IP set + run((iptables + " -m set --set %s dst -j CLASSIFY --set-class 1:%x") % + ("-A", group_name, exempt_minor)) def usage(): - bwcap_description = format_tc_rate(bwmax) + bwcap_description = format_tc_rate(get_bwcap()) print """ Usage: @@ -445,23 +603,21 @@ Options: -d device Network interface (default: %s) -r rate Node bandwidth cap (default: %s) -q quantum Share multiplier (default: %d bytes) + -n Print rates in numeric bits per second + -v Enable verbose debug messages -h This message Commands: init - (Re)initialize bandwidth caps. - on slice [share] [minrate] [maxrate] - Set bandwidth cap for the specified slice + (Re)initialize all bandwidth parameters + on slice [share|-] [minrate|-] [maxrate|-] [minexemptrate|-] [maxexemptrate|-] + Set bandwidth parameter(s) for the specified slice off slice - Remove bandwidth caps for the specified slice + Remove all bandwidth parameters for the specified slice get - Get all bandwidth caps + Get all bandwidth parameters for all slices get slice - Get bandwidth caps for the specified slice - getcap slice - Get maxrate for the specified slice - setcap slice maxrate - Set maxrate for the specified slice + Get bandwidth parameters for the specified slice """ % (sys.argv[0], dev, bwcap_description, quantum) sys.exit(1) @@ -470,12 +626,15 @@ def main(): global dev, quantum, verbose # Defaults + numeric = False bwcap = get_bwcap() - (opts, argv) = getopt.getopt(sys.argv[1:], "f:d:r:g:q:vh") + (opts, argv) = getopt.getopt(sys.argv[1:], "d:nr:q:vh") for (opt, optval) in opts: if opt == '-d': dev = optval + elif opt == '-n': + numeric = True elif opt == '-r': bwcap = get_tc_rate(optval) elif opt == '-q': @@ -488,68 +647,72 @@ def main(): if len(argv): if argv[0] == "init" or (argv[0] == "on" and len(argv) == 1): # (Re)initialize - init(dev, bwcap) + init(dev, get_tc_rate(bwcap)) elif argv[0] == "get" or argv[0] == "show": # Show if len(argv) >= 2: # Show a particular slice - xid = get_slice(argv[1]) + xid = get_xid(argv[1]) if xid is None: sys.stderr.write("Error: Invalid slice name or context '%s'\n" % argv[1]) usage() - caps = [get(xid, dev)] + params = get(xid, dev) + if params is None: + paramslist = [] + else: + paramslist = [params] else: # Show all slices - caps = get(None, dev) + paramslist = get(None, dev) - for (xid, share, minrate, maxrate) in caps: + for (xid, share, + minrate, maxrate, + minexemptrate, maxexemptrate, + bytes, exemptbytes) in paramslist: slice = get_slice(xid) if slice is None: # Orphaned (not associated with a slice) class slice = "%d?" % xid - print "%s: share %d minrate %s maxrate %s" % \ - (slice, share, format_tc_rate(minrate), format_tc_rate(maxrate)) + if numeric: + print "%s %d %d %d %d %d %d %d" % \ + (slice, share, + minrate, maxrate, + minexemptrate, maxexemptrate, + bytes, exemptbytes) + else: + print "%s %d %s %s %s %s %d %d" % \ + (slice, share, + format_tc_rate(minrate), format_tc_rate(maxrate), + format_tc_rate(minexemptrate), format_tc_rate(maxexemptrate), + bytes, exemptbytes) elif len(argv) >= 2: # slice, ... - xid = get_slice(argv[1]) + xid = get_xid(argv[1]) if xid is None: sys.stderr.write("Error: Invalid slice name or context '%s'\n" % argv[1]) usage() - if argv[0] == "on" or argv[0] == "add" or argv[0] == "replace": + if argv[0] == "on" or argv[0] == "add" or argv[0] == "replace" or argv[0] == "set": # Enable cap args = [] if len(argv) >= 3: - # ... share, minrate, maxrate - casts = [int, get_tc_rate, get_tc_rate] + # ... share, minrate, maxrate, minexemptrate, maxexemptrate + casts = [int, get_tc_rate, get_tc_rate, get_tc_rate, get_tc_rate] for i, arg in enumerate(argv[2:]): if i >= len(casts): break - args.append(casts[i](arg)) + if arg == "-": + args.append(None) + else: + args.append(casts[i](arg)) on(xid, dev, *args) elif argv[0] == "off" or argv[0] == "del": # Disable cap off(xid, dev) - # Backward compatibility with old resman script - elif argv[0] == "getcap": - # Get maxrate - cap = get(xid, dev) - if cap is not None: - (xid, share, minrate, maxrate) = cap - print format_tc_rate(maxrate) - - # Backward compatibility with old resman script - elif argv[0] == "setcap": - if len(argv) >= 3: - # Set maxrate - on(xid, dev, maxrate = get_tc_rate(argv[2])) - else: - usage() - else: usage()