X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=python%2Fbwlimit.py;h=fbe825fbb99f3a9e9d90df03966362411357571a;hb=e4fc09d0b138c3bee000dce9fd9fd73dad3d283d;hp=1d2d8cfc381285ed5c359305654cad2011a8b5d3;hpb=3ff044841d967dc6aebb73e43de445951310044d;p=util-vserver.git diff --git a/python/bwlimit.py b/python/bwlimit.py index 1d2d8cf..fbe825f 100644 --- a/python/bwlimit.py +++ b/python/bwlimit.py @@ -46,10 +46,12 @@ # Mark Huang # Copyright (C) 2006 The Trustees of Princeton University # -# $Id: bwlimit.py,v 1.5 2006/02/27 01:58:09 mlhuang Exp $ +# $Id: bwlimit.py,v 1.10 2006/03/14 22:57:50 smuir Exp $ # import sys, os, re, getopt +from sets import Set +import pwd # Where the tc binary lives @@ -58,10 +60,6 @@ TC = "/sbin/tc" # Default interface dev = "eth0" -# For backward compatibility, if bwcap is not specified, attempt to -# get it from here. -bwcap_file = "/etc/planetlab/bwcap" - # Verbosity level verbose = 0 @@ -113,37 +111,41 @@ cburst = None # "default" subclass 1:10 that is capped at the node bandwidth cap (in # this example, 5mbit) and the "exempt" subclass 1:20 that is capped # at bwmax (i.e., not capped). The 1:1 parent class exists only to -# make the borrowing model work. All bandwidth is fairly shared, -# subject to the restrictions of the class hierarchy: namely, that the -# total bandwidth to non-exempt destinations should not exceed the -# node bandwidth cap. The root slice has a higher priority (0) than -# the others (1) and can thus request all of the bandwidth of that -# subclass. +# make the borrowing model work. All bandwidth above minimum +# guarantees is fairly shared (in this example, slice 2 is guaranteed +# at least 1mbit in addition to fair access to the rest), subject to +# the restrictions of the class hierarchy: namely, that the total +# bandwidth to non-exempt destinations should not exceed the node +# bandwidth cap. # -# 1: -# | -# 1:1 (1gbit) -# ______________|______________ -# | | -# 1:10 (8bit, 5mbit) 1:20 (8bit, 1gbit) -# | | -# 1:1000 (8bit, 5mbit, 0), 1:2000 (8bit, 1gbit, 0), -# 1:1001 (8bit, 5mbit, 1), 1:2001 (8bit, 1gbit, 1), -# 1:1002 (8bit, 5mbit, 1), 1:2002 (8bit, 1gbit, 1), -# ... ... -# 1:1FFF (8bit, 5mbit, 1) 1:2FFF (8bit, 1gbit, 1) +# 1: +# | +# 1:1 (1gbit) +# ______________|_____________ +# | | +# 1:10 (8bit, 5mbit) 1:20 (8bit, 1gbit) +# | | +# 1:1000 (8bit, 5mbit), 1:2000 (8bit, 1gbit), +# 1:1001 (8bit, 5mbit), 1:2001 (8bit, 1gbit), +# 1:1002 (1mbit, 5mbit), 1:2002 (1mbit, 1gbit), +# ... ... +# 1:1FFF (8bit, 5mbit) 1:2FFF (8bit, 1gbit) # default_minor = 0x1000 exempt_minor = 0x2000 # root_xid is for the root context. The root context is exempt from -# fair sharing in both the default and exempt subclasses.. +# fair sharing in both the default and exempt subclasses. The root +# context gets 5 shares by default. root_xid = 0x0000 +root_share = 5 # default_xid is for unclassifiable packets. Packets should not be # classified here very often. They can be if a slice's HTB classes are -# deleted before its processes are. +# deleted before its processes are. Each slice gets 1 share by +# default. default_xid = 0x0FFF +default_share = 1 # See tc_util.c and http://physics.nist.gov/cuu/Units/binary.html. Be # warned that older versions of tc interpret "kbps", "mbps", "mbit", @@ -197,9 +199,8 @@ def format_tc_rate(rate): return "%.0fbit" % rate -# Parse /etc/planetlab/bwcap. XXX Should get this from the API -# instead. -def get_bwcap(): +# Parse /etc/planetlab/bwcap (or equivalent) +def read_bwcap(bwcap_file): bwcap = bwmax try: fp = open(bwcap_file, "r") @@ -213,42 +214,57 @@ def get_bwcap(): return bwcap +# Get current (live) value of bwcap +def get_bwcap(dev = dev): + + state = tc("-d class show dev %s" % dev) + base_re = re.compile(r"class htb 1:10 parent 1:1 .*ceil ([^ ]+) .*") + base_classes = filter(None, map(base_re.match, state)) + if not base_classes: + return -1 + if len(base_classes) > 1: + raise Exception, "unable to get current bwcap" + return get_tc_rate(base_classes[0].group(1)) + + # Get slice xid (500) from slice name ("500" or "princeton_mlh") or # slice name ("princeton_mlh") from slice xid (500). def get_slice(xid_or_name): - labels = ['account', 'password', 'uid', 'gid', 'gecos', 'directory', 'shell'] - for line in file("/etc/passwd"): - # Comment - if line.strip() == '' or line[0] in '#': - continue - # princeton_mlh:x:... - fields = line.strip().split(':') - if len(fields) < len(labels): - continue - # {'account': 'princeton_mlh', 'password': 'x', ...} - pw = dict(zip(labels, fields)) - if xid_or_name == root_xid: - return "root" - if xid_or_name == default_xid: - return "default" - elif xid_or_name == int(pw['uid']): - # Convert xid into name - return pw['account'] - elif pw['uid'] == xid_or_name or pw['account'] == xid_or_name: - # Convert name into xid - return int(pw['uid']) + if xid_or_name == root_xid: + return "root" + if xid_or_name == default_xid: + return "default" + if isinstance(xid_or_name, (int, long)): + try: + return pwd.getpwuid(xid_or_name).pw_name + except KeyError: + pass + else: + try: + try: + return int(xid_or_name) + except ValueError: + pass + return pwd.getpwnam(xid_or_name).pw_uid + except KeyError: + pass return None -# Shortcut for running a tc command -def tc(cmd): +# Shortcut for running a command +def run(cmd, input = None): try: if verbose: - sys.stderr.write("Executing: " + TC + " " + cmd + "\n") - fileobj = os.popen(TC + " " + cmd, "r") - output = fileobj.readlines() + sys.stderr.write("Executing: " + cmd + "\n") + if input is None: + fileobj = os.popen(cmd, "r") + output = fileobj.readlines() + else: + fileobj = os.popen(cmd, "w") + fileobj.write(input) + output = None if fileobj.close() is None: return output except Exception, e: @@ -256,18 +272,16 @@ def tc(cmd): return None +# Shortcut for running a tc command +def tc(cmd): + return run(TC + " " + cmd) + + # (Re)initialize the bandwidth limits on this node -def init(dev = dev, bwcap = None): - if bwcap is None: - # For backward compatibility, if bwcap is not specified, - # attempt to get it from /etc/planetlab/bwcap. - bwcap = get_bwcap() - else: - # Allow bwcap to be specified as a tc rate string - bwcap = get_tc_rate(bwcap) +def init(dev, bwcap): - # Save current state (if any) - caps = get(dev = dev) + # load the module used to manage exempt classes + run("/sbin/modprobe ip_set_iphash") # Delete root qdisc 1: if it exists. This will also automatically # delete any child classes. @@ -303,20 +317,12 @@ def init(dev = dev, bwcap = None): # Set up the root class (and tell VNET what it is). Packets sent # by root end up here and are capped at the node bandwidth # cap. - on(root_xid, dev, prio = 0) + on(root_xid, dev, share = root_share) file("/proc/sys/vnet/root_class", "w").write("%d" % ((1 << 16) | default_minor | root_xid)) # Set up the default class. Packets that fail classification end # up here. - on(default_xid, dev) - - # Reapply bandwidth caps. If the node bandwidth cap is now lower - # than it was before, "ceil" for each class will be lowered. If - # the node bandwidth cap is now higher than it was before, "ceil" - # for each class should be reapplied. - for (xid, share, minrate, maxrate) in caps: - if xid != root_xid and xid != default_xid: - on(xid, dev, share = share, minrate = minrate, maxrate = maxrate) + on(default_xid, dev, share = default_share) # Get the bandwidth limits for a particular slice xid as a tuple (xid, @@ -369,7 +375,7 @@ def get(xid = None, dev = dev): # Apply specified bandwidth limit to the specified slice xid -def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): +def on(xid, dev = dev, share = None, minrate = None, maxrate = None): # Get defaults from current state if available cap = get(xid, dev) if cap is not None: @@ -391,7 +397,7 @@ def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): # Set defaults if share is None: - share = 1 + share = default_share if minrate is None: minrate = bwmin else: @@ -404,16 +410,15 @@ def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): # Sanity checks if maxrate > bwcap: maxrate = bwcap - if minrate > maxrate: minrate = maxrate # Set up subclasses for the slice - tc("class replace dev %s parent 1:10 classid 1:%x htb rate %dbit ceil %dbit quantum %d prio %d" % \ - (dev, default_minor | xid, minrate, maxrate, share * quantum, prio)) + tc("class replace dev %s parent 1:10 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \ + (dev, default_minor | xid, minrate, maxrate, share * quantum)) - tc("class replace dev %s parent 1:20 classid 1:%x htb rate %dbit ceil %dbit quantum %d prio %d" % \ - (dev, exempt_minor | xid, minrate, bwmax, share * quantum, prio)) + tc("class replace dev %s parent 1:20 classid 1:%x htb rate %dbit ceil %dbit quantum %d" % \ + (dev, exempt_minor | xid, minrate, bwmax, share * quantum)) # Attach a FIFO to each subclass, which helps to throttle back # processes that are sending faster than the token buckets can @@ -429,12 +434,34 @@ def on(xid, dev = dev, share = None, minrate = None, maxrate = None, prio = 1): # are seen from this slice, they will be classified into the default # class 1:1FFF. def off(xid, dev = dev): - tc("class del dev %s classid 1:%x" % (dev, default_minor | xid)) - tc("class del dev %s classid 1:%x" % (dev, exempt_minor | xid)) + cap = get(xid, dev) + if cap is not None: + tc("class del dev %s classid 1:%x" % (dev, default_minor | xid)) + tc("class del dev %s classid 1:%x" % (dev, exempt_minor | xid)) + + +def exempt_init(group_name, node_ips): + + # Clean up + iptables = "/sbin/iptables -t vnet %s POSTROUTING" + run(iptables % "-F") + run("/sbin/ipset -X " + group_name) + + # Create a hashed IP set of all of these destinations + lines = ["-N %s iphash" % group_name] + add_cmd = "-A %s " % group_name + lines += [(add_cmd + ip) for ip in node_ips] + lines += ["COMMIT"] + restore = "\n".join(lines) + "\n" + run("/sbin/ipset -R", restore) + + # Add rule to match on destination IP set + run((iptables + " -m set --set %s dst -j CLASSIFY --set-class 1:%x") % + ("-A", group_name, exempt_minor)) def usage(): - bwcap_description = format_tc_rate(bwmax) + bwcap_description = format_tc_rate(get_bwcap()) print """ Usage: @@ -488,7 +515,7 @@ def main(): if len(argv): if argv[0] == "init" or (argv[0] == "on" and len(argv) == 1): # (Re)initialize - init(dev, bwcap) + init(dev, get_tc_rate(bwcap)) elif argv[0] == "get" or argv[0] == "show": # Show @@ -508,7 +535,7 @@ def main(): if slice is None: # Orphaned (not associated with a slice) class slice = "%d?" % xid - print "%s: share %d minrate %s maxrate %s" % \ + print "%s %d %s %s" % \ (slice, share, format_tc_rate(minrate), format_tc_rate(maxrate)) elif len(argv) >= 2: