X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bwmon.py;h=0af380da57d254c8682b892290ea90d952a2c622;hb=d0885c70dc1fa0fcec2822e7ca0d95ba941be51d;hp=b6f56971d0e664b47f64b05ffaf50c3ca25cf4fe;hpb=dff03b78bcf6bec800955db10c041c462f3f679a;p=mom.git diff --git a/bwmon.py b/bwmon.py index b6f5697..0af380d 100755 --- a/bwmon.py +++ b/bwmon.py @@ -12,9 +12,10 @@ # # Mark Huang # Andy Bavier +# Faiyaz Ahmed # Copyright (C) 2004-2006 The Trustees of Princeton University # -# $Id: bwmon.py,v 1.2 2006/04/28 20:25:19 mlhuang Exp $ +# $Id: bwmon.py,v 1.19 2007/01/08 21:58:13 faiyaza Exp $ # import syslog @@ -43,15 +44,25 @@ verbose = 0 datafile = "/var/lib/misc/bwmon.dat" nm = None +# Burst to line rate (or node cap). Set by NM. default_maxrate = bwlimit.get_bwcap() - default_maxexemptrate = bwlimit.bwmax +# What we cap to when slices break the rules. # 500 Kbit or 5.4 GB per day -default_avgrate = 500000 - +#default_avgrate = 500000 # 1.5 Mbit or 16.4 GB per day -default_avgexemptrate = 1500000 +#default_avgexemptrate = 1500000 + +# 5.4 Gbyte per day. 5.4 * 1024 k * 1024M * 1024G +# 5.4 Gbyte per day max allowed transfered per recording period +default_ByteMax = 5798205850 +default_ByteThresh = int(.8 * default_ByteMax) +# 16.4 Gbyte per day max allowed transfered per recording period to I2 +default_ExemptByteMax = 17609365914 +default_ExemptByteThresh = int(.8 * default_ExemptByteMax) + +default_MinRate = 8 # Average over 1 day period = 1 * seconds_per_day @@ -63,11 +74,11 @@ The slice %(slice)s has transmitted more than %(bytes)s from %(hostname)s to %(class)s destinations since %(since)s. -Its maximum %(class)s burst rate will be capped at %(avgrate)s +Its maximum %(class)s burst rate will be capped at %(new_maxrate)s/s until %(until)s. Please reduce the average %(class)s transmission rate -of the slice to %(avgrate)s, or %(limit)s per %(period)s. +of the slice to %(limit)s per %(period)s. """.lstrip() @@ -84,24 +95,89 @@ class Slice: name - slice name time - beginning of recording period in UNIX seconds bytes - low bandwidth bytes transmitted at the beginning of the recording period - exemptbytes - high bandwidth bytes transmitted at the beginning of the recording period - avgrate - average low bandwidth rate to enforce over the recording period - avgexemptrate - average high bandwidth rate to enforce over the recording period + exemptbytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F) + ByteMax - total volume of data allowed + ByteThresh - After thresh, cap node to (maxbyte - bytes)/(time left in period) + ExemptByteMax - Same as above, but for i2. + ExemptByteThresh - i2 ByteThresh + maxrate - max_rate slice attribute. + maxexemptrate - max_exempt_rate slice attribute. + self.emailed = did we email during this recording period + """ def __init__(self, xid, name, maxrate, maxexemptrate, bytes, exemptbytes): self.xid = xid self.name = name + self.time = 0 + self.bytes = 0 + self.exemptbytes = 0 + self.ByteMax = default_ByteMax + self.ByteThresh = default_ByteThresh + self.ExemptByteMax = default_ExemptByteMax + self.ExemptByteThresh = default_ExemptByteThresh + self.maxrate = default_maxrate + self.maxexemptrate = default_maxexemptrate + self.emailed = False + + # Get real values where applicable self.reset(maxrate, maxexemptrate, bytes, exemptbytes) def __repr__(self): return self.name + def updateSliceAttributes(self): + # Query Node Manager for max rate overrides + try: + vals = nm.query(self.name, + [('nm_net_max_rate', self.maxrate), + ('nm_net_max_exempt_rate', self.maxexemptrate), + ("nm_net_max_byte", int(self.ByteMax / 1024)), + ("nm_net_max_exempt_byte", int(self.ExemptByteMax / 1024)), + ("nm_net_max_thresh_byte", int( .8 * self.ByteMax / 1024)), + ("nm_net_max_thresh_exempt_byte", int(.8 * self.ExemptByteMax / 1024)), + ("nm_net_avg_rate", 0), + ("nm_net_avg_exempt_rate", 0)]) + + (self.maxrate, + self.maxexemptrate, + ByteMax, + ExemptByteMax, + ByteThresh, + ExemptByteThresh, + avgrate, + avgexemptrate) = vals + + # The shitty bit. Gotta bias the limits so as not to overflow xmlrpc + self.ByteMax = ByteMax * 1024 + self.ByteThresh = ByteThresh * 1024 + self.ExemptByteMax = ExemptByteMax * 1024 + self.ExemptByteThresh = ExemptByteThresh * 1024 + + # The hack here is that when i pass 0 to the xmlrpc request to NM, + # for rate limits and it comes back non zero, then screw the byte limits. + # Mult by the period and recompute the byte limits. The thought is + # If/when PLC switches to byte limits, the avgrates wont be used as + # slice attributes and will return as 0 + if (avgrate != 0): + self.ByteMax = int(avgrate * period / 8) + self.ByteThresh = int(self.ByteMax * .8) + + if (avgexemptrate != 0): + self.ExemptByteMax = int(avgexemptrate * period / 8) + self.ExemptByteThresh = int(self.ExemptByteMax * .8) + + except Exception, err: + print "Warning: Exception received while querying NM:", err + def reset(self, maxrate, maxexemptrate, bytes, exemptbytes): """ Begin a new recording period. Remove caps by restoring limits to their default values. """ + + # Query Node Manager for max rate overrides + self.updateSliceAttributes() # Reset baseline time self.time = time.time() @@ -110,42 +186,25 @@ class Slice: self.bytes = bytes self.exemptbytes = exemptbytes - # Query Node Manager for max rate overrides - (new_maxrate, new_maxexemptrate) = nm.query(self.name, ['nm_net_max_rate', 'nm_net_max_exempt_rate']) - if new_maxrate is not None: - new_maxrate *= 1000 - else: - new_maxrate = default_maxrate - if new_maxexemptrate is not None: - new_maxexemptrate *= 1000 - else: - new_maxexemptrate = default_maxexemptrate + # Reset email + self.emailed = False - if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate: + if (self.maxrate != maxrate) or (self.maxexemptrate != maxexemptrate): print "%s reset to %s/%s" % \ (self.name, - bwlimit.format_tc_rate(new_maxrate), - bwlimit.format_tc_rate(new_maxexemptrate)) - bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate) + bwlimit.format_tc_rate(self.maxrate), + bwlimit.format_tc_rate(self.maxexemptrate)) + bwlimit.set(xid = self.xid, maxrate = self.maxrate, maxexemptrate = self.maxexemptrate) def update(self, maxrate, maxexemptrate, bytes, exemptbytes): """ - Update byte counts and check if average rates have been - exceeded. In the worst case (instantaneous usage of the entire - average daily byte limit at the beginning of the recording - period), the slice will be immediately capped and will get to - send twice the average daily byte limit. In the common case, - it will get to send slightly more than the average daily byte - limit. + Update byte counts and check if byte limits have been + exceeded. """ - - # Query Node Manager for max average rate overrides - (self.avgrate, self.avgexemptrate) = nm.query(self.name, ['nm_net_avg_rate', 'nm_net_avg_exempt_rate']) - if self.avgrate is None: - self.avgrate = default_avgrate - if self.avgexemptrate is None: - self.avgexemptrate = default_avgexemptrate - + + # Query Node Manager for max rate overrides + self.updateSliceAttributes() + # Prepare message parameters from the template message = "" params = {'slice': self.name, 'hostname': socket.gethostname(), @@ -154,10 +213,11 @@ class Slice: 'date': time.asctime(time.gmtime()) + " GMT", 'period': format_period(period)} - bytelimit = self.avgrate * period / bits_per_byte - if bytes >= (self.bytes + bytelimit) and \ - maxrate > self.avgrate: - new_maxrate = self.avgrate + if bytes >= (self.bytes + self.ByteThresh): + new_maxrate = \ + int(((self.ByteMax - (bytes - self.bytes)) * 8)/(period - int(time.time() - self.time))) + if new_maxrate < default_MinRate: + new_maxrate = default_MinRate else: new_maxrate = maxrate @@ -165,23 +225,24 @@ class Slice: params['class'] = "low bandwidth" params['bytes'] = format_bytes(bytes - self.bytes) params['maxrate'] = bwlimit.format_tc_rate(maxrate) - params['limit'] = format_bytes(bytelimit) - params['avgrate'] = bwlimit.format_tc_rate(self.avgrate) + params['limit'] = format_bytes(self.ByteMax) + params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate) if verbose: print "%(slice)s %(class)s " \ - "%(bytes)s/%(limit)s (%(maxrate)s/%(avgrate)s)" % \ + "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % \ params # Cap low bandwidth burst rate if new_maxrate != maxrate: message += template % params - print "%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params - - exemptbytelimit = self.avgexemptrate * period / bits_per_byte - if exemptbytes >= (self.exemptbytes + exemptbytelimit) and \ - maxexemptrate > self.avgexemptrate: - new_maxexemptrate = self.avgexemptrate + print "%(slice)s %(class)s capped at %(new_maxrate)s/s " % params + + if exemptbytes >= (self.exemptbytes + self.ExemptByteThresh): + new_maxexemptrate = \ + int(((self.ExemptByteMax - (self.bytes - bytes)) * 8)/(period - int(time.time() - self.time))) + if new_maxexemptrate < default_MinRate: + new_maxexemptrate = default_MinRate else: new_maxexemptrate = maxexemptrate @@ -189,43 +250,45 @@ class Slice: params['class'] = "high bandwidth" params['bytes'] = format_bytes(exemptbytes - self.exemptbytes) params['maxrate'] = bwlimit.format_tc_rate(maxexemptrate) - params['limit'] = format_bytes(exemptbytelimit) - params['avgrate'] = bwlimit.format_tc_rate(self.avgexemptrate) + params['limit'] = format_bytes(self.ExemptByteMax) + params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxexemptrate) if verbose: print "%(slice)s %(class)s " \ - "%(bytes)s/%(limit)s (%(maxrate)s/%(avgrate)s)" % \ - params + "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % params # Cap high bandwidth burst rate if new_maxexemptrate != maxexemptrate: message += template % params - print "%(slice)s %(class)s capped at %(avgrate)s (%(bytes)s/%(limit)s)" % params + print "%(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params # Apply parameters if new_maxrate != maxrate or new_maxexemptrate != maxexemptrate: bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxexemptrate) # Notify slice - if message: + if message and self.emailed == False: subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params if debug: print subject print message + (footer % params) else: + self.emailed = True slicemail(self.name, subject, message + (footer % params)) + + def usage(): print """ Usage: %s [OPTIONS]... Options: - -d, --debug Enable debugging (default: %s) - -v, --verbose Increase verbosity level (default: %d) - -f, --file=FILE Data file (default: %s) - -s, --slice=SLICE Constrain monitoring to these slices (default: all) + -d, --debug Enable debugging (default: %s) + -v, --verbose Increase verbosity level (default: %d) + -f, --file=FILE Data file (default: %s) + -s, --slice=SLICE Constrain monitoring to these slices (default: all) -p, --period=SECONDS Interval in seconds over which to enforce average byte limits (default: %s) - -h, --help This message + -h, --help This message """.lstrip() % (sys.argv[0], debug, verbose, datafile, format_period(period)) def main(): @@ -273,21 +336,22 @@ def main(): (version, slices) = pickle.load(f) f.close() # Check version of data file - if version != "$Id: bwmon.py,v 1.2 2006/04/28 20:25:19 mlhuang Exp $": + if version != "$Id: bwmon.py,v 1.19 2007/01/08 21:58:13 faiyaza Exp $": print "Not using old version '%s' data file %s" % (version, datafile) raise Exception except Exception: - version = "$Id: bwmon.py,v 1.2 2006/04/28 20:25:19 mlhuang Exp $" + version = "$Id: bwmon.py,v 1.19 2007/01/08 21:58:13 faiyaza Exp $" slices = {} # Get special slice IDs root_xid = bwlimit.get_xid("root") default_xid = bwlimit.get_xid("default") - # Open connection to Node Manager + #Open connection to Node Manager. Global. nm = NM() live = [] + # Get actuall running values from tc. for params in bwlimit.get(): (xid, share, minrate, maxrate, @@ -307,7 +371,8 @@ def main(): # Monitor only the specified slices if names and name not in names: continue - + #slices is populated from the pickle file + #xid is populated from bwlimit (read from /etc/passwd) if slices.has_key(xid): slice = slices[xid] if time.time() >= (slice.time + period) or \