X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bwmon.py;h=882b8f4ef8149fffc03b494dd2ced70a3f287a5a;hb=refs%2Fheads%2F1.5;hp=68a118caab4d5fe879f5c7eb7d01ee10c2cf695a;hpb=f211fe036f17f4758df731fc6d8dbf3d64ca96ac;p=nodemanager.git diff --git a/bwmon.py b/bwmon.py index 68a118c..882b8f4 100644 --- a/bwmon.py +++ b/bwmon.py @@ -15,21 +15,24 @@ # Faiyaz Ahmed # Copyright (C) 2004-2006 The Trustees of Princeton University # -# $Id$ +# $Id: bwmon.py,v 1.1.2.11 2007/06/26 18:03:55 faiyaza Exp $ # import os import sys import time import pickle -import database - import socket -#import xmlrpclib -import bwlimit import logger +import copy +import threading +import tools + +import bwlimit +import database from sets import Set + try: sys.path.append("/etc/planetlab") from plc_config import * @@ -40,17 +43,13 @@ except: PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org" PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org" - -# Utility functions -#from pl_mom import * - # Constants seconds_per_day = 24 * 60 * 60 bits_per_byte = 8 # Defaults debug = False -verbose = 0 +verbose = False datafile = "/var/lib/misc/bwmon.dat" #nm = None @@ -184,14 +183,14 @@ class Slice: """ - def __init__(self, xid, name, data): + def __init__(self, xid, name, rspec): self.xid = xid self.name = name self.time = 0 self.bytes = 0 self.i2bytes = 0 self.MaxRate = default_MaxRate - self.MinRate = default_MinRate + self.MinRate = default_MinRate self.Maxi2Rate = default_Maxi2Rate self.Mini2Rate = default_Mini2Rate self.MaxKByte = default_MaxKByte @@ -199,75 +198,86 @@ class Slice: self.Maxi2KByte = default_Maxi2KByte self.Threshi2KByte = default_Threshi2KByte self.Share = default_Share + self.Sharei2 = default_Share self.emailed = False - self.updateSliceAttributes(data) + self.updateSliceAttributes(rspec) bwlimit.set(xid = self.xid, - minrate = self.MinRate, - maxrate = self.MaxRate, - maxexemptrate = self.Maxi2Rate, - minexemptrate = self.Mini2Rate, + minrate = self.MinRate * 1000, + maxrate = self.MaxRate * 1000, + maxexemptrate = self.Maxi2Rate * 1000, + minexemptrate = self.Mini2Rate * 1000, share = self.Share) - def __repr__(self): return self.name - @database.synchronized - def updateSliceAttributes(self, data): - for sliver in data['slivers']: - if sliver['name'] == self.name: - for attribute in sliver['attributes']: - if attribute['name'] == 'net_min_rate': - self.MinRate = int(attribute['value']) - logger.log("bwmon: Updating %s. Min Rate = %s" \ - %(self.name, self.MinRate)) - elif attribute['name'] == 'net_max_rate': - self.MaxRate = int(attribute['value']) - logger.log("bwmon: Updating %s. Max Rate = %s" \ - %(self.name, self.MaxRate)) - elif attribute['name'] == 'net_i2_min_rate': - self.Mini2Rate = int(attribute['value']) - logger.log("bwmon: Updating %s. Min i2 Rate = %s" \ - %(self.name, self.Mini2Rate)) - elif attribute['name'] == 'net_i2_max_rate': - self.Maxi2Rate = int(attribute['value']) - logger.log("bwmon: Updating %s. Max i2 Rate = %s" \ - %(self.name, self.Maxi2Rate)) - elif attribute['name'] == 'net_max_kbyte': - self.MaxKByte = int(attribute['value']) - logger.log("bwmon: Updating %s. Max KByte lim = %s" \ - %(self.name, self.MaxKByte)) - elif attribute['name'] == 'net_i2_max_kbyte': - self.Maxi2KByte = int(attribute['value']) - logger.log("bwmon: Updating %s. Max i2 KByte = %s" \ - %(self.name, self.Maxi2KByte)) - elif attribute['name'] == 'net_thresh_kbyte': - self.ThreshKByte = int(attribute['value']) - logger.log("bwmon: Updating %s. Thresh KByte = %s" \ - %(self.name, self.ThreshKByte)) - elif attribute['name'] == 'net_i2_thresh_kbyte': - self.Threshi2KByte = int(attribute['value']) - logger.log("bwmon: Updating %s. i2 Thresh KByte = %s" \ - %(self.name, self.Threshi2KByte)) - elif attribute['name'] == 'net_share': - self.Share = int(attribute['value']) - logger.log("bwmon: Updating %s. Net Share = %s" \ - %(self.name, self.Share)) - elif attribute['name'] == 'net_i2_share': - self.Sharei2 = int(attribute['value']) - logger.log("bwmon: Updating %s. Net i2 Share = %s" \ - %(self.name, self.i2Share)) - - - def reset(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, data): + def updateSliceAttributes(self, rspec): + # Get attributes + + # Sanity check plus policy decision for MinRate: + # Minrate cant be greater than 25% of MaxRate or NodeCap. + MinRate = int(rspec.get("net_min_rate", default_MinRate)) + if MinRate > int(.25 * default_MaxRate): + MinRate = int(.25 * default_MaxRate) + if MinRate != self.MinRate: + self.MinRate = MinRate + logger.log("bwmon: Updating %s: Min Rate = %s" %(self.name, self.MinRate)) + + MaxRate = int(rspec.get('net_max_rate', bwlimit.get_bwcap() / 1000)) + if MaxRate != self.MaxRate: + self.MaxRate = MaxRate + logger.log("bwmon: Updating %s: Max Rate = %s" %(self.name, self.MaxRate)) + + Mini2Rate = int(rspec.get('net_i2_min_rate', default_Mini2Rate)) + if Mini2Rate != self.Mini2Rate: + self.Mini2Rate = Mini2Rate + logger.log("bwmon: Updating %s: Min i2 Rate = %s" %(self.name, self.Mini2Rate)) + + Maxi2Rate = int(rspec.get('net_i2_max_rate', bwlimit.bwmax / 1000)) + if Maxi2Rate != self.Maxi2Rate: + self.Maxi2Rate = Maxi2Rate + logger.log("bwmon: Updating %s: Max i2 Rate = %s" %(self.name, self.Maxi2Rate)) + + MaxKByte = int(rspec.get('net_max_kbyte', default_MaxKByte)) + if MaxKByte != self.MaxKByte: + self.MaxKByte = MaxKByte + logger.log("bwmon: Updating %s: Max KByte lim = %s" %(self.name, self.MaxKByte)) + + Maxi2KByte = int(rspec.get('net_i2_max_kbyte', default_Maxi2KByte)) + if Maxi2KByte != self.Maxi2KByte: + self.Maxi2KByte = Maxi2KByte + logger.log("bwmon: Updating %s: Max i2 KByte = %s" %(self.name, self.Maxi2KByte)) + + ThreshKByte = int(rspec.get('net_thresh_kbyte', default_ThreshKByte)) + if ThreshKByte != self.ThreshKByte: + self.ThreshKByte = ThreshKByte + logger.log("bwmon: Updating %s: Thresh KByte = %s" %(self.name, self.ThreshKByte)) + + Threshi2KByte = int(rspec.get('net_i2_thresh_kbyte', default_Threshi2KByte)) + if Threshi2KByte != self.Threshi2KByte: + self.Threshi2KByte = Threshi2KByte + logger.log("bwmon: Updating %s: i2 Thresh KByte = %s" %(self.name, self.Threshi2KByte)) + + Share = int(rspec.get('net_share', default_Share)) + if Share != self.Share: + self.Share = Share + logger.log("bwmon: Updating %s: Net Share = %s" %(self.name, self.Share)) + + Sharei2 = int(rspec.get('net_i2_share', default_Share)) + if Sharei2 != self.Sharei2: + self.Sharei2 = Sharei2 + logger.log("bwmon: Updating %s: Net i2 Share = %s" %(self.name, self.i2Share)) + + + def reset(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec): """ Begin a new recording period. Remove caps by restoring limits to their default values. """ # Query Node Manager for max rate overrides - self.updateSliceAttributes(data) + self.updateSliceAttributes(rspec) # Reset baseline time self.time = time.time() @@ -293,14 +303,14 @@ class Slice: minexemptrate = self.Mini2Rate * 1000, share = self.Share) - def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, data): + def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec): """ Update byte counts and check if byte limits have been exceeded. """ # Query Node Manager for max rate overrides - self.updateSliceAttributes(data) + self.updateSliceAttributes(rspec) # Prepare message parameters from the template message = "" @@ -311,46 +321,49 @@ class Slice: 'period': format_period(period)} if usedbytes >= (self.bytes + (self.ThreshKByte * 1024)): + if verbose: + logger.log("bwmon: %s over thresh %s" \ + % (self.name, format_bytes(self.ThreshKByte * 1024))) + sum = self.bytes + (self.ThreshKByte * 1024) maxbyte = self.MaxKByte * 1024 bytesused = usedbytes - self.bytes timeused = int(time.time() - self.time) new_maxrate = int(((maxbyte - bytesused) * 8)/(period - timeused)) - if new_maxrate < self.MinRate: - new_maxrate = self.MinRate + if new_maxrate < (self.MinRate * 1000): + new_maxrate = self.MinRate * 1000 else: new_maxrate = self.MaxRate * 1000 # Format template parameters for low bandwidth message params['class'] = "low bandwidth" params['bytes'] = format_bytes(usedbytes - self.bytes) - params['maxrate'] = bwlimit.format_tc_rate(runningmaxrate) params['limit'] = format_bytes(self.MaxKByte * 1024) + params['thresh'] = format_bytes(self.ThreshKByte * 1024) params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate) if verbose: logger.log("bwmon: %(slice)s %(class)s " \ - "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % \ + "%(bytes)s of %(limit)s max %(thresh)s thresh (%(new_maxrate)s/s maxrate)" % \ params) # Cap low bandwidth burst rate if new_maxrate != runningmaxrate: message += template % params - logger.log("bwmon: %(slice)s %(class)s capped at %(new_maxrate)s/s " % params) + logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params) if usedi2bytes >= (self.i2bytes + (self.Threshi2KByte * 1024)): maxi2byte = self.Maxi2KByte * 1024 i2bytesused = usedi2bytes - self.i2bytes timeused = int(time.time() - self.time) new_maxi2rate = int(((maxi2byte - i2bytesused) * 8)/(period - timeused)) - if new_maxi2rate < self.Mini2Rate: - new_maxi2rate = self.Mini2Rate + if new_maxi2rate < (self.Mini2Rate * 1000): + new_maxi2rate = self.Mini2Rate * 1000 else: new_maxi2rate = self.Maxi2Rate * 1000 # Format template parameters for high bandwidth message params['class'] = "high bandwidth" params['bytes'] = format_bytes(usedi2bytes - self.i2bytes) - params['maxrate'] = bwlimit.format_tc_rate(runningmaxi2rate) params['limit'] = format_bytes(self.Maxi2KByte * 1024) params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxi2rate) @@ -377,7 +390,43 @@ class Slice: self.emailed = True slicemail(self.name, subject, message + (footer % params)) -def GetSlivers(data): +def gethtbs(root_xid, default_xid): + """ + Return dict {xid: {*rates}} of running htbs as reported by tc that have names. + Turn off HTBs without names. + """ + livehtbs = {} + for params in bwlimit.get(): + (xid, share, + minrate, maxrate, + minexemptrate, maxexemptrate, + usedbytes, usedi2bytes) = params + + name = bwlimit.get_slice(xid) + + if (name is None) \ + and (xid != root_xid) \ + and (xid != default_xid): + # Orphaned (not associated with a slice) class + name = "%d?" % xid + logger.log("bwmon: Found orphaned HTB %s. Removing." %name) + bwlimit.off(xid) + + livehtbs[xid] = {'share': share, + 'minrate': minrate, + 'maxrate': maxrate, + 'maxexemptrate': maxexemptrate, + 'minexemptrate': minexemptrate, + 'usedbytes': usedbytes, + 'name': name, + 'usedi2bytes': usedi2bytes} + + return livehtbs + +def sync(nmdbcopy): + """ + Syncs tc, db, and bwmon.dat. Then, starts new slices, kills old ones, and updates byte accounts for each running slice. Sends emails and caps those that went over their limit. + """ # Defaults global datafile, \ period, \ @@ -391,115 +440,154 @@ def GetSlivers(data): default_Share,\ verbose - verbose = True # All slices names = [] + # Incase the limits have changed. + default_MaxRate = int(bwlimit.get_bwcap() / 1000) + default_Maxi2Rate = int(bwlimit.bwmax / 1000) + + # Incase default isn't set yet. + if default_MaxRate == -1: + default_MaxRate = 1000000 try: f = open(datafile, "r+") - if verbose: - logger.log("bwmon: Loading %s" % datafile) + logger.log("bwmon: Loading %s" % datafile) (version, slices) = pickle.load(f) f.close() # Check version of data file - if version != "$Id$": + if version != "$Id: bwmon.py,v 1.1.2.11 2007/06/26 18:03:55 faiyaza Exp $": logger.log("bwmon: Not using old version '%s' data file %s" % (version, datafile)) raise Exception except Exception: - version = "$Id$" + version = "$Id: bwmon.py,v 1.1.2.11 2007/06/26 18:03:55 faiyaza Exp $" slices = {} # Get/set special slice IDs root_xid = bwlimit.get_xid("root") default_xid = bwlimit.get_xid("default") + # Since root is required for sanity, its not in the API/plc database, so pass {} + # to use defaults. if root_xid not in slices.keys(): - slices[root_xid] = Slice(root_xid, "root", data) - slices[root_xid].reset(0, 0, 0, 0, data) - + slices[root_xid] = Slice(root_xid, "root", {}) + slices[root_xid].reset(0, 0, 0, 0, {}) + + # Used by bwlimit. pass {} since there is no rspec (like above). if default_xid not in slices.keys(): - slices[default_xid] = Slice(default_xid, "default", data) - slices[default_xid].reset(0, 0, 0, 0, data) + slices[default_xid] = Slice(default_xid, "default", {}) + slices[default_xid].reset(0, 0, 0, 0, {}) live = {} - # Get running slivers. {xid: name} - for sliver in data['slivers']: - live[bwlimit.get_xid(sliver['name'])] = sliver['name'] - - # Setup new slices. - # live.xids - runing.xids = new.xids - newslicesxids = Set(live.keys()) - Set(slices.keys()) - for newslicexid in newslicesxids: - if newslicexid != None: - logger.log("bwmon: New Slice %s" % live[newslicexid]) - slices[newslicexid] = Slice(newslicexid, live[newslicexid], data) - slices[newslicexid].reset(0, 0, 0, 0, data) - else: - logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslicexid]) + # Get running slivers that should be on this node (from plc). {xid: name} + # db keys on name, bwmon keys on xid. db doesnt have xid either. + for plcSliver in nmdbcopy.keys(): + live[bwlimit.get_xid(plcSliver)] = nmdbcopy[plcSliver] + + logger.log("bwmon: Found %s instantiated slices" % live.keys().__len__()) + logger.log("bwmon: Found %s slices in dat file" % slices.values().__len__()) + # Get actual running values from tc. - # Update slice totals and bandwidth. - for params in bwlimit.get(): - (xid, share, - minrate, maxrate, - minexemptrate, maxexemptrate, - usedbytes, usedi2bytes) = params + # Update slice totals and bandwidth. {xid: {values}} + livehtbs = gethtbs(root_xid, default_xid) + logger.log("bwmon: Found %s running HTBs" % livehtbs.keys().__len__()) + + # Get new slices. + # live.xids - runing(slices).xids = new.xids + #newslicesxids = Set(live.keys()) - Set(slices.keys()) + newslicesxids = Set(live.keys()) - Set(livehtbs.keys()) + logger.log("bwmon: Found %s new slices" % newslicesxids.__len__()) + + # Incase we rebooted and need to keep track of already running htbs + norecxids = Set(livehtbs.keys()) - Set(slices.keys()) + logger.log("bwmon: Found %s slices that have htbs but not in dat." % norecxids.__len__()) + # Reset tc counts. + for norecxid in norecxids: + slices[norecxid] = Slice(norecxid, live[norecxid]['name'], live[norecxid]['_rspec']) + slices[norecxid].reset(livehtbs[norecxid]['maxrate'], + livehtbs[norecxid]['maxexemptrate'], + livehtbs[norecxid]['usedbytes'], + livehtbs[norecxid]['usedi2bytes'], + live[norecxid]['_rspec']) - # Ignore root and default buckets + # Setup new slices + for newslice in newslicesxids: + # Delegated slices dont have xids (which are uids) since they haven't been + # instantiated yet. + if newslice != None and live[newslice].has_key('_rspec') == True: + logger.log("bwmon: New Slice %s" % live[newslice]['name']) + # _rspec is the computed rspec: NM retrieved data from PLC, computed loans + # and made a dict of computed values. + slices[newslice] = Slice(newslice, live[newslice]['name'], live[newslice]['_rspec']) + slices[newslice].reset(0, 0, 0, 0, live[newslice]['_rspec']) + else: + logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslice]['name']) + + # Delete dead slices. + # First delete dead slices that exist in the pickle file, but + # aren't instantiated by PLC. + dead = Set(slices.keys()) - Set(live.keys()) + logger.log("bwmon: Found %s dead slices" % (dead.__len__() - 2)) + for xid in dead: if xid == root_xid or xid == default_xid: continue + logger.log("bwmon: removing dead slice %s " % xid) + if slices.has_key(xid): del slices[xid] + if livehtbs.has_key(xid): bwlimit.off(xid) - name = bwlimit.get_slice(xid) - if name is None: - # Orphaned (not associated with a slice) class - name = "%d?" % xid - bwlimit.off(xid) + # Get actual running values from tc since we've added and removed buckets. + # Update slice totals and bandwidth. {xid: {values}} + livehtbs = gethtbs(root_xid, default_xid) + logger.log("bwmon: now %s running HTBs" % livehtbs.keys().__len__()) + for (xid, slice) in slices.iteritems(): # Monitor only the specified slices + if xid == root_xid or xid == default_xid: continue if names and name not in names: continue - #slices is populated from the pickle file - #xid is populated from bwlimit (read from /etc/passwd) - if slices.has_key(xid): - slice = slices[xid] - if time.time() >= (slice.time + period) or \ - usedbytes < slice.bytes or usedi2bytes < slice.i2bytes: - # Reset to defaults every 24 hours or if it appears - # that the byte counters have overflowed (or, more - # likely, the node was restarted or the HTB buckets - # were re-initialized). - slice.reset(maxrate, maxexemptrate, usedbytes, usedi2bytes, data) - else: - # Update byte counts - slice.update(maxrate, maxexemptrate, usedbytes, usedi2bytes, data) + + if (time.time() >= (slice.time + period)) or \ + (livehtbs[xid]['usedbytes'] < slice.bytes) or \ + (livehtbs[xid]['usedi2bytes'] < slice.i2bytes): + # Reset to defaults every 24 hours or if it appears + # that the byte counters have overflowed (or, more + # likely, the node was restarted or the HTB buckets + # were re-initialized). + slice.reset(livehtbs[xid]['maxrate'], \ + livehtbs[xid]['maxexemptrate'], \ + livehtbs[xid]['usedbytes'], \ + livehtbs[xid]['usedi2bytes'], \ + live[xid]['_rspec']) else: - # Just in case. Probably (hopefully) this will never happen. - # New slice, initialize state - if verbose: - logger.log("bwmon: New Slice %s" % name) - slice = slices[xid] = Slice(xid, name, data) - slice.reset(maxrate, maxexemptrate, usedbytes, usedi2bytes, data) - - # Delete dead slices - dead = Set(slices.keys()) - Set(live.keys()) - for xid in dead: - if xid == root_xid or xid == default_xid: - continue - del slices[xid] - bwlimit.off(xid) - - logger.log("bwmon: Saving %s" % datafile) + if debug: logger.log("bwmon: Updating slice %s" % slice.name) + # Update byte counts + slice.update(livehtbs[xid]['maxrate'], \ + livehtbs[xid]['maxexemptrate'], \ + livehtbs[xid]['usedbytes'], \ + livehtbs[xid]['usedi2bytes'], \ + live[xid]['_rspec']) + + logger.log("bwmon: Saving %s slices in %s" % (slices.keys().__len__(),datafile)) f = open(datafile, "w") pickle.dump((version, slices), f) f.close() - -#def GetSlivers(data): -# for sliver in data['slivers']: -# if sliver.has_key('attributes'): -# print sliver -# for attribute in sliver['attributes']: -# if attribute['name'] == "KByteThresh": print attribute['value'] - -def start(options, config): +lock = threading.Event() +def run(): + """When run as a thread, wait for event, lock db, deep copy it, release it, run bwmon.GetSlivers(), then go back to waiting.""" + if debug: logger.log("bwmon: Thread started") + while True: + lock.wait() + if debug: logger.log("bwmon: Event received. Running.") + database.db_lock.acquire() + nmdbcopy = copy.deepcopy(database.db) + database.db_lock.release() + try: sync(nmdbcopy) + except: logger.log_exc() + lock.clear() + +def start(*args): + tools.as_daemon_thread(run) + +def GetSlivers(*args): pass -