X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bwmon.py;h=9d73d8ca53ad31972e6fdc127328280805e022e8;hb=7e835300a5be01f36b18a76996b40c30fd6cabdb;hp=aba96e3d19766750ec504247c3d6e0ee72288023;hpb=d29fc597904405ef4ad27cf8af852e3c81191b3b;p=nodemanager.git diff --git a/bwmon.py b/bwmon.py index aba96e3..9d73d8c 100644 --- a/bwmon.py +++ b/bwmon.py @@ -32,32 +32,25 @@ import database from sets import Set +# Defaults +debug = False +verbose = False +datafile = "/var/lib/misc/bwmon.dat" + try: sys.path.append("/etc/planetlab") from plc_config import * except: - logger.log("bwmon: Warning: Configuration file /etc/planetlab/plc_config.py not found") - PLC_NAME = "PlanetLab" - PLC_SLICE_PREFIX = "pl" - PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org" - PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org" + logger.log("bwmon: Warning: Configuration file /etc/planetlab/plc_config.py not found", 2) + logger.log("bwmon: Running in DEBUG mode. Logging to file and not emailing.", 1) # Constants seconds_per_day = 24 * 60 * 60 bits_per_byte = 8 -# Defaults -debug = False -verbose = False -datafile = "/var/lib/misc/bwmon.dat" -#nm = None - # Burst to line rate (or node cap). Set by NM. in KBit/s default_MaxRate = int(bwlimit.get_bwcap() / 1000) default_Maxi2Rate = int(bwlimit.bwmax / 1000) -# Min rate 8 bits/s -default_MinRate = 0 -default_Mini2Rate = 0 # 5.4 Gbyte per day. 5.4 * 1024 k * 1024M * 1024G # 5.4 Gbyte per day max allowed transfered per recording period default_MaxKByte = 5662310 @@ -133,11 +126,8 @@ def slicemail(slice, subject, body): sendmail = os.popen("/usr/sbin/sendmail -N never -t -f%s" % PLC_MAIL_SUPPORT_ADDRESS, "w") - # PLC has a separate list for pl_mom messages - if PLC_MAIL_SUPPORT_ADDRESS == "support@planet-lab.org": - to = ["pl-mom@planet-lab.org"] - else: - to = [PLC_MAIL_SUPPORT_ADDRESS] + # Parsed from MyPLC config + to = [PLC_MAIL_MOM_LIST_ADDRESS] if slice is not None and slice != "root": to.append(PLC_MAIL_SLICE_ADDRESS.replace("SLICE", slice)) @@ -193,13 +183,13 @@ class Slice: self.bytes = 0 self.i2bytes = 0 self.MaxRate = default_MaxRate - self.MinRate = default_MinRate + self.MinRate = bwlimit.bwmin / 1000 self.Maxi2Rate = default_Maxi2Rate - self.Mini2Rate = default_Mini2Rate + self.Mini2Rate = bwlimit.bwmin / 1000 self.MaxKByte = default_MaxKByte - self.ThreshKByte = (.8 * self.MaxKByte) + self.ThreshKByte = int(.8 * self.MaxKByte) self.Maxi2KByte = default_Maxi2KByte - self.Threshi2KByte = (.8 * self.Maxi2KByte) + self.Threshi2KByte = int(.8 * self.Maxi2KByte) self.Share = default_Share self.Sharei2 = default_Share self.emailed = False @@ -224,7 +214,7 @@ class Slice: # Sanity check plus policy decision for MinRate: # Minrate cant be greater than 25% of MaxRate or NodeCap. - MinRate = int(rspec.get("net_min_rate", default_MinRate)) + MinRate = int(rspec.get("net_min_rate", bwlimit.bwmin / 1000)) if MinRate > int(.25 * default_MaxRate): MinRate = int(.25 * default_MaxRate) if MinRate != self.MinRate: @@ -236,7 +226,7 @@ class Slice: self.MaxRate = MaxRate logger.log("bwmon: Updating %s: Max Rate = %s" %(self.name, self.MaxRate)) - Mini2Rate = int(rspec.get('net_i2_min_rate', default_Mini2Rate)) + Mini2Rate = int(rspec.get('net_i2_min_rate', bwlimit.bwmin / 1000)) if Mini2Rate != self.Mini2Rate: self.Mini2Rate = Mini2Rate logger.log("bwmon: Updating %s: Min i2 Rate = %s" %(self.name, self.Mini2Rate)) @@ -304,7 +294,7 @@ class Slice: logger.log("bwmon: %s reset to %s/%s" % \ (self.name, bwlimit.format_tc_rate(maxrate), - bwlimit.format_tc_rate(maxi2rate))) + bwlimit.format_tc_rate(maxi2rate)), 1) bwlimit.set(xid = self.xid, minrate = self.MinRate * 1000, maxrate = self.MaxRate * 1000, @@ -356,23 +346,15 @@ class Slice: slicemail(self.name, subject, message + (footer % params)) - def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec): + def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, runningshare, rspec): """ Update byte counts and check if byte thresholds have been - exceeded. If exceeded, cap to remaining bytes in limit over remaining in period. + exceeded. If exceeded, cap to remaining bytes in limit over remaining time in period. Recalculate every time module runs. """ # Query Node Manager for max rate overrides self.updateSliceAttributes(rspec) - - # Prepare message parameters from the template - #message = "" - #params = {'slice': self.name, 'hostname': socket.gethostname(), - # 'since': time.asctime(time.gmtime(self.time)) + " GMT", - # 'until': time.asctime(time.gmtime(self.time + period)) + " GMT", - # 'date': time.asctime(time.gmtime()) + " GMT", - # 'period': format_period(period)} # Check limits. if usedbytes >= (self.bytes + (self.ThreshKByte * 1024)): @@ -391,19 +373,7 @@ class Slice: # Sanity Check new_maxrate = self.MaxRate * 1000 self.capped = False - - ## Format template parameters for low bandwidth message - #params['class'] = "low bandwidth" - #params['bytes'] = format_bytes(usedbytes - self.bytes) - #params['limit'] = format_bytes(self.MaxKByte * 1024) - #params['thresh'] = format_bytes(self.ThreshKByte * 1024) - #params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate) - - # Cap low bandwidth burst rate - #if new_maxrate != runningmaxrate: - # message += template % params - # logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params) - + if usedi2bytes >= (self.i2bytes + (self.Threshi2KByte * 1024)): maxi2byte = self.Maxi2KByte * 1024 i2bytesused = usedi2bytes - self.i2bytes @@ -420,31 +390,18 @@ class Slice: new_maxi2rate = self.Maxi2Rate * 1000 self.capped = False - # Format template parameters for high bandwidth message - #params['class'] = "high bandwidth" - #params['bytes'] = format_bytes(usedi2bytes - self.i2bytes) - #params['limit'] = format_bytes(self.Maxi2KByte * 1024) - #params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxi2rate) - - # Cap high bandwidth burst rate - #if new_maxi2rate != runningmaxi2rate: - # message += template % params - # logger.log("bwmon: %(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params) - # Apply parameters - if new_maxrate != runningmaxrate or new_maxi2rate != runningmaxi2rate: - bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxi2rate) + bwlimit.set(xid = self.xid, + minrate = self.MinRate * 1000, + maxrate = new_maxrate, + minexemptrate = self.Mini2Rate * 1000, + maxexemptrate = new_maxi2rate, + share = self.Share) # Notify slice if self.capped == True and self.emailed == False: self.notify(newmaxrate, newmaxexemptrate, usedbytes, usedi2bytes) - # subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params - # if debug: - # logger.log("bwmon: "+ subject) - # logger.log("bwmon: "+ message + (footer % params)) - # else: - # self.emailed = True - # slicemail(self.name, subject, message + (footer % params)) + def gethtbs(root_xid, default_xid): """ @@ -465,7 +422,7 @@ def gethtbs(root_xid, default_xid): and (xid != default_xid): # Orphaned (not associated with a slice) class name = "%d?" % xid - logger.log("bwmon: Found orphaned HTB %s. Removing." %name) + logger.log("bwmon: Found orphaned HTB %s. Removing." %name, 1) bwlimit.off(xid) livehtbs[xid] = {'share': share, @@ -488,11 +445,8 @@ def sync(nmdbcopy): period, \ default_MaxRate, \ default_Maxi2Rate, \ - default_MinRate, \ default_MaxKByte,\ - default_ThreshKByte,\ default_Maxi2KByte,\ - default_Threshi2KByte,\ default_Share,\ verbose @@ -508,7 +462,7 @@ def sync(nmdbcopy): try: f = open(datafile, "r+") - logger.log("bwmon: Loading %s" % datafile) + logger.log("bwmon: Loading %s" % datafile, 2) (version, slices, deaddb) = pickle.load(f) f.close() # Check version of data file @@ -541,25 +495,28 @@ def sync(nmdbcopy): for plcSliver in nmdbcopy.keys(): live[bwlimit.get_xid(plcSliver)] = nmdbcopy[plcSliver] - logger.log("bwmon: Found %s instantiated slices" % live.keys().__len__()) - logger.log("bwmon: Found %s slices in dat file" % slices.values().__len__()) + logger.log("bwmon: Found %s instantiated slices" % live.keys().__len__(), 2) + logger.log("bwmon: Found %s slices in dat file" % slices.values().__len__(), 2) # Get actual running values from tc. # Update slice totals and bandwidth. {xid: {values}} kernelhtbs = gethtbs(root_xid, default_xid) - logger.log("bwmon: Found %s running HTBs" % kernelhtbs.keys().__len__()) + logger.log("bwmon: Found %s running HTBs" % kernelhtbs.keys().__len__(), 2) # The dat file has HTBs for slices, but the HTBs aren't running nohtbslices = Set(slices.keys()) - Set(kernelhtbs.keys()) - logger.log( "bwmon: Found %s slices in dat but not running." % nohtbslices.__len__() ) + logger.log( "bwmon: Found %s slices in dat but not running." % nohtbslices.__len__(), 2) # Reset tc counts. for nohtbslice in nohtbslices: if live.has_key(nohtbslice): slices[nohtbslice].reset( 0, 0, 0, 0, live[nohtbslice]['_rspec'] ) + else: + logger.log("bwmon: Removing abondoned slice %s from dat." % nohtbslice) + del slices[nohtbslice] # The dat file doesnt have HTB for the slice but kern has HTB slicesnodat = Set(kernelhtbs.keys()) - Set(slices.keys()) - logger.log( "bwmon: Found %s slices with HTBs but not in dat" % slicesnodat.__len__() ) + logger.log( "bwmon: Found %s slices with HTBs but not in dat" % slicesnodat.__len__(), 2) for slicenodat in slicesnodat: # But slice is running if live.has_key(slicenodat): @@ -568,12 +525,11 @@ def sync(nmdbcopy): slices[slicenodat] = Slice(slicenodat, live[slicenodat]['name'], live[slicenodat]['_rspec']) - else: bwlimit.off(slicenodat) # Abandoned. it doesnt exist at PLC or the dat # Get new slices. # Slices in GetSlivers but not running HTBs newslicesxids = Set(live.keys()) - Set(kernelhtbs.keys()) - logger.log("bwmon: Found %s new slices" % newslicesxids.__len__()) + logger.log("bwmon: Found %s new slices" % newslicesxids.__len__(), 2) # Setup new slices for newslice in newslicesxids: @@ -604,40 +560,43 @@ def sync(nmdbcopy): deadslice['slice'].Maxi2Rate, deadslice['htb']['usedbytes'], deadslice['htb']['usedi2bytes'], + deadslice['htb']['share'], live[newslice]['_rspec']) # Since the slice has been reinitialed, remove from dead database. - del deaddb[deadslice] + del deaddb[deadslice['slice'].name] else: - logger.log("bwmon Slice %s doesn't have xid. Must be delegated."\ - "Skipping." % live[newslice]['name']) + logger.log("bwmon: Slice %s doesn't have xid. Skipping." % live[newslice]['name']) # Move dead slices that exist in the pickle file, but # aren't instantiated by PLC into the dead dict until # recording period is over. This is to avoid the case where a slice is dynamically created # and destroyed then recreated to get around byte limits. deadxids = Set(slices.keys()) - Set(live.keys()) - logger.log("bwmon: Found %s dead slices" % (deadxids.__len__() - 2)) + logger.log("bwmon: Found %s dead slices" % (deadxids.__len__() - 2), 2) for deadxid in deadxids: if deadxid == root_xid or deadxid == default_xid: continue - logger.log("bwmon: removing dead slice %s " % deadxid) - if slices.has_key(deadxid): + logger.log("bwmon: removing dead slice %s " % deadxid) + if slices.has_key(deadxid) and kernelhtbs.has_key(deadxid): # add slice (by name) to deaddb + logger.log("bwmon: Saving bandwidth totals for %s." % slices[deadxid].name) deaddb[slices[deadxid].name] = {'slice': slices[deadxid], 'htb': kernelhtbs[deadxid]} del slices[deadxid] if kernelhtbs.has_key(deadxid): + logger.log("bwmon: Removing HTB for %s." % deadxid, 2) bwlimit.off(deadxid) - - # Clean up deaddb - for (deadslice, deadhtb) in deaddb.iteritems(): - if (time.time() >= (deadslice.time() + period)): - logger.log("bwmon: Removing dead slice %s from dat." % deadslice.name) - del deaddb[deadslice.name] + + # Clean up deaddb + for deadslice in deaddb.keys(): + if (time.time() >= (deaddb[deadslice]['slice'].time + period)): + logger.log("bwmon: Removing dead slice %s from dat." \ + % deaddb[deadslice]['slice'].name) + del deaddb[deadslice] # Get actual running values from tc since we've added and removed buckets. # Update slice totals and bandwidth. {xid: {values}} kernelhtbs = gethtbs(root_xid, default_xid) - logger.log("bwmon: now %s running HTBs" % kernelhtbs.keys().__len__()) + logger.log("bwmon: now %s running HTBs" % kernelhtbs.keys().__len__(), 2) for (xid, slice) in slices.iteritems(): # Monitor only the specified slices @@ -658,15 +617,16 @@ def sync(nmdbcopy): kernelhtbs[xid]['usedi2bytes'], \ live[xid]['_rspec']) else: - if debug: logger.log("bwmon: Updating slice %s" % slice.name) + logger.log("bwmon: Updating slice %s" % slice.name, 2) # Update byte counts slice.update(kernelhtbs[xid]['maxrate'], \ kernelhtbs[xid]['maxexemptrate'], \ kernelhtbs[xid]['usedbytes'], \ kernelhtbs[xid]['usedi2bytes'], \ + kernelhtbs[xid]['share'], live[xid]['_rspec']) - logger.log("bwmon: Saving %s slices in %s" % (slices.keys().__len__(),datafile)) + logger.log("bwmon: Saving %s slices in %s" % (slices.keys().__len__(),datafile), 2) f = open(datafile, "w") pickle.dump((version, slices, deaddb), f) f.close() @@ -674,10 +634,10 @@ def sync(nmdbcopy): lock = threading.Event() def run(): """When run as a thread, wait for event, lock db, deep copy it, release it, run bwmon.GetSlivers(), then go back to waiting.""" - if debug: logger.log("bwmon: Thread started") + logger.log("bwmon: Thread started", 2) while True: lock.wait() - if debug: logger.log("bwmon: Event received. Running.") + logger.log("bwmon: Event received. Running.", 2) database.db_lock.acquire() nmdbcopy = copy.deepcopy(database.db) database.db_lock.release()