# Faiyaz Ahmed <faiyaza@cs.princeton.edu>
# Copyright (C) 2004-2008 The Trustees of Princeton University
#
-# $Id: bwmon.py,v 1.1.2.11 2007/06/26 18:03:55 faiyaza Exp $
+# $Id$
#
import os
from sets import Set
+# Defaults
+debug = False
+verbose = False
+datafile = "/var/lib/misc/bwmon.dat"
+
try:
sys.path.append("/etc/planetlab")
from plc_config import *
except:
- logger.log("bwmon: Warning: Configuration file /etc/planetlab/plc_config.py not found")
- PLC_NAME = "PlanetLab"
- PLC_SLICE_PREFIX = "pl"
- PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org"
- PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org"
+ logger.log("bwmon: Warning: Configuration file /etc/planetlab/plc_config.py not found", 2)
+ logger.log("bwmon: Running in DEBUG mode. Logging to file and not emailing.", 1)
# Constants
seconds_per_day = 24 * 60 * 60
bits_per_byte = 8
-# Defaults
-debug = True
-verbose = False
-datafile = "/var/lib/misc/bwmon.dat"
-#nm = None
-
# Burst to line rate (or node cap). Set by NM. in KBit/s
default_MaxRate = int(bwlimit.get_bwcap() / 1000)
default_Maxi2Rate = int(bwlimit.bwmax / 1000)
# 5.4 GByte per day (5.4 * 1024 * 1024 KByte) max allowed transferred per recording period
default_MaxKByte = 5662310
-default_ThreshKByte = int(.8 * default_MaxKByte)
# 16.4 GByte per day (16.4 * 1024 * 1024 KByte) max allowed transferred per recording period to I2
default_Maxi2KByte = 17196646
-default_Threshi2KByte = int(.8 * default_Maxi2KByte)
# Default share quanta
default_Share = 1
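+# Worked example of the cap arithmetic above (illustrative): 5.4 GByte/day
+# = 5.4 * 1024 * 1024 KByte = 5662310.4 KByte (truncated to 5662310), and
+# 16.4 GByte/day = 16.4 * 1024 * 1024 KByte = 17196646.4 (truncated to 17196646).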
sendmail = os.popen("/usr/sbin/sendmail -N never -t -f%s" % PLC_MAIL_SUPPORT_ADDRESS, "w")
- # PLC has a separate list for pl_mom messages
- if PLC_MAIL_SUPPORT_ADDRESS == "support@planet-lab.org":
- to = ["pl-mom@planet-lab.org"]
- else:
- to = [PLC_MAIL_SUPPORT_ADDRESS]
+ # Parsed from MyPLC config
+ to = [PLC_MAIL_MOM_LIST_ADDRESS]
if slice is not None and slice != "root":
to.append(PLC_MAIL_SLICE_ADDRESS.replace("SLICE", slice))
self.Maxi2Rate = default_Maxi2Rate
self.Mini2Rate = default_Mini2Rate
self.MaxKByte = default_MaxKByte
- self.ThreshKByte = default_ThreshKByte
+ self.ThreshKByte = (.8 * self.MaxKByte)
self.Maxi2KByte = default_Maxi2KByte
- self.Threshi2KByte = default_Threshi2KByte
+ self.Threshi2KByte = (.8 * self.Maxi2KByte)
self.Share = default_Share
self.Sharei2 = default_Share
self.emailed = False
+ self.capped = False
self.updateSliceAttributes(rspec)
bwlimit.set(xid = self.xid,
self.Maxi2KByte = Maxi2KByte
logger.log("bwmon: Updating %s: Max i2 KByte = %s" %(self.name, self.Maxi2KByte))
- ThreshKByte = int(rspec.get('net_thresh_kbyte', default_ThreshKByte))
+ ThreshKByte = int(rspec.get('net_thresh_kbyte', (MaxKByte * .8)))
if ThreshKByte != self.ThreshKByte:
self.ThreshKByte = ThreshKByte
logger.log("bwmon: Updating %s: Thresh KByte = %s" %(self.name, self.ThreshKByte))
- Threshi2KByte = int(rspec.get('net_i2_thresh_kbyte', default_Threshi2KByte))
+ Threshi2KByte = int(rspec.get('net_i2_thresh_kbyte', (Maxi2KByte * .8)))
if Threshi2KByte != self.Threshi2KByte:
self.Threshi2KByte = Threshi2KByte
logger.log("bwmon: Updating %s: i2 Thresh KByte = %s" %(self.name, self.Threshi2KByte))
# Reset email
self.emailed = False
- maxrate = self.MaxRate * 1000
- maxi2rate = self.Maxi2Rate * 1000
+ # Reset flag
+ self.capped = False
# Reset rates.
+ maxrate = self.MaxRate * 1000
+ maxi2rate = self.Maxi2Rate * 1000
if (self.MaxRate != runningmaxrate) or (self.Maxi2Rate != runningmaxi2rate):
logger.log("bwmon: %s reset to %s/%s" % \
(self.name,
bwlimit.format_tc_rate(maxrate),
- bwlimit.format_tc_rate(maxi2rate)))
+ bwlimit.format_tc_rate(maxi2rate)), 1)
bwlimit.set(xid = self.xid,
minrate = self.MinRate * 1000,
maxrate = self.MaxRate * 1000,
minexemptrate = self.Mini2Rate * 1000,
share = self.Share)
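+ # Note on units (convention throughout this file): per-slice rates such as
+ # MaxRate are stored in kbit/s, while bwlimit/tc take bit/s, hence the
+ # "* 1000" in the bwlimit.set() calls.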
- def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec):
+ def notify(self, new_maxrate, new_maxexemptrate, usedbytes, usedi2bytes):
+ """
+ Notify the slice it's being capped.
+ """
+ # Prepare message parameters from the template
+ message = ""
+ params = {'slice': self.name, 'hostname': socket.gethostname(),
+ 'since': time.asctime(time.gmtime(self.time)) + " GMT",
+ 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
+ 'date': time.asctime(time.gmtime()) + " GMT",
+ 'period': format_period(period)}
+
+ # new_maxrate is in bit/s; MaxRate is kept in kbit/s
+ if new_maxrate != (self.MaxRate * 1000):
+ # Format template parameters for low bandwidth message
+ params['class'] = "low bandwidth"
+ params['bytes'] = format_bytes(usedbytes - self.bytes)
+ params['limit'] = format_bytes(self.MaxKByte * 1024)
+ params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)
+
+ # Cap low bandwidth burst rate
+ message += template % params
+ logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params)
+
+ if new_maxexemptrate != (self.Maxi2Rate * 1000):
+ # Format template parameters for high bandwidth message
+ params['class'] = "high bandwidth"
+ params['bytes'] = format_bytes(usedi2bytes - self.i2bytes)
+ params['limit'] = format_bytes(self.Maxi2KByte * 1024)
+ params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxexemptrate)
+
+ message += template % params
+ logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params)
+
+ # Notify slice
+ if message and self.emailed == False:
+ subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
+ if debug:
+ logger.log("bwmon: "+ subject)
+ logger.log("bwmon: "+ message + (footer % params))
+ else:
+ self.emailed = True
+ slicemail(self.name, subject, message + (footer % params))
+
+
+ def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, runningshare, rspec):
"""
Update byte counts and check if byte thresholds have been
exceeded. If so, cap the rate to the bytes remaining in the limit, spread over the time remaining in the period.
# Query Node Manager for max rate overrides
self.updateSliceAttributes(rspec)
-
- # Prepare message parameters from the template
- message = ""
- params = {'slice': self.name, 'hostname': socket.gethostname(),
- 'since': time.asctime(time.gmtime(self.time)) + " GMT",
- 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
- 'date': time.asctime(time.gmtime()) + " GMT",
- 'period': format_period(period)}
+ # Check shares for Sirius loans.
+ if runningshare != self.Share:
+ logger.log("bwmon: Updating share to %s" % self.share)
+ bwlimit.set(xid = self.xid,
+ minrate = self.MinRate * 1000,
+ maxrate = self.MaxRate * 1000,
+ maxexemptrate = self.Maxi2Rate * 1000,
+ minexemptrate = self.Mini2Rate * 1000,
+ share = self.Share)
+
+ # Prepare message parameters from the template
+ #message = ""
+ #params = {'slice': self.name, 'hostname': socket.gethostname(),
+ # 'since': time.asctime(time.gmtime(self.time)) + " GMT",
+ # 'until': time.asctime(time.gmtime(self.time + period)) + " GMT",
+ # 'date': time.asctime(time.gmtime()) + " GMT",
+ # 'period': format_period(period)}
+
+ # Check limits.
if usedbytes >= (self.bytes + (self.ThreshKByte * 1024)):
- if verbose:
- logger.log("bwmon: %s over thresh %s" \
- % (self.name, format_bytes(self.ThreshKByte * 1024)))
sum = self.bytes + (self.ThreshKByte * 1024)
maxbyte = self.MaxKByte * 1024
bytesused = usedbytes - self.bytes
timeused = int(time.time() - self.time)
+ # Calculate new rate.
new_maxrate = int(((maxbyte - bytesused) * 8)/(period - timeused))
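+ # Illustrative example (assumed numbers): with the 5.4 GByte default cap,
+ # ~4.9 GByte already used and 12 of the 24 hours left, the remaining
+ # ~0.5 GByte spread over 43200 s gives a cap of roughly 100 kbit/s.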
+ # Never go under MinRate
if new_maxrate < (self.MinRate * 1000):
new_maxrate = self.MinRate * 1000
+ # State information. I'm capped.
+ self.capped = True
else:
- new_maxrate = self.MaxRate * 1000
-
- # Format template parameters for low bandwidth message
- params['class'] = "low bandwidth"
- params['bytes'] = format_bytes(usedbytes - self.bytes)
- params['limit'] = format_bytes(self.MaxKByte * 1024)
- params['thresh'] = format_bytes(self.ThreshKByte * 1024)
- params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)
+ # Sanity Check
+ new_maxrate = self.MaxRate * 1000
+ self.capped = False
- if verbose:
- logger.log("bwmon: %(slice)s %(class)s " \
- "%(bytes)s of %(limit)s max %(thresh)s thresh (%(new_maxrate)s/s maxrate)" % \
- params)
+ ## Format template parameters for low bandwidth message
+ #params['class'] = "low bandwidth"
+ #params['bytes'] = format_bytes(usedbytes - self.bytes)
+ #params['limit'] = format_bytes(self.MaxKByte * 1024)
+ #params['thresh'] = format_bytes(self.ThreshKByte * 1024)
+ #params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)
# Cap low bandwidth burst rate
- if new_maxrate != runningmaxrate:
- message += template % params
- logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params)
+ #if new_maxrate != runningmaxrate:
+ # message += template % params
+ # logger.log("bwmon: ** %(slice)s %(class)s capped at %(new_maxrate)s/s " % params)
if usedi2bytes >= (self.i2bytes + (self.Threshi2KByte * 1024)):
maxi2byte = self.Maxi2KByte * 1024
i2bytesused = usedi2bytes - self.i2bytes
timeused = int(time.time() - self.time)
+ # Calculate new rate.
new_maxi2rate = int(((maxi2byte - i2bytesused) * 8)/(period - timeused))
+ # Never go under MinRate
if new_maxi2rate < (self.Mini2Rate * 1000):
new_maxi2rate = self.Mini2Rate * 1000
+ # State information. I'm capped.
+ self.capped = True
else:
+ # Sanity; don't clear self.capped here, or a cap set by the low-bandwidth check above would go unreported
new_maxi2rate = self.Maxi2Rate * 1000
# Format template parameters for high bandwidth message
- params['class'] = "high bandwidth"
- params['bytes'] = format_bytes(usedi2bytes - self.i2bytes)
- params['limit'] = format_bytes(self.Maxi2KByte * 1024)
- params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxi2rate)
-
- if verbose:
- logger.log("bwmon: %(slice)s %(class)s " \
- "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % params)
+ #params['class'] = "high bandwidth"
+ #params['bytes'] = format_bytes(usedi2bytes - self.i2bytes)
+ #params['limit'] = format_bytes(self.Maxi2KByte * 1024)
+ #params['new_maxexemptrate'] = bwlimit.format_tc_rate(new_maxi2rate)
# Cap high bandwidth burst rate
- if new_maxi2rate != runningmaxi2rate:
- message += template % params
- logger.log("bwmon: %(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params)
+ #if new_maxi2rate != runningmaxi2rate:
+ # message += template % params
+ # logger.log("bwmon: %(slice)s %(class)s capped at %(new_maxexemptrate)s/s" % params)
# Apply parameters
if new_maxrate != runningmaxrate or new_maxi2rate != runningmaxi2rate:
bwlimit.set(xid = self.xid, maxrate = new_maxrate, maxexemptrate = new_maxi2rate)
# Notify slice
- if message and self.emailed == False:
- subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
- if debug:
- logger.log("bwmon: "+ subject)
- logger.log("bwmon: "+ message + (footer % params))
- else:
- self.emailed = True
- slicemail(self.name, subject, message + (footer % params))
+ if self.capped == True and self.emailed == False:
+ self.notify(new_maxrate, new_maxi2rate, usedbytes, usedi2bytes)
+ # subject = "pl_mom capped bandwidth of slice %(slice)s on %(hostname)s" % params
+ # if debug:
+ # logger.log("bwmon: "+ subject)
+ # logger.log("bwmon: "+ message + (footer % params))
+ # else:
+ # self.emailed = True
+ # slicemail(self.name, subject, message + (footer % params))
def gethtbs(root_xid, default_xid):
"""
and (xid != default_xid):
# Orphaned (not associated with a slice) class
name = "%d?" % xid
- logger.log("bwmon: Found orphaned HTB %s. Removing." %name)
+ logger.log("bwmon: Found orphaned HTB %s. Removing." %name, 1)
bwlimit.off(xid)
livehtbs[xid] = {'share': share,
try:
f = open(datafile, "r+")
- logger.log("bwmon: Loading %s" % datafile)
+ logger.log("bwmon: Loading %s" % datafile, 2)
(version, slices, deaddb) = pickle.load(f)
f.close()
# Check version of data file
- if version != "$Id: bwmon.py,v 1.1.2.11 2007/06/26 18:03:55 faiyaza Exp $":
+ if version != "$Id$":
logger.log("bwmon: Not using old version '%s' data file %s" % (version, datafile))
raise Exception
except Exception:
- version = "$Id: bwmon.py,v 1.1.2.11 2007/06/26 18:03:55 faiyaza Exp $"
+ version = "$Id$"
slices = {}
deaddb = {}
for plcSliver in nmdbcopy.keys():
live[bwlimit.get_xid(plcSliver)] = nmdbcopy[plcSliver]
- logger.log("bwmon: Found %s instantiated slices" % live.keys().__len__())
- logger.log("bwmon: Found %s slices in dat file" % slices.values().__len__())
+ logger.log("bwmon: Found %s instantiated slices" % live.keys().__len__(), 2)
+ logger.log("bwmon: Found %s slices in dat file" % slices.values().__len__(), 2)
# Get actual running values from tc.
# Update slice totals and bandwidth. {xid: {values}}
kernelhtbs = gethtbs(root_xid, default_xid)
- logger.log("bwmon: Found %s running HTBs" % kernelhtbs.keys().__len__())
+ logger.log("bwmon: Found %s running HTBs" % kernelhtbs.keys().__len__(), 2)
# The dat file has HTBs for slices, but the HTBs aren't running
nohtbslices = Set(slices.keys()) - Set(kernelhtbs.keys())
- logger.log( "bwmon: Found %s slices in dat but not running." % nohtbslices.__len__() )
+ logger.log( "bwmon: Found %s slices in dat but not running." % nohtbslices.__len__(), 2)
# Reset tc counts.
for nohtbslice in nohtbslices:
if live.has_key(nohtbslice):
slices[nohtbslice].reset( 0, 0, 0, 0, live[nohtbslice]['_rspec'] )
+ else:
+ logger.log("bwmon: Removing abondoned slice %s from dat." % nohtbslice)
+ del slices[nohtbslice]
- # The dat file doesnt have HTB for the slice, but slice is running and
- # HTB exists
+# The dat file doesn't have an HTB for the slice, but the kernel does
slicesnodat = Set(kernelhtbs.keys()) - Set(slices.keys())
- logger.log( "bwmon: Found %s slices with HTBs but not in dat" % slicesnodat.__len__() )
+ logger.log( "bwmon: Found %s slices with HTBs but not in dat" % slicesnodat.__len__(), 2)
for slicenodat in slicesnodat:
- slices[slicenodat] = Slice(slicenodat,
- live[slicenodat]['name'],
- live[slicenodat]['_rspec'])
+ # But slice is running
+ if live.has_key(slicenodat):
+ # Init the slice, which restarts accounting since the kernel
+ # HTB was already there.
+ slices[slicenodat] = Slice(slicenodat,
+ live[slicenodat]['name'],
+ live[slicenodat]['_rspec'])
# Get new slices.
# Slices in GetSlivers but not running HTBs
newslicesxids = Set(live.keys()) - Set(kernelhtbs.keys())
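+ # Illustrative example (made-up xids): live = {501, 502, 503} and
+ # kernelhtbs = {501, 502} gives newslicesxids = {503}, so slice 503 is
+ # (re)initialized below.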
- logger.log("bwmon: Found %s new slices" % newslicesxids.__len__())
+ logger.log("bwmon: Found %s new slices" % newslicesxids.__len__(), 2)
# Setup new slices
for newslice in newslicesxids:
# Delegated slices dont have xids (which are uids) since they haven't been
# instantiated yet.
if newslice != None and live[newslice].has_key('_rspec') == True:
+ # Check to see if we recently deleted this slice.
if live[newslice]['name'] not in deaddb.keys():
logger.log( "bwmon: New Slice %s" % live[newslice]['name'] )
# _rspec is the computed rspec: NM retrieved data from PLC, computed loans
deadslice['slice'].Maxi2Rate,
deadslice['htb']['usedbytes'],
deadslice['htb']['usedi2bytes'],
+ deadslice['htb']['share'],
live[newslice]['_rspec'])
# Since the slice has been reinitialized, remove it from the dead database.
- del deaddb[deadslice]
+ del deaddb[deadslice['slice'].name]
else:
- logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslice]['name'])
+ logger.log("bwmon: Slice %s doesn't have xid. Skipping." % live[newslice]['name'])
# Move dead slices that exist in the pickle file, but
# aren't instantiated by PLC into the dead dict until
# recording period is over. This is to avoid the case where a slice is dynamically created
# and destroyed then recreated to get around byte limits.
- dead = Set(slices.keys()) - Set(live.keys())
- logger.log("bwmon: Found %s dead slices" % (dead.__len__() - 2))
- for xid in dead:
- if xid == root_xid or xid == default_xid:
+ deadxids = Set(slices.keys()) - Set(live.keys())
+ logger.log("bwmon: Found %s dead slices" % (deadxids.__len__() - 2), 2)
+ for deadxid in deadxids:
+ if deadxid == root_xid or deadxid == default_xid:
continue
- logger.log("bwmon: removing dead slice %s " % xid)
- if slices.has_key(xid):
+ logger.log("bwmon: removing dead slice %s " % deadxid)
+ if slices.has_key(deadxid) and kernelhtbs.has_key(deadxid):
# add slice (by name) to deaddb
- deaddb[slices[xid].name] = {'slice': slices[xid], 'htb': kernelhtbs[xid]}
- del slices[xid]
- if kernelhtbs.has_key(xid): bwlimit.off(xid)
-
- # Clean up deaddb
- for (deadslicexid, deadslice) in deaddb.iteritems():
- if (time.time() >= (deadslice.time() + period)):
- logger.log("bwmon: Removing dead slice %s from dat." % deadslice.name)
- del deaddb[deadslicexid]
+ logger.log("bwmon: Saving bandwidth totals for %s." % slices[deadxid].name)
+ deaddb[slices[deadxid].name] = {'slice': slices[deadxid], 'htb': kernelhtbs[deadxid]}
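+ # deaddb maps slice name -> {'slice': Slice object, 'htb': last tc counters},
+ # so byte totals survive if the sliver is re-created within the same
+ # accounting period (see the restore path in the new-slice loop above).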
+ del slices[deadxid]
+ if kernelhtbs.has_key(deadxid):
+ logger.log("bwmon: Removing HTB for %s." % deadxid, 2)
+ bwlimit.off(deadxid)
+
+ # Clean up deaddb
+ for deadslice in deaddb.keys():
+ if (time.time() >= (deaddb[deadslice]['slice'].time + period)):
+ logger.log("bwmon: Removing dead slice %s from dat." \
+ % deaddb[deadslice]['slice'].name)
+ del deaddb[deadslice]
# Get actual running values from tc since we've added and removed buckets.
# Update slice totals and bandwidth. {xid: {values}}
kernelhtbs = gethtbs(root_xid, default_xid)
- logger.log("bwmon: now %s running HTBs" % kernelhtbs.keys().__len__())
+ logger.log("bwmon: now %s running HTBs" % kernelhtbs.keys().__len__(), 2)
for (xid, slice) in slices.iteritems():
# Monitor only the specified slices
kernelhtbs[xid]['usedi2bytes'], \
live[xid]['_rspec'])
else:
- if debug: logger.log("bwmon: Updating slice %s" % slice.name)
+ logger.log("bwmon: Updating slice %s" % slice.name, 2)
# Update byte counts
slice.update(kernelhtbs[xid]['maxrate'], \
kernelhtbs[xid]['maxexemptrate'], \
kernelhtbs[xid]['usedbytes'], \
kernelhtbs[xid]['usedi2bytes'], \
+ kernelhtbs[xid]['share'], \
live[xid]['_rspec'])
- logger.log("bwmon: Saving %s slices in %s" % (slices.keys().__len__(),datafile))
+ logger.log("bwmon: Saving %s slices in %s" % (slices.keys().__len__(),datafile), 2)
f = open(datafile, "w")
pickle.dump((version, slices, deaddb), f)
f.close()
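+ # The saved state can be inspected offline (illustrative; assumes the bwmon
+ # module is importable so the pickled Slice instances resolve):
+ #   (version, slices, deaddb) = pickle.load(open("/var/lib/misc/bwmon.dat"))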
lock = threading.Event()
def run():
"""When run as a thread, wait for event, lock db, deep copy it, release it, run bwmon.GetSlivers(), then go back to waiting."""
- if debug: logger.log("bwmon: Thread started")
+ logger.log("bwmon: Thread started", 2)
while True:
lock.wait()
- if debug: logger.log("bwmon: Event received. Running.")
+ logger.log("bwmon: Event received. Running.", 2)
database.db_lock.acquire()
nmdbcopy = copy.deepcopy(database.db)
database.db_lock.release()