# Faiyaz Ahmed <faiyaza@cs.princeton.edu>
# Copyright (C) 2004-2006 The Trustees of Princeton University
#
-# $Id: bwmon.py,v 1.1.2.5 2007/02/28 05:26:54 faiyaza Exp $
+# $Id: bwmon.py,v 1.21 2007/06/16 14:30:17 faiyaza Exp $
#
import os
import sys
import time
import pickle
-
import socket
-#import xmlrpclib
-import bwlimit
import logger
+import copy
+import threading
+import tools
+
+import bwlimit
+import database
from sets import Set
+
try:
sys.path.append("/etc/planetlab")
from plc_config import *
PLC_MAIL_SUPPORT_ADDRESS = "support@planet-lab.org"
PLC_MAIL_SLICE_ADDRESS = "SLICE@slices.planet-lab.org"
-
-# Utility functions
-#from pl_mom import *
-
# Constants
seconds_per_day = 24 * 60 * 60
bits_per_byte = 8
"""
- def __init__(self, xid, name, data):
+ def __init__(self, xid, name, rspec):
self.xid = xid
self.name = name
self.time = 0
self.Maxi2KByte = default_Maxi2KByte
self.Threshi2KByte = default_Threshi2KByte
self.Share = default_Share
+ self.Sharei2 = default_Share
self.emailed = False
- self.updateSliceAttributes(data)
+ self.updateSliceAttributes(rspec)
bwlimit.set(xid = self.xid,
minrate = self.MinRate * 1000,
maxrate = self.MaxRate * 1000,
minexemptrate = self.Mini2Rate * 1000,
share = self.Share)
-
def __repr__(self):
return self.name
- def updateSliceAttributes(self, data):
- # Incase the limits have changed.
- if (self.MaxRate != default_MaxRate) or \
- (self.Maxi2Rate != default_Maxi2Rate):
- self.MaxRate = int(bwlimit.get_bwcap() / 1000)
- self.Maxi2Rate = int(bwlimit.bwmax / 1000)
-
+ def updateSliceAttributes(self, rspec):
# Get attributes
- for sliver in data['slivers']:
- if sliver['name'] == self.name:
- for attribute in sliver['attributes']:
- if attribute['name'] == 'net_min_rate':
- logger.log("bwmon: Updating %s. Min Rate = %s" \
- %(self.name, self.MinRate))
- # To ensure min does not go above 25% of nodecap.
- if int(attribute['value']) > int(.25 * default_MaxRate):
- self.MinRate = int(.25 * default_MaxRate)
- else:
- self.MinRate = int(attribute['value'])
- elif attribute['name'] == 'net_max_rate':
- self.MaxRate = int(attribute['value'])
- logger.log("bwmon: Updating %s. Max Rate = %s" \
- %(self.name, self.MaxRate))
- elif attribute['name'] == 'net_i2_min_rate':
- self.Mini2Rate = int(attribute['value'])
- logger.log("bwmon: Updating %s. Min i2 Rate = %s" \
- %(self.name, self.Mini2Rate))
- elif attribute['name'] == 'net_i2_max_rate':
- self.Maxi2Rate = int(attribute['value'])
- logger.log("bwmon: Updating %s. Max i2 Rate = %s" \
- %(self.name, self.Maxi2Rate))
- elif attribute['name'] == 'net_max_kbyte':
- self.MaxKByte = int(attribute['value'])
- logger.log("bwmon: Updating %s. Max KByte lim = %s" \
- %(self.name, self.MaxKByte))
- elif attribute['name'] == 'net_i2_max_kbyte':
- self.Maxi2KByte = int(attribute['value'])
- logger.log("bwmon: Updating %s. Max i2 KByte = %s" \
- %(self.name, self.Maxi2KByte))
- elif attribute['name'] == 'net_thresh_kbyte':
- self.ThreshKByte = int(attribute['value'])
- logger.log("bwmon: Updating %s. Thresh KByte = %s" \
- %(self.name, self.ThreshKByte))
- elif attribute['name'] == 'net_i2_thresh_kbyte':
- self.Threshi2KByte = int(attribute['value'])
- logger.log("bwmon: Updating %s. i2 Thresh KByte = %s" \
- %(self.name, self.Threshi2KByte))
- elif attribute['name'] == 'net_share':
- self.Share = int(attribute['value'])
- logger.log("bwmon: Updating %s. Net Share = %s" \
- %(self.name, self.Share))
- elif attribute['name'] == 'net_i2_share':
- self.Sharei2 = int(attribute['value'])
- logger.log("bwmon: Updating %s. Net i2 Share = %s" \
- %(self.name, self.i2Share))
-
-
- def reset(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, data):
+
+ # Sanity check plus policy decision for MinRate:
+ # Minrate cant be greater than 25% of MaxRate or NodeCap.
+ MinRate = int(rspec.get("net_min_rate", default_MinRate))
+ if MinRate > int(.25 * default_MaxRate):
+ MinRate = int(.25 * default_MaxRate)
+ if MinRate != self.MinRate:
+ self.MinRate = MinRate
+ logger.log("bwmon: Updating %s: Min Rate = %s" %(self.name, self.MinRate))
+
+ MaxRate = int(rspec.get('net_max_rate', bwlimit.get_bwcap() / 1000))
+ if MaxRate != self.MaxRate:
+ self.MaxRate = MaxRate
+ logger.log("bwmon: Updating %s: Max Rate = %s" %(self.name, self.MaxRate))
+
+ Mini2Rate = int(rspec.get('net_i2_min_rate', default_Mini2Rate))
+ if Mini2Rate != self.Mini2Rate:
+ self.Mini2Rate = Mini2Rate
+ logger.log("bwmon: Updating %s: Min i2 Rate = %s" %(self.name, self.Mini2Rate))
+
+ Maxi2Rate = int(rspec.get('net_i2_max_rate', bwlimit.bwmax / 1000))
+ if Maxi2Rate != self.Maxi2Rate:
+ self.Maxi2Rate = Maxi2Rate
+ logger.log("bwmon: Updating %s: Max i2 Rate = %s" %(self.name, self.Maxi2Rate))
+
+ MaxKByte = int(rspec.get('net_max_kbyte', default_MaxKByte))
+ if MaxKByte != self.MaxKByte:
+ self.MaxKByte = MaxKByte
+ logger.log("bwmon: Updating %s: Max KByte lim = %s" %(self.name, self.MaxKByte))
+
+ Maxi2KByte = int(rspec.get('net_i2_max_kbyte', default_Maxi2KByte))
+ if Maxi2KByte != self.Maxi2KByte:
+ self.Maxi2KByte = Maxi2KByte
+ logger.log("bwmon: Updating %s: Max i2 KByte = %s" %(self.name, self.Maxi2KByte))
+
+ ThreshKByte = int(rspec.get('net_thresh_kbyte', default_ThreshKByte))
+ if ThreshKByte != self.ThreshKByte:
+ self.ThreshKByte = ThreshKByte
+ logger.log("bwmon: Updating %s: Thresh KByte = %s" %(self.name, self.ThreshKByte))
+
+ Threshi2KByte = int(rspec.get('net_i2_thresh_kbyte', default_Threshi2KByte))
+ if Threshi2KByte != self.Threshi2KByte:
+ self.Threshi2KByte = Threshi2KByte
+ logger.log("bwmon: Updating %s: i2 Thresh KByte = %s" %(self.name, self.Threshi2KByte))
+
+ Share = int(rspec.get('net_share', default_Share))
+ if Share != self.Share:
+ self.Share = Share
+ logger.log("bwmon: Updating %s: Net Share = %s" %(self.name, self.Share))
+
+ Sharei2 = int(rspec.get('net_i2_share', default_Share))
+ if Sharei2 != self.Sharei2:
+ self.Sharei2 = Sharei2
+            logger.log("bwmon: Updating %s: Net i2 Share = %s" %(self.name, self.Sharei2))
+
+
+ def reset(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec):
"""
Begin a new recording period. Remove caps by restoring limits
to their default values.
"""
# Query Node Manager for max rate overrides
- self.updateSliceAttributes(data)
+ self.updateSliceAttributes(rspec)
# Reset baseline time
self.time = time.time()
minexemptrate = self.Mini2Rate * 1000,
share = self.Share)
- def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, data):
+ def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec):
"""
Update byte counts and check if byte limits have been
exceeded.
"""
# Query Node Manager for max rate overrides
- self.updateSliceAttributes(data)
+ self.updateSliceAttributes(rspec)
# Prepare message parameters from the template
message = ""
'period': format_period(period)}
if usedbytes >= (self.bytes + (self.ThreshKByte * 1024)):
+ if verbose:
+ logger.log("bwmon: %s over thresh %s" \
+ % (self.name, format_bytes(self.ThreshKByte * 1024)))
sum = self.bytes + (self.ThreshKByte * 1024)
maxbyte = self.MaxKByte * 1024
bytesused = usedbytes - self.bytes
params['class'] = "low bandwidth"
params['bytes'] = format_bytes(usedbytes - self.bytes)
params['limit'] = format_bytes(self.MaxKByte * 1024)
+ params['thresh'] = format_bytes(self.ThreshKByte * 1024)
params['new_maxrate'] = bwlimit.format_tc_rate(new_maxrate)
if verbose:
logger.log("bwmon: %(slice)s %(class)s " \
- "%(bytes)s of %(limit)s (%(new_maxrate)s/s maxrate)" % \
+ "%(bytes)s of %(limit)s max %(thresh)s thresh (%(new_maxrate)s/s maxrate)" % \
params)
# Cap low bandwidth burst rate
self.emailed = True
slicemail(self.name, subject, message + (footer % params))
-def GetSlivers(data):
+def gethtbs(root_xid, default_xid):
+ """
+ Return dict {xid: {*rates}} of running htbs as reported by tc that have names.
+ Turn off HTBs without names.
+ """
+ livehtbs = {}
+ for params in bwlimit.get():
+ (xid, share,
+ minrate, maxrate,
+ minexemptrate, maxexemptrate,
+ usedbytes, usedi2bytes) = params
+
+ name = bwlimit.get_slice(xid)
+
+ if (name is None) \
+ and (xid != root_xid) \
+ and (xid != default_xid):
+ # Orphaned (not associated with a slice) class
+ name = "%d?" % xid
+ logger.log("bwmon: Found orphaned HTB %s. Removing." %name)
+ bwlimit.off(xid)
+
+ livehtbs[xid] = {'share': share,
+ 'minrate': minrate,
+ 'maxrate': maxrate,
+ 'maxexemptrate': maxexemptrate,
+ 'minexemptrate': minexemptrate,
+ 'usedbytes': usedbytes,
+ 'name': name,
+ 'usedi2bytes': usedi2bytes}
+
+ return livehtbs
+
+def sync(nmdbcopy):
+ """
+ Syncs tc, db, and bwmon.dat. Then, starts new slices, kills old ones, and updates byte accounts for each running slice. Sends emails and caps those that went over their limit.
+ """
# Defaults
global datafile, \
period, \
# All slices
names = []
-
# Incase the limits have changed.
default_MaxRate = int(bwlimit.get_bwcap() / 1000)
default_Maxi2Rate = int(bwlimit.bwmax / 1000)
(version, slices) = pickle.load(f)
f.close()
# Check version of data file
- if version != "$Id: bwmon.py,v 1.1.2.5 2007/02/28 05:26:54 faiyaza Exp $":
+ if version != "$Id: bwmon.py,v 1.21 2007/06/16 14:30:17 faiyaza Exp $":
logger.log("bwmon: Not using old version '%s' data file %s" % (version, datafile))
raise Exception
except Exception:
- version = "$Id: bwmon.py,v 1.1.2.5 2007/02/28 05:26:54 faiyaza Exp $"
+ version = "$Id: bwmon.py,v 1.21 2007/06/16 14:30:17 faiyaza Exp $"
slices = {}
# Get/set special slice IDs
root_xid = bwlimit.get_xid("root")
default_xid = bwlimit.get_xid("default")
+ # Since root is required for sanity, its not in the API/plc database, so pass {}
+ # to use defaults.
if root_xid not in slices.keys():
- slices[root_xid] = Slice(root_xid, "root", data)
- slices[root_xid].reset(0, 0, 0, 0, data)
-
+ slices[root_xid] = Slice(root_xid, "root", {})
+ slices[root_xid].reset(0, 0, 0, 0, {})
+
+ # Used by bwlimit. pass {} since there is no rspec (like above).
if default_xid not in slices.keys():
- slices[default_xid] = Slice(default_xid, "default", data)
- slices[default_xid].reset(0, 0, 0, 0, data)
+ slices[default_xid] = Slice(default_xid, "default", {})
+ slices[default_xid].reset(0, 0, 0, 0, {})
live = {}
- # Get running slivers. {xid: name}
- for sliver in data['slivers']:
- live[bwlimit.get_xid(sliver['name'])] = sliver['name']
-
- # Setup new slices.
- # live.xids - runing.xids = new.xids
- newslicesxids = Set(live.keys()) - Set(slices.keys())
- for newslicexid in newslicesxids:
- if newslicexid != None:
- logger.log("bwmon: New Slice %s" % live[newslicexid])
- slices[newslicexid] = Slice(newslicexid, live[newslicexid], data)
- slices[newslicexid].reset(0, 0, 0, 0, data)
- else:
- logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslicexid])
+ # Get running slivers that should be on this node (from plc). {xid: name}
+ # db keys on name, bwmon keys on xid. db doesnt have xid either.
+ for plcSliver in nmdbcopy.keys():
+ live[bwlimit.get_xid(plcSliver)] = nmdbcopy[plcSliver]
+
+ logger.log("bwmon: Found %s instantiated slices" % live.keys().__len__())
+ logger.log("bwmon: Found %s slices in dat file" % slices.values().__len__())
+
# Get actual running values from tc.
- # Update slice totals and bandwidth.
- for params in bwlimit.get():
- (xid, share,
- minrate, maxrate,
- minexemptrate, maxexemptrate,
- usedbytes, usedi2bytes) = params
+ # Update slice totals and bandwidth. {xid: {values}}
+ livehtbs = gethtbs(root_xid, default_xid)
+ logger.log("bwmon: Found %s running HTBs" % livehtbs.keys().__len__())
+
+ # Get new slices.
+    # live.xids - running(slices).xids = new.xids
+ #newslicesxids = Set(live.keys()) - Set(slices.keys())
+ newslicesxids = Set(live.keys()) - Set(livehtbs.keys())
+ logger.log("bwmon: Found %s new slices" % newslicesxids.__len__())
+
+    # In case we rebooted and need to keep track of already running htbs
+ norecxids = Set(livehtbs.keys()) - Set(slices.keys())
+ logger.log("bwmon: Found %s slices that have htbs but not in dat." % norecxids.__len__())
+ # Reset tc counts.
+ for norecxid in norecxids:
+ slices[norecxid] = Slice(norecxid, live[norecxid]['name'], live[norecxid]['_rspec'])
+ slices[norecxid].reset(livehtbs[norecxid]['maxrate'],
+ livehtbs[norecxid]['maxexemptrate'],
+ livehtbs[norecxid]['usedbytes'],
+ livehtbs[norecxid]['usedi2bytes'],
+ live[norecxid]['_rspec'])
- # Ignore root and default buckets
+ # Setup new slices
+ for newslice in newslicesxids:
+ # Delegated slices dont have xids (which are uids) since they haven't been
+ # instantiated yet.
+ if newslice != None and live[newslice].has_key('_rspec') == True:
+ logger.log("bwmon: New Slice %s" % live[newslice]['name'])
+ # _rspec is the computed rspec: NM retrieved data from PLC, computed loans
+ # and made a dict of computed values.
+ slices[newslice] = Slice(newslice, live[newslice]['name'], live[newslice]['_rspec'])
+ slices[newslice].reset(0, 0, 0, 0, live[newslice]['_rspec'])
+ else:
+ logger.log("bwmon Slice %s doesn't have xid. Must be delegated. Skipping." % live[newslice]['name'])
+
+ # Delete dead slices.
+ # First delete dead slices that exist in the pickle file, but
+ # aren't instantiated by PLC.
+ dead = Set(slices.keys()) - Set(live.keys())
+ logger.log("bwmon: Found %s dead slices" % (dead.__len__() - 2))
+ for xid in dead:
if xid == root_xid or xid == default_xid:
continue
+ logger.log("bwmon: removing dead slice %s " % xid)
+ if slices.has_key(xid): del slices[xid]
+ if livehtbs.has_key(xid): bwlimit.off(xid)
- name = bwlimit.get_slice(xid)
- if name is None:
- # Orphaned (not associated with a slice) class
- name = "%d?" % xid
- bwlimit.off(xid)
+ # Get actual running values from tc since we've added and removed buckets.
+ # Update slice totals and bandwidth. {xid: {values}}
+ livehtbs = gethtbs(root_xid, default_xid)
+ logger.log("bwmon: now %s running HTBs" % livehtbs.keys().__len__())
+ for (xid, slice) in slices.iteritems():
# Monitor only the specified slices
+ if xid == root_xid or xid == default_xid: continue
-        if names and name not in names:
+        if names and slice.name not in names:
continue
- #slices is populated from the pickle file
- #xid is populated from bwlimit (read from /etc/passwd)
- if slices.has_key(xid):
- slice = slices[xid]
- if time.time() >= (slice.time + period) or \
- usedbytes < slice.bytes or usedi2bytes < slice.i2bytes:
- # Reset to defaults every 24 hours or if it appears
- # that the byte counters have overflowed (or, more
- # likely, the node was restarted or the HTB buckets
- # were re-initialized).
- slice.reset(maxrate, maxexemptrate, usedbytes, usedi2bytes, data)
- else:
- # Update byte counts
- slice.update(maxrate, maxexemptrate, usedbytes, usedi2bytes, data)
+
+ if (time.time() >= (slice.time + period)) or \
+ (livehtbs[xid]['usedbytes'] < slice.bytes) or \
+ (livehtbs[xid]['usedi2bytes'] < slice.i2bytes):
+ # Reset to defaults every 24 hours or if it appears
+ # that the byte counters have overflowed (or, more
+ # likely, the node was restarted or the HTB buckets
+ # were re-initialized).
+ slice.reset(livehtbs[xid]['maxrate'], \
+ livehtbs[xid]['maxexemptrate'], \
+ livehtbs[xid]['usedbytes'], \
+ livehtbs[xid]['usedi2bytes'], \
+ live[xid]['_rspec'])
else:
- # Just in case. Probably (hopefully) this will never happen.
- # New slice, initialize state
- logger.log("bwmon: New Slice %s" % name)
- slice = slices[xid] = Slice(xid, name, data)
- slice.reset(maxrate, maxexemptrate, usedbytes, usedi2bytes, data)
-
- # Delete dead slices
- dead = Set(slices.keys()) - Set(live.keys())
- for xid in dead:
- if xid == root_xid or xid == default_xid:
- continue
- del slices[xid]
- bwlimit.off(xid)
-
- logger.log("bwmon: Saving %s" % datafile)
+ if debug: logger.log("bwmon: Updating slice %s" % slice.name)
+ # Update byte counts
+ slice.update(livehtbs[xid]['maxrate'], \
+ livehtbs[xid]['maxexemptrate'], \
+ livehtbs[xid]['usedbytes'], \
+ livehtbs[xid]['usedi2bytes'], \
+ live[xid]['_rspec'])
+
+ logger.log("bwmon: Saving %s slices in %s" % (slices.keys().__len__(),datafile))
f = open(datafile, "w")
pickle.dump((version, slices), f)
f.close()
-
-#def GetSlivers(data):
-# for sliver in data['slivers']:
-# if sliver.has_key('attributes'):
-# print sliver
-# for attribute in sliver['attributes']:
-# if attribute['name'] == "KByteThresh": print attribute['value']
-
-def start(options, config):
+lock = threading.Event()
+def run():
+ """When run as a thread, wait for event, lock db, deep copy it, release it, run bwmon.GetSlivers(), then go back to waiting."""
+ if debug: logger.log("bwmon: Thread started")
+ while True:
+ lock.wait()
+ if debug: logger.log("bwmon: Event received. Running.")
+ database.db_lock.acquire()
+ nmdbcopy = copy.deepcopy(database.db)
+ database.db_lock.release()
+ try: sync(nmdbcopy)
+ except: logger.log_exc()
+ lock.clear()
+
+def start(*args):
+ tools.as_daemon_thread(run)
+
+def GetSlivers(*args):
pass
-