X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=bwmon.py;h=ee5d41b2dc3afef272a7615594274abac6e982a2;hb=55f2ec0520bda53ad99c4a2016d03b9f898ce40b;hp=9d0fbaf0fe9a07b410c1504a08054c28ceaf3066;hpb=d3cd22f9c16e3fc5268e5931e3e7c585b19f0f8b;p=nodemanager.git

diff --git a/bwmon.py b/bwmon.py
index 9d0fbaf..ee5d41b 100644
--- a/bwmon.py
+++ b/bwmon.py
@@ -1,21 +1,20 @@
 #!/usr/bin/python
 #
-# Average bandwidth monitoring script. Run periodically via cron(8) to
+# Average bandwidth monitoring script. Run periodically via NM db.sync to
 # enforce a soft limit on daily bandwidth usage for each slice. If a
-# slice is found to have exceeded its daily bandwidth usage when the
-# script is run, its instantaneous rate will be capped at the desired
-# average rate. Thus, in the worst case, a slice will only be able to
-# send a little more than twice its average daily limit.
+# slice is found to have transmitted 80% of its daily byte limit usage,
+# its instantaneous rate will be capped at the bytes remaning in the limit
+# over the time remaining in the recording period.
 #
 # Two separate limits are enforced, one for destinations exempt from
-# the node bandwidth cap, and the other for all other destinations.
+# the node bandwidth cap (i.e. Internet2), and the other for all other destinations.
 #
 # Mark Huang <mlhuang@cs.princeton.edu>
 # Andy Bavier <acb@cs.princeton.edu>
 # Faiyaz Ahmed <faiyaza@cs.princeton.edu>
-# Copyright (C) 2004-2006 The Trustees of Princeton University
+# Copyright (C) 2004-2008 The Trustees of Princeton University
 #
-# $Id: bwmon.py,v 1.21 2007/06/16 14:30:17 faiyaza Exp $
+# $Id$
 #
 
 import os
@@ -48,7 +47,7 @@ seconds_per_day = 24 * 60 * 60
 bits_per_byte = 8
 
 # Defaults
-debug = False
+debug = True
 verbose = False
 datafile = "/var/lib/misc/bwmon.dat"
 #nm = None
@@ -130,6 +129,10 @@ def format_period(seconds):
         return "%.0f seconds" % seconds
 
 def slicemail(slice, subject, body):
+    '''
+    Front end to sendmail.  Sends email to slice alias with given subject and body.
+    '''
+
     sendmail = os.popen("/usr/sbin/sendmail -N never -t -f%s" % PLC_MAIL_SUPPORT_ADDRESS, "w")
 
     # PLC has a separate list for pl_mom messages
@@ -173,13 +176,15 @@ class Slice:
     time - beginning of recording period in UNIX seconds
     bytes - low bandwidth bytes transmitted at the beginning of the recording period
     i2bytes - high bandwidth bytes transmitted at the beginning of the recording period (for I2 -F)
-    ByteMax - total volume of data allowed
-    ByteThresh - After thresh, cap node to (maxbyte - bytes)/(time left in period)
-    ExemptByteMax - Same as above, but for i2.
-    ExemptByteThresh - i2 ByteThresh
-    maxrate - max_rate slice attribute. 
-    maxexemptrate - max_exempt_rate slice attribute.
-    self.emailed = did we email during this recording period
+    MaxKByte - total volume of data allowed
+    ThreshKbyte - After thresh, cap node to (maxkbyte - bytes)/(time left in period)
+    Maxi2KByte - same as MaxKByte, but for i2 
+    Threshi2Kbyte - same as Threshi2KByte, but for i2 
+    MaxRate - max_rate slice attribute. 
+    Maxi2Rate - max_exempt_rate slice attribute.
+    Share - Used by Sirius to loan min rates
+    Sharei2 - Used by Sirius to loan min rates for i2
+    self.emailed - did slice recv email during this recording period
 
     """
 
@@ -213,7 +218,10 @@ class Slice:
         return self.name
 
     def updateSliceAttributes(self, rspec):
-        # Get attributes
+        '''
+        Use respects from GetSlivers to PLC to populate slice object.  Also
+        do some sanity checking.
+        '''
 
         # Sanity check plus policy decision for MinRate:
         # Minrate cant be greater than 25% of MaxRate or NodeCap.
@@ -305,8 +313,9 @@ class Slice:
 
     def update(self, runningmaxrate, runningmaxi2rate, usedbytes, usedi2bytes, rspec):
         """
-        Update byte counts and check if byte limits have been
-        exceeded. 
+        Update byte counts and check if byte thresholds have been
+        exceeded. If exceeded, cap to  remaining bytes in limit over remaining in period.  
+        Recalculate every time module runs.
         """
     
         # Query Node Manager for max rate overrides
@@ -404,8 +413,6 @@ def gethtbs(root_xid, default_xid):
         
         name = bwlimit.get_slice(xid)
 
-        
-        
         if (name is None) \
         and (xid != root_xid) \
         and (xid != default_xid):
@@ -455,15 +462,16 @@ def sync(nmdbcopy):
     try:
         f = open(datafile, "r+")
         logger.log("bwmon:  Loading %s" % datafile)
-        (version, slices) = pickle.load(f)
+        (version, slices, deaddb) = pickle.load(f)
         f.close()
         # Check version of data file
-        if version != "$Id: bwmon.py,v 1.21 2007/06/16 14:30:17 faiyaza Exp $":
+        if version != "$Id$":
             logger.log("bwmon:  Not using old version '%s' data file %s" % (version, datafile))
             raise Exception
     except Exception:
-        version = "$Id: bwmon.py,v 1.21 2007/06/16 14:30:17 faiyaza Exp $"
+        version = "$Id$"
         slices = {}
+        deaddb = {}
 
     # Get/set special slice IDs
     root_xid = bwlimit.get_xid("root")
@@ -491,49 +499,91 @@ def sync(nmdbcopy):
 
     # Get actual running values from tc.
     # Update slice totals and bandwidth. {xid: {values}}
-    livehtbs = gethtbs(root_xid, default_xid)
-    logger.log("bwmon:  Found %s running HTBs" % livehtbs.keys().__len__())
+    kernelhtbs = gethtbs(root_xid, default_xid)
+    logger.log("bwmon:  Found %s running HTBs" % kernelhtbs.keys().__len__())
+
+    # The dat file has HTBs for slices, but the HTBs aren't running
+    nohtbslices =  Set(slices.keys()) - Set(kernelhtbs.keys())
+    logger.log( "bwmon:  Found %s slices in dat but not running." % nohtbslices.__len__() )
+    # Reset tc counts.
+    for nohtbslice in nohtbslices:
+        if live.has_key(nohtbslice): 
+            slices[nohtbslice].reset( 0, 0, 0, 0, live[nohtbslice]['_rspec'] )
+
+    # The dat file doesnt have HTB for the slice, but slice is running and
+    # HTB exists
+    slicesnodat = Set(kernelhtbs.keys()) - Set(slices.keys())
+    logger.log( "bwmon: Found %s slices with HTBs but not in dat" % slicesnodat.__len__() )
+    for slicenodat in slicesnodat:
+        slices[slicenodat] = Slice(slicenodat, 
+                                live[slicenodat]['name'], 
+                                live[slicenodat]['_rspec'])
 
     # Get new slices.
-    # live.xids - runing(slices).xids = new.xids
-    #newslicesxids = Set(live.keys()) - Set(slices.keys()) 
-    newslicesxids = Set(live.keys()) - Set(livehtbs.keys())
+    # Slices in GetSlivers but not running HTBs
+    newslicesxids = Set(live.keys()) - Set(kernelhtbs.keys())
     logger.log("bwmon:  Found %s new slices" % newslicesxids.__len__())
-
-    # Incase we rebooted and need to keep track of already running htbs 
-    norecxids =  Set(livehtbs.keys()) - Set(slices.keys())
-    logger.log("bwmon:  Found %s slices that have htbs but not in dat." % norecxids.__len__())
-    newslicesxids.update(norecxids)
-        
+       
     # Setup new slices
     for newslice in newslicesxids:
         # Delegated slices dont have xids (which are uids) since they haven't been
         # instantiated yet.
         if newslice != None and live[newslice].has_key('_rspec') == True:
-            logger.log("bwmon: New Slice %s" % live[newslice]['name'])
-            # _rspec is the computed rspec:  NM retrieved data from PLC, computed loans
-            # and made a dict of computed values.
-            slices[newslice] = Slice(newslice, live[newslice]['name'], live[newslice]['_rspec'])
-            slices[newslice].reset(0, 0, 0, 0, live[newslice]['_rspec'])
+            if live[newslice]['name'] not in deaddb.keys():
+                logger.log( "bwmon: New Slice %s" % live[newslice]['name'] )
+                # _rspec is the computed rspec:  NM retrieved data from PLC, computed loans
+                # and made a dict of computed values.
+                slices[newslice] = Slice(newslice, live[newslice]['name'], live[newslice]['_rspec'])
+                slices[newslice].reset( 0, 0, 0, 0, live[newslice]['_rspec'] )
+            # Double check time for dead slice in deaddb is within 24hr recording period.
+            elif (time.time() <= (deaddb[live[newslice]['name']]['slice'].time + period)):
+                deadslice = deaddb[live[newslice]['name']]
+                logger.log("bwmon: Reinstantiating deleted slice %s" % live[newslice]['name'])
+                slices[newslice] = deadslice['slice']
+                slices[newslice].xid = newslice
+                # Start the HTB
+                slices[newslice].reset(deadslice['slice'].MaxRate,
+                                    deadslice['slice'].Maxi2Rate,
+                                    deadslice['htb']['usedbytes'],
+                                    deadslice['htb']['usedi2bytes'],
+                                    live[newslice]['_rspec'])
+                # Bring up to date
+                slices[newslice].update(deadslice['slice'].MaxRate, 
+                                    deadslice['slice'].Maxi2Rate, 
+                                    deadslice['htb']['usedbytes'], 
+                                    deadslice['htb']['usedi2bytes'], 
+                                    live[newslice]['_rspec'])
+                # Since the slice has been reinitialed, remove from dead database.
+                del deaddb[deadslice]
         else:
             logger.log("bwmon  Slice %s doesn't have xid.  Must be delegated.  Skipping." % live[newslice]['name'])
 
-    # Delete dead slices.
-    # First delete dead slices that exist in the pickle file, but
-    # aren't instantiated by PLC.
+    # Move dead slices that exist in the pickle file, but
+    # aren't instantiated by PLC into the dead dict until
+    # recording period is over.  This is to avoid the case where a slice is dynamically created
+    # and destroyed then recreated to get around byte limits.
     dead = Set(slices.keys()) - Set(live.keys())
     logger.log("bwmon:  Found %s dead slices" % (dead.__len__() - 2))
     for xid in dead:
         if xid == root_xid or xid == default_xid:
             continue
         logger.log("bwmon:  removing dead slice  %s " % xid)
-        if slices.has_key(xid): del slices[xid]
-        if livehtbs.has_key(xid): bwlimit.off(xid)
+        if slices.has_key(xid):
+            # add slice (by name) to deaddb
+            deaddb[slices[xid].name] = {'slice': slices[xid], 'htb': kernelhtbs[xid]}
+            del slices[xid]
+        if kernelhtbs.has_key(xid): bwlimit.off(xid)
+	
+	# Clean up deaddb
+	for (deadslicexid, deadslice) in deaddb.iteritems():
+		if (time.time() >= (deadslice.time() + period)):
+			logger.log("bwmon: Removing dead slice %s from dat." % deadslice.name)
+			del deaddb[deadslicexid]
 
     # Get actual running values from tc since we've added and removed buckets.
     # Update slice totals and bandwidth. {xid: {values}}
-    livehtbs = gethtbs(root_xid, default_xid)
-    logger.log("bwmon:  now %s running HTBs" % livehtbs.keys().__len__())
+    kernelhtbs = gethtbs(root_xid, default_xid)
+    logger.log("bwmon:  now %s running HTBs" % kernelhtbs.keys().__len__())
 
     for (xid, slice) in slices.iteritems():
         # Monitor only the specified slices
@@ -542,29 +592,29 @@ def sync(nmdbcopy):
             continue
  
         if (time.time() >= (slice.time + period)) or \
-        (livehtbs[xid]['usedbytes'] < slice.bytes) or \
-        (livehtbs[xid]['usedi2bytes'] < slice.i2bytes):
+            (kernelhtbs[xid]['usedbytes'] < slice.bytes) or \
+            (kernelhtbs[xid]['usedi2bytes'] < slice.i2bytes):
             # Reset to defaults every 24 hours or if it appears
             # that the byte counters have overflowed (or, more
             # likely, the node was restarted or the HTB buckets
             # were re-initialized).
-            slice.reset(livehtbs[xid]['maxrate'], \
-                livehtbs[xid]['maxexemptrate'], \
-                livehtbs[xid]['usedbytes'], \
-                livehtbs[xid]['usedi2bytes'], \
+            slice.reset(kernelhtbs[xid]['maxrate'], \
+                kernelhtbs[xid]['maxexemptrate'], \
+                kernelhtbs[xid]['usedbytes'], \
+                kernelhtbs[xid]['usedi2bytes'], \
                 live[xid]['_rspec'])
         else:
             if debug:  logger.log("bwmon: Updating slice %s" % slice.name)
             # Update byte counts
-            slice.update(livehtbs[xid]['maxrate'], \
-                livehtbs[xid]['maxexemptrate'], \
-                livehtbs[xid]['usedbytes'], \
-                livehtbs[xid]['usedi2bytes'], \
+            slice.update(kernelhtbs[xid]['maxrate'], \
+                kernelhtbs[xid]['maxexemptrate'], \
+                kernelhtbs[xid]['usedbytes'], \
+                kernelhtbs[xid]['usedi2bytes'], \
                 live[xid]['_rspec'])
-    
+
     logger.log("bwmon:  Saving %s slices in %s" % (slices.keys().__len__(),datafile))
     f = open(datafile, "w")
-    pickle.dump((version, slices), f)
+    pickle.dump((version, slices, deaddb), f)
     f.close()
 
 lock = threading.Event()