updated the model for actions, site history
authorStephen Soltesz <soltesz@cs.princeton.edu>
Sat, 21 Mar 2009 00:13:35 +0000 (00:13 +0000)
committerStephen Soltesz <soltesz@cs.princeton.edu>
Sat, 21 Mar 2009 00:13:35 +0000 (00:13 +0000)
added email messages for new sample policy.py
enhanced web view to show penalties
updated model in nodebad, pcubad, sitebad

15 files changed:
findall.py
monitor/common.py
monitor/database/info/action.py
monitor/database/info/findbad.py
monitor/database/info/history.py
monitor/reboot.py
monitor/wrapper/emailTxt.py
monitor/wrapper/plccache.py
nodebad.py
pcubad.py
sitebad.py
web/MonitorWeb/monitorweb/controllers.py
web/MonitorWeb/monitorweb/static/css/style.css
web/MonitorWeb/monitorweb/templates/pcuview.kid
web/MonitorWeb/monitorweb/templates/sitelist.kid

index 26335e0..41e23c6 100755 (executable)
@@ -4,6 +4,8 @@ from monitor import parser as parsermodule
 from findbad import main as findbad_main
 from findbadpcu import main as findbadpcu_main
 from sitebad import main as sitebad_main
+from nodebad import main as nodebad_main
+from pcubad import main as pcubad_main
 import sys
 
 if __name__ == '__main__':
@@ -11,7 +13,7 @@ if __name__ == '__main__':
        parser = parsermodule.getParser(['nodesets'])
 
        parser.set_defaults( increment=False, dbname="findbad", cachenodes=False, 
-                                               force=False, pcuselect=None, pcuid=None)
+                                               force=False, pcuselect=None, pcuid=None, pcu=None)
        parser.add_option("", "--cachenodes", action="store_true",
                                                help="Cache node lookup from PLC")
        parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
@@ -26,8 +28,15 @@ if __name__ == '__main__':
        cfg = parsermodule.parse_args(parser)
 
        try:
+               print "findbad"
                findbad_main()
+               print "findbadpcu"
                findbadpcu_main()
+               print "nodebad"
+               nodebad_main()
+               print "pcubad"
+               pcubad_main()
+               print "sitebad"
                sitebad_main()
        except Exception, err:
                import traceback
index be4a171..6f88051 100644 (file)
@@ -1,13 +1,12 @@
 
 import time
 import struct
-from pcucontrol import reboot
-
+from monitor import reboot
 from monitor import util
 from monitor import database
 from monitor.wrapper import plc, plccache
 
-from datetime import datetime 
+from datetime import datetime, timedelta
 from monitor.model import PersistFlags, Message
 
 esc = struct.pack('i', 27)
@@ -222,3 +221,20 @@ def email_exception(content=None):
     m=Message("exception running monitor", msg, False)
     m.send([config.cc_email])
     return
+
+def changed_lessthan(last_changed, days):
+       if datetime.now() - last_changed <= timedelta(days):
+               print "last changed less than %s" % timedelta(days)
+               return True
+       else:
+               print "last changed more than %s" % timedelta(days)
+               return False
+
+def changed_greaterthan(last_changed, days):
+       if datetime.now() - last_changed > timedelta(days):
+               print "last changed more than %s" % timedelta(days)
+               return True
+       else:
+               print "last changed less than %s" % timedelta(days)
+               return False
+       
index 2569e35..77e904c 100644 (file)
@@ -47,8 +47,27 @@ class ActionRecord(Entity):
 
 # ACCOUNTING
        date_created = Field(DateTime,default=datetime.now)
+       loginbase = Field(String,default=None)
        hostname = Field(String,default=None)
-       loginbase = Field(String)
+       # NOTE:
+       #       the expected kinds of actions are:
+       #               * reboot node
+       #               * open ticket, send notice 
+       #               * close ticket
+       #               * apply penalty to site
+       #               * backoff penalty to site
+       action = Field(String)
+
+       # NOTE: describes the kind of action.  i.e. online-notice, offline-notice,
+       # reboot-first-try, reboot-second-try, penalty-pause, penalty-warning, penalty-no-create,
+       # penalty-disable-slices, 
+       action_type = Field(String, default=None)
+
+       message_id = Field(Integer, default=0)
+       penalty_level = Field(Integer, default=0)
+
+       # NOTE: in case an exception is thrown while trying to perform an action.
+       error_string = Field(String, default=None)
 
        #issue = ManyToOne('IssueRecord')
        # NOTE: this is the parent relation to fb records.  first create the
@@ -61,15 +80,15 @@ class ActionRecord(Entity):
        #  OR
        #    - find fbnode records
        #    - create action record with fbnodes as argument
-       findbad_records = OneToMany('FindbadNodeRecord', order_by='-date_checked')
+       findbad_records = OneToMany('FindbadNodeRecord', order_by='-date_checked')
 
        # NOTE: can I move 'message_index, escellation_level, and penalty_level'
        #    into the same value?  Maybe not penalty level, since there are only two;
        #    and, there may be additional message and escellation levels.
-       send_email_to = Field(PickleType, default=None)
-       action_description = Field(PickleType, default=None)
-       message_arguments = Field(PickleType, default=None)
+       #send_email_to = Field(PickleType, default=None)
+       #action_description = Field(PickleType, default=None)
+       #message_arguments = Field(PickleType, default=None)
 
        # NOTE: not sure this needs to be in the db.
-       escellation_level = Field(Integer, default=0)
-       stage = Field(String, default=None)
+       #escellation_level = Field(Integer, default=0)
+       #stage = Field(String, default=None)
index e58ef3a..66859b1 100644 (file)
@@ -80,7 +80,7 @@ class FindbadNodeRecord(Entity):
        observed_status = Field(String,default=None)
 
        # NOTE: this is the child relation
-       action = ManyToOne('ActionRecord', required=False)
+       #action = ManyToOne('ActionRecord', required=False)
 
 class FindbadPCURecord(Entity):
        @classmethod
index dc53860..e31be2e 100644 (file)
@@ -50,6 +50,13 @@ class HistorySiteRecord(Entity):
 
        status = Field(String,default="unknown")
 
+       message_id = Field(Int, default=0)
+       message_status = Field(String, default=None)
+       message_queue = Field(String, default=None) 
+       message_created = Field(DateTime, default=None)
+
+       penalty_level = Field(Int, default=0)
+
        @classmethod
        def by_loginbase(cls, loginbase):
                return cls.query.filter_by(loginbase=loginbase).first()
index a34c16c..289fb47 100755 (executable)
@@ -67,13 +67,13 @@ def reboot_str(nodename):
        if not pcu:
                logger.debug("no pcu for %s" % nodename)
                print "no pcu for %s" % nodename
-               return False # "%s has no pcu" % nodename
+               return "%s has no pcu" % nodename
 
        values = get_pcu_values(pcu['pcu_id'])
        if values == None:
                logger.debug("No values for pcu probe %s" % nodename)
                print "No values for pcu probe %s" % nodename
-               return False #"no info for pcu_id %s" % pcu['pcu_id']
+               return "no info for pcu_id %s" % pcu['pcu_id']
        
        # Try the PCU first
        logger.debug("Trying PCU %s %s" % (pcu['hostname'], pcu['model']))
index d1bccaa..675068a 100644 (file)
@@ -207,6 +207,65 @@ ERROR-        This is an error state, where there is absolutely no contact
            with PlanetLab.
        """)
 
+       pcufailed_notice =("""Could not use PCU to reboot %(hostname)s""",
+
+"""As part of PlanetLab node monitoring and maintenance, we tried to use the PCU
+registered for %(hostname)s, but could not for some reason.
+
+Please help.
+
+Thank you very much for your help,
+  -- PlanetLab Central (support@planet-lab.org)
+""")
+       online_notice=("""Host %(hostname)s is online""",
+       """
+This notice is simply to let you know that:
+    %(hostname)s
+
+is online and operational.  Thank you very much for your help!
+       """)
+       test_notice=("""Host %(hostname)s is testing""",
+       """
+This notice is simply to test whether notices work.
+    %(hostname)s
+
+Thank you very much for your help!
+       """)
+       offline_notice=("""Host %(hostname)s is offline""",
+       """
+This notice is simply to let you know that:
+    %(hostname)s
+
+is offline and or non-operational.  Please investigate, thank you very much for your help!
+       """)
+
+       clear_penalty=("""All penalties have been cleared from site %(loginbase)s""",
+       """
+This notice is to let you know that any penalties previously applied to your site have 
+been removed: %(penalty_level)s.
+
+All privileges have been restored.  If your slices were disabled, please allow
+up to 30 minutes for them to return to enabled.
+
+Legend:
+
+  0  - no penalties applied
+  1  - site is disabled.  no new slices can be created.
+  2+ - all existing slices will be disabled.
+       """)
+
+       increase_penalty=("""Penalty increased for site %(loginbase)s""",
+       """
+This notice is to let you know that the penalty applied to your site has
+increased: %(penalty_level)s.
+
+legend:
+
+  0  - no penalty applied
+  1  - site is disabled.  no new slices can be created.
+  2+ - all existing slices will be disabled.
+       """)
+
        nmreset =("""NM Reset at %(loginbase)s""",
        """
 Monitor restarted NM on the following machines:
index 3efd791..db71b16 100755 (executable)
@@ -90,7 +90,7 @@ def init():
        api = plc.getCachedAuthAPI()
        l_sites = api.GetSites({'peer_id':None}, 
                                                        ['login_base', 'site_id', 'abbreviated_name', 'latitude', 
-                                                       'longitude', 'max_slices', 'slice_ids', 'node_ids', 'enabled' ])
+                                                       'longitude', 'max_slices', 'slice_ids', 'node_ids', 'enabled', 'date_created' ])
        l_nodes = api.GetNodes({'peer_id':None}, 
                                                        ['hostname', 'node_id', 'ports', 'site_id', 'version', 'last_updated', 
                                                         'date_created', 'last_contact', 'pcu_ids', 'nodenetwork_ids'])
index 767a4fe..90c3be0 100755 (executable)
@@ -22,33 +22,77 @@ api = plc.getAuthAPI()
 
 round = 1
 count = 0
+def main():
+       main2(config)
 
-def main(config):
+def main2(config):
 
        l_plcnodes = plccache.l_nodes
        l_nodes = get_nodeset(config)
        
        checkAndRecordState(l_nodes, l_plcnodes)
 
+# Node states:
+
+def check_node_state(rec, node):
+
+       node_state = rec.observed_status
+       if rec.plc_node_stats:
+               boot_state = rec.plc_node_stats['boot_state']
+               last_contact = rec.plc_node_stats['last_contact']
+       else:
+               boot_state = "unknown"
+               last_contact = None
+
+       if node_state == 'DOWN' and ( node.status == 'online' or node.status == 'good' ):
+               print "changed status from %s to offline" % node.status
+               node.status = 'offline'
+               node.last_changed = datetime.now()
+
+       if node_state == 'BOOT' and changed_lessthan(node.last_changed, 0.5) and node.status != 'online':
+               print "changed status from %s to online" % node.status
+               node.status = 'online'
+               node.last_changed = datetime.now()
+
+       if node.status == 'online' and changed_greaterthan(node.last_changed, 0.5):
+               #send thank you notice, or on-line notice.
+               print "changed status from %s to good" % node.status
+               node.status = 'good'
+               # NOTE: do not reset last_changed, or you lose how long it's been up.
+
+       #if node.status == 'offline' and changed_greaterthan(node.last_changed, 1): #  and pcu.status == 'good' 
+       #       # attempt reboots
+       #       pass
+       #if node.status == 'offline' and changed_greaterthan(node.last_changed, 1.5): # and node.has_pcu
+       #       # send PCU failure message
+       #       pass
+
+       if node.status == 'offline' and changed_greaterthan(node.last_changed, 2):
+               print "changed status from %s to down" % node.status
+               # send down node notice
+               node.status = 'down'
+               node.last_changed = datetime.now()
+
+       if ( boot_state == 'disabled' or last_contact == None ) and \
+                       changed_greaterthan(node.last_changed, 2*30) and \
+                       node.status != 'down':
+               print "changed status from %s to down" % node.status
+               node.status = 'down'
+               node.last_changed = datetime.now()
+
 def checkAndRecordState(l_nodes, l_plcnodes):
        global count
 
        for nodename in l_nodes:
-               d_node = None
-               for node in l_plcnodes:
-                       if node['hostname'] == nodename:
-                               d_node = node
-                               break
-               if not d_node:
-                       continue
 
-               pf = HistoryNodeRecord.findby_or_create(hostname=nodename)
-               pf.last_checked = datetime.now()
+               nodehist = HistoryNodeRecord.findby_or_create(hostname=nodename, 
+                                                       if_new_set={'status' : 'offline', 
+                                                                               'last_changed' : datetime.now()})
+               nodehist.last_checked = datetime.now()
 
                try:
                        # Find the most recent record
                        noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==nodename).order_by(FindbadNodeRecord.date_checked.desc()).first()
-                       #print "NODEREC: ", noderec.date_checked
                except:
                        print "COULD NOT FIND %s" % nodename
                        import traceback
@@ -59,33 +103,16 @@ def checkAndRecordState(l_nodes, l_plcnodes):
                        print "none object for %s"% nodename
                        continue
 
-               node_state = noderec.observed_status
-               if noderec.plc_node_stats:
-                       boot_state = noderec.plc_node_stats['boot_state']
-               else:
-                       boot_state = "unknown"
-
-               if node_state == "BOOT":
-                       if pf.status != "good": 
-                               pf.last_changed = datetime.now()
-                               pf.status = "good"
-               elif node_state == "DEBUG":
-                       if pf.status != boot_state: 
-                               pf.last_changed = datetime.now()
-                               pf.status = boot_state
-               else:
-                       if pf.status != "down": 
-                               pf.last_changed = datetime.now()
-                               pf.status = "down"
+               check_node_state(noderec, nodehist)
 
                count += 1
-               print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(time.mktime(pf.last_changed.timetuple())))
+               print "%d %35s %s since(%s)" % (count, nodename, nodehist.status, diff_time(time.mktime(nodehist.last_changed.timetuple())))
 
        # NOTE: this commits all pending operations to the DB.  Do not remove, or
        # replace with another operations that also commits all pending ops, such
        # as session.commit() or flush() or something
-       print HistoryNodeRecord.query.count()
        session.flush()
+       print HistoryNodeRecord.query.count()
 
        return True
 
@@ -97,7 +124,7 @@ if __name__ == '__main__':
        config = parsermodule.parse_args(parser)
 
        try:
-               main(config)
+               main2(config)
        except Exception, err:
                import traceback
                print traceback.print_exc()
index 13fce72..5d14475 100755 (executable)
--- a/pcubad.py
+++ b/pcubad.py
@@ -4,6 +4,7 @@ import os
 import sys
 import string
 import time
+import sets
 from datetime import datetime,timedelta
 
 from monitor import database
@@ -21,12 +22,23 @@ from monitor.model import *
 
 api = plc.getAuthAPI()
 
-def main(config):
+def main():
+       main2(config)
+
+def main2(config):
 
        l_plcpcus = plccache.l_pcus 
 
        l_pcus = None
-       if config.pcu:
+       if config.site is not None:
+               site = api.GetSites(config.site)
+               l_nodes = api.GetNodes(site[0]['node_ids'], ['pcu_ids'])
+               pcus = []
+               for node in l_nodes:
+                       pcus += node['pcu_ids']
+               # clear out dups.
+               l_pcus = [pcu for pcu in sets.Set(pcus)]
+       elif config.pcu:
                for pcu in l_plcpcus:
                        if ( pcu['hostname'] is not None and config.pcu in pcu['hostname'] ) or \
                           ( pcu['ip'] is not None and config.pcu in pcu['ip'] ):
@@ -41,6 +53,38 @@ def main(config):
 
 hn2lb = plccache.plcdb_hn2lb
 
+def check_pcu_state(rec, pcu):
+
+       pcu_state = rec.reboot_trial_status
+
+       if ( pcu_state == 'NetDown' or pcu_state == 'Not_Run' or not ( pcu_state == 0 or pcu_state == "0" ) ) and \
+                       ( pcu.status == 'online' or pcu.status == 'good' ):
+               print "changed status from %s to offline" % pcu.status
+               pcu.status = 'offline'
+               pcu.last_changed = datetime.now()
+
+       if ( pcu_state == 0 or pcu_state == "0" ) and changed_lessthan(pcu.last_changed, 0.5) and pcu.status != 'online':
+               print "changed status from %s to online" % pcu.status
+               pcu.status = 'online'
+               pcu.last_changed = datetime.now()
+
+       if pcu.status == 'online' and changed_greaterthan(pcu.last_changed, 0.5):
+               #send thank you notice, or on-line notice.
+               print "changed status from %s to good" % pcu.status
+               pcu.status = 'good'
+               # NOTE: do not reset last_changed, or you lose how long it's been up.
+
+       if pcu.status == 'offline' and changed_greaterthan(pcu.last_changed, 2):
+               # send down pcu notice
+               print "changed status from %s to down" % pcu.status
+               pcu.status = 'down'
+               pcu.last_changed = datetime.now()
+
+       if ( pcu.status == 'offline' or pcu.status == 'down' ) and changed_greaterthan(pcu.last_changed, 2*30):
+               print "changed status from %s to down" % pcu.status
+               pcu.status = 'down'
+               pcu.last_changed = datetime.now()
+
 def checkAndRecordState(l_pcus, l_plcpcus):
        count = 0
        for pcuname in l_pcus:
@@ -53,63 +97,52 @@ def checkAndRecordState(l_pcus, l_plcpcus):
                if not d_pcu:
                        continue
 
-               pf = HistoryPCURecord.findby_or_create(plc_pcuid=d_pcu['pcu_id'])
-               pf.last_checked = datetime.now()
+               pcuhist = HistoryPCURecord.findby_or_create(plc_pcuid=d_pcu['pcu_id'], 
+                                                                       if_new_set={'status' : 'offline', 
+                                                                                               'last_changed' : datetime.now()})
+               pcuhist.last_checked = datetime.now()
 
                try:
                        # Find the most recent record
                        pcurec = FindbadPCURecord.query.filter(FindbadPCURecord.plc_pcuid==pcuname).order_by(FindbadPCURecord.date_checked.desc()).first()
-                       print "NODEREC: ", pcurec.date_checked
                except:
-                       print "COULD NOT FIND FB record for %s" % reboot.pcu_name(pcu)
+                       print "COULD NOT FIND FB record for %s" % reboot.pcu_name(d_pcu)
                        import traceback
                        print traceback.print_exc()
                        # don't have the info to create a new entry right now, so continue.
                        continue 
 
-               pcu_state      = pcurec.reboot_trial_status
-               current_state = pcu_state
-
-               if current_state == 0 or current_state == "0":
-                       if pf.status != "good": 
-                               pf.last_changed = datetime.now() 
-                               pf.status = "good"
-               elif current_state == 'NetDown':
-                       if pf.status != "netdown": 
-                               pf.last_changed = datetime.now()
-                               pf.status = "netdown"
-               elif current_state == 'Not_Run':
-                       if pf.status != "badconfig": 
-                               pf.last_changed = datetime.now()
-                               pf.status = "badconfig"
-               else:
-                       if pf.status != "error": 
-                               pf.last_changed = datetime.now()
-                               pf.status = "error"
+               if not pcurec:
+                       print "none object for pcu %s"% reboot.pcu_name(d_pcu)
+                       continue
+
+               check_pcu_state(pcurec, pcuhist)
 
                count += 1
-               print "%d %35s %s since(%s)" % (count, reboot.pcu_name(d_pcu), pf.status, diff_time(time.mktime(pf.last_changed.timetuple())))
+               print "%d %35s %s since(%s)" % (count, reboot.pcu_name(d_pcu), pcuhist.status, diff_time(time.mktime(pcuhist.last_changed.timetuple())))
 
        # NOTE: this commits all pending operations to the DB.  Do not remove, or
        # replace with another operations that also commits all pending ops, such
        # as session.commit() or flush() or something
-       print HistoryPCURecord.query.count()
        session.flush()
+       print HistoryPCURecord.query.count()
 
        return True
 
 if __name__ == '__main__':
        parser = parsermodule.getParser()
-       parser.set_defaults(filename=None, pcu=None, pcuselect=False, pcugroup=None, cachepcus=False)
+       parser.set_defaults(filename=None, pcu=None, site=None, pcuselect=False, pcugroup=None, cachepcus=False)
        parser.add_option("", "--pcu", dest="pcu", metavar="hostname", 
                                                help="Provide a single pcu to operate on")
+       parser.add_option("", "--site", dest="site", metavar="sitename", 
+                                               help="Provide a single sitename to operate on")
        parser.add_option("", "--pculist", dest="pculist", metavar="file.list", 
                                                help="Provide a list of files to operate on")
 
        config = parsermodule.parse_args(parser)
 
        try:
-               main(config)
+               main2(config)
        except Exception, err:
                import traceback
                print traceback.print_exc()
index 15c8f1d..5a2f3be 100755 (executable)
@@ -9,7 +9,7 @@ from datetime import datetime,timedelta
 from monitor import database
 from monitor import parser as parsermodule
 from monitor import config
-from monitor.database.info.model import HistorySiteRecord, FindbadNodeRecord, session
+from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session
 from monitor.wrapper import plc, plccache
 from monitor.const import MINUP
 
@@ -36,33 +36,52 @@ def main2(config):
        
        checkAndRecordState(l_sites, l_plcsites)
 
-def getnewsite(nodelist):
-       new = True
-       for node in nodelist:
-               try:
-                       noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node['hostname']).order_by(FindbadNodeRecord.date_checked.desc()).first()
-                       if noderec is not None and \
-                               noderec.plc_node_stats['last_contact'] != None:
-                               new = False
-               except:
-                       import traceback
-                       print traceback.print_exc()
-       return new
-
 def getnodesup(nodelist):
        up = 0
        for node in nodelist:
                try:
-                       noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node['hostname']).order_by(FindbadNodeRecord.date_checked.desc()).first()
-                       #noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==node['hostname'], 
-                       #                                                                  orderBy='date_checked').reversed()[0]
-                       if noderec is not None and noderec.observed_status == "BOOT":
+                       nodehist = HistoryNodeRecord.findby_or_create(hostname=node['hostname'])
+                       if nodehist is not None and nodehist.status == "good":
                                up = up + 1
                except:
                        import traceback
                        print traceback.print_exc()
        return up
 
+def check_site_state(rec, sitehist):
+
+       if sitehist.new and sitehist.status != 'new':
+               sitehist.status = 'new'
+               sitehist.last_changed = datetime.now()
+
+       if not sitehist.new:
+
+               if sitehist.nodes_up >= MINUP:
+
+                       if sitehist.status != 'online' and sitehist.status != 'good':
+                               sitehist.last_changed = datetime.now()
+
+                       if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online':
+                               print "changed status from %s to online" % sitehist.status
+                               sitehist.status = 'online'
+
+                       if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good':
+                               print "changed status from %s to good" % sitehist.status
+                               sitehist.status = 'good'
+       
+               else: # sitehist.nodes_up < MINUP:
+
+                       if sitehist.status != 'offline' and sitehist.status != 'down':
+                               sitehist.last_changed = datetime.now()
+
+                       if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline':
+                               print "changed status from %s to offline" % sitehist.status
+                               sitehist.status = 'offline'
+
+                       if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down':
+                               print "changed status from %s to down" % sitehist.status
+                               sitehist.status = 'down'
+
 def checkAndRecordState(l_sites, l_plcsites):
        count = 0
        lb2hn = plccache.plcdb_lb2hn
@@ -76,27 +95,32 @@ def checkAndRecordState(l_sites, l_plcsites):
                        continue
 
                if sitename in lb2hn:
-                       pf = HistorySiteRecord.findby_or_create(loginbase=sitename)
-
-                       pf.last_checked = datetime.now()
-                       pf.slices_total = d_site['max_slices']
-                       pf.slices_used = len(d_site['slice_ids'])
-                       pf.nodes_total = len(lb2hn[sitename])
-                       pf.nodes_up = getnodesup(lb2hn[sitename])
-                       pf.new = getnewsite(lb2hn[sitename])
-                       pf.enabled = d_site['enabled']
-
-                       if pf.nodes_up >= MINUP:
-                               if pf.status != "good": pf.last_changed = datetime.now()
-                               pf.status = "good"
-                       else:
-                               if pf.status != "down": pf.last_changed = datetime.now()
-                               pf.status = "down"
+                       sitehist = HistorySiteRecord.findby_or_create(loginbase=sitename,
+                                                                                               if_new_set={'status' : 'unknown', 
+                                                                                                                       'last_changed' : datetime.now(),
+                                                                                                                       'message_id': 0,
+                                                                                                                       'penalty_level' : 0})
+                       sitehist.last_checked = datetime.now()
+
+                       sitehist.slices_total = d_site['max_slices']
+                       sitehist.slices_used = len(d_site['slice_ids'])
+                       sitehist.nodes_total = len(lb2hn[sitename])
+                       if sitehist.message_id != 0:
+                               rtstatus = mailer.getTicketStatus(sitehist.message_id)
+                               sitehist.message_status = rtstatus['Status']
+                               sitehist.message_queue = rtstatus['Queue']
+                               sitehist.message_created = datetime.fromtimestamp(rtstatus['Created'])
+
+                       sitehist.nodes_up = getnodesup(lb2hn[sitename])
+                       sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30) # created < 30 days ago
+                       sitehist.enabled = d_site['enabled']
+
+                       check_site_state(d_site, sitehist)
 
                        count += 1
-                       print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, 
-                                                                                       pf.nodes_total, pf.nodes_up, pf.status)
-                       pf.flush()
+                       print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, sitehist.slices_used, 
+                                                                                       sitehist.nodes_total, sitehist.nodes_up, sitehist.status)
+                       sitehist.flush()
 
        print HistorySiteRecord.query.count()
        session.flush()
index a95f3d6..2112979 100644 (file)
@@ -389,7 +389,7 @@ class Root(controllers.RootController):
 
        @expose(template="monitorweb.templates.sitelist")
        def site(self, filter='all'):
-               filtercount = {'good' : 0, 'down': 0, 'new' : 0, 'pending' : 0, 'all' : 0}
+               filtercount = {'good' : 0, 'down': 0, 'online':0, 'offline' : 0, 'new' : 0, 'pending' : 0, 'all' : 0}
                fbquery = HistorySiteRecord.query.all()
                query = []
                for site in fbquery:
index df07184..473a4d9 100644 (file)
@@ -102,9 +102,16 @@ a.right { float: right; }
 #status-error  { background-color: indianred; }\r
 #status-none   { background-color: white; }\r
 \r
+#site-new { background-color: gold; }\r
 #site-good { background-color : darkseagreen; }\r
+#site-online { background-color : lightgreen; }\r
+#site-offline { background-color: red; }\r
 #site-down { background-color: indianred; }\r
 \r
+#site-0 { background-color : white; }\r
+#site-1 { background-color: gold; }\r
+#site-2 { background-color: indianred; }\r
+\r
 #node-BOOT { background-color: darkseagreen; }\r
 #node-DOWN { background-color: indianred; }\r
 #node-DEBUG { background-color: gold; }\r
index 9740ea0..c9e1bd4 100644 (file)
@@ -30,7 +30,7 @@ from links import *
                                                        <span class="icon">${site.loginbase}</span></a>
                                        </td>
                                        <td py:content="site.enabled"></td>
-                                       <td>n/a</td>
+                                       <td id="site-${site.penalty_level}">${site.penalty_level}</td>
                                        <td>${site.slices_used}/${site.slices_total}</td>
                                        <td>${site.nodes_up} / ${site.nodes_total}</td>
                                        <td id="site-${site.status}" py:content="diff_time(mktime(site.last_changed.timetuple()))"></td>
index a9b7685..a2bac31 100644 (file)
@@ -46,7 +46,7 @@ from links import *
                                                </div>
                                        </td>
                                        <td py:content="site.enabled"></td>
-                                       <td>n/a</td>
+                                       <td id="site-${site.penalty_level}">${site.penalty_level}</td>
                                        <td>${site.slices_used}/${site.slices_total}</td>
                                        <td>${site.nodes_up} / ${site.nodes_total}</td>
                                        <td id="site-${site.status}" py:content="diff_time(mktime(site.last_changed.timetuple()))"></td>