just commit everything...
author Stephen Soltesz <soltesz@cs.princeton.edu>
Thu, 9 Oct 2008 20:58:59 +0000 (20:58 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Thu, 9 Oct 2008 20:58:59 +0000 (20:58 +0000)
bootman.py
config.py
moncommands.py
monitor/database.py
nodebad.py
nodequery.py
pcubad.py
reboot.py
sitebad.py
www/gadgets/sitemonitor.py
www/runlevels.py

index ff2a6d5..e7a47c3 100755 (executable)
--- a/bootman.py
+++ b/bootman.py
@@ -505,6 +505,7 @@ def reboot(hostname, config=None, forced_action=None):
                        ('nodehostname' , 'Configured node hostname does not resolve'),
                        ('implementerror', 'Implementation Error'),
                        ('readonlyfs'   , '[Errno 30] Read-only file system'),
+                       ('baddisk'      , "IOError: [Errno 13] Permission denied: '/tmp/mnt/sysimg//vservers/\w+/etc/hosts'"),
                        ('noinstall'    , 'notinstalled'),
                        ('bziperror'    , 'bzip2: Data integrity error when decompressing.'),
                        ('noblockdev'   , "No block devices detected."),
@@ -514,6 +515,7 @@ def reboot(hostname, config=None, forced_action=None):
                        ('hardwarerequirefail' , 'Hardware requirements not met'),
                        ('mkfsfail'         , 'while running: Running mkfs.ext2 -q  -m 0 -j /dev/planetlab/vservers failed'),
                        ('nofilereference', "No such file or directory: '/tmp/mnt/sysimg//vservers/.vref/planetlab-f8-i386/etc/hosts'"),
+                       ('kernelcopyfail', "cp: cannot stat `/tmp/mnt/sysimg/boot/kernel-boot': No such file or directory"),
                        ('chrootfail'   , 'Running chroot /tmp/mnt/sysimg'),
                        ('modulefail'   , 'Unable to get list of system modules'),
                        ('writeerror'   , 'write error: No space left on device'),
@@ -583,6 +585,7 @@ def reboot(hostname, config=None, forced_action=None):
                        "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-update3-implementerror-nofilereference-update-debug-done",
                        "bminit-cfg-auth-getplc-update-hardware-installinit-installdisk-exception-mkfsfail-update-debug-done",
                        "bminit-cfg-auth-getplc-installinit-validate-rebuildinitrd-exception-chrootfail-update-debug-done",
+                       "bminit-cfg-auth-getplc-update-installinit-validate-rebuildinitrd-netcfg-disk-update4-exception-chrootfail-update-debug-done",
                        "bminit-cfg-auth-getplc-installinit-validate-exception-noinstall-update-debug-done",
                        ]:
                sequences.update({n : "restart_bootmanager_rins"})
index b37e04a..0cde6f3 100644 (file)
--- a/config.py
+++ b/config.py
@@ -50,6 +50,7 @@ if not config.imported:
        options = Options()
        update_section(options, 'commandline', True)
        update_section(options, 'monitorconfig')
+       update_section(options, 'monitordatabase')
 
 #for i in dir(config):
 #      if "__" not  in i:
index 869cc96..1b67570 100644 (file)
--- a/moncommands.py
+++ b/moncommands.py
@@ -66,16 +66,14 @@ class CMD:
                o_value = ""
                e_value = ""
 
-               #print "reading from f_out"
-               if len(lout) > 0: o_value = f_out.read()
-               #print "reading from f_err"
-               if len(lerr) > 0: e_value = f_err.read()
+               o_value = f_out.read()
+               e_value = f_err.read()
 
                #print "striping output"
                o_value = o_value.strip()
                e_value = e_value.strip()
 
-               #print "OUTPUT", o_value, e_value
+               #print "OUTPUT -%s-%s-" % (o_value, e_value)
 
                #print "closing files"
                f_out.close()
index 3b5bd65..e127791 100644 (file)
--- a/monitor/database.py
+++ b/monitor/database.py
@@ -3,8 +3,8 @@ import sys
 import pickle
 noserial=False
 try:
-       from PHPSerialize import *
-       from PHPUnserialize import *
+       from util.PHPSerialize import *
+       from util.PHPUnserialize import *
 except:
        #print >>sys.stderr, "PHPSerial db type not allowed."
        noserial=True
@@ -15,7 +15,7 @@ import config
 import config as monitorconfig
 
 DEBUG= 0
-PICKLE_PATH=monitorconfig.MONITOR_DATA_ROOT
+PICKLE_PATH=config.MONITOR_DATA_ROOT
 
 
 def dbLoad(name, type=None):
@@ -90,6 +90,7 @@ class SPickle:
                Otherwise, it's normal mode, if the file doesn't exist, raise error
                Load the file
                """
+               print "loading %s" % name
 
                if config.debug:
                        if self.exists("debug.%s" % name, type):
index 0130c3e..8d7650c 100755 (executable)
--- a/nodebad.py
+++ b/nodebad.py
@@ -12,6 +12,14 @@ import threadpool
 import syncplcdb
 from nodequery import verify,query_to_dict,node_select
 from nodecommon import *
+from datetime import datetime,timedelta
+import config
+
+from sqlobject import connectionForURI,sqlhub
+connection = connectionForURI(config.sqlobjecturi)
+sqlhub.processConnection = connection
+from infovacuum.model_findbadrecord import *
+from infovacuum.model_historyrecord import *
 
 import plc
 api = plc.getAuthAPI()
@@ -19,129 +27,71 @@ from unified_model import *
 from const import MINUP
 
 round = 1
-externalState = {'round': round, 'nodes': {}}
 count = 0
 
 def main(config):
-       global externalState
-       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
-       if config.increment:
-               # update global round number to force refreshes across all nodes
-               externalState['round'] += 1
 
        l_nodes = syncplcdb.create_plcdb()
        l_plcnodes = database.dbLoad("l_plcnodes")
-
        l_nodes = get_nodeset(config)
-       #if config.node:
-       #       l_nodes = [config.node]
-       ##else:
-       #       l_nodes = [node['hostname'] for node in l_plcnodes]
        
        checkAndRecordState(l_nodes, l_plcnodes)
 
 def checkAndRecordState(l_nodes, l_plcnodes):
-       global externalState
        global count
-       global_round = externalState['round']
 
        for nodename in l_nodes:
-               if nodename not in externalState['nodes']:
-                       externalState['nodes'][nodename] = {'round': 0, 'values': []}
-
-               node_round   = externalState['nodes'][nodename]['round']
-               if node_round < global_round:
-                       # do work
-                       values = collectStatusAndState(nodename, l_plcnodes)
-                       global_round = externalState['round']
-                       externalState['nodes'][nodename]['values'] = values
-                       externalState['nodes'][nodename]['round'] = global_round
+               d_node = None
+               for node in l_plcnodes:
+                       if node['hostname'] == nodename:
+                               d_node = node
+                               break
+               if not d_node:
+                       continue
+
+               try:
+                       pf = HistoryNodeRecord.by_hostname(nodename)
+               except:
+                       pf = HistoryNodeRecord(hostname=nodename)
+
+               pf.last_checked = datetime.now()
+
+               try:
+                       # Find the most recent record
+                       noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==nodename, 
+                                                                                          orderBy='date_checked').reversed()[0]
+               except:
+                       # or create an empty one.
+                       noderec = FindbadNodeRecord(hostname=nodename)
+
+               node_state = noderec.observed_status
+               if noderec.plc_node_stats:
+                       boot_state = noderec.plc_node_stats['boot_state']
                else:
-                       count += 1
-
-               if count % 20 == 0:
-                       database.dbDump(config.dbname, externalState)
-
-       database.dbDump(config.dbname, externalState)
-
-fb = database.dbLoad('findbad')
-
-def getnodesup(nodelist):
-       up = 0
-       for node in nodelist:
-               if node['hostname'] in fb['nodes'].keys():
-                       try:
-                               if fb['nodes'][node['hostname']]['values']['state'] == "BOOT":
-                                       up = up + 1
-                       except:
-                               pass
-       return up
-
-def get(fb, path):
-       indexes = path.split("/")
-       values = fb
-       for index in indexes:
-               if index in values:
-                       values = values[index]
+                       boot_state = "unknown"
+
+               if node_state == "BOOT":
+                       if pf.status != "good": 
+                               pf.last_changed = datetime.now()
+                               pf.status = "good"
+               elif node_state == "DEBUG":
+                       if pf.status != boot_state: 
+                               pf.last_changed = datetime.now()
+                               pf.status = boot_state
                else:
-                       return None
-       return values
+                       if pf.status != "down": 
+                               pf.last_changed = datetime.now()
+                               pf.status = "down"
 
-def collectStatusAndState(nodename, l_plcnodes):
-       global count
-
-       d_node = None
-       for node in l_plcnodes:
-               if node['hostname'] == nodename:
-                       d_node = node
-                       break
-       if not d_node:
-               return None
-
-       pf = PersistFlags(nodename, 1, db='node_persistflags')
-
-       if not pf.checkattr('last_changed'):
-               pf.last_changed = time.time()
-               
-       pf.last_checked = time.time()
-
-       if not pf.checkattr('status'):
-               pf.status = "unknown"
-
-       state_path     = "nodes/" + nodename + "/values/state"
-       bootstate_path = "nodes/" + nodename + "/values/plcnode/boot_state"
-
-       if get(fb, state_path) == "BOOT":
-               if pf.status != "good": pf.last_changed = time.time()
-               pf.status = "good"
-       elif get(fb, state_path)  == "DEBUG":
-               bs = get(fb, bootstate_path)
-               if pf.status != bs: pf.last_changed = time.time()
-               pf.status = bs
-       else:
-               if pf.status != "down": pf.last_changed = time.time()
-               pf.status = "down"
-
-       count += 1
-       print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(pf.last_changed))
-       # updated by other modules
-       #pf.enabled = 
-       #pf.suspended = 
-
-       pf.save()
+               count += 1
+               print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(time.mktime(pf.last_changed.timetuple())))
 
        return True
 
 if __name__ == '__main__':
        import parser as parsermodule
        parser = parsermodule.getParser(['nodesets'])
-       parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, 
-                                               increment=False, dbname="nodebad", cachenodes=False)
-       
-       parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
-                                               help="Specify the name of the database to which the information is saved")
-       parser.add_option("-i", "--increment", action="store_true", dest="increment", 
-                                               help="Increment round number to force refresh or retry")
+       parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, cachenodes=False)
        parser = parsermodule.getParser(['defaults'], parser)
        config = parsermodule.parse_args(parser)
 
@@ -151,6 +101,4 @@ if __name__ == '__main__':
                import traceback
                print traceback.print_exc()
                print "Exception: %s" % err
-               print "Saving data... exitting."
-               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index 16c0bad..691ead5 100755 (executable)
--- a/nodequery.py
+++ b/nodequery.py
@@ -13,13 +13,22 @@ import os
 from reboot import pcu_name
 import reboot
 import util.file
+import traceback
 
 import time
 import re
 
+import config
+
+from sqlobject import connectionForURI,sqlhub
+connection = connectionForURI(config.sqlobjecturi)
+sqlhub.processConnection = connection
+from infovacuum.model.findbadrecord import *
+
 #fb = {}
 fb = None
 fbpcu = None
+import string
 
 class NoKeyException(Exception): pass
 
@@ -31,20 +40,26 @@ def daysdown_print_nodeinfo(fbnode, hostname):
        print "%(intdaysdown)5s %(hostname)-44s | %(state)10.10s | %(daysdown)s" % fbnode
 
 def fb_print_nodeinfo(fbnode, hostname, fields=None):
-       fbnode['hostname'] = hostname
-       fbnode['checked'] = diff_time(fbnode['checked'])
-       if fbnode['bootcd']:
-               fbnode['bootcd'] = fbnode['bootcd'].split()[-1]
+       #fbnode['hostname'] = hostname
+       #fbnode['checked'] = diff_time(fbnode['checked'])
+       if fbnode['bootcd_version']:
+               fbnode['bootcd_version'] = fbnode['bootcd_version'].split()[-1]
        else:
-               fbnode['bootcd'] = "unknown"
+               fbnode['bootcd_version'] = "unknown"
        fbnode['pcu'] = color_pcu_state(fbnode)
 
        if not fields:
-               if 'ERROR' in fbnode['category']:
-                       fbnode['kernel'] = ""
+               if ( fbnode['observed_status'] is not None and \
+                  'DOWN' in fbnode['observed_status'] ) or \
+                  fbnode['kernel_version'] is None:
+                       fbnode['kernel_version'] = ""
                else:
-                       fbnode['kernel'] = fbnode['kernel'].split()[2]
-               fbnode['boot_state'] = fbnode['plcnode']['boot_state']
+                       fbnode['kernel_version'] = fbnode['kernel_version'].split()[2]
+
+               if fbnode['plc_node_stats'] is not None:
+                       fbnode['boot_state'] = fbnode['plc_node_stats']['boot_state']
+               else:
+                       fbnode['boot_state'] = "unknown"
 
                try:
                        if len(fbnode['nodegroups']) > 0:
@@ -53,7 +68,7 @@ def fb_print_nodeinfo(fbnode, hostname, fields=None):
                        #print "ERROR!!!!!!!!!!!!!!!!!!!!!"
                        pass
 
-               print "%(hostname)-45s | %(checked)11.11s | %(boot_state)5.5s| %(state)8.8s | %(ssh)5.5s | %(pcu)6.6s | %(bootcd)6.6s | %(category)8.8s | %(kernel)s" % fbnode
+               print "%(hostname)-45s | %(date_checked)11.11s | %(boot_state)5.5s| %(observed_status)8.8s | %(ssh_status)5.5s | %(pcu)6.6s | %(bootcd_version)6.6s | %(kernel_version)s" % fbnode
        else:
                format = ""
                for f in fields:
@@ -133,6 +148,65 @@ def verifyType(constraints, data):
 
        return con_or_true
 
+def verifyDBrecord(constraints, record):
+       """
+               constraints is a list of key, value pairs.
+               # [ {... : ...}==AND , ... , ... , ] == OR
+       """
+       def has_key(obj, key):
+               try:
+                       x = obj.__getattribute__(key)
+                       return True
+               except:
+                       return False
+
+       def get_val(obj, key):
+               try:
+                       return obj.__getattribute__(key)
+               except:
+                       return None
+
+       def get(obj, path):
+               indexes = path.split("/")
+               value = get_val(obj,indexes[0])
+               if value is not None and len(indexes) > 1:
+                       for key in indexes[1:]:
+                               if key in value:
+                                       value = value[key]
+                               else:
+                                       raise NoKeyException(key)
+               return value
+
+       #print constraints, record
+
+       con_or_true = False
+       for con in constraints:
+               #print "con: %s" % con
+               if len(con.keys()) == 0:
+                       con_and_true = False
+               else:
+                       con_and_true = True
+
+               for key in con.keys():
+                       #print "looking at key: %s" % key
+                       if has_key(record, key):
+                               value_re = re.compile(con[key])
+                               if type([]) == type(get(record,key)):
+                                       local_or_true = False
+                                       for val in get(record,key):
+                                               local_or_true = local_or_true | (value_re.search(val) is not None)
+                                       con_and_true = con_and_true & local_or_true
+                               else:
+                                       if get(record,key) is not None:
+                                               con_and_true = con_and_true & (value_re.search(get(record,key)) is not None)
+                       else:
+                               print "missing key %s" % key,
+                               pass
+
+               con_or_true = con_or_true | con_and_true
+
+       return con_or_true
+
 def verify(constraints, data):
        """
                constraints is a list of key, value pairs.
@@ -156,12 +230,11 @@ def verify(constraints, data):
                                                local_or_true = local_or_true | (value_re.search(val) is not None)
                                        con_and_true = con_and_true & local_or_true
                                else:
-                                       con_and_true = con_and_true & (value_re.search(data[key]) is not None)
+                                       if data[key] is not None:
+                                               con_and_true = con_and_true & (value_re.search(data[key]) is not None)
                        elif key not in data:
                                print "missing key %s" % key,
                                pass
-                               #print "missing key %s" % key
-                               #con_and_true = False
 
                con_or_true = con_or_true | con_and_true
 
@@ -239,18 +312,21 @@ def node_select(str_query, nodelist=None, fbdb=None):
        for node in fb['nodes'].keys():
                if nodelist is not None: 
                        if node not in nodelist: continue
-       
-               fb_nodeinfo  = fb['nodes'][node]['values']
 
-               if fb_nodeinfo == []:
-                       #print node, "has lost values"
+               try:
+                       fb_noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==node, 
+                                                                                          orderBy='date_checked').reversed()[0]
+               except:
                        continue
-                       #sys.exit(1)
-               fb_nodeinfo['pcu'] = color_pcu_state(fb_nodeinfo)
-               fb_nodeinfo['hostname'] = node
-               if 'plcnode' in fb_nodeinfo:
-                       fb_nodeinfo.update(fb_nodeinfo['plcnode'])
 
+               
+               fb_nodeinfo = fb_noderec.toDict()
+
+               #fb_nodeinfo['pcu'] = color_pcu_state(fb_nodeinfo)
+               #if 'plcnode' in fb_nodeinfo:
+               #       fb_nodeinfo.update(fb_nodeinfo['plcnode'])
+
+               #if verifyDBrecord(dict_query, fb_nodeinfo):
                if verify(dict_query, fb_nodeinfo):
                        #print node #fb_nodeinfo
                        hostnames.append(node)
@@ -300,6 +376,7 @@ def main():
                os.chdir("..")
                fb = archive.load(file[:-4])
        else:
+               fbnodes = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname, orderBy='date_checked',distinct=True).reversed()
                fb = database.dbLoad("findbad")
 
        fbpcu = database.dbLoad("findbadpcus")
@@ -329,7 +406,13 @@ def main():
                if node not in fb['nodes']:
                        continue
 
-               fb_nodeinfo  = fb['nodes'][node]['values']
+               try:
+                       # Find the most recent record
+                       fb_noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==node, 
+                                                                                          orderBy='date_checked').reversed()[0]
+               except:
+                       print traceback.print_exc()
+                       pass #fb_nodeinfo  = fb['nodes'][node]['values']
 
                if config.list:
                        print node
@@ -337,6 +420,7 @@ def main():
                        if config.daysdown:
                                daysdown_print_nodeinfo(fb_nodeinfo, node)
                        else:
+                               fb_nodeinfo = fb_noderec.toDict()
                                if config.select:
                                        if config.fields:
                                                fields = config.fields.split(",")
index c782b9a..38cf897 100755 (executable)
--- a/pcubad.py
+++ b/pcubad.py
@@ -14,31 +14,32 @@ import syncplcdb
 from nodequery import verify,query_to_dict,node_select
 import parser as parsermodule
 from nodecommon import *
+from datetime import datetime,timedelta
+import config
+
+from sqlobject import connectionForURI,sqlhub
+connection = connectionForURI(config.sqlobjecturi)
+sqlhub.processConnection = connection
+from infovacuum.model_findbadrecord import *
+from infovacuum.model_historyrecord import *
 
 import plc
 api = plc.getAuthAPI()
 from unified_model import *
 from const import MINUP
 
-round = 1
-externalState = {'round': round, 'nodes': {}}
-count = 0
 
 def main(config):
-       global externalState
-       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
-       if config.increment:
-               # update global round number to force refreshes across all pcus
-               externalState['round'] += 1
 
        l_plcpcus = database.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
 
-       l_pcu = None
+       l_pcus = None
        if config.pcu:
                for pcu in l_plcpcus:
-                       if pcu['hostname'] == config.pcu  or pcu['ip'] == config.pcu:
+                       if ( pcu['hostname'] is not None and config.pcu in pcu['hostname'] ) or \
+                          ( pcu['ip'] is not None and config.pcu in pcu['ip'] ):
                                l_pcus = [pcu['pcu_id']]
-               if not l_pcu:
+               if not l_pcus:
                        print "ERROR: could not find pcu %s" % config.pcu
                        sys.exit(1)
        else:
@@ -46,108 +47,68 @@ def main(config):
        
        checkAndRecordState(l_pcus, l_plcpcus)
 
-def checkAndRecordState(l_pcus, l_plcpcus):
-       global externalState
-       global count
-       global_round = externalState['round']
+hn2lb = database.dbLoad("plcdb_hn2lb")
 
+def checkAndRecordState(l_pcus, l_plcpcus):
+       count = 0
        for pcuname in l_pcus:
-               if pcuname not in externalState['nodes']:
-                       externalState['nodes'][pcuname] = {'round': 0, 'values': []}
-
-               pcu_round   = externalState['nodes'][pcuname]['round']
-               if pcu_round < global_round:
-                       # do work
-                       values = collectStatusAndState(pcuname, l_plcpcus)
-                       global_round = externalState['round']
-                       externalState['nodes'][pcuname]['values'] = values
-                       externalState['nodes'][pcuname]['round'] = global_round
-               else:
-                       count += 1
-
-               if count % 20 == 0:
-                       database.dbDump(config.dbname, externalState)
 
-       database.dbDump(config.dbname, externalState)
-
-fbpcu = database.dbLoad('findbadpcus')
-hn2lb = database.dbLoad("plcdb_hn2lb")
-
-def get(fb, path):
-       indexes = path.split("/")
-       values = fb
-       for index in indexes:
-               if index in values:
-                       values = values[index]
+               d_pcu = None
+               for pcu in l_plcpcus:
+                       if pcu['pcu_id'] == pcuname:
+                               d_pcu = pcu
+                               break
+               if not d_pcu:
+                       continue
+
+               try:
+                       pf = HistoryPCURecord.by_pcuid(d_pcu['pcu_id'])
+               except:
+                       pf = HistoryPCURecord(plc_pcuid=pcuname)
+
+               pf.last_checked = datetime.now()
+
+               try:
+                       # Find the most recent record
+                       pcurec = FindbadPCURecord.select(FindbadPCURecord.q.plc_pcuid==pcuname, 
+                                                                                          orderBy='date_checked').reversed()[0]
+               except:
+                       # don't have the info to create a new entry right now, so continue.
+                       continue 
+
+               pcu_state      = pcurec.reboot_trial_status
+               current_state = pcu_state
+
+               if current_state == 0 or current_state == "0":
+                       if pf.status != "good": 
+                               pf.last_changed = datetime.now() 
+                               pf.status = "good"
+               elif current_state == 'NetDown':
+                       if pf.status != "netdown": 
+                               pf.last_changed = datetime.now()
+                               pf.status = "netdown"
+               elif current_state == 'Not_Run':
+                       if pf.status != "badconfig": 
+                               pf.last_changed = datetime.now()
+                               pf.status = "badconfig"
                else:
-                       return None
-       return values
-
-def collectStatusAndState(pcuname, l_plcpcus):
-       global count
-
-       d_pcu = None
-       for pcu in l_plcpcus:
-               if pcu['pcu_id'] == pcuname:
-                       d_pcu = pcu
-                       break
-       if not d_pcu:
-               return None
-
-       pf = PersistFlags(pcuname, 1, db='pcu_persistflags')
-
-       if not pf.checkattr('last_changed'):
-               pf.last_changed = time.time()
-               
-       pf.last_checked = time.time()
-
-       if not pf.checkattr('valid'):
-               pf.valid = "unknown"
-               pf.last_valid = 0
-
-       if not pf.checkattr('status'):
-               pf.status = "unknown"
-
-       state_path     = "nodes/id_" + str(pcuname) + "/values/reboot"
-       bootstate_path = "nodes/id_" + str(pcuname) + "/values/plcpcu/boot_state"
-
-       current_state = get(fbpcu, state_path)
-       if current_state == 0:
-               if pf.status != "good": pf.last_changed = time.time()
-               pf.status = "good"
-       elif current_state == 'NetDown':
-               if pf.status != "netdown": pf.last_changed = time.time()
-               pf.status = "netdown"
-       elif current_state == 'Not_Run':
-               if pf.status != "badconfig": pf.last_changed = time.time()
-               pf.status = "badconfig"
-       else:
-               if pf.status != "error": pf.last_changed = time.time()
-               pf.status = "error"
-
-       count += 1
-       print "%d %35s %s since(%s)" % (count, pcu_name(d_pcu), pf.status, diff_time(pf.last_changed))
-       # updated by other modules
-       #pf.enabled = 
-       #pf.suspended = 
+                       if pf.status != "error": 
+                               pf.last_changed = datetime.now()
+                               pf.status = "error"
 
-       pf.save()
+               count += 1
+               print "%d %35s %s since(%s)" % (count, pcu_name(d_pcu), pf.status, diff_time(time.mktime(pf.last_changed.timetuple())))
 
        return True
 
 if __name__ == '__main__':
        parser = parsermodule.getParser()
-       parser.set_defaults(filename=None, pcu=None, pcuselect=False, pcugroup=None, 
-                                               increment=False, dbname="pcubad", cachepcus=False)
+       parser.set_defaults(filename=None, pcu=None, pcuselect=False, pcugroup=None, cachepcus=False)
        parser.add_option("", "--pcu", dest="pcu", metavar="hostname", 
                                                help="Provide a single pcu to operate on")
        parser.add_option("", "--pculist", dest="pculist", metavar="file.list", 
                                                help="Provide a list of files to operate on")
 
-       parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
-                                               help="Specify the name of the database to which the information is saved")
-       parser.add_option("-i", "--increment", action="store_true", dest="increment", 
-                                               help="Increment round number to force refresh or retry")
        config = parsermodule.parse_args(parser)
 
        try:
@@ -156,6 +117,4 @@ if __name__ == '__main__':
                import traceback
                print traceback.print_exc()
                print "Exception: %s" % err
-               print "Saving data... exitting."
-               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index e876a76..f3f7f32 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -1235,6 +1235,8 @@ def reboot_policy(nodename, continue_probe, dryrun):
 
 def reboot_test(nodename, values, continue_probe, verbose, dryrun):
        rb_ret = ""
+       if 'plc_pcu_stats' in values:
+               values.update(values['plc_pcu_stats'])
 
        try:
                # DataProbe iPal (many sites)
index f55a4d3..750572a 100755 (executable)
--- a/sitebad.py
+++ b/sitebad.py
@@ -11,22 +11,21 @@ import comon
 import threadpool
 import syncplcdb
 from nodequery import verify,query_to_dict,node_select
+from datetime import datetime,timedelta
+import config
+
+from sqlobject import connectionForURI,sqlhub
+connection = connectionForURI(config.sqlobjecturi)
+sqlhub.processConnection = connection
+from infovacuum.model.findbadrecord import *
+from infovacuum.model.historyrecord import *
 
 import plc
 api = plc.getAuthAPI()
 from unified_model import *
 from const import MINUP
 
-round = 1
-externalState = {'round': round, 'sites': {}}
-count = 0
-
 def main(config):
-       global externalState
-       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
-       if config.increment:
-               # update global round number to force refreshes across all nodes
-               externalState['round'] += 1
 
        l_nodes = syncplcdb.create_plcdb()
        l_plcsites = database.dbLoad("l_plcsites")
@@ -38,83 +37,52 @@ def main(config):
        
        checkAndRecordState(l_sites, l_plcsites)
 
-def checkAndRecordState(l_sites, l_plcsites):
-       global externalState
-       global count
-       global_round = externalState['round']
-
-       for sitename in l_sites:
-               if sitename not in externalState['sites']:
-                       externalState['sites'][sitename] = {'round': 0, 'values': []}
-
-               site_round   = externalState['sites'][sitename]['round']
-               if site_round < global_round:
-                       # do work
-                       values = collectStatusAndState(sitename, l_plcsites)
-                       global_round = externalState['round']
-                       externalState['sites'][sitename]['values'] = values
-                       externalState['sites'][sitename]['round'] = global_round
-               else:
-                       count += 1
-
-               if count % 20 == 0:
-                       database.dbDump(config.dbname, externalState)
-
-       database.dbDump(config.dbname, externalState)
-
-fb = database.dbLoad('findbad')
-lb2hn = database.dbLoad("plcdb_lb2hn")
-
 def getnodesup(nodelist):
        up = 0
        for node in nodelist:
-               if node['hostname'] in fb['nodes'].keys():
+               try:
+                       noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==node['hostname'], 
+                                                                                          orderBy='date_checked').reversed()[0]
+                       if noderec.observed_status == "BOOT":
+                               up = up + 1
+               except:
+                       pass
+       return up
+
+def checkAndRecordState(l_sites, l_plcsites):
+       count = 0
+       lb2hn = database.dbLoad("plcdb_lb2hn")
+       for sitename in l_sites:
+               d_site = None
+               for site in l_plcsites:
+                       if site['login_base'] == sitename:
+                               d_site = site
+                               break
+               if not d_site:
+                       continue
+
+               if sitename in lb2hn:
                        try:
-                               if fb['nodes'][node['hostname']]['values']['state'] == "BOOT":
-                                       up = up + 1
+                               pf = HistorySiteRecord.by_loginbase(sitename)
                        except:
-                               pass
-       return up
+                               pf = HistorySiteRecord(loginbase=sitename)
 
-def collectStatusAndState(sitename, l_plcsites):
-       global count
-
-       d_site = None
-       for site in l_plcsites:
-               if site['login_base'] == sitename:
-                       d_site = site
-                       break
-       if not d_site:
-               return None
-
-       if sitename in lb2hn:
-               pf = PersistFlags(sitename, 1, db='site_persistflags')
-
-               if not pf.checkattr('last_changed'):
-                       pf.last_changed = time.time()
-               
-               pf.last_checked = time.time()
-               pf.nodes_total = len(lb2hn[sitename])
-               pf.slices_used = len(d_site['slice_ids'])
-               pf.nodes_up = getnodesup(lb2hn[sitename])
-               if not pf.checkattr('status'):
-                       pf.status = "unknown"
-
-               if pf.nodes_up >= MINUP:
-                       if pf.status != "good": pf.last_changed = time.time()
-                       pf.status = "good"
-               else:
-                       if pf.status != "down": pf.last_changed = time.time()
-                       pf.status = "down"
-
-               count += 1
-               print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, 
-                                                                               pf.nodes_total, pf.nodes_up, pf.status)
-               # updated by other modules
-               #pf.enabled = 
-               #pf.suspended = 
-
-               pf.save()
+                       pf.last_checked = datetime.now()
+
+                       pf.slices_used = len(d_site['slice_ids'])
+                       pf.nodes_total = len(lb2hn[sitename])
+                       pf.nodes_up = getnodesup(lb2hn[sitename])
+
+                       if pf.nodes_up >= MINUP:
+                               if pf.status != "good": pf.last_changed = datetime.now()
+                               pf.status = "good"
+                       else:
+                               if pf.status != "down": pf.last_changed = datetime.now()
+                               pf.status = "down"
+
+                       count += 1
+                       print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, 
+                                                                                       pf.nodes_total, pf.nodes_up, pf.status)
 
        return True
 
@@ -122,17 +90,14 @@ if __name__ == '__main__':
        import parser as parsermodule
 
        parser = parsermodule.getParser()
-       parser.set_defaults(filename=None, node=None, site=None, nodeselect=False, nodegroup=None, 
-                                               increment=False, dbname="sitebad", cachenodes=False)
+       parser.set_defaults(filename=None, node=None, site=None, 
+                                               nodeselect=False, nodegroup=None, cachenodes=False)
+
        parser.add_option("", "--site", dest="site", metavar="login_base", 
                                                help="Provide a single site to operate on")
        parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list", 
                                                help="Provide a list of files to operate on")
 
-       parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
-                                               help="Specify the name of the database to which the information is saved")
-       parser.add_option("-i", "--increment", action="store_true", dest="increment", 
-                                               help="Increment round number to force refresh or retry")
        config = parsermodule.parse_args(parser)
 
        try:
@@ -141,6 +106,4 @@ if __name__ == '__main__':
                import traceback
                print traceback.print_exc()
                print "Exception: %s" % err
-               print "Saving data... exitting."
-               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index e92a9cc..a52fec5 100755 (executable)
@@ -102,6 +102,7 @@ def main():
        
        if form.has_key('loginbase'):
                loginbase = form.getvalue('loginbase')
+               loginbase = loginbase.rstrip("_")
        else:
                loginbase = "undefined"
 
@@ -114,7 +115,10 @@ def main():
        r = TR()
 
        if loginbase not in lb2hn:
-               value = ("Select 'Edit settings' to enter your Site's loginbase.", "")
+               value = ("""Select 'Edit settings' to enter your Site's loginbase.<br><br>
+                                       The loginbase is the unchangable portion of your slicename.  
+                                       For instance, your slice follows the pattern loginbase_slicename.<br><br>
+                                       If this hint is unclear, then you can find your loginbase by visiting 'My Site' at <a target=_blank href=http://www.planet-lab.org>'PlanetLab.org'</a>""", "")
                r = TR(TD(value[0]))
                t.append(r)
        else:
index fe44423..a61426e 100755 (executable)
@@ -66,6 +66,8 @@ vals = {}
 vals['ssh'] = get_value('ssh')
 vals['state'] = get_value('state')
 vals['nm'] = get_value('nm')
+vals['dns'] = None
+vals['readonlyfs'] = None
 vals['plcnode/last_contact'] = None
 vals['comonstats/uptime'] = None
 vals['princeton_comon'] = get_value('princeton_comon')
@@ -82,7 +84,19 @@ for mynode in fb['nodes'].keys():
        row = []
        row.append(mynode)
        add=True
-       for key in ['ssh', 'state', 'plcnode/last_contact', 'nm', 'princeton_comon', 'princeton_comon_running', 'princeton_comon_procs', 'comonstats/uptime']:
+       if 'readonlyfs' in fbnode:
+               if 'Read-only file system' in fbnode['readonlyfs']:
+                       fbnode['readonlyfs'] = 'Y'
+               else:
+                       fbnode['readonlyfs'] = '_'
+
+       if 'dns' in fbnode:
+               if 'boot.planet-lab.org has address' in fbnode['dns']:
+                       fbnode['dns'] = '_'
+               else:
+                       fbnode['dns'] = 'N'
+                       
+       for key in ['ssh', 'state', 'plcnode/last_contact', 'readonlyfs', 'dns', 'nm', 'princeton_comon', 'princeton_comon_running', 'princeton_comon_procs', 'comonstats/uptime']:
                if get(fbnode, key) is None:
                        row.append('nokey')
                else:
@@ -116,7 +130,7 @@ packed_values.sort(rowcmp)
 
 t = TABLE(border=1)
 r = TR()
-for value in ['num', 'host', 'ssh', 'state', 'last<br>contact', 'NM', 'comon<br>dir', 'comon<br>vserver', 'comon<br>procs']:
+for value in ['num', 'host', 'ssh', 'state', 'last<br>contact', 'readonlyfs', 'dns', 'NM', 'comon<br>dir', 'comon<br>vserver', 'comon<br>procs']:
        r.append(TD(value))
 t.append(r)