This commit changes the 'soltesz.py' module into 'moncommands.py' and
author Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 1 Aug 2008 20:48:32 +0000 (20:48 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Fri, 1 Aug 2008 20:48:32 +0000 (20:48 +0000)
'database.py'

Also, the findbad*.py scripts now include a timeout that should allow them to
exit even if they get hung somewhere by the 'futex' bug.

Also, the mailer, rt, and others are updated to use monitorconfig.py as the
source of their username and password information rather than random files
here and there.  This also allows us to keep this information out of svn.

48 files changed:
action.py
automate_pl03.sh
blacklist.py
bootman.py
bwlimit.py
clean_policy.py
commands.py [deleted file]
comon.py
diagnose.py
dumpact.py
dumpdiag.py
findbad.py
findbadpcu.py
get_metasite_nodes.py
getnodekey.py
getnodes.py
grouprins.py
mailer.py
monitor.py
monitor_policy.py
nodeaction.py
nodebad.py
nodecommon.py
nodeconfig.py
nodediff.py
nodegroups.py
nodehistory.py
nodeinfo.py
nodequery.py
pcubad.py
pcuinfo.py
pkl2php.py
plc.py
policy.py
printbadcsv.py
printpdb.py
reboot.py
rt.py
rtinfo.py
showlatlon.py
sitebad.py
siteinfo.py
siteleave.py
soltesz.py
syncplcdb.py
ticket_blacklist.py
todo
unified_model.py

index 269007e..23e4508 100755 (executable)
--- a/action.py
+++ b/action.py
@@ -46,7 +46,7 @@ config.parse_args()
 import rt
 # Correlates input with policy to form actions
 import policy
-import soltesz
+import database
 import plc
 
 # Log to what 
@@ -140,7 +140,7 @@ def main():
        #########  GET NODES    ########################################
        logger.info('Get Nodes from PLC')
        print "getnode from plc"
-       l_plcnodes = soltesz.if_cached_else(True,
+       l_plcnodes = database.if_cached_else(True,
                                                                "l_plcnodes", 
                                                                lambda : plc.getNodes({'peer_id':None}))
 
@@ -168,15 +168,15 @@ def main():
 
        print "len of l_nodes: %d" % len(l_nodes)
        # Minus blacklisted ones..
-       l_ticket_blacklist = soltesz.if_cached_else(1,"l_ticket_blacklist",lambda : [])
+       l_ticket_blacklist = database.if_cached_else(1,"l_ticket_blacklist",lambda : [])
 
-       l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
+       l_blacklist = database.if_cached_else(1, "l_blacklist", lambda : [])
        l_nodes  = filter(lambda x : not x['hostname'] in l_blacklist, l_nodes)
 
        #######  Get RT tickets    #########################################
        #logger.info('Get Tickets from RT')
-       #t = soltesz.MyTimer()
-       #ad_dbTickets = soltesz.if_cached_else(config.cachert, "ad_dbTickets", rt.rt_tickets)
+       #t = moncommands.MyTimer()
+       #ad_dbTickets = database.if_cached_else(config.cachert, "ad_dbTickets", rt.rt_tickets)
        #print "Getting tickets from RT took: %f sec" % t.diff() ; del t
 
        logger.info('Start Action thread')
index a7712b4..536914e 100755 (executable)
@@ -25,7 +25,7 @@ echo $$ > $HOME/monitor/SKIP
 #########################
 # 1. FINDBAD NODES 
 rm -f pdb/production.findbad2.pkl
-./findbad.py --increment --cachenodes --debug=0 --dbname="findbad2" $DATE
+./findbad.py --increment --cachenodes --debug=0 --dbname="findbad2" $DATE || :
 
 ps ax | grep BatchMode | grep -v grep | awk '{print $1}' | xargs kill || :
 
@@ -47,7 +47,7 @@ cp badcsv.txt /plc/data/var/www/html/monitor/
 #########################
 # 2. FINDBAD PCUS
 rm -f pdb/production.findbadpcus2.pkl
-./findbadpcu.py --increment --refresh --debug=0 --dbname=findbadpcus2 $DATE            
+./findbadpcu.py --increment --refresh --debug=0 --dbname=findbadpcus2 $DATE || :
 
 ./sitebad.py --increment || :
 ./nodebad.py --increment || :
@@ -72,8 +72,12 @@ for f in findbad act_all findbadpcus l_plcnodes site_persistflags node_persistfl
        cp pdb/production.$f.pkl archive-pdb/`date +%F-%H:%M`.production.$f.pkl
 done
 
-./grouprins.py --mail=1 --nodeselect 'state=DEBUG&&boot_state=dbg' \
-                                               --stopselect 'state=BOOT&&kernel=2.6.22.19-vs2.3.0.34.9.planetlab' \
-                                               --reboot || :
+./grouprins.py --mail=1 \
+       --nodeselect 'state=DEBUG&&boot_state=dbg||state=DEBUG&&boot_state=boot' \
+       --stopselect 'state=BOOT&&kernel=2.6.22.19-vs2.3.0.34.9.planetlab' \
+       --reboot || :
+
+# cache the RT db locally.
+python ./rt.py
 
 rm -f $HOME/monitor/SKIP
index 11e1cfc..c96dc89 100755 (executable)
@@ -4,7 +4,7 @@ import os
 import sys
 import string
 import time
-import soltesz
+import database
 import plc
 import getopt
 
@@ -20,7 +20,7 @@ def main():
                print "Error: " + err.msg
                sys.exit(1)
 
-       l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
+       l_blacklist = database.if_cached_else(1, "l_blacklist", lambda : [])
 
        for (opt, optval) in opts:
                if opt in ["-d", "--delete"]:
@@ -44,7 +44,7 @@ def main():
                        l_blacklist.append(line)
 
        print "Total %d nodes in blacklist" % (len(l_blacklist))
-       soltesz.dbDump("l_blacklist")
+       database.dbDump("l_blacklist")
        
 if __name__ == '__main__':
        import os
index ce9bb6e..2fd161c 100755 (executable)
@@ -14,7 +14,8 @@ from getsshkeys import SSHKnownHosts
 
 import subprocess
 import time
-import soltesz
+import database
+import moncommands
 from sets import Set
 
 import ssh.pxssh as pxssh
@@ -23,6 +24,8 @@ import ssh.pexpect as pexpect
 from unified_model import *
 from emailTxt import mailtxt
 
+import monitorconfig
+
 import signal
 class Sopen(subprocess.Popen):
        def kill(self, signal = signal.SIGTERM):
@@ -33,7 +36,7 @@ from Rpyc import SocketConnection, Async
 from Rpyc.Utils import *
 
 def get_fbnode(node):
-       fb = soltesz.dbLoad("findbad")
+       fb = database.dbLoad("findbad")
        fbnode = fb['nodes'][node]['values']
        return fbnode
 
@@ -65,8 +68,8 @@ class NodeConnection:
 
        def dump_plconf_file(self):
                c = self.c
-               c.modules.sys.path.append("/tmp/source/")
-               c.modules.os.chdir('/tmp/source')
+               self.c.modules.sys.path.append("/tmp/source/")
+               self.c.modules.os.chdir('/tmp/source')
 
                log = c.modules.BootManager.log('/tmp/new.log')
                bm = c.modules.BootManager.BootManager(log,'boot')
@@ -92,8 +95,8 @@ class NodeConnection:
 
        def compare_and_repair_nodekeys(self):
                c = self.c
-               c.modules.sys.path.append("/tmp/source/")
-               c.modules.os.chdir('/tmp/source')
+               self.c.modules.sys.path.append("/tmp/source/")
+               self.c.modules.os.chdir('/tmp/source')
 
                log = c.modules.BootManager.log('/tmp/new.log')
                bm = c.modules.BootManager.BootManager(log,'boot')
@@ -201,7 +204,7 @@ class PlanetLabSession:
                args['port'] = self.port
                args['user'] = 'root'
                args['hostname'] = self.node
-               args['monitordir'] = "/home/soltesz/monitor"
+               args['monitordir'] = monitorconfig.MONITOR_SCRIPT_ROOT
                ssh_port = 22
 
                if self.nosetup:
@@ -209,11 +212,11 @@ class PlanetLabSession:
                        return 
 
                # COPY Rpyc files to host
-               cmd = "rsync -qv -az -e ssh %(monitordir)s/Rpyc-2.45-2.3/ %(user)s@%(hostname)s:Rpyc 2> /dev/null" % args
+               cmd = "rsync -qv -az -e ssh %(monitordir)s/Rpyc/ %(user)s@%(hostname)s:Rpyc 2> /dev/null" % args
                if self.verbose: print cmd
                # TODO: Add timeout
                timeout = 120
-               localos = soltesz.CMD()
+               localos = moncommands.CMD()
 
                ret = localos.system(cmd, timeout)
                print ret
@@ -230,7 +233,7 @@ class PlanetLabSession:
 
                t1 = time.time()
                # KILL any already running servers.
-               ssh = soltesz.SSH(args['user'], args['hostname'], ssh_port)
+               ssh = moncommands.SSH(args['user'], args['hostname'], ssh_port)
                (ov,ev) = ssh.run_noexcept2("""<<\EOF
             rm -f out.log
             echo "kill server" >> out.log
@@ -270,7 +273,7 @@ EOF""")
                # TODO: the read() here may block indefinitely.  Need a better
                # approach therefore, that includes a timeout.
                #ret = self.command.stdout.read(5)
-               ret = soltesz.read_t(self.command.stdout, 5)
+               ret = moncommands.read_t(self.command.stdout, 5)
 
                t2 = time.time()
                if 'READY' in ret:
index 09d3167..6b93156 100755 (executable)
@@ -4,7 +4,6 @@ import os
 import sys
 import string
 import time
-import soltesz
 import plc
 
 bwlimit = {}
index dba9b9b..5e9f625 100644 (file)
@@ -1,7 +1,7 @@
 from config import config
 #print "policy"
 config = config()
-import soltesz
+import database 
 import time
 import mailer
 from www.printbadnodes import cmpCategoryVal
@@ -29,12 +29,12 @@ class MonitorMergeDiagnoseSendEscellate:
                self.act = act
                self.plcdb_hn2lb = None
                if self.plcdb_hn2lb is None:
-                       self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+                       self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
                self.loginbase = self.plcdb_hn2lb[self.hostname]
                return
 
        def getFBRecord(self):
-               fb = soltesz.dbLoad("findbad")
+               fb = database.dbLoad("findbad")
                if self.hostname in fb['nodes']:
                        fbnode = fb['nodes'][self.hostname]['values']
                else:
@@ -43,7 +43,7 @@ class MonitorMergeDiagnoseSendEscellate:
 
        def getActionRecord(self):
                # update ticket status
-               act_all = soltesz.dbLoad("act_all")
+               act_all = database.dbLoad("act_all")
                if self.hostname in act_all and len(act_all[self.hostname]) > 0:
                        actnode = act_all[self.hostname][0]
                else:
@@ -121,7 +121,7 @@ class MonitorMergeDiagnoseSendEscellate:
                        record.data['log'] = self.getDownLog(record)
 
                elif category == "prod":
-                       state = diag.getState()
+                       state = record.getState()
                        if state == "boot":
                                diag.setFlag('SendThankyou')
                                record.data['message'] = emailTxt.mailtxt.newthankyou
@@ -199,9 +199,9 @@ class MonitorMergeDiagnoseSendEscellate:
                return True
 
        def add_and_save_act_all(self, record):
-               self.act_all = soltesz.dbLoad("act_all")
+               self.act_all = database.dbLoad("act_all")
                self.act_all[self.hostname].insert(0,record.data)
-               soltesz.dbDump("act_all", self.act_all)
+               database.dbDump("act_all", self.act_all)
                
        def getDownLog(self, record):
 
diff --git a/commands.py b/commands.py
deleted file mode 100644 (file)
index 65684c5..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-import os
-
-DEBUG= 0
-
-COMMAND_TIMEOUT = 60
-ssh_options = { 'StrictHostKeyChecking':'no', 
-                               'BatchMode':'yes', 
-                               'PasswordAuthentication':'no',
-                               'ConnectTimeout':'%s' % COMMAND_TIMEOUT}
-from select import select 
-import subprocess
-import signal
-
-class Sopen(subprocess.Popen):
-       def kill(self, signal = signal.SIGTERM):
-               os.kill(self.pid, signal)
-
-def read_t(stream, count, timeout=COMMAND_TIMEOUT*2):
-       lin, lout, lerr = select([stream], [], [], timeout)
-       if len(lin) == 0:
-               raise ExceptionTimeout("TIMEOUT Running: %s" % cmd)
-
-       return stream.read(count)
-
-class CMD:
-       def __init__(self):
-               pass
-
-       def run_noexcept(self, cmd, timeout=COMMAND_TIMEOUT*2):
-
-               #print "CMD.run_noexcept(%s)" % cmd
-               try:
-                       return CMD.run(self,cmd,timeout)
-               except ExceptionTimeout:
-                       import traceback; print traceback.print_exc()
-                       return ("", "SCRIPTTIMEOUT")
-                       
-       def system(self, cmd, timeout=COMMAND_TIMEOUT*2):
-               (o,e) = self.run(cmd, timeout)
-               self.output = o
-               self.error = e
-               if self.s.returncode is None:
-                       self.s.wait()
-               return self.s.returncode
-
-       def run(self, cmd, timeout=COMMAND_TIMEOUT*2):
-
-               #print "CMD.run(%s)" % cmd
-               s = Sopen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
-               self.s = s
-               (f_in, f_out, f_err) = (s.stdin, s.stdout, s.stderr)
-               #print "calling select(%s)" % timeout
-               lout, lin, lerr = select([f_out], [], [f_err], timeout)
-               #print "TIMEOUT!!!!!!!!!!!!!!!!!!!"
-               if len(lin) == 0 and len(lout) == 0 and len(lerr) == 0:
-                       # Reached a timeout!  Nuke process so it does not hang.
-                       #print "KILLING"
-                       s.kill(signal.SIGKILL)
-                       raise ExceptionTimeout("TIMEOUT Running: %s" % cmd)
-               else:
-                       #print "RETURNING"
-                       #print len(lin), len(lout), len(lerr)
-                       pass
-
-               o_value = ""
-               e_value = ""
-
-               #print "reading from f_out"
-               if len(lout) > 0: o_value = f_out.read()
-               #print "reading from f_err"
-               if len(lerr) > 0: e_value = f_err.read()
-
-               #print "striping output"
-               o_value = o_value.strip()
-               e_value = e_value.strip()
-
-               #print "OUTPUT", o_value, e_value
-
-               #print "closing files"
-               f_out.close()
-               f_in.close()
-               f_err.close()
-               try:
-                       #print "s.kill()"
-                       s.kill()
-                       #print "after s.kill()"
-               except OSError:
-                       # no such process, due to it already exiting...
-                       pass
-
-               #print o_value, e_value
-               return (o_value, e_value)
-
-       def runargs(self, args, timeout=COMMAND_TIMEOUT*2):
-
-               #print "CMD.run(%s)" % " ".join(args)
-               s = Sopen(args, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
-               self.s = s
-               (f_in, f_out, f_err) = (s.stdin, s.stdout, s.stderr)
-               lout, lin, lerr = select([f_out], [], [f_err], timeout)
-               if len(lin) == 0 and len(lout) == 0 and len(lerr) == 0:
-                       # Reached a timeout!  Nuke process so it does not hang.
-                       s.kill(signal.SIGKILL)
-                       raise ExceptionTimeout("TIMEOUT Running: %s" % cmd)
-               o_value = f_out.read()
-               e_value = ""
-               if o_value == "":       # An error has occured
-                       e_value = f_err.read()
-
-               o_value = o_value.strip()
-               e_value = e_value.strip()
-
-               f_out.close()
-               f_in.close()
-               f_err.close()
-               try:
-                       s.kill()
-               except OSError:
-                       # no such process, due to it already exiting...
-                       pass
-
-               return (o_value, e_value)
-
-
-class SSH(CMD):
-       def __init__(self, user, host, port=22, options = ssh_options):
-               self.options = options
-               self.user = user
-               self.host = host
-               self.port = port
-               return
-
-       def __options_to_str(self):
-               options = ""
-               for o,v in self.options.iteritems():
-                       options = options + "-o %s=%s " % (o,v)
-               return options
-
-       def run(self, cmd, timeout=COMMAND_TIMEOUT*2):
-               cmd = "ssh -p %s %s %s@%s '%s'" % (self.port, self.__options_to_str(), 
-                                                                       self.user, self.host, cmd)
-               #print "SSH.run(%s)" % cmd
-               return CMD.run(self, cmd, timeout)
-
-       def get_file(self, rmt_filename, local_filename=None):
-               if local_filename == None:
-                       local_filename = "./"
-               cmd = "scp -P %s -B %s %s@%s:%s %s" % (self.port, self.__options_to_str(), 
-                                                                       self.user, self.host, 
-                                                                       rmt_filename, local_filename)
-               # output :
-               #       errors will be on stderr,
-               #   success will have a blank stderr...
-               return CMD.run_noexcept(self, cmd)
-
-       def run_noexcept(self, cmd):
-               cmd = "ssh -p %s %s %s@%s '%s'" % (self.port, self.__options_to_str(), 
-                                                                       self.user, self.host, cmd)
-               #print "SSH.run_noexcept(%s)" % cmd
-               return CMD.run_noexcept(self, cmd)
-
-       def run_noexcept2(self, cmd, timeout=COMMAND_TIMEOUT*2):
-               cmd = "ssh -p %s %s %s@%s %s" % (self.port, self.__options_to_str(), 
-                                                                       self.user, self.host, cmd)
-               #print "SSH.run_noexcept2(%s)" % cmd
-               r = CMD.run_noexcept(self, cmd, timeout)
-
-               # XXX: this may be resulting in deadlocks... not sure.
-               #if self.s.returncode is None:
-               #       #self.s.kill()
-               #       self.s.kill(signal.SIGKILL)
-               #       self.s.wait()
-               #       self.ret = self.s.returncode
-               self.ret = -1
-
-               return r
-
-       def system2(self, cmd, timeout=COMMAND_TIMEOUT*2):
-               cmd = "ssh -p %s %s %s@%s %s" % (self.port, self.__options_to_str(), 
-                                                                       self.user, self.host, cmd)
-               #print "SSH.system2(%s)" % cmd
-               return CMD.system(self, cmd, timeout)
-
-       def runE(self, cmd):
-               cmd = "ssh -p %s %s %s@%s '%s'" % (self.port, self.__options_to_str(), 
-                                                                       self.user, self.host, cmd)
-               if ( DEBUG == 1 ):
-                       print cmd,
-               (f_in, f_out, f_err) = os.popen3(cmd)
-
-               value = f_out.read()
-               if value == "": # An error has occured
-                       value = f_err.read()
-                       value = value.strip()
-
-               if ( DEBUG == 1 ):
-                       print " == %s" % value
-               f_out.close()
-               f_in.close()
-               f_err.close()
-               return value.strip()
-               
-import time
-class MyTimer:
-       def __init__(self):
-               self.start = time.time()
-
-       def end(self):
-               self.end = time.time()
-               t = self.end-self.start
-               return t
-
-       def diff(self):
-               self.end = time.time()
-               t = self.end-self.start
-               self.start = self.end
-               return t
index 7344df3..8d96e16 100755 (executable)
--- a/comon.py
+++ b/comon.py
@@ -245,12 +245,6 @@ def main():
                        print("%-40s \t Bootstate %s nodetype %s kernver %s keyok %s" % ( 
                                host, cdb[host]['bootstate'], cdb[host]['nodetype'], 
                                cdb[host]['kernver'], cdb[host]['keyok']))
-                       #ssh = soltesz.SSH('root', host)
-                       #try:
-                       #       val = ssh.run("uname -r")
-                       #       print "%s == %s" % (host, val),
-                       #except:
-                       #       pass
        #       else:
        #               print("key mismatch at: %s" % host)
        #print a.codata['michelangelo.ani.univie.ac.at']
index 855f52d..4e25974 100755 (executable)
@@ -49,7 +49,8 @@ config.parse_args()
 import rt
 # Correlates input with policy to form actions
 import policy
-import soltesz
+import moncommands
+import database 
 import plc
 import syncplcdb
 
@@ -153,7 +154,7 @@ def main():
        #########  GET NODES    ########################################
        logger.info('Get Nodes from PLC')
        print "getnode from plc: %s %s %s" % (config.debug, config.cachenodes, config.refresh)
-       l_plcnodes = soltesz.if_cached_else_refresh(config.cachenodes, 
+       l_plcnodes = database.if_cached_else_refresh(config.cachenodes, 
                                                                config.refresh, "l_plcnodes",
                                                                lambda : syncplcdb.create_plcdb() )
 
@@ -179,14 +180,14 @@ def main():
 
        print "len of l_nodes: %d" % len(l_nodes)
        # Minus blacklisted ones..
-       l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
-       l_ticket_blacklist = soltesz.if_cached_else(1,"l_ticket_blacklist",lambda : [])
+       l_blacklist = database.if_cached_else(1, "l_blacklist", lambda : [])
+       l_ticket_blacklist = database.if_cached_else(1,"l_ticket_blacklist",lambda : [])
        l_nodes  = filter(lambda x : not x['hostname'] in l_blacklist, l_nodes)
 
        logger.info('Get Tickets from RT')
        #######  RT tickets    #########################################
-       t = soltesz.MyTimer()
-       ad_dbTickets = soltesz.if_cached_else_refresh(config.cachert, config.refresh, "ad_dbTickets", rt.rt_tickets)
+       t = moncommands.MyTimer()
+       ad_dbTickets = database.if_cached_else_refresh(config.cachert, config.refresh, "ad_dbTickets", rt.rt_tickets)
        if ad_dbTickets == "":
                print "ad_dbTickets failed..."
                sys.exit(1)
index 1ac0cb1..b710a54 100755 (executable)
@@ -5,12 +5,12 @@
 import sys
 import time
 import getopt
-import soltesz
+import database 
 
 def main():
 
-       act_all = soltesz.dbLoad(sys.argv[1])
-       plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+       act_all = database.dbLoad(sys.argv[1])
+       plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
        s_nodenames = ""
        sickdb = {}
 
index bed95dc..2a2d753 100755 (executable)
@@ -5,12 +5,12 @@
 import sys
 import time
 import getopt
-import soltesz
+import database 
 
 def main():
 
-       sickdb = soltesz.dbLoad(sys.argv[1])
-       plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+       sickdb = database.dbLoad(sys.argv[1])
+       plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
        s_nodenames = ""
 
        sorted_keys = sickdb.keys()
index 5b04398..7efa52c 100755 (executable)
@@ -21,7 +21,8 @@ externalState = {'round': round, 'nodes': {}}
 count = 0
 
 
-import soltesz
+import database
+import moncommands 
 import comon
 import threadpool
 import syncplcdb
@@ -33,7 +34,7 @@ api = plc.PLC(auth.auth, auth.plc)
 
 def collectPingAndSSH(nodename, cohash):
        ### RUN PING ######################
-       ping = soltesz.CMD()
+       ping = moncommands.CMD()
        (oval,errval) = ping.run_noexcept("ping -c 1 -q %s | grep rtt" % nodename)
 
        values = {}
@@ -46,7 +47,7 @@ def collectPingAndSSH(nodename, cohash):
 
        try:
                for port in [22, 806]: 
-                       ssh = soltesz.SSH('root', nodename, port)
+                       ssh = moncommands.SSH('root', nodename, port)
 
                        (oval, errval) = ssh.run_noexcept2(""" <<\EOF
                                echo "{"
@@ -77,7 +78,7 @@ EOF                   """)
 
        ### RUN SSH ######################
        b_getbootcd_id = True
-       #ssh = soltesz.SSH('root', nodename)
+       #ssh = moncommands.SSH('root', nodename)
        #oval = ""
        #errval = ""
        #(oval, errval) = ssh.run_noexcept('echo `uname -a ; ls /tmp/bm.log`')
@@ -266,7 +267,7 @@ def recordPingAndSSH(request, result):
                count += 1
                print "%d %s %s" % (count, nodename, externalState['nodes'][nodename]['values'])
                if count % 20 == 0:
-                       soltesz.dbDump(config.dbname, externalState)
+                       database.dbDump(config.dbname, externalState)
 
 # this will be called when an exception occurs within a thread
 def handle_exception(request, result):
@@ -301,10 +302,16 @@ def checkAndRecordState(l_nodes, cohash):
                        pass
 
        # WAIT while all the work requests are processed.
+       begin = time.time()
        while 1:
                try:
                        time.sleep(1)
                        tp.poll()
+                       # if more than an hour and a half
+                       if time.time() - begin > (60*60*1.5):
+                               print "findbad.py has run out of time!!!!!!"
+                               database.dbDump(config.dbname, externalState)
+                               os._exit(1)
                except KeyboardInterrupt:
                        print "Interrupted!"
                        break
@@ -312,14 +319,14 @@ def checkAndRecordState(l_nodes, cohash):
                        print "All results collected."
                        break
 
-       soltesz.dbDump(config.dbname, externalState)
+       database.dbDump(config.dbname, externalState)
 
 
 
 def main():
        global externalState
 
-       externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState) 
+       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
 
        if config.increment:
                # update global round number to force refreshes across all nodes
@@ -393,5 +400,5 @@ if __name__ == '__main__':
                print traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exitting."
-               soltesz.dbDump(config.dbname, externalState)
+               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index 399359a..5f54235 100755 (executable)
@@ -44,7 +44,8 @@ count = 0
 import reboot
 from reboot import pcu_name
 
-import soltesz
+import database
+import moncommands
 import plc
 import comon
 import threadpool
@@ -74,7 +75,7 @@ def get_pcu(pcuname):
        except:
                try:
                        print "GetPCU from file %s" % pcuname
-                       l_pcus = soltesz.dbLoad("pculist")
+                       l_pcus = database.dbLoad("pculist")
                        for i in l_pcus:
                                if i['pcu_id'] == pcuname:
                                        l_pcu = i
@@ -92,7 +93,7 @@ def get_nodes(node_ids):
                l_node = plc.getNodes(node_ids, ['hostname', 'last_contact', 'node_id', 'ports'])
        except:
                try:
-                       plc_nodes = soltesz.dbLoad("l_plcnodes")
+                       plc_nodes = database.dbLoad("l_plcnodes")
                        for n in plc_nodes:
                                if n['node_id'] in node_ids:
                                        l_node.append(n)
@@ -148,7 +149,7 @@ def get_plc_site_values(site_id):
                        d_site = d_site[0]
        except:
                try:
-                       plc_sites = soltesz.dbLoad("l_plcsites")
+                       plc_sites = database.dbLoad("l_plcsites")
                        for site in plc_sites:
                                if site['site_id'] == site_id:
                                        d_site = site
@@ -258,7 +259,7 @@ def collectPingAndSSH(pcuname, cohash):
 
                #### RUN NMAP ###############################
                if continue_probe:
-                       nmap = soltesz.CMD()
+                       nmap = moncommands.CMD()
                        (oval,eval) = nmap.run_noexcept("nmap -oG - -P0 -p22,23,80,443,5869,9100,16992 %s | grep Host:" % pcu_name(values))
                        # NOTE: an empty / error value for oval, will still work.
                        (values['portstatus'], continue_probe) = nmap_portstatus(oval)
@@ -306,12 +307,12 @@ def recordPingAndSSH(request, result):
 
                count += 1
                print "%d %s %s" % (count, nodename, externalState['nodes'][pcu_id]['values'])
-               soltesz.dbDump(config.dbname, externalState)
+               database.dbDump(config.dbname, externalState)
 
        if errors is not None:
                pcu_id = "id_%s" % nodename
                errorState[pcu_id] = errors
-               soltesz.dbDump("findbadpcu_errors", errorState)
+               database.dbDump("findbadpcu_errors", errorState)
 
 # this will be called when an exception occurs within a thread
 def handle_exception(request, result):
@@ -349,10 +350,16 @@ def checkAndRecordState(l_pcus, cohash):
                        pass
 
        # WAIT while all the work requests are processed.
+       begin = time.time()
        while 1:
                try:
                        time.sleep(1)
                        tp.poll()
+                       # if more than one hour
+                       if time.time() - begin > (60*60*1):
+                               print "findbadpcus.py has run out of time!!!!!!"
+                               database.dbDump(config.dbname, externalState)
+                               os._exit(1)
                except KeyboardInterrupt:
                        print "Interrupted!"
                        break
@@ -365,8 +372,8 @@ def checkAndRecordState(l_pcus, cohash):
 def main():
        global externalState
 
-       l_pcus = soltesz.if_cached_else_refresh(1, config.refresh, "pculist", lambda : plc.GetPCUs())
-       externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState) 
+       l_pcus = database.if_cached_else_refresh(1, config.refresh, "pculist", lambda : plc.GetPCUs())
+       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
        cohash = {}
 
        if config.increment:
@@ -432,5 +439,5 @@ if __name__ == '__main__':
                traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exitting."
-               soltesz.dbDump(config.dbname, externalState)
+               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index e3b7959..7fb46ef 100755 (executable)
@@ -1,13 +1,13 @@
 #!/usr/bin/python
 import plc
-import soltesz
+import database
 import string
 import sys
 
 def main():
        meta_sites = ['canarie', 'rnp', 'jgn2', 'i2', 'tp', 'princeton', 'princetondsl', 'plcolo', 'wide']
        l_blacklist = [ "grouse.hpl.hp.com", "planet1.att.nodes.planet-lab.org"]
-       #l_blacklist = soltesz.dbLoad("l_blacklist")
+       #l_blacklist = database.dbLoad("l_blacklist")
        l_sitelist = []
        count = 0
        # for each prefix above
@@ -33,7 +33,7 @@ def main():
        print "Found %d nodes" % count
        print "Found %d sites " % len(l_sitelist)
 
-       soltesz.dbDump("l_blacklist")
+       database.dbDump("l_blacklist")
 
 if __name__=="__main__":
        main() 
index 78d9ce6..4ffe00d 100644 (file)
@@ -4,7 +4,7 @@ import os
 import sys
 import string
 import time
-import soltesz
+import moncommands
 import plc
 
 def main():
@@ -23,7 +23,7 @@ def main():
                #print n
 
        for host in d_nodes:
-               ssh = soltesz.SSH('root', host)
+               ssh = moncommands.SSH('root', host)
                val = ssh.runE("grep NODE_KEY /tmp/planet.cnf")
                print "%s == %s" % (host, val)
 
index 60dad7c..2116fe5 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import soltesz
+import database
 import plc
 from optparse import OptionParser
 import sys
@@ -27,7 +27,7 @@ if not config.run:
        print "Add --run to actually perform the command"
        sys.exit(1)
 
-nodelist = soltesz.if_cached_else_refresh(1, 
+nodelist = database.if_cached_else_refresh(1, 
                                                        config.refresh, 
                                                        "l_plcnodes", 
                                                        lambda : plc.getNodes({'peer_id':None}, ['hostname']))
index 95d0fc5..14be85f 100755 (executable)
@@ -24,7 +24,7 @@ from optparse import OptionParser
 
 from nodecommon import *
 from nodequery import verify,query_to_dict,node_select
-import soltesz
+import database
 from unified_model import *
 import os
 
@@ -156,7 +156,7 @@ def set_node_to_rins(host, fb):
 
 
 try:
-       rebootlog = soltesz.dbLoad("rebootlog")
+       rebootlog = database.dbLoad("rebootlog")
 except:
        rebootlog = LogRoll()
 
@@ -235,7 +235,7 @@ if config.findbad:
        configmodule.setFileFromList(file, hostnames)
        os.system("./findbad.py --cachenodes --debug=0 --dbname=findbad --increment --nodelist %s" % file)
 
-fb = soltesz.dbLoad("findbad")
+fb = database.dbLoad("findbad")
 # commands:
 i = 1
 count = 1
@@ -346,7 +346,7 @@ for host in hostnames:
        time.sleep(1)
        if count % 10 == 0:
                print "Saving rebootlog"
-               soltesz.dbDump("rebootlog", rebootlog)
+               database.dbDump("rebootlog", rebootlog)
                wait_time = int(config.timewait)
                print "Sleeping %d minutes" % wait_time
                ti = 0
@@ -361,4 +361,4 @@ for host in hostnames:
        count = count + 1
 
 print "Saving rebootlog"
-soltesz.dbDump("rebootlog", rebootlog)
+database.dbDump("rebootlog", rebootlog)
index 407390f..f2af6cf 100755 (executable)
--- a/mailer.py
+++ b/mailer.py
@@ -12,6 +12,7 @@ import calendar
 import logging
 import os
 import time
+import monitorconfig
 
 config = config()
 logger = logging.getLogger("monitor")
@@ -28,11 +29,11 @@ def reformat_for_rt(text):
                
 
 def _setupRTenvironment():
-       os.environ['PATH'] = os.environ['PATH'] + ":/home/soltesz/local/bin/"
-       os.environ['RTSERVER'] = "https://rt.planet-lab.org/"
-       os.environ['RTUSER']   = "monitor"
-       os.environ['RTPASSWD'] = "ssorcmor"
-       os.environ['RTDEBUG'] = "0"
+       os.environ['PATH'] = os.environ['PATH'] + ":" + monitorconfig.RT_WEB_TOOLS_PATH
+       os.environ['RTSERVER'] = monitorconfig.RT_WEB_SERVER
+       os.environ['RTUSER']   = monitorconfig.RT_WEB_USER
+       os.environ['RTPASSWD'] = monitorconfig.RT_WEB_PASSWORD
+       os.environ['RTDEBUG'] = monitorconfig.RT_WEB_DEBUG
        return
 
 def setTicketStatus(ticket_id, status):
index b8fe5cb..b9e3ece 100644 (file)
@@ -6,7 +6,7 @@
 #
 # $Id: monitor.py,v 1.7 2007/07/03 19:59:02 soltesz Exp $
 
-import soltesz
+import database
 
 from monitor_policy import *
 import rt
@@ -25,14 +25,14 @@ def reboot(hostname):
        if len(l_nodes) == 0:
                raise Exception("No such host: %s" % hostname)
        
-       l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
-       l_ticket_blacklist = soltesz.if_cached_else(1,"l_ticket_blacklist",lambda : [])
+       l_blacklist = database.if_cached_else(1, "l_blacklist", lambda : [])
+       l_ticket_blacklist = database.if_cached_else(1,"l_ticket_blacklist",lambda : [])
 
        l_nodes  = filter(lambda x : not x['hostname'] in l_blacklist, l_nodes)
        if len(l_nodes) == 0:
                raise Exception("Host removed via blacklist: %s" % hostname)
 
-       ad_dbTickets = soltesz.if_cached_else_refresh(True, False, "ad_dbTickets", lambda : [])
+       ad_dbTickets = database.if_cached_else_refresh(True, False, "ad_dbTickets", lambda : [])
        if ad_dbTickets == None:
                raise Exception("Could not find cached dbTickets")
 
@@ -61,14 +61,14 @@ def reboot2(hostname):
        if len(l_nodes) == 0:
                raise Exception("No such host: %s" % hostname)
        
-       l_blacklist = soltesz.if_cached_else(1, "l_blacklist", lambda : [])
-       l_ticket_blacklist = soltesz.if_cached_else(1,"l_ticket_blacklist",lambda : [])
+       l_blacklist = database.if_cached_else(1, "l_blacklist", lambda : [])
+       l_ticket_blacklist = database.if_cached_else(1,"l_ticket_blacklist",lambda : [])
 
        l_nodes  = filter(lambda x : not x['hostname'] in l_blacklist, l_nodes)
        if len(l_nodes) == 0:
                raise Exception("Host removed via blacklist: %s" % hostname)
 
-       ad_dbTickets = soltesz.if_cached_else_refresh(True, False, "ad_dbTickets", lambda : None)
+       ad_dbTickets = database.if_cached_else_refresh(True, False, "ad_dbTickets", lambda : None)
        if ad_dbTickets == None:
                raise Exception("Could not find cached dbTickets")
 
index e8789da..7d79fab 100644 (file)
@@ -1,7 +1,7 @@
 from config import config
 #print "policy"
 config = config()
-import soltesz
+import database
 import time
 import mailer
 from www.printbadnodes import cmpCategoryVal
@@ -49,13 +49,13 @@ class Merge:
                self.merge_list = l_merge
 
                # the hostname to loginbase mapping
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
 
                # Previous actions taken on nodes.
-               self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {})
-               self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
+               self.act_all = database.if_cached_else(1, "act_all", lambda : {})
+               self.findbad = database.if_cached_else(1, "findbad", lambda : {})
 
-               self.cache_all = soltesz.if_cached_else(1, "act_all", lambda : {})
+               self.cache_all = database.if_cached_else(1, "act_all", lambda : {})
                self.sickdb = {}
                self.mergedb = {}
 
@@ -257,8 +257,8 @@ class RT:
 class Diagnose:
        def __init__(self, record_list):
                self.record_list = record_list
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
-               self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+               self.findbad = database.if_cached_else(1, "findbad", lambda : {})
 
                self.diagnose_in = {}
                self.diagnose_out = {}
@@ -827,12 +827,12 @@ class Diagnose:
 class Action:
        def __init__(self, diagnose_out):
                # the hostname to loginbase mapping
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
 
                # Actions to take.
                self.diagnose_db = diagnose_out
                # Actions taken.
-               self.act_all   = soltesz.if_cached_else(1, "act_all", lambda : {})
+               self.act_all   = database.if_cached_else(1, "act_all", lambda : {})
 
                # A dict of actions to specific functions. PICKLE doesnt' like lambdas.
                self.actions = {}
@@ -869,8 +869,8 @@ class Action:
                        print err
                        if config.policysavedb:
                                print "Saving Databases... act_all"
-                               soltesz.dbDump("act_all", self.act_all)
-                               soltesz.dbDump("diagnose_out", self.diagnose_db)
+                               database.dbDump("act_all", self.act_all)
+                               database.dbDump("diagnose_out", self.diagnose_db)
                        sys.exit(1)
 
                #print_stats("sites_observed", stats)
@@ -882,11 +882,11 @@ class Action:
 
                if config.policysavedb:
                        print "Saving Databases... act_all"
-                       #soltesz.dbDump("policy.eventlog", self.eventlog)
+                       #database.dbDump("policy.eventlog", self.eventlog)
                        # TODO: remove 'diagnose_out', 
                        #       or at least the entries that were acted on.
-                       soltesz.dbDump("act_all", self.act_all)
-                       soltesz.dbDump("diagnose_out", self.diagnose_db)
+                       database.dbDump("act_all", self.act_all)
+                       database.dbDump("diagnose_out", self.diagnose_db)
 
        def accumSites(self):
                """
@@ -1091,10 +1091,10 @@ class Action:
                
                if config.policysavedb:
                        #print "Saving Databases... act_all, diagnose_out"
-                       #soltesz.dbDump("act_all", self.act_all)
+                       #database.dbDump("act_all", self.act_all)
                        # remove site record from diagnose_out, it's in act_all as done.
                        del self.diagnose_db[loginbase]
-                       #soltesz.dbDump("diagnose_out", self.diagnose_db)
+                       #database.dbDump("diagnose_out", self.diagnose_db)
 
                print "sleeping for 1 sec"
                time.sleep(1)
index 1b0d38e..00d2810 100755 (executable)
@@ -4,10 +4,6 @@ import plc
 import auth
 api = plc.PLC(auth.auth, auth.plc)
 
-import soltesz
-#fb = soltesz.dbLoad("findbad")
-#act_all = soltesz.dbLoad("act_all")
-
 import reboot
 
 import time
index 74117a1..96720fb 100755 (executable)
@@ -6,7 +6,7 @@ import string
 import time
 
 
-import soltesz
+import database
 import comon
 import threadpool
 import syncplcdb
@@ -24,13 +24,13 @@ count = 0
 
 def main(config):
        global externalState
-       externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState) 
+       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
        if config.increment:
                # update global round number to force refreshes across all nodes
                externalState['round'] += 1
 
        l_nodes = syncplcdb.create_plcdb()
-       l_plcnodes = soltesz.dbLoad("l_plcnodes")
+       l_plcnodes = database.dbLoad("l_plcnodes")
 
        if config.node:
                l_nodes = [config.node]
@@ -59,12 +59,12 @@ def checkAndRecordState(l_nodes, l_plcnodes):
                        count += 1
 
                if count % 20 == 0:
-                       soltesz.dbDump(config.dbname, externalState)
+                       database.dbDump(config.dbname, externalState)
 
-       soltesz.dbDump(config.dbname, externalState)
+       database.dbDump(config.dbname, externalState)
 
-fb = soltesz.dbLoad('findbad')
-hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+fb = database.dbLoad('findbad')
+hn2lb = database.dbLoad("plcdb_hn2lb")
 
 def getnodesup(nodelist):
        up = 0
@@ -157,5 +157,5 @@ if __name__ == '__main__':
                print traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exitting."
-               soltesz.dbDump(config.dbname, externalState)
+               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index 3256b69..9469b81 100644 (file)
@@ -128,10 +128,10 @@ def nodegroup_display(node, fb, conf=None):
        return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)20.20s... %(kernel)43s %(lastupdate)12s " % node
 
 from model import *
-import soltesz
+import database
 
 def node_end_record(node):
-       act_all = soltesz.dbLoad("act_all")
+       act_all = database.dbLoad("act_all")
        if node not in act_all:
                del act_all
                return False
@@ -152,7 +152,7 @@ def node_end_record(node):
        rec['stage'] = "monitor-end-record"
        rec['time'] = time.time() - 7*60*60*24
        act_all[node].insert(0,rec)
-       soltesz.dbDump("act_all", act_all)
+       database.dbDump("act_all", act_all)
        del act_all
        return True
 
index d69ccfe..61d31f9 100755 (executable)
@@ -9,11 +9,11 @@ from optparse import OptionParser
 from sets import Set
 
 from nodecommon import *
-import soltesz
+import database
 
 def main():
        from config import config
-       fb = soltesz.dbLoad("findbad")
+       fb = database.dbLoad("findbad")
 
        parser = OptionParser()
        parser.set_defaults(nodelist=None,
index 76db428..a05f291 100644 (file)
@@ -1,13 +1,13 @@
 #!/usr/bin/python
 
 import sys
-import soltesz
+import database
 
 from config import config as cfg
 
 def nodes_from_time(time_str):
        path = "archive-pdb"
-       archive = soltesz.SPickle(path)
+       archive = database.SPickle(path)
        d = datetime_fromstr(config.fromtime)
        glob_str = "%s*.production.findbad.pkl" % d.strftime("%Y-%m-%d")
        os.chdir(path)
index fcaaefe..207efae 100755 (executable)
@@ -22,11 +22,11 @@ from sets import Set
 from nodequery import verify,query_to_dict,node_select
 
 from nodecommon import *
-import soltesz
+import database
 
 def main():
        from config import config
-       fb = soltesz.dbLoad("findbad")
+       fb = database.dbLoad("findbad")
 
        parser = OptionParser()
        parser.set_defaults(nodegroup="Alpha",
index 16e48a2..d09f01f 100755 (executable)
@@ -4,7 +4,7 @@ import plc
 import auth
 api = plc.PLC(auth.auth, auth.plc)
 
-import soltesz
+import database
 import reboot
 import time
 from datetime import datetime, timedelta
@@ -20,7 +20,7 @@ def get_filefromglob(d, str):
        import glob
        # TODO: This is aweful.
        path = "archive-pdb"
-       archive = soltesz.SPickle(path)
+       archive = database.SPickle(path)
        glob_str = "%s*.%s.pkl" % (d.strftime("%Y-%m-%d"), str)
        os.chdir(path)
        #print glob_str
@@ -89,7 +89,7 @@ def main():
        config.parse_args()
 
        path = "archive-pdb"
-       archive = soltesz.SPickle(path)
+       archive = database.SPickle(path)
 
        if config.fromtime:
                begin = config.fromtime
index 2a1d5f0..23afab9 100755 (executable)
@@ -4,7 +4,7 @@ import plc
 import auth
 api = plc.PLC(auth.auth, auth.plc)
 
-import soltesz
+import database
 import reboot
 
 import time
@@ -161,8 +161,8 @@ if config.findbad:
        configmodule.setFileFromList(file, config.args)
        os.system("./findbad.py --cachenodes --debug=0 --dbname=findbad --increment --nodelist %s" % file)
 
-fb = soltesz.dbLoad("findbad")
-act_all = soltesz.dbLoad("act_all")
+fb = database.dbLoad("findbad")
+act_all = database.dbLoad("act_all")
 
 for node in config.args:
        config.node = node
@@ -194,7 +194,7 @@ for node in config.args:
                        #rec['stage'] = "monitor-end-record"
                        #rec['time'] = time.time() - 7*60*60*24
                        #act_all[config.node].insert(0,rec)
-                       #soltesz.dbDump("act_all", act_all)
+                       #database.dbDump("act_all", act_all)
 
                for act_nodeinfo in act_all[config.node]:
                        act_print_nodeinfo(act_nodeinfo, header)
index 28cedb2..3ee4236 100755 (executable)
@@ -5,7 +5,7 @@ import auth
 api = plc.PLC(auth.auth, auth.plc)
 
 import sys
-import soltesz
+import database
 from nodecommon import *
 from policy import Diagnose
 import glob
@@ -16,7 +16,7 @@ import time
 import re
 
 #fb = {}
-fb = soltesz.dbLoad("findbad")
+fb = database.dbLoad("findbad")
 fbpcu = {}
 
 class NoKeyException(Exception): pass
@@ -264,7 +264,7 @@ def main():
        
        if config.fromtime:
                path = "archive-pdb"
-               archive = soltesz.SPickle(path)
+               archive = database.SPickle(path)
                d = datetime_fromstr(config.fromtime)
                glob_str = "%s*.production.findbad.pkl" % d.strftime("%Y-%m-%d")
                os.chdir(path)
@@ -274,9 +274,9 @@ def main():
                os.chdir("..")
                fb = archive.load(file[:-4])
        else:
-               fb = soltesz.dbLoad("findbad")
+               fb = database.dbLoad("findbad")
 
-       fbpcu = soltesz.dbLoad("findbadpcus")
+       fbpcu = database.dbLoad("findbadpcus")
 
        if config.nodelist:
                nodelist = config.getListFromFile(config.nodelist)
index ba9e83c..5b71845 100755 (executable)
--- a/pcubad.py
+++ b/pcubad.py
@@ -7,7 +7,7 @@ import time
 
 from reboot import pcu_name
 
-import soltesz
+import database
 import comon
 import threadpool
 import syncplcdb
@@ -25,12 +25,12 @@ count = 0
 
 def main(config):
        global externalState
-       externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState) 
+       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
        if config.increment:
                # update global round number to force refreshes across all pcus
                externalState['round'] += 1
 
-       l_plcpcus = soltesz.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
+       l_plcpcus = database.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
 
        l_pcu = None
        if config.pcu:
@@ -65,12 +65,12 @@ def checkAndRecordState(l_pcus, l_plcpcus):
                        count += 1
 
                if count % 20 == 0:
-                       soltesz.dbDump(config.dbname, externalState)
+                       database.dbDump(config.dbname, externalState)
 
-       soltesz.dbDump(config.dbname, externalState)
+       database.dbDump(config.dbname, externalState)
 
-fbpcu = soltesz.dbLoad('findbadpcus')
-hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+fbpcu = database.dbLoad('findbadpcus')
+hn2lb = database.dbLoad("plcdb_hn2lb")
 
 def get(fb, path):
        indexes = path.split("/")
@@ -159,5 +159,5 @@ if __name__ == '__main__':
                print traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exitting."
-               soltesz.dbDump(config.dbname, externalState)
+               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index 3c61cd1..20f9895 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import soltesz
+import database
 import plc
 from optparse import OptionParser
 import sys
@@ -33,7 +33,7 @@ if not config.run:
        print "Add --run to actually perform the command"
        sys.exit(1)
 
-pculist = soltesz.if_cached_else_refresh(1, 
+pculist = database.if_cached_else_refresh(1, 
                                                        config.refresh, 
                                                        "pculist", 
                                                        lambda : plc.GetPCUs())
@@ -54,4 +54,4 @@ for pcu in pculist:
                if values['reboot'] == 0:
                        print "%6d %20s %50s %s" % (pcu['pcu_id'], pcu['password'], "%s@%s" % (pcu['username'], host), portstatus)
 
-#soltesz.dbDump("pculist", pculist, 'php')
+#database.dbDump("pculist", pculist, 'php')
index 1d69ea0..34d90e0 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import soltesz
+import database 
 from config import config
 from optparse import OptionParser
 parser = OptionParser()
@@ -20,5 +20,5 @@ if config.output is None:
        # just use the input name.
        config.output = config.input
 
-data = soltesz.dbLoad(config.input)
-soltesz.dbDump(config.output, data, 'php')
+data = database.dbLoad(config.input)
+database.dbDump(config.output, data, 'php')
diff --git a/plc.py b/plc.py
index f609abb..0506ad5 100644 (file)
--- a/plc.py
+++ b/plc.py
@@ -53,8 +53,8 @@ class PLC:
                return self.api.__repr__()
 
 def getAuthAPI():
-       import auth
-       return PLC(auth.auth, auth.plc)
+       import monitorconfig
+       return PLC(monitorconfig.API_AUTH, monitorconfig.API_SERVER)
 
 '''
 Returns list of nodes in dbg as reported by PLC
index 2afba4d..e72ec8b 100644 (file)
--- a/policy.py
+++ b/policy.py
@@ -19,7 +19,7 @@ import plc
 import sys
 import os
 import reboot
-import soltesz
+import database
 import string
 from www.printbadnodes import cmpCategoryVal
 from config import config
@@ -102,13 +102,13 @@ class Merge(Thread):
                self.toRT = toRT
                self.merge_list = l_merge
                # the hostname to loginbase mapping
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
 
                # Previous actions taken on nodes.
-               self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {})
-               self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
+               self.act_all = database.if_cached_else(1, "act_all", lambda : {})
+               self.findbad = database.if_cached_else(1, "findbad", lambda : {})
 
-               self.cache_all = soltesz.if_cached_else(1, "act_all", lambda : {})
+               self.cache_all = database.if_cached_else(1, "act_all", lambda : {})
                self.sickdb = {}
                self.mergedb = {}
                Thread.__init__(self)
@@ -286,8 +286,8 @@ class Merge(Thread):
 class Diagnose(Thread):
        def __init__(self, fromRT):
                self.fromRT = fromRT
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
-               self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+               self.findbad = database.if_cached_else(1, "findbad", lambda : {})
 
                self.diagnose_in = {}
                self.diagnose_out = {}
@@ -316,7 +316,7 @@ class Diagnose(Thread):
 
                if config.policysavedb:
                        print "Saving Databases... diagnose_out"
-                       soltesz.dbDump("diagnose_out", self.diagnose_out)
+                       database.dbDump("diagnose_out", self.diagnose_out)
 
        def accumSickSites(self):
                """
@@ -950,12 +950,12 @@ class Action(Thread):
                self.l_action = l_action
 
                # the hostname to loginbase mapping
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
 
                # Actions to take.
-               self.diagnose_db = soltesz.if_cached_else(1, "diagnose_out", lambda : {})
+               self.diagnose_db = database.if_cached_else(1, "diagnose_out", lambda : {})
                # Actions taken.
-               self.act_all   = soltesz.if_cached_else(1, "act_all", lambda : {})
+               self.act_all   = database.if_cached_else(1, "act_all", lambda : {})
 
                # A dict of actions to specific functions. PICKLE doesnt' like lambdas.
                self.actions = {}
@@ -995,7 +995,7 @@ class Action(Thread):
                        print err
                        if config.policysavedb:
                                print "Saving Databases... act_all"
-                               soltesz.dbDump("act_all", self.act_all)
+                               database.dbDump("act_all", self.act_all)
                        sys.exit(1)
 
                print_stats("sites_observed", stats)
@@ -1007,10 +1007,10 @@ class Action(Thread):
 
                if config.policysavedb:
                        print "Saving Databases... act_all"
-                       #soltesz.dbDump("policy.eventlog", self.eventlog)
+                       #database.dbDump("policy.eventlog", self.eventlog)
                        # TODO: remove 'diagnose_out', 
                        #       or at least the entries that were acted on.
-                       soltesz.dbDump("act_all", self.act_all)
+                       database.dbDump("act_all", self.act_all)
 
        def accumSites(self):
                """
@@ -1230,10 +1230,10 @@ class Action(Thread):
                
                if config.policysavedb:
                        print "Saving Databases... act_all, diagnose_out"
-                       soltesz.dbDump("act_all", self.act_all)
+                       database.dbDump("act_all", self.act_all)
                        # remove site record from diagnose_out, it's in act_all as done.
                        del self.diagnose_db[loginbase]
-                       soltesz.dbDump("diagnose_out", self.diagnose_db)
+                       database.dbDump("diagnose_out", self.diagnose_db)
 
                print "sleeping for 1 sec"
                time.sleep(1)
index b411b34..0d6ccec 100755 (executable)
@@ -1,14 +1,14 @@
 #!/usr/bin/python
-import soltesz
+import database
 from config import config
 from optparse import OptionParser
 from www.printbadnodes import *
 
 def main():
        global fb
-       db = soltesz.dbLoad(config.dbname)
-       fb = soltesz.dbLoad("findbadpcus")
-       act= soltesz.dbLoad("act_all")
+       db = database.dbLoad(config.dbname)
+       fb = database.dbLoad("findbadpcus")
+       act= database.dbLoad("act_all")
 
        ## Field widths used for printing
        maxFieldLengths = { 'nodename' : -45,
index a916a05..557c8bc 100755 (executable)
@@ -2,8 +2,8 @@
 
 import pprint
 import sys
-import soltesz
+import database
 
 pp = pprint.PrettyPrinter(indent=4) 
-o = soltesz.dbLoad(sys.argv[1])
+o = database.dbLoad(sys.argv[1])
 pp.pprint(o) 
index c41bac8..f45a0a7 100755 (executable)
--- a/reboot.py
+++ b/reboot.py
@@ -19,6 +19,7 @@ from subprocess import PIPE, Popen
 import ssh.pxssh as pxssh
 import ssh.pexpect as pexpect
 import socket
+import moncommands 
 
 # Use our versions of telnetlib and pyssh
 sys.path.insert(0, os.path.dirname(sys.argv[0]))
@@ -559,9 +560,8 @@ class APC(PCUControl):
 
 class IntelAMT(PCUControl):
        def run(self, node_port, dryrun):
-               import soltesz
 
-               cmd = soltesz.CMD()
+               cmd = moncommands.CMD()
                #[cmd_str = "IntelAMTSDK/Samples/RemoteControl/remoteControl"
                cmd_str = "cmdamt/remoteControl"
 
@@ -625,9 +625,8 @@ class HPiLO(PCUControl):
                
 class HPiLOHttps(PCUControl):
        def run(self, node_port, dryrun):
-               import soltesz
 
-               locfg = soltesz.CMD()
+               locfg = moncommands.CMD()
                cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                        self.host, "iloxml/Get_Network.xml", 
                                        self.username, self.password)
@@ -638,7 +637,7 @@ class HPiLOHttps(PCUControl):
                        return sout.strip()
 
                if not dryrun:
-                       locfg = soltesz.CMD()
+                       locfg = moncommands.CMD()
                        cmd = "cmdhttps/locfg.pl -s %s -f %s -u %s -p '%s' | grep 'MESSAGE' | grep -v 'No error'" % (
                                                self.host, "iloxml/Reset_Server.xml", 
                                                self.username, self.password)
@@ -1163,8 +1162,8 @@ def pcu_name(pcu):
        else:
                return None
 
-import soltesz
-fb =soltesz.dbLoad("findbadpcus")
+import database
+fb =database.dbLoad("findbadpcus")
 
 def get_pcu_values(pcu_id):
        # TODO: obviously, this shouldn't be loaded each time...
diff --git a/rt.py b/rt.py
index 4a9c3fd..4c57ea8 100644 (file)
--- a/rt.py
+++ b/rt.py
@@ -8,14 +8,12 @@ import Queue
 import time 
 import re
 import comon
-import soltesz
+import database
 from threading import *
+import monitorconfig
 
 # TODO: merge the RT mailer from mailer.py into this file.
 
-# RT database access constants file
-RT_DB_CONSTANTS_PATH='rt_db'
-
 #Logging
 logger = logging.getLogger("monitor")
 
@@ -69,17 +67,17 @@ def readConstantsFile( file_path ):
 def open_rt_db():
 
        # read plc database passwords and connect
-       rt_db_constants= readConstantsFile(RT_DB_CONSTANTS_PATH)
-       if rt_db_constants is None:
-               print "Unable to read database access constants from %s" % \
-                         RT_DB_CONSTANTS_PATH
-               return -1
+       #rt_db_constants= readConstantsFile(RT_DB_CONSTANTS_PATH)
+       #if rt_db_constants is None:
+       #       print "Unable to read database access constants from %s" % \
+       #                 RT_DB_CONSTANTS_PATH
+       #       return -1
 
        try:
-               rt_db = MySQLdb.connect(host=rt_db_constants['RT_DB_HOST'],
-                               user=rt_db_constants['RT_DB_USER'],
-                               passwd=rt_db_constants['RT_DB_PASSWORD'],
-                               db=rt_db_constants['RT_DB_NAME'])
+               rt_db = MySQLdb.connect(host=monitorconfig.RT_DB_HOST,
+                                                               user=monitorconfig.RT_DB_USER,
+                                                               passwd=monitorconfig.RT_DB_PASSWORD,
+                                                               db=monitorconfig.RT_DB_NAME)
        except Exception, err:
                print "Failed to connect to RT database: %s" %err
                return -1
@@ -173,7 +171,7 @@ def rt_tickets():
        idTickets = {}
        for t in tickets_all:
                idTickets[t['ticket_id']] = t
-       soltesz.dbDump("idTickets", idTickets)
+       database.dbDump("idTickets", idTickets)
 
        return tickets
 
@@ -206,7 +204,7 @@ def is_host_in_rt_tickets(host, ticket_blacklist, ad_rt_tickets):
                return (False, None)
 
        # This search, while O(tickets), takes less than a millisecond, 05-25-07
-       #t = soltesz.MyTimer()
+       #t = moncommands.MyTimer()
        ret = search_tickets(host, ad_rt_tickets)
        #del t
 
@@ -312,7 +310,7 @@ def main():
        logger.addHandler(ch)
 
        tickets = rt_tickets()
-       soltesz.dbDump("ad_dbTickets", tickets)
+       database.dbDump("ad_dbTickets", tickets)
 
 
 if __name__ == '__main__':
index 575ba06..35d6973 100755 (executable)
--- a/rtinfo.py
+++ b/rtinfo.py
@@ -1,8 +1,8 @@
 #!/usr/bin/python
 
-import soltesz
+import database
 
-sql = soltesz.dbLoad("idTickets")
+sql = database.dbLoad("idTickets")
 import sys
 
 sortkeys = {}
index 6406c49..fbed374 100755 (executable)
@@ -8,11 +8,12 @@ import sys
 import reboot
 from datetime import datetime, timedelta
 
-import soltesz
+import database
 import comon
 from nodecommon import color_pcu_state, datetime_fromstr
 from nodehistory import get_filefromglob
 import time
+import traceback
 
 # region
 # total
@@ -21,10 +22,10 @@ import time
 # up with good hardware & functional pcu
 
 #cm_url="http://summer.cs.princeton.edu/status/tabulator.cgi?table=table_nodeviewshort&format=formatcsv&dumpcols='name,cpuspeed,memsize,disksize'"
-#cm = soltesz.if_cached_else(1, "cmhardware", lambda : comon.comonget(cm_url))
+#cm = database.if_cached_else(1, "cmhardware", lambda : comon.comonget(cm_url))
 
 def gethardwarequality(nodename, fb):
-       if nodename in fb['nodes']:
+       if nodename in fb['nodes'] and 'comonstats' in fb['nodes'][nodename]['values']:
                cstat = fb['nodes'][nodename]['values']['comonstats']
                for field in ['cpuspeed', 'memsize', 'disksize']:
                        if field not in cstat: cstat[field] = "null"
@@ -72,7 +73,7 @@ def main():
 
        stats = {}
        path = "archive-pdb"
-       archive = soltesz.SPickle(path)
+       archive = database.SPickle(path)
 
        if len(sys.argv) > 2:
                timestr = sys.argv[1]
@@ -86,9 +87,9 @@ def main():
        fbstr = get_filefromglob(d, "production.findbad")
        fbpcustr = get_filefromglob(d, "production.findbadpcus")
 
-       l_plcnodes = soltesz.dbLoad("l_plcnodes")
-       l_plcsites = soltesz.dbLoad("l_plcsites")
-       lb2hn = soltesz.dbLoad("plcdb_lb2hn")
+       l_plcnodes = database.dbLoad("l_plcnodes")
+       l_plcsites = database.dbLoad("l_plcsites")
+       lb2hn = database.dbLoad("plcdb_lb2hn")
        fb = archive.load(fbstr) 
        fbpcu = archive.load(fbpcustr)
        reboot.fb = fbpcu
@@ -138,12 +139,21 @@ def main():
                                        CC=fields[-1]
 
                                if hostname in fb['nodes']:
+                                       if 'state' in fb['nodes'][hostname]['values']:
+                                               state = fb['nodes'][hostname]['values']['state'].lower()
+                                       else:
+                                               state = "unknown"
+
                                        args = {'cc': CC, 
                                                'site' : site['login_base'],
                                                'host' : hostname,
-                                               'status' : fb['nodes'][hostname]['values']['state'].lower(),
+                                               'status' : state,
                                                'hardware' : gethardwarequality(hostname, fb),
                                                'pcuok' : color_pcu_state(fb['nodes'][hostname]['values']) }
+                                       #except:
+                                       #       print traceback.print_exc()
+                                       #       print args
+                                       #       print fb['nodes'][hostname]['values']
                                        results.append("%(cc)7s %(status)8s %(hardware)8s %(pcuok)8s %(site)15s %(host)42s " % args)
                                        addtostats(stats, args)
                else:
index eccaa28..c9e0033 100755 (executable)
@@ -6,7 +6,7 @@ import string
 import time
 
 
-import soltesz
+import database
 import comon
 import threadpool
 import syncplcdb
@@ -24,13 +24,13 @@ count = 0
 
 def main(config):
        global externalState
-       externalState = soltesz.if_cached_else(1, config.dbname, lambda : externalState) 
+       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
        if config.increment:
                # update global round number to force refreshes across all nodes
                externalState['round'] += 1
 
        l_nodes = syncplcdb.create_plcdb()
-       l_plcsites = soltesz.dbLoad("l_plcsites")
+       l_plcsites = database.dbLoad("l_plcsites")
 
        if config.site:
                l_sites = [config.site]
@@ -59,12 +59,12 @@ def checkAndRecordState(l_sites, l_plcsites):
                        count += 1
 
                if count % 20 == 0:
-                       soltesz.dbDump(config.dbname, externalState)
+                       database.dbDump(config.dbname, externalState)
 
-       soltesz.dbDump(config.dbname, externalState)
+       database.dbDump(config.dbname, externalState)
 
-fb = soltesz.dbLoad('findbad')
-lb2hn = soltesz.dbLoad("plcdb_lb2hn")
+fb = database.dbLoad('findbad')
+lb2hn = database.dbLoad("plcdb_lb2hn")
 
 def getnodesup(nodelist):
        up = 0
@@ -144,5 +144,5 @@ if __name__ == '__main__':
                print traceback.print_exc()
                print "Exception: %s" % err
                print "Saving data... exitting."
-               soltesz.dbDump(config.dbname, externalState)
+               database.dbDump(config.dbname, externalState)
                sys.exit(0)
index 10c42ef..d248b99 100755 (executable)
@@ -4,7 +4,7 @@ import plc
 import auth
 api = plc.PLC(auth.auth, auth.plc)
 
-import soltesz
+import database
 import reboot
 
 import time
@@ -79,8 +79,8 @@ def plc_print_siteinfo(plcsite):
                diff_time(plcnode['last_contact']))
 
 
-fb = soltesz.dbLoad("findbad")
-act_all = soltesz.dbLoad("act_all")
+fb = database.dbLoad("findbad")
+act_all = database.dbLoad("act_all")
 
 for site in config.args:
        config.site = site
index b1c1baa..f42e9ed 100755 (executable)
@@ -1,6 +1,5 @@
 #!/usr/bin/python
 
-import soltesz
 import plc
 import os
 import sys
index a0fe9a5..ea61b70 100644 (file)
@@ -14,8 +14,10 @@ import shutil
 from config import config as cfg
 config = cfg()
 
+import monitorconfig
+
 DEBUG= 0
-PICKLE_PATH="pdb"
+PICKLE_PATH=monitorconfig.MONITOR_DATA_ROOT
 
 class ExceptionTimeout(Exception): pass
 
index b0e42a6..e7a8a49 100755 (executable)
@@ -2,7 +2,7 @@
 
 import plc
 from config import config
-import soltesz
+import database
 import sys
 
 config = config()
@@ -88,12 +88,12 @@ def create_plcdb():
 
        if ('cachenodes' in dir(config) and config.cachenodes) or \
                'cachenodes' not in dir(config):
-               soltesz.dbDump("plcdb_hn2lb", hn2lb)
-               soltesz.dbDump("plcdb_lb2hn", lb2hn)
-               soltesz.dbDump("plcdb_netid2ip", netid2ip)
-               soltesz.dbDump("l_plcnodenetworks", l_nodenetworks)
-               soltesz.dbDump("l_plcnodes", l_nodes)
-               soltesz.dbDump("l_plcsites", l_sites)
+               database.dbDump("plcdb_hn2lb", hn2lb)
+               database.dbDump("plcdb_lb2hn", lb2hn)
+               database.dbDump("plcdb_netid2ip", netid2ip)
+               database.dbDump("l_plcnodenetworks", l_nodenetworks)
+               database.dbDump("l_plcnodes", l_nodes)
+               database.dbDump("l_plcsites", l_sites)
        
        return l_nodes
        
index 63bdcc0..08f50b7 100755 (executable)
@@ -4,7 +4,7 @@ import os
 import sys
 import string
 import time
-import soltesz
+import database
 import plc
 import getopt
 
@@ -20,7 +20,7 @@ def main():
                print "Error: " + err.msg
                sys.exit(1)
 
-       l_ticket_blacklist = soltesz.if_cached_else(1, "l_ticket_blacklist", lambda : [])
+       l_ticket_blacklist = database.if_cached_else(1, "l_ticket_blacklist", lambda : [])
 
        for (opt, optval) in opts:
                if opt in ["-d", "--delete"]:
@@ -44,7 +44,7 @@ def main():
                        l_ticket_blacklist.append(line)
 
        print "Total %d nodes in ticket_blacklist" % (len(l_ticket_blacklist))
-       soltesz.dbDump("l_ticket_blacklist")
+       database.dbDump("l_ticket_blacklist")
        
 if __name__ == '__main__':
        import os
diff --git a/todo b/todo
index 5d70086..09bdcbe 100644 (file)
--- a/todo
+++ b/todo
@@ -4,24 +4,24 @@ TODO:
    to share very similar argument or argument sets, as well as have some
    common config options.  I'm not sure the best way to do this.
 
- * pull out global configuration information from various files, like rt_db,
-   mailer.py,  auth.py, and any others.  Create a single configuration file
-   from which all others pull.
-
  * Find a better location to place and pull the PLK files currently in the pdb
    directory.  Ultimately, these should be stored in a real DB.  Until then,
    they should sit in a location that is accessible from the www scripts,
    backend scripts, and user utilities.
 
- * add a third package for user tools that will interact with the Monitor
-   service.  Mostly, I'm guessing this would be queries for the live status
-   nodes and a more reliable 'reboot' and 'reinstall' mechanism than currently
-   availble with PLC.
+ * pull out global configuration information from various files, like rt_db,
+   mailer.py,  auth.py, and any others.  Create a single configuration file
+   from which all others pull.
 
- * convert plc and other files to use the new monitorconfig.py rather than
-   auth, or plc.*
+   - convert plc and other files to use the new monitorconfig.py rather than
+     auth, or plc.*
 
 Lower priority:
  * Add a more structured, 'automate' library of scripts and means of making
    batch calls, etc.
 
+ * add a third package for user tools that will interact with the Monitor
+   service.  Mostly, I'm guessing this would be queries for the live status
+   nodes and a more reliable 'reboot' and 'reinstall' mechanism than currently
+   availble with PLC.
+
index e0a6ffa..602c902 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import soltesz
+import database
 
 import plc
 import auth
@@ -70,7 +70,7 @@ class PenaltyMap:
        #       condition/penalty is applied, move to the next phase.
 
 
-fb = soltesz.dbLoad("findbad")
+fb = database.dbLoad("findbad")
 
 class RT(object):
        def __init__(self, ticket_id = None):
@@ -150,10 +150,10 @@ class PersistFlags(Recent):
                        db = "persistflags"
 
                try:
-                       pm = soltesz.dbLoad(db)
+                       pm = database.dbLoad(db)
                except:
-                       soltesz.dbDump(db, {})
-                       pm = soltesz.dbLoad(db)
+                       database.dbDump(db, {})
+                       pm = database.dbLoad(db)
                #print pm
                if id in pm:
                        obj = pm[id]
@@ -172,9 +172,9 @@ class PersistFlags(Recent):
                Recent.__init__(self, withintime)
 
        def save(self):
-               pm = soltesz.dbLoad(self.db)
+               pm = database.dbLoad(self.db)
                pm[self.id] = self
-               soltesz.dbDump(self.db, pm)
+               database.dbDump(self.db, pm)
 
        def resetFlag(self, name):
                self.__setattr__(name, False)
@@ -222,10 +222,10 @@ class PersistMessage(Message):
                        db = "persistmessages"
 
                try:
-                       pm = soltesz.dbLoad(db)
+                       pm = database.dbLoad(db)
                except:
-                       soltesz.dbDump(db, {})
-                       pm = soltesz.dbLoad(db)
+                       database.dbDump(db, {})
+                       pm = database.dbLoad(db)
 
                #print pm
                if id in pm:
@@ -258,9 +258,9 @@ class PersistMessage(Message):
                        self.actiontracker.setRecent()
 
                        #print "recording object for persistance"
-                       pm = soltesz.dbLoad(self.db)
+                       pm = database.dbLoad(self.db)
                        pm[self.id] = self
-                       soltesz.dbDump(self.db, pm)
+                       database.dbDump(self.db, pm)
                else:
                        # NOTE: only send a new message every week, regardless.
                        print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // 60*60*24)
@@ -274,11 +274,11 @@ class MonitorMessage(object):
 
                try:
                        if 'reset' in kwargs and kwargs['reset'] == True:
-                               soltesz.dbDump(db, {})
-                       pm = soltesz.dbLoad(db)
+                               database.dbDump(db, {})
+                       pm = database.dbLoad(db)
                except:
-                       soltesz.dbDump(db, {})
-                       pm = soltesz.dbLoad(db)
+                       database.dbDump(db, {})
+                       pm = database.dbLoad(db)
 
                #print pm
                if id in pm:
@@ -346,11 +346,11 @@ class PersistSitePenalty(SitePenalty):
 
                try:
                        if 'reset' in kwargs and kwargs['reset'] == True:
-                               soltesz.dbDump(db, {})
-                       pm = soltesz.dbLoad(db)
+                               database.dbDump(db, {})
+                       pm = database.dbLoad(db)
                except:
-                       soltesz.dbDump(db, {})
-                       pm = soltesz.dbLoad(db)
+                       database.dbDump(db, {})
+                       pm = database.dbLoad(db)
 
                #print pm
                if id in pm:
@@ -369,9 +369,9 @@ class PersistSitePenalty(SitePenalty):
                self.id = id
 
        def save(self):
-               pm = soltesz.dbLoad(self.db)
+               pm = database.dbLoad(self.db)
                pm[self.id] = self
-               soltesz.dbDump(self.db, pm)
+               database.dbDump(self.db, pm)
 
 
 class Target:
@@ -413,7 +413,7 @@ class Record(object):
        def __init__(self, hostname, data):
                self.hostname = hostname
                self.data = data
-               self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
                self.loginbase = self.plcdb_hn2lb[self.hostname]
                return
 
@@ -612,7 +612,7 @@ class NodeRecord:
                        self.ticket.closeTicket()
 
        def exempt_from_penalties(self):
-               bl = soltesz.dbLoad("l_blacklist")
+               bl = database.dbLoad("l_blacklist")
                return self.hostname in bl
 
        def penalties(self):
@@ -644,10 +644,10 @@ class NodeRecord:
 
 if __name__ == "__main__":
        #r = RT()
-       #r.email("test", "body of test message", ['soltesz@cs.princeton.edu'])
+       #r.email("test", "body of test message", ['database@cs.princeton.edu'])
        #from emailTxt import mailtxt
        print "loaded"
-       #soltesz.dbDump("persistmessages", {});
+       #database.dbDump("persistmessages", {});
        #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah -  days down\n'}
        #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
        #m.send(['soltesz@cs.utk.edu'])