use plccache, instead of directly loading pkl files
author Stephen Soltesz <soltesz@cs.princeton.edu>
Wed, 12 Nov 2008 00:31:22 +0000 (00:31 +0000)
committer Stephen Soltesz <soltesz@cs.princeton.edu>
Wed, 12 Nov 2008 00:31:22 +0000 (00:31 +0000)
dumpact.py
pcuinfo.py
findbad.py
showlatlon.py
nodebad.py
pcubad.py
dumpdiag.py
findbadpcu.py
nodeinfo.py
sitebad.py
monitor_policy.py

operate on new database model
nodequery.py
clean_policy.py
nodecommon.py  - updated key names in fields
bootman.py
nodeinfo.py
siteinfo.py
nodegroups.py
nodeconfig.py

Use latest monitor module
getconf.py
grouprins.py

todo

24 files changed:
bootman.py
clean_policy.py
dumpact.py
dumpdiag.py
findbad.py
findbadpcu.py
getconf.py
grouprins.py
monitor/wrapper/plccache.py
monitor_policy.py
nodebad.py
nodecommon.py
nodeconfig.py
nodegroups.py
nodeinfo.py
nodequery.py
pcubad.py
pcuinfo.py
printbadcsv.py
showlatlon.py
sitebad.py
siteinfo.py
todo
unified_model.py

index 0e13517..e8dc7b8 100755 (executable)
@@ -36,13 +36,6 @@ from Rpyc import SocketConnection, Async
 from Rpyc.Utils import *
 fb = None
 
-def get_fbnode(node):
-       global fb
-       if fb is None:
-               fb = database.dbLoad("findbad")
-       fbnode = fb['nodes'][node]['values']
-       return fbnode
-
 class NodeConnection:
        def __init__(self, connection, node, config):
                self.node = node
@@ -314,7 +307,7 @@ def reboot(hostname, config=None, forced_action=None):
 
        # NOTE: Nothing works if the bootcd is REALLY old.
        #       So, this is the first step.
-       fbnode = get_fbnode(hostname)
+       fbnode = FindbadNodeRecord.get_latest_by(hostname=hostname).to_dict()
        if fbnode['category'] == "OLDBOOTCD":
                print "...NOTIFY OWNER TO UPDATE BOOTCD!!!"
                args = {}
index 8e35903..516a8de 100644 (file)
@@ -6,6 +6,8 @@ from unified_model import cmpCategoryVal
 import sys
 import emailTxt
 import string
+from monitor.wrapper import plccache
+from datetime import datetime
 
 from rt import is_host_in_rt_tickets
 import plc
@@ -20,101 +22,76 @@ from const import *
 
 from unified_model import *
 
-def get_ticket_id(record):
-       if 'ticket_id' in record and record['ticket_id'] is not "" and record['ticket_id'] is not None:
-               return record['ticket_id']
-       elif            'found_rt_ticket' in record and \
-                record['found_rt_ticket'] is not "" and \
-                record['found_rt_ticket'] is not None:
-               return record['found_rt_ticket']
-       else:
-               return None
-
 class MonitorMergeDiagnoseSendEscellate:
        act_all = None
-       fb = None
 
        def __init__(self, hostname, act):
                self.hostname = hostname
                self.act = act
                self.plcdb_hn2lb = None
                if self.plcdb_hn2lb is None:
-                       self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+                       self.plcdb_hn2lb = plccache.plcdb_hn2lb 
                self.loginbase = self.plcdb_hn2lb[self.hostname]
                return
 
-       def getFBRecord(self):
-               if MonitorMergeDiagnoseSendEscellate.fb == None:
-                       MonitorMergeDiagnoseSendEscellate.fb = database.dbLoad("findbad")
-
-               fb = MonitorMergeDiagnoseSendEscellate.fb
-
-               if self.hostname in fb['nodes']:
-                       fbnode = fb['nodes'][self.hostname]['values']
+       def getFBRecords(self):
+               fbrecs = FindbadNodeRecord.get_latest_n_by(hostname=self.hostname)
+               fbnodes = None
+               if fbrec: 
+                       fbnodes = fbrecs
                else:
-                       raise Exception("Hostname %s not in scan database"% self.hostname)
-               return fbnode
-
-       def getActionRecord(self):
-               # update ticket status
-               if MonitorMergeDiagnoseSendEscellate.act_all == None:
-                       MonitorMergeDiagnoseSendEscellate.act_all = database.dbLoad("act_all")
-
-               act_all = MonitorMergeDiagnoseSendEscellate.act_all 
-
-               if self.hostname in act_all and len(act_all[self.hostname]) > 0:
-                       actnode = act_all[self.hostname][0]
+                       fbnodes = None
+               return fbnodes
+
+       def getLastActionRecord(self):
+               actrec = ActionRecord.get_latest_by(hostname=self.hostname)
+               actnode = None
+               if actrec:
+                       actnode = actrec
                else:
                        actnode = None
                return actnode
 
-       def getKernel(self, unamestr):
-               s = unamestr.split()
-               if len(s) > 2:
-                       return s[2]
-               else:
-                       return ""
-
-       def mergeRecord(self, fbnode, actnode):
-               fbnode['kernel'] = self.getKernel(fbnode['kernel'])
-               fbnode['stage'] = "findbad"
-               fbnode['message'] = None
-               fbnode['args'] = None
-               fbnode['info'] = None
-               fbnode['log'] = None
-               fbnode['time'] = time.time()
-               fbnode['email'] = TECH
-               fbnode['action-level'] = 0
-               fbnode['action'] = ['noop']
-               fbnode['date_created'] = time.time()
-
-               if actnode is None: # there is no entry in act_all
-                       actnode = {} 
-                       actnode.update(fbnode)
-                       actnode['ticket_id'] = ""
-                       actnode['prev_category'] = "ERROR" 
+       def getPreviousCategory(self, actrec):
+               ret = None
+               if actrec:
+                       ret = actrec.findbad_records[0].observed_category
                else:
-                       actnode['prev_category']= actnode['category']
-                       actnode['comonstats']   = fbnode['comonstats']
-                       actnode['category']             = fbnode['category']
-                       actnode['state']                = fbnode['state']
-                       actnode['kernel']               = fbnode['kernel']
-                       actnode['bootcd']               = fbnode['bootcd']
-                       actnode['plcnode']              = fbnode['plcnode']
-                       ticket = get_ticket_id(actnode)
-                       if ticket is None: actnode['ticket_id'] = ""
-                       actnode['rt'] = mailer.getTicketStatus(ticket)
-
-                       #for key in actnode.keys():
-                       #       print "%10s %s %s " % (key, "==", actnode[key])
-                       #print "----------------------------"
+                       ret = "ERROR"
+               return ret
 
-               return actnode
+
+       def mergeRecord(self, fbnodes, actrec):
+
+               actdefault = {}
+               actdefault['date_created'] = datetime.now()
+               actdefault['date_action_taken'] = datetime.now()
+
+               actdefault['stage'] = "initial"
+               actdefault['message_series'] = None
+               actdefault['message_index'] = None
+               actdefault['message_arguments'] = None
+
+               actdefault['send_email_to'] = TECH
+               actdefault['penalty_level'] = 0
+               actdefault['action'] = [ 'noop' ]
+               actdefault['take_action'] = False
+
+               actdefault['ticket_id'] = ""
+               actdefault['findbad_records'] = fbnodes
+               actdefault['last_action_record'] = actrec
+
+               actdefault['prev_category'] = self.getPreviousCategory(actrec)
+               actdefault['category']          = fbnodes[0].observed_category
+
+               actdefault['rt'] = mailer.getTicketStatus(actrec.ticket_id)
+
+               return actdefault
 
        def run(self):
-               fbnode = self.getFBRecord()
-               actnode= self.getActionRecord()
-               actrec = self.mergeRecord(fbnode, actnode)
+               fbnodes = self.getFBRecords()
+               actnode= self.getLastActionRecord()
+               actrec = self.mergeRecord(fbnodes, actnode)
                record = Record(self.hostname, actrec)
                diag   = self.diagnose(record)
                if self.act and diag is not None:
@@ -122,26 +99,21 @@ class MonitorMergeDiagnoseSendEscellate:
        
        def diagnose(self, record):
 
-               diag = PersistFlags(record.hostname, 60*60*24, db='persist_diagnose_flags')
+               diag = {}
                # NOTE: change record stage based on RT status.
-               #diag.setFlag('ResetStage')
                if record.stageIswaitforever():
                        ticket = record.data['rt']
                        if 'new' in ticket['Status']:
                                print "Resetting Stage!!!!!"
-                       #       diag.setFlag('ResetStage')
                                record.reset_stage()
-                       #if diag.getFlag('ResetStage'):
-                       #       print "diagnose: resetting stage"
-                       #       diag.resetFlag('ResetStage')
                                
                        if 'resolved' in ticket['Status']:
-                               diag.setFlag('RTEndRecord')
+                               diag['RTEndRecord'] = True
 
                # NOTE: take category, and prepare action
                category = record.getCategory()
                if category == "error":
-                       diag.setFlag('SendNodedown')
+                       diag['SendNodedown'] = True
                        record.data['message_series'] = emailTxt.mailtxt.newdown
                        record.data['log'] = self.getDownLog(record)
 
@@ -149,7 +121,7 @@ class MonitorMergeDiagnoseSendEscellate:
                        state = record.getState()
                        if state == "boot":
                                if record.severity() != 0:
-                                       diag.setFlag('SendThankyou')
+                                       diag['SendThankyou'] = True
                                        print "RESETTING STAGE: improvement"
                                        record.data['stage'] = 'improvement'
                                        record.data['message_series'] = emailTxt.mailtxt.newthankyou
@@ -167,105 +139,85 @@ class MonitorMergeDiagnoseSendEscellate:
 
 
                # TODO: how to not send email?...
-               record = self.checkStageAndTime(diag,record)
+               record = self.checkStageAndTime(record)
                #if record:
                print "diagnose: checkStageAndTime Returned Valid Record"
-               site = PersistFlags(self.loginbase, 1, db='site_persistflags')
+               siterec = HistorySiteRecord.by_loginbase(self.loginbase)
 
-               if "good" not in site.status: #  != "good":
+               if "good" not in siterec.status: #  != "good":
                        print "diagnose: Setting site %s for 'squeeze'" % self.loginbase
-                       diag.setFlag('Squeeze')
+                       diag['Squeeze'] = True
                else:
                        print "diagnose: Setting site %s for 'backoff'" % self.loginbase
-                       diag.setFlag('BackOff')
+                       diag['BackOff'] = True
 
-               diag.save()
                return diag
-               #else:
-               #       print "checkStageAndTime Returned NULL Record"
-               #       return None
 
        def action(self, record, diag):
 
                message = None
 
-               #print record.data['stage']
-               #print "improvement" in record.data['stage']
-               #print self.getSendEmailFlag(record)
                print "%s %s DAYS DOWN" % ( self.hostname, Record.getDaysDown(record.data) )
                if ( self.getSendEmailFlag(record) and Record.getDaysDown(record.data) >= 2 ) or \
                        "monitor-end-record" in record.data['stage']:
                        print "action: getting message"
+                       #### Send EMAIL
                        message = record.getMessage(record.data['ticket_id'])
                        if message:
-                               #message.reset()
                                print "action: sending email"
                                message.send(record.getContacts())
-                               #print "DEBUG NOT SENDING MESSAGE WHEN I SHOULD BE!!!!!"
-                               #print "DEBUG NOT SENDING MESSAGE WHEN I SHOULD BE!!!!!"
-                               #print "DEBUG NOT SENDING MESSAGE WHEN I SHOULD BE!!!!!"
-                               #print message
                                if message.rt.ticket_id:
                                        print "action: setting record ticket_id"
                                        record.data['ticket_id'] = message.rt.ticket_id
 
-                       if ( record.data['takeaction'] and diag.getFlag('Squeeze') ): 
+                       #### APPLY PENALTY
+                       if ( record.data['take_action'] and diag['Squeeze'] ): 
                                print "action: taking action"
-                               record.takeAction(record.data['action-level'])
-                               diag.resetFlag('Squeeze')
-                               diag.save()
+                               record.takeAction(record.data['penalty_level'])
+                               del diag['Squeeze']
                        if diag.getFlag('BackOff'):
                                record.takeAction(0)
-                               diag.resetFlag('BackOff')
-                               diag.save()
+                               del diag['BackOff']
 
+                       #### SAVE TO DB
                        if record.saveAction():
                                print "action: saving act_all db"
                                self.add_and_save_act_all(record)
                        else:
                                print "action: NOT saving act_all db"
-                               print "stage: %s %s" % ( record.data['stage'], record.data['save-act-all'] )
+                               print "stage: %s %s" % ( record.data['stage'], record.data['save_act_all'] )
 
-                       if record.improved() or diag.getFlag('RTEndRecord'):
+                       #### END RECORD
+                       if record.improved() or diag['RTEndRecord']:
                                print "action: end record for %s" % self.hostname
                                record.end_record()
-                               diag.setFlag('CloseRT')
-                               diag.resetFlag('RTEndRecord')
-                               diag.save()
-                               #return None
+                               diag['CloseRT'] = True
+                               del diag['RTEndRecord']
 
+                       #### CLOSE RT TICKET
                        if message:
-                               if diag.getFlag('CloseRT'):
+                               if diag['CloseRT']:
                                        message.rt.closeTicket()
-                                       diag.resetFlag('CloseRT')
-                                       diag.save()
+                                       del diag['CloseRT']
 
                else:
                        print "NOT sending email : %s %s" % (config.mail, record.data['rt'])
 
                return
 
-       def getSendEmailFlag(self, record):
-               if not config.mail:
-                       return False
-
-               # resend if open & created longer than 30 days ago.
-               if  'rt' in record.data and \
-                       'Status' in record.data['rt'] and \
-                       "open" in record.data['rt']['Status'] and \
-                       record.data['rt']['Created'] > int(time.time() - 60*60*24*30):
-                       # if created-time is greater than the thirty days ago from the current time
-                       return False
-
-               return True
-
        def add_and_save_act_all(self, record):
-               self.act_all = database.dbLoad("act_all")
-               if self.hostname not in self.act_all:
-                       self.act_all[self.hostname] = []
-               self.act_all[self.hostname].insert(0,record.data)
-               database.dbDump("act_all", self.act_all)
-               
+               """
+                       Read the sync record for this node, and increment the round and
+                       create an ActionRecord for this host using the record.data values.
+               """
+               recsync = RecordActionSync.get_by(hostname=self.hostname)
+               rec = RecordAction(hostname=self.hostname)
+               recsync.round += 1
+               record.data['round'] = recsync.round
+               # TODO: we will need to delete some of these before setting them in the DB.
+               rec.set(**record.data)
+               rec.flush()
+
        def getDownLog(self, record):
 
                record.data['args'] = {'nodename': self.hostname}
@@ -300,140 +252,82 @@ class MonitorMergeDiagnoseSendEscellate:
                        log = "IMPR: %s improved to %s " % (self.hostname, record.data['category'])
                return log
 
-       def checkStageAndTime(self, diag, record):
+       def makeRecord(self, **kwargs):
+               rec = {}
+               for key in kwargs.keys():
+                       rec[key] = kwargs[key]
+               return rec
+
+       def checkStageAndTime(self, record):
+       """
+               The core variables are:
+
+                       send_email_to  : defines who to send messages to at this time
+                       take_action    : whether or not to take action
+                       penalty_level  : how much of a penalty to apply
+                       message_index  : where in the escellation sequence we are.
+                       save_act_all   : whether or not to save the action record in the db.
+
+                       action/stage   : stage tracks which state we're in.
+       """
+               stages = {
+                       "initial"               : [ { action='noop', next="weekone"}],
+                       "weekone"               : [ { action='noop',         index=0, save=True, email=TECH,         length=7*SPERDAY,  next="weektwo" }, ],
+                       "weektwo"               : [ { action='nocreate',     index=1, save=True, email=TECH|PI,      length=7*SPERDAY,  next="waitforever" }, ],
+                       "waitforever"   : [ { action='suspendslices',index=2, save=True, email=TECH|PI|USER, length=7*SPERDAY,  next="waitforever" }, ],
+                       "paused"                : [ { action='noop',                              save=True                                              length=30*SPERDAY, next="weekone" }, ]
+                       "improvement"   : [ { action='close_rt',     index=0, save=True, email=TECH,         next="monitor-end-record" }, ],
+               }
+               # TODO: make this time relative to the PREVIOUS action taken.
                current_time = time.time()
-               delta = current_time - record.data['time']
-               #print record.data
-               if   'findbad' in record.data['stage']:
+               current_stage = record.getMostRecentStage()
+               recent_time   = record.getMostRecentTime()
+
+               delta = current_time - recent_time
+
+               if current_stage in stages:
+                       values = stages[current_stage][0]
+
+               if delta >= values['length']:
+                       print "checkStageAndTime: transition to next stage"
+                       new_stage = values['next']
+                       values = stages[new_stage]
+
+               elif delta >= values['length']/3 and not 'second_mail_at_oneweek' in record.data:
+                       print "checkStageAndTime: second message in one week for stage two"
+                       take_action=False
+                       pass
+               else:
+                       # DO NOTHING
+                       take_action=False, 
+                       save_act_all=False, 
+                       message_index=None, 
+                       print "checkStageAndTime: second message in one week for stage two"
+
+               rec = self.makeRecord( stage=new_stage, send_email_to=values['email'],
+                                                          action=values['action'], message_index=values['index'], 
+                                                          save_act_all=values['save'], penalty_level=values['index'], 
+                                                          date_action_taken=current_time)
+               record.data.update(rec)
+
+
+               if   'initial' in record.data['stage']:
                        # The node is bad, and there's no previous record of it.
-                       record.data['email'] = TECH
-                       record.data['action'] = ['noop']
-                       record.data['takeaction'] = False
-                       record.data['message'] = record.data['message_series'][0]
-                       record.data['stage'] = 'stage_actinoneweek'
-                       record.data['save-act-all'] = True
-                       record.data['action-level'] = 0
-
-               elif 'reboot_node' in record.data['stage']:
-                       record.data['email'] = TECH
-                       record.data['action'] = ['noop']
-                       record.data['message'] = record.data['message_series'][0]
-                       record.data['stage'] = 'stage_actinoneweek'
-                       record.data['takeaction'] = False
-                       record.data['save-act-all'] = False
-                       record.data['action-level'] = 0
-                       
+                       rec = self.makeRecord(
+                                                       stage="weekone", send_email_to=TECH, 
+                                                       action=['noop'], take_action=False, 
+                                                       message_index=0, save_act_all=True, 
+                                                       penalty_level=0, )
+                       record.data.update(rec)
+
                elif 'improvement' in record.data['stage']:
                        print "checkStageAndTime: backing off of %s" % self.hostname
-                       record.data['action'] = ['close_rt']
-                       record.data['takeaction'] = True
-                       record.data['message'] = record.data['message_series'][0]
-                       record.data['stage'] = 'monitor-end-record'
-                       record.data['save-act-all'] = True
-                       record.data['action-level'] = 0
-
-               elif 'actinoneweek' in record.data['stage']:
-                       if delta >= 7 * SPERDAY: 
-                               print "checkStageAndTime: transition to next stage actintwoweeks"
-                               record.data['email'] = TECH | PI
-                               record.data['stage'] = 'stage_actintwoweeks'
-                               record.data['message'] = record.data['message_series'][1]
-                               record.data['action'] = ['nocreate' ]
-                               record.data['time'] = current_time              # reset clock for waitforever
-                               record.data['takeaction'] = True
-                               record.data['save-act-all'] = True
-                               record.data['action-level'] = 1
-                       elif delta >= 3* SPERDAY and not 'second-mail-at-oneweek' in record.data:
-                               print "checkStageAndTime: second message in one week"
-                               record.data['email'] = TECH 
-                               record.data['message'] = record.data['message_series'][0]
-                               record.data['action'] = ['sendmailagain-waitforoneweekaction' ]
-                               record.data['second-mail-at-oneweek'] = True
-                               record.data['takeaction'] = False
-                               record.data['save-act-all'] = True
-                               record.data['action-level'] = 0
-                       else:
-                               record.data['message'] = None
-                               record.data['action'] = ['waitforoneweekaction' ]
-                               record.data['takeaction'] = False
-                               record.data['save-act-all'] = False
-                               record.data['action-level'] = 0
-                               print "checkStageAndTime: ignoring this record for: %s" % self.hostname
-                               #return None                    # don't send if there's no action
-
-               elif 'actintwoweeks' in record.data['stage']:
-                       if delta >= 7 * SPERDAY:
-                               print "checkStageAndTime: transition to next stage waitforever"
-                               record.data['email'] = TECH | PI | USER
-                               record.data['stage'] = 'stage_waitforever'
-                               record.data['message'] = record.data['message_series'][2]
-                               record.data['action'] = ['suspendslices']
-                               record.data['time'] = current_time              # reset clock for waitforever
-                               record.data['takeaction'] = True
-                               record.data['save-act-all'] = True
-                               record.data['action-level'] = 2
-                       elif delta >= 3* SPERDAY and not 'second-mail-at-twoweeks' in record.data:
-                               print "checkStageAndTime: second message in one week for stage two"
-                               record.data['email'] = TECH | PI
-                               record.data['message'] = record.data['message_series'][1]
-                               record.data['action'] = ['sendmailagain-waitfortwoweeksaction' ]
-                               record.data['second-mail-at-twoweeks'] = True
-                               record.data['takeaction'] = False
-                               record.data['save-act-all'] = True
-                               record.data['action-level'] = 1
-                       else:
-                               record.data['message'] = None
-                               record.data['takeaction'] = False
-                               record.data['action'] = ['waitfortwoweeksaction']
-                               record.data['save-act-all'] = False
-                               print "checkStageAndTime: second message in one week for stage two"
-                               record.data['action-level'] = 1
-                               #return None                    # don't send if there's no action
-
-               elif 'ticket_waitforever' in record.data['stage']:
-                       record.data['email'] = TECH
-                       record.data['takeaction'] = True
-                       if 'first-found' not in record.data:
-                               record.data['first-found'] = True
-                               record.data['log'] += " firstfound"
-                               record.data['action'] = ['ticket_waitforever']
-                               record.data['message'] = None
-                               record.data['time'] = current_time
-                               record.data['save-act-all'] = True
-                               record.data['action-level'] = 2
-                       else:
-                               if delta >= 7*SPERDAY:
-                                       record.data['action'] = ['ticket_waitforever']
-                                       record.data['message'] = None
-                                       record.data['time'] = current_time              # reset clock
-                                       record.data['save-act-all'] = True
-                                       record.data['action-level'] = 2
-                               else:
-                                       record.data['action'] = ['ticket_waitforever']
-                                       record.data['message'] = None
-                                       record.data['takeaction'] = False
-                                       record.data['save-act-all'] = False
-                                       record.data['action-level'] = 2
-                                       #return None
-
-               elif 'waitforever' in record.data['stage']:
-                       # more than 3 days since last action
-                       # TODO: send only on weekdays.
-                       # NOTE: expects that 'time' has been reset before entering waitforever stage
-                       record.data['takeaction'] = True
-                       if delta >= 3*SPERDAY:
-                               record.data['action'] = ['email-againwaitforever']
-                               record.data['message'] = record.data['message_series'][2]
-                               record.data['time'] = current_time              # reset clock
-                               record.data['save-act-all'] = True
-                               record.data['action-level'] = 2
-                       else:
-                               record.data['action'] = ['waitforever']
-                               record.data['message'] = None
-                               record.data['takeaction'] = False
-                               record.data['save-act-all'] = False
-                               record.data['action-level'] = 2
-                               #return None                    # don't send if there's no action
+                       rec = self.makeRecord(
+                                                       stage='monitor-end-record', send_email_to=TECH, 
+                                                       action=['close_rt'], take_action=True, 
+                                                       message_index=0, save_act_all=True, 
+                                                       penalty_level=0, )
+                       record.data.update(rec)
 
                else:
                        # There is no action to be taken, possibly b/c the stage has
@@ -443,16 +337,15 @@ class MonitorMergeDiagnoseSendEscellate:
                        #       2. delta is not big enough to bump it to the next stage.
                        # TODO: figure out which. for now assume 2.
                        print "UNKNOWN stage for %s; nothing done" % self.hostname
-                       record.data['action'] = ['unknown']
-                       record.data['message'] = record.data['message_series'][0]
-
-                       record.data['email'] = TECH
-                       record.data['action'] = ['noop']
-                       record.data['message'] = record.data['message_series'][0]
-                       record.data['stage'] = 'stage_actinoneweek'
-                       record.data['time'] = current_time              # reset clock
-                       record.data['takeaction'] = False
-                       record.data['save-act-all'] = True
+                       rec = self.makeRecord(
+                                                       stage='weekone', send_email_to=TECH,
+                                                       action=['noop'], 
+                                                       take_action=False, 
+                                                       save_act_all=True, 
+                                                       date_action_taken=current_time,
+                                                       message_index=0, 
+                                                       penalty_level=0, )
+                       record.data.update(rec)
 
                print "%s" % record.data['log'],
                print "%15s" % record.data['action']
index b710a54..713970c 100755 (executable)
@@ -6,11 +6,12 @@ import sys
 import time
 import getopt
 import database 
+from monitor.wrapper import plccache
 
 def main():
 
        act_all = database.dbLoad(sys.argv[1])
-       plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+       plcdb_hn2lb = plccache.plcdb_hn2lb
        s_nodenames = ""
        sickdb = {}
 
index 2a2d753..4e38459 100755 (executable)
@@ -6,11 +6,12 @@ import sys
 import time
 import getopt
 import database 
+from monitor.wrapper import plccache
 
 def main():
 
        sickdb = database.dbLoad(sys.argv[1])
-       plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+       plcdb_hn2lb = plccache.plcdb_hn2lb
        s_nodenames = ""
 
        sorted_keys = sickdb.keys()
index 9d2758c..1e412bc 100755 (executable)
@@ -13,9 +13,8 @@ from monitor.util import command
 from monitor import config
 from monitor.database import FindbadNodeRecordSync, FindbadNodeRecord
 from monitor.sources import comon
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
 
-import syncplcdb
 from nodequery import verify,query_to_dict,node_select
 import traceback
 
@@ -255,6 +254,7 @@ def recordPingAndSSH(request, result):
 
                        fbrec = FindbadNodeRecord(
                                                date_checked=datetime.fromtimestamp(values['date_checked']),
+                                               round=global_round,
                                                hostname=nodename,
                                                loginbase=values['loginbase'],
                                                kernel_version=values['kernel'],
@@ -274,6 +274,7 @@ def recordPingAndSSH(request, result):
                                                ssh_status = (values['ssh'] == "SSH"),
                                                ssh_error = values['ssherror'],
                                                observed_status = values['state'],
+                                               observed_category = values['category'],
                                        )
                        fbnodesync.round = global_round
 
@@ -353,7 +354,7 @@ def main():
        # history information for all nodes
        #cohash = {}
        cohash = cotop.coget(cotop_url)
-       l_nodes = syncplcdb.create_plcdb()
+       l_nodes = plccache.l_nodes
        if config.nodelist:
                f_nodes = util.file.getListFromFile(config.nodelist)
                l_nodes = filter(lambda x: x['hostname'] in f_nodes, l_nodes)
index 3ab97a3..1af600c 100755 (executable)
@@ -17,7 +17,7 @@ from monitor.pcu import reboot
 from monitor import config
 from monitor.database import FindbadPCURecordSync, FindbadPCURecord
 from monitor import util 
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
 from nodequery import pcu_select
 
 plc_lock = threading.Lock()
@@ -49,7 +49,7 @@ def get_pcu(pcuname):
        except:
                try:
                        #print "GetPCU from file %s" % pcuname
-                       l_pcus = database.dbLoad("pculist")
+                       l_pcus = plccache.l_pcus
                        for i in l_pcus:
                                if i['pcu_id'] == pcuname:
                                        l_pcu = i
@@ -67,7 +67,7 @@ def get_nodes(node_ids):
                l_node = plc.getNodes(node_ids, ['hostname', 'last_contact', 'node_id', 'ports'])
        except:
                try:
-                       plc_nodes = database.dbLoad("l_plcnodes")
+                       plc_nodes = plccache.l_nodes    # cached PLC node list; l_nodes is the name used by the rest of this change
                        for n in plc_nodes:
                                if n['node_id'] in node_ids:
                                        l_node.append(n)
@@ -123,7 +123,7 @@ def get_plc_site_values(site_id):
                        d_site = d_site[0]
        except:
                try:
-                       plc_sites = database.dbLoad("l_plcsites")
+                       plc_sites = plccache.l_sites    # cached PLC site list; l_sites is the name used by the rest of this change
                        for site in plc_sites:
                                if site['site_id'] == site_id:
                                        d_site = site
@@ -274,6 +274,7 @@ def recordPingAndSSH(request, result):
 
                fbrec = FindbadPCURecord(
                                        date_checked=datetime.fromtimestamp(values['date_checked']),
+                                       round=fbsync.round,
                                        plc_pcuid=pcu_id,
                                        plc_pcu_stats=values['plc_pcu_stats'],
                                        dns_status=values['dnsmatch'],
@@ -344,7 +345,8 @@ def checkAndRecordState(l_pcus, cohash):
 def main():
        global global_round
 
-       l_pcus = monitor.database.if_cached_else_refresh(1, config.refresh, "pculist", lambda : plc.GetPCUs())
+       #  monitor.database.if_cached_else_refresh(1, config.refresh, "pculist", lambda : plc.GetPCUs())
+       l_pcus = plccache.l_pcus
        cohash = {}
 
        fbsync = FindbadPCURecordSync.findby_or_create(plc_pcuid=0, if_new_set={'round' : global_round})
index 721932f..1f84674 100755 (executable)
@@ -1,10 +1,11 @@
 #!/usr/bin/python
 
-import plc
+from monitor.wrapper import plc
+from monitor import config
+import monitor.parser as parsermodule
 api = plc.getAuthAPI()
 import sys
 import os
-import config
 
 def getconf(hostname, force=False, media=None):
        n = api.GetNodes(hostname)
@@ -36,8 +37,6 @@ def getconf(hostname, force=False, media=None):
        return args
 
 if __name__ == '__main__':
-       import parser as parsermodule
-
        parser = parsermodule.getParser()
        parser.set_defaults(media='both', force=False)
        parser.add_option("", "--media", dest="media", metavar="usb, iso, both", 
@@ -46,7 +45,7 @@ if __name__ == '__main__':
                                                help="""Force the recreation of the usb images.""")
        parser = parsermodule.getParser(['defaults'], parser)
 
-       config = parsesrmodule.parse_args(parser)
+       config = parsermodule.parse_args(parser)
 
        ret = {'url_list' : ''} 
        for i in config.args:
index cfefc6a..1eeb092 100755 (executable)
 #  * do something else to them all.
 # 
 
-import plc
+from monitor import config
+from monitor import util
+from monitor import const
+from monitor import database
+from monitor import parser as parsermodule
+from monitor.pcu import reboot
+from monitor.wrapper import plc
 api = plc.getAuthAPI()
 
 import traceback
-import config
-import util.file
 from optparse import OptionParser
 
-import const
 from nodecommon import *
 from nodequery import verify,query_to_dict,node_select
-import database
 from unified_model import *
 import os
 
 import time
-import parser as parsermodule
-
 from model import *
+
 import bootman                 # debug nodes
-import reboot          # down nodes without pcu
-import mailmonitor     # down nodes with pcu
+import mailmonitor     # down nodes without pcu
 from emailTxt import mailtxt
-#reboot.verbose = 0
 import sys
 
 class Reboot(object):
@@ -237,10 +236,11 @@ if config.node or config.nodelist:
        if config.node: hostnames = [ config.node ] 
        else: hostnames = util.file.getListFromFile(config.nodelist)
 
-fb = database.dbLoad("findbad")
+fbquery = FindbadNodeRecord.get_all_latest()
+fb_nodelist = [ n.hostname for n in fbquery ]
 
 if config.nodeselect:
-       hostnames = node_select(config.nodeselect, fb['nodes'].keys(), fb)
+       hostnames = node_select(config.nodeselect, fb_nodelist)
 
 if config.findbad:
        # rerun findbad with the nodes in the given nodes.
index 73a6e57..f872d7a 100755 (executable)
@@ -100,6 +100,7 @@ def init():
        
        return l_nodes
 
+
 def create_plcdb():
 
        # get sites, and stats
@@ -132,7 +133,9 @@ def create_plcdb():
                database.dbDump("l_plcsites", l_sites)
        
        return l_nodes
-       
 
 if __name__ == '__main__':
        create_plcdb()
+else:
+       print "calling plccache init()"
+       init()
index 45242ea..5049db2 100644 (file)
@@ -6,6 +6,7 @@ from unified_model import cmpCategoryVal
 import sys
 import emailTxt
 import string
+from monitor.wrapper import plccache
 
 from rt import is_host_in_rt_tickets
 import plc
@@ -56,7 +57,7 @@ class Merge:
                self.merge_list = l_merge
 
                # the hostname to loginbase mapping
-               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = plccache.plcdb_hn2lb
 
                # Previous actions taken on nodes.
                self.act_all = database.if_cached_else(1, "act_all", lambda : {})
@@ -264,7 +265,7 @@ class RT:
 class Diagnose:
        def __init__(self, record_list):
                self.record_list = record_list
-               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = plccache.plcdb_hn2lb
                self.findbad = database.if_cached_else(1, "findbad", lambda : {})
 
                self.diagnose_in = {}
@@ -845,7 +846,7 @@ def reboot_node(args):
 class Action:
        def __init__(self, diagnose_out):
                # the hostname to loginbase mapping
-               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = plccache.plcdb_hn2lb
 
                # Actions to take.
                self.diagnose_db = diagnose_out
index 57f23c0..185c385 100755 (executable)
@@ -8,11 +8,10 @@ from datetime import datetime,timedelta
 
 from nodequery import verify,query_to_dict,node_select
 
-import syncplcdb
 from nodecommon import *
 
 from monitor import config
-from monitor.wrapper import plc
+from monitor.wrapper import plc,plccache
 from monitor.const import MINUP
 from monitor.database import  FindbadNodeRecord, HistoryNodeRecord
 
@@ -25,8 +24,7 @@ count = 0
 
 def main(config):
 
-       l_nodes = syncplcdb.create_plcdb()
-       l_plcnodes = database.dbLoad("l_plcnodes")
+       l_plcnodes = plccache.l_nodes
        l_nodes = get_nodeset(config)
        
        checkAndRecordState(l_nodes, l_plcnodes)
@@ -49,7 +47,7 @@ def checkAndRecordState(l_nodes, l_plcnodes):
                try:
                        # Find the most recent record
                        noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==nodename).order_by(FindbadNodeRecord.date_checked.desc()).first()
-                       print "NODEREC: ", noderec.date_checked
+                       #print "NODEREC: ", noderec.date_checked
                except:
                        print "COULD NOT FIND %s" % nodename
                        import traceback
index 334bc3e..8e3d5a0 100644 (file)
@@ -5,7 +5,7 @@ from monitor.pcu import reboot
 
 from monitor import util
 from monitor import database
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
 
 from datetime import datetime 
 from unified_model import PersistFlags
@@ -34,8 +34,8 @@ def blue(str):
        return BLUE + str + NORMAL
 
 def get_current_state(fbnode):
-       if 'state' in fbnode:
-               state = fbnode['state']
+       if 'observed_status' in fbnode:
+               state = fbnode['observed_status']
        else:
                state = "none"
        l = state.lower()
@@ -122,40 +122,36 @@ def getvalue(fb, path):
             return None
     return values
 
-def nodegroup_display(node, fb, conf=None):
-       if node['hostname'] in fb['nodes']:
-               node['current'] = get_current_state(fb['nodes'][node['hostname']]['values'])
-       else:
-               node['current'] = 'none'
-
-       if fb['nodes'][node['hostname']]['values'] == []:
-               return ""
+def nodegroup_display(node, fbdata, conf=None):
+       node['current'] = get_current_state(fbdata)
 
-       s = fb['nodes'][node['hostname']]['values']['kernel'].split()
+       s = fbdata['kernel_version'].split()
        if len(s) >=3:
-               node['kernel'] = s[2]
+               node['kernel_version'] = s[2]
        else:
-               node['kernel'] = fb['nodes'][node['hostname']]['values']['kernel']
+               node['kernel_version'] = fbdata['kernel_version']
                
-       if '2.6' not in node['kernel']: node['kernel'] = ""
+       if '2.6' not in node['kernel_version']: node['kernel_version'] = ""
        if conf and not conf.nocolor:
            node['boot_state']  = color_boot_state(node['boot_state'])
            node['current']     = color_boot_state(node['current'])
-       #node['boot_state']     = node['boot_state']
-       #node['current']        = node['current']
-       node['pcu'] = fb['nodes'][node['hostname']]['values']['pcu']
+
+       node['pcu'] = ""        # always defined: the format string below requires %(pcu)s
+       if fbdata['plc_node_stats']['pcu_ids']: node['pcu'] = "PCU"     # only when pcu_ids is non-empty
        node['lastupdate'] = diff_time(node['last_contact'])
+
        pf = PersistFlags(node['hostname'], 1, db='node_persistflags')
        try:
                node['lc'] = diff_time(pf.last_changed)
        except:
                node['lc'] = "err"
-       ut = fb['nodes'][node['hostname']]['values']['comonstats']['uptime']
+
+       ut = fbdata['comon_stats']['uptime']
        if ut != "null":
-               ut = diff_time(float(fb['nodes'][node['hostname']]['values']['comonstats']['uptime']), False)
+               ut = diff_time(float(fbdata['comon_stats']['uptime']), False)
        node['uptime'] = ut
 
-       return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node
+       return "%(hostname)-42s %(boot_state)8s %(current)5s %(pcu)6s %(key)10.10s... %(kernel_version)35.35s %(lastupdate)12s, %(lc)s, %(uptime)s" % node
 
 def datetime_fromstr(str):
        if '-' in str:
@@ -176,7 +172,7 @@ def get_nodeset(config):
                evaluates to.
        """
        api = plc.getAuthAPI()
-       l_nodes = database.dbLoad("l_plcnodes")
+       l_nodes = plccache.l_nodes
 
        if config.nodelist:
                f_nodes = util.file.getListFromFile(config.nodelist)
@@ -196,8 +192,9 @@ def get_nodeset(config):
        # perform this query after the above options, so that the filter above
        # does not break.
        if config.nodeselect:
-               fb = database.dbLoad("findbad")
-               l_nodes = node_select(config.nodeselect, fb['nodes'].keys(), fb)
+               fbquery = FindbadNodeRecord.get_all_latest()
+               node_list = [ n.hostname for n in fbquery ]
+               l_nodes = node_select(config.nodeselect, node_list, None)
 
        return l_nodes
        
index 2327ec0..b205900 100755 (executable)
@@ -1,14 +1,15 @@
 #!/usr/bin/python
 
 
-import plc
+from monitor.wrapper import plc
 api = plc.getAuthAPI()
 
-import parser as parsermodule
+from monitor import parser as parsermodule
 from sets import Set
 
 from nodecommon import *
-import database
+from monitor import database
+from monitor.database import FindbadNodeRecord
 
 def network_config_to_str(net):
 
@@ -21,7 +22,6 @@ def network_config_to_str(net):
        
 
 def main():
-       fb = database.dbLoad("findbad")
 
        parser = parsermodule.getParser()
        parser.set_defaults(nodelist=None,
@@ -67,7 +67,8 @@ def main():
                        i = 1
                        for node in nodelist:
                                print "%-2d" % i, 
-                               print nodegroup_display(node, fb)
+                               fbdata = FindbadNodeRecord.get_latest_by(hostname=node['hostname'])
+                               print nodegroup_display(node, fbdata.to_dict())
                                i += 1
 
                elif config.add and config.nodegroup:
index 3f4b980..9e14e2f 100755 (executable)
 # Given a nodelist, it could tag each one with a nodegroup name.
 #  * 
 
-import plc
+from monitor import database
+from monitor.database import FindbadNodeRecord
+from monitor import util
+from monitor.wrapper import plc
+from monitor import parser as parsermodule
+
 api = plc.getAuthAPI()
 
-import parser as parsermodule
-from sets import Set
 from nodequery import verify,query_to_dict,node_select
-
 from nodecommon import *
-import database
-import util.file
+from sets import Set
 
 def main():
-       fb = database.dbLoad("findbad")
 
        parser = parsermodule.getParser(['nodesets'])
        parser.set_defaults( list=True,
@@ -121,7 +121,9 @@ def main():
                i = 1
                for node in nodelist:
                        print "%-2d" % i, 
-                       print nodegroup_display(node, fb, config)
+                       fbrec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node['hostname']).order_by(FindbadNodeRecord.date_checked.desc()).first()
+                       fbdata = fbrec.to_dict()
+                       print nodegroup_display(node, fbdata, config)
                        i += 1
 
        else:
index fee8eb3..4a946c5 100755 (executable)
@@ -1,21 +1,20 @@
 #!/usr/bin/python
 
-import plc
+from monitor.wrapper import plc
 api = plc.getAuthAPI()
 
 from monitor import *
-#import database
-import reboot
+from monitor import util
+from monitor import parser as parsermodule
+
+from monitor import database
+from monitor.pcu import reboot
 
 import time
 from model import *
 from nodecommon import *
 from unified_model import node_end_record, PersistFlags
 
-import util.file
-
-import parser as parsermodule
-
 parser = parsermodule.getParser()
 parser.set_defaults(node=None, 
                                        findbad=False,
@@ -138,11 +137,11 @@ if config.findbad:
 for node in config.args:
        config.node = node
 
-       fb = database.dbLoad("findbad")
        plc_nodeinfo = api.GetNodes({'hostname': config.node}, None)[0]
-       fb_nodeinfo  = fb['nodes'][config.node]['values']
-
+       fb_noderec = FindbadNodeRecord.get_latest_by(hostname=node) 
+       fb_nodeinfo = fb_noderec.to_dict()
        plc_print_nodeinfo(plc_nodeinfo)
+
        fb_nodeinfo['hostname'] = node
        fb_print_nodeinfo(fb_nodeinfo)
 
index 5e182e1..71c62bc 100755 (executable)
@@ -14,15 +14,13 @@ import re
 import string
 
 from monitor.pcu import reboot
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
 api = plc.getAuthAPI()
 
-from monitor.database import FindbadNodeRecord, FindbadNodeRecordSync
+from monitor.database import FindbadNodeRecord, FindbadPCURecord
 from monitor import util
 from monitor import config
 
-fb = None
-fbpcu = None
 
 class NoKeyException(Exception): pass
 
@@ -69,8 +67,12 @@ def fb_print_nodeinfo(fbnode, hostname, fields=None):
                        format += "%%(%s)s " % f
                print format % fbnode
 
+def first(path):
+       indexes = path.split(".")
+       return indexes[0]
+       
 def get(fb, path):
-    indexes = path.split("/")
+    indexes = path.split(".")
     values = fb
     for index in indexes:
         if index in values:
@@ -216,19 +218,18 @@ def verify(constraints, data):
 
                for key in con.keys():
                        #print "looking at key: %s" % key
-                       if key in data: 
+                       if first(key) in data: 
                                value_re = re.compile(con[key])
-                               if type([]) == type(data[key]):
+                               if type([]) == type(get(data,key)):
                                        local_or_true = False
-                                       for val in data[key]:
+                                       for val in get(data,key):
                                                local_or_true = local_or_true | (value_re.search(val) is not None)
                                        con_and_true = con_and_true & local_or_true
                                else:
-                                       if data[key] is not None:
-                                               con_and_true = con_and_true & (value_re.search(data[key]) is not None)
-                       elif key not in data:
-                               print "missing key %s" % key,
-                               pass
+                                       if get(data,key) is not None:
+                                               con_and_true = con_and_true & (value_re.search(get(data,key)) is not None)
+                       elif first(key) not in data:
+                               print "missing key %s" % first(key)
 
                con_or_true = con_or_true | con_and_true
 
@@ -260,38 +261,35 @@ def pcu_in(fbdata):
        return False
 
 def pcu_select(str_query, nodelist=None):
-       global fb
-       global fbpcu
        pcunames = []
        nodenames = []
        if str_query is None: return (nodenames, pcunames)
 
-       if fb is None:
-               fb = database.dbLoad("findbad")
-       if fbpcu is None:
-               fbpcu = database.dbLoad("findbadpcus")
+       if True:
+               fbquery = FindbadNodeRecord.get_all_latest()
+               fb_nodelist = [ n.hostname for n in fbquery ]
+       if True:
+               fbpcuquery = FindbadPCURecord.get_all_latest()
+               fbpcu_list = [ p.plc_pcuid for p in fbpcuquery ]
 
-       #print str_query
        dict_query = query_to_dict(str_query)
-       #print dict_query
 
-       for node in fb['nodes'].keys():
+       for noderec in fbquery:
                if nodelist is not None: 
-                       if node not in nodelist: continue
+                       if noderec.hostname not in nodelist: continue
        
-               fb_nodeinfo  = fb['nodes'][node]['values']
+               fb_nodeinfo  = noderec.to_dict()
                if pcu_in(fb_nodeinfo):
-                       pcuinfo = fbpcu['nodes']['id_%s' % fb_nodeinfo['plcnode']['pcu_ids'][0]]['values']
+                       pcurec = FindbadPCURecord.get_latest_by(plc_pcuid=get(fb_nodeinfo, 'plc_node_stats.pcu_ids')[0])
+                       pcuinfo = pcurec.to_dict()
                        if verify(dict_query, pcuinfo):
-                               nodenames.append(node)
+                               nodenames.append(noderec.hostname)
                                str = "cmdhttps/locfg.pl -s %s -f iloxml/License.xml -u %s -p '%s' | grep MESSAGE" % \
                                                        (reboot.pcu_name(pcuinfo), pcuinfo['username'], pcuinfo['password'])
-                               #pcunames.append(str)
-                               pcunames.append(pcuinfo['pcu_id'])
+                               pcunames.append(pcuinfo['plc_pcuid'])
        return (nodenames, pcunames)
 
-def node_select(str_query, nodelist=None, fbdb=None):
-       global fb
+def node_select(str_query, nodelist=None, fb=None):
 
        hostnames = []
        if str_query is None: return hostnames
@@ -300,16 +298,14 @@ def node_select(str_query, nodelist=None, fbdb=None):
        dict_query = query_to_dict(str_query)
        #print dict_query
 
-       if fbdb is not None:
-               fb = fbdb
-
        for node in nodelist:
                #if nodelist is not None: 
                #       if node not in nodelist: continue
 
                try:
                        fb_noderec = None
-                       fb_noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node).order_by(FindbadNodeRecord.date_checked.desc()).first()
+                       #fb_noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node).order_by(FindbadNodeRecord.date_checked.desc()).first()
+                       fb_noderec = FindbadNodeRecord.get_latest_by(hostname=node)
                except:
                        print traceback.print_exc()
                        continue
@@ -323,6 +319,7 @@ def node_select(str_query, nodelist=None, fbdb=None):
 
                        #if verifyDBrecord(dict_query, fb_nodeinfo):
                        if verify(dict_query, fb_nodeinfo):
+                               #print fb_nodeinfo.keys()
                                #print node #fb_nodeinfo
                                hostnames.append(node)
                        else:
@@ -333,13 +330,11 @@ def node_select(str_query, nodelist=None, fbdb=None):
 
 
 def main():
-       global fb
-       global fbpcu
 
        from monitor import parser as parsermodule
        parser = parsermodule.getParser()
 
-       parser.set_defaults(node=None, fromtime=None, select=None, list=None, 
+       parser.set_defaults(node=None, fromtime=None, select=None, list=None, listkeys=False,
                                                pcuselect=None, nodelist=None, daysdown=None, fields=None)
        parser.add_option("", "--daysdown", dest="daysdown", action="store_true",
                                                help="List the node state and days down...")
@@ -353,6 +348,8 @@ def main():
                                                help="List all nodes with the given key=value pattern")
        parser.add_option("", "--nodelist", dest="nodelist", metavar="nodelist.txt", 
                                                help="A list of nodes to bring out of debug mode.")
+       parser.add_option("", "--listkeys", dest="listkeys", action="store_true",
+                                               help="List the primary keys available in the findbad object, then exit.")
        parser.add_option("", "--fromtime", dest="fromtime", metavar="YYYY-MM-DD",
                                        help="Specify a starting date from which to begin the query.")
 
@@ -372,18 +369,16 @@ def main():
                fb = archive.load(file[:-4])
        else:
                #fbnodes = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname, orderBy='date_checked',distinct=True).reversed()
-               #fb = database.dbLoad("findbad")
                fb = None
 
-       fbpcu = database.dbLoad("findbadpcus")
-       reboot.fb = fbpcu
+       #reboot.fb = fbpcu
 
        if config.nodelist:
                nodelist = util.file.getListFromFile(config.nodelist)
        else:
                # NOTE: list of nodes should come from findbad db.   Otherwise, we
                # don't know for sure that there's a record in the db..
-               plcnodes = database.dbLoad("l_plcnodes")
+               plcnodes = plccache.l_nodes
                nodelist = [ node['hostname'] for node in plcnodes ]
                #nodelist = ['planetlab-1.cs.princeton.edu']
 
@@ -411,7 +406,15 @@ def main():
                        fb_noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node).order_by(FindbadNodeRecord.date_checked.desc()).first()
                except:
                        print traceback.print_exc()
-                       pass #fb_nodeinfo  = fb['nodes'][node]['values']
+                       pass
+
+               if config.listkeys:
+                       fb_nodeinfo = fb_noderec.to_dict()
+                       print "Primary keys available in the findbad object:"
+                       for key in fb_nodeinfo.keys():
+                               print "\t",key
+                       sys.exit(0)
+                       
 
                if config.list:
                        print node
index 1fd3371..6a1098b 100755 (executable)
--- a/pcubad.py
+++ b/pcubad.py
@@ -11,19 +11,19 @@ from monitor.pcu import reboot
 from monitor import parser as parsermodule
 from monitor import config
 from monitor.database import HistoryPCURecord, FindbadPCURecord
-from monitor.wrapper import plc
+from monitor.wrapper import plc,plccache
 from monitor.const import MINUP
 
 from nodecommon import *
 from nodequery import verify,query_to_dict,node_select
-import syncplcdb
 from unified_model import *
 
 api = plc.getAuthAPI()
 
 def main(config):
 
-       l_plcpcus = database.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
+       #l_plcpcus = database.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
+       l_plcpcus = plccache.l_pcus 
 
        l_pcus = None
        if config.pcu:
@@ -39,7 +39,7 @@ def main(config):
        
        checkAndRecordState(l_pcus, l_plcpcus)
 
-hn2lb = database.dbLoad("plcdb_hn2lb")
+hn2lb = plccache.plcdb_hn2lb
 
 def checkAndRecordState(l_pcus, l_plcpcus):
        count = 0
index d6d5e87..c9d1e90 100755 (executable)
@@ -30,10 +30,10 @@ if not config.run:
        print "Add --run to actually perform the command"
        sys.exit(1)
 
-pculist = database.if_cached_else_refresh(1, 
-                                                       config.refresh, 
-                                                       "pculist", 
-                                                       lambda : plc.GetPCUs())
+pculist = plccache.l_pcus # database.if_cached_else_refresh(1, 
+                                                 #     config.refresh, 
+                                                 #     "pculist", 
+                                                 #     lambda : plc.GetPCUs())
 for pcu in pculist:
        #print pcu
        #sys.exit(1)
index f064c11..cae8480 100755 (executable)
@@ -6,9 +6,7 @@ import parser as parsermodule
 from www.printbadnodes import *
 
 def main():
-       global fb
        db = database.dbLoad(config.dbname)
-       fb = database.dbLoad("findbadpcus")
        act= database.dbLoad("act_all")
 
        ## Field widths used for printing
index af01bd7..a556953 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-import plc
+from monitor.wrapper import plc, plccache
 api = plc.getAuthAPI()
 
 import sys
@@ -86,9 +86,9 @@ def main():
        fbstr = get_filefromglob(d, "production.findbad")
        fbpcustr = get_filefromglob(d, "production.findbadpcus")
 
-       l_plcnodes = database.dbLoad("l_plcnodes")
-       l_plcsites = database.dbLoad("l_plcsites")
-       lb2hn = database.dbLoad("plcdb_lb2hn")
+       l_plcnodes = plccache.l_nodes
+       l_plcsites = plccache.l_sites
+       lb2hn = plccache.plcdb_lb2hn
        fb = archive.load(fbstr) 
        fbpcu = archive.load(fbpcustr)
        reboot.fb = fbpcu
index 48ac79c..aff0444 100755 (executable)
@@ -11,20 +11,19 @@ from monitor.pcu import reboot
 from monitor import parser as parsermodule
 from monitor import config
 from monitor.database import HistorySiteRecord, FindbadNodeRecord
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
 from monitor.const import MINUP
 
 from nodecommon import *
 from nodequery import verify,query_to_dict,node_select
-import syncplcdb
 from unified_model import *
 
 api = plc.getAuthAPI()
 
 def main(config):
 
-       l_nodes = syncplcdb.create_plcdb()
-       l_plcsites = database.dbLoad("l_plcsites")
+       l_nodes = plccache.l_nodes
+       l_plcsites = plccache.l_sites
 
        if config.site:
                l_sites = [config.site]
@@ -49,7 +48,7 @@ def getnodesup(nodelist):
 
 def checkAndRecordState(l_sites, l_plcsites):
        count = 0
-       lb2hn = database.dbLoad("plcdb_lb2hn")
+       lb2hn = plccache.plcdb_lb2hn
        for sitename in l_sites:
                d_site = None
                for site in l_plcsites:
index e9dc9d5..041bf1c 100755 (executable)
@@ -1,18 +1,18 @@
 #!/usr/bin/python
 
-import plc
+from monitor.wrapper import plc
 api = plc.getAuthAPI()
 
-import database
-import reboot
+from monitor import database
+from monitor.pcu import reboot
 
 import time
 from model import *
 from nodecommon import *
 
-import util.file
-
-import parser as parsermodule
+from monitor import util
+from monitor import parser as parsermodule
+from unified_model import *
 
 
 parser = parsermodule.getParser()
@@ -31,7 +31,6 @@ parser.add_option("", "--disable", dest="disable", action="store_true",
                                        help="")
 config = parsermodule.parse_args(parser)
 
-from unified_model import *
 def color_sitestatus(status):
        if status == "good":
                return green(status)
@@ -69,7 +68,7 @@ def plc_print_siteinfo(plcsite):
        print "   Checked: %s" % time.ctime()
        print "\t                               host     | state | obs   |   created   |   updated   | last_contact "
        for plcnode in nodes:
-               fbnode = fb['nodes'][plcnode['hostname']]['values']
+               fbnode = FindbadNodeRecord.get_latest_by(hostname=plcnode['hostname']).to_dict()
                plcnode['state'] = color_boot_state(get_current_state(fbnode))
                print "\t  %37s |  %5s |  %5s | %11.11s | %11.11s | %12s " % \
                (plcnode['hostname'], color_boot_state(plcnode['boot_state']), plcnode['state'], 
@@ -77,7 +76,6 @@ def plc_print_siteinfo(plcsite):
                diff_time(plcnode['last_contact']))
 
 
-fb = database.dbLoad("findbad")
 act_all = database.dbLoad("act_all")
 
 for site in config.args:
diff --git a/todo b/todo
index 98ace66..b3dc4de 100644 (file)
--- a/todo
+++ b/todo
@@ -1,4 +1,60 @@
 
+for each node:
+       Check Status ->
+               if Pass Threshold -> 
+                       Create Issue -> 
+                               Take Action -> 
+                                       email
+                                       bm
+                                       pcu
+                                       plc reset
+                                       apply penalties
+                                       flag for admin
+
+for each issue
+       check issue.status
+       if issue.status is "open": 
+               issue.take_next_action()
+       if issue.closed:
+               issue.shutdown()
+       if issue.paused:
+               pass
+
+action_list for issuetype (pcudown)
+       send email
+               yield
+       send email, apply penalty
+               yield
+       send email, apply second penalty
+               yield
+       send email
+
+action_list for issuetype (badhardware)
+action_list for issuetype (dnserror)
+action_list for issuetype (nodeconfig)
+action_list for issuetype (oldbootcd)
+
+action_list for issuetype (nodedown)
+       if pcuok, reboot
+               yield
+       if pcuok, and reboot failed, set rins, reboot
+               yield
+       create_issue pcubroken
+       send email
+               yield
+       send email, apply penalty
+               yield
+       send email, apply second penalty
+               yield
+       send email
+       
+
+TOOLS:
+  * add a '--nocache'  to the default set of options.
+  * add a cache parameter in the monitor.conf file.
+
+
+
 TODO:
  * install openssh-server, passwd, perl-libwww-perl (for rt), rt-3.4.1,  MySQL-python
        * had to mount -t devpts devpts /dev/pts to get ssh to work inside the
index 31b0ef6..805dd0e 100755 (executable)
@@ -2,7 +2,7 @@
 
 from monitor import database
 
-from monitor.wrapper import plc
+from monitor.wrapper import plc, plccache
 from monitor.wrapper import mailer
 import time
 
@@ -65,8 +65,6 @@ class PenaltyMap:
        #       condition/penalty is applied, move to the next phase.
 
 
-#fb = database.dbLoad("findbad")
-
 class RT(object):
        def __init__(self, ticket_id = None):
                self.ticket_id = ticket_id
@@ -410,7 +408,7 @@ class Record(object):
        def __init__(self, hostname, data):
                self.hostname = hostname
                self.data = data
-               self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+               self.plcdb_hn2lb = plccache.plcdb_hn2lb
                self.loginbase = self.plcdb_hn2lb[self.hostname]
                return
 
@@ -490,15 +488,27 @@ class Record(object):
                return daysdown
        getStrDaysDown = classmethod(getStrDaysDown)
 
-       #def getStrDaysDown(cls, diag_record):
-       #       daysdown = cls.getDaysDown(diag_record)
-       #       if daysdown > 0:
-       #               return "%d days down"%daysdown
-       #       elif daysdown == -1:
-       #               return "Never online"
-       #       else:
-       #               return "%d days up"% -daysdown
-       #getStrDaysDown = classmethod(getStrDaysDown)
+       def getSendEmailFlag(self):
+               if not config.mail:
+                       return False
+
+               # resend if open & created longer than 30 days ago.
+               if  'rt' in self.data and \
+                       'Status' in self.data['rt'] and \
+                       "open" in self.data['rt']['Status'] and \
+                       self.data['rt']['Created'] > int(time.time() - 60*60*24*30):
+                       # the ticket was created within the last thirty days, so do not resend yet
+                       return False
+
+               return True
+
+       def getMostRecentStage(self):
+               lastact = self.data['last_action_record']
+               return lastact.stage
+
+       def getMostRecentTime(self):
+               lastact = self.data['last_action_record']
+               return lastact.date_action_taken
 
        def takeAction(self, index=0):
                pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
@@ -524,7 +534,7 @@ class Record(object):
                        hlist = "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
                return hlist
        def saveAction(self):
-               if 'save-act-all' in self.data and self.data['save-act-all'] == True:
+               if 'save_act_all' in self.data and self.data['save_act_all'] == True:
                        return True
                else:
                        return False
@@ -579,79 +589,6 @@ class NodeRecord:
                self.hostname = hostname
                self.ticket = None
                self.target = target
-               #if hostname in fb['nodes']:
-               #       self.data = fb['nodes'][hostname]['values']
-               #else:
-               #       raise Exception("Hostname not in scan database")
-
-       def stageIswaitforever(self):
-               if 'waitforever' in self.data['stage']:
-                       return True
-               else:
-                       return False
-
-       def severity(self):
-               category = self.data['category']
-               prev_category = self.data['prev_category']
-               print "IMPROVED: ", category, prev_category
-               val = cmpCategoryVal(category, prev_category)
-               return val 
-
-       def improved(self):
-               return self.severity() > 0
-       
-       def end_record(self):
-               return node_end_record(self.hostname)
-
-       def reset_stage(self):
-               self.data['stage'] = 'findbad'
-               return True
-
-       def open_tickets(self):
-               if self.ticket and self.ticket.status['status'] == 'open':
-                       return 1
-               return 0
-       def setIntrospect(self):
-               pass
-
-       def email_notice(self):
-               message = self._get_message_for_condition()
-               message.send(self._get_contacts_for_condition())
-               return True
-       def close_ticket(self):
-               if self.ticket:
-                       self.ticket.closeTicket()
-
-       def exempt_from_penalties(self):
-               bl = database.dbLoad("l_blacklist")
-               return self.hostname in bl
-
-       def penalties(self):
-               return []
-       def escellate_penalty(self):
-               return True
-       def reduce_penalty(self):
-               return True
-
-
-       def atTarget(self):
-               return self.target.verify(self.data)
-
-       def _get_condition(self):
-               return self.data['category'].lower()
-
-       def _get_stage(self):
-               "improvement"
-               "firstnotice_noop"
-               "secondnotice_noslicecreation"
-               "thirdnotice_disableslices"
-
-               delta = current_time - self.data['time']
-
-       def _get_message_for_condition(self):
-               pass
-       def _get_contacts_for_condition(self):
-               pass
 
 class Action(MonRecord):
        def __init__(self, host, data):