moved nodequery common code to monitor/query.py
[monitor.git] / pcubad.py
index c2886f5..085389f 100755 (executable)
--- a/pcubad.py
+++ b/pcubad.py
@@ -4,41 +4,53 @@ import os
 import sys
 import string
 import time
+import sets
+from datetime import datetime,timedelta
+
+from monitor import database
+from monitor import reboot
+from monitor import parser as parsermodule
+from monitor import config
+from monitor.database.info.model import HistoryPCURecord, FindbadPCURecord
+from monitor.database.dborm import mon_session as session
+from monitor.wrapper import plc,plccache
+from monitor.const import MINUP
+
+from monitor.common import *
+from monitor.query import verify,query_to_dict,node_select
+from monitor.model import *
 
-from reboot import pcu_name
+api = plc.getAuthAPI()
 
-import database
-import comon
-import threadpool
-import syncplcdb
-from nodequery import verify,query_to_dict,node_select
-import parser as parsermodule
-from nodecommon import *
+def main():  # importer entry point: passes the module-level `config` (from monitor import config) to main2
+       main2(config)
 
-import plc
-api = plc.getAuthAPI()
-from unified_model import *
-from monitor_policy import MINUP
+def main2(config):
 
-round = 1
-externalState = {'round': round, 'nodes': {}}
-count = 0
+       l_plcpcus = plccache.l_pcus 
 
-def main(config):
-       global externalState
-       externalState = database.if_cached_else(1, config.dbname, lambda : externalState) 
-       if config.increment:
-               # update global round number to force refreshes across all pcus
-               externalState['round'] += 1
+       l_pcus = None
+       if config.site is not None:
+               site = plccache.GetSitesByName([config.site])
+               l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
+               pcus = []
+               for node in l_nodes:
+                       pcus += node['pcu_ids']
+               # clear out dups.
+               l_pcus = [pcu for pcu in sets.Set(pcus)]
 
-       l_plcpcus = database.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
+       elif config.node:
+               node = plccache.GetNodeByName(config.node)
+               pcus = node['pcu_ids']
+               # clear out dups.
+               l_pcus = [pcu for pcu in sets.Set(pcus)]
 
-       l_pcu = None
-       if config.pcu:
+       elif config.pcu:
                for pcu in l_plcpcus:
-                       if pcu['hostname'] == config.pcu  or pcu['ip'] == config.pcu:
+                       if ( pcu['hostname'] is not None and config.pcu in pcu['hostname'] ) or \
+                          ( pcu['ip'] is not None and config.pcu in pcu['ip'] ):
                                l_pcus = [pcu['pcu_id']]
-               if not l_pcu:
+               if not l_pcus:
                        print "ERROR: could not find pcu %s" % config.pcu
                        sys.exit(1)
        else:
@@ -46,116 +58,105 @@ def main(config):
        
        checkAndRecordState(l_pcus, l_plcpcus)
 
-def checkAndRecordState(l_pcus, l_plcpcus):
-       global externalState
-       global count
-       global_round = externalState['round']
+hn2lb = plccache.plcdb_hn2lb  # NOTE(review): not referenced in the visible hunks; presumably a hostname -> loginbase map -- confirm it is still needed
 
-       for pcuname in l_pcus:
-               if pcuname not in externalState['nodes']:
-                       externalState['nodes'][pcuname] = {'round': 0, 'values': []}
-
-               pcu_round   = externalState['nodes'][pcuname]['round']
-               if pcu_round < global_round:
-                       # do work
-                       values = collectStatusAndState(pcuname, l_plcpcus)
-                       global_round = externalState['round']
-                       externalState['nodes'][pcuname]['values'] = values
-                       externalState['nodes'][pcuname]['round'] = global_round
-               else:
-                       count += 1
-
-               if count % 20 == 0:
-                       database.dbDump(config.dbname, externalState)
-
-       database.dbDump(config.dbname, externalState)
-
-fbpcu = database.dbLoad('findbadpcus')
-hn2lb = database.dbLoad("plcdb_hn2lb")
-
-def get(fb, path):
-       indexes = path.split("/")
-       values = fb
-       for index in indexes:
-               if index in values:
-                       values = values[index]
-               else:
-                       return None
-       return values
-
-def collectStatusAndState(pcuname, l_plcpcus):
-       global count
-
-       d_pcu = None
-       for pcu in l_plcpcus:
-               if pcu['pcu_id'] == pcuname:
-                       d_pcu = pcu
-                       break
-       if not d_pcu:
-               return None
-
-       pf = PersistFlags(pcuname, 1, db='pcu_persistflags')
-
-       if not pf.checkattr('last_changed'):
-               pf.last_changed = time.time()
-               
-       pf.last_checked = time.time()
-
-       if not pf.checkattr('valid'):
-               pf.valid = "unknown"
-               pf.last_valid = 0
-
-       if not pf.checkattr('status'):
-               pf.status = "unknown"
-
-       state_path     = "nodes/id_" + str(pcuname) + "/values/reboot"
-       bootstate_path = "nodes/id_" + str(pcuname) + "/values/plcpcu/boot_state"
-
-       current_state = get(fbpcu, state_path)
-       if current_state == 0:
-               if pf.status != "good": pf.last_changed = time.time()
-               pf.status = "good"
-       elif current_state == 'NetDown':
-               if pf.status != "netdown": pf.last_changed = time.time()
-               pf.status = "netdown"
-       elif current_state == 'Not_Run':
-               if pf.status != "badconfig": pf.last_changed = time.time()
-               pf.status = "badconfig"
-       else:
-               if pf.status != "error": pf.last_changed = time.time()
-               pf.status = "error"
+def check_pcu_state(rec, pcu):
+       """Update pcu.status and pcu.last_changed in place from rec.reboot_trial_status; returns None."""
+       pcu_state = rec.reboot_trial_status
+
+       # DOWN: a reboot_trial_status other than 0/"0" marks the pcu 'offline' (unless already offline/down).
+       if pcu_state not in [0, "0"] and pcu.status not in ['offline', 'down']:
+                       print "changed status from %s to offline" % pcu.status
+                       pcu.status = 'offline'
+                       pcu.last_changed = datetime.now()
+
+       # ONLINE: a reboot_trial_status of 0/"0" marks the pcu 'online' (unless already online/good).
+       if pcu_state in [0, "0"] and pcu.status not in [ 'online', 'good' ]:
+               print "changed status from %s to online" % pcu.status
+               pcu.status = 'online'
+               pcu.last_changed = datetime.now()
 
-       count += 1
-       print "%d %35s %s since(%s)" % (count, pcu_name(d_pcu), pf.status, diff_time(pf.last_changed))
-       # updated by other modules
-       #pf.enabled = 
-       #pf.suspended = 
 
-       pf.save()
+       # STATE TRANSITIONS (changed_greaterthan comes from a star import; presumably "last_changed is older than N days" -- confirm).
+       if pcu.status == 'online' and changed_greaterthan(pcu.last_changed, 0.5):
+               # TODO: send thank you notice, or on-line notice (nothing is sent here; only the status is updated).
+               print "changed status from %s to good" % pcu.status
+               pcu.status = 'good'
+               # NOTE: do not reset last_changed, or you lose how long it's been up.
+
+       if pcu.status == 'offline' and changed_greaterthan(pcu.last_changed, 2):
+               # TODO: send down pcu notice (nothing is sent here; only the status is updated).
+               print "changed status from %s to down" % pcu.status
+               pcu.status = 'down'
+
+#      if pcu.status in [ 'offline', 'down' ] and changed_greaterthan(pcu.last_changed, 2*30):
+#              print "changed status from %s to down" % pcu.status
+#              pcu.status = 'down'
+#              pcu.last_changed = datetime.now()
+
+def checkAndRecordState(l_pcus, l_plcpcus):
+       """Refresh the HistoryPCURecord status of every pcu id in l_pcus.
+
+       Ids with no entry in l_plcpcus (matched on 'pcu_id') or no FindbadPCURecord row
+       are skipped.  The session is flushed once at the end; always returns True.
+       """
+       count = 0
+       for pcuname in l_pcus:
+               d_pcu = None
+               for pcu in l_plcpcus:
+                       if pcu['pcu_id'] == pcuname:
+                               d_pcu = pcu
+                               break
+               if not d_pcu:
+                       continue
+
+               pcuhist = HistoryPCURecord.findby_or_create(plc_pcuid=d_pcu['pcu_id'],
+                       if_new_set={'status' : 'offline', 'last_changed' : datetime.now()})
+               pcuhist.last_checked = datetime.now()
+               try:
+                       # NOTE(review): no order_by here, so first() is not guaranteed to be the most recent record -- confirm.
+                       pcurec = FindbadPCURecord.query.filter(FindbadPCURecord.plc_pcuid==pcuname).first()
+               except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit propagate
+                       print "COULD NOT FIND FB record for %s" % reboot.pcu_name(d_pcu)
+                       import traceback
+                       email_exception()
+                       traceback.print_exc()  # print_exc() prints the traceback itself and returns None
+                       # don't have the info to create a new entry right now, so continue.
+                       continue
+
+               if not pcurec:
+                       print "none object for pcu %s"% reboot.pcu_name(d_pcu)
+                       continue
+               check_pcu_state(pcurec, pcuhist)
+               count += 1
+               print "%d %35s %s since(%s)" % (count, reboot.pcu_name(d_pcu), pcuhist.status, diff_time(time.mktime(pcuhist.last_changed.timetuple())))
+
+       # NOTE: this commits all pending operations to the DB.  Do not remove it, or
+       # replace it only with another operation that also commits all pending ops,
+       # such as session.commit() or flush().
+       session.flush()
+       print HistoryPCURecord.query.count()
 
        return True
 
 if __name__ == '__main__':
        parser = parsermodule.getParser()
-       parser.set_defaults(filename=None, pcu=None, pcuselect=False, pcugroup=None, 
-                                               increment=False, dbname="pcubad", cachepcus=False)
+       parser.set_defaults(filename=None, pcu=None, node=None, site=None, pcuselect=False, pcugroup=None, cachepcus=False)
        parser.add_option("", "--pcu", dest="pcu", metavar="hostname", 
                                                help="Provide a single pcu to operate on")
+       parser.add_option("", "--site", dest="site", metavar="sitename", 
+                                               help="Provide a single sitename to operate on")
+       parser.add_option("", "--node", dest="node", metavar="nodename", 
+                                               help="Provide a single node to operate on")
        parser.add_option("", "--pculist", dest="pculist", metavar="file.list", 
                                                help="Provide a list of files to operate on")
 
-       parser.add_option("", "--dbname", dest="dbname", metavar="FILE", 
-                                               help="Specify the name of the database to which the information is saved")
-       parser.add_option("-i", "--increment", action="store_true", dest="increment", 
-                                               help="Increment round number to force refresh or retry")
        config = parsermodule.parse_args(parser)
 
        try:
-               main(config)
+               main2(config)  # config was rebound just above to the parsed options, shadowing the imported monitor.config module
        except Exception, err:
                import traceback
-               print traceback.print_exc()
+               traceback.print_exc()  # print_exc() writes the traceback itself and returns None, so its result is not printed
                print "Exception: %s" % err
-               print "Saving data... exitting."
-               database.dbDump(config.dbname, externalState)
                sys.exit(0)