import sys
import string
import time
-
-from reboot import pcu_name
-
-import database
-import comon
-import threadpool
-import syncplcdb
+import sets
+from datetime import datetime,timedelta
+
+from monitor import database
+from monitor import reboot
+from monitor import parser as parsermodule
+from monitor import config
+from monitor.database.info.model import HistoryPCURecord, FindbadPCURecord
+from monitor.database.dborm import mon_session as session
+from monitor.wrapper import plc,plccache
+from monitor.const import MINUP
+
+from monitor.common import *
from nodequery import verify,query_to_dict,node_select
+from monitor.model import *
-import plc
api = plc.getAuthAPI()
-from unified_model import *
-from monitor_policy import MINUP
-round = 1
-externalState = {'round': round, 'nodes': {}}
-count = 0
+def main():
+ main2(config)
+
+def main2(config):
+
+ l_plcpcus = plccache.l_pcus
-def main(config):
- global externalState
- externalState = database.if_cached_else(1, config.dbname, lambda : externalState)
- if config.increment:
- # update global round number to force refreshes across all pcus
- externalState['round'] += 1
+ l_pcus = None
+ if config.site is not None:
+ site = plccache.GetSitesByName([config.site])
+ l_nodes = plccache.GetNodesByIds(site[0]['node_ids'])
+ pcus = []
+ for node in l_nodes:
+ pcus += node['pcu_ids']
+ # clear out dups.
+ l_pcus = [pcu for pcu in sets.Set(pcus)]
- l_plcpcus = database.if_cached_else_refresh(1, 1, "pculist", lambda : plc.GetPCUs())
+ elif config.node:
+ node = plccache.GetNodeByName(config.node)
+ pcus = node['pcu_ids']
+ # clear out dups.
+ l_pcus = [pcu for pcu in sets.Set(pcus)]
- l_pcu = None
- if config.pcu:
+ elif config.pcu:
for pcu in l_plcpcus:
- if pcu['hostname'] == config.pcu or pcu['ip'] == config.pcu:
+ if ( pcu['hostname'] is not None and config.pcu in pcu['hostname'] ) or \
+ ( pcu['ip'] is not None and config.pcu in pcu['ip'] ):
l_pcus = [pcu['pcu_id']]
- if not l_pcu:
+ if not l_pcus:
print "ERROR: could not find pcu %s" % config.pcu
sys.exit(1)
else:
checkAndRecordState(l_pcus, l_plcpcus)
-def checkAndRecordState(l_pcus, l_plcpcus):
- global externalState
- global count
- global_round = externalState['round']
+hn2lb = plccache.plcdb_hn2lb
- for pcuname in l_pcus:
- if pcuname not in externalState['nodes']:
- externalState['nodes'][pcuname] = {'round': 0, 'values': []}
-
- pcu_round = externalState['nodes'][pcuname]['round']
- if pcu_round < global_round:
- # do work
- values = collectStatusAndState(pcuname, l_plcpcus)
- global_round = externalState['round']
- externalState['nodes'][pcuname]['values'] = values
- externalState['nodes'][pcuname]['round'] = global_round
- else:
- count += 1
-
- if count % 20 == 0:
- database.dbDump(config.dbname, externalState)
-
- database.dbDump(config.dbname, externalState)
-
-fbpcu = database.dbLoad('findbadpcus')
-hn2lb = database.dbLoad("plcdb_hn2lb")
-
-def get(fb, path):
- indexes = path.split("/")
- values = fb
- for index in indexes:
- if index in values:
- values = values[index]
- else:
- return None
- return values
-
-def collectStatusAndState(pcuname, l_plcpcus):
- global count
-
- d_pcu = None
- for pcu in l_plcpcus:
- if pcu['pcu_id'] == pcuname:
- d_pcu = pcu
- break
- if not d_pcu:
- return None
-
- pf = PersistFlags(pcuname, 1, db='pcu_persistflags')
-
- if not pf.checkattr('last_changed'):
- pf.last_changed = time.time()
-
- pf.last_checked = time.time()
-
- if not pf.checkattr('valid'):
- pf.valid = "unknown"
- pf.last_valid = 0
-
- if not pf.checkattr('status'):
- pf.status = "unknown"
-
- state_path = "nodes/id_" + str(pcuname) + "/values/reboot"
- bootstate_path = "nodes/id_" + str(pcuname) + "/values/plcpcu/boot_state"
-
- current_state = get(fbpcu, state_path)
- if current_state == 0:
- if pf.status != "good": pf.last_changed = time.time()
- pf.status = "good"
- elif current_state == 'NetDown':
- if pf.status != "netdown": pf.last_changed = time.time()
- pf.status = "netdown"
- elif current_state == 'Not_Run':
- if pf.status != "badconfig": pf.last_changed = time.time()
- pf.status = "badconfig"
- else:
- if pf.status != "error": pf.last_changed = time.time()
- pf.status = "error"
+def check_pcu_state(rec, pcu):
+
+ pcu_state = rec.reboot_trial_status
- count += 1
- print "%d %35s %s since(%s)" % (count, pcu_name(d_pcu), pf.status, diff_time(pf.last_changed))
- # updated by other modules
- #pf.enabled =
- #pf.suspended =
+ if ( pcu_state == 'NetDown' or pcu_state == 'Not_Run' or not ( pcu_state == 0 or pcu_state == "0" ) ) and \
+ ( pcu.status == 'online' or pcu.status == 'good' ):
+ print "changed status from %s to offline" % pcu.status
+ pcu.status = 'offline'
+ pcu.last_changed = datetime.now()
- pf.save()
+ if ( pcu_state == 0 or pcu_state == "0" ) and pcu.status not in [ 'online', 'good' ]:
+ print "changed status from %s to online" % pcu.status
+ pcu.status = 'online'
+ pcu.last_changed = datetime.now()
+
+ if pcu.status == 'online' and changed_greaterthan(pcu.last_changed, 0.5):
+ # Placeholder: a thank-you / back-online notice would be sent here; nothing is sent yet.
+ print "changed status from %s to good" % pcu.status
+ pcu.status = 'good'
+ # NOTE: do not reset last_changed, or you lose how long it's been up.
+
+ if pcu.status == 'offline' and changed_greaterthan(pcu.last_changed, 2):
+ # Placeholder: a "PCU down" notice would be sent here; nothing is sent yet.
+ print "changed status from %s to down" % pcu.status
+ pcu.status = 'down'
+ pcu.last_changed = datetime.now()
+
+ if ( pcu.status == 'offline' or pcu.status == 'down' ) and changed_greaterthan(pcu.last_changed, 2*30):
+ print "changed status from %s to down" % pcu.status
+ pcu.status = 'down'
+ pcu.last_changed = datetime.now()
+
+def checkAndRecordState(l_pcus, l_plcpcus):
+ count = 0
+ for pcuname in l_pcus:
+
+ d_pcu = None
+ for pcu in l_plcpcus:
+ if pcu['pcu_id'] == pcuname:
+ d_pcu = pcu
+ break
+ if not d_pcu:
+ continue
+
+ pcuhist = HistoryPCURecord.findby_or_create(plc_pcuid=d_pcu['pcu_id'],
+ if_new_set={'status' : 'offline',
+ 'last_changed' : datetime.now()})
+ pcuhist.last_checked = datetime.now()
+
+ try:
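+ # Fetch the first matching findbad record for this PCU. NOTE(review): the
+ # query has no explicit ordering, so "most recent" holds only if the model
+ # defines a default ordering -- confirm.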
+ pcurec = FindbadPCURecord.query.filter(FindbadPCURecord.plc_pcuid==pcuname).first()
+ except:
+ print "COULD NOT FIND FB record for %s" % reboot.pcu_name(d_pcu)
+ import traceback
+ email_exception()
+ print traceback.print_exc()
+ # don't have the info to create a new entry right now, so continue.
+ continue
+
+ if not pcurec:
+ print "none object for pcu %s"% reboot.pcu_name(d_pcu)
+ continue
+
+ check_pcu_state(pcurec, pcuhist)
+
+ count += 1
+ print "%d %35s %s since(%s)" % (count, reboot.pcu_name(d_pcu), pcuhist.status, diff_time(time.mktime(pcuhist.last_changed.timetuple())))
+
+ # NOTE: this commits all pending operations to the DB. Do not remove it
+ # unless you replace it with another operation that also commits all
+ # pending ops, such as session.commit() or flush().
+ session.flush()
+ print HistoryPCURecord.query.count()
return True
if __name__ == '__main__':
- from config import config
- from optparse import OptionParser
- parser = OptionParser()
- parser.set_defaults(filename=None, pcu=None, pcuselect=False, pcugroup=None,
- increment=False, dbname="pcubad", cachepcus=False)
+ parser = parsermodule.getParser()
+ parser.set_defaults(filename=None, pcu=None, node=None, site=None, pcuselect=False, pcugroup=None, cachepcus=False)
parser.add_option("", "--pcu", dest="pcu", metavar="hostname",
help="Provide a single pcu to operate on")
+ parser.add_option("", "--site", dest="site", metavar="sitename",
+ help="Provide a single sitename to operate on")
+ parser.add_option("", "--node", dest="node", metavar="nodename",
+ help="Provide a single node to operate on")
parser.add_option("", "--pculist", dest="pculist", metavar="file.list",
help="Provide a list of files to operate on")
- parser.add_option("", "--dbname", dest="dbname", metavar="FILE",
- help="Specify the name of the database to which the information is saved")
- parser.add_option("-i", "--increment", action="store_true", dest="increment",
- help="Increment round number to force refresh or retry")
- config = config(parser)
- config.parse_args()
+ config = parsermodule.parse_args(parser)
try:
- main(config)
+ main2(config)
except Exception, err:
import traceback
- print traceback.print_exc()
+ traceback.print_exc()
print "Exception: %s" % err
- print "Saving data... exitting."
- database.dbDump(config.dbname, externalState)
sys.exit(0)