X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=nodebad.py;h=5543f903d5fa15c2795ee6826be0d90e6ce55d41;hb=da913fbd1629fc4669b186915df8ff3a340482d3;hp=0130c3e642e336ee5cbbf5c660c3225ae7af0783;hpb=6d46ab9b534b60675a3dcb11fcb664589a3691f8;p=monitor.git diff --git a/nodebad.py b/nodebad.py index 0130c3e..5543f90 100755 --- a/nodebad.py +++ b/nodebad.py @@ -4,144 +4,89 @@ import os import sys import string import time +from datetime import datetime,timedelta - -import database -import comon -import threadpool -import syncplcdb from nodequery import verify,query_to_dict,node_select + from nodecommon import * -import plc +from monitor import config +from monitor.wrapper import plc,plccache +from monitor.const import MINUP +from monitor.database import FindbadNodeRecord, HistoryNodeRecord + +from monitor.model import * + api = plc.getAuthAPI() -from unified_model import * -from const import MINUP round = 1 -externalState = {'round': round, 'nodes': {}} count = 0 def main(config): - global externalState - externalState = database.if_cached_else(1, config.dbname, lambda : externalState) - if config.increment: - # update global round number to force refreshes across all nodes - externalState['round'] += 1 - - l_nodes = syncplcdb.create_plcdb() - l_plcnodes = database.dbLoad("l_plcnodes") + l_plcnodes = plccache.l_nodes l_nodes = get_nodeset(config) - #if config.node: - # l_nodes = [config.node] - ##else: - # l_nodes = [node['hostname'] for node in l_plcnodes] checkAndRecordState(l_nodes, l_plcnodes) def checkAndRecordState(l_nodes, l_plcnodes): - global externalState global count - global_round = externalState['round'] for nodename in l_nodes: - if nodename not in externalState['nodes']: - externalState['nodes'][nodename] = {'round': 0, 'values': []} - - node_round = externalState['nodes'][nodename]['round'] - if node_round < global_round: - # do work - values = collectStatusAndState(nodename, l_plcnodes) - global_round = externalState['round'] - externalState['nodes'][nodename]['values'] = values - externalState['nodes'][nodename]['round'] = global_round + d_node = None + for node in l_plcnodes: + if node['hostname'] == nodename: + d_node = node + break + if not d_node: + continue + + pf = HistoryNodeRecord.findby_or_create(hostname=nodename) + pf.last_checked = datetime.now() + + try: + # Find the most recent record + noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==nodename).order_by(FindbadNodeRecord.date_checked.desc()).first() + #print "NODEREC: ", noderec.date_checked + except: + print "COULD NOT FIND %s" % nodename + import traceback + print traceback.print_exc() + continue + + node_state = noderec.observed_status + if noderec.plc_node_stats: + boot_state = noderec.plc_node_stats['boot_state'] else: - count += 1 - - if count % 20 == 0: - database.dbDump(config.dbname, externalState) - - database.dbDump(config.dbname, externalState) - -fb = database.dbLoad('findbad') - -def getnodesup(nodelist): - up = 0 - for node in nodelist: - if node['hostname'] in fb['nodes'].keys(): - try: - if fb['nodes'][node['hostname']]['values']['state'] == "BOOT": - up = up + 1 - except: - pass - return up - -def get(fb, path): - indexes = path.split("/") - values = fb - for index in indexes: - if index in values: - values = values[index] + boot_state = "unknown" + + if node_state == "BOOT": + if pf.status != "good": + pf.last_changed = datetime.now() + pf.status = "good" + elif node_state == "DEBUG": + if pf.status != boot_state: + pf.last_changed = datetime.now() + pf.status = boot_state else: - return None - return values + if pf.status != "down": + pf.last_changed = datetime.now() + pf.status = "down" -def collectStatusAndState(nodename, l_plcnodes): - global count + count += 1 + print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(time.mktime(pf.last_changed.timetuple()))) - d_node = None - for node in l_plcnodes: - if node['hostname'] == nodename: - d_node = node - break - if not d_node: - return None - - pf = PersistFlags(nodename, 1, db='node_persistflags') - - if not pf.checkattr('last_changed'): - pf.last_changed = time.time() - - pf.last_checked = time.time() - - if not pf.checkattr('status'): - pf.status = "unknown" - - state_path = "nodes/" + nodename + "/values/state" - bootstate_path = "nodes/" + nodename + "/values/plcnode/boot_state" - - if get(fb, state_path) == "BOOT": - if pf.status != "good": pf.last_changed = time.time() - pf.status = "good" - elif get(fb, state_path) == "DEBUG": - bs = get(fb, bootstate_path) - if pf.status != bs: pf.last_changed = time.time() - pf.status = bs - else: - if pf.status != "down": pf.last_changed = time.time() - pf.status = "down" - - count += 1 - print "%d %35s %s since(%s)" % (count, nodename, pf.status, diff_time(pf.last_changed)) - # updated by other modules - #pf.enabled = - #pf.suspended = - - pf.save() + # NOTE: this commits all pending operations to the DB. Do not remove, or + # replace with another operations that also commits all pending ops, such + # as session.commit() or flush() or something + print HistoryNodeRecord.query.count() return True if __name__ == '__main__': - import parser as parsermodule + from monitor import parser as parsermodule parser = parsermodule.getParser(['nodesets']) - parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, - increment=False, dbname="nodebad", cachenodes=False) - - parser.add_option("", "--dbname", dest="dbname", metavar="FILE", - help="Specify the name of the database to which the information is saved") - parser.add_option("-i", "--increment", action="store_true", dest="increment", - help="Increment round number to force refresh or retry") + parser.set_defaults(filename=None, node=None, nodeselect=False, nodegroup=None, cachenodes=False) parser = parsermodule.getParser(['defaults'], parser) config = parsermodule.parse_args(parser) @@ -151,6 +96,4 @@ if __name__ == '__main__': import traceback print traceback.print_exc() print "Exception: %s" % err - print "Saving data... exitting." - database.dbDump(config.dbname, externalState) sys.exit(0)