X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=sitebad.py;h=2c1628aace9752ba495b12508c1792374fb00f99;hb=da913fbd1629fc4669b186915df8ff3a340482d3;hp=dc0e8a716853ed29bfb632a6b230da2eef27ee61;hpb=c3f2afdc81c6711c3825c82e2cd4970671575438;p=monitor.git diff --git a/sitebad.py b/sitebad.py index dc0e8a7..2c1628a 100755 --- a/sitebad.py +++ b/sitebad.py @@ -4,32 +4,26 @@ import os import sys import string import time +from datetime import datetime,timedelta +from monitor import database +from monitor.pcu import reboot +from monitor import parser as parsermodule +from monitor import config +from monitor.database import HistorySiteRecord, FindbadNodeRecord +from monitor.wrapper import plc, plccache +from monitor.const import MINUP -import database -import comon -import threadpool -import syncplcdb +from nodecommon import * from nodequery import verify,query_to_dict,node_select +from monitor.model import * -import plc api = plc.getAuthAPI() -from unified_model import * -from monitor_policy import MINUP - -round = 1 -externalState = {'round': round, 'sites': {}} -count = 0 def main(config): - global externalState - externalState = database.if_cached_else(1, config.dbname, lambda : externalState) - if config.increment: - # update global round number to force refreshes across all nodes - externalState['round'] += 1 - l_nodes = syncplcdb.create_plcdb() - l_plcsites = database.dbLoad("l_plcsites") + l_nodes = plccache.l_nodes + l_plcsites = plccache.l_sites if config.site: l_sites = [config.site] @@ -38,101 +32,66 @@ def main(config): checkAndRecordState(l_sites, l_plcsites) -def checkAndRecordState(l_sites, l_plcsites): - global externalState - global count - global_round = externalState['round'] - - for sitename in l_sites: - if sitename not in externalState['sites']: - externalState['sites'][sitename] = {'round': 0, 'values': []} - - site_round = externalState['sites'][sitename]['round'] - if site_round < global_round: - # do work - values = collectStatusAndState(sitename, l_plcsites) - global_round = externalState['round'] - externalState['sites'][sitename]['values'] = values - externalState['sites'][sitename]['round'] = global_round - else: - count += 1 - - if count % 20 == 0: - database.dbDump(config.dbname, externalState) - - database.dbDump(config.dbname, externalState) - -fb = database.dbLoad('findbad') -lb2hn = database.dbLoad("plcdb_lb2hn") - def getnodesup(nodelist): up = 0 for node in nodelist: - if node['hostname'] in fb['nodes'].keys(): - try: - if fb['nodes'][node['hostname']]['values']['state'] == "BOOT": - up = up + 1 - except: - pass + try: + noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node['hostname']).order_by(FindbadNodeRecord.date_checked.desc()).first() + #noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==node['hostname'], + # orderBy='date_checked').reversed()[0] + if noderec is not None and noderec.observed_status == "BOOT": + up = up + 1 + except: + import traceback + print traceback.print_exc() return up -def collectStatusAndState(sitename, l_plcsites): - global count - - d_site = None - for site in l_plcsites: - if site['login_base'] == sitename: - d_site = site - break - if not d_site: - return None - - if sitename in lb2hn: - pf = PersistFlags(sitename, 1, db='site_persistflags') - - if not pf.checkattr('last_changed'): - pf.last_changed = time.time() - - pf.last_checked = time.time() - pf.nodes_total = len(lb2hn[sitename]) - pf.slices_used = len(d_site['slice_ids']) - pf.nodes_up = getnodesup(lb2hn[sitename]) - if not pf.checkattr('status'): - pf.status = "unknown" - - if pf.nodes_up >= MINUP: - if pf.status != "good": pf.last_changed = time.time() - pf.status = "good" - else: - if pf.status != "down": pf.last_changed = time.time() - pf.status = "down" - - count += 1 - print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, - pf.nodes_total, pf.nodes_up, pf.status) - # updated by other modules - #pf.enabled = - #pf.suspended = - - pf.save() +def checkAndRecordState(l_sites, l_plcsites): + count = 0 + lb2hn = plccache.plcdb_lb2hn + for sitename in l_sites: + d_site = None + for site in l_plcsites: + if site['login_base'] == sitename: + d_site = site + break + if not d_site: + continue + + if sitename in lb2hn: + pf = HistorySiteRecord.findby_or_create(loginbase=sitename) + + pf.last_checked = datetime.now() + pf.slices_used = len(d_site['slice_ids']) + pf.nodes_total = len(lb2hn[sitename]) + pf.nodes_up = getnodesup(lb2hn[sitename]) + + if pf.nodes_up >= MINUP: + if pf.status != "good": pf.last_changed = datetime.now() + pf.status = "good" + else: + if pf.status != "down": pf.last_changed = datetime.now() + pf.status = "down" + + count += 1 + print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, + pf.nodes_total, pf.nodes_up, pf.status) + print HistorySiteRecord.query.count() return True if __name__ == '__main__': - import parser as parsermodule + from monitor import parser as parsermodule parser = parsermodule.getParser() - parser.set_defaults(filename=None, node=None, site=None, nodeselect=False, nodegroup=None, - increment=False, dbname="sitebad", cachenodes=False) + parser.set_defaults(filename=None, node=None, site=None, + nodeselect=False, nodegroup=None, cachenodes=False) + parser.add_option("", "--site", dest="site", metavar="login_base", help="Provide a single site to operate on") parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list", help="Provide a list of files to operate on") - parser.add_option("", "--dbname", dest="dbname", metavar="FILE", - help="Specify the name of the database to which the information is saved") - parser.add_option("-i", "--increment", action="store_true", dest="increment", - help="Increment round number to force refresh or retry") config = parsermodule.parse_args(parser) try: @@ -141,6 +100,4 @@ if __name__ == '__main__': import traceback print traceback.print_exc() print "Exception: %s" % err - print "Saving data... exitting." - database.dbDump(config.dbname, externalState) sys.exit(0)