X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=sitebad.py;h=df4e522da22cbff5b8a1a4395762652670247493;hb=0a49240ceff10f9da64fd470ed883bb17a11c458;hp=dc0e8a716853ed29bfb632a6b230da2eef27ee61;hpb=c3f2afdc81c6711c3825c82e2cd4970671575438;p=monitor.git diff --git a/sitebad.py b/sitebad.py index dc0e8a7..df4e522 100755 --- a/sitebad.py +++ b/sitebad.py @@ -4,143 +4,156 @@ import os import sys import string import time +from datetime import datetime,timedelta +from monitor import database +from monitor import parser as parsermodule +from monitor import config +from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session, BlacklistRecord +from monitor.wrapper import plc, plccache +from monitor.const import MINUP -import database -import comon -import threadpool -import syncplcdb +from monitor.common import * from nodequery import verify,query_to_dict,node_select +from monitor.model import * -import plc api = plc.getAuthAPI() -from unified_model import * -from monitor_policy import MINUP +def main(): + main2(config) -round = 1 -externalState = {'round': round, 'sites': {}} -count = 0 +def main2(config): -def main(config): - global externalState - externalState = database.if_cached_else(1, config.dbname, lambda : externalState) - if config.increment: - # update global round number to force refreshes across all nodes - externalState['round'] += 1 - - l_nodes = syncplcdb.create_plcdb() - l_plcsites = database.dbLoad("l_plcsites") + l_nodes = plccache.l_nodes + l_plcsites = plccache.l_sites if config.site: l_sites = [config.site] + elif config.node: + l_sites = [plccache.plcdb_hn2lb[config.node]] + elif config.sitelist: + site_list = config.sitelist.split(',') + l_sites = site_list else: l_sites = [site['login_base'] for site in l_plcsites] checkAndRecordState(l_sites, l_plcsites) -def checkAndRecordState(l_sites, l_plcsites): - global externalState - global count - global_round = externalState['round'] +def getnodesup(nodelist): + # NOTE : assume that a blacklisted node is fine, since we're told not to + # ignore it, no policy actions should be taken for it. + up = 0 + for node in nodelist: + try: + # NOTE: adding a condition for nodehist.haspcu would include pcus + # in the calculation + nodehist = HistoryNodeRecord.findby_or_create(hostname=node['hostname']) + nodebl = BlacklistRecord.get_by(hostname=node['hostname']) + if (nodehist is not None and nodehist.status != 'down') or \ + (nodebl is not None and not nodebl.expired()): + up = up + 1 + except: + import traceback + email_exception(node['hostname']) + print traceback.print_exc() + return up - for sitename in l_sites: - if sitename not in externalState['sites']: - externalState['sites'][sitename] = {'round': 0, 'values': []} - - site_round = externalState['sites'][sitename]['round'] - if site_round < global_round: - # do work - values = collectStatusAndState(sitename, l_plcsites) - global_round = externalState['round'] - externalState['sites'][sitename]['values'] = values - externalState['sites'][sitename]['round'] = global_round - else: - count += 1 +def check_site_state(rec, sitehist): - if count % 20 == 0: - database.dbDump(config.dbname, externalState) + if sitehist.new and sitehist.status not in ['new', 'online', 'good']: + sitehist.status = 'new' + sitehist.penalty_applied = True # because new sites are disabled by default, i.e. have a penalty. + sitehist.last_changed = datetime.now() - database.dbDump(config.dbname, externalState) + if sitehist.nodes_up >= MINUP: -fb = database.dbLoad('findbad') -lb2hn = database.dbLoad("plcdb_lb2hn") + if sitehist.status != 'online' and sitehist.status != 'good': + sitehist.last_changed = datetime.now() -def getnodesup(nodelist): - up = 0 - for node in nodelist: - if node['hostname'] in fb['nodes'].keys(): - try: - if fb['nodes'][node['hostname']]['values']['state'] == "BOOT": - up = up + 1 - except: - pass - return up + if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online': + print "changed status from %s to online" % sitehist.status + sitehist.status = 'online' -def collectStatusAndState(sitename, l_plcsites): - global count - - d_site = None - for site in l_plcsites: - if site['login_base'] == sitename: - d_site = site - break - if not d_site: - return None - - if sitename in lb2hn: - pf = PersistFlags(sitename, 1, db='site_persistflags') - - if not pf.checkattr('last_changed'): - pf.last_changed = time.time() - - pf.last_checked = time.time() - pf.nodes_total = len(lb2hn[sitename]) - pf.slices_used = len(d_site['slice_ids']) - pf.nodes_up = getnodesup(lb2hn[sitename]) - if not pf.checkattr('status'): - pf.status = "unknown" - - if pf.nodes_up >= MINUP: - if pf.status != "good": pf.last_changed = time.time() - pf.status = "good" - else: - if pf.status != "down": pf.last_changed = time.time() - pf.status = "down" - - count += 1 - print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, - pf.nodes_total, pf.nodes_up, pf.status) - # updated by other modules - #pf.enabled = - #pf.suspended = - - pf.save() + if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good': + print "changed status from %s to good" % sitehist.status + sitehist.status = 'good' + + elif not sitehist.new: + + if sitehist.status != 'offline' and sitehist.status != 'down': + sitehist.last_changed = datetime.now() + + if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline': + print "changed status from %s to offline" % sitehist.status + sitehist.status = 'offline' + + if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down': + print "changed status from %s to down" % sitehist.status + sitehist.status = 'down' + +def checkAndRecordState(l_sites, l_plcsites): + count = 0 + lb2hn = plccache.plcdb_lb2hn + for sitename in l_sites: + d_site = None + for site in l_plcsites: + if site['login_base'] == sitename: + d_site = site + break + if not d_site: + continue + + if sitename in lb2hn: + sitehist = HistorySiteRecord.findby_or_create(loginbase=sitename, + if_new_set={'status' : 'unknown', + 'last_changed' : datetime.now(), + 'message_id': 0, + 'penalty_level' : 0}) + sitehist.last_checked = datetime.now() + + sitehist.plc_siteid = d_site['site_id'] + sitehist.slices_total = d_site['max_slices'] + sitehist.slices_used = len(d_site['slice_ids']) + sitehist.nodes_total = len(lb2hn[sitename]) + if sitehist.message_id != 0: + rtstatus = mailer.getTicketStatus(sitehist.message_id) + sitehist.message_status = rtstatus['Status'] + sitehist.message_queue = rtstatus['Queue'] + sitehist.message_created = datetime.fromtimestamp(rtstatus['Created']) + + sitehist.nodes_up = getnodesup(lb2hn[sitename]) + sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30) # created < 30 days ago + sitehist.enabled = d_site['enabled'] + + check_site_state(d_site, sitehist) + + count += 1 + print "%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (count, sitename, sitehist.slices_used, + sitehist.nodes_total, sitehist.nodes_up, sitehist.status) + sitehist.flush() + + print HistorySiteRecord.query.count() + session.flush() return True if __name__ == '__main__': - import parser as parsermodule + from monitor import parser as parsermodule parser = parsermodule.getParser() - parser.set_defaults(filename=None, node=None, site=None, nodeselect=False, nodegroup=None, - increment=False, dbname="sitebad", cachenodes=False) + parser.set_defaults(filename=None, node=None, site=None, + nodeselect=False, nodegroup=None, cachenodes=False) + parser.add_option("", "--site", dest="site", metavar="login_base", help="Provide a single site to operate on") - parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list", - help="Provide a list of files to operate on") + parser.add_option("", "--sitelist", dest="sitelist", + help="Provide a list of sites separated by ','") - parser.add_option("", "--dbname", dest="dbname", metavar="FILE", - help="Specify the name of the database to which the information is saved") - parser.add_option("-i", "--increment", action="store_true", dest="increment", - help="Increment round number to force refresh or retry") config = parsermodule.parse_args(parser) try: - main(config) + main2(config) except Exception, err: import traceback print traceback.print_exc() print "Exception: %s" % err - print "Saving data... exitting." - database.dbDump(config.dbname, externalState) sys.exit(0)