X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=sitebad.py;h=1261bcc3fb73f2ecd90bfade137d3e8842175c6a;hb=59a3193348d8981ba5a648915c1517ad7d1d5911;hp=2c1628aace9752ba495b12508c1792374fb00f99;hpb=da913fbd1629fc4669b186915df8ff3a340482d3;p=monitor.git diff --git a/sitebad.py b/sitebad.py index 2c1628a..1261bcc 100755 --- a/sitebad.py +++ b/sitebad.py @@ -7,45 +7,89 @@ import time from datetime import datetime,timedelta from monitor import database -from monitor.pcu import reboot from monitor import parser as parsermodule from monitor import config -from monitor.database import HistorySiteRecord, FindbadNodeRecord +from monitor.database.info.model import HistorySiteRecord, HistoryNodeRecord, session, BlacklistRecord from monitor.wrapper import plc, plccache from monitor.const import MINUP -from nodecommon import * -from nodequery import verify,query_to_dict,node_select +from monitor.common import * +from monitor.query import verify,query_to_dict,node_select from monitor.model import * api = plc.getAuthAPI() +def main(): + main2(config) -def main(config): +def main2(config): l_nodes = plccache.l_nodes l_plcsites = plccache.l_sites if config.site: l_sites = [config.site] + elif config.node: + l_sites = [plccache.plcdb_hn2lb[config.node]] + elif config.sitelist: + site_list = config.sitelist.split(',') + l_sites = site_list else: l_sites = [site['login_base'] for site in l_plcsites] checkAndRecordState(l_sites, l_plcsites) def getnodesup(nodelist): + # NOTE : assume that a blacklisted node is fine, since we're told not to + # ignore it, no policy actions should be taken for it. up = 0 for node in nodelist: try: - noderec = FindbadNodeRecord.query.filter(FindbadNodeRecord.hostname==node['hostname']).order_by(FindbadNodeRecord.date_checked.desc()).first() - #noderec = FindbadNodeRecord.select(FindbadNodeRecord.q.hostname==node['hostname'], - # orderBy='date_checked').reversed()[0] - if noderec is not None and noderec.observed_status == "BOOT": + # NOTE: adding a condition for nodehist.haspcu would include pcus + # in the calculation + nodehist = HistoryNodeRecord.findby_or_create(hostname=node['hostname']) + nodebl = BlacklistRecord.get_by(hostname=node['hostname']) + if (nodehist is not None and nodehist.status != 'down') or \ + (nodebl is not None and not nodebl.expired()): up = up + 1 except: import traceback + email_exception(node['hostname']) print traceback.print_exc() return up +def check_site_state(rec, sitehist): + + if sitehist.new and sitehist.status not in ['new', 'online', 'good']: + sitehist.status = 'new' + sitehist.penalty_applied = True # because new sites are disabled by default, i.e. have a penalty. + sitehist.last_changed = datetime.now() + + if sitehist.nodes_up >= MINUP: + + if sitehist.status != 'online' and sitehist.status != 'good': + sitehist.last_changed = datetime.now() + + if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online': + print "changed status from %s to online" % sitehist.status + sitehist.status = 'online' + + if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good': + print "changed status from %s to good" % sitehist.status + sitehist.status = 'good' + + elif not sitehist.new: + + if sitehist.status != 'offline' and sitehist.status != 'down': + sitehist.last_changed = datetime.now() + + if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline': + print "changed status from %s to offline" % sitehist.status + sitehist.status = 'offline' + + if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down': + print "changed status from %s to down" % sitehist.status + sitehist.status = 'down' + def checkAndRecordState(l_sites, l_plcsites): count = 0 lb2hn = plccache.plcdb_lb2hn @@ -59,24 +103,36 @@ def checkAndRecordState(l_sites, l_plcsites): continue if sitename in lb2hn: - pf = HistorySiteRecord.findby_or_create(loginbase=sitename) - - pf.last_checked = datetime.now() - pf.slices_used = len(d_site['slice_ids']) - pf.nodes_total = len(lb2hn[sitename]) - pf.nodes_up = getnodesup(lb2hn[sitename]) - - if pf.nodes_up >= MINUP: - if pf.status != "good": pf.last_changed = datetime.now() - pf.status = "good" - else: - if pf.status != "down": pf.last_changed = datetime.now() - pf.status = "down" + sitehist = HistorySiteRecord.findby_or_create(loginbase=sitename, + if_new_set={'status' : 'unknown', + 'last_changed' : datetime.now(), + 'message_id': 0, + 'penalty_level' : 0}) + sitehist.last_checked = datetime.now() + + sitehist.plc_siteid = d_site['site_id'] + sitehist.slices_total = d_site['max_slices'] + sitehist.slices_used = len(d_site['slice_ids']) + sitehist.nodes_total = len(lb2hn[sitename]) + if sitehist.message_id != 0: + rtstatus = mailer.getTicketStatus(sitehist.message_id) + sitehist.message_status = rtstatus['Status'] + sitehist.message_queue = rtstatus['Queue'] + sitehist.message_created = datetime.fromtimestamp(rtstatus['Created']) + + sitehist.nodes_up = getnodesup(lb2hn[sitename]) + sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30) # created < 30 days ago + sitehist.enabled = d_site['enabled'] + + check_site_state(d_site, sitehist) count += 1 - print "%d %15s slices(%2s) nodes(%2s) up(%2s) %s" % (count, sitename, pf.slices_used, - pf.nodes_total, pf.nodes_up, pf.status) + print "%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (count, sitename, sitehist.slices_used, + sitehist.nodes_total, sitehist.nodes_up, sitehist.status) + sitehist.flush() + print HistorySiteRecord.query.count() + session.flush() return True @@ -89,13 +145,13 @@ if __name__ == '__main__': parser.add_option("", "--site", dest="site", metavar="login_base", help="Provide a single site to operate on") - parser.add_option("", "--sitelist", dest="sitelist", metavar="file.list", - help="Provide a list of files to operate on") + parser.add_option("", "--sitelist", dest="sitelist", + help="Provide a list of sites separated by ','") config = parsermodule.parse_args(parser) try: - main(config) + main2(config) except Exception, err: import traceback print traceback.print_exc()