#!/usr/bin/python import os import sys import string import time from datetime import datetime,timedelta from monitor import database from monitor import parser as parsermodule from monitor import config from monitor.database.info.model import * from monitor.wrapper import plc, plccache from monitor.const import MINUP from monitor.common import * from monitor.query import verify,query_to_dict,node_select from monitor.model import * api = plc.getAuthAPI() def main(): main2(config) def main2(config): l_nodes = plccache.l_nodes l_plcsites = plccache.l_sites if config.site: l_sites = [config.site] elif config.node: l_sites = [plccache.plcdb_hn2lb[config.node]] elif config.sitelist: site_list = config.sitelist.split(',') l_sites = site_list else: l_sites = [site['login_base'] for site in l_plcsites] checkAndRecordState(l_sites, l_plcsites, config.checkpcu) def getnodesup(nodelist, checkpcu): # NOTE : assume that a blacklisted node is fine, since we're told not to # ignore it, no policy actions should be taken for it. up = 0 for node in nodelist: try: # NOTE: adding a condition for nodehist.haspcu would include pcus # in the calculation nodehist = HistoryNodeRecord.findby_or_create(hostname=node['hostname']) nodebl = BlacklistRecord.get_by(hostname=node['hostname']) if checkpcu: # get pcu history for node if nodehist.haspcu: # get node record for pcuid noderec = FindbadNodeRecord.get_latest_by(hostname=node['hostname']) # get pcuhistory based on pcuid pcuhist = HistoryPCURecord.findby_or_create(plc_pcuid=noderec.plc_pcuid) # if pcu is not down & node is not down if (nodehist is not None and nodehist.status != 'down' and \ pcuhist is not None and pcuhist.status != 'down') or \ (nodebl is not None and not nodebl.expired()): up = up + 1 else: # todo: don't count pass else: if (nodehist is not None and nodehist.status != 'down') or \ (nodebl is not None and not nodebl.expired()): up = up + 1 except: import traceback email_exception(node['hostname']) print traceback.print_exc() return up def check_site_state(rec, sitehist): if sitehist.new and sitehist.status not in ['new', 'online', 'good']: sitehist.status = 'new' sitehist.penalty_applied = True # because new sites are disabled by default, i.e. have a penalty. sitehist.last_changed = datetime.now() if sitehist.nodes_up >= MINUP: if sitehist.status != 'online' and sitehist.status != 'good': sitehist.last_changed = datetime.now() if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'online': print "changed status from %s to online" % sitehist.status sitehist.status = 'online' if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'good': print "changed status from %s to good" % sitehist.status sitehist.status = 'good' elif not sitehist.new: if sitehist.status != 'offline' and sitehist.status != 'down': sitehist.last_changed = datetime.now() if changed_lessthan(sitehist.last_changed, 0.5) and sitehist.status != 'offline': print "changed status from %s to offline" % sitehist.status sitehist.status = 'offline' if changed_greaterthan(sitehist.last_changed, 0.5) and sitehist.status != 'down': print "changed status from %s to down" % sitehist.status sitehist.status = 'down' def checkAndRecordState(l_sites, l_plcsites, checkpcu): count = 0 lb2hn = plccache.plcdb_lb2hn for sitename in l_sites: d_site = None for site in l_plcsites: if site['login_base'] == sitename: d_site = site break if not d_site: continue if sitename in lb2hn: sitehist = HistorySiteRecord.findby_or_create(loginbase=sitename, if_new_set={'status' : 'unknown', 'last_changed' : datetime.now(), 'message_id': 0, 'penalty_level' : 0}) sitehist.last_checked = datetime.now() sitehist.plc_siteid = d_site['site_id'] sitehist.slices_total = d_site['max_slices'] sitehist.slices_used = len(d_site['slice_ids']) sitehist.nodes_total = len(lb2hn[sitename]) if sitehist.message_id != 0: rtstatus = mailer.getTicketStatus(sitehist.message_id) sitehist.message_status = rtstatus['Status'] sitehist.message_queue = rtstatus['Queue'] sitehist.message_created = datetime.fromtimestamp(rtstatus['Created']) sitehist.nodes_up = getnodesup(lb2hn[sitename], checkpcu) sitehist.new = changed_lessthan(datetime.fromtimestamp(d_site['date_created']), 30) # created < 30 days ago sitehist.enabled = d_site['enabled'] check_site_state(d_site, sitehist) count += 1 print "%d %15s slices(%2s) nodes(%2s) notdown(%2s) %s" % (count, sitename, sitehist.slices_used, sitehist.nodes_total, sitehist.nodes_up, sitehist.status) sitehist.flush() print HistorySiteRecord.query.count() session.flush() return True if __name__ == '__main__': from monitor import parser as parsermodule parser = parsermodule.getParser() parser.set_defaults(checkpcu=False) parser.add_option("", "--site", dest="site", metavar="login_base", help="Provide a single site to operate on") parser.add_option("", "--sitelist", dest="sitelist", help="Provide a list of sites separated by ','") parser.add_option("", "--checkpcu", dest="checkpcu", action="store_true", help="whether to include PCUs in the site status") config = parsermodule.parse_args(parser) try: main2(config) except Exception, err: import traceback print traceback.print_exc() print "Exception: %s" % err sys.exit(0)