X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=monitor_policy.py;h=45242eaf00c1d9ebd6d581b9e834f57caf11eb54;hb=6f2351e4b44590221425fa9b4bfa77c92db49b6a;hp=a44e9a13696f298961424b49d76a4104297ed997;hpb=d849bbfbcee1817fbdadc5ea35f00832c01d0dc9;p=monitor.git diff --git a/monitor_policy.py b/monitor_policy.py index a44e9a1..45242ea 100644 --- a/monitor_policy.py +++ b/monitor_policy.py @@ -1,18 +1,25 @@ -from config import config -#print "policy" -config = config() -import soltesz +import config +import database import time import mailer -from www.printbadnodes import cmpCategoryVal +from unified_model import cmpCategoryVal import sys import emailTxt import string -from policy import get_ticket_id, print_stats, close_rt_backoff, reboot_node from rt import is_host_in_rt_tickets import plc +def get_ticket_id(record): + if 'ticket_id' in record and record['ticket_id'] is not "" and record['ticket_id'] is not None: + return record['ticket_id'] + elif 'found_rt_ticket' in record and \ + record['found_rt_ticket'] is not "" and \ + record['found_rt_ticket'] is not None: + return record['found_rt_ticket'] + else: + return None + # Time to enforce policy POLSLEEP = 7200 @@ -42,18 +49,20 @@ PI=2 USER=4 ADMIN=8 +from unified_model import * + class Merge: def __init__(self, l_merge): self.merge_list = l_merge # the hostname to loginbase mapping - self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb") + self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb") # Previous actions taken on nodes. - self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {}) - self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {}) + self.act_all = database.if_cached_else(1, "act_all", lambda : {}) + self.findbad = database.if_cached_else(1, "findbad", lambda : {}) - self.cache_all = soltesz.if_cached_else(1, "act_all", lambda : {}) + self.cache_all = database.if_cached_else(1, "act_all", lambda : {}) self.sickdb = {} self.mergedb = {} @@ -255,8 +264,8 @@ class RT: class Diagnose: def __init__(self, record_list): self.record_list = record_list - self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb") - self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {}) + self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb") + self.findbad = database.if_cached_else(1, "findbad", lambda : {}) self.diagnose_in = {} self.diagnose_out = {} @@ -396,12 +405,13 @@ class Diagnose: # NOTE: these settings can be overridden by command line arguments, # or the state of a record, i.e. if already in RT's Support Queue. - nodes_up = self.getUpAtSite(loginbase, d_diag_site) + pf = PersistFlags(loginbase, 1, db='site_persistflags') + nodes_up = pf.nodes_up if nodes_up < MINUP: d_diag_site[loginbase]['config']['squeeze'] = True max_slices = self.getMaxSlices(loginbase) - num_nodes = self.getNumNodes(loginbase) + num_nodes = pf.nodes_total #self.getNumNodes(loginbase) # NOTE: when max_slices == 0, this is either a new site (the old way) # or an old disabled site from previous monitor (before site['enabled']) if nodes_up < num_nodes and max_slices != 0: @@ -820,16 +830,27 @@ class Diagnose: return up +def close_rt_backoff(args): + if 'ticket_id' in args and (args['ticket_id'] != "" and args['ticket_id'] != None): + mailer.closeTicketViaRT(args['ticket_id'], + "Ticket CLOSED automatically by SiteAssist.") + plc.enableSlices(args['hostname']) + plc.enableSliceCreation(args['hostname']) + return + +def reboot_node(args): + host = args['hostname'] + return reboot.reboot_policy(host, True, config.debug) class Action: def __init__(self, diagnose_out): # the hostname to loginbase mapping - self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb") + self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb") # Actions to take. self.diagnose_db = diagnose_out # Actions taken. - self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {}) + self.act_all = database.if_cached_else(1, "act_all", lambda : {}) # A dict of actions to specific functions. PICKLE doesnt' like lambdas. self.actions = {} @@ -866,22 +887,24 @@ class Action: print err if config.policysavedb: print "Saving Databases... act_all" - soltesz.dbDump("act_all", self.act_all) + database.dbDump("act_all", self.act_all) + database.dbDump("diagnose_out", self.diagnose_db) sys.exit(1) #print_stats("sites_observed", stats) #print_stats("sites_diagnosed", stats) #print_stats("nodes_diagnosed", stats) - print_stats("sites_emailed", stats) + self.print_stats("sites_emailed", stats) #print_stats("nodes_actedon", stats) print string.join(stats['allsites'], ",") if config.policysavedb: print "Saving Databases... act_all" - #soltesz.dbDump("policy.eventlog", self.eventlog) + #database.dbDump("policy.eventlog", self.eventlog) # TODO: remove 'diagnose_out', # or at least the entries that were acted on. - soltesz.dbDump("act_all", self.act_all) + database.dbDump("act_all", self.act_all) + database.dbDump("diagnose_out", self.diagnose_db) def accumSites(self): """ @@ -914,18 +937,22 @@ class Action: if ADMIN & roles: contacts += [config.email] if TECH & roles: - contacts += [TECHEMAIL % loginbase] + #contacts += [TECHEMAIL % loginbase] + contacts += plc.getTechEmails(loginbase) if PI & roles: - contacts += [PIEMAIL % loginbase] + #contacts += [PIEMAIL % loginbase] + contacts += plc.getPIEmails(loginbase) if USER & roles: + contacts += plc.getSliceUserEmails(loginbase) slices = plc.slices(loginbase) if len(slices) >= 1: - for slice in slices: - contacts += [SLICEMAIL % slice] print "SLIC: %20s : %d slices" % (loginbase, len(slices)) else: print "SLIC: %20s : 0 slices" % loginbase + unique_contacts = set(contacts) + contacts = [ c for c in unique_contacts ] # convert back into list + try: subject = message[0] % args body = message[1] % args @@ -1058,6 +1085,7 @@ class Action: if ticket_id == 0: # error. print "ticket_id == 0 for %s %s" % (loginbase, act_record['nodename']) + import os os._exit(1) pass @@ -1084,11 +1112,11 @@ class Action: i_nodes_actedon += 1 if config.policysavedb: - print "Saving Databases... act_all, diagnose_out" - soltesz.dbDump("act_all", self.act_all) + #print "Saving Databases... act_all, diagnose_out" + #database.dbDump("act_all", self.act_all) # remove site record from diagnose_out, it's in act_all as done. del self.diagnose_db[loginbase] - #soltesz.dbDump("diagnose_out", self.diagnose_db) + #database.dbDump("diagnose_out", self.diagnose_db) print "sleeping for 1 sec" time.sleep(1)