import sys
import os
import reboot
-import soltesz
+import database
import string
-from www.printbadnodes import cmpCategoryVal
-from config import config
-print "policy"
-config = config()
+from unified_model import cmpCategoryVal
+import config
DAT="./monitor.dat"
def print_stats(key, stats):
    """
    Print one counter from stats as a right-aligned "name : value" line.

    key   -- name of the counter to report.
    stats -- mapping of counter names to integer values.

    Nothing is printed when key is not present in stats.
    """
    if key in stats:
        # A single parenthesized argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%20s : %d" % (key, stats[key]))
-def get_ticket_id(record):
- if 'ticket_id' in record and record['ticket_id'] is not "" and record['ticket_id'] is not None:
- return record['ticket_id']
- elif 'found_rt_ticket' in record and \
- record['found_rt_ticket'] is not "" and \
- record['found_rt_ticket'] is not None:
- return record['found_rt_ticket']
- else:
- return None
class Merge(Thread):
def __init__(self, l_merge, toRT):
self.toRT = toRT
self.merge_list = l_merge
# the hostname to loginbase mapping
- self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+ self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
# Previous actions taken on nodes.
- self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {})
- self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
+ self.act_all = database.if_cached_else(1, "act_all", lambda : {})
+ self.findbad = database.if_cached_else(1, "findbad", lambda : {})
- self.cache_all = soltesz.if_cached_else(1, "act_all", lambda : {})
+ self.cache_all = database.if_cached_else(1, "act_all", lambda : {})
self.sickdb = {}
self.mergedb = {}
Thread.__init__(self)
class Diagnose(Thread):
def __init__(self, fromRT):
self.fromRT = fromRT
- self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
- self.findbad = soltesz.if_cached_else(1, "findbad", lambda : {})
+ self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
+ self.findbad = database.if_cached_else(1, "findbad", lambda : {})
self.diagnose_in = {}
self.diagnose_out = {}
if config.policysavedb:
print "Saving Databases... diagnose_out"
- soltesz.dbDump("diagnose_out", self.diagnose_out)
+ database.dbDump("diagnose_out", self.diagnose_out)
def accumSickSites(self):
"""
pass
def getDaysDown(cls, diag_record):
    """
    Return, as an int, how many whole days a node has been down.

    diag_record must provide diag_record['comonstats']['uptime'] and
    diag_record['plcnode']['last_contact'] / ['date_created'].

    Result:
      * uptime is reported (neither "null" nor "-1"): minus the number of
        whole days the node has been up (0 when up for less than a day).
      * otherwise, last_contact is set: whole days since last_contact.
      * otherwise, date_created is set: whole days since date_created.
      * otherwise: -1.

    NOTE: -1 is therefore ambiguous -- it is returned both when nothing
    is known and when the node has been up for between one and two days.

    cls is unused; the function is rebound with classmethod() right after
    its definition.
    """
    seconds_per_day = 60 * 60 * 24
    last_contact = diag_record['plcnode']['last_contact']
    date_created = diag_record['plcnode']['date_created']
    uptime = diag_record['comonstats']['uptime']

    if uptime != "null" and uptime != "-1":
        # Negate AFTER the floor division.  "-x // d" parses as "(-x) // d",
        # which floors away from zero and over-counts the days up by one
        # (e.g. 100 seconds of uptime would come out as -1).
        return -(int(float(uptime)) // seconds_per_day)

    # No usable uptime: fall back to the last contact time, then to the
    # creation time of the node record.
    reference = last_contact
    if reference is None:
        reference = date_created
    if reference is None:
        return -1

    # time.time() is a float, so coerce the floored quotient back to int.
    return int((time.time() - reference) // seconds_per_day)
+ getDaysDown = classmethod(getDaysDown)
+
def getStrDaysDown(cls, diag_record):
    """
    Return a short human-readable description of a node's up/down time.

    diag_record must provide diag_record['comonstats']['uptime'] and
    diag_record['plcnode']['last_contact'] / ['date_created'].

    Result:
      * uptime is reported (neither "null" nor "-1"): "<N> days up".
      * otherwise, last_contact is set: "<N> days down".
      * otherwise, date_created is set:
        "Never contacted PLC, created <N> days ago".
      * otherwise: "Never contacted PLC".

    <N> is always a whole number of days.

    cls is unused; the function is rebound with classmethod() right after
    its definition.
    """
    seconds_per_day = 60 * 60 * 24
    last_contact = diag_record['plcnode']['last_contact']
    date_created = diag_record['plcnode']['date_created']
    uptime = diag_record['comonstats']['uptime']

    if uptime != "null" and uptime != "-1":
        return "%d days up" % (int(float(uptime)) // seconds_per_day)

    if last_contact is None:
        if date_created is None:
            return "Never contacted PLC"
        # time.time() is a float, so the floored quotient is a float too;
        # %d keeps "3.0" from leaking into the message.
        age = (time.time() - date_created) // seconds_per_day
        return "Never contacted PLC, created %d days ago" % age

    elapsed = (time.time() - last_contact) // seconds_per_day
    return "%d days down" % elapsed
+ getStrDaysDown = classmethod(getStrDaysDown)
+ #def getStrDaysDown(cls, diag_record):
+ # daysdown = cls.getDaysDown(diag_record)
+ # if daysdown > -1:
+ # return "%d days down"%daysdown
+ # elif daysdown == -1:
+ # return "Has never contacted PLC"
+ # else:
+ # return "%d days up"% -daysdown
+ #getStrDaysDown = classmethod(getStrDaysDown)
def __getCDVersion(self, diag_record, nodename):
cdversion = ""
if "ERROR" in category: # i.e. "DOWN"
diag_record = {}
diag_record.update(node_record)
- daysdown = self.__getDaysDown(diag_record, nodename)
+ daysdown = self.getDaysDown(diag_record)
if daysdown < 7:
format = "DIAG: %20s : %-40s Down only %s days NOTHING DONE"
print format % (loginbase, nodename, daysdown)
return None
- s_daysdown = self.__getStrDaysDown(diag_record, nodename)
+ s_daysdown = self.getStrDaysDown(diag_record)
diag_record['message'] = emailTxt.mailtxt.newdown
diag_record['args'] = {'nodename': nodename}
diag_record['info'] = (nodename, s_daysdown, "")
elif "OLDBOOTCD" in category:
# V2 boot cds as determined by findbad
- s_daysdown = self.__getStrDaysDown(node_record, nodename)
+ s_daysdown = self.getStrDaysDown(node_record)
s_cdversion = self.__getCDVersion(node_record, nodename)
diag_record = {}
diag_record.update(node_record)
act_record['first-found'] = True
act_record['log'] += " firstfound"
act_record['action'] = ['ticket_waitforever']
- act_record['message'] = None
+ act_record['message'] = message[0]
act_record['time'] = current_time
else:
if delta >= 7*SPERDAY:
act_record['action'] = ['ticket_waitforever']
- act_record['message'] = None
+ if 'rt' in act_record and 'Status' in act_record['rt'] and \
+ act_record['rt']['Status'] == 'new':
+ act_record['message'] = message[0]
+ else:
+ act_record['message'] = None
+
act_record['time'] = current_time # reset clock
else:
act_record['action'] = ['ticket_waitforever']
if site_stats == None:
raise Exception, "loginbase with no nodes in findbad"
else:
- return site_stats['num_nodes']
+ if 'num_nodes' in site_stats:
+ return site_stats['num_nodes']
+ else:
+ return 0
"""
Returns number of up nodes as the total number *NOT* in act_all with a
# TODO: create class for each action below,
# allow for lists of actions to be performed...
-def close_rt_backoff(args):
- if 'ticket_id' in args and (args['ticket_id'] != "" and args['ticket_id'] != None):
- mailer.closeTicketViaRT(args['ticket_id'],
- "Ticket CLOSED automatically by SiteAssist.")
- plc.enableSlices(args['hostname'])
- plc.enableSliceCreation(args['hostname'])
- return
-def reboot_node(args):
- host = args['hostname']
- return reboot.reboot_policy(host, True, config.debug)
def reset_nodemanager(args):
os.system("ssh root@%s /sbin/service nm restart" % nodename)
self.l_action = l_action
# the hostname to loginbase mapping
- self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
+ self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
# Actions to take.
- self.diagnose_db = soltesz.if_cached_else(1, "diagnose_out", lambda : {})
+ self.diagnose_db = database.if_cached_else(1, "diagnose_out", lambda : {})
# Actions taken.
- self.act_all = soltesz.if_cached_else(1, "act_all", lambda : {})
+ self.act_all = database.if_cached_else(1, "act_all", lambda : {})
# A dict of actions to specific functions. PICKLE doesn't like lambdas.
self.actions = {}
print err
if config.policysavedb:
print "Saving Databases... act_all"
- soltesz.dbDump("act_all", self.act_all)
+ database.dbDump("act_all", self.act_all)
sys.exit(1)
print_stats("sites_observed", stats)
if config.policysavedb:
print "Saving Databases... act_all"
- #soltesz.dbDump("policy.eventlog", self.eventlog)
+ #database.dbDump("policy.eventlog", self.eventlog)
# TODO: remove 'diagnose_out',
# or at least the entries that were acted on.
- soltesz.dbDump("act_all", self.act_all)
+ database.dbDump("act_all", self.act_all)
def accumSites(self):
"""
if config.policysavedb:
print "Saving Databases... act_all, diagnose_out"
- soltesz.dbDump("act_all", self.act_all)
+ database.dbDump("act_all", self.act_all)
# remove site record from diagnose_out, it's in act_all as done.
del self.diagnose_db[loginbase]
- soltesz.dbDump("diagnose_out", self.diagnose_db)
+ database.dbDump("diagnose_out", self.diagnose_db)
print "sleeping for 1 sec"
time.sleep(1)