X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=unified_model.py;h=891bab018e8c33c5655176ae97fc1b6403185445;hb=0fabfc8dbe8f1f2c0d12397e1bc8c6ed686fb5ed;hp=918f653f128b01c894c5cb754fe8417d2d1a4dd6;hpb=590ac12c941310b40a92d2fe938e62e3538f2893;p=monitor.git diff --git a/unified_model.py b/unified_model.py index 918f653..891bab0 100755 --- a/unified_model.py +++ b/unified_model.py @@ -1,20 +1,18 @@ #!/usr/bin/python -import soltesz +from monitor import database -import plc -import auth -api = plc.PLC(auth.auth, auth.plc) - -import config -import mailer +from monitor.wrapper import plc, plccache +from monitor.wrapper import mailer import time +from model import * +from monitor.const import * +from monitor import util +from monitor import config + def gethostlist(hostlist_file): - return config.getListFromFile(hostlist_file) - - #nodes = api.GetNodes({'peer_id' : None}, ['hostname']) - #return [ n['hostname'] for n in nodes ] + return util.file.getListFromFile(hostlist_file) def array_to_priority_map(array): """ Create a mapping where each entry of array is given a priority equal @@ -38,6 +36,10 @@ def cmpValMap(v1, v2, map): raise Exception("No index %s or %s in map" % (v1, v2)) def cmpCategoryVal(v1, v2): + # Terrible hack to manage migration to no more 'ALPHA' states. + if v1 == 'ALPHA': v1 = "PROD" + if v2 == 'ALPHA': v2 = "PROD" + #map = array_to_priority_map([ None, 'PROD', 'ALPHA', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]) map = array_to_priority_map([ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]) return cmpValMap(v1,v2,map) @@ -67,8 +69,6 @@ class PenaltyMap: # condition/penalty is applied, move to the next phase. -fb = soltesz.dbLoad("findbad") - class RT(object): def __init__(self, ticket_id = None): self.ticket_id = ticket_id @@ -88,7 +88,7 @@ class RT(object): return self.status def closeTicket(self): - mailer.closeTicketViaRT(self.ticket_id) + mailer.closeTicketViaRT(self.ticket_id, "Ticket CLOSED automatically by SiteAssist.") def email(self, subject, body, to): self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id) @@ -110,8 +110,14 @@ class Message(object): class Recent(object): def __init__(self, withintime): self.withintime = withintime - self.time = time.time() - self.action_taken = False + + try: + self.time = self.__getattribute__('time') + except: + self.time = time.time()- 7*24*60*60 + + #self.time = time.time() + #self.action_taken = False def isRecent(self): if self.time + self.withintime < time.time(): @@ -141,10 +147,10 @@ class PersistFlags(Recent): db = "persistflags" try: - pm = soltesz.dbLoad(db) + pm = database.dbLoad(db) except: - soltesz.dbDump(db, {}) - pm = soltesz.dbLoad(db) + database.dbDump(db, {}) + pm = database.dbLoad(db) #print pm if id in pm: obj = pm[id] @@ -152,6 +158,8 @@ class PersistFlags(Recent): obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs) for key in kwargs.keys(): obj.__setattr__(key, kwargs[key]) + obj.time = time.time() + obj.action_taken = False obj.db = db return obj @@ -161,9 +169,9 @@ class PersistFlags(Recent): Recent.__init__(self, withintime) def save(self): - pm = soltesz.dbLoad(self.db) + pm = database.dbLoad(self.db) pm[self.id] = self - soltesz.dbDump(self.db, pm) + database.dbDump(self.db, pm) def resetFlag(self, name): self.__setattr__(name, False) @@ -178,6 +186,10 @@ class PersistFlags(Recent): self.__setattr__(name, False) return False + def resetRecentFlag(self, name): + self.resetFlag(name) + self.unsetRecent() + def setRecentFlag(self, name): self.setFlag(name) self.setRecent() @@ -191,6 +203,14 @@ class PersistFlags(Recent): self.__setattr__(name, False) return False + def checkattr(self, name): + try: + x = self.__getattribute__(name) + return True + except: + return False + + class PersistMessage(Message): def __new__(typ, id, subject, message, via_rt, **kwargs): if 'db' in kwargs: @@ -199,22 +219,25 @@ class PersistMessage(Message): db = "persistmessages" try: - pm = soltesz.dbLoad(db) + pm = database.dbLoad(db) except: - soltesz.dbDump(db, {}) - pm = soltesz.dbLoad(db) + database.dbDump(db, {}) + pm = database.dbLoad(db) #print pm if id in pm: - print "Using existing object" + #print "Using existing object" obj = pm[id] else: - print "creating new object" + #print "creating new object" obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs) obj.id = id - obj.actiontracker = Recent(3*60*60*24) + obj.actiontracker = Recent(1*60*60*24) obj.ticket_id = None + if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None: + obj.ticket_id = kwargs['ticket_id'] + obj.db = db return obj @@ -226,19 +249,20 @@ class PersistMessage(Message): def reset(self): self.actiontracker.unsetRecent() + def save(self): + pm = database.dbLoad(self.db) + pm[self.id] = self + database.dbDump(self.db, pm) + def send(self, to): if not self.actiontracker.isRecent(): self.ticket_id = Message.send(self, to) self.actiontracker.setRecent() - - #print "recording object for persistance" - pm = soltesz.dbLoad(self.db) - pm[self.id] = self - soltesz.dbDump(self.db, pm) + self.save() else: # NOTE: only send a new message every week, regardless. - print "Not sending to host b/c not within window of 6 days" - pass + # NOTE: can cause thank-you messages to be lost, for instance when node comes back online within window. + print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24)) class MonitorMessage(object): def __new__(typ, id, *args, **kwargs): @@ -249,11 +273,11 @@ class MonitorMessage(object): try: if 'reset' in kwargs and kwargs['reset'] == True: - soltesz.dbDump(db, {}) - pm = soltesz.dbLoad(db) + database.dbDump(db, {}) + pm = database.dbLoad(db) except: - soltesz.dbDump(db, {}) - pm = soltesz.dbLoad(db) + database.dbDump(db, {}) + pm = database.dbLoad(db) #print pm if id in pm: @@ -321,11 +345,11 @@ class PersistSitePenalty(SitePenalty): try: if 'reset' in kwargs and kwargs['reset'] == True: - soltesz.dbDump(db, {}) - pm = soltesz.dbLoad(db) + database.dbDump(db, {}) + pm = database.dbLoad(db) except: - soltesz.dbDump(db, {}) - pm = soltesz.dbLoad(db) + database.dbDump(db, {}) + pm = database.dbLoad(db) #print pm if id in pm: @@ -342,13 +366,11 @@ class PersistSitePenalty(SitePenalty): def __init__(self, id, index, **kwargs): self.id = id - #SitePenalty.__init__(self, self.index) def save(self): - pm = soltesz.dbLoad(self.db) + pm = database.dbLoad(self.db) pm[self.id] = self - soltesz.dbDump(self.db, pm) - + database.dbDump(self.db, pm) class Target: @@ -385,79 +407,253 @@ class Target: return con_or_true -class NodeRecord: - def __init__(self, hostname, target): +class Record(object): + + def __init__(self, hostname, data): self.hostname = hostname - self.pcu = PCU(hostname) - self.ticket = None - self.target = target - if hostname in fb['nodes']: - self.data = fb['nodes'][hostname]['values'] + self.data = data + self.plcdb_hn2lb = plccache.plcdb_hn2lb + self.loginbase = self.plcdb_hn2lb[self.hostname] + return + + + def stageIswaitforever(self): + if 'waitforever' in self.data['stage']: + return True else: - raise Exception("Hostname not in scan database") + return False - def get(self): - pass def severity(self): category = self.data['category'] prev_category = self.data['prev_category'] + #print "SEVERITY: ", category, prev_category val = cmpCategoryVal(category, prev_category) return val - def open_tickets(self): - if self.ticket and self.ticket.status['status'] == 'open': - return 1 - return 0 - def setIntrospect(self): - pass - def email_notice(self): - message = self._get_message_for_condition() - message.send(self._get_contacts_for_condition()) + def improved(self): + return self.severity() > 0 + + def end_record(self): + return node_end_record(self.hostname) + + def reset_stage(self): + self.data['stage'] = 'findbad' return True - def close_ticket(self): - if self.ticket: - self.ticket.closeTicket() + + def getCategory(self): + return self.data['category'].lower() - def exempt_from_penalties(self): - bl = soltesz.dbLoad("l_blacklist") - return self.hostname in bl + def getState(self): + return self.data['state'].lower() + + def getDaysDown(cls, diag_record): + daysdown = -1 + if diag_record['comonstats']['uptime'] != "null" and diag_record['comonstats']['uptime'] != "-1": + daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24) + #elif diag_record['comonstats']['sshstatus'] != "null": + # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24) + #elif diag_record['comonstats']['lastcotop'] != "null": + # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24) + else: + now = time.time() + last_contact = diag_record['plcnode']['last_contact'] + if last_contact == None: + # the node has never been up, so give it a break + daysdown = -1 + else: + diff = now - last_contact + daysdown = diff // (60*60*24) + return daysdown + getDaysDown = classmethod(getDaysDown) + + def getStrDaysDown(cls, diag_record): + daysdown = "unknown" + last_contact = diag_record['plcnode']['last_contact'] + date_created = diag_record['plcnode']['date_created'] + + if diag_record['comonstats']['uptime'] != "null" and \ + diag_record['comonstats']['uptime'] != "-1": + daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24) + daysdown = "%d days up" % daysdown + + elif last_contact is None: + if date_created is not None: + now = time.time() + diff = now - date_created + daysdown = diff // (60*60*24) + daysdown = "Never contacted PLC, created %s days ago" % daysdown + else: + daysdown = "Never contacted PLC" + else: + now = time.time() + diff = now - last_contact + daysdown = diff // (60*60*24) + daysdown = "%s days down" % daysdown + return daysdown + getStrDaysDown = classmethod(getStrDaysDown) + + def getSendEmailFlag(self): + if not config.mail: + return False + + # resend if open & created longer than 30 days ago. + if 'rt' in self.data and \ + 'Status' in self.data['rt'] and \ + "open" in self.data['rt']['Status'] and \ + self.data['rt']['Created'] > int(time.time() - 60*60*24*30): + # if created-time is greater than the thirty days ago from the current time + return False - def penalties(self): - return [] - def escellate_penalty(self): - return True - def reduce_penalty(self): return True + def getMostRecentStage(self): + lastact = self.data['last_action_record'] + return lastact.stage + + def getMostRecentTime(self): + lastact = self.data['last_action_record'] + return lastact.date_action_taken + + def takeAction(self, index=0): + pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames') + if 'improvement' in self.data['stage'] or self.improved() or \ + 'monitor-end-record' in self.data['stage']: + print "takeAction: decreasing penalty for %s"%self.hostname + pp.decrease() + pp.decrease() + else: + print "takeAction: increasing penalty for %s"%self.hostname + pp.increase() + pp.index = index + pp.apply(self.hostname) + pp.save() + + def _format_diaginfo(self): + info = self.data['info'] + print "FORMAT : STAGE: ", self.data['stage'] + if self.data['stage'] == 'monitor-end-record': + if info[2] == "ALPHA": info = (info[0], info[1], "PROD") + hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2]) + else: + hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn) + return hlist + def saveAction(self): + if 'save_act_all' in self.data and self.data['save_act_all'] == True: + return True + else: + return False - def atTarget(self): - return self.target.verify(self.data) + def getMessage(self, ticket_id=None): + self.data['args']['hostname'] = self.hostname + self.data['args']['loginbase'] = self.loginbase + self.data['args']['hostname_list'] = self._format_diaginfo() + #print self.data['message'] + if self.data['message']: + message = PersistMessage(self.hostname, + self.data['message'][0] % self.data['args'], + self.data['message'][1] % self.data['args'], + True, db='monitor_persistmessages', + ticket_id=ticket_id) + if self.data['stage'] == "improvement": + message.reset() + return message + else: + return None + + def getContacts(self): + roles = self.data['email'] + + if not config.mail and not config.debug and config.bcc: + roles = ADMIN + if config.mail and config.debug: + roles = ADMIN + + # build targets + contacts = [] + if ADMIN & roles: + contacts += [config.email] + if TECH & roles: + #contacts += [TECHEMAIL % self.loginbase] + contacts += plc.getTechEmails(self.loginbase) + if PI & roles: + #contacts += [PIEMAIL % self.loginbase] + contacts += plc.getSliceUserEmails(self.loginbase) + if USER & roles: + contacts += plc.getSliceUserEmails(self.loginbase) + slices = plc.slices(self.loginbase) + if len(slices) >= 1: + #for slice in slices: + # contacts += [SLICEMAIL % slice] + print "SLIC: %20s : %d slices" % (self.loginbase, len(slices)) + else: + print "SLIC: %20s : 0 slices" % self.loginbase + + return contacts - def _get_condition(self): - return self.data['category'].lower() - def _get_stage(self): - "improvement" - "firstnotice_noop" - "secondnotice_noslicecreation" - "thirdnotice_disableslices" +class NodeRecord: + def __init__(self, hostname, target): + self.hostname = hostname + self.ticket = None + self.target = target - delta = current_time - self.data['time'] +class Action(MonRecord): + def __init__(self, host, data): + self.host = host + MonRecord.__init__(self, data) + return - def _get_message_for_condition(self): - pass - def _get_contacts_for_condition(self): - pass + def deltaDays(self, delta): + t = datetime.fromtimestamp(self.__dict__['time']) + d = t + timedelta(delta) + self.__dict__['time'] = time.mktime(d.timetuple()) + +def node_end_record(node): + act_all = database.dbLoad("act_all") + if node not in act_all: + del act_all + return False + + if len(act_all[node]) == 0: + del act_all + return False + + pm = database.dbLoad("monitor_persistmessages") + if node not in pm: + del pm + return False + else: + print "deleting node record" + del pm[node] + database.dbDump("monitor_persistmessages", pm) + + a = Action(node, act_all[node][0]) + a.delField('rt') + a.delField('found_rt_ticket') + a.delField('second-mail-at-oneweek') + a.delField('second-mail-at-twoweeks') + a.delField('first-found') + rec = a.get() + rec['action'] = ["close_rt"] + rec['category'] = "ALPHA" # assume that it's up... + rec['stage'] = "monitor-end-record" + rec['ticket_id'] = None + rec['time'] = time.time() - 7*60*60*24 + act_all[node].insert(0,rec) + database.dbDump("act_all", act_all) + del act_all + return True if __name__ == "__main__": #r = RT() - #r.email("test", "body of test message", ['soltesz@cs.princeton.edu']) - from emailTxt import mailtxt - soltesz.dbDump("persistmessages", {}); - args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'} - m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True) - m.send(['soltesz@cs.utk.edu']) - m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True) + #r.email("test", "body of test message", ['database@cs.princeton.edu']) + #from emailTxt import mailtxt + print "loaded" + #database.dbDump("persistmessages", {}); + #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'} + #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True) + #m.send(['soltesz@cs.utk.edu']) + #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True) # TRICK timer to thinking some time has passed. - m.actiontracker.time = time.time() - 6*60*60*24 - m.send(['soltesz@cs.utk.edu']) + #m.actiontracker.time = time.time() - 6*60*60*24 + #m.send(['soltesz@cs.utk.edu'])