3 from monitor import database
def gethostlist(hostlist_file):
    """Return the list that util.file.getListFromFile reads from hostlist_file."""
    hosts = util.file.getListFromFile(hostlist_file)
    return hosts
17 def array_to_priority_map(array):
18 """ Create a mapping where each entry of array is given a priority equal
19 to its position in the array. This is useful for subsequent use in the
28 def cmpValMap(v1, v2, map):
29 if v1 in map and v2 in map and map[v1] < map[v2]:
31 elif v1 in map and v2 in map and map[v1] > map[v2]:
33 elif v1 in map and v2 in map:
36 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category values using a fixed category ordering.

    A priority map is built from the ordered category list below with
    array_to_priority_map, and the comparison itself is delegated to
    cmpValMap, whose result is returned unchanged.
    """
    ordered_categories = [
        None,
        'PROD',
        'ALPHA',
        'OLDBOOTCD',
        'UNKNOWN',
        'FORCED',
        'ERROR',
    ]
    priorities = array_to_priority_map(ordered_categories)
    return cmpValMap(v1, v2, priorities)
44 def __init__(self, hostname):
45 self.hostname = hostname
51 def previous_attempt(self):
53 def setValidMapping(self):
57 def __init__(self, key, valuepattern, action):
64 # connect one penalty to another, in a FSM diagram. After one
65 # condition/penalty is applied, move to the next phase.
68 #fb = database.dbLoad("findbad")
71 def __init__(self, ticket_id = None):
72 self.ticket_id = ticket_id
74 print "getting ticket status",
75 self.status = mailer.getTicketStatus(self.ticket_id)
78 def setTicketStatus(self, status):
79 mailer.setTicketStatus(self.ticket_id, status)
80 self.status = mailer.getTicketStatus(self.ticket_id)
83 def getTicketStatus(self):
85 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """Close the ticket identified by self.ticket_id via mailer.closeTicketViaRT."""
    closing_note = "Ticket CLOSED automatically by SiteAssist."
    mailer.closeTicketViaRT(self.ticket_id, closing_note)
91 def email(self, subject, body, to):
92 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
95 class Message(object):
96 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
98 self.subject = subject
99 self.message = message
100 self.rt = RT(ticket_id)
104 return self.rt.email(self.subject, self.message, to)
106 return mailer.email(self.subject, self.message, to)
108 class Recent(object):
109 def __init__(self, withintime):
110 self.withintime = withintime
113 self.time = self.__getattribute__('time')
115 self.time = time.time()- 7*24*60*60
117 #self.time = time.time()
118 #self.action_taken = False
121 if self.time + self.withintime < time.time():
122 self.action_taken = False
124 if self.time + self.withintime > time.time() and self.action_taken:
129 def unsetRecent(self):
130 self.action_taken = False
131 self.time = time.time()
135 self.action_taken = True
136 self.time = time.time()
139 class PersistFlags(Recent):
140 def __new__(typ, id, *args, **kwargs):
148 pm = database.dbLoad(db)
150 database.dbDump(db, {})
151 pm = database.dbLoad(db)
156 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
157 for key in kwargs.keys():
158 obj.__setattr__(key, kwargs[key])
159 obj.time = time.time()
160 obj.action_taken = False
165 def __init__(self, id, withintime, **kwargs):
167 Recent.__init__(self, withintime)
170 pm = database.dbLoad(self.db)
172 database.dbDump(self.db, pm)
def resetFlag(self, name):
    """Set the attribute called `name` on this object to False."""
    setattr(self, name, False)
def setFlag(self, name):
    """Set the attribute called `name` on this object to True."""
    setattr(self, name, True)
180 def getFlag(self, name):
182 return self.__getattribute__(name)
184 self.__setattr__(name, False)
187 def resetRecentFlag(self, name):
191 def setRecentFlag(self, name):
195 def getRecentFlag(self, name):
196 # if recent and flag set -> true
199 return self.isRecent() & self.__getattribute__(name)
201 self.__setattr__(name, False)
204 def checkattr(self, name):
206 x = self.__getattribute__(name)
212 class PersistMessage(Message):
213 def __new__(typ, id, subject, message, via_rt, **kwargs):
217 db = "persistmessages"
220 pm = database.dbLoad(db)
222 database.dbDump(db, {})
223 pm = database.dbLoad(db)
227 #print "Using existing object"
230 #print "creating new object"
231 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
233 obj.actiontracker = Recent(1*60*60*24)
236 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
237 obj.ticket_id = kwargs['ticket_id']
242 def __init__(self, id, subject, message, via_rt=True, **kwargs):
243 print "initializing object: %s" % self.ticket_id
245 Message.__init__(self, subject, message, via_rt, self.ticket_id)
248 self.actiontracker.unsetRecent()
251 pm = database.dbLoad(self.db)
253 database.dbDump(self.db, pm)
256 if not self.actiontracker.isRecent():
257 self.ticket_id = Message.send(self, to)
258 self.actiontracker.setRecent()
261 # NOTE: only send a new message every week, regardless.
262 # NOTE: can cause thank-you messages to be lost, for instance when node comes back online within window.
263 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24))
265 class MonitorMessage(object):
266 def __new__(typ, id, *args, **kwargs):
270 db = "monitormessages"
273 if 'reset' in kwargs and kwargs['reset'] == True:
274 database.dbDump(db, {})
275 pm = database.dbLoad(db)
277 database.dbDump(db, {})
278 pm = database.dbLoad(db)
282 print "Using existing object"
285 print "creating new object"
286 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
288 obj.sp = PersistSitePenalty(id, 0)
293 def __init__(self, id, message):
297 class SitePenalty(object):
299 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
300 'disable' : lambda host: None } )
301 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
302 'disable' : lambda host: plc.enableSliceCreation(host) } )
303 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
304 'disable' : lambda host: plc.enableSlices(host) } )
306 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in order."""
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
314 self.index = self.index + 1
315 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
319 self.index = self.index - 1
320 if self.index < 0: self.index = 0
323 def apply(self, host):
325 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
326 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
327 SitePenalty.penalty_map[i]['disable'](host)
329 for i in range(0,self.index+1):
330 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
331 SitePenalty.penalty_map[i]['enable'](host)
337 class PersistSitePenalty(SitePenalty):
338 def __new__(typ, id, index, **kwargs):
342 db = "persistpenalties"
345 if 'reset' in kwargs and kwargs['reset'] == True:
346 database.dbDump(db, {})
347 pm = database.dbLoad(db)
349 database.dbDump(db, {})
350 pm = database.dbLoad(db)
354 print "Using existing object"
357 print "creating new object"
358 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
365 def __init__(self, id, index, **kwargs):
369 pm = database.dbLoad(self.db)
371 database.dbDump(self.db, pm)
376 Each host has a target set of attributes. Some may be set manually,
377 or others are set globally for the preferred target.
380 All nodes in the Alpha or Beta group would have constraints like:
381 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
383 def __init__(self, constraints):
384 self.constraints = constraints
386 def verify(self, data):
388 self.constraints is a list of key, value pairs.
389 # [ {... : ...}==AND , ... , ... , ] == OR
392 for con in self.constraints:
393 #print "con: %s" % con
395 for key in con.keys():
396 #print "looking at key: %s" % key
398 #print "%s %s" % (con[key], data[key])
399 con_and_true = con_and_true & (con[key] in data[key])
400 elif key not in data:
401 print "missing key %s" % key
404 con_or_true = con_or_true | con_and_true
408 class Record(object):
410 def __init__(self, hostname, data):
411 self.hostname = hostname
413 self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
414 self.loginbase = self.plcdb_hn2lb[self.hostname]
418 def stageIswaitforever(self):
419 if 'waitforever' in self.data['stage']:
425 category = self.data['category']
426 prev_category = self.data['prev_category']
427 #print "SEVERITY: ", category, prev_category
428 val = cmpCategoryVal(category, prev_category)
432 return self.severity() > 0
def end_record(self):
    """Return the result of node_end_record for this record's hostname."""
    host = self.hostname
    return node_end_record(host)
437 def reset_stage(self):
438 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return the 'category' entry of self.data converted to lower case."""
    category = self.data['category']
    return category.lower()
445 return self.data['state'].lower()
447 def getDaysDown(cls, diag_record):
449 if diag_record['comonstats']['uptime'] != "null" and diag_record['comonstats']['uptime'] != "-1":
450 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
451 #elif diag_record['comonstats']['sshstatus'] != "null":
452 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
453 #elif diag_record['comonstats']['lastcotop'] != "null":
454 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
457 last_contact = diag_record['plcnode']['last_contact']
458 if last_contact == None:
459 # the node has never been up, so give it a break
462 diff = now - last_contact
463 daysdown = diff // (60*60*24)
465 getDaysDown = classmethod(getDaysDown)
467 def getStrDaysDown(cls, diag_record):
469 last_contact = diag_record['plcnode']['last_contact']
470 date_created = diag_record['plcnode']['date_created']
472 if diag_record['comonstats']['uptime'] != "null" and \
473 diag_record['comonstats']['uptime'] != "-1":
474 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
475 daysdown = "%d days up" % daysdown
477 elif last_contact is None:
478 if date_created is not None:
480 diff = now - date_created
481 daysdown = diff // (60*60*24)
482 daysdown = "Never contacted PLC, created %s days ago" % daysdown
484 daysdown = "Never contacted PLC"
487 diff = now - last_contact
488 daysdown = diff // (60*60*24)
489 daysdown = "%s days down" % daysdown
491 getStrDaysDown = classmethod(getStrDaysDown)
493 #def getStrDaysDown(cls, diag_record):
494 # daysdown = cls.getDaysDown(diag_record)
496 # return "%d days down"%daysdown
497 # elif daysdown == -1:
498 # return "Never online"
500 # return "%d days up"% -daysdown
501 #getStrDaysDown = classmethod(getStrDaysDown)
503 def takeAction(self, index=0):
504 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
505 if 'improvement' in self.data['stage'] or self.improved() or \
506 'monitor-end-record' in self.data['stage']:
507 print "takeAction: decreasing penalty for %s"%self.hostname
511 print "takeAction: increasing penalty for %s"%self.hostname
514 pp.apply(self.hostname)
517 def _format_diaginfo(self):
518 info = self.data['info']
519 print "FORMAT : STAGE: ", self.data['stage']
520 if self.data['stage'] == 'monitor-end-record':
521 if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
522 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
524 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
526 def saveAction(self):
527 if 'save-act-all' in self.data and self.data['save-act-all'] == True:
532 def getMessage(self, ticket_id=None):
533 self.data['args']['hostname'] = self.hostname
534 self.data['args']['loginbase'] = self.loginbase
535 self.data['args']['hostname_list'] = self._format_diaginfo()
536 #print self.data['message']
537 if self.data['message']:
538 message = PersistMessage(self.hostname,
539 self.data['message'][0] % self.data['args'],
540 self.data['message'][1] % self.data['args'],
541 True, db='monitor_persistmessages',
543 if self.data['stage'] == "improvement":
549 def getContacts(self):
550 roles = self.data['email']
552 if not config.mail and not config.debug and config.bcc:
554 if config.mail and config.debug:
560 contacts += [config.email]
562 contacts += [TECHEMAIL % self.loginbase]
564 contacts += [PIEMAIL % self.loginbase]
566 slices = plc.slices(self.loginbase)
569 contacts += [SLICEMAIL % slice]
570 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
572 print "SLIC: %20s : 0 slices" % self.loginbase
578 def __init__(self, hostname, target):
579 self.hostname = hostname
582 #if hostname in fb['nodes']:
583 # self.data = fb['nodes'][hostname]['values']
585 # raise Exception("Hostname not in scan database")
587 def stageIswaitforever(self):
588 if 'waitforever' in self.data['stage']:
594 category = self.data['category']
595 prev_category = self.data['prev_category']
596 print "IMPROVED: ", category, prev_category
597 val = cmpCategoryVal(category, prev_category)
601 return self.severity() > 0
def end_record(self):
    """Return the result of node_end_record for this object's hostname."""
    host = self.hostname
    return node_end_record(host)
606 def reset_stage(self):
607 self.data['stage'] = 'findbad'
610 def open_tickets(self):
611 if self.ticket and self.ticket.status['status'] == 'open':
614 def setIntrospect(self):
def email_notice(self):
    """Send the message for the current condition to the contacts for it.

    The message object comes from self._get_message_for_condition() and the
    recipients from self._get_contacts_for_condition(); the message is
    fetched first, then the contacts, then send() is invoked.
    """
    notice = self._get_message_for_condition()
    recipients = self._get_contacts_for_condition()
    notice.send(recipients)
621 def close_ticket(self):
623 self.ticket.closeTicket()
def exempt_from_penalties(self):
    """Return whether self.hostname is contained in the "l_blacklist" database."""
    return self.hostname in database.dbLoad("l_blacklist")
631 def escellate_penalty(self):
633 def reduce_penalty(self):
638 return self.target.verify(self.data)
640 def _get_condition(self):
641 return self.data['category'].lower()
643 def _get_stage(self):
646 "secondnotice_noslicecreation"
647 "thirdnotice_disableslices"
649 delta = current_time - self.data['time']
651 def _get_message_for_condition(self):
653 def _get_contacts_for_condition(self):
656 class Action(MonRecord):
657 def __init__(self, host, data):
659 MonRecord.__init__(self, data)
def deltaDays(self, delta):
    """Shift the stored 'time' field by `delta` days.

    The timestamp held in self.__dict__['time'] is turned into a datetime
    with datetime.fromtimestamp, moved by timedelta(delta), and written back
    as the value time.mktime produces for the shifted time tuple.
    """
    fields = self.__dict__
    shifted = datetime.fromtimestamp(fields['time']) + timedelta(delta)
    fields['time'] = time.mktime(shifted.timetuple())
667 def node_end_record(node):
668 act_all = database.dbLoad("act_all")
669 if node not in act_all:
673 if len(act_all[node]) == 0:
677 pm = database.dbLoad("monitor_persistmessages")
682 print "deleting node record"
684 database.dbDump("monitor_persistmessages", pm)
686 a = Action(node, act_all[node][0])
688 a.delField('found_rt_ticket')
689 a.delField('second-mail-at-oneweek')
690 a.delField('second-mail-at-twoweeks')
691 a.delField('first-found')
693 rec['action'] = ["close_rt"]
694 rec['category'] = "ALPHA" # assume that it's up...
695 rec['stage'] = "monitor-end-record"
696 rec['ticket_id'] = None
697 rec['time'] = time.time() - 7*60*60*24
698 act_all[node].insert(0,rec)
699 database.dbDump("act_all", act_all)
703 if __name__ == "__main__":
705 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
706 #from emailTxt import mailtxt
708 #database.dbDump("persistmessages", {});
709 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
710 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
711 #m.send(['soltesz@cs.utk.edu'])
712 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
713 # TRICK timer to thinking some time has passed.
714 #m.actiontracker.time = time.time() - 6*60*60*24
715 #m.send(['soltesz@cs.utk.edu'])