3 from monitor import database
def gethostlist(hostlist_file):
    """Return whatever util.file.getListFromFile produces for ``hostlist_file``.

    hostlist_file is passed through untouched; the parsing rules are those of
    the util.file helper, not of this function.
    """
    read_list = util.file.getListFromFile
    return read_list(hostlist_file)
19 #nodes = api.GetNodes({'peer_id' : None}, ['hostname'])
20 #return [ n['hostname'] for n in nodes ]
22 def array_to_priority_map(array):
23 """ Create a mapping where each entry of array is given a priority equal
24 to its position in the array. This is useful for subsequent use in the
33 def cmpValMap(v1, v2, map):
34 if v1 in map and v2 in map and map[v1] < map[v2]:
36 elif v1 in map and v2 in map and map[v1] > map[v2]:
38 elif v1 in map and v2 in map:
41 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category values using the fixed category ordering below.

    The ordering list is turned into a priority map with
    array_to_priority_map and the comparison itself is delegated to
    cmpValMap, so the return value (and the error raised for a value that
    is not in the ordering) is whatever cmpValMap produces.
    """
    ordering = [None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR']
    priorities = array_to_priority_map(ordering)
    return cmpValMap(v1, v2, priorities)
49 def __init__(self, hostname):
50 self.hostname = hostname
56 def previous_attempt(self):
58 def setValidMapping(self):
62 def __init__(self, key, valuepattern, action):
69 # connect one penalty to another, in a FSM diagram. After one
70 # condition/penalty is applied, move to the next phase.
73 #fb = database.dbLoad("findbad")
76 def __init__(self, ticket_id = None):
77 self.ticket_id = ticket_id
79 print "getting ticket status",
80 self.status = mailer.getTicketStatus(self.ticket_id)
83 def setTicketStatus(self, status):
84 mailer.setTicketStatus(self.ticket_id, status)
85 self.status = mailer.getTicketStatus(self.ticket_id)
88 def getTicketStatus(self):
90 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """Ask mailer.closeTicketViaRT to close self.ticket_id with a fixed automated note."""
    note = "Ticket CLOSED automatically by SiteAssist."
    mailer.closeTicketViaRT(self.ticket_id, note)
96 def email(self, subject, body, to):
97 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
100 class Message(object):
101 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
103 self.subject = subject
104 self.message = message
105 self.rt = RT(ticket_id)
109 return self.rt.email(self.subject, self.message, to)
111 return mailer.email(self.subject, self.message, to)
113 class Recent(object):
114 def __init__(self, withintime):
115 self.withintime = withintime
118 self.time = self.__getattribute__('time')
120 self.time = time.time()- 7*24*60*60
122 #self.time = time.time()
123 #self.action_taken = False
126 if self.time + self.withintime < time.time():
127 self.action_taken = False
129 if self.time + self.withintime > time.time() and self.action_taken:
134 def unsetRecent(self):
135 self.action_taken = False
136 self.time = time.time()
140 self.action_taken = True
141 self.time = time.time()
144 class PersistFlags(Recent):
145 def __new__(typ, id, *args, **kwargs):
153 pm = database.dbLoad(db)
155 database.dbDump(db, {})
156 pm = database.dbLoad(db)
161 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
162 for key in kwargs.keys():
163 obj.__setattr__(key, kwargs[key])
164 obj.time = time.time()
165 obj.action_taken = False
170 def __init__(self, id, withintime, **kwargs):
172 Recent.__init__(self, withintime)
175 pm = database.dbLoad(self.db)
177 database.dbDump(self.db, pm)
def resetFlag(self, name):
    """Clear the flag called ``name`` by storing False under that attribute."""
    setattr(self, name, False)
def setFlag(self, name):
    """Raise the flag called ``name`` by storing True under that attribute."""
    setattr(self, name, True)
185 def getFlag(self, name):
187 return self.__getattribute__(name)
189 self.__setattr__(name, False)
192 def resetRecentFlag(self, name):
196 def setRecentFlag(self, name):
200 def getRecentFlag(self, name):
201 # if recent and flag set -> true
204 return self.isRecent() & self.__getattribute__(name)
206 self.__setattr__(name, False)
209 def checkattr(self, name):
211 x = self.__getattribute__(name)
217 class PersistMessage(Message):
218 def __new__(typ, id, subject, message, via_rt, **kwargs):
222 db = "persistmessages"
225 pm = database.dbLoad(db)
227 database.dbDump(db, {})
228 pm = database.dbLoad(db)
232 #print "Using existing object"
235 #print "creating new object"
236 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
238 obj.actiontracker = Recent(3*60*60*24)
241 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
242 obj.ticket_id = kwargs['ticket_id']
247 def __init__(self, id, subject, message, via_rt=True, **kwargs):
248 print "initializing object: %s" % self.ticket_id
250 Message.__init__(self, subject, message, via_rt, self.ticket_id)
253 self.actiontracker.unsetRecent()
256 pm = database.dbLoad(self.db)
258 database.dbDump(self.db, pm)
261 if not self.actiontracker.isRecent():
262 self.ticket_id = Message.send(self, to)
263 self.actiontracker.setRecent()
266 # NOTE: only send a new message every week, regardless.
267 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24))
269 class MonitorMessage(object):
270 def __new__(typ, id, *args, **kwargs):
274 db = "monitormessages"
277 if 'reset' in kwargs and kwargs['reset'] == True:
278 database.dbDump(db, {})
279 pm = database.dbLoad(db)
281 database.dbDump(db, {})
282 pm = database.dbLoad(db)
286 print "Using existing object"
289 print "creating new object"
290 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
292 obj.sp = PersistSitePenalty(id, 0)
297 def __init__(self, id, message):
301 class SitePenalty(object):
303 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
304 'disable' : lambda host: None } )
305 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
306 'disable' : lambda host: plc.enableSliceCreation(host) } )
307 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
308 'disable' : lambda host: plc.enableSlices(host) } )
310 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in map order.

    The result is the full catalogue of known penalties; it does not depend
    on this instance's state.
    """
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
318 self.index = self.index + 1
319 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
323 self.index = self.index - 1
324 if self.index < 0: self.index = 0
327 def apply(self, host):
329 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
330 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
331 SitePenalty.penalty_map[i]['disable'](host)
333 for i in range(0,self.index+1):
334 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
335 SitePenalty.penalty_map[i]['enable'](host)
341 class PersistSitePenalty(SitePenalty):
342 def __new__(typ, id, index, **kwargs):
346 db = "persistpenalties"
349 if 'reset' in kwargs and kwargs['reset'] == True:
350 database.dbDump(db, {})
351 pm = database.dbLoad(db)
353 database.dbDump(db, {})
354 pm = database.dbLoad(db)
358 print "Using existing object"
361 print "creating new object"
362 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
369 def __init__(self, id, index, **kwargs):
373 pm = database.dbLoad(self.db)
375 database.dbDump(self.db, pm)
380 Each host has a target set of attributes. Some may be set manually,
381 or others are set globally for the preferred target.
384 All nodes in the Alpha or Beta group would have constraints like:
385 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
def __init__(self, constraints):
    """Remember the constraint set for later checks.

    constraints is stored as given; verify() iterates over it and over the
    keys of each entry, so a list of dicts is the expected shape.
    """
    self.constraints = constraints
390 def verify(self, data):
392 self.constraints is a list of key, value pairs.
393 # [ {... : ...}==AND , ... , ... , ] == OR
396 for con in self.constraints:
397 #print "con: %s" % con
399 for key in con.keys():
400 #print "looking at key: %s" % key
402 #print "%s %s" % (con[key], data[key])
403 con_and_true = con_and_true & (con[key] in data[key])
404 elif key not in data:
405 print "missing key %s" % key
408 con_or_true = con_or_true | con_and_true
412 class Record(object):
414 def __init__(self, hostname, data):
415 self.hostname = hostname
417 self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
418 self.loginbase = self.plcdb_hn2lb[self.hostname]
422 def stageIswaitforever(self):
423 if 'waitforever' in self.data['stage']:
429 category = self.data['category']
430 prev_category = self.data['prev_category']
431 #print "SEVERITY: ", category, prev_category
432 val = cmpCategoryVal(category, prev_category)
436 return self.severity() > 0
def end_record(self):
    """Run node_end_record for this record's hostname and hand back its result."""
    host = self.hostname
    return node_end_record(host)
441 def reset_stage(self):
442 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return the record's 'category' entry converted to lower case."""
    category = self.data['category']
    return category.lower()
449 return self.data['state'].lower()
451 def getDaysDown(cls, diag_record):
453 if diag_record['comonstats']['uptime'] != "null":
454 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
455 #elif diag_record['comonstats']['sshstatus'] != "null":
456 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
457 #elif diag_record['comonstats']['lastcotop'] != "null":
458 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
461 last_contact = diag_record['plcnode']['last_contact']
462 if last_contact == None:
463 # the node has never been up, so give it a break
466 diff = now - last_contact
467 daysdown = diff // (60*60*24)
469 getDaysDown = classmethod(getDaysDown)
471 def getStrDaysDown(cls, diag_record):
473 last_contact = diag_record['plcnode']['last_contact']
474 date_created = diag_record['plcnode']['date_created']
476 if diag_record['comonstats']['uptime'] != "null" and \
477 diag_record['comonstats']['uptime'] != "-1":
478 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
479 daysdown = "%d days up" % daysdown
481 elif last_contact is None:
482 if date_created is not None:
484 diff = now - date_created
485 daysdown = diff // (60*60*24)
486 daysdown = "Never contacted PLC, created %s days ago" % daysdown
488 daysdown = "Never contacted PLC"
491 diff = now - last_contact
492 daysdown = diff // (60*60*24)
493 daysdown = "%s days down" % daysdown
495 getStrDaysDown = classmethod(getStrDaysDown)
497 #def getStrDaysDown(cls, diag_record):
498 # daysdown = cls.getDaysDown(diag_record)
500 # return "%d days down"%daysdown
501 # elif daysdown == -1:
502 # return "Never online"
504 # return "%d days up"% -daysdown
505 #getStrDaysDown = classmethod(getStrDaysDown)
507 def takeAction(self):
508 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
509 if 'improvement' in self.data['stage'] or self.improved() or \
510 'monitor-end-record' in self.data['stage']:
511 print "takeAction: decreasing penalty for %s"%self.hostname
515 print "takeAction: increasing penalty for %s"%self.hostname
517 pp.apply(self.hostname)
520 def _format_diaginfo(self):
521 info = self.data['info']
522 print "FORMAT : STAGE: ", self.data['stage']
523 if self.data['stage'] == 'monitor-end-record':
524 if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
525 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
527 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
529 def saveAction(self):
530 if 'save-act-all' in self.data and self.data['save-act-all'] == True:
535 def getMessage(self, ticket_id=None):
536 self.data['args']['hostname'] = self.hostname
537 self.data['args']['loginbase'] = self.loginbase
538 self.data['args']['hostname_list'] = self._format_diaginfo()
539 #print self.data['message']
540 if self.data['message']:
541 message = PersistMessage(self.hostname,
542 self.data['message'][0] % self.data['args'],
543 self.data['message'][1] % self.data['args'],
544 True, db='monitor_persistmessages',
550 def getContacts(self):
551 roles = self.data['email']
553 if not config.mail and not config.debug and config.bcc:
555 if config.mail and config.debug:
561 contacts += [config.email]
563 contacts += [TECHEMAIL % self.loginbase]
565 contacts += [PIEMAIL % self.loginbase]
567 slices = plc.slices(self.loginbase)
570 contacts += [SLICEMAIL % slice]
571 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
573 print "SLIC: %20s : 0 slices" % self.loginbase
579 def __init__(self, hostname, target):
580 self.hostname = hostname
583 #if hostname in fb['nodes']:
584 # self.data = fb['nodes'][hostname]['values']
586 # raise Exception("Hostname not in scan database")
588 def stageIswaitforever(self):
589 if 'waitforever' in self.data['stage']:
595 category = self.data['category']
596 prev_category = self.data['prev_category']
597 print "IMPROVED: ", category, prev_category
598 val = cmpCategoryVal(category, prev_category)
602 return self.severity() > 0
def end_record(self):
    """Delegate to node_end_record with self.hostname and return what it returns."""
    host = self.hostname
    return node_end_record(host)
607 def reset_stage(self):
608 self.data['stage'] = 'findbad'
611 def open_tickets(self):
612 if self.ticket and self.ticket.status['status'] == 'open':
615 def setIntrospect(self):
def email_notice(self):
    """Send the message chosen for the current condition to that condition's contacts.

    The message object is obtained first; its send() is then called with the
    contact list returned by _get_contacts_for_condition().
    """
    notice = self._get_message_for_condition()
    # Resolve send before fetching contacts so a missing message fails first.
    deliver = notice.send
    deliver(self._get_contacts_for_condition())
622 def close_ticket(self):
624 self.ticket.closeTicket()
def exempt_from_penalties(self):
    """Return True when self.hostname is found in the data stored as "l_blacklist"."""
    blacklist = database.dbLoad("l_blacklist")
    exempt = self.hostname in blacklist
    return exempt
632 def escellate_penalty(self):
634 def reduce_penalty(self):
639 return self.target.verify(self.data)
def _get_condition(self):
    """Return the lower-cased 'category' entry of self.data as the condition name."""
    category = self.data['category']
    return category.lower()
644 def _get_stage(self):
647 "secondnotice_noslicecreation"
648 "thirdnotice_disableslices"
650 delta = current_time - self.data['time']
652 def _get_message_for_condition(self):
654 def _get_contacts_for_condition(self):
657 class Action(MonRecord):
658 def __init__(self, host, data):
660 MonRecord.__init__(self, data)
def deltaDays(self, delta):
    """Move the stored 'time' value by ``delta`` days (a negative delta moves it back).

    The timestamp kept in the instance dict is converted to a local datetime,
    shifted, and written back as seconds since the epoch. The round trip goes
    through timetuple(), so any fractional second is dropped.
    """
    fields = self.__dict__
    shifted = datetime.fromtimestamp(fields['time']) + timedelta(days=delta)
    fields['time'] = time.mktime(shifted.timetuple())
668 def node_end_record(node):
669 act_all = database.dbLoad("act_all")
670 if node not in act_all:
674 if len(act_all[node]) == 0:
678 pm = database.dbLoad("monitor_persistmessages")
683 print "deleting node record"
685 database.dbDump("monitor_persistmessages", pm)
687 a = Action(node, act_all[node][0])
689 a.delField('found_rt_ticket')
690 a.delField('second-mail-at-oneweek')
691 a.delField('second-mail-at-twoweeks')
692 a.delField('first-found')
694 rec['action'] = ["close_rt"]
695 rec['category'] = "ALPHA" # assume that it's up...
696 rec['stage'] = "monitor-end-record"
697 rec['ticket_id'] = None
698 rec['time'] = time.time() - 7*60*60*24
699 act_all[node].insert(0,rec)
700 database.dbDump("act_all", act_all)
704 if __name__ == "__main__":
706 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
707 #from emailTxt import mailtxt
709 #database.dbDump("persistmessages", {});
710 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
711 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
712 #m.send(['soltesz@cs.utk.edu'])
713 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
714 # TRICK timer to thinking some time has passed.
715 #m.actiontracker.time = time.time() - 6*60*60*24
716 #m.send(['soltesz@cs.utk.edu'])