3 from monitor import database
def gethostlist(hostlist_file):
    """Return the host list read from `hostlist_file`.

    Thin wrapper: delegates to util.file.getListFromFile and returns its
    result unchanged.
    """
    return util.file.getListFromFile(hostlist_file)
17 def array_to_priority_map(array):
18 """ Create a mapping where each entry of array is given a priority equal
19 to its position in the array. This is useful for subsequent use in the
28 def cmpValMap(v1, v2, map):
29 if v1 in map and v2 in map and map[v1] < map[v2]:
31 elif v1 in map and v2 in map and map[v1] > map[v2]:
33 elif v1 in map and v2 in map:
36 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category values by their position in a fixed ordering.

    The ordering list is converted into a priority map with
    array_to_priority_map, and the comparison itself is delegated to
    cmpValMap, whose result is returned unchanged.
    """
    # Local name chosen so the builtin `map` is not shadowed.
    category_order = [None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR']
    priorities = array_to_priority_map(category_order)
    return cmpValMap(v1, v2, priorities)
44 def __init__(self, hostname):
45 self.hostname = hostname
51 def previous_attempt(self):
53 def setValidMapping(self):
57 def __init__(self, key, valuepattern, action):
64 # Connect one penalty to the next, as in an FSM diagram. After one
65 # condition/penalty is applied, move to the next phase.
68 #fb = database.dbLoad("findbad")
71 def __init__(self, ticket_id = None):
72 self.ticket_id = ticket_id
74 print "getting ticket status",
75 self.status = mailer.getTicketStatus(self.ticket_id)
78 def setTicketStatus(self, status):
79 mailer.setTicketStatus(self.ticket_id, status)
80 self.status = mailer.getTicketStatus(self.ticket_id)
83 def getTicketStatus(self):
85 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """Close this object's ticket by calling mailer.closeTicketViaRT.

    The ticket is identified by self.ticket_id and closed with a fixed
    automatic-closure notice.
    """
    mailer.closeTicketViaRT(self.ticket_id, "Ticket CLOSED automatically by SiteAssist.")
91 def email(self, subject, body, to):
92 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
95 class Message(object):
96 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
98 self.subject = subject
99 self.message = message
100 self.rt = RT(ticket_id)
104 return self.rt.email(self.subject, self.message, to)
106 return mailer.email(self.subject, self.message, to)
108 class Recent(object):
109 def __init__(self, withintime):
110 self.withintime = withintime
113 self.time = self.__getattribute__('time')
115 self.time = time.time()- 7*24*60*60
117 #self.time = time.time()
118 #self.action_taken = False
121 if self.time + self.withintime < time.time():
122 self.action_taken = False
124 if self.time + self.withintime > time.time() and self.action_taken:
129 def unsetRecent(self):
130 self.action_taken = False
131 self.time = time.time()
135 self.action_taken = True
136 self.time = time.time()
139 class PersistFlags(Recent):
140 def __new__(typ, id, *args, **kwargs):
148 pm = database.dbLoad(db)
150 database.dbDump(db, {})
151 pm = database.dbLoad(db)
156 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
157 for key in kwargs.keys():
158 obj.__setattr__(key, kwargs[key])
159 obj.time = time.time()
160 obj.action_taken = False
165 def __init__(self, id, withintime, **kwargs):
167 Recent.__init__(self, withintime)
170 pm = database.dbLoad(self.db)
172 database.dbDump(self.db, pm)
def resetFlag(self, name):
    """Set the attribute called `name` on this object to False."""
    # setattr() is the idiomatic spelling of self.__setattr__(name, ...).
    setattr(self, name, False)
def setFlag(self, name):
    """Set the attribute called `name` on this object to True."""
    # setattr() is the idiomatic spelling of self.__setattr__(name, ...).
    setattr(self, name, True)
180 def getFlag(self, name):
182 return self.__getattribute__(name)
184 self.__setattr__(name, False)
187 def resetRecentFlag(self, name):
191 def setRecentFlag(self, name):
# Reports whether the flag `name` is set AND the object is still "recent"
# (per the inline comment below).
# NOTE(review): original lines 197-198 and 200 are absent from this view, so
# the control flow around the two statements below cannot be confirmed here.
195 def getRecentFlag(self, name):
196 # if recent and flag set -> true
# NOTE(review): `&` is the bitwise operator, not logical `and`: both operands
# are always evaluated, and a non-bool flag value would give a non-boolean
# result. `and` looks like what was intended -- confirm before changing.
199 return self.isRecent() & self.__getattribute__(name)
# Defaults the flag to False; presumably the fallback for a flag that has
# never been set -- TODO confirm against the missing lines.
201 self.__setattr__(name, False)
204 def checkattr(self, name):
206 x = self.__getattribute__(name)
212 class PersistMessage(Message):
213 def __new__(typ, id, subject, message, via_rt, **kwargs):
217 db = "persistmessages"
220 pm = database.dbLoad(db)
222 database.dbDump(db, {})
223 pm = database.dbLoad(db)
227 #print "Using existing object"
230 #print "creating new object"
231 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
233 obj.actiontracker = Recent(3*60*60*24)
236 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
237 obj.ticket_id = kwargs['ticket_id']
242 def __init__(self, id, subject, message, via_rt=True, **kwargs):
243 print "initializing object: %s" % self.ticket_id
245 Message.__init__(self, subject, message, via_rt, self.ticket_id)
248 self.actiontracker.unsetRecent()
251 pm = database.dbLoad(self.db)
253 database.dbDump(self.db, pm)
256 if not self.actiontracker.isRecent():
257 self.ticket_id = Message.send(self, to)
258 self.actiontracker.setRecent()
261 # NOTE: only send a new message once per actiontracker.withintime window, regardless.
262 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24))
264 class MonitorMessage(object):
# Allocator for MonitorMessage: works against the "monitormessages" database
# and either reuses a stored object or creates a new one (see the two print
# messages below).
# NOTE(review): several original lines (266-268, 270-271, 275, 278-280,
# 282-283, 286, 288+) are absent from this view, so the branch structure is
# not fully visible here.
265 def __new__(typ, id, *args, **kwargs):
269 db = "monitormessages"
# NOTE(review): `== True` is redundant; kwargs.get('reset') would express the
# same test if only real booleans are passed -- confirm with callers.
272 if 'reset' in kwargs and kwargs['reset'] == True:
# Overwrite the stored mapping with an empty dict, then load it back.
273 database.dbDump(db, {})
274 pm = database.dbLoad(db)
276 database.dbDump(db, {})
277 pm = database.dbLoad(db)
281 print "Using existing object"
284 print "creating new object"
# NOTE(review): the sibling classes in this file pass their own class to
# super() (e.g. super(PersistFlags, typ), super(PersistSitePenalty, typ)).
# `super(object, typ)` starts the lookup *after* `object` in the MRO, which
# looks like a mistake for super(MonitorMessage, typ) -- confirm. Forwarding
# id/*args/**kwargs to object.__new__ is also rejected by newer Pythons.
285 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
# Attach a persistent site-penalty tracker keyed by the same id, index 0.
287 obj.sp = PersistSitePenalty(id, 0)
292 def __init__(self, id, message):
296 class SitePenalty(object):
298 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
299 'disable' : lambda host: None } )
300 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
301 'disable' : lambda host: plc.enableSliceCreation(host) } )
302 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
303 'disable' : lambda host: plc.enableSlices(host) } )
305 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in order.

    The result is built from the class-level table, not from this
    instance's state.
    """
    # TODO: get penalties actually applied to a node from PLC DB.
    return [entry['name'] for entry in SitePenalty.penalty_map]
313 self.index = self.index + 1
314 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
318 self.index = self.index - 1
319 if self.index < 0: self.index = 0
322 def apply(self, host):
324 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
325 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
326 SitePenalty.penalty_map[i]['disable'](host)
328 for i in range(0,self.index+1):
329 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
330 SitePenalty.penalty_map[i]['enable'](host)
336 class PersistSitePenalty(SitePenalty):
337 def __new__(typ, id, index, **kwargs):
341 db = "persistpenalties"
344 if 'reset' in kwargs and kwargs['reset'] == True:
345 database.dbDump(db, {})
346 pm = database.dbLoad(db)
348 database.dbDump(db, {})
349 pm = database.dbLoad(db)
353 print "Using existing object"
356 print "creating new object"
357 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
364 def __init__(self, id, index, **kwargs):
368 pm = database.dbLoad(self.db)
370 database.dbDump(self.db, pm)
375 Each host has a target set of attributes. Some may be set manually,
376 or others are set globally for the preferred target.
379 All nodes in the Alpha or Beta group would have constraints like:
380 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
382 def __init__(self, constraints):
383 self.constraints = constraints
385 def verify(self, data):
387 self.constraints is a list of key, value pairs.
388 # [ {... : ...}==AND , ... , ... , ] == OR
391 for con in self.constraints:
392 #print "con: %s" % con
394 for key in con.keys():
395 #print "looking at key: %s" % key
397 #print "%s %s" % (con[key], data[key])
398 con_and_true = con_and_true & (con[key] in data[key])
399 elif key not in data:
400 print "missing key %s" % key
403 con_or_true = con_or_true | con_and_true
407 class Record(object):
409 def __init__(self, hostname, data):
410 self.hostname = hostname
412 self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
413 self.loginbase = self.plcdb_hn2lb[self.hostname]
417 def stageIswaitforever(self):
418 if 'waitforever' in self.data['stage']:
424 category = self.data['category']
425 prev_category = self.data['prev_category']
426 #print "SEVERITY: ", category, prev_category
427 val = cmpCategoryVal(category, prev_category)
431 return self.severity() > 0
def end_record(self):
    """Run node_end_record for this record's hostname and return its result."""
    return node_end_record(self.hostname)
436 def reset_stage(self):
437 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return this record's 'category' value from self.data, lower-cased."""
    category = self.data['category']
    return category.lower()
444 return self.data['state'].lower()
# Derives a whole-day count from a diagnostic record: from
# diag_record['comonstats']['uptime'] when that value is usable, otherwise
# from diag_record['plcnode']['last_contact'].
# NOTE(review): original lines 447, 454-455, 459-460 and 463 are absent from
# this view (including where `now` is set and what is returned), so the full
# control flow cannot be confirmed here.
446 def getDaysDown(cls, diag_record):
# "null" and "-1" are treated as "no usable uptime reading".
448 if diag_record['comonstats']['uptime'] != "null" and diag_record['comonstats']['uptime'] != "-1":
# Uptime is reported as a negative day count (the node is up, not down).
# NOTE(review): unary minus binds tighter than `//`, so this evaluates
# (-uptime) // 86400, and floor division rounds toward negative infinity:
# any uptime under one day yields -1 rather than 0 -- confirm intended.
449 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
450 #elif diag_record['comonstats']['sshstatus'] != "null":
451 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
452 #elif diag_record['comonstats']['lastcotop'] != "null":
453 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
456 last_contact = diag_record['plcnode']['last_contact']
# NOTE(review): `is None` is the idiomatic identity test for None.
457 if last_contact == None:
458 # the node has never been up, so give it a break
# Whole days elapsed between `now` and the last contact time.
461 diff = now - last_contact
462 daysdown = diff // (60*60*24)
# Pre-decorator spelling of @classmethod.
464 getDaysDown = classmethod(getDaysDown)
466 def getStrDaysDown(cls, diag_record):
468 last_contact = diag_record['plcnode']['last_contact']
469 date_created = diag_record['plcnode']['date_created']
471 if diag_record['comonstats']['uptime'] != "null" and \
472 diag_record['comonstats']['uptime'] != "-1":
473 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
474 daysdown = "%d days up" % daysdown
476 elif last_contact is None:
477 if date_created is not None:
479 diff = now - date_created
480 daysdown = diff // (60*60*24)
481 daysdown = "Never contacted PLC, created %s days ago" % daysdown
483 daysdown = "Never contacted PLC"
486 diff = now - last_contact
487 daysdown = diff // (60*60*24)
488 daysdown = "%s days down" % daysdown
490 getStrDaysDown = classmethod(getStrDaysDown)
492 #def getStrDaysDown(cls, diag_record):
493 # daysdown = cls.getDaysDown(diag_record)
495 # return "%d days down"%daysdown
496 # elif daysdown == -1:
497 # return "Never online"
499 # return "%d days up"% -daysdown
500 #getStrDaysDown = classmethod(getStrDaysDown)
502 def takeAction(self, index=0):
503 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
504 if 'improvement' in self.data['stage'] or self.improved() or \
505 'monitor-end-record' in self.data['stage']:
506 print "takeAction: decreasing penalty for %s"%self.hostname
510 print "takeAction: increasing penalty for %s"%self.hostname
513 pp.apply(self.hostname)
516 def _format_diaginfo(self):
517 info = self.data['info']
518 print "FORMAT : STAGE: ", self.data['stage']
519 if self.data['stage'] == 'monitor-end-record':
520 if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
521 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
523 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
525 def saveAction(self):
526 if 'save-act-all' in self.data and self.data['save-act-all'] == True:
531 def getMessage(self, ticket_id=None):
532 self.data['args']['hostname'] = self.hostname
533 self.data['args']['loginbase'] = self.loginbase
534 self.data['args']['hostname_list'] = self._format_diaginfo()
535 #print self.data['message']
536 if self.data['message']:
537 message = PersistMessage(self.hostname,
538 self.data['message'][0] % self.data['args'],
539 self.data['message'][1] % self.data['args'],
540 True, db='monitor_persistmessages',
546 def getContacts(self):
547 roles = self.data['email']
549 if not config.mail and not config.debug and config.bcc:
551 if config.mail and config.debug:
557 contacts += [config.email]
559 contacts += [TECHEMAIL % self.loginbase]
561 contacts += [PIEMAIL % self.loginbase]
563 slices = plc.slices(self.loginbase)
566 contacts += [SLICEMAIL % slice]
567 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
569 print "SLIC: %20s : 0 slices" % self.loginbase
575 def __init__(self, hostname, target):
576 self.hostname = hostname
579 #if hostname in fb['nodes']:
580 # self.data = fb['nodes'][hostname]['values']
582 # raise Exception("Hostname not in scan database")
584 def stageIswaitforever(self):
585 if 'waitforever' in self.data['stage']:
591 category = self.data['category']
592 prev_category = self.data['prev_category']
593 print "IMPROVED: ", category, prev_category
594 val = cmpCategoryVal(category, prev_category)
598 return self.severity() > 0
def end_record(self):
    """Run node_end_record for this object's hostname and return its result."""
    return node_end_record(self.hostname)
603 def reset_stage(self):
604 self.data['stage'] = 'findbad'
607 def open_tickets(self):
608 if self.ticket and self.ticket.status['status'] == 'open':
611 def setIntrospect(self):
def email_notice(self):
    """Send the message for the current condition to its contacts.

    The message object comes from self._get_message_for_condition() and
    the recipients from self._get_contacts_for_condition(); the message is
    fetched first, then the contacts, then message.send(contacts) is called.
    """
    message = self._get_message_for_condition()
    recipients = self._get_contacts_for_condition()
    message.send(recipients)
618 def close_ticket(self):
620 self.ticket.closeTicket()
def exempt_from_penalties(self):
    """Return True when self.hostname is present in the "l_blacklist" database.

    The blacklist is loaded with database.dbLoad on every call.
    """
    blacklist = database.dbLoad("l_blacklist")
    return self.hostname in blacklist
628 def escellate_penalty(self):
630 def reduce_penalty(self):
635 return self.target.verify(self.data)
637 def _get_condition(self):
638 return self.data['category'].lower()
640 def _get_stage(self):
643 "secondnotice_noslicecreation"
644 "thirdnotice_disableslices"
646 delta = current_time - self.data['time']
648 def _get_message_for_condition(self):
650 def _get_contacts_for_condition(self):
653 class Action(MonRecord):
654 def __init__(self, host, data):
656 MonRecord.__init__(self, data)
def deltaDays(self, delta):
    """Shift this object's stored 'time' entry by `delta` days, in place.

    The value is read from and written back to self.__dict__['time']
    directly. It is interpreted as a local-time POSIX timestamp; the
    result comes from time.mktime on a time tuple, so sub-second
    precision is dropped.
    """
    attrs = self.__dict__
    shifted = datetime.fromtimestamp(attrs['time']) + timedelta(days=delta)
    attrs['time'] = time.mktime(shifted.timetuple())
664 def node_end_record(node):
665 act_all = database.dbLoad("act_all")
666 if node not in act_all:
670 if len(act_all[node]) == 0:
674 pm = database.dbLoad("monitor_persistmessages")
679 print "deleting node record"
681 database.dbDump("monitor_persistmessages", pm)
683 a = Action(node, act_all[node][0])
685 a.delField('found_rt_ticket')
686 a.delField('second-mail-at-oneweek')
687 a.delField('second-mail-at-twoweeks')
688 a.delField('first-found')
690 rec['action'] = ["close_rt"]
691 rec['category'] = "ALPHA" # assume that it's up...
692 rec['stage'] = "monitor-end-record"
693 rec['ticket_id'] = None
694 rec['time'] = time.time() - 7*60*60*24
695 act_all[node].insert(0,rec)
696 database.dbDump("act_all", act_all)
700 if __name__ == "__main__":
702 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
703 #from emailTxt import mailtxt
705 #database.dbDump("persistmessages", {});
706 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
707 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
708 #m.send(['soltesz@cs.utk.edu'])
709 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
710 # Trick the timer into thinking some time has passed.
711 #m.actiontracker.time = time.time() - 6*60*60*24
712 #m.send(['soltesz@cs.utk.edu'])