3 from monitor import database
def gethostlist(hostlist_file):
    """Load the list of hosts stored in ``hostlist_file``.

    Thin wrapper: hands the path to util.file.getListFromFile and returns
    whatever that helper returns, unchanged.
    """
    hosts = util.file.getListFromFile(hostlist_file)
    return hosts
17 def array_to_priority_map(array):
18 """ Create a mapping where each entry of array is given a priority equal
19 to its position in the array. This is useful for subsequent use in the
28 def cmpValMap(v1, v2, map):
29 if v1 in map and v2 in map and map[v1] < map[v2]:
31 elif v1 in map and v2 in map and map[v1] > map[v2]:
33 elif v1 in map and v2 in map:
36 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category values by their position in a fixed ordering.

    Each category is given a priority equal to its index in the ordering
    list below (via array_to_priority_map); the two priorities are then
    compared by cmpValMap, whose result is returned as-is.

    v1, v2 -- category values (None or one of the strings in the ordering).
    """
    # Terrible hack to manage migration to no more 'ALPHA' states:
    # a legacy 'ALPHA' value is compared as if it were 'PROD'.
    if v1 == 'ALPHA':
        v1 = "PROD"
    if v2 == 'ALPHA':
        v2 = "PROD"

    # 'ALPHA' stays in the list so every other entry keeps its index.
    ordering = [ None, 'ALPHA', 'PROD', 'OLDPROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ]
    priorities = array_to_priority_map(ordering)
    return cmpValMap(v1, v2, priorities)
48 def __init__(self, hostname):
49 self.hostname = hostname
55 def previous_attempt(self):
57 def setValidMapping(self):
61 def __init__(self, key, valuepattern, action):
68 # connect one penalty to another, in a FSM diagram. After one
69 # condition/penalty is applied, move to the next phase.
72 #fb = database.dbLoad("findbad")
75 def __init__(self, ticket_id = None):
76 self.ticket_id = ticket_id
78 print "getting ticket status",
79 self.status = mailer.getTicketStatus(self.ticket_id)
82 def setTicketStatus(self, status):
83 mailer.setTicketStatus(self.ticket_id, status)
84 self.status = mailer.getTicketStatus(self.ticket_id)
87 def getTicketStatus(self):
89 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """Close this object's ticket through the mailer's RT interface.

    The ticket identified by self.ticket_id is closed with a fixed
    automatic-closure note.
    """
    # NOTE(review): self.status is not refreshed here after closing.
    closing_note = "Ticket CLOSED automatically by SiteAssist."
    mailer.closeTicketViaRT(self.ticket_id, closing_note)
95 def email(self, subject, body, to):
96 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
99 class Message(object):
100 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
102 self.subject = subject
103 self.message = message
104 self.rt = RT(ticket_id)
108 return self.rt.email(self.subject, self.message, to)
110 return mailer.email(self.subject, self.message, to)
112 class Recent(object):
113 def __init__(self, withintime):
114 self.withintime = withintime
117 self.time = self.__getattribute__('time')
119 self.time = time.time()- 7*24*60*60
121 #self.time = time.time()
122 #self.action_taken = False
125 if self.time + self.withintime < time.time():
126 self.action_taken = False
128 if self.time + self.withintime > time.time() and self.action_taken:
133 def unsetRecent(self):
134 self.action_taken = False
135 self.time = time.time()
139 self.action_taken = True
140 self.time = time.time()
143 class PersistFlags(Recent):
144 def __new__(typ, id, *args, **kwargs):
152 pm = database.dbLoad(db)
154 database.dbDump(db, {})
155 pm = database.dbLoad(db)
160 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
161 for key in kwargs.keys():
162 obj.__setattr__(key, kwargs[key])
163 obj.time = time.time()
164 obj.action_taken = False
169 def __init__(self, id, withintime, **kwargs):
171 Recent.__init__(self, withintime)
174 pm = database.dbLoad(self.db)
176 database.dbDump(self.db, pm)
178 def resetFlag(self, name):
179 self.__setattr__(name, False)
181 def setFlag(self, name):
182 self.__setattr__(name, True)
184 def getFlag(self, name):
186 return self.__getattribute__(name)
188 self.__setattr__(name, False)
191 def resetRecentFlag(self, name):
195 def setRecentFlag(self, name):
199 def getRecentFlag(self, name):
200 # if recent and flag set -> true
203 return self.isRecent() & self.__getattribute__(name)
205 self.__setattr__(name, False)
208 def checkattr(self, name):
210 x = self.__getattribute__(name)
216 class PersistMessage(Message):
217 def __new__(typ, id, subject, message, via_rt, **kwargs):
221 db = "persistmessages"
224 pm = database.dbLoad(db)
226 database.dbDump(db, {})
227 pm = database.dbLoad(db)
231 #print "Using existing object"
234 #print "creating new object"
235 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
237 obj.actiontracker = Recent(1*60*60*24)
240 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
241 obj.ticket_id = kwargs['ticket_id']
246 def __init__(self, id, subject, message, via_rt=True, **kwargs):
247 print "initializing object: %s" % self.ticket_id
249 Message.__init__(self, subject, message, via_rt, self.ticket_id)
252 self.actiontracker.unsetRecent()
255 pm = database.dbLoad(self.db)
257 database.dbDump(self.db, pm)
260 if not self.actiontracker.isRecent():
261 self.ticket_id = Message.send(self, to)
262 self.actiontracker.setRecent()
265 # NOTE: only send a new message every week, regardless.
266 # NOTE: can cause thank-you messages to be lost, for instance when node comes back online within window.
267 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24))
269 class MonitorMessage(object):
270 def __new__(typ, id, *args, **kwargs):
274 db = "monitormessages"
277 if 'reset' in kwargs and kwargs['reset'] == True:
278 database.dbDump(db, {})
279 pm = database.dbLoad(db)
281 database.dbDump(db, {})
282 pm = database.dbLoad(db)
286 print "Using existing object"
289 print "creating new object"
290 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
292 obj.sp = PersistSitePenalty(id, 0)
297 def __init__(self, id, message):
301 class SitePenalty(object):
303 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
304 'disable' : lambda host: None } )
305 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
306 'disable' : lambda host: plc.enableSliceCreation(host) } )
307 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
308 'disable' : lambda host: plc.enableSlices(host) } )
310 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in order.

    This lists all penalties the class defines, regardless of this
    object's current index.
    """
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
318 self.index = self.index + 1
319 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
323 self.index = self.index - 1
324 if self.index < 0: self.index = 0
327 def apply(self, host):
329 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
330 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
331 SitePenalty.penalty_map[i]['disable'](host)
333 for i in range(0,self.index+1):
334 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
335 SitePenalty.penalty_map[i]['enable'](host)
341 class PersistSitePenalty(SitePenalty):
342 def __new__(typ, id, index, **kwargs):
346 db = "persistpenalties"
349 if 'reset' in kwargs and kwargs['reset'] == True:
350 database.dbDump(db, {})
351 pm = database.dbLoad(db)
353 database.dbDump(db, {})
354 pm = database.dbLoad(db)
358 print "PersistSitePenalty Using existing object"
361 print "creating new object"
362 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
369 def __init__(self, id, index, **kwargs):
373 pm = database.dbLoad(self.db)
375 database.dbDump(self.db, pm)
380 Each host has a target set of attributes. Some may be set manually,
381 or others are set globally for the preferred target.
384 All nodes in the Alpha or Beta group would have constraints like:
385 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
387 def __init__(self, constraints):
388 self.constraints = constraints
390 def verify(self, data):
392 self.constraints is a list of key, value pairs.
393 # [ {... : ...}==AND , ... , ... , ] == OR
396 for con in self.constraints:
397 #print "con: %s" % con
399 for key in con.keys():
400 #print "looking at key: %s" % key
402 #print "%s %s" % (con[key], data[key])
403 con_and_true = con_and_true & (con[key] in data[key])
404 elif key not in data:
405 print "missing key %s" % key
408 con_or_true = con_or_true | con_and_true
412 class Record(object):
414 def __init__(self, hostname, data):
415 self.hostname = hostname
417 self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
418 self.loginbase = self.plcdb_hn2lb[self.hostname]
422 def stageIswaitforever(self):
423 if 'waitforever' in self.data['stage']:
429 category = self.data['category']
430 prev_category = self.data['prev_category']
431 print "SEVERITY: ", category, prev_category
433 print "SEVERITY state: ", self.data['state'], self.data['prev_state']
435 print "SEVERITY state: unknown unknown"
436 val = cmpCategoryVal(category, prev_category)
440 return self.severity() > 0
def end_record(self):
    """Run the module-level node_end_record() for this record's host.

    Returns whatever node_end_record(self.hostname) returns.
    """
    outcome = node_end_record(self.hostname)
    return outcome
445 def reset_stage(self):
446 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return the 'category' entry of self.data, converted to lower case."""
    category = self.data['category']
    return category.lower()
453 return self.data['state'].lower()
455 def getDaysDown(cls, diag_record):
457 if diag_record['comonstats']['uptime'] != "null" and diag_record['comonstats']['uptime'] != "-1":
458 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
459 #elif diag_record['comonstats']['sshstatus'] != "null":
460 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
461 #elif diag_record['comonstats']['lastcotop'] != "null":
462 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
465 last_contact = diag_record['plcnode']['last_contact']
466 if last_contact == None:
467 # the node has never been up, so give it a break
470 diff = now - last_contact
471 daysdown = diff // (60*60*24)
473 getDaysDown = classmethod(getDaysDown)
475 def getStrDaysDown(cls, diag_record):
477 last_contact = diag_record['plcnode']['last_contact']
478 date_created = diag_record['plcnode']['date_created']
480 if diag_record['comonstats']['uptime'] != "null" and \
481 diag_record['comonstats']['uptime'] != "-1":
482 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
483 daysdown = "%d days up" % daysdown
485 elif last_contact is None:
486 if date_created is not None:
488 diff = now - date_created
489 daysdown = diff // (60*60*24)
490 daysdown = "Never contacted PLC, created %s days ago" % daysdown
492 daysdown = "Never contacted PLC"
495 diff = now - last_contact
496 daysdown = diff // (60*60*24)
497 daysdown = "%s days down" % daysdown
499 getStrDaysDown = classmethod(getStrDaysDown)
501 #def getStrDaysDown(cls, diag_record):
502 # daysdown = cls.getDaysDown(diag_record)
504 # return "%d days down"%daysdown
505 # elif daysdown == -1:
506 # return "Never online"
508 # return "%d days up"% -daysdown
509 #getStrDaysDown = classmethod(getStrDaysDown)
511 def takeAction(self, index=0):
512 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
513 if 'improvement' in self.data['stage'] or self.improved() or \
514 'monitor-end-record' in self.data['stage']:
515 print "takeAction: decreasing penalty for %s"%self.hostname
519 print "takeAction: increasing penalty for %s"%self.hostname
521 print "takeAction: applying penalty to %s as index %s"% (self.hostname, index)
523 pp.apply(self.hostname)
526 def _format_diaginfo(self):
527 info = self.data['info']
528 print "FORMAT : STAGE: ", self.data['stage']
529 if self.data['stage'] == 'monitor-end-record':
530 if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
531 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
533 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
535 def saveAction(self):
536 if 'save-act-all' in self.data and self.data['save-act-all'] == True:
541 def getMessage(self, ticket_id=None):
542 self.data['args']['hostname'] = self.hostname
543 self.data['args']['loginbase'] = self.loginbase
544 self.data['args']['hostname_list'] = self._format_diaginfo()
545 #print self.data['message']
546 if self.data['message']:
547 message = PersistMessage(self.hostname,
548 self.data['message'][0] % self.data['args'],
549 self.data['message'][1] % self.data['args'],
550 True, db='monitor_persistmessages',
552 if self.data['stage'] == "improvement":
558 def getContacts(self):
559 roles = self.data['email']
561 if not config.mail and not config.debug and config.bcc:
563 if config.mail and config.debug:
569 contacts += [config.email]
571 #contacts += [TECHEMAIL % self.loginbase]
572 contacts += plc.getTechEmails(self.loginbase)
574 #contacts += [PIEMAIL % self.loginbase]
575 contacts += plc.getPIEmails(self.loginbase)
577 contacts += plc.getSliceUserEmails(self.loginbase)
578 slices = plc.slices(self.loginbase)
580 #for slice in slices:
581 # contacts += [SLICEMAIL % slice]
582 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
584 print "SLIC: %20s : 0 slices" % self.loginbase
590 def __init__(self, hostname, target):
591 self.hostname = hostname
594 #if hostname in fb['nodes']:
595 # self.data = fb['nodes'][hostname]['values']
597 # raise Exception("Hostname not in scan database")
599 def stageIswaitforever(self):
600 if 'waitforever' in self.data['stage']:
606 category = self.data['category']
607 prev_category = self.data['prev_category']
608 print "IMPROVED: ", category, prev_category
609 val = cmpCategoryVal(category, prev_category)
613 return self.severity() > 0
def end_record(self):
    """Hand this record's hostname to the module-level node_end_record().

    The helper's return value is passed straight back to the caller.
    """
    result = node_end_record(self.hostname)
    return result
618 def reset_stage(self):
619 self.data['stage'] = 'findbad'
622 def open_tickets(self):
623 if self.ticket and self.ticket.status['status'] == 'open':
626 def setIntrospect(self):
def email_notice(self):
    """Send the message for the current condition to its contacts.

    The message object comes from self._get_message_for_condition() and
    the recipients from self._get_contacts_for_condition(); the message is
    fetched first, then the recipients, then message.send() is called.
    Nothing is returned.
    """
    notice = self._get_message_for_condition()
    recipients = self._get_contacts_for_condition()
    notice.send(recipients)
633 def close_ticket(self):
635 self.ticket.closeTicket()
def exempt_from_penalties(self):
    """Report whether this host is listed in the "l_blacklist" database.

    The blacklist is loaded with database.dbLoad on every call; the result
    of the membership test for self.hostname is returned.
    """
    blacklist = database.dbLoad("l_blacklist")
    is_listed = self.hostname in blacklist
    return is_listed
643 def escellate_penalty(self):
645 def reduce_penalty(self):
650 return self.target.verify(self.data)
def _get_condition(self):
    """Return this record's condition: self.data['category'] in lower case."""
    category = self.data['category']
    return category.lower()
655 def _get_stage(self):
658 "secondnotice_noslicecreation"
659 "thirdnotice_disableslices"
661 delta = current_time - self.data['time']
663 def _get_message_for_condition(self):
665 def _get_contacts_for_condition(self):
668 class Action(MonRecord):
669 def __init__(self, host, data):
671 MonRecord.__init__(self, data)
def deltaDays(self, delta):
    """Shift this object's stored 'time' stamp by ``delta`` days.

    The stamp is read from and written back to the instance __dict__
    directly.  It is converted to a local datetime, moved by
    timedelta(delta), and turned back into a timestamp with time.mktime on
    the time tuple, so any sub-second part of the old stamp is dropped.

    delta -- number of days to add; a negative value moves the stamp back.
    """
    attrs = self.__dict__
    shifted = datetime.fromtimestamp(attrs['time']) + timedelta(delta)
    attrs['time'] = time.mktime(shifted.timetuple())
679 def node_end_record(node):
680 act_all = database.dbLoad("act_all")
681 if node not in act_all:
685 if len(act_all[node]) == 0:
689 pm = database.dbLoad("monitor_persistmessages")
694 print "deleting node record"
696 database.dbDump("monitor_persistmessages", pm)
698 a = Action(node, act_all[node][0])
700 a.delField('found_rt_ticket')
701 a.delField('second-mail-at-oneweek')
702 a.delField('second-mail-at-twoweeks')
703 a.delField('first-found')
705 rec['action'] = ["close_rt"]
706 rec['category'] = "ALPHA" # assume that it's up...
707 rec['stage'] = "monitor-end-record"
708 rec['ticket_id'] = None
709 rec['time'] = time.time() - 7*60*60*24
710 act_all[node].insert(0,rec)
711 database.dbDump("act_all", act_all)
715 if __name__ == "__main__":
717 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
718 #from emailTxt import mailtxt
720 #database.dbDump("persistmessages", {});
721 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
722 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
723 #m.send(['soltesz@cs.utk.edu'])
724 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
725 # TRICK timer to thinking some time has passed.
726 #m.actiontracker.time = time.time() - 6*60*60*24
727 #m.send(['soltesz@cs.utk.edu'])