3 from monitor import database
5 from monitor.wrapper import plc, plccache
6 from monitor.wrapper import mailer
10 from monitor.const import *
11 from monitor import util
12 from monitor import config
def gethostlist(hostlist_file):
    """Return the entries read from hostlist_file.

    Thin wrapper: the work is delegated to util.file.getListFromFile and
    whatever that helper returns is handed back unchanged.
    """
    read_list = util.file.getListFromFile
    hosts = read_list(hostlist_file)
    return hosts
17 def array_to_priority_map(array):
18 """ Create a mapping where each entry of array is given a priority equal
19 to its position in the array. This is useful for subsequent use in the
28 def cmpValMap(v1, v2, map):
29 if v1 in map and v2 in map and map[v1] < map[v2]:
31 elif v1 in map and v2 in map and map[v1] > map[v2]:
33 elif v1 in map and v2 in map:
36 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category values by their rank in a fixed ordering.

    The ordering list is converted into a priority map with
    array_to_priority_map, and the comparison itself is delegated to
    cmpValMap, whose result is returned unchanged.
    """
    category_order = [
        None,
        'ALPHA',
        'PROD',
        'OLDBOOTCD',
        'UNKNOWN',
        'FORCED',
        'ERROR',
    ]
    priorities = array_to_priority_map(category_order)
    return cmpValMap(v1, v2, priorities)
44 def __init__(self, hostname):
45 self.hostname = hostname
51 def previous_attempt(self):
53 def setValidMapping(self):
57 def __init__(self, key, valuepattern, action):
64 # connect one penalty to another, in a FSM diagram. After one
65 # condition/penalty is applied, move to the next phase.
69 def __init__(self, ticket_id = None):
70 self.ticket_id = ticket_id
72 print "getting ticket status",
73 self.status = mailer.getTicketStatus(self.ticket_id)
76 def setTicketStatus(self, status):
77 mailer.setTicketStatus(self.ticket_id, status)
78 self.status = mailer.getTicketStatus(self.ticket_id)
81 def getTicketStatus(self):
83 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """Close this object's ticket via mailer.closeTicketViaRT."""
    closing_note = "Ticket CLOSED automatically by SiteAssist."
    mailer.closeTicketViaRT(self.ticket_id, closing_note)
89 def email(self, subject, body, to):
90 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
93 class Message(object):
94 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
96 self.subject = subject
97 self.message = message
98 self.rt = RT(ticket_id)
102 return self.rt.email(self.subject, self.message, to)
104 return mailer.email(self.subject, self.message, to)
106 class Recent(object):
107 def __init__(self, withintime):
108 self.withintime = withintime
111 self.time = self.__getattribute__('time')
113 self.time = time.time()- 7*24*60*60
115 #self.time = time.time()
116 #self.action_taken = False
119 if self.time + self.withintime < time.time():
120 self.action_taken = False
122 if self.time + self.withintime > time.time() and self.action_taken:
127 def unsetRecent(self):
128 self.action_taken = False
129 self.time = time.time()
133 self.action_taken = True
134 self.time = time.time()
137 class PersistFlags(Recent):
138 def __new__(typ, id, *args, **kwargs):
146 pm = database.dbLoad(db)
148 database.dbDump(db, {})
149 pm = database.dbLoad(db)
154 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
155 for key in kwargs.keys():
156 obj.__setattr__(key, kwargs[key])
157 obj.time = time.time()
158 obj.action_taken = False
163 def __init__(self, id, withintime, **kwargs):
165 Recent.__init__(self, withintime)
168 pm = database.dbLoad(self.db)
170 database.dbDump(self.db, pm)
def resetFlag(self, name):
    """Set the attribute called `name` on this object to False."""
    setattr(self, name, False)
def setFlag(self, name):
    """Set the attribute called `name` on this object to True."""
    setattr(self, name, True)
178 def getFlag(self, name):
180 return self.__getattribute__(name)
182 self.__setattr__(name, False)
185 def resetRecentFlag(self, name):
189 def setRecentFlag(self, name):
193 def getRecentFlag(self, name):
194 # if recent and flag set -> true
197 return self.isRecent() & self.__getattribute__(name)
199 self.__setattr__(name, False)
202 def checkattr(self, name):
204 x = self.__getattribute__(name)
210 class PersistMessage(Message):
211 def __new__(typ, id, subject, message, via_rt, **kwargs):
215 db = "persistmessages"
218 pm = database.dbLoad(db)
220 database.dbDump(db, {})
221 pm = database.dbLoad(db)
225 #print "Using existing object"
228 #print "creating new object"
229 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
231 obj.actiontracker = Recent(1*60*60*24)
234 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
235 obj.ticket_id = kwargs['ticket_id']
240 def __init__(self, id, subject, message, via_rt=True, **kwargs):
241 print "initializing object: %s" % self.ticket_id
243 Message.__init__(self, subject, message, via_rt, self.ticket_id)
246 self.actiontracker.unsetRecent()
249 pm = database.dbLoad(self.db)
251 database.dbDump(self.db, pm)
254 if not self.actiontracker.isRecent():
255 self.ticket_id = Message.send(self, to)
256 self.actiontracker.setRecent()
259 # NOTE: only send a new message every week, regardless.
260 # NOTE: can cause thank-you messages to be lost, for instance when node comes back online within window.
261 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24))
263 class MonitorMessage(object):
264 def __new__(typ, id, *args, **kwargs):
268 db = "monitormessages"
271 if 'reset' in kwargs and kwargs['reset'] == True:
272 database.dbDump(db, {})
273 pm = database.dbLoad(db)
275 database.dbDump(db, {})
276 pm = database.dbLoad(db)
280 print "Using existing object"
283 print "creating new object"
284 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
286 obj.sp = PersistSitePenalty(id, 0)
291 def __init__(self, id, message):
295 class SitePenalty(object):
297 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
298 'disable' : lambda host: None } )
299 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
300 'disable' : lambda host: plc.enableSliceCreation(host) } )
301 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
302 'disable' : lambda host: plc.enableSlices(host) } )
304 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in order."""
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
312 self.index = self.index + 1
313 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
317 self.index = self.index - 1
318 if self.index < 0: self.index = 0
321 def apply(self, host):
323 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
324 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
325 SitePenalty.penalty_map[i]['disable'](host)
327 for i in range(0,self.index+1):
328 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
329 SitePenalty.penalty_map[i]['enable'](host)
335 class PersistSitePenalty(SitePenalty):
336 def __new__(typ, id, index, **kwargs):
340 db = "persistpenalties"
343 if 'reset' in kwargs and kwargs['reset'] == True:
344 database.dbDump(db, {})
345 pm = database.dbLoad(db)
347 database.dbDump(db, {})
348 pm = database.dbLoad(db)
352 print "Using existing object"
355 print "creating new object"
356 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
363 def __init__(self, id, index, **kwargs):
367 pm = database.dbLoad(self.db)
369 database.dbDump(self.db, pm)
374 Each host has a target set of attributes. Some may be set manually,
375 or others are set globally for the preferred target.
378 All nodes in the Alpha or Beta group would have constraints like:
379 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
381 def __init__(self, constraints):
382 self.constraints = constraints
384 def verify(self, data):
386 self.constraints is a list of key, value pairs.
387 # [ {... : ...}==AND , ... , ... , ] == OR
390 for con in self.constraints:
391 #print "con: %s" % con
393 for key in con.keys():
394 #print "looking at key: %s" % key
396 #print "%s %s" % (con[key], data[key])
397 con_and_true = con_and_true & (con[key] in data[key])
398 elif key not in data:
399 print "missing key %s" % key
402 con_or_true = con_or_true | con_and_true
406 class Record(object):
408 def __init__(self, hostname, data):
409 self.hostname = hostname
411 self.plcdb_hn2lb = plccache.plcdb_hn2lb
412 self.loginbase = self.plcdb_hn2lb[self.hostname]
416 def stageIswaitforever(self):
417 if 'waitforever' in self.data['stage']:
423 category = self.data['category']
424 prev_category = self.data['prev_category']
425 #print "SEVERITY: ", category, prev_category
426 val = cmpCategoryVal(category, prev_category)
430 return self.severity() > 0
def end_record(self):
    """Run node_end_record for this record's hostname and return its result."""
    outcome = node_end_record(self.hostname)
    return outcome
435 def reset_stage(self):
436 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return this record's 'category' value converted to lower case."""
    category = self.data['category']
    return category.lower()
443 return self.data['state'].lower()
445 def getDaysDown(cls, diag_record):
447 if diag_record['comonstats']['uptime'] != "null" and diag_record['comonstats']['uptime'] != "-1":
448 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
449 #elif diag_record['comonstats']['sshstatus'] != "null":
450 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
451 #elif diag_record['comonstats']['lastcotop'] != "null":
452 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
455 last_contact = diag_record['plcnode']['last_contact']
456 if last_contact == None:
457 # the node has never been up, so give it a break
460 diff = now - last_contact
461 daysdown = diff // (60*60*24)
463 getDaysDown = classmethod(getDaysDown)
465 def getStrDaysDown(cls, diag_record):
467 last_contact = diag_record['plcnode']['last_contact']
468 date_created = diag_record['plcnode']['date_created']
470 if diag_record['comonstats']['uptime'] != "null" and \
471 diag_record['comonstats']['uptime'] != "-1":
472 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
473 daysdown = "%d days up" % daysdown
475 elif last_contact is None:
476 if date_created is not None:
478 diff = now - date_created
479 daysdown = diff // (60*60*24)
480 daysdown = "Never contacted PLC, created %s days ago" % daysdown
482 daysdown = "Never contacted PLC"
485 diff = now - last_contact
486 daysdown = diff // (60*60*24)
487 daysdown = "%s days down" % daysdown
489 getStrDaysDown = classmethod(getStrDaysDown)
491 def getSendEmailFlag(self):
495 # resend if open & created longer than 30 days ago.
496 if 'rt' in self.data and \
497 'Status' in self.data['rt'] and \
498 "open" in self.data['rt']['Status'] and \
499 self.data['rt']['Created'] > int(time.time() - 60*60*24*30):
500 # if created-time is greater than the thirty days ago from the current time
505 def getMostRecentStage(self):
506 lastact = self.data['last_action_record']
def getMostRecentTime(self):
    """Return date_action_taken from the record's 'last_action_record' entry."""
    return self.data['last_action_record'].date_action_taken
513 def takeAction(self, index=0):
514 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
515 if 'improvement' in self.data['stage'] or self.improved() or \
516 'monitor-end-record' in self.data['stage']:
517 print "takeAction: decreasing penalty for %s"%self.hostname
521 print "takeAction: increasing penalty for %s"%self.hostname
524 pp.apply(self.hostname)
527 def _format_diaginfo(self):
528 info = self.data['info']
529 print "FORMAT : STAGE: ", self.data['stage']
530 if self.data['stage'] == 'monitor-end-record':
531 if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
532 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
534 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
536 def saveAction(self):
537 if 'save_act_all' in self.data and self.data['save_act_all'] == True:
542 def getMessage(self, ticket_id=None):
543 self.data['args']['hostname'] = self.hostname
544 self.data['args']['loginbase'] = self.loginbase
545 self.data['args']['hostname_list'] = self._format_diaginfo()
546 #print self.data['message']
547 if self.data['message']:
548 message = PersistMessage(self.hostname,
549 self.data['message'][0] % self.data['args'],
550 self.data['message'][1] % self.data['args'],
551 True, db='monitor_persistmessages',
553 if self.data['stage'] == "improvement":
559 def getContacts(self):
560 roles = self.data['email']
562 if not config.mail and not config.debug and config.bcc:
564 if config.mail and config.debug:
570 contacts += [config.email]
572 contacts += [TECHEMAIL % self.loginbase]
574 contacts += [PIEMAIL % self.loginbase]
576 slices = plc.slices(self.loginbase)
579 contacts += [SLICEMAIL % slice]
580 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
582 print "SLIC: %20s : 0 slices" % self.loginbase
588 def __init__(self, hostname, target):
589 self.hostname = hostname
593 class Action(MonRecord):
594 def __init__(self, host, data):
596 MonRecord.__init__(self, data)
def deltaDays(self, delta):
    """Shift the instance's stored 'time' timestamp by `delta` days.

    The value is read from and written back to self.__dict__ directly.
    It is converted to a local datetime, offset by timedelta(delta)
    (i.e. days), and converted back to a timestamp with time.mktime.
    """
    attrs = self.__dict__
    shifted = datetime.fromtimestamp(attrs['time']) + timedelta(delta)
    attrs['time'] = time.mktime(shifted.timetuple())
604 def node_end_record(node):
605 act_all = database.dbLoad("act_all")
606 if node not in act_all:
610 if len(act_all[node]) == 0:
614 pm = database.dbLoad("monitor_persistmessages")
619 print "deleting node record"
621 database.dbDump("monitor_persistmessages", pm)
623 a = Action(node, act_all[node][0])
625 a.delField('found_rt_ticket')
626 a.delField('second-mail-at-oneweek')
627 a.delField('second-mail-at-twoweeks')
628 a.delField('first-found')
630 rec['action'] = ["close_rt"]
631 rec['category'] = "ALPHA" # assume that it's up...
632 rec['stage'] = "monitor-end-record"
633 rec['ticket_id'] = None
634 rec['time'] = time.time() - 7*60*60*24
635 act_all[node].insert(0,rec)
636 database.dbDump("act_all", act_all)
640 if __name__ == "__main__":
642 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
643 #from emailTxt import mailtxt
645 #database.dbDump("persistmessages", {});
646 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
647 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
648 #m.send(['soltesz@cs.utk.edu'])
649 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
650 # TRICK timer to thinking some time has passed.
651 #m.actiontracker.time = time.time() - 6*60*60*24
652 #m.send(['soltesz@cs.utk.edu'])