3 from monitor import database
5 from monitor.wrapper import plc, plccache
6 from monitor.wrapper import mailer
10 from monitor.const import *
11 from monitor import util
12 from monitor import config
def gethostlist(hostlist_file):
    """Load the host list stored in `hostlist_file`.

    Thin wrapper: the path is handed to util.file.getListFromFile and
    whatever that helper produces is returned unchanged.
    """
    hosts = util.file.getListFromFile(hostlist_file)
    return hosts
17 def array_to_priority_map(array):
18 """ Create a mapping where each entry of array is given a priority equal
19 to its position in the array. This is useful for subsequent use in the
28 def cmpValMap(v1, v2, map):
29 if v1 in map and v2 in map and map[v1] < map[v2]:
31 elif v1 in map and v2 in map and map[v1] > map[v2]:
33 elif v1 in map and v2 in map:
36 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category names by their position in a fixed priority list.

    Both values are looked up in the table built by array_to_priority_map()
    and the comparison itself is delegated to cmpValMap(), whose result is
    returned as-is (presumably cmp()-style -1/0/1 -- confirm against
    cmpValMap).  cmpValMap raises Exception when a value is not in the table.
    """
    # HACK: the 'ALPHA' state is being migrated away; treat it as 'PROD'
    # before ranking.  'ALPHA' stays in the table below only so the positions
    # of the remaining entries are unchanged.
    v1, v2 = [("PROD" if val == 'ALPHA' else val) for val in (v1, v2)]
    ranking = array_to_priority_map(
        [None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR'])
    return cmpValMap(v1, v2, ranking)
48 def __init__(self, hostname):
49 self.hostname = hostname
55 def previous_attempt(self):
57 def setValidMapping(self):
61 def __init__(self, key, valuepattern, action):
68 # connect one penalty to another, in a FSM diagram. After one
69 # condition/penalty is applied, move to the next phase.
73 def __init__(self, ticket_id = None):
74 self.ticket_id = ticket_id
76 print "getting ticket status",
77 self.status = mailer.getTicketStatus(self.ticket_id)
80 def setTicketStatus(self, status):
81 mailer.setTicketStatus(self.ticket_id, status)
82 self.status = mailer.getTicketStatus(self.ticket_id)
85 def getTicketStatus(self):
87 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """Close the ticket identified by self.ticket_id through the mailer.

    A fixed note is attached saying the ticket was closed automatically.
    """
    note = "Ticket CLOSED automatically by SiteAssist."
    mailer.closeTicketViaRT(self.ticket_id, note)
93 def email(self, subject, body, to):
94 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
97 class Message(object):
98 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
100 self.subject = subject
101 self.message = message
102 self.rt = RT(ticket_id)
106 return self.rt.email(self.subject, self.message, to)
108 return mailer.email(self.subject, self.message, to)
110 class Recent(object):
111 def __init__(self, withintime):
112 self.withintime = withintime
115 self.time = self.__getattribute__('time')
117 self.time = time.time()- 7*24*60*60
119 #self.time = time.time()
120 #self.action_taken = False
123 if self.time + self.withintime < time.time():
124 self.action_taken = False
126 if self.time + self.withintime > time.time() and self.action_taken:
131 def unsetRecent(self):
132 self.action_taken = False
133 self.time = time.time()
137 self.action_taken = True
138 self.time = time.time()
141 class PersistFlags(Recent):
142 def __new__(typ, id, *args, **kwargs):
150 pm = database.dbLoad(db)
152 database.dbDump(db, {})
153 pm = database.dbLoad(db)
158 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
159 for key in kwargs.keys():
160 obj.__setattr__(key, kwargs[key])
161 obj.time = time.time()
162 obj.action_taken = False
167 def __init__(self, id, withintime, **kwargs):
169 Recent.__init__(self, withintime)
172 pm = database.dbLoad(self.db)
174 database.dbDump(self.db, pm)
def resetFlag(self, name):
    """Set the attribute called `name` on this object to False."""
    setattr(self, name, False)
def setFlag(self, name):
    """Set the attribute called `name` on this object to True."""
    setattr(self, name, True)
182 def getFlag(self, name):
184 return self.__getattribute__(name)
186 self.__setattr__(name, False)
189 def resetRecentFlag(self, name):
193 def setRecentFlag(self, name):
197 def getRecentFlag(self, name):
198 # if recent and flag set -> true
201 return self.isRecent() & self.__getattribute__(name)
203 self.__setattr__(name, False)
206 def checkattr(self, name):
208 x = self.__getattribute__(name)
214 class PersistMessage(Message):
215 def __new__(typ, id, subject, message, via_rt, **kwargs):
219 db = "persistmessages"
222 pm = database.dbLoad(db)
224 database.dbDump(db, {})
225 pm = database.dbLoad(db)
229 #print "Using existing object"
232 #print "creating new object"
233 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
235 obj.actiontracker = Recent(1*60*60*24)
238 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
239 obj.ticket_id = kwargs['ticket_id']
244 def __init__(self, id, subject, message, via_rt=True, **kwargs):
245 print "initializing object: %s" % self.ticket_id
247 Message.__init__(self, subject, message, via_rt, self.ticket_id)
250 self.actiontracker.unsetRecent()
253 pm = database.dbLoad(self.db)
255 database.dbDump(self.db, pm)
258 if not self.actiontracker.isRecent():
259 self.ticket_id = Message.send(self, to)
260 self.actiontracker.setRecent()
263 # NOTE: only send a new message every week, regardless.
264 # NOTE: can cause thank-you messages to be lost, for instance when node comes back online within window.
265 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // (60*60*24))
267 class MonitorMessage(object):
268 def __new__(typ, id, *args, **kwargs):
272 db = "monitormessages"
275 if 'reset' in kwargs and kwargs['reset'] == True:
276 database.dbDump(db, {})
277 pm = database.dbLoad(db)
279 database.dbDump(db, {})
280 pm = database.dbLoad(db)
284 print "Using existing object"
287 print "creating new object"
288 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
290 obj.sp = PersistSitePenalty(id, 0)
295 def __init__(self, id, message):
299 class SitePenalty(object):
301 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
302 'disable' : lambda host: None } )
303 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
304 'disable' : lambda host: plc.enableSliceCreation(host) } )
305 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
306 'disable' : lambda host: plc.enableSlices(host) } )
308 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in order.

    The list reflects the class-wide penalty table, not what has actually
    been applied to any particular node.
    """
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
316 self.index = self.index + 1
317 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
321 self.index = self.index - 1
322 if self.index < 0: self.index = 0
325 def apply(self, host):
327 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
328 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
329 SitePenalty.penalty_map[i]['disable'](host)
331 for i in range(0,self.index+1):
332 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
333 SitePenalty.penalty_map[i]['enable'](host)
339 class PersistSitePenalty(SitePenalty):
340 def __new__(typ, id, index, **kwargs):
344 db = "persistpenalties"
347 if 'reset' in kwargs and kwargs['reset'] == True:
348 database.dbDump(db, {})
349 pm = database.dbLoad(db)
351 database.dbDump(db, {})
352 pm = database.dbLoad(db)
356 print "Using existing object"
359 print "creating new object"
360 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
367 def __init__(self, id, index, **kwargs):
371 pm = database.dbLoad(self.db)
373 database.dbDump(self.db, pm)
378 Each host has a target set of attributes. Some may be set manually,
379 or others are set globally for the preferred target.
382 All nodes in the Alpha or Beta group would have constraints like:
383 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
385 def __init__(self, constraints):
386 self.constraints = constraints
388 def verify(self, data):
390 self.constraints is a list of key, value pairs.
391 # [ {... : ...}==AND , ... , ... , ] == OR
394 for con in self.constraints:
395 #print "con: %s" % con
397 for key in con.keys():
398 #print "looking at key: %s" % key
400 #print "%s %s" % (con[key], data[key])
401 con_and_true = con_and_true & (con[key] in data[key])
402 elif key not in data:
403 print "missing key %s" % key
406 con_or_true = con_or_true | con_and_true
410 class Record(object):
412 def __init__(self, hostname, data):
413 self.hostname = hostname
415 self.plcdb_hn2lb = plccache.plcdb_hn2lb
416 self.loginbase = self.plcdb_hn2lb[self.hostname]
420 def stageIswaitforever(self):
421 if 'waitforever' in self.data['stage']:
427 category = self.data['category']
428 prev_category = self.data['prev_category']
429 #print "SEVERITY: ", category, prev_category
430 val = cmpCategoryVal(category, prev_category)
434 return self.severity() > 0
def end_record(self):
    """Delegate to node_end_record() for this record's hostname.

    Returns whatever node_end_record() returns.
    """
    host = self.hostname
    return node_end_record(host)
439 def reset_stage(self):
440 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return this record's 'category' entry converted to lower case."""
    category = self.data['category']
    return category.lower()
447 return self.data['state'].lower()
449 def getDaysDown(cls, diag_record):
451 if diag_record['comonstats']['uptime'] != "null" and diag_record['comonstats']['uptime'] != "-1":
452 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
453 #elif diag_record['comonstats']['sshstatus'] != "null":
454 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
455 #elif diag_record['comonstats']['lastcotop'] != "null":
456 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
459 last_contact = diag_record['plcnode']['last_contact']
460 if last_contact == None:
461 # the node has never been up, so give it a break
464 diff = now - last_contact
465 daysdown = diff // (60*60*24)
467 getDaysDown = classmethod(getDaysDown)
469 def getStrDaysDown(cls, diag_record):
471 last_contact = diag_record['plcnode']['last_contact']
472 date_created = diag_record['plcnode']['date_created']
474 if diag_record['comonstats']['uptime'] != "null" and \
475 diag_record['comonstats']['uptime'] != "-1":
476 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
477 daysdown = "%d days up" % daysdown
479 elif last_contact is None:
480 if date_created is not None:
482 diff = now - date_created
483 daysdown = diff // (60*60*24)
484 daysdown = "Never contacted PLC, created %s days ago" % daysdown
486 daysdown = "Never contacted PLC"
489 diff = now - last_contact
490 daysdown = diff // (60*60*24)
491 daysdown = "%s days down" % daysdown
493 getStrDaysDown = classmethod(getStrDaysDown)
495 def getSendEmailFlag(self):
499 # resend if open & created longer than 30 days ago.
500 if 'rt' in self.data and \
501 'Status' in self.data['rt'] and \
502 "open" in self.data['rt']['Status'] and \
503 self.data['rt']['Created'] > int(time.time() - 60*60*24*30):
504 # if created-time is greater than the thirty days ago from the current time
509 def getMostRecentStage(self):
510 lastact = self.data['last_action_record']
def getMostRecentTime(self):
    """Return date_action_taken from the record's 'last_action_record' entry."""
    return self.data['last_action_record'].date_action_taken
517 def takeAction(self, index=0):
518 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
519 if 'improvement' in self.data['stage'] or self.improved() or \
520 'monitor-end-record' in self.data['stage']:
521 print "takeAction: decreasing penalty for %s"%self.hostname
525 print "takeAction: increasing penalty for %s"%self.hostname
528 pp.apply(self.hostname)
531 def _format_diaginfo(self):
532 info = self.data['info']
533 print "FORMAT : STAGE: ", self.data['stage']
534 if self.data['stage'] == 'monitor-end-record':
535 if info[2] == "ALPHA": info = (info[0], info[1], "PROD")
536 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
538 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
540 def saveAction(self):
541 if 'save_act_all' in self.data and self.data['save_act_all'] == True:
546 def getMessage(self, ticket_id=None):
547 self.data['args']['hostname'] = self.hostname
548 self.data['args']['loginbase'] = self.loginbase
549 self.data['args']['hostname_list'] = self._format_diaginfo()
550 #print self.data['message']
551 if self.data['message']:
552 message = PersistMessage(self.hostname,
553 self.data['message'][0] % self.data['args'],
554 self.data['message'][1] % self.data['args'],
555 True, db='monitor_persistmessages',
557 if self.data['stage'] == "improvement":
563 def getContacts(self):
564 roles = self.data['email']
566 if not config.mail and not config.debug and config.bcc:
568 if config.mail and config.debug:
574 contacts += [config.email]
576 #contacts += [TECHEMAIL % self.loginbase]
577 contacts += plc.getTechEmails(self.loginbase)
579 #contacts += [PIEMAIL % self.loginbase]
580 contacts += plc.getSliceUserEmails(self.loginbase)
582 contacts += plc.getSliceUserEmails(self.loginbase)
583 slices = plc.slices(self.loginbase)
585 #for slice in slices:
586 # contacts += [SLICEMAIL % slice]
587 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
589 print "SLIC: %20s : 0 slices" % self.loginbase
595 def __init__(self, hostname, target):
596 self.hostname = hostname
600 class Action(MonRecord):
601 def __init__(self, host, data):
603 MonRecord.__init__(self, data)
def deltaDays(self, delta):
    """Shift this object's stored 'time' timestamp by `delta` days.

    The value is read from and written back to the instance __dict__
    directly.  It is converted to a local datetime, offset by
    timedelta(delta), and turned back into a timestamp with time.mktime();
    going through timetuple() drops any sub-second part.
    """
    fields = self.__dict__
    shifted = datetime.fromtimestamp(fields['time']) + timedelta(delta)
    fields['time'] = time.mktime(shifted.timetuple())
611 def node_end_record(node):
612 act_all = database.dbLoad("act_all")
613 if node not in act_all:
617 if len(act_all[node]) == 0:
621 pm = database.dbLoad("monitor_persistmessages")
626 print "deleting node record"
628 database.dbDump("monitor_persistmessages", pm)
630 a = Action(node, act_all[node][0])
632 a.delField('found_rt_ticket')
633 a.delField('second-mail-at-oneweek')
634 a.delField('second-mail-at-twoweeks')
635 a.delField('first-found')
637 rec['action'] = ["close_rt"]
638 rec['category'] = "ALPHA" # assume that it's up...
639 rec['stage'] = "monitor-end-record"
640 rec['ticket_id'] = None
641 rec['time'] = time.time() - 7*60*60*24
642 act_all[node].insert(0,rec)
643 database.dbDump("act_all", act_all)
647 if __name__ == "__main__":
649 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
650 #from emailTxt import mailtxt
652 #database.dbDump("persistmessages", {});
653 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
654 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
655 #m.send(['soltesz@cs.utk.edu'])
656 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
657 # TRICK timer to thinking some time has passed.
658 #m.actiontracker.time = time.time() - 6*60*60*24
659 #m.send(['soltesz@cs.utk.edu'])