7 api = plc.PLC(auth.auth, auth.plc)
11 from nodecommon import *
15 def gethostlist(hostlist_file):
17 return config.getListFromFile(hostlist_file)
19 #nodes = api.GetNodes({'peer_id' : None}, ['hostname'])
20 #return [ n['hostname'] for n in nodes ]
22 def array_to_priority_map(array):
23 """ Create a mapping where each entry of array is given a priority equal
24 to its position in the array. This is useful for subsequent use in the
33 def cmpValMap(v1, v2, map):
34 if v1 in map and v2 in map and map[v1] < map[v2]:
36 elif v1 in map and v2 in map and map[v1] > map[v2]:
38 elif v1 in map and v2 in map:
41 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """Compare two category values by their rank in a fixed category list.

    The list is converted with array_to_priority_map() and the comparison
    itself is delegated to cmpValMap(); its result is returned unchanged.
    """
    category_order = [None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR']
    priorities = array_to_priority_map(category_order)
    return cmpValMap(v1, v2, priorities)
49 def __init__(self, hostname):
50 self.hostname = hostname
56 def previous_attempt(self):
58 def setValidMapping(self):
62 def __init__(self, key, valuepattern, action):
69 # connect one penalty to another, in a FSM diagram. After one
70 # condition/penalty is applied, move to the next phase.
73 fb = soltesz.dbLoad("findbad")
76 def __init__(self, ticket_id = None):
77 self.ticket_id = ticket_id
79 print "getting ticket status",
80 self.status = mailer.getTicketStatus(self.ticket_id)
83 def setTicketStatus(self, status):
84 mailer.setTicketStatus(self.ticket_id, status)
85 self.status = mailer.getTicketStatus(self.ticket_id)
88 def getTicketStatus(self):
90 self.status = mailer.getTicketStatus(self.ticket_id)
93 def closeTicket(self):
94 mailer.closeTicketViaRT(self.ticket_id)
96 def email(self, subject, body, to):
97 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
100 class Message(object):
101 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
103 self.subject = subject
104 self.message = message
105 self.rt = RT(ticket_id)
109 return self.rt.email(self.subject, self.message, to)
111 return mailer.email(self.subject, self.message, to)
113 class Recent(object):
114 def __init__(self, withintime):
115 self.withintime = withintime
118 self.time = self.__getattribute__('time')
120 self.time = time.time()- 7*24*60*60
122 #self.time = time.time()
123 #self.action_taken = False
126 if self.time + self.withintime < time.time():
127 self.action_taken = False
129 if self.time + self.withintime > time.time() and self.action_taken:
134 def unsetRecent(self):
135 self.action_taken = False
136 self.time = time.time()
140 self.action_taken = True
141 self.time = time.time()
144 class PersistFlags(Recent):
145 def __new__(typ, id, *args, **kwargs):
153 pm = soltesz.dbLoad(db)
155 soltesz.dbDump(db, {})
156 pm = soltesz.dbLoad(db)
161 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
162 for key in kwargs.keys():
163 obj.__setattr__(key, kwargs[key])
164 obj.time = time.time()
165 obj.action_taken = False
170 def __init__(self, id, withintime, **kwargs):
172 Recent.__init__(self, withintime)
175 pm = soltesz.dbLoad(self.db)
177 soltesz.dbDump(self.db, pm)
179 def resetFlag(self, name):
180 self.__setattr__(name, False)
182 def setFlag(self, name):
183 self.__setattr__(name, True)
185 def getFlag(self, name):
187 return self.__getattribute__(name)
189 self.__setattr__(name, False)
192 def resetRecentFlag(self, name):
196 def setRecentFlag(self, name):
200 def getRecentFlag(self, name):
201 # if recent and flag set -> true
204 return self.isRecent() & self.__getattribute__(name)
206 self.__setattr__(name, False)
209 def checkattr(self, name):
211 x = self.__getattribute__(name)
217 class PersistMessage(Message):
218 def __new__(typ, id, subject, message, via_rt, **kwargs):
222 db = "persistmessages"
225 pm = soltesz.dbLoad(db)
227 soltesz.dbDump(db, {})
228 pm = soltesz.dbLoad(db)
232 print "Using existing object"
235 print "creating new object"
236 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
238 obj.actiontracker = Recent(3*60*60*24)
241 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
242 obj.ticket_id = kwargs['ticket_id']
247 def __init__(self, id, subject, message, via_rt=True, **kwargs):
248 print "initializing object: %s" % self.ticket_id
250 Message.__init__(self, subject, message, via_rt, self.ticket_id)
253 self.actiontracker.unsetRecent()
256 if not self.actiontracker.isRecent():
257 self.ticket_id = Message.send(self, to)
258 self.actiontracker.setRecent()
260 #print "recording object for persistance"
261 pm = soltesz.dbLoad(self.db)
263 soltesz.dbDump(self.db, pm)
# NOTE: send at most one message per actiontracker window (withintime seconds), regardless.
266 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // 60*60*24)
268 class MonitorMessage(object):
269 def __new__(typ, id, *args, **kwargs):
273 db = "monitormessages"
276 if 'reset' in kwargs and kwargs['reset'] == True:
277 soltesz.dbDump(db, {})
278 pm = soltesz.dbLoad(db)
280 soltesz.dbDump(db, {})
281 pm = soltesz.dbLoad(db)
285 print "Using existing object"
288 print "creating new object"
# super() has to name this class. super(object, typ) starts the lookup
# after `object`, where no class is left to provide __new__.
# object.__new__ needs only the type, so no extra arguments are forwarded.
obj = super(MonitorMessage, typ).__new__(typ)
291 obj.sp = PersistSitePenalty(id, 0)
296 def __init__(self, id, message):
300 class SitePenalty(object):
302 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
303 'disable' : lambda host: None } )
304 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
305 'disable' : lambda host: plc.enableSliceCreation(host) } )
306 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
307 'disable' : lambda host: plc.enableSlices(host) } )
309 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """Return the 'name' of every entry in SitePenalty.penalty_map, in order."""
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
317 self.index = self.index + 1
318 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
322 self.index = self.index - 1
323 if self.index < 0: self.index = 0
326 def apply(self, host):
328 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
329 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
330 SitePenalty.penalty_map[i]['disable'](host)
332 for i in range(0,self.index+1):
333 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
334 SitePenalty.penalty_map[i]['enable'](host)
340 class PersistSitePenalty(SitePenalty):
341 def __new__(typ, id, index, **kwargs):
345 db = "persistpenalties"
348 if 'reset' in kwargs and kwargs['reset'] == True:
349 soltesz.dbDump(db, {})
350 pm = soltesz.dbLoad(db)
352 soltesz.dbDump(db, {})
353 pm = soltesz.dbLoad(db)
357 print "Using existing object"
360 print "creating new object"
361 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
368 def __init__(self, id, index, **kwargs):
372 pm = soltesz.dbLoad(self.db)
374 soltesz.dbDump(self.db, pm)
379 Each host has a target set of attributes. Some may be set manually,
380 or others are set globally for the preferred target.
383 All nodes in the Alpha or Beta group would have constraints like:
384 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
386 def __init__(self, constraints):
387 self.constraints = constraints
389 def verify(self, data):
391 self.constraints is a list of key, value pairs.
392 # [ {... : ...}==AND , ... , ... , ] == OR
395 for con in self.constraints:
396 #print "con: %s" % con
398 for key in con.keys():
399 #print "looking at key: %s" % key
401 #print "%s %s" % (con[key], data[key])
402 con_and_true = con_and_true & (con[key] in data[key])
403 elif key not in data:
404 print "missing key %s" % key
407 con_or_true = con_or_true | con_and_true
411 class Record(object):
413 def __init__(self, hostname, data):
414 self.hostname = hostname
416 self.plcdb_hn2lb = soltesz.dbLoad("plcdb_hn2lb")
417 self.loginbase = self.plcdb_hn2lb[self.hostname]
421 def stageIswaitforever(self):
422 if 'waitforever' in self.data['stage']:
428 category = self.data['category']
429 prev_category = self.data['prev_category']
430 val = cmpCategoryVal(category, prev_category)
434 return self.severity() > 0
def end_record(self):
    """Pass this record's hostname to node_end_record() and return its result."""
    host = self.hostname
    return node_end_record(host)
439 def reset_stage(self):
440 self.data['stage'] = 'findbad'
def getCategory(self):
    """Return the 'category' entry of self.data, lowercased."""
    category = self.data['category']
    return category.lower()
447 return self.data['state'].lower()
449 def getDaysDown(cls, diag_record):
451 if diag_record['comonstats']['uptime'] != "null":
452 #print "uptime %s" % (int(float(diag_record['comonstats']['uptime'])) // (60*60*24))
453 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
454 elif diag_record['comonstats']['sshstatus'] != "null":
455 daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
456 elif diag_record['comonstats']['lastcotop'] != "null":
457 daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
460 last_contact = diag_record['plcnode']['last_contact']
461 if last_contact == None:
462 # the node has never been up, so give it a break
465 diff = now - last_contact
466 daysdown = diff // (60*60*24)
468 getDaysDown = classmethod(getDaysDown)
470 def getStrDaysDown(cls, diag_record):
471 daysdown = cls.getDaysDown(diag_record)
473 return "%d days down"%daysdown
475 return "Unknown number of days"
477 return "%d days up"% -daysdown
478 getStrDaysDown = classmethod(getStrDaysDown)
480 def takeAction(self):
481 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
482 if 'improvement' in self.data['stage'] or self.improved():
483 print "decreasing penalty for %s"%self.hostname
486 print "increasing penalty for %s"%self.hostname
488 pp.apply(self.hostname)
491 def _format_diaginfo(self):
492 info = self.data['info']
493 if self.data['stage'] == 'monitor-end-record':
494 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
496 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
499 def getMessage(self, ticket_id=None):
500 self.data['args']['hostname'] = self.hostname
501 self.data['args']['loginbase'] = self.loginbase
502 self.data['args']['hostname_list'] = self._format_diaginfo()
503 message = PersistMessage(self.hostname,
504 self.data['message'][0] % self.data['args'],
505 self.data['message'][1] % self.data['args'],
506 True, db='monitor_persistmessages',
510 def getContacts(self):
511 from config import config
515 roles = self.data['email']
517 if not config.mail and not config.debug and config.bcc:
519 if config.mail and config.debug:
525 contacts += [config.email]
527 contacts += [TECHEMAIL % self.loginbase]
529 contacts += [PIEMAIL % self.loginbase]
531 slices = plc.slices(self.loginbase)
534 contacts += [SLICEMAIL % slice]
535 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
537 print "SLIC: %20s : 0 slices" % self.loginbase
543 def __init__(self, hostname, target):
544 self.hostname = hostname
547 if hostname in fb['nodes']:
548 self.data = fb['nodes'][hostname]['values']
550 raise Exception("Hostname not in scan database")
552 def stageIswaitforever(self):
553 if 'waitforever' in self.data['stage']:
559 category = self.data['category']
560 prev_category = self.data['prev_category']
561 val = cmpCategoryVal(category, prev_category)
565 return self.severity() > 0
def end_record(self):
    """Pass this object's hostname to node_end_record() and return its result."""
    host = self.hostname
    return node_end_record(host)
570 def reset_stage(self):
571 self.data['stage'] = 'findbad'
574 def open_tickets(self):
575 if self.ticket and self.ticket.status['status'] == 'open':
578 def setIntrospect(self):
581 def email_notice(self):
582 message = self._get_message_for_condition()
583 message.send(self._get_contacts_for_condition())
585 def close_ticket(self):
587 self.ticket.closeTicket()
def exempt_from_penalties(self):
    """Report whether this hostname is present in the "l_blacklist" database."""
    blacklist = soltesz.dbLoad("l_blacklist")
    is_listed = self.hostname in blacklist
    return is_listed
595 def escellate_penalty(self):
597 def reduce_penalty(self):
602 return self.target.verify(self.data)
604 def _get_condition(self):
605 return self.data['category'].lower()
607 def _get_stage(self):
610 "secondnotice_noslicecreation"
611 "thirdnotice_disableslices"
613 delta = current_time - self.data['time']
615 def _get_message_for_condition(self):
617 def _get_contacts_for_condition(self):
620 if __name__ == "__main__":
622 #r.email("test", "body of test message", ['soltesz@cs.princeton.edu'])
623 #from emailTxt import mailtxt
625 #soltesz.dbDump("persistmessages", {});
626 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
627 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
628 #m.send(['soltesz@cs.utk.edu'])
629 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
630 # TRICK timer to thinking some time has passed.
631 #m.actiontracker.time = time.time() - 6*60*60*24
632 #m.send(['soltesz@cs.utk.edu'])