10 from nodecommon import *
14 def gethostlist(hostlist_file):
16 return config.getListFromFile(hostlist_file)
18 #nodes = api.GetNodes({'peer_id' : None}, ['hostname'])
19 #return [ n['hostname'] for n in nodes ]
21 def array_to_priority_map(array):
22 """ Create a mapping where each entry of array is given a priority equal
23 to its position in the array. This is useful for subsequent use in the
32 def cmpValMap(v1, v2, map):
33 if v1 in map and v2 in map and map[v1] < map[v2]:
35 elif v1 in map and v2 in map and map[v1] > map[v2]:
37 elif v1 in map and v2 in map:
40 raise Exception("No index %s or %s in map" % (v1, v2))
def cmpCategoryVal(v1, v2):
    """ Compare two category values using a fixed category ordering.

    Each category is given a priority equal to its position in the list
    below (via array_to_priority_map), and the actual comparison is
    delegated to cmpValMap, whose result is returned unchanged.

    v1, v2 -- category values; each is expected to be one of the entries
              listed below (None is a valid entry).
    """
    # Named 'priorities' rather than 'map' so the builtin map() is not
    # shadowed inside this function.
    priorities = array_to_priority_map([ None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR', ])
    return cmpValMap(v1, v2, priorities)
48 def __init__(self, hostname):
49 self.hostname = hostname
55 def previous_attempt(self):
57 def setValidMapping(self):
61 def __init__(self, key, valuepattern, action):
68 # connect one penalty to another, in a FSM diagram. After one
69 # condition/penalty is applied, move to the next phase.
72 fb = database.dbLoad("findbad")
75 def __init__(self, ticket_id = None):
76 self.ticket_id = ticket_id
78 print "getting ticket status",
79 self.status = mailer.getTicketStatus(self.ticket_id)
82 def setTicketStatus(self, status):
83 mailer.setTicketStatus(self.ticket_id, status)
84 self.status = mailer.getTicketStatus(self.ticket_id)
87 def getTicketStatus(self):
89 self.status = mailer.getTicketStatus(self.ticket_id)
def closeTicket(self):
    """ Ask the mailer module to close this object's ticket via RT. """
    ticket = self.ticket_id
    mailer.closeTicketViaRT(ticket)
95 def email(self, subject, body, to):
96 self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
99 class Message(object):
100 def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
102 self.subject = subject
103 self.message = message
104 self.rt = RT(ticket_id)
108 return self.rt.email(self.subject, self.message, to)
110 return mailer.email(self.subject, self.message, to)
112 class Recent(object):
113 def __init__(self, withintime):
114 self.withintime = withintime
117 self.time = self.__getattribute__('time')
119 self.time = time.time()- 7*24*60*60
121 #self.time = time.time()
122 #self.action_taken = False
125 if self.time + self.withintime < time.time():
126 self.action_taken = False
128 if self.time + self.withintime > time.time() and self.action_taken:
133 def unsetRecent(self):
134 self.action_taken = False
135 self.time = time.time()
139 self.action_taken = True
140 self.time = time.time()
143 class PersistFlags(Recent):
144 def __new__(typ, id, *args, **kwargs):
152 pm = database.dbLoad(db)
154 database.dbDump(db, {})
155 pm = database.dbLoad(db)
160 obj = super(PersistFlags, typ).__new__(typ, *args, **kwargs)
161 for key in kwargs.keys():
162 obj.__setattr__(key, kwargs[key])
163 obj.time = time.time()
164 obj.action_taken = False
169 def __init__(self, id, withintime, **kwargs):
171 Recent.__init__(self, withintime)
174 pm = database.dbLoad(self.db)
176 database.dbDump(self.db, pm)
def resetFlag(self, name):
    """ Set the attribute called `name` on this object to False. """
    cleared = False
    setattr(self, name, cleared)
def setFlag(self, name):
    """ Set the attribute called `name` on this object to True. """
    raised = True
    setattr(self, name, raised)
184 def getFlag(self, name):
186 return self.__getattribute__(name)
188 self.__setattr__(name, False)
191 def resetRecentFlag(self, name):
195 def setRecentFlag(self, name):
199 def getRecentFlag(self, name):
200 # if recent and flag set -> true
203 return self.isRecent() & self.__getattribute__(name)
205 self.__setattr__(name, False)
208 def checkattr(self, name):
210 x = self.__getattribute__(name)
216 class PersistMessage(Message):
217 def __new__(typ, id, subject, message, via_rt, **kwargs):
221 db = "persistmessages"
224 pm = database.dbLoad(db)
226 database.dbDump(db, {})
227 pm = database.dbLoad(db)
231 print "Using existing object"
234 print "creating new object"
235 obj = super(PersistMessage, typ).__new__(typ, [id, subject, message, via_rt], **kwargs)
237 obj.actiontracker = Recent(3*60*60*24)
240 if 'ticket_id' in kwargs and kwargs['ticket_id'] is not None:
241 obj.ticket_id = kwargs['ticket_id']
246 def __init__(self, id, subject, message, via_rt=True, **kwargs):
247 print "initializing object: %s" % self.ticket_id
249 Message.__init__(self, subject, message, via_rt, self.ticket_id)
252 self.actiontracker.unsetRecent()
255 if not self.actiontracker.isRecent():
256 self.ticket_id = Message.send(self, to)
257 self.actiontracker.setRecent()
259 #print "recording object for persistance"
260 pm = database.dbLoad(self.db)
262 database.dbDump(self.db, pm)
264 # NOTE: only send a new message every week, regardless.
265 print "Not sending to host b/c not within window of %s days" % (self.actiontracker.withintime // 60*60*24)
267 class MonitorMessage(object):
268 def __new__(typ, id, *args, **kwargs):
272 db = "monitormessages"
275 if 'reset' in kwargs and kwargs['reset'] == True:
276 database.dbDump(db, {})
277 pm = database.dbLoad(db)
279 database.dbDump(db, {})
280 pm = database.dbLoad(db)
284 print "Using existing object"
287 print "creating new object"
288 obj = super(object, typ).__new__(typ, id, *args, **kwargs)
290 obj.sp = PersistSitePenalty(id, 0)
295 def __init__(self, id, message):
299 class SitePenalty(object):
301 penalty_map.append( { 'name': 'noop', 'enable' : lambda host: None,
302 'disable' : lambda host: None } )
303 penalty_map.append( { 'name': 'nocreate', 'enable' : lambda host: plc.removeSliceCreation(host),
304 'disable' : lambda host: plc.enableSliceCreation(host) } )
305 penalty_map.append( { 'name': 'suspendslices', 'enable' : lambda host: plc.suspendSlices(host),
306 'disable' : lambda host: plc.enableSlices(host) } )
308 #def __init__(self, index=0, **kwargs):
def get_penalties(self):
    """ Return the 'name' of every entry in SitePenalty.penalty_map, in order. """
    # TODO: get penalties actually applied to a node from PLC DB.
    names = []
    for entry in SitePenalty.penalty_map:
        names.append(entry['name'])
    return names
316 self.index = self.index + 1
317 if self.index > len(SitePenalty.penalty_map)-1: self.index = len(SitePenalty.penalty_map)-1
321 self.index = self.index - 1
322 if self.index < 0: self.index = 0
325 def apply(self, host):
327 for i in range(len(SitePenalty.penalty_map)-1,self.index,-1):
328 print "\tdisabling %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
329 SitePenalty.penalty_map[i]['disable'](host)
331 for i in range(0,self.index+1):
332 print "\tapplying %s on %s" % (SitePenalty.penalty_map[i]['name'], host)
333 SitePenalty.penalty_map[i]['enable'](host)
339 class PersistSitePenalty(SitePenalty):
340 def __new__(typ, id, index, **kwargs):
344 db = "persistpenalties"
347 if 'reset' in kwargs and kwargs['reset'] == True:
348 database.dbDump(db, {})
349 pm = database.dbLoad(db)
351 database.dbDump(db, {})
352 pm = database.dbLoad(db)
356 print "Using existing object"
359 print "creating new object"
360 obj = super(PersistSitePenalty, typ).__new__(typ, [index], **kwargs)
367 def __init__(self, id, index, **kwargs):
371 pm = database.dbLoad(self.db)
373 database.dbDump(self.db, pm)
378 Each host has a target set of attributes. Some may be set manually,
379 or others are set globally for the preferred target.
382 All nodes in the Alpha or Beta group would have constraints like:
383 [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
def __init__(self, constraints):
    """ Remember the constraints that verify() will later check data against.

    constraints -- stored as-is on self.constraints; verify() iterates it
                   and calls .keys() on each element, so a list of dicts
                   is expected.
    """
    self.constraints = constraints
388 def verify(self, data):
390 self.constraints is a list of key, value pairs.
391 # [ {... : ...}==AND , ... , ... , ] == OR
394 for con in self.constraints:
395 #print "con: %s" % con
397 for key in con.keys():
398 #print "looking at key: %s" % key
400 #print "%s %s" % (con[key], data[key])
401 con_and_true = con_and_true & (con[key] in data[key])
402 elif key not in data:
403 print "missing key %s" % key
406 con_or_true = con_or_true | con_and_true
410 class Record(object):
412 def __init__(self, hostname, data):
413 self.hostname = hostname
415 self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
416 self.loginbase = self.plcdb_hn2lb[self.hostname]
420 def stageIswaitforever(self):
421 if 'waitforever' in self.data['stage']:
427 category = self.data['category']
428 prev_category = self.data['prev_category']
429 val = cmpCategoryVal(category, prev_category)
433 return self.severity() > 0
def end_record(self):
    """ Call node_end_record() for this record's hostname and return its result. """
    host = self.hostname
    outcome = node_end_record(host)
    return outcome
438 def reset_stage(self):
439 self.data['stage'] = 'findbad'
def getCategory(self):
    """ Return this record's 'category' entry converted to lower case. """
    category = self.data['category']
    return category.lower()
446 return self.data['state'].lower()
448 def getDaysDown(cls, diag_record):
450 if diag_record['comonstats']['uptime'] != "null":
451 daysdown = - int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
452 #elif diag_record['comonstats']['sshstatus'] != "null":
453 # daysdown = int(diag_record['comonstats']['sshstatus']) // (60*60*24)
454 #elif diag_record['comonstats']['lastcotop'] != "null":
455 # daysdown = int(diag_record['comonstats']['lastcotop']) // (60*60*24)
458 last_contact = diag_record['plcnode']['last_contact']
459 if last_contact == None:
460 # the node has never been up, so give it a break
463 diff = now - last_contact
464 daysdown = diff // (60*60*24)
466 getDaysDown = classmethod(getDaysDown)
468 def getStrDaysDown(cls, diag_record):
470 last_contact = diag_record['plcnode']['last_contact']
471 date_created = diag_record['plcnode']['date_created']
473 if diag_record['comonstats']['uptime'] != "null" and \
474 diag_record['comonstats']['uptime'] != "-1":
475 daysdown = int(float(diag_record['comonstats']['uptime'])) // (60*60*24)
476 daysdown = "%d days up" % daysdown
478 elif last_contact is None:
479 if date_created is not None:
481 diff = now - date_created
482 daysdown = diff // (60*60*24)
483 daysdown = "Never contacted PLC, created %s days ago" % daysdown
485 daysdown = "Never contacted PLC"
488 diff = now - last_contact
489 daysdown = diff // (60*60*24)
490 daysdown = "%s days down" % daysdown
492 getStrDaysDown = classmethod(getStrDaysDown)
494 #def getStrDaysDown(cls, diag_record):
495 # daysdown = cls.getDaysDown(diag_record)
497 # return "%d days down"%daysdown
498 # elif daysdown == -1:
499 # return "Never online"
501 # return "%d days up"% -daysdown
502 #getStrDaysDown = classmethod(getStrDaysDown)
504 def takeAction(self):
505 pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
506 if 'improvement' in self.data['stage'] or self.improved():
507 print "decreasing penalty for %s"%self.hostname
510 print "increasing penalty for %s"%self.hostname
512 pp.apply(self.hostname)
515 def _format_diaginfo(self):
516 info = self.data['info']
517 if self.data['stage'] == 'monitor-end-record':
518 hlist = " %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
520 hlist = " %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)
523 def getMessage(self, ticket_id=None):
524 self.data['args']['hostname'] = self.hostname
525 self.data['args']['loginbase'] = self.loginbase
526 self.data['args']['hostname_list'] = self._format_diaginfo()
527 message = PersistMessage(self.hostname,
528 self.data['message'][0] % self.data['args'],
529 self.data['message'][1] % self.data['args'],
530 True, db='monitor_persistmessages',
534 def getContacts(self):
535 from config import config
539 roles = self.data['email']
541 if not config.mail and not config.debug and config.bcc:
543 if config.mail and config.debug:
549 contacts += [config.email]
551 contacts += [TECHEMAIL % self.loginbase]
553 contacts += [PIEMAIL % self.loginbase]
555 slices = plc.slices(self.loginbase)
558 contacts += [SLICEMAIL % slice]
559 print "SLIC: %20s : %d slices" % (self.loginbase, len(slices))
561 print "SLIC: %20s : 0 slices" % self.loginbase
567 def __init__(self, hostname, target):
568 self.hostname = hostname
571 if hostname in fb['nodes']:
572 self.data = fb['nodes'][hostname]['values']
574 raise Exception("Hostname not in scan database")
576 def stageIswaitforever(self):
577 if 'waitforever' in self.data['stage']:
583 category = self.data['category']
584 prev_category = self.data['prev_category']
585 val = cmpCategoryVal(category, prev_category)
589 return self.severity() > 0
def end_record(self):
    """ Call node_end_record() for this object's hostname and return its result. """
    host = self.hostname
    outcome = node_end_record(host)
    return outcome
594 def reset_stage(self):
595 self.data['stage'] = 'findbad'
598 def open_tickets(self):
599 if self.ticket and self.ticket.status['status'] == 'open':
602 def setIntrospect(self):
def email_notice(self):
    """ Send the message chosen for the current condition to its contacts.

    The message comes from _get_message_for_condition() and the recipient
    list from _get_contacts_for_condition(); nothing is returned.
    """
    notice = self._get_message_for_condition()
    # Look up send() before building the recipient list, matching the
    # evaluation order of a direct notice.send(...) call.
    deliver = notice.send
    deliver(self._get_contacts_for_condition())
609 def close_ticket(self):
611 self.ticket.closeTicket()
def exempt_from_penalties(self):
    """ Return True when this hostname is in the "l_blacklist" database. """
    blacklist = database.dbLoad("l_blacklist")
    host = self.hostname
    return host in blacklist
619 def escellate_penalty(self):
621 def reduce_penalty(self):
626 return self.target.verify(self.data)
def _get_condition(self):
    """ Return the 'category' entry of self.data converted to lower case. """
    category = self.data['category']
    return category.lower()
631 def _get_stage(self):
634 "secondnotice_noslicecreation"
635 "thirdnotice_disableslices"
637 delta = current_time - self.data['time']
639 def _get_message_for_condition(self):
641 def _get_contacts_for_condition(self):
644 if __name__ == "__main__":
646 #r.email("test", "body of test message", ['database@cs.princeton.edu'])
647 #from emailTxt import mailtxt
649 #database.dbDump("persistmessages", {});
650 #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah - days down\n'}
651 #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
652 #m.send(['soltesz@cs.utk.edu'])
653 #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
654 # Trick the timer into thinking some time has passed.
655 #m.actiontracker.time = time.time() - 6*60*60*24
656 #m.send(['soltesz@cs.utk.edu'])