www/printbadnodes.py
[monitor.git] / unified_model.py
1 #!/usr/bin/python
2
3 from monitor import database
4
5 import plc
6 api = plc.getAuthAPI()
7
8 import mailer
9 import time
10
11 from model import *
12 from const import *
13 import util.file
14 import config
15
def gethostlist(hostlist_file):
    """Return whatever util.file.getListFromFile() reads from hostlist_file.

    Presumably the file holds one hostname per entry -- confirm against
    util.file.
    """
    # Alternative source kept for reference (ask PLC for the local nodes):
    #   nodes = api.GetNodes({'peer_id' : None}, ['hostname'])
    #   return [ n['hostname'] for n in nodes ]
    hostnames = util.file.getListFromFile(hostlist_file)
    return hostnames
21
def array_to_priority_map(array):
    """Map every entry of array to its position in the array.

    The result is meant to be handed to cmpValMap() as its ``map``
    argument.  When an entry occurs more than once, its last position wins.
    An empty array gives an empty dictionary.
    """
    return dict([(entry, position) for position, entry in enumerate(array)])
32
def cmpValMap(v1, v2, map):
    """Compare v1 and v2 by the priorities stored for them in map.

    Returns 1 when v1 has the smaller priority number, -1 when it has the
    larger one and 0 when both priorities are equal.  Raises Exception when
    either value is not a key of map.
    """
    if v1 not in map or v2 not in map:
        raise Exception("No index %s or %s in map" % (v1, v2))

    first, second = map[v1], map[v2]
    if first < second:
        return 1
    if first > second:
        return -1
    return 0
42
def cmpCategoryVal(v1, v2):
    """Compare two category values with cmpValMap().

    The priority of a category is its position in the list below, so the
    result is 1 when v1 comes earlier in the list than v2, -1 when it comes
    later and 0 when they are the same.
    """
    ordering = [None, 'ALPHA', 'PROD', 'OLDBOOTCD', 'UNKNOWN', 'FORCED', 'ERROR']
    priorities = array_to_priority_map(ordering)
    return cmpValMap(v1, v2, priorities)
46
47
class PCU:
    """Stub PCU object bound to a hostname; no operation is implemented."""

    def __init__(self, hostname):
        self.hostname = hostname

    def reboot(self):
        """Stub: always returns True."""
        return True

    def available(self):
        """Stub: always returns True."""
        return True

    def previous_attempt(self):
        """Stub: always returns True."""
        return True

    def setValidMapping(self):
        """Stub: does nothing and returns None."""
        return None
60
class Penalty:
    """Unimplemented placeholder; the constructor ignores its arguments."""

    def __init__(self, key, valuepattern, action):
        return None
64
class PenaltyMap:
    """Unimplemented placeholder.

    Design note from the original author: connect one penalty to another, as
    in an FSM diagram; after one condition/penalty is applied, move to the
    next phase.
    """

    def __init__(self):
        return None
71
72
73 #fb = database.dbLoad("findbad")
74
class RT(object):
    """Handle on one RT ticket; every operation is delegated to mailer.

    Attributes:
        ticket_id -- the ticket this object refers to, or None.
        status    -- last value returned by mailer.getTicketStatus(), or None
                     when it has not been fetched yet.
    """

    def __init__(self, ticket_id = None):
        self.ticket_id = ticket_id
        # Always define status: without this, getTicketStatus() raised
        # AttributeError on an object created without a ticket.
        self.status = None
        if self.ticket_id:
            # Local import: sys is not imported at the top of this file.
            import sys
            # Announce the lookup before the call and finish the line with
            # the result, so both end up on one output line.
            sys.stdout.write("getting ticket status ")
            self.status = mailer.getTicketStatus(self.ticket_id)
            print(self.status)

    def setTicketStatus(self, status):
        """Set the ticket status through mailer, then re-read it.  Returns True."""
        mailer.setTicketStatus(self.ticket_id, status)
        self.status = mailer.getTicketStatus(self.ticket_id)
        return True

    def getTicketStatus(self):
        """Return the cached status, fetching it through mailer if it is empty."""
        if not self.status:
            self.status = mailer.getTicketStatus(self.ticket_id)
        return self.status

    def closeTicket(self):
        """Close the ticket with mailer.closeTicketViaRT()."""
        mailer.closeTicketViaRT(self.ticket_id)

    def email(self, subject, body, to):
        """Send a message with mailer.emailViaRT() and remember the ticket id.

        The value returned by mailer.emailViaRT() replaces self.ticket_id and
        is also returned to the caller.
        """
        self.ticket_id = mailer.emailViaRT(subject, body, to, self.ticket_id)
        return self.ticket_id
99
class Message(object):
    """A subject/body pair that is sent either through RT or by plain mail."""

    def __init__(self, subject, message, via_rt=True, ticket_id=None, **kwargs):
        # Any additional keyword arguments are accepted and ignored.
        self.via_rt = via_rt
        self.subject = subject
        self.message = message
        self.rt = RT(ticket_id)

    def send(self, to):
        """Send the message to ``to``.

        Returns the result of RT.email() when via_rt is set, otherwise the
        result of mailer.email().
        """
        if not self.via_rt:
            return mailer.email(self.subject, self.message, to)
        return self.rt.email(self.subject, self.message, to)
112
class Recent(object):
    """Tracks whether an action was taken within the last ``withintime`` seconds.

    Attributes:
        withintime   -- length of the window, in seconds.
        time         -- timestamp the window is measured from.
        action_taken -- True once setRecent() was called inside the window.
    """

    def __init__(self, withintime):
        self.withintime = withintime

        # __init__ can run on an object that already carries state (the
        # Persist* subclasses return stored instances from __new__), so
        # existing values are kept and only missing ones get a default.
        try:
            self.time
        except AttributeError:
            # Default to one week in the past.
            self.time = time.time() - 7*24*60*60

        try:
            self.action_taken
        except AttributeError:
            # Previously left undefined, which made isRecent() raise
            # AttributeError whenever withintime exceeded the one-week default.
            self.action_taken = False

    def isRecent(self):
        """Return True only if an action was recorded and the window is still open.

        Once the window has passed, action_taken is reset to False.
        """
        now = time.time()
        window_end = self.time + self.withintime
        if window_end < now:
            self.action_taken = False

        if window_end > now and self.action_taken:
            return True
        return False

    def unsetRecent(self):
        """Clear the action flag and restart the window at the current time."""
        self.action_taken = False
        self.time = time.time()
        return True

    def setRecent(self):
        """Record that an action was taken now."""
        self.action_taken = True
        self.time = time.time()
        return True
143                 
class PersistFlags(Recent):
    """A Recent tracker with named boolean flags, stored under an id in a database.

    Constructing PersistFlags(id, withintime, db=NAME, **attrs) returns the
    object already stored under ``id`` in database NAME (default
    "persistflags") if there is one; otherwise a new object is created and
    every extra keyword argument becomes an attribute.  Nothing is written
    back until save() is called.
    """

    def __new__(typ, id, *args, **kwargs):
        db = kwargs.pop('db', "persistflags")

        try:
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded: start a new, empty one.
            # Only Exception is caught so that an interrupt during the load
            # does not overwrite existing data with an empty dictionary.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            obj = pm[id]
        else:
            # object.__new__ takes no extra arguments; the constructor
            # arguments are consumed here and in __init__.
            obj = super(PersistFlags, typ).__new__(typ)
            for key in kwargs:
                setattr(obj, key, kwargs[key])
            obj.time = time.time()
            obj.action_taken = False

        obj.db = db
        return obj

    def __init__(self, id, withintime, **kwargs):
        self.id = id
        Recent.__init__(self, withintime)

    def save(self):
        """Store this object under its id in its database."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)

    def resetFlag(self, name):
        """Set attribute ``name`` to False."""
        setattr(self, name, False)

    def setFlag(self, name):
        """Set attribute ``name`` to True."""
        setattr(self, name, True)

    def getFlag(self, name):
        """Return attribute ``name``; a missing flag is created as False."""
        try:
            return getattr(self, name)
        except AttributeError:
            setattr(self, name, False)
            return False

    def resetRecentFlag(self, name):
        """Clear the flag and the action marker, restarting the window."""
        self.resetFlag(name)
        self.unsetRecent()

    def setRecentFlag(self, name):
        """Set the flag and mark an action as taken now."""
        self.setFlag(name)
        self.setRecent()

    def getRecentFlag(self, name):
        """Return isRecent() combined (with ``&``) with the flag ``name``.

        A missing attribute is created as False and False is returned.
        """
        try:
            return self.isRecent() & getattr(self, name)
        except AttributeError:
            setattr(self, name, False)
            return False

    def checkattr(self, name):
        """Return True if attribute ``name`` exists on this object."""
        try:
            getattr(self, name)
        except AttributeError:
            return False
        return True
215                 
216
class PersistMessage(Message):
    """A Message that is stored per id and re-sent at most once per window.

    Constructing PersistMessage(id, subject, message, via_rt, db=NAME,
    ticket_id=T) returns the object stored under ``id`` in database NAME
    (default "persistmessages") when there is one, otherwise a new object
    with a three-day Recent tracker.  A ticket_id that is not None replaces
    the stored one.  The object is written back by send().
    """

    # via_rt has the same default as in __init__, so the three-argument
    # call accepted by __init__ is no longer rejected here.
    def __new__(typ, id, subject, message, via_rt=True, **kwargs):
        db = kwargs.get('db', "persistmessages")

        try:
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded: start a new, empty one.
            # Only Exception is caught so that an interrupt during the load
            # does not overwrite existing data with an empty dictionary.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # object.__new__ takes no extra arguments.
            obj = super(PersistMessage, typ).__new__(typ)
            obj.id = id
            obj.actiontracker = Recent(3*60*60*24)
            obj.ticket_id = None

        if kwargs.get('ticket_id') is not None:
            obj.ticket_id = kwargs['ticket_id']

        obj.db = db
        return obj

    def __init__(self, id, subject, message, via_rt=True, **kwargs):
        # ticket_id was set by __new__ (new object, stored object or keyword).
        print("initializing object: %s" % self.ticket_id)
        self.id = id
        Message.__init__(self, subject, message, via_rt, self.ticket_id)

    def reset(self):
        """Reopen the sending window so the next send() goes out."""
        self.actiontracker.unsetRecent()

    def send(self, to):
        """Send the message unless one was already sent within the window.

        After a send, the ticket id returned by Message.send() is kept and
        the object is stored in its database.  Returns None.
        """
        if not self.actiontracker.isRecent():
            self.ticket_id = Message.send(self, to)
            self.actiontracker.setRecent()

            # Record the object for persistence.
            pm = database.dbLoad(self.db)
            pm[self.id] = self
            database.dbDump(self.db, pm)
        else:
            # Only one message per window, regardless.  The divisor is
            # parenthesised: "x // 60*60*24" is (x // 60)*60*24, not days.
            window_days = self.actiontracker.withintime // (60*60*24)
            print("Not sending to host b/c not within window of %s days" % window_days)
267
class MonitorMessage(object):
    """Per-id object loaded from a database, carrying a PersistSitePenalty.

    Constructing MonitorMessage(id, message, db=NAME, reset=BOOL) returns
    the object stored under ``id`` in database NAME (default
    "monitormessages") when there is one, otherwise a new object whose
    ``sp`` attribute is PersistSitePenalty(id, 0).  With reset=True the
    database is emptied first.  Nothing in this class writes the object back.
    """

    def __new__(typ, id, *args, **kwargs):
        db = kwargs.get('db', "monitormessages")

        try:
            if 'reset' in kwargs and kwargs['reset'] == True:
                database.dbDump(db, {})
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded: start a new, empty one.
            # Only Exception is caught so that an interrupt during the load
            # does not overwrite existing data with an empty dictionary.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # super(object, typ) has nothing after object in the MRO and
            # resolved to super.__new__, which rejects typ; name this class
            # instead and pass no extra arguments to object.__new__.
            obj = super(MonitorMessage, typ).__new__(typ)
            obj.id = id
            obj.sp = PersistSitePenalty(id, 0)

        obj.db = db
        return obj

    def __init__(self, id, message, **kwargs):
        # All state is set up in __new__.  **kwargs is accepted so that the
        # db/reset keywords understood by __new__ do not raise TypeError here.
        pass
298                 
299
class SitePenalty(object):
    """Escalating list of penalties, addressed by a position ``index``.

    penalty_map is ordered from the mildest entry to the most severe one.
    Each entry has a 'name' and two callables, 'enable' and 'disable', that
    take a host and call the matching plc function.
    """

    penalty_map = [
        { 'name': 'noop',
          'enable'  : lambda host: None,
          'disable' : lambda host: None },
        { 'name': 'nocreate',
          'enable'  : lambda host: plc.removeSliceCreation(host),
          'disable' : lambda host: plc.enableSliceCreation(host) },
        { 'name': 'suspendslices',
          'enable'  : lambda host: plc.suspendSlices(host),
          'disable' : lambda host: plc.enableSlices(host) },
    ]

    # Default level for an object that never had one assigned.  Without it,
    # increase(), decrease() and apply() raised AttributeError on a plain
    # SitePenalty instance.
    index = 0

    def get_penalties(self):
        """Return the names of all entries in penalty_map, in order."""
        # TODO: get penalties actually applied to a node from PLC DB.
        return [ entry['name'] for entry in SitePenalty.penalty_map ]

    def increase(self):
        """Move one level up, stopping at the last entry.  Returns True."""
        top = len(SitePenalty.penalty_map) - 1
        self.index = min(self.index + 1, top)
        return True

    def decrease(self):
        """Move one level down, stopping at the first entry.  Returns True."""
        self.index = max(self.index - 1, 0)
        return True

    def apply(self, host):
        """Bring ``host`` to the current level.

        Every entry above the current index is disabled, from the last one
        downwards; then every entry from 0 up to and including the current
        index is enabled.
        """
        levels = SitePenalty.penalty_map

        for i in range(len(levels) - 1, self.index, -1):
            print("\tdisabling %s on %s" % (levels[i]['name'], host))
            levels[i]['disable'](host)

        for i in range(0, self.index + 1):
            print("\tapplying %s on %s" % (levels[i]['name'], host))
            levels[i]['enable'](host)

        return
337
338
339
class PersistSitePenalty(SitePenalty):
    """A SitePenalty whose level is stored under an id in a database.

    Constructing PersistSitePenalty(id, index, db=NAME, reset=BOOL) returns
    the object stored under ``id`` in database NAME (default
    "persistpenalties") when there is one; ``index`` is then ignored.
    Otherwise a new object starting at ``index`` is created.  With
    reset=True the database is emptied first.  Nothing is written back until
    save() is called.
    """

    def __new__(typ, id, index, **kwargs):
        db = kwargs.get('db', "persistpenalties")

        try:
            if 'reset' in kwargs and kwargs['reset'] == True:
                database.dbDump(db, {})
            pm = database.dbLoad(db)
        except Exception:
            # The database could not be loaded: start a new, empty one.
            # Only Exception is caught so that an interrupt during the load
            # does not overwrite existing data with an empty dictionary.
            database.dbDump(db, {})
            pm = database.dbLoad(db)

        if id in pm:
            print("Using existing object")
            obj = pm[id]
        else:
            print("creating new object")
            # object.__new__ takes no extra arguments.
            obj = super(PersistSitePenalty, typ).__new__(typ)
            obj.id = id
            obj.index = index

        obj.db = db
        return obj

    def __init__(self, id, index, **kwargs):
        # index is handled in __new__ so that a stored level is not reset.
        self.id = id

    def save(self):
        """Store this object under its id in its database."""
        pm = database.dbLoad(self.db)
        pm[self.id] = self
        database.dbDump(self.db, pm)
375
376
class Target:
    """
    The set of attribute values a host is expected to have.  Some targets
    may be set manually, others globally as the preferred target.

    For instance, all nodes in the Alpha or Beta group would have
    constraints like:
        [ { 'state' : 'BOOT', 'kernel' : '2.6.22' } ]
    """

    def __init__(self, constraints):
        self.constraints = constraints

    def verify(self, data):
        """
        Check data against self.constraints, a list of dictionaries.

        A dictionary is satisfied when, for every one of its keys, the key
        is present in data and the wanted value is ``in`` data[key] (AND).
        The target is met when at least one dictionary is satisfied (OR).
        A missing key is reported on standard output.
        """
        any_clause_met = False
        for clause in self.constraints:
            clause_met = True
            for key in clause.keys():
                if key not in data:
                    print("missing key %s" % key)
                    clause_met = False
                else:
                    # The membership test runs for every present key, even
                    # when the clause has already failed.
                    found = clause[key] in data[key]
                    clause_met = found and clause_met

            any_clause_met = clause_met or any_clause_met

        return any_clause_met
410
class Record(object):
    """A host's diagnosis data together with the actions that follow from it.

    Attributes:
        hostname    -- the host this record is about.
        data        -- dictionary of diagnosis values; the methods below read
                       the keys 'stage', 'category', 'prev_category', 'state',
                       'info', 'message', 'args' and 'email'.
        plcdb_hn2lb -- the "plcdb_hn2lb" database, indexed by hostname.
        loginbase   -- the entry of plcdb_hn2lb for this hostname.
    """

    def __init__(self, hostname, data):
        self.hostname = hostname
        self.data = data
        # The lookup fails if the hostname is not in the table.
        self.plcdb_hn2lb = database.dbLoad("plcdb_hn2lb")
        self.loginbase = self.plcdb_hn2lb[self.hostname]

    def stageIswaitforever(self):
        """Return True if 'waitforever' occurs in the record's stage."""
        return 'waitforever' in self.data['stage']

    def severity(self):
        """Compare the current category with the previous one (cmpCategoryVal)."""
        return cmpCategoryVal(self.data['category'], self.data['prev_category'])

    def improved(self):
        """Return True if the category moved earlier in the category order."""
        return self.severity() > 0

    def end_record(self):
        """Run node_end_record() for this host and return its result."""
        return node_end_record(self.hostname)

    def reset_stage(self):
        """Set the stage back to 'findbad'.  Returns True."""
        self.data['stage'] = 'findbad'
        return True

    def getCategory(self):
        """Return the category in lower case."""
        return self.data['category'].lower()

    def getState(self):
        """Return the state in lower case."""
        return self.data['state'].lower()

    def getDaysDown(cls, diag_record):
        """Return the number of days a node has been down, as an integer.

        When comon reports an uptime the result is the negated uptime,
        floor-divided by the length of a day.  Otherwise it is the number of
        whole days since the node's last contact, or -1 if it never made
        contact.
        """
        uptime = diag_record['comonstats']['uptime']
        if uptime != "null":
            # NOTE(review): the negation is applied before the floor
            # division, so any uptime between one second and one day gives
            # -1, the same value used for "never contacted" -- confirm that
            # this is intended.
            return (-int(float(uptime))) // (60*60*24)

        last_contact = diag_record['plcnode']['last_contact']
        if last_contact is None:
            # the node has never been up, so give it a break
            return -1
        # time.time() is a float; return a whole number of days.
        return int((time.time() - last_contact) // (60*60*24))
    getDaysDown = classmethod(getDaysDown)

    def getStrDaysDown(cls, diag_record):
        """Return a one-line description of how long a node has been up or down.

        One of "N days up", "N days down", "Never contacted PLC, created N
        days ago" or "Never contacted PLC"; N is a whole number.
        """
        last_contact = diag_record['plcnode']['last_contact']
        date_created = diag_record['plcnode']['date_created']
        uptime = diag_record['comonstats']['uptime']

        if uptime != "null" and uptime != "-1":
            return "%d days up" % (int(float(uptime)) // (60*60*24))

        if last_contact is None:
            if date_created is None:
                return "Never contacted PLC"
            # int(): the difference is a float and used to be printed as
            # e.g. "3.0 days".
            days = int((time.time() - date_created) // (60*60*24))
            return "Never contacted PLC, created %s days ago" % days

        days = int((time.time() - last_contact) // (60*60*24))
        return "%s days down" % days
    getStrDaysDown = classmethod(getStrDaysDown)

    def takeAction(self):
        """Move the host's stored penalty one level and apply it.

        The penalty is decreased when the stage mentions 'improvement' or
        the category improved, increased otherwise; the new level is then
        applied to the host and saved.
        """
        pp = PersistSitePenalty(self.hostname, 0, db='persistpenalty_hostnames')
        if 'improvement' in self.data['stage'] or self.improved():
            print("decreasing penalty for %s" % self.hostname)
            pp.decrease()
        else:
            print("increasing penalty for %s" % self.hostname)
            pp.increase()
        pp.apply(self.hostname)
        pp.save()

    def _format_diaginfo(self):
        """Format data['info'] as the indented line used in messages."""
        info = self.data['info']
        if self.data['stage'] == 'monitor-end-record':
            return "    %s went from '%s' to '%s'\n" % (info[0], info[1], info[2])
        return "    %s %s - %s\n" % (info[0], info[2], info[1]) #(node,ver,daysdn)

    def getMessage(self, ticket_id=None):
        """Build the PersistMessage for this record.

        data['args'] is updated with hostname, loginbase and hostname_list
        and then substituted into data['message'][0] (subject) and
        data['message'][1] (body).
        """
        args = self.data['args']
        args['hostname'] = self.hostname
        args['loginbase'] = self.loginbase
        args['hostname_list'] = self._format_diaginfo()
        return PersistMessage(self.hostname,
                              self.data['message'][0] % args,
                              self.data['message'][1] % args,
                              True, db='monitor_persistmessages',
                              ticket_id=ticket_id)

    def getContacts(self):
        """Return the list of addresses selected by the role bits in data['email'].

        The roles are replaced by ADMIN alone when mail is off, debug is off
        and bcc is on, and also when mail and debug are both on.
        """
        roles = self.data['email']

        if not config.mail and not config.debug and config.bcc:
            roles = ADMIN
        if config.mail and config.debug:
            roles = ADMIN

        # build targets
        contacts = []
        if ADMIN & roles:
            contacts.append(config.email)
        if TECH & roles:
            contacts.append(TECHEMAIL % self.loginbase)
        if PI & roles:
            contacts.append(PIEMAIL % self.loginbase)
        if USER & roles:
            slices = plc.slices(self.loginbase)
            if len(slices) >= 1:
                for slice_name in slices:
                    contacts.append(SLICEMAIL % slice_name)
                print("SLIC: %20s : %d slices" % (self.loginbase, len(slices)))
            else:
                print("SLIC: %20s : 0 slices" % self.loginbase)

        return contacts
561
562
class NodeRecord:
    """Unfinished per-node record combining scan data, a ticket and a target.

    __init__ does not populate ``data`` (the code that loaded it from the
    scan database is disabled), so ``data`` has to be assigned before any
    method that reads it is called.  Several methods are placeholders.
    """

    def __init__(self, hostname, target):
        self.hostname = hostname
        self.ticket = None
        self.target = target
        # Disabled: load the node's values from the "findbad" scan database.
        #if hostname in fb['nodes']:
        #    self.data = fb['nodes'][hostname]['values']
        #else:
        #    raise Exception("Hostname not in scan database")

    def stageIswaitforever(self):
        """Return True if 'waitforever' occurs in the record's stage."""
        return 'waitforever' in self.data['stage']

    def severity(self):
        """Compare the current category with the previous one (cmpCategoryVal)."""
        return cmpCategoryVal(self.data['category'], self.data['prev_category'])

    def improved(self):
        """Return True if the category moved earlier in the category order."""
        return self.severity() > 0

    def end_record(self):
        """Run node_end_record() for this host and return its result."""
        return node_end_record(self.hostname)

    def reset_stage(self):
        """Set the stage back to 'findbad'.  Returns True."""
        self.data['stage'] = 'findbad'
        return True

    def open_tickets(self):
        """Return 1 if a ticket is attached and its status is 'open', else 0."""
        if self.ticket and self.ticket.status['status'] == 'open':
            return 1
        return 0

    def setIntrospect(self):
        """Placeholder: does nothing."""
        pass

    def email_notice(self):
        """Send the message for the current condition to its contacts.

        Both lookups are still placeholders that return None, so this call
        cannot succeed until they are implemented.
        """
        message = self._get_message_for_condition()
        message.send(self._get_contacts_for_condition())
        return True

    def close_ticket(self):
        """Close the attached ticket, if there is one."""
        if self.ticket:
            self.ticket.closeTicket()

    def exempt_from_penalties(self):
        """Return True if the hostname is listed in the "l_blacklist" database."""
        bl = database.dbLoad("l_blacklist")
        return self.hostname in bl

    def penalties(self):
        """Placeholder: always returns an empty list."""
        return []

    def escellate_penalty(self):
        """Placeholder: always returns True."""
        return True

    def reduce_penalty(self):
        """Placeholder: always returns True."""
        return True

    def atTarget(self):
        """Return the result of verifying ``data`` against the target."""
        return self.target.verify(self.data)

    def _get_condition(self):
        """Return the category in lower case."""
        return self.data['category'].lower()

    def _get_stage(self):
        """Unfinished: computes the age of the record but returns None.

        Stage names noted by the original author: "improvement",
        "firstnotice_noop", "secondnotice_noslicecreation",
        "thirdnotice_disableslices".
        """
        # The original used an undefined name (current_time) here, which
        # raised NameError on every call.
        delta = time.time() - self.data['time']
        # TODO: derive one of the stages listed above from delta.
        return None

    def _get_message_for_condition(self):
        """Placeholder: returns None."""
        pass

    def _get_contacts_for_condition(self):
        """Placeholder: returns None."""
        pass
640
class Action(MonRecord):
    """A MonRecord that also remembers which host it belongs to."""

    def __init__(self, host, data):
        self.host = host
        MonRecord.__init__(self, data)

    def deltaDays(self, delta):
        """Shift the 'time' entry of the instance dictionary by ``delta``.

        Assumes datetime and timedelta are the standard-library classes, in
        which case delta is a number of days -- TODO confirm where they are
        imported from.
        """
        fields = self.__dict__
        shifted = datetime.fromtimestamp(fields['time']) + timedelta(delta)
        fields['time'] = time.mktime(shifted.timetuple())
651                 
def node_end_record(node):
    """Prepend a "monitor-end-record" entry to the node's list in "act_all".

    Returns False without changing anything when the node has no entries in
    the "act_all" database.  Otherwise the newest entry is wrapped in an
    Action, five bookkeeping fields are removed from it, and the resulting
    record is marked as a close_rt action dated one week in the past,
    inserted at the front of the node's list and saved.  Returns True.
    """
    act_all = database.dbLoad("act_all")
    if node not in act_all or len(act_all[node]) == 0:
        return False

    newest = Action(node, act_all[node][0])
    stale_fields = ('rt',
                    'found_rt_ticket',
                    'second-mail-at-oneweek',
                    'second-mail-at-twoweeks',
                    'first-found')
    for field in stale_fields:
        newest.delField(field)

    rec = newest.get()
    rec['action'] = ["close_rt"]
    rec['category'] = "UNKNOWN"
    rec['stage'] = "monitor-end-record"
    rec['time'] = time.time() - 7*60*60*24

    act_all[node].insert(0, rec)
    database.dbDump("act_all", act_all)
    return True
677
if __name__ == "__main__":
    # Running the module directly only confirms that it loads.
    print("loaded")

    # Manual experiments, kept disabled:
    #r = RT()
    #r.email("test", "body of test message", ['database@cs.princeton.edu'])
    #from emailTxt import mailtxt
    #database.dbDump("persistmessages", {});
    #args = {'url_list': 'http://www.planet-lab.org/bootcds/planet1.usb\n','hostname': 'planet1','hostname_list': ' blahblah -  days down\n'}
    #m = PersistMessage("blue", "test 1", mailtxt.newdown_one[1] % args, True)
    #m.send(['soltesz@cs.utk.edu'])
    #m = PersistMessage("blue", "test 1 - part 2", mailtxt.newalphacd_one[1] % args, True)
    # Trick the timer into thinking some time has passed.
    #m.actiontracker.time = time.time() - 6*60*60*24
    #m.send(['soltesz@cs.utk.edu'])